# MLR Functions

# In [1]:
def mlr(df, label):
  """Fit an OLS multiple linear regression on min-max scaled data.

  Every non-numeric column of ``df`` is one-hot encoded (first level dropped),
  all numeric columns -- including the label -- are rescaled with
  ``MinMaxScaler``, and ``label`` is regressed on the remaining columns plus
  an intercept column named ``const``.

  Args:
    df: pandas DataFrame holding the label and the predictors. It is not
      modified; the function works on a copy.
    label: Name of the numeric column to use as the dependent variable.

  Returns:
    The fitted statsmodels results object returned by ``sm.OLS(y, X).fit()``.

  Raises:
    KeyError: If ``label`` is not a numeric column of ``df``.

  Note:
    Boolean columns already present in ``df`` are not one-hot encoded and are
    not selected by ``select_dtypes(np.number)``, so they do not enter the
    model.
  """
  from sklearn import preprocessing
  import numpy as np
  import pandas as pd
  import statsmodels.api as sm

  df_dummy = df.copy()
  for col in df.columns:
    if not pd.api.types.is_numeric_dtype(df[col]):
      # dtype=float is essential: pandas >= 2.0 emits bool dummies by default,
      # and bool columns are discarded by select_dtypes(np.number) below,
      # which would silently remove every categorical predictor.
      dummies = pd.get_dummies(df_dummy[col], prefix=col, drop_first=True, dtype=float)
      df_dummy = df_dummy.join(dummies)

  df_num = df_dummy.select_dtypes(np.number)
  if label not in df_num.columns:
    raise KeyError(f"label column {label!r} is missing or not numeric")

  scaled = preprocessing.MinMaxScaler().fit_transform(df_num)
  # Keep the caller's row index so fitted values/residuals stay aligned with df.
  df_minmax = pd.DataFrame(scaled, columns=df_num.columns, index=df_num.index)

  y = df_minmax[label]
  X = df_minmax.drop(columns=[label]).assign(const=1)
  return sm.OLS(y, X).fit()
  

# In [3]:
# Calculate metrics

def calculateMetrics(df, label):
  """Fit an OLS regression on the unscaled data and report fit metrics.

  Every non-numeric column of ``df`` is one-hot encoded (first level dropped),
  ``label`` is regressed on the remaining numeric columns plus an intercept
  column named ``const``, and error metrics are computed from the in-sample
  residuals.

  Args:
    df: pandas DataFrame holding the label and the predictors. The caller's
      frame is not modified.
    label: Name of the numeric column to use as the dependent variable.

  Returns:
    dict with the keys ``'RMSE'``, ``'MAE'``, ``'R-Squared'`` and
    ``'Label Mean'``, each rounded to 4 decimal places.

  Raises:
    KeyError: If ``label`` is not a numeric column of ``df``.
  """
  import numpy as np
  import statsmodels.api as sm
  import pandas as pd

  encoded = df
  for col in df.columns:
    if not pd.api.types.is_numeric_dtype(df[col]):
      # dtype=float is essential: pandas >= 2.0 emits bool dummies by default,
      # and bool columns are discarded by select_dtypes(np.number) below,
      # which would silently remove every categorical predictor.
      dummies = pd.get_dummies(encoded[col], prefix=col, drop_first=True, dtype=float)
      encoded = encoded.join(dummies)

  numeric = encoded.select_dtypes(np.number)
  if label not in numeric.columns:
    raise KeyError(f"label column {label!r} is missing or not numeric")

  y = numeric[label]
  X = numeric.assign(const=1).drop(columns=[label])
  results1 = sm.OLS(y, X).fit()

  # Compare positionally so the result does not depend on index alignment.
  residuals = np.asarray(y) - np.asarray(results1.fittedvalues)
  rmse = np.sqrt(np.mean(residuals ** 2))
  mae = np.mean(np.abs(residuals))
  return {
      'RMSE': round(rmse, 4),
      'MAE': round(mae, 4),
      'R-Squared': round(results1.rsquared, 4),
      'Label Mean': round(np.mean(y), 4),
  }

def calculateMLRandMetrics(df, label):
  """Print the regression summary from ``mlr`` and the metrics from ``calculateMetrics``.

  Args:
    df: DataFrame passed unchanged to both ``mlr`` and ``calculateMetrics``.
    label: Name of the label column, passed unchanged to both functions.

  Returns:
    None. All output is written with ``print``.
  """
  fitted = mlr(df, label)
  summary_table = fitted.summary()
  print(summary_table)

  # Header separating the model summary from the metrics dictionary.
  print("\nMLR Metrics\n")
  metrics = calculateMetrics(df, label)
  print(metrics)