In [39]:
import os

import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [40]:
# CSV file names to analyse; each one is passed to regAnalysis in turn.
datasets = [
    'QUES.csv',
    'UIMS.csv',
]

In [41]:
def regParameters(X_train, X_test , y_train, y_test):
    """Fit a linear regression on the training split and score it on the test split.

    Parameters
    ----------
    X_train, X_test
        2-D feature tables; ``X_test.shape[1]`` is used as the predictor count.
    y_train, y_test
        1-D targets matching the rows of the corresponding feature table.

    Returns
    -------
    dict
        Test-split metrics keyed by 'R^2 score', 'Adjusted R^2 score', 'MAE',
        'MSE', 'RMSE' and 'MMRE'.

    Notes
    -----
    * Adjusted R^2 is reported as NaN when ``n - p - 1 <= 0`` (no more test
      rows than predictors + 1), because the correction is undefined there
      and would otherwise divide by zero or flip sign.
    * MMRE divides by ``|y_test|`` so negative targets cannot cancel out the
      relative errors. A target of exactly zero yields inf/NaN for MMRE.
    """
    reg = linear_model.LinearRegression()
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)

    # Work on a plain float array so every metric is a vectorised reduction.
    actual = np.asarray(y_test, dtype=float)
    residual = actual - y_pred
    abs_residual = np.abs(residual)

    # Zero targets give inf/NaN here; silence the numpy warning and let the
    # non-finite value surface in the result instead.
    with np.errstate(divide='ignore', invalid='ignore'):
        MMRE = np.mean(abs_residual / np.abs(actual))

    MSE = np.mean(residual ** 2)
    RMSE = np.sqrt(MSE)
    MAE = np.mean(abs_residual)

    p = X_test.shape[1]
    n = len(actual)

    r_squared = metrics.r2_score(y_test, y_pred)

    # Residual degrees of freedom; must be positive for the adjustment to exist.
    dof = n - p - 1
    if dof > 0:
        adj_r_squared = 1 - ((1 - r_squared) * (n - 1) / dof)
    else:
        adj_r_squared = float('nan')

    return {
        'R^2 score': r_squared,
        'Adjusted R^2 score': adj_r_squared,
        'MAE': MAE,
        'MSE': MSE,
        'RMSE': RMSE,
        'MMRE': MMRE,
    }

In [42]:
def regAnalysis(dataset, data_dir="../dataset/", test_size=0.3, random_state=1):
    """Load one CSV, fit three linear models on feature subsets, and print their metrics.

    The last column of the CSV is used as the target and all other columns as
    features. Three models are compared:

    * 'Full Model' - every feature column
    * 'OO Model'   - every feature column except the last two
    * 'Size Model' - only the last two feature columns

    NOTE(review): the names suggest the last two feature columns are size
    metrics and the rest are object-oriented metrics - confirm against the
    CSV schema.

    Parameters
    ----------
    dataset : str
        CSV file name, resolved relative to ``data_dir``.
    data_dir : str, optional
        Directory containing the CSV files.
    test_size : float, optional
        Fraction of rows held out for testing (forwarded to train_test_split).
    random_state : int, optional
        Seed for the train/test split, so the printed numbers are reproducible.

    Returns
    -------
    pandas.DataFrame
        One row per model, one column per metric (the same table that is printed).
    """
    df = pd.read_csv(os.path.join(data_dir, dataset))
    X = df.iloc[:, :-1]
    Y = df.iloc[:, -1]
    print(f"Dataset: {dataset} with shape {df.shape}")

    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )

    # Column slice used by each model; insertion order fixes the row order of the table.
    feature_subsets = {
        'Full Model': slice(None),
        'OO Model': slice(None, -2),
        'Size Model': slice(-2, None),
    }

    results = {
        model_name: regParameters(
            X_train.iloc[:, cols], X_test.iloc[:, cols], y_train, y_test
        )
        for model_name, cols in feature_subsets.items()
    }

    results_df = pd.DataFrame(results).T
    print(results_df.to_string())
    print()

    return results_df

In [43]:
# Run the three-model comparison on every configured CSV; each call prints
# the dataset shape followed by a metrics table.
for dataset in datasets:
    regAnalysis(dataset)

Dataset: QUES.csv with shape (71, 10)
            R^2 score  Adjusted R^2 score        MAE         MSE       RMSE      MMRE
Full Model   0.490968            0.109194  19.871678  652.205548  25.538315  0.382753
OO Model     0.581541            0.372311  18.176604  536.157781  23.155081  0.315706
Size Model   0.577141            0.532629  16.751197  541.795614  23.276503  0.407710

Dataset: UIMS.csv with shape (39, 11)
            R^2 score  Adjusted R^2 score        MAE          MSE       RMSE      MMRE
Full Model   0.228656           -7.484784  43.964687  7806.831216  88.356274  1.391784
OO Model     0.208083           -1.903697  44.671002  8015.054940  89.526839  1.356919
Size Model   0.225861            0.053830  48.655127  7835.120704  88.516217  3.277077

