In [18]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [19]:
datasets = ['QUES.csv', 'UIMS.csv']

In [20]:
def regParameters(X_train, X_test , y_train, y_test):
    reg = linear_model.LinearRegression()
    reg.fit(X_train, y_train)

    print('Coefficients: ', reg.coef_)
    # variance score: 1 means perfect prediction
    print('Variance score train: {}'.format(reg.score(X_train, y_train)))
    print('Variance score test: {}'.format(reg.score(X_test, y_test)))
    
    y_pred=reg.predict(X_test)
    
    residual=y_test-y_pred
    MRE=sum((abs(residual)/y_test))
    MMRE=MRE/len(y_test)
    print('MMRE:',MMRE)
    SQResidual=residual*residual
    SSE=sum(SQResidual)
    MSE=SSE/len(y_pred)
    print('MSE:', MSE)
    RMSE=np.sqrt(MSE)
    print('RMSE:', RMSE)
    MAE=sum(abs(residual))/len(y_pred)
    print('MAE:',MAE)
    
    p = X_test.shape[1]
    n = len(y_test)
    
    r_squared = metrics.r2_score(y_test, y_pred)
    adj_r_squared = 1 - ((1 - r_squared) * (n - 1) / (n - p - 1))
    
    print('R^2 score:', r_squared)
    print('Adjusted R^2 score:', adj_r_squared)

In [21]:
def regAnalysis(dataset):
    df = pd.read_csv("../dataset/" + dataset)
    X= df.iloc[:,:-1]
    Y = df.iloc[:,-1]
    print(f"Dataset: {dataset} with shape {df.shape}")

    X_train, X_test , y_train, y_test = train_test_split(X, Y,test_size=0.3,random_state=1)
    print("Values of Full Model")
    regParameters(X_train, X_test , y_train, y_test)
    
    X_train_oo = X_train.iloc[:,:-2]
    X_test_oo = X_test.iloc[:,:-2]
    print("Values of OO Model")
    regParameters(X_train_oo, X_test_oo , y_train, y_test)

    X_train_size = X_train.iloc[:,-2:]
    X_test_size = X_test.iloc[:,-2:]

In [22]:
for dataset in datasets:
    regAnalysis(dataset)

Dataset: QUES.csv with shape (71, 10)
Values of Full Model
Coefficients:  [  2.46926869  -1.48354162   1.0000219   -4.79520102 -11.39207493
  -2.21080312  -1.89839075   3.06716216   0.42226446]
Variance score train: 0.780034871156154
Variance score test: 0.490968090578458
MMRE: 0.3827529851647567
MSE: 652.2055476969198
RMSE: 25.538315286974584
MAE: 19.87167832469529
R^2 score: 0.490968090578458
Adjusted R^2 score: 0.10919415851230152
Values of OO Model
Coefficients:  [15.70591911  1.2092084   0.90287434 -7.21463794 -8.83709287  0.74008062
  3.96410509]
Variance score train: 0.4810100958663531
Variance score test: 0.5815407889424657
MMRE: 0.315706422736392
MSE: 536.1577808486403
RMSE: 23.155081102182308
MAE: 18.176603502241765
R^2 score: 0.5815407889424657
Adjusted R^2 score: 0.3723111834136986
Dataset: UIMS.csv with shape (39, 11)
Values of Full Model
Coefficients:  [ 5.20974931  2.32607938  5.24603829 -2.23118975  0.59155037  7.65401718
  2.32844038 -3.87349491  3.60965333  0.00826599