### Importing libraries

In [713]:
import pandas as pds
import numpy as nmpy
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score

### Importing dataset

In [714]:
xl = pds.read_excel('C:/Users/Beagle/Downloads/ENB2012_data.xlsx')
xl.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,28.28


### Getting columns to use

In [715]:
cols = ['X1','X2','X3','X4','X5','X6','X7','X8','Y2']
xl = xl[cols]
xl.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,Y2
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,28.28


### Defining X and y for training and prediction

In [716]:
dvalues = xl.values
X = dvalues[::-1]
y = dvalues[:,-1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=40)
print(X_train.shape); print(X_test.shape)

(537, 9)
(231, 9)


### Linear Regression

In [717]:
lnr = LinearRegression()
lnr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

### Predicting via the model

In [718]:
y_pred = lnr.predict(X_test)
y_pred_on_train_data = lnr.predict(X_train)


### Scores, MSE and 5-Fold CV

In [719]:
lnr.score(X_train, y_train)

0.8950117113486398

In [720]:
lnr.score(X_test, y_test)

0.8696024455434436

In [721]:
print("MSE", mean_squared_error(y_test,y_pred))

MSE 10.839089411389473


In [722]:
cv = KFold(n_splits=5, random_state=1, shuffle=True)
scores = cross_val_score(lnr, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
scores = np.absolute(scores)
print(scores)
print('Mean MSE: %.3f' % (np.mean(scores)))

[2.58943034 2.2955933  2.43483118 2.10607045 2.14949009]
Mean MSE: 2.315


### Lasso Regression

In [723]:
ls = Lasso(alpha=0.1)
ls.fit(X_train, y_train) 

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

### Predicting via the Model

In [724]:
y_pred_ls = ls.predict(X_test)
y_pred_on_train_data_ls = ls.predict(X_train)

### Score, MSE and 5-Fold CV

In [725]:
ls.score(X_train, y_train)

0.8717487589887787

In [726]:
ls.score(X_test, y_test)

0.8428952457895338

In [727]:
print("MSE", mean_squared_error(y_test,y_pred_ls))

MSE 13.059082932486604


In [728]:
cv = KFold(n_splits=5, random_state=1, shuffle=True)
scores = cross_val_score(ls, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
scores = np.absolute(scores)
print(scores)
print('Mean MSE: %.3f' % (np.mean(scores)))

[2.91299073 2.57006556 2.90487627 2.43588102 2.34297339]
Mean MSE: 2.633


### Ridge Regression

In [729]:
rg = Ridge(alpha=0.1)
rg.fit(X_train, y_train)

Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

### Predicting via the model

In [730]:
y_pred_rg = rg.predict(X_test)
y_pred_train_data_rg = rg.predict(X_train)

### Score, MSE and 5-Fold CV

In [731]:
rg.score(X_train, y_train)

0.8899475286199939

In [732]:
rg.score(X_test, y_test)

0.8652483628386411

In [733]:
print("MSE", mean_squared_error(y_test,y_pred_rg))

MSE 11.201015614213016


In [734]:
cv = KFold(n_splits=5, random_state=1, shuffle=True)
scores = cross_val_score(rg, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
scores = np.absolute(scores)
print(scores)
print('Mean MSE: %.3f' % (np.mean(scores)))

[2.66893668 2.35730946 2.49791788 2.13731907 2.20945644]
Mean MSE: 2.374


### ElasticNet Regression

In [735]:
en = ElasticNet(alpha = 0.1)
en.fit(X_train, y_train)

ElasticNet(alpha=0.1, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

### Predicting via the model

In [736]:
y_pred_en = en.predict(X_test)
y_pred_train_data_en = en.predict(X_train)

### Score, MSE and 5-Fold CV

In [737]:
en.score(X_train, y_train)

0.8500259883905215

In [738]:
en.score(X_test, y_test)

0.8175006764691911

In [739]:
print("MSE", mean_squared_error(y_test,y_pred_en))

MSE 15.169966135580928


In [740]:
cv = KFold(n_splits=5, random_state=1, shuffle=True)
scores = cross_val_score(en, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
scores = np.absolute(scores)
print(scores)
print('Mean MSE: %.3f' % (np.mean(scores)))

[3.22595952 2.87665423 3.22875864 2.79898289 2.69281327]
Mean MSE: 2.965
