In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [57]:
df=pd.read_excel('https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx')

In [58]:
#rename columns

column_names = {'X1':'Relative_Compactness', 'X2': 'Surface_Area',
'X3': 'Wall_Area', 'X4': 'Roof_Area', 'X5': 'Overall_Height',
'X6': 'Orientation', 'X7': 'Glazing_Area',
'X8': 'Glazing_Area_Distribution',
'Y1': 'Heating_Load', 'Y2': 'Cooling_Load'}

In [75]:
df = df.rename(columns=column_names)
df.head()

Unnamed: 0,Relative_Compactness,Surface_Area,Wall_Area,Roof_Area,Overall_Height,Orientation,Glazing_Area,Glazing_Area_Distribution,Heating_Load,Cooling_Load
0,0.98,514.5,294.0,110.25,7.0,2,0.0,0,15.55,21.33
1,0.98,514.5,294.0,110.25,7.0,3,0.0,0,15.55,21.33
2,0.98,514.5,294.0,110.25,7.0,4,0.0,0,15.55,21.33
3,0.98,514.5,294.0,110.25,7.0,5,0.0,0,15.55,21.33
4,0.9,563.5,318.5,122.5,7.0,2,0.0,0,20.84,28.28


# Mean Absolute Error (MAE)

In [80]:
#Firstly, we normalise our dataset to a common scale using the min max scaler

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
normalised_df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
features_df = normalised_df.drop(columns=['Heating_Load', 'Cooling_Load'])
heating_target = normalised_df['Heating_Load']

In [81]:
#Now, we split our dataset into the training and testing dataset. Recall that we had earlier segmented the features and target variables.

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(features_df, heating_target, test_size=0.3, random_state=1)

In [82]:
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()

In [83]:
#fit the model to the training dataset

linear_model.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [84]:
#obtain predictions

predicted_values = linear_model.predict(x_test)

In [85]:
#MAE

from sklearn.metrics import mean_absolute_error

mae = mean_absolute_error(y_test, predicted_values)

round(mae, 3)

0.063

# Residual Sum of Squares (RSS) 

In [96]:
rss = np.sum(np.square(y_test - predicted_values))

round(rss, 3) #prints 1.823


1.817

# Root Mean Square Error (RMSE)

In [89]:
from sklearn.metrics import mean_squared_error

rmse = np.sqrt(mean_squared_error(y_test, predicted_values))

round(rmse, 3) 

0.089

# R-Squared 

In [90]:
from sklearn.metrics import r2_score

r2_score = r2_score(y_test, predicted_values)

round(r2_score, 3) #prints 0.893

0.894

# Penalization Methods

# Ridge Regression

In [91]:
from sklearn.linear_model import Ridge

ridge_reg = Ridge(alpha=0.5)

ridge_reg.fit(x_train, y_train)

Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

# Feature Selection and Lasso Regression


In [92]:
from sklearn.linear_model import Lasso

lasso_reg = Lasso(alpha=0.001)

lasso_reg.fit(x_train, y_train)

Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [107]:
#comparing the effects of regularisation

def get_weights_df(model, feat, col_name):
    get_weights_df

In [106]:
#this function returns the weight of every feature

weights = pd.Series(model.coef_, feat.columns).sort_values()
weights_df = pd.DataFrame(weights).reset_index()
weights_df.columns = ['Features', col_name]
weights_df[col_name].round(3)
return weights_df

NameError: name 'model' is not defined

In [104]:
linear_model_weights = get_weights_df(linear_model, x_train, 'Linear_Model_Weight')
ridge_weights_df = get_weights_df(ridge_reg, x_train, 'Ridge_Weight')
lasso_weights_df = get_weights_df(lasso_reg, x_train, 'Lasso_weight')
final_weights = pd.merge(linear_model_weights, ridge_weights_df, on='Features')
final_weights = pd.merge(final_weights, lasso_weights_df, on='Features')

ValueError: can not merge DataFrame with instance of type <class 'NoneType'>