In [191]:
# import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

In [192]:
energy_data = pd.read_csv('energy_data.csv')
energy_data.sample(2)

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
7862,2016-03-06 07:20:00,50,0,18.79,37.9,16.7,40.2,20.2,38.09,17.39,...,18.033333,41.826667,2.833333,745.5,91.666667,7.0,36.0,1.633333,4.016206,4.016206
13944,2016-04-17 13:00:00,70,0,22.5,43.556667,21.926667,40.526667,23.066667,38.5,22.1,...,21.0,39.493333,6.0,755.8,72.0,1.0,40.0,1.3,6.583746,6.583746


In [193]:
# drop date and lights
energy_data.drop(['date','lights'], axis = 1, inplace = True)

In [195]:
# normalize dataset
scaler = MinMaxScaler()
Normalized = pd.DataFrame(scaler.fit_transform(energy_data), columns= energy_data.columns)
Normalized.sample(2)

Unnamed: 0,Appliances,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
2033,0.345794,0.380148,0.44317,0.239157,0.644953,0.268085,0.722698,0.267267,0.757149,0.297075,...,0.229969,0.779525,0.467846,0.778295,0.703947,0.285714,0.6,0.559578,0.794104,0.794104
9150,0.028037,0.432946,0.233281,0.065423,0.604649,0.373878,0.336501,0.422523,0.236449,0.378404,...,0.468262,0.40287,0.102894,0.853488,0.855263,0.071429,0.923077,0.144796,0.039366,0.039366


In [196]:
# independent columns
x = Normalized.drop(['Appliances'], axis = 1)

# dependent columns(to be predicted)
y = Normalized['Appliances']

In [197]:
# split dataset into test and train
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.3, random_state = 42)

## Simple Linear Regression Model using temperature in the living room in Celsius (x = T2) and the temperature outside the building (y = T6)

In [198]:
x_train = xtrain[['T2']]
y_train = xtrain['T6']
x_test = xtest[['T2']]
y_test = xtest['T6']

Simple_Lr = LinearRegression()
Simple_Lr.fit(x_train, y_train)
predicted_Simple_Lr = Simple_Lr.predict(x_test)

In [199]:
r2_value = r2_score(y_test, predicted_Simple_Lr)
round(r2_value, 2)

0.64

## Multiple Linear Regression 

In [200]:
# Linear Model
Lr = LinearRegression()
Lr.fit(xtrain, ytrain)
predictedLr = Lr.predict(xtest)

In [201]:
# Mean absolute Error in 2 decimal places
mean_absolute_error = round(mean_absolute_error(ytest, predictedLr),2)
print('Mean Absolute Error =', mean_absolute_error)

Mean Absolute Error = 0.05


In [202]:
# residual sum of squares in 2 decimal places
residual_sum_of_squares = round(np.sum(np.square(ytest-predictedLr)),2)
print('Residual Sum of Squares =', residual_sum_of_squares)

Residual Sum of Squares = 45.35


In [203]:
# Root Mean Squared Error in three decimal places
Root_Mean_Squared_Error = round(np.sqrt(mean_squared_error(ytest,predictedLr)),3)
print('Root Mean Squared Error =', Root_Mean_Squared_Error)

Root Mean Squared Error = 0.088


In [204]:
# Coefficient of Determination(r-squard) in two decimal places
Coefficient_of_Determination = round(r2_score(ytest,predictedLr),2)
print('Coefficient_of_Determination =', Coefficient_of_Determination)

Coefficient_of_Determination = 0.15


In [185]:
# Finding the features with the lowest and hightest weight

Model_weight = pd.Series(Lr.coef_, xtrain.columns)

# for highest weight
print('Feature with highest weight is',Model_weight.idxmax())

#for lowest weight
print('Feature with lowest weight is',Model_weight.idxmin())

Feature with highest weight is RH_1
Feature with lowest weight is RH_2


## Ridge Regression

In [165]:
# Ridge Regression using alpha = 0.4
RR = Ridge(alpha = 0.4)
RR.fit(xtrain, ytrain)
predictedRR = RR.predict(xtest)

In [169]:
Lr_rmse= np.sqrt(mean_squared_error(ytest, predictedLr))
RR_rmse = np.sqrt(mean_squared_error(ytest, predictedRR))
if Lr_rmse == RR_rmse:
    print('There exists no change')
else:
    print('There is a change')

There is a change


## Lasso Regression

In [175]:
LassoR = Lasso(alpha = 0.001)
LassoR.fit(xtrain, ytrain)
predictedLassoR = LassoR.predict(xtest)

In [183]:
Lr_weight = pd.Series(LassoR.coef_, xtrain.columns)
print('Number of features with non-zero weights =',Lr_weight[Lr_weight!=0].count())

Number of features with non-zero weights = 4


In [188]:
# RMSE with the Lasso Regression in 3 decimal places
LassoR_rmse = round(np.sqrt(mean_squared_error(ytest, predictedLassoR)),3)
print('Root mean squared error for Lasso Regression is',LassoR_rmse)

Root mean squared error for Lasso Regression is 0.094
