In [1]:
# import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# read the dataset into pandas; a forward slash keeps the relative path
# portable across operating systems and avoids the invalid '\e' escape
# sequence that the backslash-separated literal contained
energy = pd.read_csv('Datasets/energydata_complete.csv')

In [3]:
# preview the first five rows of the raw dataset
energy.head(n=5)

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-11 17:00:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016-01-11 17:10:00,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,2016-01-11 17:20:00,50,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,...,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668
3,2016-01-11 17:30:00,50,40,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,...,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0,45.410389,45.410389
4,2016-01-11 17:40:00,60,40,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,...,17.0,45.4,6.133333,733.9,92.0,5.666667,47.666667,4.9,10.084097,10.084097


In [4]:
# (rows, columns) of the raw dataset
energy.shape

(19735, 29)

In [5]:
# data type of each column; non-numeric columns must be excluded before scaling
energy.dtypes

date            object
Appliances       int64
lights           int64
T1             float64
RH_1           float64
T2             float64
RH_2           float64
T3             float64
RH_3           float64
T4             float64
RH_4           float64
T5             float64
RH_5           float64
T6             float64
RH_6           float64
T7             float64
RH_7           float64
T8             float64
RH_8           float64
T9             float64
RH_9           float64
T_out          float64
Press_mm_hg    float64
RH_out         float64
Windspeed      float64
Visibility     float64
Tdewpoint      float64
rv1            float64
rv2            float64
dtype: object

In [6]:
# import MinMaxScaler from sklearn library 
from sklearn.preprocessing import MinMaxScaler

In [7]:
# scaler that maps every column onto the [0, 1] interval (the default range)
scaler = MinMaxScaler(feature_range=(0, 1))

In [9]:
# normalise the dataset using the MinMaxScaler method.
# 'date' (object dtype) and 'lights' are excluded; the frame to scale is
# built once and reused for both the values and the column labels.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split further down, so test-set minima/maxima influence the
# training data -- consider fitting on the training split only.
features_to_scale = energy.drop(['date', 'lights'], axis=1)
normalised_energy = pd.DataFrame(
    scaler.fit_transform(features_to_scale),
    columns=features_to_scale.columns,
)

In [10]:
# preview the first five rows of the scaled dataset
normalised_energy.head(n=5)

Unnamed: 0,Appliances,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,0.046729,0.32735,0.566187,0.225345,0.684038,0.215188,0.746066,0.351351,0.764262,0.175506,...,0.223032,0.67729,0.37299,0.097674,0.894737,0.5,0.953846,0.538462,0.265449,0.265449
1,0.046729,0.32735,0.541326,0.225345,0.68214,0.215188,0.748871,0.351351,0.782437,0.175506,...,0.2265,0.678532,0.369239,0.1,0.894737,0.47619,0.894872,0.533937,0.372083,0.372083
2,0.037383,0.32735,0.530502,0.225345,0.679445,0.215188,0.755569,0.344745,0.778062,0.175506,...,0.219563,0.676049,0.365488,0.102326,0.894737,0.452381,0.835897,0.529412,0.572848,0.572848
3,0.037383,0.32735,0.52408,0.225345,0.678414,0.215188,0.758685,0.341441,0.770949,0.175506,...,0.219563,0.671909,0.361736,0.104651,0.894737,0.428571,0.776923,0.524887,0.908261,0.908261
4,0.046729,0.32735,0.531419,0.225345,0.676727,0.215188,0.758685,0.341441,0.762697,0.178691,...,0.219563,0.671909,0.357985,0.106977,0.894737,0.404762,0.717949,0.520362,0.201611,0.201611


In [11]:
# feature matrix: every scaled column except the target
X = normalised_energy.drop(columns='Appliances')

In [12]:
# target vector: the scaled 'Appliances' column
y = normalised_energy['Appliances']

In [13]:
# import train_test_split from the sklearn library
from sklearn.model_selection import train_test_split

In [14]:
# hold out 30% of the rows for testing; the fixed seed makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [15]:
# import LinearRegression model from sklearn library
from sklearn.linear_model import LinearRegression

In [16]:
# ordinary least-squares model with an intercept term (the default)
linreg = LinearRegression(fit_intercept=True)

In [17]:
# fit the linear model on the training split
linreg.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [18]:
# predict the target for the held-out test split
y_pred = linreg.predict(X=x_test)

In [19]:
# import metrics from sklearn
from sklearn import metrics

In [20]:
# mean absolute error of the linear model on the test split
print(metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred))

0.050133623977429535


In [21]:
# mean squared error of the linear model on the test split
print(metrics.mean_squared_error(y_true=y_test, y_pred=y_pred))

0.007658778867195367


In [22]:
# root mean squared error of the linear model on the test split
print(np.sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)))

0.0875144494766171


In [23]:
# calculating r_squared (coefficient of determination) on the test split
print(metrics.r2_score(y_true=y_test, y_pred=y_pred))

0.14890246319303535


In [33]:
# empty frame that the following cells fill with feature names and fitted weights
linreg_weights = pd.DataFrame()

In [34]:
# one row per feature, labelled with the training column names
linreg_weights['features'] = x_train.columns

In [35]:
# attach the fitted coefficients positionally, in the same order as the
# feature names; assigning the array directly (rather than a Series) avoids
# index alignment, so a length mismatch raises instead of silently giving NaN
linreg_weights['weights'] = linreg.coef_

In [36]:
# display the model weight (coefficient) fitted for each feature
linreg_weights

Unnamed: 0,features,weights
0,T1,-0.003281
1,RH_1,0.553547
2,T2,-0.236178
3,RH_2,-0.456698
4,T3,0.290627
5,RH_3,0.096048
6,T4,0.028981
7,RH_4,0.026386
8,T5,-0.015657
9,RH_5,0.016006


In [57]:
# getting the maximum weight value
linreg_weights.loc[:, 'weights'].max()

0.5535465998386385

In [58]:
# getting the minimum weight value
linreg_weights.loc[:, 'weights'].min()

-0.4566979483384993

In [24]:
# import Lasso regression from sklearn library 
from sklearn.linear_model import Lasso

In [26]:
# L1-regularised linear model; alpha sets the penalty strength
lasso = Lasso(alpha=0.001)

In [27]:
# fit the Lasso model on the training split
lasso.fit(X=x_train, y=y_train)

Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [28]:
# Lasso predictions for the held-out test split
y_pred1 = lasso.predict(X=x_test)

In [29]:
# print the coefficients fitted by the Lasso model on x_train
print(lasso.coef_)

[ 0.          0.01787993  0.         -0.          0.          0.
 -0.          0.         -0.          0.          0.         -0.
 -0.         -0.          0.         -0.00011004 -0.         -0.
  0.         -0.         -0.04955749  0.00291176  0.          0.
 -0.         -0.        ]


In [39]:
# mean absolute error of the Lasso model on the test split
print(metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred1))

0.055256639821262235


In [40]:
# calculating mean_squared_error of the Lasso model on the test split
print(metrics.mean_squared_error(y_true=y_test, y_pred=y_pred1))

0.008757535449401907


In [41]:
# calculating root_mean_squared_error of the Lasso model on the test split
print(np.sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=y_pred1)))

0.09358170467245137


In [42]:
# import ridge regression from sklearn library 
from sklearn.linear_model import Ridge 

In [43]:
# L2-regularised linear model; alpha sets the penalty strength
ridge = Ridge(alpha=0.4)

In [45]:
# fit the Ridge model on the training split
ridge.fit(X=x_train, y=y_train)

Ridge(alpha=0.4, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [46]:
# Ridge predictions for the held-out test split
y_pred2 = ridge.predict(X=x_test)

In [47]:
# mean absolute error of the Ridge model on the test split
print(metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred2))

0.050087445840923846


In [48]:
# mean squared error of the Ridge model on the test split
print(metrics.mean_squared_error(y_true=y_test, y_pred=y_pred2))

0.007662176129398588


In [49]:
# root mean squared error of the Ridge model on the test split
print(np.sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=y_pred2)))

0.08753385704628003
