In [159]:
#importing necessary libraries to be used later in this notebook
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [160]:
#loading the energy dataset from CSV and previewing the first rows
csv_path = 'energydata_complete.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-11 17:00:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016-01-11 17:10:00,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,2016-01-11 17:20:00,50,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,...,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668
3,2016-01-11 17:30:00,50,40,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,...,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0,45.410389,45.410389
4,2016-01-11 17:40:00,60,40,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,...,17.0,45.4,6.133333,733.9,92.0,5.666667,47.666667,4.9,10.084097,10.084097


In [161]:
#dropping the date and lights columns from the dataset
#(non-mutating drop: the frame loaded above is not modified in place)
unscaled = df.drop(columns=['date','lights'])

#scaling the dataset with minmax scaler to keep the values in range of 0-1
#NOTE(review): the scaler is fit on every row and every remaining column
#before the train/test split performed later in this notebook, so test-set
#min/max values influence the scaling (data leakage) -- consider fitting the
#scaler on the training split only.
scaler = MinMaxScaler()
data = scaler.fit_transform(unscaled)
df = pd.DataFrame(data, columns=unscaled.columns)

In [162]:
#for simple linear rigression, getting the X and y variables
y1 = df['T6']
X1 = df[['T2']]

In [163]:
#training the simple linear regression model on X1 / y1
model = LinearRegression().fit(X1, y1)

#the score is computed on the same data the model was fitted on
simple_r2 = model.score(X1, y1)
print(f'Model R2_Score: {round(simple_r2,2)}')

Model R2_Score: 0.64


In [164]:
#setting the parameters for multiple regression:
#the target is 'Appliances', the predictors are every other column
target_col = 'Appliances'
y2 = df[target_col]
X2 = df.drop(columns=[target_col])

In [165]:
#dividing the dataset into a 70% train / 30% test split;
#random_state is fixed so the same split is produced on every run
split_parts = train_test_split(X2, y2, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts
print(f'X_train: {len(X_train)} X_test: {len(X_test)} y_train: {len(y_train)} y_test: {len(y_test)}')

X_train: 13814 X_test: 5921 y_train: 13814 y_test: 5921


In [166]:
#fitting our model (train)
#a fresh estimator is created here instead of reusing the instance that an
#earlier cell fitted on X1/y1, so this cell does not depend on that cell
#having been executed first
model = LinearRegression()
model.fit(X_train,y_train)
#using the trained model for predictions
y_pred = model.predict(X_test)

In [167]:
#evaluating the test-set predictions: R2, mean absolute error (MAE),
#residual sum of squares (RSS) and root mean squared error (RMSE)
R2 = r2_score(y_test,y_pred)
MAE = mean_absolute_error(y_test, y_pred)
#y_pred is a plain array, so the subtraction pairs values by position
RSS = np.sum(np.square(y_test - y_pred))
RMSE = np.sqrt(mean_squared_error(y_test,y_pred))

#label fixed: the value printed is the MAE (was misspelled 'MEA')
print(f'R2_Score {round(R2,2)} MAE {round(MAE,2)} RSS {round(RSS,2)} RMSE {round(RMSE,3)}')

R2_Score 0.15 MEA 0.05 RSS 45.35 RMSE 0.088


In [169]:
#pairing each feature name with its fitted coefficient and sorting ascending,
#so the smallest coefficient is the first row and the largest is the last
feature_coef_pairs = list(zip(X2.columns, model.coef_))
coef_order = pd.DataFrame(feature_coef_pairs, columns=['Features','Coefficients'])
coef_order = coef_order.sort_values(by='Coefficients')

coef_order

Unnamed: 0,Features,Coefficients
3,RH_2,-0.456698
18,T_out,-0.32186
2,T2,-0.236178
16,T9,-0.189941
15,RH_8,-0.157595
20,RH_out,-0.077671
13,RH_7,-0.044614
17,RH_9,-0.0398
8,T5,-0.015657
0,T1,-0.003281


In [154]:
#training a ridge regression (alpha=0.4) on the training split;
#fit() returns the fitted estimator, so it can be assigned directly
reg = Ridge(alpha=0.4).fit(X_train, y_train)

#predicting the test split with the ridge model
reg_pred = reg.predict(X_test)

In [155]:
#root mean squared error of the ridge predictions on the test split
ridge_mse = mean_squared_error(y_test, reg_pred)
RMSE = np.sqrt(ridge_mse)

print(f'RMSE {round(RMSE,3)}')

RMSE 0.088


In [156]:
#training a lasso regression (alpha=0.001) on the training split;
#fit() returns the fitted estimator, so it can be assigned directly
lass = Lasso(alpha=0.001).fit(X_train, y_train)

#predicting the test split with the lasso model
lass_pred = lass.predict(X_test)

In [157]:
#counting how many lasso coefficients are non-zero
int(np.count_nonzero(lass.coef_))

4

In [170]:
#root mean squared error of the lasso predictions on the test split
lasso_mse = mean_squared_error(y_test, lass_pred)
RMSE = np.sqrt(lasso_mse)

print(f'RMSE {round(RMSE,3)}')

RMSE 0.094
