In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split , cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso,Ridge,ElasticNet, SGDRegressor
from sklearn.metrics import  mean_squared_error, mean_absolute_error

In [2]:
data = pd.read_csv('QuizB_data.csv')
data.drop(['date','lights'],  axis=1 , inplace=True)
data.head(4)

Unnamed: 0,Appliances,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,60,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,45.566667,17.166667,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,60,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,45.9925,17.166667,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,50,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,45.89,17.166667,...,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668
3,50,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,45.723333,17.166667,...,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0,45.410389,45.410389


In [11]:
#Q12
scaler = MinMaxScaler()

'''Simple linear regression'''
main = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)

y = main['T6']
X = main[['T2']]

#split into train and test

Xtrain, Xval, ytrain, yval = train_test_split(X,y, test_size=0.3, random_state=42)


reg_mod1 = LinearRegression()

model_1 = Pipeline([ ('scaler' , scaler),
                    ('lin_reg', reg_mod1) 
                   ])
model_1.fit(Xtrain,ytrain)
print('R2_score:',round(model_1.score(Xval,yval), 2))

R2_score: 0.64


In [4]:
#Q

'''Multiple linear regression'''
y = main['Appliances']
X = main.drop('Appliances',axis=1 )


#split into train and test
Xtrain, Xval, ytrain, yval = train_test_split(X,y, test_size=0.3, random_state=42)

#instantiate model
reg_mod2 =LinearRegression()

#encapsolate workflow i
model_2 = Pipeline([
    ('lin_reg', reg_mod2) 
                   ])
#fit
model_2.fit(Xtrain,ytrain)

yhat = model_2.predict(Xval)

mae = mean_absolute_error(yval, yhat)
print('MAE:', round(mae, 2))

rss = np.sum(np.square(yval-yhat))
print('RSS:', round(rss, 2))

rmse = np.sqrt(mean_squared_error(yval, yhat))
print('RMSE:', round(rmse, 3))
print('R2_score:', round(model_2.score(Xval,yval), 3))

MAE: 0.05
RSS: 45.34
RMSE: 0.088
R2_score: 0.149


In [5]:
def get_weights_df(model, feat, col_name):
    #this function returns the weight of every feature
    weights = pd.Series(model.coef_, feat.columns).sort_values()
    weights_df = pd.DataFrame(weights).reset_index()
    weights_df.columns = ['Features', col_name]
    weights_df[col_name].round(3)
    return weights_df

linear_model_weights = get_weights_df(model_2['lin_reg'], Xtrain, 'Linear_Model_Weight')

In [6]:
linear_model_weights

Unnamed: 0,Features,Linear_Model_Weight
0,rv2,-213606800000.0
1,RH_2,-0.4568444
2,T_out,-0.3218387
3,T2,-0.2363117
4,T9,-0.1899568
5,RH_8,-0.1576669
6,RH_out,-0.07762611
7,RH_7,-0.04454292
8,RH_9,-0.03975591
9,T5,-0.01563443


In [7]:
#Q

'''ridge regression'''

#instantiate model
reg_mod3 = Ridge(alpha = 0.4)

#encapsulate process
model_3 = Pipeline([
                    ('lin_reg', reg_mod3) 
                   ])
#fit model
model_3.fit(Xtrain,ytrain)

print('R2_score:', round(model_3.score(Xval,yval), 3))

#RMSE
yhat = model_3.predict(Xval)
rmse = np.sqrt(mean_squared_error(yval, yhat))
print('RMSE:', round(rmse, 3))

'''Both R**2 and RMSE  of Ridge remain unchanged'''

R2_score: 0.149
RMSE: 0.088


'Both R**2 and RMSE  of Ridge remain unchanged'

In [8]:
#Q

'''Lasso regression-L1'''
reg_mod4 = Lasso(alpha = 0.001)

model_4 = Pipeline([# ('scaler' , scaler),
                    ('lin_reg', reg_mod4) 
                   ])
model_4.fit(Xtrain,ytrain)
print('R2_score:', round(model_4.score(Xval,yval), 3))

Lasso_weights = get_weights_df(model_4['lin_reg'], Xtrain, 'Lasso_Weight') 
Lasso_weights

R2_score: 0.027


Unnamed: 0,Features,Lasso_Weight
0,RH_out,-0.049557
1,RH_8,-0.00011
2,T1,0.0
3,Tdewpoint,0.0
4,Visibility,0.0
5,Press_mm_hg,-0.0
6,T_out,0.0
7,RH_9,-0.0
8,T9,-0.0
9,T8,0.0


In [9]:
#lasso RMSE
yhat = model_4.predict(Xval)
#Root Mean Square Error

rmse = np.sqrt(mean_squared_error(yval, yhat))
print('Lasso RMSE:',round(rmse, 3))

Lasso RMSE: 0.094
