In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score

In [2]:
# Load the raw dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='energydata_complete.csv')

# 12

In [3]:
# Simple linear regression of T6 on the single predictor T2.
lm = LinearRegression()
x = df.loc[:, ['T2']]  # list selector keeps the predictor as a DataFrame
y = df.loc[:, 'T6']    # scalar selector yields the target as a Series
lm.fit(X=x, y=y)

LinearRegression()

In [4]:
# R^2 of the single-feature model, scored on the same data it was fitted on.
r2 = lm.score(X=x, y=y)
print(f'The R^2 value is {r2:.2f}')

The R^2 value is 0.64


# 13

In [5]:
# Remove the 'date' and 'lights' columns before scaling and modelling.
# Reassigning (instead of inplace=True) avoids mutating the frame in place, and
# errors='ignore' keeps this cell safe to re-run once the columns are gone.
# `columns=` already selects the axis, so no separate `axis` argument is needed.
df = df.drop(columns=['date', 'lights'], errors='ignore')

In [6]:
# Min-max scale every remaining column and rebuild a DataFrame with the same
# column labels.
# NOTE(review): the scaler is fitted on the full frame before the train/test
# split further down, so test-set minima/maxima influence the scaling. Confirm
# this is intended before fitting on the training rows only.
scale = MinMaxScaler().fit(df)
df_norm = pd.DataFrame(scale.transform(df), columns=df.columns)

In [7]:
# Target is the scaled 'Appliances' column; every other column is a predictor.
Y = df_norm['Appliances']
X = df_norm.drop(columns=['Appliances'])

In [8]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_tr, X_te, Y_tr, Y_te = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Ordinary least squares on all predictors, fitted on the training split only.
multi_reg = LinearRegression()
multi_reg.fit(X=X_tr, y=Y_tr)

LinearRegression()

In [10]:
# Predictions of the multi-feature linear model for the held-out test rows.
Y_pred = multi_reg.predict(X=X_te)

In [11]:
# Mean absolute error on the test split (in min-max scaled target units).
mae = mean_absolute_error(y_true=Y_te, y_pred=Y_pred)
print(f'The mean absolute error on test set is {mae:.2f}')

The mean absolute error on test set is 0.05


# 14

In [12]:
# Residual sum of squares: sum of squared differences between the test targets
# and the linear model's predictions.
residuals = Y_te - Y_pred
rss = np.sum(np.square(residuals))
print(f'The residual sum of squares is {rss:.2f}')

The residual sum of squares is 45.35


# 15

In [13]:
# Root mean squared error of the linear model on the test split.
mse = mean_squared_error(y_true=Y_te, y_pred=Y_pred)
rmse = np.sqrt(mse)  # square root of the mean squared error
print(f'The root mean squared error is {rmse:.3f}')

The root mean squared error is 0.088


# 16

In [14]:
# Coefficient of determination of the linear model on the test split.
# This rebinds `r2`, which previously held the single-feature model's score.
r2 = r2_score(y_true=Y_te, y_pred=Y_pred)
print(f'The coefficient of determination is {r2:.2f}')

The coefficient of determination is 0.15


# 17

In [15]:
# Linear-model coefficients labelled by feature name, sorted ascending.
# NOTE(review): in the recorded output rv1 and rv2 have coefficients of about
# +/-2.27e10 with opposite signs; this usually points to (near-)identical
# columns. Verify, and consider dropping one of them.
pd.Series(data=multi_reg.coef_, index=X.columns).sort_values()

rv2           -2.272929e+10
RH_2          -4.567049e-01
T_out         -3.218349e-01
T2            -2.361811e-01
T9            -1.899199e-01
RH_8          -1.575971e-01
RH_out        -7.764895e-02
RH_7          -4.460652e-02
RH_9          -3.980865e-02
T5            -1.568022e-02
T1            -3.281051e-03
Press_mm_hg    6.843231e-03
T7             1.031790e-02
Visibility     1.230574e-02
RH_5           1.600980e-02
RH_4           2.639574e-02
T4             2.900284e-02
Windspeed      2.919021e-02
RH_6           3.805791e-02
RH_3           9.602772e-02
T8             1.019904e-01
Tdewpoint      1.177408e-01
T6             2.364231e-01
T3             2.906232e-01
RH_1           5.535569e-01
rv1            2.272929e+10
dtype: float64

# 18

In [16]:
# Ridge (L2-penalised) regression with alpha=0.4, fitted on the training split.
ridge_reg = Ridge(alpha=0.4)
ridge_reg.fit(X=X_tr, y=Y_tr)

Ridge(alpha=0.4)

In [17]:
# Ridge predictions for the held-out test rows.
Y_pred_ridge = ridge_reg.predict(X=X_te)

In [18]:
# Root mean squared error of the ridge model on the test split.
ridge_mse = mean_squared_error(y_true=Y_te, y_pred=Y_pred_ridge)
ridge_rmse = np.sqrt(ridge_mse)
print(f'RMSE for ridge regression model is {ridge_rmse:.2f}')

RMSE for ridge regression model is 0.09


# 19

In [19]:
# Lasso (L1-penalised) regression with alpha=0.001, fitted on the training split.
lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(X=X_tr, y=Y_tr)

Lasso(alpha=0.001)

In [20]:
# Lasso coefficients labelled by feature name, sorted ascending; zero entries
# are features the L1 penalty removed from the model.
pd.Series(data=lasso_reg.coef_, index=X.columns).sort_values()

RH_out        -0.049557
RH_8          -0.000110
T1             0.000000
Tdewpoint      0.000000
Visibility     0.000000
Press_mm_hg   -0.000000
T_out          0.000000
RH_9          -0.000000
T9            -0.000000
T8             0.000000
RH_7          -0.000000
rv1           -0.000000
T7            -0.000000
T6             0.000000
RH_5           0.000000
T5            -0.000000
RH_4           0.000000
T4            -0.000000
RH_3           0.000000
T3             0.000000
RH_2          -0.000000
T2             0.000000
RH_6          -0.000000
rv2           -0.000000
Windspeed      0.002912
RH_1           0.017880
dtype: float64

# 20

In [21]:
# Root mean squared error of the lasso model on the test split.
# Arguments follow scikit-learn's (y_true, y_pred) order, as in the ridge cell;
# the value is unchanged because squared error is symmetric.
lasso_rmse = np.sqrt(mean_squared_error(Y_te, lasso_reg.predict(X_te)))
# Backward-compatible alias: the old name suggested an MSE but always held the RMSE.
lasso_mse = lasso_rmse
print(f'RMSE for lasso regression model is {lasso_rmse:.3f}')

RMSE for lasso regression model is 0.094
