In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [2]:
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv')

In [3]:
data.columns

Index(['date', 'Appliances', 'lights', 'T1', 'RH_1', 'T2', 'RH_2', 'T3',
       'RH_3', 'T4', 'RH_4', 'T5', 'RH_5', 'T6', 'RH_6', 'T7', 'RH_7', 'T8',
       'RH_8', 'T9', 'RH_9', 'T_out', 'Press_mm_hg', 'RH_out', 'Windspeed',
       'Visibility', 'Tdewpoint', 'rv1', 'rv2'],
      dtype='object')

In [4]:
columns = {'T1':'Temperature in kitchen area',
'RH_1':'Humidity in kitchen area',
'T2':'Temperature in living room area',
'RH_2':'Humidity in living room area',
'T3':'Temperature in laundry room area',
'RH_3':'Humidity in laundry room area',
'T4':'Temperature in office room',
'RH_4':'Humidity in office room',
'T5':'Temperature in bathroom',
'RH_5':'Humidity in bathroom',
'T6':'Temperature outside the building',
'RH_6':'Humidity outside the building',
'T7':'Temperature in ironing room',
'RH_7':'Humidity in ironing room',
'T8':'Temperature in teenager room 2',
'RH_8':'Humidity in teenager room 2',
'T9':'Temperature in parents room',
'RH_9':'Humidity in parents room',
'To':'Temperature outside',
'RH_out':'Humidity outside',
}

In [5]:
data = data.rename(columns=columns)

In [6]:
data.columns

Index(['date', 'Appliances', 'lights', 'Temperature in kitchen area',
       'Humidity in kitchen area', 'Temperature in living room area',
       'Humidity in living room area', 'Temperature in laundry room area',
       'Humidity in laundry room area', 'Temperature in office room',
       'Humidity in office room', 'Temperature in bathroom',
       'Humidity in bathroom', 'Temperature outside the building',
       'Humidity outside the building', 'Temperature in ironing room',
       'Humidity in ironing room', 'Temperature in teenager room 2',
       'Humidity in teenager room 2', 'Temperature in parents room',
       'Humidity in parents room', 'T_out', 'Press_mm_hg', 'Humidity outside',
       'Windspeed', 'Visibility', 'Tdewpoint', 'rv1', 'rv2'],
      dtype='object')

In [7]:
data_clean = data.drop(columns=['date','lights'])

In [9]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
normal_data = pd.DataFrame(scaler.fit_transform(data_clean), columns=data_clean.columns)

In [10]:
features_data = normal_data.drop(columns=['Appliances'])
heating_target = normal_data['Appliances']

In [12]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(features_data, heating_target, test_size=0.3, random_state=42)

In [38]:
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [39]:
predicted = lin_reg.predict(x_test)

In [16]:
temp_livingroom = data[['Temperature in living room area']]
temp_outside = data[['Temperature outside the building']]

In [17]:
lr_train, lr_test, out_train, out_test = train_test_split(temp_livingroom, temp_outside, test_size=0.3, random_state=42)

In [18]:
lin_reg.fit(lr_train, out_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [19]:
predict_outside = lin_reg.predict(lr_test)

In [22]:
from sklearn.metrics import r2_score
r2_out = r2_score(out_test, predict_outside)
round(r2_out, 2)

0.64

In [33]:
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_test, predicted)
round(mae, 2)

0.05

In [34]:
rss = np.sum(np.square(y_test - predicted))
round(rss,2)

45.35

In [32]:
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, predicted))
print(round(rmse, 3))

0.088


In [35]:
r2 = r2_score(y_test, predicted)
round(r2, 2)

0.15

In [41]:
def get_weights(model, feature, column):
  weights = pd.Series(model.coef_, feature.columns).sort_values()
  weights_df = pd.DataFrame(weights).reset_index()
  weights_df.columns = ['Features', column]
  weights_df[column].round(3)
  return weights_df

lin_weights = get_weights(lin_reg, x_train, 'Linear_Model_Weight')

In [42]:
lin_weights

Unnamed: 0,Features,Linear_Model_Weight
0,Humidity in living room area,-0.456698
1,T_out,-0.32186
2,Temperature in living room area,-0.236178
3,Temperature in parents room,-0.189941
4,Humidity in teenager room 2,-0.157595
5,Humidity outside,-0.077671
6,Humidity in ironing room,-0.044614
7,Humidity in parents room,-0.0398
8,Temperature in bathroom,-0.015657
9,Temperature in kitchen area,-0.003281


In [43]:
from sklearn.linear_model import Ridge
ridge_reg = Ridge(alpha = 0.4)
ridge_reg.fit(x_train, y_train)

Ridge(alpha=0.4, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [44]:
ridge_predict = ridge_reg.predict(x_test)

In [50]:
rmse_ridge = np.sqrt(mean_squared_error(y_test, ridge_predict))
print(round(rmse_ridge, 3))

0.088


In [46]:
from sklearn.linear_model import Lasso
lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(x_train, y_train)

Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [47]:
lasso_weights = get_weights(lasso_reg, x_train, 'Lasso_Model_Weight')

In [48]:
lasso_weights

Unnamed: 0,Features,Lasso_Model_Weight
0,Humidity outside,-0.049557
1,Humidity in teenager room 2,-0.00011
2,Temperature in kitchen area,0.0
3,Tdewpoint,0.0
4,Visibility,0.0
5,Press_mm_hg,-0.0
6,T_out,0.0
7,Humidity in parents room,-0.0
8,Temperature in parents room,-0.0
9,Temperature in teenager room 2,0.0


In [49]:
lasso_predict = lasso_reg.predict(x_test)

In [51]:
rmse_lasso = np.sqrt(mean_squared_error(y_test, lasso_predict))
print(round(rmse_lasso, 3))

0.094
