# Support Vector Regression (SVR)


# Dataset - 3

In [32]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import DatetimeTickFormatter
from bokeh.layouts import row, column

In [33]:
# Read the Dataset-3 workbook (hourly load + weather) and display the frame.
# NOTE(review): '/content/...' looks like a Colab-specific absolute path.
df = pd.read_excel(io='/content/3-hour-load-weather-data.xlsx')
df

Unnamed: 0,DATE,max-temp,min-temp,RH-0830,RH-1730,MW
0,2017-01-01 00:00:00,20.3,9.2,100,80,1815.571045
1,2017-01-01 01:00:00,20.3,9.2,100,80,1576.699585
2,2017-01-01 02:00:00,20.3,9.2,100,80,1428.967896
3,2017-01-01 03:00:00,20.3,9.2,100,80,1356.272705
4,2017-01-01 04:00:00,20.3,9.2,100,80,1354.029175
...,...,...,...,...,...,...
26275,2019-12-31 19:00:00,9.4,4.8,91,69,4157.812988
26276,2019-12-31 20:00:00,9.4,4.8,91,69,4008.450439
26277,2019-12-31 21:00:00,9.4,4.8,91,69,3757.650391
26278,2019-12-31 22:00:00,9.4,4.8,91,69,3556.840576


## Finding the correlation between the features

In [34]:
# Pearson correlation over the numeric columns only. The timestamp column is
# excluded explicitly: since pandas 2.0, DataFrame.corr defaults to
# numeric_only=False and no longer drops non-numeric columns on its own.
df.select_dtypes(include='number').corr(method='pearson')

Unnamed: 0,max-temp,min-temp,RH-0830,RH-1730,MW
max-temp,1.0,0.882116,-0.628697,-0.371664,0.666025
min-temp,0.882116,1.0,-0.464201,-0.127366,0.768115
RH-0830,-0.628697,-0.464201,1.0,0.538387,-0.33854
RH-1730,-0.371664,-0.127366,0.538387,1.0,-0.054682
MW,0.666025,0.768115,-0.33854,-0.054682,1.0


## Split Data into Training and Testing

In [35]:
# Chronological hold-out: rows dated before 2019 train the models,
# rows from 2019 onward are kept for evaluation.
train = df[df['DATE'].dt.year < 2019]
test = df[df['DATE'].dt.year >= 2019]

# Weather predictors and the MW target for each partition.
x_train, y_train = train.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730']], train.loc[:, 'MW']
x_test, y_test = test.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730']], test.loc[:, 'MW']


## Creating the model using SVM

In [36]:
# Three SVR variants with the same regularisation strength (C = 1e4);
# only the kernel and its kernel-specific setting differ.
# NOTE(review): the features are passed unscaled and SVR is scale-sensitive;
# consider standardising them.
from sklearn.svm import SVR

svr_rbf = SVR(C=1e4, kernel='rbf', gamma=0.1)
svr_lin = SVR(C=1e4, kernel='linear')
svr_poly = SVR(C=1e4, kernel='poly', degree=2)

In [37]:
# fit() returns the estimator itself, so each y_* name is the fitted model
# (not a vector of predictions).
y_rbf, y_lin, y_poly = (
    estimator.fit(x_train, y_train)
    for estimator in (svr_rbf, svr_lin, svr_poly)
)

In [38]:
# Show the configuration of each fitted estimator, one per line.
print(y_rbf, y_lin, y_poly, sep="\n")

SVR(C=10000.0, gamma=0.1)
SVR(C=10000.0, kernel='linear')
SVR(C=10000.0, degree=2, kernel='poly')


## Now we move on to the Prediction Analysis!

In [39]:
# Test-set forecasts in kernel order: 1 = rbf, 2 = linear, 3 = poly.
predictions1, predictions2, predictions3 = (
    fitted.predict(x_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [40]:
# pred and test

def visualize(predictions, dates=None, actual=None):
  """Plot observed and predicted load (MW) against time and show it inline.

  Args:
    predictions: Predicted MW values, one per plotted timestamp.
    dates: Optional x-axis timestamps. When omitted, the notebook-level
      ``test['DATE']`` is used.
    actual: Optional observed MW values. When omitted, the notebook-level
      ``y_test`` is used.

  Raises:
    ValueError: If ``predictions`` and the timestamps differ in length, which
      usually means the predictions belong to a different dataset section.
  """
  # Explicit arguments avoid depending on whichever `test` / `y_test` the
  # kernel currently holds; the fallbacks keep one-argument calls working.
  x = test['DATE'] if dates is None else dates
  y1 = y_test if actual is None else actual
  y2 = predictions

  if len(y2) != len(x):
    raise ValueError(
        f"predictions has {len(y2)} values but there are {len(x)} timestamps")

  p = figure(title="Date vs MW",
            sizing_mode="stretch_width",
            x_axis_type='datetime',
            x_axis_label='Date',
            y_axis_label='MW')

  p.line(x, y1, legend_label="Test", line_width=1)
  p.line(x, y2, legend_label="Predicted", line_width=1, color='red')
  # Month-level ticks are rendered like "Jan 2019".
  p.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")

  # Route Bokeh output to the notebook, then render the figure.
  output_notebook()
  show(p)

In [41]:
# Observed load vs. RBF-kernel SVR predictions on the test split.
visualize(predictions1)

In [42]:
# Observed load vs. linear-kernel SVR predictions on the test split.
visualize(predictions2) 

In [43]:
# Observed load vs. degree-2 polynomial-kernel SVR predictions on the test split.
visualize(predictions3)

## SCORE

In [44]:
# Held-out score of each fitted estimator (rbf, linear, poly order).
score1, score2, score3 = (
    fitted.score(x_test, y_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [45]:
# One score per line, in rbf / linear / poly order.
print(score1, score2, score3, sep="\n")

0.45994165778453044
0.4230673174112166
0.6630487859301881


In [46]:
from sklearn.metrics import explained_variance_score

# Explained variance of each kernel's predictions (rbf, linear, poly order).
print(
    *(explained_variance_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.46009146340827023
0.5064311198523133
0.6692004223600633


In [47]:
from sklearn.metrics import r2_score

# R^2 recomputed from the stored predictions (rbf, linear, poly order).
print(
    *(r2_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.45994165778453044
0.4230673174112166
0.6630487859301881


In [48]:
from sklearn import metrics

# MAE / MSE / RMSE for each kernel's predictions, in rbf, linear, poly order.
# Each group is followed by the same blank-line separator.
for _preds in (predictions1, predictions2, predictions3):
    _mse = metrics.mean_squared_error(y_test, _preds)
    print('MAE:', metrics.mean_absolute_error(y_test, _preds))
    print('MSE:', _mse)
    print('RMSE:', np.sqrt(_mse))

    print("\n\n")

MAE: 715.2921819423225
MSE: 828684.9445255254
RMSE: 910.321341354538



MAE: 760.9057424621262
MSE: 885266.2586504415
RMSE: 940.8858903450734



MAE: 591.3033434623594
MSE: 517030.0619629097
RMSE: 719.0480247959171





# Dataset - 4

In [49]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import DatetimeTickFormatter
from bokeh.layouts import row, column

In [50]:
# Read the Dataset-4 workbook (daily load + weather) and display the frame.
# NOTE(review): '/content/...' looks like a Colab-specific absolute path.
df = pd.read_excel(io='/content/4-day-load-weather-data.xlsx', parse_dates=True)
df

Unnamed: 0,date,max-temp,min-temp,RH-0830,RH-1730,MW
0,2017-01-01,20.3,9.2,100,80,3536.238770
1,2017-01-02,23.2,9.3,100,82,3639.738770
2,2017-01-03,24.3,9.5,100,77,3673.321289
3,2017-01-04,24.0,8.9,97,66,3898.860840
4,2017-01-05,25.2,10.4,97,71,3547.965820
...,...,...,...,...,...,...
1090,2019-12-27,13.4,4.2,86,76,4976.180664
1091,2019-12-28,14.4,2.4,100,83,4708.879395
1092,2019-12-29,13.3,3.1,94,79,4831.750488
1093,2019-12-30,15.8,2.6,100,97,5298.331055


## Finding the correlation between the features

In [51]:
# Pearson correlation over the numeric columns only. The date column is
# excluded explicitly: since pandas 2.0, DataFrame.corr defaults to
# numeric_only=False and no longer drops non-numeric columns on its own.
df.select_dtypes(include='number').corr(method='pearson')

Unnamed: 0,max-temp,min-temp,RH-0830,RH-1730,MW
max-temp,1.0,0.882116,-0.628697,-0.371664,0.64371
min-temp,0.882116,1.0,-0.464201,-0.127366,0.77607
RH-0830,-0.628697,-0.464201,1.0,0.538387,-0.327478
RH-1730,-0.371664,-0.127366,0.538387,1.0,-0.01656
MW,0.64371,0.77607,-0.327478,-0.01656,1.0


In [52]:
# x = df['day']
# y = df['MW']

## Split Data into Training and Testing

In [53]:
# Chronological hold-out: rows dated before 2019 train the models,
# rows from 2019 onward are kept for evaluation.
train = df[df['date'].dt.year < 2019]
test = df[df['date'].dt.year >= 2019]

# Weather predictors and the MW target for each partition.
x_train, y_train = train.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730']], train.loc[:, 'MW']
x_test, y_test = test.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730']], test.loc[:, 'MW']


## Creating the model using SVM

In [54]:
# Three SVR variants with the same regularisation strength (C = 1e4);
# only the kernel and its kernel-specific setting differ.
# NOTE(review): the features are passed unscaled and SVR is scale-sensitive;
# consider standardising them.
from sklearn.svm import SVR

svr_rbf = SVR(C=1e4, kernel='rbf', gamma=0.1)
svr_lin = SVR(C=1e4, kernel='linear')
svr_poly = SVR(C=1e4, kernel='poly', degree=2)

In [55]:
# fit() returns the estimator itself, so each y_* name is the fitted model
# (not a vector of predictions).
y_rbf, y_lin, y_poly = (
    estimator.fit(x_train, y_train)
    for estimator in (svr_rbf, svr_lin, svr_poly)
)

In [56]:
# Show the configuration of each fitted estimator, one per line.
print(y_rbf, y_lin, y_poly, sep="\n")

SVR(C=10000.0, gamma=0.1)
SVR(C=10000.0, kernel='linear')
SVR(C=10000.0, degree=2, kernel='poly')


## Now we move on to the Prediction Analysis!

In [57]:
# Test-set forecasts in kernel order: 1 = rbf, 2 = linear, 3 = poly.
predictions1, predictions2, predictions3 = (
    fitted.predict(x_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [61]:
# pred and test

def visualize(predictions, dates=None, actual=None):
  """Plot observed and predicted load (MW) against time and show it inline.

  Args:
    predictions: Predicted MW values, one per plotted timestamp.
    dates: Optional x-axis timestamps. When omitted, the notebook-level
      ``test['date']`` is used.
    actual: Optional observed MW values. When omitted, the notebook-level
      ``y_test`` is used.

  Raises:
    ValueError: If ``predictions`` and the timestamps differ in length, which
      usually means the predictions belong to a different dataset section.
  """
  # Explicit arguments avoid depending on whichever `test` / `y_test` the
  # kernel currently holds; the fallbacks keep one-argument calls working.
  x = test['date'] if dates is None else dates
  y1 = y_test if actual is None else actual
  y2 = predictions

  if len(y2) != len(x):
    raise ValueError(
        f"predictions has {len(y2)} values but there are {len(x)} timestamps")

  p = figure(title="Date vs MW",
            sizing_mode="stretch_width",
            x_axis_type='datetime',
            x_axis_label='Date',
            y_axis_label='MW')

  p.line(x, y1, legend_label="Test", line_width=1)
  p.line(x, y2, legend_label="Predicted", line_width=1, color='red')
  # Month-level ticks are rendered like "Jan 2019".
  p.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")

  # Route Bokeh output to the notebook, then render the figure.
  output_notebook()
  show(p)

In [62]:
# Observed load vs. RBF-kernel SVR predictions on the test split.
visualize(predictions1)

In [63]:
# Observed load vs. linear-kernel SVR predictions on the test split.
visualize(predictions2) 

In [64]:
# Observed load vs. degree-2 polynomial-kernel SVR predictions on the test split.
visualize(predictions3)

## SCORE

In [65]:
# Held-out score of each fitted estimator (rbf, linear, poly order).
score1, score2, score3 = (
    fitted.score(x_test, y_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [66]:
# One score per line, in rbf / linear / poly order.
print(score1, score2, score3, sep="\n")

0.5520321550676435
0.4880310887714362
0.7453081175668177


In [67]:
from sklearn.metrics import explained_variance_score

# Explained variance of each kernel's predictions (rbf, linear, poly order).
print(
    *(explained_variance_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.5813834349683084
0.5594503224759204
0.7857645415331315


In [68]:
from sklearn.metrics import r2_score

# R^2 recomputed from the stored predictions (rbf, linear, poly order).
print(
    *(r2_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.5520321550676435
0.4880310887714362
0.7453081175668177


In [69]:
from sklearn import metrics

# MAE / MSE / RMSE for each kernel's predictions, in rbf, linear, poly order.
# Each group is followed by the same blank-line separator.
for _preds in (predictions1, predictions2, predictions3):
    _mse = metrics.mean_squared_error(y_test, _preds)
    print('MAE:', metrics.mean_absolute_error(y_test, _preds))
    print('MSE:', _mse)
    print('RMSE:', np.sqrt(_mse))

    print("\n\n")

MAE: 560.7873352055827
MSE: 533798.109936179
RMSE: 730.6148848307014



MAE: 649.2714255839585
MSE: 610061.727089268
RMSE: 781.0644833106087



MAE: 429.4976916628312
MSE: 303490.63442142674
RMSE: 550.8998406438567





# Dataset - 5

In [70]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import DatetimeTickFormatter
from bokeh.layouts import row, column

In [71]:
# Read the Dataset-5 workbook (hourly load + holiday/day flag) and display the frame.
# NOTE(review): '/content/...' looks like a Colab-specific absolute path.
df = pd.read_excel(io='/content/5-hour-load-holiday-data.xlsx', parse_dates=True)
df

Unnamed: 0,DATE,day,MW
0,2017-01-01 00:00:00,2,1815.571045
1,2017-01-01 01:00:00,2,1576.699585
2,2017-01-01 02:00:00,2,1428.967896
3,2017-01-01 03:00:00,2,1356.272705
4,2017-01-01 04:00:00,2,1354.029175
...,...,...,...
26275,2019-12-31 19:00:00,0,4157.812988
26276,2019-12-31 20:00:00,0,4008.450439
26277,2019-12-31 21:00:00,0,3757.650391
26278,2019-12-31 22:00:00,0,3556.840576


## Finding the correlation between the features

In [72]:
# Pearson correlation over the numeric columns only. The timestamp column is
# excluded explicitly: since pandas 2.0, DataFrame.corr defaults to
# numeric_only=False and no longer drops non-numeric columns on its own.
df.select_dtypes(include='number').corr(method='pearson')

Unnamed: 0,day,MW
day,1.0,-0.127804
MW,-0.127804,1.0


In [73]:
# x = df[['day']]
# y = df['MW']

## Split Data into Training and Testing

In [74]:
# Chronological hold-out: rows dated before 2019 train the models,
# rows from 2019 onward are kept for evaluation.
train = df[df['DATE'].dt.year < 2019]
test = df[df['DATE'].dt.year >= 2019]

# The single 'day' predictor and the MW target for each partition.
x_train, y_train = train.loc[:, ['day']], train.loc[:, 'MW']
x_test, y_test = test.loc[:, ['day']], test.loc[:, 'MW']


## Creating the model using SVM

In [75]:
# Three SVR variants with the same regularisation strength (C = 1e4);
# only the kernel and its kernel-specific setting differ.
from sklearn.svm import SVR

svr_rbf = SVR(C=1e4, kernel='rbf', gamma=0.1)
svr_lin = SVR(C=1e4, kernel='linear')
svr_poly = SVR(C=1e4, kernel='poly', degree=2)

In [76]:
# fit() returns the estimator itself, so each y_* name is the fitted model
# (not a vector of predictions).
y_rbf, y_lin, y_poly = (
    estimator.fit(x_train, y_train)
    for estimator in (svr_rbf, svr_lin, svr_poly)
)

In [77]:
# Show the configuration of each fitted estimator, one per line.
print(y_rbf, y_lin, y_poly, sep="\n")

SVR(C=10000.0, gamma=0.1)
SVR(C=10000.0, kernel='linear')
SVR(C=10000.0, degree=2, kernel='poly')


## Now we move on to the Prediction Analysis!

In [78]:
# Test-set forecasts in kernel order: 1 = rbf, 2 = linear, 3 = poly.
predictions1, predictions2, predictions3 = (
    fitted.predict(x_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [79]:
# pred and test

def visualize(predictions, dates=None, actual=None):
  """Plot observed and predicted load (MW) against time and show it inline.

  Args:
    predictions: Predicted MW values, one per plotted timestamp.
    dates: Optional x-axis timestamps. When omitted, the notebook-level
      ``test['DATE']`` is used.
    actual: Optional observed MW values. When omitted, the notebook-level
      ``y_test`` is used.

  Raises:
    ValueError: If ``predictions`` and the timestamps differ in length, which
      usually means the predictions belong to a different dataset section.
  """
  # Explicit arguments avoid depending on whichever `test` / `y_test` the
  # kernel currently holds; the fallbacks keep one-argument calls working.
  x = test['DATE'] if dates is None else dates
  y1 = y_test if actual is None else actual
  y2 = predictions

  if len(y2) != len(x):
    raise ValueError(
        f"predictions has {len(y2)} values but there are {len(x)} timestamps")

  p = figure(title="Date vs MW",
            sizing_mode="stretch_width",
            x_axis_type='datetime',
            x_axis_label='Date',
            y_axis_label='MW')

  p.line(x, y1, legend_label="Test", line_width=1)
  p.line(x, y2, legend_label="Predicted", line_width=1, color='red')
  # Month-level ticks are rendered like "Jan 2019".
  p.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")

  # Route Bokeh output to the notebook, then render the figure.
  output_notebook()
  show(p)

In [80]:
# Observed load vs. RBF-kernel SVR predictions on the test split.
visualize(predictions1)

In [81]:
# Observed load vs. linear-kernel SVR predictions on the test split.
visualize(predictions2) 

In [82]:
# Observed load vs. degree-2 polynomial-kernel SVR predictions on the test split.
visualize(predictions3)

## SCORE

In [83]:
# Held-out score of each fitted estimator (rbf, linear, poly order).
score1, score2, score3 = (
    fitted.score(x_test, y_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [84]:
# One score per line, in rbf / linear / poly order.
print(score1, score2, score3, sep="\n")

-0.009001815990244744
-0.00935316581277612
-0.009350905060669268


In [85]:
from sklearn.metrics import explained_variance_score

# Explained variance of each kernel's predictions (rbf, linear, poly order).
print(
    *(explained_variance_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.011495779664544181
0.011324095600179218
0.011058000308535765


In [86]:
from sklearn.metrics import r2_score

# R^2 recomputed from the stored predictions (rbf, linear, poly order).
print(
    *(r2_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

-0.009001815990244744
-0.00935316581277612
-0.009350905060669268


In [87]:
from sklearn import metrics

# MAE / MSE / RMSE for each kernel's predictions, in rbf, linear, poly order.
# Each group is followed by the same blank-line separator.
for _preds in (predictions1, predictions2, predictions3):
    _mse = metrics.mean_squared_error(y_test, _preds)
    print('MAE:', metrics.mean_absolute_error(y_test, _preds))
    print('MSE:', _mse)
    print('RMSE:', np.sqrt(_mse))

    print("\n\n")

MAE: 1016.4208024077243
MSE: 1548248.6771335348
RMSE: 1244.2864128220378



MAE: 1016.6891618953091
MSE: 1548787.8009381944
RMSE: 1244.5030337199642



MAE: 1016.2212602635052
MSE: 1548784.3319588494
RMSE: 1244.5016399984572





# Dataset - 6

In [88]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import DatetimeTickFormatter
from bokeh.layouts import row, column

In [89]:
# Read the Dataset-6 workbook (daily load + holiday/day flag) and display the frame.
# NOTE(review): '/content/...' looks like a Colab-specific absolute path.
df = pd.read_excel(io='/content/6-day-load-holiday-data.xlsx', parse_dates=True)
df

Unnamed: 0,date,day,MW
0,2017-01-01,2,3536.238770
1,2017-01-02,0,3639.738770
2,2017-01-03,0,3673.321289
3,2017-01-04,0,3898.860840
4,2017-01-05,2,3547.965820
...,...,...,...
1090,2019-12-27,0,4976.180664
1091,2019-12-28,1,4708.879395
1092,2019-12-29,1,4831.750488
1093,2019-12-30,0,5298.331055


## Finding the correlation between the features

In [90]:
# Pearson correlation over the numeric columns only. The date column is
# excluded explicitly: since pandas 2.0, DataFrame.corr defaults to
# numeric_only=False and no longer drops non-numeric columns on its own.
df.select_dtypes(include='number').corr(method='pearson')

Unnamed: 0,day,MW
day,1.0,-0.150346
MW,-0.150346,1.0


In [91]:
# x = df['day']
# y = df['MW']

## Split Data into Training and Testing

In [92]:
# Chronological hold-out: rows dated before 2019 train the models,
# rows from 2019 onward are kept for evaluation.
train = df[df['date'].dt.year < 2019]
test = df[df['date'].dt.year >= 2019]

# The single 'day' predictor and the MW target for each partition.
x_train, y_train = train.loc[:, ['day']], train.loc[:, 'MW']
x_test, y_test = test.loc[:, ['day']], test.loc[:, 'MW']


## Creating the model using SVM

In [93]:
# Three SVR variants with the same regularisation strength (C = 1e4);
# only the kernel and its kernel-specific setting differ.
from sklearn.svm import SVR

svr_rbf = SVR(C=1e4, kernel='rbf', gamma=0.1)
svr_lin = SVR(C=1e4, kernel='linear')
svr_poly = SVR(C=1e4, kernel='poly', degree=2)

In [94]:
# fit() returns the estimator itself, so each y_* name is the fitted model
# (not a vector of predictions).
y_rbf, y_lin, y_poly = (
    estimator.fit(x_train, y_train)
    for estimator in (svr_rbf, svr_lin, svr_poly)
)

In [95]:
# Show the configuration of each fitted estimator, one per line.
print(y_rbf, y_lin, y_poly, sep="\n")

SVR(C=10000.0, gamma=0.1)
SVR(C=10000.0, kernel='linear')
SVR(C=10000.0, degree=2, kernel='poly')


## Now we move on to the Prediction Analysis!

In [96]:
# Test-set forecasts in kernel order: 1 = rbf, 2 = linear, 3 = poly.
predictions1, predictions2, predictions3 = (
    fitted.predict(x_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [97]:
# pred and test

def visualize(predictions, dates=None, actual=None):
  """Plot observed and predicted load (MW) against time and show it inline.

  Args:
    predictions: Predicted MW values, one per plotted timestamp.
    dates: Optional x-axis timestamps. When omitted, the notebook-level
      ``test['date']`` is used.
    actual: Optional observed MW values. When omitted, the notebook-level
      ``y_test`` is used.

  Raises:
    ValueError: If ``predictions`` and the timestamps differ in length, which
      usually means the predictions belong to a different dataset section.
  """
  # Explicit arguments avoid depending on whichever `test` / `y_test` the
  # kernel currently holds; the fallbacks keep one-argument calls working.
  x = test['date'] if dates is None else dates
  y1 = y_test if actual is None else actual
  y2 = predictions

  if len(y2) != len(x):
    raise ValueError(
        f"predictions has {len(y2)} values but there are {len(x)} timestamps")

  p = figure(title="Date vs MW",
            sizing_mode="stretch_width",
            x_axis_type='datetime',
            x_axis_label='Date',
            y_axis_label='MW')

  p.line(x, y1, legend_label="Test", line_width=1)
  p.line(x, y2, legend_label="Predicted", line_width=1, color='red')
  # Month-level ticks are rendered like "Jan 2019".
  p.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")

  # Route Bokeh output to the notebook, then render the figure.
  output_notebook()
  show(p)

In [98]:
# Observed load vs. RBF-kernel SVR predictions on the test split.
visualize(predictions1)

In [99]:
# Observed load vs. linear-kernel SVR predictions on the test split.
visualize(predictions2) 

In [100]:
# Observed load vs. degree-2 polynomial-kernel SVR predictions on the test split.
visualize(predictions3)

## SCORE

In [101]:
# Held-out score of each fitted estimator (rbf, linear, poly order).
score1, score2, score3 = (
    fitted.score(x_test, y_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [102]:
# One score per line, in rbf / linear / poly order.
print(score1, score2, score3, sep="\n")

-0.1386998447504919
-0.1462310845563637
-0.150964277003534


In [103]:
from sklearn.metrics import explained_variance_score

# Explained variance of each kernel's predictions (rbf, linear, poly order).
print(
    *(explained_variance_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.013848332115781403
0.015223092211009148
0.01656387360184086


In [104]:
from sklearn.metrics import r2_score

# R^2 recomputed from the stored predictions (rbf, linear, poly order).
print(
    *(r2_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

-0.1386998447504919
-0.1462310845563637
-0.150964277003534


In [105]:
from sklearn import metrics

# MAE / MSE / RMSE for each kernel's predictions, in rbf, linear, poly order.
# Each group is followed by the same blank-line separator.
for _preds in (predictions1, predictions2, predictions3):
    _mse = metrics.mean_squared_error(y_test, _preds)
    print('MAE:', metrics.mean_absolute_error(y_test, _preds))
    print('MSE:', _mse)
    print('RMSE:', np.sqrt(_mse))

    print("\n\n")

MAE: 926.3226854787423
MSE: 1356873.7841088069
RMSE: 1164.8492538130447



MAE: 926.6477094927232
MSE: 1365848.0031723594
RMSE: 1168.6950000630445



MAE: 926.8068995883326
MSE: 1371488.071339853
RMSE: 1171.105491123602





# Dataset - 7

In [106]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import DatetimeTickFormatter
from bokeh.layouts import row, column

In [107]:
# Read the Dataset-7 workbook (hourly load + weather + day flag) and display the frame.
# NOTE(review): '/content/...' looks like a Colab-specific absolute path.
df = pd.read_excel(io='/content/7-hour-load-weather-holiday-data.xlsx', parse_dates=True)
df

Unnamed: 0,DATE,max-temp,min-temp,RH-0830,RH-1730,day,MW
0,2017-01-01 00:00:00,20.3,9.2,100,80,2,1815.571045
1,2017-01-01 01:00:00,20.3,9.2,100,80,2,1576.699585
2,2017-01-01 02:00:00,20.3,9.2,100,80,2,1428.967896
3,2017-01-01 03:00:00,20.3,9.2,100,80,2,1356.272705
4,2017-01-01 04:00:00,20.3,9.2,100,80,2,1354.029175
...,...,...,...,...,...,...,...
26275,2019-12-31 19:00:00,9.4,4.8,91,69,0,4157.812988
26276,2019-12-31 20:00:00,9.4,4.8,91,69,0,4008.450439
26277,2019-12-31 21:00:00,9.4,4.8,91,69,0,3757.650391
26278,2019-12-31 22:00:00,9.4,4.8,91,69,0,3556.840576


## Finding the correlation between the features

In [108]:
# Pearson correlation over the numeric columns only. The timestamp column is
# excluded explicitly: since pandas 2.0, DataFrame.corr defaults to
# numeric_only=False and no longer drops non-numeric columns on its own.
df.select_dtypes(include='number').corr(method='pearson')

Unnamed: 0,max-temp,min-temp,RH-0830,RH-1730,day,MW
max-temp,1.0,0.882116,-0.628697,-0.371664,-0.034996,0.666025
min-temp,0.882116,1.0,-0.464201,-0.127366,-0.050936,0.768115
RH-0830,-0.628697,-0.464201,1.0,0.538387,-0.005248,-0.33854
RH-1730,-0.371664,-0.127366,0.538387,1.0,-0.029325,-0.054682
day,-0.034996,-0.050936,-0.005248,-0.029325,1.0,-0.127804
MW,0.666025,0.768115,-0.33854,-0.054682,-0.127804,1.0


In [109]:
# Full-sample feature matrix and target.
# NOTE(review): no later cell visible in this notebook reads `x` or `y`.
x = df.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730', 'day']]
y = df.loc[:, 'MW']

## Split Data into Training and Testing

In [110]:
# Chronological hold-out: rows dated before 2019 train the models,
# rows from 2019 onward are kept for evaluation.
train = df[df['DATE'].dt.year < 2019]
test = df[df['DATE'].dt.year >= 2019]

# Weather + day predictors and the MW target for each partition.
x_train, y_train = train.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730', 'day']], train.loc[:, 'MW']
x_test, y_test = test.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730', 'day']], test.loc[:, 'MW']


## Creating the model using SVM

In [111]:
# Three SVR variants with the same regularisation strength (C = 1e4);
# only the kernel and its kernel-specific setting differ.
# NOTE(review): the features are passed unscaled and SVR is scale-sensitive;
# consider standardising them.
from sklearn.svm import SVR

svr_rbf = SVR(C=1e4, kernel='rbf', gamma=0.1)
svr_lin = SVR(C=1e4, kernel='linear')
svr_poly = SVR(C=1e4, kernel='poly', degree=2)

In [112]:
# fit() returns the estimator itself, so each y_* name is the fitted model
# (not a vector of predictions).
y_rbf, y_lin, y_poly = (
    estimator.fit(x_train, y_train)
    for estimator in (svr_rbf, svr_lin, svr_poly)
)

In [113]:
# Show the configuration of each fitted estimator, one per line.
print(y_rbf, y_lin, y_poly, sep="\n")

SVR(C=10000.0, gamma=0.1)
SVR(C=10000.0, kernel='linear')
SVR(C=10000.0, degree=2, kernel='poly')


## Now we move on to the Prediction Analysis!

In [114]:
# Test-set forecasts in kernel order: 1 = rbf, 2 = linear, 3 = poly.
predictions1, predictions2, predictions3 = (
    fitted.predict(x_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [115]:
# pred and test

def visualize(predictions, dates=None, actual=None):
  """Plot observed and predicted load (MW) against time and show it inline.

  Args:
    predictions: Predicted MW values, one per plotted timestamp.
    dates: Optional x-axis timestamps. When omitted, the notebook-level
      ``test['DATE']`` is used.
    actual: Optional observed MW values. When omitted, the notebook-level
      ``y_test`` is used.

  Raises:
    ValueError: If ``predictions`` and the timestamps differ in length, which
      usually means the predictions belong to a different dataset section.
  """
  # Explicit arguments avoid depending on whichever `test` / `y_test` the
  # kernel currently holds; the fallbacks keep one-argument calls working.
  x = test['DATE'] if dates is None else dates
  y1 = y_test if actual is None else actual
  y2 = predictions

  if len(y2) != len(x):
    raise ValueError(
        f"predictions has {len(y2)} values but there are {len(x)} timestamps")

  p = figure(title="Date vs MW",
            sizing_mode="stretch_width",
            x_axis_type='datetime',
            x_axis_label='Date',
            y_axis_label='MW')

  p.line(x, y1, legend_label="Test", line_width=1)
  p.line(x, y2, legend_label="Predicted", line_width=1, color='red')
  # Month-level ticks are rendered like "Jan 2019".
  p.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")

  # Route Bokeh output to the notebook, then render the figure.
  output_notebook()
  show(p)

In [116]:
# Observed load vs. RBF-kernel SVR predictions on the test split.
visualize(predictions1)

In [117]:
# Observed load vs. linear-kernel SVR predictions on the test split.
visualize(predictions2) 

In [118]:
# Observed load vs. degree-2 polynomial-kernel SVR predictions on the test split.
visualize(predictions3)

## SCORE

In [119]:
# Held-out score of each fitted estimator (rbf, linear, poly order).
score1, score2, score3 = (
    fitted.score(x_test, y_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [120]:
# One score per line, in rbf / linear / poly order.
print(score1, score2, score3, sep="\n")

0.45212942807362955
0.5623258274321774
0.6695808034577813


In [121]:
from sklearn.metrics import explained_variance_score

# Explained variance of each kernel's predictions (rbf, linear, poly order).
print(
    *(explained_variance_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.4522085449941118
0.564563360866502
0.6744586079466564


In [122]:
from sklearn.metrics import r2_score

# R^2 recomputed from the stored predictions (rbf, linear, poly order).
print(
    *(r2_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.45212942807362955
0.5623258274321774
0.6695808034577813


In [123]:
from sklearn import metrics

# MAE / MSE / RMSE for each kernel's predictions, in rbf, linear, poly order.
# Each group is followed by the same blank-line separator.
for _preds in (predictions1, predictions2, predictions3):
    _mse = metrics.mean_squared_error(y_test, _preds)
    print('MAE:', metrics.mean_absolute_error(y_test, _preds))
    print('MSE:', _mse)
    print('RMSE:', np.sqrt(_mse))

    print("\n\n")

MAE: 720.5424588616004
MSE: 840672.3107756993
RMSE: 916.881841229119



MAE: 662.7989038863782
MSE: 671582.9921759004
RMSE: 819.5016730769379



MAE: 580.4202442757969
MSE: 507007.09933208034
RMSE: 712.0443099499358





# Dataset - 8

In [124]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import DatetimeTickFormatter
from bokeh.layouts import row, column

In [125]:
# Read the Dataset-8 workbook (daily load + weather + day flag) and display the frame.
# NOTE(review): '/content/...' looks like a Colab-specific absolute path.
df = pd.read_excel(io='/content/8-day-load-weather-holiday-data.xlsx', parse_dates=True)
df

Unnamed: 0,date,max-temp,min-temp,RH-0830,RH-1730,MW,day
0,2017-01-01,20.3,9.2,100,80,3536.238770,2
1,2017-01-02,23.2,9.3,100,82,3639.738770,0
2,2017-01-03,24.3,9.5,100,77,3673.321289,0
3,2017-01-04,24.0,8.9,97,66,3898.860840,0
4,2017-01-05,25.2,10.4,97,71,3547.965820,2
...,...,...,...,...,...,...,...
1090,2019-12-27,13.4,4.2,86,76,4976.180664,0
1091,2019-12-28,14.4,2.4,100,83,4708.879395,1
1092,2019-12-29,13.3,3.1,94,79,4831.750488,1
1093,2019-12-30,15.8,2.6,100,97,5298.331055,0


## Finding the correlation between the features

In [126]:
# Pearson correlation over the numeric columns only. The date column is
# excluded explicitly: since pandas 2.0, DataFrame.corr defaults to
# numeric_only=False and no longer drops non-numeric columns on its own.
df.select_dtypes(include='number').corr(method='pearson')

Unnamed: 0,max-temp,min-temp,RH-0830,RH-1730,MW,day
max-temp,1.0,0.882116,-0.628697,-0.371664,0.64371,-0.034996
min-temp,0.882116,1.0,-0.464201,-0.127366,0.77607,-0.050936
RH-0830,-0.628697,-0.464201,1.0,0.538387,-0.327478,-0.005248
RH-1730,-0.371664,-0.127366,0.538387,1.0,-0.01656,-0.029325
MW,0.64371,0.77607,-0.327478,-0.01656,1.0,-0.150346
day,-0.034996,-0.050936,-0.005248,-0.029325,-0.150346,1.0


In [127]:
# Full-sample feature matrix and target.
# NOTE(review): no later cell visible in this notebook reads `x` or `y`.
x = df.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730', 'day']]
y = df.loc[:, 'MW']

## Split Data into Training and Testing

In [128]:
# Chronological hold-out: rows dated before 2019 train the models,
# rows from 2019 onward are kept for evaluation.
train = df[df['date'].dt.year < 2019]
test = df[df['date'].dt.year >= 2019]

# Weather + day predictors and the MW target for each partition.
x_train, y_train = train.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730', 'day']], train.loc[:, 'MW']
x_test, y_test = test.loc[:, ['max-temp', 'min-temp', 'RH-0830', 'RH-1730', 'day']], test.loc[:, 'MW']


## Creating the model using SVM

In [129]:
# Three SVR variants with the same regularisation strength (C = 1e4);
# only the kernel and its kernel-specific setting differ.
# NOTE(review): the features are passed unscaled and SVR is scale-sensitive;
# consider standardising them.
from sklearn.svm import SVR

svr_rbf = SVR(C=1e4, kernel='rbf', gamma=0.1)
svr_lin = SVR(C=1e4, kernel='linear')
svr_poly = SVR(C=1e4, kernel='poly', degree=2)

In [130]:
# fit() returns the estimator itself, so each y_* name is the fitted model
# (not a vector of predictions).
y_rbf, y_lin, y_poly = (
    estimator.fit(x_train, y_train)
    for estimator in (svr_rbf, svr_lin, svr_poly)
)

In [131]:
# Show the configuration of each fitted estimator, one per line.
print(y_rbf, y_lin, y_poly, sep="\n")

SVR(C=10000.0, gamma=0.1)
SVR(C=10000.0, kernel='linear')
SVR(C=10000.0, degree=2, kernel='poly')


## Now we move on to the Prediction Analysis!

In [132]:
# Test-set forecasts in kernel order: 1 = rbf, 2 = linear, 3 = poly.
predictions1, predictions2, predictions3 = (
    fitted.predict(x_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [133]:
# pred and test

def visualize(predictions, dates=None, actual=None):
  """Plot observed and predicted load (MW) against time and show it inline.

  Args:
    predictions: Predicted MW values, one per plotted timestamp.
    dates: Optional x-axis timestamps. When omitted, the notebook-level
      ``test['date']`` is used.
    actual: Optional observed MW values. When omitted, the notebook-level
      ``y_test`` is used.

  Raises:
    ValueError: If ``predictions`` and the timestamps differ in length, which
      usually means the predictions belong to a different dataset section.
  """
  # Explicit arguments avoid depending on whichever `test` / `y_test` the
  # kernel currently holds; the fallbacks keep one-argument calls working.
  x = test['date'] if dates is None else dates
  y1 = y_test if actual is None else actual
  y2 = predictions

  if len(y2) != len(x):
    raise ValueError(
        f"predictions has {len(y2)} values but there are {len(x)} timestamps")

  p = figure(title="Date vs MW",
            sizing_mode="stretch_width",
            x_axis_type='datetime',
            x_axis_label='Date',
            y_axis_label='MW')

  p.line(x, y1, legend_label="Test", line_width=1)
  p.line(x, y2, legend_label="Predicted", line_width=1, color='red')
  # Month-level ticks are rendered like "Jan 2019".
  p.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")

  # Route Bokeh output to the notebook, then render the figure.
  output_notebook()
  show(p)

In [134]:
# Observed load vs. RBF-kernel SVR predictions on the test split.
visualize(predictions1)

In [135]:
# Observed load vs. linear-kernel SVR predictions on the test split.
visualize(predictions2) 

In [136]:
# Observed load vs. degree-2 polynomial-kernel SVR predictions on the test split.
visualize(predictions3)

## SCORE

In [137]:
# Held-out score of each fitted estimator (rbf, linear, poly order).
score1, score2, score3 = (
    fitted.score(x_test, y_test) for fitted in (y_rbf, y_lin, y_poly)
)

In [138]:
# One score per line, in rbf / linear / poly order.
print(score1, score2, score3, sep="\n")

0.5393479604591558
0.4997051457958459
0.6900768318736263


In [139]:
from sklearn.metrics import explained_variance_score

# Explained variance of each kernel's predictions (rbf, linear, poly order).
print(
    *(explained_variance_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.570159891601808
0.569960290066279
0.7298000193173593


In [140]:
from sklearn.metrics import r2_score

# R^2 recomputed from the stored predictions (rbf, linear, poly order).
print(
    *(r2_score(y_test, preds)
      for preds in (predictions1, predictions2, predictions3)),
    sep="\n",
)

0.5393479604591558
0.4997051457958459
0.6900768318736263


In [141]:
from sklearn import metrics

# MAE / MSE / RMSE for each kernel's predictions, in rbf, linear, poly order.
# Each group is followed by the same blank-line separator.
for _preds in (predictions1, predictions2, predictions3):
    _mse = metrics.mean_squared_error(y_test, _preds)
    print('MAE:', metrics.mean_absolute_error(y_test, _preds))
    print('MSE:', _mse)
    print('RMSE:', np.sqrt(_mse))

    print("\n\n")

MAE: 568.793848873858
MSE: 548912.5856394432
RMSE: 740.8863513653381



MAE: 639.5841729362346
MSE: 596150.9304876553
RMSE: 772.1081080312881



MAE: 435.4276325289809
MSE: 369304.188331357
RMSE: 607.7040302082561



