In [1]:
# Numerical / tabular stack used throughout the notebook.
import numpy as np
import pandas as pd
# scikit-learn: dataset splitting, the regression model, and the R^2 metric.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
# pandas_bokeh is imported only for the output_notebook() call below in the
# cells visible here.
import pandas_bokeh
pandas_bokeh.output_notebook()

# NOTE(review): `output_file` is imported but not used in the visible cells.
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook

# NOTE(review): this is the second output_notebook() call in this cell (the
# first goes through pandas_bokeh) -- presumably one of them is redundant;
# confirm before removing either.
output_notebook()



In [2]:
# Read the generation workbook into a DataFrame; the path is relative to the
# directory the notebook is run from. The bare name on the last line makes the
# notebook render the frame.
input_df = pd.read_excel(
    './content/L1_Power_Generation.xlsx'
)
input_df

Unnamed: 0.1,Unnamed: 0,Datetime,"Generation, MW"
0,0,2019-01-01 00:00:00,128804
1,1,2019-01-01 01:00:00,126535
2,2,2019-01-01 02:00:00,125344
3,3,2019-01-01 03:00:00,122449
4,4,2019-01-01 04:00:00,120854
...,...,...,...
17539,17539,2020-12-31 19:00:00,140784
17540,17540,2020-12-31 20:00:00,137256
17541,17541,2020-12-31 21:00:00,136139
17542,17542,2020-12-31 22:00:00,133950


In [3]:
# Build the supervised-learning matrix for the regression model.
#
# The 'Generation, MW' column is folded into rows of 24 consecutive values
# (one row per day, assuming the series is hourly and gap-free -- TODO confirm).
# This requires the number of samples to be a multiple of 24.
matrix_load = input_df['Generation, MW'].values.reshape(-1, 24)

# Row-shifted views of the same array: offset +1 and offset +3.
# The offset +2 slice (`matrix_load[2:-1, :]`) is left out of the matrix.
matrix_load_next_1 = matrix_load[1:-2, :]
matrix_load_next_3 = matrix_load[3:, :]

# Trim the base rows so all three pieces have the same number of rows.
matrix_load = matrix_load[:-3, :]

# Column layout of `matrix`: 0-23 -> row d, 24-47 -> row d+1, 48-71 -> row d+3.
matrix = np.concatenate(
    [matrix_load, matrix_load_next_1, matrix_load_next_3],
    axis=1,
)

df_proccessed = pd.DataFrame(matrix)
df_proccessed

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,62,63,64,65,66,67,68,69,70,71
0,128804,126535,125344,122449,120854,121461,122030,122598,122377,123269,...,139959,140482,141673,143673,143484,141555,139651,136919,134306,130571
1,124826,122703,120674,120478,120213,122105,124057,125956,128001,131826,...,140986,142208,142591,144895,143540,142412,139904,138222,134606,131112
2,125932,123618,122476,122513,122598,124179,126319,128688,130720,133938,...,140356,140423,142518,144131,143723,142243,140423,138382,135068,131939
3,127516,124864,123370,123325,124409,125807,128177,130649,133566,135764,...,139123,138712,139835,142358,142521,141036,139352,137664,134477,131292
4,127229,125549,124463,123961,125082,126286,128384,131044,134263,137264,...,142843,143680,143821,145979,146145,144851,141809,140071,136659,131829
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723,130347,129073,127936,128391,129462,132459,136626,141587,145318,149018,...,144384,145173,146080,147506,147080,145671,142281,140312,136648,132357
724,130598,128710,127954,128812,130194,133217,137594,142730,146340,150038,...,151040,149669,148849,150927,149650,147366,144523,142536,139943,135525
725,132999,130747,129526,129213,130264,132466,135096,139208,142136,145745,...,150006,149698,149986,151308,150120,148452,145150,142722,140558,137463
726,130427,128400,126712,126577,127542,128528,130733,134139,136340,139506,...,147991,146958,146892,148323,148164,145643,143217,141149,138900,134424


In [4]:
# Hold out 20 % of the rows for testing. shuffle=False keeps the rows in their
# original order instead of sampling them at random.
# Note: despite the `_df` suffix, both results are NumPy arrays (same type as `matrix`).
train_df, test_df = train_test_split(
    matrix,
    test_size=0.2,
    shuffle=False,
)

In [5]:
# Fit the linear regression model: every column except the last 24 is an
# input, and the last 24 columns are the (multi-output) target.
regr_model = LinearRegression()
train_inputs, train_targets = train_df[:, :-24], train_df[:, -24:]
regr_model.fit(train_inputs, train_targets)

In [6]:
# define forecast accuracy metrics

def calc_mae(ground_truth, prediction):
  """Return the mean absolute error between `ground_truth` and `prediction`.

  Both arguments may be NumPy arrays or any array-like (lists, tuples, ...)
  of matching / broadcastable shape. They are converted to float arrays
  first, so plain Python sequences work and unsigned-integer inputs cannot
  wrap around when subtracted.
  """
  ground_truth = np.asarray(ground_truth, dtype=float)
  prediction = np.asarray(prediction, dtype=float)
  return np.abs(ground_truth - prediction).mean()

def calc_mape(ground_truth, prediction):
  """Return the mean absolute percentage error (in percent).

  Computed as mean(|(ground_truth - prediction) / ground_truth|) * 100.
  The absolute value is taken over the whole ratio, so negative ground-truth
  values contribute a positive error instead of cancelling out positive ones.

  Both arguments may be NumPy arrays or any array-like; they are converted
  to float arrays first. A ground-truth value of zero makes the result
  inf or nan, because the metric is undefined there.
  """
  ground_truth = np.asarray(ground_truth, dtype=float)
  prediction = np.asarray(prediction, dtype=float)
  return np.abs((ground_truth - prediction) / ground_truth).mean() * 100.

def calc_r2(ground_truth, prediction):
  """Return the R^2 score of `prediction` measured against `ground_truth`.

  Thin wrapper that delegates to scikit-learn's `r2_score`.
  """
  return r2_score(y_true=ground_truth, y_pred=prediction)

# a function to validate models

def validate(model, val_X, val_y):
  """Print accuracy metrics for `model` and plot actual vs. predicted values.

  Parameters
  ----------
  model : object with a `predict(X)` method returning a NumPy array.
  val_X : inputs passed unchanged to `model.predict`.
  val_y : NumPy array of ground-truth targets matching the prediction.

  Both the prediction and `val_y` are flattened to 1-D before anything is
  computed, so for multi-output targets the metrics are pooled over every
  output value and the plot shows all values as one continuous sequence.

  Returns None; the results are printed and shown as a Bokeh figure.
  """
  y_hat = model.predict(val_X).flatten()
  val_y = val_y.flatten()

  print('  ')
  print('MAE: {:.2f} MW'.format(calc_mae(val_y, y_hat)))
  print('MAPE: {:.2f} %'.format(calc_mape(val_y, y_hat)))
  print('R2: {:.2f}'.format(calc_r2(val_y, y_hat)))

  p = figure(
    width = 800,
    height = 400,
    x_axis_label = 'Sample index (flattened)',
    y_axis_label = 'Generation, MW',
  )

  # Shared x positions: one per flattened target value.
  sample_idx = np.arange(len(val_y))

  # Draw markers and a connecting line per series; the legend label makes the
  # two series distinguishable without relying on the colour convention.
  for series, color, label in ((val_y, 'green', 'Actual'), (y_hat, 'blue', 'Predicted')):
    p.scatter(sample_idx, series, line_width = 2, color = color, legend_label = label)
    p.line(sample_idx, series, line_width = 2, color = color, legend_label = label)

  show(p)

In [7]:
# Evaluate on the held-out rows: inputs are all but the last 24 columns,
# targets are the last 24 columns (same layout as used for training).
validate(
    model=regr_model,
    val_X=test_df[:, :-24],
    val_y=test_df[:, -24:],
)

  
MAE: 1664.31 MW
MAPE: 1.36 %
R2: 0.98


In [8]:
# One hand-written input sample: a single row of 48 values, matching the 48
# input columns the model was trained on. The values appear to be the first
# two 24-value rows of the dataset shown above.
load = np.array([[
    128804, 126535, 125344, 122449, 120854, 121461,
    122030, 122598, 122377, 123269, 124364, 126580,
    128019, 129108, 129677, 130747, 132926, 135281,
    135868, 135293, 133405, 132485, 130616, 127724,
    124826, 122703, 120674, 120478, 120213, 122105,
    124057, 125956, 128001, 131826, 133397, 134458,
    135341, 135067, 135560, 136734, 137626, 139591,
    139886, 139095, 136444, 134480, 132459, 128882,
]])

load_predicted = regr_model.predict(load)

# Plot the 24 predicted values against their position 0..23.
forecast_values = load_predicted.flatten()
forecast_steps = np.arange(24)

p = figure(width = 800, height = 400)
p.scatter(forecast_steps, forecast_values, line_width = 2, color = 'blue')
p.line(forecast_steps, forecast_values, line_width = 2, color = 'blue')
show(p)

load_predicted

array([[127180.39546588, 123913.69457696, 123350.09221301,
        123084.88076115, 123675.68478548, 125399.24102781,
        128772.45528162, 132277.6113987 , 135048.59300658,
        137619.74071464, 138738.54596077, 138759.23369965,
        139133.06734389, 139431.71232282, 140053.89472367,
        139677.35786957, 140796.07540214, 142102.77444172,
        141883.70803373, 140264.46372112, 137772.21885133,
        135743.63568326, 133878.25798214, 130176.14855579]])