In [223]:
import datetime

import pandas as pd
import tensorflow as tf
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras

In [224]:
# Load the semicolon-separated price file; decimals are written with a comma.
csv_options = {"sep": ";", "decimal": ",", "header": "infer"}
df = pd.read_csv("manipulated_pun.csv", **csv_options)
df.head()

Unnamed: 0,Data,Ora,PUN
0,20170101,1,53.3
1,20170101,2,52.0
2,20170101,3,51.0
3,20170101,4,47.27
4,20170101,5,45.49


In [225]:
# Parse the integer-coded dates (e.g. 20170101) in one vectorised call
# instead of two element-wise `apply` passes.
df["Data"] = pd.to_datetime(df["Data"].astype(str), format="%Y%m%d")
# Keep only rows dated before 2020-01-01. The explicit copy makes `df` an
# independent frame, so the column assignments in the following cells do not
# trigger SettingWithCopyWarning.
df = df.loc[df["Data"] < "2020-01-01"].copy()

In [226]:
# Turn the hour number into an "HH:MM" label, join it to the date and use the
# resulting timestamp as the frame's index.
# NOTE(review): hour 24 wraps around to "00:00" while the date part is left
# untouched, so the last hour of each day is stamped at midnight of the *same*
# day (e.g. 2019-12-30 / Ora 24 -> 2019-12-30 00:00). Confirm whether this is
# intended before relying on the index being chronologically ordered.
hour_stamps = pd.to_datetime(df["Ora"], unit="h")
df["Ora2"] = hour_stamps.dt.strftime("%H:%M")
day_labels = df["Data"].dt.strftime("%Y-%m-%d")
df["Datetime"] = day_labels + " " + df["Ora2"]
df.index = pd.DatetimeIndex(df["Datetime"])

In [227]:
# Show the final 30 rows to check the generated timestamps.
df.iloc[-30:]

Unnamed: 0_level_0,Data,Ora,PUN,Ora2,Datetime
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-12-30 19:00:00,2019-12-30,19,52.44,19:00,2019-12-30 19:00
2019-12-30 20:00:00,2019-12-30,20,53.78,20:00,2019-12-30 20:00
2019-12-30 21:00:00,2019-12-30,21,48.51,21:00,2019-12-30 21:00
2019-12-30 22:00:00,2019-12-30,22,42.94,22:00,2019-12-30 22:00
2019-12-30 23:00:00,2019-12-30,23,38.27,23:00,2019-12-30 23:00
2019-12-30 00:00:00,2019-12-30,24,37.55,00:00,2019-12-30 00:00
2019-12-31 01:00:00,2019-12-31,1,37.55,01:00,2019-12-31 01:00
2019-12-31 02:00:00,2019-12-31,2,34.22,02:00,2019-12-31 02:00
2019-12-31 03:00:00,2019-12-31,3,30.95,03:00,2019-12-31 03:00
2019-12-31 04:00:00,2019-12-31,4,29.8,04:00,2019-12-31 04:00


In [228]:
# Lagged feature: each row receives the PUN value from 24 rows earlier.
target = df.loc[:, "PUN"]
df["df24"] = target.shift(periods=24)

In [229]:
# Display the frame to inspect the new df24 column; its first rows are empty
# because no value exists 24 rows earlier.
df

Unnamed: 0_level_0,Data,Ora,PUN,Ora2,Datetime,df24
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-01 01:00:00,2017-01-01,1,53.30,01:00,2017-01-01 01:00,
2017-01-01 02:00:00,2017-01-01,2,52.00,02:00,2017-01-01 02:00,
2017-01-01 03:00:00,2017-01-01,3,51.00,03:00,2017-01-01 03:00,
2017-01-01 04:00:00,2017-01-01,4,47.27,04:00,2017-01-01 04:00,
2017-01-01 05:00:00,2017-01-01,5,45.49,05:00,2017-01-01 05:00,
...,...,...,...,...,...,...
2019-12-31 20:00:00,2019-12-31,20,56.48,20:00,2019-12-31 20:00,53.78
2019-12-31 21:00:00,2019-12-31,21,53.24,21:00,2019-12-31 21:00,48.51
2019-12-31 22:00:00,2019-12-31,22,50.68,22:00,2019-12-31 22:00,42.94
2019-12-31 23:00:00,2019-12-31,23,48.61,23:00,2019-12-31 23:00,38.27


In [230]:
# Discard rows with missing values, then split on the timestamp index:
# rows up to and including the cut-off form the training set, later rows
# form the test set.
split_date = "2019-12-25"
df.dropna(inplace=True)
mask_train = df.index <= split_date
mask_test = df.index > split_date

train = df.loc[mask_train]
test = df.loc[mask_test]

In [231]:
# Feature: the price 24 rows earlier (df24); target: the current price (PUN).
# The first 24 training rows are skipped, exactly as before.
# `reshape(-1, 1)` yields the (n_samples, 1) column layout required by the
# scaler and the network while inferring the row count from the data, so the
# cell no longer fails with a ValueError as soon as the number of rows differs
# from a hard-coded constant.
trainX = train['df24'].iloc[24:].values.reshape(-1, 1)
testX = test['df24'].values.reshape(-1, 1)
trainY = train['PUN'].iloc[24:].values.reshape(-1, 1)
testY = test["PUN"].values.reshape(-1, 1)

In [232]:
# Neural-network regressors train better on scaled data.
# Features and targets each get their own scaler: re-fitting a single scaler
# on trainY would throw away the statistics learned from trainX, leaving no
# way to scale the test features consistently.
x_scaler = MinMaxScaler(feature_range=(0, 1))
y_scaler = MinMaxScaler(feature_range=(0, 1))
trainX = x_scaler.fit_transform(trainX)
trainY = y_scaler.fit_transform(trainY)
# The test features must pass through the scaler fitted on the *training*
# features; otherwise the model is asked to predict from raw prices although
# it was trained on values in [0, 1].
testX = x_scaler.transform(testX)
# Later cells call `scaler.inverse_transform` on model outputs, so `scaler`
# keeps referring to the target scaler.
scaler = y_scaler

In [233]:
# Feed-forward regressor: two ReLU hidden layers (100 and 1024 units) followed
# by a single output unit; the input width is the number of columns in trainX.
n_features = trainX.shape[1]
model = tf.keras.Sequential([
    keras.layers.Dense(100, activation='relu', input_shape=(n_features,)),
    keras.layers.Dense(1024, activation='relu'),
    keras.layers.Dense(1),
])
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

In [None]:
# Stop training once the training loss has not improved for 4 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=4,
    verbose=0,
    mode='auto',
)
fit_options = dict(epochs=100, batch_size=100, callbacks=[early_stopping])
model.fit(trainX, trainY, **fit_options)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100

In [None]:
# Predict on the test features, undo the scaling on the model output and wrap
# the result in a DataFrame for the comparison tables.
scaled_forecast = model.predict(testX)
predicted = scaler.inverse_transform(scaled_forecast)
predicted_ = pd.DataFrame(predicted)
predicted_.head()

In [None]:
# Build the target frame straight from `test`, so that the 'Datetime' column
# holds the real timestamps (it used to contain only the 0..n-1 row counter)
# and re-running the cell always gives the same two-column frame instead of
# stacking another index column onto an already converted `testY`.
testY = test["PUN"].rename("Target").reset_index()
# Both frames carry a default 0..n-1 index, so concat pairs them row by row.
results_vect = pd.concat([testY, predicted_], axis=1)
results_vect.columns = ['Datetime', 'Target', 'Reg']
results_vect

In [None]:
# Row i of `predicted_` is already the forecast for the timestamp of row i of
# `testY`: the model input for that row is the price 24 rows earlier (df24).
# Shifting the forecasts by -24 would pair every target with a prediction
# computed from that very same target value, which makes the error metrics
# look far better than the model really is. The forecasts are therefore kept
# aligned row by row.
predicted_final = predicted_.copy()
results_vect = pd.concat([testY, predicted_final], axis=1)
results_vect.columns = ['Datetime', 'Target', 'predicted_final']

In [None]:
# Remove incomplete rows, then chart actual against predicted values.
results_vect.dropna(inplace=True)
plot_columns = ['Target', 'predicted_final']
results_vect[plot_columns].plot()

In [None]:
# Mean absolute error between target and prediction
abs_error = (results_vect['Target'] - results_vect['predicted_final']).abs()
results_vect['diff'] = abs_error
results_vect['diff'].mean()

In [None]:
# Mean absolute percentage error, expressed as a fraction of the target value
results_vect['diff2'] = results_vect['diff'].div(results_vect['Target'])
results_vect['diff2'].mean()

## Checking Performance on the Training Set

In [None]:
# Same procedure on the training features: predict, undo the scaling on the
# model output and wrap the result in a DataFrame.
scaled_fit = model.predict(trainX)
predicted2 = scaler.inverse_transform(scaled_fit)
predicted_2 = pd.DataFrame(predicted2)
predicted_2.head()

In [None]:
# At this point `trainY` is still the scaled NumPy array and `predicted_final2`
# has not been created yet, so the concat could not run on a fresh kernel.
# Both inputs are therefore built here from objects that already exist: the
# unscaled training prices (with the same 24-row offset used for trainX) and
# the inverse-transformed training predictions. Row i of `predicted_2` is the
# forecast for row i of the targets, so the two are paired without any shift.
train_target = train['PUN'].iloc[24:].rename('Target').reset_index()
results_vect2 = pd.concat([train_target, predicted_2], axis=1)
results_vect2.columns = ['Datetime', 'Target', 'predicted_final']

In [None]:
# %%
# Rebuild the unscaled training targets as a frame for the comparison tables.
# The offset has to be the same 24 rows that were skipped when trainX was
# built, otherwise the targets do not line up with `predicted_2`; the earlier
# hard-coded offset (168) and shape (25944, 1) no longer matched the data and
# made the shape assignment fail. Deriving everything from `train` also keeps
# the cell re-runnable. The result has a timestamp column and a value column.
trainY = train['PUN'].iloc[24:].rename('Target').reset_index()

In [None]:
# Row i of `predicted_2` is already the forecast for row i of the training
# targets (the model input for that row is the price 24 rows earlier).
# A -24 shift would compare each target with a prediction derived from that
# same target value, so the forecasts are kept exactly as they are.
predicted_final2 = predicted_2.copy()
len(predicted_final2)

In [None]:
# Keep the raw prediction under 'Reg' and also expose `predicted_final2` as
# 'predicted_final': the plotting and error cells that follow read that
# column and would raise a KeyError if only 'Reg' were present.
results_vect2 = pd.concat([trainY, predicted_2, predicted_final2], axis=1)
results_vect2.columns = ['Datetime', 'Target', 'Reg', 'predicted_final']
results_vect2

In [None]:
# Remove incomplete rows, then chart actual against predicted values.
results_vect2.dropna(inplace=True)
plot_columns2 = ['Target', 'predicted_final']
results_vect2[plot_columns2].plot()

In [None]:
# Mean absolute error between target and prediction (training set)
abs_error2 = (results_vect2['Target'] - results_vect2['predicted_final']).abs()
results_vect2['diff'] = abs_error2
results_vect2['diff'].mean()

In [None]:
# Mean absolute percentage error (training set), as a fraction of the target
results_vect2['diff2'] = results_vect2['diff'].div(results_vect2['Target'])
results_vect2['diff2'].mean()