In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:

def convert2matrix(data_arr, look_back):
    X, Y =[], []
    for i in range(len(data_arr)-look_back):
        d=i+look_back  
        X.append(data_arr[i:d,0])
        Y.append(data_arr[d,0])
    return np.array(X), np.array(Y)

def model_loss(history):
    plt.figure(figsize=(8,4))
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Test Loss')
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epochs')
    plt.legend(loc='upper right')
    plt.show()

def prediction_plot(testY, test_predict):
      len_prediction=[x for x in range(len(testY))]
      plt.figure(figsize=(8,4))
      plt.plot(len_prediction, testY[:l], marker='.', label="actual")
      plt.plot(len_prediction, test_predict[:l], 'r', label="prediction")
      plt.tight_layout()
      sns.despine(top=True)
      plt.subplots_adjust(left=0.07)
      plt.ylabel('Ads Daily Spend', size=15)
      plt.xlabel('Time step', size=15)
      plt.legend(fontsize=15)
      plt.show()

In [4]:
data = pd.read_excel('data/Krakow_2018_2021_main.xlsx')

In [5]:
data['GODZ_MIN'] = data['GODZ_MIN'].apply(lambda x: '00:00' if x == '24:00' else x)
data['date_time'] = data['DATA'].astype(str).replace(' ', '').replace('-', '-') +' '+ data['GODZ_MIN']
data['date_time'] = pd.to_datetime(data['date_time'], format='mixed')#format='%y-%m-%d %H:%M')
id = data['PPE'].unique().tolist()

In [6]:
data.to_csv('./data/clear.csv', index='ignore')

In [7]:
data

Unnamed: 0,PPE,DATA,GODZ_MIN,ENERGIA,ZUŻYCIE,JEDN,FLAGA_ZMCZ,STATUS_REKORDU,TARYFA,RNK,CNT_PROF,date_time
0,590322449101272445,2018-01-01,00:15,A+,10.272,kWh,0,Ok,,1,1,2018-01-01 00:15:00
1,590322449101272445,2018-01-01,00:30,A+,10.272,kWh,0,Ok,,1,1,2018-01-01 00:30:00
2,590322449101272445,2018-01-01,00:45,A+,10.240,kWh,0,Ok,,1,1,2018-01-01 00:45:00
3,590322449101272445,2018-01-01,01:00,A+,10.200,kWh,0,Ok,,1,1,2018-01-01 01:00:00
4,590322449101272445,2018-01-01,01:15,A+,10.184,kWh,0,Ok,,1,1,2018-01-01 01:15:00
...,...,...,...,...,...,...,...,...,...,...,...,...
981375,590322449101302913,2021-12-31,23:00,A+,6.190,kWh,0,Ok,,1,1,2021-12-31 23:00:00
981376,590322449101302913,2021-12-31,23:15,A+,5.600,kWh,0,Ok,,1,1,2021-12-31 23:15:00
981377,590322449101302913,2021-12-31,23:30,A+,5.710,kWh,0,Ok,,1,1,2021-12-31 23:30:00
981378,590322449101302913,2021-12-31,23:45,A+,5.630,kWh,0,Ok,,1,1,2021-12-31 23:45:00


In [8]:
sub_data = data.loc[data['PPE'] == id[0]]
sub_data = sub_data[['date_time','ZUŻYCIE']]
# sub_data['date_time'] = pd.to_datetime(sub_data['date_time'], format='%y-%m-%d%H:%M')
sub_data_mean = sub_data.resample('H', on='date_time').mean()
sub_data_sum = sub_data.resample('H', on='date_time').sum()

sub_data_sum

Unnamed: 0_level_0,ZUŻYCIE
date_time,Unnamed: 1_level_1
2018-01-01 00:00:00,40.944
2018-01-01 01:00:00,40.648
2018-01-01 02:00:00,40.424
2018-01-01 03:00:00,40.432
2018-01-01 04:00:00,40.416
...,...
2021-12-31 19:00:00,42.960
2021-12-31 20:00:00,42.752
2021-12-31 21:00:00,42.648
2021-12-31 22:00:00,42.512


In [9]:
train_size = 900
train, test = sub_data_mean.values[0:train_size,:],sub_data_mean.values[train_size:len(sub_data_mean.values),:]
# setup look_back window 
look_back = 30
#convert dataset into right shape in order to input into the DNN
trainX, trainY = convert2matrix(train, look_back)
testX, testY = convert2matrix(test, look_back)

In [10]:
from keras.models import Sequential
from keras.layers import Dense
def model_dnn(look_back):
    model=Sequential()
    model.add(Dense(units=32, input_dim=look_back, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics = ['mse', 'mae'])
    return model

: 

: 

In [None]:
model=model_dnn(look_back)

history=model.fit(trainX,
                  trainY, 
                  epochs=100, 
                  batch_size=30, 
                  verbose=1, 
                  validation_data=(testX,testY),
                  callbacks=[EarlyStopping(monitor='val_loss', patience=10)],
                  shuffle=False)

In [None]:
train_score = model.evaluate(trainX, trainY, verbose=0)
print('Train Root Mean Squared Error(RMSE): %.2f; Train Mean Absolute Error(MAE) : %.2f ' 
% (np.sqrt(train_score[1]), train_score[2]))
test_score = model.evaluate(testX, testY, verbose=0)
print('Test Root Mean Squared Error(RMSE): %.2f; Test Mean Absolute Error(MAE) : %.2f ' 
% (np.sqrt(test_score[1]), test_score[2]))
model_loss(history)

In [None]:
prediction_plot(testY, test_predict)