In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [2]:
# Repeat the data cleaning, starting again from the raw file:
# fields are ';'-separated and the first two lines of the file are skipped.
data = pd.read_csv(
    'data.txt',
    sep=';',
    skiprows=2,
)

In [3]:
# Remove the 'Unnamed: 10' column from the loaded frame.
data = data.drop(columns=['Unnamed: 10'])

In [4]:
# Convert the 'Datum' column from 'day.month.year hour:minute' strings to
# timestamps. Plain column assignment replaces the column, so it ends up with
# a datetime64 dtype; `data.loc[:, 'Datum'] = ...` writes into the existing
# object column on pandas >= 2.0 and leaves the dtype as object.
data['Datum'] = pd.to_datetime(data['Datum'], format='%d.%m.%Y %H:%M')

# NOTE(review): the row labelled 7202 is removed without an explanation —
# confirm which record this is and why it has to go.
# WARNING: this cell is not idempotent. After reset_index() the label 7202
# refers to the following row, so re-running the cell drops another record.
data.drop([7202], axis = 0, inplace = True)
data.reset_index(drop=True, inplace = True)

In [5]:
# Cast the 'ZE [MW]' column to float; all other columns keep their dtype.
data = data.astype({'ZE [MW]': float})

In [6]:
# Work on an independent copy: a plain `df_for_prediction = data` only creates
# a second name for the same frame, so adding/dropping columns below would
# also modify `data` and make this cell fail with a KeyError when re-run.
df_for_prediction = data.copy()

# Columns that are collapsed into a single per-row total.
columns_to_sum = ['PE [MW]', 'PPE [MW]', 'JE [MW]', 'VE [MW]', 'PVE [MW]', 'AE [MW]', 'ZE [MW]', 'VTE [MW]', 'FVE [MW]']

# Row-wise sum of the listed columns (DataFrame.sum skips NaN by default).
df_for_prediction['sum_of_consumptions'] = df_for_prediction[columns_to_sum].sum(axis = 1)

# Keep only the total; the individual source columns are no longer needed.
df_for_prediction = df_for_prediction.drop(columns_to_sum, axis = 1)

In [7]:
# Order the rows by timestamp, oldest first (ascending is the default).
# The sort is done in place on the existing frame object.
df_for_prediction.sort_values('Datum', inplace=True)

In [8]:
# Promote the 'Datum' column to the row index (in place; the column itself
# is removed from the frame, which is the set_index default).
df_for_prediction.set_index(keys='Datum', drop=True, inplace=True)

In [9]:
# Give the index an explicit hourly frequency by turning the timestamps
# into hourly periods.
hourly_index = pd.DatetimeIndex(df_for_prediction.index).to_period(freq='H')
df_for_prediction.index = hourly_index

In [10]:
# Split the data into train and test sets by row position: the first
# TRAIN_SIZE rows are used for fitting, the remaining rows are held out.
TRAIN_SIZE = 10667

# Fail early with a clear message if the frame is too short to leave any
# hold-out rows; otherwise `test` would be empty and later indexing into
# test.index would raise an IndexError far from the actual cause.
assert len(df_for_prediction) > TRAIN_SIZE, (
    f"expected more than {TRAIN_SIZE} rows, got {len(df_for_prediction)}"
)

train = df_for_prediction.iloc[:TRAIN_SIZE]
test = df_for_prediction.iloc[TRAIN_SIZE:]

In [30]:
# Fit a Holt-Winters exponential smoothing model on the training data:
# additive damped trend plus an additive seasonal component with a cycle of
# 24 observations.
# `damped_trend` is the current statsmodels keyword; the older `damped`
# spelling is deprecated.
model = ExponentialSmoothing(
    train,
    trend='add',
    damped_trend=True,
    seasonal='add',
    seasonal_periods=24,
).fit()



In [31]:
# Predict values for the whole test period (first to last test index entry)
# and store them in a frame that shares the test set's index.
first_step, last_step = test.index[0], test.index[-1]
results = pd.DataFrame(
    {'Predicted': model.predict(start=first_step, end=last_step)},
    index=test.index,
)
results

Unnamed: 0_level_0,Predicted
Datum,Unnamed: 1_level_1
2019-03-21 12:00,11200.316401
2019-03-21 13:00,11048.580628
2019-03-21 14:00,11031.810726
2019-03-21 15:00,10941.105598
2019-03-21 16:00,11096.541765
...,...
2019-05-20 06:00,11416.339574
2019-05-20 07:00,11657.309724
2019-05-20 08:00,11700.637710
2019-05-20 09:00,11517.615245


In [32]:
# Put the observed values next to the predictions and express the deviation
# as a percentage of the observed value: (predicted / real - 1) * 100.
results['Real_values'] = test['sum_of_consumptions']
results['Diff_percentage'] = (
    results['Predicted']
    .div(results['Real_values'])
    .sub(1)
    .mul(100)
)
results

Unnamed: 0_level_0,Predicted,Real_values,Diff_percentage
Datum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-03-21 12:00,11200.316401,10989.6,1.917416
2019-03-21 13:00,11048.580628,10858.5,1.750524
2019-03-21 14:00,11031.810726,10550.6,4.560980
2019-03-21 15:00,10941.105598,10493.1,4.269526
2019-03-21 16:00,11096.541765,10300.3,7.730277
...,...,...,...
2019-05-20 06:00,11416.339574,9561.1,19.404039
2019-05-20 07:00,11657.309724,10072.7,15.731728
2019-05-20 08:00,11700.637710,10195.3,14.765016
2019-05-20 09:00,11517.615245,10111.4,13.907226


In [33]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# predicted values, the real values and the percentage difference.
results.describe()

Unnamed: 0,Predicted,Real_values,Diff_percentage
count,1438.0,1438.0,1438.0
mean,10878.728332,9417.169124,16.733657
std,673.303848,1084.080786,12.719943
min,9771.31766,6672.0,-9.316057
25%,10303.252757,8635.95,7.478262
50%,11040.196078,9337.5,14.926816
75%,11403.248164,10208.95,23.886142
max,11899.568316,12191.7,68.11574
