In [1]:
import pandas as pd

import time

from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error

import warnings
warnings.filterwarnings('ignore')

  from pandas.core import datetools


In [2]:
# Source CSV, resolved relative to the notebook's working directory.
filename = 'time_series_60min_singleindex.csv'

# Load the whole file; column 0 is parsed as datetimes.
data = pd.read_csv(filepath_or_buffer=filename, parse_dates=[0])

In [3]:
# Preview the first five rows of the raw frame.
data.head(n=5)

Unnamed: 0,utc_timestamp,cet_cest_timestamp,AL_load_entsoe_power_statistics,AT_load_entsoe_power_statistics,AT_load_entsoe_transparency,AT_solar_generation_actual,AT_wind_onshore_generation_actual,BA_load_entsoe_power_statistics,BA_load_entsoe_transparency,BE_load_entsoe_power_statistics,...,SI_price_day_ahead,SI_solar_generation_actual,SI_wind_onshore_generation_actual,SK_load_entsoe_power_statistics,SK_load_entsoe_transparency,SK_price_day_ahead,SK_solar_generation_actual,TR_load_entsoe_power_statistics,UA_west_load_entsoe_power_statistics,interpolated_values
0,2005-12-31 23:00:00,2006-01-01T00:00:00+0100,,6297.0,,,,1425.0,,9829.0,...,,,,3332.0,,,,,719.0,
1,2006-01-01 00:00:00,2006-01-01T01:00:00+0100,,6005.0,,,,1358.0,,9447.0,...,,,,3165.0,,,,,695.0,
2,2006-01-01 01:00:00,2006-01-01T02:00:00+0100,,5743.0,,,,1283.0,,9062.0,...,,,,3040.0,,,,,654.0,
3,2006-01-01 02:00:00,2006-01-01T03:00:00+0100,,5397.0,,,,1175.0,,8589.0,...,,,,3021.0,,,,,644.0,
4,2006-01-01 03:00:00,2006-01-01T04:00:00+0100,,5213.0,,,,1134.0,,8247.0,...,,,,2959.0,,,,,638.0,


In [4]:
# Columns kept for modelling: [0] is the timestamp column, [1] is the series
# that gets forecast.
col_name = [
    'utc_timestamp',
    'PT_load_entsoe_power_statistics',
]

In [5]:
# Keep all rows, but only the timestamp and target columns.
df_time = data.loc[:, col_name]

In [6]:
# Restrict the working frame to [2014-01-01, 2014-12-31). The upper bound is
# exclusive, so rows stamped on 2014-12-31 itself are dropped.
utc_all = df_time['utc_timestamp']
in_2014_window = (utc_all >= pd.Timestamp('2014-01-01')) & (utc_all < pd.Timestamp('2014-12-31'))
df_time = df_time.loc[in_2014_window]

In [7]:
# Chronological split of the filtered frame:
#   train: before 2014-10-01
#   val:   2014-10-01 (inclusive) up to 2014-12-01 (exclusive)
#   test:  2014-12-01 onwards
utc_2014 = df_time['utc_timestamp']
val_start = pd.Timestamp('2014-10-01')
test_start = pd.Timestamp('2014-12-01')

df_train = df_time.loc[utc_2014 < val_start]

df_val = df_time.loc[(utc_2014 >= val_start) & (utc_2014 < test_start)]

df_test = df_time.loc[utc_2014 >= test_start]

In [None]:
def train_arima(train, prevCount, odr):
    """Walk forward through ``train`` in 24-step blocks, forecasting each block.

    The first ``prevCount`` values of ``train`` seed a sliding window. For every
    full block of 24 remaining observations, three forecasts are produced from
    the current window:

    * an ARIMA forecast (model fitted on the window, 24 steps ahead),
    * a flat baseline: the mean of the whole window, repeated 24 times,
    * a per-position baseline: for each of the 24 positions, the average of
      that position across the first ``int(prevCount/24)`` 24-value chunks of
      the window.

    After each block, the block's observed values are appended to the window
    and the 24 oldest entries are discarded.

    Parameters
    ----------
    train : sliceable series exposing ``.values`` (a pandas Series in this
        notebook).
    prevCount : number of leading observations used as the initial window.
    odr : value forwarded as ``order`` to ``ARIMA``.

    Returns
    -------
    tuple
        ``(observed, predictions, predictions_naive, predictions_naive_average)``
        where ``observed`` is ``train[prevCount:]`` and the other three are
        lists. Only ``int(len(observed)/24)`` full blocks are forecast, so the
        lists are shorter than ``observed`` when its length is not a multiple
        of 24.
    """
    window = list(train.values[:prevCount])
    arima_forecasts = []
    mean_forecasts = []
    hourly_mean_forecasts = []
    remaining = train[prevCount:]

    observed = remaining
    n_blocks = int(len(remaining)/24)
    for _ in range(n_blocks):
        # Flat baseline: overall mean of the window for all 24 steps.
        mean_forecasts.extend(24*[sum(window) / float(len(window))])

        # Per-position baseline: start from the first 24 values and add the
        # matching position from each subsequent 24-value chunk.
        n_days = int(prevCount/24)
        hourly_totals = list(window[:24])
        for day in range(1, n_days):
            offset = 24 * day
            for hour in range(24):
                hourly_totals[hour] = hourly_totals[hour] + window[offset + hour]
        hourly_mean_forecasts.extend(float(total/n_days) for total in hourly_totals)

        # ARIMA: refit on the current window; element 0 of the forecast
        # result holds the predicted values.
        fitted = ARIMA(window, order=odr).fit(disp=0)
        arima_forecasts.extend(list(fitted.forecast(steps=24)[0]))

        # Slide the window: append the block just forecast, drop the oldest 24.
        next_block = remaining[:24]
        window = (window + list(next_block.values))[24:]
        remaining = remaining[24:]
    return observed, arima_forecasts, mean_forecasts, hourly_mean_forecasts

In [None]:
# Walk-forward run on the training split: the first 360 observations seed the
# window and the ARIMA order is (5, 1, 0).
originalObserevedData, predictions, predictions_naive, predictions_naive_average = train_arima(
    train=df_train[col_name[1]],
    prevCount=360,
    odr=(5, 1, 0),
)

In [None]:
# One row per series, transposed so that each series becomes a column.
train_result_columns = ['originalObserevedData', 'predictions', 'predictions_naive', 'predictions_naive_average']
train_result_rows = [originalObserevedData.values, predictions,
                     predictions_naive, predictions_naive_average]

result_df = pd.DataFrame(train_result_rows).T
result_df.columns = train_result_columns

In [None]:
# Preview the first five rows of the training-run results.
result_df.head(n=5)

In [None]:
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Plot the observed series against the ARIMA forecasts from the training run.
observed_vs_arima = result_df[['originalObserevedData', 'predictions']]
plt.plot(observed_vs_arima)
plt.show()

In [None]:
def val_arima(valData, prevCount, odr, valHistory):
    """Walk forward through ``valData`` in 24-step blocks, forecasting each block.

    ``valHistory`` seeds a sliding window. For every full block of 24
    observations in ``valData``, three forecasts are produced from the current
    window:

    * an ARIMA forecast (model fitted on the window, 24 steps ahead),
    * a flat baseline: the mean of the whole window, repeated 24 times,
    * a per-position baseline: for each of the 24 positions, the average of
      that position across the first ``int(prevCount/24)`` 24-value chunks of
      the window.

    After each block, the block's observed values are appended to the window
    and the 24 oldest entries are discarded.

    Note that ``prevCount`` only controls the per-position baseline; the ARIMA
    fit and the flat baseline use the whole window, whose length is
    ``len(valHistory)``. Passing a history shorter than ``prevCount`` makes the
    per-position accumulation index past the end of the window.

    Parameters
    ----------
    valData : sliceable series exposing ``.values`` (a pandas Series in this
        notebook); the data being forecast.
    prevCount : number of window entries covered by the per-position baseline.
    odr : value forwarded as ``order`` to ``ARIMA``.
    valHistory : series exposing ``.values``; the initial window contents.

    Returns
    -------
    tuple of lists
        ``(predictions, predictions_naive, predictions_naive_average)``, each
        holding 24 values per forecast block.
    """
    window = list(valHistory.values)
    arima_forecasts = []
    mean_forecasts = []
    hourly_mean_forecasts = []
    remaining = valData
    n_blocks = int(len(valData)/24)
    for _ in range(n_blocks):
        # Flat baseline: overall mean of the window for all 24 steps.
        mean_forecasts.extend(24*[sum(window) / float(len(window))])

        # Per-position baseline: start from the first 24 values and add the
        # matching position from each subsequent 24-value chunk.
        n_days = int(prevCount/24)
        hourly_totals = list(window[:24])
        for day in range(1, n_days):
            offset = 24 * day
            for hour in range(24):
                hourly_totals[hour] = hourly_totals[hour] + window[offset + hour]
        hourly_mean_forecasts.extend(float(total/n_days) for total in hourly_totals)

        # ARIMA: refit on the current window; element 0 of the forecast
        # result holds the predicted values.
        fitted = ARIMA(window, order=odr).fit(disp=0)
        arima_forecasts.extend(list(fitted.forecast(steps=24)[0]))

        # Slide the window: append the block just forecast, drop the oldest 24.
        next_block = remaining[:24]
        window = (window + list(next_block.values))[24:]
        remaining = remaining[24:]
    return arima_forecasts, mean_forecasts, hourly_mean_forecasts

In [None]:
# Walk-forward run on the validation split. The sliding window is seeded with
# the last 360 training observations, i.e. the data immediately preceding the
# validation period, matching prevCount=360. Seeding it with df_test (as was
# done previously) leaks held-out, chronologically later data into validation
# and gives a window whose length does not match prevCount.
predictions, predictions_naive, predictions_naive_average= \
val_arima(df_val[col_name[1]], 360, (5,1,0), df_train[col_name[1]][-360:])

In [None]:
# One row per forecast series, transposed so that each series becomes a column.
val_result_columns = ['predictions', 'predictions_naive', 'predictions_naive_average']
val_result_rows = [predictions, predictions_naive, predictions_naive_average]

result_val = pd.DataFrame(val_result_rows).T
result_val.columns = val_result_columns

In [None]:
# Preview the first five rows of the validation-run forecasts.
result_val.head(n=5)

In [None]:
# Observed validation values as a plain list.
observedList = list(df_val[col_name[1]].values)

# Aliases for the three forecast lists. Despite the "Error" suffix these hold
# predictions; the error metric is computed from them in the next cell.
valError, naiveError, naiveAverageError = predictions, predictions_naive, predictions_naive_average

In [None]:
# Mean absolute error of each forecast against the observed validation values.
error = mean_absolute_error(y_true=observedList, y_pred=valError)
error1 = mean_absolute_error(y_true=observedList, y_pred=naiveError)
error2 = mean_absolute_error(y_true=observedList, y_pred=naiveAverageError)

for mae_label, mae_value in (("Val MAE: ", error),
                             ("Naive error MAE: ", error1),
                             ("Naive error average MAE: ", error2)):
    print(mae_label, str(mae_value))


In [None]:
def test_arima(testHistory, odr, ran, testData):
    history = [x for x in testHistory.values]
    predictions = list()
    observedData = testData
    ran = int(ran/24)
    for t in range(ran):
        #arima
        model = ARIMA(history, order=odr)
        model_fit = model.fit(disp=0)
        yhat = model_fit.forecast(steps=24)
        predictions.extend(list(yhat[0]))
        obs = observedData[:24]
        history.extend(obs.values)
        history = history[24:]
        observedData = observedData[24:]
    return predictions

In [None]:
# Walk-forward run on the test split: the window is seeded with the last 360
# validation observations and one forecast block is made per 24 test rows.
test_series = df_test[col_name[1]]
testHistory = df_val[col_name[1]][-360:]
ran = len(df_test)
final_pred = test_arima(testHistory=testHistory, odr=(5, 1, 0), ran=ran, testData=test_series)