In [13]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [22]:
# Read the semicolon-separated CSV, parse the timestamp column, and derive
# the return and lag features in a single chain. New columns are appended in
# the order ret, spot_price_lag1, ret_lag1.
df = (
    pd.read_csv('../data_NO2.csv', sep=';')
    .assign(
        datetime_utc=lambda d: pd.to_datetime(d['datetime_utc']),
        # Simple return relative to the previous row's spot price.
        ret=lambda d: (d['spot_price'] - d['spot_price'].shift(1)) / d['spot_price'].shift(1),
        spot_price_lag1=lambda d: d['spot_price'].shift(1),
        ret_lag1=lambda d: d['ret'].shift(1),
    )
    # Rows without a defined return (e.g. the first row, which has no
    # previous price) are discarded.
    .loc[lambda d: d['ret'].notna()]
    .copy()
)

# Two stacked panels sharing the same time axis values: the spot price on
# top and the row-over-row return underneath.
fig = make_subplots(rows=2, cols=1, subplot_titles=('Spot Price', 'Return'))

# add_trace(..., row=, col=) is the supported replacement for the deprecated
# append_trace. Naming the traces gives the legend meaningful entries instead
# of the default "trace 0" / "trace 1".
fig.add_trace(
    go.Scatter(x=df['datetime_utc'], y=df['spot_price'], name='Spot Price'),
    row=1, col=1,
)
fig.add_trace(
    go.Scatter(x=df['datetime_utc'], y=df['ret'], name='Return'),
    row=2, col=1,
)

fig.update_layout(height=600)
fig.show()

In [24]:
# Identify calendar dates that do not hold exactly 24 observations. The date
# key is kept as a standalone Series so df itself is not mutated just to
# compute the grouping.
obs_dates = df['datetime_utc'].dt.date
n_obs = df['ret'].groupby(obs_dates).count()
odd_dates = n_obs[n_obs != 24].index
display(df[obs_dates.isin(odd_dates)])

if len(odd_dates) > 0:
    # Keep only observations strictly before 2018-09-13 00:00 UTC.
    df = df[df['datetime_utc'] < pd.to_datetime('2018-09-13 00:00:00+00:00')].copy()

    # Hand-entered rows for two missing 00:00 timestamps. Values are given in
    # the frame's column order at this point (datetime_utc, volume_demand,
    # volume_production, spot_price, ret, spot_price_lag1, ret_lag1).
    # The 2016-10-30 values equal those of the 01:00 row on the same date.
    # NOTE(review): presumably these gaps come from the autumn DST change in
    # the source data - confirm.
    patch_rows = pd.DataFrame(
        [
            [pd.to_datetime('2016-10-30 00:00:00+00:00'), 3450.5, 3027.0, 32.025, -0.003888, 32.150, -0.012895],
            [pd.to_datetime('2017-10-29 00:00:00+00:00'), 3584.5, 3890.0, 22.325, -0.074036, 24.110, -0.055251],
        ],
        columns=df.columns,
    )
    # Append with concat instead of df.loc[len(df)+1, :] = [...]: the label
    # len(df)+1 is only guaranteed to be unused when the index happens to be
    # exactly 1..len(df); otherwise that assignment overwrites an existing row.
    df = pd.concat([df, patch_rows], ignore_index=True)

# Restore chronological order so that a 24-row shift lines each observation
# up with the one 24 rows earlier.
df = df.sort_values(by='datetime_utc')
df['pred_ret'] = df['ret'].shift(24)
df['pred_price'] = df['spot_price'].shift(24)

# The first 24 rows have no shifted value; drop every row containing a NaN and
# renumber the index from 0.
df = df.dropna(axis=0).reset_index(drop=True)

Unnamed: 0,datetime_utc,volume_demand,volume_production,spot_price,ret,spot_price_lag1,ret_lag1
7273,2016-10-30 01:00:00+00:00,3450.5,3027.0,32.025,-0.003888,32.15,-0.012895
7274,2016-10-30 02:00:00+00:00,3453.0,3284.0,32.14,0.003591,32.025,-0.003888
7275,2016-10-30 03:00:00+00:00,3453.0,3374.0,32.16,0.000622,32.14,0.003591
7276,2016-10-30 04:00:00+00:00,3482.0,3762.0,32.73,0.017724,32.16,0.000622
7277,2016-10-30 05:00:00+00:00,3558.0,4303.0,33.37,0.019554,32.73,0.017724
7278,2016-10-30 06:00:00+00:00,3694.0,5088.0,34.17,0.023974,33.37,0.019554
7279,2016-10-30 07:00:00+00:00,3833.0,6105.0,35.02,0.024876,34.17,0.023974
7280,2016-10-30 08:00:00+00:00,3990.0,6850.0,36.49,0.041976,35.02,0.024876
7281,2016-10-30 09:00:00+00:00,4011.0,6859.0,36.99,0.013702,36.49,0.041976
7282,2016-10-30 10:00:00+00:00,3988.0,6893.0,37.11,0.003244,36.99,0.013702


## Test and training sets

In [25]:
# Chronological split: the timestamp found at the 80% row position becomes
# the boundary. Rows strictly before it form the training set; rows at or
# after it form the test set.
train_perc = 0.8
cutoff = int(train_perc * len(df))
cutoff_time = df['datetime_utc'].iloc[cutoff]
print(cutoff_time)
train = df.loc[df['datetime_utc'].lt(cutoff_time)]
test = df.loc[df['datetime_utc'].ge(cutoff_time)]

2018-02-28 00:00:00+00:00


# Return

In [26]:
import numpy as np
def eval(y_true, y_pred):
    """Print the sample count and the MSE, MAE and RMSE of a prediction.

    Each metric is rounded to 5 decimals for display. Nothing is returned.

    Note: this name shadows Python's built-in ``eval``; it is kept because
    other cells call the function under this name.

    Args:
        y_true: Observed values (array-like supporting element-wise
            arithmetic and ``.mean()``, e.g. a pandas Series or numpy array).
        y_pred: Predicted values, element-wise comparable with ``y_true``.
    """
    print(len(y_true))
    residual = y_true - y_pred
    mse = (residual ** 2).mean()
    mae = np.abs(residual).mean()
    rmse = mse ** 0.5
    for label, value in (("MSE", mse), ("MAE", mae), ("RMSE", rmse)):
        print(f"{label}: {round(value, 5)}")
# Score the 24-row-lagged return (pred_ret) against the realised return on
# both splits, printing a heading before each report.
for heading, split in (("Training set:", train), ("\nTest set:", test)):
    print(heading)
    eval(split['ret'], split['pred_ret'])

Training set:
18912
MSE: 0.00509
MAE: 0.02688
RMSE: 0.07138

Test set:
4728
MSE: 0.02457
MAE: 0.03381
RMSE: 0.15674


# Price

In [27]:
# Score the 24-row-lagged spot price (pred_price) against the realised spot
# price on both splits, printing a heading before each report.
for heading, split in (("Training set:", train), ("\nTest set:", test)):
    print(heading)
    eval(split['spot_price'], split['pred_price'])

Training set:
18912
MSE: 13.74528
MAE: 1.82784
RMSE: 3.70746

Test set:
4728
MSE: 20.50678
MAE: 2.48613
RMSE: 4.52844


### One step ahead (lag-1 naive baseline)

In [21]:
# Score the previous row's spot price (spot_price_lag1) as a forecast of the
# current spot price on both splits, printing a heading before each report.
for heading, split in (("Training set:", train), ("\nTest set:", test)):
    print(heading)
    eval(split['spot_price'], split['spot_price_lag1'])

Training set:
18912
MSE: 3.13017
MAE: 0.69827
RMSE: 1.76923

Test set:
4728
MSE: 4.32855
MAE: 1.06858
RMSE: 2.08052


In [28]:
# Score the previous row's return (ret_lag1) as a forecast of the current
# return on both splits, printing a heading before each report.
for heading, split in (("Training set:", train), ("\nTest set:", test)):
    print(heading)
    eval(split['ret'], split['ret_lag1'])

Training set:
18912
MSE: 0.00526
MAE: 0.02641
RMSE: 0.07253

Test set:
4728
MSE: 0.03235
MAE: 0.03541
RMSE: 0.17987
