In [24]:
import pandas as pd
from sklearn import metrics
import numpy as np
import seaborn as sns
from matplotlib.colors import ListedColormap
from math import sqrt

areas = ['DK1','DK2']
files = ['consumption','price','wind']

#metrics = mae or rmse
metric = 'mae'

data_dir = "../../data/data_sanitized/"
forecast_dir = '../../data/forecasts/'

data = {}
for file in files:
    data[file] = {}
    for area in areas:
        data[file][area] = pd.read_csv(data_dir + file+"_"+ area+".csv")

models_variables = {'consumption':{
    'nordpool_prognosis': 1,
    "['dayofweek', 'consumption_prognosis']":2,
    "['dayofweek', 'consumption_prognosis', 'prev_day1', 'prev_day2', 'prev_day7']":3,
    "['dayofweek', 'consumption_prognosis', 'prev_day1', 'prev_day2', 'prev_day7', 'wind_prognosis']":4
},'wind':{
    'nordpool_prognosis': 1,
    "['dayofweek', 'wind_prognosis']": 2,
    "['dayofweek', 'wind_prognosis', 'prev_day1', 'prev_day2', 'prev_day7']": 3,
    "['dayofweek', 'wind_prognosis', 'prev_day1', 'prev_day2', 'prev_day7', 'consumption_prognosis']": 4
},'price':{
    "['dayofweek']": 1,
    "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7']": 2,
    "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7', 'min_day', 'max_day', 'last_val_day']": 3,
    "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7', 'min_day', 'max_day', 'last_val_day', 'consumption_prognosis', 'wind_prognosis']": 4,
    "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7', 'min_day', 'max_day', 'last_val_day', 'consumption_prognosis_for_price', 'wind_prognosis_for_price']": 5
}}

def get_forecast_file(no):
    return pd.read_csv(forecast_dir + no +".csv")

In [25]:

forecasts_file = forecast_dir + 'files.txt'
forecasts = {}

with open(forecasts_file, 'r+') as f:
    lines = f.read().splitlines()
    if not lines:
        print('Empty file')
    for line in lines:
        cur_line = line.split('.', 1)
        if not cur_line[0] =='':
            no = cur_line[0]
            cur_line[1] =  cur_line[1].split('|')
            forecasts[no] = {"data":get_forecast_file(no),'file':cur_line[1][0],'area':cur_line[1][1],'window':cur_line[1][2],'start_date':cur_line[1][3],'last_date':cur_line[1][4], 'std_fn':cur_line[1][5], 'variables':cur_line[1][6]}

len(forecasts)

94

In [26]:
fmae = {"consumption":{},"wind":{},"price":{}}
for f in forecasts:
    fc = forecasts[f]
    if not fc['area'] in fmae[fc['file']]:
        fmae[fc['file']][fc['area']] = {}
    if not fc['window'] in fmae[fc['file']][fc['area']]:
        fmae[fc['file']][fc['area']][fc['window']] = {}
    dates= fc['start_date'].replace('-','.') + '-' + fc['last_date'].replace('-','.')
    if not dates in fmae[fc['file']][fc['area']][fc['window']]:
        fmae[fc['file']][fc['area']][fc['window']][dates] = {}
    if not fc['std_fn'] in fmae[fc['file']][fc['area']][fc['window']][dates]:
        fmae[fc['file']][fc['area']][fc['window']][dates][fc['std_fn']] = {}
    model = 0
    if fc['variables'] in models_variables[fc['file']]:
        model = models_variables[fc['file']][fc['variables']]
        if not model in fmae[fc['file']][fc['area']][fc['window']][dates][fc['std_fn']]:
            fmae[fc['file']][fc['area']][fc['window']][dates][fc['std_fn']][model] = 0
        
    first_index = fc['data'].iloc[0,0]
    last_index = fc['data'].iloc[-1,0]
    y_pred = fc['data'].loc[:,'0':'23']
    y_true = data[fc['file']][fc['area']].loc[first_index:last_index,'0':'23']
    
    
    if metric == 'mae':
        fmae[fc['file']][fc['area']][fc['window']][dates][fc['std_fn']][model] = metrics.mean_absolute_error(y_true,y_pred)
    elif metric == 'rmse':
        fmae[fc['file']][fc['area']][fc['window']][dates][fc['std_fn']][model] = sqrt(metrics.mean_squared_error(y_true,y_pred))

In [27]:
df = pd.DataFrame.from_records(np.ones((1,1)))
# df['Area'] = ['DK1']*8+['DK2']*8
# df['Window'] = (['728']*4 + ['364']*4)*2
# df['Window'] = (['None']*4 + ['asinh']*4)*2
# df['Dates'] = (['2019.01.01-2019.12.31','2019.01.01-2020.05.12','2019.05.13-2020.05.12','2020.01.01-2020.05.12'])*4
df['Area'] = ['DK1']
df['Dates'] = ['2019.01.01-2019.12.31']
df['Window'] = ['182']

df = df.groupby(['Area','Dates','Window']).sum()
# df = df.rename(columns={0:1,1:3,2:4,3:5})


file = 'price'
for area in fmae[file]:
    for window in fmae[file][area]:
        for dates in fmae[file][area][window]:
            for std_fn in fmae[file][area][window][dates]:
                for model in fmae[file][area][window][dates][std_fn]:
                    model_col = 'P' + str(model) + ("<br>("+std_fn+")" if not std_fn == 'None' else '')
                    df.loc[(area,dates,window),model_col] = fmae[file][area][window][dates][std_fn][model]

                    
df = df.drop(columns=[0])
df = df.reindex(sorted(df.columns),axis=1)
df = df[df.index.get_level_values(0) == 'DK1']                    
df = df.sort_index()
cm = ListedColormap(sns.color_palette('Purples',20).as_hex())
cm2 = ListedColormap(sns.color_palette('Greens',20).as_hex())
display = df.style.background_gradient(cmap=cm,axis=1,vmin=53,vmax=130)


display

  xa[xa < 0] = -1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P2 (hp),P3 (hp),P4 (hp),P5,P5 (asinh),P5 (asinh-hp),P5 (hp)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
DK1,2019.01.01-2019.12.31,128,51.77257,45.943968,41.024573,,,,41.024573
DK1,2019.01.01-2019.12.31,182,,,,,,,
DK1,2019.01.01-2019.12.31,364,49.46992,43.432851,41.101237,,,,41.138467
DK1,2019.01.01-2019.12.31,728,49.927721,43.818173,39.722246,38.773604,37.099335,37.510658,39.097276
DK1,2019.01.01-2020.05.12,128,54.580682,47.55538,41.876152,,,,41.876152
DK1,2019.01.01-2020.05.12,364,53.505275,45.591444,42.502604,,,,42.649051
DK1,2019.01.01-2020.05.12,728,53.725084,45.488476,40.743059,40.339211,40.46973,40.704062,40.299679
DK1,2019.05.13-2020.05.12,128,50.77722,45.300142,39.351418,,,,39.351418
DK1,2019.05.13-2020.05.12,364,51.416184,43.912683,41.44107,,,,42.051792
DK1,2019.05.13-2020.05.12,728,50.06187,42.429383,38.072764,39.42125,39.850178,39.117719,37.925312


In [5]:
display #method

IndexError: list index out of range

<pandas.io.formats.style.Styler at 0x7fb375038d50>

In [152]:
display #method == asinh

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 2 (None),Model 2 (asinh),Model 2 (asinh-hp),Model 3 (None),Model 3 (asinh),Model 3 (asinh-hp),Model 4 (None),Model 4 (asinh),Model 4 (asinh-hp)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
DK1,2019.01.01-2019.12.31,182,27.875199,42.027358,42.027358,27.84812,41.98654,41.98654,27.958317,42.108796,42.108796


In [153]:
display #method == asinh

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 2 (None),Model 2 (asinh),Model 2 (asinh-hp),Model 3 (None),Model 3 (asinh),Model 3 (asinh-hp),Model 4 (None),Model 4 (asinh),Model 4 (asinh-hp)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
DK1,2019.01.01-2019.12.31,182,27.875199,42.027358,42.027358,27.84812,41.98654,41.98654,27.958317,42.108796,42.108796


In [138]:
display #method == None

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 2 (None),Model 2 (asinh),Model 2 (asinh-hp),Model 3 (None),Model 3 (asinh),Model 3 (asinh-hp),Model 4 (None),Model 4 (asinh),Model 4 (asinh-hp)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
DK1,2019.01.01-2019.12.31,182,21.026883,40.377925,40.377925,21.36433,40.765326,40.765326,21.460788,40.879039,40.879039
