In [5]:
import pandas as pd
from sklearn import metrics
import numpy as np
import seaborn as sns
from matplotlib.colors import ListedColormap
from math import sqrt

# Area codes and dataset names; every (dataset, area) pair names one CSV file.
areas = ["DK1", "DK2"]
files = ["consumption", "price", "wind"]

# Directory of the per-area input CSVs, and directory holding the forecast outputs.
data_dir = "../../data/data_sanitized/"
forecast_dir = '../../data/forecasts/'

# data[<dataset>][<area>] -> DataFrame read from "<data_dir><dataset>_<area>.csv".
data = {}
for file in files:
    per_area = data[file] = {}
    for area in areas:
        csv_path = "".join([data_dir, file, "_", area, ".csv"])
        per_area[area] = pd.read_csv(csv_path)

# Model number for each known `variables` string, grouped by dataset name.
# The keys are compared verbatim against the `variables` field of a forecast
# run, so spacing and quoting inside them must not be altered.
models_variables = {
    'consumption': {
        'nordpool_prognosis': 1,
        "['dayofweek', 'consumption_prognosis']": 2,
        "['dayofweek', 'consumption_prognosis', 'prev_day1', 'prev_day2', 'prev_day7']": 3,
        "['dayofweek', 'consumption_prognosis', 'prev_day1', 'prev_day2', 'prev_day7', 'wind_prognosis']": 4,
    },
    'wind': {
        'nordpool_prognosis': 1,
        "['dayofweek', 'wind_prognosis']": 2,
        "['dayofweek', 'wind_prognosis', 'prev_day1', 'prev_day2', 'prev_day7']": 3,
        "['dayofweek', 'wind_prognosis', 'prev_day1', 'prev_day2', 'prev_day7', 'consumption_prognosis']": 4,
    },
    'price': {
        "['dayofweek']": 1,
        "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7']": 2,
        "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7', 'min_day', 'max_day', 'last_val_day']": 3,
        "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7', 'min_day', 'max_day', 'last_val_day', 'consumption_prognosis', 'wind_prognosis']": 4,
        "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7', 'min_day', 'max_day', 'last_val_day', 'consumption_prognosis_for_price', 'wind_prognosis_for_price']": 5,
    },
}

In [6]:
def get_forecast_file(no):
    """Load one forecast CSV from ``forecast_dir``.

    Parameters
    ----------
    no : str
        File stem of the forecast; ``".csv"`` is appended to it.

    Returns
    -------
    pandas.DataFrame
        The contents of ``<forecast_dir><no>.csv`` as parsed by ``pd.read_csv``.
    """
    csv_path = "".join([forecast_dir, no, ".csv"])
    return pd.read_csv(csv_path)

In [7]:

# Index of forecast runs. Each non-blank line is parsed as
#   <no>.<file>|<area>|<window>|<start_date>|<last_date>|<std_fn>|<variables>
# where <no> (the text before the first '.') is both the key in `forecasts`
# and the stem of the CSV loaded through get_forecast_file().
forecasts_file = forecast_dir + 'files.txt'
forecasts = {}

# The index is only read, so open it read-only; 'r+' needlessly required
# write permission on the file.
with open(forecasts_file, 'r') as f:
    lines = f.read().splitlines()

if not lines:
    print('Empty file')

for line in lines:
    if not line.strip():
        # Skip blank lines instead of failing on them with an IndexError.
        continue
    no, _, rest = line.partition('.')
    fields = rest.split('|')
    if len(fields) < 7:
        raise ValueError(
            "Malformed line in " + forecasts_file + ": " + repr(line)
            + " (expected '<no>.' followed by 7 '|'-separated fields)"
        )
    # Fields beyond the seventh are ignored.
    forecasts[no] = {
        "data": get_forecast_file(no),
        'file': fields[0],
        'area': fields[1],
        'window': fields[2],
        'start_date': fields[3],
        'last_date': fields[4],
        'std_fn': fields[5],
        'variables': fields[6],
    }


In [8]:
# Number of entries in `forecasts`, i.e. how many forecast runs were loaded from the index file.
len(forecasts)

56

In [9]:
# Mean absolute error of every loaded forecast run, nested as
#   fmae[file][area][window][dates][std_fn][model] -> MAE
# where `dates` is "<start_date>-<last_date>" with '-' inside each date
# replaced by '.', and `model` is the number looked up in models_variables.
fmae = {"consumption":{},"wind":{},"price":{}}
for f, fc in forecasts.items():
    dates = fc['start_date'].replace('-','.') + '-' + fc['last_date'].replace('-','.')

    # setdefault creates each missing nesting level on the way down and
    # returns the innermost dict, which maps model number -> MAE.
    scores = (
        fmae[fc['file']]
        .setdefault(fc['area'], {})
        .setdefault(fc['window'], {})
        .setdefault(dates, {})
        .setdefault(fc['std_fn'], {})
    )

    # Runs whose `variables` string is not listed in models_variables are
    # stored under model number 0.
    model = models_variables[fc['file']].get(fc['variables'], 0)

    # The first column of the forecast frame supplies the row labels used to
    # slice the matching rows out of the measured data; columns '0'..'23' are
    # compared on both sides.
    first_index = fc['data'].iloc[0,0]
    last_index = fc['data'].iloc[-1,0]
    y_pred = fc['data'].loc[:,'0':'23']
    y_true = data[fc['file']][fc['area']].loc[first_index:last_index,'0':'23']

    # Assigned only once the score exists: no zero placeholder is written
    # first, so a failure in the metric cannot leave a bogus 0 in the table.
    scores[model] = metrics.mean_absolute_error(y_true,y_pred)

In [10]:
# Show the whole nested MAE mapping: file -> area -> window -> dates -> std_fn -> model.
fmae

{'consumption': {'DK1': {'182': {'2019.01.01-2019.12.31': {'asinh': {1: 22.70439497716895,
      2: 40.377924638498534,
      3: 40.76532586357589},
     'asinh-hp': {2: 117.28449946454263, 3: 117.65423490772808},
     'None': {2: 21.026882850321652, 3: 21.36432964216983}},
    '2019.05.13-2020.05.12': {'None': {1: 24.52214253187614,
      2: 22.830232347985177,
      3: 23.16187285709371},
     'asinh': {2: 41.83969856816527, 3: 42.221619819560175},
     'asinh-hp': {2: 116.52618250441198, 3: 117.07413278689658}},
    '2020.01.01-2020.05.12': {'None': {1: 26.461936090225567,
      2: 24.659923884091842,
      3: 25.018677793425628},
     'asinh': {2: 43.894513576724904, 3: 44.08825378472582},
     'asinh-hp': {2: 114.22795323482693, 3: 114.87894039112449}},
    '2019.01.01-2020.05.12': {'None': {1: 23.70791499330656,
      2: 21.997152845284376,
      3: 22.340290092204015},
     'asinh': {2: 41.31709397340638, 3: 41.652774485087825},
     'asinh-hp': {2: 116.46819294134546, 3: 116.91

In [12]:
# Table of MAE values for one dataset: one row per (Area, Dates, Window) and
# one column per "Model <n> (<std_fn>)", restricted to area DK1 and shaded
# with a colour gradient.
file = 'consumption'
index_names = ['Area', 'Dates', 'Window']

# Gather each table row as a plain dict and build the frame in one go. This
# replaces seeding the frame with a dummy column and a hard-coded index row,
# which left an all-NaN phantom row whenever that run was absent from `fmae`.
rows = {}
for area, by_window in fmae[file].items():
    for window, by_dates in by_window.items():
        for dates, by_std_fn in by_dates.items():
            row = rows.setdefault((area, dates, window), {})
            for std_fn, by_model in by_std_fn.items():
                for model, mae in by_model.items():
                    model_col = 'Model ' + str(model) + " ("+std_fn+")"
                    row[model_col] = mae

if rows:
    df = pd.DataFrame(
        list(rows.values()),
        index=pd.MultiIndex.from_tuples(list(rows), names=index_names),
    )
else:
    # No scores for this dataset: keep the three index levels on an empty frame.
    df = pd.DataFrame(
        index=pd.MultiIndex.from_arrays([[], [], []], names=index_names)
    )

df = df.reindex(sorted(df.columns),axis=1)
df = df[df.index.get_level_values(0) == 'DK1']
df = df.sort_index()
cm = ListedColormap(sns.color_palette('Purples',20).as_hex())
# cm2 is not used in this cell; it is kept in case other cells rely on it.
cm2 = ListedColormap(sns.color_palette('Greens',20).as_hex())
# axis=None applies the gradient across the whole table rather than per column.
# NOTE: the name `display` is kept because a later cell evaluates it; it
# shadows IPython's display() helper for the rest of the session.
display = df.style.background_gradient(cmap=cm,axis=None)

display

  xa[xa < 0] = -1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 1 (None),Model 1 (asinh),Model 2 (None),Model 2 (asinh),Model 2 (asinh-hp),Model 3 (None),Model 3 (asinh),Model 3 (asinh-hp)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
DK1,2019.01.01-2019.12.31,182,,22.704395,21.026883,40.377925,117.284499,21.36433,40.765326,117.654235
DK1,2019.01.01-2019.12.31,728,22.704395,,20.481759,30.258628,73.279327,20.4128,30.174136,73.295879
DK1,2019.01.01-2020.05.12,182,23.707915,,21.997153,41.317094,116.468193,22.34029,41.652774,116.913042
DK1,2019.01.01-2020.05.12,728,23.707915,,21.529247,32.84555,80.86697,21.492532,32.563926,80.636488
DK1,2019.05.13-2020.05.12,182,24.522143,,22.830232,41.839699,116.526183,23.161873,42.22162,117.074133
DK1,2019.05.13-2020.05.12,728,24.522143,,22.32649,34.322466,86.819411,22.299777,33.969337,86.509594
DK1,2020.01.01-2020.05.12,182,26.461936,,24.659924,43.894514,114.227953,25.018678,44.088254,114.87894
DK1,2020.01.01-2020.05.12,728,26.461936,,24.403931,39.944996,101.690199,24.455705,39.122371,100.781767


In [140]:
# Keep only the rows whose first index level equals 'DK1'.
df[df.index.get_level_values(0) == 'DK1']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 3 (None),Model 1 (None),Model 2 (None),Model 4 (None),Model 3 (asinh),Model 1 (asinh),Model 2 (asinh),Model 4 (asinh)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [220]:
# `display` is the styled table assigned in the table-building cell, not IPython's display() helper.
# NOTE(review): this cell's execution count (220) is far from that cell's (12), so the saved output
# presumably comes from a different kernel state — confirm by re-running top to bottom.
display

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 5 (asinh)
Area,Dates,Window,Unnamed: 3_level_1
DK1,2019.01.01-2019.12.31,182,38.914769
DK1,2019.01.01-2019.12.31,364,38.588546
DK1,2019.01.01-2019.12.31,728,37.013201
DK1,2019.01.01-2020.05.12,182,41.270317
DK1,2019.01.01-2020.05.12,364,43.275821
DK1,2019.01.01-2020.05.12,728,43.230931
DK1,2019.05.13-2020.05.12,182,39.984806
DK1,2019.05.13-2020.05.12,364,44.201518
DK1,2019.05.13-2020.05.12,728,43.683977
DK1,2020.01.01-2020.05.12,182,48.454032
