In [238]:
import pandas as pd
from sklearn import metrics
import numpy as np
import seaborn as sns
from matplotlib.colors import ListedColormap
from math import sqrt

# Bidding areas and the sanitized series available for each of them.
areas = ['DK1', 'DK2']
files = ['consumption', 'price', 'wind']

# Input locations, relative to this notebook.
data_dir = "../../data/data_sanitized/"
forecast_dir = '../../data/forecasts/price/'

# data[<file>][<area>] -> DataFrame read from "<data_dir><file>_<area>.csv"
data = {}
for file in files:
    per_area = {}
    data[file] = per_area
    for area in areas:
        csv_name = file + "_" + area + ".csv"
        per_area[area] = pd.read_csv(data_dir + csv_name)

# Maps, per series, the variable-set label stored with a forecast run to a
# model number. Apart from 'nordpool_prognosis', every label is the string
# form of a Python list of variable names, so str([...]) reproduces it exactly.
models_variables = {
    'consumption': {
        'nordpool_prognosis': 1,
        str(['dayofweek', 'consumption_prognosis']): 2,
        str(['dayofweek', 'consumption_prognosis',
             'prev_day1', 'prev_day2', 'prev_day7']): 3,
        str(['dayofweek', 'consumption_prognosis',
             'prev_day1', 'prev_day2', 'prev_day7',
             'wind_prognosis']): 4,
    },
    'wind': {
        'nordpool_prognosis': 1,
        str(['dayofweek', 'wind_prognosis']): 2,
        str(['dayofweek', 'wind_prognosis',
             'prev_day1', 'prev_day2', 'prev_day7']): 3,
        str(['dayofweek', 'wind_prognosis',
             'prev_day1', 'prev_day2', 'prev_day7',
             'consumption_prognosis']): 4,
    },
    'price': {
        str(['dayofweek']): 1,
        str(['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7']): 2,
        str(['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7',
             'min_day', 'max_day', 'last_val_day']): 3,
        str(['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7',
             'min_day', 'max_day', 'last_val_day',
             'consumption_prognosis', 'wind_prognosis']): 4,
        str(['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7',
             'min_day', 'max_day', 'last_val_day',
             'consumption_prognosis_for_price', 'wind_prognosis_for_price']): 5,
    },
}

In [239]:
def get_forecast_file(no):
    """Read one stored forecast into a DataFrame.

    Parameters
    ----------
    no : str
        File stem of the forecast; the file read is ``<forecast_dir><no>.csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of that CSV file.
    """
    csv_path = "".join((forecast_dir, no, ".csv"))
    return pd.read_csv(csv_path)

In [240]:

# Index of stored forecast runs. Each non-blank line is expected to look like
#   <no>.<file>|<area>|<window>|<start_date>|<last_date>|<std_fn>|<variables>
# where <no> is also the stem of the CSV holding that run's predictions.
forecasts_file = forecast_dir + 'files.txt'
forecasts = {}

# The index is only read, so open it read-only ('r+' would also require
# write permission on the file).
with open(forecasts_file, 'r') as index_handle:
    lines = index_handle.read().splitlines()

if not lines:
    print('Empty file')

for line_no, line in enumerate(lines, 1):
    if not line.strip():
        # Tolerate blank lines instead of crashing on them.
        continue
    # Split on the FIRST dot only: later fields (e.g. dates) may contain dots.
    no, _, meta = line.partition('.')
    fields = meta.split('|')
    if len(fields) < 7:
        raise ValueError(
            'Malformed line %d in %s: %r' % (line_no, forecasts_file, line)
        )
    forecasts[no] = {
        "data": get_forecast_file(no),
        'file': fields[0],
        'area': fields[1],
        'window': fields[2],
        'start_date': fields[3],
        'last_date': fields[4],
        'std_fn': fields[5],
        'variables': fields[6],
    }


In [241]:
# Sanity check: number of forecast runs loaded (one entry per run number in the index file).
len(forecasts)

216

In [242]:
# Score every loaded forecast run against the observed series.
#
# Layout: fmae[file][area][window][dates][std_fn][model] -> score
# NOTE: despite the name `fmae`, the score stored here is the root of
# sklearn's mean squared error (RMSE), not a mean absolute error.
fmae = {"consumption":{},"wind":{},"price":{}}
for f in forecasts:
    fc = forecasts[f]

    # Evaluation period label, e.g. '2019.01.01-2019.12.31'.
    dates = fc['start_date'].replace('-','.') + '-' + fc['last_date'].replace('-','.')

    # setdefault creates each missing nesting level on the way down.
    scores = (
        fmae[fc['file']]
        .setdefault(fc['area'], {})
        .setdefault(fc['window'], {})
        .setdefault(dates, {})
        .setdefault(fc['std_fn'], {})
    )

    # Variable sets that are not listed in models_variables are filed under model 0.
    model = models_variables[fc['file']].get(fc['variables'], 0)

    # The first column of the forecast file holds the bounds used to slice the
    # observed data by label (presumably the matching row labels - TODO confirm).
    first_index = fc['data'].iloc[0,0]
    last_index = fc['data'].iloc[-1,0]
    # Columns '0'..'23' are compared (presumably one column per hour - TODO confirm).
    y_pred = fc['data'].loc[:,'0':'23']
    y_true = data[fc['file']][fc['area']].loc[first_index:last_index,'0':'23']

    scores[model] = sqrt(metrics.mean_squared_error(y_true,y_pred))


In [243]:
# Inspect the nested score dictionary: file -> area -> window -> dates -> std_fn -> model -> score.
fmae

{'consumption': {},
 'wind': {},
 'price': {'DK1': {'182': {'2019.01.01-2019.12.31': {'None': {3: 66.58555811326912,
      1: 88.55209547444964,
      2: 73.45370444097776,
      4: 58.069433242726504},
     'asinh': {3: 66.03419955790442,
      1: 89.37763297865058,
      2: 74.59866539125704,
      4: 55.35380591001702,
      5: 57.97532824870776}},
    '2019.05.13-2020.05.12': {'None': {3: 61.46753322324631,
      1: 95.46523006663541,
      2: 67.53187479965354,
      4: 53.22844205425971},
     'asinh': {3: 61.41899339121841,
      1: 96.98367017790686,
      2: 68.56319010248872,
      4: 54.27947309376068,
      5: 55.86472456840183}},
    '2020.01.01-2020.05.12': {'None': {3: 67.62013106117294,
      1: 127.43077867612985,
      2: 78.77467468281864,
      4: 57.80730040508155},
     'asinh': {3: 68.75921352865812,
      1: 129.4757928228491,
      2: 80.86372642247552,
      4: 61.374396125664965,
      5: 63.64050514985981}},
    '2019.01.01-2020.05.12': {'None': {3: 66.86342

In [245]:
# Flatten the nested price scores into a table with one row per
# (Area, Dates, Window) and one column per "Model <n> (<std_fn>)".
#
# The rows are collected first and the frame is built in one step. This avoids
# seeding the frame with a hard-coded dummy row, which would linger as an
# all-NaN row whenever that exact combination is absent from `fmae`.
file = 'price'
index_names = ['Area', 'Dates', 'Window']

rows = {}
for area in fmae[file]:
    for window in fmae[file][area]:
        for dates in fmae[file][area][window]:
            for std_fn in fmae[file][area][window][dates]:
                for model in fmae[file][area][window][dates][std_fn]:
                    model_col = 'Model ' + str(model) + " ("+std_fn+")"
                    # A row is only created once there is a score to put in it.
                    rows.setdefault((area, dates, window), {})[model_col] = \
                        fmae[file][area][window][dates][std_fn][model]

df = pd.DataFrame(
    list(rows.values()),
    index=pd.MultiIndex.from_tuples(list(rows), names=index_names),
)
df = df.reindex(sorted(df.columns),axis=1)
# Only the DK1 area is shown in this table.
df = df[df.index.get_level_values(0) == 'DK1']
df = df.sort_index()

cm = ListedColormap(sns.color_palette('Purples',20).as_hex())
cm2 = ListedColormap(sns.color_palette('Greens',20).as_hex())
# axis=None shades every cell on one common scale across the whole table.
# NOTE(review): the name `display` shadows IPython's display() helper; it is
# kept because another cell in this notebook refers to it.
display = df.style.background_gradient(cmap=cm,axis=None)

display

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 1 (None),Model 1 (asinh),Model 2 (None),Model 2 (asinh),Model 3 (None),Model 3 (asinh),Model 4 (None),Model 4 (asinh),Model 5 (asinh)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
DK1,2019.01.01-2019.12.31,182,88.552095,89.377633,73.453704,74.598665,66.585558,66.0342,58.069433,55.353806,57.975328
DK1,2019.01.01-2019.12.31,364,93.087824,94.352975,72.811845,73.924743,64.198902,64.47504,56.728553,56.218734,58.61137
DK1,2019.01.01-2019.12.31,728,85.255977,86.419926,72.516739,72.25795,64.563703,64.153815,56.330472,55.766616,56.110416
DK1,2019.01.01-2020.05.12,182,100.419523,101.646696,74.911766,76.322214,66.863426,66.772849,57.999542,57.023961,59.293787
DK1,2019.01.01-2020.05.12,364,108.382915,110.865235,74.942412,76.967591,65.578259,66.328687,57.807665,59.25964,63.341491
DK1,2019.01.01-2020.05.12,728,117.686433,119.273737,75.27715,76.389407,65.993905,66.19916,57.040865,59.885926,65.515682
DK1,2019.05.13-2020.05.12,182,95.46523,96.98367,67.531875,68.56319,61.467533,61.418993,53.228442,54.279473,55.864725
DK1,2019.05.13-2020.05.12,364,107.685682,110.712737,68.521489,70.734229,61.043412,61.916248,55.357007,57.718104,62.111698
DK1,2019.05.13-2020.05.12,728,121.312715,122.971495,68.616275,70.469465,60.995904,61.666861,53.387447,57.608427,65.885615
DK1,2020.01.01-2020.05.12,182,127.430779,129.475793,78.774675,80.863726,67.620131,68.759214,57.8073,61.374396,63.640505


In [140]:
# Show only the rows whose first index level (Area) is 'DK1'.
# NOTE(review): this cell's saved execution count (140) is lower than that of the
# cells above it, so its stored output may not match the current `df` -
# re-run the notebook top to bottom before relying on it.
df[df.index.get_level_values(0) == 'DK1']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 3 (None),Model 1 (None),Model 2 (None),Model 4 (None),Model 3 (asinh),Model 1 (asinh),Model 2 (asinh),Model 4 (asinh)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [220]:
# Render whatever is currently bound to the name `display`.
# NOTE(review): the saved execution count (220) is out of order with the cells
# above, so the stored output may come from a different run. The name also
# shadows IPython's display() helper - confirm which object is intended here.
display

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 5 (asinh)
Area,Dates,Window,Unnamed: 3_level_1
DK1,2019.01.01-2019.12.31,182,38.914769
DK1,2019.01.01-2019.12.31,364,38.588546
DK1,2019.01.01-2019.12.31,728,37.013201
DK1,2019.01.01-2020.05.12,182,41.270317
DK1,2019.01.01-2020.05.12,364,43.275821
DK1,2019.01.01-2020.05.12,728,43.230931
DK1,2019.05.13-2020.05.12,182,39.984806
DK1,2019.05.13-2020.05.12,364,44.201518
DK1,2019.05.13-2020.05.12,728,43.683977
DK1,2020.01.01-2020.05.12,182,48.454032
