In [26]:
import os
from math import sqrt

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn import metrics

# Area codes and series names; together they form the input file names below.
areas = ['DK1','DK2']
files = ['consumption','price','wind']

# Locations of the input CSVs and of the stored forecasts.
data_dir = "../../data/data_sanitized/"
forecast_dir = '../../data/forecasts/wind/'

# data[file][area] holds the frame read from "<data_dir><file>_<area>.csv".
data = {}
for file in files:
    frames_by_area = data[file] = {}
    for area in areas:
        csv_path = "{}{}_{}.csv".format(data_dir, file, area)
        frames_by_area[area] = pd.read_csv(csv_path)

def _number_models(variable_sets):
    """Map each variable-set label to its 1-based position in ``variable_sets``."""
    return {label: number for number, label in enumerate(variable_sets, start=1)}


# Model number per series, keyed by the variable-set label. The labels are compared
# verbatim with the `variables` field of each forecast entry, so their exact spelling
# (quotes and spacing included) matters.
models_variables = {
    'consumption': _number_models([
        'nordpool_prognosis',
        "['dayofweek', 'consumption_prognosis']",
        "['dayofweek', 'consumption_prognosis', 'prev_day1', 'prev_day2', 'prev_day7']",
        "['dayofweek', 'consumption_prognosis', 'prev_day1', 'prev_day2', 'prev_day7', 'wind_prognosis']",
    ]),
    'wind': _number_models([
        'nordpool_prognosis',
        "['dayofweek', 'wind_prognosis']",
        "['dayofweek', 'wind_prognosis', 'prev_day1', 'prev_day2', 'prev_day7']",
        "['dayofweek', 'wind_prognosis', 'prev_day1', 'prev_day2', 'prev_day7', 'consumption_prognosis']",
    ]),
    'price': _number_models([
        "['dayofweek']",
        "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7']",
        "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7', 'min_day', 'max_day', 'last_val_day']",
        "['dayofweek', 'prev_day1', 'prev_day2', 'prev_day7', 'min_day', 'max_day', 'last_val_day', 'consumption_prognosis', 'wind_prognosis']",
    ]),
}

In [27]:
def get_forecast_file(no, directory=None):
    """Read the stored forecast ``<no>.csv`` into a DataFrame.

    Parameters
    ----------
    no : str or int
        Forecast identifier, i.e. the file name without the ``.csv`` suffix.
        Integers are accepted and converted to text.
    directory : str or path-like, optional
        Folder that contains the forecast file. When omitted, the module-level
        ``forecast_dir`` is used, as before.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.
    """
    # Resolve the default lazily so the global is only needed when actually used.
    base_dir = forecast_dir if directory is None else directory
    return pd.read_csv(os.path.join(base_dir, "{}.csv".format(no)))

In [28]:

forecasts_file = forecast_dir + 'files.txt'
forecasts = {}

# Names of the '|'-separated metadata fields that follow "<no>." on every index line,
# in the order in which they appear.
FORECAST_FIELDS = ('file', 'area', 'window', 'start_date', 'last_date', 'std_fn', 'variables')

# The index is only read here, so open it read-only ('r+' would also demand write access).
with open(forecasts_file, 'r') as f:
    lines = f.read().splitlines()

if not lines:
    print('Empty file')

for line in lines:
    # Tolerate blank lines (e.g. a trailing newline at the end of the index).
    if not line.strip():
        continue

    # Everything before the first '.' is the forecast number; the rest is metadata.
    no, _, raw_fields = line.partition('.')
    fields = raw_fields.split('|')
    if len(fields) < len(FORECAST_FIELDS):
        raise ValueError(
            "Malformed line in {}: expected '<no>.' followed by {} '|'-separated fields, got {!r}".format(
                forecasts_file, len(FORECAST_FIELDS), line
            )
        )

    # forecasts[no] = the forecast frame plus its metadata (all metadata kept as strings).
    entry = {"data": get_forecast_file(no)}
    entry.update(zip(FORECAST_FIELDS, fields))
    forecasts[no] = entry


In [29]:
# Number of forecast entries collected in `forecasts` (one per parsed index line).
len(forecasts)

168

In [30]:
# Nested score table: file -> area -> window -> "start-last" dates -> std_fn -> model number.
# NOTE: despite the name, the value stored is the square root of the mean squared error.
fmae = {"consumption":{},"wind":{},"price":{}}
for forecast_no, fc in forecasts.items():
    target, region = fc['file'], fc['area']
    dates = fc['start_date'].replace('-','.') + '-' + fc['last_date'].replace('-','.')

    # Walk down the hierarchy, creating any level that does not exist yet.
    scores = (
        fmae[target]
        .setdefault(region, {})
        .setdefault(fc['window'], {})
        .setdefault(dates, {})
        .setdefault(fc['std_fn'], {})
    )

    # Variable sets that are not listed in models_variables are filed under model 0.
    model = models_variables[target].get(fc['variables'], 0)

    # The first and last values of the forecast's first column are used as label
    # bounds to slice the matching rows out of the observed data.
    predictions = fc['data']
    first_index = predictions.iloc[0, 0]
    last_index = predictions.iloc[-1, 0]
    y_pred = predictions.loc[:, '0':'23']
    y_true = data[target][region].loc[first_index:last_index, '0':'23']

    scores[model] = sqrt(metrics.mean_squared_error(y_true, y_pred))


In [31]:
# Show the nested score dictionary (file -> area -> window -> dates -> std_fn -> model).
# The values are square roots of the mean squared error, despite the variable name.
fmae

{'consumption': {},
 'wind': {'DK1': {'182': {'2019.01.01-2019.12.31': {'asinh': {1: 318.38489865714797,
      2: 367.489058836791,
      3: 369.1930702647855,
      4: 368.8782920680236},
     'None': {2: 290.3545986710702,
      3: 292.1398108582901,
      4: 292.8944800216812}},
    '2019.05.13-2020.05.12': {'asinh': {1: 436.8404434464716,
      2: 406.49654744508786,
      3: 408.8466817701705,
      4: 408.6536221295931},
     'None': {2: 327.2397209915117,
      3: 329.80748342659996,
      4: 329.7963502111217}},
    '2020.01.01-2020.05.12': {'asinh': {1: 584.5162980033824,
      2: 473.30287064188013,
      3: 478.05458344052323,
      4: 478.5779635461962},
     'None': {2: 393.9593095484042,
      3: 398.24380279384894,
      4: 397.3968922567874}},
    '2019.01.01-2020.05.12': {'asinh': {1: 406.86960429038567,
      2: 398.50793966413744,
      3: 401.1682200677061,
      4: 401.1226446604607},
     'None': {2: 321.3105009426169,
      3: 323.8967063422148,
      4: 324.1183

In [34]:
# Flatten fmae[file] into a table with one row per (Area, Dates, Window) and one
# column per "Model <n> (<std_fn>)", then keep and style the DK2 rows.
file = 'wind'
index_cols = ['Area', 'Dates', 'Window']

# Build all rows first and create the frame once, instead of seeding a dummy row
# and enlarging the frame cell by cell.
records = []
model_cols = []  # columns in order of first appearance
for area, by_window in fmae[file].items():
    for window, by_dates in by_window.items():
        for dates, by_std_fn in by_dates.items():
            record = {'Area': area, 'Dates': dates, 'Window': window}
            for std_fn, by_model in by_std_fn.items():
                for model, score in by_model.items():
                    model_col = 'Model ' + str(model) + " ("+std_fn+")"
                    record[model_col] = score
                    if model_col not in model_cols:
                        model_cols.append(model_col)
            records.append(record)

# Passing the columns explicitly fixes their order and keeps this working when
# there are no records at all (the result is then an empty frame).
df = pd.DataFrame(records, columns=index_cols + model_cols).set_index(index_cols)

# Only the DK2 rows are shown, ordered by the index levels.
df = df[df.index.get_level_values(0) == 'DK2']
df = df.sort_index()

cm = ListedColormap(sns.color_palette('Purples',20).as_hex())
cm2 = ListedColormap(sns.color_palette('Greens',20).as_hex())

# axis=None shades the cells against the whole table rather than per row or column.
# NOTE(review): the name `display` is kept because a later cell reads it, but it hides
# IPython's built-in `display` helper for the rest of the session — consider renaming both.
display = df.style.background_gradient(cmap=cm,axis=None)

display

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 1 (asinh),Model 2 (asinh),Model 3 (asinh),Model 4 (asinh),Model 2 (None),Model 3 (None),Model 4 (None)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
DK2,2019.01.01-2019.12.31,182,89.692353,116.154447,117.190195,116.498073,90.576995,91.036874,91.390983
DK2,2019.01.01-2019.12.31,364,89.692353,110.471759,110.326201,109.651575,89.196772,89.297236,89.237428
DK2,2019.01.01-2019.12.31,728,89.692353,105.140561,105.256967,104.918033,89.525286,89.519276,89.464245
DK2,2019.01.01-2020.05.12,182,93.437162,117.509184,118.91405,118.290468,93.84194,94.697121,95.107356
DK2,2019.01.01-2020.05.12,364,93.437162,114.54267,114.884931,114.473742,92.486348,92.898494,92.978135
DK2,2019.01.01-2020.05.12,728,93.437162,113.682709,114.033447,113.881175,92.547781,92.708086,92.7592
DK2,2019.05.13-2020.05.12,182,93.684466,119.121235,120.798422,120.100026,94.98941,95.765894,96.047434
DK2,2019.05.13-2020.05.12,364,93.684466,108.018185,108.491043,108.303648,93.450492,93.775196,93.870937
DK2,2019.05.13-2020.05.12,728,93.684466,113.029049,113.398263,113.215839,93.766138,93.844232,93.853261
DK2,2020.01.01-2020.05.12,182,103.017205,121.149259,123.521383,123.075404,102.267973,104.082899,104.630389


In [140]:
# Select the rows whose first index level ('Area') equals 'DK1'.
# NOTE(review): the cell that builds `df` keeps only the 'DK2' rows, so this selection
# is expected to be empty unless `df` is rebuilt without that filter — confirm intent.
df[df.index.get_level_values(0) == 'DK1']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 3 (None),Model 1 (None),Model 2 (None),Model 4 (None),Model 3 (asinh),Model 1 (asinh),Model 2 (asinh),Model 4 (asinh)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [150]:
# Re-render the styled table that the table-building cell stored in `display`.
# NOTE(review): in IPython/Jupyter this name normally refers to the built-in display
# helper; after the assignment it refers to the styled table instead.
display

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Model 3 (None),Model 1 (None),Model 2 (None),Model 4 (None),Model 3 (asinh),Model 1 (asinh),Model 2 (asinh),Model 4 (asinh)
Area,Dates,Window,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
DK2,2019.01.01-2019.12.31,182,42.800522,60.685783,47.103923,40.867716,48.602061,59.265083,47.253681,42.37362
DK2,2019.01.01-2019.12.31,364,41.864222,66.643572,46.536799,41.773766,45.644619,65.894549,47.206494,43.792746
DK2,2019.01.01-2019.12.31,728,40.620861,57.641463,44.836256,38.663786,42.308261,56.997347,44.530276,39.184981
DK2,2019.01.01-2020.05.12,182,44.593005,72.088336,50.790176,42.240303,51.154143,71.634624,51.720635,46.129633
DK2,2019.01.01-2020.05.12,364,43.848764,80.282137,50.643427,43.034634,49.179894,81.045797,52.490468,47.965528
DK2,2019.01.01-2020.05.12,728,42.993153,84.403612,49.712208,40.72542,46.675904,84.125969,50.872467,44.977529
DK2,2019.05.13-2020.05.12,182,42.340374,68.703284,47.331708,38.983829,46.360871,69.280873,48.413124,42.724299
DK2,2019.05.13-2020.05.12,364,41.888804,80.654765,48.135879,41.183152,46.035386,82.458458,50.490049,45.432988
DK2,2019.05.13-2020.05.12,728,41.124731,88.502231,47.364384,38.987806,44.14104,88.186925,48.700699,43.230876
DK2,2020.01.01-2020.05.12,182,49.512224,103.381058,60.906583,46.007178,58.157977,105.581108,63.979568,56.437491
