In [126]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib.colors import ListedColormap
import sys  
sys.path.insert(0, '../../db_test')
from db_test import *

# Area code whose observed series and forecasts are analysed in this notebook
# (alternative setup kept for reference: areas = ['DK1','DK2'])
area = 'DK2'
# Series names; each one maps to a "<series>_<area>.csv" file under data_dir
files = ['consumption','price','wind']

data_dir = "../../../data/data_sanitized/"
forecast_dir = '../../../data/forecasts/price/'

# data[<series>][<area>] -> DataFrame read from the sanitized CSV of that series
data = {}
for file in files:
    csv_path = data_dir + "_".join([file, area]) + ".csv"
    data[file] = {}
    data[file][area] = pd.read_csv(csv_path)

# Feature groups shared by several models. Each key of `models_variables` is the
# string form of a feature list (str(list)), mapped to a short model id.
_DAY = ['dayofweek']
_LAGS = ['prev_day1', 'prev_day2', 'prev_day7']
_DAY_STATS = ['min_day', 'max_day', 'last_val_day']

models_variables = {
    'consumption': {
        'nordpool_prognosis': 'C1',
        str(_DAY + ['consumption_prognosis']): 'C2',
        str(_DAY + ['consumption_prognosis'] + _LAGS): 'C3',
        str(_DAY + ['consumption_prognosis'] + _LAGS + ['wind_prognosis']): 'C4',
    },
    'wind': {
        'nordpool_prognosis': 'W1',
        str(_DAY + ['wind_prognosis']): 'W2',
        str(_DAY + ['wind_prognosis'] + _LAGS): 'W3',
        str(_DAY + ['wind_prognosis'] + _LAGS + ['consumption_prognosis']): 'W4',
    },
    'price': {
        str(_DAY): 'P1',
        str(_DAY + _LAGS): 'P2',
        str(_DAY + _LAGS + _DAY_STATS): 'P3',
        str(_DAY + _LAGS + _DAY_STATS
            + ['consumption_prognosis', 'wind_prognosis']): 'P4',
        str(_DAY + _LAGS + _DAY_STATS
            + ['consumption_prognosis_for_price', 'wind_prognosis_for_price']): 'P5',
    },
}

# Models compared against each other, per series and area.
# Each entry is '<model id>.<window>[.<std_fn>]', the same layout as the
# fc['name'] strings these lists are matched against later in the notebook.
models_to_test = {
    'consumption': {
        'DK1': [
            'C1.728',
            'C2.182', 'C2.364', 'C2.728',
            'C2.182.asinh', 'C2.364.asinh', 'C2.728.asinh',
            'C3.182', 'C3.364', 'C3.728',
            'C3.182.asinh', 'C3.364.asinh', 'C3.728.asinh',
            'C4.182', 'C4.364', 'C4.728',
            'C4.182.asinh', 'C4.364.asinh', 'C4.728.asinh',
        ],
        'DK2': [
            'C2.182', 'C2.364', 'C2.728',
            'C3.182', 'C3.364', 'C3.728',
            'C3.182.asinh', 'C3.364.asinh', 'C3.728.asinh',
            'C3.182.hp', 'C3.364.hp', 'C3.728.hp',
            'C4.182', 'C4.364', 'C4.728',
            'C4.364.asinh', 'C4.728.asinh',
            'C4.182.hp', 'C4.364.hp', 'C4.728.hp',
        ],
    },
    'wind': {
        'DK1': [
            'W2.182.asinh-hp', 'W2.364.asinh-hp', 'W2.728.asinh-hp',
            'W2.182.hp', 'W2.364.hp', 'W2.728.hp',
            'W3.182.asinh-hp', 'W3.364.asinh-hp', 'W3.728.asinh-hp',
            'W3.182.hp', 'W3.364.hp', 'W3.728.hp',
            'W4.182.asinh-hp', 'W4.364.asinh-hp', 'W4.728.asinh-hp',
            'W4.182.hp', 'W4.364.hp', 'W4.728.hp',
        ],
        'DK2': [
            'W1.728',
            'W2.182', 'W2.364', 'W2.728',
            'W2.728.asinh',
            'W3.182', 'W3.364', 'W3.728',
            'W3.182.asinh', 'W3.364.asinh', 'W3.728.asinh',
        ],
    },
    'price': {
        'DK1': [
            'P4.182', 'P4.728',
            'P4.182.asinh', 'P4.364.asinh', 'P4.728.asinh',
            'P4.182.hp', 'P4.364.hp', 'P4.728.hp',
            'P4.182.asinh-hp', 'P4.364.asinh-hp', 'P4.728.asinh-hp',
            'P5.182', 'P5.728',
            'P5.182.asinh', 'P5.728.asinh',
            'P5.182.hp', 'P5.728.hp',
            'P5.182.asinh-hp', 'P5.728.asinh-hp',
        ],
        'DK2': [
            'P4.182', 'P4.364', 'P4.728',
            'P4.182.asinh', 'P4.364.asinh', 'P4.728.asinh',
            'P4.182.hp', 'P4.728.hp',
            'P4.182.asinh-hp', 'P4.364.asinh-hp', 'P4.728.asinh-hp',
            'P5.182', 'P5.728',
            'P5.182.asinh', 'P5.728.asinh',
            'P5.182.hp', 'P5.728.hp',
            'P5.182.asinh-hp', 'P5.364.asinh-hp', 'P5.728.asinh-hp',
        ],
    },
}

def get_forecast_file(no):
    """Read the forecast CSV named ``<no>.csv`` inside ``forecast_dir``.

    Parameters
    ----------
    no : str
        File stem of the forecast (concatenated as-is between ``forecast_dir``
        and the ``.csv`` extension).

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` yields for that file.
    """
    csv_path = "".join((forecast_dir, no, ".csv"))
    return pd.read_csv(csv_path)

def flatten(l):
    """Return the cells of a 2-D pandas object as one flat list, row after row.

    ``l`` must expose ``.values`` as a two-dimensional array (e.g. a DataFrame);
    the rows are concatenated in order.
    """
    flat = []
    for row in l.values.tolist():
        flat.extend(row)
    return flat

In [127]:
forecasts_file = forecast_dir + 'files.txt'
forecasts = {}

# Metadata fields that follow the forecast number on every index line,
# '|'-separated and in this order.
FORECAST_FIELDS = ['file', 'area', 'window', 'start_date', 'last_date', 'std_fn', 'variables']

# The index is only read here, so open it read-only ('r+' would also require
# write permission on the file).
with open(forecasts_file, 'r') as f:
    lines = f.read().splitlines()

if not lines:
    print('Empty file')

for line in lines:
    # Line layout: "<no>.<file>|<area>|<window>|<start_date>|<last_date>|<std_fn>|<variables>"
    # Only the first '.' separates the forecast number from the metadata.
    no, _, meta = line.partition('.')
    if no == '':
        # blank line, or a line without a forecast number
        continue
    values = meta.split('|')
    if len(values) < len(FORECAST_FIELDS):
        raise ValueError('Malformed line in ' + forecasts_file + ': ' + repr(line))
    entry = {"data": get_forecast_file(no)}
    # zip() stops after the last expected field, so extra trailing fields are ignored
    entry.update(zip(FORECAST_FIELDS, values))
    forecasts[no] = entry

len(forecasts)

433

In [128]:
# table[<series>][<area>][<dates>][<model 1>][<model 2>] -> {"DM": ..., "P-value": ...}
table = {"consumption":{},"wind":{},"price":{}}
# forecasts_list["<series>.<area>.<dates>.<model name>"] -> forecast DataFrame
forecasts_list = {}
for f in forecasts:
    fc = forecasts[f]

    if not fc['area'] == area:
        continue

    # Short model id (e.g. 'P4') when the variable list is a known one,
    # otherwise the raw variable string is used as the model label.
    model = models_variables[fc['file']].get(fc['variables'], fc['variables'])
    # '.<std_fn>' suffix, omitted when std_fn is the string 'None'
    std_suffix = '' if fc['std_fn'] == 'None' else '.' + fc['std_fn']
    fc['name'] = str(model) + '.' + fc['window'] + std_suffix

    candidates = models_to_test[fc['file']][fc['area']]
    if fc['name'] not in candidates:
        continue

    # Dates are stored back with '.' separators so '-' can join start and end
    fc['start_date'] = fc['start_date'].replace('-','.')
    fc['last_date'] = fc['last_date'].replace('-','.')
    dates = fc['start_date'] + '-' + fc['last_date']

    fdm = (table[fc['file']]
           .setdefault(fc['area'], {})
           .setdefault(dates, {})
           .setdefault(fc['name'], {}))
    # One placeholder per candidate model (including the model itself)
    for model_tt in candidates:
        fdm[model_tt] = {"DM":0,"P-value":0}

    forecast_full_name = fc['file'] + '.' + fc['area'] + '.' + dates + '.' + model + '.' + fc['window'] + std_suffix
    forecasts_list[forecast_full_name] = fc['data']

print(len(forecasts_list))

80


In [129]:
from re import compile as re_compile  # not referenced in this cell

# Fill every (model 1, model 2) slot of `table` with the dm_test statistic and p-value.
for file, by_area in table.items():
    for a, by_dates in by_area.items():
        for d, by_model in by_dates.items():
            name_prefix = file + '.' + a + '.' + d + '.'
            for f1, comparisons in by_model.items():
                forecast_name1 = name_prefix + f1
                frame1 = forecasts_list[forecast_name1]
                # First and last values of the forecast's first column are used
                # as .loc row bounds into the observed data.
                first_index = frame1.iloc[0,0]
                last_index = frame1.iloc[-1,0]

                true_val = flatten(data[file][a].loc[first_index:last_index,'0':'23'])
                pred1_val = flatten(frame1.loc[:,'0':'23'])

                for f2, result in comparisons.items():
                    # A model is never tested against itself ...
                    if f1 == f2:
                        continue

                    forecast_name2 = name_prefix + f2
                    pred2_val = flatten(forecasts_list[forecast_name2].loc[:,'0':'23'])

                    # ... nor against a forecast with identical values
                    if pred2_val == pred1_val:
                        continue

                    rt = dm_test(true_val,pred1_val,pred2_val,h = 1, crit="MAD")
                    result['DM'] = round(rt[0],3)
                    result['P-value'] = round(rt[1],4)

In [134]:
file = 'price'
# Date window shown in the table. Alternatives previously used here:
# '2019.01.01-2019.12.31', '2019.05.13-2020.05.12', '2019.01.01-2020.05.12'
dates_to_show = '2020.01.01-2020.05.12'

# One record per (area, dates, model 1, model 2); the frame is built in one go
# instead of being grown row by row. The comprehension variables do not leak,
# so the notebook-level `area` setting is no longer overwritten.
dm_records = [
    {'Area': tbl_area, 'Dates': tbl_dates, 'Model 1': m1, 'Model 2': m2, 'DM': cell['DM']}
    # swap cell['DM'] for cell['P-value'] to tabulate p-values instead
    for tbl_area, by_dates in table[file].items()
    for tbl_dates, by_model in by_dates.items()
    for m1, row in by_model.items()
    for m2, cell in row.items()
]
if not dm_records:
    raise ValueError("No comparison results found in table['" + file + "']")

df = pd.DataFrame(dm_records, columns=['Area','Dates','Model 1','Model 2','DM'])
df = df.pivot_table(index=['Area','Dates','Model 1'], columns='Model 2', values='DM')
# '<br>' makes the HTML header wrap at every '.' of the model name
df.columns = [i.replace('.','<br>') for i in df.columns]
df = df.sort_index()
df = df[df.index.get_level_values('Dates') == dates_to_show]

# Full option key: the bare pattern 'precision' also matches
# 'styler.format.precision' on pandas >= 1.4 and raises OptionError there.
pd.set_option('display.precision', 3)
cm = ListedColormap(sns.diverging_palette(220, 10, sep=80, n=7))
# NOTE(review): `display` shadows IPython's display(); the name is kept because
# a later cell shows this object by that name.
# Explicit float format so the Styler shows 3 decimals whichever option it reads.
display = df.style.format('{:.3f}').background_gradient(cmap=cm,vmax=20,vmin=-20)

display

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P4 182,P4 182 asinh,P4 182 asinh-hp,P4 182 hp,P4 364,P4 364 asinh,P4 364 asinh-hp,P4 728,P4 728 asinh,P4 728 asinh-hp,P4 728 hp,P5 182,P5 182 asinh,P5 182 asinh-hp,P5 182 hp,P5 364 asinh-hp,P5 728,P5 728 asinh,P5 728 asinh-hp,P5 728 hp
Area,Dates,Model 1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
DK2,2020.01.01-2020.05.12,P4.182,0.0,-10.694,-10.52,4.671,-1.651,-10.221,-10.105,-0.965,-6.738,-5.996,0.711,4.1,-10.563,-10.405,5.658,-9.972,-0.709,-6.613,-5.888,0.954
DK2,2020.01.01-2020.05.12,P4.182.asinh,10.694,0.0,3.714,10.839,7.346,-4.761,-4.601,6.429,-0.944,0.024,7.959,10.936,1.826,3.572,11.07,-4.42,6.688,-0.791,0.157,8.202
DK2,2020.01.01-2020.05.12,P4.182.asinh-hp,10.52,-3.714,0.0,10.674,7.237,-4.921,-4.766,6.333,-1.056,-0.089,7.871,10.762,-0.659,1.645,10.905,-4.585,6.592,-0.903,0.044,8.113
DK2,2020.01.01-2020.05.12,P4.182.hp,-4.671,-10.839,-10.674,0.0,-1.802,-10.312,-10.2,-1.08,-6.819,-6.08,0.599,1.76,-10.707,-10.558,4.002,-10.066,-0.824,-6.694,-5.971,0.843
DK2,2020.01.01-2020.05.12,P4.364,1.651,-7.346,-7.237,1.802,0.0,-12.905,-12.67,0.38,-6.748,-5.826,2.495,1.908,-7.202,-7.106,2.045,-12.449,0.725,-6.588,-5.69,2.806
DK2,2020.01.01-2020.05.12,P4.364.asinh,10.221,4.761,4.921,10.312,12.905,0.0,2.767,10.431,3.56,4.683,11.73,10.331,4.805,4.953,10.415,4.795,10.696,3.723,4.816,11.972
DK2,2020.01.01-2020.05.12,P4.364.asinh-hp,10.105,4.601,4.766,10.2,12.67,-2.767,0.0,10.322,3.384,4.53,11.655,10.217,4.647,4.8,10.305,4.048,10.587,3.548,4.664,11.896
DK2,2020.01.01-2020.05.12,P4.728,0.965,-6.429,-6.333,1.08,-0.38,-10.431,-10.322,0.0,-8.481,-7.41,13.528,1.167,-6.315,-6.23,1.273,-10.136,7.563,-8.252,-7.214,14.721
DK2,2020.01.01-2020.05.12,P4.728.asinh,6.738,0.944,1.056,6.819,6.748,-3.56,-3.384,8.481,0.0,8.03,10.109,6.857,1.015,1.117,6.932,-3.19,8.783,3.528,8.499,10.387
DK2,2020.01.01-2020.05.12,P4.728.asinh-hp,5.996,-0.024,0.089,6.08,5.826,-4.683,-4.53,7.41,-8.03,0.0,9.139,6.122,0.054,0.157,6.199,-4.341,7.71,-6.467,3.071,9.419


In [102]:
# NOTE(review): the saved output of this cell lists DK1 / 2019.01.01-2019.12.31 and its
# execution count (102) is lower than that of the cell assigning `display` (134), so it
# presumably comes from an earlier run -- re-run the notebook top to bottom to refresh it.
display

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,P4 182,P4 182 asinh,P4 182 asinh-hp,P4 182 hp,P4 364,P4 364 asinh,P4 364 asinh-hp,P4 364 hp,P4 728,P4 728 asinh,P4 728 asinh-hp,P4 728 hp,P5 182,P5 182 asinh,P5 182 asinh-hp,P5 182 hp,P5 364,P5 364 hp,P5 728,P5 728 asinh,P5 728 asinh-hp,P5 728 hp
Area,Dates,Model 1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
DK1,2019.01.01-2019.12.31,P4.182,0.0,20.277,21.059,6.486,0.82,13.093,13.878,2.594,11.473,16.592,11.859,6.578,-4.806,18.971,20.01,-0.064,-0.233,2.467,11.149,16.426,13.152,8.062
DK1,2019.01.01-2019.12.31,P4.182.asinh,-20.277,0.0,8.591,-19.74,-11.638,-0.063,0.978,-10.004,-1.659,4.243,0.071,-5.096,-21.158,-7.439,-1.211,-20.393,-12.61,-10.209,-2.009,4.022,1.328,-3.887
DK1,2019.01.01-2019.12.31,P4.182.asinh-hp,-21.059,-8.591,0.0,-20.606,-12.081,-0.623,0.424,-10.478,-2.134,3.729,-0.405,-5.571,-21.854,-9.599,-6.071,-21.195,-13.027,-10.671,-2.484,3.496,0.856,-4.353
DK1,2019.01.01-2019.12.31,P4.182.hp,-6.486,19.74,20.606,0.0,0.483,12.763,13.59,2.26,11.207,16.34,11.672,6.348,-6.618,18.411,19.531,-3.249,-0.565,2.129,10.856,16.148,12.965,7.835
DK1,2019.01.01-2019.12.31,P4.364,-0.82,11.638,12.081,-0.483,0.0,20.513,20.854,9.98,18.675,20.663,12.743,7.759,-1.326,10.801,11.387,-0.813,-8.053,10.953,20.258,21.261,14.558,10.293
DK1,2019.01.01-2019.12.31,P4.364.asinh,-13.093,0.063,0.623,-12.763,-20.513,0.0,5.001,-17.443,-2.459,6.782,0.16,-6.578,-13.505,-0.8,-0.077,-13.022,-22.127,-18.154,-3.168,6.739,1.845,-5.22
DK1,2019.01.01-2019.12.31,P4.364.asinh-hp,-13.878,-0.978,-0.424,-13.59,-20.854,-5.001,0.0,-19.101,-3.957,5.387,-1.079,-8.323,-14.18,-1.801,-1.097,-13.753,-21.68,-19.257,-4.612,5.049,0.708,-6.872
DK1,2019.01.01-2019.12.31,P4.364.hp,-2.594,10.004,10.478,-2.26,-9.98,17.443,19.101,0.0,17.478,19.336,11.838,6.473,-3.053,9.153,9.765,-2.551,-10.25,-1.136,18.011,19.514,13.678,9.183
DK1,2019.01.01-2019.12.31,P4.728,-11.473,1.659,2.134,-11.207,-18.675,2.459,3.957,-17.478,0.0,11.267,2.907,-10.471,-11.735,0.921,1.529,-11.328,-18.875,-16.944,-2.163,10.499,5.171,-8.138
DK1,2019.01.01-2019.12.31,P4.728.asinh,-16.592,-4.243,-3.729,-16.34,-20.663,-6.782,-5.387,-19.336,-11.267,0.0,-9.148,-14.964,-16.961,-5.024,-4.366,-16.573,-21.525,-19.546,-11.97,-1.784,-7.124,-13.795


In [20]:
import random

# Fixed seed so the three shuffles below are reproducible
random.seed(123)

# Draw order (actual, pred1, pred2) matters: each call consumes the seeded generator
actual_lst = random.sample(range(0,100),100)
pred1_lst = random.sample(range(0,100),100)
pred2_lst = random.sample(range(80,180),100)

# Same comparison under the three loss criteria
for crit_kwargs in ({'crit': "MAD"}, {'crit': "MSE"}, {'crit': "poly", 'power': 4}):
    rt = dm_test(actual_lst,pred1_lst,pred2_lst,h = 1, **crit_kwargs)
    print(rt)

dm_return(DM=-9.228367936305471, p_value=5.2733566900864244e-15)
dm_return(DM=-8.60656865528822, p_value=1.1839305042979687e-13)
dm_return(DM=-6.203866595889205, p_value=1.284969716752485e-08)
