In [1]:
%matplotlib inline
import os
import pandas as pd
import re
import pickle as pkl
from utils.metrics import *

# Template for a results directory. The helpers below fill the three slots with
# (model name, compressor name or 'raw', dataset name).
root_baseline_data_path = '../output/%s/%s/%s/'
# Number of rows by which get_borders moves the start of the validation and
# test splits backwards.
seq_len = 96


def get_borders(data_name, data_len, window_len=None):
    """Return the start and end row offsets of the train/validation/test splits.

    Args:
        data_name: Dataset identifier. 'ETTm1' and 'ETTm2' use a fixed split
            that does not depend on ``data_len``; every other name uses a
            70 % / 10 % / 20 % train/validation/test split of ``data_len`` rows.
        data_len: Total number of rows in the dataset.
        window_len: Number of rows by which the validation and test starts are
            moved backwards. Defaults to the notebook-level ``seq_len`` so that
            existing two-argument calls behave exactly as before.

    Returns:
        ``(border1s, border2s)``: two 3-element lists holding the start and the
        end offsets of the train, validation and test splits, in that order.
    """
    if window_len is None:
        # Backward-compatible default: fall back to the notebook-level constant.
        window_len = seq_len

    if data_name in ['ETTm1', 'ETTm2']:
        # Fixed split of 12 + 4 + 4 blocks, each block being 30 * 24 * 4 rows
        # (presumably 30 days of 15-minute samples -- TODO confirm).
        block = 30 * 24 * 4
        train_end = 12 * block
        vali_end = train_end + 4 * block
        test_end = train_end + 8 * block
        return ([0, train_end - window_len, vali_end - window_len],
                [train_end, vali_end, test_end])

    num_train = int(data_len * 0.7)
    num_test = int(data_len * 0.2)
    # Validation takes whatever is left after the truncating int() casts.
    num_vali = data_len - num_train - num_test
    border1s = [0, num_train - window_len, data_len - num_test - window_len]
    border2s = [num_train, num_train + num_vali, data_len]

    return border1s, border2s


def get_eb(exp_str):
    """Extract the error bound encoded in an experiment name.

    The first occurrence of ``eb`` followed by digits, dots or underscores is
    taken, the ``eb`` prefix and all underscores are removed and the rest is
    parsed as a float. Values of 1 or more are multiplied by 0.01 (presumably
    they are percentages -- TODO confirm); smaller values are returned as is.

    Raises:
        IndexError: if ``exp_str`` contains no such token.
        ValueError: if the remaining text is not a valid float.
    """
    token = re.findall('eb[_.0-9]+', exp_str)[0]
    # The match always starts with the two letters 'eb'; drop them, then the
    # underscores that may surround the number.
    bound = float(token[2:].replace('_', ''))
    return bound * 0.01 if bound >= 1 else bound


def load(path):
    """Open ``path`` in binary mode and return the object unpickled from it.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(path, mode='rb') as stream:
        payload = pkl.load(stream)
    return payload


def metrics_ensemble(pred, true):
    """Score ``pred`` against ``true`` with every metric used in this notebook.

    Calls MAE, RMSE, RSE, NRMSE, CORR and PSNR (presumably provided by the
    star import from ``utils.metrics`` -- TODO confirm) with ``(pred, true)``.

    Returns:
        dict with the keys 'mae', 'rmse', 'nrmse', 'rse', 'corr' and 'psnr',
        in that order.
    """
    # Evaluation order of the metric helpers.
    scorers = (
        ('mae', MAE),
        ('rmse', RMSE),
        ('rse', RSE),
        ('nrmse', NRMSE),
        ('corr', CORR),
        ('psnr', PSNR),
    )
    scores = {label: scorer(pred, true) for label, scorer in scorers}

    # Key order of the returned dict (it becomes the DataFrame column order).
    report_order = ('mae', 'rmse', 'nrmse', 'rse', 'corr', 'psnr')
    return {label: scores[label] for label in report_order}


def load_pkl(path):
    """Return the object stored in the pickle file at ``path``.

    Behaves exactly like ``load`` defined earlier in this notebook.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(path, mode='rb') as pickle_file:
        restored = pkl.load(pickle_file)
    return restored


def get_baseline(model:str, data:str, model_prefix:str):
    """Score the forecasts of every run trained on raw (uncompressed) data.

    Args:
        model: Model name; first slot of ``root_baseline_data_path``.
        data: Dataset name; third slot of ``root_baseline_data_path``.
        model_prefix: Name of the run directory whose ``true.npy`` is used as
            the ground truth for all runs.

    Returns:
        DataFrame indexed by ``eb`` (always 0.0 here) holding the median of
        each metric returned by ``metrics_ensemble`` over all runs.

    Raises:
        FileNotFoundError: if ``true.npy`` or a run's ``pred.npy`` is missing.
    """
    raw_dir = root_baseline_data_path % (model, 'raw', data)
    raw_data = np.load(os.path.join(raw_dir, model_prefix, 'true.npy'))

    # Only the immediate sub-directories are runs. Taking just the first item
    # yielded by os.walk avoids descending into the runs themselves, where any
    # nested folder would be mistaken for a run and fail on its missing
    # pred.npy. A missing raw_dir yields no runs, as before.
    _, run_dirs, _ = next(os.walk(raw_dir), (raw_dir, [], []))

    metrics = [
        metrics_ensemble(np.load(os.path.join(raw_dir, run, 'pred.npy')), raw_data)
        for run in run_dirs
    ]

    df = pd.DataFrame(metrics)
    # Raw data means no compression, hence an error bound of 0 for every run.
    df['eb'] = [0.0] * len(metrics)
    return df.groupby(['eb']).median()


def get_transformation_error(data_file: str, data_name: str,
                            eblc_name: str, target_ot: str, ebs_values: tuple):
    """Measure how far each decompressed series is from the raw series.

    Reads ``../data/compressed/{eblc_name}/{data_file}`` and, on the test split
    given by ``get_borders``, scores every ``{target_ot}-E{eb}`` column against
    the ``{target_ot}-R`` column with ``metrics_ensemble``.

    Args:
        data_file: Parquet file name inside the compressor's folder.
        data_name: Dataset name passed to ``get_borders``.
        eblc_name: Compressor folder name.
        target_ot: Prefix of the target column names.
        ebs_values: Error bounds; each one selects a ``-E{eb}`` column.

    Returns:
        DataFrame with one row per error bound: the metric columns followed by
        an ``eb`` column.
    """
    frame = pd.read_parquet(f'../data/compressed/{eblc_name}/{data_file}')
    starts, stops = get_borders(data_name, len(frame))
    # Index 2 of the border lists is the test split.
    test_rows = slice(starts[2], stops[2])
    reference = frame[f'{target_ot}-R'].values[test_rows]

    scored = []
    for bound in ebs_values:
        decompressed = frame[[f'{target_ot}-E{bound}']].values[test_rows][:, 0]
        scored.append((bound, metrics_ensemble(decompressed, reference)))

    table = pd.DataFrame([row for _, row in scored])
    table['eb'] = [bound for bound, _ in scored]
    return table

def get_forecasting_results(model: str, data_file: str, model_prefix: str, data_name: str,
                            eblc_name: str, target_ot: str, ebs_values: tuple):
    """Collect forecasting accuracy and decompression error per error bound.

    Args:
        model: Model name; first slot of ``root_baseline_data_path``.
        data_file: Parquet file name inside ``../data/compressed/{eblc_name}/``.
        model_prefix: Run directory (under the model's 'raw' results) whose
            ``true.npy`` is used as ground truth for every forecast.
        data_name: Dataset name; used for the split borders and result paths.
        eblc_name: Compressor name; selects the parquet folder and the results
            folder.
        target_ot: Prefix of the target column names in the parquet file.
        ebs_values: Error bounds whose ``{target_ot}-E{eb}`` columns are scored.

    Returns:
        ``(forecasting_results, dec_error)``. ``forecasting_results`` holds the
        median forecasting metrics per error bound parsed from the run
        directory names with ``get_eb``, sorted by error bound. ``dec_error``
        holds the metrics of the decompressed series against the raw series,
        indexed by the values of ``ebs_values``.

    Raises:
        FileNotFoundError: if the ground truth is missing, or a run contains
            neither ``prediction.npy`` nor ``pred.npy``.
    """
    # The decompression error is exactly what get_transformation_error
    # computes, so reuse it instead of repeating the loop here.
    dec_error = get_transformation_error(data_file=data_file, data_name=data_name,
                                         eblc_name=eblc_name, target_ot=target_ot,
                                         ebs_values=ebs_values).groupby(['eb']).median()

    raw_dir = root_baseline_data_path % (model, 'raw', data_name)
    raw_data = np.load(os.path.join(raw_dir, model_prefix, 'true.npy'))

    runs_dir = root_baseline_data_path % (model, eblc_name, data_name)
    # Only the immediate sub-directories are runs. Taking just the first item
    # yielded by os.walk avoids descending into the runs themselves, where any
    # nested folder would be mistaken for a run.
    _, run_dirs, _ = next(os.walk(runs_dir), (runs_dir, [], []))

    metrics = []
    ebs = []
    for run in run_dirs:
        run_dir = os.path.join(runs_dir, run)
        # Runs store their forecast under one of two file names; guard only
        # the load so that errors raised while scoring are not masked.
        try:
            pred = np.load(os.path.join(run_dir, 'prediction.npy'))
        except FileNotFoundError:
            pred = np.load(os.path.join(run_dir, 'pred.npy'))
        metrics.append(metrics_ensemble(pred, raw_data))
        ebs.append(get_eb(run))

    df = pd.DataFrame(metrics)
    df['eb'] = ebs
    forecasting_results = df.groupby(['eb']).median().sort_index()
    return forecasting_results, dec_error


def concat_baseline_forecasting_result(baseline_results, forecasting_results, dec_error):
    """Combine baseline and per-error-bound results into one table.

    The baseline rows are stacked on top of the forecasting rows. Values are
    matched by position, so ``dec_error`` must have one row per row of
    ``forecasting_results``, in the same order, and the baseline is expected
    to contribute exactly one row.

    Returns:
        DataFrame indexed by ``error`` (0.0 for the baseline, then the 'nrmse'
        column of ``dec_error``) with the columns 'mae', 'rmse', 'nrmse',
        'rse', 'corr' (forecasting metrics), 'data_corr' (1.0 for the baseline,
        then the 'corr' column of ``dec_error``) and 'eb' (0.0 for the
        baseline, then the index of ``dec_error``).
    """
    stacked = pd.concat([baseline_results, forecasting_results], axis=0)

    table = pd.DataFrame()
    table['error'] = [0.0] + list(dec_error['nrmse'].values)
    for metric in ('mae', 'rmse', 'nrmse', 'rse', 'corr'):
        table[metric] = stacked[metric].values
    table['data_corr'] = [1.0] + list(dec_error['corr'].values)

    table = table.set_index('error')
    # Added after re-indexing; the list is assigned by position.
    table['eb'] = [0.0] + list(dec_error.index)
    return table



In [8]:
# DLinear on the 'aus' dataset: score the raw-data baseline and the forecasts
# obtained for each compressor, then write everything to a single CSV.
dlinear_prefix = 'dlinear_aus_ftS_sl96_ll48_pl24_test_it_0_eb_0'

# Error bounds per compressor; each value selects a '<target>-E<eb>' column of
# the compressor's parquet file.
sz_ebs = (0.01, 0.03, 0.05, 0.07, 0.10, 0.15, 0.20, 0.25, 0.30, 0.40, 0.50, 0.65, 0.8)
pmc_swing_ebs = (1.0, 3.0, 5.0, 7.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0)
eblc_ebs = {'sz': sz_ebs, 'pmc': pmc_swing_ebs, 'swing': pmc_swing_ebs}

data_results = pd.DataFrame()
baseline_results = get_baseline('dlinear', 'aus', dlinear_prefix)

eblc_outputs = {}
for eblc_name, ebs_values in eblc_ebs.items():
    forecasting_results, dec_error = get_forecasting_results(
        model='dlinear',
        model_prefix=dlinear_prefix,
        data_name='aus',
        data_file='aus_electrical_demand_points.parquet',
        eblc_name=eblc_name,
        target_ot='y',
        ebs_values=ebs_values)
    eblc_outputs[eblc_name] = (forecasting_results, dec_error)

    concatenated = concat_baseline_forecasting_result(baseline_results, forecasting_results, dec_error)
    concatenated['eblc'] = eblc_name
    # The baseline is always the first row. Tag it by position: a lookup by
    # index value (error == 0) would also relabel any compressor row whose
    # decompression NRMSE happens to be exactly 0.
    concatenated.iloc[0, concatenated.columns.get_loc('eblc')] = 'baseline'

    data_results = pd.concat([data_results, concatenated])
    # The baseline row is identical for every compressor; keep a single copy.
    data_results = data_results.drop_duplicates()

# Keep the per-compressor names bound, in case later cells read them.
sz_forecasting_results, sz_dec_error = eblc_outputs['sz']
pmc_forecasting_results, pmc_dec_error = eblc_outputs['pmc']
swing_forecasting_results, swing_dec_error = eblc_outputs['swing']

data_results['data'] = 'aus'
# NOTE(review): the informer cell of this notebook writes to
# '../results/tfe/per_model/' instead -- confirm which folder is intended.
data_results.to_csv('../results/tfe/dlinear_results.csv')

In [3]:
# Informer on the 'aus' dataset: score the raw-data baseline and the forecasts
# obtained for each compressor, then write everything to a single CSV.
informer_prefix = 'informer_aus_ftS_sl96_ll48_pl24_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_eb0_train_rawTrue_test_0'

# Error bounds per compressor; each value selects a '<target>-E<eb>' column of
# the compressor's parquet file.
sz_ebs = (0.01, 0.03, 0.05, 0.07, 0.10, 0.15, 0.20, 0.25, 0.30, 0.40, 0.50, 0.65, 0.8)
pmc_swing_ebs = (1.0, 3.0, 5.0, 7.0, 10.0, 15.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0)
eblc_ebs = {'sz': sz_ebs, 'pmc': pmc_swing_ebs, 'swing': pmc_swing_ebs}

data_results = pd.DataFrame()
baseline_results = get_baseline('informer', 'aus', informer_prefix)

eblc_outputs = {}
for eblc_name, ebs_values in eblc_ebs.items():
    forecasting_results, dec_error = get_forecasting_results(
        model='informer',
        model_prefix=informer_prefix,
        data_name='aus',
        data_file='aus_electrical_demand_points.parquet',
        eblc_name=eblc_name,
        target_ot='y',
        ebs_values=ebs_values)
    eblc_outputs[eblc_name] = (forecasting_results, dec_error)

    concatenated = concat_baseline_forecasting_result(baseline_results, forecasting_results, dec_error)
    concatenated['eblc'] = eblc_name
    # The baseline is always the first row. Tag it by position: a lookup by
    # index value (error == 0) would also relabel any compressor row whose
    # decompression NRMSE happens to be exactly 0.
    concatenated.iloc[0, concatenated.columns.get_loc('eblc')] = 'baseline'

    data_results = pd.concat([data_results, concatenated])
    # The baseline row is identical for every compressor; keep a single copy.
    data_results = data_results.drop_duplicates()

# Keep the per-compressor names bound, in case later cells read them.
sz_forecasting_results, sz_dec_error = eblc_outputs['sz']
pmc_forecasting_results, pmc_dec_error = eblc_outputs['pmc']
swing_forecasting_results, swing_dec_error = eblc_outputs['swing']

data_results['data'] = 'aus'
# NOTE(review): the dlinear cell of this notebook writes to '../results/tfe/'
# (no 'per_model' folder) -- confirm which folder is intended.
data_results.to_csv('../results/tfe/per_model/informer_results.csv')