In [462]:
import numpy as np
from kneed import KneeLocator
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os
from os.path import join


def compute_cr(raw, compressed, as_percentage=False):
    """Compare the size of ``raw`` data with its ``compressed`` counterpart.

    Parameters
    ----------
    raw : number or array-like
        Size of the uncompressed data.
    compressed : number or array-like
        Size of the compressed data.
    as_percentage : bool, default False
        When False, return the ratio ``raw / compressed`` rounded to two
        decimals.  When True, return the space saving in percent, derived from
        the ``compressed / raw`` fraction after it was rounded to two decimals.
    """
    if not as_percentage:
        return np.round(raw / compressed, 2)

    # The fraction is rounded *before* being turned into a percentage.
    kept_fraction = np.round(compressed / raw, 2)
    return (1 - kept_fraction) * 100


def _load_cr_json(file_name):
    """Parse one JSON file from ``../results/cr`` and close the file handle afterwards."""
    with open(join('..', 'results', 'cr', file_name)) as handle:
        return json.load(handle)


def get_cr_pweather():
    """Build the compression-ratio table for the ``pweather`` data set.

    Reads ``pmc_pweather_cr.json``, ``swing_pweather_cr.json`` and
    ``sz_pweather_cr.json`` from ``../results/cr``.  Every size is turned into
    a ratio against the ``'OT-R'`` entry of the same file via ``compute_cr``.

    * PMC / SWING files: each top-level key maps to a dict with a
      ``'segments'`` dict; one output column is produced per segment key
      (the reference size is ``['OT-R']['segments']['gzip']``).  The error
      bound is ``float(key[4:]) * 0.01`` rounded to 4 decimals, or ``0.0``
      when the key has at most four characters.
    * SZ file: each top-level key maps directly to a size; the ratio is
      stored in the ``'gzip'`` column.  The error bound is ``float(key[1:])``
      for keys starting with ``'E'`` and ``0`` otherwise.

    Returns
    -------
    pandas.DataFrame
        All rows, sorted by ``error_bound`` with a fresh index, with the extra
        columns ``compression`` and ``data``.  The same frame is also written
        to ``../results/cr/pweather_cr.csv`` (without the index).
    """
    pweather_pmc = _load_cr_json('pmc_pweather_cr.json')
    pweather_swing = _load_cr_json('swing_pweather_cr.json')
    pweather_sz = _load_cr_json('sz_pweather_cr.json')

    list_cr = []

    for data, name in [(pweather_pmc, 'pweather_pmc'),
                       (pweather_swing, 'pweather_swing')]:
        uncompressed = data['OT-R']['segments']['gzip']

        # Collect one single-row frame per error bound and concatenate once,
        # instead of re-copying a growing frame on every iteration.
        frames = []
        for k, entry in data.items():
            lossy_ratio = {key: [compute_cr(uncompressed, value)]
                           for key, value in entry['segments'].items()}
            df_lossy = pd.DataFrame.from_dict(lossy_ratio)
            df_lossy['error_bound'] = np.round(float(k[4:]) * 0.01, 4) if len(k) > 4 else 0.0
            frames.append(df_lossy)

        # Reversed so the row order matches the previous "prepend" behaviour
        # (it only matters for ties in the final sort).
        df_ratio = pd.concat(frames[::-1])
        df_ratio['compression'] = name.split('_')[1]
        df_ratio['data'] = name.split('_')[0]
        list_cr.append(df_ratio)

    sz_name = 'pweather_sz'
    uncompressed = pweather_sz['OT-R']
    sz_rows = [(compute_cr(uncompressed, size), float(k[1:]) if k.startswith('E') else 0)
               for k, size in pweather_sz.items()]
    df_ratio = pd.DataFrame(sz_rows, columns=['gzip', 'error_bound'])
    df_ratio['compression'] = sz_name.split('_')[1]
    df_ratio['data'] = sz_name.split('_')[0]
    list_cr.append(df_ratio)

    concat_df = pd.concat(list_cr)
    concat_df = concat_df.sort_values('error_bound')
    concat_df.reset_index(inplace=True, drop=True)
    concat_df.to_csv(join('..', 'results', 'cr', 'pweather_cr.csv'), index=False)
    return concat_df


def join_pweather_cr():
    """Attach compression ratios to ``../results/tfe/pprocessed_weather.csv``.

    The compression-ratio table from ``get_cr_pweather`` is relabelled
    (``data`` set to ``'WEATHER'``, ``compression`` upper-cased,
    ``error_bound`` -> ``eb``, ``gzip`` -> ``cr``) and left-joined onto the
    weather results on ``(data, compression, eb)``.  The result overwrites the
    input CSV.

    The function is safe to run repeatedly: columns that a previous run
    already joined into the CSV are dropped before joining again, so the join
    no longer fails with an overlapping-columns error on the second call.

    Returns
    -------
    None
    """
    weather_path = '../results/tfe/pprocessed_weather.csv'
    pweather = pd.read_csv(weather_path)

    cr_pw = get_cr_pweather()
    cr_pw['data'] = 'WEATHER'
    cr_pw['compression'] = cr_pw['compression'].str.upper()
    ncr_pw = (cr_pw.rename({'error_bound': 'eb', 'gzip': 'cr'}, axis=1)
                   .set_index(['data', 'compression', 'eb']))

    # Because the output path equals the input path, a re-run would find the
    # previously joined columns in the file; remove them so they get refreshed.
    stale_columns = pweather.columns.intersection(ncr_pw.columns)
    pweather = pweather.drop(columns=stale_columns)

    joined = pweather.join(ncr_pw, on=['data', 'compression', 'eb'])
    joined.to_csv(weather_path, index=False)


def process_weather():
    """Merge the small-error-bound WEATHER results with the regular WEATHER results.

    Inputs (both under ``../results/tfe``):

    * ``smaller_eb_weather_all.csv`` - ``model``, ``data`` and ``eblc`` are
      upper-cased, the columns are renamed to the naming used by
      ``all_models_results.csv`` and only rows whose ``te_metric`` and
      ``tfe_metric`` are both ``'nrmse'`` are kept.
    * ``all_models_results.csv`` - only WEATHER rows with ``eb > 0.03`` and
      ``te_metric == 'nrmse'`` are kept; the ``error`` and
      ``compression ratio`` columns are dropped.

    The two parts are concatenated and written to
    ``../results/tfe/processed_weather.csv``.  Nothing is returned.
    """
    # NOTE(review): other code in this notebook reads 'pprocessed_weather.csv'
    # (double "p"); confirm whether the file written here is meant to be that one.
    all_results = pd.read_csv('../results/tfe/all_models_results.csv')
    small_eb = pd.read_csv('../results/tfe/smaller_eb_weather_all.csv', index_col=0)
    # small_eb = small_eb[small_eb.eb > 0]

    # Bring the labels and column names in line with all_models_results.csv.
    for column in ('model', 'data', 'eblc'):
        small_eb[column] = small_eb[column].str.upper()
    small_eb = small_eb.rename(
        columns={'value': 'forecasting error', 'eblc': 'compression', 'metric': 'tfe_metric', 'tfe': 'TFE'})
    both_nrmse = (small_eb['te_metric'] == 'nrmse') & (small_eb['tfe_metric'] == 'nrmse')
    small_eb = small_eb[both_nrmse]

    weather_results = all_results[all_results['data'] == 'WEATHER']
    larger_eb_mask = (weather_results['eb'] > 0.03) & (weather_results['te_metric'] == 'nrmse')
    larger_eb = weather_results[larger_eb_mask].drop(columns=['error', 'compression ratio'])

    merged = pd.concat([small_eb, larger_eb])
    merged.to_csv('../results/tfe/processed_weather.csv', index=False)

def detect_elbow(data_name, model, eblc):
    """Locate the elbow of the TFE-versus-decompression-error curve.

    Rows of ``../results/tfe/all_models_results.csv`` are restricted to the
    given data set, forecasting model and compressor (``eblc``) with
    ``te_metric == 'nrmse'``.  The Kneedle algorithm (``KneeLocator``) is then
    run with ``te`` on the x-axis and ``TFE`` on the y-axis.

    Returns
    -------
    The ``elbow`` attribute of the fitted ``KneeLocator``.
    """
    all_results = pd.read_csv('../results/tfe/all_models_results.csv')
    wanted = ((all_results['data'] == data_name)
              & (all_results['te_metric'] == 'nrmse')
              & (all_results['compression'] == eblc)
              & (all_results['model'] == model))
    selected = all_results[wanted]

    decompression_error = selected['te'].values   # x-axis
    accuracy_lost = selected['TFE'].values        # y-axis

    # Kneedle on a convex, increasing curve.
    locator = KneeLocator(decompression_error, accuracy_lost,
                          curve='convex', direction='increasing', S=0.5)
    return locator.elbow

def get_tfe(model_results):
    """Add the relative forecasting error (``tfe``) with respect to the baseline.

    ``tfe`` is ``(nrmse - baseline_nrmse) / baseline_nrmse`` where the baseline
    is the row whose ``eblc`` equals ``'baseline'``.  Three copies of the
    baseline, relabelled ``'pmc'``, ``'sz'`` and ``'swing'`` with ``tfe = 0``,
    are placed in front of the results so every compressor gets a zero-error
    starting point.

    Parameters
    ----------
    model_results : pandas.DataFrame
        Needs the columns ``eblc`` and ``nrmse``.  Intended for a frame that
        contains a single baseline row.  The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The relabelled baseline rows followed by a copy of ``model_results``
        with the extra ``tfe`` column.

    Raises
    ------
    ValueError
        If ``model_results`` has no ``'baseline'`` row.
    """
    baseline_result = model_results[model_results.eblc == 'baseline']
    if baseline_result.empty:
        raise ValueError("model_results has no row with eblc == 'baseline'")
    baseline_nrmse = baseline_result.nrmse.values

    # Work on a copy: callers pass filtered slices, and writing a column into
    # them would mutate (or warn about) the caller's data.
    results = model_results.copy()
    results['tfe'] = (results.nrmse.values - baseline_nrmse) / baseline_nrmse

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    baseline_result = pd.concat([baseline_result] * 3, ignore_index=True)
    baseline_result.at[0, 'eblc'] = 'pmc'
    baseline_result.at[1, 'eblc'] = 'sz'
    baseline_result.at[2, 'eblc'] = 'swing'
    baseline_result['tfe'] = 0.
    return pd.concat([baseline_result, results])


def detect_elbow_aus(model, eblc):
    """Find the elbow of the TFE-versus-error curve for the ``aus`` data set.

    Reads ``../results/tfe/per_model/{model}_results.csv`` and
    ``../results/cr/all_cr.csv``, keeps the ``aus`` rows, computes ``tfe`` with
    ``get_tfe`` and keeps the rows of compressor ``eblc`` whose ``tfe`` is
    below 0.5.  Error bounds are multiplied by 0.01 unless ``eblc`` contains
    ``'sz'``.  Compression ratios (column ``gzip`` of the CR file, renamed to
    ``cr``) are left-joined on ``(eb, eblc, data)`` and the Kneedle algorithm
    is run with ``error`` on the x-axis and ``tfe`` on the y-axis.

    Returns
    -------
    tuple
        ``(elbow, tfe, cr, eb)`` where ``elbow`` is the x value of the elbow
        and the other three are one-element arrays holding the values of the
        first row whose ``error`` equals the elbow.

    Raises
    ------
    IndexError
        If no elbow is found (same exception type as before, now with an
        explicit message).
    """
    # Error bounds are floats on both sides of the join; round them to a common
    # precision so that e.g. 35 * 0.01 (0.35000000000000003) still matches 0.35.
    eb_decimals = 8
    join_keys = ['eb', 'eblc', 'data']

    model_result = pd.read_csv(f'../results/tfe/per_model/{model}_results.csv')
    model_result = model_result[model_result['data'] == 'aus']
    model_result = get_tfe(model_result)
    # Explicit copy: the frame is modified below and must not alias a slice.
    model_result = model_result[(model_result['eblc'] == eblc) & (model_result['tfe'] < 0.5)].copy()

    error_bounds = model_result['eb'].astype(float)
    if 'sz' not in eblc:
        error_bounds = error_bounds * 0.01
    model_result['eb'] = np.round(error_bounds, eb_decimals)

    cr_results = pd.read_csv('../results/cr/all_cr.csv')
    cr_results = cr_results[(cr_results['data'] == 'aus') & (cr_results['compression'] == eblc)]
    cr_results = cr_results.rename({'error_bound': 'eb', 'compression': 'eblc', 'gzip': 'cr'}, axis=1)
    cr_results['eb'] = np.round(cr_results['eb'].astype(float), eb_decimals)
    cr_results = cr_results[cr_results['eb'].isin(model_result['eb'].unique())]

    joined = model_result.set_index(join_keys).join(cr_results.set_index(join_keys))
    joined = joined.reset_index()

    y = joined['tfe'].values     # forecasting accuracy lost
    x = joined['error'].values   # decompression error
    cr = joined['cr'].values
    eb = joined['eb'].values

    elbow = KneeLocator(x, y, curve='convex', direction='increasing', S=1)
    if elbow.elbow is None:
        raise IndexError(f'no elbow found for model={model!r}, eblc={eblc!r}')

    # Position of the first point sitting at the elbow; kept as a one-element
    # index array so the returned values stay one-element arrays.
    at_elbow = np.argwhere(x == elbow.elbow)[0]
    return elbow.elbow, y[at_elbow], cr[at_elbow], eb[at_elbow]


def detect_elbow_pweather(data_name, model, eblc):
    """Find the elbow of the TFE-versus-decompression-error curve.

    For data sets whose name does not contain ``'WEATHER'`` the rows come from
    ``../results/tfe/all_models_results.csv`` (filtered on ``data``) and the
    compression ratio is read from the ``compression ratio`` column.  For
    WEATHER the rows come from ``../results/tfe/pprocessed_weather.csv`` and
    the compression ratio is read from the ``cr`` column.  In both cases only
    rows with ``te_metric == 'nrmse'`` for the given ``model`` and compressor
    ``eblc`` are used.  The Kneedle algorithm is run with ``te`` on the x-axis
    and ``TFE`` on the y-axis.

    Returns
    -------
    tuple
        ``(elbow, tfe, cr, eb)`` where ``elbow`` is the x value of the elbow
        and the other three are one-element arrays holding the values of the
        first row whose ``te`` equals the elbow.

    Raises
    ------
    IndexError
        If no elbow is found (same exception type as before, now with an
        explicit message).
    """
    if 'WEATHER' not in data_name:
        results = pd.read_csv('../results/tfe/all_models_results.csv')
        results = results[results['data'] == data_name]
        cr_column = 'compression ratio'
    else:
        # This file is not filtered on `data` here.
        results = pd.read_csv('../results/tfe/pprocessed_weather.csv')
        cr_column = 'cr'

    results = results[(results['te_metric'] == 'nrmse') &
                      (results['compression'] == eblc) &
                      (results['model'] == model)]

    y = results['TFE'].values   # forecasting accuracy lost
    x = results['te'].values    # decompression error
    cr = results[cr_column].values
    eb = results['eb'].values

    elbow = KneeLocator(x, y, curve='convex', direction='increasing', S=0.5)
    if elbow.elbow is None:
        raise IndexError(
            f'no elbow found for data={data_name!r}, model={model!r}, eblc={eblc!r}')

    # Position of the first point sitting at the elbow; kept as a one-element
    # index array so the returned values stay one-element arrays.
    at_elbow = np.argwhere(x == elbow.elbow)[0]
    return elbow.elbow, y[at_elbow], cr[at_elbow], eb[at_elbow]


In [273]:
# all_elbows = {'data':[], 'compression':[], 'model':[], 'elbow':[]}

# for dataset in ['ETTM1', 'ETTM2', 'SOLAR', 'WEATHER', 'WIND']:
#     for model in ['DLINEAR', 'GRU', 'NBEATS', 'INFORMER', 'TRANSFORMER']:
#         for eblc in ['PMC', 'SWING', 'SZ']:
#             elbow = detect_elbow(dataset, model, eblc)
#             all_elbows['data'].append(dataset)
#             all_elbows['compression'].append(eblc)
#             all_elbows['model'].append(model)
#             all_elbows['elbow'].append(elbow)

In [461]:
# Elbow of the TFE-vs-decompression-error curve for every
# (data set, forecasting model, compressor) combination.
# 'te' holds the x position of the elbow; 'tfe', 'cr' and 'eb' hold the
# values of the row located at that elbow.
all_elbows = {'data':[], 'compression':[], 'model':[], 'te':[], 'tfe':[], 'cr':[], 'eb': []}

for dataset in ['ETTM1', 'ETTM2', 'SOLAR', 'WEATHER', 'WIND']:
    for model in ['DLINEAR', 'GRU', 'NBEATS', 'INFORMER', 'TRANSFORMER']:
        for eblc in ['PMC', 'SWING', 'SZ']:
            # The SOLAR / GRU combination is deliberately left out.
            if 'SOLAR' in dataset and 'GRU' in model:
                continue
            print(dataset, model, eblc)
            elbow, tfe, cr, eb = detect_elbow_pweather(dataset, model, eblc)
            all_elbows['data'].append(dataset)
            all_elbows['compression'].append(eblc)
            all_elbows['model'].append(model)
            all_elbows['te'].append(elbow)
            # tfe, cr and eb come back as one-element arrays.
            all_elbows['tfe'].append(tfe[0])
            all_elbows['cr'].append(cr[0])
            all_elbows['eb'].append(eb[0])

elbow_df = pd.DataFrame(all_elbows)
# nogru_df = elbow_df[~((elbow_df.model == 'GRU') & (elbow_df.data == 'SOLAR'))]
# numeric_only=True: the string column 'model' cannot be averaged and makes
# mean() raise a TypeError on pandas >= 2.0.
elbow_df.groupby(['compression', 'data']).mean(numeric_only=True)

ETTM1 DLINEAR PMC
ETTM1 DLINEAR SWING
ETTM1 DLINEAR SZ
ETTM1 GRU PMC
ETTM1 GRU SWING
ETTM1 GRU SZ
ETTM1 NBEATS PMC
ETTM1 NBEATS SWING
ETTM1 NBEATS SZ
ETTM1 INFORMER PMC
ETTM1 INFORMER SWING
ETTM1 INFORMER SZ
ETTM1 TRANSFORMER PMC
ETTM1 TRANSFORMER SWING
ETTM1 TRANSFORMER SZ
ETTM2 DLINEAR PMC
ETTM2 DLINEAR SWING
ETTM2 DLINEAR SZ
ETTM2 GRU PMC
ETTM2 GRU SWING
ETTM2 GRU SZ
ETTM2 NBEATS PMC
ETTM2 NBEATS SWING
ETTM2 NBEATS SZ
ETTM2 INFORMER PMC
ETTM2 INFORMER SWING
ETTM2 INFORMER SZ
ETTM2 TRANSFORMER PMC
ETTM2 TRANSFORMER SWING
ETTM2 TRANSFORMER SZ
SOLAR DLINEAR PMC
SOLAR DLINEAR SWING
SOLAR DLINEAR SZ
SOLAR NBEATS PMC
SOLAR NBEATS SWING
SOLAR NBEATS SZ
SOLAR INFORMER PMC
SOLAR INFORMER SWING
SOLAR INFORMER SZ
SOLAR TRANSFORMER PMC
SOLAR TRANSFORMER SWING
SOLAR TRANSFORMER SZ
WEATHER DLINEAR PMC
WEATHER DLINEAR SWING
WEATHER DLINEAR SZ
WEATHER GRU PMC
WEATHER GRU SWING
WEATHER GRU SZ
WEATHER NBEATS PMC
WEATHER NBEATS SWING
WEATHER NBEATS SZ
WEATHER INFORMER PMC
WEATHER INFORMER SWING
WEATHE

Unnamed: 0_level_0,Unnamed: 1_level_0,te,tfe,cr,eb
compression,data,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
PMC,ETTM1,0.027511,0.025021,11.732,0.22
PMC,ETTM2,0.023216,0.080671,8.8,0.12
PMC,SOLAR,0.014758,0.00427,8.51,0.375
PMC,WEATHER,0.02979,0.01351,26.984,0.0176
PMC,WIND,0.043517,0.063031,21.654,0.17
SWING,ETTM1,0.025371,0.006671,3.766,0.16
SWING,ETTM2,0.016406,0.049288,4.75,0.07
SWING,SOLAR,0.0216,0.042805,3.49,0.275
SWING,WEATHER,0.034478,0.032075,12.266,0.017
SWING,WIND,0.032647,0.009227,6.99,0.1


In [463]:
# Elbows for the 'aus' data set, one per (forecasting model, compressor).
# NOTE: this cell re-binds `all_elbows` and `elbow_df`; the resulting frame has
# no 'data' column and uses lower-case compressor names.
all_elbows = {'compression':[], 'model':[], 'te':[], 'tfe':[], 'cr':[], 'eb':[]}
for model in ['dlinear', 'nbeats', 'informer', 'transformer']:
    for eblc in ['pmc', 'swing', 'sz']:
        print(model, eblc)
        elbow, tfe, cr, eb = detect_elbow_aus(model, eblc)
        all_elbows['model'].append(model)
        all_elbows['compression'].append(eblc)
        all_elbows['te'].append(elbow)
        # tfe, cr and eb come back as one-element arrays.
        all_elbows['tfe'].append(tfe[0])
        all_elbows['cr'].append(cr[0])
        all_elbows['eb'].append(eb[0])

elbow_df = pd.DataFrame(all_elbows)
# numeric_only=True: the string column 'model' cannot be averaged and makes
# mean() raise a TypeError on pandas >= 2.0.
elbow_df.groupby(['compression']).mean(numeric_only=True)

dlinear pmc
dlinear swing
dlinear sz
nbeats pmc
nbeats swing
nbeats sz
informer pmc
informer swing
informer sz
transformer pmc
transformer swing
transformer sz


Unnamed: 0_level_0,te,tfe,cr,eb
compression,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
pmc,0.020974,0.076906,8.3225,0.055
swing,0.022528,0.059341,5.515,0.055
sz,0.017925,0.039544,23.6575,0.045


In [None]:
# One heatmap per compressor: data sets on the rows, forecasting models on the
# columns, mean elbow position in the cells.
# The elbow position is stored under 'te' in the frames built by the cells of
# this notebook; an 'elbow' column is still honoured if the frame has one.
value_column = 'elbow' if 'elbow' in elbow_df.columns else 'te'
missing_columns = {'data', 'model', 'compression', value_column} - set(elbow_df.columns)
if missing_columns:
    raise KeyError(
        f"elbow_df is missing {sorted(missing_columns)}; this cell needs the per-dataset "
        "elbow table (data / model / compression), not the 'aus' one")

for eblc in ['PMC', 'SWING', 'SZ']:
    df_pivot = elbow_df[elbow_df['compression'] == eblc].pivot_table(
        index='data', columns='model', values=value_column, aggfunc='mean')
    if df_pivot.empty:
        # Nothing to draw for this compressor; an empty heatmap would raise.
        print(f"No elbow values for {eblc}; skipping heatmap")
        continue

    # Create the heatmap
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(df_pivot, annot=True, cmap="YlGnBu", cbar_kws={'label': 'Elbow Value'}, ax=ax)
    ax.set_title(f"Heatmap for {eblc} Compression")
    plt.show()

In [455]:
# Rebuild ../results/tfe/processed_weather.csv from all_models_results.csv and
# smaller_eb_weather_all.csv (see process_weather).
process_weather()