In [2]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import wandb
import seaborn as sns
%matplotlib qt

In [2]:
# Alternative input: df = pd.read_pickle('logs_teconer.pkl')
# Load the experiment log table into the notebook-global `df`.
# NOTE(review): the plotting cells further down each reassign `df` from their
# own pickle file, so this load only matters when cells are run selectively —
# confirm it is still needed.
df = pd.read_pickle('logs_msmsa_plus_test.pkl')

## Hyper MAE Lineplots

In [3]:

# Hyper-* benchmark figure: one panel per dataset showing MAE against the
# noise scale, with one line per method, and a single shared legend on top.

# set all Matplotlib fonts to Times New Roman
plt.rcParams['font.family'] = 'Times New Roman'

# Panels are filled row-major over the 3x2 grid, so consecutive entries sit
# side by side.
datasets = ['Hyper-A', 'Hyper-LN', 'Hyper-I', 'Hyper-RW', 'Hyper-G', 'Hyper-GU']

methods = ['Naive', 'PH', 'DDM', 'ADWIN', 'KSWIN', 'MSMSA']
color_palette = {'Naive':'#A46750', 'PH':'#EDB732', 'DDM':'#DA4C4C', 'ADWIN':'#A0C75C', 'KSWIN':'#E57439', 'MSMSA':'#5387DD'}

# load logs.pkl file (replaces any `df` left over from an earlier cell)
df = pd.read_pickle('logs.pkl')
# Optional filters, disabled by default:
# df = df[df['dataset'].isin(datasets)]
# df = df[df['method'].isin(methods)]

# One x tick per integer noise level from 0 up to the largest value in the
# log; this does not depend on the panel, so compute it once.
noise_ticks = range(df['noise_var'].max() + 1)

fig, axes = plt.subplots(3, 2, figsize=(5, 9))
for panel_idx, (ax, dataset) in enumerate(zip(axes.flat, datasets)):
    sns.lineplot(
        data=df[df['dataset'] == dataset],
        x='noise_var', y='MAE', hue='method', style='method',
        ax=ax,
        palette=color_palette,
        # Only the first panel builds a legend; its entries feed the shared
        # figure-level legend created after the loop.
        legend=(panel_idx == 0),
    ).set_title(dataset)

    # Raw string: '\s' is not a valid escape sequence, so the non-raw form
    # emits a warning on current Python versions. The label text is unchanged.
    ax.set(ylabel='MAE', xlabel=r'Noise scale, $\sigma$', yscale='linear', xticks=noise_ticks)

# Promote the first panel's legend entries to one figure-level legend, then
# drop the per-axes copy so it does not cover the data.
fig.legend(ncol=3, loc="upper center", bbox_to_anchor=(0.5, 0.97))
axes[0, 0].get_legend().remove()

# tight_layout first, then reserve the top band for the figure legend and
# widen the vertical gaps; the reverse order would discard the adjustment.
plt.tight_layout()
fig.subplots_adjust(top=0.85, hspace=0.45)

## Hyper Training Size Lineplots (subplots)

In [4]:
# Hyper-* benchmark figure: one panel per dataset showing the
# MeanValidityHorizon column (labelled "Average training size") against the
# noise scale on a log y-axis, with one line per method and a shared legend.

# set all Matplotlib fonts to Times New Roman
plt.rcParams['font.family'] = 'Times New Roman'

# Panels are filled row-major over the 3x2 grid, so consecutive entries sit
# side by side.
datasets = ['Hyper-A', 'Hyper-LN', 'Hyper-I', 'Hyper-RW', 'Hyper-G', 'Hyper-GU']
methods = ['Naive', 'PH', 'DDM', 'ADWIN', 'KSWIN', 'MSMSA']
color_palette = {'Naive':'#A46750', 'PH':'#EDB732', 'DDM':'#DA4C4C', 'ADWIN':'#A0C75C', 'KSWIN':'#E57439', 'MSMSA':'#5387DD'}

# load logs.pkl file (replaces any `df` left over from an earlier cell)
df = pd.read_pickle('logs.pkl')
# Optional filters, disabled by default:
# df = df[df['dataset'].isin(datasets)]
# df = df[df['method'].isin(methods)]

# One x tick per integer noise level from 0 up to the largest value in the
# log; this does not depend on the panel, so compute it once.
noise_ticks = range(df['noise_var'].max() + 1)

fig, axes = plt.subplots(3, 2, figsize=(5, 9))
for panel_idx, (ax, dataset) in enumerate(zip(axes.flat, datasets)):
    sns.lineplot(
        data=df[df['dataset'] == dataset],
        x='noise_var', y='MeanValidityHorizon', hue='method', style='method',
        ax=ax,
        palette=color_palette,
        # Only the first panel builds a legend; its entries feed the shared
        # figure-level legend created after the loop.
        legend=(panel_idx == 0),
    ).set_title(dataset)

    # Raw string: '\s' is not a valid escape sequence, so the non-raw form
    # emits a warning on current Python versions. The label text is unchanged.
    ax.set(
        ylabel='Average training size',
        xlabel=r'Noise scale, $\sigma$',
        yscale='log',
        ylim=(20, 1000),
        xticks=noise_ticks,
    )

# Promote the first panel's legend entries to one figure-level legend, then
# drop the per-axes copy so it does not cover the data.
fig.legend(ncol=3, loc="upper center", bbox_to_anchor=(0.5, 0.97))
axes[0, 0].get_legend().remove()

# tight_layout first, then reserve the top band for the figure legend and
# widen the vertical gaps; the reverse order would discard the adjustment.
plt.tight_layout()
fig.subplots_adjust(top=0.85, hspace=0.45)

## Real-world Training Size Barplot

In [11]:
# Real-world benchmark figure: for each dataset, a bar per method showing the
# mean MeanValidityHorizon (labelled "Average training size") with standard
# deviation error bars, grouped by learning model, on a log y-axis.
df = pd.read_pickle('logs_teconer.pkl')
# df = pd.read_pickle('logs_real_taxi_test2.pkl')

# Other dataset selection previously used with this cell:
# datasets = ['Bike (daily)', 'Bike (hourly)', 'Household energy', 'Friction']
datasets = ['Teconer']

# Size the grid from the number of datasets (at most two columns) so that a
# short list does not leave blank panels; four datasets still give 2x2.
# squeeze=False keeps `axes` two-dimensional even for a single panel.
n_cols = max(1, min(2, len(datasets)))
n_rows = max(1, (len(datasets) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(8, 6), squeeze=False)

# create a loop to plot each dataset in a subplot (row-major panel order)
for ax, dataset in zip(axes.flat, datasets):
    df_ = df[df['dataset'] == dataset]
    # NOTE(review): `errwidth` is reported as deprecated by newer seaborn
    # releases; the replacement depends on the installed version — confirm
    # before changing it.
    sns.barplot(
        ax=ax,
        data=df_,
        x="method", y="MeanValidityHorizon", hue="learning_model", palette='Set2',
        errorbar="sd", log=True, width=0.5, errwidth=1.5
    ).set_title(dataset)
    ax.set(ylabel='Average training size', xlabel=None, yscale='log')
    # remove the legend from the subplots; a panel may have none (e.g. when
    # its dataset has no rows), so guard against None.
    panel_legend = ax.get_legend()
    if panel_legend is not None:
        panel_legend.remove()
    for item in ax.get_xticklabels():
        item.set_rotation(45)

# Hide any grid cells that did not receive a dataset (odd dataset counts).
for spare_ax in axes.flat[len(datasets):]:
    spare_ax.set_visible(False)

# Build one shared legend. Calling fig.legend() without handles gathers the
# labelled artists of every axes, which repeats each learning model once per
# panel, so collect the entries explicitly and keep the first handle per label.
legend_entries = {}
for ax in axes.flat:
    for handle, label in zip(*ax.get_legend_handles_labels()):
        legend_entries.setdefault(label, handle)
fig.legend(
    list(legend_entries.values()), list(legend_entries.keys()),
    ncol=2, loc="upper center", bbox_to_anchor=(0.5, 0.97),
)

# tight_layout first, then reserve the top band for the figure legend. A
# further tight_layout() call after this would recompute the subplot
# parameters and throw the reserved space away, so none follows.
plt.tight_layout()
fig.subplots_adjust(top=0.85, hspace=0.45)


# print(df.groupby(['dataset', 'method','learning_model'])['MeanValidityHorizon'].mean()['Air quality'])



In [1]:
import pandas as pd

# Summarise the run log: mean MAE for every
# (dataset, method, learning_model) combination found in test.pkl.
# Other inputs tried with this cell: 'test_.pkl', 'logs_teconer.pkl'.
df = pd.read_pickle('test.pkl')

group_keys = ['dataset', 'method', 'learning_model']
mean_mae = df.groupby(group_keys)['MAE'].mean()
# To inspect a single dataset, index the result, e.g. mean_mae['Teconer'].
print(mean_mae)

dataset           method  learning_model       
Bike (daily)      ADWIN   DecisionTreeRegressor     683.536427
                  AUE     DecisionTreeRegressor    1349.793712
                  DDM     DecisionTreeRegressor     681.584642
                  KSWIN   DecisionTreeRegressor     682.315298
                  MSMSA   DecisionTreeRegressor     689.259396
                  Naive   DecisionTreeRegressor     697.937900
                  PH      DecisionTreeRegressor     693.740976
Bike (hourly)     ADWIN   DecisionTreeRegressor     107.892319
                  AUE     DecisionTreeRegressor     117.279445
                  DDM     DecisionTreeRegressor     106.909105
                  KSWIN   DecisionTreeRegressor     108.947355
                  MSMSA   DecisionTreeRegressor     103.526415
                  Naive   DecisionTreeRegressor     106.216874
                  PH      DecisionTreeRegressor     107.534050
Household energy  ADWIN   DecisionTreeRegressor       0.301901
       