In [8]:
import pandas as pd
from seml import get_results
import seaborn as sns

import matplotlib.pyplot as plt

import numpy as np

In [None]:
# Pull the finished runs of the standard-train / standard-eval collection,
# restricted to the solar_10_minutes dataset and the hyperparameter grid below.
df = get_results(
    'dp_timeseries_standard_train_standard_eval',
    [
        # Run configuration columns.
        'config.seed',
        'config.estimator_name',
        'config.top_level_mode',
        'config.estimator_kwargs.trainer_kwargs.max_epochs',
        'config.instances_per_sequence',
        'config.estimator_kwargs.batch_size',
        # Test-set metric columns.
        'result.metrics_test.mean_wQuantileLoss',
        'result.metrics_test.MASE',
    ],
    to_data_frame=True,
    filter_dict={
        'config.dataset_kwargs.dataset_name': 'solar_10_minutes',
        'config.estimator_kwargs.trainer_kwargs.max_epochs': {
            '$in': [16000, 8000, 4000, 2000, 1000]
        },
        'config.instances_per_sequence': {'$in': [1, 2, 4, 8, 16]},
        'config.estimator_kwargs.batch_size': 128,
        'config.estimator_kwargs.relative_context_length': {'$in': [4]},
        # Accept runs with either no explicit lags or this exact lag list.
        'config.estimator_kwargs.lags_seq': {
            '$in': [
                None,
                [
                    1, 2, 3, 4, 5, 6, 7,
                    23, 24, 25,
                    47, 48, 49,
                    71, 72, 73,
                    95, 96, 97,
                    119, 120, 121,
                    143, 144, 145,
                    167, 168, 169,
                ],
            ]
        },
        'config.seed': {'$in': [0, 1, 2, 3, 4]},
    },
)

# Keep only the runs whose max_epochs * instances_per_sequence equals 16000
# (presumably a fixed training budget across settings -- TODO confirm).
df = df[
    df['config.estimator_kwargs.trainer_kwargs.max_epochs']
    .mul(df['config.instances_per_sequence'])
    .eq(16000)
]

# Number of runs that survived the filter.
len(df)

In [10]:
# Every column except the first one gets its dotted path shortened to the
# final component, e.g. 'config.estimator_kwargs.batch_size' -> 'batch_size'.
# NOTE(review): the first column is skipped, presumably because it is the
# run identifier added by seml -- confirm.
columns = df.columns[1:]

short_names = {name: name.rpartition('.')[2] for name in columns}
df = df.rename(columns=short_names)

In [None]:
def mode_renamer(x):
    """Map a raw ``top_level_mode`` value to the label used in figures.

    Values that already are display labels are returned unchanged, so the
    cell that applies this function to the data frame can be executed more
    than once without raising.

    Args:
        x: Either a raw mode name ('shuffling', 'iteration',
            'sampling_without_replacement') or one of the display labels
            ('Shuffling', 'Iteration', 'WOR').

    Returns:
        The display label for ``x``.

    Raises:
        KeyError: If ``x`` is neither a known raw mode nor a display label.
    """
    rename_dict = {
        'shuffling': 'Shuffling',
        'iteration': 'Iteration',
        'sampling_without_replacement': 'WOR'
    }

    # Already-renamed values pass through; without this, applying the
    # renamer a second time to the same column fails with a KeyError.
    if x in rename_dict.values():
        return x

    # Unknown modes still raise so typos in the config are not hidden.
    return rename_dict[x]

# Swap the raw mode identifiers for their display labels, element by element.
df['top_level_mode'] = df['top_level_mode'].map(mode_renamer)

# Preview the first rows of the relabelled frame.
df.head()

In [12]:
def plot(df) -> None:
    """Draw one grouped bar chart per estimator on a shared figure.

    Each panel shows ``mean_wQuantileLoss`` (labelled 'CRPS') against
    ``instances_per_sequence``, with one bar group member per
    ``top_level_mode``. Bars are drawn with ``errorbar='sd'``.

    The grid has two columns and at least two rows; further rows are added
    when there are more than four estimators, and panels without an
    estimator are hidden.

    Args:
        df: Data frame with the columns ``estimator_name``,
            ``instances_per_sequence``, ``top_level_mode`` and
            ``mean_wQuantileLoss``. It is not modified.

    Returns:
        None. The figure is created through ``plt.subplots``.
    """
    ncols = 2
    estimator_names = df['estimator_name'].unique()

    # Keep the 2x2 layout as the minimum, but grow downwards instead of
    # failing once there are more estimators than panels.
    nrows = max(2, (len(estimator_names) + ncols - 1) // ncols)

    # squeeze=False guarantees a 2-D array of axes regardless of grid shape;
    # ravel() walks it row by row, matching the original panel order.
    _, axs = plt.subplots(nrows=nrows, ncols=ncols, squeeze=False)
    axes = axs.ravel()

    for ax, estimator_name in zip(axes, estimator_names):
        # Filtering and sorting both return new frames, so the caller's
        # data frame is left untouched.
        subset = df.loc[df['estimator_name'] == estimator_name]
        subset = subset.sort_values(by=['instances_per_sequence', 'estimator_name', 'top_level_mode'])

        sns.barplot(data=subset, x='instances_per_sequence', y='mean_wQuantileLoss', hue='top_level_mode',
                    errorbar='sd', ax=ax)

        # Raw string: the backslash belongs to the mathtext command and must
        # not be parsed as a (invalid) Python escape sequence.
        ax.set_xlabel(r'Subsequences $\lambda$')
        ax.set_ylabel('CRPS')

        ax.legend(title=None, loc='lower right')
        ax.set_title(estimator_name.removesuffix('Estimator'))

    # Hide the panels that did not receive an estimator.
    for ax in axes[len(estimator_names):]:
        ax.set_visible(False)

In [None]:
# Render the per-estimator bar charts for the filtered, relabelled results.
plot(df)