In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
from glob import glob

from IPython.display import display
import numpy as np
import pandas as pd
from tqdm import tqdm

from common.utils import median_ensemble

In [2]:
from common.summary_utils import EnsembleStatistics
from common.metrics import smape_1, smape_2, nd, mape

# M4
from resources.m4.dataset import M4Dataset, M4Meta
from resources.m4.evaluator import M4Evaluator

class M4Statistics:
    """Evaluation helper pairing the M4 standard split with an ``M4Evaluator``."""

    def __init__(self, **args):
        """Load the M4 standard split and build the evaluator.

        Args:
            **args: keyword arguments forwarded unchanged to ``M4Evaluator``.
        """
        dataset = M4Dataset(M4Meta.dataset_path)
        self.training, self.target = dataset.standard_split()
        self.evaluator = M4Evaluator(self.target, **args)

    def evaluate(self, predictions):
        """Score a collection of per-series predictions.

        Args:
            predictions: iterable of NumPy arrays, one per series.

        Returns:
            Whatever ``self.evaluator.evaluate`` returns for the forecasts.
        """
        # Drop NaN entries from every series before handing them over
        # (presumably NaN is padding for shorter horizons -- TODO confirm).
        cleaned = []
        for series in predictions:
            cleaned.append(series[~np.isnan(series)])
        forecasts = self.training.future_values(cleaned)
        return self.evaluator.evaluate(forecasts)
    
# M3
from resources.m3.dataset import M3Dataset, M3Meta
from resources.m3.evaluator import M3Evaluator

class M3Statistics:
    """Evaluation helper pairing the M3 standard split with an ``M3Evaluator``."""

    def __init__(self, **args):
        """Load the M3 standard split and build the evaluator.

        Args:
            **args: keyword arguments forwarded unchanged to ``M3Evaluator``.
        """
        dataset = M3Dataset(M3Meta.dataset_path)
        self.training, self.target = dataset.standard_split()
        self.evaluator = M3Evaluator(self.target, **args)

    def evaluate(self, predictions):
        """Score a collection of per-series predictions.

        Args:
            predictions: iterable of NumPy arrays, one per series.

        Returns:
            Whatever ``self.evaluator.evaluate`` returns for the forecasts.
        """
        # Drop NaN entries from every series before handing them over
        # (presumably NaN is padding for shorter horizons -- TODO confirm).
        cleaned = []
        for series in predictions:
            cleaned.append(series[~np.isnan(series)])
        forecasts = self.training.future_values(cleaned)
        return self.evaluator.evaluate(forecasts)

    
# TOURISM
from resources.tourism.dataset import TourismDataset, TourismMeta
from resources.tourism.evaluator import TourismEvaluator

class TourismStatistics:
    """Evaluation helper pairing the Tourism standard split with a ``TourismEvaluator``."""

    def __init__(self, **args):
        """Load the Tourism standard split and build the evaluator.

        Args:
            **args: keyword arguments forwarded unchanged to ``TourismEvaluator``.
        """
        dataset = TourismDataset(TourismMeta.dataset_path)
        self.training, self.target = dataset.standard_split()
        self.evaluator = TourismEvaluator(self.target, **args)

    def evaluate(self, predictions):
        """Score a collection of per-series predictions.

        Args:
            predictions: iterable of NumPy arrays, one per series.

        Returns:
            Whatever ``self.evaluator.evaluate`` returns for the forecasts.
        """
        # Drop NaN entries from every series before handing them over
        # (presumably NaN is padding for shorter horizons -- TODO confirm).
        cleaned = []
        for series in predictions:
            cleaned.append(series[~np.isnan(series)])
        forecasts = self.training.future_values(cleaned)
        return self.evaluator.evaluate(forecasts)
    

def collect_statistics(filter_path, evaluator, bootstrap_size=5, number_of_samples=1):
    """Build ensemble statistics for one dataset and bootstrap over its repeats.

    Args:
        filter_path: forwarded to ``EnsembleStatistics`` as ``filter_path``.
        evaluator: forwarded to ``EnsembleStatistics`` as ``evaluator``.
        bootstrap_size: forwarded to ``EnsembleStatistics.bootstrap``.
            Defaults to 5, the value that used to be hard-coded here.
        number_of_samples: forwarded to ``EnsembleStatistics.bootstrap``.
            Defaults to 1, the value that used to be hard-coded here.

    Returns:
        A ``(statistics, bootstrap)`` tuple: the ``EnsembleStatistics``
        instance and the result of its ``bootstrap`` call.
    """
    statistics = EnsembleStatistics(filter_path=filter_path, evaluator=evaluator)

    bootstrap = statistics.bootstrap(
        ensemble_keys=['repeats', 'lookback_period', 'loss_name'],
        bootstrap_key='repeats',
        bootstrap_size=bootstrap_size,
        number_of_samples=number_of_samples,
    )

    return statistics, bootstrap

#######################################

def _metric_row(bootstrap, columns):
    """Average ``metric`` per ``evaluation_key`` and lay it out as one row.

    ``metric`` is selected *before* aggregating so that any non-numeric
    columns in ``bootstrap`` are never averaged: that triggers a
    ``numeric_only`` FutureWarning on pandas 1.5 and a ``TypeError`` on
    pandas >= 2.0.
    """
    per_key = bootstrap.groupby('evaluation_key')[['metric']].mean()
    return per_key.transpose()[columns]


def assemble_results(experiment_path):
    """Collect, print and display the M3 and Tourism metrics of an experiment.

    Args:
        experiment_path: path prefix of the experiment; ``/M3.csv`` and
            ``/tourism.csv`` are appended to it to form the filter paths.

    Returns:
        ``(m3_statistics, tourism_statistics)`` as returned by
        ``collect_statistics`` for the two datasets.
    """
    # M4 evaluation is currently disabled. To re-enable it, collect
    # statistics for f'{experiment_path}/M4.csv' with M4Statistics() and add
    # rows for ['Daily', 'D+W', 'D+W+M', 'D+W+M+Y'] ('m4_custom') and
    # ['Yearly', 'Quarterly', 'Monthly', 'Others', 'Average'] ('m4').
    m3_statistics, m3_bootstrap = collect_statistics(
        f'{experiment_path}/M3.csv', M3Statistics())
    tourism_statistics, tourism_bootstrap = collect_statistics(
        f'{experiment_path}/tourism.csv', TourismStatistics())

    result = {
        'm3': _metric_row(
            m3_bootstrap,
            ['M3Year', 'M3Quart', 'M3Month', 'M3Other', 'Average']),
        'tourism': _metric_row(
            tourism_bootstrap,
            ['Yearly', 'Quarterly', 'Monthly', 'Average']),
    }

    for k, v in result.items():
        print(k)
        display(v)

    return m3_statistics, tourism_statistics

In [73]:
# NOTE: this cell needs `m3` to exist in the kernel already; it is the first
# value returned by assemble_results(...). Run that cell before this one.
# The evaluator is built once: constructing M3Statistics reloads the dataset
# split, so doing it inside the loop repeated that work on every iteration.
m3_evaluator = M3Statistics()
for lookback in range(2, 8):
    run_key = f'repeats=0,lookback_period={lookback},loss_name=MAPE,source_dataset=M4'
    print(run_key)
    # Position of this run among the collected parameter strings.
    run_index = m3.parameters.index(run_key)
    print(m3_evaluator.evaluate(m3.predictions[run_index].values))

repeats=0,lookback_period=2,loss_name=MAPE,source_dataset=M4
OrderedDict([('M3Year', 28.27), ('M3Quart', 13.93), ('M3Month', 20.65), ('M3Other', 14.33), ('Average', 20.11)])
repeats=0,lookback_period=3,loss_name=MAPE,source_dataset=M4
OrderedDict([('M3Year', 38.9), ('M3Quart', 14.96), ('M3Month', 21.7), ('M3Other', 18.49), ('Average', 22.28)])
repeats=0,lookback_period=4,loss_name=MAPE,source_dataset=M4
OrderedDict([('M3Year', 47.98), ('M3Quart', 16.0), ('M3Month', 21.79), ('M3Other', 22.18), ('Average', 23.6)])
repeats=0,lookback_period=5,loss_name=MAPE,source_dataset=M4
OrderedDict([('M3Year', 56.14), ('M3Quart', 17.6), ('M3Month', 23.23), ('M3Other', 25.42), ('Average', 25.83)])
repeats=0,lookback_period=6,loss_name=MAPE,source_dataset=M4
OrderedDict([('M3Year', 63.48), ('M3Quart', 19.23), ('M3Month', 24.75), ('M3Other', 28.62), ('Average', 28.04)])
repeats=0,lookback_period=7,loss_name=MAPE,source_dataset=M4
OrderedDict([('M3Year', 70.0), ('M3Quart', 21.73), ('M3Month', 25.01), ('M

In [27]:
# NOTE: this cell needs `tourism` to exist in the kernel already; it is the
# second value returned by assemble_results(...). Run that cell before this one.
# The evaluator is built once: constructing TourismStatistics reloads the
# dataset split, so doing it inside the loop repeated that work every iteration.
tourism_evaluator = TourismStatistics()
for lookback in range(2, 8):
    run_key = f'repeats=0,lookback_period={lookback},loss_name=MAPE,source_dataset=M4'
    print(run_key)
    # Position of this run among the collected parameter strings.
    run_index = tourism.parameters.index(run_key)
    print(tourism_evaluator.evaluate(tourism.predictions[run_index].values))

repeats=0,lookback_period=2,loss_name=MAPE,source_dataset=M4
OrderedDict([('Yearly', 37.88), ('Quarterly', 57.59), ('Monthly', 88.13), ('Average', 73.52)])
repeats=0,lookback_period=3,loss_name=MAPE,source_dataset=M4
OrderedDict([('Yearly', 42.9), ('Quarterly', 51.41), ('Monthly', 77.0), ('Average', 65.93)])
repeats=0,lookback_period=4,loss_name=MAPE,source_dataset=M4
OrderedDict([('Yearly', 47.0), ('Quarterly', 46.0), ('Monthly', 69.41), ('Average', 60.55)])
repeats=0,lookback_period=5,loss_name=MAPE,source_dataset=M4
OrderedDict([('Yearly', 50.58), ('Quarterly', 45.04), ('Monthly', 70.59), ('Average', 61.57)])
repeats=0,lookback_period=6,loss_name=MAPE,source_dataset=M4
OrderedDict([('Yearly', 53.85), ('Quarterly', 43.74), ('Monthly', 69.75), ('Average', 61.22)])
repeats=0,lookback_period=7,loss_name=MAPE,source_dataset=M4
OrderedDict([('Yearly', 57.42), ('Quarterly', 43.68), ('Monthly', 67.39), ('Average', 60.27)])


In [92]:
# Evaluate a single saved Tourism prediction file (repeat 1, lookback 2).
tourism_csv = 'project/shared_replicate/repeats=1,lookback_period=2,loss_name=MAPE,source_dataset=M4/tourism.csv'
raw_predictions = pd.read_csv(tourism_csv)
df = raw_predictions.set_index('id')
TourismStatistics().evaluate(df.values)

OrderedDict([('Yearly', 42.55),
             ('Quarterly', 44.65),
             ('Monthly', 77.4),
             ('Average', 64.5)])

In [91]:
# Evaluate a single saved M3 prediction file (repeat 0, lookback 2).
m3_csv = 'project/shared_replicate/repeats=0,lookback_period=2,loss_name=MAPE,source_dataset=M4/M3.csv'
raw_predictions = pd.read_csv(m3_csv)
df = raw_predictions.set_index('id')
M3Statistics().evaluate(df.values)

OrderedDict([('M3Year', 38.67),
             ('M3Quart', 39.21),
             ('M3Month', 50.93),
             ('M3Other', 55.16),
             ('Average', 47.89)])

In [90]:
# Evaluate a single saved Tourism prediction file (repeat 0, lookback 2).
tourism_csv = 'project/shared_replicate/repeats=0,lookback_period=2,loss_name=MAPE,source_dataset=M4/tourism.csv'
raw_predictions = pd.read_csv(tourism_csv)
df = raw_predictions.set_index('id')
TourismStatistics().evaluate(df.values)

OrderedDict([('Yearly', 67.31),
             ('Quarterly', 69.49),
             ('Monthly', 99.82),
             ('Average', 87.84)])

In [5]:
# Summarise the ForecastPFN experiment and keep both statistics objects
# (`m3`, `tourism`) around for the per-run inspection cells.
forecastpfn_runs = 'project/mf_replicate_testnoiseT_shuffle5Millilon.20230502-140223/*source_dataset=M4'
print('ForecastPFN')
m3, tourism = assemble_results(forecastpfn_runs)


ForecastPFN


100%|██████████| 180/180 [00:02<00:00, 62.87it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 180/180 [00:01<00:00, 111.20it/s]
100%|██████████| 1/1 [00:00<00:00,  7.88it/s]

m3



  'm3': m3_bootstrap.groupby('evaluation_key').mean()[['metric']].transpose()[['M3Year', 'M3Quart', 'M3Month', 'M3Other', 'Average']],
  'tourism': tourism_bootstrap.groupby('evaluation_key').mean()[['metric']].transpose()[['Yearly', 'Quarterly', 'Monthly', 'Average']],


evaluation_key,M3Year,M3Quart,M3Month,M3Other,Average
metric,37.83,12.61,16.01,10.71,17.54


tourism


evaluation_key,Yearly,Quarterly,Monthly,Average
metric,57.68,18.78,36.42,35.28


In [6]:
# Summarise the metalearned baseline; the returned statistics objects are
# left as the cell's output.
baseline_runs = 'project/shared_original/shared/*source_dataset=M4'
print('metalearned baseline')
assemble_results(baseline_runs)

metalearned baseline


100%|██████████| 180/180 [00:02<00:00, 62.62it/s]
100%|██████████| 1/1 [00:00<00:00,  2.98it/s]
100%|██████████| 180/180 [00:01<00:00, 112.72it/s]
100%|██████████| 1/1 [00:00<00:00, 10.57it/s]
  'm3': m3_bootstrap.groupby('evaluation_key').mean()[['metric']].transpose()[['M3Year', 'M3Quart', 'M3Month', 'M3Other', 'Average']],
  'tourism': tourism_bootstrap.groupby('evaluation_key').mean()[['metric']].transpose()[['Yearly', 'Quarterly', 'Monthly', 'Average']],


m3


evaluation_key,M3Year,M3Quart,M3Month,M3Other,Average
metric,15.23,9.1,13.25,4.34,12.45


tourism


evaluation_key,Yearly,Quarterly,Monthly,Average
metric,23.47,14.56,19.33,18.79


(<common.summary_utils.EnsembleStatistics at 0x7fe18dec65e0>,
 <common.summary_utils.EnsembleStatistics at 0x7fe1942f41c0>)