In [18]:
# utils
import pandas as pd
import numpy as np
from scipy.stats import norm
from sklearn.metrics import roc_auc_score, log_loss

from statsforecast.models import SeasonalNaive
from statsforecast import StatsForecast
from neuralforecast import NeuralForecast


# train
import pandas as pd
import os

from datasetsforecast.m3 import M3

from neuralforecast.auto import AutoMLP, AutoNHITS, AutoLSTM, AutoGRU, AutoDeepAR
from neuralforecast.losses.pytorch import MQLoss, DistributionLoss
from ray import tune
from ray.tune.search.hyperopt import HyperOptSearch

from utilsforecast.losses import smape


  from .autonotebook import tqdm as notebook_tqdm
2024-08-09 22:25:41,300	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-08-09 22:25:41,907	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [19]:
df, _, _ = M3.load(directory='./', group='Monthly')
df.head(2)

  freq = pd.tseries.frequencies.to_offset(class_group.freq)


Unnamed: 0,unique_id,ds,y
0,M1,1990-01-31,2640.0
1,M1,1990-02-28,2640.0


In [20]:
def preprocess_dataset(df, horizon, percentiles):
    for percentile in percentiles:
        # obtain percentile value based on the train data
        df[f'y_percentile_{percentile}'] = df.groupby('unique_id')['y'].transform(lambda x: x.iloc[:-horizon].quantile(percentile/100))
        # add classification if timeseries value exceeds the percentile
        df[f'y_above_percentile_{percentile}'] = (df['y'] >= df[f'y_percentile_{percentile}']).astype(int)
        
    # use the last "horizon" timesteps as test timesteps
    test_df = df.groupby('unique_id').tail(horizon)
    train_df = df.drop(test_df.index)
    return train_df, test_df

In [21]:
PERCENTILES = [90, 95, 99]
HORIZON = 12

In [22]:
train_df, test_df = preprocess_dataset(df, horizon=HORIZON, percentiles=PERCENTILES)
test_df.head(2)

Unnamed: 0,unique_id,ds,y,y_percentile_90,y_above_percentile_90,y_percentile_95,y_above_percentile_95,y_percentile_99,y_above_percentile_99
56,M1,1994-09-30,1560.0,6180.0,0,7110.0,0,8868.0,0
57,M1,1994-10-31,1440.0,6180.0,0,7110.0,0,8868.0,0


In [59]:
test_df = test_df.reset_index()
test_df.index

RangeIndex(start=0, stop=17136, step=1)

In [40]:
LEVELS = [80, 90, 98]

In [41]:
snaive = SeasonalNaive(season_length=12)  # Quarterly -> 4
sf = StatsForecast(models=[snaive], freq='M')
pred_sf = sf.forecast(df=train_df, h=HORIZON, level=LEVELS)
pred_sf.head(2)

  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)


Unnamed: 0_level_0,ds,SeasonalNaive,SeasonalNaive-lo-80,SeasonalNaive-lo-90,SeasonalNaive-lo-98,SeasonalNaive-hi-80,SeasonalNaive-hi-90,SeasonalNaive-hi-98
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M1,1994-09-30,4800.0,983.408875,-98.54171,-2128.101074,8616.59082,9698.541992,11728.100586
M1,1994-10-31,3000.0,-816.591125,-1898.541748,-3928.101074,6816.591309,7898.541504,9928.100586


In [42]:
pred_sf['y_true'] = test_df['y'].to_list()
y_above_percentile_cols = [col for col in test_df.columns if "percentile" in col]

for col in y_above_percentile_cols:
    pred_sf[col] = test_df[col].to_list()
pred_sf.head(2)

Unnamed: 0_level_0,ds,SeasonalNaive,SeasonalNaive-lo-80,SeasonalNaive-lo-90,SeasonalNaive-lo-98,SeasonalNaive-hi-80,SeasonalNaive-hi-90,SeasonalNaive-hi-98,y_true,y_percentile_90,y_above_percentile_90,y_percentile_95,y_above_percentile_95,y_percentile_99,y_above_percentile_99
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M1,1994-09-30,4800.0,983.408875,-98.54171,-2128.101074,8616.59082,9698.541992,11728.100586,1560.0,6180.0,0,7110.0,0,8868.0,0
M1,1994-10-31,3000.0,-816.591125,-1898.541748,-3928.101074,6816.591309,7898.541504,9928.100586,1440.0,6180.0,0,7110.0,0,8868.0,0


In [43]:
pred_nf = pd.read_csv("empirical_analysis/M3_Monthly_95_MQLoss/pred_df_M3_95_MQLoss.csv")

"""
pred_nf = pred_nf.drop(columns=['y_true', 'y_true_above_thr', 'SeasonalNaive',
       'SeasonalNaive_forecast_above_thr_prob',
       'SeasonalNaive_forecast_above_thr', 'AutoMLP_forecast_above_thr_prob',
       'AutoMLP_forecast_above_thr', 'AutoNHITS_forecast_above_thr_prob',
       'AutoNHITS_forecast_above_thr', 'AutoLSTM_forecast_above_thr_prob',
       'AutoLSTM_forecast_above_thr', 'AutoGRU_forecast_above_thr_prob',
       'AutoGRU_forecast_above_thr', 'unique_id.1'])
"""

pred_nf.columns

Index(['unique_id', 'ds', 'AutoMLP-median', 'AutoMLP-lo-90', 'AutoMLP-lo-80',
       'AutoMLP-lo-70', 'AutoMLP-hi-70', 'AutoMLP-hi-80', 'AutoMLP-hi-90',
       'AutoNHITS-median', 'AutoNHITS-lo-90', 'AutoNHITS-lo-80',
       'AutoNHITS-lo-70', 'AutoNHITS-hi-70', 'AutoNHITS-hi-80',
       'AutoNHITS-hi-90', 'AutoLSTM-median', 'AutoLSTM-lo-90',
       'AutoLSTM-lo-80', 'AutoLSTM-lo-70', 'AutoLSTM-hi-70', 'AutoLSTM-hi-80',
       'AutoLSTM-hi-90', 'AutoGRU-median', 'AutoGRU-lo-90', 'AutoGRU-lo-80',
       'AutoGRU-lo-70', 'AutoGRU-hi-70', 'AutoGRU-hi-80', 'AutoGRU-hi-90',
       'y_true', 'y_true_above_thr', 'SeasonalNaive',
       'SeasonalNaive_forecast_above_thr_prob',
       'SeasonalNaive_forecast_above_thr', 'AutoMLP_forecast_above_thr_prob',
       'AutoMLP_forecast_above_thr', 'AutoNHITS_forecast_above_thr_prob',
       'AutoNHITS_forecast_above_thr', 'AutoLSTM_forecast_above_thr_prob',
       'AutoLSTM_forecast_above_thr', 'AutoGRU_forecast_above_thr_prob',
       'AutoGRU_foreca

In [44]:
pred_nf['y_true'] = test_df['y'].to_list()
for col in y_above_percentile_cols:
    pred_nf[col] = test_df[col].to_list()
pred_nf.columns

Index(['unique_id', 'ds', 'AutoMLP-median', 'AutoMLP-lo-90', 'AutoMLP-lo-80',
       'AutoMLP-lo-70', 'AutoMLP-hi-70', 'AutoMLP-hi-80', 'AutoMLP-hi-90',
       'AutoNHITS-median', 'AutoNHITS-lo-90', 'AutoNHITS-lo-80',
       'AutoNHITS-lo-70', 'AutoNHITS-hi-70', 'AutoNHITS-hi-80',
       'AutoNHITS-hi-90', 'AutoLSTM-median', 'AutoLSTM-lo-90',
       'AutoLSTM-lo-80', 'AutoLSTM-lo-70', 'AutoLSTM-hi-70', 'AutoLSTM-hi-80',
       'AutoLSTM-hi-90', 'AutoGRU-median', 'AutoGRU-lo-90', 'AutoGRU-lo-80',
       'AutoGRU-lo-70', 'AutoGRU-hi-70', 'AutoGRU-hi-80', 'AutoGRU-hi-90',
       'y_true', 'y_true_above_thr', 'SeasonalNaive',
       'SeasonalNaive_forecast_above_thr_prob',
       'SeasonalNaive_forecast_above_thr', 'AutoMLP_forecast_above_thr_prob',
       'AutoMLP_forecast_above_thr', 'AutoNHITS_forecast_above_thr_prob',
       'AutoNHITS_forecast_above_thr', 'AutoLSTM_forecast_above_thr_prob',
       'AutoLSTM_forecast_above_thr', 'AutoGRU_forecast_above_thr_prob',
       'AutoGRU_foreca

In [45]:
names = [col.removesuffix("-median") for col in pred_nf.columns]
pred_nf.columns = names
pred_nf

Unnamed: 0,unique_id,ds,AutoMLP,AutoMLP-lo-90,AutoMLP-lo-80,AutoMLP-lo-70,AutoMLP-hi-70,AutoMLP-hi-80,AutoMLP-hi-90,AutoNHITS,...,AutoLSTM_forecast_above_thr,AutoGRU_forecast_above_thr_prob,AutoGRU_forecast_above_thr,unique_id.1,y_percentile_90,y_above_percentile_90,y_percentile_95,y_above_percentile_95,y_percentile_99,y_above_percentile_99
0,M1,1995-09-30,1467.5474,-411.66992,-58.630370,227.89722,2689.4020,3133.3228,3507.9050,2223.8800,...,0,0.000000,0,M1,6180.0,0,7110.00,0,8868.000,0
1,M1,1995-10-31,1362.4500,-354.95460,81.466064,406.45850,3032.2090,3619.2397,4234.5435,1776.7000,...,0,0.000000,0,M1,6180.0,0,7110.00,0,8868.000,0
2,M1,1995-11-30,1354.3980,-327.85522,63.120360,379.12964,2937.1116,3374.2080,4339.9250,910.7019,...,0,0.000000,0,M1,6180.0,0,7110.00,0,8868.000,0
3,M1,1995-12-31,1765.9194,-261.55664,288.807860,578.76000,3174.9436,3703.8640,4616.0970,2130.4326,...,0,0.000000,0,M1,6180.0,0,7110.00,0,8868.000,0
4,M1,1996-01-31,2280.5957,95.78345,562.115230,884.05884,4072.7222,4617.1040,5350.0796,2437.8174,...,0,0.000000,0,M1,6180.0,0,7110.00,0,8868.000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17131,M999,1994-10-31,4989.4790,4792.60000,4896.463000,4902.47070,5067.3975,5081.1420,5149.2870,4864.8640,...,1,0.587022,1,M999,5181.3,1,5219.37,1,5254.854,0
17132,M999,1994-11-30,4983.2046,4752.60700,4846.161600,4885.35740,5052.2050,5071.0176,5151.9870,4825.5010,...,0,0.518861,1,M999,5181.3,1,5219.37,1,5254.854,0
17133,M999,1994-12-31,4943.0947,4770.05000,4825.196000,4818.86700,5041.5645,5054.4490,5109.1416,4784.2344,...,0,0.453027,0,M999,5181.3,1,5219.37,0,5254.854,0
17134,M999,1995-01-31,4926.2630,4759.57700,4823.895000,4795.83600,5002.2476,4996.6064,5110.6396,4813.2980,...,0,0.356847,0,M999,5181.3,0,5219.37,0,5254.854,0


In [46]:
pred_nf.columns

Index(['unique_id', 'ds', 'AutoMLP', 'AutoMLP-lo-90', 'AutoMLP-lo-80',
       'AutoMLP-lo-70', 'AutoMLP-hi-70', 'AutoMLP-hi-80', 'AutoMLP-hi-90',
       'AutoNHITS', 'AutoNHITS-lo-90', 'AutoNHITS-lo-80', 'AutoNHITS-lo-70',
       'AutoNHITS-hi-70', 'AutoNHITS-hi-80', 'AutoNHITS-hi-90', 'AutoLSTM',
       'AutoLSTM-lo-90', 'AutoLSTM-lo-80', 'AutoLSTM-lo-70', 'AutoLSTM-hi-70',
       'AutoLSTM-hi-80', 'AutoLSTM-hi-90', 'AutoGRU', 'AutoGRU-lo-90',
       'AutoGRU-lo-80', 'AutoGRU-lo-70', 'AutoGRU-hi-70', 'AutoGRU-hi-80',
       'AutoGRU-hi-90', 'y_true', 'y_true_above_thr', 'SeasonalNaive',
       'SeasonalNaive_forecast_above_thr_prob',
       'SeasonalNaive_forecast_above_thr', 'AutoMLP_forecast_above_thr_prob',
       'AutoMLP_forecast_above_thr', 'AutoNHITS_forecast_above_thr_prob',
       'AutoNHITS_forecast_above_thr', 'AutoLSTM_forecast_above_thr_prob',
       'AutoLSTM_forecast_above_thr', 'AutoGRU_forecast_above_thr_prob',
       'AutoGRU_forecast_above_thr', 'unique_id.1', 'y_pe

In [49]:
test_df[(test_df['unique_id']=='M1')]

Unnamed: 0,unique_id,ds,y,y_percentile_90,y_above_percentile_90,y_percentile_95,y_above_percentile_95,y_percentile_99,y_above_percentile_99
56,M1,1994-09-30,1560.0,6180.0,0,7110.0,0,8868.0,0
57,M1,1994-10-31,1440.0,6180.0,0,7110.0,0,8868.0,0
58,M1,1994-11-30,240.0,6180.0,0,7110.0,0,8868.0,0
59,M1,1994-12-31,1800.0,6180.0,0,7110.0,0,8868.0,0
60,M1,1995-01-31,4680.0,6180.0,0,7110.0,0,8868.0,0
61,M1,1995-02-28,1800.0,6180.0,0,7110.0,0,8868.0,0
62,M1,1995-03-31,1680.0,6180.0,0,7110.0,0,8868.0,0
63,M1,1995-04-30,3720.0,6180.0,0,7110.0,0,8868.0,0
64,M1,1995-05-31,2160.0,6180.0,0,7110.0,0,8868.0,0
65,M1,1995-06-30,480.0,6180.0,0,7110.0,0,8868.0,0


In [62]:
test_df

Unnamed: 0,unique_id,ds,y,y_percentile_90,y_above_percentile_90,y_percentile_95,y_above_percentile_95,y_percentile_99,y_above_percentile_99
56,M1,1994-09-30,1560.0,6180.0,0,7110.00,0,8868.000,0
57,M1,1994-10-31,1440.0,6180.0,0,7110.00,0,8868.000,0
58,M1,1994-11-30,240.0,6180.0,0,7110.00,0,8868.000,0
59,M1,1994-12-31,1800.0,6180.0,0,7110.00,0,8868.000,0
60,M1,1995-01-31,4680.0,6180.0,0,7110.00,0,8868.000,0
...,...,...,...,...,...,...,...,...,...
167557,M999,1993-10-31,5225.9,5181.3,1,5219.37,1,5254.854,0
167558,M999,1993-11-30,5236.3,5181.3,1,5219.37,1,5254.854,0
167559,M999,1993-12-31,5186.6,5181.3,1,5219.37,0,5254.854,0
167560,M999,1994-01-31,5143.4,5181.3,0,5219.37,0,5254.854,0


In [63]:
def predict_exceedance_from_quantiles(pred_df, test_df, models_names, percentiles, filename="exceendace_percentiles.csv"):
    exceedance_df = pd.DataFrame()

    for percentile in percentiles:

        z_low, z_high = norm.ppf([1-percentile/100, percentile/100])
        conf_int_length = percentile - (100-percentile)

        for model_name in models_names:
            model_exceedance_preds = []

            for i in range(pred_df.shape[0]):
                row_pred = pred_df.iloc[i, :]
                row_test = test_df.iloc[i, :]

                std = (row_pred[f'{model_name}-hi-{conf_int_length}'] - row_pred[f'{model_name}-lo-{conf_int_length}']) / (z_high - z_low)
                mean = row_pred[f'{model_name}-hi-{conf_int_length}'] - z_high * std
                
                probability = 1 - norm.cdf(row_test[f'y_percentile_{percentile}'], loc=mean, scale=std)
                classification = 1 if probability >= 0.5 else 0
                model_exceedance_preds.append(classification)
            
            exceedance_df[f'{model_name}_{percentile}'] = model_exceedance_preds
    
    exceedance_df.to_csv(filename, index=False)
    return exceedance_df

In [74]:
ss = predict_exceedance_from_quantiles(pred_sf, test_df, ['SeasonalNaive'], PERCENTILES)

In [75]:
ss.head(5)

Unnamed: 0,SeasonalNaive_90,SeasonalNaive_95,SeasonalNaive_99
0,0,0,0
1,0,0,0
2,0,0,0
3,0,0,0
4,0,0,0


In [69]:
ss['SeasonalNaive_99'].sum()

828

In [None]:
def predict_exceedance_from_params(pred_df, test_df, models_names, percentiles, filename="exceendace_params.csv"):
    exceedance_df = pd.DataFrame()

    for percentile in percentiles:

        for model_name in models_names:
            model_exceedance_preds = []

            for i in range(pred_df.shape[0]):
                row_pred = pred_df.iloc[i, :]
                row_test = test_df.iloc[i, :]

                mean = row_pred[f'{model_name}-loc']
                std = row_pred[f'{model_name}-scale']
                
                probability = 1 - norm.cdf(row_test[f'y_percentile_{percentile}'], loc=mean, scale=std)
                classification = 1 if probability >= 0.5 else 0
                model_exceedance_preds.append(classification)
            
            exceedance_df[f'{model_name}_{percentile}'] = model_exceedance_preds
    
    exceedance_df.to_csv(filename, index=False)
    return exceedance_df

In [None]:
predict_exceedance_from_quantiles(pred_sf, test_df, ['SeasonalNaive'], test_percentiles)
predict_exceedance_from_quantiles(pred_nf, test_df, models.keys(), test_percentiles)
if "AutoDeepAR" in list(models.keys()):
    models_names = list(models.keys())
    models_names.remove("AutoDeepAR")
    predict_exceedance_from_params(pred_nf, test_df, models_names, test_percentiles)

In [20]:
import pandas as pd

nf = pd.read_csv("empirical_analysis/M3_Monthly_DistributionLoss/forecast_nf_M3_Monthly_DistributionLoss.csv")
sf = pd.read_csv("empirical_analysis/M3_Monthly_DistributionLoss/forecast_sf_M3_Monthly_DistributionLoss.csv")

In [21]:
sf

Unnamed: 0,unique_id,ds,SeasonalNaive,SeasonalNaive-lo-80,SeasonalNaive-lo-90,SeasonalNaive-lo-98,SeasonalNaive-hi-80,SeasonalNaive-hi-90,SeasonalNaive-hi-98,y_true,y_above_percentile_90,y_above_percentile_95,y_above_percentile_99
0,M1,1994-09-30,4800.0,983.4089,-98.54171,-2128.1010,8616.5910,9698.5420,11728.101,1560.0,0,0,0
1,M1,1994-10-31,3000.0,-816.5911,-1898.54170,-3928.1010,6816.5913,7898.5415,9928.101,1440.0,0,0,0
2,M1,1994-11-30,3120.0,-696.5911,-1778.54170,-3808.1010,6936.5913,8018.5415,10048.101,240.0,0,0,0
3,M1,1994-12-31,5880.0,2063.4090,981.45830,-1048.1011,9696.5910,10778.5420,12808.101,1800.0,0,0,0
4,M1,1995-01-31,2640.0,-1176.5912,-2258.54170,-4288.1010,6456.5913,7538.5415,9568.101,4680.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17131,M999,1993-10-31,5252.8,4977.4830,4899.43460,4753.0290,5528.1167,5606.1650,5752.571,5225.9,1,1,0
17132,M999,1993-11-30,5213.7,4938.3833,4860.33500,4713.9290,5489.0170,5567.0654,5713.471,5236.3,1,1,0
17133,M999,1993-12-31,5219.4,4944.0830,4866.03470,4719.6290,5494.7170,5572.7650,5719.171,5186.6,1,0,0
17134,M999,1994-01-31,5228.6,4953.2830,4875.23500,4728.8290,5503.9170,5581.9653,5728.371,5143.4,0,0,0


In [22]:
nf.head()

Unnamed: 0,unique_id,ds,AutoMLP,AutoMLP-median,AutoMLP-lo-98,AutoMLP-lo-90,AutoMLP-lo-80,AutoMLP-hi-80,AutoMLP-hi-90,AutoMLP-hi-98,...,AutoDeepAR-lo-98,AutoDeepAR-lo-90,AutoDeepAR-lo-80,AutoDeepAR-hi-80,AutoDeepAR-hi-90,AutoDeepAR-hi-98,y_true,y_above_percentile_90,y_above_percentile_95,y_above_percentile_99
0,M1,1994-09-30,3491.0586,3553.7593,-1625.1648,-179.677,583.53296,6265.0303,7193.1714,8837.818,...,1318.3179,1501.6833,1825.3433,3164.017,3418.2427,3631.3176,1560.0,0,0,0
1,M1,1994-10-31,3295.9807,3330.1047,-2930.286,-1321.0813,-464.0979,7095.7,8145.1895,10108.044,...,955.3069,1604.1776,1765.9188,3139.6326,3268.736,3676.4521,1440.0,0,0,0
2,M1,1994-11-30,3927.5337,3992.3464,-3791.4656,-1278.6033,93.60889,7666.8604,8996.326,11205.3955,...,1041.585,1485.56,1807.5367,3108.9753,3285.1155,3988.8962,240.0,0,0,0
3,M1,1994-12-31,3859.8657,3907.3462,-4590.501,-2363.3933,-980.2781,8385.337,9748.595,12795.107,...,1237.2661,1573.1642,1787.3351,3135.0688,3282.1116,3710.321,1800.0,0,0,0
4,M1,1995-01-31,3212.5676,3174.3289,-3638.7834,-1670.5076,-779.2473,7159.545,8400.697,9782.582,...,984.0437,1504.4197,1684.5812,3135.0862,3363.6096,3544.5557,4680.0,0,0,0


In [23]:
df = pd.merge(sf, nf, how='inner', on=['unique_id', 'ds'])
df

Unnamed: 0,unique_id,ds,SeasonalNaive,SeasonalNaive-lo-80,SeasonalNaive-lo-90,SeasonalNaive-lo-98,SeasonalNaive-hi-80,SeasonalNaive-hi-90,SeasonalNaive-hi-98,y_true_x,...,AutoDeepAR-lo-98,AutoDeepAR-lo-90,AutoDeepAR-lo-80,AutoDeepAR-hi-80,AutoDeepAR-hi-90,AutoDeepAR-hi-98,y_true_y,y_above_percentile_90_y,y_above_percentile_95_y,y_above_percentile_99_y
0,M1,1994-09-30,4800.0,983.4089,-98.54171,-2128.1010,8616.5910,9698.5420,11728.101,1560.0,...,1318.3179,1501.6833,1825.3433,3164.0170,3418.2427,3631.3176,1560.0,0,0,0
1,M1,1994-10-31,3000.0,-816.5911,-1898.54170,-3928.1010,6816.5913,7898.5415,9928.101,1440.0,...,955.3069,1604.1776,1765.9188,3139.6326,3268.7360,3676.4521,1440.0,0,0,0
2,M1,1994-11-30,3120.0,-696.5911,-1778.54170,-3808.1010,6936.5913,8018.5415,10048.101,240.0,...,1041.5850,1485.5600,1807.5367,3108.9753,3285.1155,3988.8962,240.0,0,0,0
3,M1,1994-12-31,5880.0,2063.4090,981.45830,-1048.1011,9696.5910,10778.5420,12808.101,1800.0,...,1237.2661,1573.1642,1787.3351,3135.0688,3282.1116,3710.3210,1800.0,0,0,0
4,M1,1995-01-31,2640.0,-1176.5912,-2258.54170,-4288.1010,6456.5913,7538.5415,9568.101,4680.0,...,984.0437,1504.4197,1684.5812,3135.0862,3363.6096,3544.5557,4680.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17131,M999,1993-10-31,5252.8,4977.4830,4899.43460,4753.0290,5528.1167,5606.1650,5752.571,5225.9,...,5072.8403,5100.7650,5122.1157,5215.8060,5227.2144,5247.7510,5225.9,1,1,0
17132,M999,1993-11-30,5213.7,4938.3833,4860.33500,4713.9290,5489.0170,5567.0654,5713.471,5236.3,...,5079.4385,5107.9980,5133.8486,5214.4290,5235.1934,5266.2230,5236.3,1,1,0
17133,M999,1993-12-31,5219.4,4944.0830,4866.03470,4719.6290,5494.7170,5572.7650,5719.171,5186.6,...,5066.8620,5084.7090,5108.4120,5218.2570,5233.9100,5264.5776,5186.6,1,0,0
17134,M999,1994-01-31,5228.6,4953.2830,4875.23500,4728.8290,5503.9170,5581.9653,5728.371,5143.4,...,5088.8984,5106.8574,5110.3584,5227.8813,5242.7876,5262.4795,5143.4,0,0,0


In [24]:
df.columns

Index(['unique_id', 'ds', 'SeasonalNaive', 'SeasonalNaive-lo-80',
       'SeasonalNaive-lo-90', 'SeasonalNaive-lo-98', 'SeasonalNaive-hi-80',
       'SeasonalNaive-hi-90', 'SeasonalNaive-hi-98', 'y_true_x',
       'y_above_percentile_90_x', 'y_above_percentile_95_x',
       'y_above_percentile_99_x', 'AutoMLP', 'AutoMLP-median', 'AutoMLP-lo-98',
       'AutoMLP-lo-90', 'AutoMLP-lo-80', 'AutoMLP-hi-80', 'AutoMLP-hi-90',
       'AutoMLP-hi-98', 'AutoMLP-loc', 'AutoMLP-scale', 'AutoLSTM',
       'AutoLSTM-median', 'AutoLSTM-lo-98', 'AutoLSTM-lo-90', 'AutoLSTM-lo-80',
       'AutoLSTM-hi-80', 'AutoLSTM-hi-90', 'AutoLSTM-hi-98', 'AutoLSTM-loc',
       'AutoLSTM-scale', 'AutoDeepAR', 'AutoDeepAR-median', 'AutoDeepAR-lo-98',
       'AutoDeepAR-lo-90', 'AutoDeepAR-lo-80', 'AutoDeepAR-hi-80',
       'AutoDeepAR-hi-90', 'AutoDeepAR-hi-98', 'y_true_y',
       'y_above_percentile_90_y', 'y_above_percentile_95_y',
       'y_above_percentile_99_y'],
      dtype='object')

In [9]:
import pandas as pd

df = pd.read_csv("empirical_analysis/M3_Monthly_DistributionLoss/forecast_df_M3_Monthly_DistributionLoss.csv")
df.set_index('ds')
df.index

RangeIndex(start=0, stop=17136, step=1)

In [3]:
df.columns

Index(['unique_id', 'ds', 'SeasonalNaive', 'SeasonalNaive-lo-80',
       'SeasonalNaive-lo-90', 'SeasonalNaive-lo-98', 'SeasonalNaive-hi-80',
       'SeasonalNaive-hi-90', 'SeasonalNaive-hi-98', 'AutoMLP',
       'AutoMLP-lo-98', 'AutoMLP-lo-90', 'AutoMLP-lo-80', 'AutoMLP-hi-80',
       'AutoMLP-hi-90', 'AutoMLP-hi-98', 'AutoMLP-loc', 'AutoMLP-scale',
       'AutoLSTM', 'AutoLSTM-lo-98', 'AutoLSTM-lo-90', 'AutoLSTM-lo-80',
       'AutoLSTM-hi-80', 'AutoLSTM-hi-90', 'AutoLSTM-hi-98', 'AutoLSTM-loc',
       'AutoLSTM-scale', 'AutoDeepAR', 'AutoDeepAR-lo-98', 'AutoDeepAR-lo-90',
       'AutoDeepAR-lo-80', 'AutoDeepAR-hi-80', 'AutoDeepAR-hi-90',
       'AutoDeepAR-hi-98', 'y_true', 'y_percentile_90',
       'y_above_percentile_90', 'y_percentile_95', 'y_above_percentile_95',
       'y_percentile_99', 'y_above_percentile_99'],
      dtype='object')

In [2]:
from utilsforecast.losses import smape


In [4]:
smape(df, models=['SeasonalNaive', 'AutoMLP', 'AutoLSTM', 'AutoDeepAR'], id_col='unique_id', target_col='y_true')

Unnamed: 0,unique_id,SeasonalNaive,AutoMLP,AutoLSTM,AutoDeepAR
0,M1,0.440255,0.999964,0.345205,0.289185
1,M10,0.122964,0.999829,0.137261,0.072463
2,M100,0.047072,0.999441,0.117505,0.052385
3,M1000,0.012317,0.999704,0.073909,0.024544
4,M1001,0.013155,0.999722,0.103644,0.019316
...,...,...,...,...,...
1423,M995,0.003249,0.998763,0.074401,0.005172
1424,M996,0.011770,0.999603,0.109048,0.011411
1425,M997,0.038246,0.998593,0.055371,0.029732
1426,M998,0.027663,0.999732,0.074415,0.034295


In [57]:
import pandas as pd

df = pd.read_csv("M3_Monthly_MQLoss/exceedance_percentiles_Monthly_MQLoss.csv")
ser = df['AutoMLP_prob_90']
ser = ser.dropna(ignore_index=True)
ser.isna().sum()

0