In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

import anomaly.utils.modelselect_utils as mu
import anomaly.utils.statsutils as su
from anomaly import io, tmm, adm

In [2]:
# Candidate forecasting models to compare. Each key is the model's name; the
# scoring cell passes this mapping to `mu.compute_predictor_scores`.
predictor_dict = dict(
    naive_predictor=tmm.NaivePredictor(),
    ar_predictor=tmm.AR(),
    ma_predictor=tmm.MA(),
    arma_predictor=tmm.ARMA(),
    poly_predictor=tmm.Polynomial(),
)

### Compute the features and the scores of the models for each time series in the benchmark

In [None]:
%%capture --no-stdout

for benchmark_index in range(2,3):
    bench = io.BenchmarkDataset(benchmark_index)

    ## for debug
    #bench.len = 5

    score_dict = mu.compute_predictor_scores(predictor_dict, bench, detector=adm.KSigma())
    score_df = pd.concat([score_dict[model_name].assign(model_name=model_name)
                      for model_name in score_dict.keys()])
    
    score_df.to_csv(f"saved_data/score_df_{benchmark_index}.csv", index_label="ts_index")
    
    features = mu.compute_benchmark_features(bench)
    features.to_csv(f"saved_data/features_{benchmark_index}.csv", index_label="ts_index")

 96%|██████████████████████████████████████████████████▉  | 96/100 [05:11<00:14,  3.70s/it]

### Analyse the results

In [6]:
# Reload the persisted tables for the benchmark selected by `benchmark_index`,
# using the saved `ts_index` column as the row index of each frame.
score_df = pd.read_csv(
    f"saved_data/score_df_{benchmark_index}.csv",
    index_col="ts_index",
)
features = pd.read_csv(
    f"saved_data/features_{benchmark_index}.csv",
    index_col="ts_index",
)

In [7]:
# Preview the first rows of the feature table (indexed by `ts_index`).
features.head()

Unnamed: 0_level_0,periodicity,trend_score,seasonality_score,nonlinearity,skew,kurtosis,lyapunov
ts_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1.0,1.0,0.0,0.934517,0.083447,-0.719147,-0.000418
1,77.0,-0.077289,-3.66281,1.01455,0.045395,-0.432972,-0.000689
2,157.0,-1.737175,-0.31264,1.014645,0.01155,-0.610619,0.000884
3,35.0,-0.021986,-21.816881,0.368837,0.124895,-0.883236,-0.001314
4,199.0,-0.029197,-3.816528,0.71445,0.101515,-0.827419,-0.000919


In [8]:
# Re-read the scores without `index_col`, so `ts_index` stays a regular column.
# This replaces the `score_df` loaded earlier with `ts_index` as its index.
score_df = pd.read_csv(f"saved_data/score_df_{benchmark_index}.csv")

In [9]:
# Display the score table: each row holds a `ts_index`, the metric columns and
# the `model_name` that produced them.
score_df

Unnamed: 0,ts_index,bias,mad,mape,mse,sae,f1,recall,model_name
0,0,-3.793342,219.740122,0.364106,94380.15,312250.7,0.4,1.0,naive_predictor
1,1,-2.891046,672.837596,1.509236,722647.7,956102.2,0.285714,1.0,naive_predictor
2,2,-1.933118,323.333956,0.955834,180896.5,459457.6,0.285714,1.0,naive_predictor
3,3,-10.02302,104.670272,0.034638,560503.9,148736.5,0.4,0.333333,naive_predictor
4,4,-5.177551,391.676889,0.404856,382276.9,556572.9,0.4,0.333333,naive_predictor
5,0,-6.063748,193.402067,0.307673,74654.88,274824.3,0.333333,1.0,ar_predictor
6,1,-6.552432,518.378271,1.12609,439742.1,736615.5,0.4,1.0,ar_predictor
7,2,-3.808951,264.172482,0.722112,121832.6,375389.1,0.222222,1.0,ar_predictor
8,3,-9.200717,135.081588,0.184265,462009.9,191950.9,0.315789,0.333333,ar_predictor
9,4,-7.522152,363.278187,0.352766,329140.3,516218.3,0.545455,0.666667,ar_predictor


In [10]:
# Average every remaining column per model.
# NOTE(review): `ts_index` is a regular column here, so it is averaged as well;
# that looks unintended for an identifier - consider dropping it before aggregating.
score_df.groupby("model_name").mean()

Unnamed: 0_level_0,ts_index,bias,mad,mape,mse,sae,f1,recall
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ar_predictor,2.0,-6.6296,294.862519,0.538581,285475.9,418999.6,0.36336,0.8
arma_predictor,2.0,-3.478542,290.879887,0.514362,250308.1,413340.3,0.490303,0.866667
ma_predictor,2.0,0.7619852,507.910396,0.985939,520789.1,721740.7,0.432982,0.8
naive_predictor,2.0,-4.763615,342.451767,0.653734,388141.0,486624.0,0.354286,0.733333
poly_predictor,2.0,2.362798e-12,918.887485,1.658767,1268022.0,1305739.0,1.0,1.0
