# Synthetic Test
Given a synthetic signal named `my_data`, the folder `custom_data` must contain two files:
1. `custom_data/my_data.csv` for the time series
2. `custom_data/my_data_truth.csv` for the anomalies (the ground truth).

The following example uses `my_data = synthetic_5`, i.e. it expects `custom_data/synthetic_5.csv` and `custom_data/synthetic_5_truth.csv`.

In [2]:
from time import time
from tqdm import tqdm 
import site
import sys
import pandas as pd 
import numpy as np
import ast 
import matplotlib.pyplot as plt

site.addsitedir('Orion/')
site.addsitedir('MLPrimitives/')
site.addsitedir('mSSA/')


from orion.benchmark import benchmark, _summarize_results_datasets
from Orion.orion.evaluation import CONTEXTUAL_METRICS as METRICS
from Orion.orion.evaluation import contextual_confusion_matrix
from functools import partial

In [11]:
# Remove the 'accuracy' entry (when present) and register the confusion matrix
# next to the remaining contextual metrics. Note that this edits the imported
# METRICS dict in place.
METRICS.pop('accuracy', None)
METRICS['confusion_matrix'] = contextual_confusion_matrix

# Bind weighted=False on every metric function handed to the benchmark.
metrics = dict(
    (metric_name, partial(metric_fn, weighted=False))
    for metric_name, metric_fn in METRICS.items()
)

def make_hyperparams(datasets, rank):
    """Build the per-dataset hyperparameter overrides for the ``mssa`` pipeline.

    Args:
        datasets: Iterable of dataset names; iterating a dict yields its keys.
        rank: Value stored under the ``rank`` hyperparameter of the
            ``orion.primitives.mssa.mSSATAD#1`` primitive. Passed through
            unchanged (``None`` is allowed).

    Returns:
        dict: One entry per dataset name, each of the form
        ``{"mssa": {"orion.primitives.mssa.mSSATAD#1": {"rank": rank}}}``.
        Every dataset gets its own nested dict object.
    """
    # Build a fresh nested dict per dataset. Binding all keys to one shared dict
    # would let an in-place edit of one dataset's settings leak into the others.
    return {
        name: {"mssa": {"orion.primitives.mssa.mSSATAD#1": {"rank": rank}}}
        for name in datasets
    }


# Accumulators for the rank sweep: the sweep loop appends one scores frame and
# one summary frame per tested rank.
score_dataframes, summary_dataframes = [], []

In [13]:
%%capture

# Sweep the mSSA `rank` hyperparameter over the synthetic dataset, then persist
# the per-run scores and the per-dataset summaries as pickles.

# Start from empty accumulators so that re-running this cell does not append a
# second copy of every result to lists left over from a previous execution.
score_dataframes = []
summary_dataframes = []

# None is swept in addition to the explicit ranks 1..9.
ranks = [None] + list(range(1,10))
datasets = {
    'custom': ['synthetic_5']
}

# Loop-invariant setup, hoisted out of the sweep.
pipelines = ['mssa']
data = datasets  # alias kept in case other cells refer to `data`
for k, v in data.items():
    print(len(v))
print(data)

for rank in ranks:
    hyperparameters = make_hyperparams(data, rank)
    # NOTE(review): `rank='f1'` is benchmark's own keyword and is unrelated to
    # the mSSA rank being swept; presumably it selects the metric used to order
    # the results - confirm against the benchmark implementation.
    scores = benchmark(pipelines=pipelines, datasets=data, metrics=metrics, rank='f1', hyperparameters=hyperparameters, detrend=True)

    # Tag the rows with the swept mSSA rank.
    # NOTE(review): if benchmark already returns a 'rank' column, this
    # overwrites it - confirm that is intended.
    scores['rank'] = rank
    # Store the confusion matrix as its string representation.
    scores['confusion_matrix'] = [str(x) for x in scores['confusion_matrix']]
    score_dataframes.append(scores)

    score_summary = _summarize_results_datasets(scores, metrics)
    score_summary['rank'] = rank
    summary_dataframes.append(score_summary)


pd.concat(score_dataframes, ignore_index=True).to_pickle("mssa_synthetic_scores.pkl")
pd.concat(summary_dataframes, ignore_index=True).to_pickle("mssa_synthetic_summaries.pkl")



In [14]:
# Reload the persisted sweep results from the working directory.
scores, summaries = (
    pd.read_pickle(pickle_path)
    for pickle_path in ('mssa_synthetic_scores.pkl', 'mssa_synthetic_summaries.pkl')
)

In [15]:
# Display the reloaded benchmark scores (one row per rank tried in the sweep).
scores

Unnamed: 0,pipeline,rank,f1,recall,precision,confusion_matrix,status,elapsed,split,dataset,signal,fp,fn,tp
0,mssa,,0.0,0.0,0.0,"(0, 3, 0)",0,1.218059,False,custom,synthetic_5,0,3,0
1,mssa,1.0,1.0,1.0,1.0,"(0, 0, 3)",0,0.930442,False,custom,synthetic_5,0,0,3
2,mssa,2.0,0.0,0.0,0.0,"(0, 3, 0)",0,0.394485,False,custom,synthetic_5,0,3,0
3,mssa,3.0,0.5,0.333333,1.0,"(0, 2, 1)",0,0.637397,False,custom,synthetic_5,0,2,1
4,mssa,4.0,0.8,0.666667,1.0,"(0, 1, 2)",0,0.78246,False,custom,synthetic_5,0,1,2
5,mssa,5.0,0.0,0.0,0.0,"(0, 3, 0)",0,1.931797,False,custom,synthetic_5,0,3,0
6,mssa,6.0,0.0,0.0,0.0,"(0, 3, 0)",0,1.330421,False,custom,synthetic_5,0,3,0
7,mssa,7.0,0.0,0.0,0.0,"(1, 3, 0)",0,0.920709,False,custom,synthetic_5,1,3,0
8,mssa,8.0,0.0,0.0,0.0,"(1, 3, 0)",0,0.964817,False,custom,synthetic_5,1,3,0
9,mssa,9.0,0.0,0.0,0.0,"(1, 3, 0)",0,0.862359,False,custom,synthetic_5,1,3,0


In [16]:
# Display the reloaded per-dataset summaries (one row per rank tried in the sweep).
summaries

Unnamed: 0,dataset,pipeline,fp,fn,tp,f1,rank
0,custom,mssa,0,3,0,,
1,custom,mssa,0,0,3,1.0,1.0
2,custom,mssa,0,3,0,,2.0
3,custom,mssa,0,2,1,0.5,3.0
4,custom,mssa,0,1,2,0.8,4.0
5,custom,mssa,0,3,0,,5.0
6,custom,mssa,0,3,0,,6.0
7,custom,mssa,1,3,0,0.0,7.0
8,custom,mssa,1,3,0,0.0,8.0
9,custom,mssa,1,3,0,0.0,9.0
