In [1]:
from vadetisweb.anomaly_algorithms.detection.cluster import cluster_gaussian_mixture
from vadetisweb.anomaly_algorithms.detection.histogram import histogram
from vadetisweb.anomaly_algorithms.detection.svm import svm
from vadetisweb.anomaly_algorithms.detection.isolation_forest import isolation_forest
from vadetisweb.anomaly_algorithms.detection.lisa import lisa_pearson, lisa_dtw, lisa_geo
from vadetisweb.anomaly_algorithms.detection.robust_pca import robust_pca_huber_loss
from vadetisweb.models import DataSet, TimeSeries

import numpy as np
import pprint
# Shared pretty-printer (4-space indent); used further down to display the
# nested plot-data structure in a readable layout.
pp = pprint.PrettyPrinter(indent=4)

In [None]:
# Temp1 Single - TS LENGTH
# Benchmark configuration: which series of which dataset is analysed, and the
# series lengths (100 up to 1000 in steps of 100) at which detection is timed.
ts_name = 'REH'
dataset_name = 'Temperature TS14'
dimensions = list(range(100, 1001, 100))

In [3]:
#input
def get_dataset(title, ts_name=None):
    """Load a dataset by title together with its training dataset and a series id.

    Args:
        title: Title of the ``DataSet`` to load; the first match is used.
        ts_name: Optional name of a time series belonging to the dataset.

    Returns:
        A tuple ``(dataset, training_dataset, ts_id)``. ``training_dataset`` is
        the first entry of ``dataset.training_dataset`` (``None`` if the
        dataset has none) and ``ts_id`` is the id of the series named
        ``ts_name`` (``None`` when ``ts_name`` is not given).

    Raises:
        ValueError: If no dataset has the given title, or if ``ts_name`` is
            given but the dataset has no time series with that name.
    """
    dataset = DataSet.objects.filter(title=title).first()
    if dataset is None:
        # .first() yields None when nothing matches; fail here with a clear
        # message instead of an AttributeError on None a line later.
        raise ValueError("No dataset found with title {!r}".format(title))

    training_dataset = dataset.training_dataset.all().first()

    ts_id = None
    if ts_name is not None:
        time_series = dataset.timeseries_set.all().filter(name=ts_name).first()
        if time_series is None:
            raise ValueError(
                "Dataset {!r} has no time series named {!r}".format(title, ts_name)
            )
        ts_id = time_series.id

    return dataset, training_dataset, ts_id

In [4]:
def lisa_pearson_detection_time_it(df, df_class, time_series_id, maximize_score='F1-Score', window_size=10):
    """Time a ``lisa_pearson`` detection call with IPython's timeit line magic.

    All arguments are forwarded unchanged to ``lisa_pearson``. The body uses
    IPython magic syntax, so this function can only be defined and run inside
    an IPython/Jupyter session.

    Returns:
        The object produced by the magic's ``-o`` option (IPython's
        ``TimeitResult``, which exposes e.g. ``all_runs``).
    """
    result = %timeit -o lisa_pearson(df, df_class, time_series_id, maximize_score=maximize_score, window_size=window_size)
    return result

In [5]:
def lisa_dtw_detection_time_it(df, df_class, time_series_id, maximize_score='F1-Score', window_size=10):
    """Time a ``lisa_dtw`` detection call with IPython's timeit line magic.

    The arguments are forwarded unchanged to ``lisa_dtw``; the
    ``distance_function`` is fixed to ``'euclidean'`` and cannot be set by the
    caller. The body uses IPython magic syntax, so this function can only be
    defined and run inside an IPython/Jupyter session.

    Returns:
        The object produced by the magic's ``-o`` option (IPython's
        ``TimeitResult``, which exposes e.g. ``all_runs``).
    """
    result = %timeit -o lisa_dtw(df, df_class, time_series_id, maximize_score=maximize_score, window_size=window_size, distance_function='euclidean')
    return result

In [6]:
def lisa_geo_detection_time_it(df, df_class, time_series_id, maximize_score='F1-Score'):
    """Time a ``lisa_geo`` detection call with IPython's timeit line magic.

    All arguments are forwarded unchanged to ``lisa_geo``. The body uses
    IPython magic syntax, so this function can only be defined and run inside
    an IPython/Jupyter session.

    Returns:
        The object produced by the magic's ``-o`` option (IPython's
        ``TimeitResult``, which exposes e.g. ``all_runs``).
    """
    result = %timeit -o lisa_geo(df, df_class, time_series_id, maximize_score=maximize_score)
    return result

In [7]:
def rpca_detection_time_it(df, df_class, df_train, df_train_class, delta=1, n_components=2, maximize_score='F1-Score', train_size=0.5):
    """Time a ``robust_pca_huber_loss`` detection call with IPython's timeit line magic.

    All arguments are forwarded unchanged to ``robust_pca_huber_loss``. The
    body uses IPython magic syntax, so this function can only be defined and
    run inside an IPython/Jupyter session.

    Returns:
        The object produced by the magic's ``-o`` option (IPython's
        ``TimeitResult``, which exposes e.g. ``all_runs``).
    """
    result = %timeit -o robust_pca_huber_loss(df, df_class, df_train, df_train_class, delta=delta, n_components=n_components, maximize_score=maximize_score, train_size=train_size)
    return result

In [8]:
def histogram_detection_time_it(df, df_class, df_train, df_train_class, maximize_score='F1-Score', train_size=0.5):
    """Time a ``histogram`` detection call with IPython's timeit line magic.

    All arguments are forwarded unchanged to ``histogram``. The body uses
    IPython magic syntax, so this function can only be defined and run inside
    an IPython/Jupyter session.

    Returns:
        The object produced by the magic's ``-o`` option (IPython's
        ``TimeitResult``, which exposes e.g. ``all_runs``).
    """
    result = %timeit -o histogram(df, df_class, df_train, df_train_class, maximize_score=maximize_score, train_size=train_size)
    return result

In [9]:
def cluster_detection_time_it(df, df_class, df_train, df_train_class, maximize_score='F1-Score', n_components=3, n_init=3, train_size=0.5):
    """Time a ``cluster_gaussian_mixture`` detection call with IPython's timeit line magic.

    All arguments are forwarded unchanged to ``cluster_gaussian_mixture``. The
    body uses IPython magic syntax, so this function can only be defined and
    run inside an IPython/Jupyter session.

    Returns:
        The object produced by the magic's ``-o`` option (IPython's
        ``TimeitResult``, which exposes e.g. ``all_runs``).
    """
    result = %timeit -o cluster_gaussian_mixture(df, df_class, df_train, df_train_class, maximize_score=maximize_score, n_components=n_components, n_init=n_init, train_size=train_size)
    return result

In [10]:
def svm_detection_time_it(df, df_class, df_train, df_train_class, maximize_score='F1-Score', nu=0.95, kernel='rbf', train_size=0.5):
    """Time an ``svm`` detection call with IPython's timeit line magic.

    All arguments are forwarded unchanged to ``svm``. The body uses IPython
    magic syntax, so this function can only be defined and run inside an
    IPython/Jupyter session.

    Returns:
        The object produced by the magic's ``-o`` option (IPython's
        ``TimeitResult``, which exposes e.g. ``all_runs``).
    """
    result = %timeit -o svm(df, df_class, df_train, df_train_class, maximize_score=maximize_score, nu=nu, kernel=kernel, train_size=train_size)
    return result

In [11]:
def isolation_forest_detection_time_it(df, df_class, df_train, df_train_class, maximize_score='F1-Score', n_jobs=-1, bootstrap=False, n_estimators=40, train_size=0.5):
    """Time an ``isolation_forest`` detection call with IPython's timeit line magic.

    All arguments are forwarded unchanged to ``isolation_forest``. The body
    uses IPython magic syntax, so this function can only be defined and run
    inside an IPython/Jupyter session.

    Returns:
        The object produced by the magic's ``-o`` option (IPython's
        ``TimeitResult``, which exposes e.g. ``all_runs``).
    """
    result = %timeit -o isolation_forest(df, df_class, df_train, df_train_class, maximize_score=maximize_score, n_jobs=n_jobs, bootstrap=bootstrap, n_estimators=n_estimators, train_size=train_size)
    return result

In [13]:
# TIME TS LENGTH - Pearson
# For every length in `dimensions`, time LISA (Pearson) on the first `length`
# entries of the dataset and record the mean runtime in seconds (2 decimals).
dataset, _, ts_id = get_dataset(dataset_name, ts_name)
lisa_pearson_results = []
for length in dimensions:
    df = dataset.dataframe[0:length]
    df_class = dataset.dataframe_class[0:length]

    r = lisa_pearson_detection_time_it(df, df_class, ts_id)
    # `all_runs` holds the total time of each repeat (all loops together);
    # `timings` is already divided by the loop count, so the stored value stays
    # a per-loop figure even when %timeit auto-selects more than one loop.
    lisa_pearson_results.append(np.round(np.average(r.timings), 2))

1.3 s ± 16.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1.48 s ± 18.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1.64 s ± 25.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1.72 s ± 12 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1.97 s ± 26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.08 s ± 44.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.31 s ± 61.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.43 s ± 2.23 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.6 s ± 2.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.86 s ± 61.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
# TIME TS LENGTH - DTW
# For every length in `dimensions`, time LISA (DTW) on the first `length`
# entries of the dataset and record the mean runtime in seconds (2 decimals).
dataset, _, ts_id = get_dataset(dataset_name, ts_name)
lisa_dtw_results = []
for length in dimensions:
    df = dataset.dataframe[0:length]
    df_class = dataset.dataframe_class[0:length]

    r = lisa_dtw_detection_time_it(df, df_class, ts_id)
    # `all_runs` holds the total time of each repeat (all loops together);
    # `timings` is already divided by the loop count, so the stored value stays
    # a per-loop figure even when %timeit auto-selects more than one loop.
    lisa_dtw_results.append(np.round(np.average(r.timings), 2))

1.98 s ± 52.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
3.05 s ± 68.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
4.18 s ± 95.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
5.27 s ± 72.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
6.2 s ± 161 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
7.2 s ± 81.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
8.39 s ± 133 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
9.82 s ± 126 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
10.8 s ± 226 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
11.8 s ± 172 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
# TIME TS LENGTH - LISA VANILLA
# For every length in `dimensions`, time `lisa_geo` on the first `length`
# entries of the dataset and record the mean runtime in seconds (2 decimals).
dataset, _, ts_id = get_dataset(dataset_name, ts_name)
lisa_geo_results = []
for length in dimensions:
    df = dataset.dataframe[0:length]
    df_class = dataset.dataframe_class[0:length]

    r = lisa_geo_detection_time_it(df, df_class, ts_id)
    # `all_runs` holds the total time of each repeat (all loops together);
    # `timings` is already divided by the loop count, so the stored value stays
    # a per-loop figure even when %timeit auto-selects more than one loop.
    lisa_geo_results.append(np.round(np.average(r.timings), 2))

1.65 s ± 18.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1.82 s ± 17.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2 s ± 23.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.17 s ± 15.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.37 s ± 23.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.53 s ± 29.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.73 s ± 24.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.92 s ± 11.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
3.05 s ± 30.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
3.23 s ± 36.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
# TIME TS LENGTH - RPCA
# For every length in `dimensions`, time RPCA on the first `length` entries of
# the dataset and record the mean runtime in seconds (2 decimals).
# Only the evaluated data is truncated; the training frames are passed in full
# on every iteration.
dataset, training_dataset, _ = get_dataset(dataset_name)
rpca_results = []
for length in dimensions:
    df = dataset.dataframe[0:length]
    df_class = dataset.dataframe_class[0:length]

    r = rpca_detection_time_it(df, df_class, training_dataset.dataframe, training_dataset.dataframe_class)
    # `all_runs` holds the total time of each repeat (all loops together);
    # `timings` is already divided by the loop count, so the stored value stays
    # a per-loop figure even when %timeit auto-selects more than one loop.
    rpca_results.append(np.round(np.average(r.timings), 2))

2.37 s ± 52.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.58 s ± 40.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.5 s ± 115 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.54 s ± 81.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.51 s ± 17.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.62 s ± 62.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.64 s ± 78.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.66 s ± 87.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.7 s ± 65.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.78 s ± 59.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
# TIME TS LENGTH - HISTOGRAM
# For every length in `dimensions`, time the histogram detector on the first
# `length` entries of the dataset and record the mean runtime in seconds
# (2 decimals). Only the evaluated data is truncated; the training frames are
# passed in full on every iteration.
dataset, training_dataset, _ = get_dataset(dataset_name)
histogram_results = []
for length in dimensions:
    df = dataset.dataframe[0:length]
    df_class = dataset.dataframe_class[0:length]

    r = histogram_detection_time_it(df, df_class, training_dataset.dataframe, training_dataset.dataframe_class)
    # `all_runs` holds the total time of each repeat (all loops together);
    # `timings` is already divided by the loop count, so the stored value stays
    # a per-loop figure even when %timeit auto-selects more than one loop.
    histogram_results.append(np.round(np.average(r.timings), 2))

2.41 s ± 75.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.46 s ± 43.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.49 s ± 56.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.58 s ± 22.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.56 s ± 72 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.63 s ± 70.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.68 s ± 71.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.67 s ± 54.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.69 s ± 39.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.7 s ± 1.69 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
# TIME TS LENGTH - CLUSTER
# For every length in `dimensions`, time the Gaussian-mixture cluster detector
# on the first `length` entries of the dataset and record the mean runtime in
# seconds (2 decimals). Only the evaluated data is truncated; the training
# frames are passed in full on every iteration.
dataset, training_dataset, _ = get_dataset(dataset_name)
cluster_results = []
for length in dimensions:
    df = dataset.dataframe[0:length]
    df_class = dataset.dataframe_class[0:length]

    r = cluster_detection_time_it(df, df_class, training_dataset.dataframe, training_dataset.dataframe_class)
    # `all_runs` holds the total time of each repeat (all loops together);
    # `timings` is already divided by the loop count, so the stored value stays
    # a per-loop figure even when %timeit auto-selects more than one loop.
    cluster_results.append(np.round(np.average(r.timings), 2))

2.52 s ± 82.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.56 s ± 59.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.62 s ± 81.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.76 s ± 152 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.8 s ± 109 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.77 s ± 77.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.73 s ± 102 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.74 s ± 59.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.78 s ± 77.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.84 s ± 70.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [19]:
# TIME TS LENGTH - SVM
# For every length in `dimensions`, time the SVM detector on the first
# `length` entries of the dataset and record the mean runtime in seconds
# (2 decimals). Only the evaluated data is truncated; the training frames are
# passed in full on every iteration.
dataset, training_dataset, _ = get_dataset(dataset_name)
svm_results = []
for length in dimensions:
    df = dataset.dataframe[0:length]
    df_class = dataset.dataframe_class[0:length]

    r = svm_detection_time_it(df, df_class, training_dataset.dataframe, training_dataset.dataframe_class)
    # `all_runs` holds the total time of each repeat (all loops together);
    # `timings` is already divided by the loop count, so the stored value stays
    # a per-loop figure even when %timeit auto-selects more than one loop.
    svm_results.append(np.round(np.average(r.timings), 2))

2.39 s ± 48.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.37 s ± 81.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.43 s ± 50.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.39 s ± 34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.41 s ± 842 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.58 s ± 65 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.55 s ± 1.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.6 s ± 6.23 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.66 s ± 42.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.75 s ± 48.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [20]:
# TIME TS LENGTH - ISOLATION FOREST
# For every length in `dimensions`, time the isolation-forest detector on the
# first `length` entries of the dataset and record the mean runtime in seconds
# (2 decimals). Only the evaluated data is truncated; the training frames are
# passed in full on every iteration.
dataset, training_dataset, _ = get_dataset(dataset_name)
isolation_results = []
for length in dimensions:
    df = dataset.dataframe[0:length]
    df_class = dataset.dataframe_class[0:length]

    r = isolation_forest_detection_time_it(df, df_class, training_dataset.dataframe, training_dataset.dataframe_class)
    # `all_runs` holds the total time of each repeat (all loops together);
    # `timings` is already divided by the loop count, so the stored value stays
    # a per-loop figure even when %timeit auto-selects more than one loop.
    isolation_results.append(np.round(np.average(r.timings), 2))

2.66 s ± 25.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.71 s ± 25.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.77 s ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.75 s ± 48.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.84 s ± 29.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.9 s ± 23.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.94 s ± 28.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2.94 s ± 49.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
3 s ± 47.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
3.03 s ± 57.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [21]:
# Gather the per-algorithm timing lists into one list of
# {'title': <display name>, 'scores': <runtimes in seconds>} records,
# in the order the algorithms were benchmarked.
scores = [
    {'title': title, 'scores': timings}
    for title, timings in (
        ('LISA (Pearson)', lisa_pearson_results),
        ('LISA (DTW)', lisa_dtw_results),
        ('LISA (Vanilla)', lisa_geo_results),
        ('RPCA', rpca_results),
        ('Histogram', histogram_results),
        ('Cluster', cluster_results),
        ('SVM', svm_results),
        ('Isolation Forest', isolation_results),
    )
]

In [22]:
# Dump the collected timings as plain text. NOTE(review): presumably printed
# so the literal can be pasted back into a later cell without re-running the
# benchmarks - confirm.
print(scores)

[{'title': 'LISA (Pearson)', 'scores': [1.3, 1.48, 1.64, 1.72, 1.97, 2.08, 2.31, 2.43, 2.6, 2.86]}, {'title': 'LISA (DTW)', 'scores': [1.98, 3.05, 4.18, 5.27, 6.2, 7.2, 8.39, 9.82, 10.79, 11.77]}, {'title': 'LISA (Vanilla)', 'scores': [1.65, 1.82, 2.0, 2.17, 2.37, 2.53, 2.73, 2.92, 3.05, 3.23]}, {'title': 'RPCA', 'scores': [2.37, 2.58, 2.5, 2.54, 2.51, 2.62, 2.64, 2.66, 2.7, 2.78]}, {'title': 'Histogram', 'scores': [2.41, 2.46, 2.49, 2.58, 2.56, 2.63, 2.68, 2.67, 2.69, 2.7]}, {'title': 'Cluster', 'scores': [2.52, 2.56, 2.62, 2.76, 2.8, 2.77, 2.73, 2.74, 2.78, 2.84]}, {'title': 'SVM', 'scores': [2.39, 2.37, 2.43, 2.39, 2.41, 2.58, 2.55, 2.6, 2.66, 2.75]}, {'title': 'Isolation Forest', 'scores': [2.66, 2.71, 2.77, 2.75, 2.84, 2.9, 2.94, 2.94, 3.0, 3.03]}]


In [3]:
# import numpy as np
# scores = [{'title': 'LISA (Pearson)', 'scores': [1.3, 1.48, 1.64, 1.72, 1.97, 2.08, 2.31, 2.43, 2.6, 2.86]}, 
#           {'title': 'LISA (DTW)', 'scores': [1.98, 3.05, 4.18, 5.27, 6.2, 7.2, 8.39, 9.82, 10.79, 11.77]}, 
#           {'title': 'LISA (Vanilla)', 'scores': [1.65, 1.82, 2.0, 2.17, 2.37, 2.53, 2.73, 2.92, 3.05, 3.23]}, 
#           {'title': 'RPCA', 'scores': [2.37, 2.58, 2.5, 2.54, 2.51, 2.62, 2.64, 2.66, 2.7, 2.78]}, 
#           {'title': 'Histogram', 'scores': [2.41, 2.46, 2.49, 2.58, 2.56, 2.63, 2.68, 2.67, 2.69, 2.7]}, 
#           {'title': 'Cluster', 'scores': [2.52, 2.56, 2.62, 2.76, 2.8, 2.77, 2.73, 2.74, 2.78, 2.84]}, 
#           {'title': 'SVM', 'scores': [2.39, 2.37, 2.43, 2.39, 2.41, 2.58, 2.55, 2.6, 2.66, 2.75]}, 
#           {'title': 'Isolation Forest', 'scores': [2.66, 2.71, 2.77, 2.75, 2.84, 2.9, 2.94, 2.94, 3.0, 3.03]}]


In [4]:
# Convert the timings into plot coordinates: one (x, y) tuple per measured
# length, where x is the series length divided by 10 and y is the runtime in
# whole milliseconds.
# NOTE: relies on `scores` and `dimensions` being defined by earlier cells.
results = []
for score in scores:
    timings = score['scores']
    plotdata = [
        # round() before int(): a bare int() truncates, and float products can
        # land just below the intended value (e.g. 2.01 * 1000 evaluates to
        # slightly less than 2010 and would be stored as 2009).
        (int(length / 10), int(round(timings[i] * 1000)))
        for i, length in enumerate(dimensions)
    ]
    results.append({'title': score['title'], 'plotdata': plotdata})

In [5]:
# Pretty-print the plot data, one (x, y) tuple per line, using the shared
# PrettyPrinter instance `pp`.
pp.pprint(results)

[   {   'plotdata': [   (10, 1300),
                        (20, 1480),
                        (30, 1640),
                        (40, 1720),
                        (50, 1970),
                        (60, 2080),
                        (70, 2310),
                        (80, 2430),
                        (90, 2600),
                        (100, 2860)],
        'title': 'LISA (Pearson)'},
    {   'plotdata': [   (10, 1980),
                        (20, 3050),
                        (30, 4180),
                        (40, 5270),
                        (50, 6200),
                        (60, 7200),
                        (70, 8390),
                        (80, 9820),
                        (90, 10790),
                        (100, 11770)],
        'title': 'LISA (DTW)'},
    {   'plotdata': [   (10, 1650),
                        (20, 1820),
                        (30, 2000),
                        (40, 2170),
                        (50, 2370),
                        (6