In [1]:
from vadetisweb.anomaly_algorithms.detection.cluster import cluster_gaussian_mixture
from vadetisweb.anomaly_algorithms.detection.histogram import histogram
from vadetisweb.anomaly_algorithms.detection.svm import svm
from vadetisweb.anomaly_algorithms.detection.isolation_forest import isolation_forest
from vadetisweb.anomaly_algorithms.detection.lisa import lisa_pearson, lisa_dtw, lisa_geo
from vadetisweb.anomaly_algorithms.detection.robust_pca import robust_pca_huber_loss
from vadetisweb.models import DataSet, TimeSeries

import numpy as np
import pprint
# Shared pretty-printer (4-space indent) used by the display cells below.
pp = pprint.PrettyPrinter(indent=4)

In [1]:
# Temp1 Multi - TS LENGTH
dataset_name = 'Temperature TS8 Multi'

# Series lengths (number of leading rows) that each detector is evaluated on.
# The evaluation loops iterate over `ts_lengths`; without this definition they
# raise NameError on a fresh kernel.
ts_lengths = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]

# The plot-data cells read the same values under the name `dimensions`,
# so keep that name as an alias of the list above.
dimensions = ts_lengths

In [3]:
#input
def get_dataset(title, ts_name=None):
    """Look up a dataset by title together with its first training dataset.

    Args:
        title: Value matched against the ``title`` field of ``DataSet``; when
            several rows match, the first one returned by the query is used.
        ts_name: Currently unused; kept so existing callers stay valid.

    Returns:
        tuple: ``(dataset, training_dataset)``. ``training_dataset`` is whatever
        ``dataset.training_dataset.all().first()`` yields (presumably ``None``
        when the dataset has no training data - verify against the model).

    Raises:
        ValueError: If no dataset with the given title is found.
    """
    dataset = DataSet.objects.filter(title=title).first()
    if dataset is None:
        # Fail with a readable message instead of an AttributeError on None below.
        raise ValueError("No DataSet found with title {!r}".format(title))

    training_dataset = dataset.training_dataset.all().first()
    return dataset, training_dataset

In [4]:
def rpca_detection_l(df, df_class, df_train, df_train_class, delta=1, n_components=2, maximize_score='F1-Score', train_size=0.5):
    """Call ``robust_pca_huber_loss`` with this notebook's default settings.

    The first four arguments are forwarded positionally, the remaining ones as
    keyword arguments; the callee's return value is passed through unchanged.
    """
    options = dict(
        delta=delta,
        n_components=n_components,
        maximize_score=maximize_score,
        train_size=train_size,
    )
    return robust_pca_huber_loss(df, df_class, df_train, df_train_class, **options)

In [5]:
def histogram_detection_l(df, df_class, df_train, df_train_class, maximize_score='F1-Score', train_size=0.5):
    """Call ``histogram`` with this notebook's default settings.

    The first four arguments are forwarded positionally, the remaining ones as
    keyword arguments; the callee's return value is passed through unchanged.
    """
    options = dict(maximize_score=maximize_score, train_size=train_size)
    return histogram(df, df_class, df_train, df_train_class, **options)

In [6]:
def cluster_detection_l(df, df_class, df_train, df_train_class, maximize_score='F1-Score', n_components=3, n_init=3, train_size=0.5):
    """Call ``cluster_gaussian_mixture`` with this notebook's default settings.

    The first four arguments are forwarded positionally, the remaining ones as
    keyword arguments; the callee's return value is passed through unchanged.
    """
    options = dict(
        maximize_score=maximize_score,
        n_components=n_components,
        n_init=n_init,
        train_size=train_size,
    )
    return cluster_gaussian_mixture(df, df_class, df_train, df_train_class, **options)

In [7]:
def svm_detection_l(df, df_class, df_train, df_train_class, maximize_score='F1-Score', nu=0.95, kernel='rbf', train_size=0.5):
    """Call ``svm`` with this notebook's default settings.

    The first four arguments are forwarded positionally, the remaining ones as
    keyword arguments; the callee's return value is passed through unchanged.
    """
    options = dict(
        maximize_score=maximize_score,
        nu=nu,
        kernel=kernel,
        train_size=train_size,
    )
    return svm(df, df_class, df_train, df_train_class, **options)

In [8]:
def isolation_forest_detection_l(df, df_class, df_train, df_train_class, maximize_score='F1-Score', n_jobs=-1, bootstrap=False, n_estimators=40, train_size=0.5):
    """Call ``isolation_forest`` with this notebook's default settings.

    The first four arguments are forwarded positionally, the remaining ones as
    keyword arguments; the callee's return value is passed through unchanged.
    """
    options = dict(
        maximize_score=maximize_score,
        n_jobs=n_jobs,
        bootstrap=bootstrap,
        n_estimators=n_estimators,
        train_size=train_size,
    )
    return isolation_forest(df, df_class, df_train, df_train_class, **options)

In [10]:
# TS LENGTH - RPCA
dataset, training_dataset = get_dataset(dataset_name)

# One row per evaluated length, columns ordered [nmi, rmse, f1]
r_results = []
for length in ts_lengths:
    # Evaluate on the first `length` rows only; the training data is passed unsliced.
    df, df_class = dataset.dataframe[0:length], dataset.dataframe_class[0:length]
    scores, y_hat_results, df_common_class, info = rpca_detection_l(
        df, df_class, training_dataset.dataframe, training_dataset.dataframe_class
    )
    r_results.append([np.round(info[key], 3) for key in ('nmi', 'rmse', 'f1_score')])

rpca_results = np.array(r_results)

In [11]:
# TS LENGTH - HISTOGRAM
dataset, training_dataset = get_dataset(dataset_name)

# One row per evaluated length, columns ordered [nmi, rmse, f1]
h_results = []
for length in ts_lengths:
    # Evaluate on the first `length` rows only; the training data is passed unsliced.
    df, df_class = dataset.dataframe[0:length], dataset.dataframe_class[0:length]
    scores, y_hat_results, df_common_class, info = histogram_detection_l(
        df, df_class, training_dataset.dataframe, training_dataset.dataframe_class
    )
    h_results.append([np.round(info[key], 3) for key in ('nmi', 'rmse', 'f1_score')])

histogram_results = np.array(h_results)

In [12]:
# TS LENGTH - CLUSTER
dataset, training_dataset = get_dataset(dataset_name)

# One row per evaluated length, columns ordered [nmi, rmse, f1]
c_results = []
for length in ts_lengths:
    # Evaluate on the first `length` rows only; the training data is passed unsliced.
    df, df_class = dataset.dataframe[0:length], dataset.dataframe_class[0:length]
    scores, y_hat_results, df_common_class, info = cluster_detection_l(
        df, df_class, training_dataset.dataframe, training_dataset.dataframe_class
    )
    c_results.append([np.round(info[key], 3) for key in ('nmi', 'rmse', 'f1_score')])

cluster_results = np.array(c_results)

In [13]:
# TS LENGTH - SVM
dataset, training_dataset = get_dataset(dataset_name)

# One row per evaluated length, columns ordered [nmi, rmse, f1]
s_results = []
for length in ts_lengths:
    # Evaluate on the first `length` rows only; the training data is passed unsliced.
    df, df_class = dataset.dataframe[0:length], dataset.dataframe_class[0:length]
    scores, y_hat_results, df_common_class, info = svm_detection_l(
        df, df_class, training_dataset.dataframe, training_dataset.dataframe_class
    )
    s_results.append([np.round(info[key], 3) for key in ('nmi', 'rmse', 'f1_score')])

svm_results = np.array(s_results)

In [14]:
# TS LENGTH - ISOLATION FOREST
dataset, training_dataset = get_dataset(dataset_name)

# One row per evaluated length, columns ordered [nmi, rmse, f1]
i_results = []
for length in ts_lengths:
    # Evaluate on the first `length` rows only; the training data is passed unsliced.
    df, df_class = dataset.dataframe[0:length], dataset.dataframe_class[0:length]
    scores, y_hat_results, df_common_class, info = isolation_forest_detection_l(
        df, df_class, training_dataset.dataframe, training_dataset.dataframe_class
    )
    i_results.append([np.round(info[key], 3) for key in ('nmi', 'rmse', 'f1_score')])

isolation_results = np.array(i_results)

In [15]:
# Pair each detector's display title with the result array computed for it.
scores = [
    {'title': title, 'scores': values}
    for title, values in (
        ('RPCA', rpca_results),
        ('Histogram', histogram_results),
        ('Cluster', cluster_results),
        ('SVM', svm_results),
        ('Isolation Forest', isolation_results),
    )
]

In [16]:
# Print the list of per-detector score arrays as plain text.
print(scores)

[{'title': 'RPCA', 'scores': array([[0.092, 0.56 , 0.263],
       [0.082, 0.445, 0.183],
       [0.053, 0.537, 0.13 ],
       [0.051, 0.475, 0.112],
       [0.132, 0.28 , 0.247],
       [0.579, 0.018, 0.718],
       [0.559, 0.019, 0.698],
       [0.664, 0.016, 0.8  ],
       [0.586, 0.023, 0.734],
       [0.59 , 0.021, 0.734]])}, {'title': 'Histogram', 'scores': array([[0.048, 0.26 , 0.   ],
       [0.032, 0.245, 0.   ],
       [0.025, 0.217, 0.   ],
       [0.021, 0.218, 0.   ],
       [0.025, 0.196, 0.   ],
       [0.02 , 0.16 , 0.   ],
       [0.008, 0.164, 0.017],
       [0.009, 0.201, 0.024],
       [0.027, 0.104, 0.19 ],
       [0.025, 0.101, 0.179]])}, {'title': 'Cluster', 'scores': array([[0.247, 0.24 , 0.455],
       [0.207, 0.185, 0.351],
       [0.485, 0.043, 0.649],
       [0.463, 0.038, 0.615],
       [0.68 , 0.02 , 0.821],
       [0.366, 0.028, 0.485],
       [0.369, 0.027, 0.486],
       [0.533, 0.024, 0.678],
       [0.431, 0.033, 0.571],
       [0.434, 0.03 , 0.571]])}

In [2]:
# import numpy as np
# scores = [{'title': 'RPCA', 'scores': np.array([[0.092, 0.56 , 0.263],
#        [0.082, 0.445, 0.183],
#        [0.053, 0.537, 0.13 ],
#        [0.051, 0.475, 0.112],
#        [0.132, 0.28 , 0.247],
#        [0.579, 0.018, 0.718],
#        [0.559, 0.019, 0.698],
#        [0.664, 0.016, 0.8  ],
#        [0.586, 0.023, 0.734],
#        [0.59 , 0.021, 0.734]])}, {'title': 'Histogram', 'scores': np.array([[0.048, 0.26 , 0.   ],
#        [0.032, 0.245, 0.   ],
#        [0.025, 0.217, 0.   ],
#        [0.021, 0.218, 0.   ],
#        [0.025, 0.196, 0.   ],
#        [0.02 , 0.16 , 0.   ],
#        [0.008, 0.164, 0.017],
#        [0.009, 0.201, 0.024],
#        [0.027, 0.104, 0.19 ],
#        [0.025, 0.101, 0.179]])}, {'title': 'Cluster', 'scores': np.array([[0.247, 0.24 , 0.455],
#        [0.207, 0.185, 0.351],
#        [0.485, 0.043, 0.649],
#        [0.463, 0.038, 0.615],
#        [0.68 , 0.02 , 0.821],
#        [0.366, 0.028, 0.485],
#        [0.369, 0.027, 0.486],
#        [0.533, 0.024, 0.678],
#        [0.431, 0.033, 0.571],
#        [0.434, 0.03 , 0.571]])}, {'title': 'SVM', 'scores': np.array([[0.407, 0.12 , 0.625],
#        [0.343, 0.095, 0.513],
#        [0.325, 0.067, 0.524],
#        [0.262, 0.072, 0.431],
#        [0.847, 0.006, 0.93 ],
#        [0.057, 0.04 , 0.077],
#        [0.099, 0.037, 0.133],
#        [0.374, 0.032, 0.5  ],
#        [0.193, 0.064, 0.453],
#        [0.184, 0.062, 0.436]])}, {'title': 'Isolation Forest', 'scores': np.array([[0.388, 0.13 , 0.606],
#        [0.189, 0.205, 0.328],
#        [0.144, 0.177, 0.293],
#        [0.061, 0.305, 0.153],
#        [0.297, 0.094, 0.484],
#        [0.342, 0.072, 0.527],
#        [0.253, 0.089, 0.446],
#        [0.2  , 0.146, 0.381],
#        [0.028, 0.118, 0.197],
#        [0.024, 0.119, 0.179]])}]

In [5]:
# NMI: column 0 of every score row, paired with the series length divided by 10
results = []
for score in scores:
    values = score['scores']
    res = [(int(length / 10), values[idx][0]) for idx, length in enumerate(dimensions)]
    results.append({'title': score['title'], 'plotdata': res})

In [6]:
# Pretty-print the `results` list built in the preceding cell.
pp.pprint(results)

[   {   'plotdata': [   (10, 0.092),
                        (20, 0.082),
                        (30, 0.053),
                        (40, 0.051),
                        (50, 0.132),
                        (60, 0.579),
                        (70, 0.559),
                        (80, 0.664),
                        (90, 0.586),
                        (100, 0.59)],
        'title': 'RPCA'},
    {   'plotdata': [   (10, 0.048),
                        (20, 0.032),
                        (30, 0.025),
                        (40, 0.021),
                        (50, 0.025),
                        (60, 0.02),
                        (70, 0.008),
                        (80, 0.009),
                        (90, 0.027),
                        (100, 0.025)],
        'title': 'Histogram'},
    {   'plotdata': [   (10, 0.247),
                        (20, 0.207),
                        (30, 0.485),
                        (40, 0.463),
                        (50, 0.68),
                 

In [7]:
# RMSE: column 1 of every score row, paired with the series length divided by 10
results = []
for score in scores:
    values = score['scores']
    res = [(int(length / 10), values[idx][1]) for idx, length in enumerate(dimensions)]
    results.append({'title': score['title'], 'plotdata': res})

In [8]:
# Pretty-print the `results` list built in the preceding cell.
pp.pprint(results)

[   {   'plotdata': [   (10, 0.56),
                        (20, 0.445),
                        (30, 0.537),
                        (40, 0.475),
                        (50, 0.28),
                        (60, 0.018),
                        (70, 0.019),
                        (80, 0.016),
                        (90, 0.023),
                        (100, 0.021)],
        'title': 'RPCA'},
    {   'plotdata': [   (10, 0.26),
                        (20, 0.245),
                        (30, 0.217),
                        (40, 0.218),
                        (50, 0.196),
                        (60, 0.16),
                        (70, 0.164),
                        (80, 0.201),
                        (90, 0.104),
                        (100, 0.101)],
        'title': 'Histogram'},
    {   'plotdata': [   (10, 0.24),
                        (20, 0.185),
                        (30, 0.043),
                        (40, 0.038),
                        (50, 0.02),
                    

In [9]:
# F1: column 2 of every score row, paired with the series length divided by 10
results = []
for score in scores:
    values = score['scores']
    res = [(int(length / 10), values[idx][2]) for idx, length in enumerate(dimensions)]
    results.append({'title': score['title'], 'plotdata': res})

In [10]:
# Pretty-print the `results` list built in the preceding cell.
pp.pprint(results)

[   {   'plotdata': [   (10, 0.263),
                        (20, 0.183),
                        (30, 0.13),
                        (40, 0.112),
                        (50, 0.247),
                        (60, 0.718),
                        (70, 0.698),
                        (80, 0.8),
                        (90, 0.734),
                        (100, 0.734)],
        'title': 'RPCA'},
    {   'plotdata': [   (10, 0.0),
                        (20, 0.0),
                        (30, 0.0),
                        (40, 0.0),
                        (50, 0.0),
                        (60, 0.0),
                        (70, 0.017),
                        (80, 0.024),
                        (90, 0.19),
                        (100, 0.179)],
        'title': 'Histogram'},
    {   'plotdata': [   (10, 0.455),
                        (20, 0.351),
                        (30, 0.649),
                        (40, 0.615),
                        (50, 0.821),
                        (60, 0