In [1]:
import pprint

import numpy as np

from vadetisweb.anomaly_algorithms.detection.cluster import cluster_gaussian_mixture
from vadetisweb.anomaly_algorithms.detection.histogram import histogram
from vadetisweb.anomaly_algorithms.detection.svm import svm
from vadetisweb.anomaly_algorithms.detection.isolation_forest import isolation_forest
from vadetisweb.anomaly_algorithms.detection.lisa import lisa_pearson, lisa_dtw, lisa_geo
from vadetisweb.anomaly_algorithms.detection.robust_pca import robust_pca_huber_loss
from vadetisweb.models import DataSet, TimeSeries
# Shared pretty-printer (4-space indent) used by the cells that display `results`.
pp = pprint.PrettyPrinter(indent=4)

In [1]:
# A2 Multi - Contamination Level
# Titles handed to get_dataset(), one per detection run.
dataset_names = ['A2 Yahoo'] + [
    'A2 Yahoo Contamination %d' % level for level in (100, 150, 200, 250)
]
# Values paired index-wise with dataset_names when the plot data is built.
dimensions = list(range(50, 251, 50))

In [3]:
#input
def get_dataset(title, ts_name=None):
    """Look up a dataset by title together with its first training dataset.

    Args:
        title: Value matched against ``DataSet.title``; the first match is used.
        ts_name: Unused. Kept so existing calls that pass it keep working.

    Returns:
        A ``(dataset, training_dataset)`` tuple, where ``training_dataset`` is
        the result of ``dataset.training_dataset.all().first()``.

    Raises:
        ValueError: If no dataset with the given title is found.
    """
    dataset = DataSet.objects.filter(title=title).first()
    if dataset is None:
        # Without this guard the attribute access below fails on None with an
        # AttributeError that does not mention which title was requested.
        raise ValueError("No DataSet found with title %r" % (title,))

    training_dataset = dataset.training_dataset.all().first()
    return dataset, training_dataset

In [4]:
def rpca_detection_l(df, df_class, df_train, df_train_class, delta=1, n_components=2, maximize_score='F1-Score', train_size=0.5):
    """Call robust_pca_huber_loss with this notebook's default settings.

    The four frame arguments are passed through positionally and unchanged;
    every other parameter is forwarded as a keyword argument of the same name.
    Returns whatever robust_pca_huber_loss returns.
    """
    options = {
        'delta': delta,
        'n_components': n_components,
        'maximize_score': maximize_score,
        'train_size': train_size,
    }
    return robust_pca_huber_loss(df, df_class, df_train, df_train_class, **options)

In [5]:
def histogram_detection_l(df, df_class, df_train, df_train_class, maximize_score='F1-Score', train_size=0.5):
    """Call histogram with this notebook's default settings.

    The four frame arguments are passed through positionally and unchanged;
    maximize_score and train_size are forwarded as keyword arguments.
    Returns whatever histogram returns.
    """
    options = {
        'maximize_score': maximize_score,
        'train_size': train_size,
    }
    return histogram(df, df_class, df_train, df_train_class, **options)

In [6]:
def cluster_detection_l(df, df_class, df_train, df_train_class, maximize_score='F1-Score', n_components=3, n_init=3, train_size=0.5):
    """Call cluster_gaussian_mixture with this notebook's default settings.

    The four frame arguments are passed through positionally and unchanged;
    every other parameter is forwarded as a keyword argument of the same name.
    Returns whatever cluster_gaussian_mixture returns.
    """
    options = {
        'maximize_score': maximize_score,
        'n_components': n_components,
        'n_init': n_init,
        'train_size': train_size,
    }
    return cluster_gaussian_mixture(df, df_class, df_train, df_train_class, **options)

In [7]:
def svm_detection_l(df, df_class, df_train, df_train_class, maximize_score='F1-Score', nu=0.95, kernel='rbf', train_size=0.5):
    """Call svm with this notebook's default settings.

    The four frame arguments are passed through positionally and unchanged;
    every other parameter is forwarded as a keyword argument of the same name.
    Returns whatever svm returns.
    """
    options = {
        'maximize_score': maximize_score,
        'nu': nu,
        'kernel': kernel,
        'train_size': train_size,
    }
    return svm(df, df_class, df_train, df_train_class, **options)

In [8]:
def isolation_forest_detection_l(df, df_class, df_train, df_train_class, maximize_score='F1-Score', n_jobs=-1, bootstrap=False, n_estimators=40, train_size=0.5):
    """Call isolation_forest with this notebook's default settings.

    The four frame arguments are passed through positionally and unchanged;
    every other parameter is forwarded as a keyword argument of the same name.
    Returns whatever isolation_forest returns.
    """
    options = {
        'maximize_score': maximize_score,
        'n_jobs': n_jobs,
        'bootstrap': bootstrap,
        'n_estimators': n_estimators,
        'train_size': train_size,
    }
    return isolation_forest(df, df_class, df_train, df_train_class, **options)

In [10]:
# Cont Level - RPCA
# Builds rpca_results: one row per entry of dataset_names, columns [nmi, rmse, f1].

r_results = []
for dataset_name in dataset_names:
    dataset, training_dataset = get_dataset(dataset_name)
    # Only the trailing `info` value is used. The other three return values get
    # throwaway names so this loop does not rebind `scores`, which the summary
    # cell further down uses for the per-algorithm list of score arrays.
    _rpca_scores, _rpca_y_hat, _rpca_common_class, info = rpca_detection_l(
        dataset.dataframe,
        dataset.dataframe_class,
        training_dataset.dataframe,
        training_dataset.dataframe_class,
    )
    r_results.append([np.round(info[metric], 3) for metric in ('nmi', 'rmse', 'f1_score')])

rpca_results = np.array(r_results)

In [11]:
# Cont Level - HISTOGRAM
# Builds histogram_results: one row per entry of dataset_names, columns [nmi, rmse, f1].

h_results = []
for dataset_name in dataset_names:
    dataset, training_dataset = get_dataset(dataset_name)
    # Only the trailing `info` value is used. The other three return values get
    # throwaway names so this loop does not rebind `scores`, which the summary
    # cell further down uses for the per-algorithm list of score arrays.
    _hist_scores, _hist_y_hat, _hist_common_class, info = histogram_detection_l(
        dataset.dataframe,
        dataset.dataframe_class,
        training_dataset.dataframe,
        training_dataset.dataframe_class,
    )
    h_results.append([np.round(info[metric], 3) for metric in ('nmi', 'rmse', 'f1_score')])

histogram_results = np.array(h_results)

In [12]:
# Cont Level - CLUSTER
# Builds cluster_results: one row per entry of dataset_names, columns [nmi, rmse, f1].

c_results = []
for dataset_name in dataset_names:
    dataset, training_dataset = get_dataset(dataset_name)
    # Only the trailing `info` value is used. The other three return values get
    # throwaway names so this loop does not rebind `scores`, which the summary
    # cell further down uses for the per-algorithm list of score arrays.
    _cluster_scores, _cluster_y_hat, _cluster_common_class, info = cluster_detection_l(
        dataset.dataframe,
        dataset.dataframe_class,
        training_dataset.dataframe,
        training_dataset.dataframe_class,
    )
    c_results.append([np.round(info[metric], 3) for metric in ('nmi', 'rmse', 'f1_score')])

cluster_results = np.array(c_results)

In [13]:
# Cont Level - SVM
# Builds svm_results: one row per entry of dataset_names, columns [nmi, rmse, f1].

s_results = []
for dataset_name in dataset_names:
    dataset, training_dataset = get_dataset(dataset_name)
    # Only the trailing `info` value is used. The other three return values get
    # throwaway names so this loop does not rebind `scores`, which the summary
    # cell further down uses for the per-algorithm list of score arrays.
    _svm_scores, _svm_y_hat, _svm_common_class, info = svm_detection_l(
        dataset.dataframe,
        dataset.dataframe_class,
        training_dataset.dataframe,
        training_dataset.dataframe_class,
    )
    s_results.append([np.round(info[metric], 3) for metric in ('nmi', 'rmse', 'f1_score')])

svm_results = np.array(s_results)

In [14]:
# Cont Level - ISOLATION FOREST
# Builds isolation_results: one row per entry of dataset_names, columns [nmi, rmse, f1].

i_results = []
for dataset_name in dataset_names:
    dataset, training_dataset = get_dataset(dataset_name)
    # Only the trailing `info` value is used. The other three return values get
    # throwaway names so this loop does not rebind `scores`, which the summary
    # cell further down uses for the per-algorithm list of score arrays.
    _iforest_scores, _iforest_y_hat, _iforest_common_class, info = isolation_forest_detection_l(
        dataset.dataframe,
        dataset.dataframe_class,
        training_dataset.dataframe,
        training_dataset.dataframe_class,
    )
    i_results.append([np.round(info[metric], 3) for metric in ('nmi', 'rmse', 'f1_score')])

isolation_results = np.array(i_results)

In [15]:
# Per-algorithm summary: a display title plus that algorithm's result array.
scores = [
    {'title': algorithm_title, 'scores': result_array}
    for algorithm_title, result_array in (
        ('RPCA', rpca_results),
        ('Histogram', histogram_results),
        ('Cluster', cluster_results),
        ('SVM', svm_results),
        ('Isolation Forest', isolation_results),
    )
]

In [16]:
# Print the per-algorithm summary list (title plus result array for each algorithm).
print(scores)

[{'title': 'RPCA', 'scores': array([[0.114, 0.106, 0.348],
       [0.24 , 0.117, 0.579],
       [0.316, 0.125, 0.69 ],
       [0.355, 0.124, 0.733],
       [0.434, 0.107, 0.791]])}, {'title': 'Histogram', 'scores': array([[0.002, 0.896, 0.104],
       [0.001, 0.799, 0.214],
       [0.002, 0.748, 0.309],
       [0.007, 0.73 , 0.341],
       [0.003, 0.697, 0.397]])}, {'title': 'Cluster', 'scores': array([[0.897, 0.004, 0.959],
       [0.79 , 0.02 , 0.914],
       [0.827, 0.021, 0.943],
       [0.839, 0.021, 0.951],
       [0.706, 0.052, 0.891]])}, {'title': 'SVM', 'scores': array([[0.052, 0.089, 0.241],
       [0.019, 0.155, 0.191],
       [0.034, 0.201, 0.235],
       [0.079, 0.208, 0.345],
       [0.06 , 0.246, 0.314]])}, {'title': 'Isolation Forest', 'scores': array([[0.042, 0.507, 0.163],
       [0.091, 0.468, 0.333],
       [0.049, 0.466, 0.406],
       [0.073, 0.43 , 0.471],
       [0.062, 0.424, 0.504]])}]


In [2]:
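# Snapshot of the `scores` list printed above, kept commented out.
# NOTE(review): presumably uncommented to rebuild the plot data below without
# re-running the detectors - confirm before relying on it.
# import numpy as np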
# scores = [{'title': 'RPCA', 'scores': np.array([[0.114, 0.106, 0.348],
#        [0.24 , 0.117, 0.579],
#        [0.316, 0.125, 0.69 ],
#        [0.355, 0.124, 0.733],
#        [0.434, 0.107, 0.791]])}, {'title': 'Histogram', 'scores': np.array([[0.002, 0.896, 0.104],
#        [0.001, 0.799, 0.214],
#        [0.002, 0.748, 0.309],
#        [0.007, 0.73 , 0.341],
#        [0.003, 0.697, 0.397]])}, {'title': 'Cluster', 'scores': np.array([[0.897, 0.004, 0.959],
#        [0.79 , 0.02 , 0.914],
#        [0.827, 0.021, 0.943],
#        [0.839, 0.021, 0.951],
#        [0.706, 0.052, 0.891]])}, {'title': 'SVM', 'scores': np.array([[0.052, 0.089, 0.241],
#        [0.019, 0.155, 0.191],
#        [0.034, 0.201, 0.235],
#        [0.079, 0.208, 0.345],
#        [0.06 , 0.246, 0.314]])}, {'title': 'Isolation Forest', 'scores': np.array([[0.042, 0.507, 0.163],
#        [0.091, 0.468, 0.333],
#        [0.049, 0.466, 0.406],
#        [0.073, 0.43 , 0.471],
#        [0.062, 0.424, 0.504]])}]

In [3]:
#NMI
# Column 0 of every algorithm's score array, paired row-by-row with `dimensions`.
results = [
    {
        'title': entry['title'],
        'plotdata': [(level, entry['scores'][row][0]) for row, level in enumerate(dimensions)],
    }
    for entry in scores
]

In [4]:
# Show the NMI plot data built in the previous cell.
pp.pprint(results)

[   {   'plotdata': [   (50, 0.114),
                        (100, 0.24),
                        (150, 0.316),
                        (200, 0.355),
                        (250, 0.434)],
        'title': 'RPCA'},
    {   'plotdata': [   (50, 0.002),
                        (100, 0.001),
                        (150, 0.002),
                        (200, 0.007),
                        (250, 0.003)],
        'title': 'Histogram'},
    {   'plotdata': [   (50, 0.897),
                        (100, 0.79),
                        (150, 0.827),
                        (200, 0.839),
                        (250, 0.706)],
        'title': 'Cluster'},
    {   'plotdata': [   (50, 0.052),
                        (100, 0.019),
                        (150, 0.034),
                        (200, 0.079),
                        (250, 0.06)],
        'title': 'SVM'},
    {   'plotdata': [   (50, 0.042),
                        (100, 0.091),
                        (150, 0.049),
                   

In [5]:
#RMSE
# Column 1 of every algorithm's score array, paired row-by-row with `dimensions`.
results = [
    {
        'title': entry['title'],
        'plotdata': [(level, entry['scores'][row][1]) for row, level in enumerate(dimensions)],
    }
    for entry in scores
]

In [6]:
# Show the RMSE plot data built in the previous cell.
pp.pprint(results)

[   {   'plotdata': [   (50, 0.106),
                        (100, 0.117),
                        (150, 0.125),
                        (200, 0.124),
                        (250, 0.107)],
        'title': 'RPCA'},
    {   'plotdata': [   (50, 0.896),
                        (100, 0.799),
                        (150, 0.748),
                        (200, 0.73),
                        (250, 0.697)],
        'title': 'Histogram'},
    {   'plotdata': [   (50, 0.004),
                        (100, 0.02),
                        (150, 0.021),
                        (200, 0.021),
                        (250, 0.052)],
        'title': 'Cluster'},
    {   'plotdata': [   (50, 0.089),
                        (100, 0.155),
                        (150, 0.201),
                        (200, 0.208),
                        (250, 0.246)],
        'title': 'SVM'},
    {   'plotdata': [   (50, 0.507),
                        (100, 0.468),
                        (150, 0.466),
                  

In [9]:
#F1
# Column 2 of every algorithm's score array, paired row-by-row with `dimensions`.
results = [
    {
        'title': entry['title'],
        'plotdata': [(level, entry['scores'][row][2]) for row, level in enumerate(dimensions)],
    }
    for entry in scores
]

In [10]:
# Show the F1 plot data built in the previous cell.
pp.pprint(results)

[   {   'plotdata': [   (50, 0.348),
                        (100, 0.579),
                        (150, 0.69),
                        (200, 0.733),
                        (250, 0.791)],
        'title': 'RPCA'},
    {   'plotdata': [   (50, 0.104),
                        (100, 0.214),
                        (150, 0.309),
                        (200, 0.341),
                        (250, 0.397)],
        'title': 'Histogram'},
    {   'plotdata': [   (50, 0.959),
                        (100, 0.914),
                        (150, 0.943),
                        (200, 0.951),
                        (250, 0.891)],
        'title': 'Cluster'},
    {   'plotdata': [   (50, 0.241),
                        (100, 0.191),
                        (150, 0.235),
                        (200, 0.345),
                        (250, 0.314)],
        'title': 'SVM'},
    {   'plotdata': [   (50, 0.163),
                        (100, 0.333),
                        (150, 0.406),
                 