In [1]:
import random
import numpy as np
import pandas as pd


from evaluator import evaluate

from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
from sklearn.svm import OneClassSVM as OC_SVM

# Reproducibility: a single seed shared by every random number generator seeded
# in this notebook, so a fresh "Restart & Run All" draws the same numbers.
SEED = 0
random.seed(SEED)     # Python's built-in `random` module
np.random.seed(SEED)  # NumPy's legacy global generator
# tf.random.set_seed(SEED)  # left disabled: TensorFlow is not imported in this cell

### Yahoo S5

In [2]:
# Accumulator for the Yahoo S5 runs: one independent, initially empty list per
# result column (dataset label plus the three metrics).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [3]:
# Rebuild the accumulator here so this cell is idempotent: re-running it starts
# from empty lists instead of appending duplicate rows to an earlier run (which
# would skew the per-dataset means computed from it).
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

# Fit one One-Class SVM per series of every Yahoo S5 subset (A1-A4) and record
# its best F1, PR-AUC and ROC-AUC under the subset's name.
for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    dataset_name = loader.__name__.replace('load_', '')  # e.g. 'yahoo_A1'
    datasets = loader()
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        y_test = y_tests[i]

        # A fresh model with default hyper-parameters is trained for each series.
        clf = OC_SVM().fit(X_train)
        # decision_function is positive for inliers and negative for outliers,
        # so its negation is a continuous anomaly score (larger = more anomalous).
        # Despite the "_rec" suffix this is a score, not a reconstruction.
        X_test_rec = -clf.decision_function(X_test)
        # Give the scores the same shape as the labels they are compared with.
        X_test_rec = X_test_rec.reshape(y_test.shape)

        # NOTE(review): is_reconstructed=False presumably tells `evaluate` that the
        # second argument is already an anomaly score - confirm in evaluator.py.
        scores = evaluate(X_test, X_test_rec, y_test, is_reconstructed=False)

        # scores['f1'] is reduced with np.max, i.e. only the best F1 is kept.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/67 [00:00<?, ?it/s]

yahoo_A1 0.3636363272727291 0.0 0.6438678921830517
yahoo_A1 0.999999942307695 0.0 0.4282296616752487
yahoo_A1 0.9729729177501851 0.110763219695074 0.7855774100845188
yahoo_A1 0.9999999000000052 0.0 0.477855453851317
yahoo_A1 0.9090908429752097 0.6787907644060283 0.509019599239708
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.999999930000003 0.7999999503333362 0.5399060923615245
yahoo_A1 0.9999999166666703 0.0 0.54482756792073
yahoo_A1 0.9444443936771287 0.9367312086314072 0.5445296199974236
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.9276595237229543 0.939422892403123 0.5129757294461055
yahoo_A1 0.9333332773333359 0.08379448662263521 0.8242469826016497
yahoo_A1 0.3636363272727291 0.1327590829365982 0.16626698452358818
yahoo_A1 0.9230768591716004 0.0 0.5619047524058958
yahoo_A1 0.9999999484127009 0.2539682531000226 0.559228649096019
yahoo_A1 0.9999999250000032 0.7499999447916703 0.45023695546202946
yahoo_A1 0.999999943333336 0.0 0.5106382943473669
yahoo_A1 0.999999947674421 0.0 0.6098191198712684
yahoo_A1 0.75

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.28571425306122533 0.008462445160613953 0.5021753596687958
yahoo_A3 0.7999999200000041 0.3904761520680306 0.4770458921868842
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.8888888197530895 0.6076855226707233 0.45991983038879375
yahoo_A3 0.019417473786407864 0.005133690600401427 0.23007966972508434
yahoo_A3 0.02352940943021934 0.0072558148119338615 0.2535140519487589
yahoo_A3 0.07692305591716472 0.011390086222512565 0.46373917273955134
yahoo_A3 0.4999999375000028 0.0 0.3186959307636228
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.33333329444444565 0.010354239881166242 0.5731462810073856
yahoo_A3 0.33333329444444565 0.031925821780094406 0.42645289719710366
yahoo_A3 0.33333329444444565 0.013543945955639554 0.555110209227433
yahoo_A3 0.02941176172145329 0.00746268580975726 0.8300197975487119
yahoo_A3 0.28571425306122533 0.008522551344650625 0.44327308489258244
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.9090908429752097 0.7633332859111138 0.5226452800235182
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.0202

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A4 0.7999999200000041 0.3374306962665778 0.28176978429727406
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.009523808566893424 0.0023923442572285672 0.5228627706714042
yahoo_A4 0.04255318315980203 0.007351247807714425 0.5418326421231424
yahoo_A4 0.010050249256332211 0.002758116778856883 0.3047808611942676
yahoo_A4 0.8571427836734729 0.2525092463411058 0.6514999835822004
yahoo_A4 0.8888888197530895 0.20515598349244885 0.723446879173658
yahoo_A4 0.4999999375000028 0.026864800914764585 0.4328010500245025
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.23529408581315192 0.06947197351556658 0.40485693918510335
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.1999999725000028 0.04288068020561243 0.6294078298588459
yahoo_A4 0.4999999250000064 0.004891489841093674 0.8481075271467937
yahoo_A4 0.14285710000001145 0.03701619764722695 0.42149998937820016
yahoo_A4 0.04545453657024923 0.007587556705594939 0.5044820463885508
yahoo_A4 0.009592325179856162 0.002752077489294918 0.1499003908814944
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.090909066528

In [4]:
# Turn the accumulated Yahoo S5 metrics into a table with the columns
# 'dataset', 'f1', 'pr_auc' and 'roc_auc' (one row per evaluated series).
yahoo_results = pd.DataFrame(data=total_scores)

In [5]:
# Average each metric over all series that share the same Yahoo S5 subset label.
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.609481,0.180767,0.397197
yahoo_A2,0.0,0.0,0.0
yahoo_A3,0.365774,0.137405,0.396134
yahoo_A4,0.251383,0.063947,0.344698


### NASA

In [6]:
# Accumulator for the NASA runs: one independent, initially empty list per
# result column (dataset label plus the three metrics).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [7]:
# Rebuild the accumulator here so this cell is idempotent: re-running it starts
# from empty lists instead of appending duplicate rows to an earlier run (which
# would skew the per-dataset means computed from it).
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

# Fit one One-Class SVM per NASA train/test pair and record its best F1,
# PR-AUC and ROC-AUC under the label 'D<index>' (1-based).
for loader in [load_nasa]:
    datasets = loader()
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        dataset_name = f'D{i+1}'
        X_train = x_trains[i]
        X_test = x_tests[i]
        y_test = y_tests[i]

        # A fresh model with default hyper-parameters is trained for each pair.
        clf = OC_SVM().fit(X_train)
        # decision_function is positive for inliers and negative for outliers,
        # so its negation is a continuous anomaly score (larger = more anomalous).
        # Despite the "_rec" suffix this is a score, not a reconstruction.
        X_test_rec = -clf.decision_function(X_test)
        # Give the scores the same shape as the labels they are compared with.
        X_test_rec = X_test_rec.reshape(y_test.shape)

        # NOTE(review): is_reconstructed=False presumably tells `evaluate` that the
        # second argument is already an anomaly score - confirm in evaluator.py.
        scores = evaluate(X_test, X_test_rec, y_test, is_reconstructed=False)

        # scores['f1'] is reduced with np.max, i.e. only the best F1 is kept.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.13237733345371766 0.017229667008321563 0.023491769262070215
D2 0.239109990245366 0.07736669558433912 0.09214854523418256


In [8]:
# Turn the accumulated NASA metrics into a table with the columns
# 'dataset', 'f1', 'pr_auc' and 'roc_auc' (one row per evaluated pair).
nasa_results = pd.DataFrame(data=total_scores)

In [9]:
# Average each metric per 'dataset' label and display the result.
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.132377,0.01723,0.023492
D2,0.23911,0.077367,0.092149


### SMD

In [10]:
# Accumulator for the SMD runs: one independent, initially empty list per
# result column (dataset label plus the three metrics).
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Rebuild the accumulator here so this cell is idempotent: re-running it starts
# from empty lists instead of appending duplicate rows to an earlier run (which
# would skew the per-dataset means computed from it).
total_scores = {'dataset': [], 'f1': [], 'pr_auc': [], 'roc_auc': []}

# Fit one One-Class SVM per SMD train/test pair and record its best F1,
# PR-AUC and ROC-AUC under the loader-derived label ('smd').
for loader in [load_smd]:
    dataset_name = loader.__name__.replace('load_', '')
    datasets = loader()
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        y_test = y_tests[i]

        # A fresh model with default hyper-parameters is trained for each pair.
        clf = OC_SVM().fit(X_train)
        # decision_function is positive for inliers and negative for outliers,
        # so its negation is a continuous anomaly score (larger = more anomalous).
        # Despite the "_rec" suffix this is a score, not a reconstruction.
        X_test_rec = -clf.decision_function(X_test)
        # Give the scores the same shape as the labels they are compared with.
        X_test_rec = X_test_rec.reshape(y_test.shape)

        # NOTE(review): is_reconstructed=False presumably tells `evaluate` that the
        # second argument is already an anomaly score - confirm in evaluator.py.
        scores = evaluate(X_test, X_test_rec, y_test, is_reconstructed=False)

        # scores['f1'] is reduced with np.max, i.e. only the best F1 is kept.
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.45610564765178774 0.29044377899549245 0.7568291575896886
smd 0.5374553642724784 0.4281995155907894 0.1873317312855692
smd 0.13177100695673052 0.05076801258074222 0.4343943173273155
smd 0.183730677530404 0.070025784907337 0.5841973783862764
smd 0.3728813151840193 0.01191899480453335 0.6056449625367526
smd 0.8591906329457708 0.5832235568773836 0.8123091667137141
smd 0.17318469934796973 0.08547074465164277 0.14237526607757378
smd 0.26966289682085814 0.057775044739286074 0.41348534477040916
smd 0.4535103869952418 0.3407342450239448 0.353122006959464
smd 0.24340570102914458 0.11113982108651937 0.22302670919608544
smd 0.4812833723723604 0.41451944192119394 0.6188243452414239
smd 0.32922619221106897 0.09942958116670797 0.39353887445890345
smd 0.24761900708310183 0.07884986110262711 0.519802002387287
smd 0.23891397981074217 0.05049525098673728 0.6860021971737942
smd 0.7861885293870144 0.050454763876202485 0.7363370180960099
smd 0.04961831561936049 0.0006313143055188895 0.6996885699040141

In [12]:
# Turn the accumulated SMD metrics into a table with the columns
# 'dataset', 'f1', 'pr_auc' and 'roc_auc' (one row per evaluated pair).
smd_results = pd.DataFrame(data=total_scores)

In [13]:
# Average each metric over all rows that share the same 'dataset' label.
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.334786,0.162137,0.50724
