In [1]:
import random
import numpy as np
import pandas as pd


from evaluator import evaluate

from data_loader import load_kdd_cup_urc, load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4, load_power_demand # Univariate Datasets
from data_loader import load_nasa, load_ecg, load_gesture, load_smd # Multivariate Datasets

from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
from sklearn.neighbors import LocalOutlierFactor as LOF

# Reproducibility: pin the global RNG state of the stdlib and of NumPy to one shared seed.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
# TensorFlow seeding is intentionally left disabled (no `tf` import in this cell):
# tf.random.set_seed(SEED)

### Yahoo S5

In [2]:
# Column-oriented accumulator: one independent, initially empty list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [3]:
# Start from empty score lists on every run so this cell is idempotent: re-executing it
# (or running it after another section re-bound `total_scores`) never duplicates or
# mixes rows from an earlier run.
total_scores = {key: [] for key in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

for loader in [load_yahoo_A1, load_yahoo_A2, load_yahoo_A3, load_yahoo_A4]:
    datasets = loader()
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    # 'load_yahoo_A1' -> 'yahoo_A1'; every series of this benchmark shares the label.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        y_test = y_tests[i]

        # novelty=True is required to score unseen data with decision_function.
        clf = LOF(novelty=True, n_jobs=32).fit(X_train)
        # decision_function is a continuous score (negative = outlier, positive = inlier),
        # not a +/-1 label. Negate it so that larger values mean "more anomalous".
        # Despite its name, X_test_rec holds anomaly scores, not a reconstruction.
        X_test_rec = -clf.decision_function(X_test)
        # Give the scores the same shape as the label array.
        X_test_rec = X_test_rec.reshape(y_test.shape)

        # NOTE(review): is_reconstructed=False presumably tells `evaluate` that the second
        # argument is already an anomaly score -- confirm against evaluator.py.
        scores = evaluate(X_test, X_test_rec, y_test, is_reconstructed=False)
        # Keep the largest F1 among the values returned (presumably one per threshold).
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/67 [00:00<?, ?it/s]

yahoo_A1 0.9999999000000052 0.4999999375000063 0.09198112745472166
yahoo_A1 0.999999942307695 0.9230768955275719 0.016746411350428723
yahoo_A1 0.9444443893518544 0.842105243409232 0.013030148114256592
yahoo_A1 0.09090908181818227 0.023809522562358344 0.00699300664172659
yahoo_A1 0.7999999360000031 0.6150793287462228 0.018823529093610153
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.8888888197530895 0.6944444003950644 0.021126760135885745
yahoo_A1 0.9999999166666703 0.6666666055555603 0.027586205970669863
yahoo_A1 0.287878762798441 0.15929203258343735 0.0
yahoo_A1 0.0 0.0 -0.0
yahoo_A1 0.2777777444058681 0.05847953191751309 0.0
yahoo_A1 0.9374999441406277 0.8730468521630876 0.42010541895985787
yahoo_A1 0.07792205707539746 0.007352941083958095 0.0051958432663621315
yahoo_A1 0.9999999333333361 0.8333332883333356 0.021428571066326535
yahoo_A1 0.9508196206261782 0.9008108252968194 0.001967729236791059
yahoo_A1 0.0 0.0 0.0
yahoo_A1 0.999999943333336 0.9333333085252997 0.009456264710136423
yahoo_A1 0.9662

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A2 0.0 0.0 0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.0 0.0 -0.0
yahoo_A2 0.

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.28571425306122533 0.008453829209802309 0.32095046312720765
yahoo_A3 0.7999999200000041 0.33333329722222554 0.0013306719447332893
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.8888888197530895 0.5999999580000026 0.0
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.017241374256838495 0.002713833917201305 0.1743641202182223
yahoo_A3 0.07692305591716472 0.007246376538542332 0.039254822369632035
yahoo_A3 0.4999999375000028 0.0 0.11510312321942952
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.33333329444444565 0.007897440673361756 0.1456913798225068
yahoo_A3 0.33333329444444565 0.018014705413792177 0.027655310062594134
yahoo_A3 0.33333329444444565 0.013543945955639554 0.3831663249252173
yahoo_A3 0.02941176172145329 0.00746268580975726 0.21968188653684473
yahoo_A3 0.28571425306122533 0.006695709617988059 0.4277108361590943
yahoo_A3 0.0 0.0 -0.0
yahoo_A3 0.9090908429752097 0.718333289169447 0.007214428711981079
yahoo_A3 0.0 0.0 0.0
yahoo_A3 0.020202018161412108 0.0051020403009163365 0.33001984764811054

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

yahoo_A4 0.4999999375000028 0.0 0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.008097165178908029 0.002032520121125012 0.04174949879885739
yahoo_A4 0.04255318315980203 0.005555555265432114 0.17529879598101666
yahoo_A4 0.010050249256332211 0.0012690354688989702 0.24103584447389786
yahoo_A4 0.8571427836734729 0.4999999583333364 0.16049999595540004
yahoo_A4 0.8888888197530895 0.5999999588333359 0.04969939779364743
yahoo_A4 0.4999999375000028 0.0 0.0026613438894665786
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.0 0.0 0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.1999999725000028 0.041929270045562704 0.24617430977565857
yahoo_A4 0.4999999250000064 0.12991269794946186 0.7410358193742976
yahoo_A4 0.2499999437500102 0.031249998437500058 0.009999999748000006
yahoo_A4 0.04651161882098564 0.0060975606558596246 0.3466135284170102
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.0 0.0 -0.0
yahoo_A4 0.0 0.0 0.0
yahoo_A4 0.2857142367346995 0.0499999965000002 0.037274548345235574
yahoo_A4 0.0 0.0 -0.0
y

In [4]:
# One row per evaluated series; columns are the keys of the score accumulator.
yahoo_results = pd.DataFrame(data=total_scores)

In [5]:
# Average every metric over all series that share a dataset label.
yahoo_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
yahoo_A1,0.534654,0.370639,0.051206
yahoo_A2,0.0,0.0,0.0
yahoo_A3,0.362414,0.126611,0.111553
yahoo_A4,0.234707,0.063954,0.075182


### NASA

In [6]:
# Column-oriented accumulator: one independent, initially empty list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [7]:
# Start from empty score lists on every run so this cell is idempotent: re-executing it
# (or running it after another section re-bound `total_scores`) never duplicates or
# mixes rows from an earlier run.
total_scores = {key: [] for key in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

for loader in [load_nasa]:
    datasets = loader()
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']

    for i in tqdm(range(len(x_trains))):
        # Sub-datasets are labelled by their 1-based position: D1, D2, ...
        dataset_name = f'D{i+1}'
        X_train = x_trains[i]
        X_test = x_tests[i]
        y_test = y_tests[i]

        # novelty=True is required to score unseen data with decision_function.
        clf = LOF(novelty=True, n_jobs=32).fit(X_train)
        # decision_function is a continuous score (negative = outlier, positive = inlier),
        # not a +/-1 label. Negate it so that larger values mean "more anomalous".
        # Despite its name, X_test_rec holds anomaly scores, not a reconstruction.
        X_test_rec = -clf.decision_function(X_test)
        # Give the scores the same shape as the label array.
        X_test_rec = X_test_rec.reshape(y_test.shape)

        # NOTE(review): is_reconstructed=False presumably tells `evaluate` that the second
        # argument is already an anomaly score -- confirm against evaluator.py.
        scores = evaluate(X_test, X_test_rec, y_test, is_reconstructed=False)
        # Keep the largest F1 among the values returned (presumably one per threshold).
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

D1 0.27691717680931155 0.06326111389153746 0.05818273884468656
D2 0.17418910929857026 0.022050569409797712 0.011746298950966886


In [8]:
# One row per evaluated sub-dataset; columns are the keys of the score accumulator.
nasa_results = pd.DataFrame(data=total_scores)

In [9]:
# Average every metric per dataset label (each label D1, D2, ... has a single row here).
nasa_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D1,0.276917,0.063261,0.058183
D2,0.174189,0.022051,0.011746


### SMD

In [10]:
# Column-oriented accumulator: one independent, initially empty list per result column.
total_scores = {column: [] for column in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

In [11]:
# Start from empty score lists on every run so this cell is idempotent: re-executing it
# (or running it after another section re-bound `total_scores`) never duplicates or
# mixes rows from an earlier run.
total_scores = {key: [] for key in ('dataset', 'f1', 'pr_auc', 'roc_auc')}

for loader in [load_smd]:
    datasets = loader()
    x_trains, x_tests, y_tests = datasets['x_train'], datasets['x_test'], datasets['y_test']
    # 'load_smd' -> 'smd'; every series of this benchmark shares the label.
    dataset_name = loader.__name__.replace('load_', '')

    for i in tqdm(range(len(x_trains))):
        X_train = x_trains[i]
        X_test = x_tests[i]
        y_test = y_tests[i]

        # novelty=True is required to score unseen data with decision_function.
        clf = LOF(novelty=True, n_jobs=32).fit(X_train)
        # decision_function is a continuous score (negative = outlier, positive = inlier),
        # not a +/-1 label. Negate it so that larger values mean "more anomalous".
        # Despite its name, X_test_rec holds anomaly scores, not a reconstruction.
        X_test_rec = -clf.decision_function(X_test)
        # Give the scores the same shape as the label array.
        X_test_rec = X_test_rec.reshape(y_test.shape)

        # NOTE(review): is_reconstructed=False presumably tells `evaluate` that the second
        # argument is already an anomaly score -- confirm against evaluator.py.
        scores = evaluate(X_test, X_test_rec, y_test, is_reconstructed=False)
        # Keep the largest F1 among the values returned (presumably one per threshold).
        best_f1 = np.max(scores['f1'])

        total_scores['dataset'].append(dataset_name)
        total_scores['f1'].append(best_f1)
        total_scores['pr_auc'].append(scores['pr_auc'])
        total_scores['roc_auc'].append(scores['roc_auc'])
        print(dataset_name, best_f1, scores['pr_auc'], scores['roc_auc'])

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

smd 0.4275491470697072 0.3867900784990223 0.4796741557648693
smd 0.06765712114644812 0.014370975372284897 0.08838946909742083
smd 0.08503355570417467 0.03333539599274667 0.43838913234897586
smd 0.06974381344297607 0.027857754202542617 0.4225699428540658
smd 0.5194804732669968 0.4825758650310196 0.25794819087880544
smd 0.5941391505342919 0.6246389607248989 0.6297691477144405
smd 0.341447945326267 0.16409650953541777 0.23473706372921424
smd 0.26378375483418876 0.18134674392955874 0.212152563306662
smd 0.3215590243982268 0.18525684065132028 0.06143980133837958
smd 0.10101511900680198 0.019237644010614337 0.015327927562266678
smd 0.11834316270264536 0.07023666429034979 0.15730335645944335
smd 0.32856435336728446 0.2994171195454677 0.19950359007554297
smd 0.27407402411366777 0.21402674496893465 0.5275849905211638
smd 0.4272559508846928 0.31056329806193167 0.32121069119178797
smd 0.7860962071352515 0.7292212840536423 0.6610055911729585
smd 0.9266666163177805 0.8816770274199182 0.809391540236

In [12]:
# One row per evaluated series; columns are the keys of the score accumulator.
smd_results = pd.DataFrame(data=total_scores)

In [13]:
# Average every metric over all series that share a dataset label.
smd_results.groupby(by='dataset').mean()

Unnamed: 0_level_0,f1,pr_auc,roc_auc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
smd,0.335422,0.264015,0.367445
