In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from tqdm.notebook import tqdm_notebook

from sklearn.neighbors import LocalOutlierFactor
from sklearn.neighbors import NearestNeighbors

from util import load_data, load_data_fashion, create_dataset

import time

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Load the first dataset as (train, test) pairs of (features, labels) through the project's util helper.
# NOTE(review): reshape=1 presumably flattens each image into a single row vector — confirm in util.py.
(x_train, y_train), (x_test, y_test) = load_data(reshape=1)
# Second dataset, loaded with load_data_fashion; the f_ prefix keeps its arrays apart from the ones above.
(f_x_train, f_y_train), (f_x_test, f_y_test) = load_data_fashion(reshape=1)

In [None]:
# Time a single LOF fit + scoring pass for one "normal" digit.
# `normal` is pinned here so that the cell runs on a fresh kernel: it used to be
# read before any cell had assigned it, which raises NameError on Restart & Run All.
normal = 0

# Three clocks: wall-clock time, high-resolution wall-clock time, and CPU time
# consumed by this process.
start1 = time.time()
start2 = time.perf_counter()
start3 = time.process_time()

# novelty=True is what makes decision_function available for unseen samples.
lof = LocalOutlierFactor(n_neighbors=20, novelty=True)
lof.fit(x_train[y_train == normal])

# Compute scores
scores = lof.decision_function(x_test)
# decision_function is larger for inliers: flip it so that larger means more
# anomalous, then range-normalize to [0, 1].
scores = 1 - scores
scores = (scores - np.min(scores)) / (np.max(scores) - np.min(scores))

# Compute labels and AUC: 0 for the normal digit, 1 for every other digit
labels = np.copy(y_test)
labels[y_test == normal] = 0
labels[y_test != normal] = 1

AUC = roc_auc_score(labels, scores)

end1 = time.time()
end2 = time.perf_counter()
end3 = time.process_time()
# Elapsed seconds, in the same order as the clocks were started.
print(end1 - start1)
print(end2 - start2)
print(end3 - start3)

In [None]:
# Case 1 (LOF): each digit in turn is the normal class; all other digits are anomalies.
AUCs = []
for normal in range(10):
    # Fit in novelty mode on the training images of the current normal digit only.
    lof = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(x_train[y_train == normal])

    # Flip the decision function so that larger means more anomalous,
    # then rescale the scores to the [0, 1] range.
    scores = 1 - lof.decision_function(x_test)
    scores = (scores - scores.min()) / (scores.max() - scores.min())

    # Ground truth: 0 for the normal digit, 1 for everything else.
    labels = np.where(y_test == normal, 0, 1).astype(y_test.dtype)

    AUC = roc_auc_score(labels, scores)
    AUCs.append(AUC)
print(AUCs)

# CASE 2: train on 500 samples from each of nine digits; the held-out digit is the anomaly

In [5]:
# Display the dimensions of the training matrix.
x_train.shape

(60000, 784)

In [6]:
# Scratch check: assemble the 4500-row training matrix for every held-out digit
# and print its shape each time.
x = np.zeros((4500, 784))
for normal in range(10):
    # Consecutive 500-row bands receive the first 500 training images of each
    # digit other than `normal`.
    kept_digits = [d for d in range(10) if d != normal]
    for band, digit in enumerate(kept_digits):
        x[band * 500:(band + 1) * 500] = x_train[y_train == digit][:500]
    print(x.shape)

(4500, 784)
(4500, 784)
(4500, 784)
(4500, 784)
(4500, 784)
(4500, 784)
(4500, 784)
(4500, 784)
(4500, 784)
(4500, 784)


In [8]:
# Case 2 (LOF): the model is fit on nine digits and the remaining digit is the anomaly.
# Note that in this cell `normal` names the held-out digit, which is labelled 1 below.
AUCs = []

x = np.zeros((4500, 784))
for normal in range(10):
    # Training matrix: first 500 training images of every digit except the
    # held-out one, stacked in consecutive 500-row bands.
    kept_digits = [d for d in range(10) if d != normal]
    for band, digit in enumerate(kept_digits):
        x[band * 500:(band + 1) * 500] = x_train[y_train == digit][:500]

    print(normal)
    lof = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(x)

    # Flip the decision function so that larger means more anomalous,
    # then rescale the scores to the [0, 1] range.
    scores = 1 - lof.decision_function(x_test)
    scores = (scores - scores.min()) / (scores.max() - scores.min())

    # Ground truth: 1 for the held-out digit, 0 for the nine training digits.
    labels = np.where(y_test == normal, 1, 0).astype(y_test.dtype)

    AUC = roc_auc_score(labels, scores)
    print(AUC)
    AUCs.append(AUC)

print(AUCs)

0
0.8947884519661523
1
0.5907808512911489
2
0.9709410375564452
3
0.7786960208812872
4
0.8503099029368284
5
0.8702181938314711
6
0.8995669247524544
7
0.8802724559416152
8
0.8998756046302013
9
0.7238304266164635
[0.8947884519661523, 0.5907808512911489, 0.9709410375564452, 0.7786960208812872, 0.8503099029368284, 0.8702181938314711, 0.8995669247524544, 0.8802724559416152, 0.8998756046302013, 0.7238304266164635]


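# CASE 3: fit on the first 6000 training rows; test rows from index 5000 onward are replaced by fashion test samples (the anomalies)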

In [4]:
# Case 3 (LOF): fit on the first 6000 training rows, all digits together.
xt = x_train[:6000]
lof = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(xt)

# Evaluation set: a copy of x_test whose rows from index 5000 onward are
# overwritten with the first 5000 rows of f_x_test.
x = np.copy(x_test)
x[5000:] = f_x_test[:5000]

# Ground truth: 0 for the untouched rows, 1 for the overwritten ones.
labels = np.zeros_like(y_test, dtype=np.float64)
labels[5000:] = 1

# Flip the decision function so that larger means more anomalous,
# then rescale the scores to the [0, 1] range.
scores = 1 - lof.decision_function(x)
scores = (scores - scores.min()) / (scores.max() - scores.min())

AUC = roc_auc_score(labels, scores)
print(AUC)

0.95738528


# KNN

In [None]:
# Time a k-NN fit + neighbor query for one "normal" digit, measured as CPU time
# of this process.
# Pin the class explicitly instead of reusing whatever value an earlier loop
# happened to leave in `normal`, so the measurement does not depend on hidden state.
normal = 0

start = time.process_time()
knn = NearestNeighbors(n_neighbors=20).fit(x_train[y_train == normal])

# Distances (and indices) of the 20 nearest training neighbors of every test sample
distances, indices = knn.kneighbors(x_test)

end = time.process_time()
print(end - start)

In [None]:
# Case 1 (k-NN): each digit in turn is the normal class; all other digits are anomalies.
AUCs = []
for normal in range(10):
    # Index only the training images of the current normal digit.
    knn = NearestNeighbors(n_neighbors=20)
    knn.fit(x_train[y_train == normal])

    # Anomaly score = mean distance to the 20 nearest training neighbors,
    # rescaled to the [0, 1] range.
    distances, indices = knn.kneighbors(x_test)
    scores = distances.mean(axis=1)
    scores = (scores - scores.min()) / (scores.max() - scores.min())

    # Ground truth: 0 for the normal digit, 1 for everything else.
    labels = np.where(y_test == normal, 0, 1).astype(y_test.dtype)

    AUC = roc_auc_score(labels, scores)
    AUCs.append(AUC)
print(AUCs)

# CASE 2: train on 500 samples from each of nine digits; the held-out digit is the anomaly

In [3]:
# Case 2 (k-NN): the index is built from nine digits and the remaining digit is the anomaly.
# Note that in this cell `normal` names the held-out digit, which is labelled 1 below.
x = np.zeros((4500, 784))

AUCs = []
for normal in range(10):
    print(normal)
    # Training matrix: first 500 training images of every digit except the
    # held-out one, stacked in consecutive 500-row bands.
    kept_digits = [d for d in range(10) if d != normal]
    for band, digit in enumerate(kept_digits):
        x[band * 500:(band + 1) * 500] = x_train[y_train == digit][:500]

    knn = NearestNeighbors(n_neighbors=20)
    knn.fit(x)

    # Anomaly score = mean distance to the 20 nearest training neighbors,
    # rescaled to the [0, 1] range.
    distances, indices = knn.kneighbors(x_test)
    scores = distances.mean(axis=1)
    scores = (scores - scores.min()) / (scores.max() - scores.min())

    # Ground truth: 1 for the held-out digit, 0 for the nine training digits.
    labels = np.where(y_test == normal, 1, 0).astype(y_test.dtype)

    AUC = roc_auc_score(labels, scores)
    print(AUC)
    AUCs.append(AUC)
print(AUCs)

0
0.9588601294176208
1
0.3620382089641241
2
0.9240535037584106
3
0.8600740096256566
4
0.6924473072411412
5
0.8688877466416948
6
0.9085147299150012
7
0.6200646282164486
8
0.8654757804398973
9
0.5703846121201038
[0.9588601294176208, 0.3620382089641241, 0.9240535037584106, 0.8600740096256566, 0.6924473072411412, 0.8688877466416948, 0.9085147299150012, 0.6200646282164486, 0.8654757804398973, 0.5703846121201038]


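# CASE 3: fit on the first 6000 training rows; test rows from index 5000 onward are replaced by fashion test samples (the anomalies)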

In [5]:
# Case 3 (k-NN): index the first 6000 training rows, all digits together.
xt = x_train[:6000]
knn = NearestNeighbors(n_neighbors=20)
knn.fit(xt)

# Evaluation set: a copy of x_test whose rows from index 5000 onward are
# overwritten with the first 5000 rows of f_x_test.
x = np.copy(x_test)
x[5000:] = f_x_test[:5000]

# Ground truth: 0 for the untouched rows, 1 for the overwritten ones.
labels = np.zeros_like(y_test, dtype=np.float64)
labels[5000:] = 1

# Anomaly score = mean distance to the 20 nearest training neighbors,
# rescaled to the [0, 1] range.
distances, indices = knn.kneighbors(x)
scores = distances.mean(axis=1)
scores = (scores - scores.min()) / (scores.max() - scores.min())

AUC = roc_auc_score(labels, scores)
print(AUC)

0.95405572
