In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
from tensorflow.keras.datasets import mnist, fashion_mnist
from sklearn import metrics
from sklearn.metrics import roc_auc_score, plot_roc_curve
from sklearn.mixture import GaussianMixture

from util import load_data, load_data_fashion, create_dataset

import time

import tensorflow as tf

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
physical_devices = tf.config.list_physical_devices('GPU') 
tf.config.experimental.set_memory_growth(physical_devices[0], True)

IndexError: list index out of range

In [2]:
(x_train, y_train), (x_test, y_test) = load_data()
(f_x_train, f_y_train), (f_x_test, f_y_test) = load_data_fashion()

In [3]:
def create_model():
    model = keras.Sequential([
        layers.Input( shape=(784) ),
        layers.Dense(32, activation='relu' ),
        layers.Dense(784, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy')
    return model

def fit_model(model, x_train, y_train, x_test, y_test, normal=4, verbose=0, caseii=False, caseiii=False):
    if caseii:
        train = x_train[y_train!=normal]
        test  = x_test[y_test!=normal]
    if caseiii:
        train = x_train
        test  = x_test
    else:
        train = x_train[y_train==normal]
        test  = x_test[y_test==normal]
    history = model.fit(train, train,
                    epochs=50,
                    batch_size=256,
                    verbose=verbose,
                    shuffle=True,
                    validation_data=(test, test))
    return model

In [9]:
model = create_model()

gmm = GaussianMixture(n_components=1)
start = time.process_time()

model = fit_model(model, x_train, y_train, x_test, y_test, normal=4)
# Get model that projects into feature space
layer_name = model.layers[0].name
feature_model = keras.models.Model(inputs=model.input,
              outputs=model.get_layer(layer_name).output)
normal_features = feature_model.predict(x_train)
gmm.fit(normal_features)

end = time.process_time()
print(end - start)

start = time.process_time()

features = feature_model.predict(x_test)
score = gmm.score_samples(features)

end = time.process_time()
print(end - start)

23.203125
0.96875


All instances

In [1]:
evals = np.zeros((10, 30))
for i in range(10):
    # Evaluate for all numbers
    for j in tqdm_notebook(range(30)):
        # Evaluate each method 30 times
        model = create_model()
        model = fit_model(model, x_train, y_train, x_test, y_test, normal=i)
        # Get model that projects into feature space
        layer_name = model.layers[0].name
        feature_model = keras.models.Model(inputs=model.input,
                      outputs=model.get_layer(layer_name).output)
        # Get normal features
        normal_features = feature_model.predict(x_train)
        # Fit GMM to normal features
        gmm = GaussianMixture(n_components=1)
        gmm.fit(normal_features)
        
        # Get test data
        x = x_test
        y = y_test
        labels = np.copy( y )
        labels[ y == i ] = 0
        labels[ y != i ] = 1
        
        # Compute features for test case
        features = feature_model.predict(x)
        # Compute scores
        score = gmm.score_samples(features)
        # Map score between 0 and 1
        s = score * -1
        s = 1 - (s - np.min(s)) / (np.max(s) - np.min(s))
        # Compute AUC-ROC
        AUC = roc_auc_score(labels, s)
        evals[i,j] = AUC
    print(np.mean(evals[i,:])*100)

print(np.mean(evals, axis=1))
print(np.std(evals, axis=1))

NameError: name 'np' is not defined

In [10]:
print(np.mean(evals, axis=1) * 100)
print(np.std(evals, axis=1)  * 100)

[93.14247704 85.51126648 91.29327726 82.74696601 79.81857323 83.8341677
 92.2299808  82.49434977 85.45497091 77.40533912]
[0.77544615 3.91560576 0.49507067 1.15537152 1.16293946 0.96491736
 0.7094856  2.05252932 0.86333316 1.62585302]


Percentage-contamination test

In [4]:
evals = np.zeros((10, 30))
for i in range(10):
    # Evaluate for all numbers
    for j in tqdm_notebook(range(30)):
        # Evaluate each method 30 times
        model = create_model()
        model = fit_model(model, x_train, y_train, x_test, y_test, normal=i)
        # Get model that projects into feature space
        layer_name = model.layers[0].name
        feature_model = keras.models.Model(inputs=model.input,
                      outputs=model.get_layer(layer_name).output)
        # Get normal features
        normal_features = feature_model.predict(x_train)
        # Fit GMM to normal features
        gmm = GaussianMixture(n_components=1)
        gmm.fit(normal_features)
        
        # Get test data
        x, y, labels = create_dataset(x_test, y_test, normal=i)
        
        # Compute features for test case
        features = feature_model.predict(x)
        # Compute scores
        score = gmm.score_samples(features)
        # Map score between 0 and 1
        s = score * -1
        s = 1 - (s - np.min(s)) / (np.max(s) - np.min(s))
        # Compute AUC-ROC
        AUC = roc_auc_score(labels, s)
        evals[i,j] = AUC
    print(evals[i,:])

print(np.mean(evals, axis=1))
print(np.std(evals, axis=1))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.93047688 0.94612661 0.88854644 0.92208455 0.93576635 0.95593503
 0.88826531 0.95487297 0.90905873 0.90850687 0.90778842 0.90225948
 0.94699084 0.89328405 0.93239275 0.92497918 0.93512078 0.92903998
 0.91895044 0.93618284 0.95333195 0.89334652 0.91098501 0.94122241
 0.92365681 0.94751145 0.92105373 0.95914202 0.91806539 0.95389421]


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.83335652 0.84031223 0.86496638 0.90105109 0.79030837 0.8746812
 0.69557153 0.88775794 0.79618981 0.79625164 0.91371822 0.86002009
 0.85646495 0.84137878 0.77966613 0.85829662 0.81983152 0.915148
 0.8723549  0.79397171 0.85284798 0.87838318 0.79433496 0.92583662
 0.85570755 0.87919468 0.84675014 0.89051704 0.83419893 0.84967154]


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.92087806 0.90529964 0.91396467 0.87410555 0.84523144 0.92915176
 0.89229278 0.90750783 0.92143709 0.90414431 0.92207066 0.90889609
 0.92631    0.90492695 0.92862999 0.912036   0.86776983 0.93217986
 0.91670394 0.8912213  0.89645759 0.9172816  0.88439177 0.90536486
 0.91546474 0.92529442 0.89888007 0.92866726 0.88413089 0.92312351]


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.83098716 0.80322517 0.89076561 0.8390844  0.80816587 0.82770317
 0.86831683 0.81169493 0.8556318  0.87249289 0.81436134 0.78217822
 0.83516322 0.83506519 0.81829232 0.81461621 0.83980982 0.82171356
 0.82170375 0.80094108 0.84027056 0.81559651 0.86206254 0.80687188
 0.78557004 0.82124302 0.82663464 0.79784335 0.83295755 0.83679051]


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.78220083 0.78254027 0.82358205 0.79773293 0.80351375 0.79918328
 0.77971158 0.83222243 0.80643502 0.77063918 0.80967516 0.80408978
 0.84683906 0.80588986 0.77214096 0.84055422 0.81528112 0.73707544
 0.78560555 0.81519883 0.78977144 0.79742435 0.76250283 0.77614228
 0.79979016 0.84883458 0.81654632 0.82088708 0.78196425 0.82688391]


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.85981565 0.85727454 0.84683607 0.84281266 0.80102143 0.84505481
 0.86681614 0.87025411 0.83431739 0.86500997 0.81094918 0.86454908
 0.84039611 0.85619083 0.83684604 0.86020179 0.85771051 0.85013702
 0.88380668 0.81562033 0.84953911 0.85980319 0.84519183 0.87174888
 0.83156452 0.81725212 0.90371201 0.84933981 0.86235675 0.8574863 ]


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.90838118 0.93058455 0.92104863 0.93590162 0.917754   0.96371564
 0.94275183 0.91422016 0.95049365 0.91308934 0.91981994 0.91507916
 0.90547799 0.90989257 0.93568415 0.92782272 0.94201244 0.95023269
 0.95240736 0.8960508  0.90514092 0.93487952 0.92433238 0.91234995
 0.92282098 0.90708725 0.89142963 0.90434716 0.91269789 0.92889918]


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.78005175 0.84377243 0.8312304  0.83934305 0.83836085 0.80668467
 0.85424616 0.86279325 0.81104794 0.78849496 0.83517812 0.79444486
 0.83406369 0.82644214 0.82026557 0.85794832 0.8470496  0.79196101
 0.79665483 0.80641079 0.81135016 0.79356654 0.78591666 0.80221752
 0.80610857 0.79155491 0.84605795 0.78606777 0.8424219  0.77293075]


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.85315132 0.86258014 0.85564472 0.8654926  0.88887608 0.84452919
 0.85417802 0.8713699  0.87015463 0.85889243 0.85407325 0.86152202
 0.83943762 0.88091397 0.89433433 0.85219796 0.87402045 0.87662909
 0.83317269 0.86737837 0.87627289 0.86325064 0.84834262 0.8178561
 0.88167875 0.83831664 0.83753091 0.85742572 0.84195198 0.84364916]


HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))


[0.7964066  0.79291329 0.78356181 0.73654927 0.74785348 0.78369918
 0.72072143 0.76080621 0.79537627 0.78047081 0.78945922 0.75926562
 0.76781246 0.84435133 0.77297393 0.74142617 0.81085086 0.69595423
 0.77245386 0.73822724 0.77961711 0.83672688 0.81023266 0.71619778
 0.73403723 0.74215231 0.77158053 0.78094182 0.78480801 0.77944048]
[0.9262946  0.84662467 0.90679382 0.82725844 0.80102862 0.85045383
 0.92321351 0.81682124 0.85882747 0.7708956 ]
[0.02087999 0.04782806 0.02011352 0.02454774 0.02525727 0.02115016
 0.01750313 0.02553555 0.01733862 0.03317766]


In [5]:
print(np.mean(evals, axis=1)*100)
print(np.std(evals, axis=1)*100)

[92.62945995 84.66246748 90.67938158 82.72584387 80.10286161 85.04538283
 92.3213509  81.68212383 85.88274735 77.08956029]
[2.08799918 4.78280553 2.01135195 2.45477398 2.52572705 2.11501648
 1.75031322 2.55355453 1.7338621  3.31776615]


# Case 2

In [9]:
evals = np.zeros((10))
for i in range(10):
    print(i)
    # Evaluate each method 30 times
    model = create_model()
    model = fit_model(model, x_train, y_train, x_test, y_test, normal=i, caseii=True)
    # Get model that projects into feature space
    layer_name = model.layers[0].name
    feature_model = keras.models.Model(inputs=model.input,
                  outputs=model.get_layer(layer_name).output)
    # Get normal features
    normal_features = feature_model.predict(x_train)
    # Fit GMM to normal features
    gmm = GaussianMixture(n_components=9)
    gmm.fit(normal_features)

    # Get test data
    x = x_test
    y = y_test
    labels = np.copy( y )
    labels[ y != i ] = 0
    labels[ y == i ] = 1

    # Compute features for test case
    features = feature_model.predict(x)
    # Compute scores
    score = gmm.score_samples(features)
    # Map score between 0 and 1
    s = score * -1
    s = 1 - (s - np.min(s)) / (np.max(s) - np.min(s))
    # Compute AUC-ROC
    AUC = roc_auc_score(labels, s)
    evals[i] = AUC

print(evals)

0
1
2
3
4
5
6
7
8
9
[0.57616408 0.92776046 0.3055902  0.39810813 0.5639985  0.3243118
 0.60906849 0.60808009 0.37578868 0.63470661]


# CASE 3

In [7]:
# Evaluate each method 30 times
model = create_model()
model = fit_model(model, x_train, y_train, x_test, y_test, caseiii=True)
# Get model that projects into feature space
layer_name = model.layers[0].name
feature_model = keras.models.Model(inputs=model.input,
              outputs=model.get_layer(layer_name).output)
# Get normal features
normal_features = feature_model.predict(x_train)
# Fit GMM to normal features
gmm = GaussianMixture(n_components=1)
gmm.fit(normal_features)


x = np.copy( x_test )
x[5000:] = f_x_test[:5000]

labels = np.zeros( y_test.shape )
labels[5000:] = 1

# Compute features for test case
features = feature_model.predict( x )
# Compute scores
score = gmm.score_samples(features)
# Map score between 0 and 1
s = score * -1
s = 1 - (s - np.min(s)) / (np.max(s) - np.min(s))
# Compute AUC-ROC
AUC = roc_auc_score(labels, s)
print(AUC)

0.08828140000000001
