## Load libraries

In [1]:
import os
import numpy as np
from sklearn.metrics import accuracy_score

from alphai_watson.transformer import NullTransformer
from alphai_rickandmorty_oracle.datasource.kddcup99 import KDDCup99DataSource
from alphai_rickandmorty_oracle.detective import RickAndMortyDetective
from alphai_rickandmorty_oracle.model_kddcup99 import RickAndMorty

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
DEBUG:matplotlib:CACHEDIR=/home/ubuntu/.cache/matplotlib
DEBUG:matplotlib.font_manager:Using fontManager instance from /home/ubuntu/.cache/matplotlib/fontList.json
DEBUG:matplotlib.backends:backend agg version v2.2


Enabling weight norm
Uppercase local vars:
	BATCH_SIZE: 50
	CRITIC_ITERS: 5
	DEFAULT_FIT_EPOCHS: 1000
	DEFAULT_LEARN_RATE: 0.0001
	DEFAULT_TRAIN_ITERS: 5000
	DEFAULT_Z_DIM: 32
	DIAGNOSIS_LEARN_RATE: 0.01
	DIM: 64
	DISC_FILTER_SIZE: 5
	INIT_KERNEL: <function variance_scaling_initializer.<locals>._initializer at 0x7ff0954b48c8>
	LAMBDA: 10
	LAMBDA_2: 2.0
	OUTPUT_DIM: 121


## Define KDDCup99 Datasource

In [2]:
# Both KDD Cup 99 files live in the repository's test resources folder.
file_path = '../../tests/resources'
data_filename, header_filename = (
    os.path.join(file_path, name)
    for name in ('kddcup.data_10_percent_corrected', 'kddcup.names')
)

# NOTE(review): the meaning of the two NullTransformer arguments is not
# visible in this notebook -- confirm against alphai_watson before changing.
kdd_datasource = KDDCup99DataSource(
    source_file=data_filename,
    header_file=header_filename,
    transformer=NullTransformer(8, 8),
)

DEBUG:root:Start file parsing.
DEBUG:root:Normal (97278, 122); Train (68095, 121); Test(29183, 121)
DEBUG:root:Abnormal (396743, 121)
DEBUG:root:End file parsing.


In [3]:
# Pull the three splits used below, in the same order as they are named.
# NOTE(review): the test splits are also fetched through get_train_data --
# confirm this is the intended accessor for 'NORMAL_TEST' / 'ABNORMAL_TEST'.
data_normal_train, data_normal_test, data_abnormal_test = (
    kdd_datasource.get_train_data(split_name)
    for split_name in ('NORMAL', 'NORMAL_TEST', 'ABNORMAL_TEST')
)

## Define Model

In [4]:
# Directory that receives both the saved model and the diagnostic plots.
model_dir = './kddcup99_models'
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
os.makedirs(model_dir, exist_ok=True)

# Hyper-parameters shared by the model and the detective configuration.
batch_size = 64
output_dimensions = 121  # the datasource parsing log reports 121 columns for the train split
train_iters = 1000
plot_save_path = model_dir

model = RickAndMorty(batch_size=batch_size,
                     output_dimensions=output_dimensions,
                     train_iters=train_iters,
                     plot_save_path=plot_save_path)

detective = RickAndMortyDetective(model_configuration={
    'model': model,
    'batch_size': batch_size,
    'output_dimensions': output_dimensions,
    'train_iters': train_iters,
    'save_path': '{}/KDDCup99-model'.format(model_dir),
    'plot_save_path': plot_save_path,
})

# Training uses only the 'NORMAL' split loaded earlier in the notebook.
detective.train(data_normal_train)

DEBUG:root:Starting session
DEBUG:root:Start training loop...
INFO:root:Initialising Model
INFO:root:Training iteration 0 of 1000
DEBUG:matplotlib.font_manager:findfont: Matching :family=sans-serif:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0 to DejaVu Sans ('/opt/anaconda/envs/ai/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf') with score of 0.050000


iter 0	train disc cost	6.711755275726318	time	0.3869016170501709
iter 1	train disc cost	6.526790618896484	time	0.14352178573608398
iter 2	train disc cost	6.2651543617248535	time	0.10967803001403809
iter 3	train disc cost	5.803945064544678	time	0.10353279113769531
iter 4	train disc cost	5.6229705810546875	time	0.10345315933227539


INFO:root:Saving fake samples to png: [[0.64837104 0.45833108 0.67791617 ... 0.55965966 0.5119039  0.42390925]
 [0.70779145 0.558568   0.64304256 ... 0.4496233  0.6136117  0.36454576]
 [0.7185568  0.5454069  0.57614297 ... 0.49597174 0.43837422 0.45847866]
 ...
 [0.6392329  0.4676007  0.5346414  ... 0.59858793 0.62400293 0.32686457]
 [0.69485927 0.62396234 0.63495016 ... 0.5356669  0.49900445 0.41628933]
 [0.5689892  0.5506251  0.62281126 ... 0.60878927 0.6899633  0.25054687]]
INFO:root:Training iteration 100 of 1000


iter 99	train disc cost	0.288692831993103	time	0.08977015394913523


INFO:root:Saving fake samples to png: [[0.81771994 0.6124605  0.55324763 ... 0.78908813 0.69261396 0.23729232]
 [0.8289133  0.7123673  0.54745924 ... 0.59395945 0.74733084 0.22739401]
 [0.8534212  0.67180234 0.46492895 ... 0.62691414 0.5791229  0.3203486 ]
 ...
 [0.8053246  0.6504261  0.4284427  ... 0.8154437  0.789074   0.17772292]
 [0.8402308  0.76833546 0.5290825  ... 0.715132   0.65584266 0.27215216]
 [0.77820504 0.76266634 0.45565903 ... 0.8395594  0.87304014 0.10813519]]
INFO:root:Training iteration 200 of 1000


iter 199	train disc cost	0.01857040449976921	time	0.08039513349533081


INFO:root:Saving fake samples to png: [[0.94838375 0.8510176  0.28995743 ... 0.9421591  0.90896904 0.07630498]
 [0.9331827  0.87473446 0.32771417 ... 0.78800553 0.89026666 0.09922128]
 [0.94797176 0.85600346 0.23816599 ... 0.8045145  0.8052584  0.15728438]
 ...
 [0.9412672  0.8759424  0.18937404 ... 0.9534435  0.94372094 0.06173687]
 [0.94922847 0.91647446 0.29343155 ... 0.8852174  0.8622329  0.11484148]
 [0.9492416  0.9343219  0.1586493  ... 0.96883047 0.9748991  0.02264176]]
INFO:root:Training iteration 300 of 1000


iter 299	train disc cost	0.10337979346513748	time	0.08665913105010986


INFO:root:Saving fake samples to png: [[0.98706275 0.9589157  0.07364547 ... 0.9186247  0.98519486 0.01690124]
 [0.976319   0.9531074  0.12857579 ... 0.73305243 0.96988374 0.03137267]
 [0.98314685 0.94566005 0.07767431 ... 0.7285348  0.94726944 0.05409878]
 ...
 [0.98518384 0.96411705 0.0444338  ... 0.9337251  0.99085945 0.01483549]
 [0.9851415  0.97406447 0.09265817 ... 0.8372686  0.9681028  0.03604205]
 [0.98985296 0.9859942  0.0287344  ... 0.9578661  0.9972222  0.00398622]]
INFO:root:Training iteration 400 of 1000


iter 399	train disc cost	-0.2378252148628235	time	0.0827556037902832


INFO:root:Saving fake samples to png: [[9.74375904e-01 9.34233189e-01 2.04239879e-02 ... 7.45524883e-01
  9.97738242e-01 3.68534471e-03]
 [9.62037683e-01 9.34401512e-01 5.24163842e-02 ... 5.01128554e-01
  9.92721498e-01 9.03412607e-03]
 [9.74214435e-01 9.26849008e-01 2.66136844e-02 ... 4.80113089e-01
  9.89224851e-01 1.59480125e-02]
 ...
 [9.73975778e-01 9.39717472e-01 1.12750335e-02 ... 7.81179130e-01
  9.98639524e-01 2.93797557e-03]
 [9.74125624e-01 9.63493824e-01 3.08585074e-02 ... 6.12896681e-01
  9.93596315e-01 9.77065880e-03]
 [9.76016164e-01 9.75974500e-01 5.67975547e-03 ... 8.22324216e-01
  9.99693871e-01 7.59515329e-04]]
INFO:root:Training iteration 500 of 1000


iter 499	train disc cost	-0.13588787615299225	time	0.08333842277526855


INFO:root:Saving fake samples to png: [[8.5939908e-01 7.6106906e-01 5.9203650e-03 ... 3.0465633e-01
  9.9954081e-01 8.9151313e-04]
 [8.7654543e-01 8.3106911e-01 1.8317852e-02 ... 2.0315610e-01
  9.9811906e-01 2.6449577e-03]
 [9.0626156e-01 7.9711676e-01 8.4074419e-03 ... 1.8030758e-01
  9.9751735e-01 4.7558025e-03]
 ...
 [8.7055814e-01 7.6746392e-01 2.5806096e-03 ... 3.3479410e-01
  9.9978596e-01 5.4378365e-04]
 [9.0423810e-01 8.8569576e-01 8.9624273e-03 ... 2.3456527e-01
  9.9855942e-01 2.5027022e-03]
 [8.3121794e-01 8.7014091e-01 1.2336663e-03 ... 3.3699021e-01
  9.9995267e-01 1.3676430e-04]]
INFO:root:Training iteration 600 of 1000


iter 599	train disc cost	-0.2380102574825287	time	0.08435963392257691


INFO:root:Saving fake samples to png: [[5.0521272e-01 3.6507919e-01 3.8013780e-03 ... 9.0895176e-02
  9.9971431e-01 3.6078776e-04]
 [6.4162314e-01 5.5983752e-01 1.1049644e-02 ... 7.5340591e-02
  9.9889022e-01 1.0902679e-03]
 [6.7491066e-01 4.7366408e-01 4.9547758e-03 ... 6.9744624e-02
  9.9843949e-01 1.9973875e-03]
 ...
 [4.8917845e-01 3.3784595e-01 1.1895751e-03 ... 8.7744862e-02
  9.9989414e-01 1.3474315e-04]
 [6.7625666e-01 6.2414676e-01 4.3588504e-03 ... 7.9703525e-02
  9.9926406e-01 8.4788119e-04]
 [3.4076899e-01 4.5417732e-01 5.7622691e-04 ... 8.4012754e-02
  9.9997699e-01 3.8529524e-05]]
INFO:root:Training iteration 700 of 1000


iter 699	train disc cost	0.8008814454078674	time	0.08121698379516601


INFO:root:Saving fake samples to png: [[1.3609548e-01 1.6306643e-01 1.5510957e-03 ... 2.8494418e-02
  9.9980170e-01 1.6994741e-04]
 [2.7947465e-01 3.3208236e-01 5.4232203e-03 ... 2.9011648e-02
  9.9921858e-01 5.4552202e-04]
 [3.0649471e-01 2.5405771e-01 2.1493593e-03 ... 2.6874643e-02
  9.9908507e-01 8.6136139e-04]
 ...
 [9.8164938e-02 1.3687128e-01 4.2712217e-04 ... 2.0924181e-02
  9.9993384e-01 5.1274808e-05]
 [2.5364417e-01 3.4910572e-01 1.9115319e-03 ... 2.8942579e-02
  9.9956185e-01 3.6990878e-04]
 [4.9421843e-02 1.7867056e-01 1.7195167e-04 ... 1.7542383e-02
  9.9998772e-01 1.3150013e-05]]
INFO:root:Training iteration 800 of 1000


iter 799	train disc cost	0.016719885170459747	time	0.08627968549728393


INFO:root:Saving fake samples to png: [[4.7859710e-02 1.0000555e-01 5.6494150e-04 ... 9.6100327e-03
  9.9991500e-01 6.0396647e-05]
 [1.3226809e-01 2.1278566e-01 2.3007845e-03 ... 1.1429341e-02
  9.9961591e-01 1.9913338e-04]
 [1.4217010e-01 1.4979765e-01 8.1268640e-04 ... 9.9235689e-03
  9.9965787e-01 2.5940966e-04]
 ...
 [2.8310178e-02 7.3928416e-02 1.4183662e-04 ... 5.5959853e-03
  9.9997497e-01 1.4166483e-05]
 [9.6157074e-02 1.9870809e-01 7.6140277e-04 ... 9.6501168e-03
  9.9981123e-01 1.2357498e-04]
 [1.3020564e-02 8.7834693e-02 5.2725834e-05 ... 4.3509318e-03
  9.9999559e-01 3.1971158e-06]]
INFO:root:Training iteration 900 of 1000


iter 899	train disc cost	-0.43257036805152893	time	0.0792544937133789


INFO:root:Saving fake samples to png: [[2.5673050e-02 2.5257900e-01 2.6058516e-04 ... 2.4245013e-03
  9.9997044e-01 1.9977560e-05]
 [7.9632714e-02 3.7459582e-01 9.0704614e-04 ... 3.9244206e-03
  9.9986315e-01 6.6873152e-05]
 [7.6940536e-02 3.0687836e-01 3.5912343e-04 ... 3.0989470e-03
  9.9988854e-01 8.5228654e-05]
 ...
 [1.4382098e-02 2.1656638e-01 6.0275877e-05 ... 1.4056654e-03
  9.9999309e-01 4.0553200e-06]
 [5.5703443e-02 4.0110412e-01 2.8830062e-04 ... 2.4155374e-03
  9.9993777e-01 3.4852164e-05]
 [5.8667287e-03 2.4774915e-01 1.7337727e-05 ... 8.8589056e-04
  9.9999905e-01 6.8630624e-07]]
DEBUG:root:Training complete.


iter 999	train disc cost	-0.5084922909736633	time	0.0891205382347107


## Evaluate results

#### Train data (Normal)

In [5]:
def model_accuracy(data, status, mean=None):
    """Print the detector's accuracy on ``data`` and return the threshold used.

    Scores are read from ``detective.detect(data).data`` and negated before
    thresholding. An item is labelled 1 when its negated score is greater than
    or equal to the threshold, and 0 otherwise; the labels are then compared
    with ``status`` via ``accuracy_score``.

    Args:
        data: Sample handed to ``detective.detect``.
        status: Ground-truth label applied to every item in ``data``.
        mean: Decision threshold. Despite the name, when omitted it defaults
            to the *median* of the negated scores, so at least half of the
            items are labelled 1 by construction.

    Returns:
        The threshold that was applied, so it can be reused on other data.
    """
    # Called `scores` rather than `eval` so the builtin is not shadowed.
    scores = detective.detect(data).data * -1
    if mean is None:
        mean = np.median(scores)
    ground_truth = [status] * len(scores)
    prediction = [1 if score >= mean else 0 for score in scores]
    print('Accuracy: {0:.2f}%'.format(100*accuracy_score(ground_truth, prediction)))
    return mean

In [6]:
# No threshold is passed, so model_accuracy derives one from these scores
# (their median) and returns it; it is kept in `mean` for the later cells.
mean = model_accuracy(data_normal_train, 1)

INFO:root:Running detector on <alphai_watson.datasource.Sample object at 0x7ff047d25b70>
INFO:root:Detection completed in 1.0041051115840673


Accuracy: 50.00%


#### Test data (Normal)

In [7]:
# Evaluate the normal test split with the threshold obtained from the
# training data; every sample is treated as having label 1.
_ = model_accuracy(data_normal_test, 1, mean)

INFO:root:Running detector on <alphai_watson.datasource.Sample object at 0x7ff047d25f60>
INFO:root:Detection completed in 0.4701633285731077


Accuracy: 50.25%


#### Test data (Abnormal)

In [8]:
# Evaluate the abnormal test split with the same threshold; every sample is
# treated as having label 0.
_ = model_accuracy(data_abnormal_test, 0, mean)

INFO:root:Running detector on <alphai_watson.datasource.Sample object at 0x7ff047d259b0>
INFO:root:Detection completed in 5.389211254194379


Accuracy: 52.16%
