## Load libraries

In [1]:
import os
import numpy as np
from sklearn.metrics import accuracy_score, classification_report

from alphai_watson.performance import GANPerformanceAnalysis
from alphai_watson.transformer import NullTransformer
from alphai_rickandmorty_oracle.datasource.kddcup99 import KDDCup99DataSource
from alphai_rickandmorty_oracle.detective import RickAndMortyDetective
from alphai_rickandmorty_oracle.model_kddcup99 import RickAndMorty

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
DEBUG:matplotlib:CACHEDIR=/home/ubuntu/.cache/matplotlib
DEBUG:matplotlib.font_manager:Using fontManager instance from /home/ubuntu/.cache/matplotlib/fontList.json
DEBUG:matplotlib.backends:backend agg version v2.2


Enabling weight norm
Uppercase local vars:
	BATCH_SIZE: 50
	CRITIC_ITERS: 5
	DEFAULT_FIT_EPOCHS: 1000
	DEFAULT_LEARN_RATE: 0.0001
	DEFAULT_TRAIN_ITERS: 5000
	DEFAULT_Z_DIM: 32
	DIAGNOSIS_LEARN_RATE: 0.01
	DIM: 64
	DISC_FILTER_SIZE: 5
	INIT_KERNEL: <function variance_scaling_initializer.<locals>._initializer at 0x7fc2210c1730>
	LAMBDA: 10
	LAMBDA_2: 2.0
	OUTPUT_DIM: 121


## Define KDDCup99 Datasource

In [2]:
# Both input files (the 10% KDD Cup 99 sample and its column-name file)
# are resolved relative to the shared test resources directory.
file_path = '../../tests/resources'
data_filename, header_filename = (
    os.path.join(file_path, basename)
    for basename in ('kddcup.data_10_percent_corrected', 'kddcup.names')
)

# NOTE(review): the meaning of the two NullTransformer arguments (8, 8) is
# not visible in this notebook - confirm against the transformer's docs.
kdd_datasource = KDDCup99DataSource(
    source_file=data_filename,
    header_file=header_filename,
    transformer=NullTransformer(8, 8),
)

DEBUG:root:Start file parsing.
DEBUG:root:Normal (97278, 122); Train (68095, 121); Test(29183, 121)
DEBUG:root:Abnormal (396743, 121)
DEBUG:root:End file parsing.


In [3]:
# Fetch the three data splits used below, in the order: normal training
# data, held-out normal data, abnormal data.
# NOTE(review): the two test splits are also requested through
# get_train_data() - confirm this is the intended accessor.
data_normal_train, data_normal_test, data_abnormal_test = (
    kdd_datasource.get_train_data(split_name)
    for split_name in ('NORMAL', 'NORMAL_TEST', 'ABNORMAL_TEST')
)

## Define Model

In [4]:
# Directory used both for the saved model ('save_path' in the detective
# configuration) and for the plots written during training.
model_dir = './kddcup99_models'
# exist_ok=True keeps the cell re-runnable and removes the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(model_dir, exist_ok=True)

# Settings shared by the model and the detective configuration.
batch_size = 64
output_dimensions = 121  # the parsed training data is logged with 121 columns
train_iters = 3000
plot_save_path = model_dir


# Build the model from the settings defined earlier in this cell.
model = RickAndMorty(
    batch_size=batch_size,
    output_dimensions=output_dimensions,
    train_iters=train_iters,
    plot_save_path=plot_save_path,
)

# The detective is handed the model instance together with the same
# settings, plus the location where the trained model is saved.
detective_settings = {
    'model': model,
    'batch_size': batch_size,
    'output_dimensions': output_dimensions,
    'train_iters': train_iters,
    'save_path': '{}/KDDCup99-model'.format(model_dir),
    'plot_save_path': plot_save_path,
}
detective = RickAndMortyDetective(model_configuration=detective_settings)

# Train on normal traffic only.
detective.train(data_normal_train)

DEBUG:root:Starting session
DEBUG:root:Start training loop...
INFO:root:Initialising Model
INFO:root:Training iteration 0 of 3000
DEBUG:matplotlib.font_manager:findfont: Matching :family=sans-serif:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0 to DejaVu Sans ('/opt/anaconda/envs/ai/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf') with score of 0.050000


iter 0	train disc cost	1.4071215391159058	time	0.247755765914917
iter 1	train gen cost	0.6588791608810425	train disc cost	1.3958030939102173	time	0.09719562530517578
iter 2	train gen cost	0.6646692752838135	train disc cost	1.3857131004333496	time	0.06753754615783691
iter 3	train gen cost	0.6699368953704834	train disc cost	1.377500295639038	time	0.06844258308410645
iter 4	train gen cost	0.674412727355957	train disc cost	1.3695532083511353	time	0.06746673583984375


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 100 of 3000


iter 99	train gen cost	1.2602397203445435	train disc cost	0.7529851198196411	time	0.06808638823659796


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 200 of 3000


iter 199	train gen cost	5.943995952606201	train disc cost	0.28247761726379395	time	0.05816848278045654


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 300 of 3000


iter 299	train gen cost	7.217431545257568	train disc cost	0.2805635929107666	time	0.061468570232391356


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 400 of 3000


iter 399	train gen cost	4.52791690826416	train disc cost	0.34277477860450745	time	0.060043869018554685


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 500 of 3000


iter 499	train gen cost	2.278439998626709	train disc cost	0.4412485361099243	time	0.05746912956237793


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 600 of 3000


iter 599	train gen cost	1.4501769542694092	train disc cost	0.9874557256698608	time	0.05820643424987793


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 700 of 3000


iter 699	train gen cost	1.124018907546997	train disc cost	1.0553301572799683	time	0.05476428508758545


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 800 of 3000


iter 799	train gen cost	1.3796292543411255	train disc cost	0.7455384135246277	time	0.05985280752182007


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 900 of 3000


iter 899	train gen cost	1.1100599765777588	train disc cost	0.9758090376853943	time	0.052684993743896485


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1000 of 3000


iter 999	train gen cost	1.010939598083496	train disc cost	0.882219135761261	time	0.06120589017868042


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1100 of 3000


iter 1099	train gen cost	0.9261056780815125	train disc cost	1.1756350994110107	time	0.05135472059249878


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1200 of 3000


iter 1199	train gen cost	0.8139371275901794	train disc cost	1.1128382682800293	time	0.06503406047821045


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1300 of 3000


iter 1299	train gen cost	0.7896631360054016	train disc cost	1.020133376121521	time	0.053377180099487304


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1400 of 3000


iter 1399	train gen cost	0.7670508623123169	train disc cost	1.0484912395477295	time	0.0625006914138794


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1500 of 3000


iter 1499	train gen cost	0.7454116344451904	train disc cost	1.0302425622940063	time	0.05068778991699219


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1600 of 3000


iter 1599	train gen cost	0.816632091999054	train disc cost	1.0425776243209839	time	0.06208821535110474


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1700 of 3000


iter 1699	train gen cost	0.9019127488136292	train disc cost	1.0469155311584473	time	0.05053019285202026


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1800 of 3000


iter 1799	train gen cost	0.9431647658348083	train disc cost	1.0772778987884521	time	0.06223613977432251


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 1900 of 3000


iter 1899	train gen cost	0.9380932450294495	train disc cost	1.0653858184814453	time	0.053418161869049074


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2000 of 3000


iter 1999	train gen cost	0.9487429857254028	train disc cost	1.0839864015579224	time	0.05969647884368896


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2100 of 3000


iter 2099	train gen cost	0.9222937226295471	train disc cost	1.058181881904602	time	0.061387062072753906


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2200 of 3000


iter 2199	train gen cost	0.9785505533218384	train disc cost	1.075264573097229	time	0.057845098972320555


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2300 of 3000


iter 2299	train gen cost	0.9717816710472107	train disc cost	1.0973656177520752	time	0.05955288648605347


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2400 of 3000


iter 2399	train gen cost	0.9702647924423218	train disc cost	1.0874359607696533	time	0.05524017572402954


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2500 of 3000


iter 2499	train gen cost	0.9653502702713013	train disc cost	1.0947058200836182	time	0.060248329639434814


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2600 of 3000


iter 2599	train gen cost	0.9564698934555054	train disc cost	1.1066787242889404	time	0.053198204040527344


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2700 of 3000


iter 2699	train gen cost	0.9452099800109863	train disc cost	1.1007076501846313	time	0.0628880000114441


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2800 of 3000


iter 2799	train gen cost	0.9468779563903809	train disc cost	1.0860507488250732	time	0.05175078868865967


INFO:root:Saving fake samples to png.
INFO:root:Training iteration 2900 of 3000


iter 2899	train gen cost	0.9473578929901123	train disc cost	1.0943915843963623	time	0.06559078454971314


INFO:root:Saving fake samples to png.
DEBUG:root:Training complete.


iter 2999	train gen cost	0.9460355639457703	train disc cost	1.0943360328674316	time	0.053946518898010255


## Evaluate results
***

### Collate ground truth

In [5]:
# Label convention: 1 = normal sample, 0 = abnormal sample.
n1, n2 = (
    np.ones(len(data_normal_test.data)),
    np.zeros(len(data_abnormal_test.data)),
)

# Normal labels first, then abnormal - the detection results are stacked
# in the same order.
ground_truth = np.concatenate([n1, n2])

### Collate detection results

In [6]:
# Run the detector on the held-out normal data first and on the abnormal
# data second, keeping only the raw result arrays.
test_results_normal, test_results_abnormal = (
    detective.detect(sample).data
    for sample in (data_normal_test, data_abnormal_test)
)

# Stack in the same normal-then-abnormal order as the ground truth labels.
predictions = np.hstack([test_results_normal, test_results_abnormal])

INFO:root:Running detector on <alphai_watson.datasource.Sample object at 0x7fc1daae51d0>
INFO:root:Detection completed in 0.46583758294582367
INFO:root:Running detector on <alphai_watson.datasource.Sample object at 0x7fc1daae5278>
INFO:root:Detection completed in 5.483396312221885


### Calculate ROC Score

In [7]:
# Compare the raw detection scores against the 1/0 ground truth labels.
performance_analysis = GANPerformanceAnalysis({})
roc_score = performance_analysis.analyse(
    detection_result=predictions,
    expected_truth=ground_truth,
)

print('ROC Score: {}'.format(roc_score))

ROC Score: 0.734019941461691


### Calculate training accuracy and derive the detection threshold

In [8]:
def model_accuracy(data, status, threshold=None):
    """Score ``data`` with the detective and print its accuracy for one label.

    Every sample in ``data`` is treated as having the same true label
    ``status``. A sample is predicted as 1 when its detection score is at or
    above ``threshold`` and as 0 otherwise.

    Args:
        data: sample object passed to ``detective.detect``.
        status: true label shared by all samples in ``data``.
        threshold: score cut-off. When ``None``, the median of the scores
            obtained for ``data`` is used, so about half of the samples
            fall on each side of it.

    Returns:
        The threshold that was applied, so it can be reused on other data.
    """
    scores = np.asarray(detective.detect(data).data)
    if threshold is None:
        threshold = np.median(scores)
    # Vectorised comparison instead of a Python-level loop over the array.
    predicted_labels = np.where(scores >= threshold, 1, 0)
    # Deliberately not called ``ground_truth`` so the notebook-level array
    # of that name is not shadowed.
    true_labels = [status] * len(scores)
    print('Accuracy: {0:.2f}%'.format(100*accuracy_score(true_labels, predicted_labels)))
    return threshold

In [9]:
# No threshold is supplied, so model_accuracy() uses the median score of the
# normal training data; the reported accuracy is therefore about 50% by
# construction. The returned threshold is what matters for later cells.
threshold = model_accuracy(data=data_normal_train, status=1)

INFO:root:Running detector on <alphai_watson.datasource.Sample object at 0x7fc1daae5208>
INFO:root:Detection completed in 0.9801656883209944


Accuracy: 50.00%


### Generate classification report

In [10]:
# Binarise the detection scores with the calibrated threshold:
# 1 (NORMAL) when the score is at or above it, 0 (ABNORMAL) otherwise.
# Vectorised, so the ~426k scores are not walked in a Python-level loop.
class_predictions = np.where(np.asarray(predictions) >= threshold, 1, 0)

# Names are listed in label order: 0 -> ABNORMAL, 1 -> NORMAL.
target_names = ['ABNORMAL', 'NORMAL']
print(classification_report(ground_truth, class_predictions, target_names=target_names))

              precision    recall  f1-score   support

    ABNORMAL       0.96      0.93      0.95    396743
      NORMAL       0.34      0.50      0.41     29183

   micro avg       0.90      0.90      0.90    425926
   macro avg       0.65      0.72      0.68    425926
weighted avg       0.92      0.90      0.91    425926

