## Load libraries

In [None]:
import os
import logging

import numpy as np
from sklearn.metrics import accuracy_score, classification_report

from alphai_watson.performance import GANPerformanceAnalysis
from alphai_watson.transformer import NullTransformer
from alphai_rickandmorty_oracle.datasource.kddcup99 import KDDCup99DataSource
from alphai_rickandmorty_oracle.detective import RickAndMortyDetective
from alphai_rickandmorty_oracle.model import RickAndMorty

from alphai_rickandmorty_oracle.networks.kddcup99 import KDDCup99GanArchitecture

# Grab the root logger (getLogger() with no name) and raise its level to
# CRITICAL so that only critical records are emitted while the notebook runs.
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

## Define KDDCup99 Datasource

In [None]:
# Relative location of the KDDCup99 fixture files; resolved against the
# current working directory of the notebook kernel.
file_path = '../../tests/resources'
data_filename = os.path.join(file_path, 'kddcup.data_10_percent_corrected')
header_filename = os.path.join(file_path, 'kddcup.names')

# NOTE(review): the meaning of the two NullTransformer arguments (8, 8) is not
# visible here - confirm against alphai_watson.transformer.
kdd_datasource = KDDCup99DataSource(source_file=data_filename,
                                    header_file=header_filename,
                                    transformer=NullTransformer(8, 8))

# All three samples, including the two *_TEST ones, are fetched through
# get_train_data(); only the key passed in differs.
data_normal_train = kdd_datasource.get_train_data('NORMAL')
data_normal_test = kdd_datasource.get_train_data('NORMAL_TEST')
data_abnormal_test = kdd_datasource.get_train_data('ABNORMAL_TEST')

## Define Model

In [None]:
# Directory used both for the saved model and for the training plots.
model_dir = './kddcup99_models'
# exist_ok=True replaces the exists()-then-makedirs() pair: it cannot race with
# another process creating the directory and keeps the cell safe to re-run.
os.makedirs(model_dir, exist_ok=True)

# Hyper-parameters for the GAN.
batch_size = 64
output_dimensions = 121
# 11 x 11 == 121; presumably each sample is plotted as a square grid - confirm.
plot_dimensions = (11, 11)
train_iters = 100
plot_save_path = model_dir

architecture = KDDCup99GanArchitecture(output_dimensions, plot_dimensions)

model = RickAndMorty(architecture=architecture,
                     batch_size=batch_size,
                     train_iters=train_iters,
                     plot_save_path=plot_save_path)

# The detective wraps the model; batch size and output dimensions are read
# back from the model so the two objects cannot disagree.
detective = RickAndMortyDetective(model_configuration={
    'model': model,
    'batch_size': model.batch_size,
    'output_dimensions': model.architecture.output_dimensions,
    'save_path': '{}/KDDCup99-model'.format(model_dir),
    'plot_save_path': plot_save_path,
})

### Train

In [None]:
# Fit the detective on the 'NORMAL' sample only; no abnormal data is passed in.
detective.train(data_normal_train)

## Evaluate results
***

### Collate ground truth

In [None]:
# Label convention for evaluation: 1.0 for every normal test item,
# 0.0 for every abnormal test item.
n1 = np.ones(len(data_normal_test.data))
n2 = np.zeros(len(data_abnormal_test.data))

# Normal labels first, abnormal labels second, as a single 1-D vector.
ground_truth = np.concatenate((n1, n2))

### Collate detection results

In [None]:
# Run detection separately on the normal and abnormal test samples and keep
# only the `.data` payload of each result.
test_results_normal = detective.detect(data_normal_test).data 
test_results_abnormal = detective.detect(data_abnormal_test).data 

# Stack normal results first, then abnormal - the same order used when the
# ground-truth labels are assembled.
predictions = np.hstack((test_results_normal, test_results_abnormal))

### Calculate ROC Score

In [None]:
# Compare the detection results with the expected labels using an analysis
# object built from an empty configuration dict.
# NOTE(review): presumably the returned value is a ROC AUC - confirm against
# alphai_watson.performance.GANPerformanceAnalysis.
roc_score = GANPerformanceAnalysis({}).analyse(
  detection_result=predictions,
  expected_truth=ground_truth
)

print('ROC Score: {}'.format(roc_score))

### Calculate training accuracy

In [None]:
def model_accuracy(data, status, threshold=None):
    """Print the accuracy of thresholded detection scores against one label.

    Scores for ``data`` come from the module-level ``detective``. A score
    greater than or equal to the threshold is classed as 1, anything else as
    0, and those classes are compared with ``status`` repeated once per score.

    Args:
        data: Sample handed to ``detective.detect``.
        status: Expected class for every item in ``data``.
        threshold: Score cut-off. When ``None`` the median of the scores
            is used instead.

    Returns:
        The threshold that was applied.

    NOTE(review): with the default median cut-off roughly half of the scores
    land on each side, so the printed accuracy will sit near 50% unless many
    scores tie - confirm this is the intended metric.
    """
    scores = detective.detect(data).data
    cutoff = np.median(scores) if threshold is None else threshold

    predicted = [1 if score >= cutoff else 0 for score in scores]
    expected = [status for _ in scores]

    accuracy = accuracy_score(expected, predicted)
    print('Accuracy: {0:.2f}%'.format(100*accuracy))
    return cutoff

In [None]:
# No threshold is passed, so model_accuracy() falls back to the median score
# of the normal training sample; that value is kept for later use.
threshold = model_accuracy(data_normal_train, 1)

### Generate classification report

In [None]:
# Display names for the report; the evaluation labels are 0 for abnormal
# items and 1 for normal items.
target_names = ['ABNORMAL', 'NORMAL']

# Turn raw detection scores into hard 0/1 classes using the stored threshold.
class_predictions = [
    1 if score >= threshold else 0
    for score in predictions
]

print(classification_report(
    ground_truth,
    class_predictions,
    target_names=target_names,
))