In [2]:
import os
import pandas as pd
import numpy as np
from alphai_watson.datasource.brainwaves import BrainwavesDataSource
from alphai_rickandmorty_oracle.datasource.kddcup99 import KDDCup99DataSource

from alphai_watson.performance import GANPerformanceAnalysis
from alphai_watson.transformer import NullTransformer

from alphai_rickandmorty_oracle.detective import RickAndMortyDetective

In [3]:
# --- Prepare the KDD Cup 99 (10% subset) data ---------------------------------
# Reads the raw records and the column-name file, derives a binary label,
# one-hot encodes the symbolic columns, splits the normal rows 70/30 and writes
# the 70% training part (without the label) to a header-less CSV.
RANDOM_SEED = 42  # fixes the 70/30 split so a fresh re-run writes the same CSV

file_path = '../../tests/resources'
data_filename = os.path.join(file_path, 'kddcup.data_10_percent_corrected')
header_filename = os.path.join(file_path, 'kddcup.names')

data = pd.read_csv(data_filename, header=None)
# The first line of the names file is skipped; every remaining line is split
# on ':' into a column name and its declared type.
header = pd.read_csv(header_filename, delimiter=':', skiprows=1, header=None)
header.columns = ['column', 'column_type']

data.columns = header.column.tolist() + ['attack']
# Strip the trailing period as a literal character. Passing '.' to
# str.replace is ambiguous: it is a regex metacharacter and the default of the
# `regex` argument differs between pandas versions.
data['attack'] = data['attack'].str.rstrip('.')
# label: 0 for rows whose attack is 'normal', 1 for everything else.
data['label'] = 1
data.loc[data['attack'] == 'normal', 'label'] = 0

# Match the declared type after trimming whitespace so the selection does not
# depend on the exact spacing that follows the ':' delimiter.
symbolic_columns = header.loc[
    header.column_type.str.strip() == 'symbolic.', 'column'
].tolist()

# One-hot encode all symbolic columns in a single call. The encoded columns
# replace the originals and are appended after the remaining columns in
# `symbolic_columns` order, named '<column>_<value>'.
data = data.drop(columns=['attack'])
data = pd.get_dummies(data, columns=symbolic_columns)

data_normal = data.loc[data['label'] == 0]
data_abnormal = data.loc[data['label'] == 1]

# 70% of the normal rows are used for training; the remaining normal rows plus
# all abnormal rows form the test set.
data_normal_train = data_normal.sample(frac=0.7, random_state=RANDOM_SEED)
data_normal_test = data_normal.drop(index=data_normal_train.index)

data_test = pd.concat([data_normal_test, data_abnormal])

# The train/test parts must partition the normal rows exactly.
assert len(data_normal_train) + len(data_normal_test) == len(data_normal)
print(data_normal.shape, data_normal_train.shape, data_normal_test.shape)

# The label is dropped before saving: the file holds feature columns only.
data_normal_train = data_normal_train.drop(columns=['label'])
save_filename = os.path.join(file_path, 'kddcup99_10_percent_normal.csv')
data_normal_train.to_csv(save_filename, header=False, index=False)


(97278, 122) (68095, 122) (29183, 122)


In [14]:
# Preview the first five training rows (head() defaults to n=5).
data_normal_train.head()

Unnamed: 0,duration,src_bytes,dst_bytes,wrong_fragment,urgent,hot,num_failed_logins,num_compromised,root_shell,su_attempted,...,flag_S3,flag_SF,flag_SH,land_0,land_1,logged_in_0,logged_in_1,is_host_login_0,is_guest_login_0,is_guest_login_1
456559,0,105,146,0,0,0,0,0,0,0,...,0,1,0,1,0,1,0,1,1,0
345102,0,105,145,0,0,0,0,0,0,0,...,0,1,0,1,0,1,0,1,1,0
76900,0,1250,327,0,0,0,0,0,0,0,...,0,1,0,1,0,0,1,1,1,0
107285,0,207,3050,0,0,0,0,0,0,0,...,0,1,0,1,0,0,1,1,1,0
89801,0,224,3124,0,0,0,0,0,0,0,...,0,1,0,1,0,0,1,1,1,0


In [6]:
# Wrap the CSV at `save_filename` in a KDDCup99DataSource and request its
# 'NORMAL' training data.
# NOTE(review): the two positional NullTransformer arguments are both 8; their
# meaning is not visible in this notebook -- confirm against NullTransformer.
kdd_datasource = KDDCup99DataSource(
    source_file=save_filename,
    transformer=NullTransformer(8, 8),
)
kdd_data = kdd_datasource.get_train_data('NORMAL')

DEBUG:root:Start file parsing.
DEBUG:root:End file parsing.


In [84]:
# Load the brainwaves sample through a BrainwavesDataSource.
# n_timesteps is chosen so that n_sensors * n_timesteps == 784.
# NOTE(review): 784 presumably has to agree with the model's
# 'output_dimensions' setting -- confirm before changing either value.
n_sensors = 16
n_timesteps = 784 // n_sensors
test_data_file = os.path.join('../../tests/resources', 'brainwaves_normal_sample_1.hd5')

train_data_source = BrainwavesDataSource(
    source_file=test_data_file,
    transformer=NullTransformer(
        number_of_timesteps=n_timesteps,
        number_of_sensors=n_sensors,
    ),
)
train_data = train_data_source.get_train_data('NORMAL')

# Display the loaded training array.
train_data.data


DEBUG:root:Start file parsing
DEBUG:root:end file parsing


array([[[  0,  13,  15, ...,  26,  32,  34],
        [-26,  -4,  11, ...,  30,  44,  49],
        [-42, -35, -19, ...,  11,   5,   2],
        ...,
        [ 45,  30,  25, ...,  26,  17,   6],
        [ 15,   8,  -6, ...,  -9, -14, -27],
        [-18, -20, -19, ..., -20, -22, -32]],

       [[ 28,  38,  44, ..., -63, -58, -44],
        [ 54,  47,  31, ..., -27, -17,  -8],
        [  5,   7,   8, ...,  40,  33,  20],
        ...,
        [  2,   4,  -3, ...,  28,  22,  22],
        [-36, -38, -39, ...,  -8,  -9,  -7],
        [-32, -25, -30, ..., -13, -11,  -4]],

       [[-23, -14, -21, ..., -10, -12, -23],
        [ -3,   0,  -4, ...,  -3, -10,  -7],
        [ 16,  14,  22, ...,   4,   6,  14],
        ...,
        [ 16,  16,  21, ...,  24,  27,  36],
        [ -7,  -3,   1, ...,  11,   2,  -2],
        [ -3,  -3,  -2, ..., -13, -19, -22]],

       ...,

       [[ 30,  15,  11, ...,  11,  10,   0],
        [ 12,  -3,  -8, ...,  17,   0, -12],
        [-17, -17, -11, ...,   7,   6,   4

In [6]:
# Show the dimensions of the training array loaded from the brainwaves source.
train_data.data.shape

(29970, 16, 8)

In [None]:
# Settings handed to the detective as its model configuration.
detective_configuration = {
    'batch_size': 64,
    'output_dimensions': 784,
    'train_iters': 200,
    'plot_save_path': './',
}

# Build the detective and train it on the brainwaves training data.
detective = RickAndMortyDetective(model_configuration=detective_configuration)
detective.train(train_data)