In [1]:
import os
import pandas as pd
import numpy as np
from alphai_watson.datasource.brainwaves import BrainwavesDataSource
from alphai_rickandmorty_oracle.datasource.kddcup99 import KDDCup99DataSource

from alphai_watson.performance import GANPerformanceAnalysis
from alphai_watson.transformer import NullTransformer

from alphai_rickandmorty_oracle.detective import RickAndMortyDetective

  from ._conv import register_converters as _register_converters
  return f(*args, **kwds)


Enabling weight norm
Uppercase local vars:
	BATCH_SIZE: 50
	CRITIC_ITERS: 5
	DEFAULT_FIT_EPOCHS: 1000
	DEFAULT_LEARN_RATE: 0.0001
	DEFAULT_TRAIN_ITERS: 5000
	DEFAULT_Z_DIM: 128
	DIAGNOSIS_LEARN_RATE: 0.01
	DIM: 64
	DISC_FILTER_SIZE: 5
	LAMBDA: 10
	LAMBDA_2: 2.0
	OUTPUT_DIM: 784


In [2]:
# Prepare the KDD Cup 99 (10% subset) data: label rows as normal/abnormal,
# one-hot encode the symbolic columns, split the normal rows 70/30 into
# train/test, and write the three resulting sets as header-less CSV files.
file_path = '../../tests/resources'
data_filename = os.path.join(file_path, 'kddcup.data_10_percent_corrected')
header_filename = os.path.join(file_path, 'kddcup.names')

# Fixed seed so the train/test split (and therefore the CSVs written below)
# is identical on every Restart & Run All.
SPLIT_SEED = 42

data = pd.read_csv(data_filename, header=None)
# The names file is parsed as "<column>:<type>" pairs; its first line is skipped.
header = pd.read_csv(header_filename, delimiter=':', skiprows=1, header=None)
header.columns = ['column', 'column_type']

data.columns = header['column'].tolist() + ['attack']
# Remove the trailing '.' from the attack labels (presumably of the form
# 'normal.' -- verify against the raw file). `str.rstrip` is used rather than
# `str.replace('.', '')` because the latter's meaning depends on the pandas
# version's default for `regex` ('.' is a wildcard when treated as a regex).
data['attack'] = data['attack'].str.rstrip('.')
# label: 0 for 'normal' traffic, 1 for everything else.
data['label'] = (data['attack'] != 'normal').astype(int)

# Strip whitespace before comparing so the match does not depend on the exact
# spacing after the ':' delimiter in the names file.
symbolic_columns = header.loc[header['column_type'].str.strip() == 'symbolic.', 'column'].tolist()

# One-hot encode every symbolic column. The indicator frames are collected and
# concatenated once (instead of re-copying `data` on each iteration) and cast to
# uint8 so the CSVs contain 0/1 on every pandas version (pandas >= 2.0 would
# otherwise emit bool columns, written out as True/False).
one_hot_frames = [
    pd.get_dummies(pd.Categorical(data[scol]), prefix=scol).astype(np.uint8)
    for scol in symbolic_columns
]
data = pd.concat([data] + one_hot_frames, axis=1)

# The raw symbolic columns and the textual attack name are no longer needed.
data = data.drop(columns=symbolic_columns)
data = data.drop(columns=['attack'])

data_normal = data.loc[data['label'] == 0]
data_abnormal = data.loc[data['label'] == 1]

# 70% of the normal rows go to training; the remaining normal rows form the test set.
data_normal_train = data_normal.sample(frac=0.7, random_state=SPLIT_SEED)
data_normal_test = data_normal.loc[~data_normal.index.isin(data_normal_train.index)]
assert len(data_normal_train) + len(data_normal_test) == len(data_normal), \
    'train/test split does not partition the normal rows'

print(data_normal.shape, data_normal_train.shape, data_normal_test.shape)

# The label column is only used for splitting; it is not written to disk.
data_normal_train = data_normal_train.drop(columns=['label'])
data_normal_test = data_normal_test.drop(columns=['label'])
data_abnormal = data_abnormal.drop(columns=['label'])

save_filename_1 = os.path.join(file_path, 'kddcup99_10_percent_normal.csv')
save_filename_2 = os.path.join(file_path, 'kddcup99_10_percent_normal_test.csv')
save_filename_3 = os.path.join(file_path, 'kddcup99_10_percent_abnormal_test.csv')

data_normal_train.to_csv(save_filename_1, header=False, index=False)
data_normal_test.to_csv(save_filename_2, header=False, index=False)
data_abnormal.to_csv(save_filename_3, header=False, index=False)

(97278, 122) (68095, 122) (29183, 122)


In [3]:
# Build a datasource over the normal-traffic test CSV written above, using a
# NullTransformer(8, 8), then pull the samples tagged 'NORMAL'.
# NOTE(review): this reads the *test* split (save_filename_2) but calls
# get_train_data -- confirm that is intended.
null_transformer = NullTransformer(8, 8)
kdd_datasource = KDDCup99DataSource(
    source_file=save_filename_2,
    transformer=null_transformer,
)

kdd_data = kdd_datasource.get_train_data('NORMAL')

DEBUG:root:Start file parsing.
DEBUG:root:End file parsing.


In [4]:
# Display the `data` attribute of the object returned by get_train_data('NORMAL').
kdd_data.data

array([[0.000e+00, 2.390e+02, 4.860e+02, ..., 1.000e+00, 1.000e+00,
        0.000e+00],
       [0.000e+00, 2.100e+02, 1.510e+02, ..., 1.000e+00, 1.000e+00,
        0.000e+00],
       [0.000e+00, 2.410e+02, 2.590e+02, ..., 1.000e+00, 1.000e+00,
        0.000e+00],
       ...,
       [0.000e+00, 2.890e+02, 2.440e+02, ..., 1.000e+00, 1.000e+00,
        0.000e+00],
       [0.000e+00, 2.910e+02, 1.200e+03, ..., 1.000e+00, 1.000e+00,
        0.000e+00],
       [0.000e+00, 2.190e+02, 1.234e+03, ..., 1.000e+00, 1.000e+00,
        0.000e+00]])