In [2]:
import pandas
from tensorflow.keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from keras import backend as K

In [3]:
# load dataset
dataframe = pandas.read_csv("unsw-nb15/test2000.csv", header=None)
dataset = dataframe.values
X = dataset[:,0:39].astype(float)
Y = dataset[:,39]
Y[0]

'Normal'

In [4]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
print(encoded_Y[0])
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

class_list = list(encoder.classes_)
ind_list = list(encoder.transform(class_list))
class_list

5


['Analysis',
 'Backdoor',
 'DoS',
 'Exploits',
 'Fuzzers',
 'Normal',
 'Reconnaissance',
 'Shellcode',
 'Worms']

In [5]:
# define baseline model
def baseline_model():
	# create model
	model = Sequential()
	model.add(Dense(20, input_dim=39, activation='relu'))
	#model.add(Dense(10, activation='relu'))
	# model.add(Dense(22, activation='relu'))
	model.add(Dense(9, activation='softmax'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	# set learning rate	
	K.set_value(model.optimizer.learning_rate, 0.01)
	return model

In [18]:
estimator = KerasClassifier(build_fn=baseline_model, epochs=100, batch_size=500, verbose=0)
kfold = KFold(n_splits=10, shuffle=True)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Baseline: 42.30% (5.14%)


In [21]:
estimator.fit(X, dummy_y, epochs=100, batch_size=500, verbose=0)


<tensorflow.python.keras.callbacks.History at 0x7f85ca1ec880>

In [52]:
dataframe_test = pandas.read_csv("unsw-nb15/multi_test.csv", header=None)
dataset_test = dataframe_test.values
print(dataset_test[0])


[1.21478000e-01 6.00000000e+00 4.00000000e+00 2.58000000e+02
 1.72000000e+02 7.40874900e+01 2.52000000e+02 2.54000000e+02
 1.41589424e+04 8.49536523e+03 0.00000000e+00 0.00000000e+00
 2.42956000e+01 8.37500000e+00 3.01775470e+01 1.18306040e+01
 2.55000000e+02 6.21772692e+08 2.20253363e+09 2.55000000e+02
 0.00000000e+00 0.00000000e+00 0.00000000e+00 4.30000000e+01
 4.30000000e+01 0.00000000e+00 0.00000000e+00 1.00000000e+00
 0.00000000e+00 1.00000000e+00 1.00000000e+00 1.00000000e+00
 1.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 1.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+00]


In [53]:
X_test = dataset_test[:,0:39].astype(float)
Y_test = dataset_test[:,39].astype(int)
Y_test[0]

5

In [57]:
y_pred_keras = (estimator.predict(X_test) > 0.5).astype("int32").ravel()
counter = 0
counter_acc = 0
counter_miss = 0
print(y_pred_keras[0])
for i in range(len(y_pred_keras)):
    if (y_pred_keras[i] != 5 & Y_test[i] == 5):
        counter += 1
    if (y_pred_keras[i] == 5 & Y_test[i] != 5):
        counter_miss += 1
    if (y_pred_keras[i] == Y_test[i]):
        counter_acc += 1
    
fpr = counter/len(y_pred_keras)
miss = counter_miss/len(y_pred_keras)
print("False Positive Rate: " + str(fpr))
print("Miss Rate: " + str(miss))
print("Accuracy: " + str(1 - miss - fpr))

1
False Positive Rate: 0.3258393644384371
Miss Rate: 0.2004037846253871
Accuracy: 0.4737568509361758
