# Intrusion Detection System
-----------------

## Load Preprocessed Data

In [12]:
import pandas as pd

# load dataset
df = pd.read_csv('../data/preprocessed/binary_standard_combined.csv')
# rename column '0' to ' Label'
df = df.rename(columns={'0': ' Label'})
df.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,2.561161,-0.439612,-0.009825,-0.010426,-0.053793,-0.007145,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0.0
1,2.571503,-0.439609,-0.011158,-0.009424,-0.054393,-0.007143,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0.0
2,2.571558,-0.439611,-0.011158,-0.009424,-0.054393,-0.007143,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0.0
3,2.088968,-0.439612,-0.011158,-0.009424,-0.054393,-0.007143,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0.0
4,2.561052,-0.439612,-0.009825,-0.010426,-0.053793,-0.007145,-0.281253,-0.210906,-0.280686,-0.245193,...,0.002547,-0.125799,-0.104619,-0.149404,-0.101068,-0.352126,-0.109516,-0.357072,-0.339184,0.0


In [14]:
df.shape

(2830743, 79)

## Create Small Dataset

In [80]:
# create df with N samples of each class which holds at least N samples
N = 2000
small_df = pd.DataFrame()
for label in df[' Label'].unique():
    if df[df[' Label'] == label].shape[0] >= N:
        small_df = pd.concat([small_df, df[df[' Label'] == label].sample(N)], ignore_index=True)
    else:
        small_df = pd.concat([small_df, df[df[' Label'] == label]], ignore_index=True)

print(small_df[' Label'].value_counts())
print('Shape: ', small_df.shape)

 Label
0    2000
1    2000
Name: count, dtype: int64
Shape:  (4000, 79)


## Split Data

In [7]:
from sklearn.model_selection import train_test_split

small_df = df

# split data
X = small_df.drop(columns=[' Label'])
y = small_df[' Label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

## Model Creation

In [9]:
# Create DNN model from tensorflow
import setuptools.dist
import tensorflow as tf
from tensorflow import keras

# define the keras model
model = keras.Sequential()
model.add(keras.layers.Dense(40, input_dim=X_train.shape[1], activation='relu')) # hidden layer
model.add(keras.layers.Dense(40, activation='relu')) # hidden layer
model.add(keras.layers.Dense(20, activation='relu')) # hidden layer
model.add(keras.layers.Dense(1, activation='sigmoid')) # output layer
# set learning rate
opt = keras.optimizers.Adam(learning_rate=0.01)
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [10]:
# fit the keras model on the dataset
model.fit(X_train, y_train, epochs=10, batch_size=100)

Epoch 1/10
[1m28308/28308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 2ms/step - accuracy: 0.8018 - loss: nan
Epoch 2/10
[1m28308/28308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 2ms/step - accuracy: 0.8020 - loss: nan
Epoch 3/10
[1m 5189/28308[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m56s[0m 2ms/step - accuracy: 0.8022 - loss: nan

KeyboardInterrupt: 

In [92]:
# evaluate the keras model
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (accuracy*100))

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9260 - loss: 5563.6104
Accuracy: 93.33
