In [1]:
!pip install tensorflow-addons


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.1.2[0m[39;49m -> [0m[32;49m22.2.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, make_scorer, f1_score, roc_auc_score
import warnings
import tensorflow_addons as tfa
warnings.filterwarnings('ignore')

In [3]:
# Load the raw captures.
df = pd.read_csv("./attack_dataset.csv") # attack dataset
attack_classes = pd.read_csv("./attack_labels.csv") # labels
bonafide = pd.read_csv('./bonafide_dataset_20191121.csv.gz') # bonafide dataset

# Label every attack packet by matching its source IP against the label table.
# The inner join discards packets whose source IP has no label entry.
df_labeled = df.merge(attack_classes, how='inner', left_on='ip.src', right_on='ip')
df_labeled = df_labeled.drop(columns=['ip'])  # join key duplicates 'ip.src'

bonafide['label'] = "bonafide" # create column label on bonafide dataset

# Report the class balance only when both frames share the same schema.
# Index.equals returns False for differing column counts, where an elementwise
# `==` comparison would raise instead.
if df_labeled.columns.equals(bonafide.columns):
    examples_attack = df_labeled.shape[0]
    examples_bonafide = bonafide.shape[0]
    total = examples_attack+examples_bonafide
    print('Total examples of {0} with {1:0.2f} of attack and {2:0.2f} bonafide packets'.format(total, examples_attack/total, examples_bonafide/total))

# Columns parsed from hexadecimal text into integers.
fields = ['eth.type', 'ip.id', 'ip.flags', 'ip.checksum', 'ip.dsfield', 'tcp.flags', 'tcp.checksum']


def _hex_to_int(value):
    """Parse the string form of `value` as a base-16 integer."""
    return int(str(value), 16)


# Only the bonafide frame is NaN-filled before parsing (as in the original
# flow); a NaN in a hex column of the attack frame would raise ValueError.
bonafide = bonafide.fillna(0)
for frame in (df_labeled, bonafide):
    for field in fields:
        frame[field] = frame[field].apply(_hex_to_int)

full_data = pd.concat([bonafide, df_labeled])

# Keep only packets whose IP protocol number is 6; `wrong_proto` holds the
# per-label counts of the rows that are discarded.
wrong_proto = full_data[full_data['ip.proto'] != 6]['label'].value_counts().values
full_data = full_data[full_data['ip.proto'] == 6]

# Capture metadata, addresses and the raw flag words.
capture_columns = ['frame_info.time', 'frame_info.encap_type', 'frame_info.time_epoch', 'frame_info.number',
                   'frame_info.len', 'frame_info.cap_len', 'eth.type', 'ip.flags', 'ip.src', 'ip.dst',
                   'ip.version', 'ip.proto', 'tcp.flags']

# Columns with zero variance.
zero_variance_columns = ['ip.hdr_len', 'ip.tos', 'ip.flags.rb',
                         'ip.flags.mf', 'ip.frag_offset']

# Columns removed on purpose:
# - ip.ttl: a previous attempt showed the model learning the LAN architecture
#   (TTL=62, i.e. the scan tools' TTL=64 minus 2 routers in the infrastructure).
# - sequence, checksum and acknowledge fields: they are random.
# - source and destination ports: to stay agnostic of the service ports.
# - tcp.options.mss_val: difficult to retrieve as LKM.
# - tcp.window_size: to allow running iperf on ESP32 and to reduce false positives.
excluded_columns = ["ip.checksum", "ip.ttl", "tcp.checksum", "tcp.dstport", "tcp.seq", "tcp.srcport",
                    "tcp.ack", "tcp.options.mss_val", "tcp.window_size"]

# Non-inplace drops: `full_data` is a filtered slice at this point, and
# mutating it in place is a chained-assignment hazard.
full_data = (
    full_data
    .drop(columns=capture_columns)
    .drop(columns=zero_variance_columns)
    .drop(columns=excluded_columns)
)

full_data.info()

Total examples of 140163 with 0.26 of attack and 0.74 bonafide packets
<class 'pandas.core.frame.DataFrame'>
Int64Index: 128455 entries, 1 to 37068
Data columns (total 15 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   ip.id               128455 non-null  int64  
 1   ip.flags.df         128455 non-null  float64
 2   ip.len              128455 non-null  float64
 3   ip.dsfield          128455 non-null  int64  
 4   tcp.len             128455 non-null  float64
 5   tcp.hdr_len         128455 non-null  float64
 6   tcp.flags.fin       128455 non-null  float64
 7   tcp.flags.syn       128455 non-null  float64
 8   tcp.flags.reset     128455 non-null  float64
 9   tcp.flags.push      128455 non-null  float64
 10  tcp.flags.ack       128455 non-null  float64
 11  tcp.flags.urg       128455 non-null  float64
 12  tcp.flags.cwr       128455 non-null  float64
 13  tcp.urgent_pointer  128455 non-null  float64
 14  label         

In [4]:
# Encode the target as integers: 0 = bonafide, 1 = any attack label.
# Boolean masks replace the chained assignment `full_data.label[mask] = ...`,
# which can write to a temporary copy and leaves the column with object dtype.
# Treating an existing 0 as bonafide keeps this cell idempotent on re-run.
label_col = full_data['label']
is_attack = (label_col != "bonafide") & (label_col != 0)

# .to_numpy() assigns positionally, so the duplicated index labels left by the
# earlier concat cannot affect alignment.
full_data = full_data.assign(label=is_attack.astype('int64').to_numpy())
print(full_data['label'].value_counts())  # class balance after encoding

full_data = full_data.fillna(0)
X = full_data.drop(columns=["label"])
y = full_data['label']

# Fixed seed so the stratified splits are reproducible across kernel restarts.
SPLIT_SEED = 42

# 80/20 train/test split, then 80/20 train/validation split of the training
# part (64% / 16% / 20% overall), each stratified on the label.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SPLIT_SEED)
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, stratify=y_train, random_state=SPLIT_SEED)

In [5]:
class Sign(tf.keras.layers.Layer):
  """Keras layer that applies `tf.sign` element-wise to its input."""

  def __init__(self):
    # No configuration of its own; just initialise the base Layer.
    super().__init__()

  def call(self, inputs):
    """Return the element-wise sign of `inputs`."""
    signed = tf.sign(inputs)
    return signed


In [6]:
# Linear SVM expressed as a single Dense unit: hinge loss plus an L2 penalty on
# the kernel. The layer has no activation, so its output is the raw decision
# score.
n_features = x_train.shape[1]
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(
        1,
        input_shape=(n_features,),
        use_bias=True,
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    )
])
model.compile(
    loss='hinge',
    optimizer='adadelta',
    # The string 'accuracy' resolves to binary accuracy with a 0.5 cut-off for
    # a single-unit output. Keras' hinge loss maps 0/1 labels to -1/+1, so the
    # decision boundary of the score is 0; threshold the metric there.
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 15        
                                                                 
Total params: 15
Trainable params: 15
Non-trainable params: 0
_________________________________________________________________


2022-08-15 16:10:41.254111: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-15 16:10:41.397483: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2022-08-15 16:10:41.397502: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2022-08-15 16:10:41.398396: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN

In [7]:
# Wrap the training split as a tf.data pipeline in batches of 256 rows.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.batch(256)

# Validation split in batches of 1024 rows.
val_ds = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_ds = val_ds.batch(1024)

In [None]:
# Train for at most 1000 epochs, but stop once the validation loss has not
# improved for 20 consecutive epochs and roll back to the best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=1000,
    callbacks=[early_stop],
    verbose=2,  # one summary line per epoch instead of a progress bar
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [None]:
# The model outputs a raw linear score trained with hinge loss (targets mapped
# to -1/+1), so the class boundary is at 0, not at 0.5 as for a probability.
decision_scores = model.predict(x_test)
mlp_pred = (decision_scores > 0).astype(np.int32)
print(f1_score(y_test, mlp_pred))

In [None]:
# Convert the Keras model into a serialized TensorFlow Lite model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to disk.
with open('svm.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)