In [1]:
!pip install tensorflow-addons


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.1.2[0m[39;49m -> [0m[32;49m22.2.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, make_scorer, f1_score, roc_auc_score
import warnings
import tensorflow_addons as tfa
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv("./attack_dataset.csv") # attack dataset
attack_classes = pd.read_csv("./attack_labels.csv") # labels
bonafide = pd.read_csv('./bonafide_dataset_20191121.csv.gz') # bonafide dataset

df_labeled = df.merge(attack_classes, how='inner', left_on='ip.src', right_on='ip')
df_labeled.drop(['ip'], axis=1, inplace=True)
df_labeled.head(2)

bonafide['label'] = "bonafide" # create column label on bonafide dataset

if (df_labeled.columns == bonafide.columns).all():
    examples_attack = df_labeled.shape[0]
    examples_bonafide = bonafide.shape[0]
    total = examples_attack+examples_bonafide
    print('Total examples of {0} with {1:0.2f} of attack and {2:0.2f} bonafide packets'.format(total, examples_attack/total, examples_bonafide/total))
    
fields = ['eth.type', 'ip.id', 'ip.flags', 'ip.checksum', 'ip.dsfield', 'tcp.flags', 'tcp.checksum']

for field in fields:
    df_labeled[field] = df_labeled[field].apply(lambda x: int(str(x), 16))
    
bonafide = bonafide.fillna(0)
for field in fields:
    bonafide[field] = bonafide[field].apply(lambda x: int(str(x), 16))
    
full_data = pd.concat([bonafide, df_labeled])

wrong_proto = full_data[full_data['ip.proto'] != 6]['label'].value_counts().values
full_data = full_data[full_data['ip.proto'] == 6]

full_data.drop(columns=['frame_info.time', 'frame_info.encap_type', 'frame_info.time_epoch', 'frame_info.number', 
                        'frame_info.len', 'frame_info.cap_len', 'eth.type', 'ip.flags', 'ip.src', 'ip.dst',
                        'ip.version', 'ip.proto', 'tcp.flags'], axis=1, inplace=True)

# remove columns with zero variance
full_data.drop(columns=['ip.hdr_len', 'ip.tos', 'ip.flags.rb', 
                        'ip.flags.mf', 'ip.frag_offset'], axis=1, inplace=True)

# It is removed ttl because previous attempt shows that it is learning the LAN architecture TTL=62
# (from scan tools TTL=64 minus 2 routers in the infrastructure)

# sequence, checksum and acknowledge features because they are random

# removed source and destination ports to be agnostic regarding the service ports

# removed tcp.options.mss_val because it is dificult to be retrieved as LKM

# removed tcp.window_size to allow running iperf on ESP32 and to reduce false positives

full_data.drop(columns=["ip.checksum", "ip.ttl", "tcp.checksum", "tcp.dstport", "tcp.seq", "tcp.srcport", 
                        "tcp.ack", "tcp.options.mss_val", "tcp.window_size"], axis=1, inplace=True)

full_data.info()

Total examples of 140163 with 0.26 of attack and 0.74 bonafide packets
<class 'pandas.core.frame.DataFrame'>
Int64Index: 128455 entries, 1 to 37068
Data columns (total 15 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   ip.id               128455 non-null  int64  
 1   ip.flags.df         128455 non-null  float64
 2   ip.len              128455 non-null  float64
 3   ip.dsfield          128455 non-null  int64  
 4   tcp.len             128455 non-null  float64
 5   tcp.hdr_len         128455 non-null  float64
 6   tcp.flags.fin       128455 non-null  float64
 7   tcp.flags.syn       128455 non-null  float64
 8   tcp.flags.reset     128455 non-null  float64
 9   tcp.flags.push      128455 non-null  float64
 10  tcp.flags.ack       128455 non-null  float64
 11  tcp.flags.urg       128455 non-null  float64
 12  tcp.flags.cwr       128455 non-null  float64
 13  tcp.urgent_pointer  128455 non-null  float64
 14  label         

In [4]:
full_data.label[full_data.label == "bonafide"] = 0 # convert bonafide label to 0
full_data.label[full_data.label != 0] = 1 # convert attack labels to 1
full_data['label'].value_counts()

full_data = full_data.fillna(0)
X = full_data.drop(columns = ["label"])
y = full_data.label

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, stratify=y_train)

In [5]:
class Sign(tf.keras.layers.Layer):
  def __init__(self):
    super(Sign, self).__init__()
    
  def call(self, inputs):
    return tf.sign(inputs)


In [6]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(1, input_shape=(x_train.shape[1],), use_bias=True, kernel_regularizer=tf.keras.regularizers.l2(0.01))
])
model.compile(loss='hinge',
              optimizer='adadelta',
              metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 15        
                                                                 
Total params: 15
Trainable params: 15
Non-trainable params: 0
_________________________________________________________________


2022-08-15 16:10:41.254111: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-15 16:10:41.397483: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2022-08-15 16:10:41.397502: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2022-08-15 16:10:41.398396: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN

In [7]:
train_ds = tf.data.Dataset.from_tensor_slices((x_train,y_train)).batch(256)
val_ds = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(1024)

In [11]:
history = model.fit(train_ds, validation_data=val_ds, epochs=300)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [14]:
mlp_pred = (model.predict(x_test) > 0.5).astype(np.int32)
print(f1_score(y_test, mlp_pred))
print(tf.keras.metrics.Accuracy()(y_test, mlp_pred))

0.34097484976630316
tf.Tensor(0.76949126, shape=(), dtype=float32)


In [10]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the model.
with open('svm.tflite', 'wb') as f:
  f.write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpzd707o0_/assets


2022-08-15 16:42:40.520554: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-08-15 16:42:40.520609: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-08-15 16:42:40.524022: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /tmp/tmpzd707o0_
2022-08-15 16:42:40.526103: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-08-15 16:42:40.526168: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /tmp/tmpzd707o0_
2022-08-15 16:42:40.535013: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2022-08-15 16:42:40.537224: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-08-15 16:42:40.612761: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /tmp/tmpzd707o0_
2022-08