In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.neural_network import MLPClassifier
import joblib
%matplotlib inline

In [3]:
# Loading Dataset
traffic = pd.read_csv('Mirai_dataset.csv', sep=',')

In [4]:
# seeing info for the file
traffic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 764136 entries, 0 to 764135
Columns: 117 entries, null to attack
dtypes: float64(115), int64(2)
memory usage: 682.1 MB


In [5]:
# Drop out all data except output set
X = traffic.drop(columns=["attack"])
# data without the Attack type
X

Unnamed: 0,null,null.1,null.2,null.3,null.4,null.5,null.6,null.7,null.8,null.9,...,null.106,null.107,null.108,null.109,null.110,null.111,null.112,null.113,null.114,null.115
0,1,1.999983,60.000000,0.000000e+00,1.999990,60.000000,9.094947e-13,1.999997,60.000000,4.547474e-13,...,0.000000e+00,0.0,0.0,1.000000,60.0,0.000000,60.000000,0.000000e+00,0.0,0.0
1,2,1.000000,86.000000,0.000000e+00,1.000000,86.000000,0.000000e+00,1.000000,86.000000,0.000000e+00,...,0.000000e+00,0.0,0.0,1.000000,86.0,0.000000,86.000000,0.000000e+00,0.0,0.0
2,3,1.999272,86.000000,9.094947e-13,1.999563,86.000000,0.000000e+00,1.999854,86.000000,9.094947e-13,...,0.000000e+00,0.0,0.0,1.000000,86.0,0.000000,86.000000,0.000000e+00,0.0,0.0
3,4,1.000000,60.000000,0.000000e+00,1.000000,60.000000,0.000000e+00,1.000000,60.000000,0.000000e+00,...,0.000000e+00,0.0,0.0,1.000000,60.0,0.000000,60.000000,0.000000e+00,0.0,0.0
4,5,1.000000,74.000000,0.000000e+00,1.000000,74.000000,0.000000e+00,1.000000,74.000000,0.000000e+00,...,0.000000e+00,0.0,0.0,1.000000,74.0,0.000000,74.000000,0.000000e+00,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
764131,764132,18.897961,60.185845,3.594147e+00,51.700968,60.182642,3.492246e+00,257.391336,60.137137,2.455690e+00,...,5.456968e-12,0.0,0.0,30682.136500,60.0,0.000004,60.000000,1.546141e-11,0.0,0.0
764132,764133,19.775173,60.176447,3.414055e+00,52.499151,60.179163,3.426350e+00,258.055986,60.136606,2.446246e+00,...,4.547474e-12,0.0,0.0,30682.736491,60.0,0.000004,60.000000,1.500666e-11,0.0,0.0
764133,764134,20.692078,60.167920,3.250494e+00,53.366680,60.175805,3.362736e+00,258.838753,60.136078,2.436867e+00,...,5.002221e-12,0.0,0.0,30683.478094,60.0,0.000004,60.000000,1.546141e-11,0.0,0.0
764134,764135,21.566383,60.160134,3.101020e+00,54.171935,60.172560,3.301221e+00,259.523519,60.135554,2.427549e+00,...,5.002221e-12,0.0,0.0,30684.104181,60.0,0.000004,60.000000,1.591616e-11,0.0,0.0


In [6]:
# set y to only be the attack
y = traffic['attack']
y

0         0
1         0
2         0
3         0
4         0
         ..
764131    1
764132    1
764133    1
764134    1
764135    1
Name: attack, Length: 764136, dtype: int64

In [7]:
# Split the data out into training and testing sets
#  everytime this is ran, we get a different training set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# setting the model to tbe a Decision tree
nn_classifier = MLPClassifier(solver='sgd', random_state=1, max_iter=1200, alpha=1e-4)

# test the model with the input and output data
nn_classifier.fit(X_train, y_train)
predictions = nn_classifier.predict(X_test)

# test the accuracy of the data
score = accuracy_score(y_test, predictions)
# print Score value
print(f"The accuracy score is: {score}")

The accuracy score is: 0.8414201648047251


In [8]:
# Seeing the f1_measure
f_measure = f1_score(y_test, predictions, average='weighted')
print(f"The f-measure is: {f_measure}")

The f-measure is: 0.7689585541332332


In [9]:
# Split the data out into training and testing sets
#  everytime this is ran, we get a different training set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# setting the model to tbe a Decision tree
nn_classifier = MLPClassifier(solver='adam', random_state=1, max_iter=1200, alpha=1e-4)

# test the model with the input and output data
nn_classifier.fit(X_train, y_train)
predictions = nn_classifier.predict(X_test)

# test the accuracy of the data
score = accuracy_score(y_test, predictions)
# print Score value
print(f"The accuracy score is: {score}")

The accuracy score is: 0.884523778365221


In [10]:
# Seeing the f1_measure
f_measure = f1_score(y_test, predictions, average='weighted')
print(f"The f-measure is: {f_measure}")

The f-measure is: 0.857378272529025


In [12]:
# using joblib to store the model
joblib.dump(nn_classifier, "mirai_attack_NN_classifier.joblib")

['mirai_attack_NN_classifier.joblib']

In [13]:
# using the joblib file to recall model
nn_model = joblib.load("mirai_attack_NN_classifier.joblib")
predictions = nn_model.predict(X_test)
score = accuracy_score(y_test, predictions)
score

0.884523778365221