### Model Creation

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd

In [3]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
tf.test.is_built_with_cuda()

True

In [5]:
dataset = pd.read_csv("./MachineLearningCVE/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")
feature_list = dataset.columns.values

In [6]:
dataset.shape  
dataset = dataset.replace(np.inf,np.nan)    # replacing inf with nan
dataset = dataset.fillna(dataset.mean(numeric_only=True)) # ghen converting nan to mean values

In [7]:
X = dataset.iloc[:,:-1].values
print("shape of X",X.shape)

Y = dataset.iloc[:,-1].values
print("shape of Y",Y.shape)

shape of X (225745, 78)
shape of Y (225745,)


In [8]:
print("is NaN present:",np.any(np.isnan(X)))  # to check whether the array contains nan
print("is inf present:",np.any(np.isinf(X)))  # to check whether the array contains inf
X[X < 0] = 0   # to replace all negative values with zeros

is NaN present: False
is inf present: False


In [9]:
from sklearn.feature_selection import SelectKBest  # feature selection
from sklearn.feature_selection import chi2
from sklearn.feature_selection import mutual_info_classif

In [10]:
bestfeatures = SelectKBest(score_func = mutual_info_classif, k=50)
fit = bestfeatures.fit(X,Y)
#create df for scores
dfscores = pd.DataFrame(fit.scores_)
#create df for column names
dfcolumns = pd.DataFrame(feature_list)

#concat two dataframes for better visualization 
featureScores = pd.concat([dfcolumns,dfscores],axis=1)

#naming the dataframe columns
featureScores.columns = ['Selected_columns','Score_chi2']  
#print 50 best features
print(featureScores.nlargest(50,'Score_chi2')) 


                Selected_columns  Score_chi2
4    Total Length of Fwd Packets    0.652296
63             Subflow Fwd Bytes    0.652015
52           Average Packet Size    0.563777
5    Total Length of Bwd Packets    0.548783
65             Subflow Bwd Bytes    0.547825
54          Avg Bwd Segment Size    0.544711
12        Bwd Packet Length Mean    0.544562
55           Fwd Header Length.1    0.543643
34             Fwd Header Length    0.543565
0               Destination Port    0.538125
10         Bwd Packet Length Max    0.529737
66        Init_Win_bytes_forward    0.499228
8         Fwd Packet Length Mean    0.490620
53          Avg Fwd Segment Size    0.489939
6          Fwd Packet Length Max    0.486429
35             Bwd Header Length    0.476930
23                   Fwd IAT Max    0.460175
20                 Fwd IAT Total    0.457232
21                  Fwd IAT Mean    0.447460
62           Subflow Fwd Packets    0.410725
2              Total Fwd Packets    0.410311
22        

In [11]:
# print(featureScores.nlargest(50,'Score_chi2').Selected_columns.values)
featureScore_after_filter = featureScores.nlargest(50,'Score_chi2')
print(featureScore_after_filter.index[0])
count = 0
ind = []
for i in featureScore_after_filter.Score_chi2:
    if i < 0.2:
        ind.append(featureScore_after_filter.index[count])
    count = count + 1
featureScore_after_filter = featureScore_after_filter.drop(ind,axis = 0)  # contains all the filtered features
X = pd.DataFrame(X)
X = X.loc[:,featureScore_after_filter.index] # contains data after filter from feature selection
print(X)

4
          4     63   52   5    65   54   12    55    34       0   ...  \
0       12.0  12.0  9.0  0.0  0.0  0.0  0.0  40.0  40.0  54865.0  ...   
1        6.0   6.0  9.0  6.0  6.0  6.0  6.0  20.0  20.0  55054.0  ...   
2        6.0   6.0  9.0  6.0  6.0  6.0  6.0  20.0  20.0  55055.0  ...   
3        6.0   6.0  9.0  6.0  6.0  6.0  6.0  20.0  20.0  46236.0  ...   
4       12.0  12.0  9.0  0.0  0.0  0.0  0.0  40.0  40.0  54863.0  ...   
...      ...   ...  ...  ...  ...  ...  ...   ...   ...      ...  ...   
225740   6.0   6.0  9.0  6.0  6.0  6.0  6.0  20.0  20.0  61374.0  ...   
225741   6.0   6.0  9.0  6.0  6.0  6.0  6.0  20.0  20.0  61378.0  ...   
225742   6.0   6.0  9.0  6.0  6.0  6.0  6.0  20.0  20.0  61375.0  ...   
225743  12.0  12.0  9.0  0.0  0.0  0.0  0.0  40.0  40.0  61323.0  ...   
225744   6.0   6.0  9.0  6.0  6.0  6.0  6.0  20.0  20.0  61326.0  ...   

                   36   39     1             14   11     18   17  \
0       666666.666700  6.0    3.0  4.000000e+06  0.0 

In [12]:
from sklearn.preprocessing import LabelEncoder
labelencoder_y= LabelEncoder()  
Y = labelencoder_y.fit_transform(Y)  

In [13]:
from sklearn.model_selection import train_test_split  
x_train, x_test, y_train, y_test= train_test_split(X, Y, test_size= 0.2, random_state=0)

In [14]:
from sklearn.preprocessing import StandardScaler  # scaling of the data

scaler_X = StandardScaler()
x_train_scaled = scaler_X.fit_transform(x_train) # preprocessed training data
x_test_scaled = scaler_X.fit_transform(x_test) # preprocessed testing data

In [15]:
from tensorflow import keras
from tensorflow.keras import layers
model = keras.Sequential([
    keras.layers.Dense(64, input_shape=(45,), activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(2, activation='sigmoid')
])
 

In [16]:
# Compiling the model
model.compile(optimizer='adam',
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
 

In [17]:
# fitting the model
model.fit(x_train_scaled, y_train, epochs=10, batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1ff87f80af0>

In [18]:
model.evaluate(x_test_scaled, y_test,64)



[0.0029791747219860554, 0.9992690682411194]

In [19]:
y_pred = model.predict(x_test_scaled,64)
y_pred = np.argmax(y_pred,axis = 1)
y_pred

array([0, 0, 1, ..., 0, 0, 1], dtype=int64)

In [20]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.9992690868014795

In [69]:
# model.predict(x_test_scaled[2].reshape(1,-1))
x_test_scaled[2].reshape(1,-1)

array([[-0.27738582, -0.27738582,  1.14408913,  0.137769  ,  0.137769  ,
         0.92878733,  0.92878733, -0.10679647, -0.10679647, -0.44589202,
         0.83919496,  0.47512355, -0.3093438 , -0.3093438 , -0.2792493 ,
         0.04532061, -0.47598851, -0.49096568, -0.43305804, -0.11647557,
        -0.11647557, -0.4847499 , -0.09525446,  1.15679871,  0.43595419,
         0.82929725, -0.27922469, -0.29759103, -0.09307815, -0.25782377,
         0.75655787, -0.08823673, -0.20570462,  0.0569975 ,  0.0569975 ,
        -0.11249304,  0.68596957, -0.51605685, -0.02475563, -0.31669887,
        -0.50487211, -0.55649425, -0.12234392, -0.58359852, -0.28901442]])

### Model Demonstration

In [None]:
dataset_2 = pd.read_csv("./Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")

In [101]:

X_recieved = dataset.iloc[18885,:-1].values  # to test a particular value on when recieved from IOT device
X_recieved = pd.DataFrame(X_recieved)
X_recieved = X_recieved.loc[featureScore_after_filter.index,:]
X_recieved = np.array(X_recieved).reshape(1,-1)
X_recieved = np.asarray(X_recieved).astype(np.float32)

In [113]:
pred = model.predict(X_recieved)
pred

array([[0., 1.]], dtype=float32)

In [114]:
# SMS service
import requests
import base64

if(pred[0][1] == 1):
  appId = "01GH72WYV283GVWHYW2VAZYBBR"
  accessKey = "b597f6be-6dbe-4a0a-859a-0d605259648e"
  accessSecret = "W~f5K5yF-lFBN_lk9.lu5Qja6R"
  projectId = "82ee0b94-e39f-42e5-bb3d-353a07588059"
  channel = "SMS"
  identity = "+918792884722"
  url = "https://us.conversation.api.sinch.com/v1/projects/" + projectId + "/messages:send"

  data = accessKey + ":" + accessSecret
  encodedBytes = base64.b64encode(data.encode("utf-8"))
  accessToken = str(encodedBytes, "utf-8")

  payload = {
    "app_id": appId,
    "recipient": {
        "identified_by": {
            "channel_identities": [
              {
                  "channel": channel,
                  "identity": identity
              }  
              ]
        }
    },
    "message": {
        "text_message": {
            "text": "Attack Alert"
        }
    }  
  }

  headers = {
    "Content-Type": "application/json",
    "Authorization": "Basic " + accessToken
  }

  response = requests.post(url, json=payload, headers=headers)

  data = response.json()
  print(data)

{'message_id': '01GH9V233DGX5G281JTHKB9RB0', 'accepted_time': '2022-11-07T20:03:42.829Z'}


In [115]:
# Email service

import smtplib

if(pred[0][1] == 1):
    # creates SMTP session
    s = smtplib.SMTP('smtp.gmail.com', 587)
    
    # start TLS for security
    s.starttls()
    
    # Authentication
    s.login("adigupta239@gmail.com", "mxwzguwzdcxcrqdw")
    
    # message to be sent
    subject = "Suspicious activity detected on your network"
    text = 'Dear user, our model have detected some malicious traffic on your network which could be a possible attempt of a DDOS attack. You can perform the following action :\n \n 1.Disconnect all your devices from the network.\n 2.Check if any unknown software is installed on your device. \n 3.Contact a security personnel ASAP. \n  \nHope you find this alert helpful and took the action at right time.'

    message = 'Subject: {}\n\n{}'.format(subject, text)

    # sending the mail
    s.sendmail("adigupta239@gmail.com", "arcyjain2002@gmail.com", message)
    
    # terminating the session
    s.quit()