In [283]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [284]:
import pandas as pd
import time
from sklearn.model_selection import train_test_split # For splitting the data
from sklearn.preprocessing import StandardScaler # For normalizing the data
from sklearn.metrics import confusion_matrix # For creating the confurion matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [285]:
# This function gives us the time value in miliseconds.
def current_milli_time():
    return round(time.time() * 1000)

# This function calculates the accuracy rate of a confusion matrix.
def calculate_AR(confusion_matrix):
    return (confusion_matrix[0][0] + confusion_matrix[1][1]) / (confusion_matrix[0][0] + confusion_matrix[0][1] + confusion_matrix[1][0] + confusion_matrix[1][1])

def print_values(algorithm:str, accuracy_rate:float, training_time:int):
    print(algorithm, "Accuracy rate", accuracy_rate, "Training time:", training_time)

In [286]:
#Load the dataset
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Detecting-RPL-Attacks/Results/combined-files.csv", sep=";")

In [287]:
# Source and destination IP addresses are extracted from the datasets before the normalization process.
X=df.iloc[:,3:16].values
y=df.iloc[:,16:17].values.ravel()

In [288]:
# The dataset was split into test and training datasets in the amount of 2/3. (2/3 training, 1/3 testing).
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)

In [289]:
# Normalizing the data
sc=StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

## Logistic Regression

In [290]:
from sklearn.feature_selection import SelectKBest, f_classif

# Perform feature selection
selector = SelectKBest(score_func=f_classif, k=10)  # Select top 10 features (adjust the value as needed)
x_train_selected = selector.fit_transform(x_train, y_train)
x_test_selected = selector.transform(x_test)

# Train the model on the selected features
lr_start_time = current_milli_time()  # Obtaining initial time of the training
logr = LogisticRegression(random_state=0) # Creating the logistic regression object
logr.fit(x_train_selected, y_train) # Training the data
lr_end_time = current_milli_time()  # Obtaining ending time of the training
lr_duration = lr_end_time - lr_start_time # Calculating the duration
y_pred_lr = logr.predict(x_test_selected) # Predicting data
cm_lr = confusion_matrix(y_test, y_pred_lr) # Creating confusion matrix
ar_lr = calculate_AR(cm_lr) # Calculating accuracy rate.

# Print the accuracy
print_values("Logistic Regression", ar_lr, lr_duration)


Logistic Regression Accuracy rate 0.6362359550561798 Training time: 335


## Random Forest

In [291]:
###### Scripts for Random Forest Classification
rf_start_time = current_milli_time() # Obtaining initial time of the training
rfc = RandomForestClassifier(n_estimators=100, criterion='gini') # Creating the Random Forest Classification object
rfc.fit(x_train, y_train) # Training the data
rf_end_time = current_milli_time() # Obtaining ending time of the training
rf_duration = rf_end_time - rf_start_time # Calculating the duration
y_pred_rfc = rfc.predict(x_test) # Predicting data
cm_rfc = confusion_matrix(y_test, y_pred_rfc) # Creating confusion matrix
ar_rfc = calculate_AR(cm_rfc) # Calculating accuracy rate.
ar_rfc

0.8712328767123287

## Naive Bayes

In [292]:
###### Scripts for Naive Bayes Classifier
nbstart_time = current_milli_time() # Obtaining initial time of the training
gnb = GaussianNB() # Creating the Naive Bayes Classifier object
gnb.fit(x_train,y_train) # Training the data
nbend_time = current_milli_time() # Obtaining ending time of the training
NBduration = nbend_time - nbstart_time # Calculating the duration
y_pred_nb = gnb.predict(x_test) # Predicting data
cm_nb = confusion_matrix(y_test,y_pred_nb) # Creating confusion matrix
ar_nb = calculate_AR(cm_nb) # Calculating accuracy rate.
ar_nb

0.6387665198237885

## KNN Classifier

In [293]:
###### Scripts for KNN Classifier
knnstart_time = current_milli_time() # Obtaining initial time of the training
knn = KNeighborsClassifier() # Creating the KNN Classifier object
knn.fit(x_train,y_train) # Training the data
knnend_time = current_milli_time() # Obtaining ending time of the training
knnduration=knnend_time - knnstart_time # Calculating the duration
y_pred_knn = knn.predict(x_test) # Predicting data
cm_knn = confusion_matrix(y_test,y_pred_knn) # Creating confusion matrix
ar_knn = calculate_AR(cm_knn) # Calculating accuracy rate.

## Decision Tree 

In [294]:
###### Scripts for Decision Tree Classifier
dtstart_time = current_milli_time() # Obtaining initial time of the training
dtc=DecisionTreeClassifier(criterion='entropy') # Creating the Decision Tree Classifier object
dtc.fit(x_train,y_train) # Training the data
dtend_time = current_milli_time() # Obtaining ending time of the training
DTduration = dtend_time - dtstart_time # Calculating the duration
y_pred_dtc = dtc.predict(x_test) # Predicting data
cm_dtc = confusion_matrix(y_test,y_pred_dtc) # Creating confusion matrix
ar_dtc = calculate_AR(cm_dtc) # Calculating accuracy rate.
ar_dtc 

0.8130563798219584

## Accuracy

In [295]:
# Printing the results
print("")
print_values("Logistic Regression", ar_lr, LRduration)
print_values("Random Forest Classifation", ar_rfc, RFduration)
print_values("Decision Tree Classifier", ar_dtc, DTduration)
print_values("Naive Bayes Classifier", ar_nb, NBduration)
print_values("KNN Classifier", ar_knn, knnduration)



Logistic Regression Accuracy rate 0.6362359550561798 Training time: 249
Random Forest Classifation Accuracy rate 0.8712328767123287 Training time: 226
Decision Tree Classifier Accuracy rate 0.8130563798219584 Training time: 118
Naive Bayes Classifier Accuracy rate 0.6387665198237885 Training time: 7
KNN Classifier Accuracy rate 0.7474747474747475 Training time: 17
