# Comparative Analysis

## Data Pre-processing

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from sklearn import tree
from sklearn.linear_model import LogisticRegression

# Read the two source CSVs (non-attack sample and DoS records) into dataframes
nonattack_csv = pd.read_csv('./multiclass_test/comp_analysis/non-attack_random_sample.csv')
dos_csv = pd.read_csv('./multiclass_test/comp_analysis/dos_output_OS.csv')

# Build relabelled working frames; the frames read from disk are left untouched.
# Only the first 16000 non-attack rows are kept, while every DoS row is kept.
# `assign` returns a new frame with `Label` overwritten (or appended if absent):
# 0 marks non-attack rows, 1 marks DoS rows.
nonattack_copy = nonattack_csv.head(16000).assign(Label=0)
dos_copy = dos_csv.assign(Label=1)

In [None]:
# Columns that are not model features: the attack category and the binary label
meta_cols = ['attack_cat', 'Label']

# Feature-only frames for each class
nonattack_to_normal = nonattack_copy.drop(columns=meta_cols)
dos_to_normal = dos_copy.drop(columns=meta_cols)

# Scale every feature column by 1/255 and re-attach the non-feature columns on
# the right, so the column order is: scaled features, attack_cat, Label.
# NOTE(review): dividing by 255 presumes the features are byte-valued (0-255);
# confirm against the CSV contents.
nonattack_normal = pd.concat([nonattack_to_normal.div(255), nonattack_copy[meta_cols]], axis=1)
dos_normal = pd.concat([dos_to_normal.div(255), dos_copy[meta_cols]], axis=1)

# Preview the first rows of the scaled non-attack frame
nonattack_normal.head()

In [None]:
# Train/test split 70:30, taken per class by row position (no shuffling):
# the first 11200 rows of each class go to training, the last 4800 to testing.
# NOTE(review): the two slices are disjoint only when a frame holds at least
# 16000 rows. The non-attack frame is truncated to 16000 rows when it is built;
# the length of the DoS frame is not visible here - confirm there is no overlap.
nonattack_train_split = nonattack_normal.iloc[:11200]
dos_train_split = dos_normal.iloc[:11200]

nonattack_test_split = nonattack_normal.iloc[-4800:]
dos_test_split = dos_normal.iloc[-4800:]

# Columns removed from every feature matrix
non_feature_cols = ['attack_cat', 'Label']

# Training set - train/validation split 80:20: first 8960 training rows of each
# class, non-attack rows stacked above DoS rows. y keeps `Label` as a one-column frame.
train_frame = pd.concat([nonattack_train_split.iloc[:8960], dos_train_split.iloc[:8960]], axis=0)
y_train = train_frame[['Label']]
X_train = train_frame.drop(columns=non_feature_cols)

# Validation set - train/validation split 80:20: last 2240 training rows of each class
val_frame = pd.concat([nonattack_train_split.iloc[-2240:], dos_train_split.iloc[-2240:]], axis=0)
y_val = val_frame[['Label']]
X_val = val_frame.drop(columns=non_feature_cols)

# Test set: the held-out 4800 rows of each class
test_frame = pd.concat([nonattack_test_split, dos_test_split], axis=0)
y_test = test_frame[['Label']]
X_test = test_frame.drop(columns=non_feature_cols)

## 1D Convolutional Neural Network (CNN)

In [None]:
# 1D CNN for numeric data (adapted from TensorFlow documentation), declared as
# a single layer list instead of successive `add` calls.
# NOTE(review): there is no Flatten / global-pooling layer between the last
# convolution and the Dense layers, so the Dense layers act on the last axis
# only and the model output keeps its sequence axis. The prediction cell
# compensates by indexing `[:, -1, 0]`; confirm this is the intended design.
model = models.Sequential([
    # Convolution + pooling stages over a variable-length, single-channel sequence
    layers.Conv1D(32, 3, activation='relu', input_shape=(None, 1)),
    layers.MaxPooling1D(2),
    layers.Conv1D(64, 3, activation='relu'),
    layers.MaxPooling1D(2),
    layers.Conv1D(64, 3, activation='relu'),
    # Dense layer
    layers.Dense(64, activation='relu', input_shape=(None, 64)),
    # Output layer: one sigmoid unit for the binary (non-attack vs DoS) label
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss, and accuracy
# as the single reported metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Fit the CNN for 10 epochs in mini-batches of 32; the validation split is
# passed explicitly, and the returned History object is kept in `history`
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
)

In [None]:
# Accuracy
# The model is compiled with one metric, so `evaluate` returns a two-element
# list [loss, accuracy]. Unpack it so that `test_acc` holds only the accuracy
# (previously the whole list was printed under the "Test accuracy" label).
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

In [None]:
# Score the held-out test features with the trained CNN
predictions = model.predict(X_test)

# The output is indexed as a 3-D array: for every sample keep channel 0 at the
# last position along axis 1
last_predictions = predictions[:, -1, 0]

# Threshold at 0.5: scores strictly above 0.5 become class 1, the rest class 0
binary_predictions = (last_predictions > 0.5).astype(int)

In [None]:
# Evaluation of the CNN's thresholded predictions against the test labels
true_labels = y_test.values

# For binary labels the confusion matrix rows are [TN, FP] and [FN, TP]
(TN, FP), (FN, TP) = confusion_matrix(true_labels, binary_predictions)

# Share of actual negatives that were flagged as positive
FPR = FP / (FP + TN)

for caption, count in [
    ("True Positives:", TP),
    ("True Negatives:", TN),
    ("False Positives:", FP),
    ("False Negatives:", FN),
    ("False Positive Rate:", FPR),
]:
    print(caption, count)

precision = precision_score(true_labels, binary_predictions)
recall = recall_score(true_labels, binary_predictions)

# Precision: true positives as a share of all positive predictions
print("Precision:", precision)

# Recall: true positives as a share of all actual positive cases
print("Recall:", recall)

## Support Vector Machine (SVM)

In [None]:
# Linear model: a single sigmoid output unit fed directly by every feature column
# NOTE(review): together with the binary cross-entropy loss used when this
# model is compiled, one sigmoid unit is a logistic-regression classifier; a
# linear SVM would normally use a linear output with hinge loss - confirm the
# intended model before comparing it against the LR section.
n_features = X_train.shape[1]
svm = models.Sequential()
svm.add(layers.Dense(1, activation='sigmoid', input_shape=(n_features,)))

In [None]:
# Configure training: plain SGD optimiser, binary cross-entropy loss, and
# accuracy as the single reported metric
svm.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the linear model for 100 epochs with per-epoch progress output
# (verbose=1); the validation split is passed explicitly
svm.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    verbose=1,
)

In [None]:
# Accuracy
# The model is compiled with one metric, so `evaluate` returns a two-element
# list [loss, accuracy]. Unpack it so that `test_acc` holds only the accuracy
# (previously the whole list was printed under the "Test accuracy" label).
test_loss, test_acc = svm.evaluate(X_test, y_test)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

In [None]:
# Make Predictions: score the held-out test features with the linear model
predictions_svm = svm.predict(X_test)

# Convert to binary predictions: scores strictly above 0.5 become class 1.
# The threshold must be applied to `predictions_svm`; the previous code
# thresholded `predictions` (the CNN's output), so every metric reported in
# this section was computed on the wrong model's scores.
binary_predictions_svm = np.where(predictions_svm > 0.5, 1, 0)

In [None]:
# Evaluation of the thresholded `binary_predictions_svm` against the test labels
true_labels = y_test.values

# For binary labels the confusion matrix rows are [TN, FP] and [FN, TP]
(TN, FP), (FN, TP) = confusion_matrix(true_labels, binary_predictions_svm)

# Share of actual negatives that were flagged as positive
FPR = FP / (FP + TN)

for caption, count in [
    ("True Positives:", TP),
    ("True Negatives:", TN),
    ("False Positives:", FP),
    ("False Negatives:", FN),
    ("False Positive Rate:", FPR),
]:
    print(caption, count)

precision = precision_score(true_labels, binary_predictions_svm)
recall = recall_score(true_labels, binary_predictions_svm)

# Precision: true positives as a share of all positive predictions
print("Precision:", precision)

# Recall: true positives as a share of all actual positive cases
print("Recall:", recall)

## Logistic Regression (LR)

In [None]:
# Model architecture
# scikit-learn logistic-regression classifier, constructed with no arguments so
# every hyperparameter is left at the library default.
lr = LogisticRegression()

In [None]:
# Fit the classifier on the training features. `y_train` is a one-column frame,
# so it is flattened to a 1-D label vector before being passed to `fit`.
train_labels = y_train.to_numpy().ravel()
lr.fit(X_train, train_labels)

In [None]:
# Make Predictions
# Predict on the held-out test features. The result is compared directly with
# the test labels in the following cells; no thresholding step is applied here.
predictions_lr = lr.predict(X_test)

In [None]:
# Accuracy: fraction of test rows whose predicted label equals the true label
is_correct = predictions_lr == y_test.values.ravel()
test_acc = is_correct.sum() / len(y_test)
print('Test accuracy:', test_acc)

In [None]:
# Evaluation of the logistic-regression predictions against the test labels
true_labels = y_test.values

# For binary labels the confusion matrix rows are [TN, FP] and [FN, TP]
(TN, FP), (FN, TP) = confusion_matrix(true_labels, predictions_lr)

# Share of actual negatives that were flagged as positive
FPR = FP / (FP + TN)

for caption, count in [
    ("True Positives:", TP),
    ("True Negatives:", TN),
    ("False Positives:", FP),
    ("False Negatives:", FN),
    ("False Positive Rate:", FPR),
]:
    print(caption, count)

precision = precision_score(true_labels, predictions_lr)
recall = recall_score(true_labels, predictions_lr)

# Precision: true positives as a share of all positive predictions
print("Precision:", precision)

# Recall: true positives as a share of all actual positive cases
print("Recall:", recall)