In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import *
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier

import xgboost, lightgbm
from mlxtend.classifier import EnsembleVoteClassifier

import warnings
warnings.filterwarnings("ignore")

OSError: dlopen(/Users/calvinseamons/IntruDetect/myenv/lib/python3.12/site-packages/lightgbm/lib/lib_lightgbm.so, 0x0006): Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib
  Referenced from: <C3EB28DD-60B6-3334-AFA2-72BBBF9DBAEF> /Users/calvinseamons/IntruDetect/myenv/lib/python3.12/site-packages/lightgbm/lib/lib_lightgbm.so
  Reason: tried: '/usr/local/opt/libomp/lib/libomp.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/usr/local/opt/libomp/lib/libomp.dylib' (no such file), '/usr/local/opt/libomp/lib/libomp.dylib' (no such file), '/usr/local/lib/libomp.dylib' (no such file), '/usr/lib/libomp.dylib' (no such file, not in dyld cache)

In [None]:
# Load the two UNSW-NB15 CSV partitions and merge them into one frame; the
# notebook performs its own train/test split in a later cell.
training_df = pd.read_csv('../UNSW_NB15/UNSW_NB15_training-set.csv')
testing_df = pd.read_csv('../UNSW_NB15/UNSW_NB15_testing-set.csv')

# ignore_index=True gives the merged frame a fresh 0..n-1 index. Without it
# both files keep their own row labels, which overlap, so label-based
# selection (.loc) or index alignment on combined_data would be ambiguous.
# The 'id' column is dropped so it is not used as a feature.
combined_data = (
    pd.concat([training_df, testing_df], ignore_index=True)
    .drop(columns=['id'])
)

combined_data.head()

In [None]:
# Determine the categorical columns (pandas object dtype)
categorical_columns = combined_data.select_dtypes(include=['object']).columns
print(f'The categorical columns are: {categorical_columns} \n')

# Determine how many unique values are in each categorical column
for column in categorical_columns:
    print(f'The column {column} has {combined_data[column].nunique()} unique values')

# Convert the categorical columns to integer codes.
# A separate encoder is fitted per column and kept in `label_encoders`, so the
# codes of any column can be mapped back to the original strings later with
# label_encoders[column].inverse_transform(...). Refitting one shared encoder
# would only retain the mapping of the last column processed.
# NOTE: this cell overwrites combined_data in place; re-running it on its own
# finds no object columns and resets label_encoders, so re-run from the
# data-loading cell instead.
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    combined_data[column] = le.fit_transform(combined_data[column])
    label_encoders[column] = le

print("\n", combined_data.head())

In [None]:
# Use the train_test_split function to split the data into training and testing sets.
#
# Both ground-truth columns are removed from the features. 'attack_cat' is the
# multi-class form of the target (UNSW-NB15 documents it as the attack category
# name, with a dedicated value for normal traffic), so leaving its encoded
# value in X would let every model read the answer straight from the input and
# report a meaninglessly high accuracy.
target_columns = ['label', 'attack_cat']
X = combined_data.drop(columns=target_columns)
y = combined_data['label']

# stratify=y keeps the class ratio identical in both partitions;
# random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the data. The scaler is fitted on the training partition only
# and then applied to the test partition, so no test statistics leak in.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
# Run a grid search to find the best hyperparameters.
# The grid has 4 * 2 * 2 * 3 = 48 combinations; with cv=3 that is 144 fits
# (plus the final refit), so this cell is expensive to run.
parameter_space = {
    'hidden_layer_sizes': [(50,), (100,), (100, 50), (150, 100, 50)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'learning_rate_init': [0.001, 0.01, 0.1]
}

# random_state fixes the weight initialisation and mini-batch shuffling, so
# candidates are compared under the same seed and the winner is reproducible
# from run to run.
mlp = MLPClassifier(max_iter=100, random_state=42)
clf = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3)
clf.fit(X_train, y_train)

print("Best parameters found:\n", clf.best_params_)

In [None]:
# Train the model using the best hyperparameters.
# NOTE(review): the values below are hard-coded, presumably copied from an
# earlier grid-search run -- confirm they still match clf.best_params_.
# random_state makes the reported accuracy reproducible across runs.
model = MLPClassifier(hidden_layer_sizes=(100,),
                      activation='relu',
                      solver='sgd',
                      learning_rate_init=0.1,
                      max_iter=100,
                      random_state=42)

model.fit(X_train, y_train)

# Evaluate the model. predict() already returns a numpy array, so only the
# ground truth needs converting; y_test stays a numpy array afterwards, as
# before.
y_pred = model.predict(X_test)
y_test = np.array(y_test)

# Accuracy = fraction of test rows whose predicted label equals the true label
accuracy = np.mean(y_pred == y_test)
print(f'The accuracy of the model is: {accuracy}')



In [None]:
# Fit four tree-ensemble classifiers on the same split and record the test
# accuracy of each one. Every estimator is seeded so the scores are
# reproducible.
RFC = RandomForestClassifier(n_estimators=150, random_state=42, n_jobs=-1)
ETC = ExtraTreesClassifier(n_estimators=200, random_state=42, n_jobs=-1)
XGB = xgboost.XGBClassifier(n_estimators=150, n_jobs=-1, random_state=42)
GBM = lightgbm.LGBMClassifier(objective='binary', n_estimators=500, n_jobs=-1,
                              verbosity=-1, random_state=42)

list_of_CLFs_names = []   # class name of each classifier, in fit order
list_of_CLFs = [RFC, ETC, XGB, GBM]
ranking = []              # test accuracy of each classifier, same order

# The loop variable is deliberately not called `clf`: that name holds the
# fitted GridSearchCV object from the hyperparameter-search cell and must not
# be overwritten here.
for ensemble_clf in list_of_CLFs:
    ensemble_clf.fit(X_train, y_train)
    score = ensemble_clf.score(X_test, y_test)   # mean accuracy on the test split
    name = type(ensemble_clf).__name__
    print("Acc: %0.5f for the %s" % (score, name))

    ranking.append(score)
    list_of_CLFs_names.append(name)

In [None]:
# TODO: repeat the experiments above as a multi-class problem, using attack_cat (instead of label) as the target

