In [2]:
# STEP 1: Upload the ZIP file (archives.zip)
from google.colab import files
uploaded = files.upload()

Saving archives.zip to archives.zip


In [3]:
# STEP 2: Unzip the file into 'archives/' folder
import zipfile
import os

with zipfile.ZipFile("archives.zip", "r") as zip_ref:
    zip_ref.extractall("archives")

print("Extraction complete.")


Extraction complete.


In [4]:
# STEP 3: Load training and testing datasets
import pandas as pd

train_path = "archives/archives/UNSW_NB15_training-set.csv"
test_path = "archives/archives/UNSW_NB15_testing-set.csv"

df_train = pd.read_csv(train_path)
df_test = pd.read_csv(test_path)

print("Train shape:", df_train.shape)
print("Test shape:", df_test.shape)


Train shape: (82332, 45)
Test shape: (175341, 45)


In [5]:
from sklearn.preprocessing import LabelEncoder

# # Drop any unnamed columns (if any)
df_train = df_train.loc[:, ~df_train.columns.str.contains('^Unnamed')]
df_test = df_test.loc[:, ~df_test.columns.str.contains('^Unnamed')]

# Drop label and attack_cat
X_train = df_train.drop(['label', 'attack_cat'], axis=1)
y_train = df_train['label']

X_test = df_test.drop(['label', 'attack_cat'], axis=1)
y_test = df_test['label']

# Separate features and labels
X_train = df_train.drop('label', axis=1)
y_train = df_train['label']

X_test = df_test.drop('label', axis=1)
y_test = df_test['label']


# Encode categorical columns
cat_cols = X_train.select_dtypes(include=['object']).columns

encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    combined = pd.concat([X_train[col], X_test[col]], axis=0).astype(str)
    le.fit(combined)

    X_train[col] = le.transform(X_train[col].astype(str))
    X_test[col] = le.transform(X_test[col].astype(str))

    encoders[col] = le

# Encode label column (if not numeric)
if y_train.dtype == 'object':
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train.astype(str))
    y_test = label_encoder.transform(y_test.astype(str))


In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# STEP 6: Evaluate the model
print(f"Accuracy: {accuracy_score(y_test, y_pred)*100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))



Accuracy: 94.95%

Classification Report:
              precision    recall  f1-score   support

           0       0.86      1.00      0.93     56000
           1       1.00      0.93      0.96    119341

    accuracy                           0.95    175341
   macro avg       0.93      0.96      0.94    175341
weighted avg       0.96      0.95      0.95    175341



In [7]:
import pickle

# Save the trained model to a .pkl file
with open("unsw_ids_model.pkl", "wb") as f:
    pickle.dump(model, f)
