In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
from sklearn.tree import export_graphviz
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Mount Google Drive at /content/drive; the dataset and results paths used by
# this notebook all live under /content/drive/MyDrive.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime — confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# A. Load UNSW-NB15 train/test
# ──────────────────────────────────────────────────────────────────────────────
unsw_train_path = "/content/drive/MyDrive/Research Project/UNSW_NB15_training-set.csv"
unsw_test_path  = "/content/drive/MyDrive/Research Project/UNSW_NB15_testing-set.csv"

unsw_train = pd.read_csv(unsw_train_path)
unsw_test  = pd.read_csv(unsw_test_path)

print("UNSW-NB15 columns:", list(unsw_train.columns)[:10], "... (total:", len(unsw_train.columns), ")")

# ──────────────────────────────────────────────────────────────────────────────
# B. Ensure binary label
# ──────────────────────────────────────────────────────────────────────────────
def _binlab(df):
    """Build a 0/1 label Series (1 = attack, 0 = normal) for one split.

    Prefers an existing 'label' column (any non-zero value becomes 1).
    Falls back to 'attack_cat', where every category other than "normal"
    (compared case-insensitively, ignoring surrounding whitespace) becomes 1.

    Raises:
        ValueError: if the frame has neither 'label' nor 'attack_cat'.
    """
    if "label" in df.columns:
        return (df["label"] != 0).astype(int)
    if "attack_cat" in df.columns:
        # Strip before comparing so a padded " Normal" is not counted as an attack.
        category = df["attack_cat"].astype(str).str.strip().str.lower()
        return (category != "normal").astype(int)
    raise ValueError("UNSW-NB15 needs either 'label' or 'attack_cat' to build a binary label.")

# Each split is checked on its own, so a test file without 'label' gets a
# label derived from 'attack_cat' (or a clear error) instead of a KeyError.
unsw_train["label"] = _binlab(unsw_train)
unsw_test["label"]  = _binlab(unsw_test)

# ──────────────────────────────────────────────────────────────────────────────
# C. Choose numeric features
# ──────────────────────────────────────────────────────────────────────────────
categorical_cols_unsw = [c for c in ["proto","service","state","attack_cat","srcip","dstip"]
                         if c in unsw_train.columns]
# 'id' is a row counter in the partitioned CSVs: numeric, but it describes the
# file layout rather than the traffic, so it must not be fed to the model.
id_cols_unsw = [c for c in ["id"] if c in unsw_train.columns]
drop_cols_unsw = categorical_cols_unsw + id_cols_unsw + ["label"]

num_cols_unsw = [c for c in unsw_train.columns
                 if c not in drop_cols_unsw and pd.api.types.is_numeric_dtype(unsw_train[c])]

# The feature list is derived from the train split; fail early and readably
# if the test split does not carry the same columns.
missing_in_test = [c for c in num_cols_unsw if c not in unsw_test.columns]
if missing_in_test:
    raise ValueError(f"UNSW-NB15 test split is missing feature columns: {missing_in_test}")

X_train = unsw_train[num_cols_unsw].values
X_test  = unsw_test[num_cols_unsw].values
y_train     = unsw_train["label"].values
y_test      = unsw_test["label"].values

In [None]:
# 5) Build the Random Forest from a hyper-parameter mapping, then fit it
rf_params = dict(
    n_estimators=200,
    max_depth=None,
    min_samples_leaf=5,
    n_jobs=-1,
    random_state=42,   # fixed seed so repeated runs use the same value
)
clf = RandomForestClassifier(**rf_params)
# Kept as the last expression so the cell still displays the fitted estimator.
clf.fit(X_train, y_train)

In [None]:
# 6) Predict on the held-out split, then print the per-class report and the
#    confusion matrix
y_pred = clf.predict(X_test)

class_names = ["normal", "attack"]
report_text = classification_report(y_test, y_pred, target_names=class_names)
print("Classification Report:\n", report_text)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

In [None]:
from sklearn.metrics import roc_auc_score

def evaluate_model(clf, X_test, y_test):
    """Score a fitted binary classifier on a held-out split.

    Parameters:
        clf: fitted estimator exposing ``predict`` and, optionally,
            ``predict_proba`` / ``classes_``.
        X_test: feature matrix passed to ``clf.predict``.
        y_test: true labels for ``X_test``; the positive class is assumed
            to be encoded as 1.

    Returns:
        dict with keys "Accuracy", "Precision", "Recall", "F1", "ROC-AUC"
        and "ConfusionMatrix" (nested list). "ROC-AUC" is None when the
        estimator has no ``predict_proba``, when it exposes no column for
        class 1, or when ``y_test`` contains fewer than two distinct labels
        (the score is undefined in that case).
    """
    y_pred = clf.predict(X_test)

    # Locate the positive-class probability column through classes_ instead of
    # assuming it is column 1: a model fitted on a single class exposes only
    # one column and a hard-coded [:, 1] would raise IndexError.
    y_proba = None
    if hasattr(clf, "predict_proba"):
        proba = clf.predict_proba(X_test)
        classes = list(getattr(clf, "classes_", [0, 1]))
        if 1 in classes:
            y_proba = proba[:, classes.index(1)]

    # ROC-AUC needs both classes in y_test; skip it rather than lose every
    # other metric to an exception (or write NaN into the JSON results).
    has_both_classes = pd.Series(y_test).nunique() > 1
    can_score_auc = y_proba is not None and has_both_classes

    results = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1": f1_score(y_test, y_pred),
        "ROC-AUC": roc_auc_score(y_test, y_proba) if can_score_auc else None,
        "ConfusionMatrix": confusion_matrix(y_test, y_pred).tolist()
    }
    return results

In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 7) Save to file
# ──────────────────────────────────────────────────────────────────────────────
import json
from pathlib import Path

results = evaluate_model(clf, X_test, y_test)

results_path = Path("/content/drive/MyDrive/Results/UNSW/Random Forest/Baseline_Random_Forest_Results.txt")
# Create the results folder tree first: on a fresh Drive the open() below would
# otherwise raise FileNotFoundError after the model has already been trained.
results_path.parent.mkdir(parents=True, exist_ok=True)
with results_path.open("w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)