# 📊 Model Evaluation & Comparison Notebook
Evaluate Logistic Regression, Random Forest, and XGBoost models on phishing data.

## 🔹 Step 1: Load Dataset and Split into Train/Test

In [1]:

import pandas as pd
from sklearn.model_selection import train_test_split

# Load and preprocess data
df = pd.read_csv("/workspaces/phishing-flask-api/data/phishing.csv")
X = df.drop("Result", axis=1)
y = df["Result"].map({-1: 0, 1: 1})  # Convert labels to binary (0 = legitimate, 1 = phishing)

# Split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


## 🔹 Step 2: Train Models

In [3]:

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Initialize models
model_lr = LogisticRegression(max_iter=1000)
model_rf = RandomForestClassifier(random_state=42)
model_xgb = XGBClassifier(eval_metric='logloss', use_label_encoder=False)

# Train models
model_lr.fit(X_train, y_train)
model_rf.fit(X_train, y_train)
model_xgb.fit(X_train, y_train)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


## 🔹 Step 3: Evaluate and Compare Models

In [4]:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def evaluate_model(model, X, y):
    y_pred = model.predict(X)
    y_proba = model.predict_proba(X)[:, 1]
    return {
        "Accuracy": accuracy_score(y, y_pred),
        "Precision": precision_score(y, y_pred),
        "Recall": recall_score(y, y_pred),
        "F1 Score": f1_score(y, y_pred),
        "ROC-AUC": roc_auc_score(y, y_proba)
    }

# Evaluate all models
results = {
    "Logistic Regression": evaluate_model(model_lr, X_test, y_test),
    "Random Forest": evaluate_model(model_rf, X_test, y_test),
    "XGBoost": evaluate_model(model_xgb, X_test, y_test)
}

# Display results
results_df = pd.DataFrame(results).T.round(3)
display(results_df)


Unnamed: 0,Accuracy,Precision,Recall,F1 Score,ROC-AUC
Logistic Regression,0.922,0.931,0.932,0.931,0.978
Random Forest,0.967,0.961,0.982,0.971,0.995
XGBoost,0.97,0.965,0.983,0.974,0.996
