In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix, classification_report, roc_curve
)

import matplotlib.pyplot as plt
import seaborn as sns
import joblib


In [7]:
X_train = joblib.load("../data/processed_data/X_train.pkl")
X_test = joblib.load("../data/processed_data/X_test.pkl")
y_train = joblib.load("../data/processed_data/Y_train.pkl")
y_test = joblib.load("../data/processed_data/Y_test.pkl")


In [8]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=200,random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss',random_state=42)
}

In [9]:
results={}

In [12]:
for name,model in models.items():
    print(f"Training : {name}")
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:1]
    results[name]={
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1 Score": f1_score(y_test, y_pred),
        #"ROC-AUC": roc_auc_score(y_test, y_pred_proba)
    }

results_df = pd.DataFrame(results).T
print("Model Performance Summary :")
display(results_df)

Training : Logistic Regression
Training : Random Forest
Training : XGBoost


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Model Performance Summary :


Unnamed: 0,Accuracy,Precision,Recall,F1 Score
Logistic Regression,0.947728,0.97415,0.919865,0.94623
Random Forest,0.999894,0.999789,1.0,0.999894
XGBoost,0.999666,0.999332,1.0,0.999666
