In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    classification_report
)


In [2]:
def evaluate_model(model, X_test, y_test):
    """Print a confusion matrix, a classification report and (if possible) ROC-AUC.

    Parameters
    ----------
    model : fitted classifier
        Must implement ``predict``. When it also exposes ``predict_proba``,
        the second probability column is used as the ROC-AUC score.
    X_test : array-like
        Features handed to ``model.predict`` / ``model.predict_proba``.
    y_test : iterable of labels
        Ground-truth labels aligned with ``X_test``. Any iterable works
        (pandas Series, NumPy array, list) and any binary label encoding.

    Returns
    -------
    None
        All results are printed.
    """
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

    if y_prob is not None:
        # predict_proba columns follow model.classes_, so column 1 scores
        # classes_[1]. Take the positive label from the model instead of
        # hard-coding 'No'/'Yes', which only worked for a pandas Series with
        # exactly those labels.
        classes = getattr(model, "classes_", None)
        if classes is not None:
            positive_label = classes[1]
        else:
            # No classes_ attribute: fall back to the greater label, which is
            # what a sorted two-class ordering would place in column 1.
            positive_label = max(set(y_test))

        y_true = [int(label == positive_label) for label in y_test]
        print("ROC-AUC:", roc_auc_score(y_true, y_prob))


In [3]:
import joblib

# Load the four pickled artifacts from ../models in a single unpacking
# assignment; target order mirrors the path order below.
# NOTE: joblib.load unpickles its input, so only point it at trusted files.
(
    X_train_final,
    X_test_scaled,
    y_train_final,
    y_test,
) = (
    joblib.load(artifact_path)
    for artifact_path in (
        '../models/X_train_final.pkl',
        '../models/X_test_scaled.pkl',
        '../models/y_train_final.pkl',
        '../models/y_test.pkl',
    )
)

LOGISTIC REGRESSION

In [4]:
# Fit logistic regression (max_iter set to 1000) on the training artifacts,
# then print its metrics on the test artifacts. fit() returns the estimator,
# so construction and fitting are chained.
lr = LogisticRegression(max_iter=1000).fit(X_train_final, y_train_final)
evaluate_model(lr, X_test_scaled, y_test)


[[952  83]
 [213 161]]
              precision    recall  f1-score   support

          No       0.82      0.92      0.87      1035
         Yes       0.66      0.43      0.52       374

    accuracy                           0.79      1409
   macro avg       0.74      0.68      0.69      1409
weighted avg       0.78      0.79      0.77      1409

ROC-AUC: 0.8309514583171871


RANDOM FOREST

In [5]:
# Fit a 200-tree random forest with a fixed seed, then print its metrics on
# the test artifacts. fit() returns the estimator, so the calls are chained.
rf = RandomForestClassifier(n_estimators=200, random_state=42).fit(
    X_train_final, y_train_final
)
evaluate_model(rf, X_test_scaled, y_test)

[[905 130]
 [162 212]]
              precision    recall  f1-score   support

          No       0.85      0.87      0.86      1035
         Yes       0.62      0.57      0.59       374

    accuracy                           0.79      1409
   macro avg       0.73      0.72      0.73      1409
weighted avg       0.79      0.79      0.79      1409

ROC-AUC: 0.83461210571185


SUPPORT VECTOR MACHINE

In [6]:
# Fit an SVC with probability estimates enabled (needed for predict_proba in
# evaluate_model) and a fixed seed, then print its metrics.
# NOTE(review): the recorded output of this cell shows every test sample
# predicted as 'Yes' and a ROC-AUC of exactly 0.5, i.e. the fitted SVC is not
# discriminating on X_test_scaled. Possibly a preprocessing/scale mismatch
# between X_train_final and X_test_scaled -- TODO confirm before trusting
# this comparison.
svm = SVC(probability=True, random_state=42).fit(X_train_final, y_train_final)
evaluate_model(svm, X_test_scaled, y_test)

[[   0 1035]
 [   0  374]]
              precision    recall  f1-score   support

          No       0.00      0.00      0.00      1035
         Yes       0.27      1.00      0.42       374

    accuracy                           0.27      1409
   macro avg       0.13      0.50      0.21      1409
weighted avg       0.07      0.27      0.11      1409

ROC-AUC: 0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
