# Evaluation of both pipeline models

## Logistic Regression

A full classification report is already available for the random forest model, so the report for logistic regression is computed manually here from its confusion matrix.

In [7]:
import numpy as np

# Confusion-matrix cells for the logistic-regression model.
# Layout: rows are the actual class, columns the predicted class.
lr_TN, lr_FP = 39, 26
lr_FN, lr_TP = 27, 93

# The same four counts as a 2x2 array, kept for any cell that wants the matrix form.
confusion_matrix_lr = np.array([lr_TN, lr_FP, lr_FN, lr_TP]).reshape(2, 2)

print("Confusion Matrix (Logistic Regression)\n")
print(
    f"[[{lr_TN:3d} {lr_FP:3d}]",
    f" [{lr_FN:3d} {lr_TP:3d}]]\n",
    sep="\n",
)


def _f1_from(precision, recall):
    """Return the harmonic mean of precision and recall."""
    return 2 * precision * recall / (precision + recall)


# Row sums: how many samples truly belong to each class.
lr_support_0 = lr_TN + lr_FP
lr_support_1 = lr_TP + lr_FN
lr_total = lr_support_0 + lr_support_1

# Column sums: how many samples were predicted as each class.
_lr_predicted_0 = lr_TN + lr_FN
_lr_predicted_1 = lr_TP + lr_FP

# Negative class (0): its "hits" are the true negatives.
lr_precision_0 = lr_TN / _lr_predicted_0
lr_recall_0 = lr_TN / lr_support_0
lr_f1_0 = _f1_from(lr_precision_0, lr_recall_0)

# Positive class (1): its "hits" are the true positives.
lr_precision_1 = lr_TP / _lr_predicted_1
lr_recall_1 = lr_TP / lr_support_1
lr_f1_1 = _f1_from(lr_precision_1, lr_recall_1)

# Share of all samples that sit on the matrix diagonal.
lr_accuracy = (lr_TP + lr_TN) / lr_total

# Two-decimal report table with one row per class, then overall accuracy.
_rule = "-" * 60
print("Classification Report (Logistic Regression)\n")
print(f"{'Class':>12} {'Precision':>10} {'Recall':>10} {'F1-score':>10} {'Support':>10}")
print(_rule)
print(f"{'0':>12} {lr_precision_0:10.2f} {lr_recall_0:10.2f} {lr_f1_0:10.2f} {lr_support_0:10d}")
print(f"{'1':>12} {lr_precision_1:10.2f} {lr_recall_1:10.2f} {lr_f1_1:10.2f} {lr_support_1:10d}")
print(_rule)
print(f"{'accuracy':>12} {'':>10} {'':>10} {lr_accuracy:10.2f} {lr_total:10d}")




Confusion Matrix (Logistic Regression)

[[ 39  26]
 [ 27  93]]

Classification Report (Logistic Regression)

       Class  Precision     Recall   F1-score    Support
------------------------------------------------------------
           0       0.59       0.60       0.60         65
           1       0.78       0.78       0.78        120
------------------------------------------------------------
    accuracy                             0.71        185


## Random Forest

In [6]:
# Random Forest confusion matrix values
# (rows = actual class, columns = predicted class)
rf_TN = 38
rf_FP = 27
rf_FN = 19
rf_TP = 101

# Supports (actual class counts)
rf_support_0 = rf_TN + rf_FP
rf_support_1 = rf_TP + rf_FN
rf_total = rf_support_0 + rf_support_1

# All metrics below are derived from the four counts above instead of being
# typed in as rounded constants, so they keep full precision and cannot drift
# out of sync with the matrix. At two decimals they print exactly the values
# that were previously hard-coded (0.67/0.58/0.62, 0.79/0.84/0.81, 0.75).

# Class 0 metrics
rf_precision_0 = rf_TN / (rf_TN + rf_FN)
rf_recall_0 = rf_TN / (rf_TN + rf_FP)
rf_f1_0 = 2 * rf_precision_0 * rf_recall_0 / (rf_precision_0 + rf_recall_0)

# Class 1 metrics
rf_precision_1 = rf_TP / (rf_TP + rf_FP)
rf_recall_1 = rf_TP / (rf_TP + rf_FN)
rf_f1_1 = 2 * rf_precision_1 * rf_recall_1 / (rf_precision_1 + rf_recall_1)

# Accuracy: correctly classified samples over all samples
rf_accuracy = (rf_TP + rf_TN) / rf_total

# Print confusion matrix
print("Confusion Matrix (Random Forest)\n")
print(f"[[{rf_TN:3d} {rf_FP:3d}]")
print(f" [{rf_FN:3d} {rf_TP:3d}]]\n")

# Print simplified classification report
print("Classification Report (Random Forest)\n")
print(f"{'Class':>12} {'Precision':>10} {'Recall':>10} {'F1-score':>10} {'Support':>10}")
print("-" * 60)
print(f"{'0':>12} {rf_precision_0:10.2f} {rf_recall_0:10.2f} {rf_f1_0:10.2f} {rf_support_0:10d}")
print(f"{'1':>12} {rf_precision_1:10.2f} {rf_recall_1:10.2f} {rf_f1_1:10.2f} {rf_support_1:10d}")
print("-" * 60)
print(f"{'accuracy':>12} {'':>10} {'':>10} {rf_accuracy:10.2f} {rf_total:10d}")


Confusion Matrix (Random Forest)

[[ 38  27]
 [ 19 101]]

Classification Report (Random Forest)

       Class  Precision     Recall   F1-score    Support
------------------------------------------------------------
           0       0.67       0.58       0.62         65
           1       0.79       0.84       0.81        120
------------------------------------------------------------
    accuracy                             0.75        185


In [10]:
# Headline metrics used by the model comparison chart.
# Precision, recall and F1 are the class-1 values from each model's report.
# lr_accuracy and rf_accuracy are used exactly as the earlier cells define
# them, so they are not re-assigned here (the former `x = x` lines were no-ops).

# Logistic Regression metrics
lr_precision = lr_precision_1
lr_recall = lr_recall_1
lr_f1 = lr_f1_1

# Random Forest metrics
rf_precision = rf_precision_1
rf_recall = rf_recall_1
rf_f1 = rf_f1_1


In [12]:
import matplotlib.pyplot as plt
import numpy as np

# Metric labels for the x-axis; the two value lists below follow the same order.
metrics = ["Accuracy", "Precision", "Recall", "F1-score"]

lr_values = [lr_accuracy, lr_precision, lr_recall, lr_f1]
rf_values = [rf_accuracy, rf_precision, rf_recall, rf_f1]

# One group per metric; each model's bar is shifted half a bar width off-centre
# so the pair sits side by side around the tick.
x = np.arange(len(metrics))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x - width/2, lr_values, width, label="Logistic Regression")
ax.bar(x + width/2, rf_values, width, label="Random Forest")

ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.set_ylabel("Score")
ax.set_title("Model Performance Comparison (Target: status)")
ax.set_ylim(0, 1)  # every plotted metric is a fraction between 0 and 1
ax.legend()

# Write the figure to disk and close it so it is not also rendered inline.
fig.tight_layout()
fig.savefig("../docs/model_comparison.png", dpi=200)
plt.close(fig)

print("Saved: docs/model_comparison.png")


Saved: docs/model_comparison.png
