In [18]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report, roc_auc_score, accuracy_score
import seaborn as sns

sns.set(style="darkgrid")


In [59]:
def compute_metrics(y_true, y_pred):
    # ROC Curve and AUC
    # fpr, tpr, _ = roc_curve(y_true, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
    tpr = tp / (tp + fn) if (tp + fn) > 0 else 0

    roc_auc = roc_auc_score(y_true, y_pred)

    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred)
    fnr = cm[1][0] / (cm[1][0] + cm[1][1])  # False Negative Rate
    precision = cm[1][1] / (cm[1][1] + cm[0][1]) if (cm[1][1] + cm[0][1]) != 0 else 0
    recall = cm[1][1] / (cm[1][1] + cm[1][0])
    
    return {
        "roc_auc": roc_auc,
        "fnr": fnr,
        "precision": precision,
        "recall": recall,
        "fpr": fpr,
        "tpr": tpr,
    }


In [60]:
def plot_confusion_matrix(cm, model_name):
    plt.figure(figsize=(6, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False, square=True)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(f"Confusion Matrix: {model_name}")
    plt.savefig(f"plots/{model_name}_confusion_matrix.png")
    plt.close()



In [61]:
def plot_roc_curve(fpr, tpr, roc_auc, model_name):
    plt.figure()
    plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {roc_auc:.2f})")
    plt.plot([0, 1], [0, 1], "k--", label="Random Guess")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title(f"ROC Curve for {model_name}")
    plt.legend(loc="lower right")
    plt.savefig(f"plots/{model_name}_roc_curve.png")
    plt.close()


In [62]:
def plot_model_comparison(metrics_df, metric_x="fpr", metric_y="roc_auc"):
    plt.figure(figsize=(8, 6))
    for _, row in metrics_df.iterrows():
        plt.scatter(row[metric_x], row[metric_y], label=row["model"], s=100)
        plt.text(row[metric_x], row[metric_y], row["model"], fontsize=9, ha="right")
    plt.xlabel(metric_x.upper())
    plt.ylabel(metric_y.upper())
    plt.title(f"Model Comparison: {metric_x.upper()} vs {metric_y.upper()}")
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.savefig("plots/model_comparison_scatter.png")
    plt.close()
    print("Model comparison scatter plot saved!")


In [80]:
def generate_plots_and_metrics(prediction_folder="artifacts/predictions"):
    model_metrics = []

    # Iterate over prediction files
    for file in os.listdir(prediction_folder):
        if file.endswith(".csv"):
            model_name = file.replace("_predictions.csv", "")
            print(f"Processing predictions for: {model_name}")

            # Load predictions
            predictions = pd.read_csv(os.path.join(prediction_folder, file))
            y_true = predictions["y_true"]
            y_pred = predictions["y_pred"]

            # Compute metrics
            metrics = compute_metrics(y_true, y_pred)
            metrics["model"] = model_name
            model_metrics.append(metrics)

            # Plot ROC Curve
            plot_roc_curve(metrics["fpr"], metrics["tpr"], metrics["roc_auc"], model_name)

            # Plot Confusion Matrix
            cm = confusion_matrix(y_true, y_pred)
            plot_confusion_matrix(cm, model_name)

    # Create scatter plot for model comparison
    metrics_df = pd.DataFrame(model_metrics)
    plot_model_comparison(metrics_df, metric_x="fnr", metric_y="roc_auc")
    
    return metrics_df


In [81]:
metrics_df = generate_plots_and_metrics()

Processing predictions for: dense_nn
Processing predictions for: dense_nn_SMOTE
Processing predictions for: lightgbm_bce
Processing predictions for: light_nn
Processing predictions for: light_nn_SMOTE
Processing predictions for: logistic_regression
Processing predictions for: random_forest
Model comparison scatter plot saved!


In [77]:
from tabulate import tabulate

print(tabulate(metrics_df, headers="keys", tablefmt="pretty"))


+---+--------------------+----------------------+-----------------------+--------------------+-----------------------+--------------------+---------------------+
|   |      roc_auc       |         fnr          |       precision       |       recall       |          fpr          |        tpr         |        model        |
+---+--------------------+----------------------+-----------------------+--------------------+-----------------------+--------------------+---------------------+
| 0 |        0.5         |         1.0          |          0.0          |        0.0         |          0.0          |        0.0         |      dense_nn       |
| 1 | 0.5762475057530141 | 0.043478260869565216 | 0.0019208521598672865 | 0.9565217391304348 |  0.8040267276244065   | 0.9565217391304348 |   dense_nn_SMOTE    |
| 2 | 0.9595236284268865 | 0.07079646017699115  |  0.12696493349455865  | 0.9292035398230089 | 0.010156282969235746  | 0.9292035398230089 |    lightgbm_bce     |
| 3 | 0.4067883655323736 | 0

In [78]:
def save_table_as_image(df, output_file="table.png"):
    fig, ax = plt.subplots(figsize=(8, len(df) * 0.5))  # Adjust size based on the number of rows
    ax.axis("tight")
    ax.axis("off")
    table = ax.table(
        cellText=df.values,
        colLabels=df.columns,
        cellLoc="center",
        loc="center"
    )
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.auto_set_column_width(col=list(range(len(df.columns))))
    # plt.show()
    plt.savefig(output_file, bbox_inches="tight")
    plt.close()
    print(f"Table saved as {output_file}")

# Example usage
save_table_as_image(metrics_df, "plots/model_evaluation_metrics.png")

Table saved as plots/model_evaluation_metrics.png
