In [6]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report, roc_auc_score
import seaborn as sns


In [9]:
def compute_metrics(y_true, y_pred):
    """Summarise classification quality for one set of predictions.

    The same ``y_pred`` values feed the ROC computation and the confusion
    matrix.

    Args:
        y_true: Ground-truth labels.
        y_pred: Model predictions aligned with ``y_true``.

    Returns:
        dict with the keys ``roc_auc``, ``fnr``, ``precision``, ``recall``,
        ``fpr`` and ``tpr``. ``fpr`` and ``tpr`` are the arrays produced by
        ``roc_curve``; the remaining entries are scalars.
    """
    # ROC points first, then the scalar area under the curve.
    false_pos_rates, true_pos_rates, _thresholds = roc_curve(y_true, y_pred)
    area_under_curve = roc_auc_score(y_true, y_pred)

    # Pull out the three confusion-matrix cells the rates below rely on
    # (rows index the true label, columns the predicted label).
    matrix = confusion_matrix(y_true, y_pred)
    false_neg = matrix[1][0]
    true_pos = matrix[1][1]
    false_pos = matrix[0][1]

    actual_pos = false_neg + true_pos
    predicted_pos = true_pos + false_pos

    miss_rate = false_neg / actual_pos  # false negative rate
    if predicted_pos != 0:
        precision = true_pos / predicted_pos
    else:
        # Nothing was predicted positive: report 0 rather than dividing by zero.
        precision = 0
    recall = true_pos / actual_pos

    return {
        "roc_auc": area_under_curve,
        "fnr": miss_rate,
        "precision": precision,
        "recall": recall,
        "fpr": false_pos_rates,
        "tpr": true_pos_rates,
    }


In [10]:
def plot_confusion_matrix(cm, model_name, output_dir="artifacts"):
    """Render a confusion matrix as an annotated heatmap and save it as a PNG.

    Args:
        cm: Confusion matrix to draw. Cells are annotated with ``fmt="d"``,
            so the values must be integer counts.
        model_name: Used in the plot title and as the file-name prefix.
        output_dir: Directory that receives
            ``{model_name}_confusion_matrix.png``. Created if it does not
            exist. Defaults to ``"artifacts"``.
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        sns.heatmap(
            cm, annot=True, fmt="d", cmap="Blues", cbar=False, square=True, ax=ax
        )
        ax.set_xlabel("Predicted Label")
        ax.set_ylabel("True Label")
        ax.set_title(f"Confusion Matrix: {model_name}")
        fig.savefig(os.path.join(output_dir, f"{model_name}_confusion_matrix.png"))
    finally:
        # Close this specific figure even when drawing or saving fails, so
        # figures do not pile up in memory when called in a loop.
        plt.close(fig)



In [11]:
def plot_roc_curve(fpr, tpr, roc_auc, model_name, output_dir="artifacts"):
    """Plot a ROC curve against the chance diagonal and save it as a PNG.

    Args:
        fpr: False positive rates (x-axis values of the curve).
        tpr: True positive rates (y-axis values of the curve).
        roc_auc: Area under the curve, shown in the legend with two decimals.
        model_name: Used in the plot title and as the file-name prefix.
        output_dir: Directory that receives ``{model_name}_roc_curve.png``.
            Created if it does not exist. Defaults to ``"artifacts"``.
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fig, ax = plt.subplots()
    try:
        ax.plot(fpr, tpr, label=f"ROC Curve (AUC = {roc_auc:.2f})")
        # Dashed diagonal: the reference line labelled "Random Guess".
        ax.plot([0, 1], [0, 1], "k--", label="Random Guess")
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")
        ax.set_title(f"ROC Curve for {model_name}")
        ax.legend(loc="lower right")
        fig.savefig(os.path.join(output_dir, f"{model_name}_roc_curve.png"))
    finally:
        # Close this specific figure even when drawing or saving fails, so
        # figures do not pile up in memory when called in a loop.
        plt.close(fig)


In [12]:
def plot_model_comparison(metrics_df, metric_x="fnr", metric_y="roc_auc", output_dir="artifacts"):
    """Scatter one point per model for two metrics and save the figure.

    Args:
        metrics_df: DataFrame with one row per model. Must contain a
            ``"model"`` column plus the columns named by ``metric_x`` and
            ``metric_y``.
        metric_x: Column plotted on the x-axis.
        metric_y: Column plotted on the y-axis.
        output_dir: Directory that receives ``model_comparison_scatter.png``.
            Created if it does not exist. Defaults to ``"artifacts"``.
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        for _, row in metrics_df.iterrows():
            ax.scatter(row[metric_x], row[metric_y], label=row["model"], s=100)
            # Annotate each point with its model name, right-aligned at the marker.
            ax.text(row[metric_x], row[metric_y], row["model"], fontsize=9, ha="right")
        ax.set_xlabel(metric_x.upper())
        ax.set_ylabel(metric_y.upper())
        ax.set_title(f"Model Comparison: {metric_x.upper()} vs {metric_y.upper()}")
        ax.grid(True, linestyle="--", alpha=0.7)
        fig.tight_layout()
        fig.savefig(os.path.join(output_dir, "model_comparison_scatter.png"))
    finally:
        # Close this specific figure even when drawing or saving fails.
        plt.close(fig)

    # Only reached when the save succeeded.
    print("Model comparison scatter plot saved!")


In [13]:
def generate_plots_and_metrics(prediction_folder="artifacts/predictions"):
    """Evaluate every prediction CSV in a folder and write the plots.

    For each ``*.csv`` file in ``prediction_folder`` this reads the
    ``y_true`` and ``y_pred`` columns, computes metrics, and saves a ROC
    curve and a confusion-matrix heatmap. Once all files are processed, a
    scatter plot comparing the models on FNR vs ROC AUC is saved.

    Args:
        prediction_folder: Directory containing the prediction CSV files.
            The model name is the file name without a trailing
            ``_predictions.csv`` (or without ``.csv`` when that suffix is
            absent).
    """
    suffix = "_predictions.csv"
    model_metrics = []

    # os.listdir returns entries in arbitrary order; sort so processing order
    # (and therefore the log output and DataFrame row order) is reproducible.
    for file in sorted(os.listdir(prediction_folder)):
        if not file.endswith(".csv"):
            continue

        # Strip only a trailing suffix. str.replace would leave ".csv" in the
        # name for files lacking "_predictions" and would also remove the
        # pattern from the middle of a name.
        if file.endswith(suffix):
            model_name = file[: -len(suffix)]
        else:
            model_name = os.path.splitext(file)[0]
        print(f"Processing predictions for: {model_name}")

        # Load predictions
        predictions = pd.read_csv(os.path.join(prediction_folder, file))
        y_true = predictions["y_true"]
        y_pred = predictions["y_pred"]

        # Compute metrics
        metrics = compute_metrics(y_true, y_pred)
        metrics["model"] = model_name
        model_metrics.append(metrics)

        # Plot ROC Curve
        plot_roc_curve(metrics["fpr"], metrics["tpr"], metrics["roc_auc"], model_name)

        # Plot Confusion Matrix
        cm = confusion_matrix(y_true, y_pred)
        plot_confusion_matrix(cm, model_name)

    if not model_metrics:
        # Nothing to compare: avoid saving an empty scatter plot and
        # reporting it as a successful comparison.
        print(f"No prediction CSV files found in: {prediction_folder}")
        return

    # Create scatter plot for model comparison
    metrics_df = pd.DataFrame(model_metrics)
    plot_model_comparison(metrics_df, metric_x="fnr", metric_y="roc_auc")


In [14]:
# Run the evaluation with the default prediction folder ("artifacts/predictions").
generate_plots_and_metrics()

Processing predictions for: random_forest
Processing predictions for: logistic_regression
Model comparison scatter plot saved!
