In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix, classification_report, roc_auc_score, roc_curve, RocCurveDisplay
from sklearn.model_selection import StratifiedKFold




In [5]:
def create_model_results_df(
    all_y_train_true,
    all_y_train_pred,
    all_y_test_true,
    all_y_test_pred,
    train_accuracies,
    train_recalls,
    test_accuracies,
    test_recalls,
    all_y_train_pred_proba,
    all_y_test_pred_proba,
    model_name,
    test_flag,
):
    """Summarise a model's evaluation as a metrics table plus diagnostic figures.

    Parameters
    ----------
    all_y_train_true, all_y_test_true
        True labels; used for the log-loss, classification report,
        confusion matrix and ROC computations.
    all_y_train_pred
        Accepted for interface compatibility; not read by this function.
    all_y_test_pred
        Predicted labels used for the classification report and the
        confusion matrix.
    train_accuracies, train_recalls, test_accuracies, test_recalls
        Collections of scores that are reduced with ``np.mean`` / ``np.std``
        (presumably one value per cross-validation fold -- confirm with caller).
    all_y_train_pred_proba, all_y_test_pred_proba
        Scores handed to ``log_loss`` (and, for the test set, to
        ``roc_auc_score`` / ``RocCurveDisplay``). Either may be ``None``, in
        which case every metric and figure that depends on it is skipped.
        NOTE(review): looks like these are positive-class probabilities of a
        binary problem -- confirm against the caller.
    model_name
        Used as the name of the value column and in the figure titles.
    test_flag
        Text prefix placed in front of the fold-score and log-loss labels.

    Returns
    -------
    tuple
        ``(results_df, cm_fig, roc_fig)``. ``results_df`` has a ``'metric'``
        column and a ``model_name`` column whose numeric values are rendered
        as strings with three decimals. ``roc_fig`` is ``None`` when
        ``all_y_test_pred_proba`` is ``None``.

    Notes
    -----
    The confusion-matrix row labels and the class filter below are written
    for a two-class (0/1) problem.
    """
    # log_loss is not part of the notebook's top-level sklearn imports,
    # so it is brought into scope here.
    from sklearn.metrics import log_loss

    print(f"Model name received: '{model_name}'")

    report_dict = classification_report(all_y_test_true, all_y_test_pred, output_dict=True)
    cm = confusion_matrix(all_y_test_true, all_y_test_pred)

    metrics = []
    values = []

    # Mean and standard deviation for each score collection. Building the
    # labels from one template keeps the eight rows consistently named.
    score_groups = [
        ('Train Accuracy', train_accuracies),
        ('Test Accuracy', test_accuracies),
        ('Train Recall', train_recalls),
        ('Test Recall', test_recalls),
    ]
    for group_label, scores in score_groups:
        metrics.append(f'{test_flag} {group_label} Mean')
        values.append(np.mean(scores))
        metrics.append(f'{test_flag} {group_label} Standard Deviation')
        values.append(np.std(scores))

    # Log loss needs probability scores; skip it when none were supplied,
    # mirroring the guard already used for AUC-ROC and the ROC figure.
    if all_y_train_pred_proba is not None:
        train_log_loss = log_loss(all_y_train_true, all_y_train_pred_proba)
        metrics.append(f'{test_flag} Train Log Loss')
        values.append(train_log_loss)
        print(train_log_loss)

    if all_y_test_pred_proba is not None:
        test_log_loss = log_loss(all_y_test_true, all_y_test_pred_proba)
        metrics.append(f'{test_flag} Test Log Loss')
        values.append(test_log_loss)
        print(test_log_loss)

    # Overall accuracy from the classification report
    metrics.append('Overall Accuracy')
    values.append(report_dict['accuracy'])

    # Confusion matrix cells, flattened row by row.
    # NOTE(review): the four labels assume a 2x2 matrix; zip() silently stops
    # at four cells if the matrix is larger.
    cm_labels = [
        'True Negative (Class 0)',
        'False Positive (Class 1)',
        'False Negative (Class 0)',
        'True Positive (Class 1)',
    ]
    for label, value in zip(cm_labels, cm.ravel()):
        metrics.append(f'Confusion Matrix - {label}')
        values.append(value)

    # Per-class precision, recall and f1-score. The report also contains
    # aggregate keys ('accuracy', 'macro avg', ...), so only keys that are
    # one of the expected class labels are used here.
    class_keys = {'0.0', '1.0', '0', '1'}
    for class_label in sorted(report_dict.keys()):
        if class_label not in class_keys:
            continue
        for metric in ['precision', 'recall', 'f1-score']:
            metrics.append(f'{metric.capitalize()} (Class {class_label})')
            values.append(report_dict[class_label][metric])

    # Macro and weighted averages
    for avg_type in ['macro avg', 'weighted avg']:
        for metric in ['precision', 'recall', 'f1-score']:
            metrics.append(f'{avg_type.capitalize()} {metric.capitalize()}')
            values.append(report_dict[avg_type][metric])

    # AUC-ROC is only available when test scores were supplied
    if all_y_test_pred_proba is not None:
        auc_roc = roc_auc_score(all_y_test_true, all_y_test_pred_proba)
        metrics.append('AUC-ROC')
        values.append(auc_roc)

    results_df = pd.DataFrame({
        'metric': metrics,
        model_name: values
    })

    # Render numeric values with 3 decimals. NumPy scalar types are listed
    # explicitly so the result does not depend on pandas dtype inference.
    numeric_types = (int, float, np.integer, np.floating)
    results_df[model_name] = results_df[model_name].apply(
        lambda x: f'{x:.3f}' if isinstance(x, numeric_types) else str(x)
    )

    # Confusion matrix heatmap
    cm_fig, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
    ax.set_title(f'Confusion Matrix {model_name}')
    ax.set_ylabel('Actual Class\n(0 not upset and 1 upset)')
    ax.set_xlabel('Predicted Class\n(0 not upset and 1 upset)')

    roc_fig = None

    if all_y_test_pred_proba is not None:
        # ROC curve with the chance diagonal drawn for reference
        roc_fig, roc_ax = plt.subplots(figsize=(8, 6))
        RocCurveDisplay.from_predictions(
            all_y_test_true,
            all_y_test_pred_proba,
            ax=roc_ax,
            name=model_name
        )
        roc_ax.set_title(f'ROC Curve - {model_name}')
        roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

    return results_df, cm_fig, roc_fig
