## Model Evaluation and Baseline Testing

**Imports for metrics and plots**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_auc_score, roc_curve
)


**Metric function definitions**

In [None]:
# Compute the standard classification metrics (plus ROC-AUC when probabilities are supplied)
def evaluate_classification(y_true, y_pred, y_proba=None):
    """Return a dict of classification metrics for one set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        y_proba: Optional scores/probabilities for the positive class. When
            given, a ``'ROC-AUC'`` entry is added to the result.

    Returns:
        Dict with the keys ``'Accuracy'``, ``'Precision'``, ``'Recall'`` and
        ``'F1 Score'`` (in that order), followed by ``'ROC-AUC'`` if
        ``y_proba`` was supplied.

    Note:
        Every scorer is called with its scikit-learn defaults; precision,
        recall and F1 therefore presumably expect binary labels -- confirm
        before reusing this on a multi-class target.
    """
    # Label -> scorer; insertion order fixes the column order of any table
    # built from the returned dict.
    label_scorers = {
        'Accuracy': accuracy_score,
        'Precision': precision_score,
        'Recall': recall_score,
        'F1 Score': f1_score,
    }
    metrics = {
        label: scorer(y_true, y_pred)
        for label, scorer in label_scorers.items()
    }

    # ROC-AUC needs continuous scores rather than hard labels, so it is only
    # computed when the caller provides them.
    if y_proba is not None:
        metrics['ROC-AUC'] = roc_auc_score(y_true, y_proba)

    return metrics


**Confusion Matrix visualization**

In [None]:
def plot_confusion_matrix(y_true, y_pred, model_name="Model"):
    """Draw the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        model_name: Label appended to the plot title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    counts = confusion_matrix(y_true, y_pred)

    # Work on an explicit Axes instead of the implicit "current" figure.
    _, ax = plt.subplots(figsize=(5, 4))
    # fmt='d' prints the cell annotations as integer counts.
    sns.heatmap(counts, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)

    ax.set_title(f'Confusion Matrix - {model_name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()

**Performance comparison setup**

In [None]:
# Metric dictionaries collected so far, keyed by model name.
results = {}


def compare_model_performance(model_name, y_true, y_pred, y_proba=None):
    """Evaluate one model's predictions and record them in ``results``.

    Args:
        model_name: Key under which the metrics are stored; an existing entry
            with the same name is overwritten.
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        y_proba: Optional positive-class scores, forwarded to
            ``evaluate_classification``.

    Returns:
        None. The module-level ``results`` dict is updated in place.
    """
    model_metrics = evaluate_classification(y_true, y_pred, y_proba)
    results[model_name] = model_metrics


**Baseline model training**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate the feature columns from the 'Outcome' target column.
y = df['Outcome']
X = df.drop(columns='Outcome')

# Hold out 20% of the rows for testing. stratify=y keeps the class
# proportions of the target similar in both splits; random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardize features. The scaler is fitted on the training split only and
# then reused on the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Baseline model: logistic regression on the standardized features.
lr = LogisticRegression(max_iter=500, random_state=42).fit(X_train, y_train)

# Hard label predictions, plus the predicted probability of the second class
# in lr.classes_ (presumably Outcome == 1 -- confirm against the data).
y_pred_lr = lr.predict(X_test)
y_proba_lr = lr.predict_proba(X_test)[:, 1]


**Evaluate and visualize**

In [None]:
# Evaluate the baseline once: compare_model_performance() computes the metrics
# and records them in `results`, so there is no need to call
# evaluate_classification() a second time on the same inputs.
baseline_name = "Logistic Regression (Baseline)"
compare_model_performance(baseline_name, y_test, y_pred_lr, y_proba_lr)

# Take a copy so later edits to `baseline_metrics` cannot alter the entry
# stored in `results`.
baseline_metrics = dict(results[baseline_name])

# Display confusion matrix
plot_confusion_matrix(y_test, y_pred_lr, baseline_name)

# Show metrics as a one-row table (kept as the last expression so the
# notebook renders it).
pd.DataFrame([baseline_metrics])
