# 02: Baseline Models

This notebook trains and evaluates baseline machine learning models:
- Logistic Regression (trained on features that were already scaled in the preprocessing notebook)
- Random Forest
- Model evaluation on train, validation, and test sets
- Feature importance analysis
- Save trained models and predictions for fairness analysis

**Note:** Fairness analysis is performed separately in notebook 03_fairness_analysis.ipynb

In [None]:
# Check if running in Google Colab
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# Install packages (for Google Colab)
if IN_COLAB:
    !pip install scikit-learn matplotlib seaborn fairlearn -q

# Imports
import pandas as pd
import numpy as np
import pickle
import json
import time
from pathlib import Path
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score, confusion_matrix, roc_curve)
import matplotlib.pyplot as plt
import seaborn as sns

# Plot appearance: seaborn grid style plus a wide default figure size
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

# Seed NumPy's global random number generator
np.random.seed(42)

# Start-up banner
for banner_line in (
    "=" * 60,
    "BASELINE MODELS - INSURANCE CLAIM PREDICTION",
    "=" * 60,
    "‚úì Setup complete!",
):
    print(banner_line)


## 1. Load Preprocessed Data

Load the processed data from the preprocessing notebook. Works both locally and on Google Colab.


In [None]:
# Determine data directory (works for both local and Colab)
if IN_COLAB:
    # Mount Google Drive so Drive-hosted project folders are reachable
    from google.colab import drive
    drive.mount('/content/drive')

    # Try common Colab paths
    possible_paths = [
        Path('/content/drive/MyDrive/6.3950_project/processed_data'),
        Path('/content/drive/Shareddrives/Insurance Fairness/processed_data'),
        Path('/content/drive/MyDrive/insurance-fairness/results'),
        Path('../results')
    ]
else:
    # Local paths
    possible_paths = [
        Path('../results'),
        Path('./results')
    ]

# Use the first candidate directory that contains the training features
data_dir = None
for path in possible_paths:
    if (path / 'X_train.pkl').exists():
        data_dir = path
        break

if data_dir is None:
    error_msg = (
        "Could not find preprocessed data. Please run notebook 01_preprocessing.ipynb first.\n"
        f"Tried paths: {', '.join([str(p) for p in possible_paths])}\n"
        "\nTroubleshooting tips:\n"
        "  - Ensure you've run 01_preprocessing.ipynb completely\n"
        "  - Check that the results/ directory exists\n"
        "  - For Colab: Verify Google Drive is mounted correctly"
    )
    raise FileNotFoundError(error_msg)

print(f"✓ Loading data from: {data_dir}")
print("Loading preprocessed data...")


def _load_pickle(filename):
    """Return the object unpickled from ``data_dir / filename``.

    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the file.
    Only load files that you produced yourself with the preprocessing notebook.

    Raises FileNotFoundError if the file does not exist.
    """
    with open(data_dir / filename, 'rb') as f:
        return pickle.load(f)


# Load all data splits
X_train = _load_pickle('X_train.pkl')
X_val = _load_pickle('X_val.pkl')
X_test = _load_pickle('X_test.pkl')
y_train = _load_pickle('y_train.pkl')
y_val = _load_pickle('y_val.pkl')
y_test = _load_pickle('y_test.pkl')
protected_train = _load_pickle('protected_train.pkl')
protected_val = _load_pickle('protected_val.pkl')
protected_test = _load_pickle('protected_test.pkl')
feature_names = _load_pickle('feature_names.pkl')

# Validate data shapes for every split (not only train), so a broken
# validation/test file is reported here rather than deep inside a later cell.
assert X_train.shape[1] == X_val.shape[1] == X_test.shape[1], "Feature count mismatch"
for split_name, X_split, y_split, protected_split in [
    ('train', X_train, y_train, protected_train),
    ('val', X_val, y_val, protected_val),
    ('test', X_test, y_test, protected_test),
]:
    assert len(y_split) == len(X_split), f"Label/feature count mismatch ({split_name})"
    assert len(protected_split) == len(X_split), \
        f"Protected attributes count mismatch ({split_name})"
# The feature-importance cell pairs feature_names with one importance per column
assert len(feature_names) == X_train.shape[1], "Feature names/feature count mismatch"

# Load scaler (data is already scaled, but we keep it for consistency)
try:
    scaler = _load_pickle('scaler.pkl')
    print("✓ Scaler loaded (data is already scaled)")
except FileNotFoundError:
    # Best effort: the scaler is not used for training in this notebook
    print("⚠ Scaler not found, but data should already be scaled")
    scaler = None

print("\n✅ DATA LOADED!")
print(f"   Training: {X_train.shape[0]:,} samples × {X_train.shape[1]} features")
print(f"   Validation: {X_val.shape[0]:,} samples")
print(f"   Test: {X_test.shape[0]:,} samples")
print(f"   Class balance (train): {(y_train == 0).sum():,} No, {(y_train == 1).sum():,} Yes")


## 2. Logistic Regression

Logistic Regression is interpretable and widely used in insurance. Note: the features were already scaled in the preprocessing notebook, so no additional scaling is applied here.


In [None]:
# ============================================================
# MODEL 1: LOGISTIC REGRESSION
# ============================================================

section_rule = "=" * 60
print("\n" + section_rule)
print("LOGISTIC REGRESSION")
print(section_rule)

# Model configuration (inputs were scaled upstream, so none is done here)
lr_params = {
    'class_weight': 'balanced',
    'max_iter': 1000,
    'random_state': 42,
    'solver': 'lbfgs',  # Good default solver
}
logistic_model = LogisticRegression(**lr_params)

# Fit on the training split and report the wall-clock duration
print("Training Logistic Regression...")
start_time = time.time()
logistic_model.fit(X_train, y_train)
training_time = time.time() - start_time
print(f"Training completed in {training_time:.2f} seconds")

# Hard labels and positive-class probabilities for train / validation / test
print("Generating predictions...")
y_train_pred_lr, y_val_pred_lr, y_test_pred_lr = (
    logistic_model.predict(features) for features in (X_train, X_val, X_test)
)
y_train_proba_lr, y_val_proba_lr, y_test_proba_lr = (
    logistic_model.predict_proba(features)[:, 1] for features in (X_train, X_val, X_test)
)
# Calculate metrics for validation and test sets
def calculate_metrics(y_true, y_pred, y_proba, set_name):
    """
    Compute a standard set of binary-classification scores for one data split.

    Parameters:
    -----------
    y_true : array-like
        Ground-truth labels
    y_pred : array-like
        Hard label predictions
    y_proba : array-like
        Predicted probability of the positive class
    set_name : str
        Label for the split; stored unchanged under the 'set' key

    Returns:
    --------
    dict : keys 'set', 'accuracy', 'precision', 'recall', 'f1', 'roc_auc'
    """
    scores = {'set': set_name}
    scores['accuracy'] = accuracy_score(y_true, y_pred)
    # zero_division=0 makes these scores 0 instead of warning on a zero denominator
    scores['precision'] = precision_score(y_true, y_pred, zero_division=0)
    scores['recall'] = recall_score(y_true, y_pred, zero_division=0)
    scores['f1'] = f1_score(y_true, y_pred, zero_division=0)

    # ROC-AUC is only computed when y_true holds more than one distinct label;
    # otherwise 0.0 is reported as a placeholder.
    if len(np.unique(y_true)) > 1:
        scores['roc_auc'] = roc_auc_score(y_true, y_proba)
    else:
        scores['roc_auc'] = 0.0
    return scores

# Score the model on each split
lr_train_metrics = calculate_metrics(y_train, y_train_pred_lr, y_train_proba_lr, 'Train')
lr_val_metrics = calculate_metrics(y_val, y_val_pred_lr, y_val_proba_lr, 'Validation')
lr_test_metrics = calculate_metrics(y_test, y_test_pred_lr, y_test_proba_lr, 'Test')

# Print one score table per split
for heading, split_scores in (
    ("\nüìä Train Set Performance:", lr_train_metrics),
    ("\nüìä Validation Set Performance:", lr_val_metrics),
    ("\nüìä Test Set Performance:", lr_test_metrics),
):
    print(heading)
    for metric in ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']:
        print(f"   {metric.upper():12s}: {split_scores[metric]:.4f}")

# Confusion matrices: one heatmap panel per split
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
cm_panels = (
    ('Train Set', y_train, y_train_pred_lr),
    ('Validation Set', y_val, y_val_pred_lr),
    ('Test Set', y_test, y_test_pred_lr),
)
for panel_ax, (title, actual, predicted) in zip(axes, cm_panels):
    cm = confusion_matrix(actual, predicted)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=panel_ax, cbar=False)
    panel_ax.set_title(f'Logistic Regression - {title}', fontweight='bold')
    panel_ax.set_xlabel('Predicted')
    panel_ax.set_ylabel('Actual')
    panel_ax.set_xticklabels(['No', 'Yes'])
    panel_ax.set_yticklabels(['No', 'Yes'])

plt.tight_layout()
plt.show()

# ROC curves: all three splits on shared axes, plus the chance diagonal
fig, ax = plt.subplots(figsize=(8, 6))
roc_series = (
    (y_train, y_train_proba_lr, 'darkgreen',
     f'Train (AUC={lr_train_metrics["roc_auc"]:.3f})'),
    (y_val, y_val_proba_lr, 'darkorange',
     f'Val (AUC={lr_val_metrics["roc_auc"]:.3f})'),
    (y_test, y_test_proba_lr, 'darkblue',
     f'Test (AUC={lr_test_metrics["roc_auc"]:.3f})'),
)
for actual, positive_proba, colour, legend_label in roc_series:
    false_pos_rate, true_pos_rate, _ = roc_curve(actual, positive_proba)
    ax.plot(false_pos_rate, true_pos_rate, colour, lw=2, label=legend_label)
ax.plot([0, 1], [0, 1], 'gray', lw=2, linestyle='--', label='Random')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Logistic Regression - ROC Curves', fontsize=12, fontweight='bold')
ax.legend()
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

print("\n‚úì Logistic Regression complete!")


## 3. Random Forest

Random Forest can capture non-linear relationships and feature interactions. Trees don't require scaling.


In [None]:
# ============================================================
# MODEL 2: RANDOM FOREST
# ============================================================

section_rule = "=" * 60
print("\n" + section_rule)
print("RANDOM FOREST")
print(section_rule)

# Model configuration
rf_params = {
    'n_estimators': 100,
    'max_depth': 10,
    'class_weight': 'balanced',
    'random_state': 42,
    'n_jobs': -1,
}
rf_model = RandomForestClassifier(**rf_params)

# Fit on the training split and report the wall-clock duration
print("Training Random Forest (this may take a minute)...")
start_time = time.time()
rf_model.fit(X_train, y_train)
training_time = time.time() - start_time
print(f"Training completed in {training_time:.2f} seconds")

# Hard labels and positive-class probabilities for train / validation / test
print("Generating predictions...")
y_train_pred_rf, y_val_pred_rf, y_test_pred_rf = (
    rf_model.predict(features) for features in (X_train, X_val, X_test)
)
y_train_proba_rf, y_val_proba_rf, y_test_proba_rf = (
    rf_model.predict_proba(features)[:, 1] for features in (X_train, X_val, X_test)
)

# Score the model on each split
rf_train_metrics = calculate_metrics(y_train, y_train_pred_rf, y_train_proba_rf, 'Train')
rf_val_metrics = calculate_metrics(y_val, y_val_pred_rf, y_val_proba_rf, 'Validation')
rf_test_metrics = calculate_metrics(y_test, y_test_pred_rf, y_test_proba_rf, 'Test')

# Print one score table per split
for heading, split_scores in (
    ("\nüìä Train Set Performance:", rf_train_metrics),
    ("\nüìä Validation Set Performance:", rf_val_metrics),
    ("\nüìä Test Set Performance:", rf_test_metrics),
):
    print(heading)
    for metric in ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']:
        print(f"   {metric.upper():12s}: {split_scores[metric]:.4f}")

# Confusion matrices: one heatmap panel per split
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
cm_panels = (
    ('Train Set', y_train, y_train_pred_rf),
    ('Validation Set', y_val, y_val_pred_rf),
    ('Test Set', y_test, y_test_pred_rf),
)
for panel_ax, (title, actual, predicted) in zip(axes, cm_panels):
    cm = confusion_matrix(actual, predicted)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Greens', ax=panel_ax, cbar=False)
    panel_ax.set_title(f'Random Forest - {title}', fontweight='bold')
    panel_ax.set_xlabel('Predicted')
    panel_ax.set_ylabel('Actual')
    panel_ax.set_xticklabels(['No', 'Yes'])
    panel_ax.set_yticklabels(['No', 'Yes'])

plt.tight_layout()
plt.show()

# ROC curves: all three splits on shared axes, plus the chance diagonal
fig, ax = plt.subplots(figsize=(8, 6))
roc_series = (
    (y_train, y_train_proba_rf, 'darkgreen',
     f'Train (AUC={rf_train_metrics["roc_auc"]:.3f})'),
    (y_val, y_val_proba_rf, 'darkorange',
     f'Val (AUC={rf_val_metrics["roc_auc"]:.3f})'),
    (y_test, y_test_proba_rf, 'darkblue',
     f'Test (AUC={rf_test_metrics["roc_auc"]:.3f})'),
)
for actual, positive_proba, colour, legend_label in roc_series:
    false_pos_rate, true_pos_rate, _ = roc_curve(actual, positive_proba)
    ax.plot(false_pos_rate, true_pos_rate, colour, lw=2, label=legend_label)
ax.plot([0, 1], [0, 1], 'gray', lw=2, linestyle='--', label='Random')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Random Forest - ROC Curves', fontsize=12, fontweight='bold')
ax.legend()
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

print("\n‚úì Random Forest complete!")


## 4. Feature Importance Analysis

This section ranks the features by the Random Forest's built-in importance scores (`feature_importances_`) to show which ones drive its predictions.


In [None]:
# ============================================================
# FEATURE IMPORTANCE - RANDOM FOREST
# ============================================================

section_rule = "=" * 60
print("\n" + section_rule)
print("FEATURE IMPORTANCE ANALYSIS")
print(section_rule)

# Each feature name must pair with exactly one importance value
if len(feature_names) != len(rf_model.feature_importances_):
    raise ValueError(f"Feature names count ({len(feature_names)}) doesn't match model features ({len(rf_model.feature_importances_)})")

# Name/importance table, most important feature first
importance_table = pd.DataFrame({
    'feature': feature_names,
    'importance': rf_model.feature_importances_
})
feature_importance = importance_table.sort_values('importance', ascending=False)

print("\nüìä Top 15 Most Important Features:")
print(feature_importance.head(15).to_string(index=False))

# Horizontal bar chart of the leading features
top_n = 15
top_features = feature_importance.head(top_n)
bar_positions = range(len(top_features))

fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(bar_positions, top_features['importance'], color='forestgreen', alpha=0.8)
ax.set_yticks(bar_positions)
ax.set_yticklabels(top_features['feature'])
ax.invert_yaxis()  # put the largest bar at the top
ax.set_xlabel('Importance', fontsize=12)
ax.set_title(f'Random Forest - Top {top_n} Feature Importances', fontsize=14, fontweight='bold')
ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.show()

print("\n‚úì Feature importance analysis complete!")


## 5. Model Comparison

Compare both models on train, validation, and test sets.


In [None]:
# ============================================================
# MODEL COMPARISON
# ============================================================

print("\n" + "=" * 60)
print("MODEL COMPARISON")
print("=" * 60)

# Display name -> key in the dicts returned by calculate_metrics().
# The mapping is explicit on purpose: deriving the key from the display name
# (lower-case, '-' -> '_') turns 'F1-Score' into 'f1_score', which is not a
# key of the metrics dicts ('f1' is) and raises KeyError.
METRIC_KEYS = {
    'Accuracy': 'accuracy',
    'Precision': 'precision',
    'Recall': 'recall',
    'F1-Score': 'f1',
    'ROC-AUC': 'roc_auc',
}
metrics = list(METRIC_KEYS)
split_names = ['Train', 'Validation', 'Test']

# Per-model metrics, ordered to match split_names
metrics_by_model = {
    'Logistic Regression': [lr_train_metrics, lr_val_metrics, lr_test_metrics],
    'Random Forest': [rf_train_metrics, rf_val_metrics, rf_test_metrics],
}

# Create comprehensive comparison dataframe: one row per (model, split)
comparison_data = {
    'Model': [model_name for model_name in metrics_by_model for _ in split_names],
    'Set': split_names * len(metrics_by_model),
}
for metric in metrics:
    metric_key = METRIC_KEYS[metric]
    comparison_data[metric] = [
        split_metrics[metric_key]
        for per_split in metrics_by_model.values()
        for split_metrics in per_split
    ]

comparison = pd.DataFrame(comparison_data)
print("\n", comparison.to_string(index=False))

# Visualize comparison: one grouped-bar panel per metric
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
panels = axes.ravel()
x = np.arange(len(split_names))  # Train, Validation, Test
width = 0.35

for ax, metric in zip(panels, metrics):
    metric_key = METRIC_KEYS[metric]
    lr_values = [m[metric_key] for m in metrics_by_model['Logistic Regression']]
    rf_values = [m[metric_key] for m in metrics_by_model['Random Forest']]

    ax.bar(x - width/2, lr_values, width, label='Logistic Regression',
           color='darkorange', alpha=0.8)
    ax.bar(x + width/2, rf_values, width, label='Random Forest',
           color='forestgreen', alpha=0.8)

    ax.set_xlabel('Dataset', fontsize=10)
    ax.set_ylabel('Score', fontsize=10)
    ax.set_title(metric, fontsize=12, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(split_names)
    ax.legend()
    ax.grid(axis='y', alpha=0.3)
    ax.set_ylim([0, 1.0])

# The 2x3 grid has one more panel than there are metrics; hide the empty one
for unused_ax in panels[len(metrics):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

# Determine best model based on validation ROC-AUC
# (Logistic Regression is kept when the two scores tie)
print("\n🏆 BEST MODEL (based on Validation ROC-AUC):")
if rf_val_metrics['roc_auc'] > lr_val_metrics['roc_auc']:
    best_model = 'Random Forest'
    best_model_obj = rf_model
    best_val_metrics, best_test_metrics = rf_val_metrics, rf_test_metrics
    best_val_pred, best_test_pred = y_val_pred_rf, y_test_pred_rf
    best_val_proba, best_test_proba = y_val_proba_rf, y_test_proba_rf
else:
    best_model = 'Logistic Regression'
    best_model_obj = logistic_model
    best_val_metrics, best_test_metrics = lr_val_metrics, lr_test_metrics
    best_val_pred, best_test_pred = y_val_pred_lr, y_test_pred_lr
    best_val_proba, best_test_proba = y_val_proba_lr, y_test_proba_lr

print(f"   {best_model} (Val ROC-AUC: {best_val_metrics['roc_auc']:.4f})")

print("\n   Test Set Performance:")
print(f"   ROC-AUC: {best_test_metrics['roc_auc']:.4f}")
print(f"   F1-Score: {best_test_metrics['f1']:.4f}")

# Overfitting check: absolute train-vs-validation ROC-AUC gap per model
OVERFIT_GAP_THRESHOLD = 0.1
lr_overfit = abs(lr_train_metrics['roc_auc'] - lr_val_metrics['roc_auc'])
rf_overfit = abs(rf_train_metrics['roc_auc'] - rf_val_metrics['roc_auc'])

print("\n📈 Summary Statistics:")
print("   Overfitting check (Train vs Val ROC-AUC difference):")
print(f"   LR: {lr_overfit:.4f}")
print(f"   RF: {rf_overfit:.4f}")

if lr_overfit > OVERFIT_GAP_THRESHOLD:
    print(f"   ⚠️  WARNING: Logistic Regression may be overfitting (gap: {lr_overfit:.4f})")
if rf_overfit > OVERFIT_GAP_THRESHOLD:
    print(f"   ⚠️  WARNING: Random Forest may be overfitting (gap: {rf_overfit:.4f})")

## 6. Save Models and Predictions

Save all models, predictions, and metrics for downstream fairness analysis.


In [None]:
# ============================================================
# SAVE MODELS AND PREDICTIONS
# ============================================================

print("\n" + "=" * 60)
print("SAVING MODELS AND PREDICTIONS")
print("=" * 60)

# Save next to the preprocessed inputs that were loaded earlier. A hard-coded
# '../results' ignores where the data was actually found; in Colab it resolves
# to ephemeral VM disk instead of the mounted Drive folder.
# NOTE(review): confirm 03_fairness_analysis.ipynb searches this same directory.
results_dir = Path(data_dir)
results_dir.mkdir(parents=True, exist_ok=True)


def _save_pickle(obj, filename):
    """Pickle ``obj`` to ``results_dir / filename``, overwriting any existing file."""
    with open(results_dir / filename, 'wb') as f:
        pickle.dump(obj, f)


def _metrics_for_json(metrics_dict):
    """Return ``metrics_dict`` without its 'set' label and with every score cast to float."""
    return {k: float(v) for k, v in metrics_dict.items() if k != 'set'}


# (predictions, probabilities) per model prefix and split; the prefix and split
# names are also used to build the output file names.
split_labels = {'train': y_train, 'val': y_val, 'test': y_test}
model_outputs = {
    'lr': {
        'train': (y_train_pred_lr, y_train_proba_lr),
        'val': (y_val_pred_lr, y_val_proba_lr),
        'test': (y_test_pred_lr, y_test_proba_lr),
    },
    'rf': {
        'train': (y_train_pred_rf, y_train_proba_rf),
        'val': (y_val_pred_rf, y_val_proba_rf),
        'test': (y_test_pred_rf, y_test_proba_rf),
    },
}

# Check every split (not only test) before writing anything, so a stale
# variable cannot leave a half-written, inconsistent set of files behind.
for prefix, per_split in model_outputs.items():
    for split, (pred, proba) in per_split.items():
        n_expected = len(split_labels[split])
        assert len(pred) == n_expected, \
            f"{prefix.upper()} Prediction length mismatch ({split})"
        assert len(proba) == n_expected, \
            f"{prefix.upper()} Probability length mismatch ({split})"

# Save models
print("Saving models...")
_save_pickle(logistic_model, 'logistic_regression_model.pkl')
_save_pickle(rf_model, 'random_forest_model.pkl')
print("✓ Models saved")

# Save predictions and probabilities
print("Saving predictions...")
for prefix, per_split in model_outputs.items():
    for split, (pred, proba) in per_split.items():
        _save_pickle(pred, f'{prefix}_{split}_predictions.pkl')
        _save_pickle(proba, f'{prefix}_{split}_proba.pkl')
print("✓ Predictions saved")

# Save metrics
print("Saving metrics...")
baseline_metrics = {
    'logistic_regression': {
        'train': _metrics_for_json(lr_train_metrics),
        'validation': _metrics_for_json(lr_val_metrics),
        'test': _metrics_for_json(lr_test_metrics)
    },
    'random_forest': {
        'train': _metrics_for_json(rf_train_metrics),
        'validation': _metrics_for_json(rf_val_metrics),
        'test': _metrics_for_json(rf_test_metrics)
    },
    'best_model': best_model,
    'feature_importance': feature_importance.head(20).to_dict('records')  # Top 20 features
}

with open(results_dir / 'baseline_metrics.json', 'w') as f:
    json.dump(baseline_metrics, f, indent=2)

print("✓ Metrics saved")

print(f"\n✅ All files saved to: {results_dir.absolute()}")
print("\nSaved files:")
print("  Models:")
print("    - logistic_regression_model.pkl")
print("    - random_forest_model.pkl")
print("  Predictions (Logistic Regression):")
print("    - lr_train_predictions.pkl, lr_val_predictions.pkl, lr_test_predictions.pkl")
print("    - lr_train_proba.pkl, lr_val_proba.pkl, lr_test_proba.pkl")
print("  Predictions (Random Forest):")
print("    - rf_train_predictions.pkl, rf_val_predictions.pkl, rf_test_predictions.pkl")
print("    - rf_train_proba.pkl, rf_val_proba.pkl, rf_test_proba.pkl")
print("  Metrics:")
print("    - baseline_metrics.json")

print("\n" + "=" * 60)
print("NOTEBOOK COMPLETE!")
print("=" * 60)
print("Next steps:")
print("  → Proceed to notebook 03_fairness_analysis.ipynb")
print(f"  → Load models and predictions from: {results_dir.absolute()}")
