# Imports and Setup

In [None]:
from pathlib import Path
import sys
import yaml
import joblib
import numpy as np
import warnings
import shap
import pandas as pd

# Add parent directory to sys.path for local imports.
# Path() is the current working directory, so ROOT is its resolved parent.
ROOT = Path().resolve().parent
SAVE_DIR = ROOT / "saved"
# Guard the append so re-running this cell does not pile up duplicate entries
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

# Load config.yaml from ROOT (the same file as "../config.yaml" relative to
# the working directory). YAML is UTF-8 by convention, so decode it explicitly
# rather than relying on the platform's default encoding.
with open(ROOT / "config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Access primary metric
primary_metric = config["general"]["primary_metric"]

# Import relevant utility functions (must come after ROOT is on sys.path)
from utils.utils import (
    evaluate_pipeline,
    summarize_model_results
)

# Load data from disk.
# NOTE: joblib.load unpickles its input; only load artifacts you trust.
X = joblib.load(SAVE_DIR / "X.pkl")
X_train = joblib.load(SAVE_DIR / "X_train.pkl")
y_train = joblib.load(SAVE_DIR / "y_train.pkl")
X_test = joblib.load(SAVE_DIR / "X_test.pkl")
y_test = joblib.load(SAVE_DIR / "y_test.pkl")

# Bundle the datasets; this list is what the training cells pass to
# evaluate_pipeline
data = [X, X_train, y_train, X_test, y_test]

# Training

### DummyClassifier

In [None]:
# Run evaluate_pipeline for the 'dummy_classifier' entry on the bundled data.
# NOTE(review): the returned pair is presumably (fitted pipeline, metric
# results) - confirm against utils.utils.evaluate_pipeline.
dummy_pipeline, dummy_data = evaluate_pipeline('dummy_classifier', data)

### Logistic Regression

In [None]:
# Run evaluate_pipeline for the 'logistic_regression' entry on the bundled data.
# NOTE(review): the returned pair is presumably (fitted pipeline, metric
# results) - confirm against utils.utils.evaluate_pipeline.
logistic_pipeline, logistic_data = evaluate_pipeline('logistic_regression', data)

### Random Forest

In [None]:
# Run evaluate_pipeline for the 'random_forest' entry on the bundled data.
# NOTE(review): the returned pair is presumably (fitted pipeline, metric
# results) - confirm against utils.utils.evaluate_pipeline.
forest_pipeline, forest_data = evaluate_pipeline('random_forest', data)

### LightGBM

In [None]:
# Run evaluate_pipeline for the 'lightgbm' entry on the bundled data.
# NOTE(review): the returned pair is presumably (fitted pipeline, metric
# results) - confirm against utils.utils.evaluate_pipeline.
light_pipeline, light_data = evaluate_pipeline('lightgbm', data)

# Analysis

In [None]:
# Pair each model name with the (results, pipeline) produced by its training
# cell, then split the pairs into the two name-keyed lookups used below.
_evaluated = {
    'dummy_classifier': (dummy_data, dummy_pipeline),
    'logistic_regression': (logistic_data, logistic_pipeline),
    'random_forest': (forest_data, forest_pipeline),
    'lightgbm': (light_data, light_pipeline),
}

# Precomputed metric results from evaluation, keyed by model name
model_data = {name: results for name, (results, _) in _evaluated.items()}

# Pipelines returned by the training cells, keyed by model name
fitted_pipelines = {name: pipeline for name, (_, pipeline) in _evaluated.items()}

# Every displayed metric shares the same percentage format string
metrics_to_display = dict.fromkeys(('F1', 'Accuracy', 'Precision'), '{:.2%}')

# Hand everything to the summary helper and keep the two values it returns
summary = summarize_model_results(model_data, primary_metric, metrics_to_display, fitted_pipelines)
best_model, best_pipeline = summary

#### Model performance metrics
Summarize the key evaluation metrics and what they say about the model's overall predictive power. Highlight any strengths or weaknesses revealed by these numbers

#### Feature importance analysis
Describe which features contribute most to the model's decisions. Include insights from feature importance scores, SHAP values, and more. Explain why certain features might be especially influential

#### Overfitting/underfitting and generalization
Discuss evidence of overfitting/underfitting, if any. Use training vs. validation scores, learning curves, or cross-validation results to support analysis. Explain how well the model is expected to perform on unseen data

#### Comparison to baseline models
Compare the model's performance to the baseline models (Dummy and Logistic Regression). Highlight the improvements and explain why this model is a better choice than those baselines

#### Error analysis
Common failure cases (e.g., certain classes, edge cases) and examples of misclassified instances

#### Model deployment considerations
Inference time and scalability. Will it work for real time predictions?

#### Data quality and preprocessing impact
Effect of missing data handling and impact of feature engineering

#### Summary
Brief summary containing the most important points from the above information

In [None]:
# Predict on the test set with the selected pipeline
y_pred = best_pipeline.predict(X_test)

# Positions (row offsets, not index labels) where the prediction is wrong.
# Both sides are flattened to plain 1-D arrays so the comparison is
# element-wise regardless of the container y_test was saved in.
wrong_positions = np.flatnonzero(np.ravel(y_pred) != np.ravel(y_test))

# Explain the first incorrect prediction, if there is one
if wrong_positions.size > 0:
    wrong_index = int(wrong_positions[0])

    # Transform the test set using the fitted preprocessor
    preprocessor = best_pipeline.named_steps['preprocessing']
    X_transformed = preprocessor.transform(X_test)
    if hasattr(X_transformed, 'toarray'):
        # Sparse output cannot be passed to the DataFrame constructor directly
        X_transformed = X_transformed.toarray()
    features = preprocessor.get_feature_names_out()
    X_df = pd.DataFrame(X_transformed, columns=features)

    # SHAP support varies by model type, so this step stays best-effort:
    # a failure is reported as a warning instead of stopping the notebook.
    try:
        # Generate SHAP explainer, using the transformed test set as background
        explainer = shap.Explainer(best_pipeline.named_steps['model'], X_df, feature_names=features)
        shap_values = explainer(X_df.iloc[[wrong_index]])

        # Plot SHAP waterfall. Only one row was explained, so it sits at
        # position 0 of the result; indexing with wrong_index would be out of
        # range for any misclassified row other than the very first one.
        shap.plots.waterfall(shap_values[0])

    except Exception as e:
        warnings.warn(f'SHAP could not generate explanation: {e}')
else:
    print(f'No incorrect predictions for {best_model}')

Analyze what went wrong and why

# Summary

#### Key insights

#### Limitations and possible improvements

#### Business implications