# Classic Credit Scoring Model Development

Interactive notebook for step-by-step development of a classic credit risk model.
Uses **Weight of Evidence (WoE)** encoding, **Logistic Regression**, and a **point-based Scorecard**.

**Steps:**
1. Setup and imports
2. Load config
3. Load and split data
4. WoE binning fit
5. IV summary and visualization
6. WoE visualization for top features
7. IV-based feature selection
8. PSI check on WoE values
9. Correlation on WoE features
10. Logistic Regression fit
11. Model diagnostics
12. Scorecard generation
13. Scorecard application
14. Evaluation metrics
15. Lift tables
16. Bootstrap CI and Score PSI
17. Save outputs
18. Alternative: full pipeline mode

Run the cells in order: each cell relies on variables defined by the cells before it, and can be re-run once those have executed.

## 1. Setup & Imports

In [None]:
# Standard-library modules needed to locate the project before any project import.
import sys
from pathlib import Path
import os

# If the current directory is named "notebooks", use its parent as the project
# root; otherwise use the current directory itself.
project_root = str(Path.cwd().parent) if Path.cwd().name == "notebooks" else str(Path.cwd())
# Make the `src.*` imports further down resolvable.
if project_root not in sys.path:
    sys.path.insert(0, project_root)
# Work from the project root so relative paths (e.g. the config file path used
# in the next cell) are resolved against it.
os.chdir(project_root)

import warnings
# NOTE: this silences every warning category for the rest of the session.
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import random
import logging
import matplotlib.pyplot as plt

# Root logger: INFO level, "HH:MM:SS | LEVEL | message" lines.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)-5s | %(message)s",
    datefmt="%H:%M:%S",
)

# Project modules; these must come after the sys.path adjustment above.
from src.model_development.data_loader import load_and_split
from src.features.woe_transformer import WoETransformer
from src.classic_model.model_adapter import ClassicModelAdapter
from src.classic_model.scorecard import ScorecardGenerator
from src.model_development.evaluator import (
    evaluate_model_quarterly,
    bootstrap_auc_ci,
    compute_score_psi,
)

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Echo the resolved locations so a wrong launch directory is easy to spot.
print(f"Project root: {project_root}")
print(f"Working dir:  {os.getcwd()}")

## 2. Load Config

In [None]:
import yaml
from src.config.schema import ClassicPipelineConfig

# Relative path; resolved against the current working directory.
config_path = "config/classic_model.yaml"
# Explicit encoding so parsing does not depend on the platform's locale default.
with open(config_path, encoding="utf-8") as f:
    raw_config = yaml.safe_load(f)

# safe_load returns None for an empty document and may return a list or scalar;
# fail with a clear message instead of an opaque "argument after ** must be a
# mapping" TypeError.
if not isinstance(raw_config, dict):
    raise ValueError(
        f"{config_path} must contain a YAML mapping, got {type(raw_config).__name__}"
    )

config = ClassicPipelineConfig(**raw_config)

# Seed both NumPy's and the stdlib's global generators from the configured seed.
seed = config.reproducibility.global_seed
np.random.seed(seed)
random.seed(seed)
n_jobs = config.reproducibility.n_jobs

# Echo the settings that the following cells read.
print(f"Input:          {config.data.input_path}")
print(f"Train end date: {config.splitting.train_end_date}")
print(f"Target:         {config.data.target_column}")
print(f"Seed:           {seed}")
print(f"n_jobs:         {n_jobs}")
print(f"WoE bins:       {config.woe.n_bins}")
print(f"WoE monotonic:  {config.woe.monotonic}")
print(f"IV range:       [{config.woe.min_iv}, {config.woe.max_iv}]")
print(f"LogReg solver:  {config.logistic.solver}")
print(f"LogReg C:       {config.logistic.C}")
print(f"Scorecard:      score={config.scorecard.target_score}, odds={config.scorecard.target_odds}, pdo={config.scorecard.pdo}")

## 3. Load & Split Data

In [None]:
# Collect the split arguments from the config, then load and split in one call.
split_kwargs = dict(
    input_path=config.data.input_path,
    train_end_date=config.splitting.train_end_date,
    target_column=config.data.target_column,
    date_column=config.data.date_column,
    id_columns=list(config.data.id_columns),
    meta_columns=list(config.data.exclude_columns),
    test_size=config.splitting.test_size,
    stratify=config.splitting.stratify,
    random_state=seed,
)
datasets = load_and_split(**split_kwargs)

target = config.data.target_column
features = list(datasets.feature_columns)


def _describe(prefix, frame):
    """Print the row count and the mean of the target column for one data slice."""
    print(f"{prefix} {len(frame):,} rows, bad rate: {frame[target].mean():.2%}")


print(f"Features: {len(features)}")
_describe("Train:", datasets.train)
_describe("Test: ", datasets.test)
for label in datasets.oot_labels:
    _describe(f"OOT {label}:", datasets.oot_quarters[label])

## 4. WoE Binning Fit

In [None]:
# Binning options taken from the pipeline config; they are nested under the
# key path passed to WoETransformer below.
binning_options = {
    "n_bins": config.woe.n_bins,
    "min_bin_size": config.woe.min_bin_size,
    "monotonic": config.woe.monotonic,
    "missing_bin": config.woe.missing_bin,
}
woe_config = {"model": {"logistic_regression": {"woe_binning": binning_options}}}

# Fit on the training frame only.
woe_transformer = WoETransformer(config=woe_config, name="ClassicWoE")
woe_transformer.fit(datasets.train, features, target_column=target)

print(f"Fitted WoE for {len(woe_transformer.fitted_features)} features")

# Apply the fitted transformer to train, test and every OOT quarter.
woe_train = woe_transformer.transform(datasets.train)
woe_test = woe_transformer.transform(datasets.test)
woe_oot = {
    label: woe_transformer.transform(qdf)
    for label, qdf in datasets.oot_quarters.items()
}

n_woe_columns = sum(col.endswith("_woe") for col in woe_train.columns)
print(f"WoE columns added: {n_woe_columns}")

## 5. IV Summary & Visualization

In [None]:
iv_summary = woe_transformer.get_iv_summary()

# One row per feature, ordered from highest to lowest IV.
iv_ranked = sorted(iv_summary.items(), key=lambda item: -item[1])
iv_df = pd.DataFrame([
    {
        "Feature": name,
        "IV": value,
        "Category": woe_transformer.get_iv_category(value),
    }
    for name, value in iv_ranked
])

print(f"Total features with IV: {len(iv_df)}")

category_counts = iv_df["Category"].value_counts()
print("\nIV Category Distribution:")
print(category_counts.to_string())

top_rows = iv_df.head(20)
print("\nTop 20 features by IV:")
print(top_rows.to_string(index=False))

In [None]:
# Two panels: histogram of all IV values (left), top-20 bar chart (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
hist_ax, bar_ax = axes[0], axes[1]
iv_floor, iv_ceiling = config.woe.min_iv, config.woe.max_iv

# Left panel: distribution of IV with the configured selection bounds.
hist_ax.hist(iv_df["IV"], bins=50, edgecolor="black", alpha=0.7)
hist_ax.axvline(x=iv_floor, color="red", linestyle="--", label=f"min_iv={iv_floor}")
hist_ax.axvline(x=iv_ceiling, color="orange", linestyle="--", label=f"max_iv={iv_ceiling}")
hist_ax.set_xlabel("Information Value")
hist_ax.set_ylabel("Count")
hist_ax.set_title("IV Distribution (All Features)")
hist_ax.legend()


def _iv_bar_color(iv):
    """Pick the bar color: red above max_iv, then green / blue / grey by IV band."""
    if iv > iv_ceiling:
        return "red"
    if iv >= 0.3:
        return "#2ecc71"
    if iv >= 0.1:
        return "#3498db"
    return "#95a5a6"


# Right panel: the 20 highest-IV features, largest at the top.
top20 = iv_df.head(20)
colors = [_iv_bar_color(iv) for iv in top20["IV"]]
positions = range(len(top20))

bar_ax.barh(positions, top20["IV"].values, color=colors)
bar_ax.set_yticks(positions)
bar_ax.set_yticklabels(top20["Feature"].values, fontsize=8)
bar_ax.invert_yaxis()
bar_ax.set_xlabel("Information Value")
bar_ax.set_title("Top 20 Features by IV")
bar_ax.axvline(x=iv_floor, color="red", linestyle="--", alpha=0.5)
bar_ax.axvline(x=iv_ceiling, color="orange", linestyle="--", alpha=0.5)

plt.tight_layout()
plt.show()

## 6. WoE Visualization for Top Features

In [None]:
# Visualize WoE bins for up to four features: the highest-IV ones whose IV lies
# inside the configured [min_iv, max_iv] range.
in_iv_range = (iv_df["IV"] >= config.woe.min_iv) & (iv_df["IV"] <= config.woe.max_iv)
top_features = iv_df.loc[in_iv_range, "Feature"].tolist()[:4]

if top_features:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    axes = axes.flatten()

    for idx, ax in enumerate(axes):
        # Fewer than four qualifying features: hide the spare panel instead of
        # leaving an empty framed axis in the figure.
        if idx >= len(top_features):
            ax.set_visible(False)
            continue

        feat = top_features[idx]
        bins = woe_transformer.get_woe_table(feat)

        # Only bins with a non-negative bin_id are plotted, ordered by lower bound.
        # NOTE(review): negative ids presumably mark special bins (e.g. missing
        # values) - confirm against WoETransformer.
        regular_bins = [] if bins is None else sorted(
            [b for b in bins if b.bin_id >= 0],
            key=lambda b: b.lower_bound,
        )
        if not regular_bins:
            # Nothing to draw for this feature; hide its panel as well.
            ax.set_visible(False)
            continue

        labels = [f"[{b.lower_bound:.2g}, {b.upper_bound:.2g}]" for b in regular_bins]
        woe_vals = [b.woe for b in regular_bins]
        bad_rates = [b.bad_count / b.count if b.count > 0 else 0 for b in regular_bins]
        positions = range(len(regular_bins))

        # Bars: red for negative WoE, green otherwise.
        bar_colors = ["#e74c3c" if w < 0 else "#2ecc71" for w in woe_vals]
        ax.bar(positions, woe_vals, color=bar_colors, alpha=0.8)
        ax.set_xticks(positions)
        ax.set_xticklabels(labels, rotation=45, fontsize=7, ha="right")
        ax.axhline(y=0, color="black", linewidth=0.5)
        ax.set_ylabel("WoE")

        iv_val = iv_summary.get(feat, 0)
        ax.set_title(f"{feat}\nIV={iv_val:.4f}", fontsize=10)

        # Overlay the per-bin bad rate on a secondary y-axis.
        ax2 = ax.twinx()
        ax2.plot(positions, bad_rates, "ko-", markersize=4, alpha=0.5)
        ax2.set_ylabel("Bad Rate", fontsize=8)

    plt.tight_layout()
    plt.show()
else:
    print("No features in the valid IV range to visualize.")

## 7. IV-Based Feature Selection

In [None]:
# Keep features whose IV lies inside [min_iv, max_iv], highest IV first.
iv_ranked = sorted(iv_summary.items(), key=lambda item: -item[1])
selected_features = [
    name
    for name, value in iv_ranked
    if config.woe.min_iv <= value <= config.woe.max_iv
]

# Names of the corresponding WoE-encoded columns.
woe_selected = [f"{name}_woe" for name in selected_features]

print(f"IV selection: {len(iv_summary)} -> {len(selected_features)} features")
print(f"  min_iv={config.woe.min_iv}, max_iv={config.woe.max_iv}")
print(f"\nSelected features ({len(selected_features)}):")

# List at most the first 20, then summarize how many were left out.
for rank, name in enumerate(selected_features[:20], start=1):
    print(f"  {rank}. {name} (IV={iv_summary[name]:.4f})")
remaining = len(selected_features) - 20
if remaining > 0:
    print(f"  ... and {remaining} more")

## 8. PSI Check on WoE Values

In [None]:
# Stability check: compare each selected WoE feature's distribution between the
# earlier and the later half of the training frame, split at the median date.
date_col = config.data.date_column
train_dates = woe_train[date_col]
median_date = train_dates.median()
mask_first = train_dates <= median_date
mask_second = train_dates > median_date

first_half = woe_train.loc[mask_first]
second_half = woe_train.loc[mask_second]

print(f"PSI split: first_half={len(first_half):,}, second_half={len(second_half):,}")

psi_moderate = 0.10   # PSI below this is reported as "Stable"
psi_threshold = 0.25  # PSI at or above this is "Unstable" and the feature is dropped
woe_suffix = "_woe"


def _woe_psi(expected, actual, n_quantiles=10, floor=1e-4, min_obs=10):
    """Return the PSI of ``actual`` against ``expected``, or None if not computable.

    Bin edges are the quantiles of ``expected`` (duplicate edges dropped) with
    the outer edges opened to -inf/+inf. Per-bin shares are floored at ``floor``
    so the log ratio stays finite. None is returned when either sample has fewer
    than ``min_obs`` non-null values, when no interval can be formed, or when
    the result is not finite.
    """
    expected = pd.Series(expected).dropna()
    actual = pd.Series(actual).dropna()
    if len(expected) < min_obs or len(actual) < min_obs:
        return None

    _, edges = pd.qcut(expected, q=n_quantiles, retbins=True, duplicates="drop")
    # Work on a private float copy so widening the outer edges is always allowed.
    edges = np.array(edges, dtype=float)
    if len(edges) < 2:
        return None
    edges[0] = -np.inf
    edges[-1] = np.inf

    def _shares(sample):
        # Bin a Series, not a bare array: Series.value_counts supports
        # normalize=..., whereas the Categorical returned by pd.cut for array
        # input does not (that TypeError used to be swallowed, leaving every
        # feature with PSI "N/A").
        binned = pd.cut(sample, bins=edges)
        return binned.value_counts(normalize=True).sort_index().clip(lower=floor).to_numpy()

    # Both share vectors cover the same ordered set of intervals.
    exp_pct = _shares(expected)
    act_pct = _shares(actual)
    psi = float(((act_pct - exp_pct) * np.log(act_pct / exp_pct)).sum())
    return psi if np.isfinite(psi) else None


psi_rows = []
for feat in woe_selected:
    try:
        psi_val = _woe_psi(first_half[feat], second_half[feat])
    except Exception as exc:
        # Stay best-effort (the feature is reported as N/A), but say why.
        logging.warning("PSI could not be computed for %s: %s", feat, exc)
        psi_val = None

    if psi_val is None:
        status = "N/A"
    elif psi_val < psi_moderate:
        status = "Stable"
    elif psi_val < psi_threshold:
        status = "Moderate"
    else:
        status = "Unstable"

    # Compare against None explicitly: a PSI of exactly 0.0 is a valid result.
    psi_rows.append({
        "Feature": feat,
        "PSI": None if psi_val is None else round(psi_val, 4),
        "Status": status,
    })

# Explicit columns keep the frame usable when no feature was selected.
psi_df = pd.DataFrame(psi_rows, columns=["Feature", "PSI", "Status"])
unstable = psi_df[psi_df["Status"] == "Unstable"]

if len(unstable) > 0:
    print(f"\nUnstable features ({len(unstable)}):")
    print(unstable.to_string(index=False))
    # Drop unstable
    unstable_names = set(unstable["Feature"])
    woe_selected = [f for f in woe_selected if f not in unstable_names]
    # Strip only the trailing suffix so names containing "_woe" elsewhere survive.
    selected_features = [
        c[: -len(woe_suffix)] if c.endswith(woe_suffix) else c for c in woe_selected
    ]
    print(f"\nAfter PSI filter: {len(woe_selected)} WoE features")
else:
    print(f"\nAll {len(woe_selected)} features are stable (PSI < {psi_threshold})")

print(f"\nPSI summary:")
print(psi_df["Status"].value_counts().to_string())

## 9. Correlation on WoE Features

In [None]:
# Correlation filter on the WoE columns: for every pair of surviving features
# whose absolute Pearson correlation reaches the threshold, keep the one with
# the higher IV and drop the other.
corr_threshold = 0.80
woe_suffix = "_woe"


def _base_name(woe_col):
    """Return the feature name for a WoE column by stripping only the trailing suffix."""
    return woe_col[: -len(woe_suffix)] if woe_col.endswith(woe_suffix) else woe_col


if len(woe_selected) < 2:
    print("Not enough features for correlation check")
    corr_pairs_df = pd.DataFrame()
else:
    corr_matrix = woe_train[woe_selected].corr(method="pearson").abs()

    to_drop = set()
    pairs = []

    for i, feat_a in enumerate(woe_selected):
        # A feature that is already out must not eliminate anything else.
        if feat_a in to_drop:
            continue

        for j in range(i + 1, len(woe_selected)):
            feat_b = woe_selected[j]
            if feat_b in to_drop:
                continue

            corr_val = corr_matrix.iloc[i, j]
            # NaN (e.g. a constant column) fails this comparison and is skipped.
            if not corr_val >= corr_threshold:
                continue

            iv_a = iv_summary.get(_base_name(feat_a), 0)
            iv_b = iv_summary.get(_base_name(feat_b), 0)

            if iv_a >= iv_b:
                drop, keep = feat_b, feat_a
            else:
                drop, keep = feat_a, feat_b
            to_drop.add(drop)

            pairs.append({
                "Feature_A": feat_a, "Feature_B": feat_b,
                "Correlation": round(float(corr_val), 4),
                "Dropped": drop, "Kept": keep,
            })

            # feat_a itself was just eliminated: stop comparing on its behalf.
            if drop == feat_a:
                break

    corr_pairs_df = pd.DataFrame(pairs)
    woe_selected = [f for f in woe_selected if f not in to_drop]
    selected_features = [_base_name(c) for c in woe_selected]

    print(f"Correlation elimination: {len(corr_matrix)} -> {len(woe_selected)} features")
    print(f"  threshold={corr_threshold}, {len(to_drop)} dropped")

    if len(corr_pairs_df) > 0:
        print(f"\nCorrelated pairs ({len(corr_pairs_df)}):")
        print(corr_pairs_df.head(15).to_string(index=False))

print(f"\nFinal WoE features ({len(woe_selected)}):")
for i, feat in enumerate(woe_selected, 1):
    print(f"  {i}. {feat} (IV={iv_summary.get(_base_name(feat), 0):.4f})")

## 10. Logistic Regression Fit

In [None]:
# Training matrix (selected WoE columns) and labels as plain arrays.
y_train_vals = woe_train[target].values
X_train_woe = woe_train[woe_selected].values

# Standardize the WoE inputs; the fitted scaler is reused by later cells.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_woe)

# The config spells "no value" as the string "none"; map it to None.
class_weight = None if config.logistic.class_weight == "none" else config.logistic.class_weight
penalty = None if config.logistic.penalty == "none" else config.logistic.penalty

lr_params = {
    "solver": config.logistic.solver,
    "penalty": penalty,
    "C": config.logistic.C,
    "max_iter": config.logistic.max_iter,
    "class_weight": class_weight,
    "random_state": seed,
    "n_jobs": n_jobs,
}
lr_model = LogisticRegression(**lr_params)
lr_model.fit(X_train_scaled, y_train_vals)

print("Logistic Regression fitted.")
print(f"  Solver: {config.logistic.solver}")
print(f"  Penalty: {penalty}")
print(f"  C: {config.logistic.C}")
print(f"  Features: {len(woe_selected)}")
print(f"  Intercept: {lr_model.intercept_[0]:.4f}")
print("\nCoefficients:")

# Coefficients listed from largest to smallest magnitude.
coefficients = lr_model.coef_[0]
coef_table = pd.DataFrame({
    "Feature": woe_selected,
    "Coefficient": coefficients,
    "Abs_Coefficient": np.abs(coefficients),
})
coef_df = coef_table.sort_values("Abs_Coefficient", ascending=False)
print(coef_df.to_string(index=False))

## 11. Model Diagnostics

In [None]:
# Create adapter for evaluation
adapter = ClassicModelAdapter(lr_model, scaler, woe_selected)

# Quick AUC check
from sklearn.metrics import roc_auc_score, roc_curve

train_probs = adapter.predict_proba(woe_train[woe_selected])[:, 1]
test_probs = adapter.predict_proba(woe_test[woe_selected])[:, 1]

train_auc = roc_auc_score(woe_train[target], train_probs)
test_auc = roc_auc_score(woe_test[target], test_probs)

print(f"Train AUC: {train_auc:.4f}")
print(f"Test AUC:  {test_auc:.4f}")
print(f"Overfit gap: {train_auc - test_auc:.4f}")

# ROC curve
fig, ax = plt.subplots(1, 1, figsize=(7, 6))
for label, y_true, y_score, color in [
    ("Train", woe_train[target], train_probs, "#3498db"),
    ("Test", woe_test[target], test_probs, "#e74c3c"),
]:
    fpr, tpr, _ = roc_curve(y_true, y_score)
    auc = roc_auc_score(y_true, y_score)
    ax.plot(fpr, tpr, color=color, label=f"{label} (AUC={auc:.4f})")

for label in sorted(woe_oot.keys()):
    qdf = woe_oot[label]
    oot_probs = adapter.predict_proba(qdf[woe_selected])[:, 1]
    fpr, tpr, _ = roc_curve(qdf[target], oot_probs)
    auc = roc_auc_score(qdf[target], oot_probs)
    ax.plot(fpr, tpr, linestyle="--", alpha=0.7, label=f"OOT {label} (AUC={auc:.4f})")

ax.plot([0, 1], [0, 1], "k--", alpha=0.3)
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curves - Classic Model")
ax.legend(loc="lower right")
plt.tight_layout()
plt.show()

## 12. Scorecard Generation

In [None]:
# Scorecard scaling parameters come straight from the config.
scorecard_settings = config.scorecard
scorecard_gen = ScorecardGenerator(
    target_score=scorecard_settings.target_score,
    target_odds=scorecard_settings.target_odds,
    pdo=scorecard_settings.pdo,
)

scorecard_df = scorecard_gen.generate(
    model=lr_model,
    scaler=scaler,
    woe_transformer=woe_transformer,
    feature_names=selected_features,
)

print("Scorecard:")
print(scorecard_df.to_string(index=False))

# Min/max points of each selected feature that appears in the scorecard.
print("\nPoints range per feature:")
for feat in selected_features:
    feat_points = scorecard_df.loc[scorecard_df["Feature"] == feat, "Points"]
    if len(feat_points) > 0:
        print(f"  {feat}: [{feat_points.min()}, {feat_points.max()}]")

# Overall range: sum of the per-feature minima and of the per-feature maxima.
points_by_feature = scorecard_df.groupby("Feature")["Points"]
total_min = points_by_feature.min().sum()
total_max = points_by_feature.max().sum()
print(f"\nTotal score range: [{total_min}, {total_max}]")

## 13. Scorecard Application

In [None]:
def _apply_scorecard(frame):
    """Score one raw (untransformed) frame with the generated scorecard."""
    return scorecard_gen.score(frame, woe_transformer, selected_features)


# Score train and test, then each OOT quarter.
train_scores = _apply_scorecard(datasets.train)
test_scores = _apply_scorecard(datasets.test)

print(f"Train scores: min={train_scores.min()}, max={train_scores.max()}, mean={train_scores.mean():.1f}, std={train_scores.std():.1f}")
print(f"Test scores:  min={test_scores.min()}, max={test_scores.max()}, mean={test_scores.mean():.1f}, std={test_scores.std():.1f}")

for label in sorted(datasets.oot_quarters.keys()):
    oot_scores = _apply_scorecard(datasets.oot_quarters[label])
    print(f"OOT {label}: min={oot_scores.min()}, max={oot_scores.max()}, mean={oot_scores.mean():.1f}")

# Score distribution split by target value (0 = "Good", anything else = "Bad"),
# one panel for train and one for test.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
panels = [
    ("Train Score Distribution", train_scores, datasets.train[target] == 0),
    ("Test Score Distribution", test_scores, datasets.test[target] == 0),
]
for ax, (panel_title, scores, is_good) in zip(axes, panels):
    ax.hist(scores[is_good], bins=30, alpha=0.6, label="Good", color="#2ecc71", density=True)
    ax.hist(scores[~is_good], bins=30, alpha=0.6, label="Bad", color="#e74c3c", density=True)
    ax.set_xlabel("Score")
    ax.set_ylabel("Density")
    ax.set_title(panel_title)
    ax.legend()

plt.tight_layout()
plt.show()

## 14. Evaluation Metrics

In [None]:
# Evaluate the adapter on train, test and every OOT quarter.
evaluation = evaluate_model_quarterly(
    model=adapter,
    selected_features=woe_selected,
    train_df=woe_train,
    test_df=woe_test,
    oot_quarters=woe_oot,
    target_column=target,
)
performance_df, lift_tables, _ = evaluation

print("Performance by period:")
print(performance_df.to_string(index=False))

# Coefficient-based feature importance: each |coefficient| as a share of the
# total (left unnormalized when the total is not positive).
coefficients = lr_model.coef_[0]
abs_coefs = np.abs(coefficients)
total_coef = abs_coefs.sum()
if total_coef > 0:
    importances = abs_coefs / total_coef
else:
    importances = abs_coefs

importance_df = pd.DataFrame({
    "Feature": woe_selected,
    "Coefficient": coefficients,
    "Importance": importances,
})
importance_df = importance_df.sort_values("Importance", ascending=False).reset_index(drop=True)
importance_df["Rank"] = range(1, len(importance_df) + 1)
importance_df["Cumulative_Importance"] = importance_df["Importance"].cumsum()

print("\nFeature Importance (coefficient-based):")
print(importance_df.to_string(index=False))

## 15. Lift Tables

In [None]:
# Print every lift table under a banner naming its period.
rule = "=" * 60
for period_name, lt in lift_tables.items():
    banner = "\n".join(["", rule, f"Lift Table: {period_name}", rule])
    print(banner)
    print(lt.to_string(index=False))

## 16. Bootstrap CI and Score PSI

In [None]:
# Bootstrap CI
# bootstrap_df stays None when bootstrapping is disabled; the save cell checks for that.
bootstrap_df = None
if config.evaluation.bootstrap.enabled:
    periods_for_bootstrap = [("Train", woe_train), ("Test", woe_test)]
    periods_for_bootstrap.extend(
        (f"OOT_{label}", woe_oot[label]) for label in sorted(woe_oot.keys())
    )

    bootstrap_df = bootstrap_auc_ci(
        model=adapter,
        selected_features=woe_selected,
        datasets=periods_for_bootstrap,
        target_column=target,
        n_iterations=config.evaluation.bootstrap.n_iterations,
        confidence_level=config.evaluation.bootstrap.confidence_level,
        n_jobs=n_jobs,
    )

    if bootstrap_df is not None and not bootstrap_df.empty:
        ci_columns = ["CI_Lower", "CI_Upper"]
        ci_cols = bootstrap_df[["Period", *ci_columns]].copy()
        # Remove CI columns left by an earlier run of this cell; merging again
        # would otherwise produce suffixed duplicates (CI_Lower_x / CI_Lower_y).
        performance_df = performance_df.drop(columns=ci_columns, errors="ignore")
        performance_df = performance_df.merge(ci_cols, on="Period", how="left")

        print("Bootstrap AUC Confidence Intervals:")
        print(bootstrap_df.to_string(index=False))
    else:
        # Guarded: calling .to_string() on a None result would raise.
        print("Bootstrap AUC Confidence Intervals: no results returned")
else:
    print("Bootstrap CI disabled")

# Score PSI
# Compares the training score distribution with each OOT period's scores.
print("\n")
score_psi_df = None
if config.evaluation.calculate_score_psi:
    train_probs_for_psi = adapter.predict_proba(woe_train[woe_selected])[:, 1]
    oot_scores_dict = {
        f"OOT_{label}": adapter.predict_proba(woe_oot[label][woe_selected])[:, 1]
        for label in sorted(woe_oot.keys())
    }

    score_psi_df = compute_score_psi(train_probs_for_psi, oot_scores_dict)
    print("Score PSI (train vs OOT periods):")
    print(score_psi_df.to_string(index=False))
else:
    print("Score PSI disabled")

## 17. Save Outputs

In [None]:
from datetime import datetime
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils.dataframe import dataframe_to_rows
import numpy as np

# Timestamped run directory under the configured output base directory.
run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = Path(config.output.base_dir).joinpath(run_id)
output_dir.mkdir(parents=True, exist_ok=True)

excel_path = str(output_dir.joinpath(f"classic_model_{run_id}.xlsx"))

# Simple Excel writer: workbook plus the shared header and border styles.
wb = Workbook()
header_font = Font(bold=True, color="FFFFFF", size=11)
header_fill = PatternFill("solid", fgColor="4472C4")
thin_border = Border(
    **{edge: Side(style="thin") for edge in ("left", "right", "top", "bottom")}
)

def write_df(ws, df, start_row=1):
    """Write ``df`` into worksheet ``ws``, header row first, starting at ``start_row``.

    The DataFrame index is not written. Every written cell gets the thin
    border; cells in the header row also get the header font, the header fill
    and centered alignment.
    """
    rows = dataframe_to_rows(df, index=False, header=True)
    for row_number, values in enumerate(rows, start=start_row):
        is_header = row_number == start_row
        for col_number, value in enumerate(values, start=1):
            cell = ws.cell(row=row_number, column=col_number, value=value)
            cell.border = thin_border
            if not is_header:
                continue
            cell.font = header_font
            cell.fill = header_fill
            cell.alignment = Alignment(horizontal="center")

# Sheet 1 (Performance) reuses the workbook's default sheet.
performance_sheet = wb.active
performance_sheet.title = "Performance"
write_df(performance_sheet, performance_df)

# IV table with a flag marking the features that made the final selection.
iv_out = iv_df.copy()
iv_out["Selected"] = iv_out["Feature"].isin(selected_features)

# Sheets 2-4: one DataFrame per sheet.
for sheet_title, frame in [
    ("Scorecard", scorecard_df),
    ("Coefficients", importance_df),
    ("IV_Summary", iv_out),
]:
    write_df(wb.create_sheet(sheet_title), frame)

# Sheet 5: all lift tables stacked, each under a bold period title and
# followed by one blank row.
lift_sheet = wb.create_sheet("Lift_Tables")
next_row = 1
for period_name, lt in lift_tables.items():
    title_cell = lift_sheet.cell(row=next_row, column=1, value=period_name)
    title_cell.font = Font(bold=True)
    write_df(lift_sheet, lt, start_row=next_row + 1)
    # title row + header row + data rows + one spacer row
    next_row += len(lt) + 3

# Sheets 6-7: written only when the corresponding result exists and has rows.
for sheet_title, frame in [
    ("Bootstrap_CI", bootstrap_df),
    ("Score_PSI", score_psi_df),
]:
    if frame is not None and not frame.empty:
        write_df(wb.create_sheet(sheet_title), frame)

wb.save(excel_path)
print(f"Excel report saved: {excel_path}")

# Save WoE binning
woe_path = str(output_dir / "woe_binning.json")
woe_transformer.export_binning(woe_path)

# Save model (model, scaler and both feature-name lists in one joblib file).
if config.output.save_model:
    import joblib

    model_path = str(output_dir / "classic_model.joblib")
    model_bundle = {
        "model": lr_model,
        "scaler": scaler,
        "feature_names": woe_selected,
        "selected_features": selected_features,
    }
    joblib.dump(model_bundle, model_path)
    print(f"Model saved: {model_path}")

print(f"\nAll outputs: {output_dir}")

## 18. Alternative: Full Pipeline Mode

Run everything in one call using `ClassicModelPipeline`.

In [None]:
# Uncomment to run the full pipeline in one call:

# from src.classic_model.pipeline import ClassicModelPipeline
#
# pipeline = ClassicModelPipeline(
#     input_path=config.data.input_path,
#     train_end_date=config.splitting.train_end_date,
#     output_dir=config.output.base_dir,
#     config=config,
# )
#
# results = pipeline.run()
#
# print(f"Status: {results['status']}")
# print(f"Selected features: {results.get('n_selected', 0)}")
# print(f"Excel: {results.get('excel_path')}")
# print(f"Run dir: {results.get('run_dir')}")