# Credit Scoring Model Development Pipeline

Interactive notebook for step-by-step model development. Uses the same code paths as `scripts/run_model_development.py`.

**Steps:**
1. Load config and data, split into Train / Test / OOT
2. Data quality checks
3. Constant feature elimination
4. Missing value elimination
5. IV (Information Value) filtering
6. PSI stability filtering
7. Correlation elimination
8. Sequential feature selection (forward/backward with CV)
9. VIF multicollinearity check
10. Hyperparameter tuning (Optuna)
11. Model evaluation (quarterly metrics, lift tables, importance)
12. Score PSI, Bootstrap CI, Calibration, SHAP, Validation
13. Generate Excel report and save outputs

Cells share state: run them in order, because each cell uses variables created by the cells above it.

## 1. Setup

In [1]:
import sys
from pathlib import Path
import os

# Resolve the repository root: when the kernel was started inside notebooks/,
# the root is one level up; otherwise the current directory is the root.
_cwd = Path.cwd()
project_root = str(_cwd.parent if _cwd.name == "notebooks" else _cwd)

# Put the root on sys.path so the `src.*` imports below resolve, and make it
# the working directory so relative paths such as config/... resolve as well.
if project_root not in sys.path:
    sys.path.insert(0, project_root)
os.chdir(project_root)

import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# the libraries used below — narrow the filter if that becomes a problem.
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import random
import logging

# Configure the root logger: INFO and above, rendered as
# "HH:MM:SS | LEVEL | message" (the level name is left-aligned to 5 characters).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)-5s | %(message)s",
    datefmt="%H:%M:%S",
)

from src.config.loader import load_config, save_config
from src.config.schema import PipelineConfig
from src.io.output_manager import OutputManager
from src.model_development.data_loader import load_and_split
from src.model_development.eliminators import (
    ConstantEliminator,
    MissingEliminator,
    IVEliminator,
    PSIEliminator,
    CorrelationEliminator,
    VIFEliminator,
    QuarterlyPSICheck,
    YearlyPSICheck,
    ConsecutiveQuarterPSICheck,
    HalfSplitPSICheck,
    DateSplitPSICheck,
)
from src.model_development.feature_selector import sequential_feature_selection
from src.model_development.hyperparameter_tuner import tune_hyperparameters
from src.model_development.evaluator import (
    evaluate_model_quarterly,
    bootstrap_auc_ci,
    compute_score_psi,
)
from src.model_development import excel_reporter
from src.validation.data_checks import DataValidator
from src.validation.model_checks import ModelValidator
import xgboost as xgb

# Echo the resolved root and the process working directory; after the
# os.chdir(project_root) call earlier in this cell both should be the same path.
print(f"Project root: {project_root}")
print(f"Working dir:  {os.getcwd()}")

Project root: /Users/ahmetalinuhoglu/Documents/Personal/Projects/an-model-development
Working dir:  /Users/ahmetalinuhoglu/Documents/Personal/Projects/an-model-development


## 2. Load Config

Single YAML config drives everything. Override values for this session without editing the file.

In [2]:
# One YAML file configures every step of the pipeline.
config = load_config("config/model_development.yaml")

# Session-only overrides: use the call below instead and edit the keys as needed.
# config = load_config("config/model_development.yaml", cli_overrides={
#     "splitting.train_end_date": "2024-06-30",
#     "steps.iv.min_iv": 0.03,
#     "model.params.max_depth": 4,
# })

n_jobs = config.reproducibility.n_jobs
seed = config.reproducibility.global_seed


def _on_off(flag):
    """Render a boolean config switch as 'enabled' or 'disabled'."""
    return "enabled" if flag else "disabled"


# Echo the settings that drive the steps below.
print(f"Input:          {config.data.input_path}")
print(f"Train end date: {config.splitting.train_end_date}")
print(f"Target:         {config.data.target_column}")
print(f"Seed:           {seed}")
print(f"n_jobs:         {n_jobs}")
print(f"IV range:       [{config.steps.iv.min_iv}, {config.steps.iv.max_iv}]")
print(f"PSI threshold:  {config.steps.psi.threshold}")
print(f"Corr threshold: {config.steps.correlation.threshold}")
print(f"Selection:      {config.steps.selection.method} (max {config.steps.selection.max_features})")
print(f"VIF:            {_on_off(config.steps.vif.enabled)} (threshold {config.steps.vif.threshold})")
print(f"Tuning:         {_on_off(config.model.tuning.enabled)} ({config.model.tuning.n_trials} trials)")
print(f"Calibration:    {_on_off(config.evaluation.calibration.enabled)} ({config.evaluation.calibration.method})")
print(f"SHAP:           {_on_off(config.evaluation.shap.enabled)}")
print(f"Bootstrap CI:   {_on_off(config.evaluation.bootstrap.enabled)}")

# Seed the global NumPy and stdlib random generators with the configured seed.
np.random.seed(seed)
random.seed(seed)

04:35:56 | INFO  | Loaded config from config/model_development.yaml


Input:          data/sample/sample_features.parquet
Train end date: 2024-06-30
Target:         target
Seed:           42
n_jobs:         -1
IV range:       [0.02, 0.5]
PSI threshold:  0.25
Corr threshold: 0.8
Selection:      forward (max 20)
VIF:            enabled (threshold 5.0)
Tuning:         enabled (100 trials)
Calibration:    enabled (platt)
SHAP:           enabled
Bootstrap CI:   enabled


## 3. Load Data & Split

Uses `load_and_split()` — the same function called by the script pipeline.
The training period is split into train and test sets (stratified random or temporal); rows dated after `train_end_date` form the OOT sets, split automatically by quarter.

In [3]:
# Read the input file and split it into train / test / OOT-quarter frames,
# with every split setting taken from the config.
_split_settings = {
    "input_path": config.data.input_path,
    "train_end_date": config.splitting.train_end_date,
    "target_column": config.data.target_column,
    "date_column": config.data.date_column,
    "test_size": config.splitting.test_size,
    "stratify": config.splitting.stratify,
}
datasets = load_and_split(**_split_settings)

target = config.data.target_column
features = list(datasets.feature_columns)

# Feature matrices and target vectors used by the steps that follow.
X_train, y_train = datasets.train[features], datasets.train[target]
X_test, y_test = datasets.test[features], datasets.test[target]

# Size and bad rate of every split.
print(f"Features: {len(features)}")
print(f"Train: {len(datasets.train):,} rows, bad rate: {y_train.mean():.2%}")
print(f"Test:  {len(datasets.test):,} rows, bad rate: {y_test.mean():.2%}")
for label in datasets.oot_labels:
    qdf = datasets.oot_quarters[label]
    oot_bad_rate = qdf[target].mean()
    print(f"OOT {label}: {len(qdf):,} rows, bad rate: {oot_bad_rate:.2%}")

04:35:56 | INFO  | Loading data from data/sample/sample_features.parquet
04:35:57 | INFO  | Loaded 11,529 rows, 948 columns
04:35:57 | INFO  | Train end date: 2024-06-30
04:35:57 | INFO  | Training period: 9,342 rows (2022-01-01 to 2024-06-30)
04:35:57 | INFO  | Feature columns: 943
04:35:57 | INFO  | Train: 7,473 rows (bad rate: 17.48%)
04:35:57 | INFO  | Test: 1,869 rows (bad rate: 17.50%)
04:35:57 | INFO  | OOT 2024Q3: 916 rows (bad rate: 17.69%)
04:35:57 | INFO  | OOT 2024Q4: 926 rows (bad rate: 16.31%)
04:35:57 | INFO  | OOT 2025Q1: 345 rows (bad rate: 20.29%)


Features: 943
Train: 7,473 rows, bad rate: 17.48%
Test:  1,869 rows, bad rate: 17.50%
OOT 2024Q3: 916 rows, bad rate: 17.69%
OOT 2024Q4: 926 rows, bad rate: 16.31%
OOT 2025Q1: 345 rows, bad rate: 20.29%


## 4. Data Quality Checks

Automated pre-pipeline checks: target validation, date validation, duplicates, leakage detection.

In [4]:
# The data validator works on the full, unsplit DataFrame, so read the input
# file again: CSV when the path ends with '.csv', parquet otherwise.
_read_raw = pd.read_csv if config.data.input_path.endswith('.csv') else pd.read_parquet
df_raw = _read_raw(config.data.input_path)

data_validator = DataValidator(config)
data_report = data_validator.validate(df_raw)
print(data_report.summary())

# When the report flags critical failures, list each failed check with its suggested fix.
if data_report.has_critical_failures:
    print("\nCRITICAL FAILURES — review before proceeding:")
    for failed in (chk for chk in data_report.checks if chk.status.value == "FAIL"):
        print(f"  FAIL: {failed.check_name} — {failed.message}")
        print(f"        Fix: {failed.recommendation}")

del df_raw  # the raw frame is not needed after validation

04:35:58 | INFO  | DATA_CHECK | PASS | Non-empty dataset | Dataset has 11,529 rows.
04:35:58 | INFO  | DATA_CHECK | PASS | Target column exists | Target column 'target' present.
04:35:58 | INFO  | DATA_CHECK | PASS | Target is binary | Target is binary (0/1).
04:35:58 | INFO  | DATA_CHECK | PASS | Target has no nulls | No null values in target.
04:35:58 | INFO  | DATA_CHECK | PASS | Bad rate within range | Bad rate 17.4863% is within acceptable range.
04:35:58 | INFO  | DATA_CHECK | PASS | Date column exists | Date column 'application_date' present and parseable.
04:35:58 | INFO  | DATA_CHECK | PASS | Date range coverage | Date range covers 13 quarters (2022-01-01 to 2025-01-31).
04:35:58 | INFO  | DATA_CHECK | PASS | Sufficient sample size | 11,529 rows available.
04:35:58 | INFO  | DATA_CHECK | PASS | Leakage detection | No features with suspiciously high AUC detected.


  [+] Non-empty dataset: Dataset has 11,529 rows.
  [+] Target column exists: Target column 'target' present.
  [+] Target is binary: Target is binary (0/1).
  [+] Target has no nulls: No null values in target.
  [+] Bad rate within range: Bad rate 17.4863% is within acceptable range.
  [+] Date column exists: Date column 'application_date' present and parseable.
  [+] Date range coverage: Date range covers 13 quarters (2022-01-01 to 2025-01-31).
  [!] Features are numeric: 1 non-numeric feature(s) found.
  [!] No duplicate IDs: 1,529 duplicate application_id values found.
  [+] Sufficient sample size: 11,529 rows available.
  [+] Leakage detection: No features with suspiciously high AUC detected.


---
## Pipeline Steps

Each step narrows the feature set. Run cells in order.

Elimination results are collected for the Excel report.

### Step 1: Constant Elimination
Remove features with fewer than 2 distinct values (zero variance).

In [5]:
# Fresh accumulator: each elimination step appends its result here.
elimination_results = []

const_elim = ConstantEliminator()
const_result = const_elim.eliminate(X_train, y_train, features)
elimination_results.append(const_result)
features = const_result.kept_features

# Feature count going into this step = kept + eliminated.
_n_in = const_result.n_kept + const_result.n_eliminated
print(f"{const_result.step_name}: {_n_in} -> "
      f"{const_result.n_kept} features ({const_result.n_eliminated} eliminated)")

if const_result.eliminated_features:
    _details = const_result.details_df
    elim_df = _details[_details["Status"] == "Eliminated"]
    print(f"\nEliminated features ({len(elim_df)}):")
    # Show at most the first ten eliminated features.
    _preview = elim_df[["Feature", "Unique_Count"]].head(10)
    print(_preview.to_string(index=False))

04:36:00 | INFO  | CONSTANT | Eliminated 131 features (812 remaining)


01_Constant: 943 -> 812 features (131 eliminated)

Eliminated features (131):
                                       Feature  Unique_Count
                     co_applicant_default_rate             1
                            moved_to_unsecured             1
 installment_sale_defaulted_amount_std_last_6m             1
             mortgage_recovered_count_last_24m             1
      mortgage_recovered_total_amount_last_24m             1
    mortgage_recovered_average_amount_last_24m             1
installment_loan_recovered_amount_std_last_12m             1
        mortgage_recovered_max_amount_last_24m             1
         mortgage_recovered_amount_std_last_3m             1
         mortgage_recovered_min_amount_last_3m             1


### Step 2: Missing Elimination
Remove features with missing rate above threshold on training data.

In [6]:
# Eliminate features using the missing-rate threshold from the config.
missing_elim = MissingEliminator(max_missing_rate=config.steps.missing.threshold)
missing_result = missing_elim.eliminate(X_train, y_train, features)
elimination_results.append(missing_result)
features = missing_result.kept_features

# Feature count going into this step = kept + eliminated.
_n_in = missing_result.n_kept + missing_result.n_eliminated
print(f"{missing_result.step_name}: {_n_in} -> "
      f"{missing_result.n_kept} features ({missing_result.n_eliminated} eliminated)")

if missing_result.eliminated_features:
    _details = missing_result.details_df
    elim_df = _details[_details["Status"] == "Eliminated"]
    print(f"\nEliminated features ({len(elim_df)}):")
    # Show at most the first ten eliminated features.
    _preview = elim_df[["Feature", "Missing_Rate"]].head(10)
    print(_preview.to_string(index=False))

04:36:00 | INFO  | MISSING | Eliminated 0 features (812 remaining)


02_Missing: 812 -> 812 features (0 eliminated)


### Step 3: IV Elimination
Remove features with IV below `min_iv` (useless) or above `max_iv` (suspicious leakage).

In [7]:
# Filter features by Information Value using the [min_iv, max_iv] band from the config.
iv_elim = IVEliminator(min_iv=config.steps.iv.min_iv, max_iv=config.steps.iv.max_iv, n_jobs=n_jobs)
iv_result = iv_elim.eliminate(X_train, y_train, features)
elimination_results.append(iv_result)
features = iv_result.kept_features

# Feature -> IV lookup for downstream use (correlation, selection, VIF).
# Rows without an IV are skipped. pandas stores missing values as NaN, which an
# `is not None` test would let through, so drop them explicitly.
_scored = iv_result.details_df.dropna(subset=["IV_Score"])
iv_scores = dict(zip(_scored["Feature"], _scored["IV_Score"]))

print(f"{iv_result.step_name}: {iv_result.n_kept + iv_result.n_eliminated} -> "
      f"{iv_result.n_kept} features ({iv_result.n_eliminated} eliminated)")

# IV distribution
print(f"\nIV Category Distribution:")
print(iv_result.details_df["IV_Category"].value_counts().to_string())

# Top features by IV (kept features only, highest IV first)
kept_df = iv_result.details_df[iv_result.details_df["Status"] == "Kept"].sort_values("IV_Score", ascending=False)
print(f"\nTop 10 features by IV:")
print(kept_df[["Feature", "IV_Score", "IV_Category", "Univariate_AUC"]].head(10).to_string(index=False))

04:36:03 | INFO  | IV | Eliminated 657 features (155 remaining)


03_IV_Analysis: 812 -> 155 features (657 eliminated)

IV Category Distribution:
IV_Category
useless       623
weak           90
medium         63
suspicious     34
strong          2

Top 10 features by IV:
                        Feature  IV_Score IV_Category  Univariate_AUC
       is_monthly_payment_total    0.3401      strong          0.6298
             default_count_ever    0.3026      strong          0.8466
  installment_sale_total_amount    0.2941      medium          0.6280
installment_sale_average_amount    0.2933      medium          0.6272
                  is_avg_amount    0.2933      medium          0.6272
    installment_sale_max_amount    0.2929      medium          0.6279
    installment_sale_min_amount    0.2703      medium          0.6264
      payment_to_exposure_ratio    0.2222      medium          0.6350
             total_credit_count    0.2175      medium          0.6247
    multi_product_default_count    0.2159      medium          0.8458


### Step 4: PSI Stability Elimination
Remove features with unstable distributions within training data. PSI checks are built from config (quarterly, yearly, consecutive, etc.).

In [8]:
# Translate the PSI check entries in the config into check objects
# (same as run_model_development.py).
check_map = {
    "quarterly": QuarterlyPSICheck,
    "yearly": YearlyPSICheck,
    "consecutive": ConsecutiveQuarterPSICheck,
    "halfsplit": HalfSplitPSICheck,
}
psi_checks = []
for check_cfg in config.steps.psi.checks:
    if check_cfg.type in check_map:
        # Checks in the map are constructed without arguments.
        check_cls = check_map[check_cfg.type]
        psi_checks.append(check_cls())
    elif check_cfg.type == "date_split" and check_cfg.date:
        # A date split needs its cut-off date; entries without one are skipped.
        psi_checks.append(DateSplitPSICheck(check_cfg.date, label=check_cfg.label))

psi_elim = PSIEliminator(
    critical_threshold=config.steps.psi.threshold,
    checks=psi_checks,
    n_jobs=n_jobs,
)
# The eliminator also receives the date column of the training rows.
_train_dates = datasets.train[config.data.date_column]
psi_result = psi_elim.eliminate(X_train, y_train, features, train_dates=_train_dates)
elimination_results.append(psi_result)
features = psi_result.kept_features

_n_in = psi_result.n_kept + psi_result.n_eliminated
print(f"{psi_result.step_name}: {_n_in} -> "
      f"{psi_result.n_kept} features ({psi_result.n_eliminated} eliminated)")

if psi_result.eliminated_features:
    _details = psi_result.details_df
    elim_df = _details[_details["Status"] == "Eliminated"]
    print(f"\nUnstable features ({len(elim_df)}):")
    print(elim_df[["Feature", "Max_PSI", "Mean_PSI"]].head(10).to_string(index=False))

04:36:03 | INFO  | PSI | Check QuarterlyPSICheck: 10 comparison(s)
04:36:03 | INFO  | PSI | Check YearlyPSICheck: 3 comparison(s)
04:36:03 | INFO  | PSI | Check ConsecutiveQuarterPSICheck: 9 comparison(s)
04:36:03 | INFO  | PSI | Checking 155 features across 22 comparisons
04:36:03 | INFO  | PSI | Eliminated 0 features (155 remaining)


04_PSI_Stability: 155 -> 155 features (0 eliminated)


### Step 5: Correlation Elimination
Greedy removal: among correlated pairs, keep the higher-IV feature.

In [9]:
# Eliminate correlated features; the IV scores are passed to the eliminator.
corr_elim = CorrelationEliminator(max_correlation=config.steps.correlation.threshold)
corr_result = corr_elim.eliminate(X_train, y_train, features, iv_scores=iv_scores)
elimination_results.append(corr_result)
features = corr_result.kept_features

# Keep the pair table for the Excel report; None if the eliminator has no such attribute.
corr_pairs_df = getattr(corr_elim, "corr_pairs_df", None)

_n_in = corr_result.n_kept + corr_result.n_eliminated
print(f"{corr_result.step_name}: {_n_in} -> "
      f"{corr_result.n_kept} features ({corr_result.n_eliminated} eliminated)")

_has_pairs = corr_pairs_df is not None and len(corr_pairs_df) > 0
if _has_pairs:
    print(f"\nCorrelated pairs ({len(corr_pairs_df)}):")
    _pair_cols = ["Feature_A", "Feature_B", "Correlation", "Decision"]
    print(corr_pairs_df[_pair_cols].head(10).to_string(index=False))

04:36:04 | INFO  | CORRELATION | Computing pearson correlation matrix for 155 features
04:36:04 | INFO  | CORRELATION | Eliminated 74 features (81 remaining)


05_Correlation: 155 -> 81 features (74 eliminated)

Correlated pairs (179):
                      Feature_A                       Feature_B  Correlation                                                                    Decision
             default_count_ever     multi_product_default_count       0.9762                multi_product_default_count eliminated by default_count_ever
  installment_sale_total_amount installment_sale_average_amount       0.9570 installment_sale_average_amount eliminated by installment_sale_total_amount
  installment_sale_total_amount                   is_avg_amount       0.9570                   is_avg_amount eliminated by installment_sale_total_amount
  installment_sale_total_amount     installment_sale_max_amount       0.9798     installment_sale_max_amount eliminated by installment_sale_total_amount
  installment_sale_total_amount     installment_sale_min_amount       0.9109     installment_sale_min_amount eliminated by installment_sale_total_amount
  inst

### Step 6: Sequential Feature Selection
CV-based forward (or backward) selection with elbow detection (1-SE rule).
Saves a performance chart to the output directory.

In [10]:
# Create output manager for saving outputs
output_manager = OutputManager(config)

# Save a snapshot of the config into the run directory when requested.
if config.reproducibility.save_config:
    save_config(config, str(output_manager.run_dir / "config" / "pipeline_config.yaml"))

# Selection chart goes into the run's reports directory
selection_output_dir = str(output_manager.run_dir / "reports")

# Run the sequential selection on the features that survived the elimination steps.
selected_features, selection_df, chart_path = sequential_feature_selection(
    X_train=X_train[features],
    y_train=y_train,
    X_test=X_test[features],
    y_test=y_test,
    features=features,
    direction=config.steps.selection.method,
    cv=config.steps.selection.cv,
    min_features=config.steps.selection.min_features,
    max_features=config.steps.selection.max_features,
    tolerance=config.steps.selection.tolerance,
    patience=config.steps.selection.patience,
    iv_scores=iv_scores,
    xgb_params=config.model.params,
    n_jobs=n_jobs,
    output_dir=selection_output_dir,
)

print(f"\nSelected features ({len(selected_features)}):")
for i, feat in enumerate(selected_features, 1):
    # Features without a recorded IV are shown with IV=0.
    iv = iv_scores.get(feat, 0)
    print(f"  {i}. {feat} (IV={iv:.4f})")

print(f"\nSelection chart: {chart_path}")

# Show all selection steps, restricted to the summary columns that are
# actually present in selection_df.
print(f"\nSelection steps:")
cols = [c for c in ["Step", "N_Features", "Mean_CV_AUC", "Std_CV_AUC", "Is_Optimal"] if c in selection_df.columns]
print(selection_df[cols].to_string(index=False))

04:36:12 | INFO  | Output directory: outputs/model_development/20260212_043612_1dc021
04:36:12 | INFO  | Config saved to outputs/model_development/20260212_043612_1dc021/config/pipeline_config.yaml
04:36:12 | INFO  | SELECTION | Starting forward selection with 81 candidates, cv=5, max_features=20, tolerance=0.001, patience=3, n_jobs=-1
04:36:12 | INFO  | SELECTION | Forward step 1/20: evaluating 81 candidates...
04:36:13 | INFO  | SELECTION | Step 1: ADDED default_count_ever (IV=0.3026), CV AUC=0.8466 ± 0.0131
04:36:13 | INFO  | SELECTION | Forward step 2/20: evaluating 80 candidates...
04:36:13 | INFO  | SELECTION | Step 2: ADDED recovered_total_amount (IV=0.1737), CV AUC=0.8626 ± 0.0120
04:36:13 | INFO  | SELECTION | Forward step 3/20: evaluating 79 candidates...
04:36:14 | INFO  | SELECTION | Step 3: ADDED newest_credit_age_months (IV=0.0238), CV AUC=0.8730 ± 0.0056
04:36:14 | INFO  | SELECTION | Forward step 4/20: evaluating 78 candidates...
04:36:15 | INFO  | SELECTION | Step 4: A


Selected features (3):
  1. default_count_ever (IV=0.3026)
  2. recovered_total_amount (IV=0.1737)
  3. newest_credit_age_months (IV=0.0238)

Selection chart: outputs/model_development/20260212_043612_1dc021/reports/selection_chart_20260212_043626.png

Selection steps:
 Step  N_Features  Mean_CV_AUC  Std_CV_AUC  Is_Optimal
    1           1     0.846575    0.013117       False
    2           2     0.862620    0.011953       False
    3           3     0.872956    0.005596        True
    4           4     0.874280    0.009492       False
    5           5     0.876345    0.004179       False
    6           6     0.875424    0.010323       False
    7           7     0.875699    0.009609       False
    8           8     0.878594    0.008959       False
    9           9     0.878309    0.008018       False
   10          10     0.875838    0.010283       False
   11          11     0.877377    0.009961       False


### Step 7: VIF Multicollinearity Check
Post-selection VIF check. Iteratively removes features with VIF above threshold, preserving higher-IV features when `iv_aware=True`.

In [11]:
# The VIF check runs only when it is enabled in the config and more than two
# features came out of the selection step.
vif_result = None
_vif_applicable = config.steps.vif.enabled and len(selected_features) > 2

if not _vif_applicable:
    print("VIF check skipped (disabled or too few features)")
else:
    vif_elim = VIFEliminator(
        threshold=config.steps.vif.threshold,
        iv_aware=config.steps.vif.iv_aware,
    )
    vif_result = vif_elim.eliminate(
        X_train, y_train, selected_features, iv_scores=iv_scores,
    )
    elimination_results.append(vif_result)
    selected_features = vif_result.kept_features

    _n_in = vif_result.n_kept + vif_result.n_eliminated
    print(f"{vif_result.step_name}: {_n_in} -> "
          f"{vif_result.n_kept} features ({vif_result.n_eliminated} eliminated)")

    _vif_details = vif_result.details_df
    if vif_result.n_eliminated > 0:
        # List what was removed.
        elim_df = _vif_details[_vif_details["Status"] == "Eliminated"]
        print(f"\nEliminated by VIF:")
        print(elim_df[["Feature", "VIF_Initial", "IV_Score", "Elimination_Round"]].to_string(index=False))
    else:
        # Nothing removed: show the VIF values of every feature instead.
        print("All features passed VIF check.")
        print(_vif_details[["Feature", "VIF_Initial", "VIF_Final", "IV_Score"]].to_string(index=False))

print(f"\nFinal selected features ({len(selected_features)}): {selected_features}")

04:36:30 | INFO  | VIF | Starting VIF check with 3 features, threshold=5.0
04:36:30 | INFO  | VIF | Iteration 1: max VIF=1.1548, all features below threshold
04:36:30 | INFO  | VIF | Eliminated 0 features (3 remaining)


07_VIF: 3 -> 3 features (0 eliminated)
All features passed VIF check.
                 Feature  VIF_Initial  VIF_Final  IV_Score
      default_count_ever       1.1548     1.1548    0.3026
  recovered_total_amount       1.1508     1.1508    0.1737
newest_credit_age_months       1.0046     1.0046    0.0238

Final selected features (3): ['default_count_ever', 'recovered_total_amount', 'newest_credit_age_months']


### Step 8: Hyperparameter Tuning (Optuna)
Optuna-based Bayesian optimization with TPE sampler and stratified CV.

In [12]:
# Produce `final_model`: tuned with Optuna when tuning is enabled, otherwise
# trained once with the configured (or fallback) XGBoost parameters.
tuning_df = None
best_params = None

if config.model.tuning.enabled:
    print(f"Running Optuna tuning: {config.model.tuning.n_trials} trials, "
          f"stability_weight={config.model.tuning.stability_weight}, n_jobs={n_jobs}")
    best_params, tuning_df, final_model = tune_hyperparameters(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        features=selected_features,
        n_trials=config.model.tuning.n_trials,
        timeout=config.model.tuning.timeout,
        cv=config.model.tuning.cv,
        n_jobs=n_jobs,
        oot_quarters=datasets.oot_quarters,
        target_column=config.data.target_column,
        stability_weight=config.model.tuning.stability_weight,
    )
    print(f"\nBest params: {best_params}")
    if tuning_df is not None:
        print(f"\nTrial history ({len(tuning_df)} trials):")
        print(tuning_df.head(10).to_string(index=False))
else:
    print("Tuning disabled — training with default/configured params")
    # Train with configured params (same as pipeline._train_default_model).
    # Work on a copy so the config object itself is never mutated.
    params = (config.model.params or {}).copy()
    if not params:
        # Fallback used only when the config supplies no model params at all.
        params = {
            "objective": "binary:logistic",
            "eval_metric": "auc",
            "max_depth": 6,
            "learning_rate": 0.1,
            "n_estimators": 300,
            "subsample": 0.8,
            "colsample_bytree": 0.8,
            "random_state": 42,
            "n_jobs": -1,
            "verbosity": 0,
        }
    # Auto-balance: replace only the "auto" placeholder with the neg/pos ratio
    # of the training target. An explicitly configured numeric weight is kept
    # as-is (popping the key unconditionally would silently discard it).
    neg_count = (y_train == 0).sum()
    pos_count = (y_train == 1).sum()
    if params.get("scale_pos_weight") == "auto":
        params["scale_pos_weight"] = neg_count / pos_count
    # early_stopping_rounds to constructor (xgboost >= 2.0); default to 30
    # when the config does not set it.
    early_stopping_rounds = params.setdefault("early_stopping_rounds", 30)

    final_model = xgb.XGBClassifier(**params)
    # The test split serves as the evaluation set for early stopping.
    final_model.fit(
        X_train[selected_features], y_train,
        eval_set=[(X_test[selected_features], y_test)],
        verbose=False,
    )
    print(f"Model trained with {final_model.n_estimators} estimators")

04:36:33 | INFO  | TUNING | Starting Optuna tuning — stability-aware, 100 trials, n_jobs=-1, stability_weight=1.0
04:36:33 | INFO  | TUNING | scale_pos_weight=4.7221 (neg=6167, pos=1306)
04:36:33 | INFO  | TUNING | Trial 7: Score=0.8374 (Mean=0.8497, Std=0.0123) [Train=0.8484, Test=0.8401, OOT_2024Q3=0.8559, OOT_2024Q4=0.8345, OOT_2025Q1=0.8696] (new best)
04:36:33 | INFO  | TUNING | Trial 5: Score=0.8389 (Mean=0.8503, Std=0.0115) [Train=0.8507, Test=0.8404, OOT_2024Q3=0.8568, OOT_2024Q4=0.8359, OOT_2025Q1=0.8679] (new best)
04:36:33 | INFO  | TUNING | Trial 1: Score=0.8574 (Mean=0.8676, Std=0.0102) [Train=0.8835, Test=0.8640, OOT_2024Q3=0.8734, OOT_2024Q4=0.8531, OOT_2025Q1=0.8643] (new best)
04:36:33 | INFO  | TUNING | Trial 11: Score=0.8580 (Mean=0.8701, Std=0.0121) [Train=0.8781, Test=0.8651, OOT_2024Q3=0.8741, OOT_2024Q4=0.8492, OOT_2025Q1=0.8838] (new best)
04:36:33 | INFO  | TUNING | Trial 0: Score=0.8565 (Mean=0.8691, Std=0.0126) [Train=0.8841, Test=0.8650, OOT_2024Q3=0.8767, O

Running Optuna tuning: 100 trials, stability_weight=1.0, n_jobs=-1


04:36:33 | INFO  | TUNING | Trial 10: Score=0.8586 (Mean=0.8699, Std=0.0113) [Train=0.8769, Test=0.8667, OOT_2024Q3=0.8769, OOT_2024Q4=0.8491, OOT_2025Q1=0.8799] (new best)
04:36:33 | INFO  | TUNING | Trial 14: Score=0.8587 (Mean=0.8700, Std=0.0112) [Train=0.8794, Test=0.8635, OOT_2024Q3=0.8750, OOT_2024Q4=0.8511, OOT_2025Q1=0.8809] (new best)
04:36:33 | INFO  | TUNING | Trial 20: Score=0.8586 (Mean=0.8684, Std=0.0098) [Train=0.8814, Test=0.8665, OOT_2024Q3=0.8753, OOT_2024Q4=0.8524, OOT_2025Q1=0.8665]
04:36:33 | INFO  | TUNING | Trial 32: Score=0.8604 (Mean=0.8702, Std=0.0097) [Train=0.8740, Test=0.8678, OOT_2024Q3=0.8813, OOT_2024Q4=0.8526, OOT_2025Q1=0.8750] (new best)
04:36:33 | INFO  | TUNING | Trial 30: Score=0.8505 (Mean=0.8704, Std=0.0200) [Train=0.9027, Test=0.8599, OOT_2024Q3=0.8816, OOT_2024Q4=0.8448, OOT_2025Q1=0.8631]
04:36:33 | INFO  | TUNING | Trial 40: Score=0.8559 (Mean=0.8713, Std=0.0155) [Train=0.8960, Test=0.8604, OOT_2024Q3=0.8802, OOT_2024Q4=0.8520, OOT_2025Q1=0.8


Best params: {'max_depth': 6, 'learning_rate': 0.08669738540392355, 'subsample': 0.9521916554373138, 'colsample_bytree': 0.9171587552444311, 'min_child_weight': 44, 'gamma': 1.709553907594803, 'reg_alpha': 0.2450903148386696, 'reg_lambda': 0.001048827151275742, 'objective': 'binary:logistic', 'eval_metric': 'auc', 'verbosity': 0, 'n_jobs': -1, 'random_state': 42, 'early_stopping_rounds': 30, 'scale_pos_weight': 4.722052067381317, 'n_estimators': 1000}

Trial history (100 trials):
 Trial  max_depth  learning_rate  n_estimators  subsample  colsample_bytree  min_child_weight    gamma  reg_alpha  reg_lambda  AUC_Train  AUC_Test  AUC_OOT_2024Q3  AUC_OOT_2024Q4  AUC_OOT_2025Q1  AUC_Mean  AUC_Std  Score  Duration_Sec
     0          5       0.034323          1000   0.886774          0.815078                21 0.732054   0.279215    0.047655     0.8841    0.8650          0.8767          0.8472          0.8723    0.8691   0.0126 0.8565           0.2
     1          6       0.074287          10

### Step 9: Model Evaluation
Evaluate the final model on Train, Test, and each OOT quarter. Produces performance table, lift tables, and feature importance.

In [13]:
# Evaluate the final model on train, test and the OOT quarters; the call
# returns a performance table, lift tables and a feature-importance table.
performance_df, lift_tables, importance_df = evaluate_model_quarterly(
    model=final_model,
    selected_features=selected_features,
    train_df=datasets.train,
    test_df=datasets.test,
    oot_quarters=datasets.oot_quarters,
    target_column=config.data.target_column,
    importance_type=config.evaluation.importance_type,
)

# Print both summary tables, each under its heading.
for _heading, _table in (
    ("Performance by period:", performance_df),
    ("\nFeature importance:", importance_df),
):
    print(_heading)
    print(_table.to_string(index=False))

04:36:37 | INFO  | OOT | Train: AUC=0.8834, Gini=0.7668, KS=0.6895
04:36:37 | INFO  | OOT | Test: AUC=0.8678, Gini=0.7357, KS=0.6831
04:36:37 | INFO  | OOT | OOT_2024Q3: AUC=0.8743, Gini=0.7486, KS=0.7139
04:36:37 | INFO  | OOT | OOT_2024Q4: AUC=0.8557, Gini=0.7115, KS=0.6589
04:36:37 | INFO  | OOT | OOT_2025Q1: AUC=0.8683, Gini=0.7367, KS=0.7452


Performance by period:
    Period  N_Samples  N_Bads  Bad_Rate    AUC   Gini     KS  Precision_at_10pct  Lift_at_10pct
     Train       7473    1306    0.1748 0.8834 0.7668 0.6895              0.7336           4.20
      Test       1869     327    0.1750 0.8678 0.7357 0.6831              0.6613           3.78
OOT_2024Q3        916     162    0.1769 0.8743 0.7486 0.7139              0.7692           4.35
OOT_2024Q4        926     151    0.1631 0.8557 0.7115 0.6589              0.6957           4.27
OOT_2025Q1        345      70    0.2029 0.8683 0.7367 0.7452              0.8529           4.20

Feature importance:
                 Feature  Importance  Rank  Cumulative_Importance
      default_count_ever    0.944123     1               0.944123
  recovered_total_amount    0.047945     2               0.992068
newest_credit_age_months    0.007932     3               1.000000


### Lift Tables
Decile-based lift tables for each evaluation period.

In [14]:
# Print every lift table under a 60-character banner naming its period.
_rule = "=" * 60
for period, lt in lift_tables.items():
    print(f"\n{_rule}")
    print(f"Lift Table: {period}")
    print(f"{_rule}")
    print(lt.to_string(index=False))


Lift Table: Train
decile  Score_Min  Score_Max  Score_Mean  Count  Bads  Bad_Rate     Lift  Cum_Count  Cum_Bads  Cum_Bad_Rate  Cum_Lift  Capture_Rate
     1     0.7523     0.7833      0.7592    748   548    0.7326 4.191975        748       548      0.732620  4.192092      0.419602
     2     0.6553     0.7523      0.7211    747   417    0.5582 3.194049       1495       965      0.645485  3.693498      0.738897
     3     0.3136     0.6553      0.4810    747   133    0.1780 1.018525       2242      1098      0.489741  2.802325      0.840735
     4     0.2987     0.3136      0.3050    747    46    0.0616 0.352478       2989      1144      0.382737  2.190039      0.875957
     5     0.2985     0.2987      0.2985    747    39    0.0522 0.298691       3736      1183      0.316649  1.811881      0.905819
     6     0.2889     0.2985      0.2926    748    29    0.0388 0.222016       4484      1212      0.270294  1.546639      0.928025
     7     0.2889     0.2889      0.2889    747    28    

---
## Post-Evaluation Enhanced Steps

Score PSI, Bootstrap CI, Calibration, SHAP, and Validation — same as the pipeline script.

In [15]:
# Step 9a: Score PSI — compare the model's scores on train with each OOT quarter
score_psi_df = None
if not config.evaluation.calculate_score_psi:
    print("Score PSI disabled")
else:
    def _scores(frame):
        """Return column 1 of predict_proba for the selected features of `frame`."""
        return final_model.predict_proba(frame[selected_features])[:, 1]

    train_probs = _scores(datasets.train)
    # One score array per OOT quarter, keyed "OOT_<label>", in sorted label order.
    oot_scores = {
        f"OOT_{label}": _scores(datasets.oot_quarters[label])
        for label in sorted(datasets.oot_quarters)
    }
    score_psi_df = compute_score_psi(train_probs, oot_scores)
    print("Score PSI (train vs OOT periods):")
    print(score_psi_df.to_string(index=False))

Score PSI (train vs OOT periods):
Period_1   Period_2    PSI Status
   Train OOT_2024Q3 0.0159 Stable
   Train OOT_2024Q4 0.0182 Stable
   Train OOT_2025Q1 0.0402 Stable


### Bootstrap AUC Confidence Intervals

In [16]:
# Step 9b: Bootstrap CI — confidence intervals on AUC for each period
#
# Runs bootstrap_auc_ci over Train, Test and every OOT quarter (sorted by label)
# and attaches the CI_Lower / CI_Upper columns to performance_df.
# The cell is safe to re-run: CI columns left by a previous run are replaced
# instead of being merged a second time.
bootstrap_df = None
if config.evaluation.bootstrap.enabled:
    periods_for_bootstrap = [("Train", datasets.train), ("Test", datasets.test)]
    periods_for_bootstrap.extend(
        (f"OOT_{label}", datasets.oot_quarters[label])
        for label in sorted(datasets.oot_quarters.keys())
    )

    bootstrap_df = bootstrap_auc_ci(
        model=final_model,
        selected_features=selected_features,
        datasets=periods_for_bootstrap,
        target_column=config.data.target_column,
        n_iterations=config.evaluation.bootstrap.n_iterations,
        confidence_level=config.evaluation.bootstrap.confidence_level,
        n_jobs=n_jobs,
    )

    # Merge CI columns into performance_df
    if bootstrap_df is not None and not bootstrap_df.empty:
        ci_cols = bootstrap_df[["Period", "CI_Lower", "CI_Upper"]].copy()
        # Without this drop, a second execution of the cell would leave
        # CI_Lower_x / CI_Lower_y (and CI_Upper_x / _y) in performance_df,
        # because merge suffixes overlapping non-key columns.
        performance_df = performance_df.drop(
            columns=["CI_Lower", "CI_Upper"], errors="ignore"
        ).merge(ci_cols, on="Period", how="left")

    print("Bootstrap AUC Confidence Intervals:")
    if bootstrap_df is None:
        # The guard above already treats None as a possible return value;
        # report it instead of failing on None.to_string().
        print("  (no bootstrap results returned)")
    else:
        print(bootstrap_df.to_string(index=False))
else:
    print("Bootstrap CI disabled")

04:36:48 | INFO  | Bootstrap | Train: AUC=0.8834 [0.8716, 0.8945] (1000 iterations)
04:36:48 | INFO  | Bootstrap | Test: AUC=0.8678 [0.8424, 0.8932] (1000 iterations)
04:36:48 | INFO  | Bootstrap | OOT_2024Q3: AUC=0.8743 [0.8395, 0.9105] (1000 iterations)
04:36:48 | INFO  | Bootstrap | OOT_2024Q4: AUC=0.8557 [0.8162, 0.8908] (1000 iterations)
04:36:48 | INFO  | Bootstrap | OOT_2025Q1: AUC=0.8683 [0.8075, 0.9195] (1000 iterations)


Bootstrap AUC Confidence Intervals:
    Period    AUC  CI_Lower  CI_Upper  N_Bootstrap
     Train 0.8834    0.8716    0.8945         1000
      Test 0.8678    0.8424    0.8932         1000
OOT_2024Q3 0.8743    0.8395    0.9105         1000
OOT_2024Q4 0.8557    0.8162    0.8908         1000
OOT_2025Q1 0.8683    0.8075    0.9195         1000


### Probability Calibration

In [17]:
# Step 9c: Calibration — fit the calibrator on test-set scores, then print before/after metrics
calibration_dict = None
if not config.evaluation.calibration.enabled:
    print("Calibration disabled")
else:
    from src.evaluation.calibrator import ModelCalibrator

    # Labels and second-column predict_proba scores for the test split.
    y_test_vals = datasets.test[config.data.target_column].values
    test_probs = final_model.predict_proba(datasets.test[selected_features])[:, 1]

    calibrator = ModelCalibrator(method=config.evaluation.calibration.method)
    # NOTE(review): the calibrator is fitted and then assessed on the same
    # test-set arrays — confirm the "after" metrics are meant to be in-sample.
    calibrator.fit(y_test_vals, test_probs)
    cal_result = calibrator.get_calibration_result(y_test_vals, test_probs)
    calibration_dict = cal_result.to_dict()

    report_lines = [
        f"Calibration ({cal_result.method}):",
        f"  Brier score: {cal_result.brier_score_before:.4f} -> {cal_result.brier_score_after:.4f}",
        f"  ECE:         {cal_result.ece_before:.4f} -> {cal_result.ece_after:.4f}",
        f"  H-L chi2:    {cal_result.hosmer_lemeshow_chi2:.4f} (p={cal_result.hosmer_lemeshow_pvalue:.4f})",
    ]
    print("\n".join(report_lines))

Calibration (platt):
  Brier score: 0.1472 -> 0.0897
  ECE:         0.2335 -> 0.0332
  H-L chi2:    17.5369 (p=0.0142)


### SHAP Analysis

In [18]:
# Step 9d: SHAP — compute SHAP values on the training features and save summary plots
shap_summary = None
shap_plot_paths = None
if not config.evaluation.shap.enabled:
    print("SHAP analysis disabled")
else:
    from src.model_development.shap_analyzer import (
        compute_shap_values,
        save_shap_plots,
        shap_summary_df,
    )

    # compute_shap_values also returns the feature names and the rows it
    # actually used (X_shap), which the plotting helper needs.
    shap_vals, feat_names, X_shap = compute_shap_values(
        model=final_model,
        X=datasets.train[selected_features],
        max_samples=config.evaluation.shap.max_samples,
    )
    shap_summary = shap_summary_df(shap_vals, feat_names)

    # Plots go into this run's "reports" directory.
    shap_output_dir = str(output_manager.run_dir / "reports")
    shap_plot_paths = save_shap_plots(shap_vals, X_shap, shap_output_dir)

    print("SHAP Summary:")
    print(shap_summary.to_string(index=False))
    if shap_plot_paths:
        print(f"\nSHAP plots saved: {shap_plot_paths}")

04:36:54 | INFO  | Sampled 500 rows from 7473 for SHAP computation.
04:36:54 | INFO  | Computed SHAP values: 500 samples x 3 features.
04:36:54 | INFO  | Saved SHAP bar plot: outputs/model_development/20260212_043612_1dc021/reports/shap_summary_bar.png
04:36:54 | INFO  | Saved SHAP beeswarm plot: outputs/model_development/20260212_043612_1dc021/reports/shap_beeswarm.png


SHAP Summary:
                 Feature  Mean_Abs_SHAP  Rank
      default_count_ever       0.908176     1
  recovered_total_amount       0.048540     2
newest_credit_age_months       0.041280     3

SHAP plots saved: ['outputs/model_development/20260212_043612_1dc021/reports/shap_summary_bar.png', 'outputs/model_development/20260212_043612_1dc021/reports/shap_beeswarm.png']


### Model Validation Checks
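Automated quality checks: discrimination (AUC and Gini), overfitting, OOT stability, score PSI, feature concentration, and OOT sample size. A monotonicity check is also listed for the pipeline, but it does not appear in this run's output.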

In [19]:
# Step 9e: Validation — same checks as the pipeline script
#
# Runs ModelValidator on the performance, importance and score-PSI tables,
# prints its summary, and lists recommendations for non-passing checks.
# has_critical_failures is read by the final save cell.
validation_report_df = None
has_critical_failures = False
if config.validation.enabled:
    model_validator = ModelValidator(config)
    val_report = model_validator.validate(
        performance_df=performance_df,
        importance_df=importance_df,
        score_psi_df=score_psi_df,
    )
    validation_report_df = val_report.to_dataframe()
    has_critical_failures = val_report.has_critical_failures

    print(val_report.summary())

    # Recommendations for failures/warnings
    issues = [c for c in val_report.checks if c.status.value != "PASS"]
    # Only checks that actually carry a recommendation are printable; filtering
    # first avoids a dangling "Recommendations:" header with nothing under it.
    actionable = [c for c in issues if c.recommendation]
    if actionable:
        print("\nRecommendations:")
        for check in actionable:
            print(f"  [{check.status.value}] {check.check_name}: {check.recommendation}")
else:
    print("Validation checks disabled")

04:36:55 | INFO  | MODEL_CHECK | PASS | Discrimination (AUC) | All periods have AUC >= 0.65 (min observed: 0.8557).
04:36:55 | INFO  | MODEL_CHECK | PASS | Discrimination (Gini) | All Gini values >= 0.3000 (min: 0.7115).
04:36:55 | INFO  | MODEL_CHECK | PASS | Overfitting | AUC gap Train-Test = 0.0156 within threshold 0.0500.
04:36:55 | INFO  | MODEL_CHECK | PASS | OOT stability | All OOT periods within 0.0800 of test AUC (min OOT AUC: 0.8557).
04:36:55 | INFO  | MODEL_CHECK | PASS | Score PSI | Score PSI 0.0402 within threshold 0.2500.
04:36:55 | INFO  | MODEL_CHECK | PASS | OOT sample size | All OOT periods have >= 30 bads (min: 70).


  [+] Discrimination (AUC): All periods have AUC >= 0.65 (min observed: 0.8557).
  [+] Discrimination (Gini): All Gini values >= 0.3000 (min: 0.7115).
  [+] Overfitting: AUC gap Train-Test = 0.0156 within threshold 0.0500.
  [+] OOT stability: All OOT periods within 0.0800 of test AUC (min OOT AUC: 0.8557).
  [+] Score PSI: Score PSI 0.0402 within threshold 0.2500.
  [!] Feature concentration: Feature 'default_count_ever' contributes 94.41% of total importance (threshold: 50%).
  [+] OOT sample size: All OOT periods have >= 30 bads (min: 70).

Recommendations:


---
## Save Outputs & Generate Excel Report

Produces the same Excel report, with all of its sheets, as the script pipeline.

In [20]:
from datetime import datetime

# Build summary dict (same structure as pipeline._build_summary).
# The dict keeps insertion order, so the statement order below is also the
# order of the entries passed to the Excel reporter.

# Feature counts remaining after each elimination step.
n_total = len(datasets.feature_columns)
n_after_const = const_result.n_kept
n_after_missing = missing_result.n_kept
n_after_iv = iv_result.n_kept
n_after_psi = psi_result.n_kept
n_after_corr = corr_result.n_kept
# selected_features is treated as the post-VIF list here: adding back the
# VIF-eliminated count reconstructs the size right after sequential selection.
n_after_sel = len(selected_features) + (vif_result.n_eliminated if vif_result else 0)
n_after_vif = len(selected_features)

train_dates = datasets.train[config.data.date_column]
oot_labels_str = ", ".join(datasets.oot_labels) if datasets.oot_labels else "None"

# NOTE(review): `target` is not defined in this cell — presumably set in an
# earlier cell (other cells use config.data.target_column); confirm.
# Whitespace-only keys of different lengths ("", " ", ...) are distinct dict
# keys with empty values — presumably blank separator rows in the report
# (TODO confirm against the Excel reporter).
summary = {
    "Run Date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "Run ID": output_manager.run_id,
    "Input File": config.data.input_path,
    "Train End Date": config.splitting.train_end_date,
    "Train Period": f"{train_dates.min().strftime('%Y-%m-%d')} to {train_dates.max().strftime('%Y-%m-%d')}",
    "OOT Periods": oot_labels_str,
    "Train Rows": len(datasets.train),
    "Test Rows": len(datasets.test),
    "Train Bad Rate": f"{datasets.train[target].mean():.2%}",
    "Test Bad Rate": f"{datasets.test[target].mean():.2%}",
    "": "",
    "Total Features": n_total,
    "After Constant Elimination": f"{n_after_const} ({n_total - n_after_const} eliminated)",
    "After Missing Elimination": f"{n_after_missing} ({n_after_const - n_after_missing} eliminated)",
    "After IV Elimination": f"{n_after_iv} ({n_after_missing - n_after_iv} eliminated)",
    "After PSI Elimination": f"{n_after_psi} ({n_after_iv - n_after_psi} eliminated)",
    "After Correlation Elimination": f"{n_after_corr} ({n_after_psi - n_after_corr} eliminated)",
    "After Sequential Selection": f"{n_after_sel} ({n_after_corr - n_after_sel} skipped)",
    "After VIF Check": f"{n_after_vif} ({n_after_sel - n_after_vif} eliminated)",
    " ": "",
    "Selection Method": config.steps.selection.method,
    "Selection CV Folds": config.steps.selection.cv,
}

# Tuning details are reported only when tuning is enabled AND produced parameters.
if config.model.tuning.enabled and best_params:
    summary["Tuning Enabled"] = "Yes"
    summary["Tuning Trials"] = config.model.tuning.n_trials
    # Falls back to "N/A" when best_params has no "_best_cv_auc" entry.
    best_auc = best_params.get("_best_cv_auc", "N/A")
    summary["Tuning Best CV AUC"] = best_auc
else:
    summary["Tuning Enabled"] = "No"

# One AUC and one Gini entry per row of performance_df, keyed by its Period.
summary["  "] = ""
for _, row in performance_df.iterrows():
    period = row["Period"]
    summary[f"AUC {period}"] = row["AUC"]
    summary[f"Gini {period}"] = row["Gini"]

# Thresholds taken from the config, formatted as strings.
summary["   "] = ""
summary["IV Range"] = f"[{config.steps.iv.min_iv}, {config.steps.iv.max_iv}]"
summary["Missing Threshold"] = f"{config.steps.missing.threshold:.0%}"
summary["PSI Threshold"] = str(config.steps.psi.threshold)
summary["Correlation Threshold"] = str(config.steps.correlation.threshold)
summary["VIF Threshold"] = str(config.steps.vif.threshold) if config.steps.vif.enabled else "Disabled"

# Generate Excel report (same call as pipeline.py)
excel_path = str(output_manager.run_dir / "reports" / f"model_dev_{output_manager.run_id}.xlsx")

# Optional inputs (VIF details, SHAP plot) are passed as None when the
# corresponding result is missing; only the first SHAP plot path is passed.
excel_reporter.generate_report(
    output_path=excel_path,
    summary=summary,
    elimination_results=elimination_results,
    corr_pairs_df=corr_pairs_df,
    selection_df=selection_df,
    performance_df=performance_df,
    lift_tables=lift_tables,
    importance_df=importance_df,
    vif_df=vif_result.details_df if vif_result else None,
    tuning_df=tuning_df,
    tuning_best_params=best_params,
    chart_path=chart_path,
    score_psi_df=score_psi_df,
    bootstrap_df=bootstrap_df,
    shap_summary_df=shap_summary,
    shap_plot_path=shap_plot_paths[0] if shap_plot_paths else None,
    calibration_dict=calibration_dict,
    validation_report_df=validation_report_df,
)

# Step 9f: Save model artifact
model_path = None
if config.output.save_model:
    model_path = output_manager.save_artifact("model", final_model, fmt="joblib")
    print(f"Model saved: {model_path}")

# Save run metadata
# The run is marked "success" unconditionally; critical validation failures
# only produce the warning printed further down.
output_manager.mark_complete("success")
if config.reproducibility.save_metadata:
    output_manager.save_run_metadata()

# Final recap of where everything was written.
print(f"\nRun ID: {output_manager.run_id}")
print(f"Excel report: {excel_path}")
if chart_path:
    print(f"Selection chart: {chart_path}")
if shap_plot_paths:
    print(f"SHAP plots: {shap_plot_paths}")
if has_critical_failures:
    print("WARNING: Validation found critical failures!")
print(f"All outputs: {output_manager.run_dir}")

04:36:57 | INFO  | EXCEL | Embedded selection chart in 06_Selection
04:36:57 | INFO  | EXCEL | Embedded SHAP plot in 10_SHAP
04:36:57 | INFO  | COMPLETE | Excel saved: outputs/model_development/20260212_043612_1dc021/reports/model_dev_20260212_043612_1dc021.xlsx


Model saved: outputs/model_development/20260212_043612_1dc021/data/model.joblib


04:36:57 | INFO  | Run metadata saved to outputs/model_development/20260212_043612_1dc021/run_metadata.json



Run ID: 20260212_043612_1dc021
Excel report: outputs/model_development/20260212_043612_1dc021/reports/model_dev_20260212_043612_1dc021.xlsx
Selection chart: outputs/model_development/20260212_043612_1dc021/reports/selection_chart_20260212_043626.png
SHAP plots: ['outputs/model_development/20260212_043612_1dc021/reports/shap_summary_bar.png', 'outputs/model_development/20260212_043612_1dc021/reports/shap_beeswarm.png']
All outputs: outputs/model_development/20260212_043612_1dc021


---
## Alternative: Full Pipeline Mode

Run everything in one call using `ModelDevelopmentPipeline` — the exact same class used by the CLI script.

In [None]:
# Uncomment to run the full pipeline in one call.
#
# This cell does not reuse objects from the step-by-step cells above: it
# re-imports what it needs, reloads the config from
# "config/model_development.yaml", and creates its own OutputManager.
# The PSI checks are rebuilt from config.steps.psi.checks; entries whose type
# is neither in check_map nor a "date_split" with a date are skipped.

# from src.config.loader import load_config, save_config
# from src.io.output_manager import OutputManager
# from src.model_development.pipeline import ModelDevelopmentPipeline
# from src.model_development.eliminators import (
#     QuarterlyPSICheck, YearlyPSICheck, ConsecutiveQuarterPSICheck,
#     HalfSplitPSICheck, DateSplitPSICheck,
# )
#
# config = load_config("config/model_development.yaml")
# output_manager = OutputManager(config)
#
# if config.reproducibility.save_config:
#     save_config(config, str(output_manager.run_dir / "config" / "pipeline_config.yaml"))
#
# # Build PSI checks from config
# check_map = {"quarterly": QuarterlyPSICheck, "yearly": YearlyPSICheck,
#               "consecutive": ConsecutiveQuarterPSICheck, "halfsplit": HalfSplitPSICheck}
# psi_checks = []
# for c in config.steps.psi.checks:
#     if c.type in check_map:
#         psi_checks.append(check_map[c.type]())
#     elif c.type == "date_split" and c.date:
#         psi_checks.append(DateSplitPSICheck(c.date, label=c.label))
#
# pipeline = ModelDevelopmentPipeline(
#     input_path=config.data.input_path,
#     train_end_date=config.splitting.train_end_date,
#     output_dir=str(output_manager.run_dir / "reports"),
#     iv_min=config.steps.iv.min_iv,
#     iv_max=config.steps.iv.max_iv,
#     missing_threshold=config.steps.missing.threshold,
#     psi_threshold=config.steps.psi.threshold,
#     correlation_threshold=config.steps.correlation.threshold,
#     test_size=config.splitting.test_size,
#     target_column=config.data.target_column,
#     date_column=config.data.date_column,
#     xgb_params=config.model.params,
#     psi_checks=psi_checks,
#     selection_method=config.steps.selection.method,
#     selection_cv=config.steps.selection.cv,
#     selection_max_features=config.steps.selection.max_features,
#     selection_min_features=config.steps.selection.min_features,
#     selection_tolerance=config.steps.selection.tolerance,
#     selection_patience=config.steps.selection.patience,
#     vif_enabled=config.steps.vif.enabled,
#     vif_threshold=config.steps.vif.threshold,
#     vif_iv_aware=config.steps.vif.iv_aware,
#     tuning_enabled=config.model.tuning.enabled,
#     tuning_n_trials=config.model.tuning.n_trials,
#     tuning_timeout=config.model.tuning.timeout,
#     tuning_cv=config.model.tuning.cv,
#     config=config,
#     output_manager=output_manager,
# )
#
# results = pipeline.run()
#
# output_manager.mark_complete(results.get("status", "unknown"))
# if config.reproducibility.save_metadata:
#     output_manager.save_run_metadata()
#
# print(f"Status: {results['status']}")
# print(f"Selected features: {results['after_selection']}")
# print(f"Excel: {results['excel_path']}")
# print(f"Run dir: {output_manager.run_dir}")