# Final Workflow: Roll Rate Model

Notebook gọn nhẹ chỉ giữ code chính:
1. Load data
2. Build transition matrices
3. Build lifecycle + calibration
4. Allocate (fast) xuống loan-level
5. Export reports

In [None]:
# Setup
# Put the parent of the resolved current working directory at the front of
# sys.path (only if it is not already there) so the `src` package imported
# below can be found.
# NOTE(review): presumably the notebook is run from a sub-folder one level
# below the project root — confirm, otherwise the `src` imports will fail.
import sys
from pathlib import Path
project_root = Path(".").resolve().parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

import pandas as pd
import numpy as np
from datetime import datetime

# Project-local configuration, loaders and roll-rate helpers.
# NOTE(review): `np`, `BUCKETS_90P` and `build_full_lifecycle_amount` are not
# referenced in the cells visible here; confirm before pruning.
from src.config import CFG, BUCKETS_CANON, BUCKETS_30P, BUCKETS_90P
from src.config import parse_date_column, create_segment_columns, SEGMENT_COLS
from src.data_loader import load_data
from src.rollrate.transition import compute_transition_by_mob
from src.rollrate.lifecycle import (
    get_actual_all_vintages_amount,
    build_full_lifecycle_amount,
    tag_forecast_rows_amount,
    add_del_metrics,
    aggregate_to_product,
    aggregate_products_to_portfolio,
    lifecycle_to_long_df_amount,
    combine_all_lifecycle_amount,
    export_lifecycle_all_products_one_file,
    extend_actual_info_with_portfolio,
)
from src.rollrate.calibration_kmob import (
    fit_k_raw, smooth_k, fit_alpha,
    forecast_all_vintages_partial_step,
)
from src.rollrate.allocation_v2_fast import allocate_multi_mob_with_scaling_fast

# Reaching this line means every import above succeeded.
print("‚úÖ Import th√†nh c√¥ng")

## 1️⃣ LOAD DATA

In [None]:
# ========== CONFIGURATION ==========
DATA_PATH = 'C:/Users/User/Projection_kiro/ETB_Parquet'  # change this path to your data location
MAX_MOB = 36  # forecast up to this MOB
TARGET_MOBS = [12, 24]  # MOBs at which the forecast is allocated to loan level
# ==============================

# Load the raw data, normalise the disbursal date column with the project's
# date parser, then add the segment columns.
# NOTE(review): 'DISBURSAL_DATE' is hard-coded here while later cells read the
# origination date through CFG['orig_date'] — confirm both name the same column.
df_raw = load_data(DATA_PATH)
df_raw['DISBURSAL_DATE'] = parse_date_column(df_raw['DISBURSAL_DATE'])
df_raw = create_segment_columns(df_raw)

# Quick sanity report: row count, distinct loans, and the segment dimensions.
print(f"üìä Data: {len(df_raw):,} rows | {df_raw[CFG['loan']].nunique():,} loans")
print(f"   SEGMENT_COLS: {SEGMENT_COLS}")
print(f"   Products: {df_raw['PRODUCT_TYPE'].unique().tolist()}")
print(f"   Risk scores: {df_raw['RISK_SCORE'].nunique()} unique")

## 2️⃣ BUILD TRANSITION MATRICES

In [None]:
# Compute the transition matrices from the raw data. The call returns two
# objects: `matrices_by_mob` (a mapping whose values are sized containers,
# counted in the report below) and `parent_fallback`, which is passed on to
# the calibration, forecast and allocation steps.
# NOTE(review): presumably `parent_fallback` is used when a segment has no
# matrix of its own — confirm in src.rollrate.transition.
print("üî® Building transition matrices...")
matrices_by_mob, parent_fallback = compute_transition_by_mob(df_raw)
print(f"‚úÖ {len(matrices_by_mob)} products | {sum(len(m) for m in matrices_by_mob.values())} matrices")

## 3️⃣ BUILD LIFECYCLE + CALIBRATION

In [None]:
print("üî® Building lifecycle + calibration...")

# Observed results for all vintages, as returned by the lifecycle helper.
actual_results = get_actual_all_vintages_amount(df_raw)

# Disbursed amount per (PRODUCT_TYPE, RISK_SCORE, origination date):
# take the first recorded disbursement of each loan, then sum the loans
# that share the same three leading group keys.
_vintage_keys = ["PRODUCT_TYPE", "RISK_SCORE", CFG["orig_date"]]
loan_disb = df_raw.groupby([*_vintage_keys, CFG["loan"]])[CFG["disb"]].first()
disb_total_by_vintage = loan_disb.groupby(level=[0, 1, 2]).sum().to_dict()

# Keyword arguments that fit_k_raw and fit_alpha both receive unchanged.
_shared_fit_kwargs = {
    "actual_results": actual_results,
    "matrices_by_mob": matrices_by_mob,
    "parent_fallback": parent_fallback,
    "states": BUCKETS_CANON,
    "s30_states": BUCKETS_30P,
    "include_co": True,
}

# Step 1: raw k per MOB together with its weights (third return value unused).
k_raw_by_mob, weight_by_mob, _ = fit_k_raw(
    denom_mode="disb",
    disb_total_by_vintage=disb_total_by_vintage,
    return_detail=True,
    **_shared_fit_kwargs,
)

# Step 2: smooth k over the MOB range that was actually fitted;
# both bounds fall back to 0 when nothing was fitted.
if k_raw_by_mob:
    mob_min, mob_max = min(k_raw_by_mob), max(k_raw_by_mob)
else:
    mob_min = mob_max = 0
k_smooth_by_mob, _, _ = smooth_k(k_raw_by_mob, weight_by_mob, mob_min, mob_max)

# Step 3: fit alpha. The target MOB is capped at MAX_MOB and falls back to
# MAX_MOB itself when mob_max is 0.
alpha, k_final_by_mob, _ = fit_alpha(
    k_smooth_by_mob=k_smooth_by_mob,
    mob_target=min(MAX_MOB, mob_max) if mob_max else MAX_MOB,
    **_shared_fit_kwargs,
)

print(f"‚úÖ Calibration: alpha={alpha:.4f} | k range: {min(k_final_by_mob.values()):.3f} ‚Üí {max(k_final_by_mob.values()):.3f}")

In [None]:
# Forecast every vintage up to MAX_MOB using the calibrated k_final factors.
forecast_calibrated = forecast_all_vintages_partial_step(
    actual_results=actual_results,
    matrices_by_mob=matrices_by_mob,
    parent_fallback=parent_fallback,
    max_mob=MAX_MOB,
    k_by_mob=k_final_by_mob,
    states=BUCKETS_CANON,
)

# Merge actual + forecast and convert to the long-format frame, then run the
# two post-processing helpers in order; each takes (frame, df_raw) and
# returns the updated frame.
lifecycle_combined = combine_all_lifecycle_amount(actual_results, forecast_calibrated)
df_lifecycle_final = lifecycle_to_long_df_amount(lifecycle_combined)
for _enrich in (tag_forecast_rows_amount, add_del_metrics):
    df_lifecycle_final = _enrich(df_lifecycle_final, df_raw)

print(f"‚úÖ Lifecycle: {len(df_lifecycle_final):,} rows | Forecast: {(df_lifecycle_final['IS_FORECAST']==1).sum():,}")

## 4️⃣ ALLOCATE TO LOAN-LEVEL (FAST)

In [None]:
print(f"üî® Allocating to loan-level (MOB {TARGET_MOBS})...")

# Keep only the rows of the most recent snapshot and work on a copy so that
# adding VINTAGE_DATE does not touch df_raw.
# NOTE(review): .max() assumes CUTOFF_DATE sorts chronologically (datetime or
# ISO-formatted text) — confirm load_data guarantees that.
latest_cutoff = df_raw['CUTOFF_DATE'].max()
df_loans_latest = df_raw[df_raw['CUTOFF_DATE'] == latest_cutoff].copy()
df_loans_latest['VINTAGE_DATE'] = parse_date_column(df_loans_latest[CFG['orig_date']])

# Allocate the lifecycle forecast down to individual loans at each target
# MOB; the fixed seed is passed so repeated runs use the same seed value.
df_loan_forecast = allocate_multi_mob_with_scaling_fast(
    df_loans_latest=df_loans_latest,
    df_lifecycle_final=df_lifecycle_final,
    matrices_by_mob=matrices_by_mob,
    target_mobs=TARGET_MOBS,
    parent_fallback=parent_fallback,
    include_del30=True,
    include_del90=True,
    seed=42
)

print(f"\n‚úÖ Allocation: {len(df_loan_forecast):,} loans")
for mob in TARGET_MOBS:
    # Same existence check the export cell applies before using the flag
    # column, so a missing MOB column is reported instead of raising KeyError.
    col = f'DEL90_FLAG_MOB{mob}'
    if col not in df_loan_forecast.columns:
        print(f"   MOB {mob}: DEL90 = n/a (column {col} not found)")
        continue
    # Share of loans flagged, as a percentage (count-based mean of the flag).
    del90 = df_loan_forecast[col].mean() * 100
    print(f"   MOB {mob}: DEL90 = {del90:.2f}%")

## 5️⃣ EXPORT REPORTS

In [None]:
print("üíæ Exporting...")

# All reports go under ./outputs, stamped with the run time so that repeated
# runs never overwrite each other.
output_dir = Path("outputs")
output_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# 1. Lifecycle: product-level rows plus one portfolio-wide aggregate.
df_product = aggregate_to_product(df_lifecycle_final)
df_portfolio = aggregate_products_to_portfolio(df_product, "PORTFOLIO_ALL")
df_del_all = pd.concat([df_product, df_portfolio], ignore_index=True)

# Last MOB with actual data per (product, vintage). actual_results is keyed
# by (product, score, vintage), so several scores map onto the same key:
# keep the highest MOB across them instead of letting the score iterated
# last overwrite the others.
actual_info = {}
for (prod, _score, vintage), mob_dict in actual_results.items():
    if not mob_dict:
        continue
    key = (prod, vintage)
    last_actual_mob = max(mob_dict)
    actual_info[key] = max(actual_info.get(key, last_actual_mob), last_actual_mob)
actual_info_all = extend_actual_info_with_portfolio(actual_info, "PORTFOLIO_ALL")

lifecycle_file = output_dir / f"Lifecycle_{timestamp}.xlsx"
export_lifecycle_all_products_one_file(df_del_all, actual_info_all, str(lifecycle_file))
print(f"   ‚úÖ {lifecycle_file}")

# 2. Loan forecast: one sheet with every loan, plus one sheet per target MOB
# holding only the loans whose DEL90 flag equals 1 at that MOB.
# NOTE(review): an .xlsx sheet holds at most 1,048,576 rows — confirm the
# loan count stays below that, or switch this export to CSV/Parquet.
loan_file = output_dir / f"Loan_Forecast_{timestamp}.xlsx"
with pd.ExcelWriter(loan_file, engine="xlsxwriter") as writer:
    df_loan_forecast.to_excel(writer, sheet_name="All_Loans", index=False)
    for mob in TARGET_MOBS:
        col = f'DEL90_FLAG_MOB{mob}'
        if col in df_loan_forecast.columns:
            df_loan_forecast[df_loan_forecast[col] == 1].to_excel(
                writer, sheet_name=f"DEL90_MOB{mob}", index=False
            )
print(f"   ‚úÖ {loan_file}")

print("\nüéâ DONE!")

## 📊 QUICK SUMMARY

In [None]:
# Console recap of the loan-level forecast: distinct loans, total current
# EAD, then the summed forecast EAD and the percentage of flagged loans for
# each target MOB.
_rule = "=" * 50

print(_rule)
print("üìä SUMMARY")
print(_rule)
print(f"Loans: {df_loan_forecast[CFG['loan']].nunique():,}")
print(f"EAD Current: {df_loan_forecast['EAD_CURRENT'].sum():,.0f}")

for mob in TARGET_MOBS:
    # The two figures are independent of each other; compute both before printing.
    del90 = df_loan_forecast[f'DEL90_FLAG_MOB{mob}'].mean() * 100
    ead = df_loan_forecast[f'EAD_FORECAST_MOB{mob}'].sum()
    print(f"\nMOB {mob}:")
    print(f"   EAD Forecast: {ead:,.0f}")
    print(f"   DEL90: {del90:.2f}%")

print(_rule)