# Complete Workflow: Roll Rate Model - Từ Data đến Report

Notebook này chạy toàn bộ workflow:
1. Load & prepare data
2. Build transition matrices
3. Forecast lifecycle
4. Calibration (k per MOB)
5. Apply calibration & aggregate
6. Allocate xuống loan-level (MOB 12 & 24) + Chi tiết hợp đồng
7. Analysis & visualization
8. Export reports

**Thời gian chạy:** ~5-10 phút (tùy data size)

In [None]:
# Setup path
import sys
from pathlib import Path

# Treat the parent of the current working directory as the project root and
# make it importable (presumably this is where the `src` package lives —
# confirm against the repository layout).
project_root = Path(".").resolve().parent
_root_entry = str(project_root)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

print(f"üìÅ Project root: {project_root}")

In [None]:
# Third-party analysis and plotting stack used by the cells below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Config
# CFG is indexed below with the keys "orig_date", "cutoff", "loan" and "disb";
# the BUCKETS_* names are passed to the calibration step as state lists.
from src.config import CFG, BUCKETS_CANON, BUCKETS_30P, BUCKETS_90P

# Data loader
from src.data_loader import load_data

# Transition
from src.rollrate.transition import compute_transition_by_mob

# Lifecycle & Forecast
from src.rollrate.lifecycle import (
    get_actual_all_vintages_amount,
    build_full_lifecycle_amount,
    tag_forecast_rows_amount,
    add_del_metrics,
    aggregate_to_product,
    aggregate_products_to_portfolio,
    export_lifecycle_all_products_one_file,
    extend_actual_info_with_portfolio,
)

# Calibration
from src.rollrate.calibration_kmob import (
    fit_k_raw,
    smooth_k,
    fit_alpha,
    forecast_all_vintages_partial_step,
    backtest_error_by_mob,
)

# Allocation
from src.rollrate.allocation_multi_mob import (
    allocate_multi_mob_with_del_metrics,
    compare_del_across_mobs,
    export_multi_mob_to_excel,
    pivot_del_by_product_mob,
)

# Confirmation message, printed only once every import above has succeeded.
print("‚úÖ Import th√†nh c√¥ng")

---
## 1️⃣ LOAD & PREPARE DATA

In [None]:
# Load the raw loan snapshots.
DATA_PATH = 'C:/Users/User/Projection_kiro/ETB_Parquet'  # NOTE: change this to your own data path

df_raw = load_data(DATA_PATH)

# Normalize columns.
# Parse DISBURSAL_DATE *before* snapping it to the first day of its month:
# calling `.replace(day=1)` on unparsed values raises TypeError when the
# column arrives as strings (str.replace accepts no `day` keyword).
df_raw['DISBURSAL_DATE'] = pd.to_datetime(df_raw['DISBURSAL_DATE']).apply(
    lambda ts: ts.replace(day=1)
)
df_raw["PRODUCT_TYPE"] = df_raw["PRODUCT_TYPE"].astype(str)
df_raw["RISK_SCORE"] = df_raw["GRADE"].astype(str)
# The configured origination-date column must be datetime-typed as well; it may
# or may not be the same column as DISBURSAL_DATE, so it is converted explicitly.
df_raw[CFG["orig_date"]] = pd.to_datetime(df_raw[CFG["orig_date"]])

# Quick overview of what was loaded.
print(f"\nüìä Data Summary:")
print(f"   Total rows: {len(df_raw):,}")
print(f"   Date range: {df_raw[CFG['cutoff']].min()} ‚Üí {df_raw[CFG['cutoff']].max()}")
print(f"   Products: {df_raw['PRODUCT_TYPE'].nunique()}")
print(f"   Risk scores: {df_raw['RISK_SCORE'].nunique()}")
print(f"   Loans: {df_raw[CFG['loan']].nunique():,}")

---
## 2️⃣ BUILD TRANSITION MATRICES

In [None]:
print("üî® Building transition matrices...")
print("   (C√≥ th·ªÉ m·∫•t 1-2 ph√∫t)\n")

# Two results come back: the matrices (a mapping whose values are themselves
# sized containers, one entry per matrix) and a fallback object that later
# calibration/allocation calls take as `parent_fallback`.
matrices_by_mob, parent_fallback = compute_transition_by_mob(df_raw)

n_matrix_groups = len(matrices_by_mob)
n_matrices_total = sum(map(len, matrices_by_mob.values()))

print(f"\n‚úÖ Ho√†n t·∫•t:")
print(f"   Products: {n_matrix_groups}")
print(f"   Total matrices: {n_matrices_total}")

---
## 3️⃣ BUILD LIFECYCLE (ACTUAL + FORECAST)

In [None]:
print("üî® Building lifecycle...")

# Forecast horizon: project up to MOB 36.
max_mob = 36

# Three passes over the same frame: build the lifecycle (actual + forecast),
# flag which rows are forecast, then attach the DEL metrics.
df_lifecycle = build_full_lifecycle_amount(
    df_raw=df_raw, matrices_by_mob=matrices_by_mob, max_mob=max_mob
)
df_lifecycle = tag_forecast_rows_amount(df_lifecycle, df_raw)
df_lifecycle = add_del_metrics(df_lifecycle, df_raw)

# Row counts split by the IS_FORECAST flag, plus the MOB span covered.
lifecycle_flag = df_lifecycle['IS_FORECAST']
lifecycle_mob = df_lifecycle['MOB']

print(f"\n‚úÖ Lifecycle:")
print(f"   Total rows: {len(df_lifecycle):,}")
print(f"   Actual rows: {(lifecycle_flag==0).sum():,}")
print(f"   Forecast rows: {(lifecycle_flag==1).sum():,}")
print(f"   MOB range: {lifecycle_mob.min()} ‚Üí {lifecycle_mob.max()}")

---
## 4️⃣ CALIBRATION (K PER MOB)

In [None]:
print("üî® Calibration: Fitting k per MOB...\n")

# State lists handed to the calibration routines.
states = BUCKETS_CANON
s30_states = BUCKETS_30P

# Observed (actual) results for all vintages.
actual_results = get_actual_all_vintages_amount(df_raw)

# Disbursed amount per cohort: take the first recorded value for each loan,
# then sum the loans within each (PRODUCT_TYPE, RISK_SCORE, orig_date) group —
# index levels 0-2 of the per-loan series.
cohort_and_loan_keys = ["PRODUCT_TYPE", "RISK_SCORE", CFG["orig_date"], CFG["loan"]]
loan_disb = df_raw.groupby(cohort_and_loan_keys)[CFG["disb"]].first()
cohort_disb = loan_disb.groupby(level=[0, 1, 2]).sum()
disb_total_by_vintage = cohort_disb.to_dict()

print("Step 1: Fit k_raw...")

# All fit settings gathered in one place; return_detail=True is what makes the
# call hand back the third value (k_raw_df).
k_raw_settings = dict(
    actual_results=actual_results,
    matrices_by_mob=matrices_by_mob,
    parent_fallback=parent_fallback,
    states=states,
    s30_states=s30_states,
    include_co=True,
    denom_mode="disb",
    disb_total_by_vintage=disb_total_by_vintage,
    min_disb=1e-10,
    weight_mode="equal",
    method="wls",
    eps=1e-8,
    min_denom=1e-10,
    min_obs=5,
    fallback_k=1.0,
    return_detail=True,
)
k_raw_by_mob, weight_by_mob, k_raw_df = fit_k_raw(**k_raw_settings)

print(f"\n‚úÖ k_raw fitted for {len(k_raw_by_mob)} MOBs")

In [None]:
print("Step 2: Smooth k...")

# Smooth across the MOB span that has a raw k; when nothing was fitted the
# span collapses to 0..0.
if not k_raw_by_mob:
    mob_min, mob_max = 0, 0
else:
    mob_min = min(k_raw_by_mob)
    mob_max = max(k_raw_by_mob)

smoothing_settings = dict(
    k_raw_by_mob=k_raw_by_mob,
    weight_by_mob=weight_by_mob,
    mob_min=mob_min,
    mob_max=mob_max,
    gamma=10.0,
    monotone=False,
    use_cvxpy=True,
    default_k=1.0,
)
# The third return value is not needed here.
k_smooth_by_mob, mobs, _ = smooth_k(**smoothing_settings)

print(f"‚úÖ k_smooth computed")

In [None]:
print("Step 3: Fit alpha (optional)...")

# Target MOB for the alpha fit: capped at the forecast horizon; when mob_max is
# 0/falsy (no raw k was fitted) the horizon itself is used.
if mob_max:
    ALPHA_TARGET_MOB = min(max_mob, mob_max)
else:
    ALPHA_TARGET_MOB = max_mob

alpha_settings = dict(
    actual_results=actual_results,
    matrices_by_mob=matrices_by_mob,
    parent_fallback=parent_fallback,
    states=states,
    s30_states=s30_states,
    k_smooth_by_mob=k_smooth_by_mob,
    mob_target=ALPHA_TARGET_MOB,
    include_co=True,
    alpha_grid=None,
    val_frac=0.2,
)
alpha, k_final_by_mob, alpha_scores = fit_alpha(**alpha_settings)

print(f"\n‚úÖ Calibration ho√†n t·∫•t:")
print(f"   Alpha: {alpha:.4f}")
print(f"   k_final: {len(k_final_by_mob)} MOBs")

In [None]:
# Plot the raw, smoothed and final k curves on one chart.
import matplotlib.pyplot as plt

# Union of every MOB that appears in any of the three curves; a MOB missing
# from a curve is plotted as NaN (a gap) rather than dropped.
mobs_list = sorted(set(k_raw_by_mob) | set(k_smooth_by_mob) | set(k_final_by_mob))
kr, ks, kf = (
    [k_by_mob.get(m, np.nan) for m in mobs_list]
    for k_by_mob in (k_raw_by_mob, k_smooth_by_mob, k_final_by_mob)
)

plt.figure(figsize=(12, 6))
curve_specs = [
    (kr, {"marker": "o", "label": "k_raw", "alpha": 0.6}),
    (ks, {"marker": "s", "label": "k_smooth", "linewidth": 2}),
    (kf, {"marker": "^", "label": "k_final", "linewidth": 2}),
]
for k_values, line_style in curve_specs:
    plt.plot(mobs_list, k_values, **line_style)
# Reference line at k = 1.0.
plt.axhline(y=1.0, color='gray', linestyle='--', alpha=0.5, label="k=1.0")
plt.xlabel("MOB")
plt.ylabel("k")
plt.title("Calibration k Curves")
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print("üìä k curves plotted")

---
## 5️⃣ APPLY CALIBRATION & AGGREGATE

In [None]:
print("üî® Applying calibration...")

# Forecast using the calibrated k_final values.
calibrated_forecast_settings = dict(
    actual_results=actual_results,
    matrices_by_mob=matrices_by_mob,
    parent_fallback=parent_fallback,
    max_mob=max_mob,
    k_by_mob=k_final_by_mob,
    states=states,
)
forecast_calibrated = forecast_all_vintages_partial_step(**calibrated_forecast_settings)

# Helpers for turning the lifecycle results into a DataFrame.
from src.rollrate.lifecycle import (
    combine_all_lifecycle_amount,
    lifecycle_to_long_df_amount,
)

# FIX: combine actual + forecast BEFORE converting to a DataFrame.
lifecycle_combined = combine_all_lifecycle_amount(
    actual=actual_results, forecast=forecast_calibrated
)

# Long format, then flag the forecast rows and attach the DEL metrics.
df_lifecycle_final = lifecycle_to_long_df_amount(lifecycle_combined)
df_lifecycle_final = tag_forecast_rows_amount(df_lifecycle_final, df_raw)
df_lifecycle_final = add_del_metrics(df_lifecycle_final, df_raw)

final_flag = df_lifecycle_final['IS_FORECAST']
final_mob = df_lifecycle_final['MOB']

print(f"\n‚úÖ Lifecycle final (calibrated):")
print(f"   Total rows: {len(df_lifecycle_final):,}")
print(f"   Actual rows: {(final_flag==0).sum():,}")
print(f"   Forecast rows (calibrated): {(final_flag==1).sum():,}")
print(f"   MOB range: {final_mob.min()} ‚Üí {final_mob.max()}")

# Check whether the forecast actually produced any rows.
df_fc_check = df_lifecycle_final[final_flag == 1]
if not df_fc_check.empty:
    fc_mob = df_fc_check['MOB']
    print(f"\n‚úÖ Forecast check:")
    print(f"   Forecast MOB range: {fc_mob.min()} ‚Üí {fc_mob.max()}")
    print(f"   Sample forecast EAD: {df_fc_check[BUCKETS_CANON].sum(axis=1).sum():,.0f}")
else:
    print("\n‚ö†Ô∏è WARNING: Kh√¥ng c√≥ forecast rows!")

In [None]:
print("üî® Aggregating to product level...")

# Sanity check before aggregating.
print(f"\nBefore aggregate:")
print(f"   Total rows: {len(df_lifecycle_final):,}")
print(f"   Actual: {(df_lifecycle_final['IS_FORECAST']==0).sum():,}")
print(f"   Forecast: {(df_lifecycle_final['IS_FORECAST']==1).sum():,}")

# Aggregate to product level.
df_product = aggregate_to_product(df_lifecycle_final)

# Sanity check after aggregating (IS_FORECAST may not survive the aggregation).
print(f"\nAfter aggregate to product:")
print(f"   Total rows: {len(df_product):,}")
if 'IS_FORECAST' in df_product.columns:
    print(f"   Actual: {(df_product['IS_FORECAST']==0).sum():,}")
    print(f"   Forecast: {(df_product['IS_FORECAST']==1).sum():,}")

# Aggregate the products into a single portfolio.
df_portfolio = aggregate_products_to_portfolio(
    df_product,
    portfolio_name="PORTFOLIO_ALL"
)

# Stack product-level and portfolio-level rows into one frame.
df_del_all = pd.concat([df_product, df_portfolio], ignore_index=True)

print(f"\n‚úÖ Aggregation complete:")
print(f"   Product-level: {len(df_product):,} rows")
print(f"   Portfolio-level: {len(df_portfolio):,} rows")
print(f"   Combined: {len(df_del_all):,} rows")

# Final check. A missing IS_FORECAST column counts as "no forecast rows":
# that is exactly the situation the warning text describes (the aggregation
# not preserving IS_FORECAST), and it used to pass without any message.
if 'IS_FORECAST' in df_del_all.columns:
    fc_count = (df_del_all['IS_FORECAST']==1).sum()
else:
    fc_count = 0

if fc_count == 0:
    print("\n‚ö†Ô∏è WARNING: df_del_all kh√¥ng c√≥ forecast rows!")
    print("   C√≥ th·ªÉ do aggregate_to_product() kh√¥ng preserve IS_FORECAST")
else:
    print(f"\n‚úÖ df_del_all c√≥ {fc_count:,} forecast rows")

---
## 6️⃣ ALLOCATE TO LOAN-LEVEL (MOB 12 & 24)

In [None]:
print("üî® Allocating forecast to loan-level...")
print("   Target MOBs: 12, 24")
print("   (C√≥ th·ªÉ m·∫•t 1-2 ph√∫t)\n")

# Allocation settings: MOB 12 and 24, "simple" method, DEL30 and DEL90
# requested, DEL60 switched off.
loan_allocation_settings = dict(
    df_lifecycle_final=df_lifecycle_final,
    df_raw=df_raw,
    target_mobs=[12, 24],
    allocation_method="simple",
    include_del30=True,
    include_del60=False,
    include_del90=True,
)
df_loan_forecast = allocate_multi_mob_with_del_metrics(**loan_allocation_settings)

print(f"\n‚úÖ Loan-level forecast: {len(df_loan_forecast):,} loans")

### 📋 Chi tiết hợp đồng (Loan Details)

Kết quả `df_loan_forecast` đã có sẵn tất cả thông tin chi tiết hợp đồng từ `df_raw`:
- AGREEMENT_ID, CUSTOMER_ID
- PRODUCT_TYPE, RISK_SCORE
- STATE_FORECAST_MOB12, STATE_FORECAST_MOB24
- DEL30_FLAG_MOB12, DEL90_FLAG_MOB12, ...
- Và tất cả các cột khác từ df_raw

In [None]:
print("üìä Chi ti·∫øt h·ª£p ƒë·ªìng sau khi allocate:\n")

# Report how many columns the allocation result carries.
print(f"1Ô∏è‚É£ T·ªïng s·ªë c·ªôt: {len(df_loan_forecast.columns)}")
print(f"\nüìã C√°c c·ªôt quan tr·ªçng:")
important_cols = [
    'AGREEMENT_ID', 'CUSTOMER_ID',
    'PRODUCT_TYPE', 'RISK_SCORE',
    'STATE_FORECAST_MOB12', 'STATE_FORECAST_MOB24',
    'DEL30_FLAG_MOB12', 'DEL90_FLAG_MOB12',
    'DEL30_FLAG_MOB24', 'DEL90_FLAG_MOB24',
]

# Keep (and list) only the important columns that are actually present.
available_cols = []
for col in important_cols:
    if col in df_loan_forecast.columns:
        available_cols.append(col)
        print(f"   ‚úÖ {col}")

# Show the first 10 contracts, restricted to those columns.
print(f"\n2Ô∏è‚É£ Sample 10 h·ª£p ƒë·ªìng ƒë·∫ßu ti√™n:")
display_cols = list(available_cols)
print(df_loan_forecast[display_cols].head(10))

In [None]:
# Breakdown by product.
print("\n3Ô∏è‚É£ Ph√¢n t√≠ch theo s·∫£n ph·∫©m:\n")

# Number of contracts per product.
loans_by_product = df_loan_forecast.groupby('PRODUCT_TYPE')
product_count = loans_by_product.size().reset_index(name='Count')
print("üìä S·ªë l∆∞·ª£ng h·ª£p ƒë·ªìng theo s·∫£n ph·∫©m:")
print(product_count)

# DEL90 at MOB 12 per product: flag count plus the mean flag expressed in
# percent. Skipped when the flag column is absent.
if 'DEL90_FLAG_MOB12' in df_loan_forecast.columns:
    del90_by_product = (
        df_loan_forecast.groupby('PRODUCT_TYPE')['DEL90_FLAG_MOB12']
        .agg(DEL90_Count='sum', DEL90_Rate='mean')
        .reset_index()
    )
    del90_by_product['DEL90_Rate'] = del90_by_product['DEL90_Rate'] * 100

    print("\nüìä DEL90 @ MOB 12 theo s·∫£n ph·∫©m:")
    print(del90_by_product)

In [None]:
# Filter the high-risk contracts (DEL90 flag set at MOB 12).
if 'DEL90_FLAG_MOB12' in df_loan_forecast.columns:
    print("\n4Ô∏è‚É£ H·ª£p ƒë·ªìng c√≥ r·ªßi ro cao (DEL90 @ MOB 12):\n")

    df_high_risk = df_loan_forecast[df_loan_forecast['DEL90_FLAG_MOB12'] == 1].copy()

    # Share of flagged loans; an empty allocation result reports 0.00% instead
    # of raising ZeroDivisionError.
    total_loans = len(df_loan_forecast)
    high_risk_share = len(df_high_risk) / total_loans * 100 if total_loans else 0.0

    print(f"üìä T·ªïng s·ªë h·ª£p ƒë·ªìng DEL90: {len(df_high_risk):,} ({high_risk_share:.2f}%)")

    # First 10 flagged contracts (frame order; no ranking is applied).
    if len(df_high_risk) > 0:
        print("\nüìã Top 10 h·ª£p ƒë·ªìng r·ªßi ro cao:")
        risk_cols = ['AGREEMENT_ID', 'PRODUCT_TYPE', 'RISK_SCORE',
                     'STATE_FORECAST_MOB12', 'DEL90_FLAG_MOB12']
        # Show only the columns that exist in the result.
        risk_cols = [col for col in risk_cols if col in df_high_risk.columns]
        print(df_high_risk[risk_cols].head(10))

In [None]:
# Export the loan-level detail to Excel (optional).
print("\n5Ô∏è‚É£ Xu·∫•t chi ti·∫øt h·ª£p ƒë·ªìng ra Excel (optional):\n")

# Uncomment to write the file. The snippet creates its own folder and
# timestamp because the notebook only defines `output_dir` and `timestamp`
# later, in the export-reports section.
# Path("outputs").mkdir(exist_ok=True)
# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# output_file = f"outputs/Loan_Details_{timestamp}.xlsx"
# df_loan_forecast.to_excel(output_file, index=False)
# print(f"‚úÖ ƒê√£ xu·∫•t: {output_file}")

print("üí° Tip: Uncomment code tr√™n ƒë·ªÉ xu·∫•t chi ti·∫øt h·ª£p ƒë·ªìng ra Excel")
print("\nüìå L∆∞u √Ω: df_loan_forecast ƒë√£ c√≥ S·∫¥N t·∫•t c·∫£ th√¥ng tin t·ª´ df_raw")
print("   B·∫°n c√≥ th·ªÉ truy c·∫≠p tr·ª±c ti·∫øp m√† kh√¥ng c·∫ßn merge th√™m!")

---
## 6.5️⃣ ALLOCATION V2: Transition Matrix + Scaling (Nâng cao)

Phương pháp allocation mới:
- Dùng **transition matrix** để assign STATE_FORECAST (thay vì random sampling)
- **Scaling** để match với lifecycle đã calibrated
- Có **backtest** để đánh giá độ chính xác

In [None]:
# Import allocation_v2
# Only allocate_multi_mob_with_scaling is called in the cells that follow;
# the other three names are imported but not used in the visible notebook.
from src.rollrate.allocation_v2 import (
    allocate_multi_mob_with_scaling,
    allocate_with_calibration_scaling,
    backtest_allocation,
    backtest_ead,
)

# Confirmation message, printed only if the import above succeeded.
print("‚úÖ Import allocation_v2 th√†nh c√¥ng")

In [None]:
# Take the most recent snapshot of the loans: rows whose CUTOFF_DATE equals
# the latest one in the data.
latest_cutoff = df_raw['CUTOFF_DATE'].max()
in_latest_snapshot = df_raw['CUTOFF_DATE'] == latest_cutoff
df_loans_latest = df_raw.loc[in_latest_snapshot].copy()

# Add VINTAGE_DATE: the origination date moved to the first day of its month.
orig_dates = df_loans_latest[CFG['orig_date']]
df_loans_latest['VINTAGE_DATE'] = orig_dates.apply(lambda ts: ts.replace(day=1))

print(f"üìä Loans snapshot m·ªõi nh·∫•t:")
print(f"   CUTOFF_DATE: {latest_cutoff}")
print(f"   S·ªë loans: {len(df_loans_latest):,}")

In [None]:
print("üî® Allocation V2: Transition Matrix + Scaling...")
print("   Target MOBs: 12, 24")
print("   (C√≥ th·ªÉ m·∫•t 2-3 ph√∫t)\n")

# Same target MOBs as the first allocation; a fixed seed is passed to the
# allocator.
allocation_v2_settings = dict(
    df_loans_latest=df_loans_latest,
    df_lifecycle_final=df_lifecycle_final,
    matrices_by_mob=matrices_by_mob,
    target_mobs=[12, 24],
    parent_fallback=parent_fallback,
    include_del30=True,
    include_del90=True,
    seed=42,
)
df_loan_forecast_v2 = allocate_multi_mob_with_scaling(**allocation_v2_settings)

print(f"\n‚úÖ Allocation V2 ho√†n t·∫•t: {len(df_loan_forecast_v2):,} loans")

In [None]:
# Compare total EAD before and after scaling, per target MOB.
print("üìä So s√°nh EAD raw vs scaled:\n")

for mob in [12, 24]:
    raw_col = f'EAD_FORECAST_MOB{mob}'
    scaled_col = f'EAD_SCALED_MOB{mob}'

    # Skip a MOB whose columns were not produced instead of failing with a
    # KeyError.
    missing_cols = [c for c in (raw_col, scaled_col) if c not in df_loan_forecast_v2.columns]
    if missing_cols:
        print(f"MOB {mob}: skipped, missing columns {missing_cols}\n")
        continue

    ead_raw = df_loan_forecast_v2[raw_col].sum()
    ead_scaled = df_loan_forecast_v2[scaled_col].sum()

    print(f"MOB {mob}:")
    print(f"   EAD raw: {ead_raw:,.0f}")
    print(f"   EAD scaled: {ead_scaled:,.0f}")
    # A zero raw total has no meaningful relative difference; dividing by it
    # would only print inf/nan.
    if ead_raw:
        print(f"   Diff: {(ead_scaled/ead_raw - 1)*100:+.2f}%\n")
    else:
        print("   Diff: n/a (EAD raw is 0)\n")

In [None]:
# Compare the DEL90 rate produced by the two allocation methods.
print("üìä So s√°nh DEL90 gi·ªØa 2 ph∆∞∆°ng ph√°p:\n")

for mob in [12, 24]:
    flag_col = f'DEL90_FLAG_MOB{mob}'

    # Same guard the earlier DEL90 cells apply: the flag column may be absent
    # from either result, in which case this MOB is skipped.
    if flag_col not in df_loan_forecast.columns or flag_col not in df_loan_forecast_v2.columns:
        print(f"MOB {mob}: skipped, {flag_col} is not available in both results\n")
        continue

    # Mean of the flag, expressed in percent.
    del90_v1 = df_loan_forecast[flag_col].mean() * 100
    del90_v2 = df_loan_forecast_v2[flag_col].mean() * 100

    print(f"MOB {mob}:")
    print(f"   V1 (Random sampling): {del90_v1:.2f}%")
    print(f"   V2 (Transition matrix): {del90_v2:.2f}%")
    print(f"   Diff: {del90_v2 - del90_v1:+.2f}%\n")

In [None]:
# Show a sample of the Allocation V2 result.
print("üìã Sample k·∫øt qu·∫£ Allocation V2:\n")

# Columns of interest: identifiers, current position, then the MOB 12 and
# MOB 24 forecast fields.
wanted_cols_v2 = (
    ['AGREEMENT_ID', 'PRODUCT_TYPE', 'RISK_SCORE']
    + ['STATE_CURRENT', 'MOB_CURRENT', 'EAD_CURRENT']
    + ['STATE_FORECAST_MOB12', 'EAD_SCALED_MOB12', 'DEL90_FLAG_MOB12']
    + ['STATE_FORECAST_MOB24', 'EAD_SCALED_MOB24', 'DEL90_FLAG_MOB24']
)

# Keep only the ones that exist in the result before printing 10 rows.
present_cols_v2 = set(df_loan_forecast_v2.columns)
display_cols_v2 = [c for c in wanted_cols_v2 if c in present_cols_v2]
print(df_loan_forecast_v2[display_cols_v2].head(10))

---
## 7️⃣ ANALYSIS & VISUALIZATION

In [None]:
# DEL90 migration between the two target MOBs, based on the first (V1)
# loan-level allocation.
print("üìä DEL90 Migration Analysis (MOB 12 ‚Üí 24):\n")

migration_settings = {
    "df_multi_mob": df_loan_forecast,
    "target_mobs": [12, 24],
    "metric": "DEL90",
}
df_migration = compare_del_across_mobs(**migration_settings)

In [None]:
# Pivot table of DEL90% by product and MOB, built from the V1 allocation.
pivot_settings = {
    "df_multi_mob": df_loan_forecast,
    "target_mobs": [12, 24],
    "metric": "DEL90",
}
df_pivot_del90 = pivot_del_by_product_mob(**pivot_settings)

print("\nüìä DEL90% by Product √ó MOB:")
print(df_pivot_del90)

# Bar chart of the pivot; nothing is drawn when the pivot is empty.
pivot_has_rows = not df_pivot_del90.empty
if pivot_has_rows:
    df_pivot_del90.plot(kind="bar", figsize=(10, 6))
    plt.title("DEL90% by Product (MOB 12 vs MOB 24)")
    plt.xlabel("Product")
    plt.ylabel("DEL90%")
    plt.legend(title="MOB")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

---
## 8️⃣ EXPORT REPORTS

In [None]:
print("üíæ Exporting reports...\n")

# Create the output folder (relative to the current working directory).
from pathlib import Path
output_dir = Path("outputs")
output_dir.mkdir(parents=True, exist_ok=True)

# One timestamp shared by every report file written in this section.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

print("1. Lifecycle (cohort-level)...")

# Last MOB with actual data per (product, vintage), used by the export for its
# heatmap. actual_results is keyed by (product, score, vintage), so several
# risk scores map onto the same (product, vintage) key: keep the largest MOB
# across them rather than whichever score happens to be iterated last.
# NOTE(review): assumes the export expects the furthest observed MOB — confirm.
actual_info = {}
for (prod, _score, vintage), mob_dict in actual_results.items():
    if not mob_dict:
        continue
    last_actual_mob = max(mob_dict)
    info_key = (prod, vintage)
    actual_info[info_key] = max(actual_info.get(info_key, last_actual_mob), last_actual_mob)

# Extend with the portfolio-level entry.
actual_info_all = extend_actual_info_with_portfolio(
    actual_info,
    portfolio_name="PORTFOLIO_ALL"
)

# Export the lifecycle workbook (product rows plus the portfolio rows).
lifecycle_file = output_dir / f"Lifecycle_All_Products_{timestamp}.xlsx"
export_lifecycle_all_products_one_file(
    df_del_prod=df_del_all,
    actual_info=actual_info_all,
    filename=str(lifecycle_file)
)

print(f"   ‚úÖ {lifecycle_file}")

In [None]:
print("\n2. Loan-level forecast (MOB 12 & 24)...")

# Written next to the lifecycle workbook, reusing the same timestamp.
loan_file = output_dir / f"Loan_Forecast_MOB12_24_{timestamp}.xlsx"
loan_export_settings = {
    "df_multi_mob": df_loan_forecast,
    "filename": str(loan_file),
    "target_mobs": [12, 24],
}
export_multi_mob_to_excel(**loan_export_settings)

print(f"   ‚úÖ {loan_file}")

In [None]:
print("\n3. Calibration k values...")

# Workbook holding the calibration factors.
k_file = output_dir / f"Calibration_k_values_{timestamp}.xlsx"

with pd.ExcelWriter(k_file, engine="xlsxwriter") as writer:
    # One row per MOB seen in any of the three k dictionaries; a MOB missing
    # from a dictionary comes out as NaN in that column. alpha is repeated on
    # every row.
    all_k_mobs = sorted(set(k_raw_by_mob) | set(k_smooth_by_mob) | set(k_final_by_mob))
    df_k = pd.DataFrame({"MOB": all_k_mobs}).assign(
        k_raw=lambda frame: frame["MOB"].map(k_raw_by_mob),
        k_smooth=lambda frame: frame["MOB"].map(k_smooth_by_mob),
        k_final=lambda frame: frame["MOB"].map(k_final_by_mob),
        alpha=alpha,
    )

    df_k.to_excel(writer, sheet_name="k_values", index=False)

    # Per-observation detail of the raw fit goes on a second sheet when there
    # is any.
    has_k_raw_detail = not k_raw_df.empty
    if has_k_raw_detail:
        k_raw_df.to_excel(writer, sheet_name="k_raw_detail", index=False)

print(f"   ‚úÖ {k_file}")

---
## ✅ SUMMARY

In [None]:
# Recap of every stage, printed from the objects the earlier cells created.
print("="*60)
print("üìä WORKFLOW SUMMARY")
print("="*60)

print(f"\n1Ô∏è‚É£ Data:")
print(f"   Loans: {df_raw[CFG['loan']].nunique():,}")
print(f"   Products: {df_raw['PRODUCT_TYPE'].nunique()}")
print(f"   Date range: {df_raw[CFG['cutoff']].min()} ‚Üí {df_raw[CFG['cutoff']].max()}")

print(f"\n2Ô∏è‚É£ Transition Matrices:")
print(f"   Products: {len(matrices_by_mob)}")
print(f"   Total matrices: {sum(len(mob_dict) for mob_dict in matrices_by_mob.values())}")

print(f"\n3Ô∏è‚É£ Lifecycle:")
print(f"   Total rows: {len(df_lifecycle_final):,}")
print(f"   Actual: {(df_lifecycle_final['IS_FORECAST']==0).sum():,}")
print(f"   Forecast: {(df_lifecycle_final['IS_FORECAST']==1).sum():,}")
print(f"   MOB range: {df_lifecycle_final['MOB'].min()} ‚Üí {df_lifecycle_final['MOB'].max()}")

print(f"\n4Ô∏è‚É£ Calibration:")
print(f"   Alpha: {alpha:.4f}")
print(f"   k_final: {len(k_final_by_mob)} MOBs")
# min()/max() raise ValueError on an empty mapping; the calibration cells
# already allow for an empty fit, so the summary must not crash on it either.
if k_final_by_mob:
    print(f"   k range: {min(k_final_by_mob.values()):.4f} ‚Üí {max(k_final_by_mob.values()):.4f}")
else:
    print("   k range: n/a")

print(f"\n5Ô∏è‚É£ Loan-level Forecast:")
print(f"   Total loans: {len(df_loan_forecast):,}")
# Same column guard as the loan-detail cells: a DEL90 flag column may be
# absent from the allocation result.
for mob in (12, 24):
    flag_col = f'DEL90_FLAG_MOB{mob}'
    if flag_col in df_loan_forecast.columns:
        del90_flags = df_loan_forecast[flag_col]
        print(f"   DEL90 @ MOB {mob}: {del90_flags.sum():,} ({del90_flags.mean()*100:.2f}%)")
    else:
        print(f"   DEL90 @ MOB {mob}: n/a")

print(f"\n6Ô∏è‚É£ Outputs:")
print(f"   üìÅ {output_dir}/")
print(f"      - Lifecycle_All_Products_{timestamp}.xlsx")
print(f"      - Loan_Forecast_MOB12_24_{timestamp}.xlsx")
print(f"      - Calibration_k_values_{timestamp}.xlsx")

print("\n" + "="*60)
print("‚úÖ WORKFLOW HO√ÄN T·∫§T")
print("="*60)