# 01. Full Pipeline Debug / Gỡ lỗi Pipeline Đầy đủ

Notebook này chạy từng bước của pipeline để debug.

This notebook runs each step of the pipeline for debugging.

In [None]:
import os
import sys
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
from IPython.display import display

In [None]:
# Import modules / Nhập các module
from config import CFG, SEGMENT_COLS, BUCKETS_CANON, BUCKETS_30P, ABSORBING_BASE, MAX_MOB, DENOM_LEVEL
from data_io import load_parquet, validate_schema
from transitions import prepare_transitions, estimate_transition_matrices
from forecast import build_initial_vectors, forecast
from metrics import compute_del_from_snapshot, compute_del_from_forecast, make_mixed_report
from export import export_to_excel

## 1. Load Data / Tải Dữ liệu

In [None]:
# Read the source parquet file into `df`; the later cells of this notebook
# all start from this frame.
snapshot_path = '../Oct25.parquet'
df = load_parquet(snapshot_path)

# Report the frame's dimensions, then preview its first rows.
print("Shape: {}".format(df.shape))
preview_rows = df.head()
display(preview_rows)

In [None]:
# Validate the loaded frame against the configured schema. The success
# message below is only printed if validate_schema returns without raising.
schema_args = (df, CFG, SEGMENT_COLS, BUCKETS_CANON)
validate_schema(*schema_args)
print("Schema validation passed!")

## 2. Prepare Transitions / Chuẩn bị Chuyển đổi

In [None]:
# Build the transitions frame from the loaded data. `df_trans` is the input
# to the matrix-estimation step further down.
df_trans = prepare_transitions(
    df,
    CFG,
    SEGMENT_COLS,
    BUCKETS_CANON,
    ABSORBING_BASE,
)

# Show the size of the result and its first ten rows.
print("Transitions shape: {}".format(df_trans.shape))
transitions_preview = df_trans.head(10)
display(transitions_preview)

## 3. Estimate Transition Matrices / Ước lượng Ma trận Chuyển đổi

In [None]:
# Segment hierarchy passed to the estimator: no segment columns, then only
# the first segment column (when any are configured), then all of them.
if SEGMENT_COLS:
    coarse_cols = [SEGMENT_COLS[0]]
else:
    coarse_cols = []
segment_levels = [
    ("GLOBAL", []),
    ("COARSE", coarse_cols),
    ("FULL", SEGMENT_COLS),
]

prior_strengths = dict(coarse=100.0, full=50.0)

# Keyword options for the estimator, gathered in one place so they are easy
# to tweak while debugging.
estimation_options = dict(
    max_mob=MAX_MOB,
    weight_mode="ead",
    min_count=30,
    prior_strengths=prior_strengths,
    tail_pool_start=18,
)
estimation_result = estimate_transition_matrices(
    df_trans, CFG, BUCKETS_CANON, segment_levels, **estimation_options
)
transitions_dict, transitions_long_df, meta_df = estimation_result

print("Total matrices: {}".format(len(transitions_dict)))
print("Transitions long shape: {}".format(transitions_long_df.shape))

In [None]:
# Inspect the GLOBAL matrix at MOB=0 by looking up its key in the dict
# returned by the estimation step.
global_mob0_key = ('GLOBAL', '', 0)
P_global_0 = transitions_dict[global_mob0_key]

print("GLOBAL matrix at MOB=0:")
display(P_global_0)

## 4. Build Initial Vectors / Xây dựng Vector Khởi tạo

In [None]:
# Build the initial vectors. The call returns two values: `df_init` feeds the
# forecast step and `denom_map` is reused by the forecast DEL computation.
initial_outputs = build_initial_vectors(
    df, CFG, BUCKETS_CANON, SEGMENT_COLS, DENOM_LEVEL
)
df_init, denom_map = initial_outputs

print("Initial vectors shape: {}".format(df_init.shape))
initial_preview = df_init.head(10)
display(initial_preview)

## 5. Forecast / Dự báo

In [None]:
# Run the forecast from the initial vectors and the estimated matrices.
forecast_df = forecast(
    df_init,
    transitions_dict,
    BUCKETS_CANON,
    MAX_MOB,
)

# Show the size of the forecast and its first twenty rows.
print("Forecast shape: {}".format(forecast_df.shape))
forecast_preview = forecast_df.head(20)
display(forecast_preview)

## 6. Compute DEL Metrics / Tính toán Chỉ số DEL

In [None]:
# Actual DEL from the snapshot data. The call returns two values; only the
# first is used in this notebook, the second is discarded.
snapshot_del_outputs = compute_del_from_snapshot(
    df, CFG, BUCKETS_30P, SEGMENT_COLS, MAX_MOB, DENOM_LEVEL
)
actual_del_long, _ = snapshot_del_outputs

# Predicted DEL from the forecast, using the `denom_map` returned by
# build_initial_vectors.
pred_del_long = compute_del_from_forecast(forecast_df, BUCKETS_30P, denom_map)

print("Actual DEL shape: {}".format(actual_del_long.shape))
print("Pred DEL shape: {}".format(pred_del_long.shape))

## 7. Mixed Report / Báo cáo Hỗn hợp

In [None]:
# Combine actual and predicted DEL into the report frames. All four results
# are passed to the export step at the end of the notebook.
report_frames = make_mixed_report(actual_del_long, pred_del_long, MAX_MOB)
mixed_wide, flags_wide, actual_wide, forecast_wide = report_frames

# Preview the first ten rows of the mixed report and of its flags.
print("Mixed Report:")
mixed_preview = mixed_wide.head(10)
display(mixed_preview)

print("\nFlags:")
flags_preview = flags_wide.head(10)
display(flags_preview)

## 8. Export / Xuất file

In [None]:
# Write the debug workbook. The output directory is derived from the report
# path, so the directory that gets created always matches the location the
# file is written to, even if the destination is changed later.
# (`os` is imported in the notebook's top import cell.)
report_path = '../out/debug_report.xlsx'
os.makedirs(os.path.dirname(report_path), exist_ok=True)

export_to_excel(
    report_path,
    transitions_long_df,
    mixed_wide,
    flags_wide,
    actual_wide,
    forecast_wide,
    forecast_df=forecast_df,
    meta_df=meta_df
)
print("Export complete!")