# Import

In [3]:
import os
import json
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# import tempfile, io
from datetime import datetime

import sklearn.metrics as metrics
# from lightgbm import LGBMClassifier
import lightgbm as lgb
import shap

import gradio as gr
# from tqdm import tqdm

# ModelPredictor

In [4]:
def plot_roc_curve(y_true, y_score, figsize=(5,5)):
    """Plot the ROC curve of a binary classifier and return the figure.

    Args:
        y_true: Ground-truth binary labels.
        y_score: Predicted scores, passed straight to ``metrics.roc_curve``.
        figsize: ``(width, height)`` of the figure in inches.

    Returns:
        The matplotlib ``Figure`` holding the curve, the chance diagonal and a
        legend that shows the AUC.
    """
    fpr, tpr, _ = metrics.roc_curve(y_true, y_score)
    roc_auc = metrics.auc(fpr, tpr)
    # plt.subplots() already creates the figure; a separate plt.figure() call
    # here would only leave an extra empty figure registered with pyplot.
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(fpr, tpr, color='darkorange', label=f"ROC Curve (AUC = {roc_auc:.3f})")
    ax.plot([0,1], [0,1], color='navy', linestyle='--')  # chance-level reference
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("ROC Curve")
    ax.legend(loc="lower right")
    return fig


def plot_confusion_matrix(y_true, y_pred, figsize=(6, 5)):
    """Plot a 2x2 confusion matrix with counts and share-of-total percentages.

    Args:
        y_true: Ground-truth labels encoded as 0 (not default) / 1 (default).
        y_pred: Predicted labels with the same encoding.
        figsize: ``(width, height)`` of the figure in inches.

    Returns:
        The matplotlib ``Figure`` containing the annotated matrix.
    """
    fig, ax = plt.subplots(figsize=figsize)
    # Pin the label set so the matrix is always 2x2 and matches the two display
    # labels, even when the evaluated batch happens to contain a single class.
    cm = metrics.confusion_matrix(y_true, y_pred, labels=[0, 1])
    disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Not Default', 'Default'])
    disp.plot(cmap=plt.cm.Greens, ax=ax, values_format='d')

    # Each cell as a share of all samples; guard against an empty input.
    total = cm.sum()
    cm_normalized = cm.astype('float') / total if total else np.zeros(cm.shape)
    # Overlay percentages as annotations, slightly below the count in each cell
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            percentage = cm_normalized[i, j] * 100
            ax.text(j, i + 0.2, f'{percentage:.1f}%', ha='center', va='center', color='red', fontsize=10)
    ax.set_title('Confusion Matrix')
    return fig

def get_classification_report(y_true, y_pred):
    """Summarise binary-classification results as a two-column table.

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels (array-like).

    Returns:
        A ``DataFrame`` with columns ``Metric`` and ``Value`` listing the sample
        count, the sum and percentage mean of ``y_true``, and accuracy,
        precision, recall and F1 rounded to two decimals.
    """
    truth = np.asarray(y_true)
    preds = np.asarray(y_pred)

    # Descriptive statistics of the ground truth come first ...
    rows = [
        ("n_sample", truth.shape[0]),
        ("n_default", truth.sum()),
        ("percent_default", round(truth.mean() * 100, 2)),
    ]

    # ... followed by the scikit-learn scores, all rounded the same way.
    scorers = (
        ("Accuracy", metrics.accuracy_score),
        ("Precision", metrics.precision_score),
        ("Recall", metrics.recall_score),
        ("F1-score", metrics.f1_score),
    )
    for label, scorer in scorers:
        rows.append((label, round(scorer(truth, preds), 2)))

    return pd.DataFrame(rows, columns=["Metric", "Value"])

In [81]:
class ModelPredictor:
    """Ensemble of LightGBM boosters plus the config needed to score and explain data.

    The model directory must contain ``model_0.booster`` .. ``model_4.booster``
    and ``model_config.json``. Config keys read by this class are
    ``cat_features``, ``features``, ``threshold`` and ``target``.
    """

    N_MODELS = 5  # number of booster files expected in the model directory

    def __init__(self, model_dir="models/"):
        """Load all boosters and the JSON config from ``model_dir``."""
        self.model_dir = Path(model_dir)
        self.models = self.load_models()
        # Use a context manager so the config file handle is closed right away.
        with open(self.model_dir / "model_config.json", encoding="utf-8") as config_file:
            self.config = json.load(config_file)

    # -----------------------------------------
    def load_models(self):
        """Return the list of boosters ``model_<i>.booster`` found in the model directory."""
        return [
            # str() keeps this working on LightGBM versions that only accept string paths.
            lgb.Booster(model_file=str(self.model_dir / f"model_{i}.booster"))
            for i in range(self.N_MODELS)
        ]

    # -----------------------------------------
    def preprocess_data(self, df: pd.DataFrame):
        """Return a copy of ``df`` whose configured categorical columns are ``category`` dtype.

        Values are cast to ``str`` first, then to ``category``. The input frame
        is not modified.
        """
        df = df.copy()
        cat_features = self.config["cat_features"]
        df[cat_features] = df[cat_features].astype(str).astype("category")
        return df

    # -----------------------------------------
    def run_prediction(self, df: pd.DataFrame):
        """Score ``df`` with every booster and average the results.

        Returns:
            ``(final_score, final_pred)``: the mean score per row, and the 0/1
            prediction obtained by comparing it with ``config["threshold"]``.
        """
        df = self.preprocess_data(df)
        final_score = np.mean([model.predict(df[self.config["features"]]) for model in self.models], axis=0)
        final_pred = (final_score >= self.config["threshold"]).astype(int)
        return final_score, final_pred

    # -----------------------------------------
    def plot_shap_feature_importance(self, df, figsize=(5, 8), model_index=0, max_n_sample=1000):
        """Draw a SHAP summary (dot) plot for one booster of the ensemble.

        Args:
            df: Raw data containing the configured feature columns.
            figsize: ``(width, height)`` in inches applied to the resulting figure.
            model_index: Index of the booster to explain.
            max_n_sample: Only the first ``max_n_sample`` rows are explained.

        Returns:
            The matplotlib ``Figure`` the summary plot was drawn on.
        """
        df = self.preprocess_data(df)
        X = df[self.config["features"]].head(max_n_sample)
        explainer = shap.TreeExplainer(self.models[model_index])
        shap_values = explainer.shap_values(X)
        fig = plt.figure(figsize=figsize)
        # summary_plot resizes the current figure unless plot_size is given, so
        # the requested size has to be forwarded for figsize to take effect.
        shap.summary_plot(shap_values, X, plot_type='dot', show=False, plot_size=figsize)
        return fig

    # -----------------------------------------
    def plot_shap_waterfall(self, df, model_index=0):
        """Draw a SHAP waterfall plot for the first row of ``df``.

        Args:
            df: Raw data containing the configured feature columns.
            model_index: Index of the booster to explain.

        Returns:
            The matplotlib ``Figure`` the waterfall was drawn on.
        """
        df = self.preprocess_data(df)
        X = df[self.config["features"]]
        explainer = shap.TreeExplainer(self.models[model_index])
        shap_values = explainer(X)
        fig, _ = plt.subplots()
        shap.plots.waterfall(shap_values[0], show=False)
        plt.tight_layout()
        return fig

    # -----------------------------------------
    def run_evaluation(self, df):
        """Score a labelled dataset and build the evaluation artefacts.

        ``df`` must contain the column named by ``config["target"]``.

        Returns:
            ``(roc, cm, cr, fi)``: ROC figure, confusion-matrix figure, metrics
            table and SHAP feature-importance figure.
        """
        y_score, y_pred = self.run_prediction(df)
        y_true = df[self.config["target"]]
        roc = plot_roc_curve(y_true, y_score, figsize=(5,5))
        cm = plot_confusion_matrix(y_true, y_pred, figsize=(5, 4))
        cr = get_classification_report(y_true, y_pred)
        fi = self.plot_shap_feature_importance(df, figsize=(5, 8))
        return roc, cm, cr, fi

# UI functions

In [71]:
# -------------------------
# Login
# -------------------------
# Demo account table: username -> {"pwd": password, "role": role name}.
# The role string is what show_tabs() compares against to reveal a tab.
# NOTE(review): passwords are hard-coded in plaintext; acceptable only for a
# local demo. Move to environment variables or a secrets store before sharing.
USERS = {
    "officer": {"pwd": "123", "role": "Loan Officer"},
    "risk": {"pwd": "123", "role": "Risk Manager"},
    "ds": {"pwd": "123", "role": "Data Scientist"},
    "user": {"pwd": "123", "role": "End User"}
}

def login(username, password):
    """Check the credentials against ``USERS``.

    Returns:
        A 3-tuple ``(status message, role or None, gr.update for the login box)``.
        On success the role of the account is returned and the login box is
        hidden; otherwise the role is ``None`` and the box stays visible.
    """
    account = USERS.get(username)
    authenticated = bool(account) and account["pwd"] == password
    if not authenticated:
        return "‚ùå Sai username ho·∫∑c password", None, gr.update(visible=True)

    role = account["role"]
    return f"‚úÖ ƒêƒÉng nh·∫≠p th√†nh c√¥ng ({account['role']})", role, gr.update(visible=False)

def show_tabs(role):
    """Return four visibility updates, one per role tab, showing only the tab of ``role``.

    The order is Loan Officer, Risk Manager, Data Scientist, End User.
    """
    tab_roles = ("Loan Officer", "Risk Manager", "Data Scientist", "End User")
    return tuple(gr.update(visible=(role == tab_role)) for tab_role in tab_roles)


# -------------------------
# Mock scoring / utilities
# -------------------------

def _score_row(age, income, loan_amt, num_loans, dti):
    z = 0.42 * (loan_amt / 100_000) + 0.22 * (num_loans / 5) + 0.28 * (dti / 60) \
        - 0.12 * (income / 200_000) - 0.06 * (age / 70)
    score = float(np.clip(0.5 + z, 0, 1))
    return score


def risk_overview(_):
    """Build the three mock portfolio charts for the risk dashboard.

    The single argument is ignored. The trend and histogram use unseeded
    ``np.random`` draws, so they differ on every call.

    Returns:
        ``(pie figure, monthly trend figure, score histogram figure)``.
    """
    # --- Pie: fixed risk-bucket distribution -------------------------------
    bucket_names = ["Low", "Medium", "High"]
    bucket_shares = [58, 30, 12]
    pie_fig, pie_ax = plt.subplots(figsize=(3.6, 2.6))
    pie_ax.pie(
        bucket_shares,
        labels=bucket_names,
        autopct='%1.1f%%',
        startangle=90,
        textprops={'fontsize':9},
    )
    pie_ax.set_title("Ph√¢n b·ªë r·ªßi ro danh m·ª•c")

    # --- Trend: rising line with a little noise over 8 monthly points -------
    month_labels = pd.date_range(end=datetime.now(), periods=8, freq='M').strftime("%b %Y")
    n_months = len(month_labels)
    noisy_rates = np.linspace(1.8, 3.6, n_months) + np.random.rand(n_months)*0.2
    default_rates = np.round(noisy_rates, 2)
    trend_fig, trend_ax = plt.subplots(figsize=(5.0, 2.6))
    trend_ax.plot(month_labels, default_rates, marker='o', linewidth=2)
    trend_ax.set_title("T·ª∑ l·ªá v·ª° n·ª£ theo th√°ng")
    trend_ax.set_ylim(0, max(default_rates)*1.3)
    trend_ax.set_ylabel("%")

    # --- Histogram: 1000 beta-distributed scores -----------------------------
    sampled_scores = np.clip(np.random.beta(2,5,1000), 0, 1)
    hist_fig, hist_ax = plt.subplots(figsize=(3.6, 2.6))
    hist_ax.hist(sampled_scores, bins=20)
    hist_ax.set_title("Ph√¢n b·ªë Risk Score")
    hist_ax.set_xlabel("Risk Score")

    return pie_fig, trend_fig, hist_fig

# UI Theme

In [7]:
# -------------------------
# CSS & Theme
# -------------------------
# Light, modern palette with teal accents and warm accent color
# This stylesheet string is passed to gr.Blocks(css=CSS) when the app is built.
# Colours are declared once as CSS variables in :root and reused by the rules.
# NOTE(review): selectors such as .gr-box / .tabitem target Gradio's own class
# names, which may differ between Gradio versions; confirm they still match.
CSS = """
:root{
  --bg:#f7fbfc;
  --card:#ffffff;
  --muted:#6b7280;
  --accent:#0e9aa7;    /* teal */
  --accent-2:#ff8a4c;  /* warm orange */
  --panel-border: rgba(16,24,40,0.06);
}
.gradio-container { background: var(--bg); color: #0f172a; font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; padding: 16px; }
/* Card style for blocks */
.gr-box, .gr-panel, .gr-form, .gr-group { background: var(--card) !important; border: 1px solid var(--panel-border) !important; border-radius: 12px !important; padding: 14px !important; box-shadow: 0 6px 18px rgba(16,24,40,0.04); }
/* Headings */
.prose h1, .prose h2 { color: #0f172a !important; }
.prose p, .prose small { color: var(--muted) !important; }
/* Buttons */
button.primary { background: var(--accent) !important; color: white !important; border-radius: 10px !important; padding: 8px 12px !important; box-shadow: 0 6px 12px rgba(14,154,167,0.18); }
button.secondary { background: var(--accent-2) !important; color: white !important; border-radius: 10px !important; padding: 8px 12px !important; }
/* Inputs */
input, textarea, select { border-radius: 8px !important; border: 1px solid rgba(15,23,42,0.06) !important; padding: 8px !important; }
/* Small labels */
.label-wrap .label { color: var(--muted) !important; font-weight:600; }
/* Tables / Dataframe */
.dataframe thead th { background: transparent !important; color: #0f172a !important; font-weight:700; }
.dataframe tbody tr:nth-child(even) { background: #fbfcfd !important; }
/* Tabs */
.tabs { background: transparent !important; padding-bottom: 0; }
.tabitem.selected { border-bottom: 3px solid var(--accent) !important; }
"""

# New functions

In [8]:
# -----------------------------------------
def loan_officer_lookup(phone_number, data_df):
    """Look up the first record whose ``phone`` column equals ``phone_number``.

    Returns:
        ``(info, risk, explain)``. On success ``info`` is the record as a dict,
        ``risk`` is its ``score`` value (``"N/A"`` when absent) and ``explain``
        is a placeholder explanation string. When the dataset is missing, has no
        ``phone`` column, or no row matches, the tuple is
        ``(message, None, None)``.
    """
    if data_df is None:
        return "Ch∆∞a c√≥ dataset", None, None
    if "phone" not in data_df.columns:
        return "Dataset kh√¥ng c√≥ c·ªôt phone", None, None

    records = data_df.copy()
    matches = records[records["phone"] == phone_number]
    if matches.empty:
        return "Kh√¥ng t√¨m th·∫•y h·ªì s∆°", None, None

    # Only the first matching record is reported.
    info = matches.to_dict(orient="records")[0]
    risk = info.get("score", "N/A")
    explain = f"Gi·∫£ l·∫≠p SHAP values cho h·ªì s∆° {phone_number}"
    return info, risk, explain


# -----------------------------------------
def risk_manager_stats(data_df):
    """Plot the distribution of the ``score`` column as a 20-bin histogram.

    Drawn with matplotlib, like the other charts in this notebook, so the
    function does not depend on a plotting library the notebook never imports.

    Args:
        data_df: DataFrame expected to hold a ``score`` column, or ``None``.

    Returns:
        A matplotlib ``Figure``, or ``None`` when no dataset is given or it has
        no ``score`` column.
    """
    if data_df is None or "score" not in data_df.columns:
        return None

    fig, ax = plt.subplots()
    # Missing scores are dropped so they cannot break the binning.
    ax.hist(data_df["score"].dropna(), bins=20)
    ax.set_title("Distribution of Risk Scores")
    ax.set_xlabel("score")
    ax.set_ylabel("count")
    return fig


# -----------------------------------------
def analyst_metrics(data_df, threshold=0.176):
    """Compute accuracy plus ROC and confusion-matrix figures for a scored dataset.

    Args:
        data_df: DataFrame with a ``label`` column (ground truth) and a
            ``score`` column (predicted score), or ``None``.
        threshold: Scores greater than or equal to this value are predicted as 1.

    Returns:
        ``(accuracy, roc figure, confusion-matrix figure)``, or
        ``(None, None, None)`` when the dataset or a required column is missing.
    """
    if data_df is None:
        return None, None, None

    # Both columns are read below, so both must be present.
    if not {"label", "score"}.issubset(data_df.columns):
        return None, None, None

    y_true = data_df["label"]
    y_score = data_df["score"]
    y_pred = (y_score >= threshold).astype(int)

    acc = metrics.accuracy_score(y_true, y_pred)
    auc = metrics.roc_auc_score(y_true, y_score)

    # Reuse the notebook's matplotlib helpers instead of an unimported library.
    fig_auc = plot_roc_curve(y_true, y_score)
    fig_auc.axes[0].set_title(f"AUC: {auc:.3f}")
    fig_cm = plot_confusion_matrix(y_true, y_pred)

    return acc, fig_auc, fig_cm

# Main

In [82]:
# Shared predictor used by the Gradio callbacks; loads the boosters and config
# from the versioned model directory below (path is relative to the notebook).
predictor = ModelPredictor(model_dir="../models/v2025-11-27")
# Placeholder for a shared dataset; no code visible in this notebook reads it.
global_data_df = None

In [87]:
# -------------------------
# Build the Gradio app
# -------------------------
with gr.Blocks(title="Loan Default Prediction", css=CSS) as app:
    
    # Header
    with gr.Row(elem_id="header-row"):
        gr.Markdown(
            """
            # Loan Default Prediction
            """)
        with gr.Column(min_width=180):
            gr.Markdown("**Status:** Demo ‚Ä¢ no production data")
            gr.Button("T·∫£i h∆∞·ªõng d·∫´n (PDF)", variant="secondary")

    
    # =========== LOGIN BOX ============
    gr.Markdown("## üîê ƒêƒÉng nh·∫≠p h·ªá th·ªëng")
    login_box = gr.Group(visible=True)
    with login_box:
        username = gr.Textbox(label="Username")
        password = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("ƒêƒÉng nh·∫≠p")
    
    login_status = gr.Markdown(label="Tr·∫°ng th√°i")
    role_state = gr.State()
    
    gr.Markdown("---")

    with gr.Tabs():
        # -----------------------
        # Loan Officer Tab
        # -----------------------
        with gr.TabItem("Loan Officer", visible=False) as tab_officer:
            with gr.Row():
                # Left column: search + quick card
                with gr.Column():
                    gr.Markdown("#### T√¨m h·ªì s∆° / Tra c·ª©u nhanh")
                    upload_eval_btn = gr.File(label="Upload danh s√°ch h·ªì s∆°", file_types=[".csv"])
                    profile = gr.Dropdown(choices=[], label="Ch·ªçn LoanID", value=None)
                    profile_info = gr.DataFrame(label="Th√¥ng tin h·ªì s∆°", interactive=False)
                    df_state = gr.State()  # Save df into a Gradio state variable
                    
                    with gr.Row():
                        decision = gr.Radio(choices=["Ph√™ duy·ªát", "Ph√™ duy·ªát c√≥ ƒëi·ªÅu ki·ªán", "T·ª´ ch·ªëi"], label="Quy·∫øt ƒë·ªãnh")
                    
                    decision_feedback = gr.Markdown("*(Ch∆∞a c√≥ quy·∫øt ƒë·ªãnh ƒë∆∞·ª£c ch·ªçn)*")
                    
                    def load_csv(file):
                        """Read the uploaded CSV and fill the LoanID dropdown.

                        Args:
                            file: Value delivered by the ``gr.File`` upload event, or ``None``.

                        Returns:
                            ``(dropdown update, DataFrame or None)``; the frame is stored in
                            ``df_state`` for later lookups.

                        Raises:
                            gr.Error: If the CSV has no ``LoanID`` column.
                        """
                        if file is None:
                            # gr.update works across Gradio versions; the per-component
                            # Dropdown.update classmethod no longer exists in Gradio 4+.
                            return gr.update(choices=[], value=None), None
                        # Accept both file-like objects exposing .name and plain path strings.
                        path = getattr(file, "name", file)
                        df = pd.read_csv(path, nrows=1000)  # limit 1000 for demo
                        if "LoanID" not in df.columns:
                            raise gr.Error("Uploaded CSV must contain a 'LoanID' column.")
                        loan_ids = df["LoanID"].dropna().astype(str).unique().tolist()
                        return gr.update(choices=loan_ids, value=None), df

                    upload_eval_btn.upload(fn=load_csv, inputs=upload_eval_btn, outputs=[profile, df_state])
                    
                    def on_decision_change(decision_choice):
                        """Return the Markdown feedback line for the selected decision.

                        Only formats the text; nothing is persisted here.
                        """
                        feedback = f"**ƒê√£ l∆∞u: {decision_choice}**"
                        return feedback
                    
                    decision.change(fn=on_decision_change, inputs=[decision], outputs=[decision_feedback])

                # Right column: score + reasons
                with gr.Column():
                    gr.Markdown("#### K·∫øt qu·∫£ d·ª± ƒëo√°n")
                    with gr.Row():
                        score_box = gr.Textbox(label="X√°c su·∫•t Default (0,1)", interactive=False)
                        decision_box = gr.Textbox(label="G·ª£i √Ω quy·∫øt ƒë·ªãnh", interactive=False)
                    expl_plot = gr.Plot(label="Y·∫øu t·ªë ·∫£nh h∆∞·ªüng")
                    
                    def lookup_fn(loan_id, df):
                        """Score the selected loan and build its detail view.

                        Args:
                            loan_id: LoanID chosen in the dropdown (a string), or ``None``.
                            df: DataFrame stored by ``load_csv``, or ``None``.

                        Returns:
                            ``[info table, score, suggested decision, SHAP waterfall figure]``,
                            or four ``None`` values when nothing can be looked up.
                        """
                        if df is None or loan_id is None or "LoanID" not in df.columns:
                            return [None] * 4
                        # Compare as strings: dropdown choices are strings while the CSV
                        # column may have been parsed as numbers. A boolean mask also avoids
                        # building a query expression from the selected value.
                        res = df[df["LoanID"].astype(str) == str(loan_id)]
                        if res.empty:
                            return [None] * 4
                        score, pred = predictor.run_prediction(res)
                        score = round(score[0], 2)
                        pred = "‚úÖ Ph√™ duy·ªát" if pred[0] == 0 else "üõë T·ª´ ch·ªëi"
                        # Show the record as (column, value) pairs, hiding the target if present.
                        info = pd.DataFrame(
                            list(res.drop(columns=['Default'], errors='ignore').iloc[0].to_dict().items()),
                            columns=["Column", "Value"]
                        )
                        shap_plot = predictor.plot_shap_waterfall(res)
                        return info, score, pred, shap_plot
                    
                    profile.change(
                        fn=lookup_fn,
                        inputs=[profile, df_state],
                        outputs=[profile_info, score_box, decision_box, expl_plot]
                    )

                    gr.Markdown("**Ghi ch√∫ c·ªßa nh√¢n vi√™n**")
                    officer_note = gr.Textbox(label="Ghi ch√∫ (t√πy ch·ªçn)", placeholder="Ghi ch√∫ cho h·ªì s∆°")
                    save_note_btn = gr.Button("L∆∞u ghi ch√∫", variant="secondary")
                    # status_note = gr.Textbox(label="Tr·∫°ng th√°i ghi ch√∫", interactive=False)
                    status_note = gr.Markdown("")

                    def _save_note(note):
                        return "ƒê√£ l∆∞u." if note else "Ghi ch√∫ r·ªóng."
                    # save_note_btn.click(fn=_save_note, inputs=officer_note, outputs=status_note)
                    save_note_btn.click(fn=_save_note, inputs=officer_note, outputs=status_note)


        # -----------------------
        # Risk Manager Tab
        # -----------------------
        with gr.TabItem("C-level /  Risk Manager", visible=False) as tab_risk:
            gr.Markdown("#### Risk Overview ‚Ä¢ Th·ªëng k√™ & C·∫£nh b√°o")
            # Controls
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**B·ªô l·ªçc nhanh**")
                    date_range = gr.Slider(1, 24, value=12, label="Xem trong (th√°ng)", info="Ch·ªçn kho·∫£ng th·ªùi gian ƒë·ªÉ xem xu h∆∞·ªõng")
                    seg_select = gr.Dropdown(choices=["To√†n b·ªô", "Khu v·ª±c A", "Khu v·ª±c B", "Ad Source X"], value="To√†n b·ªô", label="Ph√¢n ƒëo·∫°n")
                    refresh_btn = gr.Button("T·∫£i l·∫°i th·ªëng k√™", variant="primary")
                with gr.Column():
                    key_kpis = gr.Markdown("**KPIs)**\n\n- Portfolio Size: **12,540**\n- Current Default Rate: **2.9%**\n- Avg Risk Score: **0.41**")
            # Charts
            with gr.Row():
                pie_plot = gr.Plot()
                trend_plot = gr.Plot()
                hist_plot = gr.Plot()

            def _refresh_kpi(_dr, seg):
                """Regenerate the three overview charts; both filter inputs are ignored."""
                pie_fig, trend_fig, hist_fig = risk_overview(None)
                charts = (pie_fig, trend_fig, hist_fig)
                return charts
            refresh_btn.click(fn=_refresh_kpi, inputs=[date_range, seg_select], outputs=[pie_plot, trend_plot, hist_plot])

            gr.Markdown("---")
            gr.Markdown("**Stress Test / What-if**")
            with gr.Row():
                shock_slider = gr.Slider(0.0, 0.5, value=0.10, step=0.01, label="Shock l√™n default rate (+%)")
                run_shock = gr.Button("Ch·∫°y stress test", variant="secondary")
                shock_output = gr.Textbox(label="K·∫øt qu·∫£", interactive=False)
            def _do_shock(shock):
                base = 2.9
                projected = round(base * (1 + shock), 2)
                return f"T·ª∑ l·ªá v·ª° n·ª£ d·ª± ph√≥ng: {projected}%"
            run_shock.click(fn=_do_shock, inputs=shock_slider, outputs=shock_output)

        # -----------------------
        # Business Analyst / Data Scientist Tab
        # -----------------------
        with gr.TabItem("BA / Data Analyst / Data Scientist", visible=False) as tab_ds:
            gr.Markdown("#### Gi√°m s√°t m√¥ h√¨nh & ph√¢n t√≠ch ƒë·∫∑c tr∆∞ng")
            with gr.Row():
                with gr.Column():
                    upload_eval_btn = gr.File(label="Upload dataset", file_types=[".csv"])
                    roc_plot = gr.Plot(label="ROC Curve")
                    cm_plot = gr.Plot(label="Confusion Matrix")
                with gr.Column():
                    metric_table = gr.Dataframe(label="Metrics")
                    fi_plot = gr.Plot(label="Feature Importance")
            
            def _run_model_eval(file):
                """Evaluate the uploaded CSV with the shared predictor.

                Returns the four outputs of ``predictor.run_evaluation`` or four
                ``None`` values when no file was provided.
                """
                if file is not None:
                    uploaded_df = pd.read_csv(file.name)
                    return predictor.run_evaluation(uploaded_df)
                return None, None, None, None
            
            upload_eval_btn.upload(
                fn=_run_model_eval,
                inputs=upload_eval_btn,
                outputs=[roc_plot, cm_plot, metric_table, fi_plot]
            )

            gr.Markdown("---")
            gr.Markdown("**Notes / Observability**")
            drift_note = gr.Textbox(label="Drift warning", interactive=False, value="No significant feature drift detected in the last 30 days.")
            explain_note = gr.Textbox(label="Explainability tip", interactive=False, value="Use SHAP summary for cohort-level insights; check model behavior on low-income segment.")

        # -----------------------
        # End Users
        # -----------------------
        with gr.Tab("Ng∆∞·ªùi d√πng cu·ªëi ‚Äî Tra c·ª©u k·∫øt qu·∫£", visible=False) as tab_user:
            gr.Markdown("### üîç Tra c·ª©u k·∫øt qu·∫£ vay v·ªën")
    
            with gr.Group():
                phone = gr.Textbox(label="S·ªë ƒëi·ªán tho·∫°i", placeholder="Nh·∫≠p s·ªë ƒëi·ªán tho·∫°i ƒë√£ ƒëƒÉng k√Ω", max_lines=1)
                send_otp_btn = gr.Button("G·ª≠i m√£ OTP", variant="secondary")
                otp = gr.Textbox(label="Nh·∫≠p OTP", placeholder="Nh·∫≠p m√£ x√°c nh·∫≠n g·ªìm 6 ch·ªØ s·ªë")
                verify_btn = gr.Button("X√°c th·ª±c & Tra c·ª©u", variant="primary")
    
            result_status = gr.Textbox(label="Tr·∫°ng th√°i tra c·ª©u", interactive=False)
            user_info = gr.Dataframe(
                label="K·∫øt qu·∫£ tra c·ª©u", interactive=False, wrap=True, 
                column_widths=["200px", "200px", "200px", "auto"],   # 3 c·ªôt ƒë·∫ßu 200px, c·ªôt 4 auto
            )
    
            # --- Mock OTP backend ---
            import random
            otp_state = gr.State(value="")
    
            def _send_otp(phone):
                if not phone:
                    return "‚ö†Ô∏è Vui l√≤ng nh·∫≠p s·ªë ƒëi·ªán tho·∫°i.", ""
                otp_code = f"{random.randint(100000, 999999)}"
                # (·ªü m√¥i tr∆∞·ªùng th·∫≠t: g·ª≠i OTP qua SMS)
                return f"‚úÖ OTP ƒë√£ g·ª≠i ƒë·∫øn {phone} (OTP: {otp_code})", otp_code
    
            send_otp_btn.click(fn=_send_otp, inputs=[phone], outputs=[result_status, otp_state])
    
            df_processed_user = pd.read_csv("../data/test_processed_user_sample.csv")
            df_processed_user["phone"] = df_processed_user["phone"].astype(str).str.zfill(10)
            def _verify_otp(otp_input, otp_expected, phone):
                """Check the entered OTP and return the loan results for ``phone``.

                Args:
                    otp_input: Code typed by the user (may be ``None`` or empty).
                    otp_expected: Code stored in ``otp_state`` by ``_send_otp``.
                    phone: Phone number to look up in ``df_processed_user``.

                Returns:
                    ``(status message, result DataFrame)``; the frame is empty when
                    verification fails.

                NOTE(review): the stored OTP is not tied to the phone number it was
                issued for, so a code requested for one number also unlocks another.
                """
                entered = (otp_input or "").strip()
                expected = (otp_expected or "").strip()
                # An empty expected code means no OTP was ever issued; without this
                # check an empty input would match the initial empty state.
                if not expected or entered != expected:
                    return "‚ùå M√£ OTP kh√¥ng ƒë√∫ng ho·∫∑c ƒë√£ h·∫øt h·∫°n.", pd.DataFrame()
                # Boolean mask instead of a query string built from user input.
                wanted_phone = (phone or "").strip()
                df_res = df_processed_user[df_processed_user["phone"] == wanted_phone]
                df_res = (
                    df_res
                    .assign(process_result=lambda dfx: dfx.Default.apply(lambda x: "üõë T·ª´ ch·ªëi" if x == 1 else "‚úÖ ƒê∆∞·ª£c ph√™ duy·ªát"))
                    .loc[:, ["name", "process_date", "process_result", "explains"]]
                    .rename(columns={
                        "name": "H·ªç t√™n",
                        "process_date": "Duy·ªát l√∫c",
                        "process_result": "K·∫øt qu·∫£",
                        "explains": "L√Ω do"
                    })
                )
                return "‚úÖ X√°c th·ª±c th√†nh c√¥ng!", df_res
    
            verify_btn.click(fn=_verify_otp, inputs=[otp, otp_state, phone], outputs=[result_status, user_info])

        
        # -----------------------
        # Login
        # -----------------------
        login_btn.click(login, [username, password], [login_status, role_state, login_box])
        role_state.change(show_tabs, [role_state], [tab_officer, tab_risk, tab_ds, tab_user])


    # Footer
    gr.Markdown("---")
    gr.Markdown("¬© Demo UI ‚Äî Loan Default Prediction ‚Ä¢ Designed for coursework / prototype")

In [88]:
app.launch()

* Running on local URL:  http://127.0.0.1:7886
* To create a public link, set `share=True` in `launch()`.




ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/Users/hieucao/anaconda3/envs/ml/lib/python3.13/site-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        self.scope, self.receive, self.send
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "/Users/hieucao/anaconda3/envs/ml/lib/python3.13/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/hieucao/anaconda3/envs/ml/lib/python3.13/site-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/Users/hieucao/anaconda3/envs/ml/lib/python3.13/site-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/

In [17]:
df_processed_user = pd.read_csv("../data/test_processed_user_sample.csv")
df_processed_user["phone"].astype(str).str.zfill(10)

0    0909123456
1    0909654321
Name: phone, dtype: object