In [1]:
import numpy as np
import pandas as pd
from datetime import date
from typing import List, Dict, Union
import json
import pickle

In [9]:
# Load the serialized model object that is later used for predict_proba.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load 'best_model_dt.pkl' when it comes from a trusted source.
with open('best_model_dt.pkl', 'rb') as file_1:
    best_model_dt = pickle.load(file_1)

In [13]:
def categorize_age(age):
    """Map a numeric age to an age-group label.

    The upper bounds are inclusive: up to 25 is "young", up to 35 is
    "adult", up to 50 is "mature"; every other value is "senior".
    """
    # (inclusive upper bound, label), checked in ascending order.
    brackets = ((25, "young"), (35, "adult"), (50, "mature"))
    for upper_bound, label in brackets:
        if age <= upper_bound:
            return label
    return "senior"

In [16]:
def prepare_single_loan_inference(
    loan_id: str,
    customer_number: str,
    # Raw data from loan_snapshots
    principal_amount: float,
    outstanding_amount: float,
    dpd: int,
    # Raw data from customers
    marital_status: str,
    date_of_birth: str,  # Format 'YYYY-MM-DD'
    # Raw data from bills (billing history of this loan - list of dicts)
    bills_data: List[Dict[str, Union[float, str]]],
    today: Union[date, None] = None,
) -> pd.DataFrame:
    """
    Run feature engineering for a single loan from raw inputs.

    Args:
        loan_id: Loan identifier; only used in the progress message.
        customer_number: Accepted for interface compatibility; not part of
            the returned frame.
        principal_amount: Original loan principal.
        outstanding_amount: Amount still outstanding.
        dpd: Accepted for interface compatibility; not part of the returned
            frame.
        marital_status: Passed through unchanged.
        date_of_birth: Date string; an unparseable or missing value yields
            an age of 0 before categorisation.
        bills_data: List of dicts, for example:
            [{'amount': 1000, 'paid_amount': 1000, 'bill_paid_date': '2025-10-05', 'bill_scheduled_date': '2025-10-01'}, ...]
            An empty list sets all bill aggregates to 0.0.
        today: Reference date for the age computation. Defaults to the
            current date, resolved at call time (not at definition time).

    Returns:
        One-row DataFrame with the columns principal_amount,
        outstanding_amount, outstanding_ratio, avg_bill_gap, late_ratio,
        paid_ratio, marital_status and age_group; missing values are
        replaced by 0.
    """
    # Resolve the default here: a `date.today()` default in the signature is
    # evaluated once when the function is defined and goes stale in a
    # long-running kernel.
    if today is None:
        today = date.today()

    print(f"Memproses Loan: {loan_id}")

    # --- 1. BILLS: aggregate the billing history ---
    bills_df = pd.DataFrame(bills_data)
    if bills_df.empty:
        # No billing history: fall back to zeros.
        avg_bill_gap, late_ratio, paid_ratio = 0.0, 0.0, 0.0
    else:
        paid_on = pd.to_datetime(bills_df['bill_paid_date'], errors='coerce')
        scheduled_on = pd.to_datetime(bills_df['bill_scheduled_date'], errors='coerce')

        # Days between scheduled and actual payment; rows with an
        # unparseable/missing date count as a gap of 0 (i.e. not late).
        bill_gap_days = (paid_on - scheduled_on).dt.days.fillna(0)

        avg_bill_gap = bill_gap_days.mean()
        late_ratio = (bill_gap_days > 0).astype(int).mean()

        # Amounts may arrive as strings (see the type hint); coerce them so
        # that sum() adds numbers instead of concatenating text.
        total_amount = pd.to_numeric(bills_df['amount'], errors='coerce').sum()
        total_paid = pd.to_numeric(bills_df['paid_amount'], errors='coerce').sum()
        paid_ratio = total_paid / total_amount if total_amount > 0 else 0.0

    # --- 2. LOAN SNAPSHOT ---
    outstanding_ratio = outstanding_amount / principal_amount if principal_amount > 0 else 0.0

    # --- 3. CUSTOMER: age group ---
    # Age uses 365-day years, so it is approximate around birthdays.
    # NOTE(review): kept as-is on the assumption that training used the same
    # formula - confirm before switching to an exact calendar age.
    dob = pd.to_datetime(date_of_birth, errors="coerce")
    age_years = (pd.Timestamp(today) - dob).days // 365 if pd.notna(dob) else 0
    age_group = categorize_age(age_years)

    # --- 4. FINAL ONE-ROW FRAME ---
    # Only the columns that were kept by the original selection/drop step are
    # built; dpd, is_delinquent and customer_number were always dropped.
    model_input = pd.DataFrame({
        'principal_amount': [principal_amount],
        'outstanding_amount': [outstanding_amount],
        'outstanding_ratio': [outstanding_ratio],
        'avg_bill_gap': [avg_bill_gap],
        'late_ratio': [late_ratio],
        'paid_ratio': [paid_ratio],
        'marital_status': [marital_status],
        'age_group': [age_group],
    })

    return model_input.fillna(0)



# Raw input data for a single loan:
input_loan_data = dict(
    loan_id="L999",
    customer_number="CUST_9999",
    principal_amount=10_000_000.0,
    outstanding_amount=7_500_000.0,
    dpd=35,
    marital_status="MARRIED",
    date_of_birth="1990-01-01",
    # Raw billing history
    bills_data=[
        # Paid 4 days late
        dict(amount=1_000_000, paid_amount=1_000_000,
             bill_paid_date='2025-10-05', bill_scheduled_date='2025-10-01'),
        # Paid on time
        dict(amount=1_000_000, paid_amount=1_000_000,
             bill_paid_date='2025-11-01', bill_scheduled_date='2025-11-01'),
        # Paid 4 days late and underpaid
        dict(amount=1_000_000, paid_amount=900_000,
             bill_paid_date='2025-12-05', bill_scheduled_date='2025-12-01'),
    ],
)

In [17]:
# Build the one-row feature frame for the sample loan in input_loan_data.
df_features_single = prepare_single_loan_inference(**input_loan_data)

Memproses Loan: L999


In [18]:
# Show the engineered feature row as the cell output.
df_features_single

Unnamed: 0,principal_amount,outstanding_amount,outstanding_ratio,avg_bill_gap,late_ratio,paid_ratio,marital_status,age_group
0,10000000.0,7500000.0,0.75,2.666667,0.666667,0.966667,MARRIED,adult


In [19]:
# Score the feature row with the loaded model.
# NOTE(review): a later cell adds 'probabilitas_default' to df_features_single
# in place, so re-running this cell afterwards passes an extra column to the
# model - run top to bottom on a fresh kernel.
y_proba = best_model_dt.predict_proba(df_features_single)

In [20]:
# Take column 1 of the predicted probabilities for every row.
# NOTE(review): presumably index 1 is the "default" class - confirm against
# best_model_dt.classes_.
probability_of_Default = y_proba[:, 1]

In [23]:
# Attach the predicted probability to the feature row and display it.
# NOTE(review): this mutates df_features_single in place. The recorded output
# also contains a 'probabilitas_beli' column that no visible cell creates -
# likely leftover kernel state; verify with Restart & Run All.
df_features_single['probabilitas_default'] = probability_of_Default
df_features_single

Unnamed: 0,principal_amount,outstanding_amount,outstanding_ratio,avg_bill_gap,late_ratio,paid_ratio,marital_status,age_group,probabilitas_beli,probabilitas_default
0,10000000.0,7500000.0,0.75,2.666667,0.666667,0.966667,MARRIED,adult,0.714057,0.714057
