In [1]:
import numpy as np
import os
import pandas as pd
from datetime import date
from typing import List, Dict, Union
import json
import pickle
from google import genai
from google.genai.errors import APIError
from PIL import Image

In [2]:
# Load the pickled model object that is used for predict_proba further down.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so only
# load artifacts from a trusted source. The absolute Windows path is machine-specific;
# consider a configurable model directory instead.
with open('C:/Users/Farissthira/amara-ai/amara-ai/model/best_model_dt.pkl', 'rb') as file_1:
    best_model_dt = pickle.load(file_1)

In [3]:
def categorize_age(age):
    """Map an age in years to one of four age-group labels.

    The bands are inclusive on their upper bound:
    up to 25 -> "young", up to 35 -> "adult", up to 50 -> "mature",
    anything else -> "senior".
    """
    # Ordered (inclusive upper bound, label) pairs; the first band that fits wins.
    age_bands = ((25, "young"), (35, "adult"), (50, "mature"))
    for upper_bound, label in age_bands:
        if age <= upper_bound:
            return label
    return "senior"

In [4]:
def prepare_single_loan_inference(
    loan_id: str,
    customer_number: str,
    # Raw data from loan_snapshots
    principal_amount: float,
    outstanding_amount: float,
    dpd: int,
    # Raw data from customers
    marital_status: str,
    date_of_birth: str,  # Format 'YYYY-MM-DD'
    # Raw data from bills (billing history of this loan - list of dicts)
    bills_data: List[Dict[str, Union[float, str]]],
    today: Union[date, None] = None
) -> pd.DataFrame:
    """
    Run feature engineering for a single loan from its raw inputs.

    Args:
        loan_id: Loan identifier; only used in the progress message.
        customer_number: Accepted for interface compatibility; it is not part
            of the returned frame.
        principal_amount: Original loan principal.
        outstanding_amount: Amount still outstanding.
        dpd: Days past due. Accepted for interface compatibility; neither it
            nor a delinquency flag derived from it is part of the returned frame.
        marital_status: Passed through unchanged as a feature.
        date_of_birth: Birth date string, e.g. 'YYYY-MM-DD'.
        bills_data: List of dicts, for example:
            [{'amount': 1000, 'paid_amount': 1000, 'bill_paid_date': '2025-10-05', 'bill_scheduled_date': '2025-10-01'}, ...]
        today: Reference date for the age calculation. When omitted, the
            current date is looked up at call time (not at definition time,
            which would go stale in a long-running kernel).

    Returns:
        A one-row DataFrame with the columns principal_amount,
        outstanding_amount, outstanding_ratio, avg_bill_gap, late_ratio,
        paid_ratio, marital_status and age_group, with missing values
        replaced by 0.
    """
    print(f"Memproses Loan: {loan_id}")

    # Resolve the reference date per call; a `date.today()` default in the
    # signature would be evaluated only once, when the function is defined.
    if today is None:
        today = date.today()

    # --- 1. BILLS features (aggregated over the billing history) ---
    bills_df = pd.DataFrame(bills_data)
    if bills_df.empty:
        # No billing history: all bill aggregates default to 0.
        avg_bill_gap, late_ratio, paid_ratio = 0.0, 0.0, 0.0
    else:
        bills_df['bill_paid_date'] = pd.to_datetime(bills_df['bill_paid_date'], errors='coerce')
        bills_df['bill_scheduled_date'] = pd.to_datetime(bills_df['bill_scheduled_date'], errors='coerce')

        # Positive gap = paid after the scheduled date.
        bills_df['bill_gap_days'] = (bills_df['bill_paid_date'] - bills_df['bill_scheduled_date']).dt.days
        # NOTE(review): a missing/unparseable date yields a gap of 0, so an unpaid
        # bill counts as on time - confirm this matches the training pipeline.
        bills_df['bill_gap_days'] = bills_df['bill_gap_days'].fillna(0)
        bills_df['is_bill_late'] = (bills_df['bill_gap_days'] > 0).astype(int)

        avg_bill_gap = bills_df['bill_gap_days'].mean()
        late_ratio = bills_df['is_bill_late'].mean()

        total_amount = bills_df['amount'].sum()
        total_paid = bills_df['paid_amount'].sum()
        paid_ratio = total_paid / total_amount if total_amount > 0 else 0.0

    # --- 2. LOAN SNAPSHOT features ---
    outstanding_ratio = outstanding_amount / principal_amount if principal_amount > 0 else 0.0

    # --- 3. CUSTOMER features ---
    dob = pd.to_datetime(date_of_birth, errors="coerce")
    # Approximate age in whole years (365-day years).
    # NOTE(review): an unparseable birth date becomes age 0, i.e. "young".
    age_years = (pd.Timestamp(today) - dob).days // 365 if pd.notna(dob) else 0
    age_group = categorize_age(age_years)

    # --- 4. Final one-row frame, restricted to the columns that are returned ---
    model_input = {
        'principal_amount': [principal_amount],
        'outstanding_amount': [outstanding_amount],
        'outstanding_ratio': [outstanding_ratio],
        'avg_bill_gap': [avg_bill_gap],
        'late_ratio': [late_ratio],
        'paid_ratio': [paid_ratio],
        'marital_status': [marital_status],
        'age_group': [age_group],
    }

    return pd.DataFrame(model_input).fillna(0)



# Raw data for one loan; the keys match the parameters of prepare_single_loan_inference:
input_loan_data = {
    "loan_id": "L999",
    "customer_number": "CUST_9999",
    "principal_amount": 10000000.0,
    "outstanding_amount": 7500000.0,
    "dpd": 35,
    "marital_status": "MARRIED",
    "date_of_birth": "1990-01-01",
    # Billing history (raw)
    "bills_data": [
        {'amount': 1000000, 'paid_amount': 1000000, 'bill_paid_date': '2025-10-05', 'bill_scheduled_date': '2025-10-01'}, # paid 4 days late
        {'amount': 1000000, 'paid_amount': 1000000, 'bill_paid_date': '2025-11-01', 'bill_scheduled_date': '2025-11-01'}, # paid on time
        {'amount': 1000000, 'paid_amount': 900000, 'bill_paid_date': '2025-12-05', 'bill_scheduled_date': '2025-12-01'},  # paid 4 days late (underpaid)
    ]
}

In [5]:
# Build the one-row feature frame for the sample loan.
df_features_single = prepare_single_loan_inference(**input_loan_data)

Memproses Loan: L999


In [6]:
# Display the engineered feature row that is passed to the model.
df_features_single

Unnamed: 0,principal_amount,outstanding_amount,outstanding_ratio,avg_bill_gap,late_ratio,paid_ratio,marital_status,age_group
0,10000000.0,7500000.0,0.75,2.666667,0.666667,0.966667,MARRIED,adult


In [7]:
# Per-class probabilities for the single loan.
# NOTE(review): column order presumably follows best_model_dt.classes_ - confirm
# which column corresponds to the default class.
y_proba = best_model_dt.predict_proba(df_features_single)

In [8]:
# Take the second probability column as the probability of default
# (assumes the default class is at index 1 - TODO confirm).
probability_of_Default = y_proba[:, 1]
probability_of_Default

array([0.71405697])

In [9]:
# Attach the predicted probability to the feature row for display.
# NOTE(review): this adds a non-feature column to df_features_single in place, so
# re-running the predict_proba cell afterwards would pass the extra column to the
# model. Writing the result to a separate frame would keep the cells re-runnable.
df_features_single['probabilitas_default'] = probability_of_Default
df_features_single

Unnamed: 0,principal_amount,outstanding_amount,outstanding_ratio,avg_bill_gap,late_ratio,paid_ratio,marital_status,age_group,probabilitas_default
0,10000000.0,7500000.0,0.75,2.666667,0.666667,0.966667,MARRIED,adult,0.714057


In [10]:
# Read the API key from the environment rather than hardcoding it.
# NOTE(review): GEMINI_API_KEY is not passed to genai.Client() below; the client
# presumably picks the key up from the environment on its own - confirm.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Best-effort initialisation: on any failure `client` is set to None, and the
# scoring functions that check `client is None` return a default score instead.
try:
    client = genai.Client()
except Exception as e:
    print(f"Error initializing Gemini client: {e}")
    client = None

def get_dual_vision_risk_score(
    business_image_path: str, 
    home_image_path: str
) -> float:
    """
    Score TWO asset photos (business and home) with Gemini Vision and return
    their equally weighted (50% / 50%) combined score on a 0-1 scale.

    1 = low risk (assets in good condition on average)
    0 = high risk (assets in poor condition on average)

    Args:
        business_image_path: Path to the business photo.
        home_image_path: Path to the home photo.

    Returns:
        The mean of the two per-image scores. Returns 0.5 without calling the
        API when the module-level ``client`` is None.

    Per-image fallbacks:
        0.0 when the image file is missing or cannot be opened;
        0.5 when the API call fails or the reply does not contain a valid
        score between 0 and 1.
    """
    # The module-level client must have been initialised.
    if client is None:
        print("Gemini client not initialized. Returning default score.")
        return 0.5 

    def extract_json_object(raw_text: str) -> str:
        """Return the outermost ``{...}`` span of the reply.

        This drops Markdown code fences (with or without a language tag) and
        any text around the JSON object. ``str.strip('```json')`` is not
        suitable for this because its argument is a set of characters, not a
        prefix.
        """
        start = raw_text.find('{')
        end = raw_text.rfind('}')
        if start == -1 or end < start:
            # No object found; let json.loads report the problem.
            return raw_text.strip()
        return raw_text[start:end + 1]

    # --- 1. Helper that scores a single image (reused for both photos) ---
    def assess_single_image(image_path: str, asset_type: str) -> float:
        print(f"Mengirim gambar {asset_type} ({image_path}) ke Gemini Vision...")

        # Open the image. A missing or unreadable file gets the worst score.
        try:
            img = Image.open(image_path)
        except FileNotFoundError:
            print(f"Error: File gambar {asset_type} tidak ditemukan di {image_path}. Skor 0.0.")
            return 0.0
        except OSError as e:
            # Covers corrupt/unsupported images and permission problems.
            print(f"Error: File gambar {asset_type} di {image_path} tidak dapat dibuka: {e}. Skor 0.0.")
            return 0.0

        # Prompt for the vision assessment
        prompt = (
            f"Anda adalah penilai aset. Berikan skor risiko (0-1) untuk gambar {asset_type} ini. "
            "Skor 1 berarti kondisi aset sangat baik/risiko sangat rendah, dan 0 berarti aset sangat buruk/risiko sangat tinggi. "
            "Berikan output HANYA dalam format JSON: {\"vision_score\": 0.XX}"
        )

        try:
            # Close the underlying image file once the request has been sent.
            with img:
                response = client.models.generate_content(
                    model="gemini-2.5-flash",
                    contents=[prompt, img]
                )

            # response.text may be None when no text was returned.
            data = json.loads(extract_json_object(response.text or ""))
            score = float(data.get('vision_score', 0.5))
            # Reject values outside the documented 0-1 scale instead of
            # letting them distort the combined score.
            if not 0.0 <= score <= 1.0:
                raise ValueError(f"skor di luar rentang 0-1: {score}")
            return score

        except APIError as e:
            print(f"Error saat memanggil Gemini Vision API untuk {asset_type}: {e}")
            return 0.5 
        except json.JSONDecodeError:
            print(f"Gagal parsing JSON dari respons Gemini untuk {asset_type}.")
            return 0.5 
        except (TypeError, ValueError) as e:
            print(f"Skor {asset_type} dari Gemini tidak valid: {e}")
            return 0.5
        except Exception as e:
            print(f"Terjadi error tak terduga saat memproses {asset_type}: {e}")
            return 0.5

    # --- 2. Score each image ---
    score_bisnis = assess_single_image(business_image_path, "Bisnis")
    score_rumah = assess_single_image(home_image_path, "Rumah")

    # --- 3. Combine with equal weights (50% - 50%) ---
    final_vision_score = (0.5 * score_bisnis) + (0.5 * score_rumah)
    
    print(f"\nSkor Bisnis (50%): {score_bisnis:.4f}")
    print(f"Skor Rumah (50%): {score_rumah:.4f}")
    print(f"Skor Vision Gabungan: {final_vision_score:.4f}")
    
    return final_vision_score

# Note: make sure the 'client' variable and the 'Image', 'json' and 'APIError' imports are defined before calling get_dual_vision_risk_score.

In [11]:
def get_nlp_risk_score(agent_notes: str) -> float:
    """
    Use Gemini to analyse the sentiment of a field agent's notes and return a
    score on a 0-1 scale.

    1 = low risk (positive / safe sentiment)
    0 = high risk (negative / cautionary sentiment)

    Args:
        agent_notes: Free-text notes written by the field agent.

    Returns:
        The score reported by the model. Returns the neutral default 0.5 when
        the module-level ``client`` is None, when the notes are empty, when the
        API call fails, or when the reply does not contain a valid score
        between 0 and 1.
    """
    if client is None:
        print("Gemini client not initialized. Returning default score.")
        return 0.5

    # Nothing to analyse: skip the API call rather than score an empty note.
    if agent_notes is None or not str(agent_notes).strip():
        print("Catatan agen kosong. Mengembalikan skor default.")
        return 0.5

    def extract_json_object(raw_text: str) -> str:
        """Return the outermost ``{...}`` span of the reply.

        This drops Markdown code fences (with or without a language tag) and
        any text around the JSON object. ``str.strip('```json')`` is not
        suitable for this because its argument is a set of characters, not a
        prefix.
        """
        start = raw_text.find('{')
        end = raw_text.rfind('}')
        if start == -1 or end < start:
            # No object found; let json.loads report the problem.
            return raw_text.strip()
        return raw_text[start:end + 1]

    print("Menganalisis catatan agen dengan Gemini NLP...")

    # JSON-quote the notes so embedded quotes or newlines stay inside the quoted
    # field. For plain text this is identical to wrapping the notes in double quotes.
    quoted_notes = json.dumps(str(agent_notes), ensure_ascii=False)

    # Prompt for the NLP assessment
    prompt = (
        "Lakukan analisis sentimen/risiko pada catatan Field Agent berikut. "
        "Berikan skor risiko (0-1) di mana 1 adalah sentimen positif (misalnya, janji bayar kuat, kooperatif) "
        "dan 0 adalah sentimen negatif (misalnya, menolak bayar, sulit ditemui, aset rusak). "
        f"Catatan Agent: {quoted_notes}. "
        "Berikan output HANYA dalam format JSON: {\"nlp_score\": 0.XX}"
    )

    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt
        )

        # response.text may be None when no text was returned.
        data = json.loads(extract_json_object(response.text or ""))
        score = float(data.get('nlp_score', 0.5))
        # Reject values outside the documented 0-1 scale.
        if not 0.0 <= score <= 1.0:
            raise ValueError(f"skor di luar rentang 0-1: {score}")
        return score

    except APIError as e:
        print(f"Error saat memanggil Gemini NLP API: {e}")
        return 0.5
    except json.JSONDecodeError:
        print(f"Gagal parsing JSON dari respons: {response.text}")
        return 0.5
    except (TypeError, ValueError) as e:
        print(f"Skor NLP dari Gemini tidak valid: {e}")
        return 0.5
    except Exception as e:
        print(f"Terjadi error tak terduga: {e}")
        return 0.5

In [12]:
# Assumption: these image files exist on the local machine for testing.
# Replace them with the real asset image paths.
# NOTE(review): both paths are absolute and machine-specific, and the home path
# contains a doubled slash ("images_example//house").
ASSET_IMAGE_PATH_BUSINESS = "C:/Users/Farissthira/amara-ai/amara-ai/images_example/business/business_0.jpeg" 
ASSET_IMAGE_PATH_HOME = "C:/Users/Farissthira/amara-ai/amara-ai/images_example//house/house_22.jpeg" 

FIELD_AGENT_NOTES = "Debitur sangat kooperatif dan berjanji akan melunasi sisa tunggakan dalam 3 hari ke depan. Kondisi aset terlihat baik."

# --- 1. Collect the scores from the three sources ---

# A. PoD (probability of default from the model inference cells)
# Same value as df_features_single['probabilitas_default'].iloc[0]
pod_score = probability_of_Default.item()
print(f"1. PoD (Decision Tree) : {pod_score:.4f}")

# B. Vision score (calls Gemini Vision)
# Note: create a dummy image file or change the paths above for testing!
vision_score = get_dual_vision_risk_score(
    ASSET_IMAGE_PATH_BUSINESS, 
    ASSET_IMAGE_PATH_HOME
)
# NOTE(review): the next two lines print the same value twice; one looks like a leftover.
print(f"2. Vision Score (Gemini Gabungan) : {vision_score:.4f}")
print(f"2. Vision Score (Gemini) : {vision_score:.4f}")

# C. NLP score (calls Gemini on the agent notes)
nlp_score = get_nlp_risk_score(FIELD_AGENT_NOTES)
print(f"3. NLP Score (Gemini) : {nlp_score:.4f}")

print("\n--- PERHITUNGAN SKOR AKHIR ---")

# --- 2. Fungsi Pembobotan ---
def calculate_final_score(
    pod: float, 
    vision_score: float, 
    nlp_score: float, 
    weights: Union[dict, None] = None
) -> float:
    """
    Combine the three scores into one weighted risk score.

    ``pod`` is a probability of default, so higher means riskier. The vision
    and NLP scores use the opposite direction (1 = good / low risk), so each
    is inverted as ``1 - score`` before weighting. The result is therefore a
    risk value: higher means riskier.

    Args:
        pod: Probability of default.
        vision_score: Vision score where 1 = good condition.
        nlp_score: NLP score where 1 = positive sentiment.
        weights: Mapping with the keys "pod", "vision" and "nlp". When
            omitted, 0.70 / 0.15 / 0.15 is used. The weights are applied as
            given; they are not normalised or validated.

    Returns:
        weights["pod"] * pod + weights["vision"] * (1 - vision_score)
        + weights["nlp"] * (1 - nlp_score)

    Raises:
        KeyError: If ``weights`` lacks one of the three keys.
    """
    # Build the default per call instead of sharing one mutable dict as the
    # default argument value.
    if weights is None:
        weights = {"pod": 0.70, "vision": 0.15, "nlp": 0.15}

    # Example: a vision/NLP score of 0.9 (low risk) contributes only 0.1 of its
    # weight, while a score of 0.2 (high risk) contributes 0.8 of its weight.
    final_score = (
        weights["pod"] * pod +
        weights["vision"] * (1 - vision_score) +
        weights["nlp"] * (1 - nlp_score)
    )

    return final_score

# --- 3. Run the scoring engine ---

final_composite_risk_score = calculate_final_score(pod_score, vision_score, nlp_score)

# Result
# NOTE(review): the percentages below are hard-coded text; they match the default
# weights of calculate_final_score but would not follow a change to them.
print(f"Pembobotan: PoD (70%), Vision (15%), NLP (15%)")
print(f"*** Final Composite Risk Score: {final_composite_risk_score:.4f} ***")

# Interpretation:
# A value close to 1.0 means very high default/credit risk.
# A value close to 0.0 means very low default/credit risk.

1. PoD (Decision Tree) : 0.7141
Mengirim gambar Bisnis (C:/Users/Farissthira/amara-ai/amara-ai/images_example/business/business_0.jpeg) ke Gemini Vision...
Mengirim gambar Rumah (C:/Users/Farissthira/amara-ai/amara-ai/images_example//house/house_22.jpeg) ke Gemini Vision...

Skor Bisnis (50%): 0.7000
Skor Rumah (50%): 0.7500
Skor Vision Gabungan: 0.7250
2. Vision Score (Gemini Gabungan) : 0.7250
2. Vision Score (Gemini) : 0.7250
Menganalisis catatan agen dengan Gemini NLP...
3. NLP Score (Gemini) : 0.9500

--- PERHITUNGAN SKOR AKHIR ---
Pembobotan: PoD (70%), Vision (15%), NLP (15%)
*** Final Composite Risk Score: 0.5486 ***
