In [7]:
# ==============================================================================
# Sel 1: Instalasi & Impor Library Gabungan
# ==============================================================================
import os
import json
import pandas as pd
import time
import asyncio
import copy
import math
from typing import List, Dict, Any, Optional
from pathlib import Path
from datetime import date, datetime
from decimal import Decimal

# Third-party libraries
from dotenv import load_dotenv
import clickhouse_connect
from openai import AzureOpenAI

from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_aws import ChatBedrockConverse
from langchain_openai import AzureChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langsmith import Client

# Load environment variables from the .env file so that the os.getenv()
# lookups further down can see them.
load_dotenv()
print("✅ Library dan environment variables siap.")


# ==============================================================================
# Cell 2: Global Configuration
# ==============================================================================

# --- ClickHouse database ---
# All connection settings come from the environment; only the port has a
# fallback value (18123) when CLICKHOUSE_PORT is not set.
CLICKHOUSE_HOST = os.getenv("CLICKHOUSE_HOST")
CLICKHOUSE_PORT = int(os.getenv("CLICKHOUSE_PORT", 18123))
CLICKHOUSE_USER = os.getenv("CLICKHOUSE_USER")
CLICKHOUSE_PASSWORD = os.getenv("CLICKHOUSE_PASSWORD")
CLICKHOUSE_DATABASE = os.getenv("CLICKHOUSE_DATABASE")

# --- Azure OpenAI (used to generate the clinical summaries) ---
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
AZURE_DEPLOYMENT_NAME = os.getenv("AZURE_DEPLOYMENT_NAME") 

# --- Langsmith (tracing for the LLM agents) ---
# Written back into os.environ with explicit defaults so each variable is
# always present: tracing defaults to "false", the rest to an empty string.
os.environ["LANGSMITH_TRACING"] = os.getenv("LANGSMITH_TRACING", "false")
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY", "")
os.environ["LANGSMITH_ENDPOINT"] = os.getenv("LANGSMITH_ENDPOINT", "")
os.environ["LANGSMITH_PROJECT"] = os.getenv("LANGSMITH_PROJECT", "")

# --- Directories & agent groups ---
# DATA_DIR holds the per-item-type knowledge-base JSON files; PROMPT_DIR holds
# one "<agent name>.txt" prompt template per agent group.
DATA_DIR = Path("data")
PROMPT_DIR = Path("prompt")

# Maps each agent name (also the prompt file stem) to the sales item types
# that agent is responsible for.
AGENT_GROUPS = {
    "Consumables_and_Medical_Supplies": ["Consumables and Medical Supplies"],
    "Drugs": ["Drugs"],
    "Medical_Diagnostic": [
        "Laboratory", "Radiology", "Diagnostic",
        "Consultation and Visit", "Checkup Package"
    ],
    "Treatment_Facility": [
        "Surgeon", "Anesthetist", "Procedure and Therapy",
        "Procedure Room Rental", "Bed Rental", "Care Package"
    ],
    "Administrative_Others": [
        "Medical Equipment Rental", "Administrative Charge",
        "Other Service and Charge", "Other Items"
    ]
}

# Flat list of every sales item type; one knowledge-base file is written and
# later read back per entry.
ORIGINAL_SALES_ITEM_TYPES = [
    'Consumables and Medical Supplies', 'Drugs', 'Laboratory', 'Other Items', 
    'Medical Equipment Rental', 'Radiology', 'Surgeon', 'Checkup Package', 
    'Consultation and Visit', 'Procedure Room Rental', 'Other Service and Charge', 
    'Diagnostic', 'Administrative Charge', 'Care Package', 'Anesthetist', 
    'Bed Rental', 'Procedure and Therapy'
]

print("✅ Konfigurasi global telah dimuat.")


# ==============================================================================
# Sel 3: Fungsi-Fungsi Pembantu (Gabungan dari kedua skrip)
# ==============================================================================

# --- Fungsi Koneksi & Setup ---

def setup_summary_llm() -> Optional[AzureOpenAI]:
    """Build the Azure OpenAI client used for clinical-summary generation.

    Returns:
        The configured client, or ``None`` when construction raises. The
        failure is printed instead of being propagated to the caller.
    """
    try:
        azure_settings = {
            "azure_endpoint": AZURE_OPENAI_ENDPOINT,
            "api_key": AZURE_OPENAI_KEY,
            "api_version": AZURE_OPENAI_API_VERSION,
        }
        summary_client = AzureOpenAI(**azure_settings)
        print("✅ Client Azure OpenAI (untuk ringkasan) berhasil dikonfigurasi.")
        return summary_client
    except Exception as exc:
        print(f"❌ Gagal mengkonfigurasi Azure OpenAI client: {exc}")
        return None

def create_clickhouse_client() -> Optional[clickhouse_connect.driver.Client]:
    """Open a ClickHouse connection using the module-level settings.

    Returns:
        A client that has already answered a ``SELECT 1`` probe, or ``None``
        when connecting or probing raises (the error is printed).
    """
    try:
        connection_args = dict(
            host=CLICKHOUSE_HOST,
            port=CLICKHOUSE_PORT,
            username=CLICKHOUSE_USER,
            password=CLICKHOUSE_PASSWORD,
            database=CLICKHOUSE_DATABASE,
            connect_timeout=20,
        )
        ch = clickhouse_connect.get_client(**connection_args)
        # Cheap round-trip so a broken connection is reported here rather
        # than on the first real query.
        ch.command("SELECT 1")
        print("✅ Koneksi ke ClickHouse berhasil!")
        return ch
    except Exception as exc:
        print(f"❌ Gagal terkoneksi ke ClickHouse: {exc}")
        return None

def get_agent_llm():
    """Return the chat model the agents should use.

    The provider is chosen by the ``MODEL_PROVIDER`` environment variable
    (case-insensitive, default ``"gpt5"``). Every model is created with
    temperature 0.0.

    Raises:
        ValueError: if ``MODEL_PROVIDER`` names an unsupported provider.
    """
    provider = os.getenv("MODEL_PROVIDER", "gpt5").lower()

    if provider == "claude":
        return ChatBedrockConverse(model="anthropic.claude-3-sonnet-20240229-v1:0", temperature=0.0)

    if provider == "gpt5":
        return AzureChatOpenAI(
            azure_endpoint=AZURE_OPENAI_ENDPOINT,
            api_key=AZURE_OPENAI_KEY,
            api_version=AZURE_OPENAI_API_VERSION,
            azure_deployment=AZURE_DEPLOYMENT_NAME,
            temperature=0.0
        )

    # Both Gemini aliases share one constructor; only the model id differs.
    gemini_models = {
        "gemini": "gemini-1.0-pro",
        "gemini15": "gemini-1.5-flash-latest",
    }
    if provider in gemini_models:
        return ChatGoogleGenerativeAI(
            model=gemini_models[provider],
            temperature=0.0,
            convert_system_message_to_human=True,
        )

    raise ValueError(f"Unknown MODEL_PROVIDER: {provider}")

# --- Fungsi Pemrosesan Data & Teks ---

def json_serializer(obj: Any) -> Any:
    """``json.dump`` ``default=`` hook for values JSON cannot encode natively.

    Dates and datetimes become ISO-8601 strings and ``Decimal`` becomes
    ``float``.

    Raises:
        TypeError: for any other type.
    """
    if not isinstance(obj, (datetime, date, Decimal)):
        raise TypeError(f"Tipe {type(obj)} tidak bisa diserialisasi ke JSON")
    return float(obj) if isinstance(obj, Decimal) else obj.isoformat()

def load_json_data(file_path: Path) -> dict:
    """Read and parse a UTF-8 JSON file.

    Returns:
        The parsed content, or an empty dict when the file does not exist or
        is not valid JSON (the problem is printed, not raised).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError) as exc:
        print(f"Error loading {file_path}: {exc}")
        payload = {}
    return payload

def generate_summary(client: AzureOpenAI, admission_data: Dict) -> str:
    """Ask the Azure OpenAI deployment for a clinical summary of one admission.

    Args:
        client: Azure OpenAI client; a falsy value short-circuits with a
            fixed message.
        admission_data: Admission record. The keys read here are ``disease``,
            ``procedure`` and the four SOAP lists ``subjective``,
            ``objective``, ``assessment`` and ``planning``.

    Returns:
        The stripped model reply, or an error description string when the
        API call (or reading its reply) raises.
    """
    if not client:
        return "Azure OpenAI client is not available."

    # Each SOAP section becomes a bullet list; the first bullet marker is
    # supplied by the template itself, so only the separators are joined in.
    section_text = {
        section: ("\n- ".join(admission_data.get(section, [])) or 'None.')
        for section in ('subjective', 'objective', 'assessment', 'planning')
    }

    system_prompt = f"""
    You are a professional AI medical assistant. Your task is to create a clinical summary from the provided patient data.
    Your response MUST begin directly with the clinical summary. Do not include any conversational phrases.
    The summary must be in English and between 150-200 words.
    The summary should be professional, concise, and briefly explain the layman-friendly medical terms for the ICD-10 and ICD-9 codes and their correlation.

    --- Patient Data ---
    Disease Classification (ICD-10): {admission_data.get('disease', 'N/A')}
    Procedure Classification (ICD-9): {admission_data.get('procedure', 'N/A')}
    Subjective Notes (Patient Complaints):
    - {section_text['subjective']}
    Objective Notes (Physical Examination):
    - {section_text['objective']}
    Assessment (Diagnosis):
    - {section_text['assessment']}
    Planning (Actions):
    - {section_text['planning']}
    ---
    """

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "Generate the clinical summary now."},
    ]
    try:
        completion = client.chat.completions.create(
            model=AZURE_DEPLOYMENT_NAME,
            messages=chat_messages,
        )
        return completion.choices[0].message.content.strip()
    except Exception as exc:
        return f"Error while creating summary with Azure OpenAI: {exc}"

def fetch_soap_data(client: clickhouse_connect.driver.Client, admission_ids: List[int]) -> Dict[int, Dict[str, Any]]:
    """Fetch the SOAP remarks for a list of admission ids from ClickHouse.

    Args:
        client: Connected ClickHouse client.
        admission_ids: Admission ids to look up; an empty list skips the query.

    Returns:
        A dict keyed by admission id. Each value has the keys ``subjective``,
        ``objective``, ``assessment`` and ``planning``, each holding a list
        of remarks. An empty dict is returned when there is nothing to query
        or when the query raises (the error is printed).
    """
    if not admission_ids:
        return {}

    # The query unpivots the four remark columns into (remark, col_name)
    # pairs, drops NULL/blank remarks and duplicates, then regroups them
    # per admission.
    soap_query = """
    SELECT AdmissionId, groupArray(map('remark', remark, 'col_name', col_name)) AS soap
    FROM (
        SELECT DISTINCT AdmissionId, x.1 AS remark, x.2 AS col_name 
        FROM (
            SELECT AdmissionId, arrayJoin([
                (subjective_remarks, 'subjective'), (objective_remarks, 'objective'),
                (assessment_remarks, 'assessment'), (planning_remarks, 'planning')
            ]) AS x 
            FROM silver_siloam_etbs.soap_data 
            WHERE AdmissionId IN %(admission_ids)s
        ) WHERE x.1 IS NOT NULL AND trim(x.1) != ''
    ) GROUP BY AdmissionId
    """
    try:
        print(f"\n🚀 Mengambil data SOAP untuk {len(admission_ids)} admission(s)...")
        query_result = client.query(soap_query, parameters={"admission_ids": admission_ids})

        by_admission = {}
        for row in query_result.result_rows:
            admission_id, soap_entries = row[0], row[1]
            buckets = {
                section: []
                for section in ('subjective', 'objective', 'assessment', 'planning')
            }
            for entry in soap_entries:
                section = entry.get('col_name')
                remark = entry.get('remark')
                # Entries tagged with an unknown section name are ignored.
                if section in buckets:
                    buckets[section].append(remark)
            by_admission[admission_id] = buckets

        print("✅ Data SOAP berhasil diambil dan direstrukturisasi.")
        return by_admission
    except Exception as exc:
        print(f"❌ Error saat mengambil data SOAP: {exc}")
        return {}
        
# --- Fungsi untuk Logika Agent ---
def transform_admission_record(record: dict) -> dict:
    """Convert a raw knowledge-base record into the flat structure the agents use.

    Args:
        record: Raw admission record. Billing items are read from
            ``record['billing_data']['items']``.

    Returns:
        A dict with the renamed admission fields, the single ``disease`` and
        ``procedure`` values wrapped in lists (empty list when falsy), the
        ``summary`` (with a fixed fallback text) and ``items``: a list of
        ``{"item_type", "item_name", "total_quantity"}`` dicts.
    """
    # `billing_data` or `items` may be present but null in the JSON; treat
    # that the same as "missing" instead of failing on None.
    billing = record.get('billing_data') or {}
    raw_items = billing.get('items') or []
    items_list = [
        {
            "item_type": item.get("item_type"),
            "item_name": item.get("item_name"),
            "total_quantity": item.get("quantity"),
        }
        for item in raw_items
    ]
    disease = record.get('disease')
    procedure = record.get('procedure')
    return {
        "AdmissionId": record.get("admission_id"), "AdmissionTypeName": record.get("admission_type"),
        "OrganizationCode": record.get("organization"), "Region": record.get("region"),
        "Archetype": record.get("archetype"), "PrimaryDoctor": record.get("doctor"),
        "Specialty": record.get("specialty"), "Sex": record.get("sex"),
        "BirthDate": record.get("birth_date"), "Age": record.get("age"),
        "AdmissionDate": record.get("admission_date"), "LengthOfStay": record.get("length_of_stay"),
        "PayerName": record.get("payer_name"), "PayerType": record.get("payer_type"),
        "DiseaseClassification": [disease] if disease else [], "ProcedureClassification": [procedure] if procedure else [],
        "summary": record.get("summary", "No clinical summary available."), "items": items_list
    }

def format_admission_context(admission: dict, relevant_item_types: list) -> str:
    """Render one transformed admission as a text block for the LLM context.

    Only items whose ``item_type`` is in ``relevant_item_types`` are listed;
    when none match, a sentence saying so is emitted instead. Empty ICD lists
    are shown as ``-``.
    """
    item_lines = []
    for entry in admission.get("items", []):
        if entry.get("item_type") in relevant_item_types:
            item_lines.append(
                f"- Item: {entry.get('item_name', 'N/A')}, Quantity: {entry.get('total_quantity', 0)}"
            )

    if item_lines:
        items_str = "\n".join(item_lines)
    else:
        items_str = f"No items of types '{', '.join(relevant_item_types)}' were prescribed."

    icd10_str = ', '.join(admission.get('DiseaseClassification', [])) or '-'
    icd9_str = ', '.join(admission.get('ProcedureClassification', [])) or '-'

    sections = [
        f"--- Admission ID: {admission.get('AdmissionId', 'N/A')} ---",
        f"Payer Type: {admission.get('PayerType', 'N/A')}",
        f"ICD-10: {icd10_str}, ICD-9: {icd9_str}",
        f"Items Prescribed:\n{items_str}",
        f"Clinical Summary: {admission.get('summary', 'N/A')}",
    ]
    # Every section, including the last one, ends with a newline.
    return "\n".join(sections) + "\n"

def get_full_context_for_group(item_types: list) -> str:
    """Build the historical-context string for one agent group.

    Reads the knowledge-base file of every item type in the group, merges the
    records that belong to the same admission, and formats each admission
    once (in order of first appearance).

    Args:
        item_types: Sales item types handled by the agent group; each maps to
            ``DATA_DIR/<type with spaces -> '_' and '/' -> '-'>.json``.

    Returns:
        The formatted admissions joined by newlines, or a fixed sentence when
        no admission was found.
    """
    # AdmissionId -> transformed admission; dict order keeps first appearance.
    merged_admissions = {}
    for item_type in item_types:
        file_name = item_type.replace(' ', '_').replace('/', '-')
        data = load_json_data(DATA_DIR / f"{file_name}.json")
        for rec in data.get("data", {}).get("results", []):
            adm = transform_admission_record(rec)
            adm_id = adm.get("AdmissionId")
            existing = merged_admissions.get(adm_id)
            if existing is None:
                merged_admissions[adm_id] = adm
                continue
            # The same admission appears in every per-type file, each time
            # carrying only that type's items. Keeping just the first copy
            # would drop the items of all later types in the group, so fold
            # them in. Comparing against a snapshot avoids duplicating items
            # when a file happens to carry the full item list.
            already_present = list(existing["items"])
            existing["items"].extend(
                item for item in adm["items"] if item not in already_present
            )

    full_context_list = [
        format_admission_context(adm, item_types)
        for adm in merged_admissions.values()
    ]
    return "\n".join(full_context_list) if full_context_list else "No historical data available for this group."

def load_prompt_from_file(agent_name: str) -> str:
    """Read the prompt template ``PROMPT_DIR/<agent_name>.txt``.

    Returns:
        The file content, or an empty string when reading raises (the error
        is printed).
    """
    prompt_path = PROMPT_DIR / f"{agent_name}.txt"
    try:
        template_text = prompt_path.read_text(encoding='utf-8')
    except Exception as exc:
        print(f"Error loading prompt from {prompt_path}: {exc}")
        template_text = ""
    return template_text

print("✅ Fungsi pembantu siap digunakan.")


# ==============================================================================
# BAGIAN 1: PERSIAPAN DATA PENGETAHUAN (dari Skrip 1)
# ==============================================================================

def generate_knowledge_base(api_result_path: str, manual_admission_ids_to_exclude: List[int] = None) -> bool:
    """Build the per-item-type knowledge-base files under ``DATA_DIR``.

    Steps:
    1. Read the source API dump from ``<api_result_path>.json``.
    2. Fetch the SOAP remarks for those admissions from ClickHouse.
    3. Generate a clinical summary for every admission.
    4. Write one JSON file per sales item type, each holding every admission
       with its billing items filtered down to that type.

    Args:
        api_result_path: Path of the source file WITHOUT the ``.json`` suffix.
        manual_admission_ids_to_exclude: Admission ids to leave out of the
            knowledge base; ``None`` means no exclusions.

    Returns:
        ``True`` when all files were written; ``False`` when the source file
        cannot be loaded, holds no patients, or a connection could not be
        set up.
    """
    print("\n" + "="*50)
    print("BAGIAN 1: Memulai Pembuatan Knowledge Base")
    print("="*50)
    
    manual_admission_ids_to_exclude = manual_admission_ids_to_exclude or []
    # --- 1. Load the source data ---
    try:
        with open(f"{api_result_path}.json", "r", encoding="utf-8") as f:
            api_data = json.load(f)
        
        all_patients_data = api_data.get("data", {}).get("results", [])
        if not all_patients_data:
            print(f"⚠️ File '{api_result_path}.json' tidak berisi data pasien.")
            return False
    except Exception as e:
        print(f"❌ Error saat memuat '{api_result_path}.json': {e}")
        return False

    patients_data = [p for p in all_patients_data if p.get('admission_id') not in manual_admission_ids_to_exclude]
    print(f"✅ Data sumber dimuat. Jumlah data: {len(patients_data)}")

    # --- 2. Set up connections and 3a. fetch the SOAP data ---
    ch_client = create_clickhouse_client()
    try:
        azure_client = setup_summary_llm()
        if not ch_client or not azure_client:
            print("⚠️ Proses pembuatan knowledge base dihentikan karena koneksi gagal.")
            return False

        admission_ids = [p['admission_id'] for p in patients_data if 'admission_id' in p]
        all_soap_data = fetch_soap_data(ch_client, admission_ids)
    finally:
        # ClickHouse is only needed for the SOAP lookup, so release the
        # connection here, on every path, before the long summary loop.
        if ch_client is not None:
            try:
                ch_client.close()
            except Exception as e:
                print(f"⚠️ Gagal menutup koneksi ClickHouse: {e}")

    # --- 3b. Generate summaries and split by item type ---
    print("\n🤖 Memulai proses pembuatan ringkasan untuk data historis...")
    final_results_by_type = {item_type: [] for item_type in ORIGINAL_SALES_ITEM_TYPES}
    
    for i, patient in enumerate(patients_data):
        admission_id = patient.get('admission_id')
        if not admission_id:
            continue
        
        print(f"  ({i+1}/{len(patients_data)}) Membuat ringkasan untuk AdmissionId: {admission_id}...")
        patient_soap_data = all_soap_data.get(admission_id, {})
        data_for_llm = {**patient, **patient_soap_data}
        summary = generate_summary(azure_client, data_for_llm)
        time.sleep(1)  # Rate limiting

        # Every item type gets its own copy of the patient, carrying only
        # the billing items of that type.
        for item_type in ORIGINAL_SALES_ITEM_TYPES:
            patient_copy = copy.deepcopy(patient)
            patient_copy['summary'] = summary
            billing = patient_copy.get('billing_data')
            # A null `billing_data` or `items` is left untouched rather than
            # crashing the whole run.
            if isinstance(billing, dict) and 'items' in billing:
                filtered_items = [
                    item for item in (billing['items'] or [])
                    if item.get('item_type') == item_type
                ]
                billing['items'] = filtered_items
                billing['total_items'] = len(filtered_items)
            final_results_by_type[item_type].append(patient_copy)

    # --- 4. Write the results to JSON files ---
    DATA_DIR.mkdir(exist_ok=True)
    for item_type, data in final_results_by_type.items():
        safe_filename = item_type.replace(" ", "_").replace("/", "-")
        output_path = DATA_DIR / f"{safe_filename}.json"
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump({"status": 1, "data": {"results": data}}, f, indent=4, default=json_serializer, ensure_ascii=False)
        print(f"✅ File knowledge base disimpan: {output_path}")
        
    print("\n🎉 Sukses! Knowledge base telah dibuat di dalam folder 'data/'.")
    return True

# ==============================================================================
# BAGIAN 2: ANALISIS PASIEN BARU (dari Skrip 2)
# ==============================================================================

def perform_item_analysis_for_groups():
    """Compute item statistics from the knowledge base, aggregated per agent group.

    For every sales item type the matching knowledge-base file is read and
    two things are derived: a per-item table (number of distinct admissions
    and mean quantity, most frequent first) rendered as text, and the average
    number of items per admission, rounded up.

    Returns:
        A tuple ``(analysis_string_per_group, average_items_per_group)``.
        Both are dicts keyed by agent name: the first joins the tables of the
        group's item types, the second sums their rounded averages.
    """
    print("\n--- Memulai Analisis Statistik Item dari Knowledge Base ---")
    per_type_table, per_type_average = {}, {}

    for sales_type in ORIGINAL_SALES_ITEM_TYPES:
        stem = sales_type.replace(' ', '_').replace('/', '-')
        payload = load_json_data(DATA_DIR / f"{stem}.json")
        raw_admissions = payload.get("data", {}).get("results", [])
        n_admissions = len(raw_admissions)

        # One row per billed item of this type, across all admissions.
        rows = []
        for raw in raw_admissions:
            admission = transform_admission_record(raw)
            for item in admission.get("items", []):
                if item.get("item_type") == sales_type:
                    rows.append({
                        "AdmissionId": admission["AdmissionId"],
                        "item_name": item["item_name"],
                        "Quantity": item["total_quantity"],
                    })

        # Each matching item produced exactly one row, so len(rows) is the
        # total item count for this type.
        if n_admissions > 0:
            per_type_average[sales_type] = math.ceil(len(rows) / n_admissions)
        else:
            per_type_average[sales_type] = 0

        if not rows:
            per_type_table[sales_type] = f"No items found for detailed analysis for type: {sales_type}."
            continue

        grouped = pd.DataFrame(rows).groupby('item_name').agg(
            frequency=('AdmissionId', 'nunique'),
            average_quantity=('Quantity', 'mean')
        )
        ranked = grouped.reset_index().sort_values(by='frequency', ascending=False)
        per_type_table[sales_type] = ranked.to_string()

    analysis_string_per_group, average_items_per_group = {}, {}
    for agent_name, item_types in AGENT_GROUPS.items():
        sections = [
            f"--- Analisis untuk {it} ---\n{per_type_table.get(it, 'N/A')}"
            for it in item_types
        ]
        analysis_string_per_group[agent_name] = "\n\n".join(sections)
        average_items_per_group[agent_name] = sum(per_type_average.get(it, 0) for it in item_types)

    print("✅ Analisis statistik item per grup selesai.")
    return analysis_string_per_group, average_items_per_group
    
async def run_agent(agent_name: str, item_types: list, patient_info: dict, item_analysis_str: str, avg_items_analysis_str: str):
    """Run one LLM agent and return its prediction for the new patient.

    Args:
        agent_name: Agent group name; also the prompt file stem and the
            value passed to the template as ``sales_item_type``.
        item_types: Sales item types covered by this agent.
        patient_info: New-patient fields, rendered as ``key: value`` lines.
        item_analysis_str: Per-item statistics text for this group.
        avg_items_analysis_str: Sentence stating the group's average item count.

    Returns:
        ``{agent_name: <parsed JSON output>}`` on success, otherwise
        ``{agent_name: {"error": <message>}}``. Failures never raise, so one
        agent cannot abort the others when they are gathered together.
    """
    print(f"🚀 Agent [{agent_name}] memulai...")
    template_str = load_prompt_from_file(agent_name)
    if not template_str:
        return {agent_name: {"error": "Prompt file missing or empty."}}

    try:
        # Chain construction is inside the try on purpose: a malformed
        # template or an unknown MODEL_PROVIDER must be reported as this
        # agent's error instead of propagating to the caller.
        prompt = PromptTemplate.from_template(template_str)
        full_context_str = get_full_context_for_group(item_types)
        chain = prompt | get_agent_llm() | JsonOutputParser()

        patient_info_str = "\n".join([f"{key}: {value}" for key, value in patient_info.items()])

        input_data = {
            "item_analysis": item_analysis_str,
            "average_items_analysis": avg_items_analysis_str,
            "context": full_context_str,
            "patient_info": patient_info_str,
            "sales_item_type": agent_name
        }
        result_json = await chain.ainvoke(input_data, config={"tags": [agent_name]})
    except Exception as e:
        print(f"❌ Agent [{agent_name}] gagal: {e}")
        return {agent_name: {"error": str(e)}}

    print(f"✅ Agent [{agent_name}] selesai.")
    return {agent_name: result_json}

async def run_all_agents(patient_info: dict, item_analysis_per_group: dict, avg_items_per_group: dict):
    """Run every agent group concurrently and merge their results.

    Args:
        patient_info: New-patient fields handed to each agent.
        item_analysis_per_group: Statistics text keyed by agent name.
        avg_items_per_group: Average item count keyed by agent name.

    Returns:
        A single dict combining the ``{agent_name: result}`` dicts of all agents.
    """
    print("\n" + "="*50)
    print("BAGIAN 2: Memulai Analisis Pasien Baru dengan Agents")
    print("="*50)

    pending = []
    for group_name, group_item_types in AGENT_GROUPS.items():
        group_analysis = item_analysis_per_group.get(group_name, "No analysis available.")
        group_average = avg_items_per_group.get(group_name, 0)
        average_sentence = f"Total average number of items for '{group_name}' category: {group_average}"
        pending.append(
            run_agent(group_name, group_item_types, patient_info, group_analysis, average_sentence)
        )

    merged = {}
    for outcome in await asyncio.gather(*pending):
        merged.update(outcome)
    return merged


# ==============================================================================
# FUNGSI UTAMA (MAIN ORCHESTRATOR)
# ==============================================================================

def _lookup_reason(reasoning, item_name):
    """Return the reason recorded for *item_name*, or 'N/A' when none is found."""
    for entry in reasoning if isinstance(reasoning, list) else []:
        if isinstance(entry, dict) and entry.get('item') == item_name:
            return entry.get('reason', 'N/A')
    return 'N/A'


def _build_prediction_rows(final_predictions):
    """Flatten the per-agent outputs into one row dict per predicted item."""
    rows = []
    for agent_group, details in final_predictions.items():
        if isinstance(details, dict) and 'predictions' in details:
            for pred in details.get('predictions') or []:
                if not isinstance(pred, dict):
                    # Malformed prediction entry from the model; nothing to tabulate.
                    continue
                rows.append({
                    "Agent Group": agent_group,
                    "Item Name": pred.get('item', 'N/A'),
                    "Quantity": pred.get('quantity', 0),
                    "Reasoning": _lookup_reason(details.get('reasoning'), pred.get('item')),
                })
        else:
            # The JSON parser can return something other than a dict
            # (e.g. a list); only dicts can carry an 'error' entry.
            error_text = details.get('error', 'Unknown') if isinstance(details, dict) else 'Unknown'
            rows.append({"Agent Group": agent_group, "Item Name": f"Error: {error_text}", "Quantity": 0, "Reasoning": "N/A"})
    return rows


async def main():
    """Run the whole workflow: build the knowledge base, analyse it, predict, clean up.

    Side effects: creates ``DATA_DIR`` and ``PROMPT_DIR`` if missing, prints
    the results, stores the result table in the global ``predictions_df``
    and finally deletes every ``*.json`` file in ``DATA_DIR``.
    """
    # --- Step 0: Initial setup ---
    DATA_DIR.mkdir(exist_ok=True)
    PROMPT_DIR.mkdir(exist_ok=True)
    # Initialise the Langsmith client.
    # NOTE(review): the instance is never used afterwards; kept in case its
    # construction is relied on as a configuration check - confirm.
    client = Client()

    # --- Step 1: Define the new patient ---
    # This patient is analysed WITHOUT generating a new summary.
    patient_input = {
        "hospital_code": "SHMK",
        "archetype": "Premium Generalist",
        "hospital_region": "Region 4",
        "birthdate": "2001-09-12",
        "gender": "Female",
        "icd10": [
            "K01.1"
        ],
        "icd9": [
            "23.19"
        ],
        "primary_doctor": "drg. Timurwati, SpBM",
        "primary_doctor_id": 9000000002001,
        "doctor_specialty": "DENTISTRY - ORAL AND MAXILLOFACIAL SURGERY",
        "admission_type": "Inpatient",
        "admission_id": -2023065,
        "admission_date": "2024-12-10 15:21:45",
        "discharge_date": "",
        "anesthesia_doctor": "",
        "anesthesia_type": "",
        "surgery_nature": "",
        "payer_type": "Third Parties - Insurance Government (BPJS)",
        "payer_name": "BPJS KESEHATAN",
        "payer_id": 14000000002601,
    }

    # --- Step 2: Build the knowledge base ---
    # The new patient's own admission is excluded from the historical data.
    success = generate_knowledge_base('api_result', [patient_input.get("admission_id")])
    if not success:
        print("\nProses dihentikan karena pembuatan knowledge base gagal.")
        return

    # --- Step 3: Statistical analysis of the knowledge base ---
    item_analysis_per_group_dict, average_items_per_group_dict = perform_item_analysis_for_groups()

    # --- Step 4: Run the agents to get predictions ---
    final_predictions = await run_all_agents(patient_input, item_analysis_per_group_dict, average_items_per_group_dict)

    # --- Step 5: Show the results ---
    print("\n\n--- Raw JSON Output ---")
    print(json.dumps(final_predictions, indent=2))

    print("\n\n--- Tabel Prediksi Akhir ---")
    table_data = _build_prediction_rows(final_predictions)

    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 200)
    pd.set_option('display.max_colwidth', 60)
    # Published as a global so later notebook cells can query the table.
    global predictions_df
    predictions_df = pd.DataFrame(table_data)
    print(predictions_df)

    # ==============================================================================
    # Step 6: Remove the knowledge-base files once prediction is finished
    # ==============================================================================
    print("\n\n--- Memulai Pembersihan Knowledge Base ---")
    try:
        # Collect every .json file directly inside DATA_DIR.
        json_files_to_delete = list(DATA_DIR.glob('*.json'))

        if not json_files_to_delete:
            print("Tidak ada file .json yang ditemukan di folder 'data/' untuk dihapus.")
        else:
            for file_path in json_files_to_delete:
                file_path.unlink()
                print(f"✅ File sementara dihapus: {file_path}")

        print("🎉 Pembersihan file knowledge base selesai.")
    except Exception as e:
        print(f"❌ Terjadi error saat membersihkan file di folder 'data/': {e}")



if __name__ == "__main__":
    # Top-level `await` is only accepted by IPython/Jupyter, where an event
    # loop is already running; this cell is meant to be executed there.
    # In a plain Python script the line below would be used instead:
    # asyncio.run(main())
    await main()

✅ Library dan environment variables siap.
✅ Konfigurasi global telah dimuat.
✅ Fungsi pembantu siap digunakan.

BAGIAN 1: Memulai Pembuatan Knowledge Base
✅ Data sumber dimuat. Jumlah data: 10
✅ Koneksi ke ClickHouse berhasil!
✅ Client Azure OpenAI (untuk ringkasan) berhasil dikonfigurasi.

🚀 Mengambil data SOAP untuk 10 admission(s)...
✅ Data SOAP berhasil diambil dan direstrukturisasi.

🤖 Memulai proses pembuatan ringkasan untuk data historis...
  (1/10) Membuat ringkasan untuk AdmissionId: -2004234...
  (2/10) Membuat ringkasan untuk AdmissionId: -3152391...
  (3/10) Membuat ringkasan untuk AdmissionId: 9000007046318...
  (4/10) Membuat ringkasan untuk AdmissionId: 9000007038123...
  (5/10) Membuat ringkasan untuk AdmissionId: 9000007035801...
  (6/10) Membuat ringkasan untuk AdmissionId: -4167019...
  (7/10) Membuat ringkasan untuk AdmissionId: -4503356...
  (8/10) Membuat ringkasan untuk AdmissionId: 9000006952302...
  (9/10) Membuat ringkasan untuk AdmissionId: -4733119...
  (10/

In [2]:
import pandas as pd
# Show every row instead of pandas' truncated preview.
pd.set_option('display.max_rows', None)

# Item name and quantity predicted by the consumables agent group.
predictions_df.loc[
    predictions_df['Agent Group'].eq('Consumables_and_Medical_Supplies'),
    ['Item Name', 'Quantity'],
]


Unnamed: 0,Item Name,Quantity
0,VIOREX 2% 20ML (REPACK),1
1,MEDIGLOVES STERILE POWDER FREE LATEX 6.5 ONEMED,1
2,SYRINGE WITHOUT NEEDLE LUER LOCK 20ML KANGJIN,1
3,SUCTION CATH 12CH SHM-SC-12SCC HOSPITECH,1
4,SURFLO IV CATH 18GX1.1/4INCH SR+OX1832C3 TERUMO,1
5,ADULT BACTERIAL VIRAL FILTER WITH HME LUER LOCK PORT (VE...,1
6,TRUSYNTH 3-0 90CM ROUND BODIED 26MM VIOLET 10F90DZ26 HEA...,1
7,TRANSPARANT FILM DRESSING 6X7CM WITH NOTCH 1IVD-0607N WI...,1
8,INFUSION SET 21GX1.5INCH ONEMED,1
9,SYRINGE 5ML (22Gx1.5INCH) ONEMED,2
