In [None]:
# %% [markdown]
# # üöÄ Agentic AI System - Notebook Lengkap
# 
# **Arsitektur Lengkap dengan:**
# - LangChain & LangGraph untuk agent orchestration
# - Qwen2.5:7b (umum) & Qwen-coder (SQL) lokal via Ollama
# - Metadata JSON tanpa ChromaDB
# - Region & Leveldata Enforcement
# - Logging Komprehensif
# - Multi-agent System (Router, Planner, SQL Agent, Forecast Agent)

# %% [markdown]
# ## üì¶ 1. Setup & Instalasi Dependensi

# %%
# %pip install langchain langgraph langchain-community pandas sqlalchemy scikit-learn
# %pip install ollama  # Jika belum install Ollama

# %% [markdown]
# ## üîß 2. Import Libraries

# %%

In [None]:

import json
import os
import sqlite3
import re
from datetime import datetime
from typing import TypedDict, List, Optional, Annotated, Dict, Any
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from langchain_ollama.chat_models import ChatOllama
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.sqlite import SqliteSaver
import operator

print("‚úÖ Libraries imported successfully")

‚úÖ Libraries imported successfully


In [None]:


# %% [markdown]
# ## ‚öôÔ∏è 3. Konfigurasi & Inisialisasi

# %%
# ====================
# KONFIGURASI SISTEM
# ====================

# User Context (bisa diganti dengan sistem auth nanti)
USER_CONTEXT = {
    "leveldata": "2_KABUPATEN_JAWA_BARAT",
    "region": "RM III JABAR"
}

BASE_DIR = Path.cwd()
METADATA_DIR = BASE_DIR / "metadata"
DB_PATH = BASE_DIR / "database.db"  # Ganti dengan path database Anda
LOG_DIR = BASE_DIR / "logs"

for directory in [METADATA_DIR, LOG_DIR]:
    directory.mkdir(exist_ok=True)

print(f"Metadata directory: {METADATA_DIR}")
print(f"Database path: {DB_PATH}")
print(f"Log directory: {LOG_DIR}")


üìÅ Metadata directory: /home/aliffatur/coding/kalbe-cdt-intern/langgraph_learn/bps-seki/metadata
üóÉÔ∏è Database path: /home/aliffatur/coding/kalbe-cdt-intern/langgraph_learn/bps-seki/database.db
üìù Log directory: /home/aliffatur/coding/kalbe-cdt-intern/langgraph_learn/bps-seki/logs


In [None]:

# %% [markdown]
# ## ü§ñ 4. Inisialisasi Model LLM (Lokal via Ollama)

# %%
def initialize_llms():
    """Initialize local LLMs via Ollama"""
    try:
        print("Memuat model LLM lokal...")
        
        # Model untuk percakapan umum
        user_llm = ChatOllama(
            model="qwen2.5:7b",
            temperature=0.1,
            base_url="http://localhost:11434",
            num_predict=512
        )
        
        # Model khusus untuk SQL generation
        sql_llm = ChatOllama(
            model="qwen-coder:latest",
            temperature=0.0,
            base_url="http://localhost:11434",
            num_predict=1024
        )
        
        print("Model LLM berhasil dimuat!")
        print(f"   - User Model: qwen2.5:7b (temp: 0.1)")
        print(f"   - SQL Model: qwen-coder:latest (temp: 0.0)")
        
        return user_llm, sql_llm
        
    except Exception as e:
        print(f"‚ùå Gagal memuat model: {e}")
        print("\nüîß Troubleshooting:")
        print("1. Pastikan Ollama berjalan: 'ollama serve'")
        print("2. Pull model terlebih dahulu:")
        print("   > ollama pull qwen2.5:7b")
        print("   > ollama pull qwen-coder:latest")
        
        # Fallback untuk testing
        class StubLLM:
            def invoke(self, prompt):
                print(f"[STUB LLM] Prompt: {prompt[:100]}...")
                return type('obj', (object,), {'content': f"STUB_RESPONSE for: {prompt[:50]}..."})()
        
        return StubLLM(), StubLLM()

# Inisialisasi LLM
user_llm, sql_llm = initialize_llms()


üîÑ Memuat model LLM lokal...
‚úÖ Model LLM berhasil dimuat!
   - User Model: qwen2.5:7b (temp: 0.1)
   - SQL Model: qwen-coder:latest (temp: 0.0)


In [8]:

# %% [markdown]
# ## üìä 5. Logging System

# %%
class AuditLogger:
    """Sistem logging komprehensif untuk semua aktivitas"""
    
    def __init__(self, log_dir: Path = LOG_DIR):
        self.log_dir = log_dir
        self.session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
        
    def log(self, event_type: str, data: Dict, level: str = "INFO"):
        """Log event ke file dan console"""
        timestamp = datetime.now().isoformat()
        
        log_entry = {
            "timestamp": timestamp,
            "session_id": self.session_id,
            "level": level,
            "event_type": event_type,
            **data
        }
        
        # Console output dengan warna
        colors = {
            "INFO": "\033[94m",  # Blue
            "WARNING": "\033[93m",  # Yellow
            "ERROR": "\033[91m",  # Red
            "SUCCESS": "\033[92m"  # Green
        }
        color = colors.get(level, "\033[0m")
        
        print(f"{color}[{level}] {event_type}: {data.get('message', '')}\033[0m")
        
        # Save to file (JSONL format)
        log_file = self.log_dir / f"audit_{datetime.now().strftime('%Y%m%d')}.jsonl"
        with open(log_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
        
        return log_entry
    
    def log_user_input(self, user_input: str, context: Dict):
        """Log input dari user"""
        return self.log("USER_INPUT", {
            "user_input": user_input,
            "user_context": context,
            "message": f"User query: {user_input}"
        })
    
    def log_llm_call(self, model: str, prompt: str, response: str):
        """Log panggilan ke LLM"""
        return self.log("LLM_CALL", {
            "model": model,
            "prompt_preview": prompt[:200] + "..." if len(prompt) > 200 else prompt,
            "response_preview": response[:200] + "..." if len(response) > 200 else response,
            "prompt_length": len(prompt),
            "response_length": len(response),
            "message": f"LLM call to {model}"
        })
    
    def log_sql_generation(self, sql: str, metadata: Dict):
        """Log SQL generation"""
        return self.log("SQL_GENERATION", {
            "generated_sql": sql,
            "table_used": metadata.get("table_name", "unknown"),
            "message": f"Generated SQL: {sql[:100]}..."
        })
    
    def log_forecast_result(self, forecast_data: Dict):
        """Log hasil forecasting"""
        return self.log("FORECAST_RESULT", {
            "data_points": forecast_data.get("data_points", 0),
            "periods": forecast_data.get("periods", 0),
            "message": f"Forecast completed for {forecast_data.get('table_name', 'unknown')}"
        })

# Inisialisasi logger
logger = AuditLogger()
print(f"üìù Logger initialized. Session ID: {logger.session_id}")


üìù Logger initialized. Session ID: 20251212_073537


In [9]:

# %% [markdown]
# ## üìö 6. Metadata Management System

# %%
class MetadataManager:
    """Manajemen metadata dari file JSON"""
    
    def __init__(self, metadata_dir: Path):
        self.metadata_dir = metadata_dir
        self._metadata_cache = None
        self._metadata_index = None
    
    def load_all_metadata(self, force_reload: bool = False) -> Dict[str, Any]:
        """Load semua metadata dari folder"""
        if self._metadata_cache is not None and not force_reload:
            return self._metadata_cache
        
        metadata = {}
        json_files = list(self.metadata_dir.glob("*.json"))
        
        if not json_files:
            logger.log("METADATA_LOAD", {
                "status": "WARNING",
                "message": f"Tidak ada file JSON di {self.metadata_dir}"
            }, level="WARNING")
            return metadata
        
        for json_file in json_files:
            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    table_name = json_file.stem
                    metadata[table_name] = json.load(f)
                    
                logger.log("METADATA_LOAD", {
                    "file": json_file.name,
                    "table": table_name,
                    "status": "SUCCESS"
                }, level="INFO")
                
            except Exception as e:
                logger.log("METADATA_ERROR", {
                    "file": json_file.name,
                    "error": str(e),
                    "status": "ERROR"
                }, level="ERROR")
        
        self._metadata_cache = metadata
        logger.log("METADATA_LOAD_COMPLETE", {
            "total_tables": len(metadata),
            "tables": list(metadata.keys())
        }, level="SUCCESS")
        
        return metadata
    
    def get_table_metadata(self, table_name: str) -> Optional[Dict]:
        """Get metadata untuk tabel spesifik"""
        metadata = self.load_all_metadata()
        return metadata.get(table_name)
    
    def find_relevant_tables(self, user_query: str, top_k: int = 3) -> List[Dict]:
        """
        Cari tabel yang relevan dengan query user
        Menggunakan keyword matching sederhana
        """
        metadata = self.load_all_metadata()
        if not metadata:
            return []
        
        query_terms = set(user_query.lower().split())
        
        # Heuristic scoring
        scores = []
        for table_name, meta in metadata.items():
            score = 0
            
            # 1. Check description
            desc = meta.get("description", "").lower()
            desc_score = sum(3 for term in query_terms if term in desc)
            score += desc_score
            
            # 2. Check column names
            columns = meta.get("columns", {})
            col_score = 0
            for col_name in columns.keys():
                col_name_lower = col_name.lower()
                for term in query_terms:
                    if term in col_name_lower:
                        col_score += 2
                    elif len(term) > 3 and col_name_lower.startswith(term[:3]):
                        col_score += 1
            score += col_score
            
            # 3. Check example data
            examples = meta.get("example_rows", [])
            example_score = 0
            for row in examples:
                for value in row.values():
                    if isinstance(value, str):
                        value_lower = value.lower()
                        for term in query_terms:
                            if term in value_lower:
                                example_score += 0.5
            score += example_score
            
            if score > 0:
                scores.append({
                    "table_name": table_name,
                    "metadata": meta,
                    "relevance_score": score,
                    "description": desc[:100] + "..." if len(desc) > 100 else desc
                })
        
        # Sort by score
        scores.sort(key=lambda x: x["relevance_score"], reverse=True)
        
        # Log results
        logger.log("METADATA_RETRIEVAL", {
            "user_query": user_query,
            "total_candidates": len(scores),
            "top_results": [s["table_name"] for s in scores[:top_k]],
            "scores": [s["relevance_score"] for s in scores[:top_k]]
        })
        
        return scores[:top_k]
    
    def build_schema_prompt(self, table_info: Dict) -> str:
        """Build schema description untuk prompt SQL"""
        meta = table_info["metadata"]
        columns = meta.get("columns", {})
        
        schema_lines = []
        for col_name, col_info in columns.items():
            col_type = col_info.get("type", "unknown")
            col_desc = col_info.get("description", "no description")
            schema_lines.append(f"  - {col_name} ({col_type}): {col_desc}")
        
        schema_text = "\n".join(schema_lines)
        
        return f"""Table: {table_info['table_name']}
Description: {meta.get('description', 'No description')}
Columns:
{schema_text}
Access Column: {meta.get('access_column', 'None')}
Example Data: {json.dumps(meta.get('example_rows', [])[:2], ensure_ascii=False)}"""

# Inisialisasi Metadata Manager
metadata_manager = MetadataManager(METADATA_DIR)

In [10]:


# %% [markdown]
# ## üõ°Ô∏è 7. SQL Validator & Security

# %%
class SQLValidator:
    """Validasi dan keamanan SQL query"""
    
    FORBIDDEN_KEYWORDS = [
        "drop", "delete", "update", "insert", "alter",
        "truncate", "create", "attach", "detach", "grant",
        "revoke", "commit", "rollback", "savepoint"
    ]
    
    FORBIDDEN_PATTERNS = [
        r"--.*$",  # SQL comments
        r";\s*--",  # Multiple statements with comments
        r"union.*select",  # SQL injection patterns
        r"exec.*\(|sp_",  # Stored procedure execution
        r"xp_",  # Extended stored procedures
    ]
    
    @classmethod
    def validate_sql(cls, sql: str) -> Dict[str, Any]:
        """Validasi keamanan SQL query"""
        sql_lower = sql.lower().strip()
        
        # 1. Basic checks
        if not sql_lower.startswith("select"):
            return {
                "is_valid": False,
                "reason": "Hanya query SELECT yang diizinkan",
                "suggested_fix": "Ganti dengan SELECT statement"
            }
        
        # 2. Check forbidden keywords
        for keyword in cls.FORBIDDEN_KEYWORDS:
            if re.search(rf'\b{keyword}\b', sql_lower):
                return {
                    "is_valid": False,
                    "reason": f"Mengandung kata kunci terlarang: {keyword}",
                    "suggested_fix": "Hapus operasi non-SELECT"
                }
        
        # 3. Check forbidden patterns
        for pattern in cls.FORBIDDEN_PATTERNS:
            if re.search(pattern, sql_lower, re.IGNORECASE):
                return {
                    "is_valid": False,
                    "reason": "Mengandung pola SQL yang tidak aman",
                    "suggested_fix": "Hindari komentar atau pola berbahaya"
                }
        
        # 4. Check structure (simple)
        if "from" not in sql_lower:
            return {
                "is_valid": False,
                "reason": "Query tidak memiliki FROM clause",
                "suggested_fix": "Tambahkan FROM clause dengan nama tabel"
            }
        
        return {
            "is_valid": True,
            "reason": "Query aman",
            "validated_sql": sql
        }
    
    @classmethod
    def inject_region_filter(cls, sql: str, access_column: str, region: str) -> str:
        """Inject region filter ke SQL query jika diperlukan"""
        if not access_column or not region:
            return sql
        
        sql_lower = sql.lower()
        access_column_lower = access_column.lower()
        
        # Cek jika access_column sudah ada di WHERE clause
        if re.search(rf"where.*{re.escape(access_column_lower)}", sql_lower, re.IGNORECASE):
            logger.log("REGION_FILTER", {
                "status": "ALREADY_EXISTS",
                "sql": sql[:100],
                "access_column": access_column,
                "region": region
            })
            return sql
        
        # Cek jika sudah ada WHERE clause
        if "where" in sql_lower:
            # Tambahkan AND clause
            insert_pos = sql_lower.rfind("where") + 5
            new_sql = sql[:insert_pos] + f" {access_column} LIKE '{region}%' AND" + sql[insert_pos:]
        else:
            # Tambahkan WHERE clause sebelum GROUP BY/ORDER BY/LIMIT/akhir
            for clause in ["group by", "order by", "limit"]:
                if clause in sql_lower:
                    pos = sql_lower.find(clause)
                    new_sql = sql[:pos] + f" WHERE {access_column} LIKE '{region}%' " + sql[pos:]
                    break
            else:
                # Tambahkan di akhir
                new_sql = sql.rstrip(";") + f" WHERE {access_column} LIKE '{region}%';"
        
        logger.log("REGION_FILTER", {
            "status": "INJECTED",
            "original_sql": sql[:100],
            "modified_sql": new_sql[:100],
            "access_column": access_column,
            "region": region
        })
        
        return new_sql
    
    @classmethod
    def add_limit_if_missing(cls, sql: str, default_limit: int = 100) -> str:
        """Tambahkan LIMIT jika tidak ada"""
        sql_lower = sql.lower()
        
        if "limit" in sql_lower:
            return sql
        
        # Cari posisi untuk menambahkan LIMIT
        sql_clean = sql.rstrip(";").strip()
        return f"{sql_clean} LIMIT {default_limit};"


In [11]:

# %% [markdown]
# ## üíæ 8. SQL Executor

# %%
class SQLExecutor:
    """Eksekusi SQL query ke database"""
    
    def __init__(self, db_path: Path):
        self.db_path = db_path
        
    def execute(self, sql: str, params: tuple = None) -> Dict[str, Any]:
        """Eksekusi SQL query dan return hasil"""
        try:
            # Validasi dulu
            validation = SQLValidator.validate_sql(sql)
            if not validation["is_valid"]:
                return {
                    "success": False,
                    "error": f"SQL validation failed: {validation['reason']}",
                    "suggested_fix": validation.get("suggested_fix", ""),
                    "data": None,
                    "row_count": 0
                }
            
            # Cek file database
            if not self.db_path.exists():
                return {
                    "success": False,
                    "error": f"Database tidak ditemukan di {self.db_path}",
                    "data": None,
                    "row_count": 0
                }
            
            # Execute query
            conn = sqlite3.connect(str(self.db_path))
            cursor = conn.cursor()
            
            try:
                if params:
                    cursor.execute(sql, params)
                else:
                    cursor.execute(sql)
                
                # Get column names
                columns = [description[0] for description in cursor.description] if cursor.description else []
                
                # Fetch all results
                rows = cursor.fetchall()
                
                # Convert to DataFrame
                df = pd.DataFrame(rows, columns=columns) if columns else pd.DataFrame()
                
                result = {
                    "success": True,
                    "data": df,
                    "row_count": len(df),
                    "columns": columns,
                    "sql": sql,
                    "execution_time": None  # Bisa ditambahkan timing
                }
                
                logger.log("SQL_EXECUTION", {
                    "status": "SUCCESS",
                    "sql_preview": sql[:200],
                    "row_count": len(df),
                    "columns": columns[:5] if columns else []
                }, level="SUCCESS")
                
                return result
                
            finally:
                conn.close()
                
        except sqlite3.Error as e:
            error_msg = f"SQL execution error: {str(e)}"
            logger.log("SQL_EXECUTION", {
                "status": "ERROR",
                "error": str(e),
                "sql": sql[:200]
            }, level="ERROR")
            
            return {
                "success": False,
                "error": error_msg,
                "data": None,
                "row_count": 0
            }
    
    def test_connection(self) -> bool:
        """Test koneksi ke database"""
        try:
            if not self.db_path.exists():
                logger.log("DB_CONNECTION", {
                    "status": "ERROR",
                    "message": f"File database tidak ditemukan: {self.db_path}"
                }, level="ERROR")
                return False
            
            conn = sqlite3.connect(str(self.db_path))
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1;")
            tables = cursor.fetchall()
            conn.close()
            
            logger.log("DB_CONNECTION", {
                "status": "SUCCESS",
                "table_count": len(tables) if tables else 0,
                "message": f"Berhasil terhubung ke database: {self.db_path}"
            }, level="SUCCESS")
            
            return True
            
        except Exception as e:
            logger.log("DB_CONNECTION", {
                "status": "ERROR",
                "error": str(e),
                "message": f"Gagal terhubung ke database: {self.db_path}"
            }, level="ERROR")
            return False

# Inisialisasi SQL Executor
sql_executor = SQLExecutor(DB_PATH)

# Test koneksi database
db_connected = sql_executor.test_connection()
if not db_connected:
    print(f"‚ö†Ô∏è  PERINGATAN: Database {DB_PATH} tidak ditemukan atau tidak dapat diakses")


[92m[SUCCESS] DB_CONNECTION: Berhasil terhubung ke database: /home/aliffatur/coding/kalbe-cdt-intern/langgraph_learn/bps-seki/database.db[0m


In [12]:

# %% [markdown]
# ## üîÆ 9. Forecast Agent

# %%
class ForecastAgent:
    """Agent untuk forecasting time series"""
    
    @staticmethod
    def detect_time_series_columns(metadata: Dict) -> Dict[str, str]:
        """Deteksi kolom tanggal dan nilai dari metadata"""
        columns = metadata.get("columns", {})
        
        # Keywords untuk berbagai jenis kolom
        date_keywords = ['tahun', 'year', 'bulan', 'month', 'tanggal', 'date', 'periode', 'time']
        value_keywords = ['nilai', 'value', 'jumlah', 'total', 'devisa', 'pendapatan', 'price', 'sales']
        
        date_candidates = []
        value_candidates = []
        
        for col_name, col_info in columns.items():
            col_lower = col_name.lower()
            col_type = col_info.get("type", "").lower()
            
            # Check for date columns
            is_date = any(keyword in col_lower for keyword in date_keywords)
            is_date = is_date or col_type in ['date', 'datetime', 'timestamp', 'year', 'month']
            
            # Check for value columns
            is_value = any(keyword in col_lower for keyword in value_keywords)
            is_value = is_value or col_type in ['integer', 'float', 'double', 'decimal', 'numeric', 'real']
            
            if is_date:
                date_candidates.append((col_name, len(date_keywords) + 1))
            
            if is_value:
                value_candidates.append((col_name, len(value_keywords) + 1))
        
        # Pilih yang paling probable
        date_col = max(date_candidates, key=lambda x: x[1])[0] if date_candidates else None
        value_col = max(value_candidates, key=lambda x: x[1])[0] if value_candidates else None
        
        # Fallback: pilih kolom pertama dan kedua
        if not date_col and columns:
            date_col = list(columns.keys())[0]
        if not value_col and len(columns) > 1:
            value_col = list(columns.keys())[1]
        elif not value_col and columns:
            value_col = list(columns.keys())[0]
        
        return {
            "date_column": date_col,
            "value_column": value_col,
            "confidence": "high" if date_candidates and value_candidates else "medium" if date_col and value_col else "low"
        }
    
    @staticmethod
    def prepare_forecast_data(df: pd.DataFrame, date_col: str, value_col: str) -> pd.DataFrame:
        """Persiapkan data untuk forecasting"""
        try:
            # Copy dan clean data
            forecast_df = df[[date_col, value_col]].copy()
            forecast_df = forecast_df.dropna()
            
            # Convert date column
            if not pd.api.types.is_numeric_dtype(forecast_df[date_col]):
                try:
                    forecast_df[date_col] = pd.to_numeric(forecast_df[date_col], errors='coerce')
                except:
                    # Jika tidak bisa di-convert, gunakan index sebagai proxy
                    forecast_df[date_col] = range(len(forecast_df))
            
            # Convert value column
            forecast_df[value_col] = pd.to_numeric(forecast_df[value_col], errors='coerce')
            forecast_df = forecast_df.dropna()
            
            # Sort by date
            forecast_df = forecast_df.sort_values(date_col)
            
            return forecast_df
            
        except Exception as e:
            raise ValueError(f"Data preparation error: {str(e)}")
    
    @staticmethod
    def simple_linear_forecast(df: pd.DataFrame, date_col: str, value_col: str, periods: int = 3) -> pd.DataFrame:
        """Forecasting sederhana menggunakan linear regression"""
        if len(df) < 3:
            raise ValueError(f"Data tidak cukup untuk forecasting. Minimum 3 data points, dapat {len(df)}")
        
        # Prepare data
        X = np.array(df[date_col]).reshape(-1, 1)
        y = np.array(df[value_col])
        
        # Train model
        model = LinearRegression()
        model.fit(X, y)
        
        # Generate future dates
        last_date = df[date_col].iloc[-1]
        future_dates = np.array([last_date + i + 1 for i in range(periods)]).reshape(-1, 1)
        
        # Predict
        predictions = model.predict(future_dates)
        
        # Calculate confidence intervals (simple)
        residuals = y - model.predict(X)
        std_error = np.std(residuals)
        confidence_low = predictions - 1.96 * std_error
        confidence_high = predictions + 1.96 * std_error
        
        # Create result DataFrame
        result_df = pd.DataFrame({
            'period': [f'Period {i+1}' for i in range(periods)],
            'prediction': predictions,
            'confidence_low': confidence_low,
            'confidence_high': confidence_high,
            'date_projection': future_dates.flatten() if pd.api.types.is_numeric_dtype(df[date_col]) else None
        })
        
        return result_df
    
    def forecast(self, table_name: str, metadata: Dict, region: str = None) -> Dict[str, Any]:
        """Lakukan forecasting untuk tabel tertentu"""
        try:
            logger.log("FORECAST_START", {
                "table_name": table_name,
                "region": region,
                "message": f"Starting forecast for {table_name}"
            })
            
            # 1. Deteksi kolom
            columns = self.detect_time_series_columns(metadata)
            
            if columns["confidence"] == "low":
                logger.log("FORECAST_WARNING", {
                    "warning": "Low confidence in column detection",
                    "date_column": columns["date_column"],
                    "value_column": columns["value_column"]
                }, level="WARNING")
            
            # 2. Build SQL query
            sql = f"SELECT {columns['date_column']}, {columns['value_column']} FROM {table_name}"
            
            # Add region filter if applicable
            access_column = metadata.get("access_column")
            if access_column and region:
                sql += f" WHERE {access_column} LIKE '{region}%'"
            
            sql += f" ORDER BY {columns['date_column']}"
            
            # 3. Execute query
            result = sql_executor.execute(sql)
            
            if not result["success"]:
                raise ValueError(f"Failed to fetch data: {result['error']}")
            
            df = result["data"]
            
            if len(df) < 3:
                raise ValueError(f"Insufficient data for forecasting. Need at least 3 rows, got {len(df)}")
            
            # 4. Prepare data
            prepared_df = self.prepare_forecast_data(df, columns["date_column"], columns["value_column"])
            
            if len(prepared_df) < 3:
                raise ValueError(f"After cleaning, insufficient data: {len(prepared_df)} rows")
            
            # 5. Perform forecast
            forecast_result = self.simple_linear_forecast(
                prepared_df, 
                columns["date_column"], 
                columns["value_column"],
                periods=3
            )
            
            # 6. Format results
            forecast_summary = {
                "table_name": table_name,
                "data_points": len(prepared_df),
                "date_range": {
                    "start": prepared_df[columns["date_column"]].iloc[0],
                    "end": prepared_df[columns["date_column"]].iloc[-1]
                },
                "last_value": float(prepared_df[columns["value_column"]].iloc[-1]),
                "forecast_periods": 3,
                "predictions": forecast_result.to_dict("records"),
                "metadata": {
                    "date_column": columns["date_column"],
                    "value_column": columns["value_column"],
                    "confidence": columns["confidence"]
                }
            }
            
            logger.log("FORECAST_COMPLETE", {
                "table_name": table_name,
                "data_points": len(prepared_df),
                "message": f"Forecast completed successfully"
            }, level="SUCCESS")
            
            return {
                "success": True,
                "forecast": forecast_summary,
                "sql": sql,
                "raw_data_preview": df.head(3).to_dict("records")
            }
            
        except Exception as e:
            error_msg = f"Forecast failed: {str(e)}"
            logger.log("FORECAST_ERROR", {
                "error": str(e),
                "table_name": table_name,
                "message": error_msg
            }, level="ERROR")
            
            return {
                "success": False,
                "error": error_msg,
                "forecast": None
            }

# Inisialisasi Forecast Agent
forecast_agent = ForecastAgent()


In [None]:
class SmartTableSelector:
    """Selector tabel pintar menggunakan LLM untuk pilih tabel otomatis"""
    
    def __init__(self, user_llm, sql_llm):
        self.user_llm = user_llm
        self.sql_llm = sql_llm
        self.selection_cache = {}  # Cache untuk hasil seleksi
    
    def select_best_table(self, user_query: str, candidate_tables: List[Dict], 
                         user_context: Dict) -> Dict[str, Any]:
        """
        Gunakan LLM untuk memilih tabel terbaik secara otomatis dari kandidat.
        Mengembalikan tabel terpilih dan alasan pemilihan.
        """
        if not candidate_tables:
            return {"selected": None, "reason": "No candidate tables", "confidence": 0}
        
        # Jika hanya 1 kandidat, langsung pilih
        if len(candidate_tables) == 1:
            return {
                "selected": candidate_tables[0],
                "reason": "Only one candidate table",
                "confidence": 1.0
            }
        
        # Build context untuk LLM
        tables_context = []
        for i, table in enumerate(candidate_tables, 1):
            meta = table["metadata"]
            columns = list(meta.get("columns", {}).keys())[:5]  # Ambil 5 kolom pertama
            tables_context.append(f"""
            {i}. {table['table_name']}
               - Deskripsi: {meta.get('description', 'Tidak ada deskripsi')}
               - Kolom: {', '.join(columns)}
               - Access Column: {meta.get('access_column', 'Tidak ada')}
               - Relevansi: {table['relevance_score']:.2f}
            """)
        
        tables_text = "\n".join(tables_context)
        
        prompt = f"""
        Anda adalah asisten AI yang ahli dalam database. Tugas Anda adalah memilih tabel yang PALING TEPAT untuk menjawab pertanyaan user.
        
        INFORMASI USER:
        - Pertanyaan: "{user_query}"
        - Region: {user_context.get('region', 'Tidak diketahui')}
        - Leveldata: {user_context.get('leveldata', 'Tidak diketahui')}
        
        KANDIDAT TABEL:
        {tables_text}
        
        INSTRUKSI:
        1. Analisis pertanyaan user dan struktur setiap tabel
        2. Pilih tabel yang PALING RELEVAN berdasarkan:
           - Kesamaan kata kunci dengan deskripsi/kolom
           - Adanya kolom yang bisa menjawab pertanyaan
           - Access column yang sesuai dengan region user
        3. Return HANYA JSON dengan format:
        {{
            "selected_table_index": <nomor_index>,
            "selected_table_name": "<nama_tabel>",
            "confidence": <0.1 sampai 1.0>,
            "reason": "<alasan_pemilihan>",
            "columns_needed": ["<kolom1>", "<kolom2>"]
        }}
        
        HANYA return JSON, tanpa penjelasan tambahan.
        """
        
        try:
            # Panggil LLM untuk seleksi
            response = self.user_llm.invoke(prompt)
            
            # Parse response
            import re
            json_match = re.search(r'\{.*\}', response.content, re.DOTALL)
            if json_match:
                result = json.loads(json_match.group())
                
                # Validasi hasil
                if 1 <= result["selected_table_index"] <= len(candidate_tables):
                    selected_idx = result["selected_table_index"] - 1
                    selected_table = candidate_tables[selected_idx]
                    
                    logger.log("TABLE_SELECTION_LLM", {
                        "user_query": user_query,
                        "selected_table": selected_table["table_name"],
                        "confidence": result.get("confidence", 0.5),
                        "reason": result.get("reason", "No reason"),
                        "candidates_count": len(candidate_tables)
                    })
                    
                    return {
                        "selected": selected_table,
                        "confidence": result.get("confidence", 0.5),
                        "reason": result.get("reason", "No reason"),
                        "llm_response": result
                    }
            
            # Fallback: pilih dengan score tertinggi
            best_table = max(candidate_tables, key=lambda x: x["relevance_score"])
            
            logger.log("TABLE_SELECTION_FALLBACK", {
                "user_query": user_query,
                "selected_table": best_table["table_name"],
                "confidence": 0.3,
                "reason": "LLM selection failed, using relevance score",
                "candidates_count": len(candidate_tables)
            })
            
            return {
                "selected": best_table,
                "confidence": 0.3,
                "reason": "Fallback to highest relevance score",
                "llm_response": None
            }
            
        except Exception as e:
            # Error handling
            best_table = max(candidate_tables, key=lambda x: x["relevance_score"])
            
            logger.log("TABLE_SELECTION_ERROR", {
                "error": str(e),
                "user_query": user_query,
                "selected_table": best_table["table_name"],
                "confidence": 0.1,
                "reason": f"Error in LLM selection: {str(e)[:100]}"
            })
            
            return {
                "selected": best_table,
                "confidence": 0.1,
                "reason": f"Error: {str(e)[:100]}",
                "error": str(e)
            }
    
    def build_smart_sql(self, user_query: str, table_info: Dict, 
                       user_context: Dict) -> Dict[str, Any]:
        """
        Build SQL query dengan konteks lengkap menggunakan LLM.
        Mempertimbangkan multiple tables jika diperlukan.
        """
        table_name = table_info["table_name"]
        metadata = table_info["metadata"]
        
        # Build detailed schema info
        columns_info = []
        for col_name, col_meta in metadata.get("columns", {}).items():
            col_type = col_meta.get("type", "unknown")
            col_desc = col_meta.get("description", "no description")
            columns_info.append(f"  - {col_name} ({col_type}): {col_desc}")
        
        columns_text = "\n".join(columns_info)
        
        # Check for date filtering
        date_columns = []
        for col_name, col_meta in metadata.get("columns", {}).items():
            col_lower = col_name.lower()
            if any(keyword in col_lower for keyword in ["tahun", "year", "bulan", "month", "tanggal", "date"]):
                date_columns.append(col_name)
        
        # Build prompt
        prompt = f"""
        Anda adalah ahli SQL untuk database SQLite. Generate SQL query berdasarkan:
        
        USER QUERY: "{user_query}"
        
        TABEL: {table_name}
        DESKRIPSI: {metadata.get('description', 'Tidak ada deskripsi')}
        
        KOLOM-KOLOM:
        {columns_text}
        
        KONTEKS USER:
        - Region: {user_context.get('region')}
        - Leveldata: {user_context.get('leveldata')}
        - Access Column: {metadata.get('access_column', 'Tidak ada')}
        
        ATURAN PENTING:
        1. Hanya gunakan SELECT statement
        2. Jika ada access_column, TAMBAHKAN filter WHERE untuk region user
        3. Filter tahun jika user menyebut tahun spesifik (2024, 2025, dll)
        4. Kolom tanggal yang tersedia: {date_columns if date_columns else 'Tidak ada'}
        5. Gunakan LIMIT 100 jika tidak disebutkan batasan
        6. Return HANYA SQL query, tanpa penjelasan
        
        CONTOH DATA (jika ada):
        {json.dumps(metadata.get('example_rows', [])[:2], ensure_ascii=False)}
        
        SQL QUERY:
        """
        
        try:
            response = self.sql_llm.invoke(prompt)
            sql = response.content.strip().strip("`")
            
            # Validasi SQL
            if not sql.lower().startswith("select"):
                raise ValueError("Generated SQL is not SELECT statement")
            
            # Log generation
            logger.log("SMART_SQL_GENERATION", {
                "table": table_name,
                "user_query": user_query,
                "generated_sql": sql[:500],
                "has_date_filter": any(str(year) in user_query for year in ["2023", "2024", "2025", "2026"])
            })
            
            return {
                "success": True,
                "sql": sql,
                "table": table_name,
                "prompt_length": len(prompt)
            }
            
        except Exception as e:
            error_msg = f"SQL generation error: {str(e)}"
            logger.log("SMART_SQL_ERROR", {
                "error": str(e),
                "table": table_name,
                "user_query": user_query
            })
            
            return {
                "success": False,
                "error": error_msg,
                "table": table_name
            }

# Inisialisasi smart selector
smart_selector = SmartTableSelector(user_llm, sql_llm)

# %% [markdown]
# ## üîÑ ENHANCED NODES: Auto-Table Selection

# %%
def enhanced_metadata_retriever_node(state: AgentState) -> AgentState:
    """Enhanced: Otomatis pilih tabel terbaik dengan LLM"""
    logger.log("NODE_ENTER", {"node": "enhanced_metadata_retriever"})
    
    # Find relevant tables
    relevant_tables = metadata_manager.find_relevant_tables(
        state["user_input"], 
        top_k=5  # Ambil lebih banyak kandidat
    )
    
    state["relevant_tables"] = relevant_tables
    
    if not relevant_tables:
        state["needs_clarification"] = True
        state["clarification_question"] = (
            "Maaf, saya tidak menemukan data yang sesuai dengan permintaan Anda. "
            "Bisa Anda gunakan kata kunci yang lebih spesifik?"
        )
        state["next_node"] = "clarify_agent"
        return state
    
    # üß† SMART SELECTION: Gunakan LLM untuk pilih tabel otomatis
    selection_result = smart_selector.select_best_table(
        user_query=state["user_input"],
        candidate_tables=relevant_tables,
        user_context=state["user_context"]
    )
    
    selected_table = selection_result["selected"]
    
    if selected_table and selection_result["confidence"] > 0.3:
        # Table terpilih dengan confidence cukup tinggi
        state["selected_table"] = selected_table["table_name"]
        state["table_metadata"] = selected_table["metadata"]
        state["selection_confidence"] = selection_result["confidence"]
        state["selection_reason"] = selection_result["reason"]
        state["next_node"] = "planner"
        
        logger.log("AUTO_TABLE_SELECTED", {
            "user_query": state["user_input"],
            "selected_table": selected_table["table_name"],
            "confidence": selection_result["confidence"],
            "reason": selection_result["reason"],
            "candidates_count": len(relevant_tables)
        }, level="SUCCESS")
        
    else:
        # Confidence rendah, tetap pilih tapi log warning
        best_table = max(relevant_tables, key=lambda x: x["relevance_score"])
        state["selected_table"] = best_table["table_name"]
        state["table_metadata"] = best_table["metadata"]
        state["selection_confidence"] = selection_result.get("confidence", 0.1)
        state["selection_reason"] = "Low confidence, using fallback"
        state["next_node"] = "planner"
        
        logger.log("LOW_CONFIDENCE_SELECTION", {
            "user_query": state["user_input"],
            "selected_table": best_table["table_name"],
            "confidence": selection_result.get("confidence", 0.1),
            "warning": "Low confidence in table selection",
            "candidates_count": len(relevant_tables)
        }, level="WARNING")
    
    return state

def enhanced_sql_agent_node(state: AgentState) -> AgentState:
    """Enhanced SQL Agent dengan smart query generation"""
    logger.log("NODE_ENTER", {"node": "enhanced_sql_agent"})
    
    if not state.get("selected_table"):
        state["error"] = "Tidak ada tabel yang dipilih"
        state["next_node"] = "error_handler"
        return state
    
    # Get table info
    table_info = next(
        (t for t in state["relevant_tables"] if t["table_name"] == state["selected_table"]),
        None
    )
    
    if not table_info:
        state["error"] = f"Table {state['selected_table']} not found in relevant tables"
        state["next_node"] = "error_handler"
        return state
    
    # üß† SMART SQL GENERATION dengan konteks lengkap
    sql_result = smart_selector.build_smart_sql(
        user_query=state["user_input"],
        table_info=table_info,
        user_context=state["user_context"]
    )
    
    if not sql_result["success"]:
        state["error"] = sql_result["error"]
        state["next_node"] = "error_handler"
        return state
    
    raw_sql = sql_result["sql"]
    
    # Validate SQL
    validation = SQLValidator.validate_sql(raw_sql)
    if not validation["is_valid"]:
        state["error"] = f"SQL validation failed: {validation['reason']}"
        state["next_node"] = "error_handler"
        return state
    
    # Inject region filter jika diperlukan
    access_column = table_info["metadata"].get("access_column")
    if access_column:
        raw_sql = SQLValidator.inject_region_filter(
            raw_sql, 
            access_column, 
            state["user_context"]["region"]
        )
    
    # Add LIMIT jika tidak ada
    raw_sql = SQLValidator.add_limit_if_missing(raw_sql)
    
    state["raw_sql"] = raw_sql
    state["validated_sql"] = raw_sql
    state["next_node"] = "sql_executor"
    
    logger.log("ENHANCED_SQL_GENERATED", {
        "table": state["selected_table"],
        "user_query": state["user_input"],
        "generated_sql": raw_sql[:300] + "..." if len(raw_sql) > 300 else raw_sql,
        "confidence": state.get("selection_confidence", 0.5)
    }, level="SUCCESS")
    
    return state

def enhanced_forecast_agent_node(state: AgentState) -> AgentState:
    """Enhanced Forecast Agent dengan auto table selection"""
    logger.log("NODE_ENTER", {"node": "enhanced_forecast_agent"})
    
    if not state.get("selected_table"):
        # Jika belum ada tabel terpilih, pilih otomatis
        if state.get("relevant_tables"):
            # Gunakan LLM untuk pilih tabel time series
            time_series_tables = []
            for table in state["relevant_tables"]:
                meta = table["metadata"]
                columns = meta.get("columns", {})
                # Cek jika ada kolom tanggal dan nilai
                date_cols = [c for c in columns.keys() if any(k in c.lower() for k in 
                            ["tahun", "year", "bulan", "month", "date"])]
                value_cols = [c for c in columns.keys() if any(k in c.lower() for k in
                            ["nilai", "value", "jumlah", "total"])]
                
                if date_cols and value_cols:
                    time_series_tables.append(table)
            
            if time_series_tables:
                # Pilih tabel time series terbaik
                selection_result = smart_selector.select_best_table(
                    user_query=state["user_input"],
                    candidate_tables=time_series_tables,
                    user_context=state["user_context"]
                )
                
                if selection_result["selected"]:
                    selected = selection_result["selected"]
                    state["selected_table"] = selected["table_name"]
                    state["table_metadata"] = selected["metadata"]
                else:
                    # Fallback ke tabel pertama
                    state["selected_table"] = state["relevant_tables"][0]["table_name"]
                    state["table_metadata"] = state["relevant_tables"][0]["metadata"]
            else:
                # Tidak ada tabel time series, gunakan tabel pertama
                state["selected_table"] = state["relevant_tables"][0]["table_name"]
                state["table_metadata"] = state["relevant_tables"][0]["metadata"]
        else:
            state["error"] = "Tidak ada tabel yang tersedia untuk forecasting"
            state["next_node"] = "error_handler"
            return state
    
    table_name = state["selected_table"]
    table_meta = state["table_metadata"]
    
    # Gunakan enhanced forecast agent
    forecast_result = enhanced_forecast_agent.enhanced_forecast(
        table_name=table_name,
        metadata=table_meta,
        region=state["user_context"]["region"],
        use_prophet=False  # Gunakan linear regression dulu
    )
    
    if forecast_result["success"]:
        state["forecast_result"] = forecast_result
        state["next_node"] = "response_formatter"
        
        logger.log("ENHANCED_FORECAST_SUCCESS", {
            "table": table_name,
            "data_points": forecast_result["forecast"]["data_points"],
            "method": forecast_result.get("method", "linear_regression")
        }, level="SUCCESS")
    else:
        state["error"] = forecast_result.get("error", "Forecast failed")
        state["next_node"] = "error_handler"
    
    return state

# %% [markdown]
# ## üèóÔ∏è BUILD ENHANCED WORKFLOW

# %%
def build_enhanced_workflow() -> StateGraph:
    """Bangun enhanced workflow dengan auto-table selection"""
    
    print("üî® Membangun ENHANCED LangGraph workflow...")
    
    # Initialize workflow
    workflow = StateGraph(AgentState)
    
    # Add semua nodes dengan versi enhanced
    nodes = [
        ("router", router_node_fixed),
        ("enhanced_metadata_retriever", enhanced_metadata_retriever_node),
        ("planner", planner_node),
        ("enhanced_sql_agent", enhanced_sql_agent_node),
        ("sql_executor", sql_executor_node),
        ("enhanced_forecast_agent", enhanced_forecast_agent_node),
        ("clarify_agent", clarify_agent_node_fixed),
        ("response_formatter", response_formatter_node),
        ("error_handler", error_handler_node),
        ("end", end_node)
    ]
    
    for node_name, node_func in nodes:
        workflow.add_node(node_name, node_func)
        print(f"  ‚úÖ Added node: {node_name}")
    
    # Set entry point
    workflow.set_entry_point("router")
    print(f"  ‚úÖ Entry point: router")
    
    # Enhanced routing functions
    def enhanced_route_after_router(state: AgentState) -> str:
        return state.get("next_node", "enhanced_metadata_retriever")
    
    def enhanced_route_after_metadata(state: AgentState) -> str:
        if state.get("needs_clarification", False):
            return "clarify_agent"
        return "planner"
    
    def enhanced_route_after_planner(state: AgentState) -> str:
        intent = state.get("intent", "sql")
        
        # Auto-route berdasarkan intent
        if intent == "forecast":
            return "enhanced_forecast_agent"
        elif intent == "sql":
            return "enhanced_sql_agent"
        else:
            return "clarify_agent"
    
    def enhanced_route_after_clarify(state: AgentState) -> str:
        return state.get("next_node", "end")
    
    # Add conditional edges
    workflow.add_conditional_edges(
        "router",
        enhanced_route_after_router,
        {
            "enhanced_metadata_retriever": "enhanced_metadata_retriever",
            "clarify_agent": "clarify_agent",
            "error_handler": "error_handler"
        }
    )
    
    workflow.add_conditional_edges(
        "enhanced_metadata_retriever",
        enhanced_route_after_metadata,
        {
            "planner": "planner",
            "clarify_agent": "clarify_agent",
            "error_handler": "error_handler"
        }
    )
    
    workflow.add_conditional_edges(
        "planner",
        enhanced_route_after_planner,
        {
            "enhanced_sql_agent": "enhanced_sql_agent",
            "enhanced_forecast_agent": "enhanced_forecast_agent",
            "clarify_agent": "clarify_agent",
            "error_handler": "error_handler"
        }
    )
    
    workflow.add_conditional_edges(
        "clarify_agent",
        enhanced_route_after_clarify,
        {
            "planner": "planner",
            "end": "end",
            "error_handler": "error_handler"
        }
    )
    
    # Add fixed edges
    workflow.add_edge("enhanced_sql_agent", "sql_executor")
    workflow.add_edge("sql_executor", "response_formatter")
    workflow.add_edge("enhanced_forecast_agent", "response_formatter")
    workflow.add_edge("response_formatter", "end")
    workflow.add_edge("error_handler", "end")
    workflow.add_edge("end", END)
    
    # Compile graph
    compiled_graph = workflow.compile()
    
    print("üéâ ENHANCED Workflow berhasil dikompilasi!")
    print("   ‚Ä¢ Auto-table selection dengan LLM")
    print("   ‚Ä¢ Smart SQL generation")
    print("   ‚Ä¢ Enhanced forecasting")
    
    return compiled_graph

# Build enhanced workflow
enhanced_workflow = build_enhanced_workflow()


In [18]:

# %% [markdown]
# ## üß† 10. Agent State Definition untuk LangGraph

# %%
# ===============================
# STATE DEFINITION FOR LANGGRAPH
# ===============================

class AgentState(TypedDict):
    """State yang mengalir melalui workflow LangGraph - PERBAIKAN"""
    
    # User Input & Context
    user_input: str
    user_context: Dict[str, str]
    
    # Agent Communication
    messages: Annotated[List[BaseMessage], operator.add]
    
    # Processing State
    intent: Optional[str]
    needs_clarification: bool
    clarification_question: Optional[str]
    clarification_response: Optional[str]  # Tambahkan ini
    
    # Metadata & Data
    relevant_tables: List[Dict]
    selected_table: Optional[str]
    table_metadata: Optional[Dict]
    
    # SQL Generation
    raw_sql: Optional[str]
    validated_sql: Optional[str]
    
    # Execution Results
    execution_result: Optional[Dict]
    forecast_result: Optional[Dict]
    
    # Final Output
    final_answer: Optional[str]
    error: Optional[str]
    
    # Routing - PERBAIKAN: gunakan string untuk node names
    next_node: Optional[str]

In [19]:

# %% [markdown]
# ## üé≠ 11. LangGraph Nodes Definition

# %%
# ====================
# LANGGRAPH NODES
# ====================

def router_node_fixed(state: AgentState) -> AgentState:
    """Node 1: Router - Deteksi intent user - PERBAIKAN"""
    logger.log("NODE_ENTER", {"node": "router", "input": state["user_input"]})
    
    user_input = state["user_input"].lower()
    
    # Intent detection rules
    forecast_keywords = ["prediksi", "forecast", "ramal", "estimasi", "proyeksi", "perkiraan"]
    sql_keywords = ["tampilkan", "lihat", "berapa", "total", "jumlah", "select", "where", "data"]
    
    if any(keyword in user_input for keyword in forecast_keywords):
        intent = "forecast"
    elif any(keyword in user_input for keyword in sql_keywords):
        intent = "sql"
    else:
        intent = "clarify"
    
    # Update state
    state["intent"] = intent
    state["next_node"] = "metadata_retriever"
    
    logger.log("ROUTER_DECISION", {
        "intent": intent,
        "keywords_found": {
            "forecast": [k for k in forecast_keywords if k in user_input],
            "sql": [k for k in sql_keywords if k in user_input]
        }
    })
    
    return state


def metadata_retriever_node_fixed(state: AgentState) -> AgentState:
    """Node 2: Retrieve relevant metadata - PERBAIKAN"""
    logger.log("NODE_ENTER", {"node": "metadata_retriever"})
    
    # Find relevant tables
    relevant_tables = metadata_manager.find_relevant_tables(
        state["user_input"], 
        top_k=3
    )
    
    state["relevant_tables"] = relevant_tables
    
    if not relevant_tables:
        state["needs_clarification"] = True
        state["clarification_question"] = (
            "Saya tidak menemukan tabel yang relevan dengan pertanyaan Anda. "
            "Bisa Anda jelaskan dengan kata kunci yang berbeda?"
        )
        state["next_node"] = "clarify_agent"
    elif len(relevant_tables) == 1:
        # Auto-select jika hanya ada 1
        state["selected_table"] = relevant_tables[0]["table_name"]
        state["table_metadata"] = relevant_tables[0]["metadata"]
        state["next_node"] = "planner"
    else:
        # Multiple tables found, need clarification
        table_options = "\n".join([
            f"{i+1}. {table['table_name']} (relevance: {table['relevance_score']:.1f})"
            for i, table in enumerate(relevant_tables)
        ])
        
        state["needs_clarification"] = True
        state["clarification_question"] = (
            f"Saya menemukan beberapa tabel yang relevan:\n{table_options}\n\n"
            f"Tabel mana yang Anda maksud? (sebutkan nomor 1-{len(relevant_tables)})"
        )
        state["next_node"] = "clarify_agent"
    
    logger.log("METADATA_RETRIEVAL_RESULT", {
        "tables_found": len(relevant_tables),
        "table_names": [t["table_name"] for t in relevant_tables],
        "needs_clarification": state.get("needs_clarification", False)
    })
    
    return state


def planner_node(state: AgentState) -> AgentState:
    """Node 3: Planner - Tentukan langkah berikutnya berdasarkan intent"""
    logger.log("NODE_ENTER", {"node": "planner"})
    
    intent = state.get("intent", "sql")
    
    if intent == "forecast":
        state["next_node"] = "forecast_agent"
    elif intent == "sql":
        state["next_node"] = "sql_agent"
    else:  # clarify
        state["next_node"] = "clarify_agent"
    
    logger.log("PLANNER_DECISION", {
        "intent": intent,
        "next_node": state["next_node"],
        "selected_table": state.get("selected_table")
    })
    
    return state


def sql_agent_node(state: AgentState) -> AgentState:
    """Node 4: SQL Agent - Generate dan validasi SQL query"""
    logger.log("NODE_ENTER", {"node": "sql_agent"})
    
    if not state.get("selected_table"):
        state["error"] = "Tidak ada tabel yang dipilih"
        state["next_node"] = "error_handler"
        return state
    
    # Prepare prompt untuk SQL generation
    table_info = next(
        (t for t in state["relevant_tables"] if t["table_name"] == state["selected_table"]),
        None
    )
    
    if not table_info:
        state["error"] = f"Table {state['selected_table']} not found in relevant tables"
        state["next_node"] = "error_handler"
        return state
    
    # Build prompt
    schema_text = metadata_manager.build_schema_prompt(table_info)
    
    prompt = f"""
    Anda adalah ahli SQL untuk database SQLite. Generate HANYA kode SQL berdasarkan instruksi berikut.
    
    INFORMASI TABEL:
    {schema_text}
    
    ATURAN PENTING:
    1. User memiliki akses hanya ke region: "{state['user_context']['region']}"
    2. Leveldata user: "{state['user_context']['leveldata']}"
    3. Jika tabel memiliki 'access_column', HARUS tambahkan filter WHERE untuk region user.
    4. Hanya gunakan SELECT statement.
    5. Batasi hasil dengan LIMIT 100 jika query bisa return banyak data.
    6. Return HANYA kode SQL, tanpa penjelasan apapun.
    
    PERTANYAAN USER: {state['user_input']}
    
    SQL QUERY:
    """
    
    # Call SQL LLM
    logger.log("LLM_CALL_START", {
        "model": "qwen-coder",
        "purpose": "SQL generation",
        "table": state["selected_table"]
    })
    
    try:
        response = sql_llm.invoke(prompt)
        raw_sql = response.content.strip().strip("`")
        
        # Log the LLM call
        logger.log_llm_call("qwen-coder", prompt, raw_sql)
        
        # Validate SQL
        validation = SQLValidator.validate_sql(raw_sql)
        
        if not validation["is_valid"]:
            state["error"] = f"SQL validation failed: {validation['reason']}"
            state["next_node"] = "error_handler"
            return state
        
        # Inject region filter jika diperlukan
        access_column = table_info["metadata"].get("access_column")
        if access_column:
            raw_sql = SQLValidator.inject_region_filter(
                raw_sql, 
                access_column, 
                state["user_context"]["region"]
            )
        
        # Add LIMIT jika tidak ada
        raw_sql = SQLValidator.add_limit_if_missing(raw_sql)
        
        state["raw_sql"] = raw_sql
        state["validated_sql"] = raw_sql
        state["next_node"] = "sql_executor"
        
        logger.log("SQL_GENERATION_SUCCESS", {
            "generated_sql": raw_sql[:200] + "..." if len(raw_sql) > 200 else raw_sql,
            "table": state["selected_table"]
        }, level="SUCCESS")
        
    except Exception as e:
        state["error"] = f"SQL generation error: {str(e)}"
        state["next_node"] = "error_handler"
        logger.log("SQL_GENERATION_ERROR", {
            "error": str(e),
            "table": state["selected_table"]
        }, level="ERROR")
    
    return state


def sql_executor_node(state: AgentState) -> AgentState:
    """Node 5: Execute SQL query"""
    logger.log("NODE_ENTER", {"node": "sql_executor"})
    
    if not state.get("validated_sql"):
        state["error"] = "Tidak ada SQL query untuk dieksekusi"
        state["next_node"] = "error_handler"
        return state
    
    try:
        # Execute SQL
        result = sql_executor.execute(state["validated_sql"])
        
        if result["success"]:
            state["execution_result"] = result
            state["next_node"] = "response_formatter"
            
            logger.log("SQL_EXECUTION_SUCCESS", {
                "row_count": result["row_count"],
                "columns": result["columns"][:5] if result["columns"] else [],
                "sql_preview": state["validated_sql"][:100]
            }, level="SUCCESS")
        else:
            state["error"] = result["error"]
            state["next_node"] = "error_handler"
            
            logger.log("SQL_EXECUTION_ERROR", {
                "error": result["error"],
                "sql": state["validated_sql"][:100]
            }, level="ERROR")
            
    except Exception as e:
        state["error"] = f"Execution error: {str(e)}"
        state["next_node"] = "error_handler"
        logger.log("SQL_EXECUTION_EXCEPTION", {
            "error": str(e),
            "sql": state["validated_sql"][:100] if state.get("validated_sql") else "None"
        }, level="ERROR")
    
    return state


def forecast_agent_node(state: AgentState) -> AgentState:
    """Node 6: Forecast Agent - Lakukan forecasting"""
    logger.log("NODE_ENTER", {"node": "forecast_agent"})
    
    if not state.get("selected_table"):
        state["error"] = "Tidak ada tabel yang dipilih untuk forecasting"
        state["next_node"] = "error_handler"
        return state
    
    try:
        # Get table metadata
        table_metadata = state.get("table_metadata")
        if not table_metadata:
            # Load metadata jika belum ada
            table_metadata = metadata_manager.get_table_metadata(state["selected_table"])
            state["table_metadata"] = table_metadata
        
        if not table_metadata:
            state["error"] = f"Tidak dapat menemukan metadata untuk tabel {state['selected_table']}"
            state["next_node"] = "error_handler"
            return state
        
        # Perform forecast
        forecast_result = forecast_agent.forecast(
            table_name=state["selected_table"],
            metadata=table_metadata,
            region=state["user_context"]["region"]
        )
        
        if forecast_result["success"]:
            state["forecast_result"] = forecast_result
            state["next_node"] = "response_formatter"
            
            logger.log("FORECAST_AGENT_SUCCESS", {
                "table": state["selected_table"],
                "data_points": forecast_result["forecast"]["data_points"],
                "periods": forecast_result["forecast"]["forecast_periods"]
            }, level="SUCCESS")
        else:
            state["error"] = forecast_result.get("error", "Forecast failed")
            state["next_node"] = "error_handler"
            
            logger.log("FORECAST_AGENT_ERROR", {
                "error": forecast_result.get("error", "Unknown error"),
                "table": state["selected_table"]
            }, level="ERROR")
            
    except Exception as e:
        state["error"] = f"Forecast agent error: {str(e)}"
        state["next_node"] = "error_handler"
        logger.log("FORECAST_AGENT_EXCEPTION", {
            "error": str(e),
            "table": state.get("selected_table", "Unknown")
        }, level="ERROR")
    
    return state


def clarify_agent_node_fixed(state: AgentState) -> AgentState:
    """Node 7: Clarify Agent - PERBAIKAN dengan handling yang benar"""
    logger.log("NODE_ENTER", {"node": "clarify_agent"})
    
    # Jika ada clarification response dari user sebelumnya
    if state.get("clarification_response"):
        response = state["clarification_response"].strip()
        
        # Try to parse as table selection number
        if response.isdigit():
            idx = int(response) - 1
            if 0 <= idx < len(state["relevant_tables"]):
                selected_table = state["relevant_tables"][idx]
                state["selected_table"] = selected_table["table_name"]
                state["table_metadata"] = selected_table["metadata"]
                state["needs_clarification"] = False
                state["clarification_question"] = None
                state["clarification_response"] = None
                state["next_node"] = "planner"
                
                logger.log("CLARIFICATION_RESOLVED", {
                    "user_response": response,
                    "selected_table": selected_table["table_name"]
                })
                
                return state
        else:
            # Jika bukan angka, anggap sebagai nama tabel
            for table in state["relevant_tables"]:
                if response.lower() in table["table_name"].lower():
                    state["selected_table"] = table["table_name"]
                    state["table_metadata"] = table["metadata"]
                    state["needs_clarification"] = False
                    state["clarification_question"] = None
                    state["clarification_response"] = None
                    state["next_node"] = "planner"
                    return state
    
    # Jika butuh klarifikasi dan belum ada response
    if state.get("needs_clarification") and state.get("clarification_question"):
        # Set final answer dengan clarification question
        state["final_answer"] = state["clarification_question"]
        state["next_node"] = "end"  # PERBAIKAN: arahkan ke node "end", bukan END
        
        logger.log("CLARIFICATION_REQUESTED", {
            "question": state["clarification_question"][:100],
            "relevant_tables": [t["table_name"] for t in state.get("relevant_tables", [])]
        })
    
    else:
        # General clarification (untuk query yang sangat ambigu)
        prompt = f"User bertanya: {state['user_input']}. Pertanyaan ini kurang jelas. Buatkan pertanyaan klarifikasi yang singkat."
        
        try:
            response = user_llm.invoke(prompt)
            clarification = response.content.strip()
            
            state["final_answer"] = f"Klarifikasi: {clarification}"
            state["next_node"] = "end"
            
            logger.log_llm_call("qwen2.5:7b", prompt, clarification)
            
        except Exception as e:
            state["error"] = f"Clarification agent error: {str(e)}"
            state["next_node"] = "error_handler"
    
    return state

def end_node(state: AgentState) -> AgentState:
    """Node akhir untuk mengakhiri workflow dengan clean"""
    logger.log("NODE_ENTER", {"node": "end"})
    logger.log("WORKFLOW_COMPLETE", {
        "final_answer": state.get("final_answer", "No answer")[:200],
        "user_input": state.get("user_input", "No input")
    })
    return state

def response_formatter_node(state: AgentState) -> AgentState:
    """Node 8: Format response untuk user"""
    logger.log("NODE_ENTER", {"node": "response_formatter"})
    
    try:
        # Format berdasarkan tipe hasil
        if state.get("execution_result"):
            # SQL Query Result
            result = state["execution_result"]
            df = result["data"]
            
            if df.empty:
                response = f"Query berhasil dieksekusi tetapi tidak ada data yang ditemukan."
            else:
                # Format data sebagai tabel sederhana
                response_lines = [
                    f"‚úÖ **HASIL QUERY**",
                    f"Tabel: {state.get('selected_table', 'Unknown')}",
                    f"Jumlah baris: {len(df)}",
                    f"\n**Data (5 baris pertama):**",
                    df.head().to_string(index=False),
                    f"\n**Query SQL:**",
                    f"```sql\n{state.get('validated_sql', 'N/A')}\n```",
                    f"\nüìç **Kontek Akses:**",
                    f"Region: {state['user_context']['region']}",
                    f"Leveldata: {state['user_context']['leveldata']}"
                ]
                
                response = "\n".join(response_lines)
        
        elif state.get("forecast_result"):
            # Forecast Result
            forecast_data = state["forecast_result"]["forecast"]
            
            response_lines = [
                f"üìà **HASIL FORECASTING**",
                f"Tabel: {forecast_data['table_name']}",
                f"Data points: {forecast_data['data_points']}",
                f"Periode data: {forecast_data['date_range']['start']} hingga {forecast_data['date_range']['end']}",
                f"Nilai terakhir: {forecast_data['last_value']:,.2f}",
                f"\n**Prediksi 3 periode berikutnya:**"
            ]
            
            for i, pred in enumerate(forecast_data["predictions"]):
                response_lines.append(
                    f"{i+1}. {pred['period']}: {pred['prediction']:,.2f} "
                    f"(interval: {pred['confidence_low']:,.2f} - {pred['confidence_high']:,.2f})"
                )
            
            response_lines.extend([
                f"\n**Metadata:**",
                f"Kolom tanggal: {forecast_data['metadata']['date_column']}",
                f"Kolom nilai: {forecast_data['metadata']['value_column']}",
                f"Confidence: {forecast_data['metadata']['confidence']}",
                f"\nüìç **Kontek Akses:**",
                f"Region: {state['user_context']['region']}",
                f"Leveldata: {state['user_context']['leveldata']}"
            ])
            
            response = "\n".join(response_lines)
        
        else:
            response = "Tidak ada hasil yang dapat ditampilkan."
        
        state["final_answer"] = response
        state["next_node"] = END
        
        logger.log("RESPONSE_FORMATTED", {
            "response_type": "sql" if state.get("execution_result") else "forecast" if state.get("forecast_result") else "unknown",
            "response_length": len(response)
        }, level="SUCCESS")
        
    except Exception as e:
        state["error"] = f"Response formatting error: {str(e)}"
        state["next_node"] = "error_handler"
        logger.log("RESPONSE_FORMAT_ERROR", {
            "error": str(e)
        }, level="ERROR")
    
    return state


def error_handler_node(state: AgentState) -> AgentState:
    """Node 9: Handle errors gracefully"""
    logger.log("NODE_ENTER", {"node": "error_handler"})
    
    error_msg = state.get("error", "Terjadi kesalahan yang tidak diketahui")
    
    # Format error response
    error_response = f"""
    ‚ö†Ô∏è **SISTEM ERROR**
    
    Pesan error: {error_msg}
    
    **Informasi Debug:**
    - Input user: {state.get('user_input', 'N/A')}
    - Tabel yang dipilih: {state.get('selected_table', 'Tidak ada')}
    - Intent: {state.get('intent', 'N/A')}
    
    **Saran:**
    1. Coba tanyakan dengan format yang lebih sederhana
    2. Pastikan tabel dan kolom yang dimaksud ada dalam database
    3. Hubungi administrator jika error terus berlanjut
    """
    
    state["final_answer"] = error_response
    state["next_node"] = END
    
    logger.log("ERROR_HANDLED", {
        "error": error_msg,
        "user_input": state.get("user_input"),
        "final_answer_preview": error_response[:100]
    }, level="ERROR")
    
    return state

def route_after_router_fixed(state: AgentState) -> str:
    """Routing setelah router node - PERBAIKAN"""
    return state.get("next_node", "metadata_retriever")

def route_after_metadata_fixed(state: AgentState) -> str:
    """Routing setelah metadata retriever - PERBAIKAN"""
    if state.get("needs_clarification", False):
        return "clarify_agent"
    return "planner"

def route_after_planner_fixed(state: AgentState) -> str:
    """Routing setelah planner - PERBAIKAN"""
    return state.get("next_node", "clarify_agent")

def route_after_clarify_fixed(state: AgentState) -> str:
    """Routing setelah clarify agent - PERBAIKAN"""
    return state.get("next_node", "end")



In [21]:

# %% [markdown]
# ## üèóÔ∏è 12. Build LangGraph Workflow

# %%
# ============================
# BUILD LANGGRAPH WORKFLOW
# ============================

def build_fixed_workflow() -> StateGraph:
    """Bangun workflow dengan semua perbaikan"""
    
    print("üî® Membangun LangGraph workflow (FIXED)...")
    
    # Initialize workflow
    workflow = StateGraph(AgentState)
    
    # Add semua nodes dengan versi fixed
    nodes = [
        ("router", router_node_fixed),
        ("metadata_retriever", metadata_retriever_node_fixed),
        ("planner", planner_node),  # node ini ok
        ("sql_agent", sql_agent_node),  # node ini ok
        ("sql_executor", sql_executor_node),  # node ini ok
        ("forecast_agent", forecast_agent_node),  # node ini ok
        ("clarify_agent", clarify_agent_node_fixed),
        ("response_formatter", response_formatter_node),  # node ini ok
        ("error_handler", error_handler_node),  # node ini ok
        ("end", end_node)  # Tambahkan node end
    ]
    
    for node_name, node_func in nodes:
        workflow.add_node(node_name, node_func)
        print(f"  ‚úÖ Added node: {node_name}")
    
    # Set entry point
    workflow.set_entry_point("router")
    print(f"  ‚úÖ Entry point: router")
    
    # Add conditional edges dengan fungsi fixed
    workflow.add_conditional_edges(
        "router",
        route_after_router_fixed,
        {
            "metadata_retriever": "metadata_retriever",
            "clarify_agent": "clarify_agent",
            "error_handler": "error_handler"
        }
    )
    
    workflow.add_conditional_edges(
        "metadata_retriever",
        route_after_metadata_fixed,
        {
            "planner": "planner",
            "clarify_agent": "clarify_agent",
            "error_handler": "error_handler"
        }
    )
    
    workflow.add_conditional_edges(
        "planner",
        route_after_planner_fixed,
        {
            "sql_agent": "sql_agent",
            "forecast_agent": "forecast_agent",
            "clarify_agent": "clarify_agent",
            "error_handler": "error_handler"
        }
    )
    
    workflow.add_conditional_edges(
        "clarify_agent",
        route_after_clarify_fixed,
        {
            "planner": "planner",
            "end": "end",
            "error_handler": "error_handler"
        }
    )
    
    # Add fixed edges
    workflow.add_edge("sql_agent", "sql_executor")
    workflow.add_edge("sql_executor", "response_formatter")
    workflow.add_edge("forecast_agent", "response_formatter")
    workflow.add_edge("response_formatter", "end")
    workflow.add_edge("error_handler", "end")
    workflow.add_edge("end", END)  # Dari node end ke END LangGraph
    
    # Compile graph
    compiled_graph = workflow.compile()
    
    print("üéâ Workflow (FIXED) berhasil dikompilasi!")
    
    return compiled_graph

# Rebuild workflow dengan perbaikan
agent_workflow_fixed = build_fixed_workflow()

üî® Membangun LangGraph workflow (FIXED)...
  ‚úÖ Added node: router
  ‚úÖ Added node: metadata_retriever
  ‚úÖ Added node: planner
  ‚úÖ Added node: sql_agent
  ‚úÖ Added node: sql_executor
  ‚úÖ Added node: forecast_agent
  ‚úÖ Added node: clarify_agent
  ‚úÖ Added node: response_formatter
  ‚úÖ Added node: error_handler
  ‚úÖ Added node: end
  ‚úÖ Entry point: router
üéâ Workflow (FIXED) berhasil dikompilasi!


In [23]:

# %% [markdown]
# ## üß™ 13. Testing System

# %%
def test_fixed_system(test_cases: List[Dict]):
    """Test sistem yang sudah diperbaiki"""
    
    print("\n" + "="*60)
    print("üß™ TESTING FIXED AGENTIC AI SYSTEM")
    print("="*60)
    
    for i, test_case in enumerate(test_cases, 1):
        print(f"\n{'='*60}")
        print(f"TEST {i}: {test_case['name']}")
        print(f"{'='*60}")
        
        # Log user input
        logger.log_user_input(test_case["query"], USER_CONTEXT)
        
        # Prepare initial state
        initial_state = AgentState(
            user_input=test_case["query"],
            user_context=USER_CONTEXT,
            messages=[],
            intent=None,
            needs_clarification=False,
            clarification_question=None,
            clarification_response=None,  # Tambahkan ini
            relevant_tables=[],
            selected_table=None,
            table_metadata=None,
            raw_sql=None,
            validated_sql=None,
            execution_result=None,
            forecast_result=None,
            final_answer=None,
            error=None,
            next_node=None
        )
        
        try:
            # Execute workflow
            print(f"ü§î Query: '{test_case['query']}'")
            print(f"üìç Context: {USER_CONTEXT}")
            print(f"\nüîÑ Memproses...")
            
            result_state = agent_workflow_fixed.invoke(initial_state)
            
            # Display results
            print(f"\n‚úÖ PROSES SELESAI")
            print(f"{'-'*40}")
            
            if result_state.get("error"):
                print(f"‚ùå Error: {result_state['error']}")
            
            if result_state.get("final_answer"):
                print(f"\nü§ñ RESPONSE SYSTEM:")
                print(f"{'-'*40}")
                print(result_state["final_answer"])
            
            # Show metadata if available
            if result_state.get("relevant_tables"):
                print(f"\nüìä Metadata ditemukan:")
                for table in result_state["relevant_tables"][:3]:
                    print(f"  ‚Ä¢ {table['table_name']} (score: {table['relevance_score']:.1f})")
            
            # Show SQL if generated
            if result_state.get("validated_sql"):
                print(f"\nüíæ SQL Query:")
                print(f"  {result_state['validated_sql'][:200]}...")
            
            # Log test completion
            logger.log("TEST_COMPLETE", {
                "test_name": test_case["name"],
                "query": test_case["query"],
                "success": result_state.get("error") is None,
                "error": result_state.get("error"),
                "result_type": "sql" if result_state.get("execution_result") else "forecast" if result_state.get("forecast_result") else "clarify"
            }, level="SUCCESS" if result_state.get("error") is None else "ERROR")
            
        except Exception as e:
            print(f"\n‚ùå TEST GAGAL: {str(e)}")
            import traceback
            traceback.print_exc()
            logger.log("TEST_FAILED", {
                "test_name": test_case["name"],
                "error": str(e),
                "traceback": traceback.format_exc(),
                "query": test_case["query"]
            }, level="ERROR")
        
        print(f"\nüìù Logs tersimpan di: {LOG_DIR}")
        print(f"{'='*60}")

# Define test cases
test_cases_fixed = [
    {
        "name": "SQL Query - Basic",
        "query": "Tampilkan data tahun 2023"
    },
    {
        "name": "SQL Query - With Region Context",
        "query": "Berapa jumlah pengguna di Jawa Barat?"
    },
    {
        "name": "Forecast Query",
        "query": "Prediksi untuk 3 bulan ke depan"
    },
    {
        "name": "Vague Query (Should Clarify)",
        "query": "Data ekonomi"
    }
]

# Run tests dengan sistem yang sudah diperbaiki
test_fixed_system(test_cases_fixed)



üß™ TESTING FIXED AGENTIC AI SYSTEM

TEST 1: SQL Query - Basic
[94m[INFO] USER_INPUT: User query: Tampilkan data tahun 2023[0m
ü§î Query: 'Tampilkan data tahun 2023'
üìç Context: {'leveldata': '2_KABUPATEN_JAWA_BARAT', 'region': 'RM III JABAR'}

üîÑ Memproses...
[94m[INFO] NODE_ENTER: [0m
[94m[INFO] ROUTER_DECISION: [0m
[94m[INFO] NODE_ENTER: [0m
[94m[INFO] METADATA_RETRIEVAL: [0m
[94m[INFO] METADATA_RETRIEVAL_RESULT: [0m
[94m[INFO] NODE_ENTER: [0m
[94m[INFO] CLARIFICATION_REQUESTED: [0m
[94m[INFO] NODE_ENTER: [0m
[94m[INFO] WORKFLOW_COMPLETE: [0m

‚úÖ PROSES SELESAI
----------------------------------------

ü§ñ RESPONSE SYSTEM:
----------------------------------------
Saya menemukan beberapa tabel yang relevan:
1. ref_mkt_bps_pengeluaran_per_kapita (relevance: 8.0)
2. ref_mkt_bps_jumlah_penduduk_by_usia (relevance: 8.0)
3. ref_mkt_bps_jumlah_ibuhamil (relevance: 8.0)

Tabel mana yang Anda maksud? (sebutkan nomor 1-3)

üìä Metadata ditemukan:
  ‚Ä¢ ref_mkt_bp

In [24]:
def analyze_logs():
    """Analisis log file untuk debugging"""
    
    log_files = list(LOG_DIR.glob("*.jsonl"))
    if not log_files:
        print("‚ùå Tidak ada log files ditemukan")
        return
    
    latest_log = max(log_files, key=lambda x: x.stat().st_mtime)
    print(f"\nüìä ANALYZING LOG: {latest_log.name}")
    print(f"{'='*60}")
    
    with open(latest_log, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    
    errors = []
    warnings = []
    infos = []
    
    for line in lines:
        try:
            entry = json.loads(line)
            if entry.get("level") == "ERROR":
                errors.append(entry)
            elif entry.get("level") == "WARNING":
                warnings.append(entry)
            else:
                infos.append(entry)
        except:
            continue
    
    print(f"üìà Log Statistics:")
    print(f"  Total entries: {len(lines)}")
    print(f"  Errors: {len(errors)}")
    print(f"  Warnings: {len(warnings)}")
    print(f"  Info: {len(infos)}")
    
    if errors:
        print(f"\n‚ùå ERRORS (last 5):")
        for error in errors[-5:]:
            print(f"  [{error.get('timestamp', '')}] {error.get('event_type', '')}")
            print(f"     Message: {error.get('message', 'No message')}")
            if error.get('error'):
                print(f"     Error: {error.get('error')}")
    
    if warnings:
        print(f"\n‚ö†Ô∏è  WARNINGS (last 3):")
        for warning in warnings[-3:]:
            print(f"  [{warning.get('timestamp', '')}] {warning.get('event_type', '')}")
            print(f"     Message: {warning.get('message', 'No message')}")
    
    # Cari entry tentang metadata
    print(f"\nüîç METADATA RETRIEVAL ENTRIES:")
    for line in lines[-20:]:  # Last 20 entries
        try:
            entry = json.loads(line)
            if "metadata" in entry.get("event_type", "").lower():
                print(f"  [{entry.get('timestamp', '')}] {entry.get('event_type', '')}")
                if entry.get("data"):
                    data = entry.get("data", {})
                    if "tables_found" in data:
                        print(f"     Tables found: {data.get('tables_found')}")
                        if data.get("table_names"):
                            print(f"     Table names: {data.get('table_names')}")
        except:
            continue
    
    print(f"{'='*60}")

# Analisis logs
analyze_logs()


üìä ANALYZING LOG: audit_20251212.jsonl
üìà Log Statistics:
  Total entries: 144
  Errors: 8
  Info: 136

‚ùå ERRORS (last 5):
  [2025-12-12T07:39:45.995635] TEST_FAILED
     Message: No message
     Error: '__end__'
  [2025-12-12T07:41:18.503663] TEST_FAILED
     Message: No message
     Error: '__end__'
  [2025-12-12T07:41:18.507041] TEST_FAILED
     Message: No message
     Error: '__end__'
  [2025-12-12T07:41:18.510592] TEST_FAILED
     Message: No message
     Error: '__end__'
  [2025-12-12T07:41:18.513338] TEST_FAILED
     Message: No message
     Error: '__end__'

üîç METADATA RETRIEVAL ENTRIES:
  [2025-12-12T07:58:12.958279] METADATA_RETRIEVAL
  [2025-12-12T07:58:12.958420] METADATA_RETRIEVAL_RESULT
  [2025-12-12T07:58:12.961823] METADATA_RETRIEVAL
  [2025-12-12T07:58:12.961948] METADATA_RETRIEVAL_RESULT


In [25]:
def check_metadata_issue():
    """Cek dan fix metadata issue"""
    
    print("\nüîß CHECKING METADATA ISSUE")
    print(f"{'='*60}")
    
    # Cek folder metadata
    metadata_files = list(METADATA_DIR.glob("*.json"))
    print(f"Metadata files found: {len(metadata_files)}")
    
    for file in metadata_files:
        print(f"  ‚Ä¢ {file.name}")
    
    # Load metadata
    metadata = metadata_manager.load_all_metadata()
    print(f"\nMetadata loaded: {len(metadata)} tables")
    
    # Cek isi metadata
    for table_name, meta in metadata.items():
        print(f"\nüìã Table: {table_name}")
        print(f"   Description: {meta.get('description', 'No description')[:50]}...")
        print(f"   Columns: {list(meta.get('columns', {}).keys())[:5]}...")
        print(f"   Access column: {meta.get('access_column', 'None')}")
    
    # Test metadata retrieval
    test_queries = [
        "Tampilkan data tahun 2023",
        "Berapa jumlah pengguna",
        "Prediksi untuk 3 bulan ke depan"
    ]
    
    print(f"\nüß™ TESTING METADATA RETRIEVAL")
    for query in test_queries:
        tables = metadata_manager.find_relevant_tables(query, top_k=2)
        print(f"\nQuery: '{query}'")
        print(f"  Relevant tables: {len(tables)}")
        for table in tables:
            print(f"    ‚Ä¢ {table['table_name']} (score: {table['relevance_score']:.1f})")
    
    print(f"\n{'='*60}")
    
    # Jika tidak ada metadata, buat sample
    if not metadata_files:
        print("‚ö†Ô∏è  Tidak ada metadata files. Creating sample...")
        advanced = AdvancedFeatures()
        advanced.create_sample_metadata()
        print("‚úÖ Sample metadata created. Please restart testing.")

# Cek metadata issue
check_metadata_issue()



üîß CHECKING METADATA ISSUE
Metadata files found: 26
  ‚Ä¢ ref_mkt_bps_pengeluaran_per_kapita.json
  ‚Ä¢ ref_mkt_seki_investasi.json
  ‚Ä¢ ref_mkt_bps_jumlah_penduduk_by_usia.json
  ‚Ä¢ ref_mkt_seki_interest.json
  ‚Ä¢ ref_mkt_bps_jumlah_ibuhamil.json
  ‚Ä¢ ref_mkt_seki_pdb.json
  ‚Ä¢ ref_mkt_seki_indonesia_ringkasan.json
  ‚Ä¢ ref_mkt_seki_indeks_harga.json
  ‚Ä¢ ref_mkt_bps_persentase_bayi_asi_eksklusif.json
  ‚Ä¢ ref_mkt_bps_umr.json
  ‚Ä¢ ref_mkt_seki_export_import.json
  ‚Ä¢ ref_mkt_bps_jumlah_penduduk.json
  ‚Ä¢ ref_mkt_seki_inflasi.json
  ‚Ä¢ ref_mkt_bps_jumlah_pns.json
  ‚Ä¢ ref_mkt_seki_pareto_terpisah.json
  ‚Ä¢ ref_mkt_bps_produk_domestik_reg_bruto.json
  ‚Ä¢ ref_mkt_bps_angka_kelahiran.json
  ‚Ä¢ ref_mkt_seki_transaksi_berjalan_internasional.json
  ‚Ä¢ ref_mkt_seki_savings.json
  ‚Ä¢ ref_mkt_bps_jumlah_balita.json
  ‚Ä¢ ref_mkt_seki_exchange.json
  ‚Ä¢ ref_mkt_bps_inflasi_nasional.json
  ‚Ä¢ ref_mkt_bps_jumlah_tenaga_kesehatan.json
  ‚Ä¢ ref_mkt_seki_ihk.json
  ‚Ä¢ ref_mk

In [27]:

# %% [markdown]
# ## üéÆ 14. Interactive Mode

# %%
def quick_interactive_test():
    """Testing cepat dengan input manual"""
    
    print("\n" + "="*60)
    print("üöÄ QUICK INTERACTIVE TEST")
    print("="*60)
    print("Testing dengan input langsung (bypass metadata retrieval)")
    
    # Contoh state dengan tabel yang sudah diketahui
    sample_state = AgentState(
        user_input="Tampilkan data tahun 2025",
        user_context=USER_CONTEXT,
        messages=[],
        intent="sql",
        needs_clarification=False,
        clarification_question=None,
        clarification_response=None,
        relevant_tables=[
            {
                "table_name": "tabel_devisa",
                "metadata": {
                    "description": "Tabel data devisa per region per tahun",
                    "columns": {
                        "tahun": {"type": "INTEGER", "description": "Tahun data"},
                        "region": {"type": "TEXT", "description": "Nama region"},
                        "devisa": {"type": "FLOAT", "description": "Jumlah devisa"}
                    },
                    "access_column": "region",
                    "example_rows": []
                },
                "relevance_score": 0.95
            }
        ],
        selected_table="tabel_devisa",
        table_metadata={
            "description": "Tabel data devisa per region per tahun",
            "columns": {
                "tahun": {"type": "INTEGER", "description": "Tahun data"},
                "region": {"type": "TEXT", "description": "Nama region"},
                "devisa": {"type": "FLOAT", "description": "Jumlah devisa"}
            },
            "access_column": "region",
            "example_rows": []
        },
        raw_sql=None,
        validated_sql=None,
        execution_result=None,
        forecast_result=None,
        final_answer=None,
        error=None,
        next_node="planner"
    )
    
    try:
        print(f"\nü§î Query: '{sample_state['user_input']}'")
        print(f"üìç Selected table: {sample_state['selected_table']}")
        
        # Jalankan dari planner
        result = agent_workflow_fixed.invoke(sample_state)
        
        print(f"\n‚úÖ PROSES SELESAI")
        print(f"{'-'*40}")
        
        if result.get("final_answer"):
            print(f"ü§ñ RESPONSE:")
            print(result["final_answer"])
        elif result.get("error"):
            print(f"‚ùå Error: {result['error']}")
        else:
            print("‚ö†Ô∏è  No response generated")
            
    except Exception as e:
        print(f"\n‚ùå ERROR: {str(e)}")
        import traceback
        traceback.print_exc()

# Jalankan quick test
quick_interactive_test()


üöÄ QUICK INTERACTIVE TEST
Testing dengan input langsung (bypass metadata retrieval)

ü§î Query: 'Tampilkan data tahun 2025'
üìç Selected table: tabel_devisa
[94m[INFO] NODE_ENTER: [0m
[94m[INFO] ROUTER_DECISION: [0m
[94m[INFO] NODE_ENTER: [0m
[94m[INFO] METADATA_RETRIEVAL: [0m
[94m[INFO] METADATA_RETRIEVAL_RESULT: [0m
[94m[INFO] NODE_ENTER: [0m
[94m[INFO] CLARIFICATION_REQUESTED: [0m
[94m[INFO] NODE_ENTER: [0m
[94m[INFO] WORKFLOW_COMPLETE: [0m

‚úÖ PROSES SELESAI
----------------------------------------
ü§ñ RESPONSE:
Saya menemukan beberapa tabel yang relevan:
1. ref_mkt_bps_pengeluaran_per_kapita (relevance: 10.0)
2. ref_mkt_bps_jumlah_penduduk_by_usia (relevance: 10.0)
3. ref_mkt_bps_jumlah_ibuhamil (relevance: 10.0)

Tabel mana yang Anda maksud? (sebutkan nomor 1-3)


In [None]:


# %% [markdown]
# ## üìä 15. System Monitoring & Logs

# %%
def show_system_status():
    """Tampilkan status sistem dan logs"""
    
    print("\n" + "="*60)
    print("üìä SYSTEM STATUS")
    print("="*60)
    
    # Check database connection
    db_status = "‚úÖ Connected" if sql_executor.test_connection() else "‚ùå Disconnected"
    
    # Check metadata
    metadata = metadata_manager.load_all_metadata()
    metadata_status = f"‚úÖ {len(metadata)} tables loaded" if metadata else "‚ùå No metadata"
    
    # Check logs
    log_files = list(LOG_DIR.glob("*.jsonl"))
    if log_files:
        latest_log = max(log_files, key=lambda x: x.stat().st_mtime)
        log_size = latest_log.stat().st_size
        log_status = f"‚úÖ {len(log_files)} files, latest: {latest_log.name} ({log_size/1024:.1f} KB)"
    else:
        log_status = "‚ùå No log files"
    
    # Display status
    status_items = [
        ("Database", db_status),
        ("Metadata", metadata_status),
        ("Logs", log_status),
        ("LLM Models", "‚úÖ qwen2.5:7b & qwen-coder" if not isinstance(user_llm, type(sql_executor)) else "‚ùå Stub mode"),
        ("Session ID", logger.session_id)
    ]
    
    for item, status in status_items:
        print(f"{item:20} {status}")
    
    # Show recent activities
    if log_files:
        print(f"\nüìù RECENT ACTIVITIES:")
        latest_log = max(log_files, key=lambda x: x.stat().st_mtime)
        with open(latest_log, 'r', encoding='utf-8') as f:
            lines = f.readlines()[-10:]  # Last 10 entries
            for line in lines[-5:]:  # Show last 5
                try:
                    log_entry = json.loads(line)
                    time_str = log_entry['timestamp'][11:19]
                    event = log_entry['event_type']
                    message = log_entry.get('message', '')[:50]
                    level = log_entry.get('level', 'INFO')
                    
                    level_icons = {
                        "INFO": "‚ÑπÔ∏è",
                        "SUCCESS": "‚úÖ",
                        "WARNING": "‚ö†Ô∏è",
                        "ERROR": "‚ùå"
                    }
                    
                    icon = level_icons.get(level, "‚Ä¢")
                    print(f"  {icon} [{time_str}] {event}: {message}")
                except:
                    continue
    
    print(f"\nüí° Tips: Gunakan interactive_mode() untuk testing manual")
    print(f"       atau test_agent_system() untuk automated testing")
    print("="*60)

# Show system status
show_system_status()

# %% [markdown]
# ## üöÄ 16. Quick Start & Contoh Penggunaan

# %%
# Contoh penggunaan cepat
def quick_example():
    """Contoh penggunaan cepat sistem"""
    
    print("\n" + "="*60)
    print("üöÄ QUICK START EXAMPLE")
    print("="*60)
    
    # Contoh query 1: SQL
    query1 = "Tampilkan data tahun 2022"
    print(f"\n1. SQL Query: '{query1}'")
    
    state1 = AgentState(
        user_input=query1,
        user_context=USER_CONTEXT,
        messages=[],
        intent=None,
        needs_clarification=False,
        clarification_question=None,
        relevant_tables=[],
        selected_table=None,
        table_metadata=None,
        raw_sql=None,
        validated_sql=None,
        execution_result=None,
        forecast_result=None,
        final_answer=None,
        error=None,
        next_node=None
    )
    
    result1 = agent_workflow.invoke(state1)
    print(f"   Result: {'Success' if result1.get('final_answer') else 'Failed'}")
    if result1.get("final_answer"):
        print(f"   Preview: {result1['final_answer'][:100]}...")
    
    # Contoh query 2: Forecast
    query2 = "Prediksi untuk 3 bulan ke depan"
    print(f"\n2. Forecast Query: '{query2}'")
    
    state2 = AgentState(
        user_input=query2,
        user_context=USER_CONTEXT,
        messages=[],
        intent=None,
        needs_clarification=False,
        clarification_question=None,
        relevant_tables=[],
        selected_table=None,
        table_metadata=None,
        raw_sql=None,
        validated_sql=None,
        execution_result=None,
        forecast_result=None,
        final_answer=None,
        error=None,
        next_node=None
    )
    
    result2 = agent_workflow.invoke(state2)
    print(f"   Result: {'Success' if result2.get('final_answer') else 'Failed'}")
    if result2.get("final_answer"):
        print(f"   Preview: {result2['final_answer'][:100]}...")
    
    print(f"\nüìÅ Logs: {LOG_DIR}")
    print(f"üìä Untuk status lengkap: show_system_status()")
    print(f"üéÆ Untuk mode interaktif: interactive_mode()")
    print("="*60)

# Jalankan quick example
quick_example()

# %% [markdown]
# ## üìã 17. Checklist Implementasi

# %%
print("\n" + "="*60)
print("‚úÖ IMPLEMENTATION CHECKLIST")
print("="*60)

checklist_items = [
    ("LangGraph Workflow", "‚úÖ", "Multi-agent system dengan conditional routing"),
    ("Dual LLM Models", "‚úÖ", "qwen2.5:7b untuk umum, qwen-coder untuk SQL"),
    ("Metadata Management", "‚úÖ", "JSON-based tanpa ChromaDB"),
    ("SQL Validator & Security", "‚úÖ", "Validasi dan region enforcement"),
    ("Forecast Agent", "‚úÖ", "Linear regression forecasting"),
    ("Comprehensive Logging", "‚úÖ", "Structured logging ke file JSONL"),
    ("Region & Leveldata Enforcement", "‚úÖ", "Access control berdasarkan region"),
    ("Error Handling", "‚úÖ", "Graceful error handling dan user feedback"),
    ("Interactive Testing", "‚úÖ", "Notebook-based testing interface"),
    ("Database Connection", "‚ö†Ô∏è" if not sql_executor.test_connection() else "‚úÖ", 
     f"Database: {DB_PATH}"),
    ("Metadata Files", "‚ö†Ô∏è" if not list(METADATA_DIR.glob("*.json")) else "‚úÖ",
     f"Metadata directory: {METADATA_DIR}")
]

for item, status, description in checklist_items:
    print(f"{status} {item:30} {description}")

print("\nüìã NEXT STEPS:")
print("1. Tambahkan file metadata JSON ke folder 'metadata/'")
print("2. Siapkan database SQLite di 'database.db'")
print("3. Test dengan query spesifik domain Anda")
print("4. Tambahkan model forecasting yang lebih advanced jika perlu")
print("5. Implementasikan auth system untuk user context")
print("="*60)