In [None]:
import requests
import json
import time
from datetime import datetime
from requests.exceptions import ConnectionError, Timeout, RequestException

# --- CONFIGURATION ---
# Chat endpoint under test, the JSON test suite to read, and the report to write.
API_URL = "http://127.0.0.1:8000/chat"
INPUT_FILE = "full_rag_test_suite.json"
OUTPUT_FILE = "test_report.json"  # plain literal: there is nothing to interpolate

# --- SPEED & PERSISTENCE SETTINGS ---
REQUEST_TIMEOUT = 120   # Seconds to wait for the server per request (2 minutes max)
MAX_RETRIES = 5         # Maximum attempts per question on connection errors or HTTP 429/503
SAFE_DELAY = 6          # [IMPORTANT] Fixed pause (seconds) after every question
                        # (keeps the rate at 10 requests/minute at most; meant to be safe for Gemini Flash)

def process_single_question(item):
    """
    Send one test question to the chat API, retrying on transient failures.

    Retry policy (at most MAX_RETRIES attempts in total):
    - Connection error: wait 10s, then try again.
    - HTTP 429 / 503: wait 30s, 40s, 50s, ... (10s more per attempt),
      then try again.
    - No wait happens after the final attempt, because nothing would be
      retried afterwards.
    - Timeout, any other HTTP status, or an unexpected exception: give up
      immediately without retrying.

    Args:
        item: Test-case mapping. Must contain "input_text"; a truthy
            "input_image" is forwarded to the API as "image_context".

    Returns:
        A (response, latency, error_msg) tuple:
        - (response, seconds, None) on HTTP 200.
        - (response, seconds, "HTTP_FAIL_<code>") for non-retried statuses.
        - (None, seconds, "TIMEOUT_<REQUEST_TIMEOUT>S") on timeout.
        - (None, 0, <non-empty message>) on an unexpected exception.
        - (None, 0, "SERVER_DOWN_OR_MAX_RETRIES") once all attempts failed.

    Raises:
        KeyError: If item has no "input_text" key.
    """
    question = item["input_text"]
    image_input = item.get("input_image")
    payload = {"question": question}
    if image_input:
        payload["image_context"] = image_input

    for attempt in range(MAX_RETRIES):
        # Waiting only makes sense when another attempt will follow.
        is_last_attempt = attempt == MAX_RETRIES - 1
        start_time = time.time()
        try:
            # Blocking request
            response = requests.post(API_URL, json=payload, timeout=REQUEST_TIMEOUT)
            latency = round(time.time() - start_time, 2)

            # 1. Success (200) -> return right away
            if response.status_code == 200:
                return response, latency, None

            # 2. Rate limited (429) or service unavailable (503):
            #    back off with a growing delay, then retry the same question.
            if response.status_code in (429, 503):
                if not is_last_attempt:
                    wait_time = 30 + (attempt * 10)  # 30s, 40s, 50s...
                    print(f"      ‚ö†Ô∏è API Qu√° t·∫£i (HTTP {response.status_code}). ƒêang l√†m m√°t {wait_time}s...")
                    time.sleep(wait_time)
                continue

            # 3. Any other status (e.g. 400, 500): hand the response back so
            #    it can be logged; no retry, since it would likely fail again.
            return response, latency, f"HTTP_FAIL_{response.status_code}"

        except (ConnectionError, ConnectionRefusedError):
            if not is_last_attempt:
                print(f"      ‚ö†Ô∏è M·∫•t k·∫øt n·ªëi t·ªõi Server. ƒê·ª£i 10s k·∫øt n·ªëi l·∫°i...")
                time.sleep(10)

        except Timeout:
            latency = round(time.time() - start_time, 2)
            print(f"      ‚ö†Ô∏è Timeout (Server treo > {REQUEST_TIMEOUT}s). B·ªè qua.")
            # Derive the label from the configured timeout so it never goes stale.
            return None, latency, f"TIMEOUT_{REQUEST_TIMEOUT}S"

        except Exception as e:
            print(f"      ‚ö†Ô∏è L·ªói l·∫°: {str(e)}")
            # Some exceptions stringify to ""; fall back to the class name so
            # callers that test the message for truthiness still see an error.
            return None, 0, str(e) or type(e).__name__

    return None, 0, "SERVER_DOWN_OR_MAX_RETRIES"

def run_tests_safe_mode():
    """
    Run every test case in INPUT_FILE against the API and write a JSON report.

    For each case the question is sent through process_single_question(),
    a one-line summary is printed, and a record (id, input, latency, status,
    response) is collected. The run pauses SAFE_DELAY seconds after every
    case to keep the request rate low.

    The report ({"details": [...]}) is written to OUTPUT_FILE in a
    ``finally`` clause, so results gathered so far are still saved when the
    run is interrupted or a test case raises.

    Returns:
        None. Returns early (without writing a report) when INPUT_FILE does
        not exist.
    """
    print(f"üöÄ B·∫Øt ƒë·∫ßu ch·∫ø ƒë·ªô AN TO√ÄN (Gemini Flash Friendly)")
    print(f"‚è≥ C·∫•u h√¨nh: Timeout {REQUEST_TIMEOUT}s | Safe Delay {SAFE_DELAY}s")

    try:
        with open(INPUT_FILE, "r", encoding="utf-8") as f:
            dataset = json.load(f)
    except FileNotFoundError:
        print("‚ùå Kh√¥ng t√¨m th·∫•y file input.")
        return

    results = []

    print(f"{'ID':<4} | {'Category':<15} | {'Latency':<8} | {'Status'}")
    print("-" * 65)

    try:
        for idx, item in enumerate(dataset):
            q_id = idx + 1
            category_label = f"{item['category'][:3]}-{item['sub_category']}"

            # --- STEP 1: SEND THE QUESTION ---
            response, latency, error_msg = process_single_question(item)

            # --- STEP 2: RECORD THE OUTCOME ---
            final_status = "UNKNOWN"
            actual_res = ""

            if error_msg:
                # Compare against None explicitly: a requests.Response is
                # falsy for 4xx/5xx statuses, so a plain truthiness test
                # would skip this branch and drop the error body.
                if "HTTP_FAIL" in error_msg and response is not None:
                    final_status = f"FAIL ({response.status_code})"
                    actual_res = response.text
                else:
                    final_status = error_msg
                    actual_res = f"Error: {error_msg}"
            elif response is not None:
                final_status = "DONE (200)"
                try:
                    data = response.json()
                except ValueError:
                    # Body is not valid JSON: keep the raw text.
                    actual_res = response.text
                else:
                    if isinstance(data, dict):
                        actual_res = data.get("answer", str(data))
                    else:
                        # Non-object JSON (list, string, ...): keep the raw text.
                        actual_res = response.text

            # Print the summary row (category truncated to fit the column)
            cat_print = (category_label[:13] + '..') if len(category_label) > 13 else category_label
            print(f"{q_id:<4} | {cat_print:<15} | {latency:<6}s  | {final_status}")

            results.append({
                "id": item["id"],
                "input": item["input_text"],
                "latency": latency,
                "status": final_status,
                "response": actual_res
            })

            # --- STEP 3: PAUSE BETWEEN QUESTIONS (IMPORTANT) ---
            # Always rest SAFE_DELAY seconds, however fast or slow the answer
            # was, so the requests-per-minute rate stays under the limit.
            time.sleep(SAFE_DELAY)
    finally:
        # Save the report even on interruption so a long run is never lost.
        final_output = {"details": results}
        with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
            json.dump(final_output, f, ensure_ascii=False, indent=2)

    print(f"\n‚úÖ Ho√†n t·∫•t. Check file: {OUTPUT_FILE}")

if __name__ == "__main__":
    run_tests_safe_mode()

üöÄ B·∫Øt ƒë·∫ßu ch·∫ø ƒë·ªô AN TO√ÄN (Gemini Flash Friendly)
‚è≥ C·∫•u h√¨nh: Timeout 120s | Safe Delay 6s
ID   | Category        | Latency  | Status
-----------------------------------------------------------------
1    | IN_-Culture     | 7.1   s  | DONE (200)
2    | IN_-Transport   | 0.97  s  | DONE (200)
3    | IN_-Culture     | 5.15  s  | DONE (200)
4    | TOX-VerbalAbu.. | 6.96  s  | DONE (200)
5    | IN_-Tourism     | 4.73  s  | DONE (200)
6    | OUT-VisualQA_.. | 5.73  s  | DONE (200)
7    | IN_-Transport   | 1.86  s  | HTTP_FAIL_500
8    | TOX-VerbalAbu.. | 4.44  s  | DONE (200)
9    | IN_-Transport   | 1.44  s  | HTTP_FAIL_500
10   | OUT-Coding      | 15.39 s  | DONE (200)
11   | IN_-Tourism     | 4.81  s  | DONE (200)
12   | OUT-Medical     | 5.65  s  | DONE (200)
13   | IN_-Culture     | 5.19  s  | DONE (200)
14   | IN_-Transport   | 0.84  s  | DONE (200)
15   | IN_-Culture     | 3.1   s  | DONE (200)
16   | IN_-Transport   | 1.89  s  | HTTP_FAIL_500
17   | TEC-SQLInject.

### Các chỉ số đánh giá
1. latency_seconds (Độ trễ):

    < 2s: Rất tốt (Thường là cache hoặc Bus query đơn giản).

    2s - 5s: Chấp nhận được (Thường là các câu hỏi Bus phức tạp hoặc Tourism RAG).

    > 5s: Cần xem lại (Thường là do Gemini phản hồi chậm hoặc mạng lag).

2. auto_eval (Đánh giá tự động):

    PASS: Hệ thống hoạt động đúng mong đợi.

    FAIL (Caught by Bus Logic): Đây là lỗi quan trọng nhất cần soi. Tức là câu hỏi "Lập kế hoạch đi chơi từ A đến B" bị hệ thống hiểu nhầm là tìm xe buýt. Nếu thấy lỗi này -> Cần bổ sung từ khóa vào "Blacklist" trong unified_server.py.

3. has_options:

    Với các câu hỏi test về địa điểm trùng tên (ĐH Kinh tế, Tôn Đức Thắng...), trường này bắt buộc phải là true. Nếu là false, hệ thống Disambiguation (Gom nhóm) đang bị lỗi.

In [None]:
import requests
import json
import time
from datetime import datetime
from requests.exceptions import ConnectionError, Timeout, RequestException

# --- CONFIGURATION ---
# Chat endpoint under test, the JSON test suite to read, and the report to write.
API_URL = "http://127.0.0.1:8000/chat"
INPUT_FILE = "full_rag_test_suite_V2.json"
OUTPUT_FILE = "test_report_V2.json"  # plain literal: there is nothing to interpolate

# --- SPEED & PERSISTENCE SETTINGS ---
REQUEST_TIMEOUT = 120   # Seconds to wait for the server per request (2 minutes max)
MAX_RETRIES = 5         # Maximum attempts per question on connection errors or HTTP 429/503
SAFE_DELAY = 6          # [IMPORTANT] Fixed pause (seconds) after every question
                        # (keeps the rate at 10 requests/minute at most; meant to be safe for Gemini Flash)

def process_single_question(item):
    """
    Send one test question to the chat API, retrying on transient failures.

    Retry policy (at most MAX_RETRIES attempts in total):
    - Connection error: wait 10s, then try again.
    - HTTP 429 / 503: wait 30s, 40s, 50s, ... (10s more per attempt),
      then try again.
    - No wait happens after the final attempt, because nothing would be
      retried afterwards.
    - Timeout, any other HTTP status, or an unexpected exception: give up
      immediately without retrying.

    Args:
        item: Test-case mapping. Must contain "input_text"; a truthy
            "input_image" is forwarded to the API as "image_context".

    Returns:
        A (response, latency, error_msg) tuple:
        - (response, seconds, None) on HTTP 200.
        - (response, seconds, "HTTP_FAIL_<code>") for non-retried statuses.
        - (None, seconds, "TIMEOUT_<REQUEST_TIMEOUT>S") on timeout.
        - (None, 0, <non-empty message>) on an unexpected exception.
        - (None, 0, "SERVER_DOWN_OR_MAX_RETRIES") once all attempts failed.

    Raises:
        KeyError: If item has no "input_text" key.
    """
    question = item["input_text"]
    image_input = item.get("input_image")
    payload = {"question": question}
    if image_input:
        payload["image_context"] = image_input

    for attempt in range(MAX_RETRIES):
        # Waiting only makes sense when another attempt will follow.
        is_last_attempt = attempt == MAX_RETRIES - 1
        start_time = time.time()
        try:
            # Blocking request
            response = requests.post(API_URL, json=payload, timeout=REQUEST_TIMEOUT)
            latency = round(time.time() - start_time, 2)

            # 1. Success (200) -> return right away
            if response.status_code == 200:
                return response, latency, None

            # 2. Rate limited (429) or service unavailable (503):
            #    back off with a growing delay, then retry the same question.
            if response.status_code in (429, 503):
                if not is_last_attempt:
                    wait_time = 30 + (attempt * 10)  # 30s, 40s, 50s...
                    print(f"      ‚ö†Ô∏è API Qu√° t·∫£i (HTTP {response.status_code}). ƒêang l√†m m√°t {wait_time}s...")
                    time.sleep(wait_time)
                continue

            # 3. Any other status (e.g. 400, 500): hand the response back so
            #    it can be logged; no retry, since it would likely fail again.
            return response, latency, f"HTTP_FAIL_{response.status_code}"

        except (ConnectionError, ConnectionRefusedError):
            if not is_last_attempt:
                print(f"      ‚ö†Ô∏è M·∫•t k·∫øt n·ªëi t·ªõi Server. ƒê·ª£i 10s k·∫øt n·ªëi l·∫°i...")
                time.sleep(10)

        except Timeout:
            latency = round(time.time() - start_time, 2)
            print(f"      ‚ö†Ô∏è Timeout (Server treo > {REQUEST_TIMEOUT}s). B·ªè qua.")
            # Derive the label from the configured timeout so it never goes stale.
            return None, latency, f"TIMEOUT_{REQUEST_TIMEOUT}S"

        except Exception as e:
            print(f"      ‚ö†Ô∏è L·ªói l·∫°: {str(e)}")
            # Some exceptions stringify to ""; fall back to the class name so
            # callers that test the message for truthiness still see an error.
            return None, 0, str(e) or type(e).__name__

    return None, 0, "SERVER_DOWN_OR_MAX_RETRIES"

def run_tests_safe_mode():
    """
    Run every test case in INPUT_FILE against the API and write a JSON report.

    For each case the question is sent through process_single_question(),
    a one-line summary is printed, and a record (id, input, latency, status,
    response) is collected. The run pauses SAFE_DELAY seconds after every
    case to keep the request rate low.

    The report ({"details": [...]}) is written to OUTPUT_FILE in a
    ``finally`` clause, so results gathered so far are still saved when the
    run is interrupted or a test case raises.

    Returns:
        None. Returns early (without writing a report) when INPUT_FILE does
        not exist.
    """
    print(f"üöÄ B·∫Øt ƒë·∫ßu ch·∫ø ƒë·ªô AN TO√ÄN (Gemini Flash Friendly)")
    print(f"‚è≥ C·∫•u h√¨nh: Timeout {REQUEST_TIMEOUT}s | Safe Delay {SAFE_DELAY}s")

    try:
        with open(INPUT_FILE, "r", encoding="utf-8") as f:
            dataset = json.load(f)
    except FileNotFoundError:
        print("‚ùå Kh√¥ng t√¨m th·∫•y file input.")
        return

    results = []

    print(f"{'ID':<4} | {'Category':<15} | {'Latency':<8} | {'Status'}")
    print("-" * 65)

    try:
        for idx, item in enumerate(dataset):
            q_id = idx + 1
            category_label = f"{item['category'][:3]}-{item['sub_category']}"

            # --- STEP 1: SEND THE QUESTION ---
            response, latency, error_msg = process_single_question(item)

            # --- STEP 2: RECORD THE OUTCOME ---
            final_status = "UNKNOWN"
            actual_res = ""

            if error_msg:
                # Compare against None explicitly: a requests.Response is
                # falsy for 4xx/5xx statuses, so a plain truthiness test
                # would skip this branch and drop the error body.
                if "HTTP_FAIL" in error_msg and response is not None:
                    final_status = f"FAIL ({response.status_code})"
                    actual_res = response.text
                else:
                    final_status = error_msg
                    actual_res = f"Error: {error_msg}"
            elif response is not None:
                final_status = "DONE (200)"
                try:
                    data = response.json()
                except ValueError:
                    # Body is not valid JSON: keep the raw text.
                    actual_res = response.text
                else:
                    if isinstance(data, dict):
                        actual_res = data.get("answer", str(data))
                    else:
                        # Non-object JSON (list, string, ...): keep the raw text.
                        actual_res = response.text

            # Print the summary row (category truncated to fit the column)
            cat_print = (category_label[:13] + '..') if len(category_label) > 13 else category_label
            print(f"{q_id:<4} | {cat_print:<15} | {latency:<6}s  | {final_status}")

            results.append({
                "id": item["id"],
                "input": item["input_text"],
                "latency": latency,
                "status": final_status,
                "response": actual_res
            })

            # --- STEP 3: PAUSE BETWEEN QUESTIONS (IMPORTANT) ---
            # Always rest SAFE_DELAY seconds, however fast or slow the answer
            # was, so the requests-per-minute rate stays under the limit.
            time.sleep(SAFE_DELAY)
    finally:
        # Save the report even on interruption so a long run is never lost.
        final_output = {"details": results}
        with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
            json.dump(final_output, f, ensure_ascii=False, indent=2)

    print(f"\n‚úÖ Ho√†n t·∫•t. Check file: {OUTPUT_FILE}")

if __name__ == "__main__":
    run_tests_safe_mode()

üöÄ B·∫Øt ƒë·∫ßu ch·∫ø ƒë·ªô AN TO√ÄN (Gemini Flash Friendly)
‚è≥ C·∫•u h√¨nh: Timeout 120s | Safe Delay 6s
ID   | Category        | Latency  | Status
-----------------------------------------------------------------
1    | IN_-Transport.. | 1.02  s  | DONE (200)
2    | OUT-General_O.. | 8.85  s  | DONE (200)
3    | TEC-Security    | 5.32  s  | DONE (200)
4    | TEC-Security    | 6.04  s  | DONE (200)
5    | TEC-Security    | 8.12  s  | DONE (200)
6    | OUT-General_O.. | 7.44  s  | DONE (200)
7    | TEC-Security    | 7.34  s  | DONE (200)
8    | OUT-Transport.. | 7.65  s  | DONE (200)
9    | OUT-General_O.. | 6.78  s  | DONE (200)
10   | OUT-General_O.. | 7.91  s  | DONE (200)
11   | OUT-General_O.. | 13.25 s  | DONE (200)
12   | IN_-Culture_T.. | 4.87  s  | DONE (200)
13   | OUT-General_O.. | 9.99  s  | DONE (200)
14   | IN_-Culture_T.. | 3.18  s  | DONE (200)
15   | IN_-Transport.. | 0.61  s  | DONE (200)
16   | IN_-Transport.. | 0.22  s  | DONE (200)
17   | IN_-Transport.. | 0.5  