In [None]:

# # Support Reply Copilot (Compare + Judge + DB Mode + Audio)
# 
# This notebook builds a practical prototype:
# - Model 1 (Cloud) vs Model 2 (Local/Ollama), with streaming output
# - A Judge model scores both answers and picks a winner
# - A SQLite DB provides approved support macros (to reduce invented policy-like text)
# - A DB mode selector controls when the DB is used:
#     - Always: the app queries the DB before calling any LLM
#     - Auto: the REMOTE model decides whether to query the DB via tool-calling; if it queries and gets no hits,
#             the app still injects a "no macros found" guardrail to prevent hallucinations
#     - Off: the DB is never queried
# - Optional TTS reads the winning answer
# - A Gradio UI ties it all together


In [None]:
# =========================
# 1) Imports
# =========================

import os  # For environment variables and paths
import json  # For parsing / validating judge output
import sqlite3  # For SQLite DB access
from typing import Dict, Any, List  # For type hints

import gradio as gr  # For UI
from dotenv import load_dotenv  # For loading .env
from openai import OpenAI  # OpenAI / Ollama-compatible client

In [None]:
# =========================
# 2) Environment + Clients + Model defaults
# =========================

# Load environment variables from .env (e.g., OPENAI_API_KEY).
# override=True is passed so values from .env take precedence over variables already set in the process.
load_dotenv(override=True)

# Create the cloud client (uses OPENAI_API_KEY from env)
client_cloud = OpenAI()

# Create the local client (Ollama OpenAI-compatible endpoint)
# - Ollama must be running: `ollama serve`
# - The api_key value is a placeholder (presumably not checked by Ollama — TODO confirm)
client_local = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

# Model defaults (as requested); these are the initial values of the UI dropdowns
MODEL_1_DEFAULT = "gpt-4.1-nano"   # Cloud / Model 1
MODEL_2_DEFAULT = "llama3.1:8b"    # Local / Model 2 (Ollama)
JUDGE_DEFAULT   = "gpt-4.1-mini"   # Judge (cloud)

# Fixed decoding settings (no UI control)
TEMPERATURE_FIXED = 0.2  # Keep small randomness but stable

# Check whether Ollama is reachable.
# NOTE: this runs once, when the cell executes; `ollama_ok` is not refreshed afterwards,
# so re-run this cell if Ollama is started later.
try:
    _ = client_local.models.list()  # Simple ping to local server
    ollama_ok = True
except Exception:
    # Any failure (connection refused, timeout, ...) is treated as "local backend unavailable"
    ollama_ok = False


In [None]:
# =========================
# 3) SQLite DB: Support macros (init + seed)
# =========================

# SQLite DB file name. This is a relative path, so the file is created in the
# kernel's current working directory (usually the notebook's folder).
DB_PATH = "support_macros.db"

def init_macros_db(db_path: str) -> None:
    """
    Create the SQLite database schema and seed it with a small set of approved support macros.
    This DB is meant to be a controlled source of templates (macros) the assistant can reuse.

    The function is idempotent: the table is created only if missing, and the seed rows
    are inserted only when the table is empty, so re-running the cell does not duplicate data.

    Args:
        db_path: Path of the SQLite file (created if it does not exist).
    """
    # Connect to the SQLite DB (creates the file if it doesn't exist)
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` only manages the transaction (commit on success, rollback on error);
        # it does NOT close the connection, hence the explicit close() in `finally`.
        with conn:
            cur = conn.cursor()  # Cursor executes SQL statements

            # Create the macros table if it does not already exist
            cur.execute("""
            CREATE TABLE IF NOT EXISTS macros (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                intent TEXT NOT NULL,
                title TEXT NOT NULL,
                content TEXT NOT NULL,
                tags TEXT NOT NULL
            );
            """)

            # Check how many rows exist (to avoid inserting duplicates on re-run)
            cur.execute("SELECT COUNT(*) FROM macros;")
            count = cur.fetchone()[0]

            # Seed only if the table is empty
            if count == 0:
                # Each row is (intent, title, content, tags)
                seed_rows = [
                    # -------------------------
                    # Billing / refunds
                    # -------------------------
                    (
                        "refund",
                        "Double charge / duplicate payment",
                        "Thanks for reporting this. I can see how frustrating that is. "
                        "Please share the invoice IDs (or the last 4 digits of the card + the charge dates), and we’ll verify the duplicate charge and process a refund if confirmed. "
                        "Once validated, refunds typically appear within 5–10 business days depending on your bank.",
                        "billing,refund,double charge,invoice,card"
                    ),
                    (
                        "billing_issue",
                        "Invoice / billing discrepancy",
                        "Thanks for reaching out. Please share your account email and the invoice number(s) affected, and tell us what looks incorrect (amount, plan, dates, taxes). "
                        "We’ll review and get back with a correction or explanation.",
                        "billing,invoice,pricing,taxes"
                    ),

                    # -------------------------
                    # Login / authentication
                    # -------------------------
                    (
                        "login_help",
                        "Login issue after password reset",
                        "Sorry you’re having trouble logging in. Please confirm the email on the account and whether you see an error message. "
                        "If you recently reset your password, try clearing cache/cookies or using an incognito window; also confirm your device time is correct. "
                        "If it still fails, we can help verify the account and restore access.",
                        "login,password reset,auth,cache,cookies"
                    ),
                    (
                        "2fa_issue",
                        "2FA codes not arriving",
                        "Thanks for the details. If 2FA codes aren’t arriving, please check your spam folder and confirm the mailbox isn’t blocking automated emails. "
                        "If you use an authenticator app, confirm the app is synced to the correct time. "
                        "If you’re locked out, we can initiate a secure recovery—please share your account email and any recent successful login date you remember.",
                        "2FA,authentication,codes,email,authenticator"
                    ),

                    # -------------------------
                    # Incident / outage
                    # -------------------------
                    (
                        "technical_outage",
                        "Service outage acknowledgement",
                        "Thanks for flagging this. We’re currently investigating the disruption and we’ll share updates as soon as we have confirmed details. "
                        "If you can, please send the approximate time it started, your region, and any error message or screenshot—this helps us correlate logs faster.",
                        "outage,incident,errors,region,screenshot"
                    ),
                    (
                        "eta_request",
                        "ETA request during incident",
                        "I understand you need an ETA. We’re actively working on the issue and will provide the next update by <TIME WINDOW>. "
                        "If you share your region and any error code you’re seeing, I can also confirm whether it matches the incident scope.",
                        "ETA,incident,update,scope"
                    ),

                    # -------------------------
                    # Shipping / delivery (generic e-commerce)
                    # -------------------------
                    (
                        "shipping_delay",
                        "Shipping delay / late delivery",
                        "Sorry about the delay. Please share your order number and the delivery address postcode/ZIP, and I’ll check the latest carrier scan and expected delivery date. "
                        "If the package is stalled, we can start a carrier investigation.",
                        "shipping,delay,delivery,carrier,order"
                    ),

                    # -------------------------
                    # Escalation / handoff
                    # -------------------------
                    (
                        "escalation",
                        "Escalate to specialist",
                        "Thanks—this looks like it needs a specialist. I’m escalating it now. "
                        "To speed things up, please include: your account email, exact steps to reproduce, time of occurrence, and any screenshots/logs. "
                        "We’ll follow up as soon as we have an update.",
                        "escalation,specialist,logs,screenshots"
                    ),

                    # -------------------------
                    # Polite closing
                    # -------------------------
                    (
                        "closing",
                        "Polite closing",
                        "If you reply with the requested details, we’ll take it from there. Thanks for your patience.",
                        "closing,thanks,patience"
                    ),
                ]

                # Insert all seed rows in one call
                cur.executemany(
                    "INSERT INTO macros (intent, title, content, tags) VALUES (?, ?, ?, ?);",
                    seed_rows
                )
    finally:
        # Release the file handle even if schema creation or seeding failed
        conn.close()

# Initialize (create/seed) DB when this cell runs; seeding is skipped if the table already has rows
init_macros_db(DB_PATH)



In [None]:
# =========================
# 4) DB Search: search_macros(query, top_k)
# =========================

def search_macros(query: str, top_k: int = 3, db_path: str = "") -> Dict[str, Any]:
    """
    Keyword-based macro search, ranked by how many query tokens each macro matches.

    The query is lower-cased and split into alphanumeric tokens; tokens shorter than
    3 characters are dropped, duplicates are removed, and at most 10 tokens are used.
    A macro matches a token when the token occurs (substring, via LIKE) in its
    intent, title, content, or tags. Macros matching at least one token are returned,
    best match first (ties broken by ascending id), limited to `top_k`.

    Args:
        query: Free text, typically the customer message.
        top_k: Maximum number of hits. Values <= 0 (or non-numeric) yield no hits / the default.
        db_path: SQLite file to search. Empty string (default) means the module-level DB_PATH.

    Returns:
        {"query": <original query>, "hits": [{"id", "intent", "title", "excerpt", "content"}, ...]}
        where "excerpt" is the stripped content truncated to 280 characters plus "...".
    """
    normalized = (query or "").strip().lower()

    # top_k can come from a model tool call, so do not trust it blindly.
    # (In SQLite a negative LIMIT means "no limit", which is never what we want here.)
    try:
        limit = int(top_k)
    except (TypeError, ValueError):
        limit = 3
    if not normalized or limit <= 0:
        return {"query": query, "hits": []}

    # 1) Tokenize: every non-alphanumeric character acts as a separator
    words = "".join(ch if ch.isalnum() else " " for ch in normalized).split()

    # 2) Drop short tokens (noise), de-duplicate preserving order, cap the count
    tokens = list(dict.fromkeys(w for w in words if len(w) >= 3))[:10]
    if not tokens:
        return {"query": query, "hits": []}

    # 3) One 0/1 term per token; their sum is the relevance score of a row.
    #    Only "?" placeholders are interpolated into the SQL text, never user input.
    token_match = (
        "(CASE WHEN (lower(intent) LIKE ? OR lower(title) LIKE ? "
        "OR lower(content) LIKE ? OR lower(tags) LIKE ?) THEN 1 ELSE 0 END)"
    )
    score_sql = " + ".join(token_match for _ in tokens)
    params: List[Any] = []
    for token in tokens:
        params.extend([f"%{token}%"] * 4)

    sql = f"""
        SELECT id, intent, title, content
        FROM (
            SELECT id, intent, title, content, ({score_sql}) AS score
            FROM macros
        )
        WHERE score > 0
        ORDER BY score DESC, id ASC
        LIMIT ?;
    """

    # sqlite3's connection context manager does not close the connection, so close explicitly
    conn = sqlite3.connect(db_path or DB_PATH)
    try:
        rows = conn.execute(sql, (*params, limit)).fetchall()
    finally:
        conn.close()

    hits: List[Dict[str, Any]] = []
    for mid, intent, title, content in rows:
        excerpt = content.strip()
        if len(excerpt) > 280:
            excerpt = excerpt[:280] + "..."
        hits.append({
            "id": mid,
            "intent": intent,
            "title": title,
            "excerpt": excerpt,
            "content": content
        })

    return {"query": query, "hits": hits}


In [None]:
# =========================
# 5) DB Trace formatting (for UI)
# =========================

def format_db_trace(db_result: Dict[str, Any]) -> str:
    """
    Render a DB lookup result as a short Markdown summary for the UI.

    Returns "No DB lookup." for a missing/empty result, a "No hits" summary when the
    lookup found nothing, and otherwise a bullet list with one line per hit.
    """
    if not db_result:
        return "No DB lookup."

    found = db_result.get("hits", [])
    query_text = db_result.get("query", "")

    # Heading and query line are shared by both outcomes
    summary = ["### DB macros", f"- Query: `{query_text}`"]

    if not found:
        summary.append("- Result: **No hits**")
        return "\n".join(summary)

    summary.append(f"- Hits: **{len(found)}**")
    summary.extend(
        f"  - (id={hit['id']}) **{hit['intent']}** — {hit['title']}"
        for hit in found
    )
    return "\n".join(summary)


In [None]:
# =========================
# 6) System prompt (Support Agent)
# =========================

# Default system prompt for both answering models (pre-filled in the UI and editable there).
# One instruction per line; lines are joined with "\n" without leading/trailing whitespace.
DEFAULT_SYSTEM_PROMPT = "\n".join([
    "You are a professional customer support agent.",
    "Your priority is factual accuracy and clarity.",
    "You must not invent policies, SLAs, refunds, ETAs, pricing, or account details.",
    "If information is missing, ask for the minimum necessary details.",
    "Write a single email-style reply, concise and courteous, with clear next steps.",
    'If an internal "APPROVED MACROS" reference is provided, reuse it and stay consistent with it.',
    "Respond in English.",
])

In [None]:
# =========================
# 7) Helper: Build "APPROVED MACROS" block for LLM context
# =========================

def build_approved_macros_block(db_result: Dict[str, Any]) -> str:
    """
    Turn DB hits into the "APPROVED MACROS" reference text given to the LLM.

    The caller injects the returned text as a SYSTEM message. When there are no hits
    (or no result at all) a short guardrail text is returned instead, telling the model
    to ask for details rather than invent policies.
    """
    macros = db_result.get("hits", []) if db_result else []

    # No macros available: return the guardrail text
    if not macros:
        return "\n".join([
            "APPROVED MACROS:",
            "No matching macros found.",
            "Instruction: Ask for missing details and respond professionally without inventing policies.",
        ])

    # One "[Macro ...]" header + body per hit, each preceded by a blank line
    sections = [
        f"\n[Macro id={macro['id']} | intent={macro['intent']} | title={macro['title']}]\n"
        f"{macro['content']}"
        for macro in macros
    ]
    header = "APPROVED MACROS (use and adapt as appropriate):"
    footer = "\nInstruction: Prefer using these macros; do not invent policy details not present above."
    return "\n".join([header, *sections, footer])

In [None]:
# =========================
# 8) LLM Streaming helper (no tool-calling; DB is injected deterministically)
# =========================

def stream_answer(client: OpenAI, model: str, messages: List[Dict[str, str]]):
    """
    Stream assistant output and yield incremental text for UI updates.

    Args:
        client: Client exposing `chat.completions.create` (cloud or local/Ollama).
        model: Model name to call.
        messages: Chat messages to send.

    Yields:
        The accumulated answer text after each content-bearing chunk, and then the
        final accumulated text once more (so callers always receive at least one
        value, an empty string if the model produced no text).
    """
    # Create streaming chat completion
    stream = client.chat.completions.create(
        model=model,                 # Model name
        messages=messages,           # Chat history
        stream=True,                 # Enable streaming
        temperature=TEMPERATURE_FIXED
    )

    # Accumulate text as tokens arrive
    text = ""

    # Iterate over streaming chunks
    for chunk in stream:
        # A chunk may carry an empty `choices` list (e.g. usage-only or
        # provider-specific housekeeping chunks); indexing [0] would raise IndexError.
        choices = getattr(chunk, "choices", None)
        if not choices:
            continue

        delta = choices[0].delta        # Incremental delta
        if delta and delta.content:     # If text content exists
            text += delta.content       # Append to full text
            yield text                  # Yield partial output for UI

    # Yield final text once more (convenient for callers)
    yield text


In [None]:
# =========================
# 9) Judge function (strict JSON verdict)
# =========================

def judge_two_answers(
    client: OpenAI,
    judge_model: str,
    customer_message: str,
    answer_a: str,
    answer_b: str,
    model_a_name: str,
    model_b_name: str
) -> Dict[str, Any]:
    """
    Judge compares two answers and returns a strict JSON verdict.

    The judge must output:
      { model_A, model_B, score_A, score_B, winner, reason }

    Args:
        client: Client exposing `chat.completions.create`.
        judge_model: Model name used as the judge.
        customer_message: The customer message both answers respond to.
        answer_a / answer_b: The two candidate replies.
        model_a_name / model_b_name: Names shown to the judge and echoed in the verdict.

    Returns:
        The parsed verdict dict. "winner" is normalised to exactly "A", "B" or "tie";
        "score_A"/"score_B" are numbers in [0, 10] (numeric strings are converted to float).

    Raises:
        ValueError: if the response is empty, is not valid JSON / not a JSON object,
            lacks a required field, or has an invalid winner or score.
            (json.JSONDecodeError is a ValueError subclass.)
    """
    # Define judge system prompt
    judge_system_prompt = (
        "You are an impartial judge evaluating two customer-support answers.\n"
        "Score each answer from 0 to 10 based on:\n"
        "1) Factual correctness (no invented policies, SLAs, ETAs)\n"
        "2) Professional tone\n"
        "3) Clarity and actionable next steps\n"
        "4) Completeness given the customer message\n"
        "Return ONLY valid JSON."
    )

    # Define judge user prompt (includes both answers)
    judge_user_prompt = f"""
Customer message:
{customer_message}

Answer A (model: {model_a_name}):
{answer_a}

Answer B (model: {model_b_name}):
{answer_b}

Respond with JSON EXACTLY in this schema:
{{
  "model_A": "{model_a_name}",
  "model_B": "{model_b_name}",
  "score_A": <number 0-10>,
  "score_B": <number 0-10>,
  "winner": "A" or "B" or "tie",
  "reason": "brief concrete explanation citing criteria"
}}
""".strip()

    # Call judge model using JSON response format
    resp = client.chat.completions.create(
        model=judge_model,
        messages=[
            {"role": "system", "content": judge_system_prompt},
            {"role": "user", "content": judge_user_prompt}
        ],
        response_format={"type": "json_object"}  # Request JSON object
    )

    # Extract the JSON text; content can be None/empty (e.g. refusal or truncated output)
    verdict_text = resp.choices[0].message.content
    if not verdict_text or not verdict_text.strip():
        raise ValueError("Judge returned an empty response")

    # Parse JSON (raises json.JSONDecodeError, a ValueError, on malformed text)
    verdict = json.loads(verdict_text)
    if not isinstance(verdict, dict):
        raise ValueError("Judge verdict must be a JSON object")

    # Minimal validation
    required = ["model_A", "model_B", "score_A", "score_B", "winner", "reason"]
    for k in required:
        if k not in verdict:
            raise ValueError(f"Judge verdict missing field: {k}")

    # Accept harmless casing/whitespace variants ("a", "Tie ") but store the canonical form,
    # because callers compare against exactly "A" / "B" / "tie".
    canonical_winner = {"a": "A", "b": "B", "tie": "tie"}
    raw_winner = verdict["winner"]
    winner = canonical_winner.get(raw_winner.strip().lower()) if isinstance(raw_winner, str) else None
    if winner is None:
        raise ValueError("Judge winner must be 'A', 'B', or 'tie'")
    verdict["winner"] = winner

    # Scores are displayed as "<score>/10", so make sure they are numbers in range
    for key in ("score_A", "score_B"):
        raw_score = verdict[key]
        try:
            score = float(raw_score)
        except (TypeError, ValueError):
            score = None
        # bool is rejected explicitly (float(True) == 1.0); NaN fails the range check
        if score is None or isinstance(raw_score, bool) or not (0 <= score <= 10):
            raise ValueError(f"Judge {key} must be a number between 0 and 10")
        if not isinstance(raw_score, (int, float)):
            verdict[key] = score  # e.g. "7.5" -> 7.5

    return verdict


# Tools + Auto-mode helper (remote decides) + Tool-call handler

def build_search_macros_tool_schema() -> Dict[str, Any]:
    """
    Build the OpenAI function-tool schema that lets the REMOTE model request a macro search.

    The tool is named "search_macros"; it takes a required string `query` and an
    optional integer `top_k` (schema default 3), with no other properties allowed.
    """
    # Argument definitions, assembled bottom-up
    query_arg = {
        "type": "string",
        "description": "The customer message or a short query to find relevant macros."
    }
    top_k_arg = {
        "type": "integer",
        "description": "How many macro hits to return.",
        "default": 3
    }
    parameters = {
        "type": "object",
        "properties": {"query": query_arg, "top_k": top_k_arg},
        "required": ["query"],
        "additionalProperties": False
    }
    function_spec = {
        "name": "search_macros",
        "description": "Search approved customer support macros from the internal SQLite database.",
        "parameters": parameters
    }
    return {"type": "function", "function": function_spec}


# Tool list passed as `tools=` to the remote model in Auto mode
SEARCH_MACROS_TOOL = build_search_macros_tool_schema()
TOOLS = [SEARCH_MACROS_TOOL]


def handle_tool_calls_for_macros(message) -> Dict[str, Any]:
    """
    Handle tool calls coming from the remote model.

    Every tool call in `message.tool_calls` gets exactly one tool message in reply
    (the chat API expects a response for each tool_call_id): a search result for
    valid "search_macros" calls, or a JSON error payload for unknown tools and
    malformed arguments.

    Returns:
      {
        "tool_messages": [ {role:'tool', content:'...', tool_call_id:'...'}, ... ],
        "db_result": {query, hits}
      }
    `db_result` is the search result when one search ran; when several searches ran,
    their hits are merged (de-duplicated by macro id, first occurrence wins) and the
    distinct queries are joined with "; ". With no successful search it is
    {"query": "", "hits": []}.
    """
    tool_messages = []
    results = []  # one search_macros result per successfully executed call

    for tool_call in (message.tool_calls or []):
        name = tool_call.function.name

        if name != "search_macros":
            # Still answer the call so the conversation stays valid for the next request
            payload: Dict[str, Any] = {"error": f"Unknown tool: {name}"}
        else:
            # Parse tool arguments safely: the model may emit invalid or non-object JSON
            try:
                args = json.loads(tool_call.function.arguments or "{}")
            except (TypeError, ValueError):
                args = None

            if not isinstance(args, dict):
                payload = {
                    "error": "Invalid tool arguments: expected a JSON object.",
                    "query": "",
                    "hits": []
                }
            else:
                raw_query = args.get("query", "")
                q = "" if raw_query is None else str(raw_query)

                # top_k is model-supplied: fall back to 3 if unusable, keep it within 1..10
                try:
                    top_k = int(args.get("top_k", 3))
                except (TypeError, ValueError):
                    top_k = 3
                top_k = max(1, min(top_k, 10))

                # Query SQLite DB
                payload = search_macros(q, top_k=top_k)
                results.append(payload)

        # IMPORTANT: Return JSON string as tool content (easy for LLM to parse)
        tool_messages.append({
            "role": "tool",
            "content": json.dumps(payload, ensure_ascii=False),
            "tool_call_id": tool_call.id
        })

    if not results:
        db_result: Dict[str, Any] = {"query": "", "hits": []}
    elif len(results) == 1:
        db_result = results[0]
    else:
        # Several searches in one assistant turn: merge instead of keeping only the
        # last one, so a later empty search cannot hide earlier hits.
        merged_hits = []
        seen_ids = set()
        queries = []
        for result in results:
            query_text = result.get("query", "")
            if query_text and query_text not in queries:
                queries.append(query_text)
            for hit in result.get("hits", []):
                hit_id = hit.get("id")
                if hit_id in seen_ids:
                    continue
                seen_ids.add(hit_id)
                merged_hits.append(hit)
        db_result = {"query": "; ".join(queries), "hits": merged_hits}

    return {"tool_messages": tool_messages, "db_result": db_result}


def auto_decide_db_by_remote(
    customer_message: str,
    system_prompt: str,
    cloud_model: str,
    top_k: int = 3,
    max_tool_rounds: int = 3
) -> Dict[str, Any]:
    """
    Auto-mode: only the REMOTE model can decide whether to query the DB.
    If it calls the tool, we execute it and feed results back until the model stops
    calling tools (or `max_tool_rounds` is reached). The model's final text from this
    decision conversation is not used; only the DB outcome is returned.

    Args:
        customer_message: Customer text sent as the user message.
        system_prompt: System prompt for the decision conversation.
        cloud_model: Remote model name.
        top_k: Currently not used by this function — the number of hits is taken from
            the model's own tool-call arguments. Kept for interface compatibility.
        max_tool_rounds: Upper bound on tool-call round trips.

    Returns:
      {
        "used_db": bool,
        "db_result": {query, hits},
        "approved_macros_block": str
      }
    `approved_macros_block` is "" when the model never requested the DB.
    """
    used_db = False
    db_result: Dict[str, Any] = {"query": "", "hits": []}

    # Decision messages: prompt + user message
    # (We do not inject macros here; we want the remote model to decide to call the tool.)
    messages = [
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": customer_message.strip()}
    ]

    # Ask remote model with tools enabled (so it can call search_macros)
    resp = client_cloud.chat.completions.create(
        model=cloud_model,
        messages=messages,
        tools=TOOLS,
        temperature=TEMPERATURE_FIXED
    )

    rounds = 0
    while resp.choices[0].finish_reason == "tool_calls" and rounds < max_tool_rounds:
        rounds += 1
        used_db = True

        # The assistant message holding the tool calls must precede the tool replies
        assistant_msg = resp.choices[0].message
        messages.append(assistant_msg)

        handled = handle_tool_calls_for_macros(assistant_msg)

        # Append tool messages to the conversation
        messages.extend(handled["tool_messages"])

        # Keep the most recent result that actually has hits: a later round that finds
        # nothing must not erase macros found in an earlier round.
        round_result = handled["db_result"]
        if round_result.get("hits") or not db_result.get("hits"):
            db_result = round_result

        # Call remote again (it may call tools again or finalize)
        resp = client_cloud.chat.completions.create(
            model=cloud_model,
            messages=messages,
            tools=TOOLS,
            temperature=TEMPERATURE_FIXED
        )

    # Recommended behavior:
    # - If DB was used but no hits, still inject a "No matching macros found" block.
    # - If DB not used, return empty approved block.
    if used_db:
        # If model asked the tool but db_result is empty, we still provide explicit guidance
        # to avoid hallucinating policies.
        if not db_result.get("hits"):
            approved_block = (
                "APPROVED MACROS:\n"
                f"Query: {db_result.get('query','')}\n"
                "No matching macros found.\n"
                "Instruction: Ask for the minimum necessary details and do not invent policies, SLAs, ETAs, or refunds."
            )
        else:
            approved_block = build_approved_macros_block(db_result)
    else:
        approved_block = ""

    return {"used_db": used_db, "db_result": db_result, "approved_macros_block": approved_block}

In [None]:
# =========================
# 10) Optional TTS helper (winner audio)
# =========================

def tts_to_file(client: OpenAI, text: str, filename: str = "winner_tts.mp3") -> str:
    """
    Synthesize `text` to speech and store the audio bytes in `filename`.

    The file is overwritten if it already exists. Returns `filename` unchanged so the
    caller (the Gradio audio component) can load it.
    """
    # Request parameters for the TTS endpoint
    speech_request = {
        "model": "gpt-4o-mini-tts",  # TTS model
        "voice": "onyx",             # Voice name
        "input": text,               # Text to synthesize
    }
    audio_response = client.audio.speech.create(**speech_request)

    # Persist the returned audio bytes
    with open(filename, "wb") as audio_file:
        audio_file.write(audio_response.content)

    return filename




In [None]:

# =========================
# 11) Compare runner with DB mode: Always / Auto / Off
# In Auto mode, only the REMOTE model decides whether to query the DB (recommended)
# =========================

def compare_mode_run(
    customer_message: str,
    system_prompt: str,
    cloud_model: str,
    local_model: str,
    judge_model: str,
    db_mode: str,        # "Always" | "Auto" | "Off"
    enable_tts: bool
):
    """
    Run the full comparison and stream UI updates.

    Generator yielding 5-tuples:
      (model-1 markdown, model-2 markdown, judge markdown, audio path or None, DB trace markdown)

    DB modes:
    - Off: the DB is never consulted
    - Always: the DB is searched before any LLM call
    - Auto: ONLY the remote model decides (via tool-calling) whether to consult the DB;
            if it does and nothing matches, a "No macros" guardrail block is still injected
    - anything else (or empty) is handled like Always

    If Ollama was not reachable at startup, a single warning tuple is yielded and the run stops.
    """
    # Panel headings are identical on every update, so build them once
    m1_heading = f"## Model 1 (REMOTE / Cloud) — `{cloud_model}`\n\n"
    m2_heading = f"## Model 2 (LOCAL / Ollama) — `{local_model}`\n\n"
    judge_heading = f"## Judge (REMOTE) — `{judge_model}`\n\n"
    waiting = "*(waiting...)*"

    def panels(m1_body: str, m2_body: str, judge_body: str):
        """Return the three markdown panels for the given bodies."""
        return m1_heading + m1_body, m2_heading + m2_body, judge_heading + judge_body

    # Local backend check
    if not ollama_ok:
        warning_panels = panels(
            "⚠️ Local backend unavailable (Ollama not reachable).",
            "⚠️ Start Ollama with: `ollama serve`.",
            "⚠️ Cannot judge without Model 2.",
        )
        yield (*warning_panels, None, "No DB lookup.")
        return

    # -------------------------
    # 1) Determine DB behavior
    # -------------------------
    selected_mode = (db_mode or "Always").strip()

    if selected_mode == "Off":
        db_trace_md = "DB mode: **Off** (no lookup)."
        approved_macros_block = ""

    elif selected_mode == "Auto":
        # The remote model decides, through tool-calling, whether the DB is consulted
        auto = auto_decide_db_by_remote(
            customer_message=customer_message,
            system_prompt=system_prompt,
            cloud_model=cloud_model,
            top_k=3
        )
        used_db = auto["used_db"]
        db_result = auto["db_result"]
        approved_macros_block = auto["approved_macros_block"]

        if used_db:
            db_trace_md = (
                "DB mode: **Auto**\n"
                "- Remote requested DB: **YES**\n\n"
                + format_db_trace(db_result)
            )
        else:
            db_trace_md = "DB mode: **Auto**\n" "- Remote requested DB: **NO**"

    else:
        # "Always", plus the fallback for unknown values: deterministic lookup before the LLMs.
        # The macros block is injected even without hits (it then carries guardrail text).
        if selected_mode == "Always":
            mode_label = "DB mode: **Always**\n\n"
        else:
            mode_label = "DB mode: **Always** (fallback)\n\n"
        db_result = search_macros(customer_message, top_k=3)
        db_trace_md = mode_label + format_db_trace(db_result)
        approved_macros_block = build_approved_macros_block(db_result)

    # -------------------------
    # 2) Build shared messages
    # -------------------------
    messages: List[Dict[str, str]] = [{"role": "system", "content": system_prompt.strip()}]
    if approved_macros_block:
        # Macros go in as a second system message, only when there is something to inject
        messages.append({"role": "system", "content": approved_macros_block})
    messages.append({"role": "user", "content": customer_message.strip()})

    # -------------------------
    # 3) Stream Model 1 (Cloud)
    # -------------------------
    cloud_text = ""
    for cloud_partial in stream_answer(client_cloud, cloud_model, messages):
        cloud_text = cloud_partial
        yield (*panels(cloud_text, waiting, waiting), None, db_trace_md)

    # -------------------------
    # 4) Stream Model 2 (Local)
    # -------------------------
    local_text = ""
    for local_partial in stream_answer(client_local, local_model, messages):
        local_text = local_partial
        yield (*panels(cloud_text, local_text, waiting), None, db_trace_md)

    # -------------------------
    # 5) Judge
    # -------------------------
    verdict = judge_two_answers(
        client=client_cloud,
        judge_model=judge_model,
        customer_message=customer_message.strip(),
        answer_a=cloud_text,
        answer_b=local_text,
        model_a_name=cloud_model,
        model_b_name=local_model
    )
    judge_body = (
        f"- Model A (Model 1): **{verdict['model_A']}** — score **{verdict['score_A']}/10**\n"
        f"- Model B (Model 2): **{verdict['model_B']}** — score **{verdict['score_B']}/10**\n"
        f"- Winner: **{verdict['winner']}**\n\n"
        f"**Reason:** {verdict['reason']}"
    )

    # -------------------------
    # 6) Optional TTS (winner)
    # -------------------------
    # On a tie the cloud answer is the one read aloud
    if verdict["winner"] in ["A", "tie"]:
        winner_text = cloud_text
    else:
        winner_text = local_text
    audio_path = tts_to_file(client_cloud, winner_text) if enable_tts else None

    yield (*panels(cloud_text, local_text, judge_body), audio_path, db_trace_md)



In [None]:

# =========================
# 12) Gradio UI (with DB mode selector: Always / Auto / Off)
# =========================

# Build the Gradio app: inputs/settings in the left column, results in the right column.
# The resulting `demo` object is launched in the next cell.
with gr.Blocks(title="Support Reply Copilot (Compare + Judge + DB + Audio)") as demo:
    gr.Markdown("# Support Reply Copilot")
    gr.Markdown("Compare two models, judge their replies, optionally use DB macros, and optionally generate TTS for the winner.")

    with gr.Row():
        # ---- Left column: customer message, model choices, settings, buttons ----
        with gr.Column(scale=1):
            customer_in = gr.Textbox(
                label="Customer message",
                lines=6,
                placeholder=""
            )

            # Sample customer messages that fill `customer_in` when selected
            gr.Examples(
                examples=[
                    "Hi, I was charged twice for my subscription this month. Can you fix this and confirm when I’ll get the refund?",
                    "I can’t log in after resetting my password—2FA codes never arrive. Please help ASAP.",
                    "Your service has been down for 30 minutes. What’s the ETA and will we receive a credit?"
                ],
                inputs=[customer_in],
                label="Examples (click to fill)"
            )

            model1 = gr.Dropdown(
                choices=["gpt-4.1-nano", "gpt-4.1-mini"],
                value=MODEL_1_DEFAULT,
                label="Model 1 (REMOTE / Cloud)"
            )

            model2 = gr.Dropdown(
                choices=["llama3.1:8b"],
                value=MODEL_2_DEFAULT,
                label="Model 2 (LOCAL / Ollama)"
            )

            judge_model = gr.Dropdown(
                choices=["gpt-4.1-mini", "gpt-4.1-nano"],
                value=JUDGE_DEFAULT,
                label="Judge (REMOTE)"
            )

            gr.Markdown("### Settings")

            # Editable system prompt, pre-filled with the default support-agent prompt
            system_prompt_in = gr.Textbox(
                label="System prompt",
                value=DEFAULT_SYSTEM_PROMPT,
                lines=8
            )

            # Passed to compare_mode_run as `db_mode`
            db_mode = gr.Radio(
                choices=["Always", "Auto", "Off"],
                value="Always",
                label="Macro DB usage"
            )

            gr.Markdown(
                "**Always**: The app queries the SQLite macro DB *before* calling any model.\n"
                "**Auto**: The *remote (cloud) model* decides whether to query the DB (tool-calling).\n"
                "**Off**: The DB is never queried."
            )


            enable_tts = gr.Checkbox(value=False, label="Enable TTS (winner audio)")

            run_btn = gr.Button("Run")
            clear_btn = gr.Button("Clear")

        # ---- Right column: model answers, judge verdict, winner audio, DB trace ----
        with gr.Column(scale=2):
            out_m1 = gr.Markdown()
            out_m2 = gr.Markdown()
            out_judge = gr.Markdown()
            out_audio = gr.Audio(label="Winner audio (TTS)", autoplay=False)
            out_db = gr.Markdown("No DB lookup.")

    def clear_all():
        """Reset the five output components (the customer message input is left untouched)."""
        return "", "", "", None, "No DB lookup."

    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[out_m1, out_m2, out_judge, out_audio, out_db]
    )

    def run_app(customer_msg, m1, m2, j, sys_prompt, mode, use_tts):
        """
        Click handler for "Run": validate the input, then relay every update
        yielded by compare_mode_run to the five output components.
        """
        # Guard: nothing to do without a customer message
        if not customer_msg or not customer_msg.strip():
            yield (
                "## Model 1\n\n⚠️ Please paste a customer message.",
                "## Model 2\n\n*(waiting...)*",
                "## Judge\n\n*(waiting...)*",
                None,
                "No DB lookup."
            )
            return

        gen = compare_mode_run(
            customer_message=customer_msg,
            system_prompt=sys_prompt,
            cloud_model=m1,
            local_model=m2,
            judge_model=j,
            db_mode=mode,
            enable_tts=use_tts
        )

        # Forward each partial result so the panels update while the models stream
        for a, b, c, audio_path, db_md in gen:
            yield a, b, c, audio_path, db_md

    # The order of `inputs` must match run_app's parameter order
    run_btn.click(
        fn=run_app,
        inputs=[customer_in, model1, model2, judge_model, system_prompt_in, db_mode, enable_tts],
        outputs=[out_m1, out_m2, out_judge, out_audio, out_db]
    )




In [None]:
# Start the Gradio app built in the previous cell
demo.launch()