In [1]:
from tools.accommodations.apis import Accommodations
from mcts.travel.semantic.query_parsing import call_local_llm



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import json

base_url = "http://localhost:11434"  
model = "deepseek-r1:14b"

# --------- Example inputs (replace with your real parsed/state/slot if needed) ----------
parsed = {
  "org": "Fort Lauderdale",
  "dest": "Louisiana",
  "days": 7,
  "date": ["2022-03-08","2022-03-09","2022-03-10","2022-03-11","2022-03-12","2022-03-13","2022-03-14"],
  "people_number": 2,
  "budget": 4400,
  "local_constraint": {
    "cuisine": ["Cajun", "Seafood"],
    "avoid": [],
    "room_type": "entire room",
    "children_under_10": True
  }
}

# Keep state minimal but sufficient for retrieval
state = {
  "current_city": "New Orleans",
  "day_idx": 2,
  "used_restaurant_ids": [101, 109, 155],
  "budget": {"total": 4400, "spent": 1200, "remaining": 3200}
}

slot = {
  "type": "meal",
  "meal_type": "dinner",
  "city": "New Orleans",
  "top_k": 5
}

# --------- Build the prompt ----------
prompt = f"""
You are a senior Python engineer. Generate executable Python code ONLY.

Task:
Implement a function:

    def retrieve_candidates(parsed: dict, state: dict, slot: dict, tables: dict) -> list[dict]:

It must retrieve restaurant candidates for a MEAL slot from tables["restaurants"] (a pandas DataFrame).

Constraints & Requirements:
1) Do NOT import any new libraries. Assume pandas is already imported as pd by the caller.
2) Do NOT read/write files, do NOT use network, do NOT use eval/exec.
3) The function must be deterministic and runnable as-is.
4) The function must:
   - Filter rows by City == slot["city"]
   - Filter rows by MealType == slot["meal_type"]
   - Prefer cuisines in parsed["local_constraint"]["cuisine"] if present (soft preference, not hard filter unless it helps ensure enough candidates)
   - Apply dedup: exclude rows whose id is in state["used_restaurant_ids"]
   - Apply a budget-aware soft filter:
       * compute per_meal_cap = max(10, (state["budget"]["remaining"] / remaining_meals_estimate))
       * remaining_meals_estimate = max(1, (parsed["days"] - state["day_idx"]) * 2)   # assume 2 meals/day remaining
       * keep rows with "Average Cost" <= per_meal_cap if that yields at least 10 rows; otherwise relax the cap and keep top by score.
   - Score candidates with a clear scoring function that uses:
       * cuisine match bonus (if cuisine in preferred list)
       * higher Rating better
       * lower Average Cost better
     Then sort by score desc, Rating desc, Average Cost asc.
   - Return a list of dicts with keys:
       ["id","Name","City","Cuisine","Average Cost","MealType","Rating","PriceLevel","score","per_meal_cap","cap_relaxed"]

5) Fallback behavior:
   - If after filtering and dedup you have < 10 candidates, you must relax in this order:
       (a) ignore MealType constraint
       (b) ignore cuisine preference (still score it if present)
       (c) allow higher cost (ignore cap)
   - Always return up to slot["top_k"] candidates if possible.
   - If DataFrame is empty or no candidates exist, return an empty list (no exception).

Given Inputs:
KB schema:
tables["restaurants"] columns:
- id (int), Name (str), City (str), Cuisine (str), Average Cost (float), MealType (str), Rating (float), PriceLevel (int)

parsed = {json.dumps(parsed, ensure_ascii=False)}
state = {json.dumps(state, ensure_ascii=False)}
slot = {json.dumps(slot, ensure_ascii=False)}

Output format:
- Output ONLY valid Python code.
- Do not include markdown fences.
- Code must define retrieve_candidates exactly with that signature.
- You may define small helper functions inside retrieve_candidates.
- Do not output any explanations, comments are allowed but keep them minimal.

Now generate the code.
""".strip()

# --------- Call your local LLM ----------
code = call_local_llm(base_url, model, prompt, timeout=666.0)

print(code)


None


In [None]:
from __future__ import annotations

import json
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

import pandas as pd
import requests


# -----------------------
# 1) Example inputs
# -----------------------
parsed = {
    "org": "Fort Lauderdale",
    "dest": "Louisiana",
    "days": 7,
    "date": [
        "2022-03-08",
        "2022-03-09",
        "2022-03-10",
        "2022-03-11",
        "2022-03-12",
        "2022-03-13",
        "2022-03-14",
    ],
    "people_number": 2,
    "budget": 4400,
    "local_constraint": {"cuisine": ["Cajun", "Seafood"], "avoid": [], "room_type": "entire room", "children_under_10": True},
}

state = {
    "current_city": "New Orleans",
    "day_idx": 2,
    "used_restaurant_ids": [101, 109, 155],
    "budget": {"total": 4400, "spent": 1200, "remaining": 3200},
}

slot = {"type": "meal", "meal_type": "dinner", "city": "New Orleans", "top_k": 30}


# -----------------------
# 2) Fake Restaurants DB (replace with your real CSV/DB)
# -----------------------
def build_fake_restaurants_df() -> pd.DataFrame:
    # A tiny dataset that is sufficient for testing logic.
    rows = [
        {"id": 101, "Name": "Bayou Bites", "City": "New Orleans", "Meal Type": "dinner", "Cuisine": "Cajun", "Average Cost": 60, "Rating": 4.6},
        {"id": 109, "Name": "Seafood Central", "City": "New Orleans", "Meal Type": "dinner", "Cuisine": "Seafood", "Average Cost": 85, "Rating": 4.5},
        {"id": 155, "Name": "Cajun Corner", "City": "New Orleans", "Meal Type": "dinner", "Cuisine": "Cajun", "Average Cost": 70, "Rating": 4.4},
        {"id": 201, "Name": "Gumbo House", "City": "New Orleans", "Meal Type": "dinner", "Cuisine": "Cajun", "Average Cost": 75, "Rating": 4.7},
        {"id": 202, "Name": "Oyster & Co", "City": "New Orleans", "Meal Type": "dinner", "Cuisine": "Seafood", "Average Cost": 95, "Rating": 4.8},
        {"id": 203, "Name": "Fusion Wharf", "City": "New Orleans", "Meal Type": "dinner", "Cuisine": "Seafood;Cajun", "Average Cost": 110, "Rating": 4.3},
        {"id": 204, "Name": "Budget Po-Boys", "City": "New Orleans", "Meal Type": "dinner", "Cuisine": "Seafood", "Average Cost": 35, "Rating": 4.1},
        {"id": 205, "Name": "Late Night NOLA", "City": "New Orleans", "Meal Type": "dinner", "Cuisine": "American", "Average Cost": 40, "Rating": 4.2},
        {"id": 301, "Name": "Baton Rouge Cajun", "City": "Baton Rouge", "Meal Type": "dinner", "Cuisine": "Cajun", "Average Cost": 55, "Rating": 4.4},
    ]
    return pd.DataFrame(rows)


# -----------------------
# 3) Deterministic retrieval tool
# -----------------------
def search_restaurants(
    df: pd.DataFrame,
    *,
    city: str,
    meal_type: str,
    cuisine_any: Optional[List[str]] = None,
    exclude_ids: Optional[List[int]] = None,
    max_avg_cost_per_person: Optional[float] = None,
    top_k: int = 30,
    sort: str = "rating_desc_cost_asc",
) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
    """Deterministic tool: filters + sorting + top_k. Returns candidates + meta."""
    meta: Dict[str, Any] = {"filters": {}, "counts": {}}
    work = df.copy()

    meta["counts"]["start"] = len(work)

    # hard filters
    work = work[work["City"].astype(str).str.lower() == city.lower()]
    meta["counts"]["after_city"] = len(work)

    work = work[work["Meal Type"].astype(str).str.lower() == meal_type.lower()]
    meta["counts"]["after_meal_type"] = len(work)

    if exclude_ids:
        work = work[~work["id"].isin(exclude_ids)]
    meta["counts"]["after_exclude"] = len(work)

    # soft filters
    if cuisine_any:
        pat = "|".join(re.escape(x.lower()) for x in cuisine_any)
        work = work[work["Cuisine"].astype(str).str.lower().str.contains(pat, na=False)]
    meta["counts"]["after_cuisine_any"] = len(work)

    if max_avg_cost_per_person is not None:
        work = work[pd.to_numeric(work["Average Cost"], errors="coerce") <= float(max_avg_cost_per_person)]
    meta["counts"]["after_cost_cap"] = len(work)

    # sorting
    if sort == "rating_desc_cost_asc":
        work["_rating"] = pd.to_numeric(work["Rating"], errors="coerce")
        work["_cost"] = pd.to_numeric(work["Average Cost"], errors="coerce")
        work = work.sort_values(by=["_rating", "_cost"], ascending=[False, True])
    elif sort == "cost_asc_rating_desc":
        work["_rating"] = pd.to_numeric(work["Rating"], errors="coerce")
        work["_cost"] = pd.to_numeric(work["Average Cost"], errors="coerce")
        work = work.sort_values(by=["_cost", "_rating"], ascending=[True, False])

    # top-k
    work = work.head(int(top_k))

    candidates: List[Dict[str, Any]] = []
    for _, r in work.iterrows():
        candidates.append(
            {
                "id": int(r["id"]),
                "name": str(r["Name"]),
                "city": str(r["City"]),
                "meal_type": str(r["Meal Type"]),
                "cuisine": str(r["Cuisine"]),
                "avg_cost": float(r["Average Cost"]),
                "rating": float(r["Rating"]),
            }
        )

    meta["counts"]["returned"] = len(candidates)
    meta["applied"] = {
        "city": city,
        "meal_type": meal_type,
        "cuisine_any": cuisine_any,
        "exclude_ids": exclude_ids,
        "max_avg_cost_per_person": max_avg_cost_per_person,
        "top_k": top_k,
        "sort": sort,
    }
    return candidates, meta


# -----------------------
# 4) LLM calling helpers (Ollama-compatible)
# -----------------------
@dataclass
class LLMConfig:
    base_url: str = "http://127.0.0.1:11434"
    model: str = "deepseek"  # change to your model
    timeout: float = 120.0


def call_chat_completions(cfg: LLMConfig, messages: List[Dict[str, str]], temperature: float = 0.0) -> str:
    """Calls /v1/chat/completions (Ollama)."""
    url = f"{cfg.base_url.rstrip('/')}/v1/chat/completions"
    payload = {"model": cfg.model, "messages": messages, "temperature": temperature}
    r = requests.post(url, json=payload, timeout=cfg.timeout)
    r.raise_for_status()
    data = r.json()
    return data["choices"][0]["message"]["content"]


def call_generate(cfg: LLMConfig, prompt: str) -> str:
    """Fallback: calls /api/generate (Ollama classic)."""
    url = f"{cfg.base_url.rstrip('/')}/api/generate"
    payload = {"model": cfg.model, "prompt": prompt, "stream": False}
    r = requests.post(url, json=payload, timeout=cfg.timeout)
    r.raise_for_status()
    return r.json().get("response", "")


def extract_json_object(text: str) -> Dict[str, Any]:
    """
    Robust-ish JSON extractor: finds first {...} block and parses it.
    Works for models that sometimes wrap JSON in text.
    """
    m = re.search(r"\{.*\}", text, flags=re.S)
    if not m:
        raise ValueError("No JSON object found in LLM output.")
    return json.loads(m.group(0))


# -----------------------
# 5) Prompt: Ask LLM to produce QuerySpec only
# -----------------------
def build_queryspec_prompt(parsed: dict, state: dict, slot: dict) -> List[Dict[str, str]]:
    schema = {
        "tool": "search_restaurants",
        "arguments": {
            "city": "string (required)",
            "meal_type": "string (required)",
            "cuisine_any": "list[string] | null",
            "exclude_ids": "list[int] | null",
            "max_avg_cost_per_person": "number | null",
            "top_k": "int",
            "sort": "rating_desc_cost_asc | cost_asc_rating_desc",
        },
        "fallbacks": [
            {"when": "no_results", "action": "relax_cost_cap", "scale": 1.25},
            {"when": "still_no_results", "action": "drop_cuisine_filter"},
        ],
    }

    system = (
        "You are a query-planning controller for a deterministic database tool. "
        "You MUST output ONLY a single JSON object and nothing else. "
        "Do NOT invent any restaurants. "
        "Your task: produce a tool call spec that can be executed as-is."
    )

    user = {
        "task": "Generate a QuerySpec JSON to retrieve candidate restaurants for the current planning slot.",
        "parsed": parsed,
        "state": state,
        "slot": slot,
        "available_tool_schema": schema,
        "notes": [
            "Hard constraints: city, meal_type, exclude_ids must be applied.",
            "Soft constraints: cuisine_any is preferred if it doesn't eliminate all options.",
            "Budget: infer max_avg_cost_per_person from remaining budget in state; be conservative.",
            "Return top_k candidates as requested by slot.",
            "Sort by rating desc then cost asc, unless you explain otherwise in JSON fields (but still JSON-only).",
        ],
    }

    return [
        {"role": "system", "content": system},
        {"role": "user", "content": json.dumps(user, ensure_ascii=False)},
    ]


# -----------------------
# 6) End-to-end test
# -----------------------
def main() -> None:
    cfg = LLMConfig(
        base_url="http://127.0.0.1:11434",
        model="deepseek-r1:14b",  # change to your local model name, e.g. "deepseek-r1:14b"
        timeout=666.0,
    )

    df = build_fake_restaurants_df()

    # Round 1: Ask LLM for QuerySpec
    messages = build_queryspec_prompt(parsed, state, slot)
    print("\n=== LLM: produce QuerySpec ===")
    try:
        llm_text = call_chat_completions(cfg, messages, temperature=0.0)
    except Exception as e:
        print(f"[warn] /v1/chat/completions failed: {e}\nTrying /api/generate fallback...\n")
        llm_text = call_generate(cfg, prompt=messages[0]["content"] + "\n" + messages[1]["content"])

    print(llm_text)

    queryspec = extract_json_object(llm_text)
    assert queryspec.get("tool") == "search_restaurants", f"Unexpected tool: {queryspec.get('tool')}"
    args = queryspec.get("arguments") or {}

    # Execute deterministic tool
    candidates, meta = search_restaurants(df, **args)
    print("\n=== Tool execution meta ===")
    print(json.dumps(meta, indent=2, ensure_ascii=False))
    print("\n=== Candidates returned ===")
    print(json.dumps(candidates, indent=2, ensure_ascii=False))

    # Round 2 (optional): If empty, ask LLM to adapt based on meta
    if not candidates:
        print("\n=== No candidates. Ask LLM to adapt QuerySpec based on meta ===")
        adapt_messages = [
            {"role": "system", "content": "You must output ONLY one JSON object (QuerySpec). No extra text."},
            {
                "role": "user",
                "content": json.dumps(
                    {
                        "previous_queryspec": queryspec,
                        "tool_meta": meta,
                        "instruction": "Revise the QuerySpec using fallbacks (e.g., relax cost cap or drop cuisine filter) to obtain non-empty results while keeping hard constraints.",
                    },
                    ensure_ascii=False,
                ),
            },
        ]
        llm_text2 = call_chat_completions(cfg, adapt_messages, temperature=0.0)
        print(llm_text2)
        queryspec2 = extract_json_object(llm_text2)
        candidates2, meta2 = search_restaurants(df, **(queryspec2.get("arguments") or {}))
        print("\n=== Tool execution meta (round2) ===")
        print(json.dumps(meta2, indent=2, ensure_ascii=False))
        print("\n=== Candidates returned (round2) ===")
        print(json.dumps(candidates2, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()



=== LLM: produce QuerySpec ===


KeyboardInterrupt: 