In [1]:
import json
import re


In [2]:
from langchain.llms import Ollama

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
llm = Ollama(model='llama3.2')

  llm = Ollama(model='llama3.2')


In [4]:
import json
import re

# --- small safe wrapper to call your llm variable (handles common call styles) ---
def call_llm(prompt: str) -> str:
    try:
        out = llm(prompt)                 # callable
        if isinstance(out, str): return out
        if hasattr(out, 'text'): return out.text
        return str(out)
    except Exception:
        pass
    try:
        out = llm.generate(prompt)       # .generate()
        if isinstance(out, str): return out
        if isinstance(out, dict) and 'choices' in out:
            return ''.join(c.get('content','') for c in out['choices'])
        if hasattr(out, 'text'): return out.text
        return str(out)
    except Exception:
        pass
    try:
        out = llm.complete(prompt)       # .complete()
        if isinstance(out, str): return out
        if hasattr(out, 'text'): return out.text
        return str(out)
    except Exception:
        pass
    raise RuntimeError("Unable to call llm. Ensure `llm` is callable or supports .generate/.complete")



In [8]:
# --- decomposition prompt (concise, JSON-first) ---
DECOMP_PROMPT = (
    "You are a reasoning engine. Decompose the user's problem into a short ordered list "
    "of clear, atomic subtasks needed to solve it.\n"
    "Return output as JSON: {{ \"subtasks\": [ ... ] }} if possible. Be concise.\n"
    "User query: \"\"\"{query}\"\"\""
)

# --- function that returns list of subtasks only ---
def decompose_query(query: str) -> list:
    """
    Call the llm and return a list of subtasks.
    This version avoids KeyError by escaping literal braces in the prompt.
    """
    raw = call_llm(DECOMP_PROMPT.format(query=query))
    raw = (raw or "").strip()
    # Try JSON parse first
    try:
        obj = json.loads(raw)
        if isinstance(obj, dict) and 'subtasks' in obj and isinstance(obj['subtasks'], list):
            return [s.strip() for s in obj['subtasks'] if isinstance(s, str) and s.strip()]
    except Exception:
        pass
    # Try to find a JSON-like substring inside the model output
    try:
        jmatch = re.search(r'\{.*"subtasks".*\}', raw, flags=re.S)
        if jmatch:
            obj = json.loads(jmatch.group(0))
            if isinstance(obj, dict) and 'subtasks' in obj and isinstance(obj['subtasks'], list):
                return [s.strip() for s in obj['subtasks'] if isinstance(s, str) and s.strip()]
    except Exception:
        pass
    # Heuristic fallback: split numbered or dashed lists
    lines = []
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        # drop common leading markers
        line = re.sub(r'^[0-9]+[)\.\-\s]+', '', line)
        line = re.sub(r'^[-*\u2022]\s*', '', line)
        # ignore long narrative lines that don't look like steps
        if 3 <= len(line) <= 300:
            lines.append(line)
    if lines:
        return lines
    # Last resort: return the original query as single step
    return [query.strip()]

In [9]:
# --- small runner that prints + returns subtasks ---
def run_decomposer(query: str):
    subtasks = decompose_query(query)
    print("Subtasks:")
    for i, s in enumerate(subtasks, 1):
        print(f" {i}. {s}")
    return subtasks

In [10]:
subtasks = run_decomposer("Write a Python script that downloads images from a URL list, resizes them to 512x512, and saves as PNG.")
print(subtasks)

  out = llm(prompt)                 # callable


Subtasks:
 1. Install necessary libraries (e.g., requests, Pillow)
 2. Create a list of URLs for images to download
 3. Download each image from URL and save as temporary file
 4. Use Pillow to resize each image to 512x512
 5. Convert resized image to PNG format and save
['Install necessary libraries (e.g., requests, Pillow)', 'Create a list of URLs for images to download', 'Download each image from URL and save as temporary file', 'Use Pillow to resize each image to 512x512', 'Convert resized image to PNG format and save']


In [12]:
subtasks = run_decomposer("how to make a plane engine faster?")
print(subtasks)

Subtasks:
 1. { "subtasks": [
 2. Identify the type of plane engine (e.g. turbofan, turboprop, jet engine),
 3. Research the current design specifications and performance characteristics of the engine,
 4. Explore aerodynamic and thermodynamic optimization techniques for increasing engine speed,
 5. Investigate advanced materials or manufacturing techniques that could improve engine durability and efficiency,
 6. Consider modifying the engine's compressor, turbine, or nozzle to increase airflow, pressure ratio, or exhaust velocity
 7. ] }
['{ "subtasks": [', 'Identify the type of plane engine (e.g. turbofan, turboprop, jet engine),', 'Research the current design specifications and performance characteristics of the engine,', 'Explore aerodynamic and thermodynamic optimization techniques for increasing engine speed,', 'Investigate advanced materials or manufacturing techniques that could improve engine durability and efficiency,', "Consider modifying the engine's compressor, turbine, or

In [14]:
subtasks = run_decomposer("how to develop an app in flutter")
print(subtasks)

Subtasks:
[]


In [11]:
import json, re

# Strong JSON-first prompt (use f-string to avoid brace escaping)
DECOMP_PROMPT_JSON = (
    "You are a reasoning engine. Decompose the user's problem into a short ordered list of clear, "
    "atomic subtasks needed to solve it. OUTPUT MUST BE STRICT JSON and nothing else, like:\n"
    '{"subtasks": ["step1", "step2", ...]}\n\n'
    "Be concise. Do not add commentary.\n\n"
    "User query:\n"
    "'''{query}'''\n"
)

DECOMP_PROMPT_SIMPLE = (
    "Decompose the user's request into a short ordered list of subtasks (plain text lines). "
    "Return only the list (one step per line). Keep steps atomic and actionable.\n\n"
    "User query:\n"
    "'''{query}'''\n"
)

def heuristic_fallback_for_query(query: str):
    """Deterministic fallback if LLM fails. Returns a reasonable generic decomposition for common tasks."""
    q = query.lower()
    # If it's about building an app (mobile/web) produce a standard flow
    if any(w in q for w in ("app", "application", "flutter", "react native", "android", "ios")):
        return [
            "Define app purpose, target users, and core features",
            "Design UI/UX sketches and basic navigation flow",
            "Choose tech stack and set up project (Flutter SDK, project structure)",
            "Implement core screens and navigation",
            "Implement data layer (local storage / API integration)",
            "Add assets and handle media/sizes",
            "Implement app logic and state management",
            "Test features locally (emulator / device) and fix bugs",
            "Prepare release (signing, build flavors) and deploy to app store / Play Store",
            "Plan post-release monitoring and updates"
        ]
    # If it's a generic 'how to' question give general research/decomposition steps
    return [
        "Clarify objective and desired outcome",
        "List required inputs and constraints",
        "Break the task into 3–6 atomic steps",
        "For each step, identify necessary tools or resources",
        "Execute steps in order, verify results after each",
        "Summarize and document the final result"
    ]

def parse_json_subtasks(raw: str):
    """Try to safely extract JSON substring with 'subtasks' and return list or None."""
    if not raw:
        return None
    raw = raw.strip()
    # try direct parse
    try:
        obj = json.loads(raw)
        if isinstance(obj, dict) and 'subtasks' in obj and isinstance(obj['subtasks'], list):
            return [s.strip() for s in obj['subtasks'] if isinstance(s, str) and s.strip()]
    except Exception:
        pass
    # try to find JSON-like block
    jmatch = re.search(r'\{[^}]*"subtasks"[^}]*\}', raw, flags=re.S)
    if jmatch:
        try:
            obj = json.loads(jmatch.group(0))
            if isinstance(obj, dict) and 'subtasks' in obj and isinstance(obj['subtasks'], list):
                return [s.strip() for s in obj['subtasks'] if isinstance(s, str) and s.strip()]
        except Exception:
            pass
    return None

def decompose_query(query: str) -> list:
    # 1) Try strict JSON output from LLM
    try:
        raw = call_llm(DECOMP_PROMPT_JSON.format(query=query))
    except Exception:
        raw = ""
    parsed = parse_json_subtasks(raw)
    if parsed:
        return parsed

    # 2) Retry with a simpler plain-list prompt
    try:
        raw2 = call_llm(DECOMP_PROMPT_SIMPLE.format(query=query))
    except Exception:
        raw2 = ""
    # try to extract lines from raw2
    lines = []
    if raw2:
        for line in raw2.splitlines():
            line = line.strip()
            if not line:
                continue
            # drop leading bullets/numbers
            line = re.sub(r'^[0-9]+[)\.\-\s]+', '', line)
            line = re.sub(r'^[-*\u2022]\s*', '', line)
            if 3 <= len(line) <= 300:
                lines.append(line)
    if lines:
        return lines

    # 3) Deterministic heuristic fallback based on query
    return heuristic_fallback_for_query(query)

def run_decomposer(query: str):
    subtasks = decompose_query(query)
    print("Subtasks:")
    for i, s in enumerate(subtasks, 1):
        print(f" {i}. {s}")
    return subtasks

# --- quick local test (you can run this) ---
# subtasks = run_decomposer("how to develop an app in flutter")
# print(subtasks)


In [17]:
subtasks = run_decomposer("how to develop an app in flutter")
print(subtasks)

Subtasks:
 1. Define app purpose, target users, and core features
 2. Design UI/UX sketches and basic navigation flow
 3. Choose tech stack and set up project (Flutter SDK, project structure)
 4. Implement core screens and navigation
 5. Implement data layer (local storage / API integration)
 6. Add assets and handle media/sizes
 7. Implement app logic and state management
 8. Test features locally (emulator / device) and fix bugs
 9. Prepare release (signing, build flavors) and deploy to app store / Play Store
 10. Plan post-release monitoring and updates
['Define app purpose, target users, and core features', 'Design UI/UX sketches and basic navigation flow', 'Choose tech stack and set up project (Flutter SDK, project structure)', 'Implement core screens and navigation', 'Implement data layer (local storage / API integration)', 'Add assets and handle media/sizes', 'Implement app logic and state management', 'Test features locally (emulator / device) and fix bugs', 'Prepare release (s

In [16]:
subtasks = run_decomposer("how high is the sky")
print(subtasks)

Subtasks:
 1. Clarify objective and desired outcome
 2. List required inputs and constraints
 3. Break the task into 3–6 atomic steps
 4. For each step, identify necessary tools or resources
 5. Execute steps in order, verify results after each
 6. Summarize and document the final result
['Clarify objective and desired outcome', 'List required inputs and constraints', 'Break the task into 3–6 atomic steps', 'For each step, identify necessary tools or resources', 'Execute steps in order, verify results after each', 'Summarize and document the final result']


In [1]:
from llm_utils import run_decomposer

subtasks = run_decomposer("what is (a^2 - b^2)? how can i solve it effeciently?")
print(subtasks)

  from .autonotebook import tqdm as notebook_tqdm
  llm = Ollama(model=MODEL)
  out = llm(prompt)                 # callable


Subtasks:
 1. Identify the algebraic expression (a^2 - b^2) as a difference of squares.
 2. Recognize the formula for factoring a difference of squares: (x + y)(x - y).
 3. Apply the formula to factor (a^2 - b^2): (a+b)(a-b).
 4. Simplify the expression if necessary.
 5. Find an efficient method to solve the resulting expression, such as expanding it or using algebraic identities.
['Identify the algebraic expression (a^2 - b^2) as a difference of squares.', 'Recognize the formula for factoring a difference of squares: (x + y)(x - y).', 'Apply the formula to factor (a^2 - b^2): (a+b)(a-b).', 'Simplify the expression if necessary.', 'Find an efficient method to solve the resulting expression, such as expanding it or using algebraic identities.']


In [5]:
# inference.py
import json
import time
import uuid
from typing import List, Dict, Any, Tuple, Union
from langchain.llms import Ollama
llm_verify = Ollama(model='mistral')  # used for verification + repair
from llm_utils import run_decomposer


# -------------------------
# Utility: JSON extraction
# -------------------------
def extract_json_from_text(text: str) -> Any:
    text = text.strip()
    try:
        return json.loads(text)
    except Exception:
        pass
    # Find first JSON block manually
    start = text.find("{")
    end = text.rfind("}")
    if start != -1 and end != -1:
        try:
            return json.loads(text[start:end + 1])
        except Exception:
            pass
    raise ValueError("Could not extract JSON from verifier output.")


# -------------------------
# Subtask normalization
# -------------------------
def _make_subtask_dict(id_: str = None, inp: str = "", out: str = "") -> Dict[str, str]:
    return {"id": id_ or str(uuid.uuid4()), "input": inp or "", "output": out or ""}


def normalize_subtasks(raw_subtasks: Union[str, Dict, List, Tuple, Any]) -> List[Dict[str, str]]:
    """Convert run_decomposer output into canonical list of dicts"""
    if isinstance(raw_subtasks, list):
        normalized = []
        for item in raw_subtasks:
            if isinstance(item, dict):
                sid = item.get("id") or item.get("name") or str(uuid.uuid4())
                inp = item.get("input") if item.get("input") is not None else item.get("prompt", "")
                out = item.get("output") if item.get("output") is not None else item.get("result") or item.get("answer") or ""
                normalized.append(_make_subtask_dict(sid, str(inp), str(out)))
            elif isinstance(item, (tuple, list)):
                if len(item) == 2:
                    inp, out = item
                    normalized.append(_make_subtask_dict(str(uuid.uuid4()), str(inp), str(out)))
                elif len(item) >= 3:
                    sid, inp, out = item[0], item[1], item[2]
                    normalized.append(_make_subtask_dict(str(sid), str(inp), str(out)))
            else:
                normalized.append(_make_subtask_dict(None, "", str(item)))
        return normalized

    if isinstance(raw_subtasks, dict):
        if all(not isinstance(v, (dict, list, tuple)) for v in raw_subtasks.values()):
            normalized = []
            for k, v in raw_subtasks.items():
                normalized.append(_make_subtask_dict(str(k), "", str(v)))
            return normalized
        else:
            sid = raw_subtasks.get("id") or raw_subtasks.get("name") or str(uuid.uuid4())
            inp = raw_subtasks.get("input") or raw_subtasks.get("prompt") or ""
            out = raw_subtasks.get("output") or raw_subtasks.get("result") or raw_subtasks.get("answer") or ""
            return [_make_subtask_dict(sid, str(inp), str(out))]

    return [_make_subtask_dict(None, "", str(raw_subtasks))]


# -------------------------
# Prompt builders
# -------------------------
def build_verifier_prompt(task_id: str, original_prompt: str, subtasks: List[Dict[str, Any]], constraints: List[str] = None) -> str:
    constraints = constraints or []
    constraints_text = "\n".join(f"- {c}" for c in constraints) if constraints else "None"
    subs = normalize_subtasks(subtasks)

    subtasks_text = ""
    for s in subs:
        sid = s.get("id") or str(uuid.uuid4())
        inp = s.get("input", "")
        out = s.get("output", "")
        subtasks_text += f"ID: {sid}\nINPUT: {inp}\nOUTPUT: {out}\n---\n"

    schema = {
        "task_id": "string",
        "verifications": [
            {
                "subtask_id": "string",
                "verdict": "accept|partial_accept|reject",
                "score": "0.0",
                "issues": ["string", "..."],
                "suggested_fix": "string (optional)",
                "confidence_interval": [0.0, 1.0],
                "evidence": "string"
            }
        ],
        "aggregate_decision": "accept|partial_accept|repair_required|escalate",
        "repair_plan": {"type": "auto|human_review|call_producer", "instructions": "string"}
    }

    prompt = f"""
You are a verification model. ONLY output a single JSON object following this schema exactly. No extra commentary.

SCHEMA: {json.dumps(schema, indent=2)}

TASK_ID: {task_id}

Original prompt:
{original_prompt}

Constraints:
{constraints_text}

Subtasks:
{subtasks_text}

Return the JSON object now.
"""
    return prompt.strip()


def build_repair_prompt(original_prompt: str, subtask: Dict[str, Any], verifier_issues: List[str]) -> str:
    return f"""
You are a repair model. The original overall prompt:
{original_prompt}

Subtask ID: {subtask.get('id')}
Subtask input:
{subtask.get('input')}

Subtask current output:
{subtask.get('output')}

Verifier issues:
{json.dumps(verifier_issues, indent=2)}

Please return JSON:
{{ "subtask_id": "<id>", "corrected_output": "<fixed output>", "justification": "<1-2 sentence reason>" }}
"""


# -------------------------
# Verification + repair
# -------------------------
def run_verification_and_repair(
    task_id: str,
    original_prompt: str,
    subtasks: List[Dict[str, Any]],
    constraints: List[str] = None,
    max_repair_attempts: int = 2,
) -> Dict[str, Any]:
    """Run the mistral verifier (via Ollama) on subtasks and optionally auto-repair."""
    # 1. Verification
    verifier_prompt = build_verifier_prompt(task_id, original_prompt, subtasks, constraints)
    raw_verifier_out = llm_verify(verifier_prompt)
    try:
        verifier_json = extract_json_from_text(raw_verifier_out)
    except Exception as e:
        return {
            "task_id": task_id,
            "error": "verifier_parse_failed",
            "raw_output": raw_verifier_out,
            "exception": str(e),
        }

    verifications = verifier_json.get("verifications", [])
    aggregate_decision = verifier_json.get("aggregate_decision", "repair_required")
    verif_by_id = {v.get("subtask_id"): v for v in verifications if "subtask_id" in v}

    subtasks_by_id = {s.get("id", str(uuid.uuid4())): dict(s) for s in normalize_subtasks(subtasks)}
    repair_history = []

    # 2. Repair loop
    for sub_id, sub in subtasks_by_id.items():
        v = verif_by_id.get(sub_id)
        verdict = v.get("verdict") if v else "partial_accept"
        issues = v.get("issues", []) if v else []

        if verdict == "accept":
            continue

        # Try auto repair
        for attempt in range(max_repair_attempts):
            repair_prompt = build_repair_prompt(original_prompt, sub, issues)
            raw_repair_out = llm_verify(repair_prompt)
            try:
                repair_json = extract_json_from_text(raw_repair_out)
                corrected = repair_json.get("corrected_output")
                sub["output"] = corrected
                repair_history.append(
                    {
                        "subtask_id": sub_id,
                        "attempt": attempt + 1,
                        "corrected_output": corrected,
                        "justification": repair_json.get("justification"),
                    }
                )
                # re-verify this subtask
                recheck_prompt = build_verifier_prompt(task_id + f"__recheck_{sub_id}", original_prompt, [sub], constraints)
                raw_recheck = llm_verify(recheck_prompt)
                recheck_json = extract_json_from_text(raw_recheck)
                verdict = recheck_json["verifications"][0]["verdict"]
                if verdict == "accept":
                    break
            except Exception as e:
                repair_history.append({"subtask_id": sub_id, "error": str(e)})
                break

    # 3. Final aggregation of subtasks + verifier decisions
    final_subtasks = []
    for sub_id, sub in subtasks_by_id.items():
        v = verif_by_id.get(sub_id, {})
        final_subtasks.append(
            {
                "id": sub_id,
                "input": sub.get("input"),
                "output": sub.get("output"),
                "verdict": v.get("verdict", "unknown"),
                "score": v.get("score"),
                "issues": v.get("issues"),
                "evidence": v.get("evidence"),
            }
        )

    return {
        "task_id": task_id,
        "original_prompt": original_prompt,
        "aggregate_decision": aggregate_decision,
        "verifier_raw": verifier_json,
        "final_subtasks": final_subtasks,
        "repair_history": repair_history,
        "timestamp": time.time(),
    }


# -------------------------
# Aggregator
# -------------------------
def aggregate_final_answer(verified_result: Dict[str, Any]) -> Dict[str, Any]:
    accepted_outputs = [
        s.get("output", "").strip()
        for s in verified_result.get("final_subtasks", [])
        if s.get("verdict") in ("accept", "partial_accept")
    ]
    return {
        "task_id": verified_result["task_id"],
        "final_answer": "\n\n".join(accepted_outputs).strip(),
        "aggregate_decision": verified_result.get("aggregate_decision"),
        "provenance": {
            "verifier": verified_result.get("verifier_raw"),
            "repair_history": verified_result.get("repair_history"),
        },
        "timestamp": time.time(),
    }


# -------------------------
# Pipeline Entry
# -------------------------
def run_inference_pipeline(prompt: str, task_id: str = None, constraints: List[str] = None) -> Dict[str, Any]:
    task_id = task_id or f"task_{int(time.time())}"
    raw_subtasks = run_decomposer(prompt)
    print(f"[DEBUG] Decomposer returned type={type(raw_subtasks)}")

    subtasks = normalize_subtasks(raw_subtasks)
    verified = run_verification_and_repair(task_id, prompt, subtasks, constraints or [])
    final = aggregate_final_answer(verified)
    return {"task_id": task_id, "original_prompt": prompt, "verified": verified, "final": final}





In [6]:
example_prompt = "what is (a^2 - b^2)? how can i solve it efficiently?"
out = run_inference_pipeline(example_prompt)
print(json.dumps(out["final"], indent=2))

Subtasks:
 1. Expand the given expression a^2 - b^2 using algebraic identities.
 2. Simplify the expanded expression to obtain its simplified form.
 3. Identify any patterns or factorable terms within the simplified expression.
 4. Apply factoring techniques (if applicable) to further simplify the expression.
 5. Provide an efficient method for calculating the value of the simplified expression.
[DEBUG] Decomposer returned type=<class 'list'>
{
  "task_id": "task_1762583871",
  "final_answer": "Expand the given expression a^2 - b^2 using algebraic identities.\n\nSimplify the expanded expression to obtain its simplified form.\n\nIdentify any patterns or factorable terms within the simplified expression.\n\nProvide an efficient method for calculating the value of the simplified expression.",
  "aggregate_decision": "accept",
  "provenance": {
    "verifier": {
      "task_id": "task_1762583871",
      "verifications": [
        {
          "subtask_id": "5a4c3c0d-83c7-4f59-8209-0991cae4d

In [12]:
# fast_inference_final_subtasks.py
import json
import time
import uuid
from typing import Any, Dict, List, Union

from langchain.llms import Ollama
llm_verify = Ollama(model='mistral')  # small, fast verifier (must be configured in your env)

# try to import user's decomposer, else fallback
from llm_utils import run_decomposer  


# --- helpers (same as before) ---
def _make_subtask_dict(id_: str = None, inp: str = "", out: str = "") -> Dict[str, str]:
    return {"id": id_ or str(uuid.uuid4()), "input": inp or "", "output": out or ""}


def normalize_subtasks(raw_subtasks: Union[str, Dict, List, Any]) -> List[Dict[str, str]]:
    if isinstance(raw_subtasks, list):
        normalized = []
        for item in raw_subtasks:
            if isinstance(item, dict):
                sid = item.get("id") or item.get("name") or str(uuid.uuid4())
                inp = item.get("input") if item.get("input") is not None else item.get("prompt", "")
                out = item.get("output") if item.get("output") is not None else item.get("result") or item.get("answer") or ""
                normalized.append(_make_subtask_dict(sid, str(inp), str(out)))
            elif isinstance(item, (tuple, list)):
                if len(item) == 2:
                    inp, out = item
                    normalized.append(_make_subtask_dict(str(uuid.uuid4()), str(inp), str(out)))
                elif len(item) >= 3:
                    sid, inp, out = item[0], item[1], item[2]
                    normalized.append(_make_subtask_dict(str(sid), str(inp), str(out)))
                else:
                    normalized.append(_make_subtask_dict(None, "", str(item)))
            else:
                normalized.append(_make_subtask_dict(None, "", str(item)))
        return normalized

    if isinstance(raw_subtasks, dict):
        if all(not isinstance(v, (dict, list, tuple)) for v in raw_subtasks.values()):
            normalized = []
            for k, v in raw_subtasks.items():
                normalized.append(_make_subtask_dict(str(k), "", str(v)))
            return normalized
        else:
            sid = raw_subtasks.get("id") or raw_subtasks.get("name") or str(uuid.uuid4())
            inp = raw_subtasks.get("input") or raw_subtasks.get("prompt") or ""
            out = raw_subtasks.get("output") or raw_subtasks.get("result") or raw_subtasks.get("answer") or ""
            return [_make_subtask_dict(sid, str(inp), str(out))]

    return [_make_subtask_dict(None, "", str(raw_subtasks))]


def extract_json_from_text(text: str) -> Any:
    text = text.strip()
    try:
        return json.loads(text)
    except Exception:
        pass
    s = text.find("{")
    e = text.rfind("}")
    if s != -1 and e != -1 and e > s:
        try:
            return json.loads(text[s:e+1])
        except Exception:
            pass
    s = text.find("[")
    e = text.rfind("]")
    if s != -1 and e != -1 and e > s:
        try:
            return json.loads(text[s:e+1])
        except Exception:
            pass
    raise ValueError("No JSON found in verifier output.")


# --- compact verifier prompt ---
def build_quick_verifier_prompt(original_prompt: str, subtasks: List[Dict[str, Any]], domain_hint: str = "") -> str:
    brief = ""
    for i, s in enumerate(subtasks, start=1):
        text = s.get("input") or s.get("output") or ""
        brief += f"{i}. {text}\n"

    prompt = (
        "You are a fast verifier. Given the original task and the list of subtasks below, answer in JSON only:\n"
        " - 'accept_all': boolean, are these subtasks sufficient to solve the main task?\n"
        " - 'missing': short list of missing concepts or checks (if any)\n"
        " - 'suggestions': short list of short suggested subtasks to add (if any)\n\n"
        f"Domain: {domain_hint}\n"
        f"Original task: {original_prompt}\n"
        f"Subtasks:\n{brief}\n"
        "Return EXACT JSON like: {\"accept_all\": true, \"missing\": [], \"suggestions\": []}\n"
        "Output only JSON."
    )
    return prompt


# --- fast verifier call ---
def fast_verify_subtasks(original_prompt: str, raw_subtasks: Union[List[Any], Any], domain_hint: str = "") -> Dict[str, Any]:
    subtasks = normalize_subtasks(raw_subtasks)
    prompt = build_quick_verifier_prompt(original_prompt, subtasks, domain_hint)
    raw = llm_verify(prompt)
    try:
        parsed = extract_json_from_text(raw)
    except Exception:
        return {
            "accept_all": False,
            "missing": ["verifier_parse_failed"],
            "suggestions": ["Please retry verification or use a different verifier model."],
            "subtasks": subtasks,
            "raw_verifier": raw
        }

    accept_all = bool(parsed.get("accept_all")) if isinstance(parsed, dict) else False
    missing = parsed.get("missing") if isinstance(parsed, dict) else []
    suggestions = parsed.get("suggestions") if isinstance(parsed, dict) else []

    return {
        "accept_all": accept_all,
        "missing": missing or [],
        "suggestions": suggestions or [],
        "subtasks": subtasks,
        "raw_verifier": parsed
    }


# --- updated run_inference_pipeline_fast that always returns final_subtasks ---
def run_inference_pipeline_fast(
    prompt: str,
    domain_hint: str = "",
    auto_extend: bool = False,
    keep_suggestions_field: bool = True
) -> Dict[str, Any]:
    """
    Returns a dict containing:
      - status: 'accepted'|'insufficient'
      - final_subtasks: canonical forwardable subtasks (possibly extended if auto_extend=True)
      - suggestions, missing, raw_verifier
    """
    raw = run_decomposer(prompt)
    print(f"[DEBUG] run_decomposer returned type={type(raw)}, preview={str(raw)[:300]}")

    check = fast_verify_subtasks(prompt, raw, domain_hint=domain_hint)

    canonical = check["subtasks"]  # list of canonical subtask dicts

    final_subtasks = [dict(s) for s in canonical]  # shallow copy

    if check["accept_all"]:
        status = "accepted"
        message = "Subtasks sufficient. Forwarding inference."
    else:
        status = "insufficient"
        message = "Verifier flagged missing points or insufficiencies."
        # If auto_extend, append suggestion strings as new subtasks with empty output
        if auto_extend and check.get("suggestions"):
            for s in check["suggestions"]:
                final_subtasks.append(_make_subtask_dict(None, s, ""))
        # else leave final_subtasks as canonical (caller can handle suggestions)

    result = {
        "status": status,
        "message": message,
        "final_subtasks": final_subtasks,
        "timestamp": time.time(),
        "raw_verifier": check.get("raw_verifier")
    }

    if keep_suggestions_field:
        result["suggestions"] = check.get("suggestions", [])
        result["missing"] = check.get("missing", [])

    return result


# --- quick test ---
if __name__ == "__main__":
    example_prompt = "Design an interview plan to test DSA knowledge for arrays, strings, hashing, and graphs."
    out = run_inference_pipeline_fast(example_prompt, domain_hint="DSA", auto_extend=True)
    print(json.dumps(out, indent=2))


Subtasks:
 1. Determine the scope of each domain (arrays, strings, hashing, graphs) for the interview.
 2. Identify key concepts and common problems within each domain.
 3. Develop a list of potential questions or problems for each domain.
 4. Prioritize the questions based on difficulty and relevance to DSA knowledge.
 5. Outline a general structure for the interview, including format (e.g., take-home, in-person) and length.
 6. Decide on the number of questions or problems to include in the interview plan.
 7. Create a draft of the interview questions and problems, ensuring they meet the required scope and difficulty levels.
 8. Review and refine the interview plan to ensure it effectively tests DSA knowledge for all domains.
[DEBUG] run_decomposer returned type=<class 'list'>, preview=['Determine the scope of each domain (arrays, strings, hashing, graphs) for the interview.', 'Identify key concepts and common problems within each domain.', 'Develop a list of potential questions or p

In [13]:
print("\n=== FINAL SUBTASKS TO FORWARD ===\n", json.dumps(out["final_subtasks"], indent=2))



=== FINAL SUBTASKS TO FORWARD ===
 [
  {
    "id": "339741d7-7150-4a04-adfe-5bb8333b3505",
    "input": "",
    "output": "Determine the scope of each domain (arrays, strings, hashing, graphs) for the interview."
  },
  {
    "id": "6a6c037d-2da3-448d-81c6-6e31a2c24d0b",
    "input": "",
    "output": "Identify key concepts and common problems within each domain."
  },
  {
    "id": "73b2fc6c-55c1-49cd-a91c-bf8cdcd0f739",
    "input": "",
    "output": "Develop a list of potential questions or problems for each domain."
  },
  {
    "id": "5b26d196-2e9f-4d82-8507-cc03e796a7be",
    "input": "",
    "output": "Prioritize the questions based on difficulty and relevance to DSA knowledge."
  },
  {
    "id": "978b91f4-88a8-45f3-aac2-ca7ba8e4a262",
    "input": "",
    "output": "Outline a general structure for the interview, including format (e.g., take-home, in-person) and length."
  },
  {
    "id": "d7f6ca04-2d7f-4075-8a84-0124202a1724",
    "input": "",
    "output": "Decide on the nu

In [14]:
example_prompt = "i want to learn dynammic programming but i am not able to understand the advanced concepts. what should i do?"
out = run_inference_pipeline_fast(example_prompt, domain_hint="DSA", auto_extend=True)
print(json.dumps(out, indent=2))

Subtasks:
 1. Start by learning the basics of dynamic programming using online tutorials or introductory books.
 2. Identify specific areas of dynamic programming that are confusing (e.g., memoization, tabulation).
 3. Find step-by-step explanations or video lectures on those specific topics.
 4. Practice solving problems related to those areas to reinforce understanding.
 5. Join online communities or forums to ask questions and get help from others who have experience with dynamic programming.
 6. Look for courses or tutorials that cater to beginners, such as Coursera's "Dynamic Programming" course by Stanford University.
[DEBUG] run_decomposer returned type=<class 'list'>, preview=['Start by learning the basics of dynamic programming using online tutorials or introductory books.', 'Identify specific areas of dynamic programming that are confusing (e.g., memoization, tabulation).', 'Find step-by-step explanations or video lectures on those specific topics.', 'Practice solving 
{
  "s

In [15]:
print("\n=== FINAL SUBTASKS TO FORWARD ===\n", json.dumps(out["final_subtasks"], indent=2))


=== FINAL SUBTASKS TO FORWARD ===
 [
  {
    "id": "257f4b82-3dfd-4532-b027-670173820fef",
    "input": "",
    "output": "Start by learning the basics of dynamic programming using online tutorials or introductory books."
  },
  {
    "id": "4a0151f3-36ae-429c-b64e-48b4b91118bb",
    "input": "",
    "output": "Identify specific areas of dynamic programming that are confusing (e.g., memoization, tabulation)."
  },
  {
    "id": "717c7484-9b2c-4be4-a63c-ef218e35c6d9",
    "input": "",
    "output": "Find step-by-step explanations or video lectures on those specific topics."
  },
  {
    "id": "6a79b3c5-3a6d-42b1-ada8-1cc707a0e2e8",
    "input": "",
    "output": "Practice solving problems related to those areas to reinforce understanding."
  },
  {
    "id": "acd5e2c0-2642-4516-bcb1-6979773f354b",
    "input": "",
    "output": "Join online communities or forums to ask questions and get help from others who have experience with dynamic programming."
  },
  {
    "id": "9a7eb2af-ec16-46

In [17]:
example_prompt = "how do i learn french?"
out = run_inference_pipeline_fast(example_prompt,auto_extend=True)
print(json.dumps(out, indent=2))

Subtasks:
 1. Identify suitable online language learning platforms or resources.
 2. Determine the level of French proficiency desired (beginner, intermediate, advanced).
 3. Set a realistic study schedule and create a dedicated study space.
 4. Select a French language course or textbook to follow.
 5. Find a language exchange partner or tutor for conversational practice.
[DEBUG] run_decomposer returned type=<class 'list'>, preview=['Identify suitable online language learning platforms or resources.', 'Determine the level of French proficiency desired (beginner, intermediate, advanced).', 'Set a realistic study schedule and create a dedicated study space.', 'Select a French language course or textbook to follow.', 'Find a lang
{
  "status": "accepted",
  "message": "Subtasks sufficient. Forwarding inference.",
  "final_subtasks": [
    {
      "id": "d38d0fdf-9348-4dad-a042-b0048f2095e4",
      "input": "",
      "output": "Identify suitable online language learning platforms or resou

In [22]:
print("\n=== FINAL SUBTASKS TO FORWARD ===\n", json.dumps(out["final_subtasks"], indent=2))


=== FINAL SUBTASKS TO FORWARD ===
 [
  {
    "id": "d38d0fdf-9348-4dad-a042-b0048f2095e4",
    "input": "",
    "output": "Identify suitable online language learning platforms or resources."
  },
  {
    "id": "747ff395-313e-4d4d-af76-4f6556c4143f",
    "input": "",
    "output": "Determine the level of French proficiency desired (beginner, intermediate, advanced)."
  },
  {
    "id": "49eb0c06-e5c0-4e52-829b-64baba4f1c12",
    "input": "",
    "output": "Set a realistic study schedule and create a dedicated study space."
  },
  {
    "id": "9636e595-8c98-4163-9fdd-e666934ec737",
    "input": "",
    "output": "Select a French language course or textbook to follow."
  },
  {
    "id": "248a69b3-0bba-4a94-a322-4a54f49fd002",
    "input": "",
    "output": "Find a language exchange partner or tutor for conversational practice."
  }
]


In [23]:
print("\n=== FORMATTED FINAL SUBTASKS ===")
for i, s in enumerate(out["final_subtasks"], start=1):
    text = s.get("input") or s.get("output") or ""
    print(f"{i}. {text}")



=== FORMATTED FINAL SUBTASKS ===
1. Identify suitable online language learning platforms or resources.
2. Determine the level of French proficiency desired (beginner, intermediate, advanced).
3. Set a realistic study schedule and create a dedicated study space.
4. Select a French language course or textbook to follow.
5. Find a language exchange partner or tutor for conversational practice.
