<a href="https://colab.research.google.com/github/donkeytonk/DIB-R/blob/master/Video_Quiz_UI_Share_Link_v0_04_WORKING.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Version 0.04 Updates:
- Standards & Topic options for Standards Mode

In [3]:
# === ONE-CELL: MyAI101 Quiz Video Maker (1080x1920, Gemini-powered via REST, No MoviePy) ===
# - Vertical HD (1080x1920 @ 24fps), ffmpeg only (threads=1)
# - Gradio UI: Topic, Difficulty(1-10), Count(1-10), Gemini API Key, Model
# - Uses Gemini **v1 REST** (no SDK). Button to list models your key actually has.
# - Randomizes correct answer placement; editable preview table
# - Renders MP4s to /content/out/videos; prints Local/Public URLs

import os, sys, subprocess, random, string, time, gc, warnings, re, json
from dataclasses import dataclass
from typing import List, Tuple, Dict, Optional
from pathlib import Path

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# --- Headless safety ---
# Select SDL's "dummy" audio driver and point XDG_RUNTIME_DIR at a directory
# that is created right below. Presumably this keeps audio/desktop-aware
# libraries quiet on a display-less runtime — TODO confirm which dependency needs it.
os.environ["SDL_AUDIODRIVER"] = "dummy"
os.environ["XDG_RUNTIME_DIR"]  = "/tmp/runtime"
os.makedirs("/tmp/runtime", exist_ok=True)

# --- Deps ---
# Python packages: check_call raises CalledProcessError when pip exits non-zero,
# so a failed pip install aborts the cell here.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                       "pillow>=10.0.0", "gradio>=4.0.0", "pandas>=1.3", "requests>=2.31"])
# System packages: subprocess.call only returns the exit status (ignored here),
# so a failed apt-get install does not stop execution.
subprocess.call(["apt-get", "-y", "install", "-qq",
                 "ffmpeg", "fonts-dejavu-core", "fonts-liberation"])

import requests
import gradio as gr
import pandas as pd
from PIL import Image, ImageDraw, ImageFont

# ---------- RENDER SETTINGS ----------
# Output frame width, height (pixels) and frame rate: vertical 1080x1920 @ 24 fps.
W, H, FPS = 1080, 1920, 24
# Bitrate string; presumably handed to ffmpeg as the video bitrate — TODO confirm at the call site.
BITRATE = "3500k"

# Colour palette as 3-tuples (presumably RGB, 0-255, for Pillow drawing — confirm in the renderer).
GREEN   = (16, 163, 74)
MINT    = (209, 250, 229)
DARK    = (15, 23, 42)
MID     = (51, 65, 85)
BG      = (255, 255, 255)
CHIP_BG = (243, 244, 246)

# Safe-area margins derived from the frame size: 9% of the width, 8% of the
# height at the top and 10% of the height at the bottom.
SAFE_X   = int(W * 0.09)
SAFE_TOP = int(H * 0.08)
SAFE_BOT = int(H * 0.10)

# Title box height (36% of the frame height) and the minimum gap kept below it (10%).
TITLE_BOX_H         = int(H * 0.36)
GAP_BELOW_TITLE_MIN = int(H * 0.10)

# Answer "chip" geometry in pixels; CHIP_GAP is 8.5% of the frame height.
# NOTE(review): exact meaning of CHIP_TX_H / CHIP_TX_PAD depends on the renderer, which is not visible here.
CHIP_H      = 108
CHIP_TX_H   = 80
CHIP_TX_PAD = 90
CHIP_GAP    = int(H * 0.085)

# Call-to-action button size in pixels.
CTA_BUTTON_W = 640
CTA_BUTTON_H = 120

# ------- Readability budgets -------
# Maximum character counts used by lint_item_lengths() for the question,
# each answer option and the explanation respectively.
Q_MAX = 130
OPT_MAX = 38
EXP_MAX = 110

# Suffix text; presumably appended in the preview to mark compacted fields — TODO confirm usage.
CLIP_FLAG = " ·clipped"

# Sentinel markers; _extract_between_markers() returns the text found between them.
BEGIN_JSON = "<<<JSON>>>"
END_JSON   = "<<<END>>>"

import re as _re

# ---------- Packs / Blueprints (starter set) ----------
# Registry of content packs keyed by pack id. Every entry carries:
#   id, name       - the pack id (same as its key) and a display name
#   domain, locale - short classification strings ("ai" / "ANY" for all starter packs)
#   tags           - list of keyword strings
#   banned_phrases - phrases build_standard_addendum() tells the model to avoid
#   prompt_extra   - extra guidance build_standard_addendum() emits as "GUIDE: ..."
#   categories     - mapping of category label -> int (presumably a per-category weight/quota — TODO confirm)
#   facts          - list of {"id", "text"} records ranked by select_facts_slice()
PACKS: Dict[str, dict] = {
    "llm_101": {
        "id": "llm_101",
        "name": "LLM Fundamentals",
        "domain": "ai",
        "locale": "ANY",
        "tags": ["llm","tokens","context","embeddings","finetune","tools"],
        "banned_phrases": ["All of the above", "None of the above", "It depends"],
        "prompt_extra": "Prefer concrete, non-marketing language. No trivia about product brand names.",
        "categories": {"Tokens & Context":2, "Embeddings":2, "Pretrain vs Finetune":2, "Tool Use":2},
        "facts": [
            {"id":"LLM-TOK-001","text":"Tokens are units of text the model reads and generates."},
            {"id":"LLM-CTX-002","text":"A model’s context window caps how many tokens it can consider at once."},
            {"id":"LLM-EMB-003","text":"Embeddings map text to vectors that capture semantic similarity."},
            {"id":"LLM-FIN-004","text":"Finetuning adjusts model weights; prompting does not change weights."},
            {"id":"LLM-TOOL-005","text":"Tool use lets a model call functions or APIs to get external results."},
            {"id":"LLM-HAL-006","text":"Hallucination is confident output not grounded in facts or tools."},
            {"id":"LLM-SYS-007","text":"System prompts steer model behavior but are not guaranteed to be followed."},
            {"id":"LLM-TMP-008","text":"Higher temperature increases randomness in token sampling."},
            {"id":"LLM-TOP-009","text":"Top-p limits sampling to the smallest set of tokens whose mass ≥ p."},
            {"id":"LLM-COT-010","text":"Chain-of-thought reveals intermediate steps but can leak private info if logged."},
        ],
    },
    "rag_essentials": {
        "id": "rag_essentials",
        "name": "RAG Essentials",
        "domain": "ai",
        "locale": "ANY",
        "tags": ["rag","retrieval","rerank","chunking","eval"],
        "banned_phrases": ["All of the above", "None of the above"],
        "prompt_extra": "Focus on retrieval quality, reranking precision, chunking trade-offs, and evaluation signals.",
        "categories": {"Chunking":2, "Retrieval":2, "Reranking":2, "Evaluation":2},
        "facts": [
            {"id":"RAG-CH-001","text":"Overlapping chunks improve recall at the cost of more tokens."},
            {"id":"RAG-RET-002","text":"BM25 is lexical; dense retrieval uses embeddings for semantic matches."},
            {"id":"RAG-RR-003","text":"Rerankers rescore top-k candidates to improve precision."},
            {"id":"RAG-HYB-004","text":"Hybrid retrieval combines lexical and dense signals for robustness."},
            {"id":"RAG-EVL-005","text":"Answer-grounded evals compare the answer to the retrieved context."},
            {"id":"RAG-K-006","text":"Raising k increases recall but may add noise and latency."},
            {"id":"RAG-CH-007","text":"Smaller chunks reduce drift; larger chunks preserve cross-sentence context."},
            {"id":"RAG-CIT-008","text":"Citations help users verify and trust answers."},
            {"id":"RAG-FMT-009","text":"Structured chunks (headings, lists) often retrieve better than raw text."},
            {"id":"RAG-DUP-010","text":"Deduping near-identical chunks reduces wasted tokens and bias."},
        ],
    },
    "prompt_patterns": {
        "id":"prompt_patterns",
        "name":"Prompt Patterns",
        "domain":"ai",
        "locale":"ANY",
        "tags":["prompting","few-shot","structured","guardrails"],
        "banned_phrases":["All of the above","None of the above","very","actually","really"],
        "prompt_extra":"Write stems that test specific patterns (zero-shot, few-shot, JSON structuring, tool calls).",
        "categories":{"Zero/Few-shot":2,"Structure":2,"Guardrails":2,"Self-consistency":2},
        "facts":[
            {"id":"PRM-ZS-001","text":"Zero-shot prompts give no examples; rely on clear instructions."},
            {"id":"PRM-FS-002","text":"Few-shot prompts provide examples to steer the style and format."},
            {"id":"PRM-STR-003","text":"Structured outputs reduce parsing errors and improve reliability."},
            {"id":"PRM-GRD-004","text":"Guardrails are constraints that restrict style or content."},
            {"id":"PRM-SC-005","text":"Self-consistency samples multiple chains and picks the most common answer."},
            {"id":"PRM-ERR-006","text":"Explicit error handling prompts the model to retry on invalid output."},
            {"id":"PRM-ROLE-007","text":"Role priming changes tone but cannot guarantee truthfulness."},
            {"id":"PRM-CMP-008","text":"Compression prompts ask the model to summarize before answering."},
        ],
    },
    "llmops_evals": {
        "id":"llmops_evals",
        "name":"LLMOps & Evaluation",
        "domain":"ai",
        "locale":"ANY",
        "tags":["evals","observability","regression","canary"],
        "banned_phrases":["All of the above","None of the above"],
        "prompt_extra":"Prefer practice-ready items: regression tests, offline evals, golden sets, canary traffic.",
        "categories":{"Offline eval":2,"Golden sets":2,"Observability":2,"Guardrails":2},
        "facts":[
            {"id":"OPS-REG-001","text":"Regression tests catch quality drops from model or prompt changes."},
            {"id":"OPS-GS-002","text":"Golden sets are labeled examples used for repeatable evaluation."},
            {"id":"OPS-CAN-003","text":"Canary deploys route a small percent of traffic to new versions."},
            {"id":"OPS-MET-004","text":"Task-aligned metrics beat generic scores for product fit."},
            {"id":"OPS-LAT-005","text":"Latency and cost must be tracked alongside quality metrics."},
        ],
    },
    "inference_quant": {
        "id":"inference_quant",
        "name":"Inference & Quantization",
        "domain":"ai",
        "locale":"ANY",
        "tags":["vllm","kv-cache","quantization","throughput"],
        "banned_phrases":["All of the above","None of the above"],
        "prompt_extra":"Make trade-offs explicit: speed, memory, quality. Prefer concrete options.",
        "categories":{"Serving":2,"Caching":2,"Quantization":2,"Perf":2},
        "facts":[
            {"id":"INF-KV-001","text":"KV cache stores past keys/values to avoid recomputation."},
            {"id":"INF-BS-002","text":"Larger batch sizes improve throughput but can raise latency."},
            {"id":"INF-SPC-003","text":"Speculative decoding drafts tokens then verifies them for speed."},
            {"id":"INF-QNT-004","text":"Quantization shrinks weights (e.g., 8-bit) to save memory and speed up."},
            {"id":"INF-DIS-005","text":"Distillation trains a smaller model to mimic a larger teacher."},
        ],
    },
    "responsible_ai_lite": {
        "id":"responsible_ai_lite",
        "name":"Responsible AI (Lite)",
        "domain":"ai",
        "locale":"ANY",
        "tags":["safety","privacy","governance"],
        "banned_phrases":["All of the above","None of the above"],
        "prompt_extra":"Neutral, non-legal tone. No promises of compliance.",
        "categories":{"Risks":2,"Privacy":2,"Guardrails":2,"Transparency":2},
        "facts":[
            {"id":"RAI-PRV-001","text":"Minimize PII; avoid logging sensitive user inputs when unnecessary."},
            {"id":"RAI-RSK-002","text":"Risk assessment considers impact, likelihood, and mitigations."},
            {"id":"RAI-GRD-003","text":"Guardrails reduce unsafe outputs but do not guarantee safety."},
            {"id":"RAI-DAT-004","text":"Data retention should match purpose and regulatory requirements."},
        ],
    },
}

# ----- Subtopics per pack (used in Standards mode) -----
# Maps a pack id (the same keys as PACKS) to its list of subtopic labels.
# subtopics_for_pack() returns the list for a pack, or [] for an unknown id.
SUBTOPICS: Dict[str, List[str]] = {
    "llm_101": [
        "Tokens",
        "Context window limits",
        "Sampling (temperature, top-p)",
        "Embeddings basics",
        "Prompt vs Fine-tune",
        "Tool / function calling",
        "Hallucinations & grounding",
        "System prompts",
        "Chain-of-thought & privacy",
        "Evaluation basics",
    ],
    "rag_essentials": [
        "Chunking strategies",
        "Overlap / stride",
        "Dense vs. BM25",
        "Hybrid retrieval",
        "Reranking models",
        "Query rewriting / expansion",
        "Deduping near-duplicates",
        "Freshness / recency",
        "Citation patterns",
        "Metrics (P@k / MRR / nDCG)",
        "k tuning & noise",
    ],
    "prompt_patterns": [
        "Zero-shot",
        "Few-shot (style steering)",
        "Structured outputs / JSON",
        "Error handling & retries",
        "Role prompting",
        "Self-consistency",
        "Delimiters & format guards",
        "Compression / summarize-then-answer",
        "Tool-calling scaffolds",
    ],
    "llmops_evals": [
        "Golden set curation",
        "Offline eval pipelines",
        "Prompt regression tests",
        "Canary / LB experiments",
        "Observability (quality / latency / cost)",
        "Drift & guardrail monitoring",
        "Red-teaming & failure taxonomies",
        "Experiment tracking",
    ],
    "inference_quant": [
        "Serving stacks (vLLM / TGI)",
        "KV-cache management",
        "Batching / dynamic batching",
        "Speculative decoding",
        "Parallelism (TP / PP)",
        "Prompt caching",
        "Quant methods (INT8 / FP8 / AWQ / GPTQ)",
        "Distillation",
        "Throughput / latency trade-offs",
    ],
    "responsible_ai_lite": [
        "Privacy & PII minimization",
        "Data retention",
        "Transparency / model cards",
        "Human oversight & fallback",
        "Bias assessment & mitigation",
        "Safety filters & rate-limits",
        "Incident response",
        "Access control & audit logging",
    ],
}

# ---- Facet registries (diversity prompts) ----
# Three lookup levels consumed by facets_for(), most specific first:
#   FACETS_BY_SUBTOPIC[pack_id][subtopic] -> FACETS_BY_PACK[pack_id] -> DEFAULT_FACETS.

# Generic facet labels returned when neither a subtopic nor a pack entry applies.
DEFAULT_FACETS = [
    "definition/terminology", "core mechanism", "use cases", "trade-offs",
    "metrics & evaluation", "failure modes", "edge cases",
    "best practices", "privacy/safety", "mini scenario"
]

# Pack-level facet labels, keyed by pack id.
FACETS_BY_PACK: Dict[str, List[str]] = {
    "llm_101": [
        "tokens vs words", "context window budgeting", "temperature vs top-p",
        "embeddings use cases", "prompting vs fine-tuning",
        "tool/function calling contracts", "system prompt scope",
        "hallucination diagnosis", "basic evaluation signals"
    ],
    "rag_essentials": [
        "chunk size & overlap", "BM25 vs dense retrieval", "hybrid weighting",
        "reranker precision@k", "query rewrite/expansion", "deduping & clustering",
        "freshness filters", "citation granularity", "k tuning & noise",
        "eval metrics (MRR/nDCG/Recall@k)"
    ],
    "prompt_patterns": [
        "zero-shot clarity", "few-shot selection", "JSON schemas/structure",
        "error handling & retries", "role/tone control", "self-consistency",
        "delimiters & format guards", "compression (summarize-then-answer)",
        "tool-calling argument shaping"
    ],
    "llmops_evals": [
        "golden set design", "offline eval pipeline", "prompt regression gates",
        "canary rollout % & guardrails", "quality/latency/cost SLOs",
        "drift monitoring", "red-teaming & failure taxonomies",
        "experiment tracking"
    ],
    "inference_quant": [
        "serving stack (vLLM/TGI)", "KV-cache limits", "batching strategies",
        "speculative decoding", "TP/PP schemes", "prompt caching",
        "quant (INT8/FP8/AWQ/GPTQ) trade-offs", "distillation",
        "throughput vs latency tuning"
    ],
    "responsible_ai_lite": [
        "risk assessment (impact/likelihood)", "privacy & PII minimization",
        "transparency/model cards", "human oversight & fallback",
        "guardrails (input/output)", "incident response",
        "access control & audit logging", "data retention policy"
    ],
}

# Optional: subtopic-specific overrides (only where you see repetition)
# Keyed by pack id, then by the exact subtopic label as listed in SUBTOPICS.
FACETS_BY_SUBTOPIC: Dict[str, Dict[str, List[str]]] = {
    "responsible_ai_lite": {
        "Privacy & PII minimization": [
            "data minimization vs purpose",
            "collection vs processing trade-offs",
            "anonymization vs pseudonymization",
            "access control & least privilege",
            "retention & deletion triggers",
            "consent scope vs necessity",
            "PII in logs/telemetry",
            "privacy-by-default examples",
            "edge-case PII (free text/images)",
            "privacy incident playbook"
        ]
    }
}

def facets_for(pack_id: str, subtopic_label: str) -> List[str]:
    """Return the facet labels for a pack/subtopic pair.

    Lookup order is subtopic override, then pack-level list, then
    DEFAULT_FACETS. The registry list itself is returned (not a copy).
    """
    if not pack_id:
        return DEFAULT_FACETS
    if subtopic_label:
        overrides = FACETS_BY_SUBTOPIC.get(pack_id, {})
        if subtopic_label in overrides:
            return overrides[subtopic_label]
    return FACETS_BY_PACK[pack_id] if pack_id in FACETS_BY_PACK else DEFAULT_FACETS

def subtopics_for_pack(pack_id: str) -> List[str]:
    """Return the subtopic labels registered for *pack_id*, or [] if there are none."""
    try:
        return SUBTOPICS[pack_id]
    except KeyError:
        return []

# Quick topic presets → map to packs
# Each preset pairs a display label with the id of the pack (a key of PACKS) it selects.
TOPIC_PRESETS = [
    {"label":"LLM Fundamentals",     "pack":"llm_101"},
    {"label":"RAG Essentials",       "pack":"rag_essentials"},
    {"label":"Prompt Patterns",      "pack":"prompt_patterns"},
    {"label":"LLMOps & Evaluation",  "pack":"llmops_evals"},
    {"label":"Inference & Quant",    "pack":"inference_quant"},
    {"label":"Responsible AI (Lite)","pack":"responsible_ai_lite"},
]
# Choice list: the "— choose —" placeholder followed by every preset label.
# compose_topic_display() treats the placeholder as "no preset selected".
TOPIC_PRESET_CHOICES = ["— choose —"] + [t["label"] for t in TOPIC_PRESETS]
PACK_CHOICES = [""] + list(PACKS.keys())  # "" means freeform

# ---------------- Standards registry (expanded, curated per pack) ----------------
# Names are high-level guides only. We paraphrase—no verbatim text; not legal advice.
#
# Keyed by the label shown as a menu choice. Each entry holds:
#   id    - short identifier; "consolidated" selects the blended default wording
#           in build_source_alignment_addendum()
#   name  - full name interpolated into the prompt addendum
#   guide - one-line guidance appended after the name in the prompt addendum
# This table is also the fallback menu when a pack has no entry in STANDARDS_BY_PACK.

_COMMON_STANDARDS = {
    "Consolidated (default)": {
        "id": "consolidated",
        "name": "Consolidated (NIST AI RMF + ISO/IEC 23894 + OECD principles)",
        "guide": "Blend widely used AI standards; use plain, consistent terms. Paraphrase only."
    },
    "NIST AI RMF 1.0": {
        "id": "nist_ai_rmf_1_0",
        "name": "NIST AI Risk Management Framework 1.0",
        "guide": "Emphasize Govern, Map, Measure, Manage; document context, risks, metrics."
    },
    "ISO/IEC 23894:2023": {
        "id": "iso_23894_2023",
        "name": "ISO/IEC 23894:2023 (AI risk management)",
        "guide": "Risk management lifecycle; controls as mitigations; governance & documentation."
    },
    "ISO/IEC 42001:2023": {
        "id": "iso_42001_2023",
        "name": "ISO/IEC 42001:2023 (AI management system — AIMS)",
        "guide": "Org-level processes, policy, competence, continual improvement for AI."
    },
    "ISO/IEC 22989:2022": {
        "id": "iso_22989_2022",
        "name": "ISO/IEC 22989:2022 (AI concepts & terminology)",
        "guide": "Prefer canonical definitions for core AI terms; avoid vendor marketing jargon."
    },
    "ISO/IEC 23053:2022": {
        "id": "iso_23053_2022",
        "name": "ISO/IEC 23053:2022 (Framework for AI systems using ML)",
        "guide": "Structure lifecycle: data, training, evaluation, deployment, operation, retirement."
    },
    "ISO/IEC TR 24027:2021": {
        "id": "iso_tr_24027_2021",
        "name": "ISO/IEC TR 24027:2021 (Bias in AI systems)",
        "guide": "Identify/mitigate data & model bias; discuss measurement limits and documentation."
    },
    "ISO/IEC TR 24028:2020": {
        "id": "iso_tr_24028_2020",
        "name": "ISO/IEC TR 24028:2020 (Trustworthiness of AI)",
        "guide": "High-level attributes: reliability, robustness, safety, security, privacy, accountability."
    },
    "OECD AI Principles": {
        "id": "oecd_ai",
        "name": "OECD AI Principles",
        "guide": "Human-centered values, robustness, transparency, accountability; high-level phrasing."
    },
    "EU AI Act (lite summary)": {
        "id": "eu_ai_act_lite",
        "name": "EU AI Act (high-level)",
        "guide": "Risk-based categories (unacceptable, high, limited, minimal). Paraphrase only."
    },
    # Engineering/quality sources (non-normative but useful)
    "ISO/IEC 25010 (quality model)": {
        "id": "iso_25010",
        "name": "ISO/IEC 25010 (software product quality)",
        "guide": "Quality characteristics (reliability, performance, security, usability) in evals."
    },
    "ISO/IEC/IEEE 29119 (software testing)": {
        "id": "iso_ieee_29119",
        "name": "ISO/IEC/IEEE 29119 (software testing)",
        "guide": "Test design & evidence; traceability from requirements to test cases & results."
    },
    "IEEE 1012 (V&V)": {
        "id": "ieee_1012",
        "name": "IEEE 1012 (Verification & Validation)",
        "guide": "Independent V&V mindset; evidence for correctness and fitness for purpose."
    },
    "MLPerf Inference (benchmark)": {
        "id": "mlperf_inference",
        "name": "MLPerf Inference (MLCommons benchmark)",
        "guide": "Use standardized perf metrics (latency, throughput, accuracy) & reproducible runs. Treat as benchmarking guidance, not a formal standard."
    },
}

# Curated menus per pack
# Maps pack id -> {menu label -> entry}. Every value is the same dict object
# stored in _COMMON_STANDARDS (no copies), so the dict order here is the menu
# order returned by standards_choices_for().
STANDARDS_BY_PACK: Dict[str, Dict[str, dict]] = {
    "llm_101": {
        "Consolidated (default)": _COMMON_STANDARDS["Consolidated (default)"],
        "ISO/IEC 22989:2022":     _COMMON_STANDARDS["ISO/IEC 22989:2022"],
        "ISO/IEC 23053:2022":     _COMMON_STANDARDS["ISO/IEC 23053:2022"],
        "NIST AI RMF 1.0":        _COMMON_STANDARDS["NIST AI RMF 1.0"],
        "ISO/IEC 23894:2023":     _COMMON_STANDARDS["ISO/IEC 23894:2023"],
    },
    "rag_essentials": {
        "Consolidated (default)": _COMMON_STANDARDS["Consolidated (default)"],
        "ISO/IEC 23053:2022":     _COMMON_STANDARDS["ISO/IEC 23053:2022"],
        "ISO/IEC 22989:2022":     _COMMON_STANDARDS["ISO/IEC 22989:2022"],
        "ISO/IEC 23894:2023":     _COMMON_STANDARDS["ISO/IEC 23894:2023"],
        "ISO/IEC 25010 (quality model)": _COMMON_STANDARDS["ISO/IEC 25010 (quality model)"],
    },
    "prompt_patterns": {
        "Consolidated (default)": _COMMON_STANDARDS["Consolidated (default)"],
        "ISO/IEC 22989:2022":     _COMMON_STANDARDS["ISO/IEC 22989:2022"],
        "ISO/IEC TR 24027:2021":  _COMMON_STANDARDS["ISO/IEC TR 24027:2021"],
        "ISO/IEC TR 24028:2020":  _COMMON_STANDARDS["ISO/IEC TR 24028:2020"],
        "NIST AI RMF 1.0":        _COMMON_STANDARDS["NIST AI RMF 1.0"],
    },
    "llmops_evals": {
        "Consolidated (default)": _COMMON_STANDARDS["Consolidated (default)"],
        "ISO/IEC 25010 (quality model)": _COMMON_STANDARDS["ISO/IEC 25010 (quality model)"],
        "ISO/IEC/IEEE 29119 (software testing)": _COMMON_STANDARDS["ISO/IEC/IEEE 29119 (software testing)"],
        "IEEE 1012 (V&V)":        _COMMON_STANDARDS["IEEE 1012 (V&V)"],
        "NIST AI RMF 1.0":        _COMMON_STANDARDS["NIST AI RMF 1.0"],
        "ISO/IEC 23894:2023":     _COMMON_STANDARDS["ISO/IEC 23894:2023"],
    },
    "inference_quant": {
        "Consolidated (default)": _COMMON_STANDARDS["Consolidated (default)"],
        "ISO/IEC 23053:2022":     _COMMON_STANDARDS["ISO/IEC 23053:2022"],
        "ISO/IEC 22989:2022":     _COMMON_STANDARDS["ISO/IEC 22989:2022"],
        "MLPerf Inference (benchmark)": _COMMON_STANDARDS["MLPerf Inference (benchmark)"],
    },
    "responsible_ai_lite": {
        "Consolidated (default)": _COMMON_STANDARDS["Consolidated (default)"],
        "NIST AI RMF 1.0":        _COMMON_STANDARDS["NIST AI RMF 1.0"],
        "ISO/IEC 23894:2023":     _COMMON_STANDARDS["ISO/IEC 23894:2023"],
        "ISO/IEC 42001:2023":     _COMMON_STANDARDS["ISO/IEC 42001:2023"],
        "OECD AI Principles":     _COMMON_STANDARDS["OECD AI Principles"],
        "EU AI Act (lite summary)": _COMMON_STANDARDS["EU AI Act (lite summary)"],
        "ISO/IEC TR 24027:2021":  _COMMON_STANDARDS["ISO/IEC TR 24027:2021"],
        "ISO/IEC TR 24028:2020":  _COMMON_STANDARDS["ISO/IEC TR 24028:2020"],
    },
}

def standards_choices_for(pack_id: str) -> List[str]:
    """Return the standards menu labels for *pack_id*.

    Packs without a curated (non-empty) menu get every label from
    _COMMON_STANDARDS.
    """
    menu = STANDARDS_BY_PACK.get(pack_id)
    if not menu:
        menu = _COMMON_STANDARDS
    return [label for label in menu]

def standards_meta(pack_id: str, choice: str) -> Optional[dict]:
    """Look up the standards entry behind a menu label.

    Uses the pack's curated menu, or _COMMON_STANDARDS when the pack has none.
    Returns None when the label is missing or its entry is falsy.
    """
    menu = STANDARDS_BY_PACK.get(pack_id) or _COMMON_STANDARDS
    entry = menu.get(choice)
    return entry if entry else None

# Standard pre-selected for each pack; labels match keys of the pack's menu.
DEFAULT_STANDARD_BY_PACK = {
    "llm_101": "ISO/IEC 22989:2022",
    "rag_essentials": "ISO/IEC 23053:2022",
    "prompt_patterns": "ISO/IEC TR 24027:2021",
    "llmops_evals": "ISO/IEC 25010 (quality model)",
    "inference_quant": "MLPerf Inference (benchmark)",
    "responsible_ai_lite": "NIST AI RMF 1.0",
}


def default_standard_for(pack_id: str) -> str:
    """Return the default standards label for *pack_id*.

    Unknown packs fall back to "Consolidated (default)".
    """
    if pack_id in DEFAULT_STANDARD_BY_PACK:
        return DEFAULT_STANDARD_BY_PACK[pack_id]
    return "Consolidated (default)"

def compose_topic_display(preset_label: str, pack_id: str, subtopic: str, focus: str) -> str:
    """Build the human-readable topic title.

    The base is the preset label (unless it is empty or the "— choose —"
    placeholder), otherwise the pack's name. A real subtopic wraps the base as
    "subtopic (base)", and a non-blank focus is appended as " — focus: ...".
    """
    if preset_label and preset_label != "— choose —":
        base = preset_label
    else:
        base = (PACKS.get(pack_id, {}) or {}).get("name", "")

    title = base
    if subtopic and subtopic != "— none —":
        title = f"{subtopic} ({base})" if base else subtopic

    focus_text = focus.strip() if focus else ""
    if focus_text:
        if title:
            title = f"{title} — focus: {focus_text}"
        else:
            title = f"Focus: {focus_text}"
    return (title or "").strip()

# --- token overlap helpers for packs ---
_WORDS = re.compile(r"[A-Za-z0-9_]+")

def _topic_tokens(s: str) -> set:
    return set(w.lower() for w in _WORDS.findall(s or ""))

def _facts_k(count: int, difficulty: int, strict: bool) -> int:
    base = 10
    base += 2 * min(count, 4)   # up to +8
    if difficulty >= 7:
        base += 2
    if strict:
        base += 2
    return max(6, min(base, 14))  # cap much lower than before

def select_facts_slice(topic: str, pack: dict, k: int = 10) -> list:
    """Pick up to *k* facts from *pack*, best word-overlap with *topic* first.

    Facts are ranked by the number of words they share with the topic
    (descending); ties are broken by fact id. Returns [] when the pack is
    missing or has no facts.
    """
    facts = pack.get("facts") if pack else None
    if not facts:
        return []

    topic_words = _topic_tokens(topic)

    def rank(fact):
        overlap = len(topic_words & _topic_tokens(fact["text"]))
        return (-overlap, fact["id"])

    picked = sorted(facts, key=rank)[:k]
    if picked:
        return picked
    # Nothing selected: fall back to the first k facts in pack order.
    return facts[:min(k, len(facts))]

def build_source_alignment_addendum(pack_id: str, source_choice: str) -> str:
    """Return the prompt paragraph that tells the model which standard to follow.

    With no choice, an unknown choice or the "consolidated" entry, a generic
    blended-standards paragraph is returned; otherwise the paragraph names the
    chosen standard and includes its guide text.
    """
    meta = None
    if source_choice:
        meta = standards_meta(pack_id, source_choice)

    if meta and meta["id"] != "consolidated":
        return (
            f"STANDARDS ALIGNMENT — Base on {meta['name']}. {meta.get('guide','').strip()} "
            "Do not quote or reproduce text verbatim; paraphrase succinctly."
        )

    return ("STANDARDS ALIGNMENT: Blend overlapping principles from common AI standards "
            "(e.g., NIST AI RMF, ISO/IEC 23894, OECD). Use consistent, plain terminology. "
            "Do not copy wording; paraphrase at a high level.")

def build_standard_addendum(pack: dict, strict: bool, facts_slice: list, source_addendum: str) -> str:
    """Assemble the pack-specific block appended to the generation prompt.

    Args:
        pack: Pack record (see PACKS); may be None/empty for freeform topics.
        strict: When True, the facts are presented as the only allowed
            knowledge base and the model is asked to cite fact ids.
        facts_slice: Facts (dicts with "id" and "text") to show the model.
        source_addendum: Standards-alignment paragraph; may be empty or None.

    Returns:
        Newline-joined instruction lines. Without a pack, only the stripped
        standards paragraph is returned ("" when it is empty or None).
    """
    if not pack:
        # The with-pack path below already tolerates a missing addendum; do the
        # same here instead of failing on None.strip().
        return (source_addendum or "").strip()

    lines = []
    if pack.get("prompt_extra"):
        lines.append(f"GUIDE: {pack['prompt_extra']}")
    if pack.get("banned_phrases"):
        banned = "; ".join(pack["banned_phrases"])
        lines.append(f"BAN these phrases in stems/options: {banned}")
    if source_addendum:
        lines.append(source_addendum)

    if facts_slice:
        # Cap how many facts are shown: 12 in strict mode, 8 otherwise.
        shown = facts_slice[:12] if strict else facts_slice[:8]
        if strict:
            lines.append("STRICT FACTS MODE — Use ONLY the facts below as your knowledge base; do not add external info.")
            lines.append("For each question include a 'citations' array of fact IDs used.")
            lines.append("Allowed facts:")
            lines.extend(f"[{fact['id']}] {fact['text']}" for fact in shown)
        else:
            lines.append("PREFER these facts where relevant (do not force citations):")
            lines.extend(f"- {fact['text']}" for fact in shown)
    return "\n".join(lines)

def validate_strict_items(qitems: list, allowed_ids: set) -> list:
    """Keep only the question items that cite at least one allowed fact id.

    An item's citations are read from its "citations" key, falling back to
    "refs". Items are dropped when the citations are not a list or contain no
    string that is in *allowed_ids*. Entries that are not dicts (malformed
    model output) are dropped as well instead of raising, and a None/empty
    *qitems* yields [].

    Returns:
        The surviving items, in their original order (same dict objects).
    """
    kept = []
    for item in qitems or []:
        if not isinstance(item, dict):
            continue
        cites = item.get("citations") or item.get("refs") or []
        if not isinstance(cites, list):
            continue
        # Check the type first so unhashable entries never reach the set lookup.
        if any(isinstance(c, str) and c in allowed_ids for c in cites):
            kept.append(item)
    return kept

# ---------- text compaction ----------
_FILLER = [
    r"\bthat\b", r"\bvery\b", r"\bactually\b", r"\breally\b",
    r"\bjust\b", r"\bkind of\b", r"\bsort of\b", r"\bin order to\b",
]
def _squash_spaces(s: str) -> str:
    return _re.sub(r"\s+", " ", (s or "").strip())
def _kill_filler(s: str) -> str:
    for pat in _FILLER:
        s = _re.sub(pat, "", s, flags=_re.IGNORECASE)
    return _squash_spaces(s)
def _tighten_punct(s: str) -> str:
    """Tidy punctuation: no space before , . ; : ! ?, short parentheses unwrapped, dash runs to "–".

    Parentheses are removed only around content of at most 20 characters.
    Falsy input is returned unchanged.
    """
    if not s:
        return s
    rewrites = (
        (r"\s+([,.;:!?])", r"\1"),      # drop whitespace before punctuation
        (r"\(([^)]{0,20})\)", r"\1"),   # unwrap short parenthesised text
        (r"--+", "–"),                  # two or more hyphens become an en dash
    )
    for pattern, replacement in rewrites:
        s = _re.sub(pattern, replacement, s)
    return _squash_spaces(s)

def _smart_clip(s: str, n: int, add_ellipsis: bool = True) -> Tuple[str, bool]:
    s = (s or "").strip()
    if len(s) <= n:
        return s, False
    cut = s[:n+1]
    m = _re.search(r"[.;:!?]\s+\S*$", cut)
    if m:
        out = cut[:m.start()].rstrip()
        return (out + "…", True) if add_ellipsis else (out, True)
    m = _re.search(r"\s+\S*$", cut)
    if m:
        out = cut[:m.start()].rstrip()
        return (out + "…", True) if add_ellipsis else (out, True)
    out = s[:n].rstrip()
    return (out + "…", True) if add_ellipsis else (out, True)

# (pattern, replacement) pairs applied in order, case-insensitively, by _abbr_pass().
# NOTE(review): these are blunt word-level swaps — e.g. the ordinal "second"
# also becomes "s" and singular "year" becomes "yrs".
_ABBR_REPL = [
    (r"\bapproximately\b", "~"), (r"\babout\b", "~"), (r"\baround\b", "~"),
    (r"\bversus\b", "vs"), (r"\band\b", "&"), (r"\bpercent\b", "%"),
    (r"\bper\s+cent\b", "%"), (r"\byears?\b", "yrs"), (r"\bminutes?\b", "min"),
    (r"\bhours?\b", "h"), (r"\bseconds?\b", "s"), (r"\bmillion\b", "M"),
    (r"\bbillion\b", "B"), (r"\bUnited States\b", "US"), (r"\bUnited Kingdom\b", "UK"),
    (r"\bkilometers per hour\b", "km/h"), (r"\bkilometres per hour\b", "km/h"), (r"\bmiles per hour\b", "mph"),
]
# Full month name -> three-letter abbreviation; matched case-sensitively by _abbr_pass().
_MONTHS = {"January":"Jan","February":"Feb","March":"Mar","April":"Apr","June":"Jun",
    "July":"Jul","August":"Aug","September":"Sep","October":"Oct","November":"Nov","December":"Dec","May":"May"}
def _abbr_pass(s: str) -> str:
    """Shorten *s* with the _ABBR_REPL and _MONTHS substitutions.

    Word abbreviations are applied case-insensitively, month names
    case-sensitively; number+unit spacing is then normalised and whitespace
    squashed.
    """
    text = s
    for pattern, short in _ABBR_REPL:
        text = _re.sub(pattern, short, text, flags=_re.IGNORECASE)
    for month, abbreviation in _MONTHS.items():
        text = _re.sub(rf"\b{month}\b", abbreviation, text)
    unit_fixups = (
        (r"\b(\d+)\s+yrs?\b", r"\1 yrs"),
        (r"\b(\d+)\s+minutes?\b", r"\1 min"),
        (r"\b(\d+)\s+hours?\b", r"\1 h"),
    )
    for pattern, replacement in unit_fixups:
        text = _re.sub(pattern, replacement, text)
    return _squash_spaces(text)
def _prune_clauses(s: str) -> str:
    """Drop trailing subordinate material from *s*.

    Removes, in order: a ", which/that/who/when/where ..." tail, everything
    from a spaced dash onwards, and everything from a colon followed by
    whitespace onwards. Whitespace is squashed afterwards.
    """
    cuts = (
        (r",\s+(which|that|who|when|where)\b.*$", _re.IGNORECASE),
        (r"\s+[–—-]\s+.*$", 0),
        (r":\s+.*$", 0),
    )
    pruned = s
    for pattern, flags in cuts:
        pruned = _re.sub(pattern, "", pruned, flags=flags)
    return _squash_spaces(pruned)
def _prune_clauses_question(s: str) -> str:
    """Question-safe variant of clause pruning.

    Relative clauses are cut only when followed by a form of "to be"
    (", which is ...", ", when it was ..."). Spaced-dash and colon tails are
    cut as in the generic pruner. Whitespace is squashed afterwards.
    """
    cuts = (
        (r",\s+(which|that|who)\s+(is|are|was|were)\b.*$", _re.IGNORECASE),
        (r",\s+(when|where)\s+(it|they)\s+(is|are|was|were)\b.*$", _re.IGNORECASE),
        (r"\s+[–—-]\s+.*$", 0),
        (r":\s+.*$", 0),
    )
    pruned = s
    for pattern, flags in cuts:
        pruned = _re.sub(pattern, "", pruned, flags=flags)
    return _squash_spaces(pruned)
def compact_to(s: str, n: int, add_ellipsis: bool = True, *, prune_mode: str = "generic") -> Tuple[str, bool]:
    """Shrink *s* to at most *n* characters using progressively harsher steps.

    Steps, stopping at the first that fits: whitespace/punctuation/filler
    cleanup; abbreviations; clause pruning ("question" or "generic" mode, any
    other value skips pruning); finally _smart_clip.

    Returns:
        (text, changed). After the cleanup step alone, changed reports whether
        the length differs from the input; every later step reports True,
        except that _smart_clip supplies its own flag.
    """
    original = s
    text = _kill_filler(_tighten_punct(_squash_spaces(s)))
    if len(text) <= n:
        return text, len(text) != len(original)

    text = _abbr_pass(text)
    if len(text) <= n:
        return text, True

    pruners = {"question": _prune_clauses_question, "generic": _prune_clauses}
    pruner = pruners.get(prune_mode)
    if pruner is not None:
        text = pruner(text)
    if len(text) <= n:
        return text, True

    return _smart_clip(text, n, add_ellipsis=add_ellipsis)
def _finish_sentence(t: str, *, is_question: bool) -> str:
    import re
    t = (t or "").strip()
    t = re.sub(r"(,\s*(which|that|who|when|where)\b.*)$", "", t, flags=re.IGNORECASE)
    t = re.sub(r"[\s:;,\-–—]+$", "", t)
    t = re.sub(r"\b(such as|including|like|for example|e\.g\.)\s*$", "", t, flags=re.IGNORECASE)
    if is_question:
        t = t.rstrip(".")
        if not t.endswith("?"):
            t += "?"
    else:
        if not re.search(r"[.!?]$", t):
            t += "."
    return t
def lint_item_lengths(q: str, options: List[str], exp: str, *, add_ellipsis: bool = False) -> Tuple[str, List[str], str, dict]:
    """Fit a quiz item into the readability budgets (Q_MAX, OPT_MAX, EXP_MAX).

    The question loses a leading "Which of the following ...:" style preamble,
    is compacted in "question" mode and finished with "?". If that leaves a
    short stub (under 24 characters) starting with in/when/while/during/where,
    the raw question is re-compacted without pruning and used when it is at
    least as long. Options are padded or truncated to exactly four.

    Returns:
        (question, options, explanation, flags) where flags maps "Q", "A".."D"
        and "EXP" to the changed flag reported by compact_to.
    """
    raw_question = q or ""
    stem = _re.sub(
        r"^\s*(Which of the following|What of the following|Which statement).*?:\s*",
        "",
        raw_question,
        flags=_re.IGNORECASE,
    )
    question, question_changed = compact_to(stem, Q_MAX, add_ellipsis=add_ellipsis, prune_mode="question")
    question = _finish_sentence(question, is_question=True)

    is_stub = (len(question) < 24) and _re.match(r"^(in|when|while|during|where)\b", question.strip().lower())
    if is_stub:
        retry, _ = compact_to(raw_question, Q_MAX, add_ellipsis=add_ellipsis, prune_mode="none")
        retry = _finish_sentence(retry, is_question=True)
        if len(retry) >= len(question):
            question = retry

    flags = {"Q": question_changed}
    compacted_options = []
    for letter, option in zip("ABCD", (options + [""] * 4)[:4]):
        text, changed = compact_to(option or "", OPT_MAX, add_ellipsis=add_ellipsis, prune_mode="generic")
        compacted_options.append(text)
        flags[letter] = changed

    explanation, explanation_changed = compact_to(exp or "", EXP_MAX, add_ellipsis=add_ellipsis, prune_mode="generic")
    flags["EXP"] = explanation_changed
    explanation = _finish_sentence(explanation, is_question=False)
    return question, compacted_options, explanation, flags

# --------- JSON-ish cleanup ----------
def _strip_code_fences(s: str) -> str:
    s = s.strip()
    if s.startswith("```"):
        s = re.sub(r"^```(?:json)?", "", s.strip(), flags=re.IGNORECASE).strip()
        s = re.sub(r"```$", "", s.strip()).strip()
    return s
def _remove_trailing_commas(s: str) -> str:
    s = re.sub(r",\s*([}\]])", r"\1", s); return s
def _strip_json_comments(s: str) -> str:
    s = re.sub(r"(^|\s)//.*?$", r"\1", s, flags=re.MULTILINE)
    s = re.sub(r"/\*.*?\*/", "", s, flags=re.DOTALL)
    return s
def _normalize_quotes(s: str) -> str:
    return (s or "").replace("\u201c", '"').replace("\u201d", '"').replace("\u2018", "'").replace("\u2019", "'")
def _clean_jsonish(s: str) -> str:
    """Run the JSON clean-up passes in order and return the trimmed result.

    Order: code fences, typographic quotes, comments, trailing commas.
    """
    passes = (_strip_code_fences, _normalize_quotes, _strip_json_comments, _remove_trailing_commas)
    for clean in passes:
        s = clean(s)
    return s.strip()
def _extract_between_markers(s: str):
    """Return the text between the first BEGIN_JSON and the last END_JSON.

    Returns None when either marker is missing or the end marker does not come
    after the begin marker.
    """
    begin_at = s.find(BEGIN_JSON)
    end_at = s.rfind(END_JSON)
    if begin_at == -1 or end_at == -1 or end_at <= begin_at:
        return None
    return s[begin_at + len(BEGIN_JSON):end_at]
def _split_objects_by_brace(text: str) -> list:
    body, objs, depth, in_str, esc, start = text, [], 0, False, False, None
    for idx, ch in enumerate(body):
        if in_str:
            if esc: esc = False
            elif ch == "\\": esc = True
            elif ch == '"': in_str = False
        else:
            if ch == '"': in_str = True
            elif ch == "{":
                if depth == 0: start = idx
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0 and start is not None:
                    objs.append(body[start:idx+1]); start = None
    return objs
def _coerce_objectish(s: str) -> str:
    t = (s or "").strip()
    if not t: return t
    if t.startswith("{") or t.startswith("["): return t
    if t.startswith('"topic"') or t.startswith('"difficulty"') or t.startswith('"questions"'):
        t = t.rstrip()
        if not t.endswith("}"): t = t + "}"
        return "{" + t
    return t
def _first_json_dict(s: str):
    """Parse *s* as JSON, trying progressively looser repairs.

    Attempts, in order: the cleaned text; the text wrapped in braces by
    ``_coerce_objectish`` (only if that changed it); the slice from the first
    ``{`` to the last ``}``.  Returns the first successfully parsed value
    (which may be any JSON type, not only a dict) or None.
    """
    cleaned = _clean_jsonish(s)
    try:
        return json.loads(cleaned)
    except Exception:
        pass

    fallbacks = []
    coerced = _coerce_objectish(cleaned)
    if coerced != cleaned:
        fallbacks.append(coerced)
    if "{" in cleaned and "}" in cleaned:
        fallbacks.append(cleaned[cleaned.find("{"): cleaned.rfind("}") + 1])

    for candidate in fallbacks:
        try:
            return json.loads(_clean_jsonish(candidate))
        except Exception:
            continue
    return None
def _normalize_question_keys(d: dict) -> dict:
    """Fill in ``correct_answer`` on *d* from an alias key when it is missing.

    Aliases, in priority order: ``answer``, ``correct``, then
    ``options`` + integer ``correct_index`` (which also sets ``distractors``
    to up to three of the remaining options).  *d* is modified in place and
    returned.
    """
    if "correct_answer" in d:
        return d
    if "answer" in d:
        d["correct_answer"] = d["answer"]
    elif "correct" in d:
        d["correct_answer"] = d["correct"]
    elif "options" in d and isinstance(d.get("correct_index"), int):
        opts = d.get("options") or []
        ci = d.get("correct_index")
        if 0 <= ci < len(opts):
            d["correct_answer"] = opts[ci]
            d["distractors"] = [o for i, o in enumerate(opts) if i != ci][:3]
    return d


def parse_questions_from_model(s: str) -> list:
    """Extract a list of question dicts from raw model output.

    Strategy, in order:
      1. keep only the text between the JSON markers when both are present;
      2. parse the cleaned text; if it is a dict, return the list stored under
         "questions" (or "items", "mcqs", "data", "list", "entries");
      3. if the cleaned text is itself a JSON array, return it;
      4. otherwise split the text into top-level ``{...}`` spans and keep the
         ones that parse and look like a question.

    Every returned entry is a dict: non-dict list entries are dropped (callers
    call ``.get`` on each item), and the answer-key aliases are normalised on
    all paths, not only on the span-splitting fallback.
    """
    def _dicts_only(entries: list) -> list:
        return [_normalize_question_keys(e) for e in entries if isinstance(e, dict)]

    between = _extract_between_markers(s)
    if between is not None:
        s = between
    s = _coerce_objectish(s)
    s_clean = _clean_jsonish(s)

    data = _first_json_dict(s_clean)
    if isinstance(data, dict):
        for key in ("questions", "items", "mcqs", "data", "list", "entries"):
            value = data.get(key)
            if isinstance(value, list):
                return _dicts_only(value)

    try:
        arr = json.loads(s_clean)
        if isinstance(arr, list):
            return _dicts_only(arr)
    except Exception:
        pass

    out = []
    for obj in _split_objects_by_brace(s_clean):
        try:
            d = json.loads(_clean_jsonish(obj))
        except Exception:
            try:
                # Last resort for Python-style dicts: swap single quotes for double.
                d = json.loads(_clean_jsonish(re.sub(r"'", '"', obj)))
            except Exception:
                continue
        if not isinstance(d, dict):
            continue
        if "question" in d and ("correct_answer" in d or "answer" in d or "correct" in d or "options" in d):
            out.append(_normalize_question_keys(d))
    return out

# ---------- Font + text helpers ----------
# Font files to try, in priority order; _resolve_font_path() returns the first
# one that exists on disk.
FONT_CANDIDATES = [
    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
    "/usr/share/fonts/truetype/liberation2/LiberationSans-Bold.ttf",
    "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
]
def _resolve_font_path() -> str:
    """Pick a TrueType font path, or "" when none can be found.

    The first existing entry of ``FONT_CANDIDATES`` wins; failing that, the
    first ``*.ttf`` found anywhere under /usr/share/fonts is used.  Errors
    while scanning that directory are swallowed.
    """
    preferred = next((p for p in FONT_CANDIDATES if Path(p).exists()), None)
    if preferred is not None:
        return preferred
    try:
        any_ttf = next(Path("/usr/share/fonts").rglob("*.ttf"), None)
    except Exception:
        any_ttf = None
    return "" if any_ttf is None else str(any_ttf)
# Resolve the font once at import time; an empty path means no font file was
# found, in which case _font() falls back to Pillow's built-in default font.
DEFAULT_FONT_PATH = _resolve_font_path()
DEFAULT_FONT_EXISTS = bool(DEFAULT_FONT_PATH)
# Output and scratch directories; both are created here as a module-level
# side effect.
OUT_DIR = Path("/content/out/videos"); OUT_DIR.mkdir(parents=True, exist_ok=True)
TMP_DIR = Path("/content/out/tmp"); TMP_DIR.mkdir(parents=True, exist_ok=True)
def _font(pt: int) -> ImageFont.FreeTypeFont:
    """Load the resolved font at size *pt*, or Pillow's default font if none was found."""
    if not DEFAULT_FONT_EXISTS:
        return ImageFont.load_default()
    return ImageFont.truetype(DEFAULT_FONT_PATH, pt)
def _text_wrap(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont, max_w: int):
    """Greedily wrap *text* into lines no wider than *max_w* pixels.

    Words are measured with ``draw.textbbox``.  A single word wider than
    *max_w* still gets a line of its own.  To avoid a one- or two-word last
    line, the final word of the previous line is moved down when that line has
    more than two words, but only if the rebalanced last line still fits in
    *max_w* (otherwise the fix would push text outside the box).

    Returns the list of lines (empty for blank text).
    """
    def _width(s: str) -> int:
        box = draw.textbbox((0, 0), s, font=font)
        return box[2] - box[0]

    lines, cur = [], ""
    for w in text.split():
        trial = (cur + " " + w).strip()
        if not cur or _width(trial) <= max_w:
            cur = trial
        else:
            lines.append(cur)
            cur = w
    if cur:
        lines.append(cur)

    if len(lines) >= 2 and len(lines[-1].split()) <= 2 and len(lines[-2].split()) > 2:
        prev = lines[-2].split()
        moved = prev.pop()
        rebalanced = (moved + " " + lines[-1]).strip()
        if _width(rebalanced) <= max_w:
            lines[-2] = " ".join(prev)
            lines[-1] = rebalanced
    return lines
def _draw_text_block(img, box, text, color, max_pt, min_pt, leading_ratio=0.30, stroke=0, stroke_color=(255,255,255), align="center"):
    """Draw *text* inside *box*, shrinking the font until the wrapped block fits.

    *box* is ``(x, y, width, height)``.  Font sizes are tried from *max_pt*
    down to *min_pt* in steps of 2; the first size whose wrapped lines fit the
    box height is drawn, vertically centred, with lines aligned per *align*
    ("center", "left", anything else = right).  Line spacing is
    ``int(pt * leading_ratio)``.  If no size fits, the text is drawn unwrapped
    on one centred line at *min_pt*.
    """
    left, top, box_w, box_h = box
    draw = ImageDraw.Draw(img)

    def _extent(s, fnt):
        b = draw.textbbox((0, 0), s, font=fnt)
        return b[2] - b[0], b[3] - b[1]

    for pt in range(max_pt, min_pt - 1, -2):
        fnt = _font(pt)
        leading = int(pt * leading_ratio)
        wrapped = _text_wrap(draw, text, fnt, box_w)
        extents = [_extent(ln, fnt) for ln in wrapped]
        block_h = sum(line_h for _, line_h in extents) + leading * (len(wrapped) - 1)
        if block_h > box_h:
            continue  # too tall at this size; try a smaller font

        pen_y = top + (box_h - block_h) // 2
        for ln, (line_w, line_h) in zip(wrapped, extents):
            if align == "center":
                pen_x = left + (box_w - line_w) // 2
            elif align == "left":
                pen_x = left
            else:
                pen_x = left + (box_w - line_w)
            if stroke > 0:
                # Underlay pass entirely in the stroke colour.
                draw.text((pen_x, pen_y), ln, font=fnt, fill=stroke_color, stroke_width=stroke, stroke_fill=stroke_color)
            draw.text((pen_x, pen_y), ln, font=fnt, fill=color, stroke_width=stroke, stroke_fill=stroke_color)
            pen_y += line_h + leading
        return

    # Nothing fit: single unwrapped line at the smallest size, centred.
    fnt = _font(min_pt)
    line_w, line_h = _extent(text, fnt)
    pen_x = left + (box_w - line_w) // 2
    pen_y = top + (box_h - line_h) // 2
    draw.text((pen_x, pen_y), text, font=fnt, fill=color, stroke_width=stroke, stroke_fill=stroke_color)

# ---------- Slide renderers ----------
def _badge(img: Image.Image):
    """Draw the brand header on *img*: a green top bar and a "MyAI101" pill at top right."""
    canvas = ImageDraw.Draw(img)
    canvas.rectangle([0,0,W,8], fill=GREEN)

    pill_w, pill_h = 300, 72
    pill_x = W - pill_w - 36
    pill_y = 36
    canvas.rounded_rectangle([pill_x, pill_y, pill_x + pill_w, pill_y + pill_h], radius=20, fill=MINT)

    label_box = (pill_x + 22, pill_y + 14, pill_w - 44, pill_h - 28)
    _draw_text_block(img, label_box, "MyAI101", DARK, 56, 32, align="left")
def _chip(img: Image.Image, y_center: int, text: str):
    """Draw one answer chip, vertically centred on *y_center*, with left-aligned *text*."""
    canvas = ImageDraw.Draw(img)
    chip_w = W - 2*SAFE_X
    chip_x = (W - chip_w)//2
    chip_y = y_center - CHIP_H//2

    canvas.rounded_rectangle([chip_x, chip_y, chip_x + chip_w, chip_y + CHIP_H], radius=18, fill=CHIP_BG)
    # Thin green accent along the chip's top edge.
    canvas.rectangle([chip_x, chip_y, chip_x + chip_w, chip_y + 4], fill=GREEN)

    text_box = (
        chip_x + CHIP_TX_PAD//2,
        chip_y + (CHIP_H - CHIP_TX_H)//2,
        chip_w - CHIP_TX_PAD,
        CHIP_TX_H,
    )
    _draw_text_block(img, text_box, text, DARK, max_pt=54, min_pt=30, align="left")
def _question_slide(question: str, options: List[str]) -> Image.Image:
    """Render the question slide: badge, question title and one lettered chip per option.

    Chips are stacked from the top of the band below the title using
    ``CHIP_GAP`` spacing; when that does not fit the band, the gap is reduced
    (never below 18 px).
    """
    img = Image.new("RGB", (W, H), BG)
    _badge(img)
    _draw_text_block(img, (SAFE_X, SAFE_TOP, W-2*SAFE_X, TITLE_BOX_H), question, DARK, 84, 34, stroke=2, stroke_color=(255,255,255))

    band_top = max(SAFE_TOP + TITLE_BOX_H + GAP_BELOW_TITLE_MIN, SAFE_TOP + int(H*0.34))
    band_bot = min(H - SAFE_BOT, int(H*0.86))

    count = len(options)
    if count == 0:
        return img

    band_h = band_bot - band_top
    if count*CHIP_H + (count-1)*CHIP_GAP <= band_h:
        gap = CHIP_GAP
    else:
        gap = max(18, int((band_h - count*CHIP_H) / max(1, count-1)))

    for idx, opt in enumerate(options):
        y_center = band_top + CHIP_H//2 + idx*(CHIP_H + gap)
        _chip(img, y_center, f"{chr(65+idx)}. {opt}")
    return img
def _reveal_slide(correct: str, explanation: str) -> Image.Image:
    """Render the reveal slide: badge, an "Answer: ..." card and the explanation below it."""
    img = Image.new("RGB", (W, H), BG)
    _badge(img)

    canvas = ImageDraw.Draw(img)
    card_w = W - 2*SAFE_X
    card_h = 140
    card_x = SAFE_X
    card_y = int(H*0.34)
    canvas.rounded_rectangle([card_x, card_y, card_x + card_w, card_y + card_h], radius=20, fill=MINT)

    answer_box = (card_x + 20, card_y + 16, card_w - 40, card_h - 32)
    _draw_text_block(img, answer_box, f"Answer: {correct}", DARK, 80, 40)

    explanation_box = (SAFE_X, int(H*0.54), W-2*SAFE_X, int(H*0.28))
    _draw_text_block(img, explanation_box, explanation, MID, 62, 32)
    return img
def _cta_slide() -> Image.Image:
    """Render the closing call-to-action slide: brand name, tagline and a green button."""
    img = Image.new("RGB", (W, H), BG)
    canvas = ImageDraw.Draw(img)
    _badge(img)

    content_w = W - 2*SAFE_X
    _draw_text_block(img, (SAFE_X, int(H*0.36), content_w, int(H*0.22)), "MyAI101", DARK, 160, 84)
    _draw_text_block(img, (SAFE_X, int(H*0.54), content_w, int(H*0.16)),
                     "Daily AI literacy in 60 seconds", MID, 72, 38)

    btn_x = (W-CTA_BUTTON_W)//2
    btn_y = int(H*0.74)
    canvas.rounded_rectangle([btn_x, btn_y, btn_x + CTA_BUTTON_W, btn_y + CTA_BUTTON_H], radius=22, fill=GREEN)
    label_box = (btn_x + 18, btn_y + 10, CTA_BUTTON_W - 36, CTA_BUTTON_H - 20)
    _draw_text_block(img, label_box, "Start Learning for Free", (255,255,255), 54, 28)
    return img

# ---------- Local fallback generator ----------
@dataclass
class QuizItem:
    """One multiple-choice quiz question, as consumed by the slide renderers."""
    topic: str  # topic the question was generated for
    question: str  # question stem shown on the question slide
    options: List[str]  # answer choices in display order
    answer_index: int  # index into ``options`` of the correct choice
    explanation: str  # rationale shown on the reveal slide

# Question templates for the local (no-API) generator; "{topic}" is filled in
# with str.format.  _pick_template() uses EASY for difficulty <= 3, MEDIUM for
# difficulty <= 7 and HARD above that.
EASY_TEMPLATES = [
    "Which of these is an example of {topic}?",
    "What is {topic} mainly used for?",
    "Which choice best matches {topic}?",
]
MEDIUM_TEMPLATES = [
    "Which scenario best illustrates {topic} in practice?",
    "Which statement about {topic} is correct?",
    "What is a common use case of {topic}?",
]
HARD_TEMPLATES = [
    "Which of the following is most accurate regarding {topic}?",
    "In applied settings, which describes {topic} most precisely?",
    "Which statement about {topic} reflects best practice?",
]
# Generic wrong-answer pools for the same three tiers; _generate_options_local()
# samples three entries from the matching pool.
EASY_DISTRACTORS = ["Something unrelated","A wrong idea","Not quite right","Another choice","Sounds similar but isn't"]
MEDIUM_DISTRACTORS = ["A partially correct statement","A common misconception","An unrelated technique","A vague description"]
HARD_DISTRACTORS = ["A subtle misconception","A related but incorrect method","An imprecise definition","A misleading best practice"]

def _pick_template(difficulty: int) -> str:
    """Pick a random question template for the tier *difficulty* falls in.

    Tiers: <= 3 easy, <= 7 medium, otherwise hard.
    """
    if difficulty <= 3:
        tier = EASY_TEMPLATES
    elif difficulty <= 7:
        tier = MEDIUM_TEMPLATES
    else:
        tier = HARD_TEMPLATES
    return random.choice(tier)

def _generate_options_local(topic: str, difficulty: int) -> Tuple[List[str], int, str]:
    """Build four shuffled placeholder options for *topic* without calling the API.

    Returns ``(options, answer_index, explanation)``: three distractors sampled
    from the pool for the difficulty tier plus one generic "correct" phrase,
    the index of that phrase after shuffling, and a one-line explanation.
    """
    subject = topic.strip().rstrip("?.!")

    if difficulty <= 3:
        answer = f"A simple example of {subject}"
        wrong_pool = EASY_DISTRACTORS
    elif difficulty <= 7:
        answer = f"A practical use case of {subject}"
        wrong_pool = MEDIUM_DISTRACTORS
    else:
        answer = f"A precise description of {subject}"
        wrong_pool = HARD_DISTRACTORS

    choices = random.sample(wrong_pool, k=3)
    choices.append(answer)
    random.shuffle(choices)

    if difficulty >= 4:
        explanation = f"The correct option describes {subject} more appropriately than the others."
    else:
        explanation = f"It's the best match for {subject}."
    return choices, choices.index(answer), explanation

def make_quiz_item_local(topic: str, difficulty: int) -> QuizItem:
    """Create a template-based ``QuizItem`` for *topic* without calling the API."""
    question = _pick_template(difficulty).format(topic=topic)
    choices, correct_at, why = _generate_options_local(topic, difficulty)
    return QuizItem(
        topic=topic,
        question=question,
        options=choices,
        answer_index=correct_at,
        explanation=why,
    )

# ---------- Difficulty guide ----------
# Prompt fragment describing what each difficulty band means; build_prompt()
# interpolates it (stripped) into the request sent to the model.
DIFFICULTY_GUIDE = """
Map difficulty 1–10 to these constraints:
1–2: kid-simple; one sentence; no jargon; obvious distractors.
3–4: basic recognition; short phrasing; simple plausible distractors.
5–6: intermediate conceptual; 1–2 sentences; plausible/related distractors.
7–8: advanced application or edge cases; 2–3 sentences; subtle distractors.
9–10: professional nuance; 2–3 concise sentences; highly plausible distractors with subtle traps.
"""

# Question archetypes used to enforce variety across questions; build_prompt()
# joins them into the prompt and asks the model to tag each item with one as
# its "kind".
QUESTION_ARCHETYPES = [
    "Definition / concept check",
    "Real-world scenario",
    "Metric / measurement",
    "Mitigation technique",
    "Trade-off / decision",
    "Failure diagnosis / debugging",
    "Governance / policy / oversight",
    "Counterfactual / what-if",
    "Comparison / contrast",
    "Process / ordering"
]

# Common repetitive openings we want to forbid.  Entries are lower-case
# prefixes: they are listed in the prompt and also matched with
# str.startswith() against the lower-cased question.  Each contraction appears
# with both a straight (') and a typographic (’) apostrophe, because the model
# may emit either and a prefix match is exact.
BANNED_OPENERS = [
    "what is a key", "what's a key", "what’s a key",
    "what is a crucial", "what's a crucial", "what’s a crucial",
    "what initial step", "which initial step",
    "what is an initial step", "what's an initial step", "what’s an initial step",
    "what is a primary step", "what's a primary step", "what’s a primary step",
]

# ---------- Novelty / dedup helpers ----------
# Words ignored by _tokens() when comparing questions: common function words
# plus the filler words ("key", "crucial", "best", ...) listed on the last line.
_STOP = set("""
a an the of to in for on at by with from and or but if when while after before than as
what which who whom whose this that these those there here how why is are was were be being been
key crucial initial primary step best most common important effective
""".split())
def _tokens(s: str) -> set:
    """Return the set of lower-cased alphanumeric words in *s* longer than two
    characters and not in ``_STOP``."""
    words = re.findall(r"[a-z0-9]+", (s or "").lower())
    kept = set()
    for word in words:
        if len(word) > 2 and word not in _STOP:
            kept.add(word)
    return kept
def _ngrams(s: str, n: int = 3) -> set:
    t = re.sub(r"\s+", " ", (s or "").lower())
    t = re.sub(r"[^a-z0-9 ]", "", t)
    t = t.replace(" ", "_")
    return {t[i:i+n] for i in range(max(0, len(t)-n+1))}
def _jaccard(a: set, b: set) -> float:
    if not a and not b: return 1.0
    if not a or not b: return 0.0
    inter = len(a & b); union = len(a | b)
    return inter / union if union else 0.0
def too_similar(q1: str, q2: str, t_thr: float = 0.82, c_thr: float = 0.86) -> bool:
    """Return True when two questions are near-duplicates.

    Two Jaccard scores are computed: over word tokens (``_tokens``) and over
    character trigrams (``_ngrams``).  The questions count as too similar when
    the token score reaches *t_thr* OR the trigram score reaches *c_thr*.

    Each score is compared with its own threshold.  Comparing the larger
    score against the larger threshold, as before, made the lower threshold
    dead.
    """
    if _jaccard(_tokens(q1), _tokens(q2)) >= t_thr:
        return True
    return _jaccard(_ngrams(q1), _ngrams(q2)) >= c_thr

# ---------- Prompt assembly (facet-aware + archetypes) ----------
def build_prompt(topic: str, difficulty: int, count: int,
                 standard_addendum: str,
                 facet: str = "",
                 begin_marker: str = BEGIN_JSON,
                 end_marker: str   = END_JSON) -> str:
    """Assemble the full question-generation prompt sent to the model.

    Args:
        topic: Quiz topic, quoted verbatim in the prompt and the JSON skeleton.
        difficulty: 1-10 level, explained to the model via DIFFICULTY_GUIDE.
        count: Number of questions requested.
        standard_addendum: Extra guidance block inserted before the output
            instructions; None or "" adds nothing.
        facet: Optional focus; when given, a mandatory "DIVERSITY FACET"
            section is added.
        begin_marker / end_marker: Delimiters the model must wrap its JSON in.
            They default to BEGIN_JSON / END_JSON and are used in the prompt
            text (previously the parameters were accepted but ignored).

    Returns:
        The prompt string.
    """
    kinds_line = ", ".join(QUESTION_ARCHETYPES)
    banned_line = "; ".join(BANNED_OPENERS)

    facet_block = ""
    if facet:
        facet_block = f"""
DIVERSITY FACET (MANDATORY for this batch):
- Focus specifically on: "{facet}".
- Target THIS facet explicitly; avoid generic umbrella wording.
"""
    facet_text = facet_block.strip()
    # Tolerate a missing addendum instead of failing on None.strip().
    addendum_text = (standard_addendum or "").strip()

    base = f"""
You are an expert quiz writer for 1080×1920 SHORT videos.

Produce exactly {count} distinct multiple-choice questions on the topic below.

Topic: "{topic}"
Difficulty (1-10): {difficulty}

{DIFFICULTY_GUIDE.strip()}

HARD LENGTH LIMITS (NEVER EXCEED):
- question <= {Q_MAX} characters
- each option <= {OPT_MAX} characters
- rationale <= {EXP_MAX} characters

STYLE:
- Write the stem directly (no “Which of the following” preambles).
- Everyday words. No parentheticals, citations, footnotes, or emojis.
- Exactly 1 correct answer, 3 plausible distractors. No "All/None of the above".

VARIETY RULES (STRICT):
- Distribute questions across these archetypes and include a "kind" field per item:
  [{kinds_line}]
- Use each archetype at most once until all have been used; only then may you reuse.
- Start each question with a DIFFERENT leading verb/phrase (e.g., Identify, Diagnose, Compare, Select, Quantify, Order, Spot, Predict, Explain, Choose).
- Do not use repetitive templates such as “initial step”, “key step”, “crucial first step”.
- Do NOT begin any question with: {banned_line}.
- If an item’s archetype implies specifics, include them:
  • Metric/measurement → name at least one concrete metric or signal.
  • Trade-off/decision → contrast at least two options with a reason.
  • Failure diagnosis → include a symptom and the likely cause.
  • Process/ordering → ask for the first/next/most appropriate step.
  • Scenario → give a short, realistic context (1–2 clauses).

{facet_text}

{addendum_text}

Return ONLY a single JSON object BETWEEN the markers below.
Start your first character with the JSON after {begin_marker} and end before {end_marker}.
Do not include any text outside the markers. No markdown fences.

{begin_marker}
{{
  "topic": "{topic}",
  "difficulty": {difficulty},
  "questions": [
    {{
      "kind": "one of: {kinds_line}",
      "question": "string (≤{Q_MAX})",
      "correct_answer": "string (≤{OPT_MAX})",
      "distractors": ["string (≤{OPT_MAX})","string (≤{OPT_MAX})","string (≤{OPT_MAX})"],
      "rationale": "string (≤{EXP_MAX})"
    }}
  ]
}}
{end_marker}
"""
    return base

def _ensure_3_distractors(distractors: List[str], correct: str) -> List[str]:
    seen, out = set(), []
    corr = (correct or "").strip().lower()
    for d in distractors or []:
        d2 = (d or "").strip()
        if not d2: continue
        if d2.lower() == corr: continue
        if d2.lower() in seen: continue
        out.append(d2); seen.add(d2.lower())
    while len(out) < 3:
        out.append(f"Alternative {len(out)+1}")
    return out[:3]
def _shuffle_with_correct(correct: str, distractors: List[str]) -> Tuple[List[str], int]:
    opts = (distractors or [])[:3] + [correct]
    random.shuffle(opts)
    idx = opts.index(correct)
    return opts, idx

# ---------- Gemini REST helpers ----------
# Base URL of the Gemini REST API (v1); endpoint paths are appended to it.
API_BASE = "https://generativelanguage.googleapis.com/v1"

def list_models_v1(api_key: str) -> list:
    """Return every model descriptor the API key can see.

    The models endpoint is paginated, so this follows ``nextPageToken`` until
    the listing is exhausted instead of returning only the first page.  A hard
    cap on the number of pages guards against a misbehaving server.

    Raises:
        RuntimeError: on any HTTP status >= 400 (message carries the status
            code and the first 200 characters of the response body).
    """
    models = []
    params = {"key": api_key, "pageSize": 1000}
    for _ in range(20):  # safety cap on page count
        r = requests.get(f"{API_BASE}/models", params=params, timeout=30)
        if r.status_code >= 400:
            raise RuntimeError(f"REST {r.status_code}: {r.text[:200]}")
        data = r.json()
        models.extend(data.get("models", []) or [])
        next_token = data.get("nextPageToken")
        if not next_token:
            break
        params["pageToken"] = next_token
    return models

def filter_generate_content_models(models: list) -> list:
    """Return the short names of models that support ``generateContent``.

    The supported-methods list is read from either the camelCase or the
    snake_case key; the short name is the part of ``name`` after the last "/".
    Entries without a name are skipped.
    """
    short_names = []
    for entry in models:
        methods = entry.get("supportedGenerationMethods") or entry.get("supported_generation_methods") or []
        if "generateContent" not in methods:
            continue
        full_name = entry.get("name", "")
        if not full_name:
            continue
        short_names.append(full_name.rsplit("/", 1)[-1])
    return short_names

def gemini_generate_v1(api_key: str, model: str, prompt: str,
                       temperature: float, max_output_tokens: int) -> str:
    """Call ``models/{model}:generateContent`` and return the generated text.

    Sends *prompt* as a single user turn and concatenates the text parts of
    the first candidate.

    Raises:
        RuntimeError: on HTTP status >= 400, when the response has no
            candidates, or when the first candidate carries no text.
    """
    generation_config = {
        "temperature": float(temperature),
        "topP": 0.9,
        "topK": 40,
        "maxOutputTokens": int(max_output_tokens),
        "candidateCount": 1,
    }
    body = {
        "contents": [{"role": "user", "parts": [{"text": prompt}]}],
        "generationConfig": generation_config,
    }
    resp = requests.post(
        f"{API_BASE}/models/{model}:generateContent",
        params={"key": api_key},
        json=body,
        timeout=60,
    )
    if resp.status_code >= 400:
        raise RuntimeError(f"REST {resp.status_code}: {resp.text[:200]}")

    data = resp.json()
    candidates = data.get("candidates", [])
    if not candidates:
        raise RuntimeError(f"Empty candidates: {data}")

    content = candidates[0].get("content") or {}
    pieces = [part.get("text", "") for part in (content.get("parts") or []) if isinstance(part, dict)]
    text = "".join(pieces)
    if not text:
        raise RuntimeError(f"No text in response. Raw: {json.dumps(data)[:400]}")
    return text

def _shrink_prompt_blocks(p: str) -> str:
    """Cut the bulky fact sections out of a prompt, leaving other guidance intact.

    Each section starting with "PREFER these facts" or "STRICT FACTS MODE" is
    removed up to the next blank line or the BEGIN_JSON marker, whichever
    comes first.
    """
    stop_marker = re.escape(BEGIN_JSON)
    section_patterns = (
        r"(?s)\nPREFER these facts.*?(?=\n{2,}|" + stop_marker + r")",
        r"(?s)\nSTRICT FACTS MODE.*?(?=\n{2,}|" + stop_marker + r")",
    )
    for pattern in section_patterns:
        p = re.sub(pattern, "\n", p)
    return p

def _safe_gemini_call(api_key, model, prompt, temperature, tok, tries: int = 4) -> str:
    """Call ``gemini_generate_v1`` with up to *tries* attempts and adaptive recovery.

    The token budget starts at ``max(tok, 2000)``.  When a failure mentions
    "MAX_TOKENS" or "No text in response", the budget is raised by 500 (while
    below 3500, capped at 4096) or, once it is at least 3500, the prompt's
    fact sections are stripped; the retry is then immediate.  Any other
    failure is retried after a 0.2 s pause.  The last exception is re-raised
    when all attempts fail.
    """
    failure = None
    current_prompt = prompt
    budget = max(int(tok), 2000)  # start higher to leave room for internal reasoning

    for _ in range(tries):
        try:
            return gemini_generate_v1(api_key, model, current_prompt, temperature, budget)
        except Exception as exc:
            failure = exc

        detail = str(failure)
        token_pressure = "MAX_TOKENS" in detail or "No text in response" in detail
        if not token_pressure:
            time.sleep(0.2)  # small backoff for transient issues
        elif budget < 3500:
            budget = min(budget + 500, 4096)  # give the model more headroom
        else:
            current_prompt = _shrink_prompt_blocks(current_prompt)  # then shrink prompt facts
    raise failure

def _candidate_models_ordered(requested: str, available: list) -> list:
    req = (requested or "").strip()
    if req.endswith("-latest"): req = req[:-7] + "-001"
    order = []
    if req: order.append(req)
    for m in ["gemini-2.5-flash", "gemini-1.5-flash-001", "gemini-1.5-pro-001", "gemini-2.0-flash-exp", "gemini-pro", "gemini-1.0-pro"]:
        if m not in order: order.append(m)
    avail = set(available)
    return [m for m in order if m in avail]

# ---------- AI question generator (REST) ----------
def _top_up_with_local_items(items, accepted_qs, topic, difficulty, count, max_tries, pad=False):
    """Append locally generated items to *items* until it holds *count*.

    Makes at most *max_tries* attempts, skipping candidates that are too
    similar to an already accepted question.  With *pad* set, any remaining
    slots are then filled without the novelty check, so the list is guaranteed
    to reach *count* and the call always terminates.
    """
    for _ in range(max_tries):
        if len(items) >= count:
            break
        cand = make_quiz_item_local(topic, difficulty)
        if any(too_similar(cand.question, prev) for prev in accepted_qs):
            continue
        items.append(cand)
        accepted_qs.append(cand.question)
    if pad:
        while len(items) < count:
            items.append(make_quiz_item_local(topic, difficulty))


def _field_text(value) -> str:
    """Coerce a JSON value from the model into stripped text ("" for None)."""
    if value is None:
        return ""
    return value.strip() if isinstance(value, str) else str(value).strip()


def generate_questions_gemini(api_key: str, model_name: str, topic: str, difficulty: int, count: int,
                              temperature: float = 0.6, max_output_tokens: int = 2800,
                              pack_id: str = "", strict: bool = False,
                              standard_choice: str = "Consolidated (default)",
                              subtopic_label: str = "", focus_text: str = ""):
    """Generate *count* quiz items via Gemini, falling back to local templates.

    Flow:
      * Without an API key, items come from the local generator only.
      * Otherwise candidate models are tried in order.  For each model one
        question is requested per prompt (one facet per prompt), parsed,
        filtered (banned openers, near-duplicates) and length-linted, until
        enough items exist or the attempt budget is spent; any shortfall is
        topped up locally.
      * If every model raises, the remaining slots are filled locally.

    ``difficulty`` and ``count`` are clamped to 1..10.  ``focus_text`` is
    accepted but not used by this function.

    Returns:
        ``(items, message)``: a list of ``QuizItem`` and a status string.

    Fixes relative to the previous version:
      * the final local fallback is bounded and then padded; it used to loop
        forever once the few local templates were all "too similar";
      * items kept from a model that later failed count towards the total, so
        the next model cannot overshoot *count*;
      * non-dict entries and non-string fields in the model output are skipped
        or coerced instead of raising and discarding the whole model.
    """
    topic = (topic or "").strip()
    difficulty = int(max(1, min(10, difficulty)))
    count = int(max(1, min(10, count)))
    items: List[QuizItem] = []
    info_msgs: List[str] = []

    pack = PACKS.get(pack_id) if pack_id else None
    has_facts = bool(pack and pack.get("facts"))
    if strict and not has_facts:
        strict = False
        info_msgs.append("Strict facts disabled (no facts available for the selected pack).")

    # novelty memory
    accepted_qs: List[str] = []

    if not api_key:
        info_msgs.append("No Gemini API key provided — using local generator.")
        _top_up_with_local_items(items, accepted_qs, topic, difficulty, count, max_tries=count, pad=True)
        return items, " ".join(info_msgs)

    # Discover models
    try:
        models_raw = list_models_v1(api_key)
        available = filter_generate_content_models(models_raw)
    except Exception as e:
        available = []
        info_msgs.append(f"Model listing failed ({e}); attempting common defaults.")

    to_try = _candidate_models_ordered(
        model_name,
        available if available else ["gemini-2.5-flash","gemini-1.5-flash-001","gemini-1.5-pro-001","gemini-pro","gemini-1.0-pro"]
    ) or ["gemini-2.5-flash","gemini-1.5-flash-001","gemini-1.5-pro-001","gemini-pro","gemini-1.0-pro"]

    # Precompute facet cycle
    facet_pool = facets_for(pack_id or "", subtopic_label or "")
    if not facet_pool:
        facet_pool = DEFAULT_FACETS[:]
    random.shuffle(facet_pool)
    facet_cycle = (facet_pool * ((count + len(facet_pool) - 1) // len(facet_pool)))[:count]

    last_err = None
    for m in to_try:
        try:
            # Items already collected from an earlier model that failed midway
            # still count towards the total.
            remaining = count - len(items)
            chunk = 1  # one facet per prompt = maximum diversity

            tok = max(max_output_tokens if max_output_tokens else 1600,
                      1400 if difficulty >= 7 else 1200)

            used_fact_ids = set()
            k_base = _facts_k(count, difficulty, strict)
            facet_idx = 0
            attempts = 0
            max_attempts = max(6, count * 4)

            while remaining > 0 and attempts < max_attempts:
                attempts += 1
                take = min(chunk, remaining)

                # facts slice (for strict or guidance)
                if pack and has_facts:
                    pool = select_facts_slice(topic, pack, k=k_base*2)
                    chunk_slice = [f for f in pool if f["id"] not in used_fact_ids][:k_base]
                    if not chunk_slice:
                        chunk_slice = pool[:k_base]
                        used_fact_ids.clear()
                    used_fact_ids |= {f["id"] for f in chunk_slice}
                else:
                    chunk_slice = []

                # standards + facet
                source_addendum = build_source_alignment_addendum(pack_id or "", standard_choice or "")
                addendum = build_standard_addendum(pack, strict, chunk_slice, source_addendum)
                facet_now = facet_cycle[facet_idx % len(facet_cycle)] if facet_cycle else ""
                facet_idx += 1

                prompt = build_prompt(topic, difficulty, take, standard_addendum=addendum, facet=facet_now)

                # SAFE CALL (with token headroom + prompt shrink under pressure)
                text = _safe_gemini_call(api_key, m, prompt, temperature, tok)

                qlist = parse_questions_from_model(text)
                if not qlist:
                    tok = min(tok + 300, 4096)
                    text = _safe_gemini_call(api_key, m, prompt, temperature, tok)
                    qlist = parse_questions_from_model(text)
                    if not qlist:
                        continue

                if strict:
                    qlist = validate_strict_items(qlist, {f["id"] for f in chunk_slice})

                # Add filtered, deduped items
                added = 0
                for q in qlist:
                    if not isinstance(q, dict):
                        continue
                    qtext = _field_text(q.get("question"))
                    correct = _field_text(q.get("correct_answer") or q.get("answer") or q.get("correct"))
                    raw_distractors = q.get("distractors") or []
                    if not isinstance(raw_distractors, (list, tuple)):
                        raw_distractors = [raw_distractors]
                    distractors = [_field_text(d) for d in raw_distractors]
                    rationale = _field_text(q.get("rationale") or q.get("explanation"))
                    if not qtext or not correct:
                        continue
                    # banned openers check
                    low = qtext.lower().strip()
                    if any(low.startswith(b) for b in BANNED_OPENERS):
                        continue
                    # novelty check
                    if any(too_similar(qtext, prev) for prev in accepted_qs):
                        continue

                    distractors = _ensure_3_distractors(distractors, correct)
                    options, answer_index = _shuffle_with_correct(correct, distractors)
                    qtext, options, rationale, _ = lint_item_lengths(qtext, options, rationale, add_ellipsis=False)

                    items.append(QuizItem(topic=topic, question=qtext, options=options,
                                          answer_index=answer_index, explanation=rationale))
                    accepted_qs.append(qtext)
                    added += 1
                    remaining -= 1
                    if remaining <= 0:
                        break

                # If we didn't add anything this round, try a different facet immediately
                if added == 0:
                    continue

                if m != model_name and f"Using available model: {m}" not in info_msgs:
                    info_msgs.append(f"Using available model: {m}")

            if strict and len(items) < count:
                info_msgs.append(f"Strict facts mode returned {len(items)}/{count}. Regenerate for more or disable Strict.")

            # If still short, attempt a quick local top-up, preserving novelty
            _top_up_with_local_items(items, accepted_qs, topic, difficulty, count, max_tries=count*2)

            return items, (" ".join(info_msgs) if info_msgs else f"Generated {len(items)} item(s) via Gemini.")
        except Exception as e:
            last_err = e
            continue

    info_msgs.append(f"Gemini error: {last_err}. Falling back to local generator.")
    # Fallback local: prefer novel items, but never spin forever when the
    # small template set cannot supply enough distinct questions.
    _top_up_with_local_items(items, accepted_qs, topic, difficulty, count, max_tries=count*4, pad=True)
    return items, " ".join(info_msgs)

# ---------- FFmpeg assembly ----------
def _save_png(img: Image.Image, path: Path):
    """Write *img* to *path* as an optimised PNG, creating parent directories as needed."""
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    img.save(str(path), format="PNG", optimize=True)
def _ffmpeg_build(out_path: Path, question_png: Path, reveal_png: Path, cta_png: Path,
                  q_sec=6.0, r_sec=4.3, c_sec=2.9):
    """Concatenate three still images into one H.264 MP4 with ffmpeg.

    Each PNG is looped for its duration (*q_sec*, *r_sec*, *c_sec*), scaled to
    W x H, and the three segments are joined in order.  ffmpeg output is
    discarded; a non-zero exit raises ``subprocess.CalledProcessError``.
    """
    stills = ((question_png, q_sec), (reveal_png, r_sec), (cta_png, c_sec))
    inputs = []
    for png, seconds in stills:
        inputs += ["-loop","1","-t",f"{seconds}","-i",str(png)]

    filter_graph = "".join((
        f"[0:v]scale={W}:{H},setsar=1[v0];",
        f"[1:v]scale={W}:{H},setsar=1[v1];",
        f"[2:v]scale={W}:{H},setsar=1[v2];",
        f"[v0][v1][v2]concat=n=3:v=1:a=0,format=yuv420p[v]",
    ))

    cmd = ["ffmpeg","-y","-loglevel","error","-threads","1"]
    cmd += inputs
    cmd += ["-filter_complex", filter_graph]
    cmd += ["-map","[v]","-r", str(FPS)]
    cmd += ["-c:v","libx264","-preset","veryfast"]
    cmd += ["-b:v", BITRATE,"-maxrate", BITRATE,"-bufsize", BITRATE]
    cmd += ["-movflags","+faststart","-g", str(FPS*2)]
    cmd.append(str(out_path))

    subprocess.check_call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)

def build_quiz_video(item: QuizItem, out_path: Path) -> Path:
    """Render *item* to an MP4 at *out_path* and return that path.

    Three slides (question, reveal, call-to-action) are rendered to temporary
    PNGs under ``TMP_DIR`` and stitched together with ffmpeg.  The temporary
    files are removed in a ``finally`` block, so they no longer pile up when
    saving or encoding fails; only ``OSError`` is ignored during that cleanup.
    Errors from rendering or ffmpeg propagate to the caller.
    """
    q_img = _question_slide(item.question, item.options[:4])
    r_img = _reveal_slide(item.options[item.answer_index], item.explanation)
    c_img = _cta_slide()

    # Timestamp plus a random suffix keeps names distinct across quick successive calls.
    base = TMP_DIR / f"vid_{int(time.time()*1000)}_{random.randint(1000,9999)}"
    q_png, r_png, c_png = base.with_suffix(".q.png"), base.with_suffix(".r.png"), base.with_suffix(".c.png")
    try:
        _save_png(q_img, q_png)
        _save_png(r_img, r_png)
        _save_png(c_img, c_png)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        _ffmpeg_build(out_path, q_png, r_png, c_png)
    finally:
        for p in (q_png, r_png, c_png):
            try:
                p.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the real outcome.
                pass
    return out_path

def _slugify(text: str) -> str:
    s = text.lower().strip().replace(" ", "-")
    allowed = set(string.ascii_lowercase + string.digits + "-")
    s = "".join(ch for ch in s if ch in allowed)
    return s or f"item-{int(time.time())}"

# ---------- Gradio callbacks ----------
def _seed_rng(): random.seed(time.time_ns() % (2**32 - 1))

def generate_preview_ai(topic: str, difficulty: int, count: int, api_key: str, model_name: str,
                        temperature: float = 0.6, max_output_tokens: int = 2800,
                        pack_id: str = "", strict: bool = False, standard_choice: str = "Consolidated (default)",
                        subtopic_label: str = "", focus_text: str = ""):
    """Generate quiz items and shape them into rows for the preview table.

    ``difficulty`` and ``count`` are clamped to 1..10, the RNG is reseeded, and
    ``generate_questions_gemini`` is called with the remaining arguments passed
    through. Each returned item is run through ``lint_item_lengths`` (without
    ellipsis); any field it flags gets ``CLIP_FLAG`` appended in the preview.

    Returns:
        A 4-tuple ``(df, status, rows, topic)``: the preview DataFrame, a
        status message (the generator's message if it returned one), the list
        of row dicts behind the DataFrame, and the stripped topic. A blank
        topic short-circuits to an empty DataFrame and a prompt message.
    """
    topic = (topic or "").strip()
    if not topic:
        return pd.DataFrame([]), "Please enter a topic.", [], ""

    difficulty = int(max(1, min(10, difficulty)))
    count = int(max(1, min(10, count)))
    _seed_rng()

    items, info_msg = generate_questions_gemini(
        api_key, model_name, topic, difficulty, count,
        temperature=temperature, max_output_tokens=max_output_tokens,
        pack_id=pack_id, strict=strict, standard_choice=standard_choice,
        subtopic_label=subtopic_label, focus_text=focus_text
    )

    letters = ["A", "B", "C", "D"]
    rows = []
    for number, item in enumerate(items, 1):
        question, choices, explanation, clipped = lint_item_lengths(
            item.question, item.options[:4], item.explanation, add_ellipsis=False
        )

        # Column order matters: it becomes the DataFrame's column order.
        row = {"#": number, "Question": question + (CLIP_FLAG if clipped["Q"] else "")}
        for position, letter in enumerate(letters):
            row[letter] = choices[position] + (CLIP_FLAG if clipped[letter] else "")
        explanation_cell = explanation + (CLIP_FLAG if clipped["EXP"] else "")
        row["Correct"] = letters[item.answer_index]
        row["Explanation"] = explanation_cell
        rows.append(row)

    preview = pd.DataFrame(rows)
    status = info_msg or f"Generated {len(rows)} quiz item(s). Review below."
    return preview, status, rows, topic

def confirm_and_produce(items_table, topic: str):
    """Render one MP4 per row of the (possibly user-edited) preview table.

    Args:
        items_table: Preview rows, either an object with ``to_dict`` (e.g. a
            pandas DataFrame) or a list of row dicts keyed by ``Question``,
            ``A``..``D``, ``Correct`` and ``Explanation``. Anything else is
            treated as empty.
        topic: Topic used for the ``QuizItem`` and for the output file names.

    Returns:
        ``(message, files)``: a status message and the list of MP4 paths that
        were rendered and exist on disk. Rows whose render failed are reported
        in the message and are not counted as produced.
    """
    if items_table is None:
        return "No items to render. Please generate a preview first.", []
    if hasattr(items_table, "to_dict"):
        rows = items_table.to_dict(orient="records")
    elif isinstance(items_table, list):
        rows = items_table
    else:
        rows = []
    if not rows:
        return "No items to render. Please generate a preview first.", []

    def _clean(cell):
        # Edited table cells may arrive as None, NaN or numbers rather than str.
        if cell is None or (isinstance(cell, float) and cell != cell):
            return ""
        return str(cell).replace(CLIP_FLAG, "").strip()

    letter_to_index = {"A": 0, "B": 1, "C": 2, "D": 3}
    produced = []
    failures = []
    for idx, raw in enumerate(rows, 1):
        options = [_clean(raw.get(letter, "")) for letter in ("A", "B", "C", "D")]
        correct_letter = str(raw.get("Correct", "A")).strip().upper()
        # Unrecognised letters fall back to the first option.
        answer_index = letter_to_index.get(correct_letter, 0)

        q = _clean(raw.get("Question", ""))
        exp = _clean(raw.get("Explanation", ""))

        q, options, exp, _ = lint_item_lengths(q, options, exp, add_ellipsis=True)

        qi = QuizItem(topic=topic or "", question=q, options=options,
                      answer_index=answer_index, explanation=exp)

        slug = _slugify(f"{topic}-{idx}")
        out_path = OUT_DIR / f"myai101_{slug}.mp4"
        try:
            build_quiz_video(qi, out_path)
        except Exception as e:
            # Keep going so one bad row does not cost the rest of the batch.
            failures.append(f"#{idx}: {e}")
        else:
            produced.append(str(out_path))
        gc.collect()

    msg = f"Done. Produced {len(produced)} video(s)."
    if failures:
        details = "\n".join(f"- {failure}" for failure in failures)
        msg += f" {len(failures)} failed:\n\n{details}"
    files = [p for p in produced if Path(p).exists()]
    return msg, files

_empty_df = pd.DataFrame(columns=["#", "Question", "A", "B", "C", "D", "Correct", "Explanation"])
def _make_table():
    try:
        return gr.Dataframe(
            headers=list(_empty_df.columns),
            datatype=["number","str","str","str","str","str","str","str"],
            row_count=(1, "dynamic"),
            col_count=(8, "fixed"),
            wrap=True,
            label="You can edit cells before confirming to tweak wording.",
            interactive=True,
        )
    except TypeError:
        return gr.Dataframe(
            value=_empty_df,
            headers=list(_empty_df.columns),
            wrap=True,
            label="You can edit cells before confirming to tweak wording.",
            interactive=True,
        )

# ---------- UI ----------
with gr.Blocks(title="MyAI101 — Quiz Video Maker (Gemini REST, No MoviePy, 1080x1920)") as demo:
    gr.Markdown("# MyAI101 — Quiz Video Maker (Gemini, **REST**, no MoviePy, 1080×1920)")

    # Topic (freeform; locked to a composed value by the Standards-mode callbacks)
    with gr.Row():
        topic_inp = gr.Textbox(label="Topic", placeholder="e.g. Backpropagation, SSL certificates, Photosynthesis")

    with gr.Row():
        diff_inp  = gr.Slider(1, 10, value=5, step=1, label="Difficulty (1 = child <10, 10 = professional)")
        count_inp = gr.Slider(1, 10, value=3, step=1, label="How many questions / videos to create")

    gr.Markdown("### AI Generation Settings (Gemini)")
    with gr.Row():
        api_key_inp = gr.Textbox(label="Gemini API Key", placeholder="Paste your Google AI Studio API key", type="password")
        model_inp   = gr.Dropdown(choices=["gemini-2.5-flash", "gemini-1.5-flash-001", "gemini-1.5-pro-001"], value="gemini-2.5-flash", label="Model")
        list_btn    = gr.Button("🔎 List Available Models")

    gr.Markdown("### Standards & Topic Alignment")
    with gr.Row():
        topic_preset_inp = gr.Dropdown(choices=TOPIC_PRESET_CHOICES, value="— choose —", label="AI Topic (presets)")
        # The empty first choice means "no pack selected".
        pack_inp   = gr.Dropdown(choices=[""] + list(PACKS.keys()), value="", label="Pack (advanced)")
        mode_inp   = gr.Radio(["Freeform", "Standards"], value="Standards", label="Mode")

    # Subtopic + Focus (used in Standards mode; ignored in Freeform)
    with gr.Row():
        subtopic_inp = gr.Dropdown(choices=["— none —"], value="— none —", label="Subtopic (optional)")
        focus_inp    = gr.Textbox(label="Focus (optional)", placeholder="e.g., KV cache limits on long contexts")

    with gr.Row():
        std_choices = standards_choices_for("")  # choices shown before any pack is selected
        standards_inp = gr.Dropdown(choices=std_choices, value="Consolidated (default)", label="Standards Source")
        strict_inp    = gr.Checkbox(label="Strict facts (require citations from pack)", value=False)

    with gr.Row():
        temp_inp    = gr.Slider(0.0, 1.0, value=0.6, step=0.1, label="Temperature (creativity)")
        max_tok_inp = gr.Slider(200, 4096, value=2800, step=100, label="Max output tokens")

    with gr.Row():
        preview_btn = gr.Button("🧠 Generate Preview (AI)")
        regen_btn   = gr.Button("↻ Regenerate")

    gr.Markdown("### Preview: Questions & Answers")
    # Per-session state: the generated rows and the topic they were generated for.
    preview_state = gr.State([])
    topic_state   = gr.State("")

    table  = _make_table()
    status = gr.Markdown(visible=True)

    with gr.Row():
        confirm_btn = gr.Button("✅ Confirm & Produce Videos", variant="primary")
    out_msg   = gr.Markdown()
    out_files = gr.Files(label="Rendered MP4s")

    # --- Callbacks (definitions) ---
    def _on_list_models(api_key):
        """Refresh the Model dropdown with the models available to ``api_key``.

        Returns a ``(dropdown_update, status_text)`` pair. The dropdown is left
        untouched when no key is given or when listing raises.
        """
        if not api_key:
            return gr.update(), "Enter API key first."
        try:
            available = filter_generate_content_models(list_models_v1(api_key))
            if not available:
                return gr.update(choices=[], value=None), "No generateContent-capable models found for this key."
            # Prefer the known-good defaults, in order, before falling back to the first listed.
            for candidate in ("gemini-2.5-flash", "gemini-1.5-flash-001"):
                if candidate in available:
                    selected = candidate
                    break
            else:
                selected = available[0]
            return gr.update(choices=available, value=selected), f"Found {len(available)} model(s)."
        except Exception as err:
            return gr.update(), f"Listing failed: {err}"

    def _on_topic_preset_change(preset_label, mode_value, focus_text):
        """React to a new topic preset.

        Returns updates for, in order: the pack dropdown, the standards
        dropdown, the subtopic dropdown (reset to "— none —") and the Topic box.
        """
        # A real preset selects its pack; the placeholder (or an unknown label) clears it.
        new_pack = ""
        if preset_label and preset_label != "— choose —":
            match = next((t for t in TOPIC_PRESETS if t["label"] == preset_label), None)
            if match:
                new_pack = match["pack"]

        # Standards choices and default for this pack
        choices = standards_choices_for(new_pack or "")
        if new_pack:
            default_std = default_standard_for(new_pack)
        else:
            default_std = "Consolidated (default)"
        if default_std not in choices and choices:
            default_std = choices[0]

        # Subtopics offered for the selected pack
        pack_subtopics = subtopics_for_pack(new_pack) if new_pack else []
        subs = ["— none —"] + pack_subtopics

        # In Standards mode the Topic box is composed and locked; otherwise keep the user's text.
        if mode_value != "Standards":
            topic_upd = gr.update(interactive=True)
        else:
            composed = compose_topic_display(preset_label, new_pack, "— none —", focus_text)
            topic_upd = gr.update(value=composed, interactive=False)

        pack_upd = gr.update(value=new_pack)
        standards_upd = gr.update(choices=choices, value=default_std)
        subtopic_upd = gr.update(choices=subs, value="— none —")
        return (pack_upd, standards_upd, subtopic_upd, topic_upd)

    def _on_pack_change(pack_id, mode_value, preset_label, focus_text):
        """React to a new pack selection.

        Returns updates for, in order: the standards dropdown, the subtopic
        dropdown (reset to "— none —") and the Topic box.
        """
        choices = standards_choices_for(pack_id or "")
        if pack_id:
            default_std = default_standard_for(pack_id)
        else:
            default_std = "Consolidated (default)"
        if default_std not in choices and choices:
            default_std = choices[0]

        pack_subtopics = subtopics_for_pack(pack_id) if pack_id else []
        subs = ["— none —"] + pack_subtopics

        # In Standards mode the Topic box is composed and locked; otherwise it stays editable.
        if mode_value != "Standards":
            topic_upd = gr.update(interactive=True)
        else:
            composed = compose_topic_display(preset_label, pack_id, "— none —", focus_text)
            topic_upd = gr.update(value=composed, interactive=False)

        standards_upd = gr.update(choices=choices, value=default_std)
        subtopic_upd = gr.update(choices=subs, value="— none —")
        return (standards_upd, subtopic_upd, topic_upd)

    def _on_subtopic_change(subtopic, mode_value, preset_label, pack_id, focus_text):
        """Recompose and lock the Topic box in Standards mode; otherwise leave it editable."""
        if mode_value != "Standards":
            return gr.update(interactive=True)
        composed = compose_topic_display(preset_label, pack_id, subtopic, focus_text)
        return gr.update(value=composed, interactive=False)

    def _on_focus_change(focus_text, mode_value, preset_label, pack_id, subtopic):
        """Recompose and lock the Topic box in Standards mode; otherwise leave it editable."""
        if mode_value != "Standards":
            return gr.update(interactive=True)
        composed = compose_topic_display(preset_label, pack_id, subtopic, focus_text)
        return gr.update(value=composed, interactive=False)

    def _on_mode_change(mode_value, current_pack, preset_label, subtopic, focus_text):
        """Switch the UI between Freeform and Standards mode.

        Returns updates for, in order: the pack dropdown, the standards
        dropdown, the strict checkbox and the Topic box.
        """
        if mode_value != "Freeform":
            # Standards mode: enable Pack/Standards/Strict and lock Topic to the composed value
            composed = compose_topic_display(preset_label, current_pack, subtopic, focus_text)
            pack_upd = gr.update(interactive=True, value=current_pack)
            standards_upd = gr.update(interactive=True)
            strict_upd = gr.update(interactive=True)
            topic_upd = gr.update(value=composed, interactive=False)
            return (pack_upd, standards_upd, strict_upd, topic_upd)

        # Freeform: Topic is editable; Pack/Standards/Strict are disabled and Strict is cleared
        pack_upd = gr.update(interactive=False)
        standards_upd = gr.update(interactive=False)
        strict_upd = gr.update(value=False, interactive=False)
        topic_upd = gr.update(interactive=True)
        return (pack_upd, standards_upd, strict_upd, topic_upd)

    def _compose_topic_for_preview(mode_value, freeform_topic, preset_label, pack_id, subtopic, focus_text) -> str:
        if mode_value == "Standards":
            t = compose_topic_display(preset_label, pack_id, subtopic, focus_text)
            return t or freeform_topic or ""
        return freeform_topic or ""

    def _on_preview(topic_text, diff, count, api_key, model_name, temperature, max_tokens,
                    mode_value, pack_id, strict_flag, std_choice, preset_label, subtopic, focus_text):
        """Generate the preview for the current UI settings.

        Returns ``(df, msg, rows, used_topic)`` for the table, the status line
        and the two state holders. Pack, standards source, subtopic and focus
        are only forwarded in Standards mode; strict mode additionally needs a
        selected pack whose ``PACKS`` entry has a truthy ``"facts"`` value.
        """
        standards_mode = mode_value == "Standards"

        # Final topic depends on the mode
        final_topic = _compose_topic_for_preview(mode_value, topic_text, preset_label, pack_id, subtopic, focus_text)

        # Freeform ignores pack & standards
        use_pack = pack_id if (standards_mode and pack_id) else ""
        use_std = std_choice if standards_mode else "Consolidated (default)"
        use_strict = False
        if strict_flag and use_pack:
            use_strict = bool(PACKS.get(use_pack, {}).get("facts"))

        df, msg, rows, used_topic = generate_preview_ai(
            final_topic, int(diff), int(count), api_key, model_name, float(temperature), int(max_tokens),
            pack_id=use_pack, strict=use_strict, standard_choice=use_std,
            subtopic_label=subtopic if standards_mode else "",
            focus_text=focus_text if standards_mode else "",
        )
        if not hasattr(df, "to_dict"):
            df = pd.DataFrame(df)

        # Tell the user which alignment was applied.
        if use_strict:
            msg = msg + " (Strict facts ON)"
        elif standards_mode and use_pack:
            msg = msg + f" (Aligned to {PACKS[use_pack]['name']} • {use_std})"
        return df, msg, rows, used_topic

    def _on_confirm(current_table, topic_used):
        """Render videos from the table as it currently stands (including user edits)."""
        return confirm_and_produce(items_table=current_table, topic=topic_used)

    # --- Wire events (inside Blocks) ---
    list_btn.click(fn=_on_list_models, inputs=[api_key_inp], outputs=[model_inp, status])

    # Each Standards control passes itself first, then the other values its handler needs.
    topic_preset_inp.change(
        fn=_on_topic_preset_change,
        inputs=[topic_preset_inp, mode_inp, focus_inp],
        outputs=[pack_inp, standards_inp, subtopic_inp, topic_inp],
    )
    pack_inp.change(
        fn=_on_pack_change,
        inputs=[pack_inp, mode_inp, topic_preset_inp, focus_inp],
        outputs=[standards_inp, subtopic_inp, topic_inp],
    )
    subtopic_inp.change(
        fn=_on_subtopic_change,
        inputs=[subtopic_inp, mode_inp, topic_preset_inp, pack_inp, focus_inp],
        outputs=[topic_inp],
    )
    focus_inp.change(
        fn=_on_focus_change,
        inputs=[focus_inp, mode_inp, topic_preset_inp, pack_inp, subtopic_inp],
        outputs=[topic_inp],
    )
    mode_inp.change(
        fn=_on_mode_change,
        inputs=[mode_inp, pack_inp, topic_preset_inp, subtopic_inp, focus_inp],
        outputs=[pack_inp, standards_inp, strict_inp, topic_inp],
    )

    # "Generate Preview" and "Regenerate" run the same handler with identical wiring.
    _preview_inputs = [
        topic_inp, diff_inp, count_inp, api_key_inp, model_inp, temp_inp, max_tok_inp,
        mode_inp, pack_inp, strict_inp, standards_inp, topic_preset_inp, subtopic_inp, focus_inp,
    ]
    _preview_outputs = [table, status, preview_state, topic_state]
    for _trigger in (preview_btn, regen_btn):
        _trigger.click(fn=_on_preview, inputs=_preview_inputs, outputs=_preview_outputs)

    confirm_btn.click(fn=_on_confirm, inputs=[table, topic_state], outputs=[out_msg, out_files])

# --- Launch (print URLs) ---
# Shut down any servers left over from earlier runs of this cell before relaunching.
gr.close_all()
gc.collect()
res = demo.launch(share=True, inbrowser=False, inline=False, show_error=True, debug=True, prevent_thread_lock=True)

# launch() returns an (app, local_url, share_url) tuple rather than an object
# with URL attributes, so unpack it; fall back to the attributes Gradio sets on
# the Blocks instance if the return shape differs in the installed version.
try:
    _app, local_url, share_url = res
except (TypeError, ValueError):
    local_url = share_url = None
local_url = local_url or getattr(demo, "local_url", None)
share_url = share_url or getattr(demo, "share_url", None)
print("Local URL:", local_url)
print("Public URL:", share_url)


Closing server running on port: 7860
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://a22cfb1ab7ea14c30e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://a22cfb1ab7ea14c30e.gradio.live
