<a href="https://colab.research.google.com/github/donkeytonk/DIB-R/blob/master/Video_Quiz_UI_Share_Link_v0_03_WORKING.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

0.03 NOTES
-- Fixed an issue where difficult questions came out too long to be readable in a short video.


In [12]:
# === ONE-CELL: MyAI101 Quiz Video Maker (1080x1920, Gemini-powered via REST, No MoviePy) ===
# - Vertical HD (1080x1920 @ 24fps), ffmpeg only (threads=1)
# - Gradio UI: Topic, Difficulty(1-10), Count(1-10), Gemini API Key, Model
# - Uses Gemini **v1 REST** (no SDK). Button to list models your key actually has.
# - Randomizes correct answer placement; editable preview table
# - Renders MP4s to /content/out/videos; prints Local/Public URLs

import os, sys, subprocess, random, string, time, gc, warnings, re, json
from dataclasses import dataclass
from typing import List, Tuple
from pathlib import Path

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# --- Headless safety ---
# Select SDL's "dummy" audio driver and point XDG_RUNTIME_DIR at a directory
# that is created right below.
# NOTE(review): presumably needed because the notebook host has no audio
# device or desktop session — confirm.
os.environ["SDL_AUDIODRIVER"] = "dummy"
os.environ["XDG_RUNTIME_DIR"]  = "/tmp/runtime"
os.makedirs("/tmp/runtime", exist_ok=True)

# --- Deps ---
# Python packages: check_call raises CalledProcessError when pip fails.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
                       "pillow>=10.0.0", "gradio>=4.0.0", "pandas>=1.3", "requests>=2.31"])
# System packages: subprocess.call only returns the exit status, so a failed
# apt-get does not raise here.
subprocess.call(["apt-get", "-y", "install", "-qq",
                 "ffmpeg", "fonts-dejavu-core", "fonts-liberation"])


import requests
import gradio as gr
import pandas as pd
from PIL import Image, ImageDraw, ImageFont

# ---------- RENDER SETTINGS ----------
# Canvas width/height in pixels and output frame rate.
W, H, FPS = 1080, 1920, 24
# Video bitrate string handed to ffmpeg (-b:v / -maxrate / -bufsize).
BITRATE = "3500k"

# RGB colour palette used by the slide renderers.
GREEN   = (16, 163, 74)
MINT    = (209, 250, 229)
DARK    = (15, 23, 42)
MID     = (51, 65, 85)
BG      = (255, 255, 255)
CHIP_BG = (243, 244, 246)

# Safe-area margins, derived as fractions of the canvas size.
SAFE_X   = int(W * 0.09)
SAFE_TOP = int(H * 0.08)
SAFE_BOT = int(H * 0.10)

# Height of the question title box and the minimum gap kept below it.
TITLE_BOX_H         = int(H * 0.36)
GAP_BELOW_TITLE_MIN = int(H * 0.10)

# Answer "chip" geometry: chip height, text-box height, total horizontal text
# padding, and the preferred vertical gap between chips.
CHIP_H      = 108
CHIP_TX_H   = 80
CHIP_TX_PAD = 90
CHIP_GAP    = int(H * 0.085)

# Size of the call-to-action button on the final slide.
CTA_BUTTON_W = 640
CTA_BUTTON_H = 120

# ------- Readability budgets (approx. 6s Q, 4.3s reveal) -------
# Character budgets enforced by lint_item_lengths(); the same numbers are
# written out literally in GEMINI_PROMPT_TEMPLATE.
Q_MAX = 130        # question max chars
OPT_MAX = 38       # per option max chars
EXP_MAX = 110      # explanation max chars

# Marker meant to be appended to clipped text so it stands out in the table.
# NOTE(review): not referenced anywhere in the visible part of the file.
CLIP_FLAG = " ·clipped"

# Output markers to bound JSON
BEGIN_JSON = "<<<JSON>>>"
END_JSON   = "<<<END>>>"

import re as _re

# Regex patterns for filler words/phrases that _kill_filler() deletes
# (matched case-insensitively, each on word boundaries).
_FILLER = [
    r"\bthat\b", r"\bvery\b", r"\bactually\b", r"\breally\b",
    r"\bjust\b", r"\bkind of\b", r"\bsort of\b", r"\bin order to\b",
]

def _squash_spaces(s: str) -> str:
    return _re.sub(r"\s+", " ", (s or "").strip())

def _kill_filler(s: str) -> str:
    """Delete every ``_FILLER`` pattern from *s* (case-insensitive), then collapse whitespace."""
    remaining = s
    for filler_pattern in _FILLER:
        remaining = _re.sub(filler_pattern, "", remaining, flags=_re.IGNORECASE)
    return _squash_spaces(remaining)

def _tighten_punct(s: str) -> str:
    """Apply small punctuation clean-ups and collapse whitespace.

    Falsy input is returned unchanged. The rewrites run in this order:
    remove whitespace before ``, . ; : ! ?``; unwrap parentheticals of at
    most 20 characters (the parentheses go, the content stays); turn runs of
    two or more hyphens into an en dash.
    """
    if not s:
        return s
    rewrites = (
        (r"\s+([,.;:!?])", r"\1"),
        (r"\(([^)]{0,20})\)", r"\1"),
        (r"--+", "–"),
    )
    tightened = s
    for pattern, replacement in rewrites:
        tightened = _re.sub(pattern, replacement, tightened)
    return _squash_spaces(tightened)

def _smart_clip(s: str, n: int, add_ellipsis: bool = True) -> Tuple[str, bool]:
    """Clip at a natural boundary ≤ n; return (text, clipped?)."""
    s = (s or "").strip()
    if len(s) <= n:
        return s, False
    cut = s[:n+1]
    m = _re.search(r"[.;:!?]\s+\S*$", cut)
    if m:
        out = cut[:m.start()].rstrip()
        return (out + "…", True) if add_ellipsis else (out, True)
    m = _re.search(r"\s+\S*$", cut)
    if m:
        out = cut[:m.start()].rstrip()
        return (out + "…", True) if add_ellipsis else (out, True)
    out = s[:n].rstrip()
    return (out + "…", True) if add_ellipsis else (out, True)

def compact_to(s: str, n: int, add_ellipsis: bool = True, *, prune_mode: str = "generic") -> Tuple[str, bool]:
    """
    Shrink *s* towards *n* characters, escalating only as far as needed:
    squeeze (spaces, punctuation, filler) -> abbreviate -> prune clauses -> clip.

    prune_mode: "generic" | "question" | "none" (any other value skips pruning).
    Returns (text, changed?). After the squeeze stage alone, "changed" is
    decided by comparing lengths with the input; later stages always report True.
    """
    original = s
    squeezed = _kill_filler(_tighten_punct(_squash_spaces(s)))
    if len(squeezed) <= n:
        return squeezed, len(squeezed) != len(original)

    abbreviated = _abbr_pass(squeezed)
    if len(abbreviated) <= n:
        return abbreviated, True

    # Pick the clause pruner for this mode; unknown modes leave the text alone.
    pruners = {"question": _prune_clauses_question, "generic": _prune_clauses}
    pruner = pruners.get(prune_mode)
    pruned = abbreviated if pruner is None else pruner(abbreviated)
    if len(pruned) <= n:
        return pruned, True

    # Nothing else helped: cut at a natural boundary.
    return _smart_clip(pruned, n, add_ellipsis=add_ellipsis)


def lint_item_lengths(q: str, options: List[str], exp: str, *, add_ellipsis: bool = False) -> Tuple[str, List[str], str, dict]:
    """Fit a question, its four options and its explanation into the length budgets.

    Returns ``(question, options, explanation, flags)``. ``options`` always has
    four entries (padded with "" or truncated). ``flags`` maps
    "Q", "A".."D", "EXP" to the "changed" value reported by compact_to().
    """
    flags = dict.fromkeys(("Q", "A", "B", "C", "D", "EXP"), False)

    # Keep the untouched question for the salvage path below.
    raw_question = q or ""

    # Drop verbose stems such as "Which of the following ...:".
    stem_free = _re.sub(
        r"^\s*(Which of the following|What of the following|Which statement).*?:\s*",
        "",
        raw_question,
        flags=_re.IGNORECASE,
    )

    question, flags["Q"] = compact_to(stem_free, Q_MAX, add_ellipsis=add_ellipsis, prune_mode="question")
    question = _finish_sentence(question, is_question=True)

    # Salvage: a very short result that opens with in/when/while/during/where
    # is probably a stub; redo the compaction without clause pruning and keep
    # that version when it is at least as long.
    too_short = len(question) < 24
    stub_opener = _re.match(r"^(in|when|while|during|where)\b", question.strip().lower())
    if too_short and stub_opener:
        retry, _ = compact_to(raw_question, Q_MAX, add_ellipsis=add_ellipsis, prune_mode="none")
        retry = _finish_sentence(retry, is_question=True)
        if len(retry) >= len(question):
            question = retry

    compacted_options = []
    four_options = (options + [""] * 4)[:4]
    for letter, option in zip(("A", "B", "C", "D"), four_options):
        text, changed = compact_to(option or "", OPT_MAX, add_ellipsis=add_ellipsis, prune_mode="generic")
        compacted_options.append(text)
        flags[letter] = changed

    explanation, flags["EXP"] = compact_to(exp or "", EXP_MAX, add_ellipsis=add_ellipsis, prune_mode="generic")
    explanation = _finish_sentence(explanation, is_question=False)

    return question, compacted_options, explanation, flags




# --------- JSON-ish cleanup + robust extraction ---------
def _strip_json_comments(s: str) -> str:
    s = re.sub(r"(^|\s)//.*?$", r"\1", s, flags=re.MULTILINE)
    s = re.sub(r"/\*.*?\*/", "", s, flags=re.DOTALL)
    return s

def _normalize_quotes(s: str) -> str:
    return (s or "").replace("\u201c", '"').replace("\u201d", '"').replace("\u2018", "'").replace("\u2019", "'")

def _remove_trailing_commas(s: str) -> str:
    s = re.sub(r",\s*([}\]])", r"\1", s)
    return s

def _clean_jsonish(s: str) -> str:
    """Run the JSON-ish clean-up pipeline and strip the result.

    Order: code fences -> curly quotes -> comments -> trailing commas.
    """
    pipeline = (
        _strip_code_fences,
        _normalize_quotes,
        _strip_json_comments,
        _remove_trailing_commas,
    )
    cleaned = s
    for step in pipeline:
        cleaned = step(cleaned)
    return cleaned.strip()

def _extract_between_markers(s: str):
    """Return the text between the first BEGIN_JSON and the last END_JSON.

    Returns None when either marker is missing or the last END_JSON does not
    come after the first BEGIN_JSON.
    """
    begin = s.find(BEGIN_JSON)
    end = s.rfind(END_JSON)
    if begin == -1 or end == -1 or end <= begin:
        return None
    return s[begin + len(BEGIN_JSON):end]

def _split_objects_by_brace(text: str) -> list:
    body, objs, depth, in_str, esc, start = text, [], 0, False, False, None
    for idx, ch in enumerate(body):
        if in_str:
            if esc: esc = False
            elif ch == "\\": esc = True
            elif ch == '"': in_str = False
        else:
            if ch == '"': in_str = True
            elif ch == "{":
                if depth == 0: start = idx
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0 and start is not None:
                    objs.append(body[start:idx+1]); start = None
    return objs

def _first_json_dict(s: str):
    """Parse *s* as JSON, trying progressively looser readings.

    Attempts, in order: the cleaned text as-is; the text wrapped by
    _coerce_objectish() (only when wrapping changed it); the slice from the
    first "{" to the last "}". Returns whatever json.loads produced for the
    first attempt that parses (not necessarily a dict), or None.
    """
    text = _clean_jsonish(s)
    try:
        return json.loads(text)
    except Exception:
        pass

    candidates = []
    # Bare key/value lines: retry with outer braces added.
    wrapped = _coerce_objectish(text)
    if wrapped != text:
        candidates.append(wrapped)
    # Surrounding chatter: retry with only the outermost brace span.
    if "{" in text and "}" in text:
        candidates.append(text[text.find("{"): text.rfind("}") + 1])

    for candidate in candidates:
        try:
            return json.loads(_clean_jsonish(candidate))
        except Exception:
            continue
    return None


def parse_questions_from_model(s: str) -> list:
    """
    Recover the list of question entries from raw model text.

    Shapes handled, in the order they are tried:
    - Object with a "questions" list
    - Object with a list under "items"/"mcqs"/"data"/"list"/"entries"
    - Bare array [...]
    - Last resort: every top-level {...} span is parsed on its own and kept
      when it looks like an MCQ (has "question" plus an answer/options key)
    Content between the BEGIN/END markers is preferred when both are present.

    Returns a list (possibly empty). In the first three cases the list is
    returned exactly as parsed, so its entries are not validated here.
    """
    # Narrow to the marked slice when the model respected the markers.
    between = _extract_between_markers(s)
    if between is not None:
      s = between

    # NEW: if the slice looks like key/value lines without braces, wrap it
    s = _coerce_objectish(s)

    s_clean = _clean_jsonish(s)



    # 1) whole-doc parse
    data = _first_json_dict(s_clean)
    if isinstance(data, dict):
        if isinstance(data.get("questions"), list):
            return data["questions"]
        # Accept a few alternative container keys.
        for k in ("items", "mcqs", "data", "list", "entries"):
            v = data.get(k)
            if isinstance(v, list):
                return v
    # Bare top-level array.
    try:
        arr = json.loads(s_clean)
        if isinstance(arr, list):
            return arr
    except Exception:
        pass

    # 2) last resort: split all objects and pick MCQ-like ones
    # NOTE: _split_objects_by_brace yields top-level spans only, so objects
    # nested inside a container that failed to parse are not reached here.
    objs = _split_objects_by_brace(s_clean)
    out = []
    for obj in objs:
        try:
            d = json.loads(_clean_jsonish(obj))
        except Exception:
            # Second chance: treat single quotes as double quotes.
            try:
                d = json.loads(_clean_jsonish(re.sub(r"'", '"', obj)))
            except Exception:
                continue
        if not isinstance(d, dict):
            continue
        if "question" in d and ("correct_answer" in d or "answer" in d or "correct" in d or "options" in d):
            # Normalise the answer onto "correct_answer".
            if "correct_answer" not in d:
                if "answer" in d: d["correct_answer"] = d["answer"]
                elif "correct" in d: d["correct_answer"] = d["correct"]
                elif "options" in d and isinstance(d.get("correct_index"), int):
                    # options + correct_index form: derive the answer and up to three distractors.
                    opts = d.get("options") or []
                    ci = d.get("correct_index")
                    if 0 <= ci < len(opts):
                        d["correct_answer"] = opts[ci]
                        d["distractors"] = [o for i, o in enumerate(opts) if i != ci][:3]
            out.append(d)
    return out

# ---------- Font + text helpers ----------
# Font files tried in order by _resolve_font_path(); the first one that exists
# wins. The goal is to avoid falling back to Pillow's tiny bitmap font.
FONT_CANDIDATES = [
    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
    "/usr/share/fonts/truetype/liberation2/LiberationSans-Bold.ttf",
    "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
]

def _resolve_font_path() -> str:
    """Return the path of a usable TrueType font, or "" when none is found.

    The first existing entry of FONT_CANDIDATES is preferred. Otherwise the
    first ``*.ttf`` found anywhere under /usr/share/fonts is used. Errors
    during that scan are swallowed and treated as "nothing found".
    """
    preferred = next((p for p in FONT_CANDIDATES if Path(p).exists()), None)
    if preferred is not None:
        return preferred
    try:
        any_ttf = next(iter(Path("/usr/share/fonts").rglob("*.ttf")), None)
    except Exception:
        any_ttf = None
    # An empty string makes _font() use the bitmap fallback.
    return "" if any_ttf is None else str(any_ttf)

# Resolve the font once at import time; an empty path means "no TrueType font".
DEFAULT_FONT_PATH = _resolve_font_path()
DEFAULT_FONT_EXISTS = bool(DEFAULT_FONT_PATH)

# Finished videos and temporary slide PNGs; both directories are created here.
OUT_DIR = Path("/content/out/videos"); OUT_DIR.mkdir(parents=True, exist_ok=True)
TMP_DIR = Path("/content/out/tmp"); TMP_DIR.mkdir(parents=True, exist_ok=True)

def _font(pt: int) -> ImageFont.FreeTypeFont:
    """Return a font at *pt* points, degrading gracefully.

    Order of preference:
    1. The resolved TrueType file (DEFAULT_FONT_PATH).
    2. Pillow's built-in default at the requested size (Pillow >= 10.1
       accepts ``size``), so the shrink-to-fit loops still get a scaled font.
    3. Pillow's fixed-size bitmap default (older Pillow or no FreeType); in
       that case the returned object is not a FreeTypeFont.
    """
    if DEFAULT_FONT_EXISTS:
        try:
            return ImageFont.truetype(DEFAULT_FONT_PATH, pt)
        except OSError:
            # The resolved file may be unreadable or not a valid font (the
            # resolver can pick an arbitrary *.ttf); fall through to the default.
            pass
    try:
        return ImageFont.load_default(size=pt)
    except (TypeError, ImportError, OSError):
        # TypeError: Pillow < 10.1 has no ``size`` argument.
        # ImportError/OSError: scalable default unavailable without FreeType.
        return ImageFont.load_default()

def _finish_sentence(t: str, *, is_question: bool) -> str:
    t = (t or "").strip()
    # remove dangling tails like ", which", "—", "including", etc.
    t = re.sub(r"(,\s*(which|that|who|when|where)\b.*)$", "", t, flags=re.IGNORECASE)
    t = re.sub(r"[\s:;,\-–—]+$", "", t)  # trim trailing punctuation fragments
    t = re.sub(r"\b(such as|including|like|for example|e\.g\.)\s*$", "", t, flags=re.IGNORECASE)
    # normalize end mark
    if is_question:
        t = t.rstrip(".")  # don't end a question with '.'
        if not t.endswith("?"):
            t += "?"
    else:
        if not re.search(r"[.!?]$", t):
            t += "."
    return t



def _text_wrap(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont, max_w: int):
    """Greedy word-wrap *text* to lines no wider than *max_w* pixels.

    Widths are measured with ``draw.textbbox``. A single word wider than
    *max_w* still gets a line of its own. After wrapping, a short last line
    (one or two words) borrows the final word of the previous line to avoid a
    widow, but only when the merged line still fits *max_w*.

    Returns the list of lines (empty for empty or whitespace-only text).
    """
    def _width(s: str) -> int:
        box = draw.textbbox((0, 0), s, font=font)
        return box[2] - box[0]

    lines, cur = [], ""
    for word in (text or "").split():
        candidate = f"{cur} {word}".strip()
        if not cur or _width(candidate) <= max_w:
            cur = candidate
        else:
            lines.append(cur)
            cur = word
    if cur:
        lines.append(cur)

    if len(lines) >= 2:
        prev_words = lines[-2].split()
        last_words = lines[-1].split()
        if len(last_words) <= 2 and len(prev_words) > 2:
            merged = " ".join([prev_words[-1]] + last_words)
            # Rebalance only if it does not push the last line past the box.
            if _width(merged) <= max_w:
                lines[-2] = " ".join(prev_words[:-1])
                lines[-1] = merged
    return lines

def _draw_text_block(img, box, text, color, max_pt, min_pt, leading_ratio=0.30, stroke=0, stroke_color=(255,255,255), align="center"):
    """Draw *text* inside *box*, using the largest font size that fits its height.

    Args:
        img: Image to draw on (modified in place).
        box: ``(x, y, w, h)`` of the target area in pixels.
        text: Text to draw; it is word-wrapped to width ``w``.
        color: Fill colour of the glyphs.
        max_pt, min_pt: Font sizes are tried from ``max_pt`` downwards in
            steps of 2 until the wrapped block is no taller than ``h``.
        leading_ratio: Extra space between lines, as a fraction of the size.
        stroke, stroke_color: Optional outline width and colour.
        align: "center", "left", or anything else for right alignment.

    When even the smallest tried size does not fit, the text is laid out
    wrapped at ``min_pt`` and drawn from the top of the box with the requested
    alignment. It may run past the bottom of the box, but it stays inside the
    box width instead of being drawn as one over-long line.
    """
    x, y, w, h = box
    draw = ImageDraw.Draw(img)

    def _layout(pt):
        """Wrap and measure the text at one font size."""
        f = _font(pt)
        lines = _text_wrap(draw, text, f, w)
        sizes = []
        for ln in lines:
            left, top, right, bottom = draw.textbbox((0, 0), ln, font=f)
            sizes.append((right - left, bottom - top))
        gap = int(pt * leading_ratio)
        total_h = sum(th for _, th in sizes) + gap * (len(lines) - 1)
        return f, lines, sizes, gap, total_h

    layout = None
    for pt in range(max_pt, min_pt - 1, -2):
        candidate = _layout(pt)
        if candidate[4] <= h:
            layout = candidate
            break
    if layout is None:
        # Nothing fit: use the smallest size, still wrapped.
        layout = _layout(min_pt)

    f, lines, sizes, gap, total_h = layout
    # Centre vertically when the block fits; otherwise start at the top edge.
    cur_y = y + max(0, (h - total_h) // 2)
    for ln, (tw, th) in zip(lines, sizes):
        if align == "center":
            tx = x + (w - tw) // 2
        elif align == "left":
            tx = x
        else:
            tx = x + (w - tw)
        if stroke > 0:
            # Outline pass first, then the filled glyphs on top.
            draw.text((tx, cur_y), ln, font=f, fill=stroke_color, stroke_width=stroke, stroke_fill=stroke_color)
        draw.text((tx, cur_y), ln, font=f, fill=color, stroke_width=stroke, stroke_fill=stroke_color)
        cur_y += th + gap

# ---------- Slide renderers ----------
def _badge(img: Image.Image):
    """Draw the brand strip along the top edge and the "MyAI101" pill in the top-right corner."""
    canvas = ImageDraw.Draw(img)
    canvas.rectangle([0, 0, W, 8], fill=GREEN)

    pill_w, pill_h = 300, 72
    left = W - pill_w - 36
    top = 36
    canvas.rounded_rectangle([left, top, left + pill_w, top + pill_h], radius=20, fill=MINT)

    label_box = (left + 22, top + 14, pill_w - 44, pill_h - 28)
    _draw_text_block(img, label_box, "MyAI101", DARK, 56, 32, align="left")

def _chip(img: Image.Image, y_center: int, text: str):
    """Draw one answer chip, vertically centred on *y_center*, with left-aligned *text*."""
    canvas = ImageDraw.Draw(img)
    chip_w = W - 2*SAFE_X
    left = (W - chip_w)//2
    top = y_center - CHIP_H//2
    right = left + chip_w

    # Chip body plus a thin accent bar along its top edge.
    canvas.rounded_rectangle([left, top, right, top + CHIP_H], radius=18, fill=CHIP_BG)
    canvas.rectangle([left, top, right, top + 4], fill=GREEN)

    text_box = (
        left + CHIP_TX_PAD//2,
        top + (CHIP_H - CHIP_TX_H)//2,
        chip_w - CHIP_TX_PAD,
        CHIP_TX_H,
    )
    _draw_text_block(img, text_box, text, DARK, max_pt=54, min_pt=30, align="left")

def _question_slide(question: str, options: List[str]) -> Image.Image:
    """Render the question slide: badge, question title, and one lettered chip per option."""
    img = Image.new("RGB", (W, H), BG)
    _badge(img)
    _draw_text_block(img, (SAFE_X, SAFE_TOP, W - 2*SAFE_X, TITLE_BOX_H),
                     question, DARK, 84, 34, stroke=2, stroke_color=(255,255,255))

    n = len(options)
    if n == 0:
        return img

    # Vertical band reserved for the chips.
    band_top = max(SAFE_TOP + TITLE_BOX_H + GAP_BELOW_TITLE_MIN, SAFE_TOP + int(H*0.34))
    band_bot = min(H - SAFE_BOT, int(H*0.86))
    band_h = band_bot - band_top

    # Use the preferred gap when everything fits; otherwise squeeze it (never below 18px).
    if n*CHIP_H + (n-1)*CHIP_GAP <= band_h:
        gap = CHIP_GAP
    else:
        gap = max(18, int((band_h - n*CHIP_H) / max(1, n-1)))

    first_center = band_top + CHIP_H//2
    for i, opt in enumerate(options):
        _chip(img, first_center + i*(CHIP_H + gap), f"{chr(65+i)}. {opt}")
    return img

def _reveal_slide(correct: str, explanation: str) -> Image.Image:
    """Render the reveal slide: badge, an "Answer: ..." card, and the explanation below it."""
    img = Image.new("RGB", (W, H), BG)
    _badge(img)

    card_w = W - 2*SAFE_X
    card_h = 140
    card_x, card_y = SAFE_X, int(H*0.34)
    ImageDraw.Draw(img).rounded_rectangle(
        [card_x, card_y, card_x + card_w, card_y + card_h], radius=20, fill=MINT)

    answer_box = (card_x + 20, card_y + 16, card_w - 40, card_h - 32)
    _draw_text_block(img, answer_box, f"Answer: {correct}", DARK, 80, 40)

    explanation_box = (SAFE_X, int(H*0.54), W - 2*SAFE_X, int(H*0.28))
    _draw_text_block(img, explanation_box, explanation, MID, 62, 32)
    return img

def _cta_slide() -> Image.Image:
    """Render the closing call-to-action slide: badge, brand name, tagline and button."""
    img = Image.new("RGB", (W, H), BG)
    canvas = ImageDraw.Draw(img)
    _badge(img)

    content_w = W - 2*SAFE_X
    _draw_text_block(img, (SAFE_X, int(H*0.36), content_w, int(H*0.22)), "MyAI101", DARK, 160, 84)
    _draw_text_block(img, (SAFE_X, int(H*0.54), content_w, int(H*0.16)),
                     "Daily AI literacy in 60 seconds", MID, 72, 38)

    # Centred button with its label inset from the edges.
    btn_x = (W - CTA_BUTTON_W)//2
    btn_y = int(H*0.74)
    canvas.rounded_rectangle([btn_x, btn_y, btn_x + CTA_BUTTON_W, btn_y + CTA_BUTTON_H], radius=22, fill=GREEN)
    label_box = (btn_x + 18, btn_y + 10, CTA_BUTTON_W - 36, CTA_BUTTON_H - 20)
    _draw_text_block(img, label_box, "Start Learning for Free", (255,255,255), 54, 28)
    return img

# ---------- Local fallback generator ----------
# One multiple-choice question, ready to be rendered as a video.
@dataclass
class QuizItem:
    topic: str            # topic the question was generated for
    question: str         # question text shown on the first slide
    options: List[str]    # answer options in display order
    answer_index: int     # index into `options` of the correct answer
    explanation: str      # rationale shown on the reveal slide

# Question templates for the local fallback generator; "{topic}" is filled in
# by make_quiz_item_local(). _pick_template() chooses the pool by difficulty.
EASY_TEMPLATES = [
    "Which of these is an example of {topic}?",
    "What is {topic} mainly used for?",
    "Which choice best matches {topic}?",
]
MEDIUM_TEMPLATES = [
    "Which scenario best illustrates {topic} in practice?",
    "Which statement about {topic} is correct?",
    "What is a common use case of {topic}?",
]
HARD_TEMPLATES = [
    "Which of the following is most accurate regarding {topic}?",
    "In applied settings, which describes {topic} most precisely?",
    "Which statement about {topic} reflects best practice?",
]
# Generic wrong-answer pools; _generate_options_local() samples three per question.
EASY_DISTRACTORS = ["Something unrelated","A wrong idea","Not quite right","Another choice","Sounds similar but isn't"]
MEDIUM_DISTRACTORS = ["A partially correct statement","A common misconception","An unrelated technique","A vague description"]
HARD_DISTRACTORS = ["A subtle misconception","A related but incorrect method","An imprecise definition","A misleading best practice"]

def _pick_template(difficulty: int) -> str:
    """Pick a random question template: easy for <=3, medium for <=7, hard otherwise."""
    if difficulty <= 3:
        pool = EASY_TEMPLATES
    elif difficulty <= 7:
        pool = MEDIUM_TEMPLATES
    else:
        pool = HARD_TEMPLATES
    return random.choice(pool)

def _generate_options_local(topic: str, difficulty: int) -> Tuple[List[str], int, str]:
    """Build four shuffled placeholder options for *topic*.

    Returns ``(options, answer_index, explanation)``: three distractors
    sampled from the pool for the difficulty tier plus one generic correct
    answer, in random order.
    """
    subject = topic.strip().rstrip("?.!")

    if difficulty <= 3:
        correct = f"A simple example of {subject}"
        pool = EASY_DISTRACTORS
    elif difficulty <= 7:
        correct = f"A practical use case of {subject}"
        pool = MEDIUM_DISTRACTORS
    else:
        correct = f"A precise description of {subject}"
        pool = HARD_DISTRACTORS

    options = [*random.sample(pool, k=3), correct]
    random.shuffle(options)

    if difficulty >= 4:
        explanation = f"The correct option describes {subject} more appropriately than the others."
    else:
        explanation = f"It's the best match for {subject}."
    return options, options.index(correct), explanation

def make_quiz_item_local(topic: str, difficulty: int) -> QuizItem:
    """Create a template-based QuizItem without calling any external service."""
    question = _pick_template(difficulty).format(topic=topic)
    choices, correct_at, why = _generate_options_local(topic, difficulty)
    return QuizItem(
        topic=topic,
        question=question,
        options=choices,
        answer_index=correct_at,
        explanation=why,
    )

# ---------- Gemini prompt ----------
# Difficulty rubric inserted into the prompt as {difficulty_guide}.
# NOTE(review): the 5-10 tiers ask for "1–2" / "2–3 sentences", which pulls
# against the hard 130-character question limit stated in the template below
# — confirm whether the sentence counts should stay.
DIFFICULTY_GUIDE = """
Map difficulty 1–10 to these constraints:
1–2: kid-simple; one sentence; no jargon; obvious distractors.
3–4: basic recognition; short phrasing; simple plausible distractors.
5–6: intermediate conceptual; 1–2 sentences; plausible/related distractors.
7–8: advanced application or edge cases; 2–3 sentences; subtle distractors.
9–10: professional nuance; 2–3 concise sentences; highly plausible distractors with subtle traps.
"""

# Prompt sent to Gemini. str.format() placeholders: {count}, {topic},
# {difficulty}, {difficulty_guide}, {BEGIN_JSON}, {END_JSON}; doubled braces
# are literal JSON braces. The 130/38/110 limits are written out literally and
# match Q_MAX / OPT_MAX / EXP_MAX, so they must be kept in sync by hand.
GEMINI_PROMPT_TEMPLATE = """
You are an expert quiz writer for 1080×1920 SHORT videos.

Produce {count} distinct multiple-choice questions on the topic.

Topic: "{topic}"
Difficulty (1-10): {difficulty}

{difficulty_guide}

HARD LENGTH LIMITS (NEVER EXCEED):
- question <= 130 characters
- each option <= 38 characters
- rationale <= 110 characters

STYLE:
- No preambles like "Which of the following". Write the stem directly.
- Everyday words. No parentheticals, citations, footnotes, or emojis.
- Difficulty comes from idea and distractor plausibility, not length.
- Exactly 1 correct answer, 3 plausible distractors. No "All/None of the above".

Return ONLY a single JSON object BETWEEN the markers below.
Start your first character with the JSON after {BEGIN_JSON} and end before {END_JSON}.
Do not include any text outside the markers. No markdown fences.

{BEGIN_JSON}
{{
  "topic": "string",
  "difficulty": {difficulty},
  "questions": [
    {{
      "question": "string (<=130 chars)",
      "correct_answer": "string (<=38 chars)",
      "distractors": ["string (<=38)","string (<=38)","string (<=38)"],
      "rationale": "string (<=110 chars)"
    }}
  ]
}}
{END_JSON}
"""


def _strip_code_fences(s: str) -> str:
    s = s.strip()
    if s.startswith("```"):
        s = re.sub(r"^```(?:json)?", "", s.strip(), flags=re.IGNORECASE).strip()
        s = re.sub(r"```$", "", s.strip()).strip()
    return s

def _ensure_3_distractors(distractors: List[str], correct: str) -> List[str]:
    seen, out = set(), []
    for d in distractors:
        d = (d or "").strip()
        if not d: continue
        if d.lower() == (correct or "").strip().lower(): continue
        if d.lower() in seen: continue
        out.append(d); seen.add(d.lower())
    while len(out) < 3:
        out.append(f"Alternative {len(out)+1}")
    return out[:3]

def _shuffle_with_correct(correct: str, distractors: List[str]) -> Tuple[List[str], int]:
    opts = distractors[:3] + [correct]
    random.shuffle(opts)
    idx = opts.index(correct)
    return opts, idx


def _coerce_objectish(s: str) -> str:
    """
    If the text between markers looks like top-level key/value lines but lacks
    outer braces, wrap it into {...}. Also trims leading/trailing junk.
    """
    t = (s or "").strip()
    if not t:
        return t
    # Already an object/array? leave it.
    if t.startswith("{") or t.startswith("["):
        return t
    # Common case we see: starts with "topic": ... or "questions": ...
    if t.startswith('"topic"') or t.startswith('"difficulty"') or t.startswith('"questions"'):
        # Avoid double closing brace if user/model already placed a stray "}"
        t = t.rstrip()
        if not t.endswith("}"):
            t = t + "}"
        return "{" + t
    # Fallback: return as-is
    return t

# --- extra shrink helpers ---
_ABBR_REPL = [
    (r"\bapproximately\b", "~"),
    (r"\babout\b", "~"),
    (r"\baround\b", "~"),
    (r"\bversus\b", "vs"),
    (r"\band\b", "&"),
    (r"\bpercent\b", "%"),
    (r"\bper\s+cent\b", "%"),
    (r"\byears?\b", "yrs"),
    (r"\bminutes?\b", "min"),
    (r"\bhours?\b", "h"),
    (r"\bseconds?\b", "s"),
    (r"\bmillion\b", "M"),
    (r"\bbillion\b", "B"),
    (r"\bUnited States\b", "US"),
    (r"\bUnited Kingdom\b", "UK"),
    (r"\bkilometers per hour\b", "km/h"),
    (r"\bkilometres per hour\b", "km/h"),
    (r"\bmiles per hour\b", "mph"),
]
_MONTHS = {
    "January":"Jan","February":"Feb","March":"Mar","April":"Apr","June":"Jun",
    "July":"Jul","August":"Aug","September":"Sep","October":"Oct","November":"Nov","December":"Dec",
    "May":"May",
}

def _abbr_pass(s: str) -> str:
    t = s
    for pat, rep in _ABBR_REPL:
        t = _re.sub(pat, rep, t, flags=_re.IGNORECASE)
    for long, short in _MONTHS.items():
        t = _re.sub(rf"\b{long}\b", short, t)
    # compact number + unit (e.g., "10 years" -> "10 yrs")
    t = _re.sub(r"\b(\d+)\s+yrs?\b", r"\1 yrs", t)
    t = _re.sub(r"\b(\d+)\s+minutes?\b", r"\1 min", t)
    t = _re.sub(r"\b(\d+)\s+hours?\b", r"\1 h", t)
    t = _squash_spaces(t)
    return t

def _prune_clauses(s: str) -> str:
    """
    Cut trailing descriptive material, applying these rules in order:
    a comma followed by which/that/who/when/where (any case) and everything
    after it; everything from a spaced hyphen or dash onward; everything
    from a colon followed by whitespace onward. Whitespace is collapsed last.
    """
    pruned = _re.sub(r",\s+(which|that|who|when|where)\b.*$", "", s, flags=_re.IGNORECASE)
    for tail_pattern in (r"\s+[–—-]\s+.*$", r":\s+.*$"):
        pruned = _re.sub(tail_pattern, "", pruned)
    return _squash_spaces(pruned)


def _prune_clauses_question(s: str) -> str:
    """
    Gentler pruning for question text.

    Comma tails are removed only when they read as descriptions
    (", which is ...", ", where they were ..."). Spaced dash tails and colon
    tails are always removed. Whitespace is collapsed at the end.
    """
    descriptive_tails = (
        r",\s+(which|that|who)\s+(is|are|was|were)\b.*$",
        r",\s+(when|where)\s+(it|they)\s+(is|are|was|were)\b.*$",
    )
    structural_tails = (
        r"\s+[–—-]\s+.*$",
        r":\s+.*$",
    )
    pruned = s
    for pattern in descriptive_tails:
        pruned = _re.sub(pattern, "", pruned, flags=_re.IGNORECASE)
    for pattern in structural_tails:
        pruned = _re.sub(pattern, "", pruned)
    return _squash_spaces(pruned)



# ---------- Gemini REST helpers ----------
# Base URL prefixed to every REST call made by the helpers below.
API_BASE = "https://generativelanguage.googleapis.com/v1"

def list_models_v1(api_key: str) -> list:
    """List the models visible to *api_key* via ``GET {API_BASE}/models``.

    Follows ``nextPageToken`` so models beyond the first page are included.

    Returns:
        The concatenated ``models`` entries (raw dicts) from every page.

    Raises:
        RuntimeError: On any HTTP status >= 400, with the first 200 characters
            of the response body.
    """
    models = []
    params = {"key": api_key}
    # Bounded loop: protects against a server that keeps returning a page token.
    for _ in range(20):
        r = requests.get(f"{API_BASE}/models", params=params, timeout=30)
        if r.status_code >= 400:
            raise RuntimeError(f"REST {r.status_code}: {r.text[:200]}")
        data = r.json() or {}
        models.extend(data.get("models", []) or [])
        token = data.get("nextPageToken")
        if not token:
            break
        params = {"key": api_key, "pageToken": token}
    return models

def filter_generate_content_models(models: list) -> list:
    """Return the short names of models that support ``generateContent``.

    Both the camelCase and snake_case spellings of the methods key are
    accepted. The short name is the part after the last "/" of ``name``.
    Entries without a name are skipped.
    """
    short_names = []
    for model in models:
        methods = (
            model.get("supportedGenerationMethods")
            or model.get("supported_generation_methods")
            or []
        )
        if "generateContent" not in methods:
            continue
        full_name = model.get("name", "")
        if not full_name:
            continue
        short_names.append(full_name.rsplit("/", 1)[-1])
    return short_names

def gemini_generate_v1(api_key: str, model: str, prompt: str,
                       temperature: float, max_output_tokens: int) -> str:
    """Call ``{API_BASE}/models/{model}:generateContent`` and return the generated text.

    Returns the concatenated ``text`` parts of the first candidate.

    Raises:
        RuntimeError: On HTTP status >= 400, when the response has no
            candidates, or when the first candidate carries no text (the
            message then includes its ``finishReason``).
    """
    generation_config = {
        "temperature": float(temperature),
        "topP": 0.9,
        "topK": 40,
        "maxOutputTokens": int(max_output_tokens),
        "candidateCount": 1,
    }
    body = {
        "contents": [{"role": "user", "parts": [{"text": prompt}]}],
        "generationConfig": generation_config,
    }
    resp = requests.post(
        f"{API_BASE}/models/{model}:generateContent",
        params={"key": api_key},
        json=body,
        timeout=60,
    )
    if resp.status_code >= 400:
        raise RuntimeError(f"REST {resp.status_code}: {resp.text[:200]}")
    data = resp.json()

    candidates = data.get("candidates", [])
    if not candidates:
        raise RuntimeError(f"Empty candidates: {data}")

    first = candidates[0]
    finish = first.get("finishReason")
    content = first.get("content") or {}
    pieces = [part.get("text", "") for part in (content.get("parts") or []) if isinstance(part, dict)]
    text = "".join(pieces)

    if not text:
        # Callers match on this message text to detect MAX_TOKENS / empty replies.
        raise RuntimeError(f"No text in response (finishReason={finish}). Raw: {json.dumps(data)[:400]}")

    return text

def _candidate_models_ordered(requested: str, available: list) -> list:
    req = (requested or "").strip()
    if req.endswith("-latest"):
        req = req[:-7] + "-001"
    order = []
    if req:
        order.append(req)
    for m in ["gemini-2.5-flash", "gemini-1.5-flash-001", "gemini-1.5-pro-001", "gemini-2.0-flash-exp", "gemini-pro", "gemini-1.0-pro"]:
        if m not in order:
            order.append(m)
    avail = set(available)
    return [m for m in order if m in avail]

# ---------- AI question generator (REST) ----------
def _first_text(d: dict, *keys: str) -> str:
    """Return the first non-empty value among *keys* of *d*, as a stripped string."""
    for key in keys:
        value = d.get(key)
        if value is None:
            continue
        text = str(value).strip()   # the model may return numbers for numeric answers
        if text:
            return text
    return ""


def generate_questions_gemini(api_key: str, model_name: str, topic: str, difficulty: int, count: int,
                              temperature: float = 0.6, max_output_tokens: int = 1600):
    """Generate quiz items with Gemini, falling back to the local generator.

    Args:
        api_key: Gemini API key; when empty the local generator is used directly.
        model_name: Preferred model; other available models are tried after it.
        topic: Quiz topic (stripped).
        difficulty: Clamped to 1..10.
        count: Number of items wanted, clamped to 1..10.
        temperature: Sampling temperature passed to the API.
        max_output_tokens: Lower bound for the output-token budget.

    Returns:
        ``(items, message)`` — a list of QuizItem and a human-readable status.

    Each candidate model is tried in turn. Questions are requested in small
    chunks (smaller for harder difficulties). On a MAX_TOKENS/empty reply or an
    unparsable reply, the chunk drops to one question, then the token budget is
    raised once. Any other failure moves on to the next model. Items collected
    from a model that later fails are discarded, so the result never mixes
    attempts or exceeds *count*. If every model fails, *count* local items are
    returned together with the last error.
    """
    topic = (topic or "").strip()
    difficulty = int(max(1, min(10, difficulty)))
    count = int(max(1, min(10, count)))
    info_msgs: List[str] = []

    if not api_key:
        info_msgs.append("No Gemini API key provided — using local generator.")
        items = [make_quiz_item_local(topic, difficulty) for _ in range(count)]
        return items, " ".join(info_msgs)

    # Discover models
    try:
        available = filter_generate_content_models(list_models_v1(api_key))
    except Exception as e:
        available = []
        info_msgs.append(f"Model listing failed ({e}); attempting common defaults.")

    defaults = ["gemini-2.5-flash", "gemini-1.5-flash-001", "gemini-1.5-pro-001", "gemini-pro", "gemini-1.0-pro"]
    to_try = _candidate_models_ordered(model_name, available or defaults) or defaults

    last_err = None
    for m in to_try:
        # Fresh result list per model: partial output of a failed attempt must not leak.
        items: List[QuizItem] = []
        try:
            remaining = count
            # Difficulty-aware chunking + token headroom
            if difficulty >= 9:
                chunk = 1
            elif difficulty >= 7:
                chunk = min(2, remaining)
            else:
                chunk = min(3, remaining)

            tok = max(max_output_tokens if max_output_tokens else 1600,
                      1400 if difficulty >= 7 else 1200)

            while remaining > 0:
                take = min(chunk, remaining)
                prompt = GEMINI_PROMPT_TEMPLATE.format(
                    count=take,
                    topic=topic,
                    difficulty=difficulty,
                    difficulty_guide=DIFFICULTY_GUIDE,
                    BEGIN_JSON=BEGIN_JSON,
                    END_JSON=END_JSON,
                )

                # Try call, back off on MAX_TOKENS/no-text
                try:
                    text = gemini_generate_v1(api_key, m, prompt, temperature, tok)
                except Exception as e:
                    msg = str(e)
                    if "finishReason=MAX_TOKENS" not in msg and "No text in response" not in msg:
                        raise
                    if take > 1:
                        chunk = 1
                        continue
                    tok = min(tok + 300, 4096)
                    text = gemini_generate_v1(api_key, m, prompt, temperature, tok)

                qlist = parse_questions_from_model(text)
                if not qlist:
                    # Same back-off for replies that could not be parsed.
                    if take > 1:
                        chunk = 1
                        continue
                    tok = min(tok + 300, 4096)
                    text = gemini_generate_v1(api_key, m, prompt, temperature, tok)
                    qlist = parse_questions_from_model(text)
                    if not qlist:
                        raise ValueError("Gemini returned no questions (parse).")

                use_n = min(len(qlist), remaining)
                for q in qlist[:use_n]:
                    if not isinstance(q, dict):
                        continue   # parsed lists are not validated upstream
                    qtext = _first_text(q, "question")
                    correct = _first_text(q, "correct_answer", "answer", "correct")
                    rationale = _first_text(q, "rationale", "explanation")
                    if not qtext or not correct:
                        continue
                    raw_distractors = q.get("distractors") or []
                    if isinstance(raw_distractors, str):
                        raw_distractors = [raw_distractors]
                    elif not isinstance(raw_distractors, (list, tuple)):
                        raw_distractors = []
                    distractors = _ensure_3_distractors(
                        [str(d) for d in raw_distractors if d is not None], correct)
                    options, answer_index = _shuffle_with_correct(correct, distractors)
                    qtext, options, rationale, _ = lint_item_lengths(qtext, options, rationale)
                    items.append(QuizItem(topic=topic, question=qtext, options=options,
                                          answer_index=answer_index, explanation=rationale))

                remaining -= use_n
                chunk = min(chunk, remaining) if remaining > 0 else chunk

            if not items:
                # Every returned entry was unusable: treat as a failure of this model.
                raise ValueError("Gemini returned no usable questions.")

            notes = list(info_msgs)
            if m != model_name:
                notes.append(f"Using available model: {m}")
            return items, (" ".join(notes) if notes else f"Generated {len(items)} item(s) via Gemini.")
        except Exception as e:
            last_err = e
            continue

    info_msgs.append(f"Gemini error: {last_err}. Falling back to local generator.")
    items = [make_quiz_item_local(topic, difficulty) for _ in range(count)]
    return items, " ".join(info_msgs)

# ---------- FFmpeg assembly ----------
def _save_png(img: Image.Image, path: Path):
    """Write ``img`` to ``path`` as an optimized PNG.

    Missing parent directories of ``path`` are created first.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    destination = str(path)
    img.save(destination, format="PNG", optimize=True)

def _ffmpeg_build(out_path: Path, question_png: Path, reveal_png: Path, cta_png: Path,
                  q_sec=6.0, r_sec=4.3, c_sec=2.9):
    """Join three still images into a single H.264 MP4 with ffmpeg.

    Each PNG is looped for its own duration, scaled to ``W`` x ``H``, and the
    three segments are concatenated in the order question, reveal, CTA.

    Args:
        out_path: Destination video file; overwritten if present (``-y``).
        question_png: Image shown first.
        reveal_png: Image shown second.
        cta_png: Image shown last.
        q_sec: Seconds to show the question image.
        r_sec: Seconds to show the reveal image.
        c_sec: Seconds to show the CTA image.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
            The exception's ``stderr`` attribute holds ffmpeg's error output
            as text, so callers can report why the render failed.
    """
    cmd = [
        "ffmpeg", "-y",
        "-loglevel", "error",
        "-threads", "1",
        "-loop", "1", "-t", f"{q_sec}", "-i", str(question_png),
        "-loop", "1", "-t", f"{r_sec}", "-i", str(reveal_png),
        "-loop", "1", "-t", f"{c_sec}", "-i", str(cta_png),
        "-filter_complex",
        (
            f"[0:v]scale={W}:{H},setsar=1[v0];"
            f"[1:v]scale={W}:{H},setsar=1[v1];"
            f"[2:v]scale={W}:{H},setsar=1[v2];"
            f"[v0][v1][v2]concat=n=3:v=1:a=0,format=yuv420p[v]"
        ),
        "-map", "[v]", "-r", str(FPS),
        "-c:v", "libx264", "-preset", "veryfast",
        "-b:v", BITRATE, "-maxrate", BITRATE, "-bufsize", BITRATE,
        "-movflags", "+faststart", "-g", str(FPS * 2),
        str(out_path),
    ]
    # stdout stays discarded, but stderr is captured instead of being sent to
    # DEVNULL: with "-loglevel error" it contains only the failure reason, and
    # check=True attaches it to the raised CalledProcessError.
    subprocess.run(
        cmd,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True,
        errors="replace",
    )

def build_quiz_video(item: QuizItem, out_path: Path) -> Path:
    """Render one quiz item to an MP4 at ``out_path`` and return that path.

    Three slides are produced (question with up to four options, answer
    reveal with explanation, and CTA), written as temporary PNGs under
    ``TMP_DIR``, and assembled by ``_ffmpeg_build``.

    Args:
        item: Quiz item to render; ``item.answer_index`` selects the option
            shown on the reveal slide.
        out_path: Destination MP4; its parent directory is created if missing.

    Returns:
        ``out_path``, unchanged.

    Raises:
        Whatever slide rendering, PNG saving or ffmpeg raises. The temporary
        PNGs are removed in that case as well.
    """
    q_img = _question_slide(item.question, item.options[:4])
    r_img = _reveal_slide(item.options[item.answer_index], item.explanation)
    c_img = _cta_slide()

    # Millisecond timestamp plus a random suffix keeps concurrent renders apart.
    base = TMP_DIR / f"vid_{int(time.time()*1000)}_{random.randint(1000,9999)}"
    q_png = base.with_suffix(".q.png")
    r_png = base.with_suffix(".r.png")
    c_png = base.with_suffix(".c.png")

    try:
        _save_png(q_img, q_png)
        _save_png(r_img, r_png)
        _save_png(c_img, c_png)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        _ffmpeg_build(out_path, q_png, r_png, c_png)
    finally:
        # Always clean up the intermediates, including when a step above fails.
        for tmp_png in (q_png, r_png, c_png):
            try:
                tmp_png.unlink(missing_ok=True)
            except OSError:
                # Best effort: a leftover temp file must not mask the real outcome.
                pass
    return out_path

def _slugify(text: str) -> str:
    s = text.lower().strip().replace(" ", "-")
    allowed = set(string.ascii_lowercase + string.digits + "-")
    s = "".join(ch for ch in s if ch in allowed)
    return s or f"item-{int(time.time())}"

# ---------- Gradio callbacks ----------
def _seed_rng(): random.seed(time.time_ns() % (2**32 - 1))

def generate_preview_ai(topic: str, difficulty: int, count: int, api_key: str, model_name: str,
                        temperature: float = 0.6, max_output_tokens: int = 1600):
    """Generate quiz items for ``topic`` and shape them into preview table rows.

    ``difficulty`` and ``count`` are clamped to 1..10 before generation. Each
    item is passed through ``lint_item_lengths`` (without ellipsis) and any
    field it flags gets ``CLIP_FLAG`` appended in the preview.

    Returns:
        A 4-tuple ``(dataframe, status_message, rows, topic)`` where ``rows``
        is the list of dicts the dataframe was built from and ``topic`` is the
        stripped topic. For an empty topic the result is an empty dataframe,
        a prompt to enter a topic, an empty list and an empty string.
    """
    topic = (topic or "").strip()
    if not topic:
        return pd.DataFrame([]), "Please enter a topic.", [], ""

    difficulty = int(max(1, min(10, difficulty)))
    count = int(max(1, min(10, count)))
    _seed_rng()

    items, info_msg = generate_questions_gemini(
        api_key, model_name, topic, difficulty, count,
        temperature=temperature, max_output_tokens=max_output_tokens,
    )

    letters = ["A", "B", "C", "D"]

    def _mark(text, clipped):
        # Append the clip marker only to fields the length linter flagged.
        return text + (CLIP_FLAG if clipped else "")

    rows = []
    for number, item in enumerate(items, 1):
        q_fit, opts_fit, exp_fit, clipped = lint_item_lengths(
            item.question, item.options[:4], item.explanation, add_ellipsis=False
        )
        # Key insertion order fixes the column order of the resulting table.
        row = {"#": number, "Question": _mark(q_fit, clipped["Q"])}
        for pos, letter in enumerate(letters):
            row[letter] = _mark(opts_fit[pos], clipped[letter])
        explanation_cell = _mark(exp_fit, clipped["EXP"])
        row["Correct"] = letters[item.answer_index]
        row["Explanation"] = explanation_cell
        rows.append(row)

    df = pd.DataFrame(rows)
    status = info_msg if info_msg else f"Generated {len(rows)} quiz item(s). Review below."
    return df, status, rows, topic

def confirm_and_produce(items_table, topic: str):
    """Render one MP4 per row of the (possibly user-edited) preview table.

    Args:
        items_table: A pandas ``DataFrame`` (anything with ``to_dict``) or a
            list of row dicts keyed by "Question", "A".."D", "Correct" and
            "Explanation". Any other value is treated as having no rows.
        topic: Topic used for the preview; it becomes part of each file name.

    Returns:
        ``(message, files)`` where ``files`` lists the MP4 paths that were
        rendered and exist on disk. ``message`` reports how many videos were
        produced and, if any row failed, which rows failed and why.
    """
    if items_table is None:
        return "No items to render. Please generate a preview first.", []
    if hasattr(items_table, "to_dict"):
        rows = items_table.to_dict(orient="records")
    elif isinstance(items_table, list):
        rows = items_table
    else:
        rows = []
    if not rows:
        return "No items to render. Please generate a preview first.", []

    def _clean(value) -> str:
        # Edited cells can arrive as None, NaN or numbers rather than str.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value).replace(CLIP_FLAG, "").strip()

    produced = []
    failures = []
    for idx, raw in enumerate(rows, 1):
        q = _clean(raw.get("Question", ""))
        options = [_clean(raw.get(letter, "")) for letter in ("A", "B", "C", "D")]
        exp = _clean(raw.get("Explanation", ""))

        # An untouched placeholder row has nothing to show; do not render it.
        if not q and not any(options):
            continue

        correct_letter = str(raw.get("Correct", "A")).strip().upper()
        answer_index = {"A": 0, "B": 1, "C": 2, "D": 3}.get(correct_letter, 0)

        q, options, exp, _ = lint_item_lengths(q, options, exp, add_ellipsis=True)

        qi = QuizItem(topic=topic or "", question=q, options=options,
                      answer_index=answer_index, explanation=exp)

        slug = _slugify(f"{topic}-{idx}")
        out_path = OUT_DIR / f"myai101_{slug}.mp4"
        try:
            build_quiz_video(qi, out_path)
        except Exception as e:
            # Keep going with the remaining rows, but remember why this one failed.
            detail = str(e)
            stderr_text = getattr(e, "stderr", None)
            if isinstance(stderr_text, str) and stderr_text.strip():
                detail = f"{detail} ({stderr_text.strip().splitlines()[-1]})"
            failures.append(f"item {idx}: {detail}")
        else:
            produced.append(str(out_path))
        gc.collect()

    if not produced and not failures:
        return "No items to render. Please generate a preview first.", []

    files = [p for p in produced if Path(p).exists()]
    msg = f"Done. Produced {len(files)} video(s)."
    if failures:
        msg += f" {len(failures)} failed: " + "; ".join(failures)
    return msg, files

_empty_df = pd.DataFrame(columns=["#", "Question", "A", "B", "C", "D", "Correct", "Explanation"])
def _make_table():
    try:
        return gr.Dataframe(
            headers=list(_empty_df.columns),
            datatype=["number","str","str","str","str","str","str","str"],
            row_count=(1, "dynamic"),
            col_count=(8, "fixed"),
            wrap=True,
            label="You can edit cells before confirming to tweak wording.",
            interactive=True,
        )
    except TypeError:
        return gr.Dataframe(
            value=_empty_df,
            headers=list(_empty_df.columns),
            wrap=True,
            label="You can edit cells before confirming to tweak wording.",
            interactive=True,
        )

# ---------- UI ----------
# Builds the whole Gradio interface: input controls, the editable preview table,
# the output widgets, and the click handlers that connect them.
with gr.Blocks(title="MyAI101 — Quiz Video Maker (Gemini REST, No MoviePy, 1080x1920)") as demo:
    gr.Markdown("# MyAI101 — Quiz Video Maker (Gemini, **REST**, no MoviePy, 1080×1920)")

    # Quiz parameters: topic, difficulty and number of questions.
    with gr.Row():
        topic_inp = gr.Textbox(label="Topic", placeholder="e.g. Backpropagation, SSL certificates, Photosynthesis")
    with gr.Row():
        diff_inp  = gr.Slider(1, 10, value=5, step=1, label="Difficulty (1 = child <10, 10 = professional)")
        count_inp = gr.Slider(1, 10, value=3, step=1, label="How many questions / videos to create")

    gr.Markdown("### AI Generation Settings (Gemini)")
    with gr.Row():
        api_key_inp = gr.Textbox(label="Gemini API Key", placeholder="Paste your Google AI Studio API key", type="password")
        model_inp   = gr.Dropdown(choices=["gemini-2.5-flash", "gemini-1.5-flash-001", "gemini-1.5-pro-001"], value="gemini-2.5-flash", label="Model")
        list_btn    = gr.Button("🔎 List Available Models")

    with gr.Row():
        temp_inp    = gr.Slider(0.0, 1.0, value=0.6, step=0.1, label="Temperature (creativity)")
        max_tok_inp = gr.Slider(200, 4096, value=1600, step=100, label="Max output tokens")

    # Both buttons are wired to the same preview callback further down.
    with gr.Row():
        preview_btn = gr.Button("🧠 Generate Preview (AI)")
        regen_btn   = gr.Button("↻ Regenerate")

    gr.Markdown("### Preview: Questions & Answers")
    # preview_state receives the row dicts from the preview callback; no callback
    # wired in this block reads it back.
    preview_state = gr.State([])   # Python list (for safety across Gradio versions)
    topic_state   = gr.State("")   # store topic used for preview

    table  = _make_table()
    status = gr.Markdown(visible=True)

    with gr.Row():
        confirm_btn = gr.Button("✅ Confirm & Produce Videos", variant="primary")
    out_msg   = gr.Markdown()
    out_files = gr.Files(label="Rendered MP4s")

    # Callbacks
    def _on_list_models(api_key):
        """Refresh the model dropdown from the models available to ``api_key``.

        Returns a pair ``(dropdown_update, status_text)``. The dropdown is left
        untouched when no key was entered or the listing raised.
        """
        if not api_key:
            return gr.update(), "Enter API key first."
        try:
            mods = list_models_v1(api_key)
            choices = filter_generate_content_models(mods)
            if not choices:
                return gr.update(choices=[], value=None), "No generateContent-capable models found for this key."
            # Prefer newer flash if present
            default = ("gemini-2.5-flash" if "gemini-2.5-flash" in choices
                       else "gemini-1.5-flash-001" if "gemini-1.5-flash-001" in choices
                       else choices[0])
            return gr.update(choices=choices, value=default), f"Found {len(choices)} model(s)."
        except Exception as e:
            return gr.update(), f"Listing failed: {e}"

    def _on_preview(topic, diff, count, api_key, model_name, temperature, max_tokens):
        """Coerce the widget values to int/float and run ``generate_preview_ai``.

        Returns ``(dataframe, status_text, rows, topic_used)`` in the order of
        the outputs wired to the preview buttons.
        """
        df, msg, rows, used_topic = generate_preview_ai(topic, int(diff), int(count), api_key, model_name, float(temperature), int(max_tokens))
        # Make sure the table component always receives a DataFrame-like value.
        if not hasattr(df, "to_dict"):
            df = pd.DataFrame(df)
        return df, msg, rows, used_topic

    def _on_confirm(current_table, topic_used):
        """Render videos from the current table contents via ``confirm_and_produce``."""
        return confirm_and_produce(current_table, topic_used)

    # Event wiring: the order of each outputs list must match the callback's return order.
    list_btn.click(_on_list_models, inputs=[api_key_inp], outputs=[model_inp, status])
    preview_btn.click(_on_preview, inputs=[topic_inp, diff_inp, count_inp, api_key_inp, model_inp, temp_inp, max_tok_inp], outputs=[table, status, preview_state, topic_state])
    regen_btn.click(_on_preview,   inputs=[topic_inp, diff_inp, count_inp, api_key_inp, model_inp, temp_inp, max_tok_inp], outputs=[table, status, preview_state, topic_state])
    # Confirm reads the table as currently shown (including user edits) and the stored topic.
    confirm_btn.click(_on_confirm, inputs=[table, topic_state], outputs=[out_msg, out_files])

# --- Launch (print URLs) ---
# Close any server left over from a previous run of this cell before relaunching.
gr.close_all()
gc.collect()
res = demo.launch(share=True, inbrowser=False, inline=False, show_error=True, debug=True, prevent_thread_lock=True)

# launch() hands back an (app, local_url, share_url) tuple rather than an object
# with URL attributes, so attribute access on the result alone never yields the
# URLs. Check the result, then the Blocks instance, then the tuple positions.
local_url = getattr(res, "local_url", None) or getattr(demo, "local_url", None)
share_url = getattr(res, "share_url", None) or getattr(demo, "share_url", None)
if isinstance(res, tuple) and len(res) >= 3:
    local_url = local_url or res[1]
    share_url = share_url or res[2]
print("Local URL:", local_url)
print("Public URL:", share_url)


Closing server running on port: 7860
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://e363bfe9a919d53253.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://e363bfe9a919d53253.gradio.live
