<a href="https://colab.research.google.com/github/adithyarajagopal/text-video-editing-app/blob/main/Video_creation_notebook%2C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
# SECURITY: API keys must never be hard-coded in a notebook or committed to
# version control. The literal keys that used to be assigned here are public
# and should be revoked and rotated.
#
# Keys already present in the environment are left untouched; a missing key is
# requested interactively without echoing it. Leaving the prompt blank skips
# that key, in which case the later cells fall back to their placeholder paths.
from getpass import getpass

for _key_name in ("OPENROUTER_API_KEY", "FAL_KEY"):
    if not os.environ.get(_key_name):
        _entered = getpass(f"{_key_name} (leave blank to skip): ").strip()
        if _entered:
            os.environ[_key_name] = _entered

In [None]:
pip install fal-client


Collecting fal-client
  Downloading fal_client-0.8.0-py3-none-any.whl.metadata (3.5 kB)
Downloading fal_client-0.8.0-py3-none-any.whl (10 kB)
Installing collected packages: fal-client
Successfully installed fal-client-0.8.0


**Hook + DEMO format**
Note: the following comments give only a general overview; read the code for a detailed understanding of each pipeline and how it works.

It takes a script and storyboard hints as input, splits them into scenes and builds a structured manifest describing each shot.

Then it auto-generates keyframes via FAL SD v3.5 and B-roll videos via Veo/WAN models.

All clips are normalized, trimmed, and stitched into a 1080×1920 video-only preview.

Outputs include the storyboard, manifest JSON, keyframes, and final stitched video for review.

In [None]:


import os, re, json, time, uuid, shutil, subprocess, requests, logging, base64, math, copy
from PIL import Image, ImageDraw, ImageFont
import gradio as gr

# Root directory (relative to the working directory) under which
# plan_and_generate creates one sub-folder per job.
JOB_ROOT = "gradio_jobs"
# Create it at import time; exist_ok makes re-running the cell harmless.
os.makedirs(JOB_ROOT, exist_ok=True)

# ---------- fonts ----------
# Preferred fonts, checked in order before falling back to a filesystem scan.
FONT_CANDIDATES = [
    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
]


def find_font_ttf():
    """Return the path of a TrueType font to use, or None when none exists.

    The entries of FONT_CANDIDATES are tried in order; if none of them is
    present, the first ``.ttf`` found anywhere under ``/usr/share/fonts`` is
    used instead.
    """
    preferred = next((path for path in FONT_CANDIDATES if os.path.exists(path)), None)
    if preferred is not None:
        return preferred
    import glob
    discovered = glob.glob("/usr/share/fonts/**/*.ttf", recursive=True)
    if discovered:
        return discovered[0]
    return None


# Resolved once at import time; None means "no TTF available".
FONT_PATH = find_font_ttf()

def ffmpeg_exists():
    """Report whether an ``ffmpeg`` executable can be located on PATH."""
    located = shutil.which("ffmpeg")
    return located is not None
def shlex_quote(s):
    """Wrap *s* in single quotes so it can be shown as one shell word.

    The value is converted with ``str`` first. Each embedded single quote is
    written as ``'"'"'`` (close quote, double-quoted quote, reopen quote).
    """
    pieces = str(s).split("'")
    return "'" + "'\"'\"'".join(pieces) + "'"
def run_cmd(cmd_list, raise_on_err=True, logger=None):
    """Run a command without a shell and return its captured stdout.

    Args:
        cmd_list: Argument vector passed directly to ``subprocess.run``.
        raise_on_err: When true, a non-zero exit status raises RuntimeError.
        logger: Optional logger; receives the quoted command line, the
            non-empty output streams (debug level) and any failure message.

    Returns:
        The process's stdout as text, also on failure when ``raise_on_err``
        is false.

    Raises:
        RuntimeError: On a non-zero exit status when ``raise_on_err`` is true.
    """
    # Quoted form is used only for log and error messages, never for execution.
    printable = " ".join(map(shlex_quote, cmd_list))
    if logger:
        logger.info("RUN: "+printable)

    proc = subprocess.run(cmd_list, capture_output=True, text=True)

    if logger:
        for label, stream in (("STDOUT", proc.stdout), ("STDERR", proc.stderr)):
            if stream.strip():
                logger.debug(label+": "+stream.strip())

    if proc.returncode == 0:
        return proc.stdout

    msg = f"Command failed: {printable}\nSTDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}"
    if logger:
        logger.error(msg)
    if raise_on_err:
        raise RuntimeError(msg)
    return proc.stdout

def draw_text_wrapped(draw, text, font, max_width):
    """Greedily split *text* into lines no wider than *max_width*.

    Width is measured with ``draw.textbbox``. A single word that is wider
    than *max_width* is kept on a line of its own rather than being broken.

    Returns:
        A list of line strings; empty when *text* is empty or None.
    """
    wrapped = []
    current = ""
    for word in str(text or "").split():
        candidate = (current + " " + word).strip()
        box = draw.textbbox((0, 0), candidate, font=font)
        too_wide = (box[2] - box[0]) > max_width
        if too_wide and current:
            # Flush the line built so far and start a new one with this word.
            wrapped.append(current)
            current = word
        else:
            current = candidate
    if current:
        wrapped.append(current)
    return wrapped

def _esc(s:str)->str:
    """Return *s* as text with every double quote backslash-escaped.

    None and other falsy values become the empty string.
    """
    text = str(s or "")
    return '\\"'.join(text.split('"'))

def ffmpeg_drawtext_escape(s: str) -> str:
    """Escape *s* for use as the ``text`` value of an FFmpeg drawtext filter.

    Backslash, colon, percent and single quote each get a leading backslash;
    newlines and carriage returns become spaces. None yields "".
    """
    # One pass is equivalent to the chained replaces: the only characters the
    # substitutions introduce are backslashes, and no substitution is applied
    # to text produced by another one.
    table = str.maketrans({
        "\\": "\\\\",
        ":": "\\:",
        "%": "\\%",
        "'": "\\'",
        "\n": " ",
        "\r": " ",
    })
    return str(s or "").translate(table)

# Format rules for the Hook+Demo layout: per-beat duration bounds (seconds),
# the B-roll length choices, and the caption word limit.
RULES = {
    "format": "hook_demo",
    "video": {
        "aspect": "9:16",
        "beats": [
            {"name": "hook", "min": 1.5, "max": 3.0},
            {"name": "proof", "min": 2.5, "max": 4.0},
            {"name": "cta", "min": 1.5, "max": 2.5},
        ],
        "broll_snap_choices": [3, 4, 5],
    },
    "text": {"max_words_per_caption": 8},
}


def _short_caption_ok(txt:str)->bool:
    """Return True when *txt* has no more words than the caption limit.

    None and the empty string count as zero words.
    """
    word_count = len((txt or "").split())
    word_limit = RULES["text"]["max_words_per_caption"]
    return word_count <= word_limit
def _choose_broll_len(intent:str)->int:
    """Return the B-roll length in seconds for *intent*: 2 for "cta", else 3."""
    return 2 if intent == "cta" else 3

# Shared look-and-feel fragments placed at the start of every Veo prompt.
PROMPT_STYLE = {
    "lighting": "soft window light from the left; clean studio key; high micro-contrast",
    "color": "neutral gray palette; natural skin tones",
    "constraints": "vertical 9:16; camera locked; avoid zoom-only shots; do not invent logos or on-screen text; no extra people",
}


def compose_veo_prompt(scene: dict) -> str:
    """Build the text-to-video prompt for one scene.

    The prompt starts with the shared style preamble, followed by a shot
    description chosen by the scene's ``intent`` ("proof", "cta", or anything
    else). ``gen_sec`` (default 4) sets the end of the last timed beat of the
    proof shot; ``on_screen_text`` is quoted as the on-screen guide.
    """
    intent = (scene.get("intent") or "").lower()
    caption = scene.get("on_screen_text","")
    seconds = int(scene.get("gen_sec", 4))

    style_parts = (PROMPT_STYLE["lighting"], PROMPT_STYLE["color"], PROMPT_STYLE["constraints"])
    preamble = "Vertical 9:16. " + ". ".join(style_parts) + "."

    if intent == "proof":
        shot = (
            "Top-down macro product demo on a clean tabletop. "
            "0–0.8s first drops hit filter (tiny droplets), "
            "0.8–2.6s bloom expands with small bubbles, "
            f"2.6–{seconds:.1f}s one clockwise swirl then surface becomes glossy-smooth. "
            f"On-screen guide: '{caption}'."
        )
    elif intent == "cta":
        shot = (
            "Pack hero centered on matte stone with two small props. "
            "One gentle steam curl rises, then complete stillness for CTA readability. "
            f"Label remains crisp. On-screen guide: '{caption}'."
        )
    else:
        shot = f"Clean product shot; on-screen guide: '{caption}'."
    return f"{preamble} {shot}"

def compose_wan_motion(scene: dict) -> str:
    """Return the motion prompt for an image-to-video scene.

    "proof" and "cta" intents (case-insensitive) get a timed motion script;
    every other intent gets a generic subtle-motion instruction.
    """
    timelines = {
        "proof": "0–1.0s powder plume blooms; 1.0–3.0s gentle clockwise swirl; 3.0–5.0s surface mirror-smooth. Camera locked.",
        "cta": "0–2.0s subtle particles drift behind the pack; 2.0–5.0s still for CTA legibility. Camera locked.",
    }
    generic = "Subtle breathing motion only; camera locked; no zoom; no new objects; same composition."
    intent = (scene.get("intent") or "").lower()
    return timelines.get(intent, generic)

# ---------- LLM prompt for planner ----------
# First-attempt planner prompt. The literal markers [[SCRIPT]] and [[HINT]]
# are substituted by call_openrouter_planner_failover before the prompt is
# sent. The Schema line is the JSON shape the model is asked to return.
LLM_PROMPT_TEMPLATE = r"""
SYSTEM: Return ONLY JSON. No prose. You are a Video Planner for short vertical ads (Hook+Demo).
USER:
- ad_format: "Hook+Demo"
- script: "[[SCRIPT]]"
- storyboard_hint: "[[HINT]]"
Task: Split by meaning (hook, proof, cta). Output scenes usable for generation.
Rules:
- duration_s per scene in [1.2,6.0]; total <= 30.
- First scene is A-roll hook if script has a hook line.
- provider: "heygen" for A-roll, "veo" for B-roll.
- For B-roll include concise "recommended_keyframe_prompt".
Schema:
{"ad_format":"Hook+Demo","script_summary":"one-liner","scenes":[{"id":"s1","role":"A-roll|B-roll","intent":"hook|proof|cta","summary":"one line","dialogue":"(if A-roll)","on_screen_text":"","entities":["..."],"duration_s":0.0,"start_s":0.0,"recommended_keyframe_prompt":"(if B-roll)","provider":"heygen|veo"}]}
"""
# Retry prompt. NOTE: this template contains no [[SCRIPT]]/[[HINT]] markers
# and does not repeat the schema, so on its own it carries neither the script
# nor the expected JSON shape.
LLM_PROMPT_RETRY = r"""
SYSTEM: Return ONLY JSON. No prose.
USER:
Earlier output wasn’t valid JSON. Try again STRICTLY with the same schema. JSON only; durations in [1.2,6.0].
"""

def extract_json_from_text(text):
    """Parse and return the first JSON object embedded in *text*.

    Decoding starts at the first ``{`` and stops at the end of that JSON
    value, so leading prose, Markdown code fences and trailing commentary
    (even commentary that itself contains braces) are tolerated. The previous
    implementation took everything from the first ``{`` to the last ``}``,
    which broke whenever text after the object contained a ``}``.

    Args:
        text: Raw model output. None is treated as the empty string.

    Returns:
        The decoded JSON object (a dict).

    Raises:
        ValueError: If *text* contains no ``{`` at all.
        json.JSONDecodeError: (a ValueError subclass) if the text starting at
            the first ``{`` is not valid JSON.
    """
    text = str(text or "")
    start = text.find("{")
    if start < 0:
        raise ValueError("No JSON object found in LLM output.")
    # raw_decode parses exactly one JSON value and ignores whatever follows it.
    parsed, _end = json.JSONDecoder().raw_decode(text, start)
    return parsed

def call_openrouter_planner_failover(script, hint, out_dir, logger=None):
    """Ask a chain of OpenRouter models for a scene plan; return the first usable reply.

    Each model is tried twice, first with the normal planner prompt and then
    with a stricter retry prompt, before moving on to the next model. Every
    non-failing response is written to ``planner_raw_<model>.txt`` in
    *out_dir* (a later attempt for the same model overwrites the earlier file).

    The retry template has no placeholders of its own, so the retry message
    is built as the retry instruction followed by the full original request.
    Previously the retry was sent without the script, hint or schema, which
    left the model nothing to plan from.

    Args:
        script: Ad script text inserted into the prompt.
        hint: Storyboard hint inserted into the prompt.
        out_dir: Existing directory that receives the raw model outputs.
        logger: Optional logger; each failed attempt is reported as a warning.

    Returns:
        The raw response text of the first attempt from which a JSON object
        could be extracted.

    Raises:
        RuntimeError: If OPENROUTER_API_KEY is unset, or if every attempt
            failed (the message carries the last error).
    """
    key = os.environ.get("OPENROUTER_API_KEY")
    if not key:
        raise RuntimeError("OPENROUTER_API_KEY not set")
    from openai import OpenAI
    client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=key)

    first_prompt = LLM_PROMPT_TEMPLATE.replace("[[SCRIPT]]", _esc(script)).replace("[[HINT]]", _esc(hint))
    # Give the retry the complete original request so the model still sees the
    # script, the hint and the schema it is asked to follow.
    retry_prompt = f"{LLM_PROMPT_RETRY.strip()}\nThe original request follows.\n{first_prompt.strip()}\n"

    models = (
        "openai/gpt-5",
        "anthropic/claude-sonnet-4",
        "anthropic/claude-3.5-sonnet",
        "openai/gpt-4o",
        "google/gemini-1.5-pro-002",
    )
    last_err = None
    for model in models:
        for prompt in (first_prompt, retry_prompt):
            try:
                resp = client.chat.completions.create(
                    model=model,
                    messages=[{"role":"system","content":"Return ONLY JSON. No prose."},
                              {"role":"user","content":prompt}],
                    temperature=0.0, max_tokens=1200
                )
                text = (resp.choices[0].message.content or "")
                raw_path = os.path.join(out_dir, f"planner_raw_{model.replace('/','_')}.txt")
                with open(raw_path,"w",encoding="utf-8") as fh:
                    fh.write(text)
                if not text.strip():
                    last_err = ValueError(f"{model} empty")
                    if logger: logger.warning("Planner model %s returned an empty response", model)
                    continue
                # Validate only; the caller parses the returned text itself.
                extract_json_from_text(text)
                return text
            except Exception as err:
                # Best-effort failover: remember the error and try the next
                # prompt/model instead of aborting the whole chain.
                last_err = err
                if logger: logger.warning("Planner attempt with %s failed: %s", model, err)
    raise RuntimeError(f"Planner failed: {last_err}")

def deterministic_fallback_manifest(script, hint):
    """Build a fixed three-scene manifest (hook, proof, cta) without an LLM.

    The hook text is the first non-empty period-separated piece of *script*,
    or the first 60 characters of *script* when no such piece exists. *hint*
    is accepted for signature parity and is not used. All durations and start
    times are 0 here.
    """
    sentences = [piece.strip() for piece in str(script).split('.')]
    sentences = [piece for piece in sentences if piece]
    hook = sentences[0] if sentences else str(script)[:60]

    hook_scene = {
        "id": "s1", "role": "A-roll", "intent": "hook",
        "summary": hook, "dialogue": hook,
        "on_screen_text": "Protein tastes like chalk?",
        "entities": ["creator_face"],
        "duration_s": 0, "start_s": 0,
        "provider": "heygen",
    }
    proof_scene = {
        "id": "s2", "role": "B-roll", "intent": "proof",
        "summary": "Pour → swirl → smooth surface",
        "on_screen_text": "25g protein • Mixes smooth • Rich chocolate",
        "duration_s": 0, "start_s": 0,
        "recommended_keyframe_prompt": "Top-down macro pour into shaker, swirl, glossy surface, photorealistic, 9:16",
        "provider": "veo",
    }
    cta_scene = {
        "id": "s3", "role": "B-roll", "intent": "cta",
        "summary": "Pack hero, calm hold",
        "on_screen_text": "Try Chocolate Fudge Today",
        "duration_s": 0, "start_s": 0,
        "recommended_keyframe_prompt": "Pack hero on matte stone, soft top light, warm rim, neutral gray background, 9:16",
        "provider": "veo",
    }
    return {
        "ad_format": "Hook+Demo",
        "script_summary": hook,
        "scenes": [hook_scene, proof_scene, cta_scene],
        "notes": "fallback",
    }

def infer_wps_from_storyboard(hint:str):
    """Pick a words-per-second pace from keywords in the storyboard hint.

    Fast-pace keywords are checked before slow-pace keywords, so a hint that
    contains both yields the fast value. Returns 4.4 (fast), 2.0 (slow) or
    3.6 (no keyword matched, or the hint is empty/None).
    """
    lowered = (hint or "").lower()
    pace_table = (
        (("fast", "quick", "snappy", "rapid", "montage"), 4.4),
        (("slow", "calm", "relaxed", "gentle", "long holds", "long takes"), 2.0),
    )
    for keywords, pace in pace_table:
        if any(word in lowered for word in keywords):
            return pace
    return 3.6

def estimate_durations_from_manifest(manifest, storyboard_hint, logger=None):
    """Assign ``duration_s`` and ``start_s`` to every scene, in place.

    Scenes with spoken text (``dialogue``, else ``voice_over.text``) get
    ``words / wps + 0.2`` seconds clamped to [1.2, 5.0]; scenes without spoken
    text get 2.0 seconds. The summed duration is then clamped to [10, 30]
    seconds and the difference is applied to one scene (the entry added last
    to the duration table), itself clamped to [1.2, 5.0]. Also sets
    ``manifest["total_length_s"]``.

    Returns:
        The same *manifest* object.
    """
    scenes = manifest.get("scenes", [])
    wps = infer_wps_from_storyboard(storyboard_hint)
    min_scene, max_scene = 1.2, 5.0

    def _spoken_word_count(scene):
        spoken = scene.get("dialogue") or scene.get("voice_over", {}).get("text", "")
        return len(str(spoken).split())

    word_counts = [_spoken_word_count(sc) for sc in scenes]
    voiced = [idx for idx, count in enumerate(word_counts) if count > 0]
    silent = [idx for idx, count in enumerate(word_counts) if not count > 0]

    # Insertion order matters: voiced scenes first, then silent ones. The
    # scene that absorbs the total-length correction is the last key added.
    durs = {}
    for idx in voiced:
        estimate = round(word_counts[idx] / wps + 0.2, 2)
        durs[idx] = max(min_scene, min(max_scene, estimate))
    for idx in silent:
        durs[idx] = 2.0

    raw_total = sum(durs.values())
    target_total = min(max(raw_total, 10.0), 30.0)
    diff = round(target_total - raw_total, 2)
    if abs(diff) >= 0.01 and scenes:
        last = next(reversed(durs))
        durs[last] = round(max(min_scene, min(max_scene, durs[last] + diff)), 2)

    cursor = 0.0
    for idx, sc in enumerate(scenes):
        length = float(durs.get(idx, 2.0))
        sc["duration_s"] = length
        sc["start_s"] = round(cursor, 2)
        cursor += length
    manifest["total_length_s"] = round(cursor, 2)
    return manifest

def enforce_platform_rules(manifest, broll_only=False, logger=None):
    """Force a manifest into the Hook+Demo shape and assign providers and timings.

    Steps, in order:
      1. Unless *broll_only*, make sure the first scene is an A-roll hook
         (one is inserted when missing).
      2. Make sure a "proof" scene and a "cta" scene exist (inserted/appended
         when missing).
      3. For every kept scene: trim the caption to the word limit, set
         ``duration_s``/``start_s``, and set ``provider``; B-roll scenes also
         get ``gen_sec`` and ``trim_sec``. With *broll_only*, A-roll scenes
         are dropped.

    ``intent`` and ``role`` are compared case-insensitively everywhere and a
    null value is treated as empty. Previously the proof/cta presence checks
    were case-sensitive, so a planner answer such as "Proof" or "CTA" got a
    duplicate scene inserted, and a null value raised AttributeError.

    NOTE: non-CTA B-roll is always 3.0 s (see _choose_broll_len), so with the
    current rules the "wan" branch below is not reached. ``trim_sec`` is an
    int, so a CTA duration such as 2.5 s yields ``trim_sec == 2``.

    Args:
        manifest: Planner manifest; its scene dicts are modified in place.
        broll_only: Skip A-roll scenes and do not insert a hook.
        logger: Unused; accepted for call-site symmetry.

    Returns:
        The same *manifest* with ``scenes`` and ``total_length_s`` replaced.

    Raises:
        ValueError: If the manifest has no scenes.
    """
    scenes = manifest.get("scenes", [])
    if not scenes: raise ValueError("no scenes")

    def _lower(scene, key):
        # None-safe, case-insensitive view of a string field.
        return str(scene.get(key) or "").lower()

    opens_with_hook = _lower(scenes[0], "intent") == "hook" and _lower(scenes[0], "role").startswith("a")
    if not broll_only and not opens_with_hook:
        hook_txt = manifest.get("script_summary") or "Stop the chalky shakes."
        scenes.insert(0, {"id":"s_hook","role":"A-roll","intent":"hook","summary":hook_txt,"dialogue":hook_txt,"on_screen_text":"Protein tastes like chalk?","entities":["creator_face","pack"],"provider":"heygen","duration_s":2.5,"start_s":0.0})

    # Computed after the optional hook insertion; inserting a proof scene
    # cannot change whether a cta scene is present, so one set serves both.
    present_intents = {_lower(s, "intent") for s in scenes}

    if "proof" not in present_intents:
        scenes.insert(0 if broll_only else 1, {"id":"s_proof","role":"B-roll","intent":"proof","summary":"Pour → swirl → smooth surface","on_screen_text":"25g protein • Mixes smooth","recommended_keyframe_prompt":"Top-down macro pour, swirl, glossy surface, 9:16","provider":"veo","duration_s":3.0})

    if "cta" not in present_intents:
        scenes.append({"id":"s_cta","role":"B-roll","intent":"cta","summary":"Pack hero, calm hold","on_screen_text":"Tap to try today","recommended_keyframe_prompt":"Pack hero on matte stone, neutral gray, 9:16","provider":"veo","duration_s":2.0})

    caption_limit = RULES["text"]["max_words_per_caption"]
    t=0.0; out=[]
    for s in scenes:
        intent=_lower(s, "intent"); role=_lower(s, "role")
        if broll_only and role.startswith("a"): continue  # skip A-roll when B-rolls only

        # Over-long captions are cut to the first N words.
        if not _short_caption_ok(s.get("on_screen_text","")):
            s["on_screen_text"]=" ".join(s["on_screen_text"].split()[:caption_limit])

        if role.startswith("a") and intent=="hook":
            dur=min(max(2.0, s.get("duration_s",2.5)), 3.0)
        elif intent=="cta":
            dur=min(max(1.8, s.get("duration_s",2.0)), 2.5)
        else:
            dur=float(_choose_broll_len(intent))

        s["duration_s"]=round(dur,2); s["start_s"]=round(t,2); t+=s["duration_s"]

        if role.startswith("a"):
            s["provider"]="heygen"
        else:
            # gen_sec is the clip length requested from the generator;
            # trim_sec is how much of it is kept.
            if s["duration_s"]==3.0:
                s["provider"]="veo"; s["gen_sec"]=4; s["trim_sec"]=3
            elif s["duration_s"] in (4.0,5.0):
                s["provider"]="wan"; s["gen_sec"]=5; s["trim_sec"]=int(s["duration_s"])
            else:
                s["provider"]="veo"; s["gen_sec"]=4; s["trim_sec"]=int(s["duration_s"])

        out.append(s)

    manifest["scenes"]=out
    manifest["total_length_s"]=round(t,2)
    return manifest

# ---------- keyframes ----------
def make_keyframe_placeholder(prompt_text, scene_summary, metadata_text, outpath, size=(1080,1920), bgcolor=(28,28,36)):
    """Render a text-only placeholder keyframe and save it as a JPEG.

    The title (``prompt_text``, else ``scene_summary``, else "B-roll") is
    wrapped to 90% of the image width and drawn near the top (at most six
    lines, each on a dark backing box). ``metadata_text`` is drawn in a
    smaller font inside a dark panel near the bottom (at most eight lines).
    The parent directory of *outpath* is created if needed.

    Returns:
        *outpath*.
    """
    width, height = size[0], size[1]
    img = Image.new("RGB", size, bgcolor)
    draw = ImageDraw.Draw(img)

    # If either TrueType load fails, both fonts fall back to PIL's default.
    try:
        if FONT_PATH:
            title_font = ImageFont.truetype(FONT_PATH, 36)
        else:
            title_font = ImageFont.load_default()
        if FONT_PATH:
            meta_font = ImageFont.truetype(FONT_PATH, 18)
        else:
            meta_font = ImageFont.load_default()
    except Exception:
        title_font = ImageFont.load_default()
        meta_font = ImageFont.load_default()

    wrap_width = int(width*0.9)

    # Title block.
    title_lines = draw_text_wrapped(draw, (prompt_text or scene_summary or "B-roll"), title_font, wrap_width)
    y = int(height*0.12)
    for line in title_lines[:6]:
        box = draw.textbbox((0,0), line, font=title_font)
        w = box[2]-box[0]
        h = box[3]-box[1]
        draw.rectangle([((width-w)//2-10, y-8), ((width+w)//2+10, y+h+8)], fill=(0,0,0,180))
        draw.text(((width-w)/2, y), line, font=title_font, fill=(255,230,180))
        y += h+10

    # Metadata panel.
    meta_lines = draw_text_wrapped(draw, (metadata_text or ""), meta_font, wrap_width)
    panel_top = height-200
    draw.rectangle([(40, panel_top-10), (width-40, height-40)], fill=(0,0,0,180))
    text_y = panel_top+6
    for line in meta_lines[:8]:
        box = draw.textbbox((0,0), line, font=meta_font)
        w = box[2]-box[0]
        h = box[3]-box[1]
        draw.text(((width-w)/2, text_y), line, font=meta_font, fill=(230,230,230))
        text_y += h+6

    os.makedirs(os.path.dirname(outpath), exist_ok=True)
    img.save(outpath, format="JPEG", quality=90)
    return outpath

def generate_keyframe_fal(prompt, out_path, negative_prompt=None, logger=None):
    """Generate a 1080x1920 keyframe through FAL and save it to *out_path*.

    A locally drawn placeholder is written instead when FAL_KEY is unset,
    when ``fal_client`` cannot be imported, or when the response has no image
    or no image URL. Errors from the FAL call or from the download are not
    caught here and propagate to the caller.

    Returns:
        *out_path*.
    """
    def _placeholder(reason):
        return make_keyframe_placeholder(prompt, prompt, reason, out_path)

    if not os.environ.get("FAL_KEY"):
        if logger:
            logger.warning("FAL_KEY not set; using placeholder keyframe")
        return _placeholder("no FAL_KEY")
    try:
        import fal_client
    except Exception:
        if logger:
            logger.warning("fal_client not installed; using placeholder keyframe")
        return _placeholder("no fal_client")

    if logger:
        logger.info("Calling FAL SD v3.5 Large for keyframe")

    def on_queue_update(update):
        # Forward FAL progress logs; a failure here must never break the job.
        try:
            from fal_client import InProgress
            if isinstance(update, InProgress) and getattr(update,"logs",None):
                for entry in update.logs:
                    text = entry.get("message","")
                    if logger and text:
                        logger.info("[FAL] "+text)
        except Exception:
            pass

    args = {
        "prompt": prompt,
        "negative_prompt": negative_prompt or "blurry, watermark, text, lowres",
        "num_inference_steps": 24,
        "guidance_scale": 3.0,
        "num_images": 1,
        "enable_safety_checker": True,
        "output_format": "jpeg",
        "image_size": {"width": 1080, "height": 1920},
    }
    result = fal_client.subscribe("fal-ai/stable-diffusion-v35-large", arguments=args, with_logs=True, on_queue_update=on_queue_update)

    images = result.get("images",[])
    if not images:
        return _placeholder("fal sd no image")
    url = images[0].get("url")
    if not url:
        return _placeholder("fal sd no url")

    response = requests.get(url, timeout=120)
    response.raise_for_status()
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path,"wb") as fh:
        fh.write(response.content)
    if logger:
        logger.info("Saved FAL keyframe: "+out_path)
    return out_path

def trim_video(inp, outp, keep_sec, logger=None):
    """Re-encode the first *keep_sec* seconds of *inp* into *outp*.

    Output is 30 fps constant-frame-rate H.264 (yuv420p) with no audio.

    Returns:
        *outp*.
    """
    ffmpeg_args = ["ffmpeg","-y","-hide_banner","-loglevel","error"]
    ffmpeg_args += ["-fflags","+genpts","-i",inp]
    ffmpeg_args += ["-ss","0","-t",str(keep_sec)]
    ffmpeg_args += ["-r","30","-vsync","cfr","-pix_fmt","yuv420p"]
    ffmpeg_args += ["-c:v","libx264","-preset","veryfast","-crf","18"]
    ffmpeg_args += ["-an","-movflags","+faststart"]
    ffmpeg_args.append(outp)
    run_cmd(ffmpeg_args, logger=logger)
    return outp

def still_from_image(img_path, outmp4, dur_sec, logger=None):
    """Turn a still image into a silent 1080x1920 clip of *dur_sec* seconds.

    The image is looped, scaled until it fully covers the 1080x1920 frame
    while keeping its aspect ratio, then centre-cropped to exactly 1080x1920.

    The scale filter's ``force_original_aspect_ratio`` option only accepts
    ``disable``, ``decrease`` or ``increase``; the previous value ``cover`` is
    not valid and made FFmpeg reject the filter, so every call failed.
    ``increase`` followed by ``crop`` gives the intended cover behaviour.

    Args:
        img_path: Source image.
        outmp4: Destination video path (overwritten).
        dur_sec: Clip length in seconds.
        logger: Optional logger passed to run_cmd.

    Returns:
        *outmp4*.

    Raises:
        RuntimeError: If FFmpeg exits with a non-zero status (from run_cmd).
    """
    run_cmd([
        "ffmpeg","-y","-hide_banner","-loglevel","error",
        "-loop","1","-t",str(dur_sec),"-i",img_path,
        "-vf","scale=1080:1920:force_original_aspect_ratio=increase,crop=1080:1920",
        "-r","30","-pix_fmt","yuv420p",
        "-c:v","libx264","-preset","veryfast","-crf","18",
        "-an", outmp4
    ], logger=logger)
    return outmp4

def overlay_caption(inp, outp, text, logger=None):
    """Burn *text* into the lower part of a clip, writing the result to *outp*.

    With empty text the input is copied unchanged. If the FFmpeg overlay
    fails for any reason, the input is copied to *outp* without a caption.

    Returns:
        *outp*.
    """
    if not text:
        shutil.copy(inp, outp)
        return outp

    safe_text = ffmpeg_drawtext_escape(text)
    # The font option is only included when a TTF was found at import time.
    font_option = f"fontfile='{FONT_PATH}':" if FONT_PATH else ""
    backing_box = "drawbox=x=40:y=1650:w=1000:h=120:color=black@0.5:t=fill,"
    draw = f"{backing_box}drawtext={font_option}text='{safe_text}':x=60:y=1685:fontsize=52:fontcolor=white"

    try:
        run_cmd(["ffmpeg","-y","-hide_banner","-loglevel","error","-i",inp,"-vf",draw,
                 "-c:v","libx264","-crf","18","-preset","veryfast","-an",outp], logger=logger)
    except Exception:
        # Best effort: an uncaptioned clip is preferred over a failed job.
        shutil.copy(inp, outp)
    return outp

def normalize_9x16(inp, outp, logger=None):
    """Re-encode *inp* as a 1080x1920, 30 fps, silent H.264 clip at *outp*.

    The picture is scaled to fit inside 1080x1920 with its aspect ratio kept,
    then centred on a 1080x1920 canvas and given square pixels.

    The earlier filter scaled the width to 1080 unconditionally
    (``scale=1080:-2``). For any source taller than 9:16 that produced a
    height above 1920, and the following ``pad`` then failed because the
    canvas was smaller than its input. Fitting with
    ``force_original_aspect_ratio=decrease`` keeps both dimensions within the
    canvas; exact 9:16 sources are unaffected.

    Args:
        inp: Source video path.
        outp: Destination video path (overwritten).
        logger: Optional logger passed to run_cmd.

    Returns:
        *outp*.

    Raises:
        RuntimeError: If FFmpeg exits with a non-zero status (from run_cmd).
    """
    vf = (
        "scale=1080:1920:force_original_aspect_ratio=decrease:flags=lanczos,"
        "pad=1080:1920:(ow-iw)/2:(oh-ih)/2,"
        "setsar=1"
    )
    run_cmd([
        "ffmpeg","-y","-hide_banner","-loglevel","error",
        "-i", inp,
        "-vf", vf,
        "-r","30","-vsync","cfr","-pix_fmt","yuv420p",
        "-c:v","libx264","-preset","veryfast","-crf","18",
        "-an","-movflags","+faststart",
        outp
    ], logger=logger)
    return outp

# ---------- FAL Veo / WAN ----------
def _data_uri_from_file(path):
    """Return the file at *path* as a base64 ``data:`` URI.

    The MIME type is ``image/<ext>`` for .png, .bmp and .webp (matched
    case-insensitively) and ``image/jpeg`` for every other extension.
    """
    suffix = (os.path.splitext(path)[1] or "").lower()
    if suffix in (".png", ".bmp", ".webp"):
        mime = "image/" + suffix[1:]
    else:
        mime = "image/jpeg"
    with open(path, "rb") as fh:
        payload = base64.b64encode(fh.read()).decode("utf-8")
    return "data:" + mime + ";base64," + payload

def gen_broll_veo_t2v(prompt, gen_sec, outmp4, logger=None):
    """Generate a B-roll clip with FAL Veo3 text-to-video and save it to *outmp4*.

    A gray 1080x1920 placeholder clip, captioned with the first eight words
    of the prompt, is produced instead when FAL_KEY is unset, when
    ``fal_client`` cannot be imported, or when the Veo call or the download
    fails. The requested duration is ``gen_sec`` when it is 4, 6 or 8 and
    4 seconds otherwise.

    Returns:
        *outmp4*.
    """
    def _gray_placeholder():
        raw = outmp4.replace(".mp4","_raw.mp4")
        run_cmd(["ffmpeg","-y","-hide_banner","-loglevel","error","-f","lavfi","-i",f"color=c=gray:s=1080x1920:d={gen_sec}",
                 "-c:v","libx264","-preset","veryfast","-crf","18","-an",raw], logger=logger)
        overlay_caption(raw, outmp4, " ".join((prompt or "").split()[:8]), logger=logger)
        return outmp4

    if not os.environ.get("FAL_KEY"):
        if logger:
            logger.warning("FAL_KEY not set; Veo placeholder")
        return _gray_placeholder()
    try:
        import fal_client
    except Exception:
        if logger:
            logger.warning("fal_client missing; Veo placeholder")
        return _gray_placeholder()

    if logger:
        logger.info("Calling FAL Veo3 text->video")

    def on_queue_update(update):
        # Forward FAL progress logs; a failure here must never break the job.
        try:
            from fal_client import InProgress
            if isinstance(update, InProgress) and getattr(update,"logs",None):
                for entry in update.logs:
                    text = entry.get("message","")
                    if logger and text:
                        logger.info("[FAL Veo3] "+text)
        except Exception:
            pass

    duration = f"{gen_sec}s" if gen_sec in (4,6,8) else "4s"
    args = {
        "prompt": prompt or "Clean product macro demo, studio soft-key, vertical 9:16",
        "aspect_ratio": "9:16",
        "duration": duration,
        "resolution": "1080p",  # 1080p to match the rest of the pipeline
        "generate_audio": False,
        "enhance_prompt": True,
        "auto_fix": True,
        "negative_prompt": "watermark, logo, overlaid text, nsfw, extra hands, duplicated objects",
    }
    try:
        res = fal_client.subscribe("fal-ai/veo3", arguments=args, with_logs=True, on_queue_update=on_queue_update)
        video_url = (res.get("video") or {}).get("url")
        if not video_url:
            raise RuntimeError("Veo no URL")
        response = requests.get(video_url, timeout=600)
        response.raise_for_status()
        with open(outmp4,"wb") as fh:
            fh.write(response.content)
        if logger:
            logger.info("Saved Veo3 T2V: "+outmp4)
        return outmp4
    except Exception as e:
        if logger:
            logger.warning(f"Veo error -> placeholder: {e}")
        return _gray_placeholder()

def gen_broll_wan_i2v(keyframe_img, motion_prompt, gen_sec, outmp4, logger=None):
    """Animate a keyframe with FAL WAN 2.5 image-to-video and save it to *outmp4*.

    A still clip of the keyframe lasting *gen_sec* seconds is produced
    instead when FAL_KEY is unset, when ``fal_client`` cannot be imported, or
    when the response carries no video URL. The request always asks for a
    duration of "5". If the first call raises, it is retried once with the
    arguments wrapped as ``{"input": args}``; errors from that retry and from
    the download propagate to the caller.

    Returns:
        *outmp4*.
    """
    def _still_placeholder():
        return still_from_image(keyframe_img, outmp4, gen_sec, logger=logger)

    if not os.environ.get("FAL_KEY"):
        if logger:
            logger.warning("FAL_KEY not set; WAN placeholder")
        return _still_placeholder()
    try:
        import fal_client
    except Exception:
        if logger:
            logger.warning("fal_client missing; WAN placeholder")
        return _still_placeholder()

    if logger:
        logger.info("Calling FAL WAN 2.5 image->video")

    def on_queue_update(update):
        # Forward FAL progress logs; a failure here must never break the job.
        try:
            from fal_client import InProgress
            if isinstance(update, InProgress) and getattr(update,"logs",None):
                for entry in update.logs:
                    text = entry.get("message","")
                    if logger and text:
                        logger.info("[FAL WAN] "+text)
        except Exception:
            pass

    args = {
        "prompt": motion_prompt or "Subtle breathing motion; vertical 9:16",
        "image_url": _data_uri_from_file(keyframe_img),
        "resolution": "1080p",
        "duration": "5",
        "negative_prompt": "low resolution, watermark, overlaid text, extra limbs, worst quality, artifacts",
        "enable_prompt_expansion": True,
    }
    endpoint = "fal-ai/wan-25-preview/image-to-video"
    try:
        res = fal_client.subscribe(endpoint, arguments=args, with_logs=True, on_queue_update=on_queue_update)
    except Exception:
        res = fal_client.subscribe(endpoint, arguments={"input": args}, with_logs=True, on_queue_update=on_queue_update)

    # The video may be at the top level or nested under "data".
    data_field = res.get("data")
    video_obj = res.get("video") or (data_field if isinstance(data_field, dict) else {}).get("video")
    url = (video_obj or {}).get("url") or (((data_field or {}).get("video")) or {}).get("url")
    if not url:
        if logger:
            logger.warning("WAN no URL -> placeholder")
        return _still_placeholder()

    response = requests.get(url, timeout=900)
    response.raise_for_status()
    with open(outmp4,"wb") as fh:
        fh.write(response.content)
    if logger:
        logger.info("Saved WAN I2V: "+outmp4)
    return outmp4

# ---------- concat (video-only) ----------
def concat_videos(clip_paths, out_path, logger=None):
    """Join the video streams of *clip_paths*, in order, into *out_path*.

    Uses FFmpeg's concat filter on video only and re-encodes the result as
    30 fps constant-frame-rate H.264 (yuv420p) without audio. Returns None.
    """
    clip_count = len(clip_paths)
    input_args = []
    for clip in clip_paths:
        input_args.extend(["-fflags","+genpts","-i", clip])
    stream_labels = "".join(f"[{idx}:v]" for idx in range(clip_count))
    graph = f"{stream_labels}concat=n={clip_count}:v=1:a=0[v]"

    ffmpeg_args = ["ffmpeg","-y","-hide_banner","-loglevel","error"]
    ffmpeg_args += input_args
    ffmpeg_args += ["-filter_complex",graph,"-map","[v]"]
    ffmpeg_args += ["-r","30","-vsync","cfr","-pix_fmt","yuv420p"]
    ffmpeg_args += ["-c:v","libx264","-preset","veryfast","-crf","18"]
    ffmpeg_args += ["-an","-movflags","+faststart"]
    ffmpeg_args.append(out_path)
    run_cmd(ffmpeg_args, logger=logger)

# ---------- main orchestration ----------
def plan_and_generate(script, storyboard_hint, creator_img_file, use_openrouter, use_fal_keyframes, gen_video, broll_only):
    """Run one full job: plan scenes, make keyframes, optionally render and stitch clips.

    Everything the job writes goes into a new sub-folder of JOB_ROOT.

    Args:
        script: Ad script text.
        storyboard_hint: Free-text storyboard/tech hints.
        creator_img_file: Optional uploaded file object; only read when
            *gen_video* is true.
        use_openrouter: Use the LLM planner when OPENROUTER_API_KEY is set.
        use_fal_keyframes: Use FAL for B-roll keyframes when FAL_KEY is set.
        gen_video: Render per-scene clips and the stitched preview.
        broll_only: Leave A-roll scenes out of the result.

    Returns:
        A 7-tuple: (pre-rules storyboard JSON text, final manifest JSON text,
        stitched video path or None, keyframe paths, keyframe paths again,
        normalized clip paths, last 4000 characters of the job log).
    """
    job_id=f"job_{int(time.time())}_{uuid.uuid4().hex[:6]}"
    job_dir=os.path.join(JOB_ROOT, job_id); os.makedirs(job_dir, exist_ok=True)

    # Per-job logger: DEBUG and above to <job_dir>/log.txt, INFO and above to the stream.
    # NOTE(review): the handlers added here are not removed or closed in this function.
    logger=logging.getLogger(job_id); logger.setLevel(logging.DEBUG)
    if not logger.handlers:
        fh=logging.FileHandler(os.path.join(job_dir,"log.txt"),mode="a",encoding="utf-8"); fh.setLevel(logging.DEBUG)
        sh=logging.StreamHandler(); sh.setLevel(logging.INFO)
        fmt=logging.Formatter("%(asctime)s %(levelname)s: %(message)s","%Y-%m-%d %H:%M:%S")
        fh.setFormatter(fmt); sh.setFormatter(fmt); logger.addHandler(fh); logger.addHandler(sh)

    logger.info("Job start: "+job_id)
    logger.debug("Script: "+(script[:1000]+("...[truncated]" if len(script)>1000 else "")))
    logger.debug("Storyboard: "+str(storyboard_hint))

    # plan
    # Any planner failure (API error, unparsable output) falls back to the fixed manifest.
    if use_openrouter and os.environ.get("OPENROUTER_API_KEY"):
        try:
            raw = call_openrouter_planner_failover(script, storyboard_hint, job_dir, logger=logger)
            manifest = extract_json_from_text(raw)
        except Exception:
            logger.exception("Planner failed; using fallback")
            manifest = deterministic_fallback_manifest(script, storyboard_hint)
    else:
        manifest = deterministic_fallback_manifest(script, storyboard_hint)

    if not isinstance(manifest, dict) or not manifest.get("scenes"):
        logger.warning("Planner returned no scenes; using deterministic fallback.")
        manifest = deterministic_fallback_manifest(script, storyboard_hint)

    manifest = estimate_durations_from_manifest(manifest, storyboard_hint, logger=logger)
    # Snapshot taken before the platform rules mutate the scenes in place.
    storyboard_manifest = copy.deepcopy(manifest)

    try:
        manifest = enforce_platform_rules(manifest, broll_only=broll_only, logger=logger)
    except ValueError:
        logger.warning("Empty scenes at enforcement; retrying with fallback.")
        manifest = deterministic_fallback_manifest(script, storyboard_hint)
        manifest = estimate_durations_from_manifest(manifest, storyboard_hint, logger=logger)
        manifest = enforce_platform_rules(manifest, broll_only=broll_only, logger=logger)

    with open(os.path.join(job_dir,"manifest.json"),"w") as fh: json.dump(manifest, fh, indent=2)
    with open(os.path.join(job_dir,"storyboard_pre_rules.json"),"w") as fh: json.dump(storyboard_manifest, fh, indent=2)

    # keyframes
    # One keyframe per scene, in scene order, so keyframe_paths[i] belongs to scene i.
    # Only B-roll scenes use FAL; every other case gets a drawn placeholder.
    keyframe_paths=[]
    for sc in manifest.get("scenes",[]):
        sid = sc.get("id","s_"+uuid.uuid4().hex[:6])
        prompt = (sc.get("recommended_keyframe_prompt") or sc.get("summary") or "B-roll").strip()
        img_out = os.path.join(job_dir,f"{sid}_kf.jpg")
        is_broll = sc.get("role","").lower().startswith("b")
        if is_broll and use_fal_keyframes and os.environ.get("FAL_KEY"):
            try: generate_keyframe_fal(prompt,img_out,negative_prompt="blurry, watermark, text",logger=logger)
            except Exception:
                logger.exception("Keyframe FAL failed; placeholder")
                make_keyframe_placeholder(prompt,sc.get("summary",""),"",img_out)
        else:
            make_keyframe_placeholder(prompt,sc.get("summary",""),"",img_out)
        keyframe_paths.append(img_out)

    clips_for_download=[]
    final_video=None

    if gen_video:
        clip_paths=[]
        # Save creator image if provided (placeholder A-roll uses keyframe anyway)
        # The saved copy is only logged; it is not used further in this function.
        if creator_img_file is not None:
            try:
                dst=os.path.join(job_dir,"creator"+os.path.splitext(creator_img_file.name)[1])
                with open(dst,"wb") as fh: fh.write(creator_img_file.read()); logger.info("Saved creator image: "+dst)
            except Exception: logger.exception("Failed to save creator image")

        # Per scene: produce <sid>.mp4 (captioned) and <sid>_norm.mp4 (normalized);
        # only the normalized clip is stitched and offered for download.
        for i, sc in enumerate(manifest.get("scenes", [])):
            sid   = sc.get("id", f"s_{i}")
            role  = sc.get("role","").lower()
            intent= sc.get("intent","").lower()
            dur   = float(sc.get("duration_s", 2.0))
            provider = sc.get("provider","")
            ontext   = sc.get("on_screen_text","")
            outmp4   = os.path.join(job_dir, f"{sid}.mp4")
            key_img  = keyframe_paths[i] if i < len(keyframe_paths) else None

            if role.startswith("a"):
                # A-roll placeholder
                # A still of the keyframe lasting at least 2 seconds, with the caption burned in.
                tmp = still_from_image(key_img or keyframe_paths[0], outmp4.replace(".mp4","_raw.mp4"), dur_sec=max(2.0, dur), logger=logger)
                overlay_caption(tmp, outmp4, ontext, logger=logger)
                if not broll_only:
                    # normalize before adding
                    norm = outmp4.replace(".mp4","_norm.mp4")
                    normalize_9x16(outmp4, norm, logger=logger)
                    clip_paths.append(norm); clips_for_download.append(norm)
                continue

            # gen_sec: length requested from the generator; trim_sec: length kept.
            gen_sec  = int(sc.get("gen_sec", math.ceil(dur)))
            trim_sec = int(sc.get("trim_sec", int(dur)))

            if provider == "veo":
                # Text-to-video, then trim -> caption -> normalize.
                tmp = outmp4.replace(".mp4","_raw.mp4")
                prompt = compose_veo_prompt(sc)
                try: gen_broll_veo_t2v(prompt, gen_sec=gen_sec, outmp4=tmp, logger=logger)
                except Exception:
                    logger.exception("Veo gen failed; placeholder")
                    tmp = still_from_image(key_img or keyframe_paths[0], tmp, gen_sec, logger=logger)
                trimmed = outmp4.replace(".mp4", f"_t{trim_sec}.mp4")
                trim_video(tmp, trimmed, keep_sec=trim_sec, logger=logger)
                overlay_caption(trimmed, outmp4, ontext, logger=logger)
                # normalize before adding
                norm = outmp4.replace(".mp4","_norm.mp4")
                normalize_9x16(outmp4, norm, logger=logger)
                clip_paths.append(norm); clips_for_download.append(norm)

            elif provider == "wan":
                # Image-to-video from the keyframe; trimmed only when shorter than generated.
                tmp = outmp4.replace(".mp4","_raw.mp4")
                if not key_img:
                    key_img = os.path.join(job_dir, f"{sid}_auto_kf.jpg")
                    make_keyframe_placeholder(sc.get("summary","B-roll"), sc.get("summary",""), "", key_img)
                motion = compose_wan_motion(sc)
                try: gen_broll_wan_i2v(key_img, motion_prompt=motion, gen_sec=gen_sec, outmp4=tmp, logger=logger)
                except Exception:
                    logger.exception("WAN gen failed; placeholder")
                    tmp = still_from_image(key_img, tmp, gen_sec, logger=logger)
                if trim_sec < gen_sec:
                    trimmed = outmp4.replace(".mp4", f"_t{trim_sec}.mp4")
                    trim_video(tmp, trimmed, keep_sec=trim_sec, logger=logger)
                    overlay_caption(trimmed, outmp4, ontext, logger=logger)
                else:
                    overlay_caption(tmp, outmp4, ontext, logger=logger)
                # normalize before adding
                norm = outmp4.replace(".mp4","_norm.mp4")
                normalize_9x16(outmp4, norm, logger=logger)
                clip_paths.append(norm); clips_for_download.append(norm)

            else:
                # Any other provider value: captioned still of the keyframe for the scene duration.
                tmp = still_from_image(key_img or keyframe_paths[0], outmp4.replace(".mp4","_raw.mp4"), dur, logger=logger)
                overlay_caption(tmp, outmp4, ontext, logger=logger)
                norm = outmp4.replace(".mp4","_norm.mp4")
                normalize_9x16(outmp4, norm, logger=logger)
                clip_paths.append(norm); clips_for_download.append(norm)

        if clip_paths:
            final_out=os.path.join(job_dir,"final_demo.mp4")
            concat_videos(clip_paths, final_out, logger=logger)
            final_video=final_out

    # outputs
    # Only the tail of the log is returned.
    try:
        with open(os.path.join(job_dir,"log.txt"),"r",encoding="utf-8") as fh:
            log_text=fh.read()[-4000:]
    except Exception:
        log_text="No log file found or could not read log."

    storyboard_text = json.dumps(storyboard_manifest, indent=2)
    manifest_text   = json.dumps(manifest, indent=2)
    # keyframe_paths is returned twice on purpose or by accident: cannot tell from here.
    return storyboard_text, manifest_text, final_video, keyframe_paths, keyframe_paths, clips_for_download, log_text

def set_keys(openrouter_key, fal_key):
    """Store any supplied API keys in the process environment and report status.

    Empty or ``None`` values leave the corresponding variable untouched.
    Supplied values are whitespace-stripped before being stored.

    Returns:
        A status string saying whether each variable is present in
        ``os.environ`` after the update.
    """
    supplied = (
        ("OPENROUTER_API_KEY", openrouter_key),
        ("FAL_KEY", fal_key),
    )
    for env_name, value in supplied:
        if value:
            os.environ[env_name] = value.strip()
    has_openrouter = "OPENROUTER_API_KEY" in os.environ
    has_fal = "FAL_KEY" in os.environ
    return f"OpenRouter set: {has_openrouter}; FAL set: {has_fal}"

def show_keys_status():
    """Report whether the OpenRouter and FAL key variables exist in ``os.environ``."""
    openrouter_present = "OPENROUTER_API_KEY" in os.environ
    fal_present = "FAL_KEY" in os.environ
    return f"OpenRouter set: {openrouter_present}; FAL set: {fal_present}"

# ---------- UI ----------
# Gradio layout: a collapsed key-entry panel, an input column and an output column,
# all wired to plan_and_generate through the "Plan & Generate" button.
with gr.Blocks(title="ABCD-aware Scene Planner — Veo/WAN + SD Keyframes") as demo:
    gr.Markdown("**Hook+Demo** planner. Simple rules. Clean routing. Robust stitching. Toggle **B-rolls only** to generate just the B-rolls. (Video-only preview; add VO later via ElevenLabs.)")
    # Settings panel: keys typed here are written into os.environ by set_keys.
    with gr.Accordion("Settings", open=False):
        or_key = gr.Textbox(label="OPENROUTER_API_KEY", type="password")
        fal_key = gr.Textbox(label="FAL_KEY (for Veo/WAN/SD)", type="password")
        keys_btn = gr.Button("Save Keys")
        keys_status = gr.Markdown()
        # Save first, then re-read the environment to refresh the status line.
        keys_btn.click(set_keys, inputs=[or_key, fal_key], outputs=keys_status).then(show_keys_status, outputs=keys_status)

    with gr.Row():
        # Left column: user inputs and feature toggles.
        with gr.Column(scale=2):
            script_in = gr.Textbox(label="Script", lines=5, value=(
                "If your protein tastes like chalk — this fixes it. 25g protein, mixes smooth, rich chocolate. Tap to try Chocolate Fudge today."
            ))
            storyboard_in = gr.Textbox(label="Storyboard (tech hints)", lines=3, value="Hook+Demo 9:16, macro proof, calm CTA hold. Captions on; camera locked.")
            creator_in = gr.File(label="Creator image (optional)")
            use_openrouter = gr.Checkbox(label="Use OpenRouter planner (enable keys)", value=True)
            use_fal = gr.Checkbox(label="Use FAL SD for B-roll keyframes", value=True)
            broll_only = gr.Checkbox(label="B-rolls only (skip A-roll/HeyGen)", value=False)
            gen_video = gr.Checkbox(label="Generate stitched preview (FFmpeg)", value=True)
            run_btn = gr.Button("Plan & Generate")
        # Right column: one widget per value returned by plan_and_generate.
        with gr.Column(scale=1):
            storyboard_out = gr.Code(label="Storyboard (pre-rules) JSON", language="json")
            manifest_out   = gr.Code(label="Manifest (post-rules) JSON", language="json")
            video_out      = gr.Video(label="Final video", interactive=True)
            keyframes_gallery = gr.Gallery(label="Keyframes (generated)")
            keyframes_files   = gr.Files(label="Keyframe files (download)")
            clips_files       = gr.Files(label="Scene clips (download)")
            log_out      = gr.Textbox(label="Run log (tail)", lines=12)

    # The order of `outputs` must match the 7-tuple returned by plan_and_generate;
    # note that gen_video is passed before broll_only in `inputs`.
    run_btn.click(
        plan_and_generate,
        inputs=[script_in, storyboard_in, creator_in, use_openrouter, use_fal, gen_video, broll_only],
        outputs=[storyboard_out, manifest_out, video_out, keyframes_gallery, keyframes_files, clips_files, log_out]
    )

if __name__ == "__main__":
    # NOTE(review): this binds all interfaces and requests a public share link, so
    # anyone holding the URL can reach the "Save Keys" form — confirm this is intended.
    # debug=True keeps the notebook cell running so errors and logs stay visible.
    demo.launch(server_name="0.0.0.0", share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://df1e77b32eecbded61.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


2025-10-05 13:31:52 INFO: Job start: job_1759671112_0c658e
INFO:job_1759671112_0c658e:Job start: job_1759671112_0c658e
DEBUG:job_1759671112_0c658e:Script: If your protein tastes like chalk — this fixes it. 25g protein, mixes smooth, rich chocolate. Tap to try Chocolate Fudge today.
DEBUG:job_1759671112_0c658e:Storyboard: Hook+Demo 9:16, macro proof, calm CTA hold. Captions on; camera locked.
2025-10-05 13:33:21 INFO: Calling FAL SD v3.5 Large for keyframe
INFO:job_1759671112_0c658e:Calling FAL SD v3.5 Large for keyframe
2025-10-05 13:33:24 INFO: [FAL]   0%|          | 0/24 [00:00<?, ?it/s]
INFO:job_1759671112_0c658e:[FAL]   0%|          | 0/24 [00:00<?, ?it/s]
2025-10-05 13:33:24 INFO: [FAL]   4%|▍         | 1/24 [00:00<00:12,  1.85it/s]
INFO:job_1759671112_0c658e:[FAL]   4%|▍         | 1/24 [00:00<00:12,  1.85it/s]
2025-10-05 13:33:24 INFO: [FAL]   8%|▊         | 2/24 [00:00<00:09,  2.36it/s]
INFO:job_1759671112_0c658e:[FAL]   8%|▊         | 2/24 [00:00<00:09,  2.36it/s]
2025-10-05 13

Keyboard interruption in main thread... closing server.
Killing tunnel 0.0.0.0:7860 <> https://df1e77b32eecbded61.gradio.live


**Hook + demo and Lifestyle montage format**


This pipeline auto-plans and generates short-form ad videos in two formats: Hook+Demo and Lifestyle Montage.

It takes a script + storyboard hint, analyzes structure via LLM (OpenRouter), and builds a scene manifest with durations, intents, and prompts.
Best practices for high-performing TikTok and YouTube ads are built in and enforced.
Each scene is auto-tagged (A-roll/B-roll, proof/cta/vibe/usage) and rendered via Veo or WAN (using FAL).

If the LLM fails, it uses a deterministic fallback with pre-set scenes.

Then it generates keyframes and optional stitched 9:16 video previews (FFmpeg).

Outputs: storyboard JSON, manifest JSON, keyframes, and final video preview.

Inputs: script, storyboard hint, ad format, optional creator image, API keys (OpenRouter/FAL)


In [None]:
# scene_planner_hook_lifestyle_fixed.py
# Hook+Demo + Lifestyle Montage planner/generator

import os, re, json, time, uuid, shutil, subprocess, requests, logging, base64, math, copy
from PIL import Image, ImageDraw, ImageFont
import gradio as gr

# Root folder name for job output; created relative to the current working
# directory as a side effect of running this cell.
# NOTE(review): presumably each run gets its own sub-folder here — confirm in plan_and_generate.
JOB_ROOT = "gradio_jobs"
os.makedirs(JOB_ROOT, exist_ok=True)

# ---------- fonts ----------
# Preferred TrueType fonts, checked in order before falling back to a directory scan.
FONT_CANDIDATES = [
    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
]


def find_font_ttf():
    """Return the path of a usable ``.ttf`` font, or ``None`` if none is found.

    The first existing entry of ``FONT_CANDIDATES`` wins; otherwise the first
    ``.ttf`` found anywhere under ``/usr/share/fonts`` is returned.
    """
    preferred = next((path for path in FONT_CANDIDATES if os.path.exists(path)), None)
    if preferred is not None:
        return preferred
    import glob
    discovered = glob.glob("/usr/share/fonts/**/*.ttf", recursive=True)
    if not discovered:
        return None
    return discovered[0]


# Resolved once at import time and shared by the drawing helpers.
FONT_PATH = find_font_ttf()

def ffmpeg_exists():
    """Return True when an ``ffmpeg`` executable can be located on PATH."""
    located = shutil.which("ffmpeg")
    return located is not None
def shlex_quote(s):
    """Wrap ``str(s)`` in single quotes, rewriting embedded single quotes as ``'"'"'``.

    The result is always quoted, even for strings that contain nothing special.
    """
    pieces = str(s).split("'")
    body = "'\"'\"'".join(pieces)
    return f"'{body}'"
def run_cmd(cmd_list, raise_on_err=True, logger=None):
    """Run a command without a shell and return its captured stdout.

    Args:
        cmd_list: argv-style list handed directly to ``subprocess.run``.
        raise_on_err: when true, a non-zero exit status raises ``RuntimeError``.
        logger: optional logger; receives the command line (info), any
            non-empty stdout/stderr (debug) and the failure message (error).

    Returns:
        The process's stdout as text, also on failure when ``raise_on_err``
        is false.
    """
    printable = " ".join(map(shlex_quote, cmd_list))
    if logger:
        logger.info("RUN: "+printable)
    proc = subprocess.run(cmd_list, capture_output=True, text=True)
    if logger:
        for label, stream in (("STDOUT: ", proc.stdout), ("STDERR: ", proc.stderr)):
            trimmed = stream.strip()
            if trimmed:
                logger.debug(label + trimmed)
    if proc.returncode == 0:
        return proc.stdout
    failure = f"Command failed: {printable}\nSTDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}"
    if logger:
        logger.error(failure)
    if raise_on_err:
        raise RuntimeError(failure)
    return proc.stdout

def draw_text_wrapped(draw, text, font, max_width):
    """Greedily wrap *text* into lines no wider than *max_width* pixels.

    Widths are measured with ``draw.textbbox``. A single word wider than
    *max_width* is kept on a line of its own rather than being split.

    Returns:
        The list of wrapped lines (empty for empty or ``None`` text).
    """
    wrapped = []
    pending = ""
    for word in str(text or "").split():
        trial = f"{pending} {word}".strip()
        box = draw.textbbox((0, 0), trial, font=font)
        too_wide = (box[2] - box[0]) > max_width
        if too_wide and pending:
            # Close the current line and start a new one with this word.
            wrapped.append(pending)
            pending = word
        else:
            pending = trial
    if pending:
        wrapped.append(pending)
    return wrapped

def _esc(s:str)->str: return str(s or "").replace('"','\\"')

def ffmpeg_drawtext_escape(s: str) -> str:
    """Prepare caption text for use inside an ffmpeg ``drawtext`` ``text=`` value.

    Backslash, colon, percent sign and single quote are each prefixed with a
    backslash; newlines and carriage returns become spaces. Falsy input
    yields an empty string.
    """
    replacements = str.maketrans({
        "\\": "\\\\",
        ":": "\\:",
        "%": "\\%",
        "'": "\\'",
        "\n": " ",
        "\r": " ",
    })
    return str(s or "").translate(replacements)

# ---------- RULES ----------
# Per-format rule tables. Within the visible code only
# ["text"]["max_words_per_caption"] is read (by enforce_platform_rules).
# NOTE(review): the "beats" min/max look like per-beat durations in seconds and
# "broll_snap_choices" like allowed B-roll lengths — confirm against their consumers.
RULES_HOOK = {
    "format": "Hook+Demo",
    "video": {
        "aspect": "9:16",
        "beats": [
            {"name":"hook", "min":1.5, "max":3.0},
            {"name":"proof","min":2.5, "max":4.0},
            {"name":"cta",  "min":1.5, "max":2.5}
        ],
        "broll_snap_choices": [3,4,5]
    },
    # Captions longer than this many words are truncated.
    "text": {"max_words_per_caption": 8}
}
RULES_LIFESTYLE = {
    "format": "Lifestyle Montage",
    "video": {
        "aspect": "9:16",
        "beats": [
            {"name":"vibe",       "min":3.5, "max":5.0},
            {"name":"usage",      "min":4.0, "max":5.5},
            {"name":"aspiration", "min":4.0, "max":6.0},
            {"name":"cta",        "min":2.0, "max":3.0}
        ]
    },
    # Lifestyle captions are allowed to be longer than Hook+Demo captions.
    "text": {"max_words_per_caption": 12}
}

def _short_caption_ok(txt:str, cap:int)->bool:
    return len((txt or "").split()) <= cap

def _choose_broll_len_hook(intent:str)->int:
    if intent == "proof": return 3
    if intent == "cta":   return 2
    return 3

# ---------- GLOBAL CONTEXT ----------
def extract_global_context(script:str):
    """Pull a brand, a product topic and benefit keywords out of *script*.

    Purely heuristic and deterministic (regex only, no LLM call):

    * brand    - capitalised text following the word "by", if any;
    * topic    - the first entry of a fixed category list found in the script;
    * keywords - matched benefit phrases followed by the topic, de-duplicated
      with order preserved.

    Returns:
        ``{"brand": str | None, "topic": str | None, "keywords": list[str]}``
    """
    source = script or ""

    brand_match = re.search(r"\bby\s+([A-Z][A-Za-z0-9& ]{1,40})", source)
    brand = brand_match.group(1).strip() if brand_match else None

    # Order matters: the first category that matches becomes the topic.
    categories = (
        "coffee", "coffee powder", "protein", "protein powder",
        "shampoo", "skincare", "lotion", "tea", "energy drink",
        "supplement", "multivitamin", "cereal", "snack", "bars",
    )
    topic = next(
        (cat for cat in categories
         if re.search(rf"\b{re.escape(cat)}\b", source, flags=re.I)),
        None,
    )

    benefit_phrases = (
        "mixes smooth", "rich chocolate", "low sugar", "high protein", "energizing",
        "gentle on stomach", "no jitters", "organic", "vegan", "kid-friendly",
    )
    benefits = [
        phrase for phrase in benefit_phrases
        if re.search(re.escape(phrase), source, flags=re.I)
    ]

    ordered = benefits + ([topic] if topic else [])
    keywords = list(dict.fromkeys(ordered))
    return {"brand": brand, "topic": topic, "keywords": keywords}

def inject_global_guardrails(scene_prompt:str, gc:dict):
    """Append topic, brand and keyword guardrail sentences to a scene prompt.

    Args:
        scene_prompt: the scene-specific prompt (``None`` is treated as empty).
        gc: global context dict; the optional keys ``topic``, ``brand`` and
            ``keywords`` each add one sentence when truthy.

    Returns:
        The prompt, a single space, then the guardrail sentences joined by spaces.
    """
    topic = gc.get("topic")
    brand = gc.get("brand")
    keywords = gc.get("keywords")

    guardrails = []
    if topic:
        guardrails.append(f"This is an ad for {topic}; keep the {topic} or its pack present & on-topic.")
    if brand:
        guardrails.append(f"Imply brand {brand} through packaging style; do not invent a real logo.")
    if keywords:
        guardrails.append("Emphasize: " + ", ".join(keywords) + ".")
    # This sentence is always appended, even when the context is empty.
    guardrails.append("Avoid unrelated categories. No random products. Keep vertical 9:16.")

    prefix = scene_prompt or ""
    return f"{prefix} {' '.join(guardrails)}"

def validate_scene_topics(manifest:dict, gc:dict):
    """Make sure every scene mentions the product topic.

    A scene whose summary and on-screen text both omit the topic
    (case-insensitive) gets `` — <topic>`` appended to its summary. Captions
    are left alone so their word limit is not disturbed. The manifest is
    modified in place and returned; with no topic it is returned untouched.
    """
    topic = (gc or {}).get("topic")
    if not topic:
        return manifest
    for scene in manifest.get("scenes", []):
        haystack = " ".join((scene.get("summary", ""), scene.get("on_screen_text", ""))).lower()
        if topic.lower() in haystack:
            continue
        # Tag the summary rather than the caption (safer for caption length).
        current = scene.get("summary") or "Lifestyle moment"
        scene["summary"] = current + f" — {topic}"
    return manifest

# ---------- prompt builders ----------
# Shared style fragments; compose_veo_prompt_hook joins all three into the
# opening sentence of every Hook+Demo Veo prompt.
PROMPT_STYLE = {
    "lighting": "soft window light from the left; clean studio key; high micro-contrast",
    "color":    "neutral gray palette; natural skin tones",
    "constraints": "vertical 9:16; camera locked; avoid zoom-only shots; do not invent logos or on-screen text; no extra people"
}
def compose_veo_prompt_hook(scene: dict, gc:dict=None) -> str:
    """Build the Veo text-to-video prompt for one Hook+Demo scene.

    The prompt is the shared style preamble, a shot description selected by
    the scene's intent ("proof", "cta" or anything else), the on-screen text
    as a guide, and finally the global guardrail sentences.

    Args:
        scene: scene dict; reads ``intent``, ``on_screen_text`` and
            ``gen_sec`` (defaults to 4).
        gc: optional global context passed to ``inject_global_guardrails``.
    """
    kind = (scene.get("intent") or "").lower()
    caption = scene.get("on_screen_text","")
    seconds = int(scene.get("gen_sec", 4))
    preamble = "Vertical 9:16. " + ". ".join(
        PROMPT_STYLE[part] for part in ("lighting", "color", "constraints")
    ) + "."

    if kind == "proof":
        # The final timing mark follows the requested generation length.
        shot = ("Top-down macro product demo on a clean tabletop. "
                "0–0.8s first drops hit filter (tiny droplets), 0.8–2.6s bloom expands with small bubbles, "
                "2.6–{:.1f}s clockwise swirl then surface becomes glossy-smooth.").format(seconds)
    elif kind == "cta":
        shot = ("Pack hero centered on matte stone with two small props. "
                "One gentle steam curl rises, then complete stillness for CTA readability.")
    else:
        shot = "Clean product shot;"

    prompt = f"{preamble} {shot} Guide: '{caption}'."
    return inject_global_guardrails(prompt, gc or {})

def compose_wan_motion_hook(scene: dict, gc:dict=None) -> str:
    """Build the WAN image-to-video motion prompt for one Hook+Demo scene.

    The motion script is chosen by the scene's intent ("proof", "cta" or a
    generic default) and suffixed with the global guardrail sentences.
    """
    motion_by_intent = {
        "proof": "0–1s powder bloom; 1–3s gentle clockwise swirl; 3–5s surface mirror-smooth; camera locked.",
        "cta": "0–2s subtle particles behind the pack; 2–5s still for CTA legibility; camera locked.",
    }
    fallback = "Subtle breathing motion only; camera locked; no zoom; same composition."
    kind = (scene.get("intent") or "").lower()
    motion = motion_by_intent.get(kind, fallback)
    return inject_global_guardrails(motion, gc or {})

# Lifestyle builders
def compose_lifestyle_prompt(scene, gc:dict=None):
    """Build the generation prompt for one Lifestyle Montage scene.

    The prompt is a fixed lifestyle preamble, a shot description selected by
    the scene's intent (vibe / usage / aspiration / cta, else a generic one),
    the on-screen text as a guide, and the global guardrail sentences.
    """
    kind = (scene.get("intent") or "").lower()
    caption = scene.get("on_screen_text","")
    preamble = "Vertical 9:16. Natural handheld motion, available light, authentic lifestyle tone. No logos."
    shots = {
        "vibe": "Quiet morning routine in a sunlit kitchen; soft movement; warm highlights.",
        "usage": "Person prepares and sips product; candid angles; subtle camera drift.",
        "aspiration": "Small group outdoors at golden hour; shallow depth of field; casual smiles.",
        "cta": "Pack hero on clean surface; minimal motion; readable CTA.",
    }
    shot = shots.get(kind, "Lifestyle moment.")
    prompt = f"{preamble} {shot} Guide: '{caption}'."
    return inject_global_guardrails(prompt, gc or {})

def compose_lifestyle_motion(scene, gc:dict=None):
    """Build the motion prompt for one Lifestyle Montage scene.

    The motion script is chosen by the scene's intent (vibe / usage /
    aspiration / cta, else a generic one) and suffixed with the global
    guardrail sentences.
    """
    motion_by_intent = {
        "vibe": "0–2s gentle handheld; 2–5s slow parallax; keep subject natural; no zoom.",
        "usage": "0–2s pick-up/pour; 2–4s sip; 4–5s relaxed hold; handheld drift.",
        "aspiration": "0–3s casual interaction; 3–5s shared laugh; soft handheld arc.",
        "cta": "0–2s micro particles behind pack; 2–3s full stillness for CTA.",
    }
    fallback = "Subtle handheld breathing; no jump cuts; maintain composition."
    kind = (scene.get("intent") or "").lower()
    motion = motion_by_intent.get(kind, fallback)
    return inject_global_guardrails(motion, gc or {})

# ---------- LLM prompts ----------
# Planner prompt templates. build_planner_prompt fills the [[SCRIPT]], [[HINT]]
# and [[FORMAT]] placeholders by plain string replacement.

# Hook+Demo planner: asks for hook / proof / cta scenes as JSON only.
LLM_PROMPT_HOOK = r"""
SYSTEM: Return ONLY JSON. No prose. You are a Video Planner for short vertical ads (Hook+Demo).
USER:
- ad_format: "Hook+Demo"
- script: "[[SCRIPT]]"
- storyboard_hint: "[[HINT]]"
Task: Split by meaning (hook, proof, cta). Output scenes usable for generation.
Rules:
- duration_s per scene in [1.2,6.0]; total <= 30.
- First scene is A-roll hook if script has a hook line.
- provider: "heygen" for A-roll, "veo" for B-roll.
- For B-roll include concise "recommended_keyframe_prompt".
Schema:
{"ad_format":"Hook+Demo","script_summary":"one-liner","scenes":[{"id":"s1","role":"A-roll|B-roll","intent":"hook|proof|cta","summary":"one line","dialogue":"(if A-roll)","on_screen_text":"","entities":["..."],"duration_s":0.0,"start_s":0.0,"recommended_keyframe_prompt":"(if B-roll)","provider":"heygen|veo"}]}
"""

# Lifestyle Montage planner: asks for vibe / usage / aspiration / cta scenes, all B-roll.
LLM_PROMPT_LIFESTYLE = r"""
SYSTEM: Return ONLY JSON. No prose. You are a Video Planner for short vertical ads (Lifestyle Montage).
USER:
- ad_format: "Lifestyle Montage"
- script: "[[SCRIPT]]"
- storyboard_hint: "[[HINT]]"
Task: Split into vibe → usage → aspiration → cta. All scenes are B-roll (no A-roll).
Rules:
- duration_s per scene in [2.5,6.0]; total <= 30.
- default provider: "wan"; use "veo" for pack hero macro/CTA if needed.
- Include concise "recommended_keyframe_prompt".
Schema:
{"ad_format":"Lifestyle Montage","script_summary":"one-liner","scenes":[{"id":"s1","role":"B-roll","intent":"vibe|usage|aspiration|cta","summary":"one line","on_screen_text":"","entities":["..."],"duration_s":0.0,"start_s":0.0,"recommended_keyframe_prompt":"concise SDXL-style","provider":"wan|veo"}]}
"""

# Retry prompt, sent as a fresh request. It carries the format, script and hint
# itself but does not repeat the schema text, even though it says "same schema".
LLM_PROMPT_RETRY_BASED = r"""
SYSTEM: Return ONLY JSON. No prose.
USER:
Earlier output wasn’t valid JSON. Use the same schema and regenerate from the same inputs.
- ad_format: "[[FORMAT]]"
- script: "[[SCRIPT]]"
- storyboard_hint: "[[HINT]]"
JSON only.
"""

def build_planner_prompt(ad_format:str, script:str, hint:str):
    """Fill the planner and retry prompt templates for the requested ad format.

    Any *ad_format* starting with "lifestyle" (case-insensitive) selects the
    Lifestyle Montage template; everything else selects Hook+Demo. Script and
    hint are quote-escaped before substitution.

    Returns:
        ``(base_prompt, retry_prompt, tag)`` where tag is ``"LIFESTYLE"`` or ``"HOOK"``.
    """
    wants_lifestyle = (ad_format or "").lower().startswith("lifestyle")
    if wants_lifestyle:
        template, format_label, tag = LLM_PROMPT_LIFESTYLE, "Lifestyle Montage", "LIFESTYLE"
    else:
        template, format_label, tag = LLM_PROMPT_HOOK, "Hook+Demo", "HOOK"

    # Substitution order (script, then hint) is kept as in the templates' contract.
    base_prompt = template.replace("[[SCRIPT]]", _esc(script)).replace("[[HINT]]", _esc(hint))
    retry_prompt = LLM_PROMPT_RETRY_BASED.replace("[[FORMAT]]", format_label)
    retry_prompt = retry_prompt.replace("[[SCRIPT]]", _esc(script))
    retry_prompt = retry_prompt.replace("[[HINT]]", _esc(hint))
    return base_prompt, retry_prompt, tag

# ---------- planner (OpenRouter failover) ----------
def extract_json_from_text(text):
    """Parse the JSON object embedded in an LLM reply.

    First tries a ``{...}`` span that reaches the end of the text, then any
    span from the first ``{`` to the last ``}``.

    Raises:
        ValueError: no brace-delimited span exists (``json.loads`` errors for
            a malformed span propagate as well).
    """
    attempts = (
        (r'\{.*\}\s*$', re.S),
        (r'\{[\s\S]*\}', 0),
    )
    for pattern, flags in attempts:
        found = re.search(pattern, text, flags=flags)
        if found:
            return json.loads(found.group(0))
    raise ValueError("No JSON object found in LLM output.")

def call_openrouter_planner_failover(ad_format, script, hint, out_dir, logger=None):
    """Ask a sequence of OpenRouter models for a scene plan and return the first usable reply.

    Each model is tried with the base prompt and then with the retry prompt.
    Every reply is written to ``out_dir`` before it is validated.

    Args:
        ad_format, script, hint: forwarded to ``build_planner_prompt``.
        out_dir: existing directory that receives the raw reply dumps.
        logger: accepted for signature parity; not used in this function.

    Returns:
        The raw reply text of the first response from which a JSON object
        could be extracted (the text, not the parsed object).

    Raises:
        RuntimeError: the API key is missing, or every candidate failed.
    """
    key = os.environ.get("OPENROUTER_API_KEY")
    if not key: raise RuntimeError("OPENROUTER_API_KEY not set")
    # Imported lazily so the module loads without the openai package installed.
    from openai import OpenAI
    client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=key)

    base_prompt, retry_prompt, tag = build_planner_prompt(ad_format, script, hint)

    # Tried in order; each model gets the base prompt first, then the retry prompt.
    candidates = [
        ("openai/gpt-5",               base_prompt), ("openai/gpt-5",               retry_prompt),
        ("anthropic/claude-sonnet-4",  base_prompt), ("anthropic/claude-sonnet-4",  retry_prompt),
        ("anthropic/claude-3.5-sonnet",base_prompt), ("anthropic/claude-3.5-sonnet",retry_prompt),
        ("openai/gpt-4o",              base_prompt), ("openai/gpt-4o",              retry_prompt),
        ("google/gemini-1.5-pro-002",  base_prompt), ("google/gemini-1.5-pro-002",  retry_prompt),
    ]
    last_err=None
    for model, prompt in candidates:
        try:
            msgs = [
                {"role":"system","content":"Return ONLY JSON. No prose."},
                {"role":"user","content": prompt}
            ]
            resp = client.chat.completions.create(model=model, messages=msgs, temperature=0.0, max_tokens=1200)
            text = (resp.choices[0].message.content or "")
            # NOTE: the file name depends only on tag and model, so the retry
            # attempt overwrites the dump written by the base attempt.
            raw_path = os.path.join(out_dir, f"planner_raw_{tag}_{model.replace('/','_')}.txt")
            with open(raw_path,"w",encoding="utf-8") as fh: fh.write(text)
            if not text.strip(): last_err = ValueError(f"{model} empty"); continue
            try:
                # Validation only: the parsed object is discarded and the text returned.
                _ = extract_json_from_text(text)
                return text
            except Exception as ve:
                last_err=ve
        except Exception as e:
            # API or file errors do not abort the loop; the next candidate is tried.
            last_err=e
    raise RuntimeError(f"Planner failed: {last_err}")

# ---------- deterministic fallbacks ----------
def deterministic_fallback_manifest_hook(script, hint):
    """Return a fixed three-scene Hook+Demo manifest for use when the planner fails.

    The hook line is the first non-empty period-separated segment of *script*
    (or its first 60 characters when there is none). The proof and CTA scenes
    are canned; *hint* is accepted but not used.
    """
    segments = [piece.strip() for piece in str(script).split('.')]
    segments = [piece for piece in segments if piece]
    hook = segments[0] if segments else str(script)[:60]

    hook_scene = {
        "id":"s1","role":"A-roll","intent":"hook","summary":hook,"dialogue":hook,
        "on_screen_text":"Protein tastes like chalk?","entities":["creator_face"],
        "duration_s":0,"start_s":0,"provider":"heygen",
    }
    proof_scene = {
        "id":"s2","role":"B-roll","intent":"proof","summary":"Pour → swirl → smooth surface",
        "on_screen_text":"25g protein • Mixes smooth • Rich chocolate",
        "duration_s":0,"start_s":0,
        "recommended_keyframe_prompt":"Top-down macro pour into shaker, swirl, glossy surface, photorealistic, 9:16",
        "provider":"veo",
    }
    cta_scene = {
        "id":"s3","role":"B-roll","intent":"cta","summary":"Pack hero, calm hold",
        "on_screen_text":"Try Chocolate Fudge Today",
        "duration_s":0,"start_s":0,
        "recommended_keyframe_prompt":"Pack hero on matte stone, soft top light, warm rim, neutral gray background, 9:16",
        "provider":"veo",
    }
    return {
        "ad_format":"Hook+Demo",
        "script_summary":hook,
        "scenes":[hook_scene, proof_scene, cta_scene],
        "notes":"fallback:hook",
    }

def deterministic_fallback_manifest_lifestyle(script, hint):
    """Return a fixed four-scene Lifestyle Montage manifest for use when the planner fails.

    The script summary is the text before the first period of *script*
    (or a stock line when that is empty). All scenes are canned B-roll;
    *hint* is accepted but not used.
    """
    summary_line = (script or "").split(".")[0].strip() or "Start the day right."

    # (id, intent, summary, caption, keyframe prompt, provider)
    rows = (
        ("s1", "vibe", "Sunlit kitchen, tranquil start", "Morning energy",
         "Lifestyle: sunlit kitchen, natural handheld, 9:16", "wan"),
        ("s2", "usage", "Mix and sip, candid angles", "Smooth & delicious",
         "Handheld product use, soft light, candid feel, 9:16", "wan"),
        ("s3", "aspiration", "Friends outdoors golden hour", "Feel good daily",
         "Outdoor group, golden hour, shallow DOF, 9:16", "wan"),
        ("s4", "cta", "Pack hero calm hold", "Tap to try",
         "Pack hero on matte stone, clean light, 9:16", "veo"),
    )
    scenes = [
        {"id":scene_id,"role":"B-roll","intent":intent,"summary":summary,
         "on_screen_text":caption,"duration_s":0,"start_s":0,
         "recommended_keyframe_prompt":keyframe, "provider":provider}
        for scene_id, intent, summary, caption, keyframe, provider in rows
    ]
    return {
        "ad_format":"Lifestyle Montage",
        "script_summary":summary_line,
        "scenes":scenes,
        "notes":"fallback:lifestyle",
    }

# ---------- duration estimation ----------
def infer_wps_from_storyboard(hint:str):
    """Guess a words-per-second speaking rate from pacing words in the storyboard hint.

    Returns 4.4 when a fast-pace cue is present, 2.0 when a slow-pace cue is
    present (fast cues are checked first), and 3.6 otherwise.
    """
    lowered = (hint or "").lower()
    pace_table = (
        (4.4, ("fast","quick","snappy","rapid","montage")),
        (2.0, ("slow","calm","relaxed","gentle","long holds","long takes")),
    )
    for rate, cues in pace_table:
        for cue in cues:
            if cue in lowered:
                return rate
    return 3.6

def estimate_durations_from_manifest(manifest, storyboard_hint, logger=None):
    """Assign ``duration_s`` and ``start_s`` to every scene from its spoken word count.

    Scenes with dialogue (or ``voice_over.text``) get ``words / wps + 0.2``
    seconds clamped to [1.2, 6.0]; silent scenes get 2.0 seconds. The running
    total is nudged toward the 10-30 second window by adjusting one scene,
    which is itself re-clamped, so the window is not guaranteed. The manifest
    is modified in place, gains ``total_length_s`` and is returned. *logger*
    is unused.
    """
    scenes = manifest.get("scenes", [])
    wps = infer_wps_from_storyboard(storyboard_hint)
    shortest, longest = 1.2, 6.0

    def spoken_words(scene):
        line = scene.get("dialogue") or scene.get("voice_over", {}).get("text", "")
        return len(str(line).split())

    counts = [spoken_words(scene) for scene in scenes]

    # Voiced scenes are registered before silent ones; that insertion order
    # decides which scene absorbs the total-length correction below.
    durations = {}
    for idx, n_words in enumerate(counts):
        if n_words > 0:
            estimate = round(n_words / wps + 0.2, 2)
            durations[idx] = max(shortest, min(longest, estimate))
    for idx, n_words in enumerate(counts):
        if n_words <= 0:
            durations[idx] = 2.0

    raw_total = sum(durations.values())
    target_total = min(max(raw_total, 10.0), 30.0)
    gap = round(target_total - raw_total, 2)
    if scenes and abs(gap) >= 0.01:
        absorber = list(durations)[-1]
        adjusted = max(shortest, min(longest, durations[absorber] + gap))
        durations[absorber] = round(adjusted, 2)

    cursor = 0.0
    for idx, scene in enumerate(scenes):
        length = float(durations.get(idx, 2.0))
        scene["duration_s"] = length
        scene["start_s"] = round(cursor, 2)
        cursor += length
    manifest["total_length_s"] = round(cursor, 2)
    return manifest

# ---------- enforce rules + providers (format-aware) ----------
def _coerce_seconds(value, default):
    """Return *value* as a float, or *default* when it is missing or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return float(default)


def _scene_field_lower(scene, key):
    """Return the lower-cased string value of ``scene[key]`` ("" when missing or None)."""
    return str(scene.get(key) or "").lower()


def _clamp_caption(scene, cap):
    """Truncate the scene's on-screen text in place to at most *cap* words."""
    txt = scene.get("on_screen_text", "")
    if not _short_caption_ok(txt, cap):
        scene["on_screen_text"] = " ".join(txt.split()[:cap])


def _apply_lifestyle_rules(manifest, scenes):
    """Enforce Lifestyle Montage rules: all B-roll, four required intents, clamped durations."""
    cap = RULES_LIFESTYLE["text"]["max_words_per_caption"]

    # Ensure the four intents exist; missing ones are appended as stub scenes.
    present = {_scene_field_lower(s, "intent") for s in scenes}
    for required in ("vibe", "usage", "aspiration", "cta"):
        if required not in present:
            scenes.append({"id":f"s_{required}","role":"B-roll","intent":required,"summary":required.title(),
                           "on_screen_text":"", "recommended_keyframe_prompt":"Lifestyle scene, 9:16",
                           "provider":"wan","duration_s":0})

    # Duration used when a scene carries no usable duration_s of its own.
    default_len = {"vibe": 4.2, "usage": 4.8, "aspiration": 5.2}

    t = 0.0
    out = []
    for s in scenes:
        s["role"] = "B-roll"
        _clamp_caption(s, cap)

        intent = _scene_field_lower(s, "intent")
        if intent == "cta":
            dur = max(2.0, min(3.0, _coerce_seconds(s.get("duration_s"), 2.5)))
            s["provider"] = "veo"; s["gen_sec"] = 4; s["trim_sec"] = int(round(dur))
        elif intent in default_len:
            dur = max(3.5, min(6.0, _coerce_seconds(s.get("duration_s"), default_len[intent])))
            s["provider"] = "wan"; s["gen_sec"] = 5; s["trim_sec"] = int(round(dur))
        else:
            dur = 4.0
            s["provider"] = "wan"; s["gen_sec"] = 5; s["trim_sec"] = 4

        s["duration_s"] = round(dur, 2)
        s["start_s"] = round(t, 2)
        t += s["duration_s"]
        out.append(s)

    manifest["scenes"] = out
    manifest["total_length_s"] = round(t, 2)
    return manifest


def _apply_hook_rules(manifest, scenes, broll_only):
    """Enforce Hook+Demo rules: A-roll hook first, proof and CTA present, providers assigned."""
    cap = RULES_HOOK["text"]["max_words_per_caption"]

    first = scenes[0]
    opens_with_aroll_hook = (
        _scene_field_lower(first, "intent") == "hook"
        and _scene_field_lower(first, "role").startswith("a")
    )
    if not broll_only and not opens_with_aroll_hook:
        hook_txt = manifest.get("script_summary") or "Stop the chalky shakes."
        scenes.insert(0, {"id":"s_hook","role":"A-roll","intent":"hook","summary":hook_txt,"dialogue":hook_txt,
                          "on_screen_text":"Protein tastes like chalk?","entities":["creator_face","pack"],
                          "provider":"heygen","duration_s":2.5,"start_s":0.0})

    # Compare lower-cased intents so that e.g. "Proof" from the planner is
    # recognised and does not trigger a duplicate scene.
    present = {_scene_field_lower(s, "intent") for s in scenes}
    if "proof" not in present:
        scenes.insert(0 if broll_only else 1, {"id":"s_proof","role":"B-roll","intent":"proof",
                                               "summary":"Pour → swirl → smooth surface",
                                               "on_screen_text":"25g protein • Mixes smooth",
                                               "recommended_keyframe_prompt":"Top-down macro pour, swirl, glossy surface, 9:16",
                                               "provider":"veo","duration_s":3.0})
    if "cta" not in present:
        scenes.append({"id":"s_cta","role":"B-roll","intent":"cta","summary":"Pack hero, calm hold",
                       "on_screen_text":"Tap to try today",
                       "recommended_keyframe_prompt":"Pack hero on matte stone, neutral gray, 9:16",
                       "provider":"veo","duration_s":2.0})

    t = 0.0
    out = []
    for s in scenes:
        intent = _scene_field_lower(s, "intent")
        role = _scene_field_lower(s, "role")
        is_aroll = role.startswith("a")
        if broll_only and is_aroll:
            continue  # A-roll scenes are dropped entirely in B-roll-only mode

        _clamp_caption(s, cap)

        if is_aroll and intent == "hook":
            dur = min(max(2.0, _coerce_seconds(s.get("duration_s"), 2.5)), 3.0)
        elif intent == "cta":
            dur = min(max(1.8, _coerce_seconds(s.get("duration_s"), 2.0)), 2.5)
        else:
            dur = float(_choose_broll_len_hook(intent))

        s["duration_s"] = round(dur, 2)
        s["start_s"] = round(t, 2)
        t += s["duration_s"]

        if is_aroll:
            s["provider"] = "heygen"
        elif s["duration_s"] in (4.0, 5.0):
            s["provider"] = "wan"; s["gen_sec"] = 5; s["trim_sec"] = int(s["duration_s"])
        else:
            # Everything else, including the 3-second proof shot, goes to Veo:
            # generate 4 seconds and trim to the whole-second duration.
            s["provider"] = "veo"; s["gen_sec"] = 4; s["trim_sec"] = int(s["duration_s"])

        out.append(s)

    manifest["scenes"] = out
    manifest["total_length_s"] = round(t, 2)
    return manifest


def enforce_platform_rules(manifest, broll_only=False, logger=None):
    """Normalise a planned manifest to the rules of its ad format.

    Missing required scenes are inserted, captions are truncated to the
    format's word cap, durations are clamped, ``start_s`` is recomputed and a
    provider with ``gen_sec`` / ``trim_sec`` is assigned to each B-roll scene.
    A manifest whose ``ad_format`` contains "lifestyle" follows the Lifestyle
    Montage rules; anything else follows Hook+Demo.

    Args:
        manifest: planner output; modified in place (its scene dicts too).
        broll_only: Hook+Demo only - drop A-roll scenes and do not insert a hook.
        logger: accepted for signature parity; unused.

    Returns:
        The same manifest object, with ``scenes`` and ``total_length_s`` updated.

    Raises:
        ValueError: the manifest has no scenes.
    """
    scenes = manifest.get("scenes", [])
    if not scenes:
        raise ValueError("no scenes")
    fmt = (manifest.get("ad_format") or "").lower()
    if "lifestyle" in fmt:
        return _apply_lifestyle_rules(manifest, scenes)
    return _apply_hook_rules(manifest, scenes, broll_only)


def make_keyframe_placeholder(prompt_text, scene_summary, metadata_text, outpath, size=(1080,1920), bgcolor=(28,28,36)):
    """Render a text-only placeholder keyframe and save it as a JPEG.

    The title (prompt, else summary, else "B-roll") is wrapped and drawn near
    the top, at most six lines; *metadata_text* is drawn in a box near the
    bottom, at most eight lines.

    Args:
        prompt_text, scene_summary: candidates for the title text.
        metadata_text: small-print text for the bottom box (may be empty).
        outpath: destination file; its parent directory is created when the
            path has one.
        size: ``(width, height)`` of the image in pixels.
        bgcolor: RGB background colour.

    Returns:
        *outpath*.
    """
    img = Image.new("RGB", size, bgcolor)
    draw = ImageDraw.Draw(img)
    try:
        title_font = ImageFont.truetype(FONT_PATH, 36) if FONT_PATH else ImageFont.load_default()
        meta_font = ImageFont.truetype(FONT_PATH, 18) if FONT_PATH else ImageFont.load_default()
    except Exception:
        # An unreadable font file must not prevent the placeholder from rendering.
        title_font = ImageFont.load_default()
        meta_font = ImageFont.load_default()

    wrap_width = int(size[0] * 0.9)

    # Title block: each line is centred on its own dark backing rectangle.
    lines = draw_text_wrapped(draw, (prompt_text or scene_summary or "B-roll"), title_font, wrap_width)
    y = int(size[1] * 0.12)
    for ln in lines[:6]:
        b = draw.textbbox((0, 0), ln, font=title_font)
        w = b[2] - b[0]
        h = b[3] - b[1]
        draw.rectangle([((size[0]-w)//2-10, y-8), ((size[0]+w)//2+10, y+h+8)], fill=(0,0,0,180))
        draw.text(((size[0]-w)/2, y), ln, font=title_font, fill=(255,230,180))
        y += h + 10

    # Metadata block: one shared backing box anchored near the bottom edge.
    meta_lines = draw_text_wrapped(draw, (metadata_text or ""), meta_font, wrap_width)
    y2 = size[1] - 200
    draw.rectangle([(40, y2-10), (size[0]-40, size[1]-40)], fill=(0,0,0,180))
    ty = y2 + 6
    for ln in meta_lines[:8]:
        b = draw.textbbox((0, 0), ln, font=meta_font)
        w = b[2] - b[0]
        h = b[3] - b[1]
        draw.text(((size[0]-w)/2, ty), ln, font=meta_font, fill=(230,230,230))
        ty += h + 6

    # os.makedirs("") raises, so only create a directory when the path has one.
    out_dir = os.path.dirname(outpath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    img.save(outpath, format="JPEG", quality=90)
    return outpath

def generate_keyframe_fal(prompt, out_path, negative_prompt=None, logger=None):
    """Generate a 1080x1920 keyframe with FAL Stable Diffusion v3.5 Large and save it.

    A placeholder image is written instead when ``FAL_KEY`` is unset, the
    ``fal_client`` package cannot be imported, or the service returns no
    image or no URL. Errors raised by the FAL call or by the image download
    are not caught here and propagate to the caller.

    Args:
        prompt: text prompt for the image.
        out_path: destination file; its parent directory is created when the
            path has one.
        negative_prompt: optional override for the default negative prompt.
        logger: optional logger for progress messages.

    Returns:
        The path of the written file.
    """
    if not os.environ.get("FAL_KEY"):
        if logger: logger.warning("FAL_KEY not set; using placeholder keyframe")
        return make_keyframe_placeholder(prompt, prompt, "no FAL_KEY", out_path)
    try:
        import fal_client
    except Exception:
        if logger: logger.warning("fal_client not installed; using placeholder keyframe")
        return make_keyframe_placeholder(prompt, prompt, "no fal_client", out_path)

    if logger: logger.info("Calling FAL SD v3.5 Large for keyframe")

    def on_queue_update(update):
        # Best-effort relay of remote progress logs; never let it break the job.
        try:
            from fal_client import InProgress
            if isinstance(update, InProgress) and getattr(update,"logs",None):
                for log in update.logs:
                    msg=log.get("message","")
                    if logger and msg: logger.info("[FAL] "+msg)
        except Exception:
            pass

    args={"prompt":prompt,"negative_prompt":negative_prompt or "blurry, watermark, text, lowres",
          "num_inference_steps":24,"guidance_scale":3.0,"num_images":1,"enable_safety_checker":True,
          "output_format":"jpeg","image_size":{"width":1080,"height":1920}}
    result=fal_client.subscribe("fal-ai/stable-diffusion-v35-large",arguments=args,with_logs=True,on_queue_update=on_queue_update)

    images=result.get("images",[])
    if not images:
        return make_keyframe_placeholder(prompt, prompt, "fal sd no image", out_path)
    url=images[0].get("url")
    if not url:
        return make_keyframe_placeholder(prompt, prompt, "fal sd no url", out_path)

    r=requests.get(url,timeout=120)
    r.raise_for_status()
    # os.makedirs("") raises, so only create a directory when the path has one.
    out_dir=os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir,exist_ok=True)
    with open(out_path,"wb") as fh:
        fh.write(r.content)
    if logger: logger.info("Saved FAL keyframe: "+out_path)
    return out_path

# ---------- video helpers (video-only) ----------
def trim_video(inp, outp, keep_sec, logger=None):
    """Re-encode the first *keep_sec* seconds of *inp* into *outp* (video only).

    Output is H.264, 30 fps constant frame rate, yuv420p, with no audio
    track. Returns *outp*; ffmpeg failures surface as ``run_cmd`` errors.
    """
    base = ["ffmpeg","-y","-hide_banner","-loglevel","error"]
    source = ["-fflags","+genpts","-i",inp]
    window = ["-ss","0","-t",str(keep_sec)]
    timing = ["-r","30","-vsync","cfr","-pix_fmt","yuv420p"]
    encoder = ["-c:v","libx264","-preset","veryfast","-crf","18"]
    container = ["-an","-movflags","+faststart"]
    run_cmd(base + source + window + timing + encoder + container + [outp], logger=logger)
    return outp

def still_from_image(img_path, outmp4, dur_sec, logger=None):
    """Turn a still image into a silent 1080x1920 H.264 clip of *dur_sec* seconds.

    The image is scaled up until it covers the whole frame and then
    centre-cropped, so the aspect ratio is preserved without letterboxing.

    Returns:
        *outmp4*; ffmpeg failures surface as ``run_cmd`` errors.
    """
    # The scale filter only accepts disable/decrease/increase for
    # force_original_aspect_ratio; "increase" is the fill-the-frame mode
    # that the following crop relies on.
    cover_then_crop = "scale=1080:1920:force_original_aspect_ratio=increase,crop=1080:1920"
    run_cmd([
        "ffmpeg","-y","-hide_banner","-loglevel","error",
        "-loop","1","-t",str(dur_sec),"-i",img_path,
        "-vf",cover_then_crop,
        "-r","30","-pix_fmt","yuv420p",
        "-c:v","libx264","-preset","veryfast","-crf","18",
        "-an", outmp4
    ], logger=logger)
    return outmp4

def overlay_caption(inp, outp, text, logger=None):
    """Burn a caption bar with ``text`` into ``inp`` and write the result to ``outp``.

    A semi-transparent black box is drawn at y=1650 and the text is rendered
    on top of it. When ``text`` is empty, or when the ffmpeg overlay fails,
    ``inp`` is copied to ``outp`` unchanged so the pipeline can continue.

    Args:
        inp: Path of the source video.
        outp: Path of the captioned video to write.
        text: Caption text; falsy values skip the overlay entirely.
        logger: Optional logger; receives a warning when the overlay fails.

    Returns:
        ``outp``.
    """
    if not text:
        shutil.copy(inp, outp)
        return outp

    safe_text = ffmpeg_drawtext_escape(text)
    # Only name a font file when one was discovered; otherwise let drawtext
    # fall back to its own default font.
    font_opt = f"fontfile='{FONT_PATH}':" if FONT_PATH else ""
    draw = (
        "drawbox=x=40:y=1650:w=1000:h=120:color=black@0.5:t=fill,"
        f"drawtext={font_opt}text='{safe_text}':x=60:y=1685:fontsize=52:fontcolor=white"
    )
    try:
        run_cmd(["ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-i", inp, "-vf", draw,
                 "-c:v", "libx264", "-crf", "18", "-preset", "veryfast", "-an", outp], logger=logger)
    except Exception as exc:
        # Captions are best-effort: keep the clip, but leave a trace so a
        # missing caption can be diagnosed from the job log.
        if logger:
            logger.warning("Caption overlay failed; using uncaptioned clip: %s", exc)
        shutil.copy(inp, outp)
    return outp

# Normalize each clip to 1080x1920, SAR=1 (prevents concat size mismatch)
def normalize_9x16(inp, outp, logger=None):
    """Re-encode ``inp`` as a 1080x1920, SAR 1:1, 30 fps, video-only clip.

    The picture is scaled to fit inside 1080x1920 while keeping its aspect
    ratio, then padded (centered) to exactly 1080x1920. Giving every clip the
    same geometry is what lets the concat step join them.

    Args:
        inp: Path of the source video.
        outp: Path of the normalized video to write (overwritten if present).
        logger: Optional logger forwarded to ``run_cmd``.

    Returns:
        ``outp``.
    """
    # Fit inside the frame on BOTH axes: scaling only the width to 1080 leaves
    # inputs taller than 9:16 above 1920 px, and pad cannot shrink a frame.
    vf = (
        "scale=1080:1920:force_original_aspect_ratio=decrease:flags=lanczos,"
        "pad=1080:1920:(ow-iw)/2:(oh-ih)/2,"
        "setsar=1"
    )
    run_cmd([
        "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
        "-i", inp,
        "-vf", vf,
        "-r", "30", "-vsync", "cfr", "-pix_fmt", "yuv420p",
        "-c:v", "libx264", "-preset", "veryfast", "-crf", "18",
        "-an", "-movflags", "+faststart",
        outp,
    ], logger=logger)
    return outp

# ---------- FAL Veo / WAN ----------
def _data_uri_from_file(path):
    ext=(os.path.splitext(path)[1] or "").lower(); mime="image/jpeg"
    if ext in [".png",".bmp",".webp"]: mime=f"image/{ext[1:]}"
    with open(path,"rb") as fh: b64=base64.b64encode(fh.read()).decode("utf-8")
    return f"data:{mime};base64,{b64}"

def gen_broll_veo_t2v(prompt, gen_sec, outmp4, logger=None):
    """Generate a vertical B-roll clip from text via FAL ``fal-ai/veo3``.

    Whenever the real generation is not possible (``FAL_KEY`` unset,
    ``fal_client`` not importable, or the request/download fails) a gray
    1080x1920 placeholder of ``gen_sec`` seconds, captioned with the first
    eight words of the prompt, is written instead.

    Args:
        prompt: Text prompt; a generic product-demo prompt is used when falsy.
        gen_sec: Requested clip length in seconds. Only 4, 6 and 8 are passed
            through to the API; any other value requests ``"4s"``.
        outmp4: Path of the video file to write.
        logger: Optional logger for progress and warnings.

    Returns:
        ``outmp4``.
    """
    def placeholder():
        # Gray card rendered to a sibling "<name>_raw<ext>" file, then captioned
        # into outmp4. splitext keeps the temp path distinct from outmp4 even
        # when the name does not end in ".mp4".
        stem, ext = os.path.splitext(outmp4)
        tmp = f"{stem}_raw{ext}"
        run_cmd([
            "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
            "-f", "lavfi", "-i", f"color=c=gray:s=1080x1920:d={gen_sec}",
            "-c:v", "libx264", "-preset", "veryfast", "-crf", "18", "-an", tmp,
        ], logger=logger)
        overlay_caption(tmp, outmp4, " ".join((prompt or "").split()[:8]), logger=logger)
        return outmp4

    if not os.environ.get("FAL_KEY"):
        if logger:
            logger.warning("FAL_KEY not set; Veo placeholder")
        return placeholder()
    try:
        import fal_client
    except Exception:
        if logger:
            logger.warning("fal_client missing; Veo placeholder")
        return placeholder()

    if logger:
        logger.info("Calling FAL Veo3 text->video")

    def on_queue_update(update):
        # Relaying remote progress logs is cosmetic; a failure here must never
        # abort the generation, hence the deliberate catch-all.
        try:
            from fal_client import InProgress
            if isinstance(update, InProgress) and getattr(update, "logs", None):
                for log in update.logs:
                    msg = log.get("message", "")
                    if logger and msg:
                        logger.info("[FAL Veo3] " + msg)
        except Exception:
            pass

    args = {
        "prompt": prompt or "Clean product macro demo, studio soft-key, vertical 9:16",
        "aspect_ratio": "9:16",
        "duration": f"{gen_sec}s" if gen_sec in (4, 6, 8) else "4s",
        "resolution": "1080p",
        "generate_audio": False,
        "enhance_prompt": True,
        "auto_fix": True,
        "negative_prompt": "watermark, logo, overlaid text, nsfw, extra hands, duplicated objects",
    }
    try:
        res = fal_client.subscribe("fal-ai/veo3", arguments=args, with_logs=True,
                                   on_queue_update=on_queue_update)
        vid = (res.get("video") or {}).get("url")
        if not vid:
            raise RuntimeError("Veo no URL")
        r = requests.get(vid, timeout=600)
        r.raise_for_status()
        with open(outmp4, "wb") as fh:
            fh.write(r.content)
    except Exception as e:
        if logger:
            logger.warning(f"Veo error -> placeholder: {e}")
        return placeholder()
    if logger:
        logger.info("Saved Veo3 T2V: " + outmp4)
    return outmp4

def gen_broll_wan_i2v(keyframe_img, motion_prompt, gen_sec, outmp4, logger=None):
    """Animate ``keyframe_img`` with FAL ``fal-ai/wan-25-preview/image-to-video``.

    When ``FAL_KEY`` is unset, ``fal_client`` cannot be imported, or the
    response carries no video URL, a still clip of ``gen_sec`` seconds is
    rendered from the keyframe instead. Errors raised by the second API
    attempt or by the download propagate to the caller.

    Args:
        keyframe_img: Path of the image to animate (sent as a data URI).
        motion_prompt: Motion description; a subtle default is used when falsy.
        gen_sec: Duration used for the still-image fallback. The API request
            itself always asks for ``"5"``.
        outmp4: Path of the video file to write.
        logger: Optional logger for progress and warnings.

    Returns:
        ``outmp4``.
    """
    def fallback_still(reason):
        if logger:
            logger.warning(reason)
        return still_from_image(keyframe_img, outmp4, gen_sec, logger=logger)

    if not os.environ.get("FAL_KEY"):
        return fallback_still("FAL_KEY not set; WAN placeholder")
    try:
        import fal_client
    except Exception:
        return fallback_still("fal_client missing; WAN placeholder")

    if logger:
        logger.info("Calling FAL WAN 2.5 image->video")

    def relay_progress(update):
        # Progress relaying is best-effort; never let it break the request.
        try:
            from fal_client import InProgress
            if not isinstance(update, InProgress):
                return
            for entry in (getattr(update, "logs", None) or ()):
                text = entry.get("message", "")
                if logger and text:
                    logger.info("[FAL WAN] " + text)
        except Exception:
            pass

    endpoint = "fal-ai/wan-25-preview/image-to-video"
    payload = {
        "prompt": motion_prompt or "Subtle breathing motion; vertical 9:16",
        "image_url": _data_uri_from_file(keyframe_img),
        "resolution": "1080p",
        "duration": "5",
        "negative_prompt": "low resolution, watermark, overlaid text, extra limbs, worst quality, artifacts",
        "enable_prompt_expansion": True
    }
    subscribe_opts = {"with_logs": True, "on_queue_update": relay_progress}
    try:
        res = fal_client.subscribe(endpoint, arguments=payload, **subscribe_opts)
    except Exception:
        # Second attempt with the arguments nested under "input".
        res = fal_client.subscribe(endpoint, arguments={"input": payload}, **subscribe_opts)

    # The video object is looked up at the top level first, then under "data".
    data = res.get("data")
    nested = data if isinstance(data, dict) else {}
    video_obj = res.get("video") or nested.get("video")
    url = (video_obj or {}).get("url")
    if not url:
        url = (((data or {}).get("video")) or {}).get("url")
    if not url:
        return fallback_still("WAN no URL -> placeholder")

    response = requests.get(url, timeout=900)
    response.raise_for_status()
    with open(outmp4, "wb") as fh:
        fh.write(response.content)
    if logger:
        logger.info("Saved WAN I2V: " + outmp4)
    return outmp4

# ---------- concat (video-only) ----------
def concat_videos(clip_paths, out_path, logger=None):
    """Join ``clip_paths`` in order into one video-only file at ``out_path``.

    Every clip becomes an ffmpeg input and the video streams are joined with
    the ``concat`` filter (``v=1:a=0``); the result is re-encoded as H.264 /
    yuv420p at a constant 30 fps without audio.

    Args:
        clip_paths: Sequence of clip paths, in playback order.
        out_path: Path of the stitched video to write.
        logger: Optional logger forwarded to ``run_cmd``.
    """
    count = len(clip_paths)
    input_args = [
        token
        for clip in clip_paths
        for token in ("-fflags", "+genpts", "-i", clip)
    ]
    stream_labels = "".join(f"[{idx}:v]" for idx in range(count))
    graph = stream_labels + f"concat=n={count}:v=1:a=0[v]"
    command = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "error"]
    command += input_args
    command += ["-filter_complex", graph, "-map", "[v]"]
    command += ["-r", "30", "-vsync", "cfr", "-pix_fmt", "yuv420p"]
    command += ["-c:v", "libx264", "-preset", "veryfast", "-crf", "18"]
    command += ["-an", "-movflags", "+faststart", out_path]
    run_cmd(command, logger=logger)

# ---------- main orchestration ----------
def _job_is_lifestyle(ad_format):
    """Return True when the selected ad format starts with "lifestyle" (case-insensitive)."""
    return (ad_format or "").lower().startswith("lifestyle")


def _job_fallback_manifest(ad_format, script, storyboard_hint):
    """Build the deterministic (planner-free) manifest for the selected ad format."""
    if _job_is_lifestyle(ad_format):
        return deterministic_fallback_manifest_lifestyle(script, storyboard_hint)
    return deterministic_fallback_manifest_hook(script, storyboard_hint)


def _job_build_manifest(script, storyboard_hint, ad_format, use_openrouter, broll_only, job_dir, logger):
    """Plan the scenes, then validate topics, estimate durations and enforce platform rules.

    Returns ``(storyboard_manifest, manifest)``: a deep copy taken before rule
    enforcement, and the manifest after enforcement.
    """
    global_ctx = extract_global_context(script)

    def prepare(m):
        # Applied to the planned manifest AND to the retry fallback, so both
        # carry ad_format/global_context before estimation and enforcement.
        if not m.get("ad_format"):
            rules = RULES_LIFESTYLE if _job_is_lifestyle(ad_format) else RULES_HOOK
            m["ad_format"] = rules["format"]
        m["global_context"] = global_ctx
        m = validate_scene_topics(m, global_ctx)
        return estimate_durations_from_manifest(m, storyboard_hint, logger=logger)

    if use_openrouter and os.environ.get("OPENROUTER_API_KEY"):
        try:
            raw = call_openrouter_planner_failover(ad_format, script, storyboard_hint, job_dir, logger=logger)
            manifest = extract_json_from_text(raw)
        except Exception:
            logger.exception("Planner failed; using fallback")
            manifest = _job_fallback_manifest(ad_format, script, storyboard_hint)
    else:
        manifest = _job_fallback_manifest(ad_format, script, storyboard_hint)

    if not isinstance(manifest, dict) or not manifest.get("scenes"):
        logger.warning("Planner returned no scenes; using deterministic fallback.")
        manifest = _job_fallback_manifest(ad_format, script, storyboard_hint)

    manifest = prepare(manifest)
    storyboard_manifest = copy.deepcopy(manifest)

    try:
        manifest = enforce_platform_rules(manifest, broll_only=broll_only, logger=logger)
    except ValueError:
        logger.warning("Empty scenes at enforcement; retrying with fallback.")
        manifest = prepare(_job_fallback_manifest(ad_format, script, storyboard_hint))
        manifest = enforce_platform_rules(manifest, broll_only=broll_only, logger=logger)
    return storyboard_manifest, manifest


def _job_make_keyframes(manifest, job_dir, use_fal_keyframes, logger):
    """Create one keyframe image per scene and return the paths in scene order."""
    paths = []
    for sc in manifest.get("scenes", []):
        sid = sc.get("id", "s_" + uuid.uuid4().hex[:6])
        prompt = (sc.get("recommended_keyframe_prompt") or sc.get("summary") or "B-roll").strip()
        img_out = os.path.join(job_dir, f"{sid}_kf.jpg")
        is_broll = (sc.get("role") or "").lower().startswith("b")
        generated = False
        # Only B-roll scenes get a generated keyframe; everything else (and any
        # failure) falls back to the local placeholder renderer.
        if is_broll and use_fal_keyframes and os.environ.get("FAL_KEY"):
            try:
                generate_keyframe_fal(prompt, img_out, negative_prompt="blurry, watermark, text", logger=logger)
                generated = True
            except Exception:
                logger.exception("Keyframe FAL failed; placeholder")
        if not generated:
            make_keyframe_placeholder(prompt, sc.get("summary", ""), "", img_out)
        paths.append(img_out)
    return paths


def _job_save_creator_image(creator_img_file, job_dir, logger):
    """Best-effort copy of the optional creator image into the job directory."""
    if creator_img_file is None:
        return
    try:
        if isinstance(creator_img_file, (str, os.PathLike)):
            # Upload delivered as a filesystem path.
            src = os.fspath(creator_img_file)
            dst = os.path.join(job_dir, "creator" + os.path.splitext(src)[1])
            shutil.copy(src, dst)
        else:
            # Upload delivered as a file-like object exposing .name and .read().
            dst = os.path.join(job_dir, "creator" + os.path.splitext(creator_img_file.name)[1])
            with open(dst, "wb") as fh:
                fh.write(creator_img_file.read())
        logger.info("Saved creator image: " + dst)
    except Exception:
        logger.exception("Failed to save creator image")


def _job_render_scene(i, sc, manifest, job_dir, keyframe_paths, is_lifestyle, broll_only, logger):
    """Render scene ``i`` to a normalized clip; return its path, or None when the scene is skipped."""
    sid = sc.get("id", f"s_{i}")
    role = (sc.get("role") or "").lower()
    dur = float(sc.get("duration_s", 2.0))
    provider = sc.get("provider", "")
    ontext = sc.get("on_screen_text", "")
    stem = os.path.join(job_dir, f"{sid}")
    outmp4 = stem + ".mp4"
    raw = stem + "_raw.mp4"
    key_img = keyframe_paths[i] if i < len(keyframe_paths) else None
    gen_sec = int(sc.get("gen_sec", math.ceil(dur)))
    trim_sec = int(sc.get("trim_sec", int(dur)))
    global_ctx = manifest.get("global_context")

    if role.startswith("a"):
        if broll_only:
            # A-roll is excluded from the output in B-roll-only mode, so do not
            # spend two encodes on a clip that would be discarded.
            return None
        still_from_image(key_img or keyframe_paths[0], raw, dur_sec=max(2.0, dur), logger=logger)
        overlay_caption(raw, outmp4, ontext, logger=logger)

    elif provider == "veo":
        prompt = (compose_lifestyle_prompt(sc, global_ctx) if is_lifestyle
                  else compose_veo_prompt_hook(sc, global_ctx))
        try:
            gen_broll_veo_t2v(prompt, gen_sec=gen_sec, outmp4=raw, logger=logger)
        except Exception:
            logger.exception("Veo gen failed; placeholder")
            still_from_image(key_img or keyframe_paths[0], raw, gen_sec, logger=logger)
        trimmed = stem + f"_t{trim_sec}.mp4"
        trim_video(raw, trimmed, keep_sec=trim_sec, logger=logger)
        overlay_caption(trimmed, outmp4, ontext, logger=logger)

    elif provider == "wan":
        if not key_img:
            key_img = os.path.join(job_dir, f"{sid}_auto_kf.jpg")
            make_keyframe_placeholder(sc.get("summary", "B-roll"), sc.get("summary", ""), "", key_img)
        motion = (compose_lifestyle_motion(sc, global_ctx) if is_lifestyle
                  else compose_wan_motion_hook(sc, global_ctx))
        try:
            gen_broll_wan_i2v(key_img, motion_prompt=motion, gen_sec=gen_sec, outmp4=raw, logger=logger)
        except Exception:
            logger.exception("WAN gen failed; placeholder")
            still_from_image(key_img, raw, gen_sec, logger=logger)
        if trim_sec < gen_sec:
            trimmed = stem + f"_t{trim_sec}.mp4"
            trim_video(raw, trimmed, keep_sec=trim_sec, logger=logger)
            overlay_caption(trimmed, outmp4, ontext, logger=logger)
        else:
            overlay_caption(raw, outmp4, ontext, logger=logger)

    else:
        # Unknown/empty provider: hold the keyframe for the scene duration.
        still_from_image(key_img or keyframe_paths[0], raw, dur, logger=logger)
        overlay_caption(raw, outmp4, ontext, logger=logger)

    norm = stem + "_norm.mp4"
    normalize_9x16(outmp4, norm, logger=logger)
    return norm


def plan_and_generate(script, storyboard_hint, creator_img_file, ad_format, use_openrouter, use_fal_keyframes, gen_video, broll_only):
    """Run one planning/generation job and return everything the UI displays.

    Steps: create a job directory with its own log file, build the scene
    manifest (OpenRouter planner or deterministic fallback), write
    ``manifest.json`` and ``storyboard_pre_rules.json``, create one keyframe
    per scene and, when ``gen_video`` is set, render every scene and stitch
    the clips into ``final_demo.mp4``.

    Args:
        script: Ad script text.
        storyboard_hint: Free-text technical hints for the planner.
        creator_img_file: Optional uploaded creator image (path or file-like).
        ad_format: Selected ad format label; a value starting with
            "lifestyle" selects the lifestyle pipeline, anything else the hook one.
        use_openrouter: Use the OpenRouter planner when its API key is set.
        use_fal_keyframes: Generate B-roll keyframes with FAL when its key is set.
        gen_video: Render scene clips and the stitched preview.
        broll_only: Leave A-roll scenes out of the rendered output.

    Returns:
        A 7-tuple ``(storyboard_json, manifest_json, final_video_path_or_None,
        keyframe_paths, keyframe_paths, clip_paths, log_tail)``; the keyframe
        list appears twice because it feeds both a gallery and a file list.

    Raises:
        Whatever the pipeline raises; the error is written to the job log first.
    """
    job_id = f"job_{int(time.time())}_{uuid.uuid4().hex[:6]}"
    job_dir = os.path.join(JOB_ROOT, job_id)
    os.makedirs(job_dir, exist_ok=True)
    log_path = os.path.join(job_dir, "log.txt")

    logger = logging.getLogger(job_id)
    logger.setLevel(logging.DEBUG)
    # The job logger has its own console handler; without this every record is
    # printed a second time by the root logger's handler.
    logger.propagate = False
    if not logger.handlers:
        fh = logging.FileHandler(log_path, mode="a", encoding="utf-8")
        fh.setLevel(logging.DEBUG)
        sh = logging.StreamHandler()
        sh.setLevel(logging.INFO)
        fmt = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S")
        fh.setFormatter(fmt)
        sh.setFormatter(fmt)
        logger.addHandler(fh)
        logger.addHandler(sh)

    try:
        script_preview = script or ""
        logger.info("Job start: " + job_id)
        logger.debug("Format: " + str(ad_format))
        logger.debug("Script: " + (script_preview[:1000] + ("...[truncated]" if len(script_preview) > 1000 else "")))
        logger.debug("Storyboard: " + str(storyboard_hint))

        storyboard_manifest, manifest = _job_build_manifest(
            script, storyboard_hint, ad_format, use_openrouter, broll_only, job_dir, logger)

        with open(os.path.join(job_dir, "manifest.json"), "w") as fh:
            json.dump(manifest, fh, indent=2)
        with open(os.path.join(job_dir, "storyboard_pre_rules.json"), "w") as fh:
            json.dump(storyboard_manifest, fh, indent=2)

        keyframe_paths = _job_make_keyframes(manifest, job_dir, use_fal_keyframes, logger)

        clips_for_download = []
        final_video = None
        if gen_video:
            _job_save_creator_image(creator_img_file, job_dir, logger)
            is_lifestyle = "lifestyle" in (manifest.get("ad_format") or "").lower()
            for i, sc in enumerate(manifest.get("scenes", [])):
                clip = _job_render_scene(i, sc, manifest, job_dir, keyframe_paths,
                                         is_lifestyle, broll_only, logger)
                if clip:
                    clips_for_download.append(clip)
            if clips_for_download:
                final_video = os.path.join(job_dir, "final_demo.mp4")
                concat_videos(clips_for_download, final_video, logger=logger)
    except Exception:
        logger.exception("Job failed: " + job_id)
        raise
    finally:
        # One logger (and one open log file) is created per job; release the
        # handlers so repeated runs do not accumulate open file descriptors.
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
            handler.close()

    # outputs
    try:
        with open(log_path, "r", encoding="utf-8") as fh:
            log_text = fh.read()[-4000:]
    except Exception:
        log_text = "No log file found or could not read log."

    storyboard_text = json.dumps(storyboard_manifest, indent=2)
    manifest_text = json.dumps(manifest, indent=2)
    return storyboard_text, manifest_text, final_video, keyframe_paths, keyframe_paths, clips_for_download, log_text

# ---------- settings ----------
def set_keys(openrouter_key, fal_key):
    """Store the given API keys in the process environment and report the result.

    Each key is stripped first and only stored when something is left, so a
    blank or whitespace-only field never overwrites an existing key with an
    empty string. A key counts as "set" when its environment variable is
    non-empty, matching how the pipeline checks for keys.

    Args:
        openrouter_key: Value for ``OPENROUTER_API_KEY`` (may be empty/None).
        fal_key: Value for ``FAL_KEY`` (may be empty/None).

    Returns:
        Status string ``"OpenRouter set: <bool>; FAL set: <bool>"``.
    """
    for env_name, value in (("OPENROUTER_API_KEY", openrouter_key), ("FAL_KEY", fal_key)):
        cleaned = (value or "").strip()
        if cleaned:
            os.environ[env_name] = cleaned
    ok1 = bool(os.environ.get("OPENROUTER_API_KEY"))
    ok2 = bool(os.environ.get("FAL_KEY"))
    return f"OpenRouter set: {ok1}; FAL set: {ok2}"

def show_keys_status():
    """Report whether each API key is usable (its environment variable is non-empty).

    The pipeline gates its remote calls on ``os.environ.get(...)`` being
    truthy, so an empty variable is reported as not set.
    """
    openrouter_ok = bool(os.environ.get("OPENROUTER_API_KEY"))
    fal_ok = bool(os.environ.get("FAL_KEY"))
    return f"OpenRouter set: {openrouter_ok}; FAL set: {fal_ok}"

# ---------- UI ----------
# Gradio app: a collapsed settings panel for API keys, an input column on the
# left, an output column on the right, and one button that runs the whole job.
with gr.Blocks(title="Scene Planner — Hook+Demo + Lifestyle (Veo/WAN + SD Keyframes)") as demo:
    gr.Markdown("Select **Ad Format** → plan → generate keyframes → (optional) stitch a video-only preview. All clips normalized to 1080×1920; audio omitted so you can add VO later.")
    # Settings: keys typed here are handed to set_keys, which stores them in os.environ.
    with gr.Accordion("Settings", open=False):
        or_key = gr.Textbox(label="OPENROUTER_API_KEY", type="password")
        fal_key = gr.Textbox(label="FAL_KEY (for Veo/WAN/SD)", type="password")
        keys_btn = gr.Button("Save Keys")
        keys_status = gr.Markdown()
        # set_keys writes its status string into keys_status; the chained
        # show_keys_status call then writes the status into the same component again.
        keys_btn.click(set_keys, inputs=[or_key, fal_key], outputs=keys_status).then(show_keys_status, outputs=keys_status)

    with gr.Row():
        # Left column: job inputs. The widget order here does not have to match
        # the handler signature; the `inputs=` list further down defines that.
        with gr.Column(scale=2):
            ad_format = gr.Dropdown(choices=["Hook+Demo", "Lifestyle Montage"], value="Hook+Demo", label="Ad Format")
            script_in = gr.Textbox(label="Script", lines=5, value=(
                "If your protein tastes like chalk — this fixes it. 25g protein, mixes smooth, rich chocolate. Tap to try Chocolate Fudge today."
            ))
            storyboard_in = gr.Textbox(label="Storyboard (tech hints)", lines=3, value="Hook+Demo 9:16, macro proof, calm CTA hold. Captions on; camera locked.")
            creator_in = gr.File(label="Creator image (optional)")
            use_openrouter = gr.Checkbox(label="Use OpenRouter planner (enable key)", value=True)
            use_fal = gr.Checkbox(label="Use FAL SD for B-roll keyframes", value=True)
            broll_only = gr.Checkbox(label="B-rolls only (skip A-roll/HeyGen)", value=False)
            gen_video = gr.Checkbox(label="Generate stitched preview (FFmpeg)", value=True)
            run_btn = gr.Button("Plan & Generate")
        # Right column: one component per element of plan_and_generate's return tuple.
        with gr.Column(scale=1):
            storyboard_out = gr.Code(label="Storyboard (pre-rules) JSON", language="json")
            manifest_out   = gr.Code(label="Manifest (post-rules) JSON", language="json")
            video_out      = gr.Video(label="Final video", interactive=True)
            keyframes_gallery = gr.Gallery(label="Keyframes (generated)")
            keyframes_files   = gr.Files(label="Keyframe files (download)")
            clips_files       = gr.Files(label="Scene clips (download)")
            log_out      = gr.Textbox(label="Run log (tail)", lines=12)

    # `inputs` follows plan_and_generate's parameter order and `outputs` follows
    # its return order (the keyframe list is returned twice: gallery + files).
    run_btn.click(
        plan_and_generate,
        inputs=[script_in, storyboard_in, creator_in, ad_format, use_openrouter, use_fal, gen_video, broll_only],
        outputs=[storyboard_out, manifest_out, video_out, keyframes_gallery, keyframes_files, clips_files, log_out]
    )

if __name__ == "__main__":
    # NOTE(review): share=True requests a public share link and the server binds
    # to all interfaces; anyone with the link can run jobs with the keys held in
    # this process — confirm this exposure is intended outside a notebook session.
    demo.launch(server_name="0.0.0.0", share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://1ad805a81cd1835ae8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


2025-10-05 15:07:31 INFO: Job start: job_1759676851_d4f695
INFO:job_1759676851_d4f695:Job start: job_1759676851_d4f695
DEBUG:job_1759676851_d4f695:Format: Lifestyle Montage
DEBUG:job_1759676851_d4f695:Script: If your protein tastes like chalk — this fixes it. 25g protein, mixes smooth, rich chocolate. It is specifically good for children and improves blood flow to the brain
DEBUG:job_1759676851_d4f695:Storyboard: Hook+Demo 9:16, macro proof, calm CTA hold. Captions on; camera locked.
2025-10-05 15:08:35 INFO: Calling FAL SD v3.5 Large for keyframe
INFO:job_1759676851_d4f695:Calling FAL SD v3.5 Large for keyframe
2025-10-05 15:08:36 INFO: [FAL]   0%|          | 0/24 [00:00<?, ?it/s]
INFO:job_1759676851_d4f695:[FAL]   0%|          | 0/24 [00:00<?, ?it/s]
2025-10-05 15:08:36 INFO: [FAL]   0%|          | 0/24 [00:00<?, ?it/s]
INFO:job_1759676851_d4f695:[FAL]   0%|          | 0/24 [00:00<?, ?it/s]
2025-10-05 15:08:36 INFO: [FAL]   0%|          | 0/24 [00:00<?, ?it/s]
INFO:job_1759676851_d4f

Keyboard interruption in main thread... closing server.
Killing tunnel 0.0.0.0:7860 <> https://1ad805a81cd1835ae8.gradio.live
