In [None]:
# %% [0] (optional) allow big uploads + quiet analytics
import os
# Provide defaults only when the variable is not already present, so values
# exported before the kernel started are left untouched.
# NOTE(review): confirm the installed Gradio version actually reads
# GRADIO_MAX_UPLOAD_SIZE — TODO verify.
if "GRADIO_MAX_UPLOAD_SIZE" not in os.environ:
    os.environ["GRADIO_MAX_UPLOAD_SIZE"] = "512"  # MB
if "GRADIO_ANALYTICS_ENABLED" not in os.environ:
    os.environ["GRADIO_ANALYTICS_ENABLED"] = "0"


In [None]:
# %% [1] Imports, env, model
import os, json, time, html, re
from dotenv import load_dotenv
from PIL import Image, ImageOps, ImageDraw
import gradio as gr

# Load api.env before looking up the two settings below.
load_dotenv("api.env")
API_KEY = os.environ.get("MOONDREAM_API_KEY", "").strip()
ENDPOINT = os.environ.get("MOONDREAM_ENDPOINT", "http://localhost:2020/v1").strip()

import moondream as md

# A non-empty API key selects the key-based client; otherwise the client is
# pointed at ENDPOINT. SOURCE records which of the two was chosen.
if API_KEY:
    MODEL = md.vl(api_key=API_KEY)
    SOURCE = "moondream-cloud"
else:
    MODEL = md.vl(endpoint=ENDPOINT)
    SOURCE = "moondream-local"


In [None]:
# %% [2] Helpers
def exif_rgb(img: Image.Image) -> Image.Image:
    """Return ``img`` rotated/flipped per its EXIF orientation, in RGB mode.

    The orientation is applied to the image as received and only then is the
    result converted to RGB, so the orientation lookup runs against the
    source image's own metadata rather than against a converted copy.

    Args:
        img: Source PIL image in any mode.

    Returns:
        A new RGB image; ``img`` itself is not modified.
    """
    upright = ImageOps.exif_transpose(img)
    return upright.convert("RGB")

def to_px_box(obj, w, h):
    """Scale a box given as fractions of the image size to pixel coordinates.

    Args:
        obj: Mapping with "x_min", "y_min", "x_max" and "y_max" entries.
        w: Width the x values are multiplied by.
        h: Height the y values are multiplied by.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` as ints (truncated via ``int``).
    """
    left = int(obj["x_min"] * w)
    top = int(obj["y_min"] * h)
    right = int(obj["x_max"] * w)
    bottom = int(obj["y_max"] * h)
    return [left, top, right, bottom]

def to_px_point(p, w, h):
    """Scale a point given as fractions of the image size to pixel coordinates.

    Args:
        p: Mapping with "x" and "y" entries.
        w: Width the x value is multiplied by.
        h: Height the y value is multiplied by.

    Returns:
        ``[x, y]`` as ints (truncated via ``int``).
    """
    px = int(p["x"] * w)
    py = int(p["y"] * h)
    return [px, py]

def jdump(obj) -> str:
    """Serialize ``obj`` to pretty-printed JSON text.

    Output uses a two-space indent and keeps non-ASCII characters as-is
    instead of escaping them.
    """
    dump_options = {"indent": 2, "ensure_ascii": False}
    return json.dumps(obj, **dump_options)

def make_code_details(json_text: str) -> str:
    """Wrap ``json_text`` in a collapsible ``<details>`` element.

    The text is HTML-escaped and placed inside a ``<pre>`` so it is shown
    verbatim when the element is expanded.
    """
    escaped = html.escape(json_text)
    # Leading empty entry and trailing four-space entry reproduce the
    # surrounding whitespace of the markup exactly.
    markup_lines = [
        "",
        '    <details class="code-acc">',
        "      <summary>&lt;&gt; Show Code</summary>",
        f'      <pre class="code-pre">{escaped}</pre>',
        "    </details>",
        "    ",
    ]
    return "\n".join(markup_lines)

# styled overlays (playground-like)
def draw_overlay(base: Image.Image, detections=None, points=None) -> Image.Image:
    """Return an RGB copy of ``base`` with boxes and point markers drawn on it.

    Args:
        base: Image to annotate.
        detections: Optional iterable of dicts whose "box" entry is
            ``[x1, y1, x2, y2]`` in pixels.
        points: Optional iterable of dicts whose "xy" entry is ``[x, y]``
            in pixels.

    Returns:
        The annotated image converted back to RGB.
    """
    boxes = detections or []
    markers = points or []

    # Draw on a transparent layer so semi-transparent fills blend with the image.
    background = base.convert("RGBA")
    layer = Image.new("RGBA", background.size, (0, 0, 0, 0))
    pen = ImageDraw.Draw(layer)

    # Red outline for every detection box.
    for entry in boxes:
        left, top, right, bottom = [int(v) for v in entry["box"]]
        pen.rectangle([left, top, right, bottom], outline=(239, 68, 68, 255), width=3)

    # Each point is a stack of concentric ellipses, drawn outermost first:
    # translucent blue halo, blue ring, white ring, solid white centre.
    ring_styles = (
        (16, {"fill": (59, 130, 246, 70)}),
        (11, {"outline": (59, 130, 246, 255), "width": 3}),
        (7, {"outline": (255, 255, 255, 220), "width": 2}),
        (3, {"fill": (255, 255, 255, 255)}),
    )
    for entry in markers:
        cx, cy = [int(v) for v in entry["xy"]]
        for radius, style in ring_styles:
            pen.ellipse([cx - radius, cy - radius, cx + radius, cy + radius], **style)

    return Image.alpha_composite(background, layer).convert("RGB")

# output sections
def _panel_sections(mode: str, sections: dict[str,str], meta: dict, extra_chip=None) -> str:
    chips = [f"<span class='chip'>{mode.upper()}</span>"]
    if extra_chip: chips.append(f"<span class='chip chip--muted'>{html.escape(extra_chip)}</span>")
    rt_html = f"<div class='meta'>Response time: {int(meta.get('ms',0))}ms</div>"
    parts = [f"<div class='chips'>{''.join(chips)}</div>"]
    for title, body in sections.items():
        if body:
            parts.append(f"<div class='section-h'>{title}</div><div class='body'>{html.escape(body)}</div>")
    parts.append(rt_html)
    return f"<div class='panel'>{''.join(parts)}</div>"


In [None]:
# %% [3] Caption helper
def caption_with_length(image: Image.Image, length: str):
    """Caption ``image`` at the requested length and time the whole operation.

    The model is first asked for a caption with ``length=length``. If that
    call raises, a caption is requested without a length and the model is
    then asked to rewrite it ("short" -> one sentence, "normal" -> three
    sentences, anything else -> two paragraphs). If the rewrite also raises,
    the plain caption is returned unchanged.

    Returns:
        ``(text, meta)`` where ``meta`` holds "ms" (elapsed milliseconds as
        an int), "source" (the module-level ``SOURCE``) and "length".
    """
    started = time.time()
    try:
        text = MODEL.caption(image, length=length)["caption"]
    except Exception:
        # Length-aware call failed: caption plainly, then reshape via a query.
        base = MODEL.caption(image)["caption"]
        rewrite_leads = {
            "short": "Rewrite into 1 sentence: ",
            "normal": "Rewrite into 3 sentences, single paragraph: ",
        }
        lead = rewrite_leads.get(length, "Expand into two paragraphs (4–6 sentences) based on: ")
        try:
            text = MODEL.query(image, f"{lead}{base}")["answer"]
        except Exception:
            text = base
    elapsed_ms = int((time.time() - started) * 1000)
    return text, {"ms": elapsed_ms, "source": SOURCE, "length": length}


In [None]:
# %% [4] Core run
def run(mode, img, question, reasoning, cap_len, labels, progress=gr.Progress(track_tqdm=True)):
    """Handle one playground request and build updates for the result widgets.

    Args:
        mode: "Query", "Caption" or "Point"; any other value is handled as
            Detect.
        img: Uploaded PIL image, or None when nothing has been uploaded.
        question: Prompt text, used in Query mode.
        reasoning: When truthy (and the question is non-empty) Query mode
            makes an extra model call and shows its answer as REASONING.
        cap_len: Caption length forwarded to ``caption_with_length``.
        labels: Target text for Point; comma-separated labels for Detect.
            Blank input falls back to "object".
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        A 3-tuple of ``gr.update`` objects for (image, panel HTML, code HTML).
        With no image all three are hidden. An exception raised while
        handling the request is shown as an error panel instead of propagating.
    """
    def _updates(img_val=None, panel="", code="", show=False):
        return (
            gr.update(value=img_val, visible=show),
            gr.update(value=panel, visible=show),
            gr.update(value=code, visible=show),
        )

    def _meta(started):
        # Measured elapsed time, so the panel's "Response time" is real
        # rather than a placeholder constant.
        return {"ms": int((time.perf_counter() - started) * 1000), "source": SOURCE}

    if img is None:
        return _updates(show=False)

    base = exif_rgb(img)
    W, H = base.size
    t0 = time.perf_counter()
    try:
        # Query
        if mode == "Query":
            q = (question or "").strip()
            sections = {"PROMPT": q}
            if reasoning and q:
                sections["REASONING"] = MODEL.query(base, "Explain reasoning for: " + q)["answer"]
            sections["RESULT"] = MODEL.query(base, q)["answer"] if q else "No question."
            return _updates(
                base,
                _panel_sections("query", sections, _meta(t0)),
                make_code_details(jdump(sections)),
                True,
            )

        # Caption (caption_with_length does its own timing)
        if mode == "Caption":
            txt, meta = caption_with_length(base, cap_len)
            sections = {"RESULT": txt}
            return _updates(
                base,
                _panel_sections("caption", sections, meta, extra_chip=f"LENGTH:{cap_len}"),
                make_code_details(jdump(sections)),
                True,
            )

        # Point
        if mode == "Point":
            # Resolve the fallback once so the message names what was actually queried.
            target = (labels or "").strip() or "object"
            pts = MODEL.point(base, target)["points"]
            overlay = draw_overlay(base, points=[{"xy": to_px_point(p, W, H)} for p in pts])
            sections = {"RESULT": f"Found {len(pts)} point(s) for '{target}'"}
            return _updates(
                overlay,
                _panel_sections("point", sections, _meta(t0)),
                make_code_details(jdump(sections)),
                True,
            )

        # Detect: one model call per comma-separated label
        lbs = [t.strip() for t in (labels or "").split(",") if t.strip()] or ["object"]
        dets = []
        for lb in lbs:
            for obj in MODEL.detect(base, lb)["objects"]:
                # Use the queried label when the entry has no "label" key of
                # its own, instead of raising KeyError and failing the request.
                dets.append({"label": obj.get("label", lb), "box": to_px_box(obj, W, H)})
        overlay = draw_overlay(base, detections=dets)
        sections = {"RESULT": f"Detections for {lbs} ({len(dets)} total)"}
        return _updates(
            overlay,
            _panel_sections("detect", sections, _meta(t0)),
            make_code_details(jdump(sections)),
            True,
        )
    except Exception as e:
        # Exception text may contain markup-significant characters; escape it
        # before embedding it in the HTML panel.
        message = str(e)
        return _updates(
            base,
            f"<div>Error:{html.escape(message)}</div>",
            make_code_details(jdump({"error": message})),
            True,
        )


In [None]:
# %% [5] CSS
# Stylesheet passed to gr.Blocks(css=...). It styles the card containers, the
# prompt surface and footer, the mode strip, result chips, section headings,
# panel body text and the "Results" heading, and hides the page footer.
# NOTE(review): several classes emitted elsewhere in this notebook (code-acc,
# code-pre, chips, chip--muted, meta, switch, submit-arrow) have no rule
# here — confirm whether that is intentional.
custom_css = r"""
.play-card { background:#fff; border-radius:14px; box-shadow:0 10px 24px rgba(2,6,23,.06); padding:16px; }
.prompt-surface { display:grid; grid-template-rows:1fr auto; border:1px solid #e5e7eb; border-radius:12px; }
.prompt-input textarea{ border:0!important; padding:14px 16px!important; resize:vertical; min-height:160px; }
.prompt-footer{ display:grid; grid-template-columns:1fr auto auto; align-items:center; gap:10px; padding:6px 10px; border-top:1px solid #e5e7eb; background:#EEF0F3; }
.mode-strip{ display:inline-flex; gap:6px; }
.chip{ background:#f3f4f6; border:1px solid #d1d5db; padding:3px 8px; font-size:12px; border-radius:4px; }
.section-h{ font-weight:700; font-size:12px; text-transform:uppercase; margin:8px 0 4px; }
.panel .body{ font-size:15px!important; line-height:1.6; }
.results-heading h2{ font-size:20px!important; font-weight:700!important; margin:20px 0 12px!important; }
footer{visibility:hidden}
"""


In [None]:
# %% [6] UI
with gr.Blocks(css=custom_css) as demo:
    # ---- Input card: image on the left, prompt + controls on the right ----
    gr.Markdown("## Playground")
    with gr.Row(elem_classes=["play-card"]):
        with gr.Column(scale=1):
            img_in = gr.Image(
                type="pil",
                sources=["upload"],
                image_mode="RGB",
                height=320,
                show_label=False,
            )
        with gr.Column(scale=2):
            with gr.Group(elem_classes=["prompt-surface"]):
                # Hidden initially because the default mode is Caption.
                with gr.Group(visible=False) as prompt_wrap:
                    question = gr.Textbox(
                        placeholder="Enter a question…",
                        lines=7,
                        show_label=False,
                        container=False,
                        elem_classes=["prompt-input"],
                    )
                with gr.Row(elem_classes=["prompt-footer"]):
                    mode_radio = gr.Radio(
                        ["Caption", "Query", "Point", "Detect"],
                        value="Caption",
                        show_label=False,
                        container=False,
                        interactive=True,
                        elem_classes=["mode-strip"],
                    )
                    with gr.Row(visible=False, elem_classes=["switch"]) as reasoning_wrap:
                        reasoning = gr.Checkbox(label="Reasoning", value=False)
                    submit_btn = gr.Button("➜", elem_classes=["submit-arrow"])
            with gr.Row(visible=True) as opts_caption:
                cap_len = gr.Radio(
                    [("Short", "short"), ("Normal", "normal"), ("Long", "long")],
                    value="normal",
                    label="Caption length",
                )

    # ---- Results card: all three outputs start hidden ----
    gr.Markdown("## Results", elem_classes=["results-heading"])
    with gr.Row(elem_classes=["play-card"]):
        with gr.Column(scale=1):
            out_img = gr.Image(height=320, visible=False)
            code_html = gr.HTML(visible=False)
        with gr.Column(scale=1):
            out_panel = gr.HTML(visible=False)

    def _on_mode_change(mode):
        """Show/hide the prompt box, caption options and reasoning switch for ``mode``."""
        needs_text = mode in ("Query", "Point", "Detect")
        placeholders = {
            "Query": "Enter a question…",
            "Point": "Type the object",
            "Detect": "Comma labels",
            "Caption": "",
        }
        hint = placeholders[mode]
        return (
            gr.update(visible=needs_text),
            gr.update(placeholder=hint),
            gr.update(visible=(mode == "Caption")),
            gr.update(visible=(mode == "Query")),
        )

    mode_radio.change(
        _on_mode_change,
        mode_radio,
        [prompt_wrap, question, opts_caption, reasoning_wrap],
    )

    def _run_bridge(mode, img, text, reasoning_val, cap_len):
        """Map the widget values onto ``run``'s arguments."""
        # Reasoning only applies to Query; the textbox doubles as the label
        # field for Point and Detect.
        use_reasoning = reasoning_val if mode == "Query" else False
        label_text = text if mode in ("Point", "Detect") else ""
        return run(mode, img, text, use_reasoning, cap_len, label_text)

    # Both the arrow button and pressing Enter in the textbox submit the request.
    submit_btn.click(
        _run_bridge,
        [mode_radio, img_in, question, reasoning, cap_len],
        [out_img, out_panel, code_html],
    )
    question.submit(
        _run_bridge,
        [mode_radio, img_in, question, reasoning, cap_len],
        [out_img, out_panel, code_html],
    )

demo.launch(inline=True, show_error=True, server_name="127.0.0.1")
