In [1]:
# %% [A] Imports, env, model (safe to re-run)
import os, json, time, traceback, html
from dotenv import load_dotenv
from PIL import Image, ImageDraw, ImageOps, __version__ as PIL_VERSION
import gradio as gr

# Process-wide defaults. setdefault() only writes a value when the variable is
# not already present in the environment, so externally provided settings win.
# NOTE(review): confirm that the installed Gradio version actually reads
# GRADIO_MAX_UPLOAD_SIZE; in this file it is only echoed back by diagnostics().
os.environ.setdefault("GRADIO_MAX_UPLOAD_SIZE", "512")
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "0")

# Load settings from the local "api.env" file (path is relative to the working
# directory), then read the Moondream key and endpoint from the environment.
load_dotenv("api.env")
API_KEY  = os.getenv("MOONDREAM_API_KEY", "").strip()
ENDPOINT = os.getenv("MOONDREAM_ENDPOINT", "http://localhost:2020/v1").strip()

# A non-empty API key selects the key-based client; otherwise the client is
# pointed at ENDPOINT. SOURCE is the label reported in result metadata.
import moondream as md
MODEL  = md.vl(api_key=API_KEY) if API_KEY else md.vl(endpoint=ENDPOINT)
SOURCE = "moondream-cloud" if API_KEY else "moondream-local"

def exif_rgb(img: Image.Image) -> Image.Image:
    """Convert ``img`` to RGB mode, then pass it through ``ImageOps.exif_transpose``."""
    rgb_image = img.convert("RGB")
    return ImageOps.exif_transpose(rgb_image)

def jdump(obj) -> str:
    """Serialize ``obj`` to JSON text with a 2-space indent, keeping non-ASCII characters unescaped."""
    dump_options = {"indent": 2, "ensure_ascii": False}
    return json.dumps(obj, **dump_options)

def diagnostics(last_exc: Exception | None = None) -> str:
    """
    Build a fenced (triple-backtick) text block describing the current setup.

    The report lists the model source, Pillow and moondream versions, whether
    an API key is set, the endpoint (hidden behind "(cloud)" when a key is
    set) and the GRADIO_MAX_UPLOAD_SIZE environment value. When ``last_exc``
    is given, a one-line summary of that exception is appended.
    """
    key_state = "yes" if API_KEY else "no"
    endpoint_shown = "(cloud)" if API_KEY else ENDPOINT
    upload_cap = os.getenv("GRADIO_MAX_UPLOAD_SIZE", "(default)")

    report = [
        f"Source: {SOURCE}",
        f"Pillow: {PIL_VERSION}",
        f"Moondream pkg: {getattr(md, '__version__', 'unknown')}",
        f"Key set? {key_state}",
        f"Endpoint: {endpoint_shown}",
        f"GRADIO_MAX_UPLOAD_SIZE: {upload_cap} MB",
    ]

    if last_exc:
        # Exception type and message only; no stack frames.
        summary = "".join(traceback.format_exception_only(type(last_exc), last_exc))
        report.append(f"Last error: {summary.strip()}")

    body = "\n".join(report)
    return f"```\n{body}\n```"


In [2]:
# %% [B] Formatter + core logic (returns 3 outputs; no container updates)
import html as _html

def _panel_html(mode: str, text: str, meta: dict, extra_chip: str | None = None) -> str:
    chips = [f"<span class='chip'>{mode.upper()}</span>"]
    if extra_chip:
        chips.append(f"<span class='chip chip--muted'>{_html.escape(extra_chip)}</span>")
    rt_ms = meta.get("ms")
    rt_html = f"<div class='meta'>Response time: {int(rt_ms)}ms</div>" if isinstance(rt_ms, (int, float)) else ""
    return f"""
    <div class="panel">
      <div class="chips">{''.join(chips)}</div>
      <div class="subhead">RESULT</div>
      <div class="result-text">{_html.escape(text).replace('\\n','<br>')}</div>
      {rt_html}
    </div>
    """

def run(mode, img, question, reasoning, cap_len, labels, progress=gr.Progress(track_tqdm=True)):
    """
    Run one model request for the selected mode and format the outcome.

    Args:
        mode: "Query", "Caption" or "Point"; any other value is handled as Detect.
        img: PIL image to analyse, or None when nothing was uploaded.
        question: Question text (Query mode). A blank question yields a list
            of suggestions instead of a model call.
        reasoning: When truthy, an extra "think carefully" instruction is
            appended to the question (Query mode only).
        cap_len: Caption length (Caption mode). "normal" and "long" trigger a
            follow-up query that rewrites the caption to the target length.
        labels: One label (Point mode) or comma-separated labels (Detect
            mode). Blank input falls back to "object".
        progress: Gradio progress callback.

    Returns: (display_image, right_panel_html, json_str)
        display_image is the prepared RGB image. It is None when no image was
        supplied or when preparing the image failed. Errors raised while
        processing are reported in the panel and JSON rather than propagated.
    """
    if img is None:
        data = {"error": "no_image"}
        right = _panel_html("notice", "Please upload an image.", {"ms": 0})
        return None, right, json.dumps(data, indent=2)

    # Bound before the try block so the except handler can always return it,
    # even when image preparation itself is the step that failed.
    base = None
    try:
        progress(0.1, desc="Preparing image")
        base = exif_rgb(img)

        if mode == "Query":
            q = (question or "").strip()
            if not q:
                data = {
                    "mode": "query",
                    "text": "",
                    "suggestions": [
                        "What objects are most prominent?",
                        "What is the likely setting or environment?",
                        "Are any brand names or labels visible?",
                    ],
                    "meta": {"source": SOURCE, "ms": 0},
                }
                right = _panel_html("query", "Suggestions: " + " • ".join(data["suggestions"]), data["meta"])
                return base, right, json.dumps(data, indent=2)

            if reasoning:
                q += " Think carefully and justify briefly."
            progress(0.35, desc="Sending query")
            t0 = time.time()
            ans = MODEL.query(base, q)["answer"]
            meta = {"ms": int((time.time() - t0) * 1000), "source": SOURCE}
            right = _panel_html("query", ans, meta)
            return base, right, json.dumps({"mode": "query", "text": ans, "meta": meta}, indent=2)

        if mode == "Caption":
            progress(0.35, desc="Generating caption")
            t0 = time.time()
            cap = MODEL.caption(base)["caption"]
            refine_error = None
            if cap_len in ("normal", "long"):
                try:
                    progress(0.6, desc="Refining")
                    target = "two to three" if cap_len == "normal" else "four to six"
                    cap = MODEL.query(base, f"Rewrite the caption into {target} sentences. Caption: {cap}")["answer"]
                except Exception as refine_exc:
                    # Best effort: keep the unrefined caption, but record why
                    # the rewrite was skipped instead of hiding the failure.
                    refine_error = str(refine_exc)
            meta = {"ms": int((time.time() - t0) * 1000), "source": SOURCE, "length": cap_len}
            if refine_error is not None:
                meta["refine_error"] = refine_error
            right = _panel_html("caption", cap, meta, extra_chip=f"LENGTH: {cap_len.upper()}")
            return base, right, json.dumps({"mode": "caption", "text": cap, "meta": meta}, indent=2)

        if mode == "Point":
            # Strip first, then fall back, so whitespace-only input also
            # becomes "object" (same rule as Detect below).
            label = (labels or "").strip() or "object"
            progress(0.4, desc=f"Finding '{label}'")
            t0 = time.time()
            pts = MODEL.point(base, label)["points"]
            meta = {"ms": int((time.time() - t0) * 1000), "source": SOURCE, "points": len(pts)}
            summary = f"Found {len(pts)} point(s) for '{label}'."
            right = _panel_html("point", summary, meta)
            return base, right, json.dumps({"mode": "point", "points": pts, "meta": meta}, indent=2)

        # Detect: one model call per label; boxes are stored as
        # [x_min, y_min, x_max, y_max] exactly as returned by the model.
        lbs = [t.strip() for t in (labels or "").split(",") if t.strip()] or ["object"]
        progress(0.4, desc="Detecting")
        t0 = time.time()
        all_dets = []
        for lb in lbs:
            objs = MODEL.detect(base, lb)["objects"]
            all_dets.extend(
                {"label": lb, "box": [o["x_min"], o["y_min"], o["x_max"], o["y_max"]]}
                for o in objs
            )
        meta = {"ms": int((time.time() - t0) * 1000), "source": SOURCE, "detections": len(all_dets)}
        summary = f"Detections for: {', '.join(lbs)} ({len(all_dets)} total)."
        right = _panel_html("detect", summary, meta)
        return base, right, json.dumps({"mode": "detect", "detections": all_dets, "meta": meta}, indent=2)

    except Exception as e:
        err = {"error": str(e)}
        right = _panel_html("error", f"Runtime error: {e}", {"ms": 0})
        return base, right, json.dumps(err, indent=2)


In [3]:
# %% [C] UI — segmented "Mode" + right-side Reasoning; Show Code HTML under image
import gradio as gr
import html as _html

# Stylesheet handed to gr.Blocks(css=...) below. The class and id names are
# shared with the rest of this notebook:
#   .play-card                      -> elem_classes on the input and result rows
#   .panel / .chip / .subhead /
#   .result-text / .meta            -> markup produced by _panel_html
#   #modebar, #mode-left,
#   #mode-right, #segmented         -> elem_id values set in the UI layout
#   .code-details                   -> markup produced by _details_for_json
# NOTE(review): the "#segmented .wrap ... .item" selectors assume a particular
# DOM structure for gr.Radio — confirm against the installed Gradio version.
custom_css = """
.play-card { background:#fff; border-radius:14px; box-shadow:0 10px 24px rgba(2,6,23,.06); padding:16px; }
.panel .chips { margin-bottom:8px; }
.chip { display:inline-block; font-weight:700; font-size:12px; letter-spacing:.02em;
        padding:4px 10px; border-radius:999px; background:#eef2ff; color:#4338ca; margin-right:6px; }
.chip--muted { background:#f1f5f9; color:#334155; }
.panel .subhead { font-weight:800; color:#374151; letter-spacing:.03em; margin:4px 0 8px; }
.panel .result-text { color:#111827; line-height:1.55; }
.panel .meta { color:#6b7280; font-size:12px; margin-top:12px; }

/* One-line bar with segmented mode on left, Reasoning on right */
#modebar { display:flex; align-items:center; gap:12px; }
#mode-left  { flex: 1 1 auto; }
#mode-right { display:flex; justify-content:flex-end; align-items:center; }

/* Segmented control look for Radio */
#segmented .wrap { display:inline-flex; border:1px solid #e5e7eb; border-radius:9999px; overflow:hidden; }
#segmented .wrap label { margin:0; }
#segmented input[type="radio"] { display:none; }
#segmented .wrap .item { padding:6px 12px; font-weight:600; cursor:pointer; user-select:none; }
#segmented input[type="radio"]:checked + .item { background:#eef2ff; color:#4338ca; }
#segmented .wrap .item:hover { background:#f8fafc; }
.code-details summary {
  cursor: pointer; list-style: none; font-weight: 700; color: #334155;
  padding: 8px 10px; border-radius: 8px; background: #f1f5f9; display: inline-block;
}
.code-details pre {
  background: #0b1020; color: #e5e7eb; padding: 12px; border-radius: 10px; overflow:auto; max-height: 320px;
  font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; font-size: 12.5px; line-height: 1.4; margin-top: 10px;
}
"""

def _details_for_json(json_str: str) -> str:
    if not json_str:
        return ""
    return f"""
    <details class="code-details">
      <summary>&lt;&gt; Show Code</summary>
      <pre>{_html.escape(json_str)}</pre>
    </details>
    """

# Build the whole UI inside one Blocks context. Layout: an input card (image on
# the left, mode selector and mode-specific controls on the right) followed by
# a result card (output image + collapsible JSON on the left, result panel on
# the right).
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## Results")

    # Input row
    with gr.Row(elem_classes=["play-card"]):
        with gr.Column(scale=1):
            img_in = gr.Image(type="pil", sources=["upload"], label="Image", height=360)

        with gr.Column(scale=1):
            with gr.Row(elem_id="modebar"):
                with gr.Column(elem_id="mode-left"):
                    # Segmented radio (acts like tabs, but simpler + safer)
                    mode = gr.Radio(
                        ["Query", "Caption", "Point", "Detect"], value="Caption", label=None, elem_id="segmented"
                    )
                with gr.Column(elem_id="mode-right", min_width=160):
                    # Passed to run(); run() only consults it in Query mode.
                    reasoning = gr.Checkbox(label="Reasoning", value=False)

            # Mode-specific controls (toggle visibility)
            # Initial visibility matches the default mode ("Caption"): only
            # caption_row starts visible.
            question = gr.Textbox(label=None, placeholder="Enter a question…", lines=2, visible=False)
            with gr.Row(visible=True) as caption_row:
                cap_len = gr.Radio(
                    choices=[("Short","short"),("Normal","normal"),("Long","long")],
                    value="normal", label=None
                )
            with gr.Row(visible=False) as point_row:
                point_label = gr.Textbox(value="object", label=None, placeholder="Object label (e.g., 'grapes')")
            with gr.Row(visible=False) as detect_row:
                detect_labels = gr.Textbox(value="object", label=None,
                                           placeholder="Comma-separated labels (e.g., 'grapes, banana')")

            # Suggestions for Query
            # Each button writes its own caption into the question box; the
            # texts mirror the suggestions run() returns for a blank question.
            with gr.Row(visible=False) as query_sugs:
                gr.Button("What objects are most prominent?").click(
                    lambda: "What objects are most prominent?", None, [question]
                )
                gr.Button("What is the likely setting or environment?").click(
                    lambda: "What is the likely setting or environment?", None, [question]
                )
                gr.Button("Are any brand names or labels visible?").click(
                    lambda: "Are any brand names or labels visible?", None, [question]
                )

            def _toggle(m):
                """Return visibility updates so only the controls for mode ``m`` are shown.

                The tuple order must match the outputs list given to
                ``mode.change`` below.
                """
                return (
                    gr.update(visible=(m == "Query")),    # question
                    gr.update(visible=(m == "Caption")),  # caption_row
                    gr.update(visible=(m == "Point")),    # point_row
                    gr.update(visible=(m == "Detect")),   # detect_row
                    gr.update(visible=(m == "Query")),    # query_sugs
                )
            mode.change(_toggle, mode, [question, caption_row, point_row, detect_row, query_sugs])

            run_btn = gr.Button("Run", variant="primary")

    # Result card
    with gr.Row(elem_classes=["play-card"]):
        with gr.Column(scale=1):
            out_img = gr.Image(label=None, height=360)
            code_html = gr.HTML(value="")   # stays empty until a result; sits UNDER the image
        with gr.Column(scale=1):
            out_panel = gr.HTML()

    # Clear code when a new image is chosen
    img_in.change(lambda _: "", inputs=img_in, outputs=[code_html])

    # Bridge labels based on current mode and call run(...)
    def _run_bridge(m, img, q, reason, cap_len, point_label, detect_labels, progress=gr.Progress(track_tqdm=True)):
        """Choose the label text for the active mode, call ``run`` and adapt its outputs.

        Point mode uses ``point_label``; every other mode passes
        ``detect_labels``. The JSON string from ``run`` is wrapped by
        ``_details_for_json``.
        """
        labels = point_label if m == "Point" else detect_labels
        img_out, panel_html, json_str = run(m, img, q, reason, cap_len, labels, progress)
        # No output image means run() had nothing to show; keep the code section empty.
        details = _details_for_json(json_str) if img_out is not None else ""
        return img_out, panel_html, details

    # Input order here must match _run_bridge's positional parameters.
    run_btn.click(
        _run_bridge,
        [mode, img_in, question, reasoning, cap_len, point_label, detect_labels],
        [out_img, out_panel, code_html],
    )


In [4]:
# %% [D] Launch inline
# Enable the request queue, then start the app. inline=True asks Gradio to
# embed the UI in the notebook output; show_error=True asks it to surface
# errors in the UI. Re-running this cell launches the app again.
demo.queue().launch(inline=True, show_error=True)


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


