In [3]:
!pip install -U huggingface_hub pillow

Collecting huggingface_hub
  Downloading huggingface_hub-1.2.3-py3-none-any.whl.metadata (13 kB)
Collecting pillow
  Downloading pillow-12.1.0-cp312-cp312-win_amd64.whl.metadata (9.0 kB)
Collecting filelock (from huggingface_hub)
  Downloading filelock-3.20.2-py3-none-any.whl.metadata (2.1 kB)
Collecting fsspec>=2023.5.0 (from huggingface_hub)
  Using cached fsspec-2025.12.0-py3-none-any.whl.metadata (10 kB)
Collecting hf-xet<2.0.0,>=1.2.0 (from huggingface_hub)
  Downloading hf_xet-1.2.0-cp37-abi3-win_amd64.whl.metadata (5.0 kB)
Collecting shellingham (from huggingface_hub)
  Using cached shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting typer-slim (from huggingface_hub)
  Downloading typer_slim-0.21.0-py3-none-any.whl.metadata (16 kB)
Collecting click>=8.0.0 (from typer-slim->huggingface_hub)
  Using cached click-8.3.1-py3-none-any.whl.metadata (2.6 kB)
Downloading huggingface_hub-1.2.3-py3-none-any.whl (520 kB)
Downloading hf_xet-1.2.0-cp37-abi3-win_amd64.whl (2.9 

In [None]:
# CELL 1 — Imports + Config (HF + Ollama)

import os, time, json
import requests

from typing import TypedDict, List, Literal
from langgraph.graph import StateGraph, START, END
from langchain_ollama import ChatOllama

from IPython.display import display, Markdown, Image
from pprint import pprint

from dotenv import load_dotenv
load_dotenv()

from huggingface_hub import InferenceClient

# ---- Hugging Face token ----
# Read HUGGINGFACEHUB_API_TOKEN first (the name this notebook has always read),
# then fall back to HF_TOKEN, which is the name the setup hint and the error
# message tell the user to set. Either variable works, from the shell or .env.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError(
        "Hugging Face token not found in environment. "
        "Set HF_TOKEN (or HUGGINGFACEHUB_API_TOKEN) before running the notebook."
    )

# Bearer header reused by the raw `requests` calls against the HF endpoints.
HF_HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# Hub client authenticated with the token resolved above.
hf_client = InferenceClient(
    provider="fal-ai",          # text-to-image is supported on fal-ai
    api_key=HF_TOKEN,
)

# ---- HF Inference endpoints ----
# NOTE(review): the "api-inference.huggingface.co" host may have been superseded
# by a router-based endpoint — confirm against the current HF Inference docs.

# Text-to-image model (choose one you have access to / that fits your usage)
HF_T2I_MODEL = "stabilityai/stable-diffusion-2-1"
HF_T2I_URL   = f"https://api-inference.huggingface.co/models/{HF_T2I_MODEL}"

# Object detection model (DETR is a solid default)
HF_OD_MODEL  = "facebook/detr-resnet-50"
HF_OD_URL    = f"https://api-inference.huggingface.co/models/{HF_OD_MODEL}"

# ---- Output folder ----
# Created eagerly so later cells can write into it without checking.
OUTPUT_DIR = "outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ---- Ollama models ----
TEXT_MODEL  = "llama3.2"


  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# CELL 2 — State + Nodes (Prompt → Local SDXL → Object Detection → Ollama Summary)

import os
import torch
from diffusers import DiffusionPipeline
from typing import TypedDict, List

# ---- Load Diffusion Model ONCE (important) ----
# The pipeline is created at module level and stored in `pipe`; the
# generate_image node calls this shared object instead of reloading weights.
# NOTE: You can switch device_map to "cuda" if GPU is available
# NOTE(review): the recorded cell output shows "`torch_dtype` is deprecated!
# Use `dtype` instead!" while pipeline components load — confirm which library
# emits it before renaming the argument below.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.bfloat16,
    device_map="cpu"
)

pipe.enable_attention_slicing()  # reduces memory footprint

# ---- Workflow State ----
# Shared state passed between graph nodes. total=False makes every key
# optional, so each node can return only the keys it produces.
WorkflowState = TypedDict(
    "WorkflowState",
    {
        "topic": str,          # user-supplied subject for the image
        "image_prompt": str,   # prompt text written by the prompt node
        "image_path": str,     # where the generated image was saved
        "objects": List[str],  # de-duplicated labels from object detection
        "summary": str,        # final text summary
    },
    total=False,
)

# ---- Ollama LLMs ----
# Two clients on the same model; only the temperature differs:
# 0.5 for prompt writing, 0.2 for the summary.
llm_prompt, llm_summary = (
    ChatOllama(model=TEXT_MODEL, temperature=temp) for temp in (0.5, 0.2)
)

# -------------------------
# Node 1: Topic → Image Prompt
# -------------------------
def generate_image_prompt(state: WorkflowState) -> WorkflowState:
    """Ask the prompt LLM to turn the topic into one text-to-image prompt.

    Args:
        state: Workflow state; must contain "topic".

    Returns:
        A partial state update {"image_prompt": <stripped LLM reply>}.
    """
    subject = state["topic"]

    system_text = (
        "You are an expert prompt engineer for photorealistic image generation. "
        "Return ONLY one final prompt. No explanations."
    )
    user_text = (
        f"Topic: {subject}\n\n"
        "Write one detailed photorealistic prompt including:\n"
        "- subject and environment\n"
        "- lighting and mood\n"
        "- camera / lens style\n"
        "- ultra-detailed, cinematic realism\n"
        "Avoid: text, watermarks, logos."
    )

    # (role, content) pairs, system message first
    messages = [("system", system_text), ("user", user_text)]
    reply = llm_prompt.invoke(messages)

    image_prompt = reply.content.strip()
    return {"image_prompt": image_prompt}

# -------------------------
# Node 2: Prompt → Image (LOCAL SDXL)
# -------------------------
def generate_image(state: WorkflowState) -> WorkflowState:
    """Run the shared diffusion pipeline on the prompt and save the first image.

    Args:
        state: Workflow state; must contain "image_prompt".

    Returns:
        A partial state update {"image_path": <OUTPUT_DIR>/generated.png}.
    """
    text_prompt = state["image_prompt"]
    destination = os.path.join(OUTPUT_DIR, "generated.png")

    # Only the first image of the pipeline result is kept
    result = pipe(text_prompt)
    first_image = result.images[0]
    first_image.save(destination)

    return {"image_path": destination}

# -------------------------
# Node 3: Image → Object Detection (HF DETR — FREE)
# -------------------------
def detect_objects(state: WorkflowState) -> WorkflowState:
    """Send the saved image to the object-detection endpoint and collect labels.

    Args:
        state: Workflow state; must contain "image_path" pointing at a readable file.

    Returns:
        A partial state update {"objects": [...]} holding each distinct label
        once, in first-seen order.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        ValueError: If the JSON payload is not a list (e.g. an error object),
            instead of failing later with an opaque AttributeError.
    """
    image_path = state["image_path"]

    with open(image_path, "rb") as f:
        img_bytes = f.read()

    r = requests.post(
        HF_OD_URL,
        headers=HF_HEADERS,
        data=img_bytes,
        timeout=120
    )

    r.raise_for_status()
    detections = r.json()

    # Assumes the endpoint returns a JSON list of detection dicts carrying a
    # "label" key; anything that is not a list is rejected explicitly.
    if not isinstance(detections, list):
        raise ValueError(
            f"Unexpected object-detection response (expected a list): {detections!r}"
        )

    # Skip malformed entries and detections without a usable label
    labels = [
        d["label"]
        for d in detections
        if isinstance(d, dict) and d.get("label")
    ]

    # dict.fromkeys de-duplicates while preserving first-seen order
    return {"objects": list(dict.fromkeys(labels))}

# -------------------------
# Node 4: Objects → Final Summary
# -------------------------
def summarize(state: WorkflowState) -> WorkflowState:
    """Ask the summary LLM for a bullet summary of the topic and detected objects.

    Args:
        state: Workflow state; must contain "topic". "objects" is optional and
            is treated as an empty list when absent.

    Returns:
        A partial state update {"summary": <stripped LLM reply>}.
    """
    subject = state["topic"]
    detected = state.get("objects", [])

    request_text = (
        f"Topic: {subject}\n"
        f"Detected objects: {detected}\n\n"
        "Write a crisp final summary in 5 bullets:\n"
        "- what the image likely depicts\n"
        "- key objects and their roles\n"
        "- scene and atmosphere\n"
        "- notable visual details\n"
    )

    reply = llm_summary.invoke(request_text)
    summary_text = reply.content.strip()
    return {"summary": summary_text}


Loading pipeline components...:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 3/7 [00:19<00:24,  6.13s/it]`torch_dtype` is deprecated! Use `dtype` instead!
Loading pipeline components...: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [00:38<00:00,  5.47s/it]


In [12]:
# CELL 3 — Graph & Edges (no notebook-specific tricks here)

graph = StateGraph(WorkflowState)

# Register every node under its function's own name.
for _node_fn in (generate_image_prompt, generate_image, detect_objects, summarize):
    graph.add_node(_node_fn.__name__, _node_fn)

# Strictly linear route: connect each stop to the next one.
_route = (
    START,
    "generate_image_prompt",
    "generate_image",
    "detect_objects",
    "summarize",
    END,
)
for _src, _dst in zip(_route, _route[1:]):
    graph.add_edge(_src, _dst)

workflow = graph.compile()


In [None]:
# CELL 4 — Run + Pretty Output + Checkpoints (ipynb-friendly)

def checkpoint(msg: str):
    """Render a bold, check-marked progress line in the notebook output.

    Args:
        msg: Status text shown after the "Checkpoint:" label.
    """
    # The check-mark emoji replaces the mojibake bytes that were in the string.
    display(Markdown(f"✅ **Checkpoint:** {msg}"))

def show_output(initial_state: dict, final_state: dict):
    """Render the workflow input, generated image, detected objects and summary.

    Each section is displayed as Markdown, with checkpoint lines describing
    which branch was taken. Missing pieces fall back to an italic placeholder.

    Args:
        initial_state: State passed into the workflow; "topic" is read from it.
        final_state: State returned by the workflow; "image_path", "objects"
            and "summary" are read if present.
    """
    checkpoint("Rendering output started")

    display(Markdown("## 🟦 Input"))
    display(Markdown(f"**Topic:** `{initial_state.get('topic','')}`"))

    display(Markdown("## 🖼️ Generated Image"))
    img_path = final_state.get("image_path")
    # Only display the image when the path is set and the file actually exists
    if img_path and os.path.exists(img_path):
        checkpoint("Image found on disk — displaying it")
        display(Image(filename=img_path))
        display(Markdown(f"Saved at: `{img_path}`"))
    else:
        checkpoint("Image not found / not generated")
        display(Markdown("_Image not available (generation failed or file missing)._"))

    display(Markdown("## 🧩 Detected Objects"))
    objs = final_state.get("objects", [])
    if objs:
        checkpoint(f"Objects detected — count: {len(objs)}")
        display(Markdown("\n".join([f"- {o}" for o in objs])))
    else:
        checkpoint("No objects detected (or detection node did not populate state)")
        display(Markdown("_No objects detected._"))

    display(Markdown("## 📝 Summary"))
    summary = final_state.get("summary", "")
    if summary:
        checkpoint("Summary present — rendering it")
        # Two trailing spaces force a Markdown line break at every newline
        display(Markdown(summary.replace("\n", "  \n")))
    else:
        checkpoint("No summary found in final_state")
        display(Markdown("_No summary._"))

    checkpoint("Rendering output completed")


# ---- Execute with execution checkpoints ----
# Checkpoints bracket each step so the cell output shows how far the run got.
checkpoint("About to invoke workflow")

initial_state = {"topic": "Bengal tiger walking through tall grass at sunrise, golden light, shallow depth"}
checkpoint("Initial state created")

# Runs the compiled graph with the initial state and keeps the state it returns
final_state = workflow.invoke(initial_state)

checkpoint("Workflow invocation completed — now displaying results")
show_output(initial_state, final_state)

# Optional: also inspect raw keys/state
checkpoint("Displaying raw state keys")
display(Markdown("### 🔎 Raw state keys"))
pprint(list(final_state.keys()))
