# Pipeline walkthrough: stepwise execution of `scripts/run_pipeline.py`

Use this notebook to run the LangGraph nodes one-by-one: planner → segmentation → inpaint → realism critique. It mirrors `scripts/run_pipeline.py` but lets you inspect or modify the state at each checkpoint.


In [39]:
import os
import sys
from pathlib import Path

from dotenv import load_dotenv
from omegaconf import OmegaConf

# Ensure repo root on sys.path for local imports
# Use cwd (or notebook path upwards) since __file__ is not defined in notebooks
ROOT = Path.cwd().resolve()
# Search upwards for the repo root, if needed (e.g. by presence of a marker file/folder)
# For now assume repo root is parent of cwd, or customize as needed:
if (ROOT / "src").exists():
    repo_root = ROOT
elif (ROOT.parent / "src").exists():
    repo_root = ROOT.parent
else:
    repo_root = ROOT  # fallback: use cwd, but this may need tuning

sys.path.insert(0, str(repo_root))

from src.integrations.openai_client import OpenAIPlanner
from src.integrations.replicate_client import ReplicateClient, ReplicateModels
from src.workflow.nodes.planning import plan_edit_node
from src.workflow.nodes.segmentation import segment_object_node
from src.workflow.nodes.generation import inpaint_node
from src.workflow.nodes.criticism import check_realism_node
from src.workflow.state import AgentState
from src.utils.logging import configure_logging
from src.utils.paths import ensure_dir



In [53]:
# Load environment and configure logging
load_dotenv()
configure_logging(os.getenv("LOG_LEVEL", "INFO"))



In [54]:
# Load configs with OmegaConf, but resolve Hydra-style paths manually (no hydra resolver here)
main_cfg = OmegaConf.load(repo_root / "configs" / "main.yaml")

# defaults list may include agents/tools names
defaults = OmegaConf.to_container(main_cfg.get("defaults", []), resolve=False)
agents_name = defaults[0].get("agents", "default") if defaults else "default"
tools_name = defaults[1].get("tools", "default") if len(defaults) > 1 else "default"

# Manual project path resolution (Hydra ${hydra:runtime.cwd} not available in notebook)
project_cfg = OmegaConf.to_container(main_cfg.project, resolve=False)
data_root = project_cfg.get("data_root") or str(repo_root / "data")
raw_dir = Path(project_cfg.get("raw_dir") or Path(data_root) / "01_raw")
mask_dir = Path(project_cfg.get("mask_dir") or Path(data_root) / "02_masks")
cf_dir = Path(project_cfg.get("counterfactual_dir") or Path(data_root) / "03_counterfactuals")

# Workflow as plain dict
workflow_cfg = OmegaConf.to_container(main_cfg.workflow, resolve=False)
workflow = {
    "target_attribute": workflow_cfg.get("target_attribute", "safety"),
    "input_dir": workflow_cfg.get("input_dir") or str(raw_dir),
    "max_attempts": workflow_cfg.get("max_attempts", 3),
    "openai_model": workflow_cfg.get("openai_model", "gpt-4o-mini"),
    "concurrency": workflow_cfg.get("concurrency", 1),
    "realism_threshold": workflow_cfg.get("realism_threshold", 0.5),
}

# Load agents/tools with full resolution
agents_cfg = OmegaConf.to_container(
    OmegaConf.load(repo_root / "configs" / "agents" / f"{agents_name}.yaml"), resolve=True
)
tools_cfg = OmegaConf.to_container(
    OmegaConf.load(repo_root / "configs" / "tools" / f"{tools_name}.yaml"), resolve=True
)

print("workflow:", workflow)
print(f"Workflow target: {workflow['target_attribute']}")
print(f"Raw dir: {raw_dir}")


workflow: {'target_attribute': 'safety', 'input_dir': '${project.raw_dir}', 'max_attempts': 3, 'openai_model': 'gpt-4o-mini', 'concurrency': 1, 'realism_threshold': 0.5}
Workflow target: safety
Raw dir: ${project.data_root}/01_raw


In [55]:
# Instantiate clients
openai_model = workflow["openai_model"]
planner_prompt = agents_cfg["planner_prompt"]
critic_prompt = agents_cfg["critic_prompt"]
planner = OpenAIPlanner(model=openai_model, planner_prompt=planner_prompt, critic_prompt=critic_prompt)

replicate_models = ReplicateModels.from_config(tools_cfg["replicate"])
replicate_client = ReplicateClient(models=replicate_models, download_timeout=tools_cfg["storage"]["download_timeout"])

print("Using OpenAI model:", openai_model)
print("Replicate models:", replicate_models)



Using OpenAI model: gpt-4o-mini
Replicate models: ReplicateModels(dino_model='adirik/grounding-dino:latest', sam_model='meta/sam-2:fe97b453a6455861e3bac769b441ca1f1086110da7466dbb65cf1eecfd60dc83', inpaint_model='black-forest-labs/flux-fill-pro:latest', nano_banana_model='', mock=False)


In [56]:
target_attribute

'safety'

In [57]:
# Select an input image
ensure_dir(raw_dir)
images = sorted(raw_dir.glob("*.png")) + sorted(raw_dir.glob("*.jpg"))

image_path = '/Users/admin/counter-factual-streetview/data/01_raw/image.png'
print("Selected image:", image_path)

target_attribute = workflow["target_attribute"]
state: AgentState = {
    "image_path": str(image_path),
    "target_attribute": target_attribute,
}
state


Selected image: /Users/admin/counter-factual-streetview/data/01_raw/image.png


{'image_path': '/Users/admin/counter-factual-streetview/data/01_raw/image.png',
 'target_attribute': 'safety'}

In [58]:
# Step 1: Planner
state.update(plan_edit_node(state, planner=planner, target_attribute=target_attribute))
state


2025-12-21 16:31:36,420 | INFO | httpx | HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


{'image_path': '/Users/admin/counter-factual-streetview/data/01_raw/image.png',
 'target_attribute': 'safety',
 'edit_plan': 'Add more clearly marked and illuminated crosswalks with reflective materials',
 'target_object': 'crosswalk marking',
 'attempts': 1}

In [None]:
from replicate import Client
client = Client()
# Upload image to a temporary URL
image_url = client.upload_file(image_path) # This returns a URL



AttributeError: 'Client' object has no attribute 'upload_file'

In [None]:
from replicate import Client
client = Client()
# Upload image to a temporary URL
image_url = client.upload_file(image_path) # This returns a URL
output = client.run(
    "schananas/grounded_sam:ee871c19efb1941f55f66a3d7d960428c8a5afcb77449547fe8e5a3ab9ebc21c",
    input={
        "adjustment_factor": 0,
        "image": "https://replicate.delivery/pbxt/OH0zu1v6XhSafOTTabcOV5rnfPd0H5AcFMWfYRo3bydeIm5J/IMG_3827.jpeg",
        "mask_prompt": "dog",
        "negative_mask_prompt": "bag"
    }
)

# The schananas/grounded_sam model can stream output as it's running.
# The predict method returns an iterator, and you can iterate over that output.
for item in output:
    # https://replicate.com/schananas/grounded_sam/api#output-schema
    print(item)

NameError: name 'replicate' is not defined

In [None]:
# Step 3: Inpaint
ensure_dir(cf_dir)
state.update(inpaint_node(state, replicate_client=replicate_client, output_dir=cf_dir))
state


In [None]:
# Step 4: Critic / realism
state.update(
    check_realism_node(
        state,
        planner=planner,
        replicate_client=replicate_client,
        realism_threshold=workflow.realism_threshold,
    )
)
state


## Tips
- Set env vars before running: `OPENAI_API_KEY`, `REPLICATE_API_TOKEN`, optional `LOG_LEVEL`.
- You can rerun cells 6–9 iteratively to refine prompts or swap model IDs in `configs/tools/default.yaml` without restarting the kernel (just re-run cell 4 if config changes).
- Masks and edits are written to `data/02_masks/` and `data/03_counterfactuals/` respectively; inspect them after each run.
- If Replicate returns 422/429, the client falls back to mock outputs; adjust credit/access or model versions to get real generations (see `configs/tools/default.yaml`).
