In [172]:
import json
import os
from functools import lru_cache
from pathlib import Path
from typing import TypedDict, Optional

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import torch
from langgraph.graph import StateGraph, START, END
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from ultralytics import YOLO

from src.edge_validator import EdgeValidator
from src.llm_caller import LLMCaller
from src.llm_detector import Detector
from src.utils.path import from_root
from src.yolo.yolo import Yolo


In [173]:
# === Ensure working directory is project root ===
# Step up one directory at a time until the current folder is named like the
# repository. Stop as well once the repository name no longer appears anywhere
# in the path, so the loop cannot climb past the checkout.
while True:
    if Path.cwd().name.lower() == "aiml25-exam":
        break
    if "aiml25-exam" not in str(Path.cwd()).lower():
        break
    os.chdir("..")
print(f"Working directory set to: {Path.cwd()}")

# === Initialize models ===
# YOLO weights from models/yolo-trained.pt, wrapped in the project's Yolo helper.
yolo = Yolo(YOLO(from_root("models/yolo-trained.pt")))

# Vision LLM client. Credentials come from the environment; os.getenv yields
# None for a variable that is not set.
llm = LLMCaller(
    model_id="watsonx/meta-llama/llama-3-2-90b-vision-instruct",
    api_url="https://us-south.ml.cloud.ibm.com",
    api_key=os.getenv("WX_API_KEY"),
    project_id=os.getenv("WX_PROJECT_ID"),
    params=dict(),
)

# The detector shares the same YOLO wrapper instance created above.
detector = Detector(llm, yolo)


Working directory set to: /Users/emilstausbol/Documents/GitHub/AIML25-Exam


In [174]:
# === State ===
class _ImageStateInput(TypedDict):
    """Keys the caller has to provide when invoking the pipeline."""

    image_path: str  # path of the image every node works on
    verbose: bool  # when true, nodes print progress messages


class ImageState(_ImageStateInput, total=False):
    """State shared by all pipeline nodes.

    ``image_path`` and ``verbose`` are required. The remaining keys are
    filled in by the nodes as the pipeline advances, so they are declared
    non-required (``total=False``): the initial state legitimately omits them.
    """

    is_diagram: Optional[bool]  # written by classify_image
    classification_confidence: Optional[float]  # written by classify_image
    diagram_detections: Optional[list]  # written by run_yolo_detection
    cropped_images: Optional[list[str]]  # written by crop_diagram
    graph: Optional[object]  # written by create_graph
    validation_results: Optional[object]  # written by validate_edges

In [178]:
# === Nodes ===
def read_image(state: ImageState):
    """Entry node: report which image the pipeline is about to process.

    The image file is not opened here; this node only prints the path when
    ``state["verbose"]`` is true. It returns an empty update, leaving the
    state unchanged.
    """
    verbose = state["verbose"]
    if not verbose:
        return {}
    print(f"🔍 Reading image: {state['image_path']}")
    return {}

@lru_cache(maxsize=1)
def _load_clip():
    """Load the CLIP model and processor once and reuse them on later calls."""
    checkpoint = "openai/clip-vit-base-patch32"
    return CLIPModel.from_pretrained(checkpoint), CLIPProcessor.from_pretrained(checkpoint)


def classify_image(state: ImageState):
    """Decide with CLIP whether the image is a diagram.

    The image is scored against the two prompts "a diagram" and
    "not a diagram"; the softmax over the two image-text logits gives the
    class probabilities.

    Args:
        state: pipeline state; reads ``image_path`` and ``verbose``.

    Returns:
        A state update with ``is_diagram`` (True when the first prompt wins)
        and ``classification_confidence`` (probability of the winning prompt).
    """
    # Loading the checkpoint is expensive, so it is cached instead of being
    # repeated on every invocation of the node.
    model, processor = _load_clip()

    # convert() returns a new in-memory image, so the file handle can be
    # released as soon as the conversion is done.
    with Image.open(state["image_path"]) as source:
        image = source.convert("RGB")

    inputs = processor(text=["a diagram", "not a diagram"], images=image, return_tensors="pt", padding=True)

    # Pure inference: no gradients are needed for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    probs = outputs.logits_per_image.softmax(dim=1)
    is_diagram = probs.argmax().item() == 0  # index 0 is the "a diagram" prompt
    confidence = probs.max().item()
    if state["verbose"]:
        print(f"🧠 Image classification: {'Diagram' if is_diagram else 'Not a diagram'} (confidence: {confidence:.2f})")
    return {"is_diagram": is_diagram, "classification_confidence": confidence}

def route_decision(state: ImageState) -> str:
    """Choose the node that follows classification.

    Returns ``"run_yolo_detection"`` when ``state["is_diagram"]`` is truthy,
    otherwise ``"handle_non_diagram"``.
    """
    if state["is_diagram"]:
        return "run_yolo_detection"
    return "handle_non_diagram"

def handle_non_diagram(state: ImageState):
    """Terminal node for images that were not classified as diagrams.

    Prints a notice unconditionally (the ``verbose`` flag is not consulted)
    and returns an empty update.
    """
    notice = "🚫 Not a diagram. Ending pipeline."
    print(notice)
    return dict()

def run_yolo_detection(state: ImageState):
    """Run YOLO on the image and publish the detections in the state.

    Args:
        state: pipeline state; reads ``image_path`` and ``verbose``.

    Returns:
        A state update with ``diagram_detections``: one dict per detection in
        which the ``xyxy`` key has been renamed to ``coords`` for the
        downstream steps.
    """
    if state["verbose"]:
        print("📦 Running YOLO detection...")
    yolo.predict(state["image_path"])

    # Normalize keys for downstream steps on *copies* of the detections.
    # The same `yolo` wrapper is shared with the detector, so renaming the key
    # in place could change what other users of the wrapper see.
    detections = []
    for raw in yolo.getBBoxes():
        det = dict(raw)
        # Tolerate entries that were already normalized (no "xyxy" left).
        if "xyxy" in det:
            det["coords"] = det.pop("xyxy")
        detections.append(det)

    return {"diagram_detections": detections}

def crop_diagram(state: ImageState):
    """Cut every detected region out of the image and save it under ``tmp/``.

    Args:
        state: pipeline state; reads ``image_path``, ``verbose`` and
            ``diagram_detections`` (each entry needs a ``coords`` box of four
            numbers: x1, y1, x2, y2).

    Returns:
        A state update with ``cropped_images``: the paths of the written
        files, ``tmp/cropped_1.png`` onwards, in detection order.

    Note:
        Files left in ``tmp/`` by earlier runs are overwritten when the index
        matches but are never removed.
    """
    if state["verbose"]:
        print("✂️ Cropping diagram parts...")
    # A missing key and an explicit None both mean "nothing to crop".
    detections = state.get("diagram_detections") or []
    os.makedirs("tmp", exist_ok=True)

    # convert() yields an independent in-memory copy, so the file handle can
    # be closed before cropping starts.
    with Image.open(state["image_path"]) as source:
        image = source.convert("RGB")

    image_paths = []
    for idx, det in enumerate(detections, start=1):
        x1, y1, x2, y2 = map(int, det["coords"])  # crop box as integer pixels
        cropped = image.crop((x1, y1, x2, y2))
        path = f"tmp/cropped_{idx}.png"
        cropped.save(path)
        image_paths.append(path)
        if state["verbose"]:
            print(f"💾 Saved: {path}")
    return {"cropped_images": image_paths}

def detect_nodes(state: ImageState):
    """Hand the image to the shared detector and run its node detection.

    The return value of ``detector.detect_nodes()`` is discarded; this node
    contributes an empty state update.
    """
    verbose = state["verbose"]
    if verbose:
        print("🔍 Detecting nodes...")
    source_image = state["image_path"]
    detector.initiate_image(source_image)
    detector.detect_nodes()
    return dict()

def detect_edges(state: ImageState):
    """Run the shared detector's edge detection.

    The return value of ``detector.detect_edges()`` is discarded; this node
    contributes an empty state update. Exceptions raised by the detector
    propagate to the caller.
    """
    verbose = state["verbose"]
    if verbose:
        print("🔗 Detecting edges...")
    detector.detect_edges()
    return dict()

def create_graph(state: ImageState):
    """Fetch the graph from the shared detector and store it under ``graph``."""
    verbose = state["verbose"]
    if verbose:
        print("📊 Creating graph...")
    built_graph = detector.get_graph()
    return {"graph": built_graph}

def validate_edges(state: ImageState):
    """Validate the detected edges against the ground truth for this image.

    The ground-truth file is looked up by the image's file stem, so
    ``.../images/4.png`` is checked against ``datasets/test/json/4.json``
    instead of every image being compared with sample 4.
    NOTE(review): assumes images and annotations share the same stem, as the
    test sample used in this notebook does; confirm for other datasets.

    Args:
        state: pipeline state; reads ``image_path`` and ``verbose``.

    Returns:
        A state update with ``validation_results`` holding whatever
        ``EdgeValidator.validate()`` returns.
    """
    if state["verbose"]:
        print("✅ Validating edges...")
    sample_id = Path(state["image_path"]).stem
    truth_path = from_root(f"datasets/test/json/{sample_id}.json")
    validator = EdgeValidator.from_json_file(truth_path, detector.get_graph().edges)
    return {"validation_results": validator.validate()}


In [179]:
# === Graph ===
# Workflow: read -> classify -> (diagram branch | non-diagram branch) -> END.
graph = StateGraph(ImageState)

# Every node is registered under the name of the function implementing it.
for node_fn in (
    read_image,
    classify_image,
    handle_non_diagram,
    run_yolo_detection,
    crop_diagram,
    detect_nodes,
    detect_edges,
    create_graph,
    validate_edges,
):
    graph.add_node(node_fn.__name__, node_fn)

# Fixed prefix of the pipeline.
for source, target in (
    (START, "read_image"),
    ("read_image", "classify_image"),
):
    graph.add_edge(source, target)

# After classification, route_decision returns the name of the next node;
# each possible return value maps to the node of the same name.
graph.add_conditional_edges(
    "classify_image",
    route_decision,
    {branch: branch for branch in ("run_yolo_detection", "handle_non_diagram")},
)

# Diagram branch (a straight chain) followed by the non-diagram exit.
for source, target in (
    ("run_yolo_detection", "crop_diagram"),
    ("crop_diagram", "detect_nodes"),
    ("detect_nodes", "detect_edges"),
    ("detect_edges", "create_graph"),
    ("create_graph", "validate_edges"),
    ("validate_edges", END),
    ("handle_non_diagram", END),
):
    graph.add_edge(source, target)

compiled_graph = graph.compile()

In [180]:
# === Run ===
# Invoke the compiled pipeline on test image 4 with progress output enabled.
image_path = str(from_root("datasets/test/images/4.png"))
initial_state = dict(image_path=image_path, verbose=True)
result = compiled_graph.invoke(initial_state)

# Summarise the final state; lists are reported by length rather than dumped.
print("\n📦 Final state:")
for key, value in result.items():
    shown = f"{len(value)} items" if isinstance(value, list) else value
    print(f"- {key}: {shown}")

🔍 Reading image: /Users/emilstausbol/Documents/GitHub/AIML25-Exam/datasets/test/images/4.png
🧠 Image classification: Diagram (confidence: 0.64)
📦 Running YOLO detection...

image 1/1 /Users/emilstausbol/Documents/GitHub/AIML25-Exam/datasets/test/images/4.png: 384x640 7 sub-flows, 969.0ms
Speed: 6.6ms preprocess, 969.0ms inference, 10.0ms postprocess per image at shape (1, 3, 384, 640)
✂️ Cropping diagram parts...
💾 Saved: tmp/cropped_1.png
💾 Saved: tmp/cropped_2.png
💾 Saved: tmp/cropped_3.png
💾 Saved: tmp/cropped_4.png
💾 Saved: tmp/cropped_5.png
💾 Saved: tmp/cropped_6.png
💾 Saved: tmp/cropped_7.png
🔍 Detecting nodes...

image 1/1 /Users/emilstausbol/Documents/GitHub/AIML25-Exam/datasets/test/images/4.png: 384x640 7 sub-flows, 900.6ms
Speed: 1.9ms preprocess, 900.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
totale images: 7
🔗 Detecting edges...


InstructorRetryException: 1 validation error for EdgeResponse
  Input should be an object [type=model_type, input_value=[3], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/model_type

In [141]:
def show_image_with_boxes(image_path, detections):
    """Display the image with every detection's bounding box and caption.

    Args:
        image_path: path of the image to display.
        detections: iterable of dicts with ``coords`` (x1, y1, x2, y2) and
            ``confidence``; ``label`` is used for the caption when present,
            otherwise ``class_id``.
    """
    # convert() returns an in-memory copy, so the file can be closed right away.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    _, ax = plt.subplots(figsize=(12, 8))
    ax.imshow(image)
    for det in detections:
        x1, y1, x2, y2 = det["coords"]
        rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        # The detection dicts printed in this notebook carry "class_id" but no
        # "label", so fall back instead of raising KeyError.
        caption = det.get("label", det.get("class_id", "?"))
        ax.text(x1, y1 - 5, f"{caption} ({det['confidence']:.2f})", color='white', backgroundcolor='red')
    ax.axis("off")
    plt.show()

# Only plot when the image was classified as a diagram and YOLO has run.
if result.get("is_diagram") and "diagram_detections" in result:
    show_image_with_boxes(image_path, result["diagram_detections"])

In [142]:
# Optional: Show YOLO detections
# NOTE(review): this cell re-defines show_image_with_boxes, which the previous
# cell already defines at top level; consider keeping a single definition.
if result.get("is_diagram") and result.get("diagram_detections"):
    def show_image_with_boxes(image_path, detections):
        """Display the image with every detection's bounding box and caption.

        Each detection needs ``coords`` (x1, y1, x2, y2) and ``confidence``;
        the caption uses ``label`` when present, otherwise ``class_id``.
        """
        # convert() returns an in-memory copy, so the file can be closed right away.
        with Image.open(image_path) as source:
            image = source.convert("RGB")
        _, ax = plt.subplots(figsize=(12, 8))
        ax.imshow(image)
        for det in detections:
            x1, y1, x2, y2 = det["coords"]
            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            # The detection dicts printed in this notebook carry "class_id"
            # but no "label", so fall back instead of raising KeyError.
            caption = det.get("label", det.get("class_id", "?"))
            ax.text(x1, y1 - 5, f"{caption} ({det['confidence']:.2f})", color='white', backgroundcolor='red')
        ax.axis("off")
        plt.show()

    show_image_with_boxes(result["image_path"], result["diagram_detections"])



image 1/1 /Users/emilstausbol/Documents/GitHub/AIML25-Exam/datasets/test/images/4.png: 384x640 7 sub-flows, 3893.4ms
Speed: 10.1ms preprocess, 3893.4ms inference, 18.2ms postprocess per image at shape (1, 3, 384, 640)
[{'xywh': [1380.986328125, 604.0886840820312, 989.7669677734375, 1017.4681396484375], 'confidence': 0.9350194334983826, 'class_id': 0, 'xyxy': [886.1029052734375, 95.35460662841797, 1875.869873046875, 1112.82275390625]}, {'xywh': [802.1484375, 599.476318359375, 809.3967895507812, 987.23681640625], 'confidence': 0.8156471848487854, 'class_id': 0, 'xyxy': [397.45001220703125, 105.8579330444336, 1206.8468017578125, 1093.0947265625]}, {'xywh': [634.4658813476562, 597.1973266601562, 1238.7115478515625, 993.0460815429688], 'confidence': 0.46774500608444214, 'class_id': 0, 'xyxy': [15.1101655960083, 100.67426300048828, 1253.8216552734375, 1093.7203369140625]}, {'xywh': [444.9051818847656, 602.0018310546875, 868.2406616210938, 1004.81494140625], 'confidence': 0.3852144181728363,