### Detect object

In [None]:
from typing import List
from PIL import Image
import torch
from transformers import Owlv2Processor, Owlv2ForObjectDetection

# The OWLv2 processor and model are expensive to construct, so they are loaded
# lazily on the first detect_object() call and reused by every later call.
_OWLV2_CHECKPOINT = "google/owlv2-base-patch16-ensemble"
_OWLV2_SCORE_THRESHOLD = 0.1
_owlv2_cache = {}


def _load_owlv2():
    """Return the cached (processor, model) pair, loading it on first use."""
    if "model" not in _owlv2_cache:
        # Load both into locals first so a failure on the second load does not
        # leave a half-filled cache behind.
        processor = Owlv2Processor.from_pretrained(_OWLV2_CHECKPOINT)
        model = Owlv2ForObjectDetection.from_pretrained(_OWLV2_CHECKPOINT)
        _owlv2_cache["processor"] = processor
        _owlv2_cache["model"] = model
    return _owlv2_cache["processor"], _owlv2_cache["model"]


def detect_object(
    image: Image.Image,
    class_name: str
) -> List[List[float]]:
    """
    Tool: DetectObject – Returns bounding boxes for all instances of the given class name.

    The detector reports corner-format boxes (xmin, ymin, xmax, ymax); each one
    is converted to [x, y, width, height], where (x, y) is the (xmin, ymin)
    corner. Detections scoring below 0.1 are discarded by the post-processing
    step.

    Parameters:
        image: A PIL Image to run detection on.
        class_name: The target class to detect (e.g. "cat", "dog").

    Returns:
        A list of bounding boxes [x, y, width, height], one list per detected
        instance. The list is empty when nothing passes the score threshold.
    """
    processor, model = _load_owlv2()

    # Prepare the grounding query
    text_labels = [[f"a photo of a {class_name}"]]
    inputs = processor(text=text_labels, images=image, return_tensors="pt")

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-processing rescales predictions to the given (height, width)
    target_sizes = torch.tensor([(image.height, image.width)])
    results = processor.post_process_grounded_object_detection(
        outputs=outputs,
        target_sizes=target_sizes,
        threshold=_OWLV2_SCORE_THRESHOLD,
        text_labels=text_labels
    )

    # Only one image was submitted, so only the first result entry is relevant.
    corner_boxes = results[0]["boxes"]  # Tensor of shape (N, 4): xmin,ymin,xmax,ymax

    # Convert each corner-format box to [x, y, width, height]
    return [
        [xmin, ymin, xmax - xmin, ymax - ymin]
        for xmin, ymin, xmax, ymax in corner_boxes.tolist()
    ]


In [3]:
from PIL import Image

# Demo: run the detector on a sample picture and print every cat it finds.
# Image.open() keeps the file handle open until the image is closed, so the
# source is opened in a context manager; convert() returns an independent copy.
with Image.open("../cat.jpg") as source_image:
    img = source_image.convert("RGB")

cat_boxes = detect_object(img, "cat")
if not cat_boxes:
    # Make an empty result visible instead of silently printing nothing.
    print("No cat detected")
for x, y, w, h in cat_boxes:
    print(f"Detected cat at x={x}, y={y}, width={w}, height={h}")

### Decompose question

In [2]:
from typing import List, Annotated
from langchain_google_vertexai import VertexAI
import os, json
from google.cloud import aiplatform
from autogen_core.tools import FunctionTool
from pathlib import Path

# Expose the service-account key through the GOOGLE_APPLICATION_CREDENTIALS
# environment variable, then initialise the Vertex AI SDK with its project
# and region.
CREDENTIALS_PATH = "../credentials/bwa-agents-ad0e1f5ab4b7.json"
os.environ.update({"GOOGLE_APPLICATION_CREDENTIALS": str(CREDENTIALS_PATH)})
aiplatform.init(location="us-central1", project="bwa-agents")


def _parse_sub_questions(raw: str) -> List[str]:
    """Extract at most two sub-questions from the raw model reply."""
    text = raw.strip()

    # The reply may wrap the array in Markdown code fences or surround it with
    # prose, so parse only the outermost [...] span instead of the whole text.
    start, end = text.find("["), text.rfind("]")
    if 0 <= start < end:
        try:
            parsed = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            parsed = None
        if (
            isinstance(parsed, list)
            and parsed
            and all(isinstance(item, str) for item in parsed)
        ):
            return parsed[:2]

    # Fallback: take the first two meaningful lines, dropping code-fence lines
    # and trimming bullet, quote and JSON list punctuation from both ends.
    candidates = []
    for line in text.splitlines():
        if line.lstrip().startswith("```"):
            continue
        cleaned = line.strip(' \t-"[],')
        if cleaned:
            candidates.append(cleaned)
    return candidates[:2]


def decompose_question(
    question: Annotated[str, "Complex question to decompose"]
) -> List[Annotated[str, "Simpler sub-question"]]:
    """
    Tool: DecomposeQuestion – Splits a complex question into two simpler sub-questions.

    A few-shot prompt asks the model for a JSON array of two strings. The reply
    is parsed leniently: the JSON array is used when one can be found, otherwise
    the first two non-empty lines of the reply are returned.

    Parameters:
        question: The complex question to decompose.

    Returns:
        A list of sub-question strings. It normally holds two entries but may
        hold fewer when the model's reply contains fewer usable items.
    """
    llm = VertexAI(model_name="gemini-2.0-flash-lite-001")

    # json.dumps quotes the question exactly like the few-shot examples and
    # escapes embedded quotes/newlines so they cannot break the prompt layout.
    quoted_question = json.dumps(question, ensure_ascii=False)
    prompt = (
        "Decompose the following complex question into exactly two simpler, self-contained sub-questions. "
        "The sub-questions, when answered sequentially, should help answer the original complex question. "
        "Return the two sub-questions as a JSON array of strings.\n\n"
        "Complex Question: \"What is the Köppen climate classification for the city where this mosque is located?\"\n"
        "Output: [\"In which city is this mosque located?\", \"What is the Köppen climate classification for this city?\"]\n\n"
        "Complex Question: \"Who is the CEO of the company that developed the game featuring a plumber who jumps on turtles?\"\n"
        "Output: [\"Which company developed the game featuring a plumber who jumps on turtles?\", \"Who is the CEO of that company?\"]\n\n"
        f"Complex Question: {quoted_question}\n"
        "Output:"
    )
    resp = llm.invoke(prompt)
    return _parse_sub_questions(resp)

# Sample multi-hop questions used to exercise decompose_question().
examples = [
    "What is the tallest mountain in the country where the world’s largest waterfall is located?",
    "Who is the author of the novel that inspired the movie about a girl who shrinks to the size of an ant?",
    "How many goals did the top scorer of the 2022 World Cup score, and which country did he play for?",
]

# Print each question followed by its numbered sub-questions; a failure on one
# example is reported and the loop moves on to the next.
for number, original in enumerate(examples, start=1):
    print(f"\nExample #{number}:")
    print("Original question:", original)
    try:
        subs = decompose_question(original)
        print("Sub-questions:")
        for position, part in enumerate(subs, start=1):
            print(f"  {position}. {part}")
    except Exception as e:
        print("Error calling decompose_question:", e)