In [1]:
pip install open-clip-torch

Collecting open-clip-torch
  Downloading open_clip_torch-2.32.0-py3-none-any.whl.metadata (31 kB)
Collecting ftfy (from open-clip-torch)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.9.0->open-clip-torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.9.0->open-clip-torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.9.0->open-clip-torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.9.0->open-clip-torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.9.0->open-clip-torch)
  Downloading nv

In [2]:
# ╔══════════════════════════════════════════════════════════════════════╗
# ║  Magma-8B Multi-Technique Demo with PDF Report Generation            ║
# ║  (works on Colab Pro, tested with A100)                              ║
# ╚══════════════════════════════════════════════════════════════════════╝

# 1️⃣  Install deps (first run only) ------------------------------------------
# (comment these out after the runtime already has the wheels cached)
!pip install -q --upgrade git+https://github.com/jwyang/transformers.git@dev/jwyang-v4.48.2 \
                      torchvision Pillow open_clip_torch reportlab

# 2️⃣  Imports & model ---------------------------------------------------------
import io, requests, torch, typing as T, os
from collections import Counter
from datetime import datetime
from PIL import Image
from google.colab import files
from transformers import AutoModelForCausalLM, AutoProcessor
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as RLImage, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib.enums import TA_CENTER, TA_LEFT
import tempfile

# Run on the GPU in bfloat16 when CUDA is available, otherwise on CPU in float32.
if torch.cuda.is_available():
    DEVICE, DTYPE = "cuda", torch.bfloat16
else:
    DEVICE, DTYPE = "cpu", torch.float32

# Single source of truth for the checkpoint used by both model and processor.
_MAGMA_CHECKPOINT = "microsoft/Magma-8B"

print("⏳ Loading Magma-8B …")
# trust_remote_code=True executes the modelling code shipped with the checkpoint.
MODEL = AutoModelForCausalLM.from_pretrained(
    _MAGMA_CHECKPOINT,
    trust_remote_code=True,
    torch_dtype=DTYPE,
)
MODEL = MODEL.to(DEVICE).eval()   # move to the target device, switch to eval mode

PROCESSOR = AutoProcessor.from_pretrained(
    _MAGMA_CHECKPOINT,
    trust_remote_code=True,
)
print("✅  Magma-8B ready")

# 3️⃣  Prompt builder with clear section tags ----------------------------------
def prompt_builder(
    task: str,
    flavour: str = "standard",
    *,
    reasoning_tag: str = "### Reasoning:",
    draft_tag:    str = "### Draft:",
    refined_tag:  str = "### Refined Answer:",
    answer_tag:   str = "### Answer:"
) -> list[dict[str, str]]:
    """Assemble the system + user chat messages for one prompting technique.

    Args:
        task: The instruction or question inserted after the image placeholder.
        flavour: One of ``standard``, ``cot``, ``cod`` or ``cot_consistency``
            (matched case-insensitively).
        reasoning_tag: Section marker used by the ``cot`` and
            ``cot_consistency`` prompts.
        draft_tag: Section marker introducing the draft in the ``cod`` prompt.
        refined_tag: Section marker for the polished reply in the ``cod`` prompt.
        answer_tag: Section marker the model is asked to finish with in the
            ``cot`` and ``cot_consistency`` prompts.

    Returns:
        A two-element list ``[system_message, user_message]`` of
        ``{"role": ..., "content": ...}`` dicts.

    Raises:
        ValueError: If ``flavour`` is not one of the four supported names.
    """
    mode = flavour.lower()
    if mode not in ("standard", "cot", "cod", "cot_consistency"):
        raise ValueError("flavour must be standard / cot / cod / cot_consistency")

    # Technique-specific instruction that follows the task text.
    instruction_by_mode = {
        "standard": "Please answer briefly and accurately.",
        "cot": f"{reasoning_tag} Think step-by-step, then end with:\n{answer_tag}",
        "cod": (
            f"{draft_tag} Bullet thoughts (≤ 6 words each). "
            f"Rewrite a polished reply under:\n{refined_tag}"
        ),
        "cot_consistency": (
            f"{reasoning_tag} Think independently. "
            f"Finish with:\n{answer_tag}"
        ),
    }

    # Every user message starts with the image placeholder tokens.
    image_slot = "<image_start><image><image_end>\n"
    user_message = {
        "role": "user",
        "content": f"{image_slot}{task}\n{instruction_by_mode[mode]}",
    }
    system_message = {
        "role": "system",
        "content": (
            "You are a vision-language agent that analyses indoor scenes "
            "and plans safe, precise actions for a 170 cm humanoid robot."
        ),
    }
    return [system_message, user_message]

# 4️⃣  Inference helper ---------------------------------------------------------
def _magma_generate_text(inputs, gen_cfg: dict) -> str:
    """Run one generation pass and return only the newly generated text."""
    with torch.inference_mode():
        ids = MODEL.generate(**inputs, **gen_cfg)
    # Decode only the tokens that follow the prompt.
    prompt_len = inputs["input_ids"].shape[-1]
    return PROCESSOR.decode(ids[0, prompt_len:], skip_special_tokens=True).strip()


def magma_analyse(
    image: Image.Image,
    task: str,
    flavour: str = "standard",
    *,
    max_new: int = 256,
    num_samples: int = 5,               # only for cot_consistency
    temperature: float = 0.7,
    top_p: float = 0.9,
) -> T.Union[str, tuple[str, list[str]]]:
    """Run Magma-8B on one image with one of four prompting techniques.

    Args:
        image: The picture to analyse.
        task: Instruction or question forwarded to ``prompt_builder``.
        flavour: ``standard``, ``cot``, ``cod`` or ``cot_consistency``
            (case-insensitive, consistent with ``prompt_builder``).
        max_new: Maximum number of new tokens per generation.
        num_samples: Number of independent generations drawn in
            ``cot_consistency`` mode; ignored by the other modes.
        temperature: Sampling temperature passed to ``generate``.
        top_p: Nucleus-sampling threshold passed to ``generate``.

    Returns:
        For ``standard`` / ``cot`` / ``cod``: the decoded answer string.
        For ``cot_consistency``: ``(majority, answers)`` where ``answers`` holds
        every decoded sample and ``majority`` is the most frequent one.

    Raises:
        ValueError: If ``flavour`` is unknown (raised by ``prompt_builder``) or
            if ``num_samples`` is below 1 in ``cot_consistency`` mode.
    """
    # Normalise once so the branch below agrees with prompt_builder, which
    # lower-cases the flavour itself. Without this, e.g. "CoT" would get a CoT
    # prompt but fall through to the self-consistency loop.
    flavour = flavour.lower()

    # ---- build chat prompt --------------------------------------------------
    chat_prompt = PROCESSOR.tokenizer.apply_chat_template(
        prompt_builder(task, flavour),   # also validates `flavour`
        tokenize=False,
        add_generation_prompt=True,
    )

    if flavour == "cot_consistency" and num_samples < 1:
        # Fail early with a clear message instead of an IndexError on the
        # empty vote after the loop.
        raise ValueError("num_samples must be >= 1 for cot_consistency")

    # ---- encode inputs ------------------------------------------------------
    inputs = PROCESSOR(images=[image], texts=chat_prompt, return_tensors="pt")
    # A leading dimension is added to the image tensors before they are moved
    # to the device (presumably the batch axis the remote model code expects -
    # TODO confirm against the Magma model card).
    inputs["pixel_values"] = inputs["pixel_values"].unsqueeze(0).to(DEVICE).to(DTYPE)
    inputs["image_sizes"]  = inputs["image_sizes"].unsqueeze(0).to(DEVICE)
    inputs["input_ids"]    = inputs["input_ids"].to(DEVICE)
    inputs["attention_mask"] = inputs["attention_mask"].to(DEVICE)

    # Sampling is enabled in every mode, so repeated calls can differ.
    gen_cfg = dict(
        max_new_tokens=max_new,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        use_cache=True,
        repetition_penalty=1.1,
    )

    # ---- single-shot modes --------------------------------------------------
    if flavour != "cot_consistency":
        return _magma_generate_text(inputs, gen_cfg)

    # ---- self-consistency mode ---------------------------------------------
    answers = [_magma_generate_text(inputs, gen_cfg) for _ in range(num_samples)]

    # NOTE: the vote is an exact match on the full decoded text, so samples
    # that differ only in wording count as different answers. On a tie,
    # Counter.most_common keeps the answer that was encountered first.
    majority = Counter(answers).most_common(1)[0][0]
    return majority, answers  # return both the vote and raw samples

# 5️⃣  Multi-technique analysis function ------------------------------------
def run_all_techniques(image: Image.Image, task: str) -> dict:
    """Apply every prompting technique to one image/task pair.

    Returns a dict keyed by technique name. Single-shot techniques map to
    ``{"answer", "type": "single"}``; ``cot_consistency`` maps to
    ``{"majority", "samples", "type": "consistency"}``.
    """
    print("🚀 Running all prompting techniques...")

    outcome = {}
    for name in ("standard", "cot", "cod", "cot_consistency"):
        label = name.upper()
        print(f"⏳ Running {label}...")

        output = magma_analyse(image, task, flavour=name)
        if name != "cot_consistency":
            outcome[name] = {"answer": output, "type": "single"}
        else:
            # Self-consistency yields the winning answer plus all raw samples.
            winner, all_samples = output
            outcome[name] = {
                "majority": winner,
                "samples": all_samples,
                "type": "consistency",
            }

        print(f"✅ {label} completed")

    return outcome

# 6️⃣  PDF Report Generator -------------------------------------------------
def _as_paragraph_markup(text) -> str:
    """Make arbitrary text safe to embed in a ReportLab ``Paragraph``.

    ``Paragraph`` parses its input as XML-like markup, so ``&``, ``<`` and ``>``
    in model output or user prompts must be escaped. Line breaks are turned
    into ``<br/>`` so multi-line answers keep their layout.
    """
    from xml.sax.saxutils import escape  # local import: keeps this block self-contained

    normalised = str(text).replace("\r\n", "\n")
    return escape(normalised).replace("\n", "<br/>")


def create_pdf_report(image: Image.Image, task: str, results: dict, filename: T.Optional[str] = None):
    """Generate a PDF report containing the input image and every technique's output.

    Args:
        image: The analysed picture; embedded near the top of the report.
        task: The user's prompt, shown in the metadata section.
        results: Mapping of technique name to a result dict. Entries with
            ``"type" == "single"`` must provide ``"answer"``; all other entries
            must provide ``"majority"`` and ``"samples"``. Techniques missing
            from the mapping are left out of the report.
        filename: Output path. When ``None`` a timestamped name of the form
            ``magma_8b_analysis_YYYYmmdd_HHMMSS.pdf`` is used.

    Returns:
        The path of the written PDF.

    Note:
        Text is rendered with the sample stylesheet's default fonts; characters
        outside those fonts' coverage may not display correctly.
    """
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"magma_8b_analysis_{timestamp}.pdf"

    # Create document
    doc = SimpleDocTemplate(filename, pagesize=A4)
    story = []
    styles = getSampleStyleSheet()
    normal = styles['Normal']

    # Custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=18,
        spaceAfter=30,
        alignment=TA_CENTER
    )

    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading2'],
        fontSize=14,
        spaceAfter=12,
        spaceBefore=20
    )

    subheading_style = ParagraphStyle(
        'CustomSubHeading',
        parent=styles['Heading3'],
        fontSize=12,
        spaceAfter=8,
        spaceBefore=10
    )

    # Title
    story.append(Paragraph("Magma-8B Multi-Technique Analysis Report", title_style))
    story.append(Spacer(1, 20))

    # Metadata (the task is user input, so it is escaped before embedding)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    story.append(Paragraph(f"<b>Generated:</b> {timestamp}", normal))
    story.append(Paragraph("<b>Model:</b> Microsoft Magma-8B", normal))
    story.append(Paragraph(f"<b>Task/Prompt:</b> {_as_paragraph_markup(task)}", normal))
    story.append(Spacer(1, 20))

    # The picture is handed to ReportLab as a file on disk. The file is kept
    # until after doc.build() and removed in the `finally` below, so it does
    # not leak when saving the image or building the PDF fails.
    tmp_file = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
    tmp_image_path = tmp_file.name
    try:
        with tmp_file:
            image.save(tmp_file, 'PNG')

        # Fit the picture inside the 4 x 3 inch box without distorting it.
        box_width, box_height = 4 * inch, 3 * inch
        scale = min(box_width / max(image.width, 1), box_height / max(image.height, 1))

        # Add image
        story.append(Paragraph("Input Image", heading_style))
        story.append(RLImage(tmp_image_path,
                             width=image.width * scale,
                             height=image.height * scale))
        story.append(Spacer(1, 20))

        # Results section
        story.append(Paragraph("Analysis Results", heading_style))
        story.append(Spacer(1, 10))

        # Technique descriptions
        technique_descriptions = {
            "standard": "Direct prompting without additional reasoning steps",
            "cot": "Chain-of-Thought: Step-by-step reasoning before final answer",
            "cod": "Chain-of-Draft: Draft thoughts followed by refined response",
            "cot_consistency": "Self-Consistency: Multiple reasoning paths with majority voting"
        }

        for technique in ["standard", "cot", "cod", "cot_consistency"]:
            entry = results.get(technique)
            if entry is None:
                continue  # technique was not run; omit its section

            story.append(Paragraph(f"{technique.upper()} Technique", subheading_style))
            story.append(Paragraph(f"<i>{technique_descriptions[technique]}</i>", normal))
            story.append(Spacer(1, 8))

            if entry["type"] == "single":
                story.append(Paragraph("<b>Response:</b>", normal))
                story.append(Paragraph(_as_paragraph_markup(entry["answer"]), normal))
            else:  # consistency
                story.append(Paragraph("<b>Majority Vote Result:</b>", normal))
                story.append(Paragraph(_as_paragraph_markup(entry["majority"]), normal))
                story.append(Spacer(1, 8))

                story.append(Paragraph("<b>All Samples:</b>", normal))
                for i, sample in enumerate(entry["samples"], 1):
                    story.append(Paragraph(f"Sample {i}: {_as_paragraph_markup(sample)}", normal))

            story.append(Spacer(1, 20))

        # Summary section
        story.append(PageBreak())
        story.append(Paragraph("Analysis Summary", heading_style))
        story.append(Paragraph(
            "This report compares four different prompting techniques applied to the Magma-8B vision-language model. "
            "Each technique has different strengths:", normal
        ))
        story.append(Spacer(1, 10))

        story.append(Paragraph("• <b>Standard:</b> Fast and direct, good for simple tasks", normal))
        story.append(Paragraph("• <b>CoT:</b> Better reasoning for complex tasks requiring step-by-step thinking", normal))
        story.append(Paragraph("• <b>CoD:</b> Iterative refinement for improved response quality", normal))
        story.append(Paragraph("• <b>CoT-Consistency:</b> Most robust but computationally expensive, reduces hallucinations", normal))

        # Build PDF
        doc.build(story)
    finally:
        # Clean up temporary image
        os.unlink(tmp_image_path)

    print(f"📄 PDF report saved as: {filename}")
    return filename

# 7️⃣  Main interactive function --------------------------------------------
def upload_and_analyze_all():
    """Interactive Colab flow: upload an image, ask for a task, run every
    technique, print the results, then build and download a PDF report.

    Only the first uploaded file is analysed. If the upload dialog is
    dismissed without choosing a file, the function prints a notice and
    returns without running the model.
    """
    print("📁 Please upload an image...")
    uploaded = files.upload()
    if not uploaded:
        # A cancelled dialog yields an empty mapping; bail out with a message
        # instead of letting next(iter(...)) raise a bare StopIteration.
        print("⚠️  No file was uploaded - nothing to analyse.")
        return

    # Decode from the uploaded bytes rather than re-reading a same-named file
    # from the working directory.
    img_bytes = next(iter(uploaded.values()))
    img = Image.open(io.BytesIO(img_bytes)).convert("RGB")

    print("🖼️  Image loaded successfully!")

    # Get task/prompt from user
    task = input("Enter your prompt/task (e.g., 'go towards a woman'): ").strip()
    if not task:
        task = "go towards a woman"  # default
        print(f"Using default prompt: '{task}'")

    print(f"\n🎯 Task: {task}")
    print("=" * 60)

    # Run all techniques
    results = run_all_techniques(img, task)

    # Display results
    print("\n" + "=" * 60)
    print("📊 RESULTS SUMMARY")
    print("=" * 60)

    for technique in ["standard", "cot", "cod", "cot_consistency"]:
        print(f"\n🔸 {technique.upper()}:")
        print("-" * 40)

        entry = results[technique]
        if entry["type"] == "single":
            print(entry["answer"])
        else:  # consistency
            print("Majority Answer:", entry["majority"])
            print(f"\nAll {len(entry['samples'])} samples:")
            for i, sample in enumerate(entry["samples"], 1):
                print(f"  [{i}] {sample}")

    # Generate PDF report
    print("\n" + "=" * 60)
    print("📄 Generating PDF Report...")
    pdf_filename = create_pdf_report(img, task, results)

    # Download the PDF
    print("⬇️  Downloading PDF report...")
    files.download(pdf_filename)

    print("\n✅ Analysis complete! Check your downloads for the PDF report.")

# 8️⃣  Alternative function for batch processing ------------------------
def analyze_with_custom_params(
    image_path: str,
    task: str,
    output_filename: str = None,
    max_new_tokens: int = 256,
    temperature: float = 0.7,
    num_consistency_samples: int = 5
):
    """Non-interactive variant: analyse an image file with tunable generation settings.

    Args:
        image_path: Path of the image to open (converted to RGB).
        task: Prompt forwarded to every technique.
        output_filename: PDF path passed to ``create_pdf_report``; ``None``
            lets that function choose a name.
        max_new_tokens: Forwarded to ``magma_analyse`` as ``max_new``.
        temperature: Sampling temperature forwarded to ``magma_analyse``.
        num_consistency_samples: Number of samples for ``cot_consistency``.

    Returns:
        ``(results, pdf_filename)``: the per-technique result dict and the
        path returned by ``create_pdf_report``.
    """
    img = Image.open(image_path).convert("RGB")

    # Generation settings shared by every technique.
    shared_kwargs = dict(max_new=max_new_tokens, temperature=temperature)

    results = {}
    for technique in ("standard", "cot", "cod", "cot_consistency"):
        if technique != "cot_consistency":
            single_answer = magma_analyse(img, task, flavour=technique, **shared_kwargs)
            results[technique] = {"answer": single_answer, "type": "single"}
            continue

        # Self-consistency additionally takes the sample count.
        vote, raw_samples = magma_analyse(
            img, task, flavour=technique,
            num_samples=num_consistency_samples,
            **shared_kwargs,
        )
        results[technique] = {"majority": vote, "samples": raw_samples, "type": "consistency"}

    # Generate PDF
    pdf_filename = create_pdf_report(img, task, results, output_filename)
    return results, pdf_filename

# 👉  Run the enhanced demo
# Print the banner in one call (lines joined by newlines), then start the
# interactive upload -> analyse -> report flow.
print(
    "\n🎉 Enhanced Magma-8B Multi-Technique Demo Ready!",
    "This will run all 4 techniques automatically and generate a PDF report.",
    "\nStarting analysis...",
    sep="\n",
)
upload_and_analyze_all()

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m132.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m821.2/821.2 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m393.1/393.1 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m124.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m99.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.7/897.7 kB[0m [31m62.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m571.0/571.0 MB[0m [31m1.9 MB/s[0m eta [36m0:00:

config.json:   0%|          | 0.00/3.25k [00:00<?, ?B/s]

configuration_magma.py:   0%|          | 0.00/7.60k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Magma-8B:
- configuration_magma.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_magma.py:   0%|          | 0.00/75.6k [00:00<?, ?B/s]

image_tower_magma.py:   0%|          | 0.00/15.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Magma-8B:
- image_tower_magma.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/microsoft/Magma-8B:
- modeling_magma.py
- image_tower_magma.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/70.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.91G [00:00<?, ?B/s]

open_clip_pytorch_model.bin:   0%|          | 0.00/4.80G [00:00<?, ?B/s]

open_clip_config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of MagmaForCausalLM were not initialized from the model checkpoint at microsoft/Magma-8B and are newly initialized: ['vision_tower.clip_vision_model.head.proj.weight', 'vision_tower.clip_vision_model.trunk.head.norm.bias', 'vision_tower.clip_vision_model.trunk.head.norm.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/152 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/478 [00:00<?, ?B/s]

processing_magma.py:   0%|          | 0.00/7.77k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Magma-8B:
- processing_magma.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


image_processing_magma.py:   0%|          | 0.00/11.2k [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/52.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/434 [00:00<?, ?B/s]

✅  Magma-8B ready

🎉 Enhanced Magma-8B Multi-Technique Demo Ready!
This will run all 4 techniques automatically and generate a PDF report.

Starting analysis...
📁 Please upload an image...


Saving courage.jpg to courage.jpg
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): How can you go to the first floor?

🎯 Task: How can you go to the first floor?
🚀 Running all prompting techniques...
⏳ Running STANDARD...


  return fn(*args, **kwargs)
The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.


✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
You can take the green staircase on the left side of the room.

🔸 COT:
----------------------------------------
To get to the first floor, you would need to walk down the green staircase on the left side of the image. The staircase leads from the second floor to the first floor, providing access to the lower level of the house.

🔸 COD:
----------------------------------------
To reach the first floor, take the green staircase located in the living room.

🔸 COT_CONSISTENCY:
----------------------------------------
Majority Answer: The first floor is accessible via a set of green stairs located in the living room.

All 5 samples:
  [1] The first floor is accessible via a set of green stairs located in the living room.
  [2] To get to the first floor, you can use the green st

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.


In [3]:
upload_and_analyze_all()

📁 Please upload an image...


Saving Kitchen.jpg to Kitchen.jpg
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): Can you go towards the woman?

🎯 Task: Can you go towards the woman?
🚀 Running all prompting techniques...
⏳ Running STANDARD...
✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
Yes

🔸 COT:
----------------------------------------
Sure! Let's start by observing the scene. The kitchen is spacious and well-lit, with white walls and gray cabinets. A large stainless steel refrigerator stands on the left side of the room, while a black island takes center stage in the middle. Three wicker barstools are tucked under the island, providing seating.

A woman is seated at the island, engrossed in her phone. She’s dressed casually in a white shirt and blue jeans. The island houses a sink and a stove, ready for culin

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.


In [4]:
upload_and_analyze_all()

📁 Please upload an image...


Saving home.jpg to home.jpg
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): Can you go towards the lamp?

🎯 Task: Can you go towards the lamp?
🚀 Running all prompting techniques...
⏳ Running STANDARD...
✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
Yes

🔸 COT:
----------------------------------------
Yes, I can walk towards the floor lamp.

🔸 COD:
----------------------------------------
Yes, I can walk towards the lamp.

🔸 COT_CONSISTENCY:
----------------------------------------
Majority Answer: Yes, I can walk towards the lamp.

All 5 samples:
  [1] Yes, I can walk towards the lamp.
  [2] Yes, I can walk towards the floor lamp in the living room.
  [3] Yes, I can walk towards the lamp.
  [4] No, I cannot reach the lamp as it is too far away from me. I am only a computer program a

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.


In [5]:
upload_and_analyze_all()



📁 Please upload an image...


Saving kitchen_map.JPG to kitchen_map.JPG
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): How can you go towards the commercial dishwasher?

🎯 Task: How can you go towards the commercial dishwasher?
🚀 Running all prompting techniques...
⏳ Running STANDARD...
✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
兄弟 〜ｍ衝ẳn بلغุษย

🔸 COT:
----------------------------------------
To reach the commercial dishwasher from the main kitchen, follow these steps:

1. Move towards the right side of the room.
2. Continue moving right until you reach the commercial dishwasher area.

The commercial dishwasher is located on the right side of the room can be accessed by following these steps.

🔸 COD:
----------------------------------------
To reach the commercial dishwasher, follow the path from the kitchen

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.


In [6]:
upload_and_analyze_all()


📁 Please upload an image...


Saving office-retail-category-desks-tables.jpg to office-retail-category-desks-tables.jpg
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): How can you locate and move towards any chair in the office?

🎯 Task: How can you locate and move towards any chair in the office?
🚀 Running all prompting techniques...
⏳ Running STANDARD...
✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
огля एपکات ослож 手机 домовุษย

🔸 COT:
----------------------------------------
olet Григореком斷 kaliteli 있어서ุษย

🔸 COD:
----------------------------------------
огля एपکات ослож 手机 домовุษย

🔸 COT_CONSISTENCY:
----------------------------------------
Majority Answer: oletанню……。 手机 Peygamberційнаiв

All 5 samples:
  [1] oletанню……。 手机 Peygamberційнаiв
  [2] ือขौडアニメаннюつぶ efektุษย
  [3] olet……。Nghือข منزلفوุษย
  [4]

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.


In [7]:
upload_and_analyze_all()

📁 Please upload an image...


Saving room_map.jpg to room_map.jpg
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): Bring me a piece of a paper

🎯 Task: Bring me a piece of a paper
🚀 Running all prompting techniques...
⏳ Running STANDARD...
✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
The paper is located on the desk in the room with the bed and round rug.

🔸 COT:
----------------------------------------
the camera wearer takes the paper on the table

🔸 COD:
----------------------------------------
1. Plan actions for robot's tasks.
2. Ensure precision and safety.
3. Indoor scene analysis required.
4. 170cm humanoid robot target.
5. Safe actions for indoors.
6. Precise movements in rooms.
7. Avoid obstacles and hazards.
8. Efficient navigation within space.
9. Adapt to changing environments.
10. Considerate priva

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.


In [8]:
upload_and_analyze_all()

📁 Please upload an image...


Saving office_layout.jpg to office_layout.jpg
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): How can you go to the Eddy table?

🎯 Task: How can you go to the Eddy table?
🚀 Running all prompting techniques...
⏳ Running STANDARD...
✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
oletiв炉つぶยนแปลง私のุษย

🔸 COT:
----------------------------------------
兄弟族自治کاتิวเตอร 드라마ційнаุษย

🔸 COD:
----------------------------------------
To reach the Eddy table, follow these steps:

1. Move towards the kitchen area.
2. Turn right from the hallway.
3. Follow the signs for the Eddy room.

### Original thoughts:
- Go to the kitchen.
- Find the Eddy table.

### Refinement:
- To quickly locate the Eddy table, head towards the kitchen area and turn right at the signs indicating its direction.

🔸 COT_CONSIST

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.


In [9]:
upload_and_analyze_all()

📁 Please upload an image...


Saving office_marked.JPG to office_marked.JPG
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): From the entrance, go towards the green sofa

🎯 Task: From the entrance, go towards the green sofa
🚀 Running all prompting techniques...
⏳ Running STANDARD...
✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
the camera wearer goes up the stairs

🔸 COT:
----------------------------------------
The green sofa is located in the bottom right corner of the image, near the center of the room. To get there, walk straight ahead from the entrance, passing by the bookshelves on your left and the TV on your right. Continue moving forward until you reach the green sofa.

🔸 COD:
----------------------------------------
1. Please proceed to the green sofa.
2. You will find a comfortable seating area there.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.


In [10]:
upload_and_analyze_all()

📁 Please upload an image...


Saving office_plan.PNG to office_plan.PNG
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): Go towards the big oval table 

🎯 Task: Go towards the big oval table
🚀 Running all prompting techniques...
⏳ Running STANDARD...
✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
Coordinate: (0.87, 0.21)

🔸 COT:
----------------------------------------
the camera wearer moves around the room

🔸 COD:
----------------------------------------
1. The dining area is located in the bottom right corner of the floor plan.
2. There are two types of tables available: round and rectangular.
3. The large oval table is situated in the top right corner of the floor plan.
4. The conference room is located in the top left corner of the floor plan.
5. There is a microwave located in the bottom left corner of the f

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.


In [11]:
upload_and_analyze_all()

📁 Please upload an image...


Saving the-ultimate-home-office-design.jpg to the-ultimate-home-office-design.jpg
🖼️  Image loaded successfully!
Enter your prompt/task (e.g., 'go towards a woman'): Where is the laptop located?

🎯 Task: Where is the laptop located?
🚀 Running all prompting techniques...
⏳ Running STANDARD...
✅ STANDARD completed
⏳ Running COT...
✅ COT completed
⏳ Running COD...
✅ COD completed
⏳ Running COT_CONSISTENCY...
✅ COT_CONSISTENCY completed

📊 RESULTS SUMMARY

🔸 STANDARD:
----------------------------------------
Desk

🔸 COT:
----------------------------------------
The laptop is located on top of a wooden bookshelf in a library-like room. This suggests that the room might be used for studying or working, as it has a desk and chair setup nearby. It could also imply that the person who uses this space prefers to have their work area close to their book collection, possibly indicating an interest in both traditional and digital resources. The presence of a calendar on the wall further supports th

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Analysis complete! Check your downloads for the PDF report.
