# Run Navigation

In [None]:
import sys
sys.path.insert(0, "../src")

from PIL import Image
from mai_naivigation_agent import MAIUINaivigationAgent
from utils import draw_clicks_on_image, extract_click_coordinates

In [None]:
# Locations of the two example screenshots used in this notebook
SCREENSHOT_PATH_1 = "../resources/example_img/figure1.png"
SCREENSHOT_PATH_2 = "../resources/example_img/figure2.png"

# Open both screenshots, report the size of the first one, then preview each
test_image_1, test_image_2 = (
    Image.open(path) for path in (SCREENSHOT_PATH_1, SCREENSHOT_PATH_2)
)
print(f"Image size: {test_image_1.size}")
for preview in (test_image_1, test_image_2):
    display(preview)

In [None]:
# Natural-language task handed to the agent
instruction = "open the settings and turn on the wifi"

# Settings forwarded to the agent through its `runtime_conf` argument
agent_runtime_conf = {
    "history_n": 3,
    "temperature": 0.0,
    "top_k": -1,
    "top_p": 1.0,
    "max_tokens": 2048,
}

# Build the agent, pointing it at the model endpoint on localhost:8000
agent = MAIUINaivigationAgent(
    llm_base_url="http://localhost:8000/v1",
    model_name="MAI-UI-8B",
    runtime_conf=agent_runtime_conf,
)

In [None]:
# Ask the agent for an action on each screenshot in turn and keep every
# outcome (1-based image index, raw response, parsed action, source image).
test_images = [test_image_1, test_image_2]
results = []

for image_idx, screenshot in enumerate(test_images, start=1):
    print(f"Processing Image {image_idx}")
    print(f"Processing instruction: '{instruction}'")

    # The agent receives the screenshot wrapped in an observation dict
    prediction, action = agent.predict(instruction, {"screenshot": screenshot})

    # Show the raw response followed by the processed action
    print("\n=== Raw Model Response ===")
    print(prediction)
    print("\n=== Processed Action ===")
    print(action)

    record = {
        "image_idx": image_idx,
        "prediction": prediction,
        "action": action,
        "test_image": screenshot,
    }
    results.append(record)


In [None]:
# Paths are looked up by each result's 1-based "image_idx"
screenshot_paths = [SCREENSHOT_PATH_1, SCREENSHOT_PATH_2]

for entry in results:
    image_idx = entry["image_idx"]
    source_image = entry["test_image"]
    source_path = screenshot_paths[image_idx - 1]

    print(f"\n{'='*50}")
    print(f"Visualizing Image {image_idx}")
    print(f"{'='*50}")

    click_coordinates = extract_click_coordinates(entry["action"])
    if not click_coordinates:
        # Nothing to draw for this result; move on to the next one
        print("No click actions found in the provided actions.")
        continue

    # Scale the extracted coordinates by the image's width and height
    abs_x = click_coordinates[0] * source_image.width
    abs_y = click_coordinates[1] * source_image.height
    abs_click_coordinates = (abs_x, abs_y)
    print(f"Click coordinates (normalized): {click_coordinates}")
    print(f"Click coordinates (absolute): {abs_click_coordinates}")

    # Draw the click on the screenshot loaded from disk and show the result
    annotated = draw_clicks_on_image(source_path, abs_click_coordinates)
    display(annotated)
