# OpenAI API Labeling

## Count and sample from labeled dataset

In [None]:
import json
import random
from collections import Counter, defaultdict
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import base64
from openai import OpenAI
from dotenv import load_dotenv
from tqdm import tqdm
import re

In [None]:
HAND_LABELED_PATH = "../json_txt/hand_labeled_annotations_redo.json"
SAMPLED_OUT_PATH = "../sample_100_for_openai_lights.json"
# SAMPLED_OUT_PATH = "../sample_100_for_openai_lights_test.json"


# Read every hand-labeled annotation record.
with open(HAND_LABELED_PATH) as f:
    data = json.load(f)

# Tally how often each (turn_signal, tail_light) pair occurs, in first-seen order.
combo_counts = Counter()
for d in data:
    combo_counts[(d["turn_signal"], d["tail_light"])] += 1

print("Class distribution:")
for combo, count in combo_counts.items():
    print(f"{combo}: {count}")

In [None]:
# Number of images to draw for each (turn_signal, tail_light) combination.
# The counts add up to 130 in total.
sampling_scheme = {
    # (turn_signal, tail_light): sample size
    ("none", "off"): 30,
    ("left", "on"): 15,
    ("left", "unclear"): 6,
    ("left", "off"): 20,
    ("right", "off"): 20,
    ("right", "on"): 15,
    ("right", "unclear"): 10,
    ("none", "on"): 7,
    ("none", "unclear"): 7,
}

In [None]:
# sampled = []
# for combo, n in sampling_scheme.items():
#     subset = [d for d in data if (d["turn_signal"], d["tail_light"]) == combo]
#     if len(subset) < n:
#         print(f"Only found {len(subset)} samples for {combo}, taking all available.")
#     sampled.extend(random.sample(subset, min(len(subset), n)))

# print(f"\nTotal sampled: {len(sampled)}")
# with open(SAMPLED_OUT_PATH, "w") as f:
#     json.dump(sampled, f, indent=2)
# print(f"Saved sampled subset to {SAMPLED_OUT_PATH}")

### Accuracy of turn_signal predictions per turn signal type

In [None]:
def accuracy_per_turn_signal(results):
    """Compute turn-signal accuracy separately for each hand-labeled class.

    Args:
        results: Iterable of records, each with a "hand_label" dict holding
            "turn_signal" and an "openai_label" value holding the model reply.

    Returns:
        Dict mapping each hand-labeled turn_signal value (in first-seen
        order) to the fraction of its records where the model predicted the
        same value. An empty input yields an empty dict.

    A model reply that is not a dict, or that lacks a "turn_signal" key, is
    counted as an incorrect prediction instead of raising KeyError.
    """
    missing = object()  # sentinel that never compares equal to a real label
    totals = Counter()
    hits = Counter()

    for r in results:
        hand = r["hand_label"]["turn_signal"]
        model_label = r["openai_label"]
        if isinstance(model_label, dict):
            pred = model_label.get("turn_signal", missing)
        else:
            pred = missing
        totals[hand] += 1
        if pred == hand:
            hits[hand] += 1

    # Every key in totals was incremented at least once, so no zero division.
    return {ts_type: hits[ts_type] / total for ts_type, total in totals.items()}

## Run the OpenAI API on the sampled images

In [None]:
# Load variables from a .env file (if present) before OPENAI_API_KEY is read below.
load_dotenv()
# Shared OpenAI client used by every classify_image variant in this notebook.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# JSON file listing the sampled images to classify.
# NOTE(review): SAMPLED_OUT_PATH above is "../sample_100_for_openai_lights.json"
# (no json_txt/ directory) — confirm the sampled file was moved to this location.
SAMPLED_PATH = "../json_txt/sample_100_for_openai_lights.json"
# Directory that each entry's "image" path is joined onto.
LOCAL_BASE = "../sampled_images" 

In [None]:
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 text string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

## gpt-4o-mini & prompt 1

Prompt 1:

```
"You are an advanced image analysis model. Look at the car image and determine:\n"
"1. turn_signal — one of: left, right, unclear, none, both\n"
"2. tail_light — one of: on, off, unclear \n\n"
"Return only valid JSON in this format:\n"
"{\n"
"  \"turn_signal\": \"left\",\n"
"  \"tail_light\": \"on\"\n"
"}"
```

Approx time: 7:44pm October 27, 2025

In [None]:
# Where the classification loop below saves the gpt-4o-mini / prompt 1 results.
OUTPUT_FILE = "../json_txt/gpt-4o-mini_prompt1.json"

In [None]:
def classify_image(local_path):
    """Classify one car image with gpt-4o-mini using prompt 1.

    Args:
        local_path: Path of the image file. It is read and base64-encoded by
            encode_image and sent as an image/jpeg data URL. Errors raised
            while reading the file are not caught here.

    Returns:
        The model reply parsed by json.loads. The prompt asks for
        "turn_signal" and "tail_light" keys, but their presence is not
        checked. If the API call or the JSON parsing raises, the error is
        printed and a fallback dict with "turn_signal" set to "error" and
        "tail_light" set to "not_visible" is returned.
    """
    img_b64 = encode_image(local_path)
    instructions = (
        "You are an advanced image analysis model. Look at the car image and determine:\n"
        "1. turn_signal — one of: left, right, unclear, none, both\n"
        "2. tail_light — one of: on, off, unclear \n\n"
        "Return only valid JSON in this format:\n"
        "{\n"
        "  \"turn_signal\": \"left\",\n"
        "  \"tail_light\": \"on\"\n"
        "}"
    )
    prompt = {
        "role": "user",
        "content": [
            {"type": "text", "text": instructions},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
        ],
    }

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[prompt],
            temperature=0,
            response_format={"type": "json_object"},
        )
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"Error processing {local_path}: {e}")
        # "none" is one of the labels the prompt allows, so using it as the
        # failure fallback would score failed requests as "none" predictions.
        # "error" is outside the prompt's vocabulary and is always a miss.
        return {"turn_signal": "error", "tail_light": "not_visible"}

In [None]:
# Load the sampled entries; the loop below reads "image", "turn_signal" and
# "tail_light" from each one.
with open(SAMPLED_PATH) as f:
    sampled = json.load(f)

In [None]:
# Classify every sampled image and pair the model's answer with the hand label.
results = []
for entry in tqdm(sampled, desc="Classifying images"):
    relative_image = entry["image"].lstrip("/")
    local_path = os.path.join(LOCAL_BASE, relative_image)
    print(local_path)
    result = classify_image(local_path)
    hand_label = {
        "turn_signal": entry["turn_signal"],
        "tail_light": entry["tail_light"],
    }
    results.append(
        {"image": entry["image"], "hand_label": hand_label, "openai_label": result}
    )

# The whole run is written in one go, after every image has been processed.
with open(OUTPUT_FILE, "w") as out_file:
    json.dump(results, out_file, indent=2)

print(f"\nDone! Saved {len(results)} results to {OUTPUT_FILE}")

### Display results side-by-side

In [None]:
RESULTS_PATH = "../json_txt/gpt-4o-mini_prompt1.json"
LOCAL_BASE = "../sampled_images"

with open(RESULTS_PATH) as f:
    results = json.load(f)

# Show at most the first 100 results, 12 images (3x4 grid) per figure.
shown = results[:100]
last_index = len(shown) - 1
for i, entry in enumerate(shown):
    if i % 12 == 0:
        plt.figure(figsize=(12, 10))
    plt.subplot(3, 4, (i % 12) + 1)
    img_path = os.path.join(LOCAL_BASE, entry["image"].lstrip("/"))
    try:
        img = mpimg.imread(img_path)
        plt.imshow(img)
        plt.axis("off")
        plt.title(
            f"Hand: Turn Signal={entry['hand_label']['turn_signal']} | Tail Light={entry['hand_label']['tail_light']}\n"
            f"OpenAI: Turn Signal={entry['openai_label']['turn_signal']} | Tail Light={entry['openai_label']['tail_light']}",
            fontsize=8
        )
    except Exception as e:
        plt.title(f"Missing image: {e}")
    # Finish the figure when the page is full or this is the last image that
    # is actually displayed. The check uses the sliced list, not len(results),
    # so a final partial page is still laid out when there are more than 100
    # results.
    if (i + 1) % 12 == 0 or i == last_index:
        plt.tight_layout()
        plt.show()

### Metrics comparison

In [None]:
# Reload the saved run so this cell does not depend on the cells above.
with open("../json_txt/gpt-4o-mini_prompt1.json") as f:
    results = json.load(f)

total = len(results)

# Count exact matches between hand label and model label for each field.
turn_correct = 0
tail_correct = 0
for r in results:
    hand, predicted = r["hand_label"], r["openai_label"]
    turn_correct += hand["turn_signal"] == predicted["turn_signal"]
    tail_correct += hand["tail_light"] == predicted["tail_light"]

print(f"Turn signal accuracy: {turn_correct / total:.2%}")
print(f"Tail light accuracy: {tail_correct / total:.2%}")

In [None]:
# Per-class turn-signal accuracy for whichever `results` is currently in scope
# (normally the file loaded in the previous cell), keyed by hand label.
per_turn_accuracy = accuracy_per_turn_signal(results)
print("\nTurn signal accuracy per type:")
for ts_type, acc in per_turn_accuracy.items():
    print(f"{ts_type}: {acc:.2%}")

## gpt-4o & prompt 1

Ran approx: 7:47pm October 27 2025

In [None]:
# Where the classification loop below saves the gpt-4o / prompt 1 results.
OUTPUT_FILE = "../json_txt/gpt-4o_prompt1.json"

In [None]:
def classify_image(local_path):
    """Classify one car image with gpt-4o using prompt 1.

    Args:
        local_path: Path of the image file. It is read and base64-encoded by
            encode_image and sent as an image/jpeg data URL. Errors raised
            while reading the file are not caught here.

    Returns:
        The model reply parsed by json.loads. The prompt asks for
        "turn_signal" and "tail_light" keys, but their presence is not
        checked. If the API call or the JSON parsing raises, the error is
        printed and a fallback dict with "turn_signal" set to "error" and
        "tail_light" set to "not_visible" is returned.
    """
    img_b64 = encode_image(local_path)
    instructions = (
        "You are an advanced image analysis model. Look at the car image and determine:\n"
        "1. turn_signal — one of: left, right, unclear, none, both\n"
        "2. tail_light — one of: on, off, unclear \n\n"
        "Return only valid JSON in this format:\n"
        "{\n"
        "  \"turn_signal\": \"left\",\n"
        "  \"tail_light\": \"on\"\n"
        "}"
    )
    prompt = {
        "role": "user",
        "content": [
            {"type": "text", "text": instructions},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
        ],
    }

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[prompt],
            temperature=0,
            response_format={"type": "json_object"},
        )
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"Error processing {local_path}: {e}")
        # "none" is one of the labels the prompt allows, so using it as the
        # failure fallback would score failed requests as "none" predictions.
        # "error" is outside the prompt's vocabulary and is always a miss.
        return {"turn_signal": "error", "tail_light": "not_visible"}

In [None]:
# Load the sampled entries; the loop below reads "image", "turn_signal" and
# "tail_light" from each one.
with open(SAMPLED_PATH) as f:
    sampled = json.load(f)

In [None]:
# Classify every sampled image and pair the model's answer with the hand label.
results = []
for entry in tqdm(sampled, desc="Classifying images"):
    relative_image = entry["image"].lstrip("/")
    local_path = os.path.join(LOCAL_BASE, relative_image)
    print(local_path)
    result = classify_image(local_path)
    hand_label = {
        "turn_signal": entry["turn_signal"],
        "tail_light": entry["tail_light"],
    }
    results.append(
        {"image": entry["image"], "hand_label": hand_label, "openai_label": result}
    )

# The whole run is written in one go, after every image has been processed.
with open(OUTPUT_FILE, "w") as out_file:
    json.dump(results, out_file, indent=2)

print(f"\nDone! Saved {len(results)} results to {OUTPUT_FILE}")

### Display results side-by-side

In [None]:
RESULTS_PATH = "../json_txt/gpt-4o_prompt1.json"
LOCAL_BASE = "../sampled_images"

with open(RESULTS_PATH) as f:
    results = json.load(f)

# Show at most the first 100 results, 12 images (3x4 grid) per figure.
shown = results[:100]
last_index = len(shown) - 1
for i, entry in enumerate(shown):
    if i % 12 == 0:
        plt.figure(figsize=(12, 10))
    plt.subplot(3, 4, (i % 12) + 1)
    img_path = os.path.join(LOCAL_BASE, entry["image"].lstrip("/"))
    try:
        img = mpimg.imread(img_path)
        plt.imshow(img)
        plt.axis("off")
        plt.title(
            f"Hand: Turn Signal={entry['hand_label']['turn_signal']} | Tail Light={entry['hand_label']['tail_light']}\n"
            f"OpenAI: Turn Signal={entry['openai_label']['turn_signal']} | Tail Light={entry['openai_label']['tail_light']}",
            fontsize=8
        )
    except Exception as e:
        plt.title(f"Missing image: {e}")
    # Finish the figure when the page is full or this is the last image that
    # is actually displayed. The check uses the sliced list, not len(results),
    # so a final partial page is still laid out when there are more than 100
    # results.
    if (i + 1) % 12 == 0 or i == last_index:
        plt.tight_layout()
        plt.show()

### Metrics comparison

In [None]:
# Reload the saved run so this cell does not depend on the cells above.
with open("../json_txt/gpt-4o_prompt1.json") as f:
    results = json.load(f)

total = len(results)

# Count exact matches between hand label and model label for each field.
turn_correct = 0
tail_correct = 0
for r in results:
    hand, predicted = r["hand_label"], r["openai_label"]
    turn_correct += hand["turn_signal"] == predicted["turn_signal"]
    tail_correct += hand["tail_light"] == predicted["tail_light"]

print(f"Turn signal accuracy: {turn_correct / total:.2%}")
print(f"Tail light accuracy: {tail_correct / total:.2%}")

In [None]:
# Per-class turn-signal accuracy for whichever `results` is currently in scope
# (normally the file loaded in the previous cell), keyed by hand label.
per_turn_accuracy = accuracy_per_turn_signal(results)
print("\nTurn signal accuracy per type:")
for ts_type, acc in per_turn_accuracy.items():
    print(f"{ts_type}: {acc:.2%}")

## gpt-4o-mini & prompt 2

Prompt 2:

```
"You are an advanced image analysis model. Look at the car image and determine:\n"
"1. turn_signal — one of: left, right, unclear, none \n"
"2. tail_light — one of: on, off, unclear \n\n"
"Return only valid JSON in this format:\n"
"{\n"
"  \"turn_signal\": \"left\",\n"
"  \"tail_light\": \"on\"\n"
"}"
```

Removing the `both` option (`none` is kept), as perhaps this "confuses" the model.

Running at approx 7:54pm October 27, 2025

In [None]:
# Where the classification loop below saves the gpt-4o-mini / prompt 2 results.
OUTPUT_FILE = "../json_txt/gpt-4o-mini_prompt2.json"

In [None]:
def classify_image(local_path):
    """Classify one car image with gpt-4o-mini using prompt 2.

    Prompt 2 offers left, right, unclear and none for turn_signal (no "both").

    Args:
        local_path: Path of the image file. It is read and base64-encoded by
            encode_image and sent as an image/jpeg data URL. Errors raised
            while reading the file are not caught here.

    Returns:
        The model reply parsed by json.loads. The prompt asks for
        "turn_signal" and "tail_light" keys, but their presence is not
        checked. If the API call or the JSON parsing raises, the error is
        printed and a fallback dict with "turn_signal" set to "error" and
        "tail_light" set to "not_visible" is returned.
    """
    img_b64 = encode_image(local_path)
    instructions = (
        "You are an advanced image analysis model. Look at the car image and determine:\n"
        "1. turn_signal — one of: left, right, unclear, none \n"
        "2. tail_light — one of: on, off, unclear \n\n"
        "Return only valid JSON in this format:\n"
        "{\n"
        "  \"turn_signal\": \"left\",\n"
        "  \"tail_light\": \"on\"\n"
        "}"
    )
    prompt = {
        "role": "user",
        "content": [
            {"type": "text", "text": instructions},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
        ],
    }

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[prompt],
            temperature=0,
            response_format={"type": "json_object"},
        )
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"Error processing {local_path}: {e}")
        # "none" is one of the labels the prompt allows, so using it as the
        # failure fallback would score failed requests as "none" predictions.
        # "error" is outside the prompt's vocabulary and is always a miss.
        return {"turn_signal": "error", "tail_light": "not_visible"}

In [None]:
# Load the sampled entries; the loop below reads "image", "turn_signal" and
# "tail_light" from each one.
with open(SAMPLED_PATH) as f:
    sampled = json.load(f)

In [None]:
# Classify every sampled image and pair the model's answer with the hand label.
results = []
for entry in tqdm(sampled, desc="Classifying images"):
    relative_image = entry["image"].lstrip("/")
    local_path = os.path.join(LOCAL_BASE, relative_image)
    print(local_path)
    result = classify_image(local_path)
    hand_label = {
        "turn_signal": entry["turn_signal"],
        "tail_light": entry["tail_light"],
    }
    results.append(
        {"image": entry["image"], "hand_label": hand_label, "openai_label": result}
    )

# The whole run is written in one go, after every image has been processed.
with open(OUTPUT_FILE, "w") as out_file:
    json.dump(results, out_file, indent=2)

print(f"\nDone! Saved {len(results)} results to {OUTPUT_FILE}")

### Display results side-by-side

In [None]:
RESULTS_PATH = "../json_txt/gpt-4o-mini_prompt2.json"
LOCAL_BASE = "../sampled_images"

with open(RESULTS_PATH) as f:
    results = json.load(f)

# Show at most the first 100 results, 12 images (3x4 grid) per figure.
shown = results[:100]
last_index = len(shown) - 1
for i, entry in enumerate(shown):
    if i % 12 == 0:
        plt.figure(figsize=(12, 10))
    plt.subplot(3, 4, (i % 12) + 1)
    img_path = os.path.join(LOCAL_BASE, entry["image"].lstrip("/"))
    try:
        img = mpimg.imread(img_path)
        plt.imshow(img)
        plt.axis("off")
        plt.title(
            f"Hand: Turn Signal={entry['hand_label']['turn_signal']} | Tail Light={entry['hand_label']['tail_light']}\n"
            f"OpenAI: Turn Signal={entry['openai_label']['turn_signal']} | Tail Light={entry['openai_label']['tail_light']}",
            fontsize=8
        )
    except Exception as e:
        plt.title(f"Missing image: {e}")
    # Finish the figure when the page is full or this is the last image that
    # is actually displayed. The check uses the sliced list, not len(results),
    # so a final partial page is still laid out when there are more than 100
    # results.
    if (i + 1) % 12 == 0 or i == last_index:
        plt.tight_layout()
        plt.show()

### Metrics comparison

In [None]:
# Reload the saved run so this cell does not depend on the cells above.
with open("../json_txt/gpt-4o-mini_prompt2.json") as f:
    results = json.load(f)

total = len(results)

# Count exact matches between hand label and model label for each field.
turn_correct = 0
tail_correct = 0
for r in results:
    hand, predicted = r["hand_label"], r["openai_label"]
    turn_correct += hand["turn_signal"] == predicted["turn_signal"]
    tail_correct += hand["tail_light"] == predicted["tail_light"]

print(f"Turn signal accuracy: {turn_correct / total:.2%}")
print(f"Tail light accuracy: {tail_correct / total:.2%}")

In [None]:
# Per-class turn-signal accuracy for whichever `results` is currently in scope
# (normally the file loaded in the previous cell), keyed by hand label.
per_turn_accuracy = accuracy_per_turn_signal(results)
print("\nTurn signal accuracy per type:")
for ts_type, acc in per_turn_accuracy.items():
    print(f"{ts_type}: {acc:.2%}")

## gpt-4o & prompt 2

Running at approx 7:56pm, October 27, 2025

In [None]:
# Where the classification loop below saves the gpt-4o / prompt 2 results.
OUTPUT_FILE = "../json_txt/gpt-4o_prompt2.json"

In [None]:
def classify_image(local_path):
    """Classify one car image with gpt-4o using prompt 2.

    Prompt 2 offers left, right, unclear and none for turn_signal (no "both").

    Args:
        local_path: Path of the image file. It is read and base64-encoded by
            encode_image and sent as an image/jpeg data URL. Errors raised
            while reading the file are not caught here.

    Returns:
        The model reply parsed by json.loads. The prompt asks for
        "turn_signal" and "tail_light" keys, but their presence is not
        checked. If the API call or the JSON parsing raises, the error is
        printed and a fallback dict with "turn_signal" set to "error" and
        "tail_light" set to "not_visible" is returned.
    """
    img_b64 = encode_image(local_path)
    instructions = (
        "You are an advanced image analysis model. Look at the car image and determine:\n"
        "1. turn_signal — one of: left, right, unclear, none \n"
        "2. tail_light — one of: on, off, unclear \n\n"
        "Return only valid JSON in this format:\n"
        "{\n"
        "  \"turn_signal\": \"left\",\n"
        "  \"tail_light\": \"on\"\n"
        "}"
    )
    prompt = {
        "role": "user",
        "content": [
            {"type": "text", "text": instructions},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
        ],
    }

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[prompt],
            temperature=0,
            response_format={"type": "json_object"},
        )
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"Error processing {local_path}: {e}")
        # "none" is one of the labels the prompt allows, so using it as the
        # failure fallback would score failed requests as "none" predictions.
        # "error" is outside the prompt's vocabulary and is always a miss.
        return {"turn_signal": "error", "tail_light": "not_visible"}

In [None]:
# Load the sampled entries; the loop below reads "image", "turn_signal" and
# "tail_light" from each one.
with open(SAMPLED_PATH) as f:
    sampled = json.load(f)

In [None]:
# Classify every sampled image and pair the model's answer with the hand label.
results = []
for entry in tqdm(sampled, desc="Classifying images"):
    relative_image = entry["image"].lstrip("/")
    local_path = os.path.join(LOCAL_BASE, relative_image)
    print(local_path)
    result = classify_image(local_path)
    hand_label = {
        "turn_signal": entry["turn_signal"],
        "tail_light": entry["tail_light"],
    }
    results.append(
        {"image": entry["image"], "hand_label": hand_label, "openai_label": result}
    )

# The whole run is written in one go, after every image has been processed.
with open(OUTPUT_FILE, "w") as out_file:
    json.dump(results, out_file, indent=2)

print(f"\nDone! Saved {len(results)} results to {OUTPUT_FILE}")

### Display results side-by-side

In [None]:
RESULTS_PATH = "../json_txt/gpt-4o_prompt2.json"
LOCAL_BASE = "../sampled_images"

with open(RESULTS_PATH) as f:
    results = json.load(f)

# Show at most the first 100 results, 12 images (3x4 grid) per figure.
shown = results[:100]
last_index = len(shown) - 1
for i, entry in enumerate(shown):
    if i % 12 == 0:
        plt.figure(figsize=(12, 10))
    plt.subplot(3, 4, (i % 12) + 1)
    img_path = os.path.join(LOCAL_BASE, entry["image"].lstrip("/"))
    try:
        img = mpimg.imread(img_path)
        plt.imshow(img)
        plt.axis("off")
        plt.title(
            f"Hand: Turn Signal={entry['hand_label']['turn_signal']} | Tail Light={entry['hand_label']['tail_light']}\n"
            f"OpenAI: Turn Signal={entry['openai_label']['turn_signal']} | Tail Light={entry['openai_label']['tail_light']}",
            fontsize=8
        )
    except Exception as e:
        plt.title(f"Missing image: {e}")
    # Finish the figure when the page is full or this is the last image that
    # is actually displayed. The check uses the sliced list, not len(results),
    # so a final partial page is still laid out when there are more than 100
    # results.
    if (i + 1) % 12 == 0 or i == last_index:
        plt.tight_layout()
        plt.show()

### Metrics comparison

In [None]:
# Reload the saved run so this cell does not depend on the cells above.
with open("../json_txt/gpt-4o_prompt2.json") as f:
    results = json.load(f)

total = len(results)

# Count exact matches between hand label and model label for each field.
turn_correct = 0
tail_correct = 0
for r in results:
    hand, predicted = r["hand_label"], r["openai_label"]
    turn_correct += hand["turn_signal"] == predicted["turn_signal"]
    tail_correct += hand["tail_light"] == predicted["tail_light"]

print(f"Turn signal accuracy: {turn_correct / total:.2%}")
print(f"Tail light accuracy: {tail_correct / total:.2%}")

In [None]:
# Per-class turn-signal accuracy for whichever `results` is currently in scope
# (normally the file loaded in the previous cell), keyed by hand label.
per_turn_accuracy = accuracy_per_turn_signal(results)
print("\nTurn signal accuracy per type:")
for ts_type, acc in per_turn_accuracy.items():
    print(f"{ts_type}: {acc:.2%}")

## gpt-4o-mini & prompt 3

Prompt 3:

```
"You are an advanced image analysis model. Look at the car image and determine:\n"
"1. turn_signal — one of: left, right, unclear, none \n"
"2. tail_light — one of: on, off, unclear \n\n"
"A turn signal should be lit up and indicating which direction the car is planning to turn \n"
"A tail light is on if lights other than the turn signal are on. \n"
"Return only valid JSON in this format:\n"
"{\n"
"  \"turn_signal\": \"left\",\n"
"  \"tail_light\": \"on\"\n"
"}"
```

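As in prompt 2, the `both` option is removed (`none` is kept), as perhaps this "confuses" the model. Prompt 3 additionally spells out what counts as a turn signal and when a tail light is on.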

Running at approx 8:03pm October 27, 2025

In [None]:
# Where the classification loop below saves the gpt-4o-mini / prompt 3 results.
OUTPUT_FILE = "../json_txt/gpt-4o-mini_prompt3.json"

In [None]:
def classify_image(local_path):
    """Classify one car image with gpt-4o-mini using prompt 3.

    Prompt 3 offers left, right, unclear and none for turn_signal and adds
    two sentences defining a turn signal and when a tail light is on.

    Args:
        local_path: Path of the image file. It is read and base64-encoded by
            encode_image and sent as an image/jpeg data URL. Errors raised
            while reading the file are not caught here.

    Returns:
        The model reply parsed by json.loads. The prompt asks for
        "turn_signal" and "tail_light" keys, but their presence is not
        checked. If the API call or the JSON parsing raises, the error is
        printed and a fallback dict with "turn_signal" set to "error" and
        "tail_light" set to "not_visible" is returned.
    """
    img_b64 = encode_image(local_path)
    instructions = (
        "You are an advanced image analysis model. Look at the car image and determine:\n"
        "1. turn_signal — one of: left, right, unclear, none \n"
        "2. tail_light — one of: on, off, unclear \n\n"
        "A turn signal should be lit up and indicating which direction the car is planning to turn \n"
        "A tail light is on if lights other than the turn signal are on. \n"
        "Return only valid JSON in this format:\n"
        "{\n"
        "  \"turn_signal\": \"left\",\n"
        "  \"tail_light\": \"on\"\n"
        "}"
    )
    prompt = {
        "role": "user",
        "content": [
            {"type": "text", "text": instructions},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
        ],
    }

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[prompt],
            temperature=0,
            response_format={"type": "json_object"},
        )
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"Error processing {local_path}: {e}")
        # "none" is one of the labels the prompt allows, so using it as the
        # failure fallback would score failed requests as "none" predictions.
        # "error" is outside the prompt's vocabulary and is always a miss.
        return {"turn_signal": "error", "tail_light": "not_visible"}

In [None]:
# Load the sampled entries; the loop below reads "image", "turn_signal" and
# "tail_light" from each one.
with open(SAMPLED_PATH) as f:
    sampled = json.load(f)

In [None]:
# Classify every sampled image and pair the model's answer with the hand label.
results = []
for entry in tqdm(sampled, desc="Classifying images"):
    relative_image = entry["image"].lstrip("/")
    local_path = os.path.join(LOCAL_BASE, relative_image)
    print(local_path)
    result = classify_image(local_path)
    hand_label = {
        "turn_signal": entry["turn_signal"],
        "tail_light": entry["tail_light"],
    }
    results.append(
        {"image": entry["image"], "hand_label": hand_label, "openai_label": result}
    )

# The whole run is written in one go, after every image has been processed.
with open(OUTPUT_FILE, "w") as out_file:
    json.dump(results, out_file, indent=2)

print(f"\nDone! Saved {len(results)} results to {OUTPUT_FILE}")

### Display results side-by-side

In [None]:
RESULTS_PATH = "../json_txt/gpt-4o-mini_prompt3.json"
LOCAL_BASE = "../sampled_images"

with open(RESULTS_PATH) as f:
    results = json.load(f)

# Show at most the first 100 results, 12 images (3x4 grid) per figure.
shown = results[:100]
last_index = len(shown) - 1
for i, entry in enumerate(shown):
    if i % 12 == 0:
        plt.figure(figsize=(12, 10))
    plt.subplot(3, 4, (i % 12) + 1)
    img_path = os.path.join(LOCAL_BASE, entry["image"].lstrip("/"))
    try:
        img = mpimg.imread(img_path)
        plt.imshow(img)
        plt.axis("off")
        plt.title(
            f"Hand: Turn Signal={entry['hand_label']['turn_signal']} | Tail Light={entry['hand_label']['tail_light']}\n"
            f"OpenAI: Turn Signal={entry['openai_label']['turn_signal']} | Tail Light={entry['openai_label']['tail_light']}",
            fontsize=8
        )
    except Exception as e:
        plt.title(f"Missing image: {e}")
    # Finish the figure when the page is full or this is the last image that
    # is actually displayed. The check uses the sliced list, not len(results),
    # so a final partial page is still laid out when there are more than 100
    # results.
    if (i + 1) % 12 == 0 or i == last_index:
        plt.tight_layout()
        plt.show()

### Metrics comparison

In [None]:
# Reload the saved run so this cell does not depend on the cells above.
with open("../json_txt/gpt-4o-mini_prompt3.json") as f:
    results = json.load(f)

total = len(results)

# Count exact matches between hand label and model label for each field.
turn_correct = 0
tail_correct = 0
for r in results:
    hand, predicted = r["hand_label"], r["openai_label"]
    turn_correct += hand["turn_signal"] == predicted["turn_signal"]
    tail_correct += hand["tail_light"] == predicted["tail_light"]

print(f"Turn signal accuracy: {turn_correct / total:.2%}")
print(f"Tail light accuracy: {tail_correct / total:.2%}")

In [None]:
# Per-class turn-signal accuracy for whichever `results` is currently in scope
# (normally the file loaded in the previous cell), keyed by hand label.
per_turn_accuracy = accuracy_per_turn_signal(results)
print("\nTurn signal accuracy per type:")
for ts_type, acc in per_turn_accuracy.items():
    print(f"{ts_type}: {acc:.2%}")

## gpt-4o & prompt 3

Removing the `both` option (`none` is kept), as perhaps this "confuses" the model.

Running at approx 8:05pm, October 27, 2025

In [None]:
# Where the classification loop below saves the gpt-4o / prompt 3 results.
OUTPUT_FILE = "../json_txt/gpt-4o_prompt3.json"

In [None]:
def classify_image(local_path):
    """Classify one car image with gpt-4o using prompt 3.

    Prompt 3 offers left, right, unclear and none for turn_signal and adds
    two sentences defining a turn signal and when a tail light is on. The
    text matches the prompt used by the gpt-4o-mini / prompt 3 run, so the
    two models are compared on the same instructions.

    Args:
        local_path: Path of the image file. It is read and base64-encoded by
            encode_image and sent as an image/jpeg data URL. Errors raised
            while reading the file are not caught here.

    Returns:
        The model reply parsed by json.loads. The prompt asks for
        "turn_signal" and "tail_light" keys, but their presence is not
        checked. If the API call or the JSON parsing raises, the error is
        printed and a fallback dict with "turn_signal" set to "error" and
        "tail_light" set to "not_visible" is returned.
    """
    img_b64 = encode_image(local_path)
    instructions = (
        "You are an advanced image analysis model. Look at the car image and determine:\n"
        "1. turn_signal — one of: left, right, unclear, none \n"
        "2. tail_light — one of: on, off, unclear \n\n"
        # These two definition sentences are what distinguishes prompt 3
        # from prompt 2.
        "A turn signal should be lit up and indicating which direction the car is planning to turn \n"
        "A tail light is on if lights other than the turn signal are on. \n"
        "Return only valid JSON in this format:\n"
        "{\n"
        "  \"turn_signal\": \"left\",\n"
        "  \"tail_light\": \"on\"\n"
        "}"
    )
    prompt = {
        "role": "user",
        "content": [
            {"type": "text", "text": instructions},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}},
        ],
    }

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[prompt],
            temperature=0,
            response_format={"type": "json_object"},
        )
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"Error processing {local_path}: {e}")
        # "none" is one of the labels the prompt allows, so using it as the
        # failure fallback would score failed requests as "none" predictions.
        # "error" is outside the prompt's vocabulary and is always a miss.
        return {"turn_signal": "error", "tail_light": "not_visible"}

In [None]:
# Load the sampled entries; the loop below reads "image", "turn_signal" and
# "tail_light" from each one.
with open(SAMPLED_PATH) as f:
    sampled = json.load(f)

In [None]:
# Classify every sampled image and pair the model's answer with the hand label.
results = []
for entry in tqdm(sampled, desc="Classifying images"):
    relative_image = entry["image"].lstrip("/")
    local_path = os.path.join(LOCAL_BASE, relative_image)
    print(local_path)
    result = classify_image(local_path)
    hand_label = {
        "turn_signal": entry["turn_signal"],
        "tail_light": entry["tail_light"],
    }
    results.append(
        {"image": entry["image"], "hand_label": hand_label, "openai_label": result}
    )

# The whole run is written in one go, after every image has been processed.
with open(OUTPUT_FILE, "w") as out_file:
    json.dump(results, out_file, indent=2)

print(f"\nDone! Saved {len(results)} results to {OUTPUT_FILE}")

### Display results side-by-side

In [None]:
RESULTS_PATH = "../json_txt/gpt-4o_prompt3.json"
LOCAL_BASE = "../sampled_images"

with open(RESULTS_PATH) as f:
    results = json.load(f)

# Show at most the first 100 results, 12 images (3x4 grid) per figure.
shown = results[:100]
last_index = len(shown) - 1
for i, entry in enumerate(shown):
    if i % 12 == 0:
        plt.figure(figsize=(12, 10))
    plt.subplot(3, 4, (i % 12) + 1)
    img_path = os.path.join(LOCAL_BASE, entry["image"].lstrip("/"))
    try:
        img = mpimg.imread(img_path)
        plt.imshow(img)
        plt.axis("off")
        plt.title(
            f"Hand: Turn Signal={entry['hand_label']['turn_signal']} | Tail Light={entry['hand_label']['tail_light']}\n"
            f"OpenAI: Turn Signal={entry['openai_label']['turn_signal']} | Tail Light={entry['openai_label']['tail_light']}",
            fontsize=8
        )
    except Exception as e:
        plt.title(f"Missing image: {e}")
    # Finish the figure when the page is full or this is the last image that
    # is actually displayed. The check uses the sliced list, not len(results),
    # so a final partial page is still laid out when there are more than 100
    # results.
    if (i + 1) % 12 == 0 or i == last_index:
        plt.tight_layout()
        plt.show()

### Metrics comparison

In [None]:
# Reload the saved run so this cell does not depend on the cells above.
with open("../json_txt/gpt-4o_prompt3.json") as f:
    results = json.load(f)

total = len(results)

# Count exact matches between hand label and model label for each field.
turn_correct = 0
tail_correct = 0
for r in results:
    hand, predicted = r["hand_label"], r["openai_label"]
    turn_correct += hand["turn_signal"] == predicted["turn_signal"]
    tail_correct += hand["tail_light"] == predicted["tail_light"]

print(f"Turn signal accuracy: {turn_correct / total:.2%}")
print(f"Tail light accuracy: {tail_correct / total:.2%}")

In [None]:
# Per-class turn-signal accuracy for whichever `results` is currently in scope
# (normally the file loaded in the previous cell), keyed by hand label.
per_turn_accuracy = accuracy_per_turn_signal(results)
print("\nTurn signal accuracy per type:")
for ts_type, acc in per_turn_accuracy.items():
    print(f"{ts_type}: {acc:.2%}")