# MSBA 503 Take-Home Assignment
## Computer Vision Object Detection – Part A

Author: Alex Snyder
Course: MSBA 503  

Block 0 - Code: Setup Directories & Imports

In [1]:
import os
import time
import glob
import numpy as np
import pandas as pd
from PIL import Image

DATA_DIR = "../data"
OUTPUT_DIR = "../outputs"

os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Data directory:", DATA_DIR)
print("Output directory:", OUTPUT_DIR)

Data directory: ../data
Output directory: ../outputs


## 1. Image List Setup

In [2]:
#Block 1 - Image List Setup

#Collect all images in the data folder
image_paths = sorted(
    glob.glob(os.path.join(DATA_DIR, "*.jpg"))
    + glob.glob(os.path.join(DATA_DIR, "*.jpeg"))
    + glob.glob(os.path.join(DATA_DIR, "*.png"))
)

print(f"Found {len(image_paths)} images.")
image_paths[:10]

Found 10 images.


['../data/img01.jpeg',
 '../data/img02.jpeg',
 '../data/img03.jpeg',
 '../data/img04.jpeg',
 '../data/img05.jpeg',
 '../data/img06.jpeg',
 '../data/img07.jpeg',
 '../data/img08.jpeg',
 '../data/img09.jpeg',
 '../data/img10.jpeg']

## 2. Model 1 - Setup & Detection

Block 2 – YOLOv8: Model Setup & Test Detection

In [3]:
# Block 2a - YOLOv8 Model Setup

from ultralytics import YOLO

#Load the YOLOv8 Medium model
YOLO_MODEL_PATH = os.path.join("..", "..", "yolov8m.pt")

yolo_model = YOLO(YOLO_MODEL_PATH)

print("YOLO model loaded successfully from:", YOLO_MODEL_PATH)
yolo_model

YOLO model loaded successfully from: ../../yolov8m.pt


YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_

In [4]:
#Block 2b - YOLOv8 Single Image Detection

#Select the first image from your dataset
test_image_path = image_paths[0]
print("Running YOLO on:", test_image_path)

#Run YOLO model
results = yolo_model(test_image_path)

#Extract the first result
result = results[0]

#Loop through boxes and print detections
for box in result.boxes:
    cls_id = box.cls[0].item() #class ID
    class_name = result.names[cls_id] #class label
    
    coords = box.xyxy[0].tolist() #bounding box coords
    coords = [round(x) for x in coords] #round for display
    
    conf = round(box.conf[0].item(), 2) #confidence score
    
    print("Object type:", class_name)
    print("Coordinates:", coords)
    print("Probability:", conf)
    print("\nNext Object")

Running YOLO on: ../data/img01.jpeg

image 1/1 /Users/Home/MSBA 503/take_home_assignment/notebooks/../data/img01.jpeg: 448x640 5 persons, 4 bicycles, 5 cars, 2 motorcycles, 372.4ms
Speed: 9.1ms preprocess, 372.4ms inference, 24.2ms postprocess per image at shape (1, 3, 448, 640)
Object type: car
Coordinates: [3537, 2268, 4927, 2906]
Probability: 0.92

Next Object
Object type: person
Coordinates: [4049, 2059, 4527, 3234]
Probability: 0.9

Next Object
Object type: car
Coordinates: [810, 2177, 1441, 2538]
Probability: 0.9

Next Object
Object type: car
Coordinates: [2175, 2103, 3056, 2628]
Probability: 0.89

Next Object
Object type: car
Coordinates: [159, 2160, 497, 2366]
Probability: 0.85

Next Object
Object type: car
Coordinates: [702, 2107, 1052, 2441]
Probability: 0.83

Next Object
Object type: motorcycle
Coordinates: [3177, 2253, 3581, 2721]
Probability: 0.82

Next Object
Object type: motorcycle
Coordinates: [2997, 2311, 3251, 2692]
Probability: 0.67

Next Object
Object type: bicycle


In [5]:
#Block 2c - YOLO Metrics for a Single Image

def yolo_get_metrics(img_path, conf_threshold=0.25):
    """
    Returns timing, number of objects, and average confidence
    using professor-style YOLO detection.
    """
    start = time.time()
    results = yolo_model(img_path)
    elapsed = time.time() - start

    result = results[0]
    
    scores = []
    
    for box in result.boxes:
        conf = box.conf[0].item()
        if conf >= conf_threshold:
            scores.append(conf)

    num_objects = len(scores)
    avg_confidence = float(sum(scores) / num_objects) if num_objects > 0 else 0.0

    return elapsed, num_objects, avg_confidence

In [6]:
#Object Test - 1 Image
test_time, test_num, test_avg = yolo_get_metrics(image_paths[0], conf_threshold=0.25)

print("YOLO Metrics Test:")
print(f"Time taken: {test_time:.3f} seconds")
print(f"Objects detected: {test_num}")
print(f"Average confidence: {test_avg:.3f}")


image 1/1 /Users/Home/MSBA 503/take_home_assignment/notebooks/../data/img01.jpeg: 448x640 5 persons, 4 bicycles, 5 cars, 2 motorcycles, 306.7ms
Speed: 2.8ms preprocess, 306.7ms inference, 2.3ms postprocess per image at shape (1, 3, 448, 640)
YOLO Metrics Test:
Time taken: 0.670 seconds
Objects detected: 16
Average confidence: 0.614


## 3. Model 2 - Setup & Detection

In [7]:
#Block 3a - Faster R-CNN Model Setup

import torch
import torchvision.transforms.functional as F
import torchvision.models.detection as detection
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights

#Load COCO class names from the pre-trained weights metadata
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
COCO_CLASSES = weights.meta["categories"]

print("Number of COCO classes:", len(COCO_CLASSES))

#Select device (CPU Mac, unless GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

#Load the pre-trained Faster R-CNN model (same idea as professor's pretrained=True)
model = detection.fasterrcnn_resnet50_fpn(weights=weights)
model.eval()
model.to(device)

print("Faster R-CNN model loaded.")

Number of COCO classes: 91
Using device: cpu
Faster R-CNN model loaded.


In [8]:
#Block 3b - Faster R-CNN Single Image Detection

#Use the first image from the dataset
frcnn_test_image_path = image_paths[0]
print("Running Faster R-CNN on:", frcnn_test_image_path)

#Load image and convert to tensor
image = Image.open(frcnn_test_image_path).convert("RGB")
image_tensor = F.to_tensor(image).to(device)

#Run the model
with torch.no_grad():
    predictions = model([image_tensor])[0]

predicted_boxes = predictions["boxes"]
predicted_scores = predictions["scores"]
predicted_labels = predictions["labels"]

confidence_threshold = 0.5

for box, score, label in zip(predicted_boxes, predicted_scores, predicted_labels):
    score_val = score.item()
    if score_val < confidence_threshold:
        continue  #skip low-confidence detections
    
    x_min, y_min, x_max, y_max = box.tolist()
    coords_rounded = [round(x_min), round(y_min), round(x_max), round(y_max)]
    class_name = COCO_CLASSES[label.item()]

    print("Object type:", class_name)
    print("Coordinates:", coords_rounded)
    print("Probability:", round(score_val, 2))
    print("\nNext Object")

Running Faster R-CNN on: ../data/img01.jpeg
Object type: car
Coordinates: [161, 2161, 462, 2368]
Probability: 1.0

Next Object
Object type: person
Coordinates: [4049, 2070, 4507, 3263]
Probability: 1.0

Next Object
Object type: car
Coordinates: [3540, 2264, 4891, 2923]
Probability: 0.99

Next Object
Object type: car
Coordinates: [788, 2178, 1438, 2545]
Probability: 0.99

Next Object
Object type: motorcycle
Coordinates: [2973, 2331, 3230, 2714]
Probability: 0.99

Next Object
Object type: motorcycle
Coordinates: [3166, 2259, 3608, 2727]
Probability: 0.98

Next Object
Object type: car
Coordinates: [2182, 2117, 3034, 2647]
Probability: 0.98

Next Object
Object type: car
Coordinates: [498, 2218, 638, 2320]
Probability: 0.97

Next Object
Object type: person
Coordinates: [3478, 2196, 3574, 2378]
Probability: 0.95

Next Object
Object type: person
Coordinates: [2111, 2106, 2210, 2513]
Probability: 0.93

Next Object
Object type: car
Coordinates: [693, 2104, 1005, 2448]
Probability: 0.93

Next Ob

In [9]:
#Block 3c - Faster R-CNN Metrics for a Single Image

def frcnn_get_metrics(img_path, conf_threshold=0.5):
    """
    Returns timing, number of objects, and average confidence
    using professor-style Faster R-CNN detection.
    """
    #Load image and convert to tensor
    image = Image.open(img_path).convert("RGB")
    image_tensor = F.to_tensor(image).to(device)

    #Run detection with timing
    start = time.time()
    with torch.no_grad():
        predictions = model([image_tensor])[0]
    elapsed = time.time() - start

    scores = predictions["scores"].detach().cpu().numpy()
    keep = scores >= conf_threshold

    num_objects = int(keep.sum())
    avg_confidence = float(scores[keep].mean()) if keep.any() else 0.0

    return elapsed, num_objects, avg_confidence

In [10]:
frcnn_time, frcnn_num, frcnn_avg = frcnn_get_metrics(image_paths[0], conf_threshold=0.5)

print("Faster R-CNN Metrics Test:")
print(f"Time taken: {frcnn_time:.3f} seconds")
print(f"Objects detected: {frcnn_num}")
print(f"Average confidence: {frcnn_avg:.3f}")

Faster R-CNN Metrics Test:
Time taken: 2.818 seconds
Objects detected: 24
Average confidence: 0.808


## 4. Comparison Table Construction

In [11]:
#Block 4a - Collect metrics for all images and both models

records = []

for img_id, img_path in enumerate(image_paths, start=1):
    # --- YOLOv8 metrics ---
    y_time, y_num, y_avg = yolo_get_metrics(img_path, conf_threshold=0.25)
    records.append({
        "image_id": img_id,
        "image_path": img_path,
        "model": "YOLOv8",
        "time_sec": y_time,
        "num_objects": y_num,
        "avg_confidence": y_avg
    })

    # --- Faster R-CNN metrics ---
    f_time, f_num, f_avg = frcnn_get_metrics(img_path, conf_threshold=0.5)
    records.append({
        "image_id": img_id,
        "image_path": img_path,
        "model": "Faster R-CNN",
        "time_sec": f_time,
        "num_objects": f_num,
        "avg_confidence": f_avg
    })

print(f"Total records collected: {len(records)}")


image 1/1 /Users/Home/MSBA 503/take_home_assignment/notebooks/../data/img01.jpeg: 448x640 5 persons, 4 bicycles, 5 cars, 2 motorcycles, 312.1ms
Speed: 2.4ms preprocess, 312.1ms inference, 5.7ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 /Users/Home/MSBA 503/take_home_assignment/notebooks/../data/img02.jpeg: 352x640 3 persons, 4 cars, 1 motorcycle, 1 truck, 5 traffic lights, 272.4ms
Speed: 2.2ms preprocess, 272.4ms inference, 5.8ms postprocess per image at shape (1, 3, 352, 640)

image 1/1 /Users/Home/MSBA 503/take_home_assignment/notebooks/../data/img03.jpeg: 480x640 (no detections), 333.0ms
Speed: 2.5ms preprocess, 333.0ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

image 1/1 /Users/Home/MSBA 503/take_home_assignment/notebooks/../data/img04.jpeg: 384x640 (no detections), 270.1ms
Speed: 1.4ms preprocess, 270.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /Users/Home/MSBA 503/take_home_assignment/notebooks/../data/i

In [12]:
#Block 4b - Create results DataFrame

results_df = pd.DataFrame(records)
results_df

Unnamed: 0,image_id,image_path,model,time_sec,num_objects,avg_confidence
0,1,../data/img01.jpeg,YOLOv8,0.588588,16,0.614479
1,1,../data/img01.jpeg,Faster R-CNN,3.256828,24,0.808105
2,2,../data/img02.jpeg,YOLOv8,0.296152,14,0.686294
3,2,../data/img02.jpeg,Faster R-CNN,2.675963,18,0.887055
4,3,../data/img03.jpeg,YOLOv8,0.34964,0,0.0
5,3,../data/img03.jpeg,Faster R-CNN,2.470502,0,0.0
6,4,../data/img04.jpeg,YOLOv8,0.295523,0,0.0
7,4,../data/img04.jpeg,Faster R-CNN,2.792974,0,0.0
8,5,../data/img05.jpeg,YOLOv8,0.310279,6,0.755401
9,5,../data/img05.jpeg,Faster R-CNN,2.604644,11,0.819307


In [14]:
#Block 4c - Cleaner pivot table with flattened column names

pivot = (
    results_df
    .pivot(index="image_id", columns="model", values=["time_sec", "num_objects", "avg_confidence"])
)

#Flatten the MultiIndex columns
pivot.columns = [f"{metric}_{model.replace(' ', '')}" for metric, model in pivot.columns]

#Reset index (easier for Word)
comparison_table = pivot.reset_index()

comparison_table

Unnamed: 0,image_id,time_sec_FasterR-CNN,time_sec_YOLOv8,num_objects_FasterR-CNN,num_objects_YOLOv8,avg_confidence_FasterR-CNN,avg_confidence_YOLOv8
0,1,3.256828,0.588588,24.0,16.0,0.808105,0.614479
1,2,2.675963,0.296152,18.0,14.0,0.887055,0.686294
2,3,2.470502,0.34964,0.0,0.0,0.0,0.0
3,4,2.792974,0.295523,0.0,0.0,0.0,0.0
4,5,2.604644,0.310279,11.0,6.0,0.819307,0.755401
5,6,2.747882,0.359271,1.0,1.0,0.986215,0.746031
6,7,2.80674,0.2878,1.0,1.0,0.970246,0.870449
7,8,2.940647,0.461757,4.0,2.0,0.75353,0.289558
8,9,3.067932,0.472394,19.0,19.0,0.857055,0.634507
9,10,2.937898,0.306432,9.0,7.0,0.9327,0.92865


## 5. Additional Image Features (Beyond Object Detection)

In [15]:
#Block 5 - Additional Image Feature: Average Brightness

def compute_brightness(img_path):
    """
    Computes the average brightness of an image.
    Brightness = mean pixel intensity converted to grayscale.
    """
    image = Image.open(img_path).convert("L")  #Convert to grayscale
    arr = np.array(image)
    return arr.mean() / 255.0  #Normalize to 0–1 [using mean]

#Compute Brightness for all images
brightness_list = []
for img_path in image_paths:
    brightness_list.append(compute_brightness(img_path))

brightness_list

[0.3192325394892431,
 0.4172095860566449,
 0.6196745484310047,
 0.5613198529411765,
 0.37171818175334403,
 0.4348028369351413,
 0.495885758410352,
 0.37034957460621387,
 0.5126967535982877,
 0.7266353137254902]

In [16]:
#Printing Comparison Table
comparison_table["brightness"] = brightness_list
comparison_table

Unnamed: 0,image_id,time_sec_FasterR-CNN,time_sec_YOLOv8,num_objects_FasterR-CNN,num_objects_YOLOv8,avg_confidence_FasterR-CNN,avg_confidence_YOLOv8,brightness
0,1,3.256828,0.588588,24.0,16.0,0.808105,0.614479,0.319233
1,2,2.675963,0.296152,18.0,14.0,0.887055,0.686294,0.41721
2,3,2.470502,0.34964,0.0,0.0,0.0,0.0,0.619675
3,4,2.792974,0.295523,0.0,0.0,0.0,0.0,0.56132
4,5,2.604644,0.310279,11.0,6.0,0.819307,0.755401,0.371718
5,6,2.747882,0.359271,1.0,1.0,0.986215,0.746031,0.434803
6,7,2.80674,0.2878,1.0,1.0,0.970246,0.870449,0.495886
7,8,2.940647,0.461757,4.0,2.0,0.75353,0.289558,0.37035
8,9,3.067932,0.472394,19.0,19.0,0.857055,0.634507,0.512697
9,10,2.937898,0.306432,9.0,7.0,0.9327,0.92865,0.726635


## 6. Export Results for Word Document

In [17]:
#Exporting to CSV
output_path = os.path.join(OUTPUT_DIR, "model_comparison_final.csv")
comparison_table.to_csv(output_path, index=False)
print("Saved final results to:", output_path)

Saved final results to: ../outputs/model_comparison_final.csv
