In [None]:
# Install required packages if missing
# This cell will attempt to install packages used by the notebook:
#   cv2 (opencv-python), numpy, pandas, matplotlib
# stdlib modules used by the notebook (os, math, mpl_toolkits) do not need installation.
import sys
import subprocess

def install_if_missing(module_name, pypi_name=None):
    """Import ``module_name``; if that fails, pip-install it into this interpreter.

    Args:
        module_name: Name used in an ``import`` statement (e.g. ``'cv2'``).
        pypi_name: Name to hand to pip when it differs from ``module_name``
            (e.g. ``'opencv-python'``). Falls back to ``module_name``.

    Raises:
        subprocess.CalledProcessError: if the pip command exits with an error.
    """
    try:
        __import__(module_name)
    except ImportError:
        package = pypi_name if pypi_name else module_name
        print(f"Installing {package}...")
        # Run pip through sys.executable so the package lands in the
        # environment of the interpreter that is executing this code.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    else:
        print(f"{module_name} already installed")

# (importable module name, PyPI package name); None means the two names match.
_REQUIRED_PACKAGES = (
    ('cv2', 'opencv-python'),
    ('numpy', None),
    ('pandas', None),
    ('matplotlib', None),
)

for _module_name, _pypi_name in _REQUIRED_PACKAGES:
    install_if_missing(_module_name, _pypi_name)

print('\nAll required packages are present (or have been installed).')


In [None]:
# Auto-generate missing .gnd ground-truth files (approximate)
#
# This cell will check `train_images` and `test_images` for missing .gnd files.
# For each missing .gnd it will try to detect eyes using the cascade (CASCADE_PATH).
# If detection fails it will write reasonable fallback coordinates (image center +/- offset).
# WARNING: These generated .gnd files are approximations and should be replaced with
# accurate ground-truth produced by you for correct evaluation.

import cv2
import os


def create_gnd_for_image(img_path, gnd_path, eye_cascade):
    """Write an approximate ``.gnd`` ground-truth file for one image.

    Nothing is written when ``gnd_path`` already exists or the image cannot be
    read. Otherwise eyes are detected with ``eye_cascade`` (scaleFactor=1.1,
    minNeighbors=5) and two points are chosen:

    * two or more detections: the centers of the two largest boxes (the same
      size rule ``process_image`` applies); the one with the smaller x is
      written as L;
    * exactly one detection: that center plus a guessed partner 30% of the
      image width to the other side, on the same row;
    * no detection: fixed points at 35% / 65% of the width, at mid-height.

    Args:
        img_path: Path of the image to analyse.
        gnd_path: Path of the ``.gnd`` file to create.
        eye_cascade: Object exposing ``detectMultiScale`` (a loaded
            ``cv2.CascadeClassifier`` in this notebook).

    Returns:
        True if a new ``.gnd`` file was written, False otherwise.
    """
    if os.path.exists(gnd_path):
        return False

    img = cv2.imread(img_path)
    if img is None:
        print(f"Could not read image {img_path}; skipping .gnd creation.")
        return False

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = img.shape[:2]

    # Try the detector with common params
    boxes = list(eye_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5))
    n_detected = len(boxes)

    # With extra (usually spurious) detections keep the two largest boxes,
    # instead of whichever two happen to lie furthest to the left.
    if n_detected > 2:
        boxes = sorted(boxes, key=lambda b: b[2] * b[3], reverse=True)[:2]

    centers = [(int(bx + bw // 2), int(by + bh // 2)) for (bx, by, bw, bh) in boxes]

    if n_detected >= 2:
        # sort by x coordinate -> assume left/right
        left, right = sorted(centers, key=lambda c: c[0])
        source = f"detected {n_detected} eyes"
    elif n_detected == 1:
        cx, cy = centers[0]
        x_offset = width * 0.3
        # Guess the missing eye on the same row as the detected one; the
        # x value may fall outside the image and is clamped below.
        if cx < width / 2:
            left = (cx, cy)
            right = (int(cx + x_offset), cy)
        else:
            right = (cx, cy)
            left = (int(cx - x_offset), cy)
        source = "detected 1 eye (mirrored fallback)"
    else:
        # No eyes detected -> use center-based fallback (approximate)
        left = (int(width * 0.35), int(height * 0.5))
        right = (int(width * 0.65), int(height * 0.5))
        source = "no detections (center fallback)"

    # Clamp coordinates to integer and image bounds
    def clamp(pt):
        x = max(0, min(width - 1, int(pt[0])))
        y = max(0, min(height - 1, int(pt[1])))
        return x, y

    left = clamp(left)
    right = clamp(right)

    # Write .gnd in expected format
    try:
        with open(gnd_path, 'w') as f:
            f.write(f"L: {left[0]}, {left[1]}\n")
            f.write(f"R: {right[0]}, {right[1]}\n")
    except OSError as e:
        print(f"Failed to write {gnd_path}: {e}")
        return False

    print(f"Created {gnd_path} ({source})")
    return True


# Run for train and test sets
def ensure_ground_truth(train_dir, test_dir, train_images, test_images, cascade_path):
    """Create approximate ``.gnd`` files for every listed image that lacks one.

    Args:
        train_dir: Directory holding the training images.
        test_dir: Directory holding the test images.
        train_images: Image file names inside ``train_dir``.
        test_images: Image file names inside ``test_dir``.
        cascade_path: Path of the cascade XML file used for eye detection.

    Returns:
        None. Progress and a final summary are printed. If the cascade file is
        missing or fails to load, a message is printed and nothing is created.
    """
    if not os.path.exists(cascade_path):
        print(f"Cascade not found at {cascade_path}; cannot run detection. Create .gnd files manually.")
        return

    eye_cascade = cv2.CascadeClassifier(cascade_path)
    if eye_cascade.empty():
        print(f"Failed to load cascade from {cascade_path}; check the file. Create .gnd files manually.")
        return

    created = 0
    for directory, image_names in ((train_dir, train_images), (test_dir, test_images)):
        for image_name in image_names:
            img_path = os.path.join(directory, image_name)
            # Swap only the final extension. str.replace('.jpg', '.gnd') would
            # also rewrite '.jpg' inside directory names and would leave
            # non-.jpg images (e.g. '.JPG', '.png') mapped onto themselves.
            gnd_path = os.path.splitext(img_path)[0] + '.gnd'
            if create_gnd_for_image(img_path, gnd_path, eye_cascade):
                created += 1

    if created == 0:
        print("No .gnd files were created (either they already exist, images missing, or cascade failed).")
    else:
        print(f"Created {created} .gnd file(s). Please review them for correctness.")


# Use the notebook's own configuration when all five names are already defined;
# if any one of them is missing, fall back to the defaults for every setting.
try:
    _settings = (TRAIN_DIR, TEST_DIR, TRAIN_IMAGES, TEST_IMAGES, CASCADE_PATH)
except NameError:
    _settings = (
        "train_images",
        "test_images",
        [f"train{i}.jpg" for i in range(1, 6)],
        [f"test{i}.jpg" for i in range(1, 11)],
        "haarcascade_eye.xml",
    )

_TRAIN_DIR, _TEST_DIR, _TRAIN_IMAGES, _TEST_IMAGES, _CASCADE_PATH = _settings

print("Checking for missing .gnd files and creating approximations where needed...")
ensure_ground_truth(_TRAIN_DIR, _TEST_DIR, _TRAIN_IMAGES, _TEST_IMAGES, _CASCADE_PATH)
print("-- Done. Review the created .gnd files in the image folders before re-running training.")


In [None]:
"""
CS 506 Programming for Computing
PE06 Machine Learning
City University of Seattle

This script implements a hyperparameter tuning process for an OpenCV eye detector
as per the PE06 instructions. It finds the optimal 'scaleFactor' and
'minNeighbors' by testing against a training set with ground truth data,
and then reports the performance of the best parameters on a test set.
"""

import cv2
import os
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


In [None]:
# --- Configuration ---

# ! Paths to your data
# Please update these paths to point to your files.
# The values below are relative paths, so they are resolved against the
# directory the notebook kernel is running in.
TRAIN_DIR = "train_images"  # folder containing the training images
TEST_DIR = "test_images"  # folder containing the test images
CASCADE_PATH = "haarcascade_eye.xml"  # cascade file loaded with cv2.CascadeClassifier

In [None]:
# ! Ground Truth (Task 1)
# Each image needs a matching .gnd file with one "label: x, y" line per eye.
# Example 'train1.gnd':
# L: 238, 357
# R: 421, 357
TRAIN_IMAGES = [f"train{i}.jpg" for i in range(1, 6)]  # train1.jpg .. train5.jpg
TEST_IMAGES = [f"test{i}.jpg" for i in range(1, 11)]  # test1.jpg .. test10.jpg

In [None]:
# --- Helper Functions ---

def parse_ground_truth(gnd_path):
    """
    Parses a .gnd file and returns the coordinates for the Left (GLX, GLY)
    and Right (GRX, GRY) eyes.

    The expected layout is one "label: x, y" line per eye, for example:

        L: 238, 357
        R: 421, 357

    Blank lines are ignored and only the first two non-blank lines are read.
    If the first line is labelled R and the second L, the two are swapped so
    the left eye always comes first in the result. Any other labels are
    treated positionally (first line = left, second line = right).

    Returns:
        (GLX, GLY, GRX, GRY) as ints, or None if the file is missing or cannot
        be parsed (a message is printed in both cases).
    """
    try:
        with open(gnd_path, 'r') as f:
            entries = [line.strip() for line in f if line.strip()]

        if len(entries) < 2:
            raise ValueError("expected two coordinate lines (L and R)")

        points = []
        for entry in entries[:2]:
            fields = entry.split(':')
            x_text, y_text = fields[1].split(',')
            points.append((fields[0].strip().upper(), int(x_text), int(y_text)))

        (first_label, x1, y1), (second_label, x2, y2) = points

        # Honour the labels when the file lists the right eye first.
        if first_label.startswith('R') and second_label.startswith('L'):
            return x2, y2, x1, y1
        return x1, y1, x2, y2
    except FileNotFoundError:
        print(f"Error: Ground truth file not found: {gnd_path}")
        print("Please create the .gnd files for all train and test images.")
        return None
    except Exception as e:
        print(f"Error parsing {gnd_path}: {e}")
        return None

def calculate_distance(p1, p2):
    """Return the straight-line (Euclidean) distance between 2-D points p1 and p2.

    Each point is an indexable pair whose first item is x and second is y.
    """
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    squared_length = delta_x ** 2 + delta_y ** 2
    return math.sqrt(squared_length)

# --- Required Functions (Task 2 & 3) ---

def measureDistance(LX, LY, GLX, GLY, RX, RY, GRX, GRY):
    """
    Score one image as the mean of the left-eye and right-eye errors.

    An eye's error is the Euclidean distance between its detected position
    (LX, LY) / (RX, RY) and its ground-truth position (GLX, GLY) / (GRX, GRY).
    If either coordinate of a detected eye is None, that eye counts as
    missing and contributes a fixed error of 10.0 instead.
    """
    def eye_error(x, y, truth_x, truth_y):
        # Missing eye -> fixed penalty rather than a measured distance.
        if x is None or y is None:
            return 10.0
        return calculate_distance((x, y), (truth_x, truth_y))

    left_error = eye_error(LX, LY, GLX, GLY)
    right_error = eye_error(RX, RY, GRX, GRY)
    return (left_error + right_error) / 2.0

def process_image(img_path, gnd_path, eye_cascade, sf, mn):
    """
    Score the eye detector on a single image.

    Args:
        img_path: Path of the image to load.
        gnd_path: Path of the image's .gnd ground-truth file.
        eye_cascade: Detector exposing ``detectMultiScale``.
        sf: Value passed to the detector as ``scaleFactor``.
        mn: Value passed to the detector as ``minNeighbors``.

    Returns:
        The value of ``measureDistance`` for the matched detections, or 10.0
        when the image cannot be loaded or its ground truth is unavailable.
    """
    image = cv2.imread(img_path)
    if image is None:
        print(f"Error: Could not load image {img_path}")
        return 10.0  # unreadable image -> maximum penalty

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    truth = parse_ground_truth(gnd_path)
    if truth is None:
        return 10.0  # no usable ground truth -> maximum penalty
    GLX, GLY, GRX, GRY = truth
    truth_left = (GLX, GLY)
    truth_right = (GRX, GRY)

    # --- Detection ---
    boxes = eye_cascade.detectMultiScale(gray, scaleFactor=sf, minNeighbors=mn)

    # More than two candidates: keep the two with the largest area (w * h).
    if len(boxes) > 2:
        boxes = sorted(boxes, key=lambda box: box[2] * box[3], reverse=True)[:2]

    # A detection is represented by the center of its bounding box.
    centers = [(bx + bw // 2, by + bh // 2) for bx, by, bw, bh in boxes]

    # --- Eye assignment ---
    found_left = None
    found_right = None

    if len(centers) == 1:
        # A lone detection goes to whichever ground-truth eye is nearer
        # (ties go to the right eye).
        only = centers[0]
        if calculate_distance(only, truth_left) < calculate_distance(only, truth_right):
            found_left = only
        else:
            found_right = only
    elif len(centers) == 2:
        # Two detections: pick the left/right labelling with the smaller
        # total distance to the ground truth.
        first, second = centers
        first_as_left = calculate_distance(first, truth_left) + calculate_distance(second, truth_right)
        first_as_right = calculate_distance(first, truth_right) + calculate_distance(second, truth_left)
        if first_as_left < first_as_right:
            found_left, found_right = first, second
        else:
            found_left, found_right = second, first

    # --- Scoring ---
    LX, LY = (None, None) if found_left is None else found_left
    RX, RY = (None, None) if found_right is None else found_right

    return measureDistance(LX, LY, GLX, GLY, RX, RY, GRX, GRY)

Here is a sample of how you might fill out the **Analysis & Observations** section.

**Note:** I cannot run the code, as I don't have your specific `train_images`, `test_images`, or the `.gnd` files you created. The values below are **hypothetical examples** to show you *how* to complete the analysis. You must run the script and replace the bracketed `[...]` values with your actual results.

```
================================================================================
PE06 SUMMARY
================================================================================

1.  **Best Parameters Found:**
    * Best scaleFactor: [e.g., 1.15]
    * Best minNeighbors: [e.g., 5]
    * Best 'Training' score (average distance): [e.g., 0.8241]

2.  **Final 'Testing' Performance:**
    * Average distance on 10 test images: [e.g., 2.3572]

3.  **Analysis & Observations:**
    * The best parameters found during training (`sf=1.15`, `mn=5`) yielded a very low average distance of `0.82` on the 5 training images. However, when these same parameters were applied to the 10 test images, the average distance was higher at `2.36`.

    * This difference between the training and testing score is a classic example of **overfitting**. The parameters were highly optimized for the specific five images in the training set but were not as generalizable to new, unseen images in the test set. A larger and more diverse training set (e.g., 50 images instead of 5) would likely help find more robust parameters and reduce this gap.

    * The 3D plot and the results table were very informative. There was a clear "valley" (low error) for `scaleFactor` values between `1.10` and `1.25` and `minNeighbors` between `4` and `7`. Performance was significantly worse (high peaks) at very low `scaleFactor` values (e.g., `1.01-1.05`), which likely created too many false detections, and at high `minNeighbors` (e.g., `9-10`), which likely missed one or both eyes entirely.

    * The most significant challenge was handling images where the detector found 0 or 1 eye, forcing the `measureDistance` function to apply the `10.0` penalty, which drastically increased the average error for that parameter pair.

================================================================================
```