# Project E Simple Object Tracking - 03_Test_CNN

# Project E – 03_Test_CNN

This notebook:

1. Loads the trained CNN model (`simple_cnn_class_and_bbox.h5`).
2. Rebuilds the annotated subset of the training data using:
   - `training_data_projectE.npy` (video clips)
   - `training_labels_projectE.npy` (one-hot labels)
   - the provided CSV bounding-box annotations.
3. Computes **the same metrics used in training** on this annotated subset:
   - total loss
   - classification loss and classification accuracy
   - bounding-box loss
   - bounding-box MSE
   - average Intersection-over-Union (IoU) over correctly classified frames
4. Loads the **blind test `.npy` file** from the path you set in the blind-test cell below, then:
   - runs the trained model on all blind clips,
   - outputs a predicted **class label** and **bounding box** for every frame
     of every test video, and
   - saves these predictions for the instructor to evaluate IoU on the held-out
     “easy” blind test set.


In [31]:
import os
import glob
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from tensorflow import keras

In [33]:
# Root of your repo.
# Set the PROJECT_E_BASE_DIR environment variable to run this notebook on
# another machine without editing the cell; the path below is the default.
base_dir = os.environ.get(
    "PROJECT_E_BASE_DIR",
    r"C:\Users\Daniel\project-e-Underfit-Misfits",
)

data_dir   = os.path.join(base_dir, "data")
models_dir = os.path.join(base_dir, "models")

# Folder where your CSV annotations are stored.
# Override with the PROJECT_E_CSV_ROOT environment variable if needed.
csv_root = os.environ.get(
    "PROJECT_E_CSV_ROOT",
    r"D:\Documents\Fundamentals of Machine Learning\Final_Project_csv",
)

print("Base dir   :", base_dir)
print("Data dir   :", data_dir)
print("Models dir :", models_dir)
print("CSV root   :", csv_root)


Base dir   : C:\Users\Daniel\project-e-Underfit-Misfits
Data dir   : C:\Users\Daniel\project-e-Underfit-Misfits\data
Models dir : C:\Users\Daniel\project-e-Underfit-Misfits\models
CSV root   : D:\Documents\Fundamentals of Machine Learning\Final_Project_csv


In [34]:
# Saved network lives in the models folder
model_path = os.path.join(models_dir, "simple_cnn_class_and_bbox.h5")

# compile=False: load architecture and weights only; the loss/metric
# configuration is attached explicitly just below.
model = keras.models.load_model(model_path, compile=False)
print(f"Loaded model from: {model_path}")

# Attach losses and metrics to the two output heads ("class" and "bbox")
# so that model.evaluate() can report them.
model.compile(
    loss={"class": "sparse_categorical_crossentropy", "bbox": keras.losses.Huber()},
    loss_weights={"class": 1.0, "bbox": 5.0},
    metrics={"class": ["accuracy"], "bbox": ["mse"]},
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
)

print("Model re-compiled for evaluation.")
model.summary()


Loaded model from: C:\Users\Daniel\project-e-Underfit-Misfits\models\simple_cnn_class_and_bbox.h5
Model re-compiled for evaluation.


In [37]:
def _load_clip_boxes(csv_path, num_frames):
    """Read one annotation CSV and return its boxes as a (num_frames, 4) array.

    The CSV must provide the columns bbox_x, bbox_y, bbox_width, bbox_height,
    image_width and image_height. Boxes are returned as
    [x_min, y_min, x_max, y_max], each divided by the image width/height.

    Raises:
        ValueError: if the CSV contains no rows.
    """
    df = pd.read_csv(csv_path)
    if len(df) == 0:
        raise ValueError(f"Annotation file has no rows: {csv_path}")

    # Resample or pad to exactly num_frames rows
    if len(df) > num_frames:
        # Pick num_frames evenly spaced rows across the whole annotation
        idx = np.linspace(0, len(df) - 1, num_frames).astype(int)
        df = df.iloc[idx].reset_index(drop=True)
    elif len(df) < num_frames:
        # Too few rows: repeat the last annotated row until the clip is full
        repeat_count = num_frames - len(df)
        last_row = df.iloc[[-1]].copy()
        df = pd.concat([df] + [last_row] * repeat_count, ignore_index=True)

    assert len(df) == num_frames, f"Unexpected #frames in {csv_path}: {len(df)}"

    x  = df["bbox_x"].values.astype("float32")
    y  = df["bbox_y"].values.astype("float32")
    bw = df["bbox_width"].values.astype("float32")
    bh = df["bbox_height"].values.astype("float32")

    img_w = df["image_width"].values.astype("float32")
    img_h = df["image_height"].values.astype("float32")

    # Corner format, scaled by the image size recorded in the CSV
    x_min = x / img_w
    y_min = y / img_h
    x_max = (x + bw) / img_w
    y_max = (y + bh) / img_h

    return np.stack([x_min, y_min, x_max, y_max], axis=1)  # (num_frames, 4)


def build_frame_dataset(data_npy_path, labels_npy_path, csv_root, num_frames=15):
    """Build a frame-level evaluation set from clips, labels and CSV boxes.

    Args:
        data_npy_path: path to the clips array, shaped
            (num_clips, num_frames, H, W, C).
        labels_npy_path: path to the one-hot labels array, shaped
            (num_clips, num_frames, num_classes).
        csv_root: folder containing one ``*.csv`` annotation file per
            annotated clip.
        num_frames: number of frames expected in every clip.

    Returns:
        (frames, frame_labels, frame_bboxes) where, for N annotated clips,
        frames is (N * num_frames, H, W, C), frame_labels is (N * num_frames,)
        integer class indices, and frame_bboxes is (N * num_frames, 4) float32
        boxes as [x_min, y_min, x_max, y_max] scaled by the image size.

    Raises:
        ValueError: if no CSV files are found, if there are more CSV files
            than clips in the arrays, or if a CSV file is empty.
        AssertionError: if the clips do not have ``num_frames`` frames.
    """
    # Load clips and one-hot labels
    data_training = np.load(data_npy_path)
    # NOTE(review): allow_pickle=True can execute arbitrary code when loading
    # an untrusted file. Kept for compatibility with the existing labels
    # file; only use it on files you created yourself.
    labels_training = np.load(labels_npy_path, allow_pickle=True)

    num_clips_all, nf, H, W, C = data_training.shape
    assert nf == num_frames, f"Expected {num_frames} frames per clip, got {nf}"

    # Get CSV files (one per annotated clip)
    csv_files = sorted(glob.glob(os.path.join(csv_root, "*.csv")))
    num_clips_annotated = len(csv_files)
    print("Number of annotated clips (CSV files):", num_clips_annotated)

    if num_clips_annotated == 0:
        raise ValueError(f"No CSV annotation files found in {csv_root!r}")

    num_clips_available = min(num_clips_all, len(labels_training))
    if num_clips_annotated > num_clips_available:
        raise ValueError(
            f"Found {num_clips_annotated} CSV files but only "
            f"{num_clips_available} clips with labels are available"
        )

    # Use only annotated clips from the .npy arrays.
    # NOTE(review): this pairs the i-th CSV (in sorted filename order) with
    # the i-th clip of the arrays; confirm that matches how they were created.
    data_used   = data_training[:num_clips_annotated]
    labels_used = labels_training[:num_clips_annotated]

    # Build per-clip bounding boxes, one (num_frames, 4) array per CSV
    bboxes_training = np.stack(
        [_load_clip_boxes(csv_path, num_frames) for csv_path in csv_files],
        axis=0,
    )                                                           # (N, num_frames, 4)

    # Flatten clips -> frames
    num_total_frames = num_clips_annotated * num_frames
    num_classes = labels_used.shape[-1]   # read from the data, not hard-coded
    frames = data_used.reshape(num_total_frames, H, W, C)
    frame_labels_onehot = labels_used.reshape(num_total_frames, num_classes)
    frame_labels = np.argmax(frame_labels_onehot, axis=1)
    frame_bboxes = bboxes_training.reshape(num_total_frames, 4).astype("float32")

    print("frames.shape       :", frames.shape)
    print("frame_labels.shape :", frame_labels.shape)
    print("frame_bboxes.shape :", frame_bboxes.shape)

    return frames, frame_labels, frame_bboxes


In [38]:
def iou_boxes(boxes_true, boxes_pred):
    """
    Pairwise Intersection-over-Union between two sets of boxes.

    Args:
        boxes_true: array-like of shape (N, 4), rows are [x1, y1, x2, y2].
        boxes_pred: array-like of shape (N, 4), same layout; row i is
            compared with row i of boxes_true.

    Returns:
        1-D array of shape (N,) with the IoU of each pair. The shape is
        (N,) for every N, including N == 1.
    """
    boxes_true = np.asarray(boxes_true)
    boxes_pred = np.asarray(boxes_pred)

    x1_true, y1_true, x2_true, y2_true = np.split(boxes_true, 4, axis=1)
    x1_pred, y1_pred, x2_pred, y2_pred = np.split(boxes_pred, 4, axis=1)

    # Corners of the overlap rectangle
    x1_int = np.maximum(x1_true, x1_pred)
    y1_int = np.maximum(y1_true, y1_pred)
    x2_int = np.minimum(x2_true, x2_pred)
    y2_int = np.minimum(y2_true, y2_pred)

    # Negative extents mean the boxes do not overlap -> zero intersection
    inter_w = np.clip(x2_int - x1_int, 0, None)
    inter_h = np.clip(y2_int - y1_int, 0, None)
    inter_area = inter_w * inter_h

    area_true = (x2_true - x1_true) * (y2_true - y1_true)
    area_pred = (x2_pred - x1_pred) * (y2_pred - y1_pred)
    union_area = area_true + area_pred - inter_area

    # Lower-bound the denominator to avoid division by zero
    iou = inter_area / np.clip(union_area, 1e-7, None)

    # reshape(-1) instead of squeeze(): squeeze() would turn a single-box
    # batch into a 0-d array, making the return shape depend on N.
    return iou.reshape(-1)


def test(model, X_eval, y_class_eval, y_box_eval):
    """
    Score the model on annotated frames and print a short report.

    Reports whatever ``model.evaluate`` returns for the "class" and "bbox"
    outputs, then the frame-level accuracy recomputed from predictions and
    the mean IoU restricted to frames whose class was predicted correctly.

    Returns:
        (results, acc_pred, mean_iou): the metrics dict from
        ``model.evaluate``, the accuracy recomputed from predictions, and
        the mean IoU (0.0 when no frame is classified correctly).
    """
    # Losses and metrics as computed by Keras
    targets = {"class": y_class_eval, "bbox": y_box_eval}
    results = model.evaluate(X_eval, targets, verbose=0, return_dict=True)

    print("Keras evaluation metrics on annotated frames:")
    for metric_name in results:
        print(f"  {metric_name}: {results[metric_name]:.4f}")

    # Second pass: raw predictions, needed for the IoU computation
    class_probs, box_preds = model.predict(X_eval, verbose=0)
    predicted_labels = np.argmax(class_probs, axis=1)

    hits = (predicted_labels == y_class_eval)
    num_correct = hits.sum()
    total = len(y_class_eval)
    acc_pred = num_correct / total

    print(f"\nFrame-level classification accuracy from predictions: {acc_pred:.3f} ({num_correct}/{total})")

    if num_correct <= 0:
        mean_iou = 0.0
        print("No correctly classified frames; IoU undefined.")
    else:
        # Only frames with the right class contribute to the IoU average
        ious = iou_boxes(y_box_eval[hits], box_preds[hits])
        mean_iou = ious.mean()
        print(f"Average IoU over correctly classified frames: {mean_iou:.3f}")

    return results, acc_pred, mean_iou


In [39]:
# Training clips and their one-hot labels live in the data folder
data_npy_path   = os.path.join(data_dir, "training_data_projectE.npy")
labels_npy_path = os.path.join(data_dir, "training_labels_projectE.npy")

# Rebuild the annotated, frame-level subset
frames, frame_labels, frame_bboxes = build_frame_dataset(
    data_npy_path=data_npy_path,
    labels_npy_path=labels_npy_path,
    csv_root=csv_root,
    num_frames=15,
)

# Normalize images: convert to float32 and divide by 255
X_eval = frames.astype("float32") / 255.0
y_class_eval, y_box_eval = frame_labels, frame_bboxes

results_dict, acc_from_pred, mean_iou = test(
    model,
    X_eval=X_eval,
    y_class_eval=y_class_eval,
    y_box_eval=y_box_eval,
)


Number of annotated clips (CSV files): 100
frames.shape       : (1500, 100, 100, 3)
frame_labels.shape : (1500,)
frame_bboxes.shape : (1500, 4)
Keras evaluation metrics on annotated frames:
  bbox_loss: 0.0050
  bbox_mse: 0.0100
  class_accuracy: 0.9673
  class_loss: 0.1484
  loss: 0.1724

Frame-level classification accuracy from predictions: 0.967 (1451/1500)
Average IoU over correctly classified frames: 0.504


In [None]:
# === Provide Blind Test File Path (edit this line only) ===

# TODO: change this path to wherever the blind .npy file is located
blind_test_path = r"C:\Users\Daniel\Downloads\easy_blind_test.npy"

print("Using blind test file:")
print(blind_test_path)

# Quick sanity check that the file can be read (numpy is imported in the
# imports cell at the top of the notebook).
# Reset first so that a failed load never leaves an array from an earlier
# run in `blind_test_data`, and the name is always defined afterwards.
blind_test_data = None
try:
    blind_test_data = np.load(blind_test_path)
    print("Blind test shape:", blind_test_data.shape)
except Exception as e:
    # Report the problem instead of stopping the notebook here
    print("ERROR loading file:", e)


In [None]:
def predict_on_blind_set(model, blind_test_path, num_frames=15, save_prefix="blind_test"):
    """
    Predict a class and a bounding box for every frame of the blind clips.

    Args:
        model: object whose ``predict(frames, verbose=...)`` returns a pair
            (class probabilities, bounding boxes), one row per frame.
        blind_test_path: path to a ``.npy`` array shaped
            (num_clips, num_frames, H, W, C).
        num_frames: number of frames expected in every clip.
        save_prefix: prefix of the two output files,
            ``<prefix>_classes.npy`` and ``<prefix>_bboxes.npy``.

    Returns:
        (pred_classes, pred_bboxes) shaped (num_clips, num_frames) and
        (num_clips, num_frames, 4).
    """
    # Load the clips and check the clip length
    clips = np.load(blind_test_path)
    num_clips, nf, H, W, C = clips.shape
    assert nf == num_frames, f"Expected {num_frames} frames, got {nf}"
    print("Blind clips shape:", clips.shape)

    # Divide by 255 as float32, then treat every frame as its own sample
    total_frames = num_clips * num_frames
    frames = (clips.astype("float32") / 255.0).reshape(total_frames, H, W, C)

    # One forward pass over all frames
    class_probs, flat_boxes = model.predict(frames, verbose=1)

    # Restore the (clip, frame) layout
    pred_classes = np.argmax(class_probs, axis=1).reshape(num_clips, num_frames)
    pred_bboxes = flat_boxes.reshape(num_clips, num_frames, 4)

    print("Predicted class array shape:", pred_classes.shape)
    print("Predicted bbox  array shape:", pred_bboxes.shape)

    # Persist the predictions so IoU can be computed on the blind set later
    classes_file = f"{save_prefix}_classes.npy"
    bboxes_file = f"{save_prefix}_bboxes.npy"
    np.save(classes_file, pred_classes)
    np.save(bboxes_file, pred_bboxes)
    print(f"Saved {classes_file} and {bboxes_file}")

    return pred_classes, pred_bboxes


In [None]:
# Run the model on the blind clips; predictions are also written to
# easy_blind_test_classes.npy and easy_blind_test_bboxes.npy.
pred_classes_blind, pred_bboxes_blind = predict_on_blind_set(
    model=model,
    blind_test_path=blind_test_path,
    num_frames=15,
    save_prefix="easy_blind_test",   # or any name you like
)