# Project E – Evaluation of Trained CoordConv CNN

This notebook loads the trained CoordConv-based CNN from disk and evaluates its performance on a separate test dataset. It computes classification metrics (accuracy, precision, recall, F1-score) and localization quality via Intersection over Union (IoU). When labels are available, it also generates visualizations comparing predicted and ground-truth bounding boxes.


In [15]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from sklearn.metrics import accuracy_score, classification_report

from tensorflow.keras import layers

class AddCoords(layers.Layer):
    """Append two normalized coordinate channels to a batch of feature maps.

    The input is treated as a rank-4 tensor laid out as
    (batch, rows, cols, channels). Two float32 channels are concatenated on
    the last axis:

    * ``xx_channel`` - the column index of each position, scaled to [0, 1];
    * ``yy_channel`` - the row index of each position, scaled to [0, 1].

    The layer has no weights and takes no constructor arguments. Rows and
    columns are handled independently, so non-square inputs are supported.
    """

    def call(self, input_tensor):
        """Return ``input_tensor`` with the two coordinate channels appended.

        Args:
            input_tensor: tensor of shape (batch, rows, cols, channels). It is
                concatenated with float32 channels, so it is expected to be
                float32 itself.

        Returns:
            Tensor of shape (batch, rows, cols, channels + 2).
        """
        shape = tf.shape(input_tensor)
        batch_size, height, width = shape[0], shape[1], shape[2]

        # Divide by (size - 1) so the last index maps to 1.0. The divisor is
        # clamped to 1 so that an axis of length 1 yields 0.0 instead of NaN.
        col_scale = tf.cast(tf.maximum(width - 1, 1), 'float32')
        row_scale = tf.cast(tf.maximum(height - 1, 1), 'float32')

        cols = tf.cast(tf.range(width), 'float32') / col_scale   # (cols,)
        rows = tf.cast(tf.range(height), 'float32') / row_scale  # (rows,)

        # xx_channel encodes column indices: constant down each column,
        # varying along axis 2.
        xx_channel = tf.tile(
            cols[tf.newaxis, tf.newaxis, :, tf.newaxis],
            [batch_size, height, 1, 1],
        )

        # yy_channel encodes row indices: constant along each row,
        # varying along axis 1.
        yy_channel = tf.tile(
            rows[tf.newaxis, :, tf.newaxis, tf.newaxis],
            [batch_size, 1, width, 1],
        )

        # Concatenate original features with coordinate channels
        return tf.concat([input_tensor, xx_channel, yy_channel], axis=-1)

## 2. Load the trained model

We load the trained CoordConv CNN from disk. The `AddCoords` custom layer must be provided through `custom_objects` so that Keras can reconstruct the model correctly.

In [None]:
# Location of the saved model produced by 02_train_cnn; edit to match where
# the final (or best) checkpoint was written.
MODEL_PATH = '/kaggle/working/final_model_coord.keras'  # or '../models/best_model_coord.keras'

# AddCoords is a user-defined layer, so its class is handed to the loader
# under the name 'AddCoords' via custom_objects.
model = tf.keras.models.load_model(MODEL_PATH, custom_objects=dict(AddCoords=AddCoords))


## 3. Intersection over Union (IoU) metric

Here we define a helper function that computes the IoU between ground-truth and predicted bounding boxes given in normalized `[x, y, w, h]` form.

In [None]:
def compute_iou(box_true, box_pred):
    """Compute the Intersection over Union (IoU) of paired boxes.

    Each box is ``[x, y, w, h]``: ``(x, y)`` is the corner with the smallest
    coordinates and ``w`` / ``h`` are the extents along x and y. Both inputs
    must use the same units (e.g. both normalized or both in pixels).

    Args:
        box_true: array-like whose last axis holds ``x, y, w, h``; typically
            shape (N, 4), but a single box of shape (4,) is accepted too.
        box_pred: array-like broadcastable against ``box_true``.

    Returns:
        Array of IoU values with the broadcast leading shape of the inputs
        (shape (N,) for (N, 4) inputs, 0-d for a single box). Pairs whose
        union area is not positive (e.g. two zero-area boxes) get 0.0.
    """
    box_true = np.asarray(box_true)
    box_pred = np.asarray(box_pred)

    # Ellipsis indexing lets the same code serve one box or a whole batch.
    x1_t, y1_t, w_t, h_t = (box_true[..., k] for k in range(4))
    x1_p, y1_p, w_p, h_p = (box_pred[..., k] for k in range(4))
    x2_t, y2_t = x1_t + w_t, y1_t + h_t
    x2_p, y2_p = x1_p + w_p, y1_p + h_p

    # Intersection rectangle; extents are clamped at 0 for disjoint boxes.
    w_i = np.maximum(0, np.minimum(x2_t, x2_p) - np.maximum(x1_t, x1_p))
    h_i = np.maximum(0, np.minimum(y2_t, y2_p) - np.maximum(y1_t, y1_p))
    inter_area = w_i * h_i

    # Union area
    union_area = w_t * h_t + w_p * h_p - inter_area

    # Exact division where the union is positive, 0.0 elsewhere. This avoids
    # both a division by zero and the downward bias of an additive epsilon.
    iou = np.zeros(
        np.shape(union_area),
        dtype=np.result_type(union_area, np.float32),
    )
    np.divide(inter_area, union_area, out=iou, where=union_area > 0)
    return iou

## 4. Load and preprocess the test dataset

The professor will provide a blind test dataset with `.npy` files for the input frames and (possibly) the labels:

- `DATA_PATH`  – video frames
- `LABEL_PATH` – class IDs and bounding boxes (only if labels are provided)

We reshape the videos into frame-level samples and normalize pixel values to match the training preprocessing.

In [None]:
# TODO: point these at the test files once the professor provides them
DATA_PATH = '/kaggle/input/blindset/test_data_projectE.npy'
LABEL_PATH = '/kaggle/input/blindset/test_labels_projectE.npy'  # if labels are provided

X_raw = np.load(DATA_PATH)

# Collapse all leading axes into one frame axis of 100x100x3 images, then
# scale pixel values by 1/255 (assumes 8-bit intensities - TODO confirm).
X_test = np.reshape(X_raw, (-1, 100, 100, 3)).astype('float32') / 255.0

print("X_test shape:", X_test.shape)

# Labels are needed for accuracy & IoU; set to False for an unlabeled blind set.
labels_available = True

if labels_available:
    # NOTE(review): allow_pickle=True unpickles arbitrary objects from the
    # file - only load label files from a trusted source.
    y_raw = np.load(LABEL_PATH, allow_pickle=True)
    # One row per frame: column 0 is the class ID, columns 1-4 are the box.
    y_flat = np.reshape(y_raw, (-1, 5)).astype('float32')
    # Shift class IDs down by one (presumably 1-based on disk - verify).
    y_true_cls = (y_flat[:, 0] - 1).astype(int)
    # Divide by 100 to normalize boxes to [0, 1] (assumes pixel coordinates
    # on a 100-pixel frame).
    y_true_box = y_flat[:, 1:] / 100.0
    print("y_true_cls shape:", y_true_cls.shape)
    print("y_true_box shape:", y_true_box.shape)

## 5. Predict classes and bounding boxes

We feed the test frames into the trained model to obtain:

- `pred_probs` – class probabilities from the softmax head.
- `pred_boxes` – predicted normalized bounding boxes from the regression head.

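We also apply the same confidence-based UNKNOWN mechanism used earlier: any frame whose highest class probability is below `CONFIDENCE_THRESHOLD` is assigned the UNKNOWN label (class index 5) instead of its arg-max class.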

In [None]:
print("Predicting on test set...")
# The model returns one array per head: class probabilities and boxes.
pred_probs, pred_boxes = model.predict(X_test, batch_size=64)

# Frames whose top class probability is below this value are not trusted.
CONFIDENCE_THRESHOLD = 0.7
UNKNOWN_CLASS = 5  # 5 = UNKNOWN

# Vectorized over all frames (assumes pred_probs is (num_frames, num_classes)):
# keep the arg-max class unless its probability is under the threshold.
top_prob = np.max(pred_probs, axis=1)
final_preds = np.where(
    top_prob < CONFIDENCE_THRESHOLD,
    UNKNOWN_CLASS,
    np.argmax(pred_probs, axis=1),
)

# Quick sanity check of the regression head's output range.
print("Example predicted box:", pred_boxes[0])
print("Max predicted box value:", pred_boxes.max())

## 6. Classification and IoU metrics (when labels are available)

If the blind test set includes labels, we can compute:

- Overall **classification accuracy**  
- **Per-class precision, recall, and F1-score** via `classification_report`  
- **Mean IoU** between predicted and ground-truth boxes, averaged over valid frames that were classified correctly

In [None]:
if not labels_available:
    print("Labels not available for this test set – skipping metrics.")
else:
    # Restrict every array to frames whose ground-truth box has width > 0.
    valid_mask = y_true_box[:, 2] > 0
    y_true_box_valid, pred_boxes_valid = y_true_box[valid_mask], pred_boxes[valid_mask]
    final_preds_valid, y_true_cls_valid = final_preds[valid_mask], y_true_cls[valid_mask]

    # Classification accuracy over the valid frames (UNKNOWN counts as wrong,
    # since it never equals a true class in 0-4).
    acc = accuracy_score(y_true_cls_valid, final_preds_valid)

    # Localization quality: IoU per frame, averaged only over frames whose
    # class was predicted correctly; 0.0 when there are none.
    ious = compute_iou(y_true_box_valid, pred_boxes_valid)
    correct_mask = final_preds_valid == y_true_cls_valid
    if correct_mask.any():
        mean_iou = np.mean(ious[correct_mask])
    else:
        mean_iou = 0.0

    print(f"Test Accuracy: {acc:.4f}")
    print(f"Test Mean IoU (correctly classified): {mean_iou:.4f}")

    # Two reports: one that keeps the UNKNOWN pseudo-class as a column, and
    # one limited to the five real object classes.
    label_names = ['Ball', 'Mug', 'Pen', 'Spoon', 'Notebook', 'UNKNOWN']
    report_specs = (
        ("\nClassification report (including UNKNOWN):\n", 6),
        ("\nClassification report (only real object classes):\n", 5),
    )
    for heading, n_classes in report_specs:
        print(heading)
        print(classification_report(
            y_true_cls_valid,
            final_preds_valid,
            labels=list(range(n_classes)),
            target_names=label_names[:n_classes],
            zero_division=0,
        ))

## 7. Visualizing Model Predictions on the Test Set

While quantitative metrics such as accuracy, precision/recall, and IoU provide measurable
performance indicators, visual inspection is equally important for understanding how the model
behaves on real test images. This section uses the `visualize_results` function to display:

- The **input image**  
- The **predicted bounding box** (in red)  
- The **ground-truth bounding box** (in green), if labels are provided  
- The **predicted class name**  
- The **true class name**, highlighted when the prediction is incorrect  

These visualizations help answer qualitative questions such as:

- Does the model correctly focus on the object of interest?
- Are the predicted bounding boxes tightly aligned with the object?
- Does the model confuse certain object classes more than others?
- Do low-confidence predictions correspond to ambiguous or difficult images?

If ground-truth labels are available, we can directly compare predicted and true boxes. If not, the visualization will still plot the predicted bounding boxes and class
labels, which is useful for sanity-checking the model’s behavior.

Visual inspection is particularly valuable for diagnosing failure cases and understanding class-specific
patterns—for example, whether thin objects such as **Pens** or **Spoons** produce weaker localization
performance compared to larger, more distinctive objects like **Mugs** and **Balls**.

The following cell randomly selects a subset of test images and displays them side-by-side with their
predictions for qualitative analysis.

In [None]:
def _add_box_patch(ax, box, edgecolor, label):
    """Draw ``box`` (``[x, y, w, h]`` in pixels) on ``ax`` unless it is degenerate."""
    if box[2] > 0 and box[3] > 0:
        ax.add_patch(patches.Rectangle(
            (box[0], box[1]), box[2], box[3],
            linewidth=2, edgecolor=edgecolor, facecolor='none', label=label,
        ))


def visualize_results(images, true_boxes, pred_boxes, pred_classes, true_classes=None, num_samples=5, box_scale=100):
    """Plot a random selection of images with their predicted (and true) boxes.

    Up to ``num_samples`` images are drawn without replacement and shown side
    by side in one row. The predicted box is drawn in red and, when
    ``true_boxes`` is given, the ground-truth box in green. Boxes with a
    non-positive width or height are not drawn.

    The title shows the predicted class name. When ``true_classes`` is given
    the title is green for a correct prediction and red, with the true class
    name appended, for an incorrect one; otherwise it is black.

    Args:
        images: indexable collection of images accepted by ``ax.imshow``.
        true_boxes: ground-truth boxes ``[x, y, w, h]`` per image, or ``None``
            to skip drawing them.
        pred_boxes: predicted boxes ``[x, y, w, h]`` per image.
        pred_classes: predicted class index per image (0-4 real classes,
            5 = UNKNOWN).
        true_classes: true class index per image, or ``None``.
        num_samples: maximum number of images to display.
        box_scale: factor that converts the stored box values to pixels; the
            default of 100 matches boxes normalized by a 100-pixel frame.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    label_names = {0: 'Ball', 1: 'Mug', 2: 'Pen', 3: 'Spoon', 4: 'Notebook', 5: 'UNKNOWN'}

    total_samples = len(images)
    num_samples = min(num_samples, total_samples)
    if num_samples <= 0:
        # Nothing to draw; a zero-width figure would be invalid.
        print("No samples to visualize.")
        return

    indices = np.random.choice(total_samples, num_samples, replace=False)

    plt.figure(figsize=(3 * num_samples, 4))

    for i, idx in enumerate(indices):
        ax = plt.subplot(1, num_samples, i + 1)
        ax.imshow(images[idx])
        ax.axis('off')

        # Ground truth box in green
        if true_boxes is not None:
            _add_box_patch(ax, true_boxes[idx] * box_scale, '#00FF00', 'Ground Truth')

        # Predicted box in red
        _add_box_patch(ax, pred_boxes[idx] * box_scale, '#FF0000', 'Prediction')

        pred_cls_idx = pred_classes[idx]
        title_text = f"Pred: {label_names.get(pred_cls_idx, 'Unknown')}"
        title_color = 'black'

        if true_classes is not None:
            true_cls_idx = true_classes[idx]
            if pred_cls_idx != true_cls_idx:
                # Misclassification: show the expected class as well.
                title_color = 'red'
                true_name = label_names.get(true_cls_idx, "Unknown")
                title_text += f"\n(True: {true_name})"
            else:
                title_color = 'green'

        ax.set_title(title_text, color=title_color, fontsize=10)

    plt.tight_layout()
    plt.show()

print("Green = Ground Truth\nRed   = Prediction")

if labels_available:
    # valid_mask, y_true_box and y_true_cls only exist when labels were
    # loaded, so the ground-truth comparison is limited to this branch.
    visualize_results(
        X_test[valid_mask],      # images with valid boxes
        y_true_box[valid_mask],  # ground truth boxes
        pred_boxes[valid_mask],  # predicted boxes
        final_preds[valid_mask], # predicted classes
        y_true_cls[valid_mask],  # true classes
        num_samples=5
    )
else:
    # No labels: still show predicted boxes and class names for a sanity check.
    visualize_results(
        X_test,       # all test images
        None,         # no ground truth boxes
        pred_boxes,   # predicted boxes
        final_preds,  # predicted classes
        None,         # no true classes
        num_samples=5
    )