The code cell provided below is for the reference code to unzip the unput dataset on your local system.

#### Note: We do not recommend running the code in the lab environment. The zip file size will delay the code execution and may lead to some unforseen errors. The input files have already been unzipped for use in this code.


In [None]:
import shutil

shutil.unpack_archive("Images.zip")

In [46]:
import tensorflow as tf

# Get the list of available devices
gpus = tf.config.list_physical_devices("GPU")
print("Available GPUs:", gpus)

# # Disable GPU
# if gpus:
#     tf.config.set_visible_devices([], "GPU")
#     print("GPU disabled. Running on CPU.")

# Enable GPU
if gpus:
    tf.config.set_visible_devices(gpus[0], "GPU")
    print("GPU enabled.")

Available GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU enabled.


#### Step 1: Data Loading and Preprocessing


In [47]:
# import pandas,os,cv2 and numpy
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tqdm import tqdm
import plotly.express as px

In [48]:
#### Step 2: Load and preprocess the data

# Load the labels from labels.csv
# // TODO

labels_df = pd.read_csv("./labels.csv")

labels_df.columns = ["image_id", "class", "x_min", "y_min", "x_max", "y_max"]

# Adjust the image IDs in the dataframe
labels_df["image_id"] = labels_df["image_id"].apply(lambda x: f"{x:08d}")

# Use iloc to pick the first 1000 labels
# // TODO
labels_df = labels_df.iloc[:1000]
# labels_df = labels_df.iloc[:10000]

# reshapeDims = (int(720 / 10), int(480 / 10))
reshapeDims = (224, 224)

image_dimm_ratio_x_list = []
image_dimm_ratio_y_list = []

# Load the corresponding images
images_dir = "Images/"
images = []
for index, row in tqdm(labels_df.iterrows(), total=labels_df.shape[0]):
    img_path = os.path.join(images_dir, f"{row['image_id']}.jpg")
    img = cv2.imread(img_path)
    if img is not None:

        image_dimm_ratio_x_list.append(reshapeDims[0] / img.shape[1])
        image_dimm_ratio_y_list.append(reshapeDims[1] / img.shape[0])

        # The reason I resize images when we load them in so that the np.array() can hold the images as integers instead of objects and saving a lot of memory in RAM.
        img = cv2.resize(img, reshapeDims)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        images.append(img)
    else:
        print(f"Error loading image: {img_path}")


images = np.array(images)
image_dimm_ratio_x_array = np.array(image_dimm_ratio_x_list)
image_dimm_ratio_y_array = np.array(image_dimm_ratio_y_list)

# Check if images are loaded
# // TODO
print(f"\nNum of Images: {images.shape[0]}")

100%|██████████| 1000/1000 [00:00<00:00, 1498.25it/s]


Num of Images: 1000





#### Step 2: Exploratory Data Analysis (EDA)


In [49]:
# Analyze the distribution of vehicle types in the limited dataset
# // TODO
labels_df["class"].value_counts()

# Address data quality issues arising from the discrepancy between labels and actual image filenames
# Sorting the image filenames
# // TODO
# @ I believe that all of the labels and image file names are correct. I also believe that all of the image file names are sorted.
# @ So if there's something I should do here, please let me know.

class
car                      683
pickup_truck             110
motorized_vehicle         61
articulated_truck         30
work_van                  29
bus                       28
pedestrian                23
single_unit_truck         18
bicycle                   12
non-motorized_vehicle      5
motorcycle                 1
Name: count, dtype: int64

#### Step 3: Preprocess the images


In [50]:
# Check if the 'images' list is not empty
if len(images) > 0:
    # // TODO
    resized_imgs = []
    for img in images:
        # Resize each image in the 'images' list to dimensions 224x224
        resized_imgs.append(cv2.resize(img, reshapeDims))

    # Convert the list of resized images to a NumPy array
    processed_images = np.array(resized_imgs)

    # Print a success message indicating that the images were resized successfully
    print("DONE!")

DONE!


In [51]:
processed_images.shape

(1000, 224, 224, 3)

#### Step4: Prepare the labels and bounding boxes


In [52]:
labels = labels_df["class"].to_numpy()
bounding_boxes = labels_df[["x_min", "y_min", "x_max", "y_max"]].to_numpy()
bounding_boxes_resized = bounding_boxes.copy()


for index, ratio in enumerate(image_dimm_ratio_x_array):
    bounding_boxes_resized[index, 0] = round(bounding_boxes[index, 0] * ratio)
    bounding_boxes_resized[index, 2] = round(bounding_boxes[index, 2] * ratio)

for index, ratio in enumerate(image_dimm_ratio_y_array):
    bounding_boxes_resized[index, 1] = round(bounding_boxes[index, 1] * ratio)
    bounding_boxes_resized[index, 3] = round(bounding_boxes[index, 3] * ratio)


# Convert labels to one-hot encoding
unique_labels = np.unique(labels)
label_to_index = {label: index for index, label in enumerate(unique_labels)}
index_to_label = {index: label for index, label in enumerate(unique_labels)}
labels = np.array([label_to_index[label] for label in labels])

print(image_dimm_ratio_x_array[4])
print(image_dimm_ratio_y_array[4])
print(bounding_boxes[4])
print(bounding_boxes_resized[4])

0.3111111111111111
0.4666666666666667
[205 155 568 314]
[ 64  72 177 147]


In [53]:
import plotly.graph_objects as go

index = 1

img_path = os.path.join(images_dir, f"{labels_df.iloc[index]['image_id']}.jpg")
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

fig = go.Figure()

# Add image
fig.add_trace(go.Image(z=img))

# Add bounding box rectangle
fig.add_shape(
    type="rect",
    x0=bounding_boxes[index][0],
    y0=bounding_boxes[index][1],
    x1=bounding_boxes[index][2],
    y1=bounding_boxes[index][3],
    line=dict(color="red", width=2),
)

# Layout adjustments
fig.update_layout(
    title="Bounding Box Visualization",
    xaxis=dict(showticklabels=False),  # Hide axis labels
    yaxis=dict(showticklabels=False),
    margin=dict(l=0, r=0, t=30, b=0),  # Reduce margins
)

# Show the interactive plot
fig.show()

fig = go.Figure()

# Add image
fig.add_trace(go.Image(z=images[index]))

# Add bounding box rectangle
fig.add_shape(
    type="rect",
    x0=bounding_boxes_resized[index][0],
    y0=bounding_boxes_resized[index][1],
    x1=bounding_boxes_resized[index][2],
    y1=bounding_boxes_resized[index][3],
    line=dict(color="red", width=2),
)

# Layout adjustments
fig.update_layout(
    title="Bounding Box Visualization",
    xaxis=dict(showticklabels=False),  # Hide axis labels
    yaxis=dict(showticklabels=False),
    margin=dict(l=0, r=0, t=30, b=0),  # Reduce margins
)

# Show the interactive plot
fig.show()

#### Step5: Split the data into training and testing sets


In [10]:
# // TODO
# Split the data into training and testing sets using the train_test_split function.
# - `processed_images`: The input images that have been resized and preprocessed.
# - `labels`: The corresponding labels for the images.
# - `bounding_boxes`: Bounding box information for the images.
# - `test_size=0.2`: Specifies that 20% of the data will be used for testing, while 80% will be used for training.
# - `random_state=42`: Sets a random seed for reproducibility.
x_train, x_test, y_train, y_test, bbox_train, bbox_test = train_test_split(
    processed_images,
    labels,
    bounding_boxes_resized,
    test_size=0.2,
    random_state=42,
    # shuffle=False,
)

#### Step6: Model Creation and Training


In [11]:
def create_model(input_shape, num_classes):
    inputs = keras.Input(shape=input_shape)
    x = layers.Conv2D(32, (3, 3), activation="relu")(inputs)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Conv2D(64, (3, 3), activation="relu")(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Conv2D(64, (3, 3), activation="relu")(x)
    x = layers.Flatten()(x)
    x = layers.Dense(64, activation="relu")(x)
    vehicle_class = layers.Dense(
        num_classes, activation="softmax", name="vehicle_class"
    )(x)
    bounding_box = layers.Dense(4, name="bounding_box")(x)

    model = keras.Model(inputs=inputs, outputs=[vehicle_class, bounding_box])
    return model


input_shape = processed_images[0].shape
print(f"input_shape: {input_shape}")
num_classes = len(unique_labels)
print(f"num_classes: {num_classes}")
model = create_model(input_shape, num_classes)


model.compile(
    optimizer="adam",
    loss={"vehicle_class": "sparse_categorical_crossentropy", "bounding_box": "mse"},
    metrics={"vehicle_class": "accuracy", "bounding_box": "mae"},
)

history = model.fit(
    x_train,
    {"vehicle_class": y_train, "bounding_box": bbox_train},
    epochs=25,  # 25
    validation_data=(x_test, {"vehicle_class": y_test, "bounding_box": bbox_test}),
)

2025-02-25 15:25:22.574095: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2025-02-25 15:25:22.574131: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 48.00 GB
2025-02-25 15:25:22.574134: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 18.00 GB
2025-02-25 15:25:22.574151: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-02-25 15:25:22.574160: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


input_shape: (224, 224, 3)
num_classes: 11
Epoch 1/25


2025-02-25 15:25:23.049747: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - bounding_box_loss: 3205192.7500 - bounding_box_mae: 809.4200 - loss: 3206027.0000 - vehicle_class_accuracy: 0.0336 - vehicle_class_loss: 834.5175 - val_bounding_box_loss: 10112.1885 - val_bounding_box_mae: 79.5804 - val_loss: 11368.0361 - val_vehicle_class_accuracy: 0.0200 - val_vehicle_class_loss: 1579.6504
Epoch 2/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - bounding_box_loss: 10638.8350 - bounding_box_mae: 81.5558 - loss: 11728.8398 - vehicle_class_accuracy: 0.2377 - vehicle_class_loss: 1090.0045 - val_bounding_box_loss: 3639.9202 - val_bounding_box_mae: 47.4432 - val_loss: 3903.6074 - val_vehicle_class_accuracy: 0.7350 - val_vehicle_class_loss: 298.0439
Epoch 3/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - bounding_box_loss: 5333.6494 - bounding_box_mae: 57.3673 - loss: 5655.8247 - vehicle_class_accuracy: 0.4296 - vehicle_class_loss: 322.175

In [12]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Extract history data
train_class_loss = history.history["vehicle_class_loss"]
val_class_loss = history.history["val_vehicle_class_loss"]

train_class_acc = history.history["vehicle_class_accuracy"]
val_class_acc = history.history["val_vehicle_class_accuracy"]

train_bbox_loss = history.history["bounding_box_loss"]
val_bbox_loss = history.history["val_bounding_box_loss"]

epochs = list(range(1, len(train_class_loss) + 1))  # Epoch indices

# Creating subplots
fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=[
        "Classification Loss",
        "Classification Accuracy",
        "Bounding Box Loss",
    ],
)

# Classification Loss
fig.add_trace(
    go.Scatter(
        x=epochs, y=train_class_loss, mode="lines+markers", name="Train Class Loss"
    ),
    row=1,
    col=1,
)
fig.add_trace(
    go.Scatter(x=epochs, y=val_class_loss, mode="lines+markers", name="Val Class Loss"),
    row=1,
    col=1,
)

# Classification Accuracy
fig.add_trace(
    go.Scatter(
        x=epochs, y=train_class_acc, mode="lines+markers", name="Train Class Accuracy"
    ),
    row=1,
    col=2,
)
fig.add_trace(
    go.Scatter(
        x=epochs, y=val_class_acc, mode="lines+markers", name="Val Class Accuracy"
    ),
    row=1,
    col=2,
)

# Bounding Box Loss
fig.add_trace(
    go.Scatter(
        x=epochs, y=train_bbox_loss, mode="lines+markers", name="Train BBox Loss"
    ),
    row=1,
    col=3,
)
fig.add_trace(
    go.Scatter(x=epochs, y=val_bbox_loss, mode="lines+markers", name="Val BBox Loss"),
    row=1,
    col=3,
)

# Updating axis titles
fig.update_xaxes(title_text="Epoch", row=1, col=1)
fig.update_yaxes(title_text="Loss", row=1, col=1)

fig.update_xaxes(title_text="Epoch", row=1, col=2)
fig.update_yaxes(title_text="Accuracy", row=1, col=2)

fig.update_xaxes(title_text="Epoch", row=1, col=3)
fig.update_yaxes(title_text="Loss", row=1, col=3)

# Layout update
fig.update_layout(title="Training & Validation Metrics", title_x=0.5)

# Show the plot
fig.show()

#### Step7: Model Evaluation


In [13]:
# Evaluate the model's performance on the test data.
# // TODO
results = model.evaluate(x_test, {"vehicle_class": y_test, "bounding_box": bbox_test})
# The 'model.evaluate' function calculates various metrics and loss values.
# It takes the test data 'x_test' as input and a dictionary that specifies the
# expected outputs for two different tasks: 'vehicle_class' and 'bounding_box'.

# Print the test results to the console.
results

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - bounding_box_loss: 3379.9812 - bounding_box_mae: 46.6979 - loss: 3377.5044 - vehicle_class_accuracy: 0.4992 - vehicle_class_loss: 14.1762


[3390.08251953125,
 13.079851150512695,
 3443.614990234375,
 46.85139465332031,
 0.5149999856948853]

#### Step8: Inferencing and Visualization


In [14]:
import matplotlib.pyplot as plt

# Choose a few sample images for inference
# // TODO  (Adjust the number of sample images as needed)
sample_images = x_train[:20]

# Perform inference on the sample images
# // TODO
predictions = model.predict(sample_images)
print(f"predictions: {predictions[1]}")

# Extract the predicted bounding box coordinates
# // TODO
predicted_bounding_boxes = predictions[1]

# Visualize the sample images with predicted bounding boxes
for i in range(len(sample_images)):

    fig = go.Figure()

    # Add image
    fig.add_trace(go.Image(z=x_train[i]))

    # Add bounding box rectangle
    fig.add_shape(
        type="rect",
        x0=bbox_train[i][0],
        y0=bbox_train[i][1],
        x1=bbox_train[i][2],
        y1=bbox_train[i][3],
        line=dict(color="green", width=2),
    )
    fig.add_shape(
        type="rect",
        x0=predicted_bounding_boxes[i][0],
        y0=predicted_bounding_boxes[i][1],
        x1=predicted_bounding_boxes[i][2],
        y1=predicted_bounding_boxes[i][3],
        line=dict(color="red", width=2),
    )

    # Layout adjustments
    fig.update_layout(
        title="Bounding Box Visualization",
        xaxis=dict(showticklabels=False),  # Hide axis labels
        yaxis=dict(showticklabels=False),
        margin=dict(l=0, r=0, t=30, b=0),  # Reduce margins
    )

    # Show the interactive plot
    fig.show()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
predictions: [[132.14848   66.33541  156.47726   83.28092 ]
 [ 70.3804    69.007324  83.53854   94.27268 ]
 [ 53.942467  99.61762   97.57958  119.34757 ]
 [ 90.278244  61.934967 137.05554  124.02759 ]
 [132.76646   97.71053  159.5479   125.48065 ]
 [129.88419   57.782745 162.89944   90.423065]
 [ 89.95944   71.37077  178.69629  160.6948  ]
 [110.64884   53.55343  140.04138   78.54511 ]
 [101.57825   81.37673  136.55794  130.66495 ]
 [132.76646   97.71053  159.5479   125.48065 ]
 [136.71204   81.68462  155.76108  108.251854]
 [ 38.257523  68.837395 118.79158  127.24294 ]
 [ 78.224915  71.42678  113.70347  123.58059 ]
 [132.76646   97.71053  159.5479   125.48065 ]
 [ 75.31433   75.95849  100.19386   98.475174]
 [139.04349   99.55401  165.39236  138.62996 ]
 [130.64763   96.25453  152.11015  116.97047 ]
 [ 86.64454   89.27372  148.64116  136.75766 ]
 [ 79.63451   49.509106 118.58498   96.346176]
 [107.068504  45.71073