In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Adam
from torchsummary import summary
import numpy as np
import time
import os
import random
from google.colab import drive

# Get Faster R-CNN models
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights

# Imports for Data
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms, Resize, ToTensor
from torchvision.transforms.functional import to_pil_image, convert_image_dtype
from torchvision.io import read_image
from torchvision.utils import draw_bounding_boxes

# Mount Google Drive (data storage) and select the compute device
drive.mount('/content/drive')
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
# Load the two pretrained detectors compared in this notebook
frcnn_weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
classes = frcnn_weights.meta["categories"]  # indexed by Faster R-CNN label ids to get class names
fast_rcnn = fasterrcnn_resnet50_fpn(weights=frcnn_weights)
fast_rcnn.eval()

yolo = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

In [None]:
# Experiment 1: Testing models on random images

# Images
imgs = ["alan.jpg", "cats2.jpg", "cats.jpg", "shed.jpg", "market.jpg"]

# Faster R-CNN is fed float tensors, so normalise each image to [0, 1]
img_tensors = [read_image(path) / 255.0 for path in imgs]

# Inference
# Faster R-CNN is only being evaluated here, so skip autograd bookkeeping:
# without no_grad the returned boxes/scores carry a grad_fn and hold the graph alive.
with torch.no_grad():
    fast_rcnn_results = fast_rcnn(img_tensors)
# YOLOv5 is given the file paths directly and does its own loading
yolo_results = yolo(imgs)

In [None]:
# Display Results with bounding boxes and captions
def show_bb(img, boxes, captions):
    """Draws captioned bounding boxes on an image and displays it inline.

    Args:
        img: Image tensor. A floating-point image is treated as being in
            [0, 1] and rescaled to uint8; an integer image is assumed to be
            in [0, 255] already and is only cast, so it is not multiplied a
            second time (which would wrap around in uint8).
        boxes: Tensor of boxes, one row per box, passed straight to
            ``draw_bounding_boxes`` (callers in this notebook pass xyxy
            pixel coordinates).
        captions: One caption string per box.
    """
    img = img.detach()
    if torch.is_floating_point(img):
        img = (img * 255).to(torch.uint8)
    else:
        img = img.to(torch.uint8)
    img = draw_bounding_boxes(img, boxes, captions, width=1)
    img = to_pil_image(img)
    display(img)

# Print the YOLO detection summary
yolo_results.print()

# Display the results for each image from faster r-cnn
for img_tensor, result in zip(img_tensors, fast_rcnn_results):

    # One caption per detection: class name plus confidence as a whole percentage.
    # .item() turns the score tensor into a plain float so the caption does not
    # read "tensor(99., ...)".
    captions = [
        f"{classes[label]} {round(conf.item() * 100)}%"
        for label, conf in zip(result["labels"], result["scores"])
    ]

    # Display
    show_bb(img_tensor, result["boxes"], captions)

# Display the YOLO results
yolo_results.show()

In [None]:
# Experiment 2: Testing models on bus dataset

# Get Data for R-CNN
class BusDataset(Dataset):
    """Image/label dataset for the Faster R-CNN evaluation.

    Rows come from ``<data_root>/subset.csv`` and images from
    ``<data_root>/images/images/<ImageID>.jpg``. Each item is ``(img, label)``
    where ``img`` is the decoded image divided by 255 and ``label`` holds the
    row's ``LabelName, XMin, XMax, YMin, YMax`` values.
    """

    def __init__(self, data_root):
        self.root = data_root
        self.image_root = data_root + "/images/images"

        # Annotations: image ids go to x, class label plus box columns go to y
        annotations = pd.read_csv(data_root + "/subset.csv")
        self.x = annotations["ImageID"]
        self.y = annotations[["LabelName", "XMin", "XMax", "YMin", "YMax"]]

    def __getitem__(self, ix):
        # Load the image belonging to this row and scale it by 1/255
        image_path = f"{self.image_root}/{self.x.iloc[ix]}.jpg"
        pixels = read_image(image_path) / 255.0
        return pixels, self.y.iloc[ix].values

    def __len__(self):
        return len(self.x)

# Dataset instance for the Faster R-CNN evaluation; the path is relative to the
# current working directory, so Drive must already be mounted
test_dataset = BusDataset("drive/MyDrive/Data/bus_data")


In [None]:
def area_from_points(min_p, max_p):
    """Calculates the area of an axis-aligned rectangle from its min and max corners.

    Args:
        min_p: ``(x, y)`` of the corner with the smallest coordinates.
        max_p: ``(x, y)`` of the corner with the largest coordinates.

    Returns:
        ``width * height`` of the rectangle.
    """
    width = max_p[0] - min_p[0]
    height = max_p[1] - min_p[1]
    return width * height


def compute_IoU(box1, box2):
    """Computes Intersection over Union for two bounding boxes.

    Args:
        box1: Box in xxyy order, ``[x_min, x_max, y_min, y_max]``.
        box2: Box in the same xxyy order.

    Returns:
        The IoU of the two boxes; ``0.0`` when they do not overlap (or only
        touch) or when the union has no area.
    """

    min1, max1 = (box1[0], box1[2]), (box1[1], box1[3])
    min2, max2 = (box2[0], box2[2]), (box2[1], box2[3])

    # The intersection corners have to be taken per axis. max()/min() on the
    # corner tuples themselves compares lexicographically (x first, y only as
    # a tie-break) and picks a whole corner from one box, which is wrong
    # whenever the boxes are offset differently in x and y.
    i_min = (max(min1[0], min2[0]), max(min1[1], min2[1]))
    i_max = (min(max1[0], max2[0]), min(max1[1], max2[1]))
    if i_min[0] >= i_max[0] or i_min[1] >= i_max[1]:
        return 0.0
    intersection = area_from_points(i_min, i_max)

    area1 = area_from_points(min1, max1)
    area2 = area_from_points(min2, max2)
    union = area1 + area2 - intersection
    if union <= 0:
        # Degenerate input; avoid dividing by zero
        return 0.0
    return intersection / union


In [None]:
def convert_boxes(boxes):
    """Converts bounding boxes from xxyy to xyxy format.

    Each box has its two middle entries swapped, so the same call also maps
    xyxy back to xxyy. Returns a new list of 4-element lists.
    """
    return [[b[0], b[2], b[1], b[3]] for b in boxes]

def evaluate_fast_rcnn(model, dataset, num_images=50, confidence_thresh=.3, iou_thresh=.4):
    """Evaluates Faster R-CNN bus detections against the labelled dataset.

    Every confident "bus" detection counts as one prediction. It is correct
    when the image is labelled "Bus" and the detection's IoU with the labelled
    box exceeds ``iou_thresh``; incorrect detections are displayed with
    ``show_bb``. The returned accuracy is therefore the share of confident bus
    detections that were correct.

    Args:
        model: Detection model; called with a list holding one image tensor
            and expected to return a list of dicts with "boxes" (xyxy),
            "labels" and "scores".
        dataset: Indexable dataset yielding ``(img, label)`` where ``label``
            is ``[class_name, XMin, XMax, YMin, YMax]`` with coordinates
            given as fractions of the image size.
        num_images: Maximum number of leading dataset items to evaluate.
        confidence_thresh: Minimum score for a detection to be considered.
        iou_thresh: Minimum IoU for a detection to count as correct.

    Returns:
        ``(accuracy, testing_time)``: accuracy as a percentage (``0.0`` when
        there were no confident bus detections) and elapsed seconds.
    """
    correct = 0
    total = 0

    # Set the model to evaluation mode
    model.eval()

    # Never index past the end of a dataset shorter than num_images
    num_images = min(num_images, len(dataset))

    start = time.time()
    # Disable gradient calculation: this is inference only
    with torch.no_grad():
        for i in range(num_images):
            print(f"STARTING IMAGE {i}...")

            # Get image, label, and prediction
            img, label = dataset[i]
            prediction = model([img])[0]
            true_class = label[0]
            height, width = img.size()[1], img.size()[2]

            # Scale the labelled box (xxyy, fractions of the image) to pixels
            x_min, x_max, y_min, y_max = label[1:5]
            true_box = [x_min * width, x_max * width, y_min * height, y_max * height]

            detections = zip(prediction["labels"], prediction["scores"], prediction["boxes"])
            for pred_label, score, pred_box in detections:

                # Only confident "bus" detections are evaluated
                if not (classes[pred_label] == "bus" and float(score) > confidence_thresh):
                    continue

                # Model boxes are xyxy; compute_IoU works on xxyy
                predict_box = convert_boxes([pred_box.tolist()])[0]
                iou = compute_IoU(true_box, predict_box)

                if true_class != "Bus":
                    # Display our wrong guess, if there is no bus
                    boxes = convert_boxes([predict_box])
                    show_bb(img, torch.tensor(boxes), ["Predicted Bus"])
                elif iou > iou_thresh:
                    correct += 1
                else:
                    # Display the bad guess, if we are confident and missed
                    boxes = convert_boxes([true_box, predict_box])
                    show_bb(img, torch.tensor(boxes), ["True Bus", "Predicted Bus"])

                print(f"Predicted Box: {predict_box}, IoU: {iou}")
                total += 1

    # Calculate accuracy; guard against no confident bus detections at all
    accuracy = (correct / total) * 100.0 if total else 0.0
    testing_time = time.time() - start
    return accuracy, testing_time

# Run through our tests
# evaluate_fast_rcnn returns (accuracy, testing_time); it is the last
# expression of the cell
evaluate_fast_rcnn(fast_rcnn, test_dataset)

In [None]:
class YoloDataset(Dataset):
    """Path/label dataset for the YOLOv5 evaluation.

    Rows come from ``<data_root>/subset.csv``. Each item is
    ``(path, (height, width), label)``: the image file path, the image's
    second and third tensor dimensions, and the row's
    ``LabelName, XMin, XMax, YMin, YMax`` values.
    """

    def __init__(self, data_root):
        self.root = data_root
        self.image_root = data_root + "/images/images"

        # Annotations: image ids go to x, class label plus box columns go to y
        annotations = pd.read_csv(data_root + "/subset.csv")
        self.x = annotations["ImageID"]
        self.y = annotations[["LabelName", "XMin", "XMax", "YMin", "YMax"]]

    def __getitem__(self, ix):
        # The image is decoded only to learn its size; the path itself is returned
        image_path = f"{self.image_root}/{self.x.iloc[ix]}.jpg"
        dims = read_image(image_path).size()
        row_values = self.y.iloc[ix].values
        return image_path, (dims[1], dims[2]), row_values

    def __len__(self):
        return len(self.x)


def evaluate_yoloV5(model, dataset, num_images=50, confidence_thresh=0.3, iou_thresh=0.4):
    """Evaluates YOLOv5 bus detections against the labelled dataset.

    A true prediction consists of:
        1) A confident "bus" when the image contains a bus, with
        2) An accurate bounding box (IoU above ``iou_thresh``)
    Every other confident "bus" detection is displayed with ``show_bb``.

    Args:
        model: YOLOv5 model; called with an image path, its result must
            support ``.pandas().xyxy[0]``.
        dataset: Indexable dataset yielding ``(path, (height, width), label)``
            where ``label`` is ``[class_name, XMin, XMax, YMin, YMax]`` with
            coordinates given as fractions of the image size.
        num_images: Maximum number of leading dataset items to evaluate.
        confidence_thresh: Minimum confidence for a detection to be considered.
        iou_thresh: Minimum IoU for a detection to count as accurate.

    Returns:
        ``(accuracy, testing_time)``: percentage of confident bus detections
        that were accurate (``0.0`` when there were none) and elapsed seconds.
    """
    accurate_predictions = 0
    total_bus_predictions = 0

    # Never index past the end of a dataset shorter than num_images
    num_images = min(num_images, len(dataset))

    start = time.time()
    for i in range(num_images):

        img_path, (height, width), true_label = dataset[i]

        # Get the true class and scale the labelled box (xxyy fractions) to pixels
        true_class = true_label[0]
        x_min, x_max, y_min, y_max = true_label[1:5]
        true_box = [x_min * width, x_max * width, y_min * height, y_max * height]

        # Compute and store the results for the yolo model.
        # What the results format should look like:
        #     xmin    ymin    xmax   ymax  confidence  class    name
        # 0  749.50   43.50  1148.0  704.5    0.874023      0  person
        # 2  114.75  195.75  1095.0  708.0    0.624512      0  person
        # 3  986.00  304.00  1028.0  420.0    0.286865     27     tie
        label_table = model(img_path).pandas().xyxy[0]

        for _, row in label_table.iterrows():
            # Only look at the predictions that are a confident "bus"
            if not (row["name"] == "bus" and row["confidence"] > confidence_thresh):
                continue

            total_bus_predictions += 1
            pred_box = [row["xmin"], row["xmax"], row["ymin"], row["ymax"]]  # xxyy
            iou = compute_IoU(pred_box, true_box)

            if iou > iou_thresh and true_class == "Bus":
                # Increase accuracy if IoU passes our threshold
                accurate_predictions += 1
                continue

            if true_class != "Bus":
                # Display the incorrect bus prediction box
                draw_boxes = convert_boxes([pred_box])
                captions = ["Predicted Bus"]
            else:
                # Display the true bus bounding box and our incorrect bus prediction box
                draw_boxes = convert_boxes([true_box, pred_box])
                captions = ["True Bus", "Predicted Bus"]

            # Load into a separate name so img_path stays a path for any later
            # detection in this image, and scale to [0, 1] so show_bb's
            # rescaling to uint8 does not overflow.
            image = read_image(img_path) / 255.0
            show_bb(image, torch.tensor(draw_boxes), captions)

    # Guard against no confident bus detections at all
    if total_bus_predictions:
        accuracy = (accurate_predictions / total_bus_predictions) * 100.0
    else:
        accuracy = 0.0
    testing_time = time.time() - start
    return accuracy, testing_time

# Build the YOLO dataset from the bus_data folder and run the evaluation;
# evaluate_yoloV5 returns (accuracy, testing_time)
yolo_dataset = YoloDataset("drive/MyDrive/Data/bus_data")
evaluate_yoloV5(yolo, yolo_dataset)

In [None]:
# Builds subset.csv, the annotation file read by BusDataset / YoloDataset.
# NOTE: this cell has to have been run once before the evaluation cells.

# Create a list of images we were able to download
downloaded_files = os.listdir("drive/MyDrive/Data/bus_data/images/images")
# Compare on bare image ids. Stripping the extension with str.rstrip('.jpg')
# would remove any trailing '.', 'j', 'p' or 'g' characters and corrupt ids
# that happen to end in one of them.
downloaded_ids = {
    os.path.splitext(file_name)[0]
    for file_name in downloaded_files
    if file_name.endswith(".jpg")
}

# Create a new DataFrame with only the downloaded images (12,308)
original_df = pd.read_csv("drive/MyDrive/Data/bus_data/df.csv")
downloaded_df = (
    original_df[original_df["ImageID"].isin(downloaded_ids)]
    # keep=False drops every image that has more than one annotation row
    .drop_duplicates(subset=["ImageID"], keep=False)
)

# Filter down to a subset
size_of_subset = 50
subset_seed = 0  # fixed seed so re-running the cell selects the same images
print(downloaded_df["ImageID"].nunique())
available_ids = downloaded_df["ImageID"].tolist()
subset = random.Random(subset_seed).sample(
    available_ids, min(size_of_subset, len(available_ids))
)
subset_df = downloaded_df[downloaded_df["ImageID"].isin(subset)]
print(subset_df)

# Download to drive: write the sampled subset, not the full downloaded frame
subset_df.to_csv('drive/MyDrive/Data/bus_data/subset.csv', index=False)