## FHNW bverI - HS2023

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Object-Detection

## Lernziele

- Objekt Klassifikation und Lokalisation: An einem einfachen Beispiel verstehen wie Objekte lokalisiert werden können.
- Object Detection: Anwenden & verstehen von Pre-Trained Modellen

## Setup

Im Folgenden installieren und laden wir die benötigten Python-Packages. Danach setzen wir die Pfade für den Zugriff auf Daten und spezifizieren einen Output-Folder.

Mount your google drive to store data and results.

In [None]:
from pathlib import Path

In [None]:
# Detect whether the notebook runs on Google Colab: the `google.colab`
# package is only importable there.
try:
    import google.colab  # noqa: F401  (imported only to probe availability)
except ImportError:
    # Only a missing package means "not on Colab"; any other error should
    # surface instead of being silently swallowed by a bare `except`.
    IN_COLAB = False
else:
    IN_COLAB = True

print(f"In colab: {IN_COLAB}")

In [None]:
# On Colab only: mount Google Drive at /content/drive so that data and
# results can be stored there.
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

Modifizieren Sie die folgenden Pfade bei Bedarf.

In [None]:
# Root folder for all data files: the mounted Google Drive on Colab,
# otherwise a `data` folder next to the notebook's parent directory.
DATA_PATH = Path('/content/drive/MyDrive/bverI/data' if IN_COLAB else '../data')

Install packages not in base Colab environment.

In [None]:
if IN_COLAB:
    # Imported locally so this cell does not depend on earlier imports.
    import subprocess
    import sys

    # Install into the interpreter that runs this kernel; `check=True` makes a
    # failed installation visible instead of continuing silently.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "torchshow", "torchinfo", "gdown"],
        check=True,
    )

In [None]:
from IPython.display import Image 
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import seaborn as sns
import torch
from tqdm.notebook import tqdm

## Object Klassifikation und Lokalisierung

In dieser Aufgabe machen wir es uns etwas einfacher: Wir klassifizieren und lokalisieren genau ein Objekt pro Bild. Dies soll zeigen, wie Object Detection im Ansatz funktioniert, und die Schwierigkeit der Problemstellung verdeutlichen.

### Datensatz

Wir erstellen uns einen künstlichen Datensatz. Dieser erstellt on-the-fly Bilder und Labels.

In [None]:
import numpy as np
import torch
import cv2
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms.functional as F

class ShapeDataset(Dataset):
    """Synthetic dataset of images containing random circles, rectangles and triangles.

    Images and labels are generated on the fly from a seeded NumPy random
    generator; nothing is stored. Each sample is ``(img, labels)`` where
    ``labels`` is ``{"class": [...], "box": [...]}`` holding one class index
    and one ``[xmin, ymin, xmax, ymax]`` pixel box per drawn shape.

    Note: a sample depends on the generator state, not on the requested index,
    so asking for the same index twice yields different images.

    Args:
        num_samples: Length reported by ``len()``.
        img_size: Height and width of the square images in pixels.
        seed: Seed of the random generator.
        max_number_of_shapes_per_image: Upper bound (inclusive) for the number
            of shapes drawn per image; at least one shape is always drawn.
        background: ``"random"`` (uniform noise) or ``"white"``.
        transforms: Optional callable applied to the ``(H, W, 3)`` uint8 image.

    Raises:
        ValueError: If ``background`` is not supported or
            ``max_number_of_shapes_per_image`` is smaller than 1.
    """

    BACKGROUNDS = ("random", "white")

    def __init__(self, num_samples=1000, img_size=256, seed=123, max_number_of_shapes_per_image=5, background="random", transforms=None):
        if background not in self.BACKGROUNDS:
            raise ValueError(
                f"background must be one of {self.BACKGROUNDS}, got {background!r}"
            )
        if max_number_of_shapes_per_image < 1:
            raise ValueError("max_number_of_shapes_per_image must be >= 1")
        self.num_samples = num_samples
        self.img_size = img_size
        self.rng = np.random.default_rng(seed=seed)
        self.max_number_of_shapes_per_image = max_number_of_shapes_per_image
        self.classes = ["circle", "rectangle", "triangle"]
        self.class_map = {k: i for i, k in enumerate(self.classes)}
        self.background = background
        self.transforms = transforms

    def __len__(self):
        return self.num_samples

    def _random_color(self):
        """Draw a random colour as a tuple of three Python ints in [50, 255)."""
        return tuple(int(value) for value in self.rng.integers(50, 255, size=3))

    def _random_extent(self):
        """Draw a random length between 5% and 20% of the image size (truncated to int)."""
        return int(self.rng.uniform(0.05 * self.img_size, 0.2 * self.img_size))

    def _random_anchor(self):
        """Draw a random (x, y) point inside the top-left 80% of the image."""
        limit = int(self.img_size * 0.8)
        return int(self.rng.integers(0, limit)), int(self.rng.integers(0, limit))

    def draw_random_circle(self, img, annotations):
        """Draw a filled random circle into ``img`` (in place) and append its annotation."""
        color = self._random_color()

        # Centre anywhere in the image; plain ints keep the OpenCV call
        # independent of NumPy scalar handling.
        center = (
            int(self.rng.integers(0, self.img_size)),
            int(self.rng.integers(0, self.img_size)),
        )
        radius = self._random_extent()

        cv2.circle(img, center, radius, color, -1)

        # Bounding box (xmin, ymin, xmax, ymax); may exceed the image and is
        # clipped later by `_clip_bounding_box_to_image`.
        bbox = (center[0] - radius, center[1] - radius, center[0] + radius, center[1] + radius)
        annotations.append({"class": self.class_map["circle"], "box": bbox})

    def draw_random_rectangle(self, img, annotations):
        """Draw a filled random rectangle into ``img`` (in place) and append its annotation."""
        color = self._random_color()

        # Top-left corner, then width and height.
        xmin, ymin = self._random_anchor()
        width = self._random_extent()
        height = self._random_extent()
        xmax = xmin + width
        ymax = ymin + height

        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, -1)

        annotations.append({"class": self.class_map["rectangle"], "box": (xmin, ymin, xmax, ymax)})

    def draw_random_triangle(self, img, annotations):
        """Draw a filled random triangle into ``img`` (in place) and append its annotation."""
        color = self._random_color()

        # First vertex, then the horizontal base (pt1 -> pt2) and the apex
        # (pt3) centred over the base, `height` pixels towards smaller y.
        pt1 = self._random_anchor()
        width = self._random_extent()
        height = self._random_extent()
        pt2 = (pt1[0] + width, pt1[1])
        pt3 = (int((pt1[0] + pt2[0]) / 2), pt1[1] - height)

        # OpenCV works with 32-bit integer point coordinates.
        triangle_points = np.array([pt1, pt2, pt3], dtype=np.int32)
        cv2.drawContours(img, [triangle_points], 0, color, -1)

        x_coords = [pt[0] for pt in (pt1, pt2, pt3)]
        y_coords = [pt[1] for pt in (pt1, pt2, pt3)]
        bounding_box = (min(x_coords), min(y_coords), max(x_coords), max(y_coords))
        annotations.append({"class": self.class_map["triangle"], "box": bounding_box})

    def _clip_bounding_box_to_image(self, annotations):
        """Clamp the most recently added box to the valid pixel range [0, img_size - 1]."""
        upper = self.img_size - 1
        annotations[-1]['box'] = [
            min(max(int(coord), 0), upper) for coord in annotations[-1]['box']
        ]

    def _make_background(self):
        """Create the empty ``(img_size, img_size, 3)`` uint8 canvas."""
        shape = (self.img_size, self.img_size, 3)
        if self.background == 'random':
            return self.rng.integers(0, 256, shape, dtype=np.uint8)
        if self.background == 'white':
            return np.full(shape, 255, dtype=np.uint8)
        # Only reachable if `self.background` was changed after construction.
        raise ValueError(f"unsupported background: {self.background!r}")

    def __getitem__(self, idx):
        """Generate one random sample.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``. This
                also lets plain iteration (``for sample in dataset``) stop
                after ``len(self)`` samples instead of running forever.
        """
        if not -self.num_samples <= idx < self.num_samples:
            raise IndexError(f"index {idx} out of range for {self.num_samples} samples")

        img = self._make_background()
        annotations = []

        draw_shape = {
            "circle": self.draw_random_circle,
            "rectangle": self.draw_random_rectangle,
            "triangle": self.draw_random_triangle,
        }

        # Between 1 and max_number_of_shapes_per_image shapes (inclusive).
        num_shapes = int(self.rng.integers(1, self.max_number_of_shapes_per_image + 1))
        for _ in range(num_shapes):
            choice = str(self.rng.choice(self.classes))
            draw_shape[choice](img, annotations)
            self._clip_bounding_box_to_image(annotations)

        # Optional conversion, e.g. to a PyTorch tensor.
        if self.transforms is not None:
            img = self.transforms(img)

        labels = {'class': [x['class'] for x in annotations], 'box': [x['box'] for x in annotations]}

        return img, labels

Wir schauen uns den Datensatz nun etwas genauer an.

In [None]:
import torchvision.transforms.functional as FT
from torchvision.utils import draw_bounding_boxes
from torchvision import transforms

# Small white-background dataset with exactly one shape per image.
dataset = ShapeDataset(
    img_size=64,
    max_number_of_shapes_per_image=1,
    background="white",
    transforms=transforms.ToTensor())

# Show the first seven generated samples together with their boxes.
for i in range(7):
    img, annotations = dataset[i]
    # ToTensor yields floats in [0, 1]; draw_bounding_boxes expects uint8.
    img = (img * 255.0).to(torch.uint8)
    boxes = torch.tensor(annotations['box'])
    img_with_box = draw_bounding_boxes(image=img, boxes=boxes)
    pil_image = FT.to_pil_image(img_with_box)
    fig, ax = plt.subplots(figsize=(2, 2))
    _ = ax.imshow(pil_image)
    _ = ax.set_xticks([])
    _ = ax.set_yticks([])
    plt.show()

**FRAGE:** Was sieht man im Output oben? Was beinhaltet der Datensatz?

### Trainings-Datensatz erstellen

Nun erstellen wir einen Trainings-Datensatz.

Als erstes schauen wir uns den Output des `DataLoader` Objektes an. Damit muss unser Modell umgehen können.

Überprüfen Sie die Outputs auf Datentyp und Shape.

`collate_fn` definiert wie Samples gebatcht werden: [Link](https://pytorch.org/docs/stable/data.html#dataloader-collate-fn)

In [None]:
from torch.utils.data import DataLoader

# Convert to a tensor, then subtract 0.5; a std of 1.0 leaves the scale
# unchanged.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=0.5, std=1.0),
])

def collate_fn(batch):
    """Batch ``(image, annotation)`` samples.

    Images are stacked into one tensor along a new first dimension; the
    annotations are returned unchanged as a tuple with one entry per sample.
    """
    unzipped = list(zip(*batch))
    images, annotations = unzipped
    image_batch = torch.stack(images)
    return image_batch, annotations

# Training dataset: 1000 on-the-fly 64x64 images on a white background with
# exactly one shape per image.
dataset = ShapeDataset(
    img_size=64,
    num_samples=1000,
    background="white",
    max_number_of_shapes_per_image=1,
    transforms=transform)

# Shuffled batches of 4 samples, assembled by the custom collate_fn.
dataloader = DataLoader(
    dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)

# Fetch a single batch for inspection.
images, annotations = next(iter(dataloader))
# Exercise: inspect the data type and shape of `images` and the structure of
# `annotations`.
# YOUR CODE HERE
raise NotImplementedError()

**Frage:** Wie sieht der Output des `ShapeDataset` mit einem `DataLoader` aus? Was sind die Schwierigkeiten?

### Architektur Definieren

Wir definieren nun eine Architektur, die aus drei Komponenten besteht:

- Backbone: Ein CNN für die Feature-Extraktion
- Classification-Head: Modelliert die Klassifikation
- Bounding-Box-Regression-Head: Modelliert die Bounding-Box Regression

Erstellen Sie den Backbone mit 3 Convolutional Layers. 

- Conv Layer 1: 16 Filters, Stride 1
- Conv Layer 2: 32 Filters, Stride 2
- Conv Layer 3: 64 Filters, Stride 2

Ergänzen Sie die Layers für die Heads.

In [None]:
import torch
import torch.nn as nn

class CNNBackbone(nn.Module):
    """Convolutional feature extractor followed by one fully connected layer.

    ``forward`` uses the attributes ``conv1``, ``conv2`` and ``conv3``, which
    have to be defined in ``__init__`` (exercise placeholder).
    """
    def __init__(self, input_shape=(64, 64), output_features=128):
        """Set up the layers.

        Args:
            input_shape: (height, width) of the input images.
            output_features: Size of the feature vector returned by ``forward``.
        """
        super(CNNBackbone, self).__init__()
        # Exercise: define self.conv1, self.conv2 and self.conv3 here.
        # YOUR CODE HERE
        raise NotImplementedError()
        # Size of the flattened conv output fed into fc1: 64 channels on a
        # grid shrunk by `global_stride` per dimension.
        # NOTE(review): this assumes conv3 has 64 output channels and that the
        # convs only shrink the grid by their strides (1 * 2 * 2) -- confirm
        # against the layers defined above.
        global_stride = 4
        cnn_features = 64 * (input_shape[0] // global_stride) * (input_shape[1] // global_stride)
        self.fc1 = nn.Linear(cnn_features, output_features)

    def forward(self, x):
        """Apply the three conv layers and fc1, each followed by a ReLU."""
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        # Keep the batch dimension, flatten everything else for the linear layer.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        return x

class ClassificationHead(nn.Module):
    """Maps backbone features to class logits via ``self.fc2_class``."""
    def __init__(self, num_classes, num_input_features):
        """Set up the head.

        Args:
            num_classes: Number of classes to predict.
            num_input_features: Size of the incoming feature vector.
        """
        super(ClassificationHead, self).__init__()
        # Exercise: define the layer used by `forward`.
        # self.fc2_class = ...
        # YOUR CODE HERE
        raise NotImplementedError()
    def forward(self, x):
        """Return the output of ``fc2_class`` for the features ``x``."""
        class_logits = self.fc2_class(x)
        return class_logits

class DetectionHead(nn.Module):
    """Maps backbone features to bounding-box coordinates via ``self.fc2_bb``."""
    def __init__(self, num_input_features):
        """Set up the head.

        Args:
            num_input_features: Size of the incoming feature vector.
        """
        super(DetectionHead, self).__init__()
        # Exercise: define the layer used by `forward`.
        # self.fc2_bb = ...
        # YOUR CODE HERE
        raise NotImplementedError()

    def forward(self, x):
        """Return the output of ``fc2_bb`` for the features ``x``."""
        bb_coords = self.fc2_bb(x)
        return bb_coords

class ObjectClassificationAndLocalization(nn.Module):
    """Shared CNN backbone with one classification and one bounding-box head."""

    def __init__(self, input_shape, num_features, num_classes):
        """Build the backbone and both heads.

        Args:
            input_shape: (height, width) of the input images.
            num_features: Size of the feature vector produced by the backbone.
            num_classes: Number of classes for the classification head.
        """
        super().__init__()
        self.backbone = CNNBackbone(input_shape=input_shape, output_features=num_features)
        self.classification_head = ClassificationHead(num_classes, num_features)
        self.detection_head = DetectionHead(num_features)

    def forward(self, x):
        """Return ``(class_scores, bb_coords)`` computed from the same features."""
        features = self.backbone(x)
        return self.classification_head(features), self.detection_head(features)

### Initialize Modell, Lossfunktion und Optimizer

Initialisieren Sie ihr Modell. Definieren Sie Loss-Funktionen (separate für Klassifikation und Regression).

In [None]:
# Model for 64x64 inputs, a 128-dimensional feature vector and 3 classes.
net = ObjectClassificationAndLocalization(
    input_shape=(64, 64),
    num_features=128,
    num_classes=3)

# Exercise: define one loss for the classification and one for the
# bounding-box regression.
# loss_fn_class = nn.
# loss_fn_bbx = nn.

# YOUR CODE HERE
raise NotImplementedError()

# A single optimizer updates the backbone and both heads.
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

### Modell Training

Nun trainieren wir das Modell für 30 Epochen.

In [None]:
num_epochs = 30

for epoch in range(num_epochs):
    # Per-epoch sums of the batch losses, averaged when printing.
    running_loss = 0.0
    running_cls_loss = 0.0
    running_bbx_loss = 0.0
    for step, (images, annotations) in enumerate(dataloader):
        # Each sample is treated as having a single shape: take the first
        # class/box of every annotation.
        labels_class = torch.tensor([sample['class'][0] for sample in annotations])
        labels_bb = torch.tensor([sample['box'][0] for sample in annotations])

        # Scale the pixel coordinates of the boxes by images.shape[2]
        labels_bb_scaled = labels_bb / images.shape[2]

        # Forward pass
        class_scores, bb_coords = net(images)

        # Exercise: compute classification and regression losses
        # loss_class = ...
        # loss_bb = ...
        # YOUR CODE HERE
        raise NotImplementedError()

        # Total loss (you can adjust weights for classification and regression losses)
        # total_loss = ...
        # YOUR CODE HERE
        raise NotImplementedError()

        # Backpropagation and optimization
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # Accumulate statistics; note that the reported total is the plain
        # sum of both losses, independent of any weighting in total_loss.
        running_cls_loss += loss_class.item()
        running_bbx_loss += loss_bb.item()
        running_loss += (loss_class.item() + loss_bb.item())

    # Report the mean losses over all steps of this epoch.
    print(f"epoch: {epoch + 1:02d} - step: {step + 1:5d} - total loss: {running_loss / (step+1):.3f} \
        Class Loss: {running_cls_loss  / (step+1):.4f} BB Loss: {running_bbx_loss / (step+1) :.4f}")

**Frage:** Was fällt auf wenn Sie die Loss-Werte betrachten? Was können Sie dagegen tun?

### Modell Evaluation

Nun evaluieren wir das Modell (mit einfachen Mitteln).

In [None]:
# Held-out data: a seed different from the training dataset's default (123)
# is required, otherwise the test set replays the stream of samples the
# training dataset started with.
test_dataset = ShapeDataset(
    background="white",
    max_number_of_shapes_per_image=1,
    img_size=64,
    num_samples=256,
    seed=2023,
    transforms=transform)

# Evaluate on the test dataset (not the training `dataset`); one batch holds
# all 256 test samples.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=256,
    collate_fn=collate_fn)

Hier schauen wir uns die Klassifikations-Performance an.

In [None]:
# Predict the whole first test batch without tracking gradients.
test_images, test_labels = next(iter(test_dataloader))
net = net.eval()
with torch.no_grad():
    class_scores, bb_coords = net(test_images)

# Predicted class = index of the highest score; true class = first (only)
# class entry of each annotation.
y_class_pred = torch.argmax(class_scores, 1).numpy()
y_class_true = np.array([sample['class'][0] for sample in test_labels])

# Print the first eleven prediction/label pairs.
for y_pred, y_true in zip(y_class_pred[:11], y_class_true[:11]):
    print(f"Predicted: {test_dataset.classes[y_pred]} True: {test_dataset.classes[y_true]}")

num_correct = (y_class_pred == y_class_true).sum()
print(f"Accuracy: {num_correct / len(y_class_true)} ")

Nun visualisieren wir die Predictions und Ground Truth.

In [None]:
# Fresh samples for visual inspection; a seed different from the training
# dataset's default (123) avoids replaying the training sample stream.
test_dataset = ShapeDataset(
    img_size=64,
    max_number_of_shapes_per_image=1,
    background="white",
    seed=2023,
    transforms=transform)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=1,
    collate_fn=collate_fn)

net = net.eval()

for i, (img, annotations) in enumerate(test_dataloader):

    # Predict. Training divides the box labels by the image size, so the
    # predictions are multiplied by it to get pixel coordinates.
    with torch.no_grad():
        class_scores, bb_coords_scaled = net(img)
        bb_coords = bb_coords_scaled * img.shape[2]

    # Plot Ground Truth
    img = img.squeeze(0)

    # Undo Normalize(0.5, 1.0): add the mean back and rescale to [0, 255].
    # (Multiplying by 2 * 255 instead would produce negative values for dark
    # pixels and corrupt the colours in the uint8 conversion.)
    img = ((img + 0.5) * 255.0).round().clamp(0, 255).to(torch.uint8)
    boxes = torch.tensor(annotations[0]['box'])
    img_with_box = draw_bounding_boxes(image=img, boxes=boxes, colors="red")

    # Plot Prediction
    img_with_box = draw_bounding_boxes(image=img_with_box, boxes=bb_coords, colors="green")

    fig, ax = plt.subplots(figsize=(2, 2))
    pil_image = F.to_pil_image(img_with_box)
    _ = ax.imshow(pil_image)
    # Remove x and y axis ticks
    _ = ax.set_xticks([])
    _ = ax.set_yticks([])
    plt.show()
    if i > 5:
        break

**Frage:** Rein qualitativ: Wie finden Sie die Klassifikationen und Lokalisationen ihres Modells? Wo könnte man sich verbessern?

Im Folgenden schauen wir uns die Performance der Detektion an. Dazu berechnen wir die IoU.

Ergänzen Sie den Code. Verwenden Sie [torchvision.ops.box_iou](https://pytorch.org/vision/main/generated/torchvision.ops.box_iou.html).

In [None]:
# Fresh samples for the IoU inspection; a seed different from the training
# dataset's default (123) avoids replaying the training sample stream.
test_dataset = ShapeDataset(
    img_size=64,
    max_number_of_shapes_per_image=1,
    background="white",
    seed=2023,
    transforms=transform)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=1,
    collate_fn=collate_fn)

net = net.eval()

for i, (img, annotations) in enumerate(test_dataloader):

    # Predict. Training divides the box labels by the image size, so the
    # predictions are multiplied by it to get pixel coordinates.
    with torch.no_grad():
        class_scores, bb_coords_scaled = net(img)
        predicted_box = bb_coords_scaled * img.shape[2]

    # Ground-truth box as a (1, 4) tensor, matching the predicted box layout.
    gt_box = torch.tensor(annotations[0]['box']).reshape(1, -1)

    # Exercise: compute the IoU between `predicted_box` and `gt_box`.
    # Hint: the print below formats `iou` with ":.2f", so it must be a scalar
    # (e.g. a Python float), not a 2-D tensor.
    # iou = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    print(f"IOU: {iou:.2f}")

    # Plot Ground Truth
    img = img.squeeze(0)

    # Undo Normalize(0.5, 1.0): add the mean back and rescale to [0, 255].
    # (Multiplying by 2 * 255 instead would produce negative values for dark
    # pixels and corrupt the colours in the uint8 conversion.)
    img = ((img + 0.5) * 255.0).round().clamp(0, 255).to(torch.uint8)
    boxes = torch.tensor(annotations[0]['box'])
    img_with_box = draw_bounding_boxes(image=img, boxes=boxes, colors="red")

    # Plot Prediction
    img_with_box = draw_bounding_boxes(image=img_with_box, boxes=predicted_box, colors="green")

    fig, ax = plt.subplots(figsize=(2, 2))
    pil_image = F.to_pil_image(img_with_box)
    _ = ax.imshow(pil_image)
    # Remove x and y axis ticks
    _ = ax.set_xticks([])
    _ = ax.set_yticks([])
    plt.show()
    if i > 5:
        break

**Frage:** Welche IoU Werte finden Sie akzeptabel?

**Frage:** Was müsste man alles anpassen, damit man Object Detection durchführen kann? (mehrere Objekte pro Bild).

## Pre-Trained _Faster R-CNN_

In dieser Aufgabe werden wir ein vortrainiertes Object Detection Modell der Familie der _Faster R-CNNs_ einsetzen.


### Daten

Lesen Sie die folgenden Bilder ein mit `PIL.Image`. Schauen Sie die Bilder an und überlegen Sie sich wie gut Object-Detection funktionieren könnte.

```
DATA_PATH.joinpath("dogs.jpg")
DATA_PATH.joinpath("ducks.jpg")
```

In [None]:
import gdown

# Google Drive file ids of the example images and their local file names.
files = [
    {'id': '18zuHwfojUUpmkrQttEtuaNW-MQ0QOoAH',  'name': 'ducks.jpg'},
    {'id': '1-UWVWqTpE80Qxh36hPuKkuQZj5BT3hXr', 'name': 'dogs.jpg'}
]

# The data folder may not exist yet (e.g. on a fresh checkout or Drive);
# create it so the download has somewhere to write.
DATA_PATH.mkdir(parents=True, exist_ok=True)

for file in files:
    url = f"https://drive.google.com/uc?id={file['id']}"
    download_path = DATA_PATH / file['name']
    # Skip files that were already downloaded.
    if not download_path.exists():
        gdown.download(url, str(download_path), quiet=False)

In [None]:
# Exercise: open "dogs.jpg" and "ducks.jpg" from DATA_PATH with PIL.Image and
# display them.
# YOUR CODE HERE
raise NotImplementedError()

### Modell Laden

Laden Sie ein vortrainiertes Modell der _Faster R-CNN_ Familie von [torchvision](https://pytorch.org/vision/stable/models.html#object-detection-instance-segmentation-and-person-keypoint-detection). Eine Möglichkeit ist z.B. das `fasterrcnn_mobilenet_v3_large_320_fpn`, welches ressourcenschonend ist. Wenn Sie bessere Performance möchten, können Sie gerne ein anderes wählen.


Initialisieren Sie das Modell und setzen Sie es in den `eval` Mode. Setzen Sie `box_score_thresh` auf einen Wert zwischen 0.5 und 0.9.

In [None]:
from torchvision.models.detection import (
    fasterrcnn_mobilenet_v3_large_320_fpn,
    FasterRCNN_MobileNet_V3_Large_320_FPN_Weights,
  )


# Model constructor and its default pre-trained weights.
Model = fasterrcnn_mobilenet_v3_large_320_fpn
weights = FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT

# Exercise: instantiate the model with these weights and a `box_score_thresh`
# between 0.5 and 0.9, then switch it to eval mode.
# YOUR CODE HERE
raise NotImplementedError()

### Modell Anwenden

Benutzen Sie die Funktion `inference_single()`, um Predictions für ein Bild zu generieren. Schauen Sie sich dazu folgendes Beispiel an: https://pytorch.org/vision/stable/models.html#object-detection-instance-segmentation-and-person-keypoint-detection

Erstellen Sie danach Predictions für "dogs.jpg" und inspizieren Sie den Output.

In [None]:
def inference_single(img, model, preprocess):
    """Run a detection model on a single image.

    Args:
        img: Image tensor of shape (C, H, W).
        model: Detection model (e.g. a torchvision ``FasterRCNN``); it is
            called with the pre-processed batch and must return one result
            per image.
        preprocess: Callable that pre-processes an image batch for the model.

    Returns:
        The model's result for the image, i.e. the first entry of the batch
        output. The result is computed without gradient tracking.
    """
    # The model works on batches: add a batch dimension of size 1.
    image_batch = img.unsqueeze(0)
    image_processed = preprocess(image_batch)
    # Pure inference: no autograd graph is needed, which saves memory and
    # yields outputs that can be converted/plotted directly.
    with torch.no_grad():
        return model(image_processed)[0]

from torchvision.transforms import functional as FT

# Exercise: create predictions for "dogs.jpg" with inference_single() and
# inspect the output.
# YOUR CODE HERE
raise NotImplementedError()

Visualisieren Sie die Predictions mit [torchvision.utils.draw_bounding_boxes](https://pytorch.org/vision/stable/generated/torchvision.utils.draw_bounding_boxes.html). Visualisieren Sie die Labels sowie die Confidence-Scores der Predictions zusammen mit den Bounding-Boxes.

Die Labels finden Sie in `weights.meta["categories"]`

In [None]:
from torchvision.utils import draw_bounding_boxes

def draw_boxes(image, predictions, categories):
    """Draw predicted boxes with class name and confidence onto an image.

        Args:
            image: The input image torch.tensor
            predictions: Prediction dict with the keys "boxes", "labels" and
                "scores" (e.g. the output of inference_single())
            categories: List of category labels, indexed by the predicted labels
        Returns:
            PIL.Image
    """
    captions = []
    for label_idx, score in zip(predictions["labels"], predictions["scores"]):
        captions.append(f"{categories[label_idx]} ({score * 100:.2f} %)")

    annotated = draw_bounding_boxes(
        image,
        boxes=predictions["boxes"],
        labels=captions,
        width=5,
        colors="red",
    )
    return FT.to_pil_image(annotated.detach())

# Exercise: visualise the predictions with draw_boxes(); the category names
# are in weights.meta["categories"].
# YOUR CODE HERE
raise NotImplementedError()

Initialisieren Sie das Modell neu und wählen Sie einen tieferen Wert für [box_score_thresh](https://github.com/pytorch/vision/blob/main/torchvision/models/detection/faster_rcnn.py). 

Erstellen Sie danach Predictions für "ducks.jpg".

Visualisieren Sie wieder die Boxen.


In [None]:
from torchvision.ops import box_iou

# Exercise: re-initialise the model with a lower `box_score_thresh`, predict
# "ducks.jpg" and visualise the boxes again.
# YOUR CODE HERE
raise NotImplementedError()


Berechnen Sie die IoU für die gefundenen Boxen. Sie können folgende Funktion verwenden [torchvision.ops.box_iou](https://pytorch.org/vision/stable/generated/torchvision.ops.box_iou.html#torchvision.ops.box_iou)

In [None]:
# Exercise: compute the IoU for the boxes found above (e.g. with
# torchvision.ops.box_iou).
# YOUR CODE HERE
raise NotImplementedError()

Nun schauen wir uns die Activation Maps vom Backbone-CNN an, welche in das RPN gehen. Verwenden Sie dazu die folgende Funktion und inspizieren Sie die Shape der Activation Map.

Schauen Sie sich die Activation Maps von beiden Beispiel-Bildern an. Was stellen Sie fest?

In [None]:
def backbone(img, model, preprocess):
    """Get the feature map of the backbone network for a single image.

    The image goes through ``model.transform`` first, as in the detection
    model's own forward pass, so the returned activations are the ones the
    RPN receives.

    Args:
        img: Image tensor of shape (C, H, W).
        model: Detection model exposing ``transform`` and ``backbone``
            (e.g. a torchvision ``FasterRCNN``).
        preprocess: Callable that pre-processes an image batch for the model.

    Returns:
        The backbone output stored under the key ``'0'``, computed without
        gradient tracking.
    """
    image_batch = img.unsqueeze(0)
    image_processed = preprocess(image_batch)
    with torch.no_grad():
        # `transform` returns (image list, targets); the batched tensor that
        # the backbone consumes lives in `.tensors`.
        images, _ = model.transform(image_processed)
        features = model.backbone(images.tensors)
    return features['0']

# Exercise: compute the activation maps with backbone() for both example
# images and inspect their shape.
# YOUR CODE HERE
raise NotImplementedError()

Nun schauen wir uns den Output des RPNs an. Vergleichen Sie wieder die beiden Bilder.

Setzen Sie die Modell-Parameter: `rpn_score_thresh` und  `rpn_post_nms_top_n_test` und schauen Sie verschiedene Werte an.

In [None]:
def rpn(img, model, preprocess):
    """Get the region proposals of the RPN for a single image.

    Args:
        img: Image tensor of shape (C, H, W).
        model: Detection model exposing ``transform``, ``backbone`` and ``rpn``
            (e.g. a torchvision ``FasterRCNN``).
        preprocess: Callable that pre-processes an image batch for the model.

    Returns:
        The output of ``model.transform.postprocess`` for the proposals: a
        list with one dict per image whose ``'boxes'`` entry holds the
        proposals rescaled to the original image size.
    """
    image_batch = img.unsqueeze(0)
    image_processed = preprocess(image_batch)

    with torch.no_grad():
        images, targets = model.transform(image_processed, targets=None)
        # The backbone must see the transformed batch; the RPN relates the
        # feature maps to `images`, so both have to describe the same tensor.
        features = model.backbone(images.tensors)
        proposals, _ = model.rpn(images, features, targets=targets)

    # (height, width) of every input image before the model's transform,
    # needed to map the proposals back to the original resolution.
    original_image_sizes = [tuple(image.shape[-2:]) for image in image_batch]
    proposals = model.transform.postprocess(
        [{'boxes': proposals[0]}], images.image_sizes, original_image_sizes)

    return proposals


def draw_proposals(image, proposals):
    """Draw proposal boxes onto an image.

        Args:
            image: The input image torch.tensor
            proposals: Boxes to draw, passed to draw_bounding_boxes as `boxes`
        Returns:
            PIL.Image
    """
    annotated = draw_bounding_boxes(image, boxes=proposals, width=5, colors="red")
    # (C, H, W) tensor -> (H, W, C) array as expected by PIL.
    pixels = annotated.detach().permute(1, 2, 0).numpy()
    return Image.fromarray(pixels)

# Exercise: compute and draw the region proposals for both images and try
# different values for `rpn_score_thresh` and `rpn_post_nms_top_n_test`.
# YOUR CODE HERE
raise NotImplementedError()