In [None]:
!pip install d2l==0.16.2
!pip install opencv-python
!pip install scikit-learn

In [None]:
%matplotlib inline
from d2l import torch as d2l
import torch
import torchvision
from torchvision.transforms import ToPILImage
from torch import nn
from torch.nn import functional as F
from PIL import Image
import matplotlib.pyplot as plt
from object_detection_utils import *
import random
import cv2



# Download the dataset
First, let's download the dataset. It consists of images of plants, bounding-box annotations, and leaf-count annotations.

In [None]:
!git clone https://git.wur.nl/deep-learning-course/leaf-dataset.git

Let's have a look at one image.

In [None]:
# Load one sample from the detection set and display it.
image = Image.open("leaf-dataset/detection/ara2012_plant001_rgb.png")
plt.imshow(image)
# The trailing semicolon keeps the cell from echoing the matplotlib object repr.
plt.title("ara2012_plant001_rgb.png");


In [None]:
# Size of the sample image; PIL reports it as (width, height) in pixels.
image.size


# The `Dataset` class
Now, let's create a dataset customized to our data.
We will call it `LeafDetectionDataset`. 


**Exercise:** complete the missing parts.

In [None]:
import glob
import os
from sklearn.model_selection import train_test_split
import numpy as np


class LeafDetectionDataset(torch.utils.data.Dataset):
    """
    Dataset of plant images with their leaf bounding boxes.

    The constructor reads every image / box file of the selected split, resizes
    them with ``ResizeWithBBox`` and keeps the results in memory. ``__getitem__``
    then only applies the optional image transforms and builds the target dict.

    NOTE: this class is an exercise skeleton. The ``...`` placeholders marked
    with TODO have to be replaced before the dataset can be used.
    """

    def __init__(self, root, img_size, is_train=True, transforms=None):
        """
        Constructor of the LeafDetectionDataset
        :param root: the root folder of the dataset; it is searched for
            ``*rgb.png`` images and ``*bbox.csv`` box files
        :param img_size: target size forwarded to ``ResizeWithBBox``
            (the notebook passes a 2-tuple)
        :param is_train: Whether to return the training split (80%) or the
            held-out split (20%). Default: True.
        :param transforms: optional callable applied to the image only in
            ``__getitem__`` (boxes are not touched by it)
        """
        self.root = root
        self.transforms = transforms
        self.resize = ResizeWithBBox(img_size)

        imgs = glob.glob(os.path.join(root, "*rgb.png"))
        # NOTE(review): this copy is taken before sorting and before the split, so it
        # lists ALL image files in glob order and does not line up with self.imgs.
        self.img_files = imgs.copy()
        bboxes = glob.glob(os.path.join(root, "*bbox.csv"))
        # Sort both lists so images and box files are paired by position.
        # Assumes every image has exactly one box file that sorts to the same position - TODO confirm
        imgs.sort()
        bboxes.sort()

        # Split the data into train and validation.
        # The fixed random_state keeps the split identical between the train and the held-out instance.
        x_train, x_test, y_train, y_test = train_test_split(imgs, bboxes, test_size=0.2, random_state=42)
        if is_train:
            imgs = x_train
            bboxes = y_train
        else:
            imgs = x_test
            bboxes = y_test

        # Read images and boxes and store them in a class attribute
        images_list = []
        bboxes_list = []
        for img_name, labels in zip(imgs, bboxes):
            # OpenCV loads images as BGR; convert to RGB before wrapping in a PIL image
            img = cv2.imread(img_name)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img)
            boxes = np.loadtxt(labels, delimiter=",")
            boxes = torch.tensor(boxes, dtype=torch.float32)
            # np.loadtxt yields a 1-D array for a single-row file; add a leading
            # dimension so boxes is always 2-D (one row per box)
            if len(boxes.shape) == 1:
                boxes = boxes.unsqueeze(0)
            image, boxes = self.resize(image=img, boxes=boxes)
            # NOTE(review): if ResizeWithBBox already returns a tensor, this re-wrap
            # triggers PyTorch's copy-construct warning - confirm its return type
            boxes = torch.tensor(boxes, dtype=torch.float32)
            # TODO: convert the boxes from xyxy to cxcywh
            boxes = ...
            images_list.append(image)
            bboxes_list.append(boxes)
        self.imgs = images_list
        self.bboxes = bboxes_list

    def __getitem__(self, idx):
        """
        Return the sample at position ``idx``.
        :param idx: index into the images / boxes loaded by the constructor
        :return: tuple ``(image, target)`` where ``target`` is a dict with the
            keys ``"labels"`` and ``"boxes"``; both are meant to be padded with
            illegal entries so every sample has the same number of boxes
        """
        # Get image and boxes from list
        image = self.imgs[idx]
        bboxes = self.bboxes[idx]

        if self.transforms:  # Non geometric transforms
            image = self.transforms(image)

        # Create a torch tensor of zeros which represents the labels of the boxes
        # (There's only one class in this dataset)
        labels = torch.zeros((len(bboxes),), dtype=torch.int64)

        # TODO: add your code here
        # Since the number of bounding boxes (aka leaves) per image is different, we need to
        # create illegal boxes (with label=-1) so all images have the same number of boxes
        # and we can create batches
        # Remember, illegal_labels must have a dtype=torch.int64 and illegal_boxes a dtype=torch.float32
        # 50 should be a good max number
        illegal_needed = ...
        illegal_labels = ...
        illegal_boxes = ...

        # Real entries come first, the padding entries are appended after them
        return image, {
            "labels": torch.cat((labels, illegal_labels)),
            "boxes": torch.cat((bboxes, illegal_boxes), axis=0),
        }

    def __len__(self):
        """Number of images in the selected split (left as an exercise)."""
        # TODO: return the number of images in the dataset
        return ...


Now that we have created our custom `Dataset` class, let's create an instance of it.

In [None]:
dataset_folder = 'leaf-dataset/detection'

img_size = 384
batch_size = 16

# Layout of the preview grid. The number of samples drawn below is derived from
# it, so the grid shape and the sample count cannot get out of sync.
preview_rows, preview_cols = 2, 5

augs = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

train_dataset = LeafDetectionDataset(
    dataset_folder,
    img_size=(img_size, img_size),
    is_train=True,
    transforms=augs,
)

display_imgs_bbox = []

for i in range(preview_rows * preview_cols):
    img, target = train_dataset[i]
    # `augs` turned the image into a tensor; convert it back to a PIL image for drawing
    img = ToPILImage()(img)
    # Boxes are scaled by img_size before drawing - presumably they are stored
    # normalized to [0, 1]; verify against ResizeWithBBox
    img = plot_bbox(img, target["boxes"] * img_size)
    display_imgs_bbox.append(img)

# Show all annotated samples in a single grid
# (plot_bbox / plot_grid are presumably provided by the object_detection_utils import)
plot_grid(imgs=display_imgs_bbox, nrows=preview_rows, ncols=preview_cols)


## Recommendations
This is a small dataset with roughly 100 images. The images have a low resolution and contain a large number of small objects, so it is going to be a challenge for our simple object detection model. Don't be surprised if the average precision (AP) that you get oscillates between 0.4 and 0.6.

Things to try for better performance:
- Increase the resolution of the images. This also increases the number of objects that our detector can predict.
- Use data augmentations.
- Try different backbones/encoders.
- Freeze the backbone/encoder so its weights are not trained. This is useful when dealing with small datasets. You can use the following code for it:
    ```python
    backbone = models.resnet50(pretrained=pretrained)
    for param in backbone.parameters():
        param.requires_grad = False
    ```
- The dataset contains many overlapping objects, so pay attention to the non-maximum suppression (NMS) threshold that you use both when calculating the AP and when making predictions.
    - You can change the NMS threshold while calculating the AP during training like this: `ap = ap_calculator.calculate_map(model, nms_threshold=0.5)`
    - And for predictions like this: `boxes, top_class, scores = predict(model, img, n_classes=1, nms_threshold=0.3)`
    