# Landfill Detection with YOLO

### Requirements

In [None]:
!pip install transformers==4.46.1
!pip install pytorch-lightning
!pip install wandb
!pip install roboflow
!pip install torch
!pip install torchvision
!pip install pycocotools
!pip install numpy
!pip install scipy

## Get Dataset from S3

In [None]:
# Mirror the dataset prefix from S3 into the local ./dataset folder.
!aws s3 sync s3://scrnts-dev-dataplat-ai-models-eu-west-1-772012299168/datasets/landfills-detection/v2/ ./dataset

# Local dataset root; matches the sync target above (relative to the working directory).
dataset_path = 'dataset'

## Register Dataset

In [None]:
import torchvision
import os
from transformers import YolosImageProcessor


class CocoDetection(torchvision.datasets.CocoDetection):
    """COCO-format detection dataset that runs each sample through an image processor.

    Both the images and the ``_annotations.coco.json`` annotation file are
    read from ``img_folder``.
    """

    def __init__(self, img_folder, feature_extractor, train=True):
        """Index the annotation file found inside ``img_folder``.

        Args:
            img_folder: Directory holding the images and ``_annotations.coco.json``.
            feature_extractor: Callable invoked as
                ``feature_extractor(images=..., annotations=..., return_tensors="pt")``;
                its result must expose ``"pixel_values"`` and ``"labels"``.
            train: Accepted for call-site compatibility; not used by this class.
        """
        ann_file = os.path.join(img_folder, "_annotations.coco.json")
        super().__init__(img_folder, ann_file)
        self.feature_extractor = feature_extractor

    def __getitem__(self, idx):
        """Return ``(pixel_values, target)`` for the sample at position ``idx``."""
        # The parent class yields the PIL image and its raw COCO annotation list.
        img, annotations = super().__getitem__(idx)

        # Wrap the annotations together with the image id before handing both
        # to the processor, which preprocesses image and target together.
        target = {"image_id": self.ids[idx], "annotations": annotations}
        encoding = self.feature_extractor(
            images=img, annotations=target, return_tensors="pt"
        )

        # Drop ONLY the leading batch dimension. A bare ``squeeze()`` would
        # also collapse any other dimension that happens to have size 1.
        pixel_values = encoding["pixel_values"].squeeze(0)
        target = encoding["labels"][0]  # first (only) entry of the batch

        return pixel_values, target


# One image processor instance is shared by the training and validation datasets.
feature_extractor = YolosImageProcessor.from_pretrained(
    "hustvl/yolos-small",
    size=1000,
    max_size=1200,
)  # <- SHOULD I USE A DIFFERENT YOLO MODEL? (TODO)

# Each split lives in its own sub-folder of the dataset root.
train_dataset = CocoDetection(f"{dataset_path}/train", feature_extractor)
val_dataset = CocoDetection(f"{dataset_path}/valid", feature_extractor, train=False)

print("Number of training examples:", len(train_dataset))
print("Number of validation examples:", len(val_dataset))

### Visualize that our data has loaded correctly - You can re-run this cell as many times as you want to visualize how your training set has loaded


In [None]:
import numpy as np
import os
from PIL import Image, ImageDraw

# based on https://github.com/woctezuma/finetune-detr/blob/master/finetune_detr.ipynb
coco = train_dataset.coco

# Pick one training image at random.
image_ids = coco.getImgIds()
image_id = image_ids[np.random.randint(len(image_ids))]
print("Image n°{}".format(image_id))

# `image` first holds the COCO image record, then the opened PIL image.
image = coco.loadImgs(image_id)[0]
image = Image.open(os.path.join(dataset_path + "/train", image["file_name"]))

annotations = coco.imgToAnns[image_id]
draw = ImageDraw.Draw(image, "RGBA")

# Category id -> category name, used to caption each box.
cats = coco.cats
id2label = {cat_id: cat["name"] for cat_id, cat in cats.items()}

for annotation in annotations:
    # bbox is unpacked as (x, y, width, height); the rectangle needs two corners.
    x, y, w, h = annotation["bbox"]
    draw.rectangle((x, y, x + w, y + h), outline="blue", width=2)
    draw.text((x, y), id2label[annotation["category_id"]], fill="white")

# Last expression of the cell: the notebook renders the annotated image.
image

# Training

### Setup Data Loader

In [5]:
from torch.utils.data import DataLoader


def collate_fn(batch):
    """Merge a list of ``(pixel_values, target)`` samples into one batch dict.

    The per-sample image tensors are handed to ``feature_extractor.pad`` and
    its ``"pixel_values"`` entry becomes the batch images; the targets are
    kept as a plain list with one entry per sample.
    """
    images = [sample[0] for sample in batch]
    padded = feature_extractor.pad(images, return_tensors="pt")
    return {
        "pixel_values": padded["pixel_values"],
        "labels": [sample[1] for sample in batch],
    }


# One sample per step for both splits; only the training split is shuffled.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=1,
    shuffle=True,
    collate_fn=collate_fn,
)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=1, collate_fn=collate_fn)

# Fetch a single training batch up front to exercise the loading pipeline.
batch = next(iter(train_dataloader))

## Setup YOLO Model

In [6]:
import pytorch_lightning as pl
from transformers import AutoModelForObjectDetection
import torch


# we wrap our model around pytorch lightning for training
class YoloModel(pl.LightningModule):
    """Lightning wrapper around a pretrained ``hustvl/yolos-tiny`` detector.

    The detection head is sized from the notebook-level ``id2label`` mapping,
    and the train/validation loaders come from the notebook-level
    ``train_dataloader`` / ``val_dataloader`` objects.
    """

    def __init__(self, lr, weight_decay):
        """Store the optimizer settings and load the pretrained detector.

        Args:
            lr: Learning rate passed to AdamW.
            weight_decay: Weight decay passed to AdamW.
        """
        super().__init__()
        # see https://github.com/PyTorchLightning/pytorch-lightning/pull/1896
        self.lr = lr
        self.weight_decay = weight_decay
        # The pretrained classification head is replaced by one with
        # ``len(id2label)`` outputs; mismatched head weights are re-initialised.
        self.model = AutoModelForObjectDetection.from_pretrained(
            "hustvl/yolos-tiny",
            num_labels=len(id2label),
            ignore_mismatched_sizes=True,
        )
        # Record the constructor arguments as hyperparameters.
        self.save_hyperparameters()

    def forward(self, pixel_values):
        """Run the detector on ``pixel_values`` and return its raw outputs."""
        return self.model(pixel_values=pixel_values)

    def common_step(self, batch, batch_idx):
        """Compute ``(loss, loss_dict)`` for one batch; shared by train and validation."""
        pixel_values = batch["pixel_values"]

        # The targets arrive as a list of dicts, so each tensor is moved to
        # the module's device explicitly.
        device = self.device
        targets = []
        for target in batch["labels"]:
            targets.append({key: tensor.to(device) for key, tensor in target.items()})

        outputs = self.model(pixel_values=pixel_values, labels=targets)
        return outputs.loss, outputs.loss_dict

    def training_step(self, batch, batch_idx):
        """Log the total loss and each loss component under ``train/``; return the loss."""
        total, components = self.common_step(batch, batch_idx)
        # The "train/" and "validation/" prefixes keep the two metric families
        # in separate groups in the logger.
        self.log("train/loss", total)
        for name, value in components.items():
            self.log("train/" + name, value.item())
        return total

    def validation_step(self, batch, batch_idx):
        """Log the total loss and each loss component under ``validation/``; return the loss."""
        total, components = self.common_step(batch, batch_idx)
        self.log("validation/loss", total)
        for name, value in components.items():
            self.log("validation/" + name, value.item())
        return total

    def configure_optimizers(self):
        """Return an AdamW optimizer over all parameters of this module."""
        return torch.optim.AdamW(
            self.parameters(),
            lr=self.lr,
            weight_decay=self.weight_decay,
        )

    def train_dataloader(self):
        """Return the notebook-level training DataLoader."""
        return train_dataloader

    def val_dataloader(self):
        """Return the notebook-level validation DataLoader."""
        return val_dataloader

## Train the model

In [None]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

model = YoloModel(lr=2.5e-5, weight_decay=1e-4)

# Keep track of the checkpoint with the lowest validation loss
checkpoint_callback = ModelCheckpoint(mode="min", monitor="validation/loss")

# more epochs leads to a tighter fit of your model to your data.
trainer = Trainer(
    accelerator="auto",
    callbacks=[checkpoint_callback],
    max_epochs=50,
    accumulate_grad_batches=8,
    gradient_clip_val=0.1,
    log_every_n_steps=5,
)
# The model supplies its own train/val dataloaders, so none are passed here.
trainer.fit(model)

## Publish the Model

### Folder Prep

In [None]:
!mkdir content/deploy
!mkdir content/deploy/code

In [None]:
%%writefile content/deploy/code/inference.py

from transformers import pipeline

def model_fn(model_dir):
    pipe = pipeline("object-detection", model=model_dir, threshold=0.1)
    return pipe

In [None]:
# Let the image processor serialise its own configuration (UTF-8 JSON) instead
# of re-writing its JSON string line by line with the platform default encoding.
feature_extractor.to_json_file("/content/deploy/preprocessor_config.json")

In [None]:
%%writefile /content/deploy/code/inference.py

from transformers import pipeline

def model_fn(model_dir):
    """Build the object-detection pipeline from the artifacts in ``model_dir``."""
    return pipeline("object-detection", model=model_dir, threshold=0.1)

In [None]:
%%writefile /content/deploy/code/requirements.txt

transformers==4.46.1

In [None]:
# save model
# Exports the weights currently held by the in-memory `model` object into the deploy folder.
# NOTE(review): nothing here reloads the checkpoint tracked by `checkpoint_callback`,
# so this presumably saves the last-epoch weights rather than the lowest-validation-loss
# ones — confirm that is intended.
model.model.save_pretrained("/content/deploy")

### Upload model to S3

In [None]:
from datetime import datetime

# Produce Model Version based on current date
modelversion = datetime.today().strftime('%Y%m%d')

s3_file=f"model-v{modelversion}.tar.gz"
s3_location=f"s3://scrnts-dev-dataplat-ai-models-eu-west-1-772012299168/models/landfills/{s3_file}"
%cd /content/deploy/
!tar zcvf model.tar.gz *
!aws s3 cp model.tar.gz {s3_location}