# Step 1: Install PyTorch and torchvision with CUDA

In [None]:
#!pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118 --user
#!pip install matplotlib
#!pip install pycocotools


# Step 2: Import necessary libraries

In [None]:
# Import the needed libraries
import os
import pandas as pd
import numpy as np
import math

import torchvision
import torchvision.transforms as T

import torch
from torch.utils.data import DataLoader

from lib.TowerDataset import TowerDataset

from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [None]:
# Report the installed library versions so runs can be reproduced later
print("PyTorch version:", torch.__version__)
print("torchvision version:", torchvision.__version__)

# CUDA version string exposed by this PyTorch build
print("CUDA version:", torch.version.cuda)

# Tell the user whether a GPU can be used, naming device 0 when it can
if not torch.cuda.is_available():
    print("CUDA is not available.")
else:
    print("CUDA is available. GPU Name:", torch.cuda.get_device_name(0))


# Step 3: Load and prepare the dataset

For the torchvision reference scripts to work, the dataset's `__getitem__` method should return a tuple `(image, target)`, where:

* `image`: a PIL Image of size (H, W)
* `target`: a dictionary containing the following fields
    * `boxes` (`FloatTensor[N, 4]`): the coordinates of the `N` bounding boxes in `[x0, y0, x1, y1]` format, ranging from `0` to `W` and `0` to `H`
    * `labels` (`Int64Tensor[N]`): the label for each bounding box
    * `image_id` (`Int64Tensor[1]`): an image identifier. It should be unique between all the images in the dataset, and is used during evaluation
    * `area` (`Tensor[N]`): the area of each of the `N` bounding boxes. This is used during evaluation with the COCO metric, to separate the metric scores between small, medium and large boxes.
    * `iscrowd` (`UInt8Tensor[N]`): instances with `iscrowd=True` will be ignored during evaluation.
    * (optionally) `masks` (`UInt8Tensor[N, H, W]`): The segmentation masks for each one of the objects
    * (optionally) `keypoints` (`FloatTensor[N, K, 3]`): For each one of the `N` objects, it contains the `K` keypoints in `[x, y, visibility]` format, defining the object. `visibility=0` means that the keypoint is not visible. Note that for data augmentation, the notion of flipping a keypoint is dependent on the data representation, and you should probably adapt `references/detection/transforms.py` for your new keypoint representation



In [None]:
# Preprocessing applied to every sample handed out by the dataset below.
# Only the PIL -> tensor conversion is active; the random flip stays disabled.
train_tfm = T.Compose(
    [
        T.ToTensor(),
        # T.RandomHorizontalFlip(0.5)  # randomly flip the training images
    ]
)

# Preprocessing intended for validation batches.
# NOTE(review): val_tfm is defined here but not passed to any dataset in this cell.
val_tfm = T.ToTensor()

# One dataset over the whole 'data/' folder, built with the training transform
dataset = TowerDataset('data/', train_tfm)


# Step 4: Define the model

In [None]:
# Select the compute device: GPU when CUDA is usable, otherwise CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Start from the Faster R-CNN (ResNet-50 FPN) detector with its default weights
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Custom resize bounds for training/testing images.
# They have no effect while the two assignments below remain commented out.
min_size = 4000
max_size = 5300
# model.transform.min_size = (min_size,)
# model.transform.max_size = max_size

# Replace the box predictor with one sized for this dataset's classes
num_classes = 5  # number of object classes + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features,
    num_classes=num_classes,
)

# Move the model to the selected device (last expression: the cell shows the model)
model.to(device)


# Step 5: Initialize data loaders + train the model

In [None]:
from lib.detection.utils import collate_fn
from torch.utils.data import random_split

# Split the dataset into a training subset (70 %, rounded up) and a
# validation subset (the remainder). The sizes are derived from the actual
# dataset length so the split keeps working when images are added or removed;
# hard-coded sizes make random_split raise unless they sum to len(dataset).
number = len(dataset)
train_number = math.ceil(number * 0.7)
val_number = number - train_number
dataset_train, dataset_val = random_split(dataset, [train_number, val_number])

# Training loader: reshuffled every epoch
data_loader_train = DataLoader(
    dataset_train, batch_size=2, shuffle=True, num_workers=6,
    collate_fn=collate_fn
)

# Validation loader: fixed order so evaluation runs are comparable
data_loader_val = DataLoader(
    dataset_val, batch_size=2, shuffle=False, num_workers=6,
    collate_fn=collate_fn
)

In [None]:
from torch.optim.lr_scheduler import StepLR
from torch.optim import SGD

# construct an optimizer over the parameters that require gradients only
params = [p for p in model.parameters() if p.requires_grad]
optimizer = SGD(params, lr=0.0001, momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x (gamma=0.1) every 9 epochs (step_size=9)
lr_scheduler = StepLR(optimizer,
                      step_size=9,
                      gamma=0.1)

In [None]:
from lib.detection.engine import train_one_epoch, evaluate
from lib.detection.coco_utils import get_coco_api_from_dataset
from torch.utils.tensorboard import SummaryWriter

print('start')

num_epochs = 20

# Build the COCO API object for the validation data once, outside the loop,
# and hand the same object to evaluate() every epoch.
coco = get_coco_api_from_dataset(data_loader_val.dataset)

# TensorBoard writer shared by training and evaluation logging
writer = SummaryWriter()

try:
    for epoch in range(num_epochs):
        train_one_epoch(model, optimizer, data_loader_train,
                        device, epoch, writer=writer)
        print(f'Epoch {epoch} training done')

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the validation dataset
        evaluate(model, data_loader_val, device, epoch, coco, writer=writer)
finally:
    # Flush and close the event file even if training is interrupted, so the
    # scalars logged so far are not left sitting in the writer's buffer.
    writer.close()
    
   

# Step 6: Prediction

In [None]:
# pick one image from the test set
img, _ = dataset_val[1]
# put the model in evaluation mode
model.eval()
with torch.no_grad():
    prediction = model([img.to(device)])

In [None]:
# Display the raw model output for the one-image batch passed to the model above
prediction

In [None]:
from PIL import Image, ImageDraw, ImageFont

# Detections scoring at or below this value are not drawn
score_threshold = 0.45

# Convert the CHW float tensor into an HWC uint8 PIL image
im = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(im)

# Load the label font once, outside the loop. Arial is not installed on every
# system, so fall back to Pillow's built-in font instead of failing.
try:
    font = ImageFont.truetype("arial.ttf", size=30)
except OSError:
    font = ImageFont.load_default()

# Pull the detections for the single input image off the device once, as
# plain Python lists, so the loop works with ordinary floats and ints.
result = prediction[0]
boxes = result['boxes'].cpu().tolist()
labels = result['labels'].cpu().tolist()
scores = result['scores'].cpu().tolist()

for box, label, score in zip(boxes, labels, scores):
    if score > score_threshold:
        draw.rectangle(box, width=5, outline="red")
        text = f"{label} score: {round(score, 2)}"
        # Anchor the caption at (box[0], box[3]), the box's x0 and y1 values
        text_position = (box[0], box[3])
        draw.text(text_position, text, fill="red", font=font)

# Last expression: the notebook displays the annotated image
im