# **Import necessary libraries for linear algebra, data processing, file I/O, image processing, machine learning, and visualization.**

In [4]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# **Install the torchvision library.**

In [2]:
# IPython shell escape: install/upgrade (-U) torchvision in the notebook environment.
!pip install -U torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# **Import the required modules from torch and torchvision libraries.**

In [5]:
import torch
import torchvision
from torchvision import datasets, models
from torchvision.transforms import functional as FT
from torchvision import transforms as T
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split, Dataset
import copy
import math
from PIL import Image
import cv2
import albumentations as A  # our data augmentation library

import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
import warnings
warnings.filterwarnings("ignore")
from collections import defaultdict, deque
import datetime
import time
from tqdm import tqdm # progress bar
from torchvision.utils import draw_bounding_boxes

In [7]:
# Show the torch and torchvision versions in use, one per line.
print(torch.__version__, torchvision.__version__, sep="\n")

2.0.1+cu118
0.15.2+cu118


# **Install the pycocotools library.**
Import the required modules from pycocotools library.

In [8]:
# our dataset is in COCO format, so we need pycocotools to read its annotations
!pip install pycocotools
from pycocotools.coco import COCO

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# **Import the required module from the `'albumentations.pytorch'` library.**

In [9]:
# Now, we will define our transforms
from albumentations.pytorch import ToTensorV2

# **Define a function get_transforms that returns the data augmentation transforms based on whether it's for training or not.**

In [11]:
def get_transforms(train=False):
    """Build the albumentations pipeline for one dataset split.

    Both pipelines resize to 600x600 and finish with ``ToTensorV2``; the
    training pipeline additionally applies random flips and colour changes
    in between. Bounding boxes are declared to be in ``'coco'`` format.

    Args:
        train: when truthy, include the random augmentations.

    Returns:
        An ``A.Compose`` instance.
    """
    # Resizing is common to both pipelines; our input size can be 600px.
    steps = [A.Resize(600, 600)]

    if train:
        # Random augmentations are only wanted while training.
        steps += [
            A.HorizontalFlip(p=0.3),
            A.VerticalFlip(p=0.3),
            A.RandomBrightnessContrast(p=0.1),
            A.ColorJitter(p=0.1),
        ]

    steps.append(ToTensorV2())
    return A.Compose(steps, bbox_params=A.BboxParams(format='coco'))

# **Define a class `ObjectDetection` that inherits from `datasets.VisionDataset`. This class represents an object detection dataset.**

In [12]:
class ObjectDetection(datasets.VisionDataset):
    """COCO-format object detection dataset stored under ``<root>/<split>``.

    Annotations are read from ``<root>/<split>/_annotations.coco.json`` and
    images from the same folder. Images without any annotation are skipped.

    Each item is ``(image, target)``: ``image`` is the (transformed) image
    tensor divided by 255 and ``target`` is a dict with the tensors
    ``boxes`` (xyxy, float32, shape ``(N, 4)``), ``labels``, ``image_id``,
    ``area`` and ``iscrowd``, all aligned with the boxes that remain after
    the transform.

    Args:
        root: dataset root directory.
        split: sub-folder to read (train, valid, test).
        transform, target_transform, transforms: forwarded to
            ``datasets.VisionDataset``. ``transforms`` is called as
            ``transforms(image=..., bboxes=...)`` and must return a mapping
            with ``'image'`` (a tensor) and ``'bboxes'``, the way the
            pipelines built by ``get_transforms`` do. When it is ``None``
            the raw image is converted to a CHW tensor and the boxes are
            used unchanged.
    """

    def __init__(self, root, split='train', transform=None, target_transform=None, transforms=None):
        # the 3 transform parameters are required for datasets.VisionDataset
        super().__init__(root, transforms, transform, target_transform)
        self.split = split  # train, valid, test
        self.coco = COCO(os.path.join(root, split, "_annotations.coco.json"))  # annotations stored here
        # keep only the images that have at least one annotation
        self.ids = [id for id in sorted(self.coco.imgs.keys()) if len(self._load_target(id)) > 0]

    def _load_image(self, id: int):
        """Read the image with COCO id ``id`` and return it as an RGB array."""
        path = self.coco.loadImgs(id)[0]['file_name']
        full_path = os.path.join(self.root, self.split, path)
        image = cv2.imread(full_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail with a clear message
            # instead of an opaque error inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {full_path}")
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def _load_target(self, id):
        """Return the list of COCO annotation dicts for image ``id``."""
        return self.coco.loadAnns(self.coco.getAnnIds(id))

    def __getitem__(self, index):
        id = self.ids[index]
        image = self._load_image(id)
        target = self._load_target(id)  # only read below, so no copy is needed

        # albumentations needs one extra trailing element per box. We store the
        # annotation's position in `target` there so that, if the transform drops
        # a box, labels / iscrowd / image_id can still be matched to the survivors.
        boxes = [list(t['bbox']) + [i] for i, t in enumerate(target)]
        if self.transforms is not None:
            transformed = self.transforms(image=image, bboxes=boxes)
            image = transformed['image']
            boxes = transformed['bboxes']
        else:
            # No pipeline: do the HWC -> CHW tensor conversion ourselves.
            image = torch.from_numpy(image).permute(2, 0, 1)

        kept = [target[int(box[4])] for box in boxes]

        # convert from xywh to xyxy
        new_boxes = [
            [float(box[0]), float(box[1]), float(box[0]) + float(box[2]), float(box[1]) + float(box[3])]
            for box in boxes
        ]
        # reshape keeps the (N, 4) layout even when no box is left
        boxes = torch.tensor(new_boxes, dtype=torch.float32).reshape(-1, 4)

        targ = {}  # here is our transformed target
        targ['boxes'] = boxes
        targ['labels'] = torch.tensor([t['category_id'] for t in kept], dtype=torch.int64)
        targ['image_id'] = torch.tensor([t['image_id'] for t in kept], dtype=torch.int64)
        # recomputed from the transformed boxes, so it differs from the annotation's area
        targ['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        targ['iscrowd'] = torch.tensor([t['iscrowd'] for t in kept], dtype=torch.int64)
        return image.div(255), targ  # scale images

    def __len__(self):
        return len(self.ids)

# **Install the roboflow library**

Import the `Roboflow` class from the `roboflow` library and log in to Roboflow. Set up the Roboflow workspace, project, and dataset version to download.

In [10]:
!pip install roboflow
import roboflow
from roboflow import Roboflow

# Interactive step: the recorded output shows a prompt asking for an authentication token.
roboflow.login()
rf = Roboflow()
# Pick version 2 of project "bdd-kcbst" in workspace "ympublic" and download it
# using the "coco" export format.
project = rf.workspace("ympublic").project("bdd-kcbst")
dataset = project.version(2).download("coco")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting roboflow
  Downloading roboflow-1.0.9-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting cycler==0.10.0 (from roboflow)
  Downloading cycler-0.10.0-py2.py3-none-any.whl (6.5 kB)
Collecting idna==2.10 (from roboflow)
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting pyparsing==2.4.7 (from roboflow)
  Downloading pyparsing-2.4.7-py2.py3-none-any.whl (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.8/67.8 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Collecting python-dotenv (from roboflow)
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Collecting wget (from roboflow)
  Downloading wget-3.2.zip (10 kB)
  Prep

visit https://app.roboflow.com/auth-cli to get your authentication token.
Paste the authentication token here: ··········
loading Roboflow workspace...
loading Roboflow project...
Downloading Dataset Version Zip in bdd-2 to coco: 100% [697584956 / 697584956] bytes


Extracting Dataset Version Zip to bdd-2 in coco:: 100%|██████████| 8001/8001 [00:04<00:00, 1885.51it/s]


# **Specify the path to the dataset**

In [2]:
# Dataset root; it is joined with the split name and "_annotations.coco.json" when loading.
# NOTE(review): absolute path, presumably where the Roboflow download ("bdd-2" in the log)
# was extracted on this runtime — confirm before running elsewhere.
dataset_path = "/content/bdd-2"

# **Load the COCO annotations and get the categories**

In [13]:
# Load the class list from the training split's COCO annotation file.
import os
coco = COCO(os.path.join(dataset_path, "train", "_annotations.coco.json"))
categories = coco.cats
# n_classes counts every entry of coco.cats (id 0 'obj' included in the output shown)
# and is later used as the class count of the new FastRCNNPredictor head.
# NOTE(review): torchvision detection models appear to reserve label 0 for background;
# confirm that no annotation actually uses category id 0.
n_classes = len(categories.keys())
# Bare expression so the notebook displays the category mapping.
categories

loading annotations into memory...
Done (t=0.96s)
creating index...
index created!


{0: {'id': 0, 'name': 'obj', 'supercategory': 'none'},
 1: {'id': 1, 'name': 'bike', 'supercategory': 'obj'},
 2: {'id': 2, 'name': 'bus', 'supercategory': 'obj'},
 3: {'id': 3, 'name': 'car', 'supercategory': 'obj'},
 4: {'id': 4, 'name': 'motor', 'supercategory': 'obj'},
 5: {'id': 5, 'name': 'person', 'supercategory': 'obj'},
 6: {'id': 6, 'name': 'rider', 'supercategory': 'obj'},
 7: {'id': 7, 'name': 'traffic light', 'supercategory': 'obj'},
 8: {'id': 8, 'name': 'traffic sign', 'supercategory': 'obj'},
 9: {'id': 9, 'name': 'train', 'supercategory': 'obj'},
 10: {'id': 10, 'name': 'truck', 'supercategory': 'obj'}}

# **Create an instance of the `ObjectDetection` class for the train dataset**

In [14]:
# Training split with the augmenting pipeline enabled.
train_dataset = ObjectDetection(
    root=dataset_path,
    split='train',
    transforms=get_transforms(train=True),
)

loading annotations into memory...
Done (t=0.59s)
creating index...
index created!


# **Print the length of the train dataset**

In [15]:
# Number of training images kept (ObjectDetection drops images that have no annotations).
len(train_dataset)

7537

# **Load the Faster R-CNN model**

Get the input features of the model and replace the box predictor with a new one that matches the number of classes.

In [16]:
# Load the Faster R-CNN (MobileNetV3-Large FPN) detector with its COCO-pretrained weights.
# `pretrained=True` is deprecated in current torchvision; the explicit `weights=` enum
# requests the same COCO_V1 checkpoint without relying on the legacy argument.
model = models.detection.fasterrcnn_mobilenet_v3_large_fpn(
    weights=models.detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1
)
# The pretrained head predicts the COCO classes, so we need to change the head:
# keep its input width and rebuild the predictor for our n_classes outputs.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(in_features, n_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth
100%|██████████| 74.2M/74.2M [00:00<00:00, 191MB/s]


# **Define a collate function to process the batch data**

In [17]:
def collate_fn(batch):
    """Regroup a batch of per-sample tuples into one tuple per field.

    For a batch of ``(image, target)`` samples this returns
    ``(images, targets)``, each a tuple with one entry per sample.
    An empty batch yields an empty tuple.
    """
    columns = zip(*batch)
    return tuple(columns)

# **Create a data loader for the train dataset**

In [19]:
# Shuffled training batches of 16 samples; collate_fn regroups each batch
# into an (images, targets) pair of tuples.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn,
)

# **Get a batch of images and targets from the data loader**

In [20]:
# Smoke test on a single batch, run before the model is moved to the device in a later cell.
images, targets = next(iter(train_loader))
images = list(images)
targets = [dict(t) for t in targets]
output = model(images, targets)  # just make sure this runs without error

# **Move the model to the CUDA device**

In [21]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a runtime without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [22]:
# Move the model to the selected device before building the optimizer and training.
model = model.to(device)

# **Set up the optimizer for training**

In [23]:
# Now the optimizer: SGD with Nesterov momentum over the trainable parameters only.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
    nesterov=True,
)

In [24]:
import sys

# **Define a function `train_one_epoch` to train the model for one epoch**

In [25]:
def train_one_epoch(model, optimizer, loader, device, epoch):
    """Train ``model`` for one pass over ``loader`` and print the mean losses.

    Args:
        model: detection model that returns a dict of losses when called
            with ``(images, targets)`` in training mode. The dict must
            contain ``loss_classifier``, ``loss_box_reg``,
            ``loss_rpn_box_reg`` and ``loss_objectness``.
        optimizer: optimizer used to update the model parameters.
        loader: iterable yielding ``(images, targets)`` batches, where
            ``targets`` is a sequence of dicts.
        device: device the model and every batch are moved to.
        epoch: epoch number, only used in the printed summary.

    Returns:
        The mean total loss over the epoch as a float (``nan`` when the
        loader yields no batches).

    Raises:
        SystemExit: if the total loss of a batch is not finite.
    """
    model.to(device)
    model.train()

    all_losses = []
    all_losses_dict = []

    for images, targets in tqdm(loader):
        images = [image.to(device) for image in images]
        # Values that are already tensors are moved as they are; re-wrapping them
        # with torch.tensor() makes a needless copy. Anything else is converted first.
        targets = [
            {
                k: (v.to(device) if torch.is_tensor(v) else torch.as_tensor(v, device=device))
                for k, v in t.items()
            }
            for t in targets
        ]

        loss_dict = model(images, targets)  # the model computes the loss automatically if we pass in targets
        losses = sum(loss_dict.values())
        loss_value = losses.item()

        # Check before recording or stepping so a diverged batch never reaches the optimizer.
        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping trainig")  # stop if the loss becomes inf/nan
            print(loss_dict)
            sys.exit(1)

        all_losses.append(loss_value)
        all_losses_dict.append({k: v.item() for k, v in loss_dict.items()})

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    if not all_losses:
        # Without any batch there is nothing to average and the loss columns would not exist.
        print("Epoch {}: loader yielded no batches, nothing was trained".format(epoch))
        return float("nan")

    mean_loss = float(np.mean(all_losses))
    all_losses_dict = pd.DataFrame(all_losses_dict)  # for printing
    print("Epoch {}, lr: {:.6f}, loss: {:.6f}, loss_classifier: {:.6f}, loss_box: {:.6f}, loss_rpn_box: {:.6f}, loss_object: {:.6f}".format(
        epoch, optimizer.param_groups[0]['lr'], mean_loss,
        all_losses_dict['loss_classifier'].mean(),
        all_losses_dict['loss_box_reg'].mean(),
        all_losses_dict['loss_rpn_box_reg'].mean(),
        all_losses_dict['loss_objectness'].mean()
    ))
    return mean_loss

# **Specify the number of epochs**

Train the model for the specified number of epochs

In [26]:
num_epochs=25

# One train_one_epoch call per epoch over the same train_loader.
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
# No LR scheduler is stepped here, so the learning rate stays at its initial
# value for every epoch (the log shows lr 0.010000 throughout).

100%|██████████| 472/472 [05:43<00:00,  1.37it/s]


Epoch 0, lr: 0.010000, loss: 0.789628, loss_classifier: 0.294390, loss_box: 0.330234, loss_rpn_box: 0.092803, loss_object: 0.072201


100%|██████████| 472/472 [05:43<00:00,  1.37it/s]


Epoch 1, lr: 0.010000, loss: 0.696107, loss_classifier: 0.250047, loss_box: 0.300715, loss_rpn_box: 0.085371, loss_object: 0.059973


100%|██████████| 472/472 [05:43<00:00,  1.38it/s]


Epoch 2, lr: 0.010000, loss: 0.668316, loss_classifier: 0.239408, loss_box: 0.291297, loss_rpn_box: 0.082661, loss_object: 0.054951


100%|██████████| 472/472 [05:41<00:00,  1.38it/s]


Epoch 3, lr: 0.010000, loss: 0.653712, loss_classifier: 0.232904, loss_box: 0.288116, loss_rpn_box: 0.080392, loss_object: 0.052300


100%|██████████| 472/472 [05:38<00:00,  1.39it/s]


Epoch 4, lr: 0.010000, loss: 0.636142, loss_classifier: 0.225887, loss_box: 0.281950, loss_rpn_box: 0.078544, loss_object: 0.049762


100%|██████████| 472/472 [05:40<00:00,  1.39it/s]


Epoch 5, lr: 0.010000, loss: 0.626703, loss_classifier: 0.222280, loss_box: 0.280297, loss_rpn_box: 0.077080, loss_object: 0.047045


100%|██████████| 472/472 [05:40<00:00,  1.39it/s]


Epoch 6, lr: 0.010000, loss: 0.616674, loss_classifier: 0.217373, loss_box: 0.276954, loss_rpn_box: 0.076138, loss_object: 0.046209


100%|██████████| 472/472 [05:38<00:00,  1.40it/s]


Epoch 7, lr: 0.010000, loss: 0.606650, loss_classifier: 0.213673, loss_box: 0.273497, loss_rpn_box: 0.074922, loss_object: 0.044558


100%|██████████| 472/472 [05:36<00:00,  1.40it/s]


Epoch 8, lr: 0.010000, loss: 0.603609, loss_classifier: 0.212177, loss_box: 0.274170, loss_rpn_box: 0.074024, loss_object: 0.043238


100%|██████████| 472/472 [05:36<00:00,  1.40it/s]


Epoch 9, lr: 0.010000, loss: 0.596401, loss_classifier: 0.209145, loss_box: 0.272556, loss_rpn_box: 0.072870, loss_object: 0.041830


100%|██████████| 472/472 [05:34<00:00,  1.41it/s]


Epoch 10, lr: 0.010000, loss: 0.592103, loss_classifier: 0.207613, loss_box: 0.271842, loss_rpn_box: 0.071933, loss_object: 0.040715


100%|██████████| 472/472 [05:35<00:00,  1.41it/s]


Epoch 11, lr: 0.010000, loss: 0.587415, loss_classifier: 0.204998, loss_box: 0.271069, loss_rpn_box: 0.071380, loss_object: 0.039967


100%|██████████| 472/472 [05:34<00:00,  1.41it/s]


Epoch 12, lr: 0.010000, loss: 0.584437, loss_classifier: 0.203877, loss_box: 0.270956, loss_rpn_box: 0.070645, loss_object: 0.038960


100%|██████████| 472/472 [05:33<00:00,  1.42it/s]


Epoch 13, lr: 0.010000, loss: 0.581550, loss_classifier: 0.203259, loss_box: 0.270895, loss_rpn_box: 0.069577, loss_object: 0.037818


100%|██████████| 472/472 [05:31<00:00,  1.42it/s]


Epoch 14, lr: 0.010000, loss: 0.577053, loss_classifier: 0.201367, loss_box: 0.269361, loss_rpn_box: 0.068793, loss_object: 0.037532


100%|██████████| 472/472 [05:31<00:00,  1.42it/s]


Epoch 15, lr: 0.010000, loss: 0.572541, loss_classifier: 0.199027, loss_box: 0.268477, loss_rpn_box: 0.068374, loss_object: 0.036663


100%|██████████| 472/472 [05:30<00:00,  1.43it/s]


Epoch 16, lr: 0.010000, loss: 0.570490, loss_classifier: 0.198553, loss_box: 0.268745, loss_rpn_box: 0.067348, loss_object: 0.035843


100%|██████████| 472/472 [05:29<00:00,  1.43it/s]


Epoch 17, lr: 0.010000, loss: 0.572367, loss_classifier: 0.198517, loss_box: 0.271685, loss_rpn_box: 0.067027, loss_object: 0.035138


100%|██████████| 472/472 [05:31<00:00,  1.42it/s]


Epoch 18, lr: 0.010000, loss: 0.577836, loss_classifier: 0.201289, loss_box: 0.273218, loss_rpn_box: 0.067484, loss_object: 0.035845


100%|██████████| 472/472 [05:28<00:00,  1.44it/s]


Epoch 19, lr: 0.010000, loss: 0.567463, loss_classifier: 0.196836, loss_box: 0.270578, loss_rpn_box: 0.066130, loss_object: 0.033918


100%|██████████| 472/472 [05:26<00:00,  1.44it/s]


Epoch 20, lr: 0.010000, loss: 0.563924, loss_classifier: 0.195704, loss_box: 0.269648, loss_rpn_box: 0.065373, loss_object: 0.033198


100%|██████████| 472/472 [05:20<00:00,  1.47it/s]


Epoch 21, lr: 0.010000, loss: 0.562074, loss_classifier: 0.194527, loss_box: 0.269728, loss_rpn_box: 0.064834, loss_object: 0.032985


100%|██████████| 472/472 [05:20<00:00,  1.47it/s]


Epoch 22, lr: 0.010000, loss: 0.560396, loss_classifier: 0.194317, loss_box: 0.269646, loss_rpn_box: 0.064108, loss_object: 0.032325


100%|██████████| 472/472 [05:20<00:00,  1.47it/s]


Epoch 23, lr: 0.010000, loss: 0.560158, loss_classifier: 0.194185, loss_box: 0.269584, loss_rpn_box: 0.064268, loss_object: 0.032121


100%|██████████| 472/472 [05:20<00:00,  1.47it/s]

Epoch 24, lr: 0.010000, loss: 0.559255, loss_classifier: 0.193716, loss_box: 0.269879, loss_rpn_box: 0.063747, loss_object: 0.031913





# **Set the model to evaluation mode and clear the GPU cache**

In [31]:
# Training is finished: switch the model to evaluation mode.
model.eval()
# Ask PyTorch's CUDA caching allocator to release memory it is no longer using.
torch.cuda.empty_cache()

# **Save the trained model to a file**

In [30]:
# Serializes the whole model object (not only its state_dict) to the given path.
# NOTE(review): loading a fully pickled model later presumably needs the same class
# definitions importable; saving model.state_dict() is the more portable option —
# confirm what downstream code expects before changing.
torch.save(model, '/content/model.pth')