In [2]:
import torchvision
import torch
import numpy as np
import cv2 as cv
import random
import os
import matplotlib.pyplot as plt

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# dataset paths used further down (under /content/drive/MyDrive/...) resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Build torchvision's Faster R-CNN (ResNet-50 backbone with FPN) with pretrained
# weights; the download log printed by this cell shows a COCO checkpoint.
# NOTE(review): `pretrained=` is deprecated in torchvision >= 0.13 in favour of
# `weights=`; confirm the installed version before switching.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 198MB/s]


In [4]:
# Echo the model's repr so its module structure can be inspected.
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [13]:
class CustomDataset(torch.utils.data.Dataset):
    """Detection dataset pairing images with per-image text label files.

    Images and label files are listed from ``root/image_path`` and
    ``root/label_path`` and paired by position after sorting, so the two
    directories must contain matching file names in matching order.

    Each label file is assumed to be in YOLO format: one object per line as
    ``class cx cy w h`` with the box given as centre/size normalised to
    [0, 1] (TODO confirm against the dataset export).

    Args:
        root: Dataset root directory.
        image_path: Image directory, relative to ``root``.
        label_path: Label directory, relative to ``root``.

    Raises:
        ValueError: If the image and label directories hold a different
            number of files (index-based pairing would be meaningless).
    """

    def __init__(self, root, image_path, label_path):
        self.root = root
        self.image_path = image_path
        self.label_path = label_path
        self.imgs = sorted(os.listdir(os.path.join(root, self.image_path)))
        self.labels = sorted(os.listdir(os.path.join(root, self.label_path)))
        if len(self.imgs) != len(self.labels):
            raise ValueError(
                f"Found {len(self.imgs)} images but {len(self.labels)} label files; "
                "images and labels are paired by sorted index and must match."
            )

    @staticmethod
    def _yolo_to_xyxy(yolo_boxes, img_width, img_height):
        """Convert normalised ``(cx, cy, w, h)`` rows to pixel ``(x1, y1, x2, y2)``.

        Args:
            yolo_boxes: Array-like of shape (N, 4); may be empty.
            img_width: Image width in pixels.
            img_height: Image height in pixels.

        Returns:
            ``numpy.ndarray`` of shape (N, 4), dtype float64.

        Raises:
            ValueError: If a non-empty input is not of shape (N, 4).
        """
        yolo = np.asarray(yolo_boxes, dtype=np.float64)
        if yolo.size == 0:
            return np.zeros((0, 4), dtype=np.float64)
        if yolo.ndim != 2 or yolo.shape[1] != 4:
            raise ValueError(f"Expected boxes of shape (N, 4), got {yolo.shape}")

        scale = np.array([img_width, img_height], dtype=np.float64)
        centers = yolo[:, :2] * scale
        # The corner offset is half the box SIZE (columns 2-3), not half the centre.
        half_sizes = yolo[:, 2:] * scale / 2.0
        return np.concatenate([centers - half_sizes, centers + half_sizes], axis=1)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        Returns:
            image: Float tensor of shape (3, H, W) in [0, 1], RGB order.
            target: Dict with ``"boxes"`` (float32, shape (N, 4), pixel
                ``x1, y1, x2, y2``) and ``"label"`` (int64, shape (N,)).
                torchvision detection models read the class ids under the
                key ``"labels"``, so callers must rename it before the
                forward pass.

        Raises:
            FileNotFoundError: If the image cannot be read by OpenCV.
        """
        img_path = os.path.join(self.root, self.image_path, self.imgs[idx])
        label_path = os.path.join(self.root, self.label_path, self.labels[idx])

        bgr = cv.imread(img_path)
        if bgr is None:
            # cv.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv.cvtColor(bgr, cv.COLOR_BGR2RGB)

        # Context manager guarantees the handle is closed; blank lines are skipped.
        with open(label_path, "r") as label_file:
            rows = [[float(value) for value in line.split()]
                    for line in label_file if line.strip()]
        # An empty label file means "no objects"; keep a well-formed (0, 5) array.
        label = np.array(rows, dtype=np.float64) if rows else np.zeros((0, 5))

        height, width = img.shape[:2]
        # Boxes stay floating point: truncating to int can collapse thin boxes
        # to zero width/height, which torchvision rejects as degenerate.
        boxes = self._yolo_to_xyxy(label[:, 1:], width, height)
        # NOTE(review): YOLO class ids start at 0, while torchvision detection
        # heads reserve 0 for background — confirm whether ids need a +1 shift.
        cls = label[:, 0].astype(np.int64)

        target = {
            "boxes": torch.as_tensor(boxes, dtype=torch.float32),
            "label": torch.as_tensor(cls, dtype=torch.int64),
        }

        return torchvision.transforms.ToTensor()(img), target

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.imgs)

In [14]:
def custom_collate(data):
    """Identity collate function for the detection DataLoaders.

    Hands the list of samples back untouched instead of stacking them, so
    each batch is the same list object the DataLoader passed in.
    """
    batch = data
    return batch

In [15]:
# Settings shared by the training and validation loaders.
data_root = "/content/drive/MyDrive/data_yolo/"
loader_kwargs = dict(
    batch_size=32,
    shuffle=True,
    collate_fn=custom_collate,  # keep each batch as a plain list of samples
    pin_memory=torch.cuda.is_available(),  # only enabled when a GPU is present
)

train_dataset = CustomDataset(data_root, "images/train", "labels/train")
train_data = torch.utils.data.DataLoader(train_dataset, **loader_kwargs)

val_dataset = CustomDataset(data_root, "images/val", "labels/val")
val_data = torch.utils.data.DataLoader(val_dataset, **loader_kwargs)

In [16]:
# Replace the pretrained box head with a freshly initialised predictor sized
# for this dataset; the input width is read from the existing head.
num_classes = 21
predictor_cls = torchvision.models.detection.faster_rcnn.FastRCNNPredictor
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = predictor_cls(in_features, num_classes)

In [17]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [18]:
# Training configuration: Adam with its default hyper-parameters over every
# model parameter, run for a fixed number of epochs.
num_epochs = 10
optimizer = torch.optim.Adam(params=model.parameters())

In [None]:
# Fine-tune the detector: one pass over `train_data` per epoch, printing the
# summed batch loss at the end of each epoch.
model.to(device)
# torchvision detection models only return the loss dict in training mode.
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for data in train_data:
        # `data` is a list of (image, target) pairs (identity collate).
        imgs = [image.to(device) for image, _ in data]
        # torchvision expects float boxes under "boxes" and int64 class ids
        # under "labels"; the dataset stores the ids under "label".
        targets = [
            {
                "boxes": target["boxes"].to(device=device, dtype=torch.float32),
                "labels": target["label"].to(device=device, dtype=torch.int64),
            }
            for _, target in data
        ]

        loss_dict = model(imgs, targets)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() pulls the scalar out without keeping the autograd graph alive.
        epoch_loss += loss.item()
    print(epoch_loss)