<a href="https://colab.research.google.com/github/ioannis-toumpoglou/pytorch-repo/blob/main/pytorch_custom_model_coco.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools.coco import COCO


class CocoDataset(torch.utils.data.Dataset):
    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        self.ids = list(sorted(self.coco.imgs.keys()))

    def __getitem__(self, index):
        # Own coco file
        coco = self.coco
        # Image ID
        img_id = self.ids[index]
        # List: get annotation id from coco
        ann_ids = coco.getAnnIds(imgIds=img_id)
        # Dictionary: target coco_annotation file for an image
        coco_annotation = coco.loadAnns(ann_ids)
        # path for input image
        path = coco.loadImgs(img_id)[0]['file_name']
        # open the input image
        img = Image.open(os.path.join(self.root, path)).convert('RGB')

        # number of objects in the image
        num_objs = len(coco_annotation)

        # Bounding boxes for objects
        # In coco format, bbox = [xmin, ymin, width, height]
        # In pytorch, the input should be [xmin, ymin, xmax, ymax]
        boxes = []
        for i in range(num_objs):
            xmin = coco_annotation[i]['bbox'][0]
            ymin = coco_annotation[i]['bbox'][1]
            xmax = xmin + coco_annotation[i]['bbox'][2]
            ymax = ymin + coco_annotation[i]['bbox'][3]
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # Labels (In my case, I only one class: target class or background)
        labels = torch.ones((num_objs,), dtype=torch.int64)
        # Tensorise img_id
        img_id = torch.tensor([img_id])
        # Size of bbox (Rectangular)
        areas = []
        for i in range(num_objs):
            areas.append(coco_annotation[i]['area'])
        areas = torch.as_tensor(areas, dtype=torch.float32)
        # Iscrowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Annotation is in dictionary format
        my_annotation = {}
        my_annotation["boxes"] = boxes
        my_annotation["labels"] = labels
        my_annotation["image_id"] = img_id
        my_annotation["area"] = areas
        my_annotation["iscrowd"] = iscrowd

        if self.transforms is not None:
            img = self.transforms(img)

        return img, my_annotation

    def __len__(self):
        return len(self.ids)

In [2]:
# The inputs for a PyTorch model must be in tensor format
def get_transform():
    custom_transforms = []
    custom_transforms.append(torchvision.transforms.ToTensor())
    return torchvision.transforms.Compose(custom_transforms)

In [3]:
# Create data directory
from pathlib import Path


data_path = Path('data/')
image_path = data_path / 'images'

if image_path.is_dir():
  print(f'[INFO] {image_path} already exists, skipping download...')
else:
  print(f'[INFO] Unable to find {image_path}, creating one...')
  image_path.mkdir(parents=True, exist_ok=True)

[INFO] data/images already exists, skipping download...


In [5]:
# path to data and coco file
train_data_dir = 'data/'
train_coco = 'data/train_coco.json'

# create own Dataset
my_dataset = CocoDataset(root=train_data_dir,
                         annotation=train_coco,
                         transforms=get_transform()
                         )

# collate_fn needs for batch
def collate_fn(batch):
    return tuple(zip(*batch))

# Batch size
train_batch_size = 32
num_workers = os.cpu_count()

# own DataLoader
data_loader = torch.utils.data.DataLoader(my_dataset,
                                          batch_size=train_batch_size,
                                          shuffle=True,
                                          num_workers=num_workers,
                                          collate_fn=collate_fn)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [6]:
# select device (whether GPU or CPU)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# DataLoader is iterable over Dataset
for imgs, annotations in data_loader:
    imgs = list(img.to(device) for img in imgs)
    annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
    print(annotations)

[{'boxes': tensor([[ 573.8427,  149.5730, 1074.3370,  408.4494]]), 'labels': tensor([1]), 'image_id': tensor([5]), 'area': tensor([129566.1875]), 'iscrowd': tensor([0])}, {'boxes': tensor([[  32.1857,   34.6615, 1245.3385, 1245.3385]]), 'labels': tensor([1]), 'image_id': tensor([2]), 'area': tensor([1468736.1250]), 'iscrowd': tensor([0])}, {'boxes': tensor([[ 111.4120,  106.4604, 1173.5397, 1252.7660]]), 'labels': tensor([1]), 'image_id': tensor([3]), 'area': tensor([1217522.8750]), 'iscrowd': tensor([0])}, {'boxes': tensor([[471.3017, 267.6731, 748.8279, 558.3365]]), 'labels': tensor([1]), 'image_id': tensor([7]), 'area': tensor([80666.6953]), 'iscrowd': tensor([0])}, {'boxes': tensor([[   1.5725,    3.1451, 1278.4700,  809.8549]]), 'labels': tensor([1]), 'image_id': tensor([6]), 'area': tensor([1030085.8125]), 'iscrowd': tensor([0])}, {'boxes': tensor([[460.3230, 448.7737, 821.6519, 595.6151]]), 'labels': tensor([1]), 'image_id': tensor([12]), 'area': tensor([53058.0273]), 'iscrowd':

In [None]:
# Run the model
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2

def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(pretrained=False)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = fasterrcnn_resnet50_fpn_v2(in_features, num_classes)

    return model


# 2 classes; Only target class or background
num_classes = 2
num_epochs = 10
model = get_model_instance_segmentation(num_classes)

# move model to the right device
model.to(device)

# parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.01, momentum=0.9, weight_decay=0.0005)

len_dataloader = len(data_loader)

for epoch in range(num_epochs):
    model.train()
    i = 0
    for imgs, annotations in data_loader:
        i += 1
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        print(f'Iteration: {i}/{len_dataloader}, Loss: {losses}')

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth
100%|██████████| 167M/167M [00:02<00:00, 80.4MB/s]
