In [0]:
!wget https://github.com/Tony607/detectron2_instance_segmentation_demo/releases/download/V0.1/data.zip
!unzip data.zip > /dev/null

In [0]:
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools.coco import COCO
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


class myOwnDataset(torch.utils.data.Dataset):
    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        self.ids = list(sorted(self.coco.imgs.keys()))

    def __getitem__(self, index):
        # Own coco file
        coco = self.coco
        # Image ID
        img_id = self.ids[index]
        # List: get annotation id from coco
        ann_ids = coco.getAnnIds(imgIds=img_id)
        # Dictionary: target coco_annotation file for an image
        coco_annotation = coco.loadAnns(ann_ids)
        # path for input image
        path = coco.loadImgs(img_id)[0]["file_name"]
        # open the input image
        img = Image.open(os.path.join(self.root, path))
        # number of objects in the image
        num_objs = len(coco_annotation)

        # Bounding boxes for objects
        # In coco format, bbox = [xmin, ymin, width, height]
        # In pytorch, the input should be [xmin, ymin, xmax, ymax]
        boxes = []
        for i in range(num_objs):
            xmin = coco_annotation[i]["bbox"][0]
            ymin = coco_annotation[i]["bbox"][1]
            xmax = xmin + coco_annotation[i]["bbox"][2]
            ymax = ymin + coco_annotation[i]["bbox"][3]
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # Labels (In my case, I only one class: target class or background)
        labels = torch.ones((num_objs,), dtype=torch.int64)
        # Tensorise img_id
        img_id = torch.tensor([img_id])
        # Size of bbox (Rectangular)
        areas = []
        for i in range(num_objs):
            areas.append(coco_annotation[i]["area"])
        areas = torch.as_tensor(areas, dtype=torch.float32)
        # Iscrowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Annotation is in dictionary format
        my_annotation = {}
        my_annotation["boxes"] = boxes
        my_annotation["labels"] = labels
        my_annotation["image_id"] = img_id
        my_annotation["area"] = areas
        my_annotation["iscrowd"] = iscrowd

        if self.transforms is not None:
            img = self.transforms(img)

        return img, my_annotation

    def __len__(self):
        return len(self.ids)


# In my case, just added ToTensor
def get_transform():
    custom_transforms = []
    custom_transforms.append(torchvision.transforms.ToTensor())
    return torchvision.transforms.Compose(custom_transforms)


# collate_fn needs for batch
def collate_fn(batch):
    return tuple(zip(*batch))


def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

In [30]:
import torch
import config

print("Torch version:", torch.__version__)

# create own Dataset
my_dataset = myOwnDataset(
    root=config.train_data_dir, annotation=config.train_coco, transforms=get_transform()
)

# own DataLoader
data_loader = torch.utils.data.DataLoader(
    my_dataset,
    batch_size=config.train_batch_size,
    shuffle=config.train_shuffle_dl,
    num_workers=config.num_workers_dl,
    collate_fn=collate_fn,
)

Torch version: 1.5.0+cu101
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [31]:
# select device (whether GPU or CPU)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# DataLoader is iterable over Dataset
for imgs, annotations in data_loader:
    imgs = list(img.to(device) for img in imgs)
    annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
    print(annotations)

[{'boxes': tensor([[ 103.,   73., 1585., 2275.],
        [ 109.,   70., 1600.,  724.],
        [ 109.,  727., 1582., 1339.],
        [ 106., 1345., 1582., 2041.],
        [ 109., 2047., 1594., 2272.]]), 'labels': tensor([1, 1, 1, 1, 1]), 'image_id': tensor([16]), 'area': tensor([3263364.,  975114.,  901476., 1027296.,  334125.]), 'iscrowd': tensor([0, 0, 0, 0, 0])}]
[{'boxes': tensor([[ 160.,  133., 1555., 2257.],
        [ 166.,  403., 1519.,  658.],
        [ 166.,  751., 1531., 1231.],
        [ 169., 1234., 1516., 1705.],
        [ 172., 1717., 1507., 2086.],
        [ 172., 2077., 1453., 2257.]]), 'labels': tensor([1, 1, 1, 1, 1, 1]), 'image_id': tensor([59]), 'area': tensor([2962980.,  345015.,  655200.,  634437.,  492615.,  230580.]), 'iscrowd': tensor([0, 0, 0, 0, 0, 0])}]
[{'boxes': tensor([[ 184.,  574., 1468., 1012.],
        [ 181., 1093., 1459., 1483.]]), 'labels': tensor([1, 1]), 'image_id': tensor([62]), 'area': tensor([562392., 498420.]), 'iscrowd': tensor([0, 0])}]
[{'

In [32]:
model = get_model_instance_segmentation(config.num_classes)
# move model to the right device
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequent

In [0]:
# parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params, lr=config.lr, momentum=config.momentum, weight_decay=config.weight_decay
)

len_dataloader = len(data_loader)

In [34]:
params

[Parameter containing:
 tensor([[[[ 0.0100]],
 
          [[ 0.0155]],
 
          [[ 0.0260]],
 
          ...,
 
          [[ 0.0129]],
 
          [[-0.0317]],
 
          [[ 0.0059]]],
 
 
         [[[ 0.0062]],
 
          [[ 0.0075]],
 
          [[-0.0250]],
 
          ...,
 
          [[ 0.0070]],
 
          [[-0.0181]],
 
          [[-0.0673]]],
 
 
         [[[ 0.0266]],
 
          [[ 0.0019]],
 
          [[-0.0693]],
 
          ...,
 
          [[ 0.0099]],
 
          [[-0.0187]],
 
          [[-0.0031]]],
 
 
         ...,
 
 
         [[[-0.0035]],
 
          [[ 0.0040]],
 
          [[-0.0008]],
 
          ...,
 
          [[ 0.0086]],
 
          [[ 0.0164]],
 
          [[ 0.0041]]],
 
 
         [[[ 0.0005]],
 
          [[ 0.0023]],
 
          [[ 0.1286]],
 
          ...,
 
          [[-0.0031]],
 
          [[-0.0078]],
 
          [[ 0.0495]]],
 
 
         [[[ 0.0106]],
 
          [[ 0.0009]],
 
          [[-0.0131]],
 
          ...,
 
          [[ 0.00

In [0]:
# Training
for epoch in range(config.num_epochs):
    print(f"Epoch: {epoch}/{config.num_epochs}")
    model.train()
    i = 0
    for imgs, annotations in data_loader:
        i += 1
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        print(f"Iteration: {i}/{len_dataloader}, Loss: {losses}")

Epoch: 0/10




Iteration: 1/181, Loss: 1.5577572584152222
Iteration: 2/181, Loss: 1.5912898778915405
Iteration: 3/181, Loss: 1.4279074668884277
Iteration: 4/181, Loss: 1.1663223505020142
Iteration: 5/181, Loss: 1.0537364482879639
Iteration: 6/181, Loss: 0.9368792176246643
Iteration: 7/181, Loss: 0.9916161894798279
Iteration: 8/181, Loss: 0.8338232636451721
