In [None]:
! pip install evaluate
! pip install timm
! pip install torch torchvision transformers datasets
! pip install transformers[torch]

In [10]:
from google.colab import drive
drive.mount('/content/drive')

! rm -r __MACOSX dataset dataset.zip
! cp -r /content/drive/MyDrive/dataset.zip /content
! unzip /content/dataset.zip
! rm -r __MACOSX


rm: cannot remove '__MACOSX': No such file or directory


In [3]:
import os
import torch
import torchvision
from PIL import Image
from pycocotools.coco import COCO
from torch.utils.data import Dataset, DataLoader

class myDataset(Dataset):
    """Object-detection dataset backed by a COCO-format annotation file.

    Indexing yields an ``(image, target)`` pair. ``target`` is a dict with the
    keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.

    Args:
        root: Directory onto which each image's ``file_name`` is joined.
        annotation: Path of the COCO annotation JSON, handed to ``COCO``.
        transforms: Optional callable applied to the image only; the target
            dict is returned untransformed.
    """

    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        # Sorting fixes the index -> image id mapping.
        self.ids = list(sorted(self.coco.imgs.keys()))

    @staticmethod
    def _build_target(img_id, coco_annotation):
        """Convert the COCO annotation dicts of one image into a target dict.

        Args:
            img_id: Image id; stored as a one-element tensor under ``image_id``.
            coco_annotation: List of annotation dicts, each providing ``bbox``
                (``[xmin, ymin, width, height]``), ``category_id`` and ``area``.

        Returns:
            Dict with ``boxes`` (float32, shape ``(N, 4)``), ``labels`` (int64),
            ``image_id`` (one-element tensor), ``area`` (float32) and
            ``iscrowd`` (int64 zeros), where ``N`` is ``len(coco_annotation)``.
        """
        # In coco format, bbox = [xmin, ymin, width, height]
        # In pytorch, the input should be [xmin, ymin, xmax, ymax]
        boxes = []
        labels = []
        areas = []
        for ann in coco_annotation:
            xmin, ymin, width, height = ann['bbox'][:4]
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            labels.append(ann['category_id'])
            areas.append(ann['area'])

        # reshape(-1, 4) keeps the (N, 4) layout even when the image has no
        # annotations; without it an empty list becomes a tensor of shape (0,).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        return {
            "boxes": boxes,
            "labels": torch.as_tensor(labels, dtype=torch.int64),
            "image_id": torch.tensor([img_id]),
            "area": torch.as_tensor(areas, dtype=torch.float32),
            # Every object is marked as non-crowd; any `iscrowd` field in the
            # annotation file is not consulted.
            "iscrowd": torch.zeros((len(coco_annotation),), dtype=torch.int64),
        }

    def __getitem__(self, index):
        """Return the ``(image, target)`` pair stored at position ``index``."""
        coco = self.coco

        img_id = self.ids[index]
        coco_annotation = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        path = coco.loadImgs(img_id)[0]['file_name']

        # Force three channels so grayscale / palette / RGBA files all produce
        # the same image layout, and close the file handle as soon as the
        # pixel data has been read.
        with Image.open(os.path.join(self.root, path)) as raw_img:
            img = raw_img.convert('RGB')

        my_annotation = self._build_target(img_id, coco_annotation)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, my_annotation

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.ids)

In [4]:
def get_transform():
    """Return the image preprocessing pipeline: a ``Compose`` holding one ``ToTensor``."""
    to_tensor = torchvision.transforms.ToTensor()
    return torchvision.transforms.Compose([to_tensor])

In [5]:
# Image root directory and the COCO annotation file describing it
data_dir = 'dataset'
train_coco = 'dataset/coco_train.json'

# Training dataset: images are read from `data_dir`, targets from `train_coco`
train_dataset = myDataset(
    root=data_dir,
    annotation=train_coco,
    transforms=get_transform(),
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [6]:
def collate_fn(batch):
    """Transpose a list of samples into one tuple per field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``;
    nothing is stacked, so the samples keep their individual shapes.
    """
    per_field = zip(*batch)
    return tuple(per_field)

train_batch_size = 4

# Shuffled loader over the training set; `collate_fn` keeps each batch as
# tuples of per-sample items instead of stacking them.
data_loader = DataLoader(
    train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

In [8]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_model_instance_segmentation(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a fresh box predictor.

    Note that, despite the function name, the model constructed here is
    ``fasterrcnn_resnet50_fpn`` with a ``FastRCNNPredictor`` head.

    Args:
        num_classes: Number of outputs of the replacement predictor (the
            caller in this notebook counts the background as a class).

    Returns:
        The model, loaded with its ``"DEFAULT"`` weights and with
        ``roi_heads.box_predictor`` replaced.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

    # Keep the input width of the stock classification layer and swap in a
    # predictor sized for `num_classes`.
    roi_heads = detector.roi_heads
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector


# classes + background
num_classes = 3
num_epochs = 2

model = get_model_instance_segmentation(num_classes)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Hand only the parameters that require gradients to the optimizer
params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

len_dataloader = len(data_loader)

for epoch in range(num_epochs):
    model.train()
    # `i` is the 1-based batch counter within the current epoch
    for i, (imgs, annotations) in enumerate(data_loader, start=1):
        # Move every image and every tensor of every target dict to `device`
        imgs = [image.to(device) for image in imgs]
        annotations = [
            {key: value.to(device) for key, value in target.items()}
            for target in annotations
        ]

        # The model output is treated as a dict of loss terms; their sum is
        # the quantity that gets back-propagated.
        loss_dict = model(imgs, annotations)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        print(f'Iteration: {i}/{len_dataloader}, Loss: {losses}')

Iteration: 1/42, Loss: 1.1098710298538208
Iteration: 2/42, Loss: 0.5174397826194763
Iteration: 3/42, Loss: 0.32265380024909973
Iteration: 4/42, Loss: 0.3868410885334015
Iteration: 5/42, Loss: 0.31667208671569824
Iteration: 6/42, Loss: 0.3291205167770386
Iteration: 7/42, Loss: 0.32308775186538696
Iteration: 8/42, Loss: 0.3167784512042999
Iteration: 9/42, Loss: 0.28030771017074585
Iteration: 10/42, Loss: 0.20892705023288727
Iteration: 11/42, Loss: 0.22345903515815735
Iteration: 12/42, Loss: 0.199838787317276
Iteration: 13/42, Loss: 0.19196352362632751
Iteration: 14/42, Loss: 0.22355619072914124
Iteration: 15/42, Loss: 0.2158464640378952
Iteration: 16/42, Loss: 0.20521707832813263
Iteration: 17/42, Loss: 0.16400782763957977
Iteration: 18/42, Loss: 0.19538792967796326
Iteration: 19/42, Loss: 0.22171476483345032
Iteration: 20/42, Loss: 0.16077221930027008
Iteration: 21/42, Loss: 0.22037586569786072
Iteration: 22/42, Loss: 0.1863775998353958
Iteration: 23/42, Loss: 0.17574751377105713
Iterat

In [9]:
# Write the model's state dict (not the model object itself) to disk
torch.save(obj=model.state_dict(), f='faster_rcnn_model.pth')