In [1]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader, Subset
import gdown
import os
import zipfile
import tqdm

In [2]:
def download_coco_data():
    """Download and unpack the COCO val2017 images and the 2017 annotations.

    Everything is written under ``coco_data/`` relative to the current working
    directory. An archive that is already on disk and is recognised as a zip
    file is not downloaded again, so a re-run after an interrupted extraction
    does not repeat the transfer.

    Raises:
        RuntimeError: if, after a download attempt, the target path is missing
            or is not recognised as a zip archive.
    """
    # exist_ok replaces the racy "check, then create" pair.
    os.makedirs("coco_data", exist_ok=True)

    # (progress message, source URL, local archive path)
    downloads = [
        ("Downloading COCO Images...",
         'http://images.cocodataset.org/zips/val2017.zip',
         'coco_data/val2017.zip'),
        ("Downloading COCO Annotations...",
         'http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
         'coco_data/annotations.zip'),
    ]

    for message, url, archive_path in downloads:
        # is_zipfile() returns False for a missing path, so this covers both
        # "never downloaded" and "present but not a usable archive".
        if zipfile.is_zipfile(archive_path):
            continue

        print(message)
        gdown.download(url, archive_path, quiet=False)

        # Fail here with a clear message instead of a later unzip error.
        if not zipfile.is_zipfile(archive_path):
            raise RuntimeError(
                f"Download of {url} did not produce a valid zip archive at {archive_path}"
            )

    print("Unzipping files...")
    for _message, _url, archive_path in downloads:
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall('coco_data')

    print("Dataset Ready.")

In [3]:
# Fetch the dataset unless both the image folder and the annotation file used
# further down are already on disk. Checking only the image folder would skip
# the download after a run that stopped before the annotations were extracted.
required_paths = (
    "coco_data/val2017",
    "coco_data/annotations/instances_val2017.json",
)
if not all(os.path.exists(path) for path in required_paths):
    download_coco_data()

Downloading...
From: http://images.cocodataset.org/zips/val2017.zip
To: /content/coco_data/val2017.zip


Downloading COCO Images...


100%|██████████| 816M/816M [00:14<00:00, 55.6MB/s]
Downloading...
From: http://images.cocodataset.org/annotations/annotations_trainval2017.zip
To: /content/coco_data/annotations.zip


Downloading COCO Annotations...


100%|██████████| 253M/253M [00:04<00:00, 52.5MB/s]


Unzipping files...
Dataset Ready.


In [4]:
class COCODataset(torch.utils.data.Dataset):
    """COCO-format detection dataset yielding ``(image, target)`` pairs.

    ``target`` is a dict with:
      * ``"boxes"``: float32 tensor of shape ``(N, 4)`` in
        ``[xmin, ymin, xmax, ymax]`` order (``(0, 4)`` when there are no boxes),
      * ``"labels"``: int64 tensor of shape ``(N,)`` with the annotation
        ``category_id`` values,
      * ``"image_id"``: tensor of shape ``(1,)`` with the image id.

    Args:
        root: directory containing the image files.
        annotation: path to the COCO annotation JSON file.
        transforms: optional callable applied to the image only. The boxes are
            NOT transformed, so geometric transforms (resize, flip, crop) would
            leave the boxes out of sync with the image.
    """

    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        # Imported here so that defining the class does not require pycocotools.
        from pycocotools.coco import COCO
        self.coco = COCO(annotation)
        # Sorted ids give a stable index -> image mapping.
        self.ids = sorted(self.coco.imgs.keys())

    def __getitem__(self, index):
        """Load the image at ``index`` and build its detection target.

        Annotations whose box has non-positive width or height are skipped,
        because torchvision detection models reject such boxes in training.

        Returns:
            ``(img, target)`` where ``img`` is the RGB image (after
            ``transforms`` if one was given) and ``target`` is the dict
            described in the class docstring.
        """
        from PIL import Image

        coco = self.coco
        img_id = self.ids[index]
        annotations = coco.loadAnns(coco.getAnnIds(imgIds=img_id))

        file_name = coco.loadImgs(img_id)[0]['file_name']
        img = Image.open(os.path.join(self.root, file_name)).convert("RGB")

        boxes = []
        labels = []
        for ann in annotations:
            # COCO stores boxes as [x, y, width, height]; convert to corners.
            xmin, ymin, width, height = ann['bbox'][:4]
            if width <= 0 or height <= 0:
                continue
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            labels.append(ann['category_id'])

        # reshape(-1, 4) keeps the (N, 4) layout even when N == 0; without it an
        # image with no annotations would produce a tensor of shape (0,).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([img_id]),
        }

        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.ids)

In [5]:
def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) with a fresh box-prediction head.

    The detector is created with ``weights="DEFAULT"`` and its box predictor is
    then replaced by a new ``FastRCNNPredictor`` sized for ``num_classes``
    outputs, reusing the input width of the existing classification layer.

    Args:
        num_classes: number of output classes for the new predictor.

    Returns:
        The detector with the replaced box predictor.
    """
    detector = fasterrcnn_resnet50_fpn(weights="DEFAULT")

    roi_heads = detector.roi_heads
    # The new head must accept the same feature width as the one it replaces.
    feature_width = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(feature_width, num_classes)

    return detector

In [6]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [7]:

def get_transform():
    """Return the image preprocessing pipeline: a ``Compose`` holding only ``ToTensor``."""
    to_tensor = torchvision.transforms.ToTensor()
    return torchvision.transforms.Compose([to_tensor])

# val2017 images with their instance annotations, converted by get_transform() on load.
dataset = COCODataset(
    root='coco_data/val2017',
    annotation='coco_data/annotations/instances_val2017.json',
    transforms=get_transform(),
)

# Use at most the first 100 images. Clamping to len(dataset) keeps every index
# valid when fewer than 100 images are available.
dataset_subset = Subset(dataset, indices=range(min(100, len(dataset))))

def collate_fn(batch):
    """Transpose a list of samples into one tuple per field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``.
    Nothing is stacked, so the items in a batch may differ in size.
    """
    per_field = zip(*batch)
    return tuple(per_field)
# Seed the global RNG before the first random operations so that the shuffle
# order and the initial weights of the replaced prediction head are repeatable
# between runs (GPU kernels may still introduce small non-determinism).
torch.manual_seed(42)

data_loader = DataLoader(dataset_subset, batch_size=4, shuffle=True, collate_fn=collate_fn)

# The dataset uses COCO category ids directly as labels; 91 is presumably
# chosen to cover ids up to 90 plus the background index 0.
model = get_model(num_classes=91)
model.to(device)

# Optimise only parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

loading annotations into memory...
Done (t=0.47s)
creating index...
index created!
Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


100%|██████████| 160M/160M [00:01<00:00, 134MB/s]


In [8]:
num_epochs = 3

print(f"Starting training on device: {device}")

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    num_steps = 0

    progress = tqdm.tqdm(data_loader, desc=f"Epoch {epoch+1}")
    for step, (imgs, annotations) in enumerate(progress, start=1):
        # Move the batch to the training device.
        imgs = [img.to(device) for img in imgs]
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]

        # In train mode the model returns a dict of losses; optimise their sum.
        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Read the scalar once; each .item() call copies from the device.
        loss_value = losses.item()
        epoch_loss += loss_value
        num_steps = step

        if step % 10 == 0:
            # tqdm.write prints without breaking the active progress bar.
            tqdm.tqdm.write(f"Epoch: {epoch+1}, Step: {step}, Loss: {loss_value:.4f}")

    # Guard against an empty loader instead of dividing by zero.
    average_loss = epoch_loss / num_steps if num_steps else float("nan")
    print(f"Epoch {epoch+1} Complete. Average Loss: {average_loss:.4f}")

print("Training Finished!")

Starting training on device: cuda


Epoch 1:  40%|████      | 10/25 [00:13<00:16,  1.13s/it]

Epoch: 1, Step: 10, Loss: 1.3093


Epoch 1:  80%|████████  | 20/25 [00:26<00:06,  1.33s/it]

Epoch: 1, Step: 20, Loss: 1.7975


Epoch 1: 100%|██████████| 25/25 [00:32<00:00,  1.30s/it]


Epoch 1 Complete. Average Loss: 1.8819


Epoch 2:  40%|████      | 10/25 [00:13<00:20,  1.34s/it]

Epoch: 2, Step: 10, Loss: 1.3204


Epoch 2:  80%|████████  | 20/25 [00:26<00:05,  1.19s/it]

Epoch: 2, Step: 20, Loss: 1.4657


Epoch 2: 100%|██████████| 25/25 [00:32<00:00,  1.31s/it]


Epoch 2 Complete. Average Loss: 1.1844


Epoch 3:  40%|████      | 10/25 [00:13<00:19,  1.31s/it]

Epoch: 3, Step: 10, Loss: 1.9182


Epoch 3:  80%|████████  | 20/25 [00:27<00:06,  1.32s/it]

Epoch: 3, Step: 20, Loss: 0.6429


Epoch 3: 100%|██████████| 25/25 [00:34<00:00,  1.39s/it]

Epoch 3 Complete. Average Loss: 1.1037
Training Finished!



