In [None]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import torchvision.models
from PIL import Image

import sys, os

import os
import matplotlib.pyplot as plt

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [None]:
!unzip RCNNSet.zip

Archive:  RCNNSet.zip
replace RCNNSet/.DS_Store? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# Size of the detection head's label space.
num_classes = 7 # 6 classes + background

# Faster R-CNN (ResNet-50 backbone with FPN) initialised from torchvision's
# default weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the stock box predictor with one that outputs `num_classes` scores,
# keeping the input width of the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
class RCNNDataset(torch.utils.data.Dataset):
    """Detection dataset pairing each image with a text file of bounding boxes.

    Expected layout under ``root``::

        root/images/<name>.<ext>   image files
        root/bounds/<name>.txt     one box per line: ``left upper right lower label``

    Indexing returns ``(img, target)`` where ``target`` is a dict with the keys
    ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.
    """

    def __init__(self, root, transforms=None):
        """Index the files found under ``root``.

        Args:
            root: Directory containing the ``images`` and ``bounds`` sub-folders.
            transforms: Optional callable applied to the loaded PIL image
                before it is returned from ``__getitem__``.
        """
        self.root = root
        self.transforms = transforms
        # load image files
        self.imgs = self._list_visible(os.path.join(root, "images"))
        # bounds are stored in the format (left, upper, right, lower)
        self.bounds = self._list_visible(os.path.join(root, "bounds"))

    @staticmethod
    def _list_visible(folder):
        """Return the non-hidden entries of ``folder`` in sorted order.

        ``os.listdir`` order is arbitrary, so sorting makes the index -> file
        mapping reproducible. Dotfiles (e.g. the ``.DS_Store`` that macOS adds
        to archives) are skipped so they are never opened as images.
        """
        return sorted(name for name in os.listdir(folder) if not name.startswith("."))

    @staticmethod
    def _parse_bounds(lines):
        """Turn annotation lines into ``(boxes, labels)`` tensors.

        Args:
            lines: Iterable of strings, each ``left upper right lower label``
                separated by whitespace. Blank lines are ignored.

        Returns:
            ``boxes`` as a float32 tensor of shape ``(N, 4)`` in
            ``[xmin, ymin, xmax, ymax]`` order and ``labels`` as an int64
            tensor of shape ``(N,)``. ``N`` may be zero.

        Raises:
            IndexError: If a non-blank line has fewer than five fields.
            ValueError: If a field is not an integer.
        """
        boxes = []
        labels = []
        for line in lines:
            fields = line.split()  # split() already discards surrounding whitespace
            if not fields:
                continue
            xmin = int(fields[0])
            ymin = int(fields[1])
            xmax = int(fields[2])
            ymax = int(fields[3])
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(int(fields[4]))

        # reshape keeps the (N, 4) layout even when there are no boxes, so the
        # column indexing used for the area computation still works.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        return boxes, labels

    def __getitem__(self, idx):
        """Load the ``idx``-th image and build its detection target.

        Returns:
            Tuple ``(img, target)``; ``img`` is the RGB image (after
            ``self.transforms`` when one was given) and ``target`` is the
            annotation dict described on the class.
        """
        img_name = self.imgs[idx]
        img_path = os.path.join(self.root, "images", img_name)
        # The annotation file shares the image's base name; splitext copes with
        # extensions of any length (".jpeg" as well as ".png").
        bounds_name = os.path.splitext(img_name)[0] + ".txt"
        bounds_path = os.path.join(self.root, "bounds", bounds_name)
        img = Image.open(img_path).convert("RGB")

        # The context manager closes the annotation file as soon as it is parsed.
        with open(bounds_path, "r") as bound_file:
            boxes, labels = self._parse_bounds(bound_file)

        num_objs = boxes.shape[0]
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.imgs)

In [None]:
def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A batch such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked or padded.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
def get_transform():
    """Build the preprocessing pipeline applied to each loaded image."""
    # Single step: ToTensor converts the PIL image into a PyTorch tensor.
    return transforms.Compose([transforms.ToTensor()])

# Only the training loader is shuffled. Validation and test data are read in a
# fixed order so that evaluation is repeatable from one run to the next.
train_dataset = RCNNDataset("RCNNSet/train", get_transform())
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=4, collate_fn=collate_fn)

val_dataset = RCNNDataset("RCNNSet/val", get_transform())
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=4, collate_fn=collate_fn)

test_dataset = RCNNDataset("RCNNSet/test", get_transform())
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=4, collate_fn=collate_fn)



In [None]:
class HiddenPrints:
    """Context manager that discards everything written to ``sys.stdout``.

    Usage::

        with HiddenPrints():
            noisy_function()

    On exit the stream that was active on entry is put back, including when
    the body raises.
    """

    def __enter__(self):
        """Redirect ``sys.stdout`` to the null device and return ``self``."""
        self._original_stdout = sys.stdout
        # Keep our own handle: __exit__ must close exactly this file, even if
        # the with-body rebinds sys.stdout to something else in the meantime.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the null-device handle and restore the original stdout."""
        try:
            self._devnull.close()
        finally:
            sys.stdout = self._original_stdout

In [None]:
# Use the GPU whenever CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Training on GPU!!!" if device.type == 'cuda' else "Training on CPU")

#device = torch.device('cpu')

Training on GPU!!!


In [None]:
%%shell

# Download TorchVision repo to use some files from
# references/detection
# Clone only when the checkout is missing, so re-running this cell does not
# fail with "destination path 'vision' already exists".
if [ ! -d vision ]; then
  git clone https://github.com/pytorch/vision.git
fi
cd vision
git checkout v0.8.2

# Copy the helper modules next to the notebook so they can be imported directly.
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

fatal: destination path 'vision' already exists and is not an empty directory.
HEAD is now at 2f40a483d7 [v0.8.X] .circleci: Add Python 3.9 to CI (#3063)




In [None]:
from engine import train_one_epoch, evaluate
import utils

# Move the model to the selected device and put it in training mode.
model.to(device)
model.train()

# Optimise only the parameters that still require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Step schedule: the learning rate is multiplied by 0.1 (a 10x decrease)
# every 3 scheduler steps, i.e. every 3 epochs when stepped once per epoch.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [None]:
# Fine-tune the detector for `num_epochs` (10) epochs. Each epoch runs, in order:
# one training pass, a checkpoint save, a scheduler step, and an evaluation on
# the validation loader.
# NOTE(review): StepLR is imported here but not referenced in this cell; the
# scheduler in use is the `lr_scheduler` object created in an earlier cell.
from torch.optim.lr_scheduler import StepLR
num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 100 iterations
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=100)
    # Save this epoch's weights (state_dict only) to a file named
    # "rcnn_epoch_<epoch>" with no extension, relative to the working directory.
    path = f"rcnn_epoch_{epoch}"
    torch.save(model.state_dict(), path)

    # update the learning rate
    lr_scheduler.step()
    # evaluate on the val dataset
    evaluate(model, val_loader, device=device)

Epoch: [0]  [  0/854]  eta: 0:16:37  lr: 0.000011  loss: 2.6372 (2.6372)  loss_classifier: 2.1368 (2.1368)  loss_box_reg: 0.4088 (0.4088)  loss_objectness: 0.0262 (0.0262)  loss_rpn_box_reg: 0.0653 (0.0653)  time: 1.1677  data: 0.4298  max mem: 2403
Epoch: [0]  [100/854]  eta: 0:06:29  lr: 0.000596  loss: 0.5840 (0.9947)  loss_classifier: 0.2638 (0.5978)  loss_box_reg: 0.2973 (0.3338)  loss_objectness: 0.0068 (0.0208)  loss_rpn_box_reg: 0.0295 (0.0423)  time: 0.5222  data: 0.0092  max mem: 2944
Epoch: [0]  [200/854]  eta: 0:05:38  lr: 0.001182  loss: 0.5297 (0.8192)  loss_classifier: 0.2206 (0.4319)  loss_box_reg: 0.2652 (0.3219)  loss_objectness: 0.0134 (0.0229)  loss_rpn_box_reg: 0.0319 (0.0425)  time: 0.5168  data: 0.0102  max mem: 3002
Epoch: [0]  [300/854]  eta: 0:04:45  lr: 0.001768  loss: 0.5602 (0.7515)  loss_classifier: 0.2451 (0.3769)  loss_box_reg: 0.2445 (0.3108)  loss_objectness: 0.0085 (0.0219)  loss_rpn_box_reg: 0.0380 (0.0419)  time: 0.5145  data: 0.0094  max mem: 3002
