In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
# This cell is Colab-specific; later cells read project files from that mount.
from google.colab import drive
drive.mount('/content/gdrive')

In [1]:
import os
# Move the working directory one level up so that the `src` package imported
# in the next cell can be resolved.
# NOTE(review): the path is relative to the *current* directory, so re-running
# this cell climbs one more level each time.
os.chdir("..")

In [2]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, fasterrcnn_resnet50_fpn
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from src.faster_rcnn import fasterrcnn_resnet18_fpn

In [3]:
# Everything runs on the CPU; the CUDA fallback expression is commented out.
device = torch.device('cpu') # if torch.cuda.is_available() else torch.device('cpu')

# Number of classes the detection head is built for.
# NOTE(review): a two-class setup (background + person) would use 2 here; this
# notebook uses 91 instead, presumably to match the COCO label space - confirm.
num_classes = 91

# Finetune pre-trained model

In [4]:
# Build the detector with the project-local factory from src.faster_rcnn
# (a Faster R-CNN with a ResNet-18 FPN backbone, going by its name).
# NOTE(review): `pretrained_backbone=True` presumably loads pre-trained weights
# for the backbone only, not a COCO-trained detector - confirm in src.faster_rcnn.
# Alternative torchvision models that were tried before:
# model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True, num_classes=num_classes)
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True, num_classes=num_classes)
model = fasterrcnn_resnet18_fpn(pretrained_backbone=True, num_classes=num_classes)
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the box predictor head with a new one sized for `num_classes`
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=num_classes)

# load model checkpoint (currently disabled)
# path = os.path.join(os.getcwd(), "./checkpoints/faster_rcnn_resnet1810_epochs.ckpt")
# checkpoint = torch.load(path)
# model.load_state_dict(checkpoint['model_state_dict'])

# Moving the model to `device` is disabled here; the COCO training cell calls
# model.to(device) itself.
# model.to(device) 

# PennFudan Dataset

In [None]:
# Switch to the detection helper directory on the mounted Drive so the local
# modules below can be imported (Colab-specific absolute path).
os.chdir("/content/gdrive/MyDrive/xai-lab/detection")

from pennfudan_dataset import PennFudanDataset, get_transform
from utils import collate_fn

# Seed for the train/test split and number of samples held out for testing.
SPLIT_SEED = 1
NUM_TEST_SAMPLES = 50

# Two views of the same data: one with the training transforms, one with the
# evaluation transforms.
dataset = PennFudanDataset('../PennFudanPed', get_transform(train=True))
dataset_test = PennFudanDataset('../PennFudanPed', get_transform(train=False))

# Split into train and test set. A dedicated, seeded generator makes the split
# identical on every run without touching the global RNG state; an unseeded
# permutation would put different images in the test set each time.
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
indices = torch.randperm(len(dataset), generator=split_rng).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-NUM_TEST_SAMPLES])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-NUM_TEST_SAMPLES:])

# Training and validation data loaders. `collate_fn` comes from the local
# `utils` module and is passed to both loaders.
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=collate_fn)


In [None]:
# Construct an SGD optimizer over the parameters that require gradients.
# Weight decay is currently disabled (see the trailing comment).
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9) #weight_decay=0.0005)
# A step learning-rate scheduler is kept here for reference but is disabled;
# the matching lr_scheduler.step() call in the training loop is disabled too.
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
#                                                step_size=3,
#                                                gamma=0.1)

In [None]:
!pip install --upgrade torch==1.10.0
!pip install --upgrade torchvision

In [None]:
from engine import train_one_epoch, evaluate

# Train for `num_epochs` (10) epochs; after each epoch the model is evaluated
# on the held-out loader. `train_one_epoch` and `evaluate` come from the local
# `engine` module.
num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch (print_freq=10 presumably logs every 10 iterations)
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # learning-rate scheduling is disabled (no scheduler is constructed above)
    # lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)


In [None]:
# Pick the first sample of the held-out subset; its target is not needed here.
img, _ = dataset_test[0]
# Put the model in evaluation mode and run it without gradient tracking.
# The model takes a list of images; `prediction` holds one entry per image,
# and the next cell reads prediction[0]["boxes"].
model.eval()
with torch.no_grad():
    prediction = model([img.to(device)])

In [None]:
from PIL import Image, ImageDraw

# Turn the image tensor into a PIL image: scale by 255, move the first axis
# last and cast to bytes (assumes a CHW float tensor in [0, 1] - TODO confirm
# against the dataset transforms).
pixels = img.mul(255).permute(1, 2, 0).byte().numpy()
result = Image.fromarray(pixels)
image = ImageDraw.Draw(result)

# Outline every predicted box in red.
for bbox in prediction[0]["boxes"]:
    image.rectangle(bbox.cpu().numpy().tolist(), outline="red")



In [None]:
# Display the annotated image as the cell output.
result

In [None]:
# Save checkpoint
# The epoch count in the file name is taken from `num_epochs` instead of a
# hard-coded 20, so the name reflects the training run that actually happened
# (10 epochs -> "faster_rcnn_resnet1810_epochs.ckpt", the name the commented-out
# loader near the top of the notebook refers to).
checkpoint_dir = os.path.join(os.getcwd(), "..", "checkpoints")
# torch.save does not create missing directories.
os.makedirs(checkpoint_dir, exist_ok=True)
path = os.path.join(checkpoint_dir, "faster_rcnn_resnet18" + str(num_epochs) + "_epochs.ckpt")
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, path)

In [None]:
# Test loading of checkpoint: read back the file written by the previous cell
# (`path`) and restore the model weights. The optimizer state stored under
# 'optimizer_state_dict' is not restored here.

checkpoint = torch.load(path)
model.load_state_dict(checkpoint['model_state_dict'])

# COCO Dataset
Please follow the guide at https://medium.com/howtoai/pytorch-torchvision-coco-dataset-b7f5e8cad82.
Run its terminal commands from inside the *explanations-for-computer-vision* folder.

In [5]:
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools.coco import COCO

class myOwnDataset(torch.utils.data.Dataset):
    """Detection dataset backed by a COCO-format annotation file.

    Args:
        root: Directory that contains the image files.
        annotation: Path to the COCO-format annotation JSON file.
        transforms: Optional callable applied to the PIL image only (the
            target is returned unchanged).

    Indexing returns ``(image, target)`` where ``target`` is a dict with the
    tensors ``boxes`` (shape ``[N, 4]`` as ``xmin, ymin, xmax, ymax``),
    ``labels``, ``image_id``, ``area`` and ``iscrowd``.
    """

    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        # Sorted image ids give a stable index -> image mapping.
        self.ids = list(sorted(self.coco.imgs.keys()))

    def __getitem__(self, index):
        """Load the image at ``index`` and build its detection target."""
        coco = self.coco
        img_id = self.ids[index]
        # All annotation records that belong to this image.
        ann_ids = coco.getAnnIds(imgIds=img_id)
        coco_annotation = coco.loadAnns(ann_ids)
        file_name = coco.loadImgs(img_id)[0]['file_name']

        # Image.open is lazy and keeps the file handle open; converting inside
        # a `with` block loads the pixels and releases the handle. Converting
        # to RGB also gives every sample three channels, including greyscale
        # files.
        with Image.open(os.path.join(self.root, file_name)) as raw_img:
            img = raw_img.convert("RGB")

        num_objs = len(coco_annotation)

        # COCO stores boxes as [xmin, ymin, width, height]; the detection
        # models expect [xmin, ymin, xmax, ymax].
        boxes = []
        for ann in coco_annotation:
            xmin, ymin, width, height = ann['bbox'][:4]
            boxes.append([xmin, ymin, xmin + width, ymin + height])
        # reshape keeps the [N, 4] layout even when the image has no
        # annotations (an empty list would otherwise become shape [0]).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        # Every object gets label 1 (single foreground class).
        # NOTE(review): the COCO `category_id` of each annotation is ignored;
        # confirm this is intended when the model is built for 91 classes.
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([img_id])
        areas = torch.as_tensor(
            [ann['area'] for ann in coco_annotation], dtype=torch.float32
        )
        # All objects are marked as non-crowd.
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        my_annotation = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": areas,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img = self.transforms(img)

        return img, my_annotation

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.ids)

In [6]:
def get_transform():
    """Return the image preprocessing pipeline: a single ``ToTensor`` step."""
    to_tensor = torchvision.transforms.ToTensor()
    return torchvision.transforms.Compose([to_tensor])

# Custom collate function required for batching detection samples.
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``,
    so the fields are kept as tuples rather than stacked.
    """
    columns = zip(*batch)
    return tuple(columns)

# Paths to the COCO images and annotation files, relative to the current
# working directory.
path2train="./coco/train2017"
path2trainjson="./coco/annotations/instances_train2017.json"
path2val="./coco/val2017"
path2valjson="./coco/annotations/instances_val2017.json"

# Training and validation datasets share the same image transform.
my_dataset = myOwnDataset(root=path2train,
                          annotation=path2trainjson,
                          transforms=get_transform()
                          )

my_dataset_test = myOwnDataset(root=path2val,
                               annotation=path2valjson,
                               transforms=get_transform()
                               )
# Batch size (used for both loaders)
train_batch_size = 1

# Training loader: reshuffled every epoch.
data_loader = torch.utils.data.DataLoader(my_dataset,
                                          batch_size=train_batch_size,
                                          shuffle=True,
                                          num_workers=4,
                                          collate_fn=collate_fn)

# Evaluation loader: kept in dataset order so evaluation runs are repeatable
# and sample indices stay meaningful; shuffling brings no benefit at test time.
data_loader_test = torch.utils.data.DataLoader(my_dataset_test,
                                               batch_size=train_batch_size,
                                               shuffle=False,
                                               num_workers=4,
                                               collate_fn=collate_fn)

loading annotations into memory...
Done (t=12.66s)
creating index...
index created!
loading annotations into memory...
Done (t=0.42s)
creating index...
index created!


In [7]:
# len() of a DataLoader is its number of batches; it equals the number of
# samples here only because both loaders use train_batch_size = 1.
print('Number of samples: ', len(data_loader))
print('Number of samples: ', len(data_loader_test))


Number of samples:  118287
Number of samples:  5000


In [None]:
# The detection head above was built for 91 classes; the value is repeated
# here for reference and is not used again in this cell.
num_classes = 91
num_epochs = 1

# move model to the right device
model.to(device)

# Optimise only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0001, momentum=0.9, weight_decay=0.0005)

len_dataloader = len(data_loader)

for epoch in range(num_epochs):
    model.train()
    for i, (imgs, annotations) in enumerate(data_loader, start=1):
        # Keep only samples that have at least one ground-truth box. The
        # previous guard (`annotations != []`) was always true, because the
        # batch holds one target dict per image even when that image has no
        # annotations, so un-annotated images were never actually skipped.
        samples = [
            (img, target)
            for img, target in zip(imgs, annotations)
            if target["boxes"].numel() > 0
        ]
        if not samples:
            # Nothing to train on; also avoids reporting a stale loss value.
            print(f'Iteration: {i}/{len_dataloader}, skipped (no annotated boxes)')
            continue

        imgs = [img.to(device) for img, _ in samples]
        annotations = [{k: v.to(device) for k, v in target.items()} for _, target in samples]

        # In training mode the model returns a dict of losses; their sum is
        # the quantity that is back-propagated.
        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        print(f'Iteration: {i}/{len_dataloader}, Loss: {losses}')

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Iteration: 1/118287, Loss: 5.266780853271484
Iteration: 2/118287, Loss: 6.004335880279541
Iteration: 3/118287, Loss: 5.346428871154785
Iteration: 4/118287, Loss: 5.199862480163574
Iteration: 5/118287, Loss: 5.3045854568481445
Iteration: 6/118287, Loss: 5.1054840087890625
Iteration: 7/118287, Loss: 5.000123977661133
Iteration: 8/118287, Loss: 4.975391387939453
Iteration: 9/118287, Loss: 4.818087577819824
Iteration: 10/118287, Loss: 4.624713897705078
Iteration: 11/118287, Loss: 4.820614337921143
Iteration: 12/118287, Loss: 4.6482014656066895
Iteration: 13/118287, Loss: 4.1555705070495605
Iteration: 14/118287, Loss: 4.493748664855957
Iteration: 15/118287, Loss: 3.89536452293396
Iteration: 16/118287, Loss: 4.109428405761719
Iteration: 17/118287, Loss: 3.344741106033325
Iteration: 18/118287, Loss: 3.095829486846924
Iteration: 19/118287, Loss: 3.3950626850128174
Iteration: 20/118287, Loss: 2.9761412143707275
Iteration: 21/118287, Loss: 2.200562000274658
Iteration: 22/118287, Loss: 2.32203412

In [None]:
# Pick one sample (index 99) for a qualitative check.
# NOTE(review): `my_dataset` is the *training* dataset (built from
# path2train), not the test set; use `my_dataset_test` for a held-out image.
img, _ = my_dataset[99]
# Put the model in evaluation mode and run it without gradient tracking.
model.eval()
with torch.no_grad():
    prediction = model([img.to(device)])

In [None]:
# Show the raw model output for the selected image.
prediction

In [None]:
from PIL import Image, ImageDraw

# Turn the image tensor into a PIL image: scale by 255, move the first axis
# last and cast to bytes.
pixels = img.mul(255).permute(1, 2, 0).byte().numpy()
result = Image.fromarray(pixels)
image = ImageDraw.Draw(result)

# Outline every predicted box in red.
for bbox in prediction[0]["boxes"]:
    image.rectangle(bbox.cpu().numpy().tolist(), outline="red")

In [None]:
# Display the annotated image as the cell output.
result

In [None]:
# Save checkpoint
# The epoch count in the file name is taken from `num_epochs` (1 in the
# training cell above) instead of a hard-coded literal.
# NOTE(review): the file name says "resnet50" and "pretrained_COCO", while the
# model in this notebook is created with fasterrcnn_resnet18_fpn; the name is
# kept unchanged in case other code loads it - confirm and rename if possible.
checkpoint_dir = os.path.join(os.getcwd(), "..", "checkpoints")
# torch.save does not create missing directories.
os.makedirs(checkpoint_dir, exist_ok=True)
path = os.path.join(checkpoint_dir, "faster_rcnn_resnet50_" + str(num_epochs) + "_epoch_pretrained_COCO.ckpt")
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, path)