In [None]:
import torchvision
import numpy as np
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Number of output classes for the new head: one foreground class
# (person) plus the background class.
num_classes = 2

# Start from a Faster R-CNN (ResNet-50 + FPN) with COCO pre-trained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# The new box predictor must accept the same feature width the
# pre-trained classification layer consumed, so read it off that layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Swap the pre-trained box head for a fresh one sized for `num_classes`.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

def get_model_instance_segmentation(num_classes):
    """Return a pre-trained Mask R-CNN whose heads are resized for `num_classes`.

    Loads `maskrcnn_resnet50_fpn` with pre-trained weights, then replaces
    both the box predictor and the mask predictor with freshly constructed
    ones that output `num_classes` classes.

    Args:
        num_classes: Number of output classes passed to both new heads.

    Returns:
        The Mask R-CNN model with the two replaced predictor heads.
    """
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    roi_heads = model.roi_heads

    # Box head: keep the input width of the existing classification layer.
    box_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: keep the input channel count of the existing mask branch
    # and use a 256-channel hidden layer.
    mask_in_channels = roi_heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_dim = 256
    roi_heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, mask_hidden_dim, num_classes)

    return model

In [None]:
import torchvision.transforms as T

def get_transform(train, flip_prob=0.5):
    """Build the image preprocessing pipeline.

    Args:
        train: When truthy, a random horizontal flip is appended as
            data augmentation.
        flip_prob: Probability with which each image is flipped when
            `train` is truthy. A value of 1 would mirror every image,
            which is a fixed transform rather than augmentation.

    Returns:
        A `T.Compose` that converts the input to a tensor and, when
        `train` is truthy, randomly mirrors it horizontally.
    """
    transforms = [T.ToTensor()]
    if train:
        # NOTE(review): `T.RandomHorizontalFlip` only receives the image.
        # If the dataset does not mirror boxes/masks itself, flipped
        # samples will have misaligned targets -- confirm against
        # PennFudanDataset.
        transforms.append(T.RandomHorizontalFlip(flip_prob))
    return T.Compose(transforms)

## Testing the `forward()` method

In [None]:
from PennFudanDataset import PennFudanDataset
import torch as t

def collate_fn(batch):
    """Regroup a batch of `(data, label)` samples into `(data, labels)`.

    Detection samples cannot be stacked into a single tensor: images may
    differ in size and each label is a per-image mapping. The samples are
    therefore only transposed, never converted or stacked.

    Args:
        batch: Sequence of `(data, label)` pairs as produced by the dataset.

    Returns:
        A tuple `(data_items, label_items)` where each element is a tuple
        holding the corresponding field of every sample, in batch order.
    """
    return tuple(zip(*batch))


# Dataset with the training-mode transform pipeline applied to each sample.
dataset = PennFudanDataset(
    root="data/PennFudanPed",
    transforms=get_transform(train=True),
)

# Rebind `model` to a freshly loaded pre-trained Faster R-CNN for the
# forward-pass checks that follow.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Pull the first sample so it can be inspected in the following cells.
img, target = dataset[0]

In [None]:
import matplotlib.pyplot as plt
# Move the leading axis to the end so the array layout matches what
# `imshow` expects for colour images.
plt.imshow(img.permute(1, 2, 0))

In [None]:
def _collate_detection_batch(batch):
    """Transpose `[(image, target), ...]` into `(images, targets)` tuples.

    Images and targets are kept as separate per-sample items instead of
    being stacked, so samples of different sizes can share a batch.
    """
    return tuple(zip(*batch))


dataloader = t.utils.data.DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=_collate_detection_batch,
)

# --- Training-mode forward pass ---
# Set the mode explicitly: this cell ends in eval mode, so re-running it
# would otherwise do the "training" forward pass in eval mode.
model.train()
images, targets = next(iter(dataloader))
images = list(images)
targets = [dict(tg) for tg in targets]
output = model(images, targets)

# --- Inference-mode forward pass ---
model.eval()
x = [t.rand(3, 300, 400), t.rand(3, 500, 400)]
# No gradients are needed for a pure prediction check.
with t.no_grad():
    predictions = model(x)