# This document serves to provide the bounding boxes using MaskRCNN

## Creating the dataset for the WAYMO images!

In [1]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image

In [2]:
from data_utils import WaymoDataset
dataset = WaymoDataset(cameras=['FRONT'], heatmaps=False, network = "fstrcnn")
dataset_test = WaymoDataset(scope='validation', cameras=['FRONT'], heatmaps=False)

In [3]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [347]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [467]:
for (name, layer) in model._modules.items():
    #iteration over outer layers
    print(name)

transform
backbone
rpn
roi_heads


In [349]:
model1 = model.backbone
model3 = model.rpn
model2 = model.roi_heads

In [457]:
for name, child in model.roi_heads.named_children():
    print(name)

box_roi_pool
box_head
box_predictor


In [469]:
for name, child in model.roi_heads.box_predictor.named_children():
    print(name)

cls_score
bbox_pred


In [462]:
for name, child in model.roi_heads.box_head.named_children():
    print(name)

fc6
fc7


In [454]:
for name, child in model.rpn.named_children():
    print(name)

anchor_generator
head


In [456]:
for name, child in model.rpn.head.named_children():
    print(name)

conv
cls_logits
bbox_pred


In [453]:
for name, child in model.backbone.named_children():
    print(name)

body
fpn


In [440]:
for name, child in model.backbone.body.named_children():
    print(name)

conv1
bn1
relu
maxpool
layer1
layer2
layer3
layer4


In [446]:
for name, child in model.backbone.fpn.named_children():
    print(name)

inner_blocks
layer_blocks
extra_blocks


In [444]:
for name, child in model.backbone.body.named_children():
    if name in ['layer3']:
        print(name + ' is unfrozen')
        for param in child.parameters():
            param.requires_grad = True
    else:
        print(name + ' is frozen')
        for param in child.parameters():
                   param.requires_grad = False

conv1 is frozen
bn1 is frozen
relu is frozen
maxpool is frozen
layer1 is frozen
layer2 is frozen
layer3 is unfrozen
layer4 is frozen


In [399]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def transfer_learning(model, layers_to_freeze = {'backbone':{'body':['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3', 'layer4'],
                                                             'fpn':['inner_blocks', 'layer_blocks', 'extra_blocks']}
                                                  'rpn':{'anchor_generator':'', 
                                                         'head':['conv', 'cls_logits', 'bbox_pred']}
                                                  'roi_heads':{'box_roi_pool', 
                                                               'box_head':['fc6', 'fc7'],
                                                               'box_predictor':['cls_score', 'bbox_pred']}}
    print(f"Num trainable params before:{count_parameters(model)}")
        
    for layer_group in layers_to_freeze:
        for layer in layer_group.modules():
            freeze_params(layer)
    print(f"Num trainable params after:{count_parameters(model)}")
    print(f"Layers 0 to {len(layers_to_freeze)-1} frozen, only training the {text} layers")

def freeze_params(layer):
    for parameter in layer.parameters():
        if parameter.requires_grad:
            parameter.requires_grad = False
# transfer_learning(ed.backbone_net.model)

In [400]:
count_parameters(model)
transfer_learning(model)

Num trainable params before:14955990
hi
14955990
hi
14955990
Num trainable params after:593935
Layers 0 to 1 frozen, only training the rpn layers


In [24]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# There are two options, finetuning a pretrained model, or changing the backbone in a given model!
## Finetuning a pretrained model!      
def get_instance_segmentation_model(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    
    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

In [11]:
# Helper functions for augmentation and transformations

In [12]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T


def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [13]:
# # use our dataset and defined transformations
# dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
# dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))

# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:1])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-1:])


# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)
# assign 1 worker
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

In [38]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a pre-trained model for classification and return
# only the features
# backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone = torchvision.models.resnet50(pretrained=True)#.features
# backbone = torchvision.models.squeezenet1_0(pretrained=True)
# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be [0]. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=4,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)


Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/checkpoints/resnet50-19c8e357.pth
100%|██████████| 97.8M/97.8M [00:04<00:00, 23.7MB/s]


In [39]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has two classes only - background and person
num_classes = 4

# get the model using our helper function
# model = get_instance_segmentation_model(num_classes)

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [40]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [41]:
count_parameters(model)

105696391

In [None]:
# let's train it for 10 epochs
num_epochs = 1

for epoch in range(1):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=0)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

just before calling metric_logger cpu
just before the for loop in metric_logger_for_every


In [None]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
dataset = WaymoDataset(cameras=['FRONT'], heatmaps=False)
data_loader = torch.utils.data.DataLoader(
 dataset, batch_size=2, shuffle=True, num_workers=4,
 collate_fn=utils.collate_fn)
# For Training
images,targets = next(iter(data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]

In [None]:
for i in data_loader:
    x = i
    break

In [None]:
output = model(images,targets)   # Returns losses and detections

In [None]:
# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)           # Returns predictions

In [None]:
for i in range(1):
    print(i)

In [346]:
import torchvision.models as models

resnet = models.resnet101(pretrained=True)

for (name, layer) in resnet._modules.items():
    #iteration over outer layers
    print((name, layer))

# resnet._modules['layer1'][0]._modules['bn1'].weight.data.zero_()

('conv1', Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False))
('bn1', BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))
('relu', ReLU(inplace=True))
('maxpool', MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False))
('layer1', Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): Ba