In [1]:
import numpy as np
import os
import numpy as np
import torch
import torchvision
from torch.utils import data
from torchvision import datasets, models, transforms
from PIL import Image
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import copy
import math
import itertools as it
from torch.optim import Optimizer

###Defining the dataset 

* image: a PIL Image of size (H, W)
* target: a dict containing the following fields
    * `boxes` (`FloatTensor[N, 4]`): the coordinates of the `N` bounding boxes in `[x0, y0, x1, y1]` format, ranging from `0` to `W` and `0` to `H`
    * `labels` (`Int64Tensor[N]`): the label for each bounding box
    * `image_id` (`Int64Tensor[1]`): an image identifier. It should be unique between all the images in the dataset.
    * `area` (`Tensor[N]`): The area of the bounding box.
    * `iscrowd` (`UInt8Tensor[N]`): boolean representation of whether the image has crowded instances
    * (optionally) `masks` (`UInt8Tensor[N, H, W]`): The segmentation masks for each one of the objects
    * (optionally) `keypoints` (`FloatTensor[N, K, 3]`): For each one of the `N` objects, it contains the `K` keypoints in `[x, y, visibility]` format, defining the object. 

### Writing a custom dataset for Penn-Fudan

 Write a dataset for the Penn-Fudan dataset.

First, download and extract the data, present in a zip file at https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip

In [2]:
# # Run shell to download the dataset and download the Penn-Fudan dataset
!wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip .
# # extract it in the current folder
!unzip PennFudanPed.zip

--2020-06-28 15:23:59--  https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip
Resolving www.cis.upenn.edu (www.cis.upenn.edu)... 158.130.69.163, 2607:f470:8:64:5ea5::d
Connecting to www.cis.upenn.edu (www.cis.upenn.edu)|158.130.69.163|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 53723336 (51M) [application/zip]
Saving to: ‘PennFudanPed.zip’


2020-06-28 15:24:49 (1.04 MB/s) - ‘PennFudanPed.zip’ saved [53723336/53723336]

--2020-06-28 15:24:49--  http://./
Resolving . (.)... failed: No address associated with hostname.
wget: unable to resolve host address ‘.’
FINISHED --2020-06-28 15:24:49--
Total wall clock time: 50s
Downloaded: 1 files, 51M in 49s (1.04 MB/s)
Archive:  PennFudanPed.zip
   creating: PennFudanPed/
  inflating: PennFudanPed/added-object-list.txt  
   creating: PennFudanPed/Annotation/
  inflating: PennFudanPed/Annotation/FudanPed00001.txt  
  inflating: PennFudanPed/Annotation/FudanPed00002.txt  
  inflating: PennFudanPed/Annotation/Fuda

The data is structured as follows
```
PennFudanPed/
  Annotations/
    FudanPed00001.txt
    FudanPed00002.txt
    ...
  PedMasks/
    FudanPed00001_mask.png
    FudanPed00002_mask.png
    ...
  PNGImages/
    FudanPed00001.png
    FudanPed00002.png
    ...
```

Write a `torch.utils.data.Dataset` class for this dataset for the input image and target

In [3]:
# Create Dataset class
class PennFudanDataset(data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images ad masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)

        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]

        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks.astype(np.uint8), dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)

That's all for the dataset. The structure of the output dictionary is shown below.

In [None]:
dataset = PennFudanDataset('PennFudanPed/')
dataset[0]

(<PIL.Image.Image image mode=RGB size=559x536 at 0x7FC6C869C6D8>,
 {'area': tensor([35358., 36225.]), 'boxes': tensor([[159., 181., 301., 430.],
          [419., 170., 534., 485.]]), 'image_id': tensor([0]), 'iscrowd': tensor([0, 0]), 'labels': tensor([1, 1]), 'masks': tensor([[[0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           ...,
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0]],
  
          [[0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           ...,
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0],
           [0, 0, 0,  ..., 0, 0, 0]]], dtype=torch.uint8)})

### Model

I'm using Maskrcnn model, provided by torchvision. The backbone used is `resnet_50_fpn`. The model is further modified for our use case.

In [4]:
# helper function for model
def get_backbone(num_classes):
  backbone = torchvision.models.resnet50(pretrained=True)

  new_backbone = torch.nn.Sequential(*(list(backbone.children())[:-2]))
  new_backbone.out_channels = 2048

  model = MaskRCNN(new_backbone, num_classes)
  return model

In [5]:
# Define model
def get_instance_segmentation_model(num_classes):
    
    # get the maskrcnn model
    model = get_backbone(num_classes)

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model


Copy files from [pytorch github](https://github.com/pytorch/vision.git) to access helper functions, used for training the model

In [10]:
%%shell
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

Cloning into 'vision'...
remote: Enumerating objects: 69, done.[K
remote: Counting objects: 100% (69/69), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 8631 (delta 32), reused 18 (delta 5), pack-reused 8562[K
Receiving objects: 100% (8631/8631), 10.38 MiB | 6.86 MiB/s, done.
Resolving deltas: 100% (5949/5949), done.
Note: checking out 'v0.3.0'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:

  git checkout -b <new-branch-name>

HEAD is now at be37608 version check against PyTorch's CUDA version






Write a helper function for data augmentation, which leverages the functions in `refereces/detection` that we have just copied:


In [11]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T


def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

Split the data to training and validation sets and create pytorch dataloader objects

In [7]:
# Combine X and y within a batch
def collate_fn(batch):
    return tuple(zip(*batch))

In [12]:
# use our dataset and defined transformations
train_dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
val_dataset = PennFudanDataset('PennFudanPed', get_transform(train=False))

# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(train_dataset)).tolist()
train_dataset = torch.utils.data.Subset(train_dataset, indices[:-50])
val_dataset = torch.utils.data.Subset(val_dataset, indices[-50:])

BATCH_SIZE = 2
# define training and validation data loaders
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True,
    collate_fn=collate_fn)

val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=1, shuffle=False,
    collate_fn=collate_fn)


Length of training set : 120

Length of validation set : 50

Instantiate the model

In [9]:
# our dataset has two classes only - background and person
num_classes = 2

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# get the model
model = get_instance_segmentation_model(num_classes)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




Define optimizer and sceduler

In [None]:
# Define our optimizer classes- RAdam and LookAhead
class Lookahead(Optimizer):
    def __init__(self, base_optimizer,alpha=0.5, k=6):
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f'Invalid slow update rate: {alpha}')
        if not 1 <= k:
            raise ValueError(f'Invalid lookahead steps: {k}')
        self.optimizer = base_optimizer
        self.param_groups = self.optimizer.param_groups
        self.alpha = alpha
        self.k = k
        for group in self.param_groups:
            group["step_counter"] = 0
        self.slow_weights = [[p.clone().detach() for p in group['params']]
                                for group in self.param_groups]

        for w in it.chain(*self.slow_weights):
            w.requires_grad = False

    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()
        loss = self.optimizer.step()
        for group,slow_weights in zip(self.param_groups,self.slow_weights):
            group['step_counter'] += 1
            if group['step_counter'] % self.k != 0:
                continue
            for p,q in zip(group['params'],slow_weights):
                if p.grad is None:
                    continue
                q.data.add_(self.alpha,p.data - q.data)
                p.data.copy_(q.data)
        return loss

class RAdam(Optimizer):
    '''
    a PyTorch implementation of the RAdam Optimizer from th paper
    On the Variance of the Adaptive Learning Rate and Beyond.

    https://arxiv.org/abs/1908.03265
    Example:
        >>> from optimizer import RAdam
        >>> optimizer = RAdam(model.parameters(), lr=0.001)
    '''

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        self.buffer = [[None, None, None] for ind in range(10)]
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                state['step'] += 1
                buffered = self.buffer[int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma
                    if N_sma > 5:
                        step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        step_size = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)

                if N_sma > 5:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-step_size, exp_avg, denom)
                else:
                    p_data_fp32.add_(-step_size, exp_avg)

                p.data.copy_(p_data_fp32)

        return loss

#
class Ralamb(Optimizer):
    '''
    Ralamb optimizer (RAdam + LARS trick)
    '''
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        self.buffer = [[None, None, None] for ind in range(10)]
        super(Ralamb, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(Ralamb, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('Ralamb does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                # Decay the first and second moment running average coefficient
                # m_t
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                # v_t
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)

                state['step'] += 1
                buffered = self.buffer[int(state['step'] % 10)]

                if state['step'] == buffered[0]:
                    N_sma, radam_step = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        radam_step = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        radam_step = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = radam_step

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)

                weight_norm = p.data.pow(2).sum().sqrt().clamp(0, 10)
                radam_norm = p_data_fp32.pow(2).sum().sqrt()
                if weight_norm == 0 or radam_norm == 0:
                    trust_ratio = 1
                else:
                    trust_ratio = weight_norm / radam_norm

                state['weight_norm'] = weight_norm
                state['adam_norm'] = radam_norm
                state['trust_ratio'] = trust_ratio

                # more conservative since it's an approximated value
                if N_sma >= 5:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-radam_step * trust_ratio, exp_avg, denom)
                else:
                    p_data_fp32.add_(-radam_step * trust_ratio, exp_avg)

                p.data.copy_(p_data_fp32)

        return loss



In [None]:
# Define optimizer and scheduler 
base_optim = RAdam(model.parameters(), lr = 0.001) 
optimizer =  Lookahead(base_optim, k=5, alpha=0.5)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=20,  gamma=0.1)

In [None]:
#resnet_50 parameters
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print('Numer of trainable parameters : {}'.format(count_parameters(model)))

Numer of trainable parameters : 171862875


Helper function to calculate IoU for predictions on validation set

In [None]:
def calc_iou(pred_masks, target_masks):
    target_mask_comb = np.sum(target_masks, axis=0)
    pred_mask_comb = np.sum(pred_masks, axis=0)
    
    mask = np.add(pred_mask_comb,target_mask_comb)
    
    union = np.sum(mask>0)
    inter = np.sum(mask==2)
    
    IoU = inter/union

    return IoU

Train the model for 70 epochs and save weights for the highest IoU on validation set

In [None]:
# training cell
BATCH_SIZE=2
N_EPOCH = 70

IoU=0
for epoch in range(N_EPOCH):

  # use helper function to train model
  train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=61)

  # update the learning rate 
  scheduler.step()

  # evaluate on the validation set
  model.eval()
  curr_IoU = 0
  with torch.no_grad():
    for images, targets in val_dataloader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        torch.cuda.synchronize()

        # get predictions
        pred = model(images)

        # post-processing to get masks with scores>0.5
        pred_masks = (pred[0]['masks']>0.5).squeeze().detach().cpu().numpy()
        pred_score = list(pred[0]['scores'].detach().cpu().numpy())
        pred_t = [pred_score.index(x) for x in pred_score if x>0.5]
        if len(pred_t) == 0:
          continue
        pred_t = pred_t[-1]

        pred_masks = pred_masks[:pred_t+1]
        target_masks = targets[0]['masks'].detach().cpu().numpy()

        # calculate IoU for the current batch
        batch_IoU = calc_iou(pred_masks, target_masks)
        curr_IoU+=batch_IoU

  print("Epoch: {} Validation IoU: {}"
  .format(epoch, curr_IoU/len(val_dataset)))
  if curr_IoU>IoU:
      best_model_wts = copy.deepcopy(model.state_dict())
      IoU = curr_IoU
      print('Model saved in {} epoch'.format(epoch))



Epoch: [0]  [ 0/60]  eta: 0:01:50  lr: 0.000018  loss: 12.6068 (12.6068)  loss_classifier: 0.7011 (0.7011)  loss_box_reg: 0.0001 (0.0001)  loss_mask: 6.3443 (6.3443)  loss_objectness: 0.6883 (0.6883)  loss_rpn_box_reg: 4.8731 (4.8731)  time: 1.8386  data: 0.1575  max mem: 9082
Epoch: [0]  [59/60]  eta: 0:00:01  lr: 0.001000  loss: 3.3934 (6.5869)  loss_classifier: 0.0914 (0.2226)  loss_box_reg: 0.0569 (0.0507)  loss_mask: 0.6566 (2.5259)  loss_objectness: 0.1896 (0.3901)  loss_rpn_box_reg: 2.2654 (3.3976)  time: 1.9057  data: 0.0907  max mem: 9082
Epoch: [0] Total time: 0:01:52 (1.8702 s / it)
Epoch: 0 Validation IoU: 0.0
Epoch: [1]  [ 0/60]  eta: 0:02:12  lr: 0.001000  loss: 2.4728 (2.4728)  loss_classifier: 0.1152 (0.1152)  loss_box_reg: 0.0800 (0.0800)  loss_mask: 0.6587 (0.6587)  loss_objectness: 0.1163 (0.1163)  loss_rpn_box_reg: 1.5026 (1.5026)  time: 2.2123  data: 0.1866  max mem: 9082
Epoch: [1]  [59/60]  eta: 0:00:01  lr: 0.001000  loss: 2.0038 (2.2576)  loss_classifier: 0.035

KeyboardInterrupt: ignored

It seems the model is overfitting so we store and load the model weights for the highest IoU, add `weight_decay` to our optimizer for regularization and train the model again.

In [None]:
# load the weight with highest IoU on validation set
model.load_state_dict(best_model_wts)

In [None]:
# Define new optimizer and scheduler 
base_optim = RAdam(model.parameters(), lr = 0.0001, weight_decay = 1e-3) # add wight decay
optimizer =  Lookahead(base_optim, k=5, alpha=0.5)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=10,  gamma=0.1)
# = torch.optim.lr_scheduler.OneCycleLR()


In [None]:
# training cell final
BATCH_SIZE=2
N_EPOCH = 30

IoU=0
for epoch in range(N_EPOCH):

  # use helper function to train model
  train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=61)

  # update the learning rate 
  scheduler.step()

  # evaluate on the validation set
  model.eval()
  curr_IoU = 0
  with torch.no_grad():
    for images, targets in val_dataloader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        torch.cuda.synchronize()

        # get predictions
        pred = model(images)

        # post-processing to get masks with scores>0.5
        pred_masks = (pred[0]['masks']>0.5).squeeze().detach().cpu().numpy()
        pred_score = list(pred[0]['scores'].detach().cpu().numpy())
        pred_t = [pred_score.index(x) for x in pred_score if x>0.5]
        if len(pred_t) == 0:
          continue
        pred_t = pred_t[-1]

        pred_masks = pred_masks[:pred_t+1]
        target_masks = targets[0]['masks'].detach().cpu().numpy()

        # calculate IoU for the current batch
        batch_IoU = calc_iou(pred_masks, target_masks)
        curr_IoU+=batch_IoU

  print("Epoch: {} Validation IoU: {}"
  .format(epoch, curr_IoU/len(val_dataset)))
  if curr_IoU>IoU:
      best_model_wts_1 = copy.deepcopy(model.state_dict())
      IoU = curr_IoU
      print('Model saved in {} epoch'.format(epoch))

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


Epoch: [0]  [ 0/60]  eta: 0:02:08  lr: 0.000002  loss: 1.0401 (1.0401)  loss_classifier: 0.0261 (0.0261)  loss_box_reg: 0.0255 (0.0255)  loss_mask: 0.2528 (0.2528)  loss_objectness: 0.0533 (0.0533)  loss_rpn_box_reg: 0.6825 (0.6825)  time: 2.1398  data: 0.0510  max mem: 6126
Epoch: [0]  [59/60]  eta: 0:00:01  lr: 0.000100  loss: 0.8351 (0.9178)  loss_classifier: 0.0134 (0.0167)  loss_box_reg: 0.0121 (0.0141)  loss_mask: 0.1684 (0.1805)  loss_objectness: 0.0595 (0.0727)  loss_rpn_box_reg: 0.5398 (0.6338)  time: 1.9102  data: 0.0754  max mem: 8395
Epoch: [0] Total time: 0:01:54 (1.9033 s / it)
Epoch: 0 Validation IoU: 0.6254253516285333
Model saved in 0 epoch
Epoch: [1]  [ 0/60]  eta: 0:02:01  lr: 0.000100  loss: 0.9784 (0.9784)  loss_classifier: 0.0300 (0.0300)  loss_box_reg: 0.0238 (0.0238)  loss_mask: 0.1859 (0.1859)  loss_objectness: 0.1041 (0.1041)  loss_rpn_box_reg: 0.6346 (0.6346)  time: 2.0210  data: 0.1589  max mem: 8395
Epoch: [1]  [59/60]  eta: 0:00:01  lr: 0.000100  loss: 0.7

Highest IoU obtained is `0.65`. Save the model weights as shown below

In [None]:
checkpoint = {'model': model.state_dict()}
torch.save(checkpoint, 'maskrcnn_resnet_50.pt')