In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
from PIL import Image
from tempfile import TemporaryDirectory
import utils


# Switch matplotlib into interactive mode for the rest of the session.
plt.ion()   # interactive mode

<contextlib.ExitStack at 0x241c28b8090>

In [2]:
from torchvision.models.detection import maskrcnn_resnet50_fpn_v2, MaskRCNN
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator

In [3]:
from torchvision.models.detection import maskrcnn_resnet50_fpn_v2

In [4]:
from torchvision.io import read_image

In [5]:
import torch
import torch.utils.data
from torchvision import models, datasets, tv_tensors
from torchvision.transforms import v2
from torchvision.io import read_image
import engine
import coco_eval
import coco_utils
import utils
import transforms
from torchvision.utils import draw_bounding_boxes
# Seed PyTorch's default random number generator with a fixed value.
# NOTE(review): no numpy / Python `random` seeding is visible in this notebook — confirm whether it is needed.
torch.manual_seed(0)

<torch._C.Generator at 0x241ba4fc8b0>

In [6]:
# Sanity check: show whether PyTorch reports a usable CUDA device.
torch.cuda.is_available()

True

In [7]:
# Prefer the GPU, but fall back to the CPU when CUDA is unavailable so the
# notebook still runs (slowly) instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")

Using cuda device


In [8]:
# Explicitly enable the cuDNN backend.
cudnn.enabled = True

In [9]:
# Turn on cuDNN benchmark mode.
# NOTE(review): benchmark mode is generally most useful when input shapes stay
# constant between iterations — confirm it helps for this pipeline.
cudnn.benchmark = True

In [10]:
def get_model_instance_segmentation(num_classes, hidden_layer=256, weights="DEFAULT"):
    """Build a Mask R-CNN (ResNet-50 FPN v2) with heads resized to ``num_classes``.

    The model is created by ``maskrcnn_resnet50_fpn_v2`` and its box predictor
    and mask predictor are then replaced with freshly initialised heads.

    Args:
        num_classes: Number of output classes for both the box and the mask
            head. torchvision detection models count the background as a
            class, so this is normally ``number_of_object_classes + 1``.
        hidden_layer: Hidden-layer width passed to the new ``MaskRCNNPredictor``.
            Defaults to 256, the value that was previously hard-coded.
        weights: Forwarded to ``maskrcnn_resnet50_fpn_v2(weights=...)``.
            Defaults to ``"DEFAULT"`` (the previous hard-coded value); pass
            ``None`` to skip loading pre-trained weights.

    Returns:
        The model with both predictors replaced.
    """
    # Start from the (by default pre-trained) instance segmentation model.
    model = maskrcnn_resnet50_fpn_v2(weights=weights)

    # Box head: keep the input width of the existing classifier, swap in a new
    # predictor sized for our classes.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Mask head: same idea, using the channel count of the existing mask conv.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask,
        hidden_layer,
        num_classes
    )

    return model

In [11]:
# Training-time augmentation pipeline (torchvision transforms v2), applied to
# each sample by the dataset it is passed to.
transforms_ = v2.Compose([
    v2.ToImage(),
    # Random crop, resized to a fixed 640x640 output.
    v2.RandomResizedCrop(size=(640, 640), antialias=True),
    v2.RandomPhotometricDistort(),
    v2.RandomHorizontalFlip(),
    # Runs after the geometric transforms above.
    v2.SanitizeBoundingBoxes(),
    # Convert to float32 with value scaling enabled.
    v2.ToDtype(torch.float32, scale=True),
    v2.ToPureTensor(),
])

In [12]:
# Display the composed pipeline so the resolved parameters of each transform can be checked.
transforms_

Compose(
      ToImage()
      RandomResizedCrop(size=(640, 640), scale=(0.08, 1.0), ratio=(0.75, 1.3333333333333333), interpolation=InterpolationMode.BILINEAR, antialias=True)
      RandomPhotometricDistort(brightness=(0.875, 1.125), contrast=(0.5, 1.5), hue=(-0.05, 0.05), saturation=(0.5, 1.5), p=0.5)
      RandomHorizontalFlip(p=0.5)
      SanitizeBoundingBoxes(min_size=1.0, min_area=1.0, labels_getter=default)
      ToDtype(scale=True)
      ToPureTensor()
)

In [13]:
# Training split: image directory plus the COCO annotation file stored inside it.
train = "data/av_com_seg2/train"
train_ann = f"{train}/_annotations.coco.json"

In [14]:
# Validation split: image directory plus the COCO annotation file stored inside it.
val = "data/av_com_seg2/valid"
val_ann = f"{val}/_annotations.coco.json"

In [15]:
# Training dataset: a CocoDetection over the training split, wrapped for the
# v2 transforms API and limited to the listed target keys.
dataset = datasets.wrap_dataset_for_transforms_v2(
    datasets.CocoDetection(train, train_ann, transforms=transforms_),
    target_keys=[
        "segmentation", "area", "iscrowd", "image_id", "bbox",
        "category_id", "boxes", "labels", "masks",
    ],
)

loading annotations into memory...
Done (t=0.13s)
creating index...
index created!


In [16]:
# Validation must be deterministic and see the full image: the training
# pipeline's random crop / photometric distortion / flip would make every
# evaluation pass run on different, partially cropped inputs, so the reported
# metrics would be noisy and not comparable between runs. Only the type/dtype
# conversions are kept here.
val_transforms_ = v2.Compose(
    [
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.ToPureTensor()
    ]
)

val_ds = datasets.CocoDetection(val,val_ann, transforms=val_transforms_)
val_ds = datasets.wrap_dataset_for_transforms_v2(val_ds, target_keys=["segmentation","area","iscrowd","image_id","bbox",
                                                                        "category_id","boxes", "labels", "masks"])

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!


In [17]:
# Training loader: one image per batch, reshuffled each epoch, 4 worker processes.
#
# A custom collate function is required because detection models take a
# sequence of images and a sequence of target dicts. The default collate would
# try to torch.stack() the samples, which in general fails since the number of
# boxes differs from image to image.
# NOTE(review): utils.collate_fn is presumably equivalent to
# `lambda batch: tuple(zip(*batch))` — confirm in utils.py.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=1,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

In [18]:
# Validation loader: one image per batch, fixed order, 4 worker processes.
#
# A custom collate function is required because detection models take a
# sequence of images and a sequence of target dicts. The default collate would
# try to torch.stack() the samples, which in general fails since the number of
# boxes differs from image to image.
# NOTE(review): utils.collate_fn is presumably equivalent to
# `lambda batch: tuple(zip(*batch))` — confirm in utils.py.
val_data_loader = torch.utils.data.DataLoader(
    val_ds,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

In [19]:
# Number of classes the replacement box and mask heads are sized for.
# NOTE(review): presumably includes the background class — confirm against the
# categories in the annotation files.
num_classes = 9

In [20]:
# Build the Mask R-CNN with its box and mask predictors sized for `num_classes`.
model = get_model_instance_segmentation(num_classes)
# Alternative kept for reference (unmodified COCO heads):
#model = models.get_model("maskrcnn_resnet50_fpn_v2", weights=MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1, weights_backbone="IMAGENET1K_V2")  #.train()

In [21]:
# Move the model onto the selected device before the optimizer is built from its parameters.
model = model.to(device)

In [22]:
# NOTE(review): `criterion` is not referenced by the training cell visible in
# this notebook (it only calls engine.train_one_epoch / engine.evaluate) —
# confirm whether it is still needed.
criterion = nn.CrossEntropyLoss()
# Collect only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
# SGD with momentum and weight decay over those trainable parameters.
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Decay LR by a factor of 0.1 every 3 scheduler steps (step_size=3, gamma=0.1).
# NOTE(review): this rebinds the name `lr_scheduler`, which the first import
# cell bound to the torch.optim.lr_scheduler module.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [23]:
# Display the model's module tree for inspection.
model

MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
         

In [24]:
# CUDA_LAUNCH_BLOCKING is an environment variable; assigning a Python variable
# of the same name does not set it. Export it through os.environ so the
# setting actually exists in the process environment.
# NOTE(review): the variable generally has to be set before CUDA is initialised
# in the process to take effect — when debugging, consider moving this to the
# top of the notebook, before anything is moved to the GPU.
CUDA_LAUNCH_BLOCKING = 1
os.environ["CUDA_LAUNCH_BLOCKING"] = str(CUDA_LAUNCH_BLOCKING)

In [None]:
# Train / evaluate loop; currently a single epoch (range(1)).
for epoch in range(1):
    # train for one epoch, logging every iteration (print_freq=1)
    engine.train_one_epoch(model,optimizer, data_loader, device, epoch, print_freq=1)
    # advance the LR scheduler once per epoch
    lr_scheduler.step()
    # evaluate on the validation loader
    engine.evaluate(model, val_data_loader, device)

  with torch.cuda.amp.autocast(enabled=scaler is not None):


Epoch: [0]  [  0/384]  eta: 1 day, 3:08:48  lr: 0.000018  loss: 3.3780 (3.3780)  loss_classifier: 2.2325 (2.2325)  loss_box_reg: 0.2958 (0.2958)  loss_mask: 0.8194 (0.8194)  loss_objectness: 0.0175 (0.0175)  loss_rpn_box_reg: 0.0128 (0.0128)  time: 254.5011  data: 126.3586  max mem: 1595
Epoch: [0]  [  1/384]  eta: 14:13:55  lr: 0.000031  loss: 3.2207 (3.2993)  loss_classifier: 2.2325 (2.2376)  loss_box_reg: 0.1604 (0.2281)  loss_mask: 0.7861 (0.8028)  loss_objectness: 0.0175 (0.0223)  loss_rpn_box_reg: 0.0043 (0.0086)  time: 133.7740  data: 63.1793  max mem: 1758
Epoch: [0]  [  2/384]  eta: 10:27:12  lr: 0.000044  loss: 3.2213 (3.2733)  loss_classifier: 2.2426 (2.2414)  loss_box_reg: 0.1604 (0.1920)  loss_mask: 0.8194 (0.8141)  loss_objectness: 0.0175 (0.0196)  loss_rpn_box_reg: 0.0043 (0.0062)  time: 98.5136  data: 42.1245  max mem: 1758
Epoch: [0]  [  3/384]  eta: 8:10:36  lr: 0.000057  loss: 3.2207 (3.2253)  loss_classifier: 2.2325 (2.2295)  loss_box_reg: 0.1197 (0.1727)  loss_mask

In [None]:
# Create the output directory first so a missing `models/` folder cannot make
# the save fail after the whole training run has finished.
os.makedirs('models', exist_ok=True)
# Serialises the entire model object (same on-disk format as before).
# NOTE(review): PyTorch's guidance is to prefer saving `model.state_dict()`
# for portability — consider switching if the loading code can be updated too.
torch.save(model,'models/av_seg.pt')