In [1]:
cd ../../msg

/home/angran/GIT/msg


In [2]:
import numpy as np
from torch import nn
from torch.utils.data import DataLoader, Subset
from torchvision import transforms
from xt_training import metrics
from xt_training.utils import DummyOptimizer, SKDataset, SKInterface, functional

from utils import transforms as xt_transforms

  '"sox" backend is being deprecated. '


In [19]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /home/angran/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [24]:
model.roi_heads.box_predictor

FastRCNNPredictor(
  (cls_score): Linear(in_features=1024, out_features=91, bias=True)
  (bbox_pred): Linear(in_features=1024, out_features=364, bias=True)
)

In [3]:
import os
import numpy as np
import torch
from PIL import Image


class PennFudanDataset(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        # convert the PIL Image into a numpy array
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]

        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.imgs)

In [4]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model

### Transforms

In [5]:
def get_transform(train):
    trans = []
    trans.append(transforms.ToTensor())
    if train:
        trans.append(transforms.RandomHorizontalFlip(p=0.5))
    return transforms.Compose(trans)

### Dataset

In [6]:
# our dataset has two classes only - background and person
num_classes = 2
# use our dataset and defined transformations
dataset = PennFudanDataset('/home/angran/Downloads/PennFudanPed', get_transform(train=True))
dataset_test = PennFudanDataset('/home/angran/Downloads/PennFudanPed', get_transform(train=False))

# split the dataset in train and test set
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-50])
test_dataset = torch.utils.data.Subset(dataset_test, indices[-50:])

### Dataloaders

In [7]:
# define training and validation data loaders
data_loader = DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4)

data_loader_test = DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4)

# get the model using our helper function
model = get_model_instance_segmentation(num_classes)

### Model & Optimizer

In [8]:
# get the model using our helper function
model = get_model_instance_segmentation(num_classes)

# move model to the right device
# model.to(device) # Already implemented in xt-training

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [9]:
dataset[0][0].size()

torch.Size([3, 406, 612])

In [15]:
from collections import Iterable
isinstance(y, Iterable)

True

In [17]:
for y_i in y:
    print(y_i)

boxes
labels
masks
image_id
area
iscrowd


### Model Training

In [18]:
save_dir = '/home/angran/Downloads/test/'

stats, matrix = functional.train(
    save_dir=save_dir,
    train_loader=data_loader,
    model=model,
    optimizer=optimizer,
    epochs=10,
    loss_fn=nn.CrossEntropyLoss(),
    overwrite=True,
    val_loader=data_loader_test,
#     test_loaders=test_loaders,
#     device='cuda:0',
    scheduler=lr_scheduler,
#     eval_metrics=eval_metrics,
#     on_exit=default_train_exit
)

Running on device: cuda:0


AttributeError: 'str' object has no attribute 'to'