In [None]:
# Thanks to: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
# Google Colab implementation: https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/torchvision_finetuning_instance_segmentation.ipynb#scrollTo=6-NFR--2fXV3


from torch.utils.data import Dataset
import torch
import torchvision.transforms as T
from torch.utils.data import DataLoader
import pandas as pd
from PIL import Image, ImageDraw

In [None]:
class RipCurrentDataset(Dataset):
    """Rip current detector dataset.

    Every unique value of the ``Name`` column of the label dataframe is one
    sample. A sample is the image stored under that name plus a target
    dictionary holding its single bounding box and label.
    """

    # Dataframe columns holding the two corners of the bounding box.
    _BOX_KEYS = ('x1', 'y1', 'x2', 'y2')

    def __init__(self, dframe, image_dir, transform=None):
        """

        :param dframe: Dataframe object of csv file "aug_data_label.csv" generated by fix_size_and_aug.
            It needs the columns ``Name``, ``x1``, ``y1``, ``x2``, ``y2`` and ``label``;
            every column except ``Name`` is converted with ``int()``.
        :param image_dir: path where all fixed size and augment images are saved. The image name is
            appended to this string as-is, so it has to end with a path separator.
        :param transform: the transform to be operated converting PIL image to torch tensor.
            When ``None`` the RGB PIL image is returned untransformed.
        """
        super().__init__()

        self.df = dframe
        self.images_ids = self.df['Name'].unique()
        # One record per image name; if a name occurs in several rows the last row wins.
        self.data_dict = {
            row['Name']: {key: value if key == 'Name' else int(value) for key, value in row.items()}
            for row in dframe.to_dict(orient='records')
        }
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Number of distinct image names in the dataframe."""
        return len(self.images_ids)

    def __getitem__(self, item):
        """
        Parameters
        ----------
        item : int
            id number to get one image from the dataset

        Returns
        -------

        img_tensor: torch.tensor
            Result of applying ``transform`` to the RGB image (the PIL image itself when no
            transform was given)

        target: dictionary
            Python dictionary with the keys ``image_id``, ``boxes`` (one box as x1, y1, x2, y2),
            ``labels`` (0 - no rip, 1 - rip), ``iscrowd`` and ``area``

        """
        img_name = self.images_ids[item]
        img_data = self.data_dict[img_name]

        x1, y1, x2, y2 = (img_data[key] for key in self._BOX_KEYS)

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor([[x1, y1, x2, y2]], dtype=torch.float32)
        areas = torch.as_tensor([abs(x1 - x2) * abs(y1 - y2)], dtype=torch.float32)

        # convert() returns an independent image, so the file can be closed right away.
        with Image.open(self.image_dir + img_name) as raw_img:
            img = raw_img.convert("RGB")

        # transform is optional: calling None used to raise a TypeError here.
        if self.transform is not None:
            img = self.transform(img)

        target = {
            'image_id': torch.tensor([item]),
            'boxes': boxes,
            # 0 labels for pics without the rip current
            'labels': torch.tensor((img_data['label'],), dtype=torch.int64),
            'iscrowd': torch.tensor((0,), dtype=torch.uint8),
            'area': areas,
        }

        return img, target


## 1 - Finetuning from a pretrained model

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor,FasterRCNN_ResNet50_FPN_Weights

# Load a Faster R-CNN (ResNet-50 FPN) model pre-trained on COCO.
# The tutorial's original call was fasterrcnn_resnet50_fpn(pretrained=True);

# this line is different from the tutorial because pretrained= raises a deprecation warning.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Replace the classifier with a new one whose number of
# output classes (num_classes) is user-defined.
num_classes = 2  # 1 class (rip current) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

## 2 - Modifying the model to add a different backbone

In [None]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a pre-trained model for classification and return
# only the features.
# The weights enum replaces the deprecated pretrained=True (same API as the
# Faster R-CNN cell above); IMAGENET1K_V1 is the checkpoint pretrained=True selected.
backbone = torchvision.models.mobilenet_v2(
    weights=torchvision.models.MobileNet_V2_Weights.IMAGENET1K_V1).features

# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))


# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.

# if your backbone returns a Tensor, featmap_names is expected to
# be ['0']. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
# (2 classes: rip current + background)
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

## Putting everything together

In [None]:
# import transforms as T

# def get_transform(train):
# #     transforms = []
#     transforms.append(T.ToTensor())
#     if train:
#         transforms.append(T.RandomHorizontalFlip(0.5))
#     return T.Compose(transforms)

### Testing forward() method

In [None]:
from pathlib import Path
from torch.utils.data import DataLoader
from vision_utils import utils
import torchvision.transforms as T

# COCO pre-trained detector used to exercise forward().
# weights= replaces the deprecated pretrained=True, matching the fine-tuning cell.
# NOTE(review): this rebinds `model`, so the later training cells operate on this
# network with its original COCO head, not on the 2-class models built above - confirm
# that this is intended.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Build the paths from parts so they work on every OS (no hard-coded backslashes).
data_path = Path('..') / 'Data'
df = pd.read_csv(data_path / 'aug_data_labels.csv')
# keep only the images that contain a rip current
df = df[df['label'] == 1]

# RipCurrentDataset appends the file name to this string, so it must end with a
# separator; a forward slash is accepted on Windows as well as POSIX.
img_dir = (data_path / 'fixed_data').as_posix() + '/'
trans = T.ToTensor()

train_ds = RipCurrentDataset(df, img_dir, trans)
data_loader = DataLoader(train_ds, batch_size=16, shuffle=True, collate_fn=utils.collate_fn)





In [None]:

# For Training: pull a single batch from the (shuffled) loader.
# Presumably utils.collate_fn yields a sequence of images and a sequence of
# target dicts - confirm against vision_utils/utils.py.
images,targets = next(iter(data_loader))

In [None]:

# Training-mode forward pass.
# Set the mode explicitly: this cell ends with model.eval(), so without this line
# re-running the cell would silently return detections instead of losses.
model.train()

images = list(images)
targets = [dict(t) for t in targets]

output = model(images, targets)   # in training mode this is the dictionary of losses

# For inference
model.eval()




In [None]:
# Inference on two images of the batch fetched above.
# eval mode and the model's device are resolved here so the cell gives the same
# result whichever cell ran last; no_grad avoids building an autograd graph.
model.eval()
x = [images[1], images[2]]
model_device = next(model.parameters()).device
with torch.no_grad():
    predictions = model([image.to(model_device) for image in x])
# keep the results on the CPU for inspection and drawing
predictions = [{key: value.cpu() for key, value in pred.items()} for pred in predictions]
predictions

In [None]:
%matplotlib notebook

In [None]:
import torchvision.transforms as T
from PIL import Image, ImageDraw
from matplotlib.pyplot import imshow
import  matplotlib.pyplot as plt


def draw_rect(image, box):
    """Draw one red bounding box on a PIL image and display the image.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to annotate. It is drawn on in place, so boxes from repeated
        calls accumulate on the same image.

    box: sequence
        Sequence of shape `1 X 4`; its first item holds the bounding box in the
        format `x1 y1 x2 y2`. The coordinates may be plain numbers, numpy
        scalars or 0-d torch tensors.

    Returns
    -------

    None. The annotated image is shown with ``display``.

    """
    first_box = box[0]
    # Hand plain Python floats to PIL instead of tensor / numpy scalar objects.
    x1, y1, x2, y2 = (float(first_box[i]) for i in range(4))

    drawer = ImageDraw.Draw(image)
    drawer.rectangle([(x1, y1), (x2, y2)], outline="red", width=4)

    display(image)




# Show the predicted boxes of class 1 for one of the two inference images.
idx = 1
# squeeze() instead of the in-place squeeze_(): the tensor is shared with `x` and
# `images`, so it must not be modified here.
img = T.ToPILImage()(x[idx].squeeze(0))

# draw_rect draws on `img` in place and displays it after every box
for box, label in zip(predictions[idx]['boxes'], predictions[idx]['labels']):
    if label == 1:
        draw_rect(img, [box])


##  Putting everything together

In [None]:
full_dataset = RipCurrentDataset(df, img_dir, trans)


# split the dataset in train and test set
torch.manual_seed(1)

indices = torch.randperm(len(full_dataset)).tolist()

# Both subsets index the full dataset: the last 50 shuffled samples are held out
# for testing, everything else is used for training.
# (The test subset used to be built from `dataset_test` itself, which raised a NameError.)
dataset = torch.utils.data.Subset(full_dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(full_dataset, indices[-50:])

# define training and validation data loaders
# NOTE(review): worker processes have to import the dataset class; with a class
# defined inside the notebook this may fail on Windows - use num_workers=0 if it does.
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=16, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)






## Instantiate the model and the optimizer

In [None]:
# train on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has two classes only - background and rip current
# NOTE(review): num_classes is not used in this cell; no model is built here
# (the tutorial's helper call was left out), so `model` is whatever an earlier
# cell bound last - confirm that it has the intended head.
num_classes = 2


# move model to the right device
model.to(device)

# construct an optimizer over the trainable parameters only
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [None]:
import math
import sys
import time

import torch
import torchvision.models.detection.mask_rcnn
sys.path.append("vision_utils")
import utils



def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None):
    """Train ``model`` for one pass over ``data_loader``.

    Parameters
    ----------
    model : callable module returning a dictionary of losses for ``(images, targets)``
    optimizer : optimizer updating the model parameters
    data_loader : sized iterable of ``(images, targets)`` batches
    device : device every image and target tensor is moved to
    epoch : int
        Epoch number; epoch 0 additionally runs a per-iteration linear warm-up.
    print_freq : int
        Forwarded to ``MetricLogger.log_every``.
    scaler : optional gradient scaler
        When given, the forward pass runs under CUDA autocast and the scaler
        drives the backward pass and the optimizer step.

    Returns
    -------
    The ``utils.MetricLogger`` holding the logged losses and learning rate.
    A non-finite loss prints the loss dictionary and exits via ``sys.exit(1)``.
    """
    model.train()

    logger = utils.MetricLogger(delimiter="  ")
    logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))

    # Only the first epoch ramps the learning rate up, one scheduler step per batch.
    warmup = None
    if epoch == 0:
        warmup = torch.optim.lr_scheduler.LinearLR(
            optimizer,
            start_factor=1.0 / 1000,
            total_iters=min(1000, len(data_loader) - 1),
        )

    use_amp = scaler is not None
    for images, targets in logger.log_every(data_loader, print_freq, f"Epoch: [{epoch}]"):
        images = [image.to(device) for image in images]
        targets = [{key: value.to(device) for key, value in target.items()} for target in targets]

        with torch.cuda.amp.autocast(enabled=use_amp):
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        reduced = utils.reduce_dict(loss_dict)
        total_reduced = sum(reduced.values())
        loss_value = total_reduced.item()

        # A NaN / inf loss cannot be recovered from: report it and stop.
        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if use_amp:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if warmup is not None:
            warmup.step()

        logger.update(loss=total_reduced, **reduced)
        logger.update(lr=optimizer.param_groups[0]["lr"])

    return logger

In [None]:
def evaluate(model, data_loader, device):
    """Run ``model`` over ``data_loader`` in eval mode and feed a COCO evaluator.

    Parameters
    ----------
    model : detection model; in eval mode it is called with a list of images
    data_loader : iterable of ``(images, targets)`` batches with a ``dataset`` attribute
    device : device the images are moved to before the forward pass

    Returns
    -------
    The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()`` were called.

    Notes
    -----
    The forward passes run under ``torch.no_grad()`` and the intra-op thread
    count is restored even when evaluation raises.

    NOTE(review): ``get_coco_api_from_dataset``, ``_get_iou_types`` and
    ``CocoEvaluator`` are not defined or imported in the visible part of this
    notebook; they have to be in scope when this function runs.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")

    try:
        model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = "Test:"

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        # No gradients are needed for evaluation; skip building the autograd graph.
        with torch.no_grad():
            for images, targets in metric_logger.log_every(data_loader, 100, header):
                images = [img.to(device) for img in images]

                # make the timing below cover only this batch's GPU work
                if torch.cuda.is_available():
                    torch.cuda.synchronize()
                model_time = time.time()
                outputs = model(images)

                outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
                model_time = time.time() - model_time

                # key every prediction by the image id stored in its target
                res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
                evaluator_time = time.time()
                coco_evaluator.update(res)
                evaluator_time = time.time() - evaluator_time
                metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        return coco_evaluator
    finally:
        torch.set_num_threads(n_threads)

In [None]:

# let's train it for 10 epochs
# NOTE(review): StepLR is imported here but not used in this cell (the scheduler
# object is created in an earlier cell).
from torch.optim.lr_scheduler import StepLR
# NOTE(review): this import rebinds train_one_epoch / evaluate, shadowing the
# functions of the same name defined in the cells above - confirm which
# implementation is meant to run.
from engine import train_one_epoch, evaluate
num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate (epoch-level StepLR schedule)
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

In [None]:
# Inference with the trained model on two images of the batch fetched earlier.
model.eval()
x = [images[1], images[2]]
# The model was moved to `device` for training while these images are still on
# the CPU, so send copies to wherever the model lives; `x` itself is left as is.
model_device = next(model.parameters()).device
with torch.no_grad():
    predictions = model([image.to(model_device) for image in x])
# keep the results on the CPU for inspection and drawing
predictions = [{key: value.cpu() for key, value in pred.items()} for pred in predictions]
predictions

In [None]:
# Show the predicted boxes of class 1 with a score above 0.6 for one inference image.
idx = 1
# squeeze() instead of the in-place squeeze_(): the tensor is shared with `x` and
# `images`, so it must not be modified here.
img = T.ToPILImage()(x[idx].squeeze(0))

# draw_rect draws on `img` in place and displays it after every box
for box, label, score in zip(predictions[idx]['boxes'], predictions[idx]['labels'], predictions[idx]['scores']):
    if label == 1 and score > 0.6:
        draw_rect(img, [box])