In [1]:
"""Laura Su (GitHub: LCS18)"""
%pylab inline

from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit

# from livelossplot import PlotLosses
# from pycm import *

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import torchvision.transforms as transforms

def set_seed(seed):
    """
    Use this to set ALL the random seeds to a fixed value and take out any randomness from cuda kernels
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.benchmark = False  ##uses the inbuilt cudnn auto-tuner to find the fastest convolution algorithms. -
    torch.backends.cudnn.enabled   = False

    return True


Populating the interactive namespace from numpy and matplotlib


In [2]:
device = 'cpu'
if torch.cuda.device_count() > 0 and torch.cuda.is_available():
    print("Cuda installed! Running on GPU!")
    device = 'cuda'
else:
    print("No GPU available!")

Cuda installed! Running on GPU!


In [3]:
print(torch.cuda.current_device())
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))

0
1
GeForce 930MX


# Writing a custom dataset

The dataset __getitem__ should return:

- image: a PIL Image of size (H, W)
- target: a dict containing the following fields
    - boxes (FloatTensor[N, 4]): the coordinates of the N bounding boxes in [x0, y0, x1, y1] format, ranging from 0 to W and 0 to H
    - labels (Int64Tensor[N]): the label for each bounding box
    - image_id (Int64Tensor[1]): an image identifier. It should be unique between all the images in the dataset, and is used during evaluation
    - area (Tensor[N]): The area of the bounding box. This is used during evaluation with the COCO metric, to separate the metric scores between small, medium and large boxes.
    - iscrowd (UInt8Tensor[N]): instances with iscrowd=True will be ignored during evaluation.
    - (optionally) masks (UInt8Tensor[N, H, W]): The segmentation masks for each one of the objects
    - (optionally) keypoints (FloatTensor[N, K, 3]): For each one of the N objects, it contains the K keypoints in [x, y, visibility] format, defining the object. visibility=0 means that the keypoint is not visible. Note that for data augmentation, the notion of flipping a keypoint is dependent on the data representation, and you should probably adapt references/detection/transforms.py for your new keypoint representation

In [4]:
import os
import numpy as np
import torch
from PIL import Image
import xml.etree.ElementTree as ET
from torch.utils.data import Dataset 

class CustomImageTensorDataset(Dataset):
    def __init__(self, root, transforms=None):
        """
        Args:
        #    data (Tensor): A tensor containing the data e.g. images
        #    targets (Tensor): A tensor containing all the labels
        #    transform (callable, optional): Optional transform to be applied
        #        on a sample.
        """
        self.root = root
        self.transforms = transforms
        
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "JPGImages"))))
        self.boxes = list(sorted(os.listdir(os.path.join(root, "Boxes"))))
        

    def __len__(self):
        return len(self.imgs)
    

    def __getitem__(self, idx):
        # load images and boxes
        img_path = os.path.join(self.root, "JPGImages", self.imgs[idx])
        box_path = os.path.join(self.root, "Boxes", self.boxes[idx])
        img = Image.open(img_path).convert("RGB")
        
        tree = ET.parse(box_path)
        treeroot = tree.getroot()
        boxes = []
        one_box = []
        for i in range(6, len(treeroot)):
            boxes.append([int(treeroot[i][4][j].text) for j in range(4)])    
        
        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        # there is only one class
        labels = torch.ones((len(boxes),), dtype=torch.int64)
        
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((len(boxes),), dtype=torch.int64)
        
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        
        if self.transforms is not None:
            img, target = self.transforms(img, target)        
        
        return img, target

# Transformations + data augmentation

In [5]:
import sys
sys.path.insert(0,'vision/references/detection')

import transforms as T
import utils

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [6]:
dataset_train = CustomImageTensorDataset('custom_dataset', get_transform(train=True))
dataset_test = CustomImageTensorDataset('custom_dataset', get_transform(train=False))

# print(dataset_test[0])
# print(len(dataset_test))


print(dataset_test[0][0])
print(dataset_test[0][1])

tensor([[[0.1255, 0.1255, 0.1255,  ..., 0.8000, 0.8078, 0.8196],
         [0.1255, 0.1255, 0.1255,  ..., 0.8039, 0.8157, 0.8235],
         [0.1255, 0.1255, 0.1216,  ..., 0.8157, 0.8275, 0.8314],
         ...,
         [0.0902, 0.0863, 0.0863,  ..., 0.0980, 0.0980, 0.1765],
         [0.0902, 0.0902, 0.0863,  ..., 0.1098, 0.1020, 0.1765],
         [0.0941, 0.0902, 0.0863,  ..., 0.1176, 0.1020, 0.1569]],

        [[0.1294, 0.1294, 0.1294,  ..., 0.8627, 0.8784, 0.8902],
         [0.1294, 0.1294, 0.1294,  ..., 0.8667, 0.8863, 0.8941],
         [0.1294, 0.1294, 0.1255,  ..., 0.8784, 0.8980, 0.9020],
         ...,
         [0.0902, 0.0863, 0.0863,  ..., 0.0980, 0.0980, 0.1843],
         [0.0902, 0.0902, 0.0863,  ..., 0.1098, 0.1020, 0.1843],
         [0.0941, 0.0902, 0.0863,  ..., 0.1176, 0.1020, 0.1647]],

        [[0.1059, 0.1059, 0.1059,  ..., 0.9529, 0.9647, 0.9765],
         [0.1059, 0.1059, 0.1059,  ..., 0.9569, 0.9725, 0.9804],
         [0.1059, 0.1059, 0.1020,  ..., 0.9765, 0.9843, 0.

In [7]:
# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset_train)).tolist()
dataset_train = torch.utils.data.Subset(dataset_train, indices[:-30])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-30:])

# define training and validation data loaders
data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=2, shuffle=True, num_workers=0,
        collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=0,
        collate_fn=utils.collate_fn)

In [8]:
print(dataset_test[5][0].shape)
# print(dataset_test[0][0])
print(len(dataset_test))

torch.Size([3, 360, 640])
30


# Finetuning from a pretrained model (Faster R-CNN)

In [9]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load a model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Doing the training and check the results

In [10]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T

## Hyperparameters

In [16]:
seed = 42
lr = 5e-3
momentum = 0.9
# batch_size = 1
# test_batch_size = 1000
n_epochs = 10

In [17]:
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

device = torch.device('cpu')

# our dataset has two classes only - background and person
num_classes = 2

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

In [18]:
# let's train it for 10 epochs
num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

Epoch: [0]  [ 0/60]  eta: 0:59:13  lr: 0.000090  loss: 1.1860 (1.1860)  loss_classifier: 0.9940 (0.9940)  loss_box_reg: 0.0240 (0.0240)  loss_objectness: 0.1359 (0.1359)  loss_rpn_box_reg: 0.0321 (0.0321)  time: 59.2318  data: 0.0848  max mem: 0
Epoch: [0]  [10/60]  eta: 0:37:16  lr: 0.000936  loss: 0.5550 (0.7035)  loss_classifier: 0.4619 (0.5303)  loss_box_reg: 0.0870 (0.0916)  loss_objectness: 0.0468 (0.0602)  loss_rpn_box_reg: 0.0227 (0.0214)  time: 44.7211  data: 0.0701  max mem: 0
Epoch: [0]  [20/60]  eta: 0:28:35  lr: 0.001783  loss: 0.3060 (0.4996)  loss_classifier: 0.1907 (0.3504)  loss_box_reg: 0.0744 (0.0838)  loss_objectness: 0.0352 (0.0484)  loss_rpn_box_reg: 0.0131 (0.0170)  time: 42.0635  data: 0.0709  max mem: 0
Epoch: [0]  [30/60]  eta: 0:21:28  lr: 0.002629  loss: 0.2889 (0.4617)  loss_classifier: 0.1418 (0.2993)  loss_box_reg: 0.0840 (0.1065)  loss_objectness: 0.0292 (0.0405)  loss_rpn_box_reg: 0.0096 (0.0154)  time: 41.9861  data: 0.0746  max mem: 0
Epoch: [0]  [40/

Epoch: [5]  [10/60]  eta: 0:39:15  lr: 0.000500  loss: 0.0435 (0.0578)  loss_classifier: 0.0224 (0.0270)  loss_box_reg: 0.0158 (0.0231)  loss_objectness: 0.0009 (0.0012)  loss_rpn_box_reg: 0.0056 (0.0064)  time: 47.1014  data: 0.0463  max mem: 0
Epoch: [5]  [20/60]  eta: 0:30:59  lr: 0.000500  loss: 0.0436 (0.0670)  loss_classifier: 0.0226 (0.0313)  loss_box_reg: 0.0158 (0.0264)  loss_objectness: 0.0005 (0.0020)  loss_rpn_box_reg: 0.0057 (0.0072)  time: 46.5787  data: 0.0456  max mem: 0
Epoch: [5]  [30/60]  eta: 0:23:24  lr: 0.000500  loss: 0.0716 (0.0758)  loss_classifier: 0.0319 (0.0347)  loss_box_reg: 0.0246 (0.0313)  loss_objectness: 0.0005 (0.0017)  loss_rpn_box_reg: 0.0084 (0.0081)  time: 46.6390  data: 0.0448  max mem: 0
Epoch: [5]  [40/60]  eta: 0:15:30  lr: 0.000500  loss: 0.0699 (0.0744)  loss_classifier: 0.0365 (0.0351)  loss_box_reg: 0.0250 (0.0302)  loss_objectness: 0.0006 (0.0016)  loss_rpn_box_reg: 0.0063 (0.0075)  time: 46.5373  data: 0.0542  max mem: 0
Epoch: [5]  [50/

## Saving the model

In [19]:
model_save_name = 'faster-r-cnn-resnet50-fpn-finetuning.pt'
path = F"custom_dataset/{model_save_name}" 
torch.save(model.state_dict(), path)

# Check the predictions

In [10]:
#note: order of the images, for reference

directory = r"C:\Users\lsu543\OneDrive - Schlumberger\Documents\project_video_laura\custom_dataset\JPGImages"
list_files = []
for filename in list(sorted(os.listdir(directory))):
    list_files.append(filename)

print(list_files)

['fig0_13.30.00.jpg', 'fig10000_13.20.00.jpg', 'fig1000_13.30.00.jpg', 'fig10050_13.20.00.jpg', 'fig100_03.00.00.jpg', 'fig10100_13.20.00.jpg', 'fig10150_13.20.00.jpg', 'fig1016_13.30.00.jpg', 'fig10200_13.20.00.jpg', 'fig10250_13.20.00.jpg', 'fig10300_13.20.00.jpg', 'fig10350_13.20.00.jpg', 'fig10400_13.20.00.jpg', 'fig10450_13.20.00.jpg', 'fig10500_13.20.00.jpg', 'fig10550_13.20.00.jpg', 'fig10600_13.20.00.jpg', 'fig10650_13.20.00.jpg', 'fig10700_13.30.00.jpg', 'fig10850_13.20.00.jpg', 'fig10900_13.20.00.jpg', 'fig11000_03.00.00.jpg', 'fig1100_13.30.00.jpg', 'fig11050_13.20.00.jpg', 'fig11100_13.20.00.jpg', 'fig11150_13.20.00.jpg', 'fig11200_13.20.00.jpg', 'fig11250_13.20.00.jpg', 'fig11300_13.20.00.jpg', 'fig11350_13.20.00.jpg', 'fig11400_13.20.00.jpg', 'fig11450_13.20.00.jpg', 'fig11500_13.20.00.jpg', 'fig11550_13.20.00.jpg', 'fig11600_03.00.00.jpg', 'fig11600_13.20.00.jpg', 'fig11650_13.20.00.jpg', 'fig11700_13.20.00.jpg', 'fig11750_13.20.00.jpg', 'fig11800_13.20.00.jpg', 'fig1185

## Check the results (if using the model produced before)

In [78]:
# # pick one image from the test set
# img, _ = dataset_test[5]
# # put the model in evaluation mode
# model.eval()
# with torch.no_grad():
#     prediction = model([img.to(device)])

# print(prediction)

In [79]:
# # Check the true results (pre-labelled)
# img, _ = dataset_test[5]
# print(_)

## Loading a model on the current architecture

In [11]:
device = torch.device('cpu')

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

model_save_name = 'faster-r-cnn-resnet50-fpn-finetuning-dividedby2sizeimage.pt'

# load a model pre-trained on COCO
model1 = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 2  # 1 class (person) + background
# get number of input features for the classifier
in_features = model1.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model1.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model1.load_state_dict(torch.load(F"custom_dataset/{model_save_name}"))
model1.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform()
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d()
          )
        )
  

## Checking the results (with loaded model)

In [81]:
# pick one image from the test set
img, _ = dataset_test[8]
# put the model in evaluation mode
model1.eval()
with torch.no_grad():
    prediction1 = model1([img.to(device)])

print(prediction1)

[{'boxes': tensor([[500.9510, 243.8863, 615.6051, 360.0000],
        [297.4997, 248.1392, 342.2975, 356.0720],
        [299.0325, 307.6533, 347.2774, 357.3348]]), 'labels': tensor([1, 1, 1]), 'scores': tensor([0.9957, 0.8322, 0.1336])}]


### Plotting the output results

In [82]:
import cv2

# Loading the image corresponding to the boxes (test set)
img2 = cv2.imread("custom_dataset/JPGImages/" + list_files[_["image_id"][0]])

# Creating rectangle
for i in range(len(prediction1[0]["boxes"])):
    cv2.rectangle(img2, (int(prediction1[0]["boxes"][i][0]), int(prediction1[0]["boxes"][i][1])), (int(prediction1[0]["boxes"][i][2]), int(prediction1[0]["boxes"][i][3])), (0, 0, 255), 3)

cv2.imshow('result', img2) 
  
# Allows us to see image 
# until closed forcefully 
cv2.waitKey(0) 
cv2.destroyAllWindows()

## Eliminating the low scores

In [83]:
# print(prediction1[0]["scores"])
# prediction1[0]["scores"] > 0.9
# print(prediction1[0]["boxes"][prediction1[0]["scores"] > 0.9])

picks = non_max_suppression_slow(prediction1[0]["boxes"][prediction1[0]["scores"] > 0.80].cpu(), 0.3)

print(picks)
print(_["image_id"])

print(list_files[_["image_id"][0]])

print(_["boxes"])

[0, 1]
tensor([24])
fig11100_13.20.00.jpg
tensor([[301., 238., 347., 360.],
        [505., 240., 613., 360.]])


## Non max suppression (eliminate the duplicates)

In [12]:
#non-max suppression, cf pyimageresearch

# import the necessary packages
import numpy as np
    
#  Felzenszwalb et al.
def non_max_suppression_slow(boxes, overlapThresh):
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []
 
    # initialize the list of picked indexes
    pick = []
 
    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
 
    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    
    # keep looping while some indexes still remain in the indexes
    # list
    while len(idxs) > 0:
        # grab the last index in the indexes list, add the index
        # value to the list of picked indexes, then initialize
        # the suppression list (i.e. indexes that will be deleted)
        # using the last index
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        suppress = [last]


        # loop over all indexes in the indexes list
        for pos in range(0, last):
            # grab the current index
            j = idxs[pos]
 
            # find the largest (x, y) coordinates for the start of
            # the bounding box and the smallest (x, y) coordinates
            # for the end of the bounding box
            xx1 = max(x1[i], x1[j])
            yy1 = max(y1[i], y1[j])
            xx2 = min(x2[i], x2[j])
            yy2 = min(y2[i], y2[j])
 
            # compute the width and height of the bounding box
            w = max(0, xx2 - xx1 + 1)
            h = max(0, yy2 - yy1 + 1)
 
            # compute the ratio of overlap between the computed
            # bounding box and the bounding box in the area list
            overlap = float(w * h) / area[j]

            # if there is sufficient overlap, suppress the
            # current bounding box
            if overlap > overlapThresh:
                suppress.append(pos)
 
        # delete all indexes from the index list that are in the
        # suppression list
        idxs = np.delete(idxs, suppress)
     
    return [int(p) for p in pick]


## Labels, to compare with prediction

In [85]:
print(_["image_id"])

print(list_files[_["image_id"][0]])

print(_["boxes"])


tensor([24])
fig11100_13.20.00.jpg
tensor([[301., 238., 347., 360.],
        [505., 240., 613., 360.]])


## Drawing the results on the corresponding image

In [86]:
import cv2

bboxes = prediction1[0]["boxes"]
# print(bboxes)

print(picks)

remaining_bboxes = bboxes[picks]
print(remaining_bboxes)


# Loading the image corresponding to the boxes (test set)
img2 = cv2.imread("custom_dataset/JPGImages/" + list_files[_["image_id"][0]])

# Creating rectangle
for i in range(len(picks)):
    cv2.rectangle(img2, (int(remaining_bboxes[i][0]), int(remaining_bboxes[i][1])), (int(remaining_bboxes[i][2]), int(remaining_bboxes[i][3])), (0, 255, 0), 3)

cv2.imshow('result', img2) 
  
# Allows us to see image 
# until closed forcefully 
cv2.waitKey(0) 
cv2.destroyAllWindows()

[0, 1]
tensor([[500.9510, 243.8863, 615.6051, 360.0000],
        [297.4997, 248.1392, 342.2975, 356.0720]])
