In [1]:
# Basic python and ML Libraries
import os
import random
import numpy as np
import pandas as pd
# for ignoring warnings
import warnings
warnings.filterwarnings('ignore')

# We will be reading images using OpenCV
import cv2

# xml library for parsing xml files
from xml.etree import ElementTree as et

# matplotlib for visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# torchvision libraries
import torch
import torchvision
from torchvision import transforms as torchtrans  
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# these are the helper libraries imported.
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# for image augmentations
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [2]:
class IELDataset(torch.utils.data.Dataset):
    """Object-detection dataset built from ``.jpg`` images and text label files.

    Every image ``<name>.jpg`` in ``images_dir`` is paired with
    ``<name>.txt`` in the label directory. Each non-blank label line is
    whitespace-separated ``class cx cy w h`` with the box centre and size
    normalised to [0, 1] (YOLO-style). Boxes are converted to absolute
    ``[x1, y1, x2, y2]`` coordinates in the resized ``width`` x ``height``
    image, and class ids are shifted by one so that 0 stays reserved for
    background.

    Args:
        images_dir: Directory containing the ``.jpg`` images. The label
            directory is derived from it by replacing every occurrence of
            ``"images"`` in the path with ``"labels"``.
        width: Width in pixels that every image is resized to.
        height: Height in pixels that every image is resized to.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with the keys ``'image'``, ``'bboxes'`` and ``'labels'``.
    """

    def __init__(self,images_dir, width, height, transforms=None):
        self.transforms = transforms
        self.images_dir = images_dir
        self.label_dir = images_dir.replace("images","labels")
        self.height = height
        self.width = width

        # Sorted so that the index -> file mapping is deterministic.
        # Only images whose label file exists and contains at least one
        # non-blank line are kept: images without annotations are excluded
        # up front instead of failing later inside __getitem__, and a label
        # file without a matching image is simply ignored.
        self.imgs = [name for name in sorted(os.listdir(images_dir))
                     if name.endswith('.jpg') and self._has_annotations(name)]

        # classes: 0 index is reserved for background
        # (a literal placeholder is used; the bare name `_` only resolves
        # inside an interactive IPython session).
        self.classes = ['__background__', 'Epithelial Nuclei','IEL']

    def _label_path(self, img_name):
        """Return the path of the label file paired with ``img_name``."""
        stem = os.path.splitext(img_name)[0]
        return os.path.join(self.label_dir, stem + '.txt')

    def _has_annotations(self, img_name):
        """Return True if the image's label file exists and has a non-blank line."""
        label_path = self._label_path(img_name)
        if not os.path.isfile(label_path):
            return False
        with open(label_path,'r') as f:
            return any(line.strip() for line in f)

    def _load_boxes(self, label_path):
        """Parse one label file into ``(boxes, labels)`` Python lists.

        Boxes are ``[x1, y1, x2, y2]`` in pixels of the resized image, clipped
        to ``[0, width - 1]`` x ``[0, height - 1]``. Boxes that are empty after
        clipping are dropped together with their label.

        Raises:
            ValueError: If a non-blank line has fewer than five fields.
        """
        boxes = []
        labels = []

        with open(label_path,'r') as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                if not fields:
                    # blank line (e.g. trailing newline at end of file)
                    continue
                if len(fields) < 5:
                    raise ValueError(
                        f"{label_path}:{line_no}: expected 'class cx cy w h', got {line!r}")

                cx, cy, bw, bh = (float(v) for v in fields[1:5])

                # The label coordinates are normalised, so they scale straight
                # to the resized image without needing the original size.
                x1 = max(0.0, (cx - bw / 2) * self.width)
                x2 = min(float(self.width - 1), (cx + bw / 2) * self.width)
                y1 = max(0.0, (cy - bh / 2) * self.height)
                y2 = min(float(self.height - 1), (cy + bh / 2) * self.height)

                # skip boxes that are degenerate after clipping
                if x1 >= x2 or y1 >= y2:
                    continue

                boxes.append([x1, y1, x2, y2])
                # shift by one: class 0 is reserved for background
                labels.append(int(fields[0]) + 1)

        return boxes, labels

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``image`` is the resized RGB image scaled to [0, 1] (float32), or
        whatever ``transforms`` returns under ``'image'`` when transforms are
        set. ``target`` is a dict with ``boxes`` (float32, shape (N, 4)),
        ``labels`` (int64, shape (N,)), ``area`` (shape (N,)), ``iscrowd``
        (int64 zeros, shape (N,)) and ``image_id`` (tensor ``[idx]``).

        Raises:
            FileNotFoundError: If the image cannot be read.
            ValueError: If the label file contains a malformed line.
        """
        img_name = self.imgs[idx]
        image_path = os.path.join(self.images_dir, img_name)

        # cv2.imread signals failure by returning None rather than raising
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")

        # reading the images and converting them to correct size and color
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # interpolation must be passed by keyword: the third positional
        # parameter of cv2.resize is `dst`, not the interpolation flag.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_CUBIC)
        # dividing by 255; cubic interpolation can overshoot slightly, so the
        # result is clipped back into [0, 1]
        img_res /= 255.0
        np.clip(img_res, 0.0, 1.0, out=img_res)

        boxes, labels = self._load_boxes(self._label_path(img_name))

        if self.transforms:
            sample = self.transforms(image = img_res,
                                     bboxes = boxes,
                                     labels = labels)

            img_res = sample['image']
            # take boxes AND labels from the transform output so the two stay
            # aligned even if the transform drops a box
            boxes = sample['bboxes']
            labels = sample['labels']

        # convert boxes into a torch.Tensor; reshape keeps the (N, 4) layout
        # even when there are no boxes at all
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        # getting the areas of the boxes (shape (0,) when there are none)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        # image_id
        target["image_id"] = torch.tensor([idx])

        return img_res , target

    def __len__(self):
        """Return the number of annotated images in the dataset."""
        return len(self.imgs)

In [3]:
# Send train=True for training transforms and False for val/test transforms
def get_transform(train):
    """Build the Albumentations pipeline for one dataset split.

    Args:
        train: When truthy, a random horizontal flip (probability 0.5) is
            applied before tensor conversion; otherwise only the tensor
            conversion is applied.

    Returns:
        An ``A.Compose`` pipeline whose bounding boxes are in ``pascal_voc``
        format and are tied to the ``labels`` field.
    """
    steps = []

    if train:
        # The probability is passed by keyword on purpose. Depending on the
        # Albumentations release, the first positional parameter of
        # HorizontalFlip is `always_apply` rather than `p`, in which case
        # HorizontalFlip(0.5) would flip every single image.
        steps.append(A.HorizontalFlip(p=0.5))

    # ToTensorV2 converts image to pytorch tensor without div by 255
    steps.append(ToTensorV2(p=1.0))

    return A.Compose(
        steps,
        bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']),
    )

In [4]:
def get_object_detection_model(num_classes):
    """Return a Faster R-CNN (ResNet-50 FPN) detector with a fresh box head.

    The detector is created with ``pretrained=True`` (weights pre-trained on
    COCO), and its box predictor is then swapped for a new
    ``FastRCNNPredictor`` sized for ``num_classes`` outputs.

    Args:
        num_classes: Number of output classes for the new predictor.

    Returns:
        The detector with the replaced ``roi_heads.box_predictor``.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # The new head must accept the same feature size as the head it replaces.
    old_head = detector.roi_heads.box_predictor
    detector.roi_heads.box_predictor = FastRCNNPredictor(
        old_head.cls_score.in_features,
        num_classes,
    )

    return detector

In [5]:
# Root of the image tree for the fold being trained. The original location is
# kept as the default; set the IEL_FOLD_DIR environment variable to run the
# notebook against a copy of the data stored elsewhere.
fold_dir = os.environ.get("IEL_FOLD_DIR",
                          "/home/aayush/chirag/tensorflow/yolo_kfold/2/images/")

# Settings shared by all three splits.
IMG_SIZE = 640      # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 4
NUM_WORKERS = 4


def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader using the shared batch settings."""
    return torch.utils.data.DataLoader(
        dataset, batch_size=BATCH_SIZE, shuffle=shuffle, num_workers=NUM_WORKERS,
        collate_fn=utils.collate_fn)


# use our dataset and defined transformations
# (only the training split gets the augmenting transform)
dataset_train = IELDataset(os.path.join(fold_dir,"train"), IMG_SIZE, IMG_SIZE, transforms= get_transform(train=True))
dataset_val = IELDataset(os.path.join(fold_dir,"val"), IMG_SIZE, IMG_SIZE, transforms= get_transform(train=False))
dataset_test = IELDataset(os.path.join(fold_dir,"test"), IMG_SIZE, IMG_SIZE, transforms= get_transform(train=False))

# define training and validation data loaders
# (only the training loader shuffles; val/test keep a fixed order)
data_loader_train = _make_loader(dataset_train, shuffle=True)
data_loader_val = _make_loader(dataset_val, shuffle=False)
data_loader_test = _make_loader(dataset_test, shuffle=False)

In [6]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Number of classes passed to the new detection head.
num_classes = 3

# Build the detector with our helper and place it on the chosen device
# (Module.to returns the module itself, so the call can be chained).
model = get_object_detection_model(num_classes).to(device)

# Optimise only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Step schedule: the learning rate is multiplied by 0.1 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [7]:
# Number of passes over the training set (currently a single epoch).
num_epochs = 1

for epoch in range(num_epochs):
    # Train for one epoch; `postreg` is forwarded to the project's
    # train_one_epoch helper from engine.py.
    train_one_epoch(
        model,
        optimizer,
        data_loader_train,
        device,
        epoch,
        print_freq=10,
        postreg=True,
    )
    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()
    # Evaluate on the validation loader after every epoch.
    evaluate(model, data_loader_val, device=device)

# Saving the trained model is currently disabled:
#torch.save(model, './weights/faster-rcnn-iel.pt')

Epoch: [0]  [ 0/49]  eta: 0:00:48  lr: 0.000109  loss: 5.8621 (5.8621)  loss_classifier: 0.8870 (0.8870)  loss_box_reg: 0.2426 (0.2426)  loss_objectness: 4.5299 (4.5299)  loss_rpn_box_reg: 0.2025 (0.2025)  time: 0.9963  data: 0.2757  max mem: 4622
Epoch: [0]  [10/49]  eta: 0:00:28  lr: 0.001150  loss: 10.4576 (33.5209)  loss_classifier: 0.6585 (0.6988)  loss_box_reg: 0.6426 (0.5348)  loss_objectness: 0.4305 (1.7168)  loss_rpn_box_reg: 0.1331 (0.1489)  posterior_loss: 14.9156 (41.8298)  time: 0.7188  data: 0.0325  max mem: 6005
Epoch: [0]  [20/49]  eta: 0:00:20  lr: 0.002190  loss: 21.0020 (7208657706.3437)  loss_classifier: 0.5561 (0.6209)  loss_box_reg: 0.7171 (0.6428)  loss_objectness: 0.2500 (0.9866)  loss_rpn_box_reg: 0.1248 (0.1396)  posterior_loss: 25.5840 (8410100601.0771)  time: 0.6892  data: 0.0088  max mem: 6017
Epoch: [0]  [30/49]  eta: 0:00:13  lr: 0.003231  loss: 27.5865 (9498823425.0453)  loss_classifier: 0.5228 (0.5799)  loss_box_reg: 0.7548 (0.6742)  loss_objectness: 0.