In [1]:
!pip install -q pycocotools
!git clone https://github.com/pytorch/vision.git
!git checkout v0.3.0

!cp vision/references/detection/utils.py ./
!cp vision/references/detection/transforms.py ./
!cp vision/references/detection/coco_eval.py ./
!cp vision/references/detection/engine.py ./
!cp vision/references/detection/coco_utils.py ./

Cloning into 'vision'...
remote: Enumerating objects: 585151, done.[K
remote: Counting objects: 100% (32937/32937), done.[K
remote: Compressing objects: 100% (1919/1919), done.[K
remote: Total 585151 (delta 30865), reused 32818 (delta 30784), pack-reused 552214 (from 1)[K
Receiving objects: 100% (585151/585151), 1.09 GiB | 43.31 MiB/s, done.
Resolving deltas: 100% (546956/546956), done.
fatal: not a git repository (or any parent up to mount point /kaggle)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).


In [2]:
# Basic python and ML Libraries
import os
import random
import numpy as np
import pandas as pd
# for ignoring warnings
import warnings
warnings.filterwarnings('ignore')

# We will be reading images using OpenCV
import cv2

# xml library for parsing xml files
from xml.etree import ElementTree as et

# matplotlib for visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# torchvision libraries
import torch
import torchvision
from torchvision import transforms as torchtrans  
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# these are the helper libraries imported.
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# for image augmentations
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [3]:
# Locations of the training and validation splits (Pascal-VOC export: every
# .jpg image has an .xml annotation with the same stem next to it).
files_dir, val_dir = (
    '/kaggle/input/lung-ct-version-n-512/lung_ct_version_n_512.v2i.voc/train',
    '/kaggle/input/lung-ct-version-n-512/lung_ct_version_n_512.v2i.voc/valid',
)


class LungImagesDataset(torch.utils.data.Dataset):
    """Detection dataset backed by a folder of ``.jpg`` images and VOC ``.xml`` files.

    Every image ``<stem>.jpg`` in ``files_dir`` must have an annotation file
    ``<stem>.xml`` in the same folder.  Images are converted to RGB float32,
    resized to ``(height, width)`` and scaled to ``[0, 1]``; box coordinates are
    rescaled to the resized image.

    Args:
        files_dir: Directory holding the images and their annotation files.
        width: Target image width in pixels.
        height: Target image height in pixels.
        transforms: Optional callable invoked with keyword arguments and
            returning a mapping that contains at least ``'image'`` (the
            albumentations calling convention).  If the object exposes a
            ``processors`` mapping containing ``'bboxes'`` (an
            ``albumentations.Compose`` created with ``bbox_params``), the boxes
            and class ids are forwarded as ``bboxes=`` / ``labels=`` and the
            transformed values are used, so the pipeline must then declare
            ``label_fields=['labels']``.  Otherwise only the image is passed,
            and any geometric augmentation in the pipeline will NOT be applied
            to the boxes.
    """

    def __init__(self, files_dir, width, height, transforms=None):
        self.transforms = transforms
        self.files_dir = files_dir
        self.height = height
        self.width = width

        # Sorted so that a given index always refers to the same image.
        # Only files whose name ends in '.jpg' are treated as images.
        self.imgs = [image for image in sorted(os.listdir(files_dir))
                     if image[-4:] == '.jpg']

        # Index 0 is reserved for the background class.  An explicit placeholder
        # is used instead of IPython's `_` (last cell output), which is hidden
        # kernel state and undefined outside IPython.
        self.classes = ['__background__', 'nodule']

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the image at position ``idx``.

        ``target`` holds ``boxes`` (float32, shape ``(N, 4)`` as
        ``[xmin, ymin, xmax, ymax]``), ``labels`` (int64), ``area``,
        ``iscrowd`` (all zeros) and ``image_id`` (the plain integer ``idx``).

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
            ValueError: if an annotation uses a class name not in ``self.classes``.
        """
        img_name = self.imgs[idx]
        image_path = os.path.join(self.files_dir, img_name)

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")

        # BGR -> RGB, float32, target size, then scale to [0, 1].
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        # `interpolation` must be passed by keyword: the third positional
        # parameter of cv2.resize is the output array `dst`, not the mode.
        img_res = cv2.resize(img_rgb, (self.width, self.height),
                             interpolation=cv2.INTER_AREA)
        img_res /= 255.0

        # The annotation file shares the image's stem.
        annot_file_path = os.path.join(self.files_dir, img_name[:-4] + '.xml')
        root = et.parse(annot_file_path).getroot()

        # OpenCV arrays are (height, width, channels).
        ht, wt = img.shape[0], img.shape[1]

        boxes = []
        labels = []
        for member in root.findall('object'):
            labels.append(self.classes.index(member.find('name').text))

            bndbox = member.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            xmax = int(bndbox.find('xmax').text)
            ymin = int(bndbox.find('ymin').text)
            ymax = int(bndbox.find('ymax').text)

            # Rescale from the original image size to the resized image.
            boxes.append([(xmin / wt) * self.width,
                          (ymin / ht) * self.height,
                          (xmax / wt) * self.width,
                          (ymax / ht) * self.height])

        if self.transforms:
            processors = getattr(self.transforms, 'processors', None) or {}
            if 'bboxes' in processors:
                # Bbox-aware pipeline: let it move the boxes together with the
                # pixels (e.g. for flips) and keep labels aligned with them.
                sample = self.transforms(image=img_res, bboxes=boxes, labels=labels)
                boxes = [[float(v) for v in list(box)[:4]] for box in sample['bboxes']]
                labels = [int(label) for label in sample['labels']]
            else:
                sample = self.transforms(image=img_res)
            img_res = sample['image']

        # reshape keeps the (N, 4) layout even when the image has no objects,
        # so the column indexing below does not fail on an empty annotation.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # All instances are assumed not to be crowd regions.
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "area": area,
            "iscrowd": iscrowd,
            "image_id": idx,
        }

        return img_res, target

    def __len__(self):
        """Number of ``.jpg`` images found in ``files_dir``."""
        return len(self.imgs)


# Smoke test: build the training dataset without transforms and look at one sample.
dataset = LungImagesDataset(files_dir, width=512, height=512)
print('length of dataset = ', len(dataset), '\n')

# 78 is an arbitrary index; any value below len(dataset) works.
img, target = dataset[78]
print(img.shape, '\n',target)

length of dataset =  2417 

(512, 512, 3) 
 {'boxes': tensor([[395., 307., 407., 319.]]), 'labels': tensor([1]), 'area': tensor([144.]), 'iscrowd': tensor([0]), 'image_id': 78}


In [8]:
def get_object_detection_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a fresh box head.

    The network is loaded with COCO pre-trained weights and its box predictor
    is replaced by a new ``FastRCNNPredictor`` sized for ``num_classes``
    outputs (the caller's count must include the background class).

    Args:
        num_classes: Number of output classes for the new box predictor.

    Returns:
        The modified torchvision detection model.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # The new head must accept the same feature width as the one it replaces.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

    return detector

In [5]:
def get_transform(train):
    """Return the albumentations pipeline for the training or evaluation split.

    Args:
        train: When true, a random horizontal flip (probability 0.5) is added
            before tensor conversion; otherwise the image is only converted.

    Returns:
        An ``A.Compose`` whose last step is ``ToTensorV2``.
    """
    steps = []
    if train:
        # The probability must be passed by keyword: in albumentations releases
        # whose first positional parameter is `always_apply`, a positional 0.5
        # is truthy and forces the flip on every sample.
        # NOTE(review): a flip only keeps annotations valid if the caller also
        # passes `bboxes` and this Compose is created with `bbox_params`
        # (e.g. A.BboxParams(format='pascal_voc', label_fields=['labels']));
        # confirm the dataset does so before relying on this augmentation.
        steps.append(A.HorizontalFlip(p=0.5))

    # ToTensorV2 converts the image to a PyTorch tensor without dividing by 255.
    steps.append(ToTensorV2(p=1.0))
    return A.Compose(steps)

In [6]:
# Datasets for the three splits; only the training split gets augmentation.
train_dataset = LungImagesDataset(files_dir, 512, 512, transforms=get_transform(train=True))
val_dataset = LungImagesDataset(val_dir, 512, 512, transforms=get_transform(train=False))
test_dataset = LungImagesDataset(
    "/kaggle/input/lung-ct-version-n-512/lung_ct_version_n_512.v2i.voc/test",
    512, 512, transforms=get_transform(train=False))


def _make_loader(split_dataset, shuffle):
    """Wrap a dataset in a DataLoader using the settings shared by all splits."""
    return torch.utils.data.DataLoader(
        split_dataset, batch_size=8, shuffle=shuffle, num_workers=4,
        collate_fn=utils.collate_fn)


# Only the training loader shuffles; validation and test keep a fixed order.
train_data_loader = _make_loader(train_dataset, shuffle=True)
val_data_loader = _make_loader(val_dataset, shuffle=False)
test_data_loader = _make_loader(test_dataset, shuffle=False)

In [9]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two classes: background (index 0) and nodule.
num_classes = 2
num_epochs = 10

# Build the detector and place it on the selected device.
model = get_object_detection_model(num_classes)
model.to(device)

# SGD over the trainable parameters only.
params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(params, lr=5e-3, momentum=0.9, weight_decay=5e-4)

# Multiply the learning rate by 0.1 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 195MB/s]  


In [10]:
# train for `num_epochs` epochs, evaluating after each one
for epoch in range(num_epochs):
    # training for one epoch
    # (print_freq=302 presumably limits logging to the first and last of the
    # 303 batches seen in the recorded output - adjust if the batch count changes)
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=302)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation dataset (val_data_loader), not the test split
    evaluate(model, val_data_loader, device=device)

Epoch: [0]  [  0/303]  eta: 0:13:00  lr: 0.000022  loss: 1.3369 (1.3369)  loss_classifier: 0.8135 (0.8135)  loss_box_reg: 0.0080 (0.0080)  loss_objectness: 0.4989 (0.4989)  loss_rpn_box_reg: 0.0166 (0.0166)  time: 2.5760  data: 0.3119  max mem: 5791
Epoch: [0]  [302/303]  eta: 0:00:00  lr: 0.005000  loss: 0.1204 (0.1933)  loss_classifier: 0.0352 (0.0621)  loss_box_reg: 0.0279 (0.0262)  loss_objectness: 0.0487 (0.0965)  loss_rpn_box_reg: 0.0048 (0.0084)  time: 0.7735  data: 0.0211  max mem: 5950
Epoch: [0] Total time: 0:04:04 (0.8083 s / it)
creating index...
index created!
Test:  [ 0/15]  eta: 0:00:11  model_time: 0.3903 (0.3903)  evaluator_time: 0.0129 (0.0129)  time: 0.7444  data: 0.3299  max mem: 5950
Test:  [14/15]  eta: 0:00:00  model_time: 0.3647 (0.3571)  evaluator_time: 0.0085 (0.0092)  time: 0.4151  data: 0.0401  max mem: 5950
Test: Total time: 0:00:06 (0.4175 s / it)
Averaged stats: model_time: 0.3647 (0.3571)  evaluator_time: 0.0085 (0.0092)
Accumulating evaluation results..

In [11]:
# Final evaluation on the test split; the returned evaluator object is the
# cell's displayed value.
evaluate(model, test_data_loader, device=device)

creating index...
index created!
Test:  [ 0/29]  eta: 0:00:21  model_time: 0.3811 (0.3811)  evaluator_time: 0.0071 (0.0071)  time: 0.7313  data: 0.3320  max mem: 5951
Test:  [28/29]  eta: 0:00:00  model_time: 0.3645 (0.3588)  evaluator_time: 0.0057 (0.0056)  time: 0.3891  data: 0.0194  max mem: 5951
Test: Total time: 0:00:11 (0.4039 s / it)
Averaged stats: model_time: 0.3645 (0.3588)  evaluator_time: 0.0057 (0.0056)
Accumulating evaluation results...
DONE (t=0.04s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.501
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.856
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.504
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.506
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.480
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU

<coco_eval.CocoEvaluator at 0x794278f41d80>

In [12]:
# Persist model and optimizer state so training can be resumed or the model reloaded.
ckpt_file_name = "/kaggle/working/frcnn_model_1.pth"
checkpoint = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}
torch.save(checkpoint, ckpt_file_name)