# Faster RCNN Implementation on VinBIGData X-Ray Images using PyTorch Lightning Module
Make sure to turn on the GPU and internet access in the right-hand sidebar on Kaggle.

In [1]:
import pandas as pd
import numpy as np
import cv2
import os

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision
from torchvision.ops import box_iou, MultiScaleRoIAlign
from torchvision import models

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
from pytorch_lightning.metrics import AveragePrecision

import gc

SEED=2484
# use the GPU when one is available; otherwise fall back to the CPU so this cell
# still runs in a session without an accelerator
DEVICE=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# seed_everything is exported at the package top level, which avoids reaching into
# the internal pl.utilities.seed module path
pl.seed_everything(SEED)

In [2]:
# show which device was selected in the configuration cell
print(DEVICE)

## Generate list of image filenames for train, valid and test sets
Lists are generated from the train directory containing png files.

In [None]:
# collect the bare filename of every file below the train image directory
filepaths = []

for dirname, _, filenames in os.walk('../input/vinbigdata-chest-xray-original-png/train'):
    filepaths.extend(filenames)

# os.walk yields entries in filesystem order, which is not guaranteed to be stable;
# sorting makes the positional train/valid/test slices below reproducible
filepaths.sort()

Assign 80% of the data to train set, 10% to validation set and 10% to test set.

In [4]:
# size each split from the full list of collected files; `filenames` is only the
# os.walk loop variable and holds just the last directory visited
numImages = len(filepaths)

trainNum = int(0.8 * numImages)
validNum = int(0.1 * numImages)
testNum = int(0.1 * numImages)

In [None]:
# the first trainNum filenames form the train split
trainImages = filepaths[:trainNum]

# image ids are the filenames with everything from the first '.' removed
trainFiles = [imageName.split('.')[0] for imageName in trainImages]

## Read in given train data
This single annotation file is the source for all three of our datasets: train, validation and test.
Specify datatypes to save memory when reading the dataset.

In [9]:
# explicit dtype for every column read from train.csv
df_dtypes = {
    **dict.fromkeys(['image_id', 'class_name', 'rad_id'], 'string'),
    'class_id': 'int64',
    **dict.fromkeys(['x_min', 'y_min', 'x_max', 'y_max'], 'float64'),
}

all_df = pd.read_csv(
    '../input/vinbigdata-chest-xray-abnormalities-detection/train.csv',
    dtype=df_dtypes,
)

# keep only the annotation rows whose image belongs to the train split
in_train_split = all_df['image_id'].isin(trainFiles)
train_df = all_df.loc[in_train_split].reset_index(drop=True)
train_df

In [None]:
# the validNum filenames following the train slice form the validation split
validImages = filepaths[trainNum : trainNum + validNum]

# image ids are the filenames with everything from the first '.' removed
validFiles = [imageName.split('.')[0] for imageName in validImages]

Create subset of train dataset for validation using list of validation image filenames.

In [13]:
# keep only the annotation rows whose image belongs to the validation split
in_valid_split = all_df['image_id'].isin(validFiles)
valid_df = all_df.loc[in_valid_split].reset_index(drop=True)
valid_df

In [None]:
# everything after the train and validation slices forms the test split
testImages = filepaths[trainNum + validNum :]

# image ids are the filenames with everything from the first '.' removed
testFiles = [imageName.split('.')[0] for imageName in testImages]

Create subset of train dataset for testing using list of test image filenames.

In [17]:
# keep only the annotation rows whose image belongs to the test split
in_test_split = all_df['image_id'].isin(testFiles)
test_df = all_df.loc[in_test_split].reset_index(drop=True)
test_df

Set directory to read images from.

In [18]:
# directory holding the PNG images; all three splits are drawn from this one folder
train_dir = '../input/vinbigdata-chest-xray-original-png/train'

# Modelling
Code is adapted from: https://www.kaggle.com/code/sanjayarvind/fasterrcnn-pytorch-lightning/notebook

## Image preprocessing

In [19]:
class imageProcessing(Dataset):
    """Dataset that yields one X-ray image per unique image id.

    In the 'test' phase each item is ``(image, image_id)``; in any other phase it
    is ``(image, target)``, where ``target`` is a dict with the keys 'boxes',
    'labels', 'area', 'image_id' and 'background'.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Annotation rows. Must contain 'image_id'; outside the test phase it must
        also contain 'class_id', 'x_min', 'y_min', 'x_max' and 'y_max'.
    image_dir : str
        Directory containing one '<image_id>.png' file per image.
    transforms : callable, optional
        Called with keyword arguments ('image', plus 'bboxes' and 'labels'
        outside the test phase) and expected to return a dict with those keys.
    phase : str
        'test' selects the image-only output described above.
    """

    def __init__(self, dataframe, image_dir, transforms=None, phase='train'):
        super().__init__()

        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
        self.phase = phase

    def _load_image(self, image_id):
        """Read '<image_dir>/<image_id>.png' as a float32 RGB array scaled to [0, 1]."""
        image_path = f'{self.image_dir}/{image_id}.png'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals an unreadable or missing file by returning None
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

    def __getitem__(self, idx):
        # select one image id from relevant dataframe
        image_id = self.image_ids[idx]
        image = self._load_image(image_id)

        # image-only output: no annotation rows are needed
        if self.phase == 'test':
            if self.transforms:
                # transform image to tensor to input to PyTorch model
                image = self.transforms(image=image)['image']
            return image, image_id

        # select all rows for that image from relevant dataframe
        records = self.df[self.df['image_id'] == image_id]

        # select box coordinates from dataframe rows for this image
        boxes = records[['x_min', 'y_min', 'x_max', 'y_max']].values

        # calculate area of each bounding box
        height = boxes[:, 3] - boxes[:, 1]
        width = boxes[:, 2] - boxes[:, 0]
        area = torch.as_tensor(height * width, dtype=torch.float32)

        # shift all class_ids up by 1; the result is kept 1-D even for a single
        # box (squeezing would collapse it to a 0-d tensor)
        labels = torch.as_tensor(records['class_id'].values + 1, dtype=torch.int64)
        # one zero per annotation row
        background = torch.zeros((records.shape[0],), dtype=torch.int64)

        target = {}
        # always hand out boxes as a float32 tensor of shape [N, 4]
        target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        target['labels'] = labels
        target['area'] = area
        target['image_id'] = torch.tensor([idx])
        target['background'] = background

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample['image']
            # reshape keeps the [N, 4] layout even if the transform returns no boxes
            target['boxes'] = torch.as_tensor(sample['bboxes'], dtype=torch.float32).reshape(-1, 4)

        return image, target

    # number of unique images in the dataframe
    def __len__(self):
        return self.image_ids.shape[0]

## Transform input images to tensors

In [68]:
def get_train_transform():
    """Build the training augmentation pipeline: random flip, then tensor conversion.

    Bounding boxes are passed in pascal_voc format and travel with the 'labels' field.
    """
    return A.Compose([
        # the probability must be passed by keyword: a bare positional 0.5 is not
        # guaranteed to bind to `p`
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    """Build the evaluation pipeline: tensor conversion only, no augmentation."""
    to_tensor = ToTensorV2(p=1.0)
    return A.Compose([to_tensor])

### Create file to log train losses

In [70]:
!touch loss_train.log

Define the collate function that assembles the samples of each batch passed to the model.

In [None]:
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    For example, [(img1, tgt1), (img2, tgt2)] becomes ((img1, img2), (tgt1, tgt2)).
    """
    per_field = zip(*batch)
    return tuple(per_field)

## Create LightningModule class to run model

In [71]:
class VinDetector(pl.LightningModule):
    """LightningModule that fine-tunes a pretrained Faster R-CNN (ResNet50-FPN).

    Parameters
    ----------
    lr : float
        Learning rate handed to the SGD optimizer.
    **kwargs
        Accepted for call-site compatibility; not used.
    """

    def __init__(self, lr, **kwargs):
        super().__init__()

        # download pretrained Faster R-CNN model with ResNet50 CNN architecture
        self.model = models.detection.fasterrcnn_resnet50_fpn(pretrained = True)
        # 14 abnormalities + background class
        num_classes = 15
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        # replace the box predictor with a new head for our classes; nothing is frozen:
        # configure_optimizers passes every model parameter to the optimizer
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

        # hyperparameters -- can change these
        self.learning_rate = lr
        self.batch_size = 4

    # run during inference
    def forward(self, x):
        return self.model(x)

    # get dataframe and directory for image processing
    def prepare_data(self):
        # NOTE(review): Lightning documents prepare_data as a hook where state should
        # not be assigned; the dataset is built here, consider moving it to setup().
        df = train_df
        # drop all rows where there is no finding to avoid model attempting to detect these
        df = df[df['class_id'] != 14].reset_index(drop=True)
        # process dataframe and images for inputting to model
        self.train_dataset = imageProcessing(df, '../input/vinbigdata-chest-xray-original-png/train', get_train_transform())

    # returns iterable form of train dataset
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, pin_memory=True, num_workers=4, collate_fn=collate_fn)

    # calculate loss for each batch of train images
    def training_step(self, batch, batch_idx):
        images, targets = batch
        # hand the model a list of fresh dicts rather than the batch's own objects
        targets = [dict(target) for target in targets]
        loss_dict = self.model(images, targets)
        # total loss is the sum of the individual losses returned by the model
        loss = sum(loss_dict.values())

        # display progress on progress bar for each batch
        self.log('Loss', loss, on_step=True, on_epoch=True, prog_bar=True)

        # append "<batch index>:<loss>" to the log file this notebook creates and
        # inspects (loss_train.log); .item() writes the number, not the tensor repr
        with open('./loss_train.log', "a") as log_file:
            log_file.write(f'{batch_idx}:{loss.item()}\n')

        return {"loss": loss}

    # choose optimiser (SGD with Nesterov momentum) and learning rate schedule
    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.95, weight_decay=1e-5, nesterov=True)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=6, eta_min=0, verbose=True)
        return [optimizer], [scheduler]

## Train Model

In [72]:
# learning rate handed to the detector
learning_rate = 0.001
# build the detector
first_model = VinDetector(lr=learning_rate)

### Train model

In [73]:
# one epoch on a single GPU, refreshing the progress bar every 100 batches
trainers = pl.Trainer(
    max_epochs=1,
    gpus=1,
    progress_bar_refresh_rate=100,
)
trainers.fit(first_model)

### Check log contents
Each line has the format `batch index:batch loss`.

In [75]:
!head loss_train.log

## Testing model
Process dataframe and images, then get iterable dataloader

In [78]:
# dataset in 'test' phase: each item is (image, image_id)
test_dataset = imageProcessing(
    dataframe=test_df,
    image_dir='../input/vinbigdata-chest-xray-original-png/train',
    transforms=get_valid_transform(),
    phase='test',
)
# fixed order (no shuffling), batches of 8
test_data_loader = DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
)

### Run model for inference

In [None]:
# minimum score a detection needs to be kept
detection_threshold = 0.5
# collects one dict per reported detection
results = []

# move the detector to the target device and switch it to evaluation mode
first_model.model.to(DEVICE).eval()

In [79]:
def detections_to_records(image_id, boxes, labels, scores, threshold):
    """Convert the raw detections of one image into result rows.

    Parameters
    ----------
    image_id : str
        Identifier stored in every returned row.
    boxes, labels, scores : numpy.ndarray
        Predicted box coordinates, class labels and scores, one entry per detection.
    threshold : float
        Detections scoring below this value are discarded.

    Returns
    -------
    list of dict
        One row per kept detection, or a single 'no finding' row (class 14) when
        nothing was detected or nothing reached the threshold.
    """
    keep = scores >= threshold
    if not keep.any():
        return [{
            'image_id': image_id,
            'class_id': 14,
            'scores': 1.0,
            'x_min': 0,
            'y_min': 0,
            'x_max': 1,
            'y_max': 1
        }]

    kept_boxes = boxes[keep].astype(np.int32)
    kept_scores = scores[keep]
    # shift all class_ids down by 1 (undoes the +1 applied to the training labels)
    kept_labels = labels[keep] - 1
    # map all '-1's to 14, meaning 'no finding'
    kept_labels[kept_labels == -1] = 14

    return [
        {
            'image_id': image_id,
            'class_id': label,
            'scores': score,
            'x_min': box[0],
            'y_min': box[1],
            'x_max': box[2],
            'y_max': box[3]
        }
        for label, box, score in zip(kept_labels, kept_boxes, kept_scores)
    ]


# start from an empty list so re-running this cell does not duplicate rows
results = []

with torch.no_grad():
    for images, image_ids in test_data_loader:
        # send each image batch to the selected device
        images = [image.to(DEVICE) for image in images]
        # one dict of predictions (boxes, labels, scores) per image in the batch
        outputs = first_model.model(images)

        for image_id, output in zip(image_ids, outputs):
            # get predicted bounding box coordinates, classes and classification probabilities
            results.extend(detections_to_records(
                image_id,
                output['boxes'].cpu().numpy(),
                output['labels'].cpu().numpy(),
                output['scores'].cpu().numpy(),
                detection_threshold,
            ))

Convert list of dictionaries to dataframe

In [None]:
# one row per entry of `results`; the dict keys become the columns
results_df = pd.DataFrame(results)

### Save detections to csv file
This dataframe is used for drawing detected bounding boxes over images in the next notebook

In [None]:
# write the detections without the dataframe index column
results_df.to_csv('./detections.csv', index=False)

### Formatting dataframes
Map class_ids to class names

In [38]:
# class_id -> class name; the id is the position in this list
classes = dict(enumerate([
    'Aortic enlargement',
    'Atelectasis',
    'Cardiomegaly',
    'Calcification',
    'Consolidation',
    'ILD',
    'Infiltration',
    'Lung Opacity',
    'Nodule/Mass',
    'Other lesion',
    'Pleural effusion',
    'Pleural thickening',
    'Pneumothorax',
    'Pulmonary fibrosis',
    'No finding',
]))

In [39]:
# look up the readable class name for every predicted class_id
results_df['class_name'] = results_df['class_id'].map(classes)

In [40]:
# work on an independent copy so results_df stays as saved
pred_df = results_df.copy(deep=True)
pred_df

### Read in meta data about all images

In [41]:
# per-image metadata; the cells below use its image_id, dim0 and dim1 columns
all_meta = pd.read_csv('../input/vinbigdata-chest-xray-original-png/train_meta.csv')
all_meta

Join image dimensions from meta dataframe to detections

In [42]:
# attach the metadata columns to every detection row, matching on image_id
det_df = pred_df.merge(all_meta, on='image_id')
det_df

Normalise the bounding box coordinates

In [43]:
# divide each coordinate by its image dimension: x by dim0, y by dim1
# NOTE(review): confirm which of dim0/dim1 is the image width in train_meta.csv
for coord, dim in [('x_min', 'dim0'), ('x_max', 'dim0'), ('y_min', 'dim1'), ('y_max', 'dim1')]:
    det_df[f'{coord}_norm'] = det_df[coord] / det_df[dim]
det_df

In [44]:
# ground-truth annotations for the test split, copied so test_df stays untouched
ann_df = test_df.copy(deep=True)
ann_df

In [45]:
# attach the metadata columns to every ground-truth row, matching on image_id
gt_df = ann_df.merge(all_meta, on='image_id')
gt_df

In [46]:
# divide each coordinate by its image dimension: x by dim0, y by dim1
# NOTE(review): confirm which of dim0/dim1 is the image width in train_meta.csv
for coord, dim in [('x_min', 'dim0'), ('x_max', 'dim0'), ('y_min', 'dim1'), ('y_max', 'dim1')]:
    gt_df[f'{coord}_norm'] = gt_df[coord] / gt_df[dim]
gt_df

## Calculate mean Average Precision and Average Precision
Reformat actual and detected values

In [48]:
# normalised box columns shared by both arrays, in the order x_min, x_max, y_min, y_max
box_columns = ['x_min_norm', 'x_max_norm', 'y_min_norm', 'y_max_norm']

# ground truth: image id, class name, box; detections additionally carry the score
ann = gt_df[['image_id', 'class_name', *box_columns]].to_numpy()
det = det_df[['image_id', 'class_name', 'scores', *box_columns]].to_numpy()

In [94]:
!pip install map_boxes

In [95]:
from map_boxes import mean_average_precision_for_boxes

For each class, the output shows the class name, its average precision and the number of actual (ground-truth) objects.

In [139]:
# compare the ground-truth rows (ann) with the detection rows (det)
mean_ap, average_precisions = mean_average_precision_for_boxes(ann, det)

# Class distributions

In [175]:
import matplotlib.pyplot as plt
import seaborn as sns

### Predicted class distributions

In [195]:
# horizontal bar per class: number of detection rows
fig, ax = plt.subplots(figsize=(6, 6))
sns.countplot(y='class_name', data=det_df, ax=ax)
plt.show()

### Actual class distribution

In [197]:
# horizontal bar per class: number of ground-truth rows
fig, ax = plt.subplots(figsize=(6, 6))
sns.countplot(y='class_name', data=gt_df, ax=ax)
plt.show()