<a href="https://colab.research.google.com/github/bhanup6663/chest_x_ray_reporting/blob/main/fasterCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
# !unzip /content/drive/MyDrive/resized_images.zip -d /content/

In [3]:
# !pip install bbox_visualizer
# !pip install torchvision
# !pip install pydicom

In [4]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from torch.optim.lr_scheduler import ReduceLROnPlateau

import cv2
from skimage import io, exposure
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import bbox_visualizer as bbv

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from glob import glob
from skimage import exposure

import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torchvision.ops import box_iou

import shutil

import warnings

warnings.filterwarnings('ignore')

In [5]:
# Load the annotation table from the working directory and preview the first rows.
dataset = pd.read_csv("train1.csv")
dataset.head()

Unnamed: 0,image_id,class_name,class_id,rad_id,x_min,y_min,x_max,y_max,width,height
0,50a418190bc3fb1ef1633bf9678929b3,No finding,14,R11,,,,,2332.0,2580.0
1,21a10246a5ec7af151081d0cd6d65dc9,No finding,14,R7,,,,,2954.0,3159.0
2,9a5094b2563a1ef3ff50dc5c7ff71345,Cardiomegaly,3,R10,0.332212,0.588613,0.794712,0.783818,2080.0,2336.0
3,051132a778e61a86eb147c7c6f564dfe,Aortic enlargement,0,R10,0.548611,0.257986,0.699219,0.353819,2304.0,2880.0
4,063319de25ce7edb9b1c6b8881290140,No finding,14,R10,,,,,2540.0,3072.0


In [6]:
# Drop the "No finding" rows so that only annotated findings remain.
dataset_new = (
    dataset
    .loc[dataset["class_name"].ne("No finding")]
    .reset_index(drop=True)
)

In [7]:
# Finding names keyed by class_id (0-13), listed in id order.
class_brands = dict(enumerate([
    'Aortic enlargement',
    'Atelectasis',
    'Calcification',
    'Cardiomegaly',
    'Consolidation',
    'ILD',
    'Infiltration',
    'Lung Opacity',
    'Nodule/Mass',
    'Other lesion',
    'Pleural effusion',
    'Pleural thickening',
    'Pneumothorax',
    'Pulmonary fibrosis',
]))

In [8]:
# 14 finding classes plus one extra slot: the dataset shifts class ids by +1,
# leaving label 0 free for the detector's background class.
num_classes = 14 + 1

In [9]:
# Start from torchvision's pretrained Faster R-CNN (ResNet-50 + FPN backbone).
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Swap the box-classification head for one sized to this task's `num_classes`,
# reusing the input width of the head that ships with the pretrained model.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features,
    num_classes=num_classes,
)


In [10]:
def set_device():
    """Return ``"cuda"`` when PyTorch reports a usable CUDA device, else ``"cpu"``."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"

In [11]:
# Device string shared by the model and every batch moved during training.
device = set_device()

In [12]:
# Place the detector on the selected device before collecting its parameters.
model.to(device)

# Only parameters that still require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))

In [13]:
def get_train_transform():
    """Build the training pipeline: tensor conversion only, with Pascal-VOC style
    bounding boxes whose class labels travel in the ``labels`` field."""
    bbox_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    steps = [ToTensorV2()]
    return A.Compose(steps, bbox_params=bbox_settings)

def get_valid_transform():
    """Build the validation pipeline: tensor conversion only, with Pascal-VOC style
    bounding boxes whose class labels travel in the ``labels`` field."""
    bbox_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    steps = [ToTensorV2()]
    return A.Compose(steps, bbox_params=bbox_settings)

In [14]:
class LungsAnnotationDataset(Dataset):
    """Object-detection dataset: one item per unique ``image_id``.

    Each item is an ``(image, target)`` pair where ``image`` is a float32
    ``(3, H, W)`` tensor and ``target`` is a dict with ``boxes`` (pixel
    coordinates), ``labels`` (``class_id + 1`` so that 0 stays free for the
    background class), ``area`` and ``iscrowd``.

    Args:
        dataframe: Annotation table with ``image_id``, ``class_id`` and the
            ``x_min``/``y_min``/``x_max``/``y_max`` columns. The box columns
            are treated as fractions of the image width/height.
        image_dir: Directory that holds ``<image_id>.png`` files.
        transforms: Optional albumentations-style callable taking
            ``image``/``bboxes``/``labels`` keyword arguments.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Load the image at ``index`` together with all of its annotations."""
        image_id = self.image_ids[index]
        records = self.df[self.df['image_id'] == image_id]

        image = io.imread(f'{self.image_dir}/{image_id}.png')

        # Scale to [0, 1] and equalise the histogram to boost contrast.
        image = image / 255.0
        image = exposure.equalize_hist(image)
        image = image.astype('float32')

        # Grayscale images are replicated to three channels; the array stays
        # channel-last (H, W, C) here because that is the layout the
        # albumentations pipeline (and its ToTensorV2 step) expects.
        if image.ndim == 2:
            image = np.stack([image, image, image], axis=-1)
        height, width = image.shape[:2]

        # Convert the fractional box coordinates to pixels using the size of
        # the image actually loaded, rather than assuming a fixed 512x512.
        boxes = records[['x_min', 'y_min', 'x_max', 'y_max']].values.astype('float64')
        boxes[:, [0, 2]] *= width
        boxes[:, [1, 3]] *= height

        labels = records.class_id.values + 1

        if self.transforms:
            sample = self.transforms(
                image=image,
                bboxes=boxes.tolist(),
                labels=labels.tolist(),
            )
            image = sample['image']
            # Take boxes AND labels from the transformed sample so both stay
            # aligned if the pipeline ever drops or reorders boxes.
            boxes = np.array(
                [bbox[:4] for bbox in sample['bboxes']], dtype='float64'
            ).reshape(-1, 4)
            labels = np.asarray(sample['labels'], dtype='int64')
        else:
            # Without a pipeline, do the channel-first tensor conversion here
            # so both code paths return the same kind of object.
            image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1)))

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # Area is derived from the final pixel boxes so it shares their units.
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            'boxes': boxes,
            'labels': torch.as_tensor(labels, dtype=torch.int64),
            'area': area,
            'iscrowd': torch.zeros((boxes.shape[0],), dtype=torch.int64),
        }

        return image, target

    def __len__(self):
        """Number of distinct images in the dataframe."""
        return len(self.image_ids)

In [15]:
def collate_fn(batch):
    """Collate ``(image, target)`` pairs into a stacked image batch and a tuple of targets.

    Any image whose first dimension is not 3 is rearranged with
    ``permute(1, 2, 0)`` before stacking; images that already lead with a
    size-3 dimension are used as-is. Targets are returned untouched, in
    batch order.
    """
    images, targets = zip(*batch)

    prepared = []
    for image in images:
        if image.shape[0] != 3:
            image = image.permute(1, 2, 0)
        prepared.append(image)

    return torch.stack(prepared), targets


In [16]:
# Number of passes over the training data performed for each fold.
num_epochs = 50

# Placeholder only: the training function builds its own scheduler.
lr_scheduler = None

In [17]:
class Averager:
    """Accumulate values and expose their running mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything accumulated so far."""
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of the observations sent since the last reset (0 if there are none)."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

In [18]:
def calculate_iou(pred_boxes, gt_boxes):
    """
    Pairwise Intersection over Union between predictions and ground truth.

    This is a thin wrapper that forwards both arguments to ``box_iou``.

    Args:
        pred_boxes (Tensor): Predicted bounding boxes, shape [num_pred_boxes, 4].
        gt_boxes (Tensor): Ground truth bounding boxes, shape [num_gt_boxes, 4].

    Returns:
        Tensor: IoU scores, shape [num_pred_boxes, num_gt_boxes].
    """
    pairwise_iou = box_iou(pred_boxes, gt_boxes)
    return pairwise_iou

In [19]:
def _save_checkpoint(state_dict, file_name,
                     local_dir="x_ray_models",
                     backup_dir="/content/drive/MyDrive/x_ray_models"):
    """Write ``state_dict`` to ``local_dir/file_name`` and mirror it to ``backup_dir``.

    The local directory is created on demand. The backup copy is made only
    when the parent of ``backup_dir`` exists (i.e. Drive is mounted);
    otherwise a message is printed and the local file is kept, so a missing
    mount cannot abort a long training run.

    Returns:
        str: Path of the locally saved checkpoint.
    """
    os.makedirs(local_dir, exist_ok=True)
    local_path = os.path.join(local_dir, file_name)
    torch.save(state_dict, local_path)

    if os.path.isdir(os.path.dirname(backup_dir)):
        os.makedirs(backup_dir, exist_ok=True)
        shutil.copy(local_path, os.path.join(backup_dir, file_name))
    else:
        print(f"Backup location {backup_dir} is not available; checkpoint kept at {local_path}")

    return local_path


def train_model(train_dataset, val_dataset, fold):
    """Train the module-level ``model`` on one fold and track validation IoU.

    Relies on the notebook globals ``model``, ``params``, ``device`` and
    ``num_epochs``. After each epoch the mean pairwise IoU between predicted
    and ground-truth boxes is computed on ``val_dataset`` and fed to a
    ``ReduceLROnPlateau`` scheduler. A checkpoint is written every 10 epochs
    and once more after the final epoch.

    Args:
        train_dataset: Dataset yielding ``(image, target)`` pairs for training.
        val_dataset: Dataset yielding ``(image, target)`` pairs for validation.
        fold: Fold identifier used in log messages and checkpoint file names.
    """
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=10,
        shuffle=True,
        num_workers=4,
        collate_fn=collate_fn,
        prefetch_factor=4
    )

    val_data_loader = DataLoader(
        val_dataset,
        batch_size=3,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn,
        prefetch_factor=4
    )

    loss_hist = Averager()

    optimizer = torch.optim.Adam(params, lr=0.0005, weight_decay=0.0005)
    # The scheduler is stepped with validation IoU, where higher is better,
    # so it must watch for the metric to stop INCREASING (mode='max').
    scheduler = ReduceLROnPlateau(optimizer, mode='max', patience=10, factor=0.5, verbose=True)

    for epoch in range(num_epochs):
        loss_hist.reset()
        model.train()

        for itr, (images, targets) in enumerate(train_data_loader, 1):
            optimizer.zero_grad()
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            if not isinstance(loss_dict, dict):
                continue

            # Weighted combination of the four Faster R-CNN loss terms.
            losses = (loss_dict['loss_objectness'] +
                      10 * loss_dict['loss_classifier'] +
                      10 * loss_dict['loss_rpn_box_reg'] +
                      0.5 * loss_dict['loss_box_reg'] ** 2)

            # Skip the update entirely when the loss is not a number.
            if torch.isnan(losses).any():
                print(f"NaN detected in losses: {losses}")
                continue

            loss_hist.send(losses.item())

            losses.backward()
            optimizer.step()

            if itr % 100 == 0:
                print(f"Fold #{fold} Epoch #{epoch+1} Iteration #{itr}/{len(train_data_loader)} loss: {loss_hist.value:.4f}")

        # Validation phase
        iou_hist = Averager()
        model.eval()
        with torch.no_grad():
            for images, targets in val_data_loader:
                images = [image.to(device) for image in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                outputs = model(images)

                for output, target in zip(outputs, targets):
                    iou = calculate_iou(output['boxes'], target['boxes'])

                    # An image with no predicted boxes gives an empty matrix
                    # whose mean is NaN; count it as zero overlap so one such
                    # image cannot turn the whole epoch average into NaN.
                    iou_hist.send(iou.mean().item() if iou.numel() > 0 else 0.0)

        avg_iou = iou_hist.value
        print(f"Fold #{fold} Epoch #{epoch+1} Validation IoU: {avg_iou:.4f}")

        scheduler.step(avg_iou)

        # Save model checkpoint every 10 epochs
        if (epoch + 1) % 10 == 0:
            print(f"Saving model checkpoint at epoch {epoch+1}...")
            _save_checkpoint(model.state_dict(), f"model_fasterRCNN_fold{fold}_epoch{epoch+1}.pth")

    # Save final model state after the last epoch
    print(f"Saving final model state for fold #{fold}...")
    _save_checkpoint(model.state_dict(), f"model_fasterRCNN_fold{fold}_final.pth")

In [20]:
DIR_TRAIN = "resized_images"

# Split by image, not by annotation row: an image can have several rows (one
# per box), and a row-level split would scatter the boxes of a single image
# across the training and validation sets.
holdout_splitter = model_selection.GroupShuffleSplit(n_splits=1, test_size=0.20, random_state=42)
train_rows, valid_rows = next(
    holdout_splitter.split(dataset_new, groups=dataset_new['image_id'])
)
train_df = dataset_new.iloc[train_rows]
valid_df = dataset_new.iloc[valid_rows]

train_dataset = LungsAnnotationDataset(train_df, DIR_TRAIN, get_train_transform())
valid_dataset = LungsAnnotationDataset(valid_df, DIR_TRAIN, get_valid_transform())

In [None]:
# Shuffle once with a fixed seed so the run is reproducible, and derive `y`
# from the SAME shuffled frame so it lines up row-for-row with `df`.
df = dataset_new.sample(frac=1, random_state=42).reset_index(drop=True)
y = df.class_id.values

# Group by image_id so every box of an image lands in the same fold.
kfold = model_selection.GroupKFold(n_splits=5)

# Loop over each fold and perform training and validation
for k, (train_index, val_index) in enumerate(
        kfold.split(df, y, groups=df.image_id.values), start=1):
    # Start every fold from a fresh pretrained detector. Re-using one model
    # across folds would let later folds validate on images the model was
    # already trained on in an earlier fold. `train_model` reads the
    # module-level `model` and `params`, so both are rebound here.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.roi_heads.box_predictor = FastRCNNPredictor(
        model.roi_heads.box_predictor.cls_score.in_features, num_classes
    )
    model.to(device)
    params = [p for p in model.parameters() if p.requires_grad]

    train_dataset = LungsAnnotationDataset(df.iloc[train_index], DIR_TRAIN, get_train_transform())
    val_dataset = LungsAnnotationDataset(df.iloc[val_index], DIR_TRAIN, get_valid_transform())

    # Train and validate the model
    train_model(train_dataset, val_dataset, k)

Fold #1 Epoch #1 Iteration #100/352 loss: 4.4094
Fold #1 Epoch #1 Iteration #200/352 loss: 4.0404
Fold #1 Epoch #1 Iteration #300/352 loss: 3.9320
Fold #1 Epoch #1 Validation IoU: 0.0613
Fold #1 Epoch #2 Iteration #100/352 loss: 3.3825
Fold #1 Epoch #2 Iteration #200/352 loss: 3.4401
Fold #1 Epoch #2 Iteration #300/352 loss: 3.4508
