MONAI - Medical Open Network for AI - is a library for deep learning in healthcare imaging, originally started by NVIDIA & King’s College London: [https://monai.io](https://monai.io)


# Install required packages

Since it is not installed by default in Kaggle Jupyter notebooks, we first need to install [monai](https://github.com/Project-MONAI/MONAI).

In [None]:
!pip3 install monai

# Import libraries

Load python libraries used later.

In [None]:
import pandas as pd
import os
import SimpleITK as sitk
import numpy as np
import torch
from monai import transforms
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import numpy as np

# Define some utilities

## Method for metadata reading

First of all, we need to create a metadata table and join filepaths to train.csv

Therefore, we crawl train/test subdirectories for dicom files and extract their image IDs from filenames.

Then we join filenames to the metadata via "image_id".

Note that multiple annotations can occur in one image.

In [None]:
def _read_metadata(base_dir='/kaggle/input/vinbigdata-chest-xray-abnormalities-detection'):
    """Build the train and test metadata tables.

    The ``train`` and ``test`` subdirectories of ``base_dir`` are listed, the
    image ID is taken from each file name (everything before the first dot),
    and the train table is left-joined with ``train.csv`` on ``image_id``.
    Because an image can carry several annotations, the train table may
    contain several rows per file.

    Args:
        base_dir: Directory containing ``train/``, ``test/`` and ``train.csv``.
            Defaults to the Kaggle input path of the competition.

    Returns:
        Tuple ``(train_df, test_df)``. Both have the columns ``filepath`` and
        ``image_id``; ``train_df`` additionally has the columns of
        ``train.csv``.
    """
    dataframes = {}

    for _type in ("train", "test"):
        # os.listdir yields entries in arbitrary order; sort them so the row
        # order (and therefore "the first N images") is reproducible.
        dcm_files = sorted(os.listdir(os.path.join(base_dir, _type)))
        _df = pd.DataFrame({"filepath": dcm_files})

        # `n` has to be passed by keyword: pandas >= 2.0 rejects it as a
        # positional argument.
        _df["image_id"] = _df["filepath"].str.split(".", n=1).str[0]
        _df["filepath"] = [os.path.join(base_dir, _type, _f) for _f in dcm_files]

        dataframes[_type] = _df

    annotations = pd.read_csv(
        filepath_or_buffer=os.path.join(base_dir, "train.csv")
    )

    # Left join keeps every file found on disk, one row per annotation.
    train_df = dataframes["train"].merge(
        right=annotations,
        how="left",
        on="image_id",
        suffixes=("", "")
    )

    test_df = dataframes["test"]

    return train_df, test_df

In [None]:
# Build the train/test metadata tables, then preview the first 20 train rows
# (the trailing expression is what the notebook displays).
train_info, test_info = _read_metadata()
train_info.head(20)

## Define dataset class  

For deep learning using pytorch, we first need to prepare a dataset.

Later, we want to train a Faster-RCNN model + resnet for object detection and classification: https://pytorch.org/docs/stable/torchvision/models.html#faster-r-cnn

Therefore, our dataset needs to return images, bounding boxes and labels.

Please note these caveats:
* Faster RCNN requires the background to be encoded as 0. Our background in the dataset is 14. We therefore replace 14 with -1 and shift classes by +1.

* Furthermore, we want to load an image only once. Therefore, from our metadata table, the unique combinations of filepath and image ID are extracted and stored inside `self.data_unique`.
  + For each image ID, we check if only class 14/background was annotated by the expert readers. If this is the case, we replace all annotations with one (!) label and one box with the coordinates (x_min, y_min, x_max, y_max) = (0, 0, 1, 1).
  + If there are other classes than 14, we only keep those and extract class labels and boxes from the metadata. 
  
Since the DICOM images are very large, we further downscale them by a factor of `self.reducefct` in both the x and y dimensions. Bounding box coordinates are scaled down accordingly.

In [None]:
class XrayDataset(torch.utils.data.Dataset):
    """Map-style dataset returning one X-ray image (and its annotations) per item.

    Each item is a dict with the keys ``"image"`` and ``"image_id"``; in
    ``"train"`` mode it additionally holds ``"labels"`` and ``"boxes"``.
    Images are downscaled by ``reducefct`` in y and x, and box coordinates are
    scaled by the same factor.

    Labels are shifted by +1 relative to the annotation table so that 0 is the
    background class; the table's own background class (14) is mapped to 0.
    """

    # class ID that marks "background" in the annotation table
    BACKGROUND_CLASS_ID = 14

    def __init__(
        self,
        dataframe: pd.DataFrame,
        datatype: str,
        reducefct: int = 5
    ):
        """Store the metadata table and derive the list of unique images.

        Args:
            dataframe: Metadata with at least ``filepath`` and ``image_id``;
                in "train" mode also ``class_id``, ``x_min``, ``y_min``,
                ``x_max`` and ``y_max``.
            datatype: Either "train" (items carry labels and boxes) or "test".
            reducefct: Factor by which images and boxes are downscaled.

        Raises:
            ValueError: If ``datatype`` is not "train"/"test" or ``reducefct``
                is not positive.
        """
        if datatype not in ("train", "test"):
            # a typo would otherwise silently behave like "test"
            raise ValueError(
                f"datatype must be 'train' or 'test', got {datatype!r}"
            )
        if reducefct <= 0:
            raise ValueError(f"reducefct must be positive, got {reducefct!r}")

        # dataframe contains all data
        self.dataframe = dataframe
        self.datatype = datatype # "train" or "test"

        # data_unique contains only unique combinations of filepath and image_id
        self.data_unique = self.dataframe[["filepath", "image_id"]].drop_duplicates()

        # image size reduce factor
        self.reducefct = reducefct

    @staticmethod
    def load_image(img_path):
        """Read the image file at ``img_path`` with SimpleITK and return it as a numpy array."""
        img_sitk = sitk.ReadImage(img_path)
        np_img = sitk.GetArrayFromImage(img_sitk)
        # c, y, x
        return np_img

    # image transforms
    @staticmethod
    def img_trf(spatial_size):
        """Return the MONAI pipeline: cast, normalize, scale intensity, resize to ``spatial_size``."""
        imgtf = transforms.Compose(
            [
                transforms.CastToType(),
                transforms.NormalizeIntensity(),
                transforms.ScaleIntensity(),
                transforms.Resize(spatial_size)
            ]
        )
        return imgtf

    def get_label_boxes(self, img_id):
        """Collect the labels and boxes annotated for one image.

        Args:
            img_id: Value of ``image_id`` to look up in the metadata table.

        Returns:
            Tuple ``(lbls, boxes)``: a list of integer labels (already shifted
            by +1, so 0 = background) and an ``(N, 4)`` array with the columns
            ``x_min, y_min, x_max, y_max`` in original image coordinates. An
            image annotated only as background yields the single label 0 with
            the placeholder box ``[0, 0, 1, 1]``.
        """
        targets = self.dataframe.loc[
            self.dataframe.image_id == img_id,
            ["class_id", "x_min", "y_min", "x_max", "y_max"]
        ]

        is_background = targets.class_id == self.BACKGROUND_CLASS_ID

        if is_background.any():
            if is_background.all():
                # only background annotated: collapse all rows into one
                # placeholder row; -1 becomes label 0 after the +1 shift below
                targets = pd.DataFrame(
                    {
                        "class_id": -1,
                        "x_min": 0,
                        "y_min": 0,
                        "x_max": 1,
                        "y_max": 1
                    },
                    index=[0]
                )
            else:
                # real findings exist: drop the background rows
                targets = targets[~is_background]

        # extract labels; now 0 = background class
        lbls = list(targets.class_id.astype(int) + 1)

        # extract boxes
        boxes = targets[["x_min", "y_min", "x_max", "y_max"]].to_numpy()

        return lbls, boxes

    def _scale_boxes(self, boxes):
        """Scale boxes to the downscaled image and return them as uint16.

        Dividing by ``reducefct`` and truncating can collapse a small box (in
        particular the ``[0, 0, 1, 1]`` background placeholder) to zero width
        or height, so every box is kept at least one pixel wide and high.
        """
        scaled = np.asarray(boxes, dtype=float).reshape(-1, 4) / self.reducefct
        scaled = scaled.astype("uint16")
        # x_max >= x_min + 1 and y_max >= y_min + 1
        scaled[:, 2:] = np.maximum(scaled[:, 2:], scaled[:, :2] + 1)
        return scaled

    def __len__(self):
        """Return the number of unique images."""
        return len(self.data_unique)

    def __getitem__(self, idx):
        """Load, preprocess and return the item at position ``idx`` as a dict."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.data_unique.iloc[idx]
        img_path = row["filepath"]
        img_id = row["image_id"]

        img = self.load_image(img_path=img_path)
        if img.ndim == 2:
            # tolerate an array without channel axis by adding one
            img = img[np.newaxis]

        # resize, normalize intensity, to tensor
        _, y, x = img.shape
        ynew = max(1, int(y / self.reducefct))
        xnew = max(1, int(x / self.reducefct))
        img = self.img_trf(
            spatial_size=(ynew, xnew)
        )(img)

        out_dict = {}

        # https://pytorch.org/docs/stable/torchvision/models.html#faster-r-cnn
        if self.datatype == "train":

            # get labels and boxes
            labels, boxes = self.get_label_boxes(img_id=img_id)
            out_dict["labels"] = labels
            out_dict["boxes"] = self._scale_boxes(boxes)

        out_dict["image"] = img
        out_dict["image_id"] = img_id

        return out_dict

## Define method for plotting

In [None]:
def plot_xray(batch, max_images=20, nrows=4):
    """Show the images of ``batch`` in a grid with their boxes and labels.

    Args:
        batch: Indexable collection of sample dicts. Each dict needs an
            ``"image"`` entry indexable as ``[0, :, :]``; ``"boxes"``
            (rows of ``x_min, y_min, x_max, y_max``) and ``"labels"`` are
            optional, so samples without annotations can be plotted too.
        max_images: Upper limit of samples drawn (the first ones are used).
        nrows: Maximum number of grid rows.

    Returns:
        None. The figure is displayed via ``plt.show()``; nothing is drawn
        for an empty batch.
    """
    blen = min(len(batch), max_images)
    if blen <= 0:
        return

    # Ceil-divide so that every sample gets a cell, then drop rows that would
    # stay completely empty.
    nrows = max(1, nrows)
    ncols = (blen + nrows - 1) // nrows
    nrows = (blen + ncols - 1) // ncols

    # squeeze=False always yields a 2-D axes array, even for a single row or
    # column, so it can be flattened uniformly.
    _fig, axs = plt.subplots(nrows, ncols, figsize=(15, 15), squeeze=False)

    for _b, col in enumerate(axs.ravel()):

        if _b >= blen:
            # grid cell without a sample
            col.axis("off")
            continue

        sample = batch[_b]
        im = sample["image"]
        bx = sample.get("boxes", ())
        lbl = sample.get("labels", ())

        # Display the image
        col.imshow(im[0, :, :], cmap="gray")

        for bbox, label in zip(bx, lbl):
            # plain floats: arithmetic on unsigned integer coordinates could
            # wrap around when subtracting the text offset
            x_min, y_min, x_max, y_max = (float(v) for v in bbox)

            # Create a Rectangle patch
            rect = patches.Rectangle(
                (x_min, y_min),
                x_max - x_min,
                y_max - y_min,
                linewidth=1,
                edgecolor='r',
                facecolor='none'
            )

            col.text(
                x_max + 2,
                y_min - 2,
                str(label)
            )

            # Add the patch to the Axes
            col.add_patch(rect)

    plt.show()

# Finally, have a look at the data

In [None]:
# Wrap the training metadata in a dataset and load its first 20 samples.
train_ds = XrayDataset(train_info, "train")
batch = [train_ds[_i] for _i in range(20)]
# Shape of the first preprocessed image (displayed as the cell output).
batch[0]["image"].shape

In [None]:
# Draw the loaded samples together with their bounding boxes and class labels.
plot_xray(batch)