# Setup

In [1]:
!pip install pydicom -q



In [2]:
import sys
sys.path.append('../input/d/kozodoi/timm-pytorch-image-models/pytorch-image-models-master/')

In [3]:
# General imports.
import random
import pydicom
import platform
import os

import numpy as np
import pandas as pd
import albumentations as A
import sklearn
import cv2
import timm
import torch
import torch.nn as nn

# Specific imports. 
from collections import Counter
from multiprocessing import Pool
from pydicom.pixel_data_handlers.util import apply_voi_lut

from tqdm.notebook import tqdm
from albumentations.pytorch import ToTensorV2
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD

import warnings
warnings.simplefilter('ignore')

# Utility Functions

In [4]:
def seed_everything(seed=123):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

# Converting to PNGs and Extracting Meta DataFrame

In [5]:
# For fast submissions.
sample_submission = pd.read_csv(r"../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")
fast_sub = True if sample_submission.shape[0] == 87 else False
fast_cnt = 3

In [6]:
# Data is here: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/data.

mode = "test"
meta_df_name = "test_meta"
png_image_path_root = "./images/"
comp_data_root = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/"
meta_df_root = "./"

os.makedirs(png_image_path_root, exist_ok=True)
os.makedirs(meta_df_root, exist_ok=True)

class ME:
    def __init__(self, file_path, ImageID, PatientID, mpMRI_type):
        self.file_path = file_path
        self.ImageID = ImageID
        self.PatientID = PatientID
        self.mpMRI_type = mpMRI_type

        
def dicom2image(ele):
    dcm_file = pydicom.read_file(ele.file_path)
    
    PatientID = dcm_file.PatientID
    StudyInstanceUID = dcm_file.StudyInstanceUID
    SeriesInstanceUID = dcm_file.SeriesInstanceUID
    SeriesDescription = dcm_file.SeriesDescription  # This is the mpMRI scan type.

    assert PatientID == ele.PatientID, "DCM Image patientid and file path patientid do not match!"
    assert SeriesDescription == ele.mpMRI_type, "SeriesDescription and mpMRI scan type do not match!"

    data = apply_voi_lut(dcm_file.pixel_array, dcm_file)

    if dcm_file.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    data = data / np.max(data)
    data = (data * 255).astype(np.uint8)

    image_path = os.path.join(png_image_path_root, f"{PatientID}_{SeriesDescription}_{ele.ImageID}.png")
    cv2.imwrite(image_path, data)
    
    return [ele.file_path, image_path, PatientID, SeriesDescription, ele.ImageID, StudyInstanceUID, SeriesInstanceUID]

# cnt is for counting and for fast submission.
c = Counter()
images_meta = []
for root, dirs, files in os.walk(os.path.join(comp_data_root, f"{mode}/")):
    if len(files) != 0 and (".dcm" in files[0] or ".dicom" in files[0]):
        split = root.split("/")
        patientid = split[-2]
        mpMRI_type = split[-1]

        c[patientid] += 1
        for file in files:
            full_path = os.path.join(root, file)
            ImageID = file.split(".")[0]  # Get the image file name.
            
            dcm_file = pydicom.read_file(full_path)
            PatientID = dcm_file.PatientID
            SeriesDescription = dcm_file.SeriesDescription  # This is the mpMRI scan type.
            
            images_meta.append(ME(full_path, ImageID, PatientID, SeriesDescription))
        
        # For fast submissions.
        if fast_sub and len(dict(c).keys()) == fast_cnt:
            break
        
p = Pool(16)
results = p.map(func=dicom2image, iterable=images_meta)
meta_df = pd.DataFrame(
        data=np.array(results), 
        columns=["dicom_filepath", "png_filepath", "PatientID", "SeriesDescription", "ImageID", "StudyInstanceUID", "SeriesInstanceUID"])

# This part is for when the PatientIDs are turned into ints (for some weird reason).
patientids = [x.split("/")[-3] for x in meta_df.dicom_filepath.values]
meta_df.PatientID = patientids

meta_df.to_csv(os.path.join(meta_df_root, f"{meta_df_name}.csv"), index=False)

# Building the Dataset

In [7]:
test_meta = pd.read_csv("test_meta.csv")
test_meta.head()

Unnamed: 0,dicom_filepath,png_filepath,PatientID,SeriesDescription,ImageID,StudyInstanceUID,SeriesInstanceUID
0,../input/rsna-miccai-brain-tumor-radiogenomic-...,./images/00114_T2w_Image-4.png,114,T2w,Image-4,1.2.826.0.1.3680043.8.498.38439529458846212961...,1.2.826.0.1.3680043.8.498.91885894975366405300...
1,../input/rsna-miccai-brain-tumor-radiogenomic-...,./images/00114_T2w_Image-2.png,114,T2w,Image-2,1.2.826.0.1.3680043.8.498.38439529458846212961...,1.2.826.0.1.3680043.8.498.91885894975366405300...
2,../input/rsna-miccai-brain-tumor-radiogenomic-...,./images/00114_T2w_Image-3.png,114,T2w,Image-3,1.2.826.0.1.3680043.8.498.38439529458846212961...,1.2.826.0.1.3680043.8.498.91885894975366405300...
3,../input/rsna-miccai-brain-tumor-radiogenomic-...,./images/00114_T2w_Image-5.png,114,T2w,Image-5,1.2.826.0.1.3680043.8.498.38439529458846212961...,1.2.826.0.1.3680043.8.498.91885894975366405300...
4,../input/rsna-miccai-brain-tumor-radiogenomic-...,./images/00114_T2w_Image-19.png,114,T2w,Image-19,1.2.826.0.1.3680043.8.498.38439529458846212961...,1.2.826.0.1.3680043.8.498.91885894975366405300...


In [8]:
class RsnaMiccaiDataset(Dataset):
    def __init__(self, df, images_dir, image_size, mode, classes, by_patient=False):
        super(RsnaMiccaiDataset, self).__init__()
        self.df = df.reset_index(drop=True)
        self.images_dir = images_dir
        self.image_size = image_size
        assert mode in ['train', 'valid', 'test']
        self.mode = mode
        self.classes = classes

        self.patient_ids = self.df.PatientID.sort_values().reset_index(drop=True).unique()
        self.by_patient = by_patient

        if self.mode == 'train':
            self.df = self.df.sample(frac=1).reset_index(drop=True)

            self.transform = A.Compose([
                # A.RandomResizedCrop(height=self.image_size, width=self.image_size, scale=(0.25, 1.0), ratio=(0.75, 1.3333333333333333), interpolation=1, p=1.0),
                # A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=30, interpolation=1, border_mode=0, value=0, p=0.25),
                # A.HorizontalFlip(p=0.5),
                # A.VerticalFlip(p=0.5),
                # A.OneOf([
                #     A.MotionBlur(p=.2),
                #     A.MedianBlur(blur_limit=3, p=0.1),
                #     A.Blur(blur_limit=3, p=0.1),
                # ], p=0.25),
                # A.OneOf([
                #     A.CLAHE(clip_limit=2),
                #     A.IAASharpen(),
                #     A.IAAEmboss(),
                #     A.RandomBrightnessContrast(),            
                # ], p=0.25),
                # A.Cutout(num_holes=8, max_h_size=32, max_w_size=32, fill_value=0, p=0.25),

                A.Resize(self.image_size, self.image_size),
                A.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
                ToTensorV2(),
            ])
        else:
            self.transform = A.Compose([
                A.Resize(self.image_size, self.image_size),
                A.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
                ToTensorV2(),
            ])

    def __len__(self):
        if self.by_patient: return len(self.patient_ids)
        return len(self.df)

    def __getitem__(self, index):
        if self.mode == "test":
            patientid = self.patient_ids[index]
            df_sliced_by_patientid = self.df[self.df.PatientID == patientid].reset_index(drop=True)
            images = []
            for idx, row in df_sliced_by_patientid.iterrows():
                if idx % 5 == 0:
                    image = cv2.imread(row.png_filepath, cv2.IMREAD_GRAYSCALE)
                    image = np.stack([image, image, image],axis=-1)
                    image = self.transform(image=image)['image']
                    images.append(image)
            image = torch.mean(torch.stack(images), dim=0)
            return patientid, image
        else:
            if self.by_patient:
                patientid = self.patient_ids[index]
                df_sliced_by_patientid = self.df[self.df.PatientID == patientid].reset_index(drop=True)
                images = []
                for idx, row in df_sliced_by_patientid.iterrows():
                    if idx % 5 == 0:
                        image = cv2.imread(row.png_filepath, cv2.IMREAD_GRAYSCALE)
                        image = np.stack([image, image, image],axis=-1)
                        image = self.transform(image=image)['image']
                        images.append(image)
                image = torch.mean(torch.stack(images), dim=0)
                assert df_sliced_by_patientid.MGMT_value.nunique() == 1, f"The mpMRI scan conclusions for patient {patientid} disagree with each other!"
                label = torch.FloatTensor(df_sliced_by_patientid.loc[0, self.classes])
                return image, label


            img_path = '{}/{}'.format(self.images_dir, self.df.loc[index, 'png_filepath'])
            image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            image = np.stack([image, image, image], axis=-1)
            image = self.transform(image=image)['image']
            label = torch.FloatTensor(self.df.loc[index, self.classes])
            return image, label

# Hyperparameters

In [9]:
class Config:
    model_name = "tf_efficientnet_b0"
    batch_size = 32
    image_size = 512
    num_workers = 0
    epochs = 30
    init_lr = 0.001
    fold_type = "fold_gkf_patientid"

    # Final params.
    folds = 5
    classes = ["MGMT_value"]
    n_classes = len(classes)
    project_name = "RSNA-MICCAI_baseline"
    ckpt_dir = "."
    seed_everything()

# Building the Model

In [10]:
cfg = Config()

model = timm.create_model(cfg.model_name, pretrained=False)
model = nn.Sequential(*list(model.children()),
                        nn.Linear(1000, cfg.n_classes))
model.load_state_dict(torch.load(r"../input/baseline-models/fold0_epoch3.pt"))

<All keys matched successfully>

# Prediction

In [11]:
infer_ds = RsnaMiccaiDataset(
    test_meta, ".", 
    image_size=cfg.image_size, 
    mode="test",
    classes=cfg.classes, 
    by_patient=True
)

infer_loader = DataLoader(
    infer_ds, 
    batch_size=cfg.batch_size,
    sampler=RandomSampler(infer_ds), 
    num_workers=cfg.num_workers,
    drop_last=False
)

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.eval().to(device)
predictions = []
patientids = []
for patient_ids, images in tqdm(infer_loader):
    images = images.to(device).float()

    with torch.cuda.amp.autocast(), torch.no_grad():
        outputs = model(images)
        predictions.append(outputs.data.cpu().numpy())
        patientids.append(patient_ids.data.cpu().numpy())

  0%|          | 0/1 [00:00<?, ?it/s]

In [13]:
ids = [str(x).zfill(5) for x in np.concatenate(patientids)]
preds = np.concatenate(predictions).flatten()
submission = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": preds})
submission.to_csv("submission.csv", index=False)

In [14]:
submission

Unnamed: 0,BraTS21ID,MGMT_value
0,114,0.339111
1,13,0.277832
2,821,0.249146


In [15]:
!rm test_meta.csv
!rm -rf images/