# Setup

In [2]:
from google.colab import files
files.upload()  # Upload your kaggle.json here.

from google.colab import drive
drive.mount('/content/gdrive')

!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

# !mkdir kaggle/
# %cd kaggle/
# !mkdir input/
# !mkdir output/
# %cd input/

!mkdir rsna-miccai-png
%cd rsna-miccai-png/
!kaggle datasets download -d jonathanbesomi/rsna-miccai-png
!unzip -q rsna-miccai-png.zip
!rm rsna-miccai-png.zip
%cd ../

# %cd output/

Saving kaggle.json to kaggle.json
Mounted at /content/gdrive
/content/rsna-miccai-png
Downloading rsna-miccai-png.zip to /content/rsna-miccai-png
100% 5.08G/5.08G [00:51<00:00, 72.8MB/s]
100% 5.08G/5.08G [00:51<00:00, 105MB/s] 
/content


In [3]:
!pip install albumentations --upgrade -q
!pip install pydicom -q
!pip install timm -q
!pip install torch --upgrade -q
!pip install keras_applications -q
!pip install efficientnet-3D -q
!pip install git+https://github.com/shijianjian/EfficientNet-PyTorch-3D -q
!pip install classification-models-3D -q
!pip install volumentations-3D -q

[K     |████████████████████████████████| 98 kB 3.2 MB/s 
[K     |████████████████████████████████| 37.1 MB 88 kB/s 
[K     |████████████████████████████████| 2.0 MB 5.0 MB/s 
[K     |████████████████████████████████| 282 kB 5.2 MB/s 
[K     |████████████████████████████████| 376 kB 5.1 MB/s 
[K     |████████████████████████████████| 50 kB 2.8 MB/s 
[?25h  Building wheel for efficientnet-pytorch-3d (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 45 kB 2.6 MB/s 
[?25h

In [4]:
# General imports.
import gc
import re
import sys
import PIL
import yaml
import math
import random
import pydicom
import platform
import shutil, os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import albumentations as A
import sklearn
import cv2
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import tensorflow as tf

# Specific imports. 
from math import ceil
from glob import glob
from PIL import Image
from IPython.display import Image, clear_output
from pydicom.pixel_data_handlers.util import apply_voi_lut

from tqdm.notebook import tqdm
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GroupKFold
from tensorflow import keras
from efficientnet_3D import keras as efn  # TF.keras.
from efficientnet_pytorch_3d import EfficientNet3D  # Pytorch.
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torchsummary import summary
from torchvision import transforms
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD

import warnings
warnings.simplefilter('ignore')

In [5]:
!pip install wandb -qqq
import wandb
# from wandb.keras import WandbCallback
wandb.login()

[K     |████████████████████████████████| 1.7 MB 5.0 MB/s 
[K     |████████████████████████████████| 97 kB 6.5 MB/s 
[K     |████████████████████████████████| 170 kB 44.6 MB/s 
[K     |████████████████████████████████| 133 kB 47.2 MB/s 
[K     |████████████████████████████████| 63 kB 1.6 MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# Utility Functions

In [6]:
def seed_everything(seed=123):
  random.seed(seed)
  os.environ['PYTHONHASHSEED'] = str(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)

# Building the Dataset

In [7]:
train_kfold = pd.read_csv(r"/content/gdrive/MyDrive/Kaggle Competitions/RSNA-brain-tumor/train_kfold.csv")

In [8]:
# Getting rid of unneeded cases.
train_kfold = train_kfold[train_kfold.PatientID != 109]
train_kfold = train_kfold[train_kfold.PatientID != 123]
train_kfold = train_kfold[train_kfold.PatientID != 709].reset_index(drop=True)

In [9]:
train_kfold.head()

Unnamed: 0,dicom_filepath,png_filepath,PatientID,SeriesDescription,ImageID,StudyInstanceUID,SeriesInstanceUID,MGMT_value,fold_gkf_patientid,fold_skf_patientid,fold_skf_seriesdescription,fold_skf_MGMT_value,fold_gkf_png_filepath
0,../input/rsna-miccai-brain-tumor-radiogenomic-...,./rsna-miccai-png/train/00688/T2w/Image-273.png,688,T2w,Image-273,1.2.826.0.1.3680043.8.498.11655577270489271674...,1.2.826.0.1.3680043.8.498.75434499750167731864...,0,0.0,0.0,0.0,0.0,1.0
1,../input/rsna-miccai-brain-tumor-radiogenomic-...,./rsna-miccai-png/train/00688/T2w/Image-245.png,688,T2w,Image-245,1.2.826.0.1.3680043.8.498.11655577270489271674...,1.2.826.0.1.3680043.8.498.75434499750167731864...,0,0.0,0.0,0.0,0.0,3.0
2,../input/rsna-miccai-brain-tumor-radiogenomic-...,./rsna-miccai-png/train/00688/T2w/Image-130.png,688,T2w,Image-130,1.2.826.0.1.3680043.8.498.11655577270489271674...,1.2.826.0.1.3680043.8.498.75434499750167731864...,0,0.0,0.0,0.0,0.0,1.0
3,../input/rsna-miccai-brain-tumor-radiogenomic-...,./rsna-miccai-png/train/00688/T2w/Image-98.png,688,T2w,Image-98,1.2.826.0.1.3680043.8.498.11655577270489271674...,1.2.826.0.1.3680043.8.498.75434499750167731864...,0,0.0,0.0,0.0,0.0,4.0
4,../input/rsna-miccai-brain-tumor-radiogenomic-...,./rsna-miccai-png/train/00688/T2w/Image-247.png,688,T2w,Image-247,1.2.826.0.1.3680043.8.498.11655577270489271674...,1.2.826.0.1.3680043.8.498.75434499750167731864...,0,0.0,0.0,0.0,0.0,4.0


In [10]:
class RsnaMiccaiDataset(Dataset):
    def __init__(self, df, images_dir, image_size, mode, classes, by_patient=False):
        super(RsnaMiccaiDataset, self).__init__()
        self.df = df.reset_index(drop=True)
        self.images_dir = images_dir
        self.image_size = image_size
        assert mode in ['train', 'valid']
        self.mode = mode
        self.classes = classes

        self.patient_ids = self.df.PatientID.sort_values().reset_index(drop=True).unique()
        self.by_patient = by_patient

        if self.mode == 'train':
            self.df = self.df.sample(frac=1).reset_index(drop=True)

            self.transform = A.Compose([
                # A.RandomResizedCrop(height=self.image_size, width=self.image_size, scale=(0.25, 1.0), ratio=(0.75, 1.3333333333333333), interpolation=1, p=1.0),
                # A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=30, interpolation=1, border_mode=0, value=0, p=0.25),
                # A.HorizontalFlip(p=0.5),
                # A.VerticalFlip(p=0.5),
                # A.OneOf([
                #     A.MotionBlur(p=.2),
                #     A.MedianBlur(blur_limit=3, p=0.1),
                #     A.Blur(blur_limit=3, p=0.1),
                # ], p=0.25),
                # A.OneOf([
                #     A.CLAHE(clip_limit=2),
                #     A.IAASharpen(),
                #     A.IAAEmboss(),
                #     A.RandomBrightnessContrast(),            
                # ], p=0.25),
                # A.Cutout(num_holes=8, max_h_size=32, max_w_size=32, fill_value=0, p=0.25),

                A.Resize(self.image_size, self.image_size),
                A.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
                ToTensorV2(),
            ])
        else:
            self.transform = A.Compose([
                A.Resize(self.image_size, self.image_size),
                A.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
                ToTensorV2(),
            ])

    def __len__(self):
        if self.by_patient: return len(self.patient_ids)
        return len(self.df)

    def __getitem__(self, index):
        if self.by_patient:
            patientid = self.patient_ids[index]
            df_sliced_by_patientid = self.df[self.df.PatientID == patientid].reset_index(drop=True)
            images = []
            for idx, row in df_sliced_by_patientid.iterrows():
                if idx % 5 == 0:
                    image = cv2.imread(row.png_filepath, cv2.IMREAD_GRAYSCALE)
                    image = np.stack([image, image, image],axis=-1)
                    image = self.transform(image=image)['image']
                    images.append(image)
            image = torch.mean(torch.stack(images), dim=0)
            assert df_sliced_by_patientid.MGMT_value.nunique() == 1, f"The mpMRI scan conclusions for patient {patientid} disagree with each other!"
            label = torch.FloatTensor(df_sliced_by_patientid.loc[0, self.classes])
            return image, label


        img_path = '{}/{}'.format(self.images_dir, self.df.loc[index, 'png_filepath'])
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        image = np.stack([image, image, image], axis=-1)
        image = self.transform(image=image)['image']
        label = torch.FloatTensor(self.df.loc[index, self.classes])
        return image, label

In [11]:
l = []
for x in train_kfold.PatientID.unique():
  l.append(len(train_kfold[train_kfold.PatientID == x]))
pd.Series(l).describe()  # Descriptive statistics about how many images per patient for all 582 patients.

count     582.000000
mean      434.929553
std       218.453050
min        60.000000
25%       298.250000
50%       392.500000
75%       651.750000
max      1013.000000
dtype: float64

# Hyperparameters

In [12]:
class Config:
  model_name = "tf_efficientnet_b0"
  batch_size = 32
  image_size = 512
  num_workers = 0
  epochs = 30
  init_lr = 0.001
  fold_type = "fold_gkf_patientid"

  # Final params.
  folds = 5
  classes = ["MGMT_value"]
  n_classes = len(classes)
  project_name = "RSNA-MICCAI_baseline"
  ckpt_dir = "."
  seed_everything()

# Training

In [1]:
import torch
if torch.cuda.is_available():
  print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
else:
  print("\n[INFO] GPU not found. Using CPU: {}\n".format(platform.processor()))

[INFO] Using GPU: Tesla P100-PCIE-16GB



In [14]:
cfg = Config()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for fold in range(1, cfg.folds):
  CHECKPOINT = '{}/{}_{}_fold{}.pth'.format(cfg.ckpt_dir, cfg.model_name, cfg.image_size, fold)

  run = wandb.init(project=cfg.project_name, name=f"fold{fold}")

  model = timm.create_model(cfg.model_name, pretrained=True)
  model = nn.Sequential(*list(model.children()),
                        nn.Linear(1000, cfg.n_classes))

  criterion = nn.BCEWithLogitsLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=cfg.init_lr)
  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, cfg.epochs-1)
  scaler = torch.cuda.amp.GradScaler()

  train_df = train_kfold[getattr(train_kfold, cfg.fold_type) != fold]
  val_df = train_kfold[getattr(train_kfold, cfg.fold_type) == fold]

  train_ds = RsnaMiccaiDataset(df=train_df,
                               images_dir=".",
                               image_size=cfg.image_size,
                               mode="train",
                               classes=cfg.classes,
                               by_patient=True)
  val_ds = RsnaMiccaiDataset(df=val_df,
                             images_dir=".",
                             image_size=cfg.image_size,
                             mode="valid",
                             classes=cfg.classes,
                             by_patient=True)
  
  train_loader = DataLoader(train_ds, 
                            batch_size=cfg.batch_size,
                            sampler=RandomSampler(train_ds), 
                            num_workers=cfg.num_workers, 
                            drop_last=True)
  
  val_loader = DataLoader(val_ds, 
                          batch_size=cfg.batch_size,
                          sampler=RandomSampler(val_ds), 
                          num_workers=cfg.num_workers, 
                          drop_last=False)

  val_loss_min = np.Inf
  early_stopping_patience = 5
  cnt = 0

  for epoch in range(1, cfg.epochs + 1):
    scheduler.step()
    model.train().to(device)
    train_loss = []

    print('Epoch: {:02d}/{:02d}'.format(epoch, cfg.epochs))
    print("TRAIN")
    loop = tqdm(train_loader)
    for images, labels in loop:
      images = images.to(device).float()
      labels = labels.to(device).float()
                
      optimizer.zero_grad()

      with torch.cuda.amp.autocast(): 
        outputs = model(images)
        loss = criterion(outputs, labels)

      scaler.scale(loss).backward()
      scaler.step(optimizer)
      scaler.update()

      train_loss.append(loss.item())
      loop.set_description('current_loss: {:.5f} | LR: {:.5f}'.format(loss.item(), optimizer.param_groups[0]['lr']))
      loop.set_postfix(loss=np.mean(train_loss))
    train_loss = np.mean(train_loss)

    model.eval()

    auc_scores = []
    val_loss = []

    print("VAL")
    loop = tqdm(val_loader)
    for images, labels in loop:
      images = images.to(device).float()
      labels = labels.to(device).float()

      with torch.cuda.amp.autocast(), torch.no_grad():
        outputs = model(images)
        loss = criterion(outputs.float(), labels)
        
      auc_score = roc_auc_score(labels.data.cpu().numpy(), 
                                F.sigmoid(outputs).data.cpu().numpy())
      auc_scores.append(auc_score)

      val_loss.append(loss.item())
      loop.set_description('current_loss: {:.5f} | current_auc: {:.5f}'.format(loss.item(), auc_score))
      loop.set_postfix(loss=np.mean(val_loss), auc=np.mean(auc_scores))
    val_loss = np.mean(val_loss)
    final_auc_score = np.mean(auc_scores)

    wandb.log({"epoch": epoch, 
               "loss": train_loss, 
               "val_loss": val_loss,
               "val_auc": final_auc_score,
              })

    if val_loss < val_loss_min:
      cnt = 0
      print('Valid loss improved from {:.5f} to {:.5f} saving model to {}'.format(val_loss_min, val_loss, CHECKPOINT))
      val_loss_min = val_loss
      torch.save(model.state_dict(), CHECKPOINT)
      artifact = wandb.Artifact(cfg.model_name, type='model')
      artifact.add_file(CHECKPOINT, name=f"fold{fold}_epoch{epoch}.pt")
      run.log_artifact(artifact)
    else:
      cnt += 1
    print("")

    if cnt >= early_stopping_patience:
      break 

  del model
  del optimizer
  torch.cuda.empty_cache()

  run.finish()

[34m[1mwandb[0m: Currently logged in as: [33mvincenttu[0m (use `wandb login --relogin` to force relogin)


Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_aa-827b6e33.pth" to /root/.cache/torch/hub/checkpoints/tf_efficientnet_b0_aa-827b6e33.pth


Epoch: 01/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Valid loss improved from inf to 0.69129 saving model to ./tf_efficientnet_b0_512_fold1.pth
Epoch: 02/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 03/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 04/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 05/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 06/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 07/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]




VBox(children=(Label(value=' 20.45MB of 20.45MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
epoch,7.0
loss,0.16561
val_loss,1.63395
val_auc,0.4578
_runtime,2772.0
_timestamp,1631311513.0
_step,6.0


0,1
epoch,▁▂▃▅▆▇█
loss,█▆▅▄▃▁▁
val_loss,▁▄▁▁▂█▃
val_auc,▅▁▇▇▂█▂
_runtime,▁▂▃▄▆▇█
_timestamp,▁▂▃▄▆▇█
_step,▁▂▃▅▆▇█


Epoch: 01/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Valid loss improved from inf to 0.69290 saving model to ./tf_efficientnet_b0_512_fold2.pth
Epoch: 02/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 03/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 04/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 05/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 06/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 07/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]




VBox(children=(Label(value=' 20.45MB of 20.45MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
epoch,7.0
loss,0.16914
val_loss,1.74831
val_auc,0.61186
_runtime,2728.0
_timestamp,1631314245.0
_step,6.0


0,1
epoch,▁▂▃▅▆▇█
loss,█▆▅▄▁▁▁
val_loss,▁▁▁▂█▃▄
val_auc,▄▁▂▃▅▇█
_runtime,▁▂▃▅▆▇█
_timestamp,▁▂▃▅▆▇█
_step,▁▂▃▅▆▇█


Epoch: 01/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Valid loss improved from inf to 0.69805 saving model to ./tf_efficientnet_b0_512_fold3.pth
Epoch: 02/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 03/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 04/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 05/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 06/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 07/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]




VBox(children=(Label(value=' 20.45MB of 20.45MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
epoch,7.0
loss,0.06821
val_loss,1.7043
val_auc,0.54823
_runtime,2729.0
_timestamp,1631316977.0
_step,6.0


0,1
epoch,▁▂▃▅▆▇█
loss,█▇▆▃▃▂▁
val_loss,▁▂▂▆▆██
val_auc,▄▁▂▇▆██
_runtime,▁▂▃▅▆▇█
_timestamp,▁▂▃▅▆▇█
_step,▁▂▃▅▆▇█


Epoch: 01/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Valid loss improved from inf to 0.68633 saving model to ./tf_efficientnet_b0_512_fold4.pth
Epoch: 02/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 03/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 04/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 05/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 06/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]


Epoch: 07/30
TRAIN


  0%|          | 0/14 [00:00<?, ?it/s]

VAL


  0%|          | 0/4 [00:00<?, ?it/s]




VBox(children=(Label(value=' 20.45MB of 20.45MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
epoch,7.0
loss,0.18609
val_loss,1.40485
val_auc,0.57793
_runtime,2731.0
_timestamp,1631319713.0
_step,6.0


0,1
epoch,▁▂▃▅▆▇█
loss,█▅▄▃▂▂▁
val_loss,▁▁▂▂▄█▆
val_auc,█▃▁▃▃▁▆
_runtime,▁▂▃▅▆▇█
_timestamp,▁▂▃▅▆▇█
_step,▁▂▃▅▆▇█


In [None]:
run.finish()