# If you copy this notebook, please upvote! The version that scores 0.715 cannot predict on the private leaderboard (it fails with an error). The version that scores 0.706 is ready to predict.

### Efficientnet3D with one MRI type [Inference]

In [None]:
import os
import sys 
import json
import glob
import random
import collections
import time

import numpy as np
import pandas as pd
import pydicom
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional
import torch.nn.functional as F

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

In [None]:
# Pick the dataset and EfficientNet-3D source locations: use the "../input"
# layout when that directory exists, otherwise fall back to a local checkout.
if os.path.exists("../input/rsna-miccai-brain-tumor-radiogenomic-classification"):
    data_directory = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'
    pytorch3dpath = "../input/efficientnetpyttorch3d/EfficientNet-PyTorch-3D"
else:
    data_directory = '/media/roland/data/kaggle/rsna-miccai-brain-tumor-radiogenomic-classification'
    pytorch3dpath = "EfficientNet-PyTorch-3D"
    
# MRI sequence sub-folder names; later zipped with `modelfiles`, so order matters.
mri_types = ['FLAIR','T1w','T1wCE','T2w']
# Default in-plane size (pixels) and number of slices per volume used by the loaders.
SIZE = 256
NUM_IMAGES = 64

# The EfficientNet-3D package is imported from a source directory, so that
# directory has to be on sys.path before the import below.
sys.path.append(pytorch3dpath)
from efficientnet_pytorch_3d import EfficientNet3D

In [None]:
def load_dicom_image(path, img_size=SIZE):
    """Read one DICOM file and return it as a min-max scaled square image.

    Args:
        path: path of the ``.dcm`` file to read.
        img_size: side length of the returned image.

    Returns:
        An ``(img_size, img_size)`` array. A constant image (min == max) is
        returned as all zeros without being resized; otherwise the pixels are
        shifted to start at 0, divided by their maximum and resized.
    """
    pixels = pydicom.read_file(path).pixel_array
    lowest = np.min(pixels)
    if lowest == np.max(pixels):
        # Nothing to normalise in a flat slice: hand back a blank image.
        return np.zeros((img_size, img_size))

    shifted = pixels - lowest
    peak = np.max(shifted)
    scaled = shifted / peak if peak != 0 else shifted

    #data = (data * 255).astype(np.uint8)
    return cv2.resize(scaled, (img_size, img_size))

def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=SIZE, mri_type="FLAIR", split="train"):
    """Load the central slices of one MRI series as a single-channel volume.

    Args:
        scan_id: zero-padded study folder name (e.g. ``"00000"``).
        num_imgs: number of slices in the returned volume.
        img_size: in-plane size of every slice.
        mri_type: series sub-folder name.
        split: dataset sub-folder (``"train"`` or ``"test"``).

    Returns:
        Array of shape ``(1, img_size, img_size, num_imgs)``. Up to
        ``2 * (num_imgs // 2)`` slices around the middle of the sorted file
        list are used; missing slices are zero-padded at the end. When the
        series folder holds no ``.dcm`` file an all-zero volume is returned
        and a warning is emitted instead of crashing in ``np.stack``.
    """
    import warnings

    # NOTE(review): this is a plain lexicographic sort of the paths. Changing
    # it would change the slice order fed to the model, so it is kept as is;
    # confirm it matches the ordering used when the weights were trained.
    files = sorted(glob.glob(f"{data_directory}/{split}/{scan_id}/{mri_type}/*.dcm"))

    if not files:
        warnings.warn(
            f"No DICOM files found for scan {scan_id} ({mri_type}, {split}); "
            "returning an all-zero volume."
        )
        return np.zeros((1, img_size, img_size, num_imgs))

    middle = len(files)//2
    num_imgs2 = num_imgs//2
    p1 = max(0, middle - num_imgs2)
    p2 = min(len(files), middle + num_imgs2)
    # Forward img_size so the slices match the zero padding built below
    # (previously the loader's own default size was always used).
    img3d = np.stack([load_dicom_image(f, img_size=img_size) for f in files[p1:p2]]).T
    if img3d.shape[-1] < num_imgs:
        n_zero = np.zeros((img_size, img_size, num_imgs - img3d.shape[-1]))
        img3d = np.concatenate((img3d,  n_zero), axis = -1)

    return np.expand_dims(img3d,0)

load_dicom_images_3d("00000").shape

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    Also exports ``PYTHONHASHSEED`` and, when CUDA is available, seeds every
    CUDA device and switches cuDNN to deterministic mode.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

set_seed(42)

In [None]:
# Load the training labels and show them in the notebook output.
train_df = pd.read_csv(f"{data_directory}/train_labels.csv")
display(train_df)

# Hold out 20% of the rows for validation, stratified on the MGMT_value label
# and with a fixed random_state so the split is repeatable.
df_train, df_valid = sk_model_selection.train_test_split(
    train_df, 
    test_size=0.2, 
    random_state=42, 
    stratify=train_df["MGMT_value"],
)

In [None]:
class Dataset(torch_data.Dataset):
    """Map-style dataset that yields one 3D MRI volume per scan id.

    Without ``targets`` each item is ``{"X": volume, "id": scan_id}`` and the
    volume is read from ``split``. With ``targets`` each item is
    ``{"X": volume, "y": label}``; the volume is always read from the
    ``"train"`` split and the label is ``abs(target - label_smoothing)``.
    """

    def __init__(self, paths, targets=None, mri_type=None, label_smoothing=0.01, split="train"):
        self.paths = paths
        self.targets = targets
        self.mri_type = mri_type
        self.label_smoothing = label_smoothing
        self.split = split

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        scan_id = self.paths[index]
        labelled = self.targets is not None
        # Labelled samples always come from the training folder.
        source_split = "train" if labelled else self.split
        volume = load_dicom_images_3d(
            str(scan_id).zfill(5),
            mri_type=self.mri_type[index],
            split=source_split,
        )

        sample = {"X": torch.tensor(volume).float()}
        if labelled:
            smoothed = abs(self.targets[index] - self.label_smoothing)
            sample["y"] = torch.tensor(smoothed, dtype=torch.float)
        else:
            sample["id"] = scan_id
        return sample

In [None]:
class Model(nn.Module):
    """EfficientNet3D-b0 on single-channel input with a one-logit head."""

    def __init__(self):
        super().__init__()
        backbone = EfficientNet3D.from_name("efficientnet-b0", override_params={'num_classes': 2}, in_channels=1)
        # Swap the stock classifier for a single-output linear layer.
        backbone._fc = nn.Linear(in_features=backbone._fc.in_features, out_features=1, bias=True)
        self.net = backbone

    def forward(self, x):
        """Return the raw (pre-sigmoid) output of the network for ``x``."""
        return self.net(x)

In [None]:
# Checkpoint file names, one per entry of `mri_types` and in the same order
# (the two lists are zipped together when predicting).
modelfiles = None

# `modelfiles` was just set to None, so this branch always runs here.
if not modelfiles:
    modelfiles = ['FLAIR-e3-loss0.694-auc0.351.pth', 'T1w-e7-loss0.685-auc0.555.pth', 'T1wCE-e6-loss0.683-auc0.633.pth', 'T2w-e8-loss0.658-auc0.677.pth']
    print(modelfiles)

In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def predict(modelfile, df, mri_type, split):
    """Run one checkpoint over every scan in ``df`` for a single MRI type.

    Args:
        modelfile: checkpoint file name inside the weights directory; the file
            must hold a dict with a ``"model_state_dict"`` entry.
        df: frame indexed by scan id. An ``"MRI_Type"`` column is written to it
            in place (callers see that column afterwards).
        mri_type: series name to load for every scan.
        split: dataset sub-folder the scans are read from.

    Returns:
        DataFrame indexed by ``BraTS21ID`` with one ``MGMT_value`` column
        holding the sigmoid of the model output for each scan.
    """
    print("Predict:", modelfile, mri_type, df.shape)
    df.loc[:,"MRI_Type"] = mri_type
    data_retriever = Dataset(
        df.index.values,
        mri_type=df["MRI_Type"].values,
        split=split
    )

    data_loader = torch_data.DataLoader(
        data_retriever,
        batch_size=4,
        shuffle=False,
        num_workers=8,
    )

    model = Model()
    model.to(device)

    # map_location lets a checkpoint saved on a GPU load on the CPU fallback.
    checkpoint = torch.load(
        f'../input/efficientnet3d-with-one-mri-type-model-weights/{modelfile}',
        map_location=device,
    )
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    y_pred = []
    ids = []

    with torch.no_grad():
        for e, batch in enumerate(data_loader,1):
            print(f"{e}/{len(data_loader)}", end="\r")
            probs = torch.sigmoid(model(batch["X"].to(device)))
            # Flatten instead of squeeze so a batch of one still yields a
            # list of plain floats rather than a 0-d array.
            y_pred.extend(probs.cpu().numpy().reshape(-1).tolist())
            ids.extend(batch["id"].numpy().tolist())

    preddf = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
    preddf = preddf.set_index("BraTS21ID")
    return preddf

In [None]:
# Start from the sample submission, indexed by scan id.
submission = pd.read_csv(f"{data_directory}/sample_submission.csv", index_col="BraTS21ID")

# Accumulate the per-MRI-type predictions; the addition aligns rows on the
# BraTS21ID index. Each predict() call also writes an "MRI_Type" column into
# `submission`.
submission["MGMT_value"] = 0
for m, mtype in zip(modelfiles, mri_types):
    pred = predict(m, submission, mtype, split="test")
    submission["MGMT_value"] += pred["MGMT_value"]

# Turn the sum into a mean over the models and save only the MGMT_value column
# (with its index).
submission["MGMT_value"] /= len(modelfiles)
submission["MGMT_value"].to_csv("submission_effnet3d_score_0684.csv")

In [None]:
# Keep an independent copy of this pipeline's result, because `submission` is
# reassigned by a later section of the notebook.
submission_effnet3d_score_0684 = submission.copy()
submission_effnet3d_score_0684.head()

### 🧠Brain Tumor 3D [Inference]

In [None]:
import os
import sys 
import json
import glob
import random
import collections
import time
import re
import math
import numpy as np
import pandas as pd
import cv2

import matplotlib.pyplot as plt
import seaborn as sns
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

from random import shuffle
from sklearn import model_selection as sk_model_selection

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf

In [None]:
# Dataset and EfficientNet-3D source locations (rebinds the globals of the same
# name set earlier in the notebook).
data_directory = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'
pytorch3dpath = "../input/efficientnetpyttorch3d/EfficientNet-PyTorch-3D"
 
# MRI sequence folder names, default in-plane size and slices per volume.
mri_types = ['FLAIR','T1w','T1wCE','T2w']
IMAGE_SIZE = 256
NUM_IMAGES = 64

In [None]:
# `test` is an alias of `sample_submission` (same object), so the column added
# below appears on both names.
sample_submission = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv')
test=sample_submission
# Scan id rendered as a 5-digit, zero-padded string.
test['BraTS21ID5'] = [format(x, '05d') for x in test.BraTS21ID]
test.head(3)

In [None]:
def load_dicom_image(path, img_size=IMAGE_SIZE, voi_lut=True, rotate=0):
    """Read one DICOM file, optionally apply its VOI LUT and a rotation, then resize.

    Args:
        path: path of the ``.dcm`` file.
        img_size: side length of the returned square image.
        voi_lut: when true, pass the pixels through ``apply_voi_lut``.
        rotate: 0 for no rotation; 1, 2 or 3 select 90 degrees clockwise,
            90 degrees counter-clockwise or 180 degrees respectively.

    Returns:
        The image resized to ``(img_size, img_size)``; intensities are not
        normalised here.
    """
    dicom = pydicom.read_file(path)
    pixels = dicom.pixel_array
    image = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    if rotate > 0:
        # Index 0 is a placeholder; it is never selected because of the guard.
        rotation_codes = (0, cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE, cv2.ROTATE_180)
        image = cv2.rotate(image, rotation_codes[rotate])

    return cv2.resize(image, (img_size, img_size))


def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=IMAGE_SIZE, mri_type="FLAIR", split="test", rotate=0):
    """Load the central slices of one MRI series as a volume scaled to [0, 1].

    Args:
        scan_id: zero-padded study folder name.
        num_imgs: number of slices in the returned volume.
        img_size: in-plane size of every slice.
        mri_type: series sub-folder name.
        split: dataset sub-folder (defaults to ``"test"``).
        rotate: rotation selector forwarded to ``load_dicom_image``.

    Returns:
        Array of shape ``(1, img_size, img_size, num_imgs)``. Slices are taken
        around the middle of the naturally sorted file list, zero-padded at
        the end when fewer than ``num_imgs`` exist, and the whole volume is
        min-max scaled unless it is constant. When the series folder holds no
        ``.dcm`` file an all-zero volume is returned and a warning is emitted
        instead of crashing in ``np.stack``.
    """
    import warnings

    # Natural sort: digit runs in the path are compared as integers.
    files = sorted(glob.glob(f"{data_directory}/{split}/{scan_id}/{mri_type}/*.dcm"),
               key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)])

    if not files:
        warnings.warn(
            f"No DICOM files found for scan {scan_id} ({mri_type}, {split}); "
            "returning an all-zero volume."
        )
        return np.zeros((1, img_size, img_size, num_imgs))

    middle = len(files)//2
    num_imgs2 = num_imgs//2
    p1 = max(0, middle - num_imgs2)
    p2 = min(len(files), middle + num_imgs2)
    # Forward img_size so the slices match the zero padding built below
    # (previously the loader's own default size was always used).
    img3d = np.stack([load_dicom_image(f, img_size=img_size, rotate=rotate) for f in files[p1:p2]]).T
    if img3d.shape[-1] < num_imgs:
        n_zero = np.zeros((img_size, img_size, num_imgs - img3d.shape[-1]))
        img3d = np.concatenate((img3d,  n_zero), axis = -1)

    # Scale the whole volume at once; skipped for a constant volume to avoid
    # dividing by zero.
    if np.min(img3d) < np.max(img3d):
        img3d = img3d - np.min(img3d)
        img3d = img3d / np.max(img3d)

    return np.expand_dims(img3d,0)

# Smoke test: load one volume with the loader defaults, print its shape and
# basic intensity statistics, then show slice 30 of the last axis.
a = load_dicom_images_3d("00001")
print(a.shape)
print(np.min(a), np.max(a), np.mean(a), np.median(a))
# Drop the leading singleton axis added by the loader.
image = a[0]
print("Dimension of the CT scan is:", image.shape)
plt.imshow(np.squeeze(image[:, :, 30]), cmap="gray")

In [None]:
from tensorflow.keras.utils import Sequence
class Dataset(Sequence):
    """Keras ``Sequence`` that serves batches of 3D MRI volumes.

    Args:
        df: frame with ``BraTS21ID``, ``BraTS21ID5`` (zero-padded id used as
            folder name) and ``MGMT_value`` columns.
        is_train: when true, batches are ``(X, y)`` tuples; otherwise only ``X``.
        batch_size: number of scans per batch.
        shuffle: reshuffle the samples after every epoch (training only).

    Volumes are loaded with ``load_dicom_images_3d`` using that function's
    defaults, so its default split and MRI type apply.
    """

    def __init__(self,df,is_train=True,batch_size=1,shuffle=True):
        self.idx = df["BraTS21ID"].values
        self.paths = df["BraTS21ID5"].values
        self.y =  df["MGMT_value"].values
        self.is_train = is_train
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        """Number of batches per epoch (the last one may be smaller)."""
        return math.ceil(len(self.idx)/self.batch_size)

    def __getitem__(self,ids):
        """Return batch number ``ids`` as ``X`` or ``(X, y)``.

        ``X`` has shape ``(batch, H, W, D)``; for ``batch_size=1`` this is the
        same array the previous implementation produced.
        """
        start = ids * self.batch_size
        stop = start + self.batch_size
        batch_paths = self.paths[start:stop]

        # Load every scan of the batch. Previously only the scan at position
        # `ids` was loaded, which is wrong for any batch_size other than 1.
        # The loader returns (1, H, W, D), so [0] drops that leading axis.
        batch_X = np.stack([load_dicom_images_3d(p)[0] for p in batch_paths])

        if self.is_train:
            return batch_X, self.y[start:stop]
        return batch_X

    def on_epoch_end(self):
        """Shuffle ids, paths and labels together so they stay aligned."""
        if self.shuffle and self.is_train:
            order = list(range(len(self.idx)))
            shuffle(order)
            # `paths` is what __getitem__ actually loads, so it must be
            # permuted with the labels (it used to be left untouched).
            self.idx = np.asarray(self.idx)[order]
            self.paths = np.asarray(self.paths)[order]
            self.y = np.asarray(self.y)[order]

In [None]:
# Inference dataset: is_train=False, so each batch is just the image array.
test_dataset = Dataset(test,is_train=False)

In [None]:
# Preview the first test batch: print its shape and show slice 32 (last axis)
# of the first volume.
for i in range(1):
    image = test_dataset[i]
    print("Dimension of the CT scan is:", image.shape)
    plt.imshow(image[0,:,:, 32], cmap="gray")
    plt.show()

In [None]:
def get_model(width=IMAGE_SIZE, height=IMAGE_SIZE, depth=64):
    """Build a 3D convolutional neural network model.

    The network is four Conv3D -> MaxPool3D -> BatchNormalization -> Dropout
    stages (64, 128, 256 and 512 filters), then global average pooling, a
    1024-unit dense layer with dropout and a single sigmoid output.

    Args:
        width, height, depth: spatial size of the single-channel input volume.

    Returns:
        An uncompiled ``keras.Model`` named ``"3dcnn"``.

    Note:
        The previous code also created two 32-filter stages, but each was
        applied to ``inputs`` and immediately overwritten, so they never
        reached the output and were not part of the model. They are removed
        here rather than chained, because chaining would change the graph and
        the weight layout. Auto-generated layer names may differ as a result.
        NOTE(review): confirm the saved checkpoint still loads.
    """
    inputs = keras.Input((width, height, depth, 1))

    x = inputs
    # (filters, dropout rate) for each convolutional stage, in order.
    for filters, dropout_rate in ((64, 0.01), (128, 0.02), (256, 0.03), (512, 0.04)):
        x = layers.Conv3D(filters=filters, kernel_size=3, activation="relu")(x)
        x = layers.MaxPool3D(pool_size=2)(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(dropout_rate)(x)

    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=1024, activation="relu")(x)
    x = layers.Dropout(0.08)(x)

    outputs = layers.Dense(units=1, activation="sigmoid")(x)

    # Define the model.
    model = keras.Model(inputs, outputs, name="3dcnn")

    return model

# Build the model with the notebook's default input size and print its layers.
model = get_model(width=IMAGE_SIZE, height=IMAGE_SIZE, depth=64)
model.summary()

In [None]:
# Restore weights from the HDF5 checkpoint; judging by the file name it is the
# FLAIR model, and the test loader's default MRI type is also FLAIR.
model.load_weights('../input/brainclassification3d/Brain_3d_cls_FLAIR.h5')

In [None]:
# Predict over the whole test Sequence and flatten the result to 1-D.
preds = model.predict(test_dataset)
preds = preds.reshape(-1)

In [None]:
# Pair the predictions with the scan ids; this relies on `preds` being in the
# same row order as `sample_submission`.
submission_bt3d = pd.DataFrame({'BraTS21ID':sample_submission['BraTS21ID'],'MGMT_value':preds})

In [None]:
# Show the frame as the notebook cell output.
submission_bt3d

In [None]:
# Write the predictions without the row index.
submission_bt3d.to_csv('submission_bt3d.csv',index=False)

### miccai_fakeSubmission

In [None]:
# Read a previously produced submission with every column as text, so ids keep
# whatever formatting they have in the file, and build an id -> score lookup.
submissionDF01 = pd.read_csv('../input/testsubmissions/submission (36).csv', dtype=str)
submissionDF01 = submissionDF01.set_index('BraTS21ID')
scoreDict01 = submissionDF01['MGMT_value'].to_dict()
print(scoreDict01)

In [None]:
# One entry per study folder in the test directory; the folder name is the id.
listOfStudyPaths = glob.glob('../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/*')
listOfStudies = [eachPath.split('/')[-1] for eachPath in listOfStudyPaths]

# Reuse the stored score when the study id is a key of scoreDict01, otherwise
# fall back to 0.5.
# NOTE(review): the lookup is by exact string, so it only matches if the ids in
# the stored CSV are formatted like the folder names - verify.
predList = []
for eachStudy in listOfStudies:
    if eachStudy not in scoreDict01:
        # Append a float (not the string '0.500') so the column has one
        # numeric type, like the scores appended below.
        predList.append(0.5)
    else:
        score = float(scoreDict01[eachStudy])
        predList.append(score)

submission_miccai = pd.DataFrame({'BraTS21ID':listOfStudies,'MGMT_value':predList})
submission_miccai.to_csv('submission_miccai.csv', index=False)

In [None]:
# Sort in place by study id for display; the CSV above was written before this
# sort, in glob order.
submission_miccai.sort_values(by='BraTS21ID', inplace=True)
submission_miccai

### Efficientnet3D with one MRI type 0.674

In [None]:
import os
import sys 
import json
import glob
import random
import collections
import time

import numpy as np
import pandas as pd
import pydicom
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional
import torch.nn.functional as F

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

In [None]:
# Pick the dataset and EfficientNet-3D source locations: use the "../input"
# layout when that directory exists, otherwise fall back to a local checkout.
if os.path.exists("../input/rsna-miccai-brain-tumor-radiogenomic-classification"):
    data_directory = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'
    pytorch3dpath = "../input/efficientnetpyttorch3d/EfficientNet-PyTorch-3D"
else:
    data_directory = '/media/roland/data/kaggle/rsna-miccai-brain-tumor-radiogenomic-classification'
    pytorch3dpath = "EfficientNet-PyTorch-3D"
    
# MRI sequence sub-folder names; later zipped with `modelfiles`, so order matters.
mri_types = ['FLAIR','T1w','T1wCE','T2w']
# Default in-plane size (pixels) and number of slices per volume used by the loaders.
SIZE = 256
NUM_IMAGES = 64

# The EfficientNet-3D package is imported from a source directory, so that
# directory has to be on sys.path before the import below.
sys.path.append(pytorch3dpath)
from efficientnet_pytorch_3d import EfficientNet3D

In [None]:
def load_dicom_image(path, img_size=SIZE):
    """Read one DICOM file and return it as a min-max scaled square image.

    Args:
        path: path of the ``.dcm`` file to read.
        img_size: side length of the returned image.

    Returns:
        An ``(img_size, img_size)`` array. A constant image (min == max) is
        returned as all zeros without being resized; otherwise the pixels are
        shifted to start at 0, divided by their maximum and resized.
    """
    pixels = pydicom.read_file(path).pixel_array
    lowest = np.min(pixels)
    if lowest == np.max(pixels):
        # Nothing to normalise in a flat slice: hand back a blank image.
        return np.zeros((img_size, img_size))

    shifted = pixels - lowest
    peak = np.max(shifted)
    scaled = shifted / peak if peak != 0 else shifted

    #data = (data * 255).astype(np.uint8)
    return cv2.resize(scaled, (img_size, img_size))

def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=SIZE, mri_type="FLAIR", split="train"):
    """Load the central slices of one MRI series as a single-channel volume.

    Args:
        scan_id: zero-padded study folder name (e.g. ``"00000"``).
        num_imgs: number of slices in the returned volume.
        img_size: in-plane size of every slice.
        mri_type: series sub-folder name.
        split: dataset sub-folder (``"train"`` or ``"test"``).

    Returns:
        Array of shape ``(1, img_size, img_size, num_imgs)``. Up to
        ``2 * (num_imgs // 2)`` slices around the middle of the sorted file
        list are used; missing slices are zero-padded at the end. When the
        series folder holds no ``.dcm`` file an all-zero volume is returned
        and a warning is emitted instead of crashing in ``np.stack``.
    """
    import warnings

    # NOTE(review): this is a plain lexicographic sort of the paths. Changing
    # it would change the slice order fed to the model, so it is kept as is;
    # confirm it matches the ordering used when the weights were trained.
    files = sorted(glob.glob(f"{data_directory}/{split}/{scan_id}/{mri_type}/*.dcm"))

    if not files:
        warnings.warn(
            f"No DICOM files found for scan {scan_id} ({mri_type}, {split}); "
            "returning an all-zero volume."
        )
        return np.zeros((1, img_size, img_size, num_imgs))

    middle = len(files)//2
    num_imgs2 = num_imgs//2
    p1 = max(0, middle - num_imgs2)
    p2 = min(len(files), middle + num_imgs2)
    # Forward img_size so the slices match the zero padding built below
    # (previously the loader's own default size was always used).
    img3d = np.stack([load_dicom_image(f, img_size=img_size) for f in files[p1:p2]]).T
    if img3d.shape[-1] < num_imgs:
        n_zero = np.zeros((img_size, img_size, num_imgs - img3d.shape[-1]))
        img3d = np.concatenate((img3d,  n_zero), axis = -1)

    return np.expand_dims(img3d,0)

load_dicom_images_3d("00000").shape

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    Also exports ``PYTHONHASHSEED`` and, when CUDA is available, seeds every
    CUDA device and switches cuDNN to deterministic mode.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

set_seed(42)

In [None]:
# Load the training labels and show them in the notebook output.
train_df = pd.read_csv(f"{data_directory}/train_labels.csv")
display(train_df)

# Hold out 20% of the rows for validation, stratified on the MGMT_value label
# and with a fixed random_state so the split is repeatable.
df_train, df_valid = sk_model_selection.train_test_split(
    train_df, 
    test_size=0.2, 
    random_state=42, 
    stratify=train_df["MGMT_value"],
)

In [None]:
class Dataset(torch_data.Dataset):
    """Map-style dataset that yields one 3D MRI volume per scan id.

    Without ``targets`` each item is ``{"X": volume, "id": scan_id}`` and the
    volume is read from ``split``. With ``targets`` each item is
    ``{"X": volume, "y": label}``; the volume is always read from the
    ``"train"`` split and the label is ``abs(target - label_smoothing)``.
    """

    def __init__(self, paths, targets=None, mri_type=None, label_smoothing=0.01, split="train"):
        self.paths = paths
        self.targets = targets
        self.mri_type = mri_type
        self.label_smoothing = label_smoothing
        self.split = split

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        scan_id = self.paths[index]
        labelled = self.targets is not None
        # Labelled samples always come from the training folder.
        source_split = "train" if labelled else self.split
        volume = load_dicom_images_3d(
            str(scan_id).zfill(5),
            mri_type=self.mri_type[index],
            split=source_split,
        )

        sample = {"X": torch.tensor(volume).float()}
        if labelled:
            smoothed = abs(self.targets[index] - self.label_smoothing)
            sample["y"] = torch.tensor(smoothed, dtype=torch.float)
        else:
            sample["id"] = scan_id
        return sample

In [None]:
class Model(nn.Module):
    """EfficientNet3D-b0 on single-channel input with a one-logit head."""

    def __init__(self):
        super().__init__()
        backbone = EfficientNet3D.from_name("efficientnet-b0", override_params={'num_classes': 2}, in_channels=1)
        # Swap the stock classifier for a single-output linear layer.
        backbone._fc = nn.Linear(in_features=backbone._fc.in_features, out_features=1, bias=True)
        self.net = backbone

    def forward(self, x):
        """Return the raw (pre-sigmoid) output of the network for ``x``."""
        return self.net(x)

In [None]:
# Checkpoint file names, one per entry of `mri_types` and in the same order
# (the two lists are zipped together when predicting).
modelfiles = None

# `modelfiles` was just set to None, so this branch always runs here.
if not modelfiles:
    modelfiles = ['FLAIR-e2-loss0.693-auc0.567.pth', 'T1w-e8-loss0.682-auc0.551.pth', 'T1wCE-e3-loss0.693-auc0.617.pth', 'T2w-e8-loss0.672-auc0.593.pth']
    print(modelfiles)

In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def predict(modelfile, df, mri_type, split):
    """Run one checkpoint over every scan in ``df`` for a single MRI type.

    Args:
        modelfile: checkpoint file name inside the weights directory; the file
            must hold a dict with a ``"model_state_dict"`` entry.
        df: frame indexed by scan id. An ``"MRI_Type"`` column is written to it
            in place (callers see that column afterwards).
        mri_type: series name to load for every scan.
        split: dataset sub-folder the scans are read from.

    Returns:
        DataFrame indexed by ``BraTS21ID`` with one ``MGMT_value`` column
        holding the sigmoid of the model output for each scan.
    """
    print("Predict:", modelfile, mri_type, df.shape)
    df.loc[:,"MRI_Type"] = mri_type
    data_retriever = Dataset(
        df.index.values,
        mri_type=df["MRI_Type"].values,
        split=split
    )

    data_loader = torch_data.DataLoader(
        data_retriever,
        batch_size=4,
        shuffle=False,
        num_workers=8,
    )

    model = Model()
    model.to(device)

    # map_location lets a checkpoint saved on a GPU load on the CPU fallback.
    checkpoint = torch.load(
        f'../input/efficientnet3d-with-one-mri-type-0674/{modelfile}',
        map_location=device,
    )
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    y_pred = []
    ids = []

    with torch.no_grad():
        for e, batch in enumerate(data_loader,1):
            print(f"{e}/{len(data_loader)}", end="\r")
            probs = torch.sigmoid(model(batch["X"].to(device)))
            # Flatten instead of squeeze so a batch of one still yields a
            # list of plain floats rather than a 0-d array.
            y_pred.extend(probs.cpu().numpy().reshape(-1).tolist())
            ids.extend(batch["id"].numpy().tolist())

    preddf = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
    preddf = preddf.set_index("BraTS21ID")
    return preddf

In [None]:
# Start from the sample submission, indexed by scan id.
submission = pd.read_csv(f"{data_directory}/sample_submission.csv", index_col="BraTS21ID")

# Accumulate the per-MRI-type predictions; the addition aligns rows on the
# BraTS21ID index. Each predict() call also writes an "MRI_Type" column into
# `submission`.
submission["MGMT_value"] = 0
for m, mtype in zip(modelfiles, mri_types):
    pred = predict(m, submission, mtype, split="test")
    submission["MGMT_value"] += pred["MGMT_value"]

# Turn the sum into a mean over the models and save only the MGMT_value column
# (with its index).
submission["MGMT_value"] /= len(modelfiles)
submission["MGMT_value"].to_csv("submission_effnet3d_score_0674.csv")

In [None]:
# Keep an independent copy of this pipeline's result under its own name.
submission_effnet3d_score_0674 = submission.copy()
submission_effnet3d_score_0674.head()

### A-Net-With-Embeddings-for-Ordered-3D-MRI-Voxel

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import pydicom
import cv2 as cv

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset, Subset
# from torchvision import models
import torchvision
import kornia as K  # batch image augmentations with torch.Tensor
from kornia.augmentation import AugmentationSequential
from kornia.augmentation.base import AugmentationBase3D  # Subclassing this is too complicated.
from kornia.enhance import invert

from tqdm.notebook import tqdm

from pathlib import Path
from typing import Union, Tuple, List, Optional, Type, Dict, Iterable
import time

# Run-mode switches for this section of the notebook.
DEBUG = False
REPRODUCTIVE = True  # when true, the NumPy and PyTorch RNGs are seeded below
INFERENCE_ONLY = True
USE_CROSS_VALIDATION = True

random_state = 42
model_name = "Net-3D"
# Dataset root and the directory holding the trained model weights.
data_dir = Path("../input/rsna-miccai-brain-tumor-radiogenomic-classification")
models_dir = Path("../input/model-weights-for-rsna-miccai-brain-tumor-dataset")
# models_dir = Path(".")  # Use this instead when the model is trained on a local machine

# Device name as a plain string (rebinds the `device` global set earlier).
device = "cuda" if torch.cuda.is_available() else "cpu"

# Wall-clock start time of this section.
time_begin = time.time()

if REPRODUCTIVE:
    # Only the NumPy and PyTorch generators are seeded here.
    np.random.seed(random_state)
    torch.random.manual_seed(random_state)
# Show the dataset directory contents and the library versions in use.
display(list(data_dir.iterdir()), torch.__version__, torchvision.__version__)

In [None]:
# Integer code -> MRI series name, and the inverse name -> code lookup.
mri_series = {0: "FLAIR", 1: "T1w", 2: "T1wCE", 3: "T2w"}
mri_series_map = {v: k for k, v in mri_series.items()}
# Integer code -> imaging plane name (0 is the fallback), and the inverse lookup.
planes = {0: "Unknown", 1: "Coronal", 2: "Sagittal", 3: "Axial"}
planes_map = {v: k for k, v in planes.items()}

In [None]:
# Read the training labels with BraTS21ID kept as text, so the id formatting
# in the CSV is preserved.
labels_train = pd.read_csv(data_dir / "train_labels.csv", dtype={"BraTS21ID": str})
labels_train

In [None]:
def look_one_dcm(instance_id: str, img_dir: Path, mri_series="FLAIR", verbose=False):
    """Print the slice count of one series and display one of its DICOM images.

    Args:
        instance_id: study id; zero-padded to 5 characters to form the folder name.
        img_dir: directory containing the study folders.
        mri_series: series sub-folder name.
        verbose: when true, also dump the dataset and a few of its elements.

    The displayed file is the middle entry of the glob result, which is not
    sorted, so it is not guaranteed to be the anatomically central slice.
    Nothing is displayed when the series has no ``.dcm`` files.
    """
    import ast

    dcm_paths = list(img_dir.glob("./{}/{}/*.dcm".format(instance_id.zfill(5), mri_series)))
    print("Containing {} dicom files(including blank).".format(len(dcm_paths)))
    if dcm_paths:
        dcm_mid = dcm_paths[(len(dcm_paths) - 1) // 2]
        dcm_ds = pydicom.read_file(str(dcm_mid))
        if verbose:
            print(dir(dcm_ds))
            print(dcm_ds)
            print(type(dcm_ds[("0010", "0010")].value))
            # The value text comes from the file, so it is parsed with
            # ast.literal_eval (literals only) rather than eval, which would
            # execute arbitrary expressions.
            print(dcm_ds[("0020", "0032")].name, ast.literal_eval(str(dcm_ds[("0020", "0032")].value)))
            print(dir(dcm_ds[("0020", "0032")]))
            print(dcm_ds.pixel_array.dtype)
        plt.imshow(dcm_ds.pixel_array, cmap=plt.cm.gray)
        plt.show()


# Inspect study "00000" of the training set with the verbose metadata dump.
look_one_dcm("00000", data_dir / "train", verbose=True)

In [None]:
def get_image_plane(loc):
    """Classify a six-component orientation vector into a plane name.

    ``loc`` must yield exactly six numbers (row x/y/z followed by column
    x/y/z). Each is rounded, and the (row_x, row_y, col_x, col_y) pattern is
    matched against the three known orientations; anything else maps to the
    ``planes[0]`` fallback.
    """
    row_x, row_y, row_z, col_x, col_y, col_z = (round(v) for v in loc)
    pattern_to_code = {
        (1, 0, 0, 0): 1,
        (0, 1, 0, 0): 2,
        (1, 0, 0, 1): 3,
    }
    code = pattern_to_code.get((row_x, row_y, col_x, col_y), 0)
    return planes[code]


class DICOMMetaLoader(Dataset):
    """Dataset that reads one DICOM file per item and returns its metadata.

    Args:
        img_dir: directory that is searched for DICOM files.
        glob: glob pattern relative to ``img_dir``; defaults to
            ``"./*/*/*.dcm"``.

    Each item is a dict with the file path, patient id, series description,
    pixel mean and standard deviation, plane name and image position (x, y, z).
    """

    def __init__(self, img_dir: Path, glob=None):
        super(DICOMMetaLoader, self).__init__()
        if glob is None:
            glob = "./*/*/*.dcm"
        self.dcm_paths = list(img_dir.glob(glob))

    def __len__(self): return len(self.dcm_paths)

    def __getitem__(self, idx) -> dict:
        dcm_path = str(self.dcm_paths[idx])
        dcm_obj = pydicom.read_file(dcm_path)
        # Compare the element's *value*. str() of the element itself also
        # includes its tag and name, so it could never equal "MONOCHROME1".
        photometric = str(dcm_obj[0x28, 0x04].value).strip()
        array = dcm_obj.pixel_array
        if photometric == "MONOCHROME1":
            # Invert the pixels by subtracting them from the dtype's maximum
            # representable value.
            info_func = np.iinfo if np.issubdtype(array.dtype, np.integer) else np.finfo
            array = info_func(array.dtype).max - array
        image_mean, image_std = np.mean(array), np.std(array)

        # (0020,0032): three position components; (0020,0037): six
        # orientation components consumed by get_image_plane.
        impo_x, impo_y, impo_z = [float(v) for v in dcm_obj[0x20, 0x32]]
        plane = get_image_plane(dcm_obj[0x20, 0x37])

        patient_id = str(dcm_obj[0x0010, 0x0020].value).strip().zfill(5)
        series_desc = str(dcm_obj[0x0008, 0x103e].value).strip()
        row = dict(dcm_path=dcm_path, BraTS21ID=patient_id, series_description=series_desc,
                   image_mean=image_mean, image_std=image_std,
                   plane=plane,
                   image_position_x=impo_x, image_position_y=impo_y, image_position_z=impo_z)
        return row


def get_meta_from_glob(img_dir: Path, glob=None) -> pd.DataFrame:
    """Collect the metadata of every DICOM file matched under ``img_dir``.

    Args:
        img_dir: directory to search.
        glob: glob pattern forwarded to ``DICOMMetaLoader`` (``None`` uses its
            default).

    Returns:
        One row per DICOM file with the columns produced by
        ``DICOMMetaLoader``; an empty DataFrame when nothing matched.
    """
    dcm_ds = DICOMMetaLoader(img_dir, glob)
    dcm_dl = DataLoader(dcm_ds, batch_size=256, num_workers=6)
    # Build one frame per batch and concatenate once at the end; growing a
    # frame with pd.concat inside the loop re-copies all earlier rows each time.
    chunks = [
        pd.DataFrame.from_dict({k: np.asarray(v) for k, v in item.items()})
        for item in tqdm(dcm_dl)
    ]
    if not chunks:
        return pd.DataFrame()
    return pd.concat(chunks, ignore_index=True)


# df_train = get_meta_from_glob(data_dir / "train")

In [None]:
# # To categorical data by mapping, 

# df_train.loc[:, "plane"] = df_train.loc[:, "plane"].map(planes_map)
# df_train.loc[:, "series_description"] = df_train.loc[:, "series_description"].map(mri_series_map)

In [None]:
def keep_non_blank(df: pd.DataFrame):
    """
    Drop rows that describe blank images.
    :params:
        df: pd.DataFrame with "image_std" and "image_mean" columns.
    :returns:
        pd.DataFrame: the rows whose "image_std" and "image_mean" are both
        strictly positive; the original index labels are kept.
    """
    has_contrast = df["image_std"] > 0
    has_signal = df["image_mean"] > 0
    return df.loc[has_contrast & has_signal]


# display(len(df_train))
# df_train = keep_non_blank(df_train)
# display(len(df_train))

In [None]:
def drop_by_id(df: pd.DataFrame, ids: List[Union[int, str]]):
    """Return ``df`` without the rows whose "BraTS21ID" is in ``ids``.

    Every id is converted to text and zero-padded to 5 characters before the
    comparison. The result gets a fresh 0..n-1 index; ``df`` is not modified.
    """
    padded_ids = [str(raw_id).zfill(5) for raw_id in ids]
    is_excluded = df["BraTS21ID"].isin(padded_ids)
    remaining = df.loc[~is_excluded]
    return remaining.reset_index(drop=True)


# drop_ids = "00109, 00123, 00709".split(", ")
# df_train = drop_by_id(df_train, drop_ids)
# labels_train = drop_by_id(labels_train, drop_ids)

In [None]:
def count_values(df: pd.DataFrame):
    """Display how many files each (BraTS21ID, series_description) group has.

    Shows, in order: summary statistics of the per-group "dcm_path" counts,
    the groups with the smallest count and the groups with the largest count.
    """
    files_per_series = df.groupby(["BraTS21ID", "series_description"]).count()["dcm_path"]
    display(files_per_series.describe())
    for extreme in (files_per_series.min(), files_per_series.max()):
        display(files_per_series.loc[files_per_series == extreme])


# display(df_train.describe())
# count_values(df_train)
# look_one_dcm("00571", data_dir / "train", mri_series[0])
# look_one_dcm("00818", data_dir / "train", mri_series[0])
# look_one_dcm("00012", data_dir / "train", mri_series[3])

In [None]:
class MRIVoxelDataset(Dataset):
    """Dataset of resampled MRI voxel volumes, one item per (study, series) pair.

    Each item is ``(voxel, label, (series_desc, plane))``. Volumes are built by
    ``get_voxel_by_id_series`` and pairs are filtered with ``is_retrievable``;
    both helpers are defined elsewhere in the notebook.
    """
    
    def __init__(self, meta_df: pd.DataFrame, label_df: Optional[pd.DataFrame] = None,
                 voxel_size: Union[int, Tuple[int, int], Tuple[int, int, int]] = (64, 256, 256),
                 including_series: np.ndarray = np.array(list(mri_series.keys()), dtype=np.int64)):
        """
        :params:
            :meta_df: required columns: [dcm_path, BraTS21ID, series_description, plane,
                                         image_position_x, image_position_y, image_position_z]
            :label_df(Optional): required columns: [BraTS21ID, MGMT_value]
            :voxel_size: if int, the D, H, W will be set to the same;
                         if (int, int), D by voxel_size[0], H, W by voxel_size[1];
                         if (int, int, int), D, H, W will be set respectively.
            :including_series: series codes to keep; rows of meta_df whose
                               series_description is not listed are dropped.
        """
        super(MRIVoxelDataset, self).__init__()
        self.meta_df,self.label_df,self.voxel_size = meta_df,label_df,voxel_size
        self.including_series = including_series
        # Normalise voxel_size to a (D, H, W) triple.
        if isinstance(self.voxel_size, int):
            self.voxel_size = tuple(self.voxel_size for _ in range(3))
        elif isinstance(self.voxel_size, tuple):
            if len(self.voxel_size) == 2:
                self.voxel_size = (self.voxel_size[0], self.voxel_size[1], self.voxel_size[1])
        # Work on a filtered copy so the caller's frame is left untouched.
        self.meta_df = self.meta_df.loc[self.meta_df["series_description"].isin(self.including_series)].copy()
        if self.label_df is None:
            # No labels supplied: build a placeholder label table holding every
            # study id found in the metadata with NaN as its MGMT_value.
            self.label_df = pd.concat([pd.DataFrame.from_dict(
                dict(BraTS21ID=self.meta_df["BraTS21ID"].unique())
            )], axis=1)
            self.label_df.loc[:, "BraTS21ID"] = self.label_df["BraTS21ID"].map(lambda i: str(i).zfill(5))
            labels = np.full_like(self.label_df["BraTS21ID"].values, np.nan, dtype=np.float64)
            self.label_df.loc[:, "MGMT_value"] = labels

        # Repeat the label table once per series present in the metadata, so
        # every row stands for one (study, series) pair.
        new_label_df = pd.DataFrame()
        for v in self.meta_df["series_description"].unique():
            series_desc = pd.DataFrame({
                    "series_description": np.full((len(self.label_df)), v, dtype=np.int64)
                 })
            df = self.label_df.reset_index(drop=True)
            df = pd.concat([df, series_desc], axis=1)
            new_label_df = pd.concat([new_label_df, df], axis=0)
        self.label_df = new_label_df.reset_index(drop=True)

        # Keep only the pairs that is_retrievable accepts; rejected pairs are
        # printed. Presumably the check is whether the metadata has slices for
        # the pair - confirm against the helper's definition.
        retrievables = list()
        for i in range(len(self.label_df)):
            row = self.label_df.iloc[i]
            flag = is_retrievable(self.meta_df, row.BraTS21ID, row.series_description)
            if not flag:
                print(row.BraTS21ID, row.series_description)
            retrievables.append(flag)
        retrievables = np.asarray(retrievables)
        self.label_df = self.label_df.iloc[retrievables]
        print(f"Got {len(self)} samples in dataset.")

    def __len__(self): return len(self.label_df)
    
    def __getitem__(self, idx):
        """Return ``(voxel, label, (series_desc, plane))`` for row ``idx`` of the label table."""
        row = self.label_df.iloc[idx]
        # The helper receives only the in-plane (H, W) part of voxel_size.
        voxel, plane = get_voxel_by_id_series(self.meta_df, row["BraTS21ID"], row["series_description"], self.voxel_size[1:])
        voxel = torch.tensor(voxel, dtype=torch.float32).unsqueeze(0).unsqueeze(0)  # [N, C, D, H, W]
        # Resample to the full (D, H, W) target size, then drop the batch axis again.
        voxel = F.interpolate(voxel, self.voxel_size, mode="trilinear", align_corners=False)
        voxel = voxel.squeeze(0)
        label = torch.tensor([row["MGMT_value"]], dtype=torch.float32)
        plane = torch.tensor(plane, dtype=torch.int64)
        series_desc = torch.tensor(row["series_description"], dtype=torch.int64)
        return voxel, label, (series_desc, plane)


# if DEBUG:
#     ds_ = MRIVoxelDataset(df_train, labels_train, (64, 128), np.array([0], dtype=np.int64))
#     dl_ = DataLoader(ds_, batch_size=4, num_workers=4)
#     for voxel, label, (series_desc, plane) in dl_:
#         print(voxel.shape, label.shape, plane.shape, series_desc.shape)
#         print(voxel.dtype, label.dtype, plane.dtype, series_desc.dtype)
#         break

In [None]:
# Annotation aliases for parameters that receive a layer *class* (which is
# instantiated later) rather than a layer instance.
NormLayerClass = Type
ActivationLayerClass = Type


class SqueezeExcitation(nn.Module):
    """Channel gating for 5D inputs.

    The input is pooled to one value per channel, passed through a 1x1x1
    convolution down to ``in_channels // 4`` channels, a ReLU, a 1x1x1
    convolution back to ``in_channels`` and a hard sigmoid; the resulting
    per-channel scale multiplies the input.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels
        self.squeeze_channels = in_channels // 4

        gate_layers = [
            nn.AdaptiveAvgPool3d(1),
            nn.Conv3d(self.in_channels, self.squeeze_channels, 1),
            nn.ReLU(inplace=True),
            nn.Conv3d(self.squeeze_channels, self.in_channels, 1),
            nn.Hardsigmoid(inplace=True),
        ]
        self.seq = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` rescaled channel-wise by the learned gate."""
        return self.seq(x) * x


class ConvBNActivation(nn.Module):
    """Conv3d followed by a normalisation layer, an activation and optional SE.

    Args:
        conv_config: keyword arguments for ``nn.Conv3d`` (see ``config``).
        norm_layer_cls: normalisation class, built with the output channel count.
        activation_layer_cls: activation class, built with ``inplace=True``.
        use_se: append a ``SqueezeExcitation`` block on the output channels.
    """

    def __init__(self, conv_config: dict,
                 norm_layer_cls: NormLayerClass = nn.BatchNorm3d,
                 activation_layer_cls: ActivationLayerClass = nn.ReLU,
                 use_se: bool = False,
        ) -> None:
        super().__init__()
        conv = nn.Conv3d(**conv_config)
        out_channels = conv_config["out_channels"]
        stages = [
            conv,
            norm_layer_cls(out_channels),
            activation_layer_cls(inplace=True),
        ]
        if use_se:
            stages.append(SqueezeExcitation(out_channels))
        self.seq = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the stacked layers to ``x``."""
        return self.seq(x)

    @staticmethod
    def config(in_channels: int,
               out_channels: int,
               kernel_size: Union[int, Tuple[int, int, int]],
               stride: Union[int, Tuple[int, int, int]] = 1,
               padding: Union[int, Tuple[int, int, int]] = 0,
               dilation: Union[int, Tuple[int, int, int]] = 1,
               groups: int = 1,
               bias: bool = True,
               padding_mode: str = 'zeros',
        ) -> dict:
        """Bundle the arguments into a dict usable as ``nn.Conv3d(**config)``."""
        return dict(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            padding_mode=padding_mode,
        )


class BottleNeck(nn.Module):
    """Residual bottleneck: 1x1x1 expand -> depthwise conv -> 1x1x1 project, plus a shortcut.

    The main path is three ``ConvBNActivation`` stages built from
    ``residual_config`` (see :meth:`config`). The shortcut is a 1x1x1
    convolution that maps ``in_channels`` to ``out_channels``; when the main
    path changes the spatial size, the shortcut is resized to match before
    the two are added.
    """

    def __init__(self, residual_config: dict):
        super(BottleNeck, self).__init__()
        self.residual_config = residual_config
        cfg = self.residual_config
        norm_cls = cfg["norm_layer_cls"]
        act_cls = cfg["activation_layer_cls"]
        layers = [
            # Point-wise expansion.
            ConvBNActivation(
                ConvBNActivation.config(cfg["in_channels"], cfg["expand_channels"], 1, 1, 0),
                norm_cls, act_cls),
            # Depthwise convolution (groups == channels); the only stage that may stride or use SE.
            ConvBNActivation(
                ConvBNActivation.config(cfg["expand_channels"], cfg["expand_channels"],
                                        cfg["kernel_size"], cfg["stride"], cfg["padding"],
                                        groups=cfg["expand_channels"]),
                norm_cls, act_cls, cfg["use_se"]),
            # Point-wise projection without a non-linearity.
            ConvBNActivation(
                ConvBNActivation.config(cfg["expand_channels"], cfg["out_channels"], 1, 1, 0),
                norm_cls, nn.Identity),
        ]
        self.seq = nn.Sequential(*layers)
        # The shortcut: a 1x1x1 conv, i.e. a per-voxel linear map over channels.
        self.shortcut = nn.Conv3d(cfg["in_channels"], cfg["out_channels"], 1)

    def forward(self, x):
        """Return ``shortcut(x)`` (resized if needed) plus the main-path output."""
        post_seq = self.seq(x)
        x = self.shortcut(x)
        # Only resample when the main path actually changed the spatial size;
        # for equal sizes the resampling is a no-op that just costs a copy.
        if x.shape[-3:] != post_seq.shape[-3:]:
            x = F.interpolate(x, post_seq.shape[-3:], mode="trilinear", align_corners=False)
        return x + post_seq

    @staticmethod
    def config(in_channels: int,
               out_channels: int,
               expand_channels: int,
               kernel_size: Union[int, Tuple[int, int, int]],
               stride: Union[int, Tuple[int, int, int]] = 1,
               padding: Union[int, Tuple[int, int, int]] = 0,
               norm_layer_cls: NormLayerClass = nn.BatchNorm3d,
               activation_layer_cls: ActivationLayerClass = nn.Hardswish,
               use_se: bool = False,
    ) -> dict:
        """Bundle the arguments into the dict (keyed by parameter name) that ``__init__`` expects."""
        return dict(
            in_channels=in_channels,
            out_channels=out_channels,
            expand_channels=expand_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            norm_layer_cls=norm_layer_cls,
            activation_layer_cls=activation_layer_cls,
            use_se=use_se,
        )


class NetFeatures(nn.Module):
    """Convolutional trunk: stem conv -> chain of BottleNeck blocks -> 1x1x1 head conv with SE."""

    def __init__(self, in_channels, out_channels, residual_config_list: List[dict]):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.residual_config_list = residual_config_list

        # Stem: 3x3x3, stride 2, padding 1; its width is taken from the first bottleneck's input.
        stem_width = residual_config_list[0]["in_channels"]
        stem_config = ConvBNActivation.config(in_channels, stem_width, 3, 2, 1)
        self.first_conv = ConvBNActivation(stem_config, activation_layer_cls=nn.ReLU)

        self.residual_block = nn.Sequential(*[BottleNeck(block_cfg) for block_cfg in residual_config_list])

        # Head: point-wise conv from the last bottleneck's width to out_channels.
        head_width = residual_config_list[-1]["out_channels"]
        head_config = ConvBNActivation.config(head_width, out_channels, 1)
        self.last_conv = ConvBNActivation(head_config, nn.BatchNorm3d, nn.Hardswish, use_se=True)

    def forward(self, x):
        """Apply stem, residual chain and head in that order."""
        return self.last_conv(self.residual_block(self.first_conv(x)))


class ConcatEmbeddingLinear(nn.Module):
    """Look up an embedding by index, prepend it to the features, and apply one linear layer.

    ``embed_dim`` defaults to ``in_features`` when left as ``None``.
    """

    def __init__(self, in_features: int, out_features: int, n_embeddings: int, embed_dim: Optional[int] = None):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.n_embeddings = n_embeddings
        self.embed_dim = in_features if embed_dim is None else embed_dim

        self.emb = nn.Embedding(n_embeddings, self.embed_dim)
        self.fc = nn.Linear(in_features + self.embed_dim, out_features)

    def forward(self, x, idx_emb):
        """Concatenate ``[emb(idx_emb), x]`` on the last dim and project to ``out_features``."""
        joined = torch.cat((self.emb(idx_emb), x), dim=-1)
        return self.fc(joined)


class Net(nn.Module):
    """Full classifier: conv trunk -> pooled hidden vector -> series/plane conditioning -> logits."""

    def __init__(self, in_channels, feature_out_channels, hidden_features, n_classes, n_series, n_planes,
                 residual_config_list: List[dict]) -> None:
        super().__init__()
        self.in_channels = in_channels
        self.feature_out_channels = feature_out_channels
        self.n_classes = n_classes
        self.hidden_features = hidden_features
        self.n_planes = n_planes
        self.n_series = n_series
        self.residual_config_list = residual_config_list

        self.features = NetFeatures(in_channels, feature_out_channels, residual_config_list)
        # Global average pool -> flatten -> linear map to the hidden width.
        self.pool_flat_linear = nn.Sequential(
            nn.AdaptiveAvgPool3d(1),
            nn.Flatten(),
            nn.Linear(self.features.out_channels, hidden_features),
        )
        # Each conditioning layer keeps the hidden width unchanged.
        self.emb_series = ConcatEmbeddingLinear(hidden_features, hidden_features, n_series)
        self.emb_planes = ConcatEmbeddingLinear(hidden_features, hidden_features, n_planes)
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(hidden_features, n_classes),
        )

    def forward(self, x, idx_series, idx_planes):
        """Return classifier outputs for volumes ``x`` given their series and plane indices."""
        hidden = self.pool_flat_linear(self.features(x))
        hidden = self.emb_series(hidden, idx_series)
        hidden = self.emb_planes(hidden, idx_planes)
        return self.classifier(hidden)


def get_residual_config_backup():
    """Return the deeper, 11-block BottleNeck configuration list.

    Like MobileNetV3 small, although it may be too deep.
    """
    hswish, relu = nn.Hardswish, nn.ReLU
    # in_channels, out_channels, expand_channels, kernel_size, stride, padding, activation, use_se
    table = (
        (16, 16, 16, 3, 2, 1, hswish, True),
        (16, 24, 72, 3, 2, 1, relu, False),
        (24, 24, 88, 3, 1, 1, relu, False),
        (24, 40, 96, 5, 2, 2, relu, True),
        (40, 40, 240, 5, 1, 2, hswish, True),
        (40, 40, 240, 5, 1, 2, hswish, True),
        (40, 48, 120, 5, 1, 2, hswish, True),
        (48, 48, 144, 5, 1, 2, hswish, True),
        (48, 96, 288, 5, 2, 2, hswish, True),
        (96, 96, 576, 5, 1, 2, hswish, True),
        (96, 96, 576, 5, 1, 2, hswish, True),
    )
    # Every block uses BatchNorm3d as its normalisation layer.
    return [
        BottleNeck.config(c_in, c_out, c_exp, kernel, stride, pad, nn.BatchNorm3d, act, se)
        for c_in, c_out, c_exp, kernel, stride, pad, act, se in table
    ]


def get_residual_config():
    """Return the 7-block BottleNeck configuration list used to build ``Net``."""
    hswish, relu = nn.Hardswish, nn.ReLU
    # in_channels, out_channels, expand_channels, kernel_size, stride, padding, activation, use_se
    table = (
        (16, 16, 16, 3, 2, 1, hswish, True),
        (16, 24, 72, 3, 2, 1, relu, False),
        (24, 24, 88, 3, 1, 1, relu, False),
        (24, 40, 96, 5, 2, 2, relu, True),
        (40, 40, 240, 5, 1, 2, hswish, True),
        (40, 80, 288, 5, 2, 2, hswish, True),
        (80, 96, 576, 5, 1, 2, hswish, True),
    )
    # Every block uses BatchNorm3d as its normalisation layer.
    return [
        BottleNeck.config(c_in, c_out, c_exp, kernel, stride, pad, nn.BatchNorm3d, act, se)
        for c_in, c_out, c_exp, kernel, stride, pad, act, se in table
    ]


# Smoke test (only when DEBUG is truthy): push an all-ones batch through a
# freshly built Net and print the BCE-with-logits loss.
if DEBUG:
    t_ = torch.ones(4, 1, 64, 256, 256, dtype=torch.float32)  # 4 single-channel volumes
    l_ = torch.ones(4, 1, dtype=torch.float32)  # targets, same shape as the model output
    s_ = torch.ones(4, dtype=torch.int64)  # series index per sample
    p_ = torch.ones(4, dtype=torch.int64)  # plane index per sample
    config_ = get_residual_config()
    # Positional args: in_channels=1, feature_out_channels=512, hidden_features=512,
    # n_classes=1, n_series=4, n_planes=4.
    net_ = Net(1, 512, 512, 1, 4, 4, config_).to(dtype=torch.float32)
    print(net_)
    with torch.no_grad():
        o_ = net_(t_, s_, p_)
        loss_ = F.binary_cross_entropy_with_logits(o_, l_)
        print(loss_.item())

In [None]:
def load_model(path, *net_args, **net_kwargs):
    """Build a ``Net`` and load its weights from a saved state_dict.

    :param path: file containing a state_dict readable by ``torch.load``.
    :param net_args: positional arguments forwarded to ``Net``.
    :param net_kwargs: keyword arguments forwarded to ``Net``.
    :return: the ``Net`` instance with the loaded weights (tensors on CPU).
    """
    net = Net(*net_args, **net_kwargs)
    # Deserialise onto the CPU so a checkpoint written on a GPU can still be
    # opened on a machine without one; callers move the model to their
    # target device afterwards.
    state_dict = torch.load(path, map_location="cpu")
    net.load_state_dict(state_dict)
    return net

In [None]:
class RandomInvert3D(AugmentationBase3D):
    """Random intensity-inversion augmentation built on ``AugmentationBase3D``.

    NOTE(review): ``AugmentationBase3D`` and ``invert`` are defined outside this
    chunk (presumably kornia's) — confirm their contracts against the import cell.
    """
    
    def __init__(
        self,
        max_val: Union[float, torch.Tensor] = torch.tensor(1.0),
        return_transform: bool = False,
        same_on_batch: bool = False,
        p: float = 0.5,
    ) -> None:
        # p is the per-sample probability; p_batch is fixed at 1.0.
        super(RandomInvert3D, self).__init__(
            p=p, return_transform=return_transform, same_on_batch=same_on_batch, p_batch=1.0
        )
        # Value handed to `invert` as the maximum in apply_transform.
        self.max_val = max_val

    def __repr__(self) -> str:
        return self.__class__.__name__ + f"({super().__repr__()})"
    
    def generate_parameters(self, batch_shape: torch.Size):
        """Return the parameter dict: ``max_val`` and ``batch_shape``, both as tensors."""
        return dict(max_val=torch.as_tensor(self.max_val), batch_shape=torch.as_tensor(batch_shape))
    
    def compute_transformation(self, input, params: Dict[str, torch.Tensor]):
        """Return the identity matrix for ``input`` (the result of ``self.identity_matrix``)."""
        return self.identity_matrix(input)

    def apply_transform(
        self, input: torch.Tensor,
        params: Dict[str, torch.Tensor],
        transform: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """Return ``invert(input, params["max_val"])``; ``transform`` is not used."""
        max_val = params["max_val"]
        return invert(input, max_val)

    
Numeric = Union[int, float]


class RandomShift3D(nn.Module):
    """Randomly translate each volume of a batch along D, H and W, zero-filling the exposed border.

    :param shift_limit: shift bounds as a fraction of each axis length, clipped to [-1, 1].
        Accepted forms:
        * a number ``s``: every axis uses ``(-|s|, |s|)``;
        * a flat ``(low, high)`` pair, or ``[(low, high)]``: the pair is reused for all three axes;
        * three ``(low, high)`` pairs, one per axis in D, H, W order.
    :param p: probability that a given sample is shifted; other samples are copied unchanged.
    """

    def __init__(self,
                 shift_limit: Union[Numeric, List[Numeric], Tuple[Numeric, Numeric]] = 0.125,
                 p: float = 0.5):
        super(RandomShift3D, self).__init__()
        self.shift_limit,self.p = shift_limit,p
        if isinstance(self.shift_limit, (float, int)):
            bound = abs(self.shift_limit)
            self.shift_limit = np.array([(-bound, bound)] * 3, dtype=np.float64)
        elif isinstance(self.shift_limit, (tuple, list)):
            self.shift_limit = np.array(self.shift_limit, dtype=np.float64)
        else:
            raise TypeError(
                "shift_limit expects a number or a tuple/list of numbers, "
                f"got {type(self.shift_limit).__name__}"
            )
        self.shift_limit = np.clip(self.shift_limit, -1., 1.)
        if self.shift_limit.shape == (2,):
            # A flat (low, high) pair, as the annotation allows: treat it as one row.
            self.shift_limit = self.shift_limit[np.newaxis, :]
        if self.shift_limit.shape[0] == 1:
            # A single row applies to all three axes.
            self.shift_limit = np.concatenate([self.shift_limit, self.shift_limit, self.shift_limit])
        assert self.shift_limit.shape == (3, 2), \
            f"shift_limit must resolve to shape (3, 2) [(low, high) per axis], got {self.shift_limit.shape}"

    def forward(self, tensor):
        """Return a shifted copy of ``tensor`` (shape [N, C, D, H, W]); the input is not modified."""
        assert len(tensor.shape) == 5, f"Requires 5 dims torch.Tensor[N, C, D, H, W], got {tensor.shape}"
        n = tensor.shape[0]
        spatial = [int(size) for size in tensor.shape[2:]]
        apply_proba = np.random.uniform(size=(n,))
        shift_size = np.random.uniform(low=self.shift_limit[:, 0], high=self.shift_limit[:, 1], size=(n, 3))
        # Fractions -> whole voxels, truncated toward zero.
        shifts = (np.array(spatial)[np.newaxis, :] * shift_size).astype(np.int64).tolist()
        out = torch.zeros_like(tensor)
        for i in range(n):
            if apply_proba[i] > self.p:
                out[i] = tensor[i]  # Unchanged.
                continue
            # A positive shift moves content toward higher indices: the destination
            # window starts at `s`, the source window ends `s` short (and vice versa).
            dst = tuple(slice(max(0, s), min(size, size + s)) for s, size in zip(shifts[i], spatial))
            src = tuple(slice(max(0, -s), min(size, size - s)) for s, size in zip(shifts[i], spatial))
            out[(i, slice(None)) + dst] = tensor[(i, slice(None)) + src]
        return out


def get_augmentation(split="train") -> nn.Sequential:
    """
    Build the augmentation pipeline for a data split.
    :param split: "train" returns the random augmentations; "val" and "test" return an empty pipeline.
    :return: nn.Sequential: requires input: torch.FloatTensor[N, C, D, H, W] in range[0., 1.]
    :raises ValueError: for any other ``split``.
    """
    if split not in ("test", "val", "train"):
        raise ValueError(f"Argument `split` must in {{'train', 'val', 'test'}}, got {split}")

    transforms = []
    if split == "train":
        transforms = [
            K.augmentation.RandomAffine3D(degrees=(5., 5., 90.), translate=(.05, .05, .05), scale=(.98, 1.02), p=.3),
            K.augmentation.RandomHorizontalFlip3D(p=.3),
            # Disabled alternatives:
            #   K.augmentation.RandomVerticalFlip3D(p=.1),
            #   K.augmentation.RandomRotation3D((0., 0., 90.), p=1.0)
            RandomShift3D(shift_limit=0.2, p=.3),
            RandomInvert3D(p=.1),
        ]

    pipeline = nn.Sequential(*transforms)
    # Augmentations are not trained.
    pipeline.requires_grad_(False)
    return pipeline


def plot_grid(t: torch.Tensor) -> None:
    """
    Plot the middle slice (along the first remaining axis) of every volume in a batch.
    The volumes are laid out row by row on a square grid; each title shows the
    mean and standard deviation of the whole volume.
    :argument: t: torch.Tensor[N, C, D, H, W]
    """
    from itertools import product
    if len(t) == 0:
        return  # Nothing to draw; a 0x0 subplot grid would raise.
    a = int(np.ceil(np.sqrt(len(t))))
    # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid, so the
    # axes[i, j] indexing below works for a single volume as well.
    fig, axes = plt.subplots(a, a, figsize=(14, 14), squeeze=False)
    for nth, (i, j) in zip(range(len(t)), product(range(a), range(a))):
        nth_img = t[nth].squeeze(0).numpy()
        nth_img_mid = nth_img[len(nth_img) // 2]
        mean, std = np.mean(nth_img), np.std(nth_img)
        axes[i, j].imshow(nth_img_mid, cmap=plt.cm.gray)
        axes[i, j].set_title(f"mean: {mean:.4f}, std: {std:.4f}")
        axes[i, j].set_axis_off()
    plt.show()


# # Check the effect of augmentation.
# ds_ = MRIVoxelDataset(df_train, labels_train, (64, 128))
# dl_ = DataLoader(ds_, batch_size=16, shuffle=True, num_workers=6)
# aug_ = get_augmentation(split="train")
# for voxel, label, (_, _) in dl_:
#     voxel = aug_(voxel)
#     plot_grid(voxel)
#     break

In [None]:
# Parameters to construct Net (passed positionally to load_model / Net further down)
in_channels = 1
feature_out_channels = 576
hidden_features = 512
n_classes = 1
# Embedding table sizes: one entry per known series type / acquisition plane.
n_series = len(mri_series)
n_planes = len(planes)
residual_config = get_residual_config()

# Training Parameters (batch_size and num_workers are also used by the test DataLoader)
batch_size = 16
epochs = 18
lr = 3e-4
num_workers = 6
weight_decay = 1e-5

In [None]:
# Target voxel size handed to the dataset builder below.
voxel_size = (64, 64, 64)
# Integer codes (via mri_series_map) of the series types to include: all four.
including_series = np.array([
    mri_series_map["FLAIR"],
    mri_series_map["T1w"],
    mri_series_map["T1wCE"],
    mri_series_map["T2w"],
], dtype=np.int64)
# Annotation-only stand-in for "numpy array"; subscriptable because it is typing's Iterable.
NumpyNDArray = Iterable

def get_dataset_in_pipeline(img_dir: Path, including_series: NumpyNDArray[np.int64],
                            voxel_size: Union[int, Tuple[int, int], Tuple[int, int, int]] = (64, 256, 256),
                            glob: str = None,
                            df_labels: pd.DataFrame = None, drop_ids: List[str] = None):
    """Collect metadata under ``img_dir`` and wrap it in an ``MRIVoxelDataset``.

    The "plane" and "series_description" columns are mapped through
    ``planes_map`` and ``mri_series_map``, the rows are filtered with
    ``keep_non_blank``, and — only when ``df_labels`` is given — ``drop_ids``
    are removed from both the labels and the metadata.
    """
    meta = get_meta_from_glob(img_dir, glob)
    for column, code_map in (("plane", planes_map), ("series_description", mri_series_map)):
        meta.loc[:, column] = meta.loc[:, column].map(code_map)
    meta = keep_non_blank(meta)

    if df_labels is not None:
        df_labels = drop_by_id(df_labels, drop_ids)
        meta = drop_by_id(meta, drop_ids)

    return MRIVoxelDataset(meta, df_labels, voxel_size, including_series)

In [None]:
def is_retrievable(df: pd.DataFrame,
                   patient_id: str,
                   series_desc_idx: int):
    """Return True when ``df`` has at least one row for this patient id and series code."""
    same_patient = df["BraTS21ID"].eq(patient_id)
    same_series = df["series_description"].eq(series_desc_idx)
    return bool((same_patient & same_series).any())


def get_voxel_by_id_series(df: pd.DataFrame,
                           patient_id: str,
                           series_desc_idx: int = 0,
                           size: Union[int, Tuple[int, int]] = 256) -> Tuple[np.ndarray, int]:
    """
    Load every DICOM slice of one patient/series, ordered along the scan axis, as one stacked array.

    :params:
        :df: required columns: [dcm_path, BraTS21ID, series_description, plane,
                                image_position_x, image_position_y, image_position_z]
        :patient_id: value matched against the BraTS21ID column.
        :series_desc_idx: value matched against the series_description column.
        :size: in-plane size passed to the resize call; a single number means a square.
    :return: (voxel, plane) — the stacked float slices and the single plane value of the series.
    :raises AssertionError: when no row matches, or the matched rows mix several planes.
    """
    size = (int(size), int(size)) if isinstance(size, (int, float)) else size
    retrieved_idx = (df["BraTS21ID"].eq(patient_id)) & (df["series_description"].eq(series_desc_idx))
    assert retrieved_idx.sum() > 0, "Nothing retrived."
    retrieved_df = df.loc[retrieved_idx].copy()
    plane = retrieved_df["plane"].unique()
    assert len(plane) == 1, "Different plane in a folder."
    # Sort by the position column that varies most first: that is taken to be the slice axis.
    img_pos_cols = [c for c in retrieved_df.columns if c.startswith("image_position_")]
    img_pos_stds = np.array([retrieved_df[c].std() for c in img_pos_cols])
    img_pos_argsort = np.argsort(img_pos_stds)[::-1]
    sorted_df = retrieved_df.sort_values([img_pos_cols[i] for i in img_pos_argsort], ascending=True, ignore_index=True)
    voxel_stack = list()
    for row in sorted_df.itertuples():
        dcm_obj = pydicom.read_file(row.dcm_path)
        array = dcm_obj.pixel_array
        array = cv.resize(array, size)
        dinfo = np.iinfo(array.dtype) if np.issubdtype(array.dtype, np.integer) else np.finfo(array.dtype)
        array = (array / dinfo.max).astype(np.float32)  # like (a / 255) if a.dtype is uint8
        # (0028,0004) is Photometric Interpretation. Compare its `.value`: the
        # DataElement object itself never compares equal to a plain string.
        if dcm_obj[0x0028, 0x0004].value == "MONOCHROME1":
            # The slice is already scaled to [0, 1], so invert against 1.0
            # rather than against the integer dtype maximum.
            array = 1.0 - array
        voxel_stack.append(array)
    voxel = np.stack(voxel_stack)
    # Shift the minimum to 0, then divide by the pre-shift maximum (floored at 1e-8).
    # NOTE(review): this equals true min-max scaling only when the minimum is 0 — confirm intent.
    voxel = (voxel - np.min(voxel)) / max(np.max(voxel), 1e-8)
    return voxel, plane[0]


def plot_voxel(voxel, max_n_plots=10, cols=10):
    """Show the first ``max_n_plots`` slices of ``voxel`` on a grid that is ``cols`` wide.

    :param voxel: array indexed as ``voxel[i, :, :]``, one 2-D slice per ``i``.
    :param max_n_plots: upper bound on the number of slices drawn.
    :param cols: number of grid columns.
    """
    actual_n_plots = min(max_n_plots, len(voxel))
    if actual_n_plots <= 0:
        return  # Nothing to draw; a grid with zero rows would raise.
    rows = int(np.ceil(actual_n_plots / cols))
    # squeeze=False keeps `axes` two-dimensional even when there is a single
    # row (the default 10 plots / 10 columns case) or a single column.
    fig, axes = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows), tight_layout=True, squeeze=False)
    for i in range(actual_n_plots):
        ax = axes[i // cols, i % cols]
        ax.imshow(voxel[i, :, :], cmap=plt.cm.gray)
        ax.set_axis_off()
    plt.show()

In [None]:
# Test-split dataset: no labels and no drop list are passed, so nothing is filtered by id.
ds_test = get_dataset_in_pipeline(data_dir / "test", including_series, voxel_size)

In [None]:
@torch.no_grad()
def inference_by_models(loader: DataLoader, models_list: List[Net], aug_list, device):
    """Run every model over ``loader`` and average the predictions per patient.

    Each model's sigmoid outputs are written to a ``MGMT_value_<i>`` column of a
    copy of ``loader.dataset.label_df``. Rows are filled by position, so the
    loader must iterate the dataset in ``label_df`` row order (no shuffling).
    Predictions are averaged per ``BraTS21ID`` and then across models.

    :param loader: yields ``(voxel, _, (series_desc, plane))`` batches.
    :param models_list: models to ensemble; each is moved to ``device`` and put in eval mode.
    :param aug_list: callable applied to ``voxel`` before it is moved to ``device``.
    :param device: device the batches and models are moved to.
    :return: DataFrame with the columns ``BraTS21ID`` and ``MGMT_value``.
    """
    df_copy: pd.DataFrame = loader.dataset.label_df.copy()
    batch_size = loader.batch_size
    pred_cols = []
    for i, model in enumerate(models_list):
        pred_col = f"MGMT_value_{i}"
        pred_cols.append(pred_col)
        df_copy.loc[:, pred_col] = np.full_like(df_copy["BraTS21ID"], np.nan, dtype=np.float64)
        pred_col_idx = df_copy.columns.get_loc(pred_col)
        model.to(device)
        model.eval()
        for n, (voxel, _, (series_desc, plane))  in tqdm(enumerate(loader),
                                                         desc=f"Inferencing with model idx {i}", total=len(loader)):
            voxel, series_desc, plane = aug_list(voxel).to(device), series_desc.to(device), plane.to(device)
            out = model(voxel, series_desc, plane)
            pred_proba = torch.sigmoid(out)[:, 0]
            first_row = n * batch_size
            df_copy.iloc[first_row:first_row + len(voxel), pred_col_idx] = pred_proba.cpu().numpy()
    # Average only the prediction columns: label_df may carry other (possibly
    # non-numeric) columns that a blanket groupby().mean() cannot aggregate.
    df = df_copy.groupby("BraTS21ID")[pred_cols].mean()
    df["MGMT_value"] = df.loc[:, pred_cols].mean(axis=1)
    df = df.reset_index()
    submission = df.loc[:, ["BraTS21ID", "MGMT_value"]].copy()
    return submission


# Inference script: empty (test-time) augmentation, load all matching checkpoints,
# predict on the test loader and write the submission CSV.
aug_list = get_augmentation(split="test")
# Checkpoints with "whole" in the name are always used; per-fold ones only when
# USE_CROSS_VALIDATION is set.
models_path = list(sorted(models_dir.glob(f"{model_name}*whole*best-state_dict.pt")))
if USE_CROSS_VALIDATION:
    models_path.extend(list(sorted(models_dir.glob(f"{model_name}*fold*best-state_dict.pt"))))
print(models_path)
# The positional arguments after `path` are forwarded to Net in its constructor order.
models_list = [load_model(path,
                          in_channels,
                          feature_out_channels,
                          hidden_features,
                          n_classes,
                          n_series,
                          n_planes,
                          residual_config,
) for path in models_path]
# No shuffle argument is passed here; inference_by_models writes predictions by row position.
dl_test = DataLoader(ds_test, batch_size=batch_size, num_workers=num_workers)
submission_mob = inference_by_models(dl_test, models_list, aug_list, device)
submission_mob.to_csv("submission_mob.csv", index=False)

### [TF] Simple Prediction with 3DCNN

In [None]:
# Import dependencies 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline

import os, sys, glob, gc 
import math, re, random, time
from tqdm import tqdm 
import cv2, pydicom

from sklearn.model_selection import StratifiedKFold 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Params for the TensorFlow pipeline below.
config = {
    'data_path': '../input/rsna-miccai-brain-tumor-radiogenomic-classification',
    'model_path': '../input/keras-3d-efficientnet-imagenet-weights-b0b7/efficientnet3d_keras/efficientnet-b0_inp_channel_3_tch_0_top_False.h5',
    'input_path': '../input', 
    'output_path': './',
    'num_3d': 16,       # slices taken per scan (depth of the 3D input)
    'img_size': 64,     # each slice is resized to img_size x img_size
    'n_gradients': 16,  # default accumulation count of GradAcumModel
    'nfolds': 5, 
    'batch_size': 16,
    'learning_rate': 1e-4,
    'num_epochs': 10
}

# Shorthand for tf.data's autotune sentinel.
AUTO = tf.data.AUTOTUNE

# For reproducible results
def seed_all(s):
    """Seed Python's, NumPy's and TensorFlow's RNGs with ``s`` and set two determinism env vars."""
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(s)
    os.environ.update({
        'TF_CUDNN_DETERMINISTIC': '1',
        'PYTHONHASHSEED': str(s),
    })
# Seed once; the same value is reused as random_state for the fold split.
global_seed = 42
seed_all(global_seed)

# NOTE: input_modality is reassigned to ["FLAIR"] in the counting cells further down.
input_modality = ["FLAIR", "T1w", "T1wCE", "T2w"]
modality_list = ["FLAIR", "T1w", "T2w"] 

# Dataset locations derived from config['data_path'].
train_folder = os.path.join(config['data_path'], 'train')
test_folder = os.path.join(config['data_path'], 'test')
sample_submission_path = os.path.join(config['data_path'], 'sample_submission.csv')

# The test frame starts as a copy of the sample submission.
train_df = pd.read_csv(os.path.join(config['data_path'], 'train_labels.csv')); print(train_df.shape)
sample_df = pd.read_csv(sample_submission_path); print(sample_df.shape)
test_df = sample_df.copy(); print(test_df.shape)

In [None]:
# Build each patient's folder path from its BraTS21ID.
# The folder name is the id zero-padded to five digits.

train_df['imfolder'] = ['{:05d}'.format(s) for s in train_df['BraTS21ID']]
train_df['path'] = [os.path.join(train_folder, s) for s in train_df['imfolder']]
train_df

In [None]:
# Count the entries in each patient's FLAIR folder (0 when the folder is missing).

#input_modality = ["FLAIR", "T1w", "T1wCE", "T2w"] 
input_modality = ["FLAIR"] 
for modality in input_modality:   
    modality_count = []
    for i in range(len(train_df)):
        sample_folder = train_df['path'].iloc[i]
        modality_folder = os.path.join(sample_folder, modality)
        if os.path.exists(modality_folder):
            # os.listdir counts every directory entry, not only *.dcm files.
            modality_count.append(len(os.listdir(modality_folder)))
        else:
            modality_count.append(0)
        
    train_df[f'{modality}_count'] = modality_count    
    
# Keep patients with at least 16 FLAIR entries (the same number as config['num_3d']).
# reset_index() without drop=True keeps the old index as an "index" column.
train_df = train_df.query("FLAIR_count >= 16").reset_index()
    
train_df

In [None]:
# Stratified k-fold (n=5) assignment for cross-validation.
# (The author notes that only hold-out validation is actually run in this notebook.)

skf = StratifiedKFold(n_splits=config['nfolds'], shuffle=True, random_state=global_seed)

# Each row gets the number of the fold in which it is a validation sample.
# val_index holds positions; .loc works here because train_df's index was reset above.
for index, (train_index, val_index) in enumerate(skf.split(X=train_df.index, y=train_df.MGMT_value)):
    train_df.loc[val_index, 'fold'] = index
    
print(train_df.groupby(['fold', train_df.MGMT_value]).size())

In [None]:
# Same folder-path construction as for train_df, rooted at test_folder.
test_df['imfolder'] = ['{:05d}'.format(s) for s in test_df['BraTS21ID']]
test_df['path'] = [os.path.join(test_folder, s) for s in test_df['imfolder']]
test_df

In [None]:
# Count the entries in each test patient's FLAIR folder (0 when the folder is missing).
#input_modality = ["FLAIR", "T1w", "T1wCE", "T2w"] 
input_modality = ["FLAIR"] 

for modality in input_modality:   
    modality_count = []
    for i in range(len(test_df)):
        sample_folder = test_df['path'].iloc[i]
        modality_folder = os.path.join(sample_folder, modality)
        if os.path.exists(modality_folder):
            modality_count.append(len(os.listdir(modality_folder)))
        else:
            modality_count.append(0)
        
    test_df[f'{modality}_count'] = modality_count    
    
# NOTE(review): this filter removes test patients with fewer than 16 FLAIR entries,
# so they will have no prediction row — confirm the submission handles them.
test_df = test_df.query("FLAIR_count >= 16").reset_index()

test_df

In [None]:
# Splits a string into single non-digit characters and maximal runs of ASCII digits.
_NATURAL_SORT_TOKEN = re.compile(r'[^0-9]|[0-9]+')


def _natural_sort_key(path):
    """Sort key that compares embedded digit runs as integers, so '...-2.dcm' sorts before '...-10.dcm'."""
    return [int(tok) if tok.isdigit() else tok for tok in _NATURAL_SORT_TOKEN.findall(path)]


def get_img_path_3d(df, index, mri_type='FLAIR'):
    """Return the paths of the central ``config['num_3d']`` DICOM files of one scan.

    :param df: frame with a 'path' column (patient folder) and a '<mri_type>_count' column.
    :param index: label used to select the row in those columns.
    :param mri_type: sub-folder of the patient folder to read.
    :return: naturally sorted ``*.dcm`` paths in a window centred on the middle
        of the scan; shorter than ``config['num_3d']`` when the scan has too few files.
    """
    patient_path = df['path'][index]
    modality_path = os.path.join(patient_path, mri_type)
    # The midpoint comes from the pre-computed entry count of the folder,
    # not from the number of *.dcm files matched below.
    total_img_num = df[f'{mri_type}_count'][index]

    files = sorted(glob.glob(f"{modality_path}/*.dcm"), key=_natural_sort_key)

    mid_num = total_img_num // 2
    num_3d2 = config['num_3d'] // 2
    start_idx = max(0, mid_num - num_3d2)
    end_idx = min(len(files), mid_num + num_3d2)

    return files[start_idx:end_idx]

@tf.function
def preprocessing_img(img, threashold=5):
    """Centre ``img``, scale it, shift its minimum to 0 and cap values at ``threashold``.

    NOTE(review): the scaling divides by the variance, not the standard
    deviation — confirm that this is intended.
    """
    centered = img - tf.math.reduce_mean(img)
    scaled = centered / tf.math.reduce_variance(centered)
    shifted = scaled - tf.math.reduce_min(scaled)
    # Values below the threshold are kept; everything else becomes the threshold.
    return tf.where(shifted<threashold, shifted, threashold)

    
class ImageGenerator(tf.keras.utils.Sequence):
    """Sequence yielding one resized slice stack per row of ``df``.

    :param df: frame consumed by ``get_img_path_3d``.
    :param mri_type: modality sub-folder to read for every row.
    """

    def __init__(self, df, mri_type='FLAIR'):
        super().__init__()
        self.df = df
        self.mri_type = mri_type

    def __len__(self):
        """Number of rows in ``df`` (one item per row)."""
        return len(self.df)

    def __getitem__(self, index):
        """Return a float32 tensor of shape [img_size, img_size, n_slices, 1] for row ``index``."""
        # Forward the configured modality; previously the helper's default was always used.
        paths = get_img_path_3d(self.df, index, self.mri_type)
        img_list = []
        for path in paths:
            dicom = pydicom.read_file(path)
            img = dicom.pixel_array
            img = np.expand_dims(img, -1)  # add a channel axis
            img = tf.convert_to_tensor(img, dtype=tf.float32)
            img = tf.image.resize(img, [config['img_size'], config['img_size']])
            img = tf.expand_dims(img, -2)  # add the slice axis to concatenate along
            img_list.append(img)
        img_3d = tf.concat(img_list, axis=-2)
        return img_3d
    
    
def parse(x):
    """Decode a serialized float32 tensor and reshape it to [img_size, img_size, num_3d, 1]."""
    decoded = tf.io.parse_tensor(x, out_type=tf.float32)
    volume_shape = [config['img_size'], config['img_size'], config['num_3d'], 1]
    return tf.reshape(decoded, volume_shape)


def build_3d_train_dataloader(train_df, p_fold=0):
    """Build the batched training and validation datasets for one fold.

    Rows with ``fold != p_fold`` form the training split and rows with
    ``fold == p_fold`` the validation split. For each split the image volumes
    are written once to ``<mode>-<p_fold>-img.tfrec`` (an existing file is
    reused), read back, zipped with the ``MGMT_value`` labels, cached to
    ``./cache.tf-<mode>-<p_fold>-data``, shuffled (training only), batched with
    ``drop_remainder=True`` and prefetched.

    :param train_df: frame with 'fold' and 'MGMT_value' columns plus whatever ``ImageGenerator`` needs.
    :param p_fold: fold number held out for validation.
    :return: ``[train_ds, valid_ds]``.
    """
    p_train = train_df.query(f'fold != {p_fold}').reset_index(drop=True)
    p_valid = train_df.query(f'fold == {p_fold}').reset_index(drop=True)

    AUTOTUNE = tf.data.experimental.AUTOTUNE
    volume_shape = tf.TensorShape([config['img_size'], config['img_size'], config['num_3d'], 1])

    train_datasets = []
    for mode, df in zip(['train', 'valid'], [p_train, p_valid]):
        i_g = ImageGenerator(df)
        # Bind the generator as a default argument. The dataset calls the lambda
        # lazily, and a plain closure would by then see the *last* loop value,
        # i.e. the validation generator, for both splits.
        img_ds = tf.data.Dataset.from_generator(lambda i_g=i_g: map(tuple, i_g),
                                                output_types=(tf.float32),
                                                output_shapes=(volume_shape),
                                                 )

        tfrec_path = f'{mode}-{p_fold}-img.tfrec'
        if not os.path.exists(tfrec_path):
            serial_ds = img_ds.map(tf.io.serialize_tensor)
            img_tfrec = tf.data.experimental.TFRecordWriter(tfrec_path)
            img_tfrec.write(serial_ds)
        # Read the volumes back from the TFRecord file, as the test loader does,
        # instead of re-decoding the DICOM files.
        parsed_ds = tf.data.TFRecordDataset(tfrec_path).map(parse, num_parallel_calls=AUTOTUNE)

        labels = df['MGMT_value']
        label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(labels, tf.int32))

        ds = tf.data.Dataset.zip((parsed_ds, label_ds))

        ds = ds.cache(filename=f'./cache.tf-{mode}-{p_fold}-data')
        if mode == 'train':
            # Buffer as large as the split, i.e. a full shuffle.
            train_count = len(df)
            ds = ds.shuffle(buffer_size=train_count)
        ds = ds.batch(config['batch_size'], drop_remainder=True)
        ds = ds.prefetch(buffer_size=AUTOTUNE)
        train_datasets.append(ds)

    return train_datasets

In [None]:
# Build the datasets for fold 0: index 0 is the training split, index 1 the validation split.
p_fold = 0

train_datasets = build_3d_train_dataloader(train_df, p_fold=p_fold)
train_ds = train_datasets[0]
valid_ds = train_datasets[1]

# Sanity check: print the shapes of one batch from each split.
for d, l in train_ds.take(1):
    print('Train Data shape: ', d.shape)
    print('Train Label shape: ', l.shape)
    
for d, l in valid_ds.take(1):
    print('Valid Data shape: ', d.shape)
    print('Valid Label shape: ', l.shape)

In [None]:
# Test dataset: image volumes only, no labels.
def build_3d_test_dataloader(test_df):
    """Return the batched, prefetched dataset of test volumes.

    The volumes are serialized once to ``test-img.tfrec`` (an existing file is
    reused), read back through ``parse``, cached to ``./cache.tf-test-data``
    and batched without dropping the final partial batch.
    """
    autotune = tf.data.experimental.AUTOTUNE
    record_file = 'test-img.tfrec'

    generator = ImageGenerator(test_df)
    volume_shape = tf.TensorShape([config['img_size'], config['img_size'], config['num_3d'], 1])
    raw_ds = tf.data.Dataset.from_generator(
        lambda: map(tuple, generator),
        output_types=(tf.float32),
        output_shapes=(volume_shape),
    )
    serialized = raw_ds.map(tf.io.serialize_tensor)

    if not os.path.exists(record_file):
        tf.data.experimental.TFRecordWriter(record_file).write(serialized)

    return (
        tf.data.TFRecordDataset(record_file)
        .map(parse, num_parallel_calls=autotune)
        .cache(filename='./cache.tf-test-data')
        .batch(config['batch_size'], drop_remainder=False)
        .prefetch(buffer_size=autotune)
    )

In [None]:
# Build the test dataset and print the shape of its first batch as a sanity check.
test_ds = build_3d_test_dataloader(test_df)

for d in test_ds.take(1):
    print('Test Data shape: ', d.shape)

In [None]:
def get_3d_model(width=config['img_size'], height=config['img_size'], depth=config['num_3d']):
    """Build a 3D convolutional neural network model.

    Input shape is (width, height, depth, 1); the output is a single sigmoid unit.
    The default sizes are read from ``config`` once, when the function is defined.
    """

    inputs = keras.Input((width, height, depth, 1))
    
    # NOTE(review): the next two blocks are also applied to `inputs`, so the `x`
    # computed by this block (and by the second one) is overwritten and never
    # reaches the output. Chaining all six blocks would halve the default depth
    # of 16 six times — verify the intended architecture before rewiring.
    x = layers.Conv3D(filters=32, kernel_size=3, padding='same', activation="relu")(inputs)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    
    x = layers.Conv3D(filters=32, kernel_size=3, padding='same', activation="relu")(inputs)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    
    # First block whose output is actually used downstream.
    x = layers.Conv3D(filters=64, kernel_size=3, padding='same', activation="relu")(inputs)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.01)(x)
    
    x = layers.Conv3D(filters=128, kernel_size=3, padding='same', activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.02)(x)

    x = layers.Conv3D(filters=256, kernel_size=3, padding='same', activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.03)(x)

    x = layers.Conv3D(filters=512, kernel_size=3, padding='same', activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.04)(x)

    # Classification head.
    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=1024, activation="relu")(x)
    x = layers.Dropout(0.08)(x)

    outputs = layers.Dense(units=1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs, name="3dcnn")

    return model


# Build the functional model once to print its summary.
# `model` is rebound to a BrainTumorModel3D instance in a later cell.
model = get_3d_model()
model.summary()

In [None]:
class BrainTumorModel3D(tf.keras.Model):
    """Subclassed Keras model that wraps the functional network from ``get_3d_model``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)        
        self.cnn = get_3d_model()
        
    @tf.function
    def call(self, input_tensor, training=False, **kwargs):
        # NOTE(review): `training` is accepted but not passed on to self.cnn —
        # confirm that the wrapped Dropout/BatchNormalization layers still pick
        # up the intended mode.
        x = self.cnn(input_tensor)
        return x
    
    def build_graph(self, raw_shape):
        """Return a functional ``tf.keras.Model`` over a symbolic input of shape ``raw_shape``."""
        x = tf.keras.layers.Input(shape=raw_shape)
        return tf.keras.Model(inputs=[x], outputs=self.call(x))


# Use the GPU device reported by TensorFlow when one is available, otherwise the CPU.
# NOTE(review): tf.test.is_gpu_available is presumably deprecated in favour of
# tf.config.list_physical_devices('GPU') — confirm against the installed TF version.
if tf.test.is_gpu_available():
    device_name = tf.test.gpu_device_name()
else:
    device_name = 'cpu:0'

# Create the model inside the chosen device scope.
with tf.device(device_name):
    model = BrainTumorModel3D()

In [None]:
# Optimizer, loss and metrics used by train_step / valid_step below.
optimizer = tf.keras.optimizers.Adam(learning_rate=config['learning_rate'])

# from_logits=False: the loss is given probabilities (the network ends in a sigmoid).
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)

train_acc_metric = tf.keras.metrics.BinaryAccuracy()
val_acc_metric = tf.keras.metrics.BinaryAccuracy()

# NOTE(review): these two callbacks are only constructed here; the hand-written
# epoch loop below does not reference them — confirm whether they are used elsewhere.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=config['output_path'],
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True)

@tf.function
def train_step(x, y):
    """Run one optimisation step on batch ``(x, y)`` and return its loss.

    Uses the module-level ``model``, ``loss_fn`` and ``optimizer``, and updates
    ``train_acc_metric`` with the batch predictions.
    """
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        batch_loss = loss_fn(y, predictions)

    weights = model.trainable_weights
    gradients = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))

    train_acc_metric.update_state(y_true=y, y_pred=predictions)
    return batch_loss


@tf.function
def valid_step(x, y):
    """Evaluate batch ``(x, y)`` with the model in inference mode and return its loss.

    Uses the module-level ``model`` and ``loss_fn``, and updates
    ``val_acc_metric`` with the batch predictions.
    """
    predictions = model(x, training=False)
    batch_loss = loss_fn(y, predictions)
    val_acc_metric.update_state(y_true=y, y_pred=predictions)
    return batch_loss

In [None]:
# Per-epoch mean losses, kept for inspection after training.
train_history = []
valid_history = []

for epoch in range(config['num_epochs']):
    t = time.time()

    train_loss_list = []
    val_loss_list = []

    # One full pass over the training data ...
    for x, y in train_ds:
        train_batch_loss = train_step(x, y)
        train_loss_list.append(train_batch_loss)

    # ... followed by one pass over the validation data.
    for x, y in valid_ds:
        val_batch_loss = valid_step(x, y)
        val_loss_list.append(val_batch_loss)

    # Epoch loss is the unweighted mean of the batch losses.
    train_loss = sum(train_loss_list) / len(train_loss_list)
    val_loss = sum(val_loss_list) / len(val_loss_list)

    train_acc = train_acc_metric.result()
    val_acc = val_acc_metric.result()

    train_history.append(train_loss)
    valid_history.append(val_loss)

    # Fix: the values used to be passed as (value, '.3f') tuples, which printed
    # the raw tuples. The precision now lives in the template and the scalar
    # tensors are converted to floats. The first field is the epoch's elapsed
    # time in minutes.
    template = 'ETA: {} -- epoch: {}, loss: {:.3f}  acc: {:.3f}  val_loss: {:.3f}  val_acc: {:.3f}\n'
    print(template.format(
        round((time.time() - t) / 60, 2), epoch + 1,
        float(train_loss), float(train_acc),
        float(val_loss), float(val_acc)))

    # Metrics accumulate across batches, so clear them before the next epoch.
    train_acc_metric.reset_states()
    val_acc_metric.reset_states()

In [None]:
class GradAcumModel(tf.keras.Model):
    """Keras model wrapper that applies optimizer updates every ``n_gradients`` batches.

    Each ``train_step`` adds the batch gradients to non-trainable buffers; once
    ``n_gradients`` steps have been counted, the summed (not averaged) gradients
    are handed to the optimizer and the buffers and counter are reset.

    NOTE(review): ``compiled_loss``, ``compiled_metrics`` and ``optimizer`` are
    presumably provided by ``compile()`` -- confirm the model is compiled before use.
    """

    # The default of n_gradients is read from config once, when the class is defined.
    def __init__(self, model, n_gradients=config['n_gradients'], *args, **kwargs):
        super(GradAcumModel, self).__init__(*args, **kwargs)
        self.model = model
        # Number of batches to accumulate before one optimizer update.
        self.n_gradients = tf.constant(n_gradients, dtype=tf.int32)
        # Batches accumulated since the last update.
        self.n_acum_step = tf.Variable(0, dtype=tf.int32, trainable=False)
        # One zero-initialised buffer per trainable variable of the wrapped model.
        self.gradient_accumulation = [tf.Variable(tf.zeros_like(v, dtype=tf.float32),
                                                  trainable=False)
                                       for v in self.model.trainable_variables]

    @tf.function
    def train_step(self, data):
        """Accumulate gradients for one batch; return the current metric values by name."""
        self.n_acum_step.assign_add(1)
        images, labels = data

        with tf.GradientTape() as tape:
            predictions = self.model(images, training=True)
            loss = self.compiled_loss(labels, predictions)

        gradients = tape.gradient(loss, self.model.trainable_variables)

        # Add this batch's gradients to the running sums.
        for i in range(len(self.gradient_accumulation)):
            self.gradient_accumulation[i].assign_add(gradients[i])

        # Once n_acum_step reaches n_gradients, apply the accumulated gradients
        # to the variables; otherwise do nothing this step.
        tf.cond(tf.equal(self.n_acum_step, self.n_gradients),
                self.apply_accu_gradients, lambda: None)
        
        self.compiled_metrics.update_state(labels, predictions)
        return {m.name: m.result() for m in self.metrics}

    def apply_accu_gradients(self):
        """Apply the accumulated gradients, then zero the buffers and the step counter."""
        self.optimizer.apply_gradients(zip(self.gradient_accumulation,
                                           self.model.trainable_variables))
        
        # Reset the counter and every accumulation buffer.
        self.n_acum_step.assign(0)
        for i in range(len(self.gradient_accumulation)):
            self.gradient_accumulation[i].assign(
                tf.zeros_like(self.model.trainable_variables[i], dtype=tf.float32)
            )

    @tf.function
    def test_step(self, data):
        """Evaluate one batch in inference mode; return the current metric values by name."""
        images, labels = data

        predictions = self.model(images, training=False)
        # The returned value is not used further in this method.
        loss = self.compiled_loss(labels, predictions)
        self.compiled_metrics.update_state(labels, predictions)
        return {m.name: m.result() for m in self.metrics}

    def call(self, inputs, *args, **kwargs):
        """Forward ``inputs`` to the wrapped model; extra arguments are ignored."""
        return self.model(inputs)

# Wrap the existing model so that gradients are accumulated over 4 batches.
# NOTE(review): no compile()/fit() call on grad_acum_model is visible nearby --
# confirm the wrapper is actually trained somewhere.
with tf.device(device_name):
    grad_acum_model = GradAcumModel(model, n_gradients=4)

In [None]:
# Predict on the test data with `model`.
# NOTE(review): Keras rejects `batch_size` when the input is an already-batched
# dataset or generator -- confirm what kind of object test_ds is.
proba = model.predict(test_ds, batch_size=config['batch_size'], verbose=1)
proba

In [None]:
# Store the predictions on test_df, then copy them into the submission frame.
# NOTE(review): the Series assignment aligns on index -- this assumes test_df and
# sample_df share the same index; confirm.
test_df['prediction'] = proba
sample_df['MGMT_value'] = test_df['prediction']
sample_df

In [None]:
# Write the 3D-CNN predictions to disk without the index column.
sample_df.to_csv("submission_3dcnn.csv", index=False)

In [None]:
# Keep an independent copy; the final ensembling cell reads submission_3dcnn.
submission_3dcnn = sample_df.copy()

### [RSNA-MICCAI] Monai - ensemble

In [None]:
import os
import sys 
import json
import glob
import random
import re
import collections
import time

import numpy as np
import pandas as pd
import pydicom
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

In [None]:
# Root of the competition data.
data_directory = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification'
# MONAI sources as provided under /kaggle/input, and the directory they are
# copied to before being added to sys.path.
input_monaipath = "/kaggle/input/monai-v060-deep-learning-in-healthcare-imaging/"
monaipath = "/kaggle/tmp/monai/"

In [None]:
!mkdir -p {monaipath}
!cp -r {input_monaipath}/* {monaipath}

In [None]:
# MRI sequence types; at inference time they are paired one-to-one with the
# checkpoint files.
mri_types = ['FLAIR', 'T1w', 'T1wCE', 'T2w']
SIZE = 256  # default side length each slice is resized to
NUM_IMAGES = 64  # default number of slices sampled per scan
# NOTE(review): the next two and LEARNING_RATE / LR_DECAY are not referenced by
# the visible inference code -- presumably left over from training; confirm.
BATCH_SIZE = 4
N_EPOCHS = 16
SEED = 12345  # used for set_seed and the train/validation split
LEARNING_RATE = 0.0005
LR_DECAY = 0.9

# Make the copied MONAI sources importable.
sys.path.append(monaipath)

from monai.networks.nets.densenet import DenseNet121

In [None]:
def load_dicom_image(path, img_size=SIZE):
    """Read one DICOM slice and return it as an ``img_size`` x ``img_size`` array.

    Args:
        path: Path of the ``.dcm`` file to read.
        img_size: Side length of the returned square image.

    Returns:
        The slice's pixel array resized with ``cv2.resize``. A constant slice
        (minimum equals maximum) is returned as an all-zero array instead.
        No intensity scaling is applied here.
    """
    # ``pydicom.read_file`` is a deprecated alias that pydicom 3.0 removed;
    # ``dcmread`` is the supported reader and behaves the same.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array

    # A constant slice carries no information; return zeros of the target size.
    if np.min(data) == np.max(data):
        return np.zeros((img_size, img_size))

    return cv2.resize(data, (img_size, img_size))


def natural_sort(l):
    """Return the strings of ``l`` sorted in natural (human) order.

    Runs of ASCII digits compare numerically and everything else compares
    case-insensitively, so ``"Image-2.dcm"`` sorts before ``"Image-10.dcm"``.

    Args:
        l: Iterable of strings.

    Returns:
        A new sorted list; the input is not modified.
    """
    def sort_key(text):
        # re.split with a single capturing group alternates text / digit-run,
        # so odd positions are exactly the ASCII digit runs. Converting by
        # position (rather than str.isdigit, which also accepts non-ASCII
        # digits the pattern never splits out) keeps str and int keys aligned.
        parts = re.split('([0-9]+)', text)
        return [int(part) if pos % 2 else part.lower()
                for pos, part in enumerate(parts)]

    return sorted(l, key=sort_key)


def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=SIZE, mri_type="FLAIR", split="train"):
    """Load one MRI series as a min-max normalised 3D volume.

    Args:
        scan_id: Name of the scan folder (e.g. ``"00000"``).
        num_imgs: Number of slices to sample from the series.
        img_size: Side length each slice is resized to.
        mri_type: Sub-folder of the scan holding the series.
        split: Sub-folder of ``data_directory`` holding the scan.

    Returns:
        Array of shape ``(1, img_size, img_size, num_imgs)`` scaled to
        ``[0, 1]`` (all zeros when the volume is constant).

    Raises:
        IndexError: If no ``.dcm`` file matches the series path.
    """
    files = natural_sort(glob.glob(f"{data_directory}/{split}/{scan_id}/{mri_type}/*.dcm"))

    # Sample num_imgs evenly spaced slices across the whole series; series with
    # fewer files than num_imgs get slices repeated.
    every_nth = len(files) / num_imgs
    indexes = [min(int(round(i * every_nth)), len(files) - 1) for i in range(num_imgs)]

    # Fix: forward img_size, which used to be accepted but ignored (slices were
    # always resized to the module default).
    slices = [load_dicom_image(files[i], img_size) for i in indexes]

    # Stack to (num_imgs, H, W), then reverse the axes so the slice axis is last.
    img3d = np.stack(slices).T

    # Normalise over the whole volume rather than per slice.
    img3d = img3d - np.min(img3d)
    if np.max(img3d) != 0:
        img3d = img3d / np.max(img3d)

    # Add the leading channel axis.
    return np.expand_dims(img3d, 0)


load_dicom_images_3d("00000", mri_type=mri_types[0]).shape

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    Also exports ``PYTHONHASHSEED`` and, when CUDA is available, seeds every
    GPU generator and switches cuDNN to deterministic mode.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

set_seed(SEED)

In [None]:
# Scan IDs dropped from the labelled set.
# NOTE(review): the reason for excluding these IDs is not visible here --
# presumably scans with known data problems; confirm.
samples_to_exclude = [109, 123, 709]

train_df = pd.read_csv(f"{data_directory}/train_labels.csv")
print("original shape", train_df.shape)
train_df = train_df[~train_df.BraTS21ID.isin(samples_to_exclude)]
print("new shape", train_df.shape)
display(train_df)

# 80/20 train/validation split, stratified on the label and seeded with SEED.
df_train, df_valid = sk_model_selection.train_test_split(
    train_df, 
    test_size=0.2, 
    random_state=SEED, 
    stratify=train_df["MGMT_value"],
)

In [None]:
class Dataset(torch_data.Dataset):
    """Torch dataset yielding one 3D MRI volume per scan ID.

    ``paths`` holds the scan IDs and ``mri_type`` the series name for each item.
    When ``targets`` is given, items are ``{"X", "y"}`` and are always read from
    the ``"train"`` folder; otherwise items are ``{"X", "id"}`` and are read from
    ``split``.
    """

    def __init__(self, paths, targets=None, mri_type=None, split="train"):
        self.paths, self.targets = paths, targets
        self.mri_type, self.split = mri_type, split

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        scan_id = self.paths[index]
        labelled = self.targets is not None

        # Labelled items live under "train" regardless of the configured split.
        source_split = "train" if labelled else self.split
        volume = load_dicom_images_3d(
            str(scan_id).zfill(5),
            mri_type=self.mri_type[index],
            split=source_split,
        )

        if labelled:
            label = torch.tensor(self.targets[index], dtype=torch.float)
            return {"X": volume, "y": label}
        return {"X": volume, "id": scan_id}

In [None]:
def build_model():
    """Return a new MONAI ``DenseNet121`` with 3 spatial dimensions, one input
    channel and one output channel."""
    return DenseNet121(spatial_dims=3, in_channels=1, out_channels=1)

In [None]:
# Checkpoint file names, listed in the same order as mri_types (the two lists
# are paired with zip() at inference time).
# NOTE(review): the names appear to encode MRI type, epoch, loss and AUC -- confirm.
modelfiles = ['FLAIR-e2-loss0.720-auc0.615.pth', 'T1w-e9-loss0.712-auc0.651.pth', 'T1wCE-e8-loss0.703-auc0.588.pth', 'T2w-e4-loss0.722-auc0.611.pth']
print(modelfiles)

In [None]:
def predict(modelfile, df, mri_type, split):
    """Predict MGMT probabilities for every scan in ``df`` with one checkpoint.

    Args:
        modelfile: Checkpoint file name under ``../input/for-densenet/``; the
            checkpoint must contain a ``"model_state_dict"`` entry.
        df: DataFrame whose index holds the scan IDs. It is not modified.
        mri_type: MRI series to load for every scan.
        split: Data sub-folder the scans are read from.

    Returns:
        DataFrame indexed by ``BraTS21ID`` with one ``MGMT_value`` column of
        sigmoid probabilities, in the order of ``df``.

    Uses the module-level ``device``.
    """
    print("Predict:", modelfile, mri_type, df.shape)

    # Every row uses the same series type. Build the per-row list locally rather
    # than writing a helper column into the caller's DataFrame.
    data_retriever = Dataset(
        df.index.values,
        mri_type=[mri_type] * len(df),
        split=split,
    )

    data_loader = torch_data.DataLoader(
        data_retriever,
        batch_size=4,
        shuffle=False,
        num_workers=8,
    )

    model = build_model()
    model.to(device)

    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    checkpoint = torch.load(f'../input/for-densenet/{modelfile}', map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    y_pred = []
    ids = []

    with torch.no_grad():
        for e, batch in enumerate(data_loader, 1):
            print(f"{e}/{len(data_loader)}", end="\r")
            # as_tensor avoids re-copying the already-collated tensor (and the
            # warning torch.tensor() raises when given a tensor).
            inputs = torch.as_tensor(batch["X"]).float().to(device)
            # Flatten to one probability per scan so that a final batch of
            # size 1 is handled exactly like any other batch.
            probabilities = torch.sigmoid(model(inputs)).reshape(-1)
            y_pred.extend(probabilities.cpu().numpy().tolist())
            ids.extend(batch["id"].numpy().tolist())

    preddf = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
    return preddf.set_index("BraTS21ID")

In [None]:
# Start from the sample submission, indexed by scan ID.
submission_densnet = pd.read_csv(f"{data_directory}/sample_submission.csv", index_col="BraTS21ID")

# Sum the predictions of the four per-MRI-type checkpoints; the Series
# addition aligns on the BraTS21ID index.
submission_densnet["MGMT_value"] = 0
for m, mtype in zip(modelfiles, mri_types):
    pred = predict(m, submission_densnet, mtype, split="test")
    submission_densnet["MGMT_value"] += pred["MGMT_value"]

# Turn the sum into a mean and write it out together with the ID index.
submission_densnet["MGMT_value"] /= len(modelfiles)
submission_densnet["MGMT_value"].to_csv("submission_densnet.csv")

### Ensembling

* densenet score is 0.656
* effnet3d 1 score is 0.684
* bt3d score is 0.683
* mobile net score is 0.667
* effnet3d 2 score is 0.674
* 3dcnn score is 0.663

In [None]:
# Weighted blend of the six models' test predictions; the weights sum to 1.
# Values are combined by position (.values), so every submission frame is taken
# to list the scans in the same row order.
blend_weights = [
    (submission_densnet, 0.05),
    (submission_effnet3d_score_0684, 0.4),
    (submission_bt3d, 0.3),
    (submission_mob, 0.1),
    (submission_effnet3d_score_0674, 0.1),
    (submission_3dcnn, 0.05),
]

fsubmission = submission_mob.copy()
fsubmission['MGMT_value'] = sum(
    frame['MGMT_value'].values * weight for frame, weight in blend_weights
)

In [None]:
# Render the IDs as zero-padded 5-character strings.
fsubmission['BraTS21ID'] = [str(study_id).zfill(5) for study_id in fsubmission['BraTS21ID']]

In [None]:
fsubmission

In [None]:
# Build a lookup from the zero-padded scan ID to the blended prediction.
submissionDF01 = fsubmission.set_index('BraTS21ID')
scoreDict01 = submissionDF01['MGMT_value'].to_dict()
print(scoreDict01)

In [None]:
# Enumerate the study folders actually present in the test set; the folder
# name is the zero-padded scan ID.
listOfStudyPaths = glob.glob('../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/*')
listOfStudies = [os.path.basename(eachPath) for eachPath in listOfStudyPaths]

# Use the blended score when there is one and fall back to 0.5 otherwise.
# Fix: the fallback used to be the string '0.500', which mixed str and float
# values in the same column.
predList = [float(scoreDict01.get(eachStudy, 0.5)) for eachStudy in listOfStudies]

submissionDF = pd.DataFrame({'BraTS21ID': listOfStudies, 'MGMT_value': predList})
submissionDF.to_csv('submission.csv', index=False)

In [None]:
submissionDF