# Setup

In [None]:
# Mount Google Drive at /content/gdrive; the CSV paths used later in this
# notebook all live under that mount point (Colab-only cell).
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# General imports.
import os
import random

import numpy as np
import pandas as pd
import torch

# Specific Imports.

from sklearn.model_selection import GroupKFold
from sklearn.model_selection import StratifiedKFold

import warnings
warnings.simplefilter('ignore')

# Utility Functions

In [None]:
def seed_everything(seed=123):
  """Seed every random number generator this notebook relies on.

  Seeds Python's ``random`` module, NumPy's global generator and PyTorch
  (through both ``torch.manual_seed`` and ``torch.cuda.manual_seed``), and
  stores the seed in the ``PYTHONHASHSEED`` environment variable.

  Args:
    seed: Value handed to every seeding function. Defaults to 123.
  """
  # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
  # setting it here presumably only affects child processes -- confirm.
  os.environ['PYTHONHASHSEED'] = str(seed)

  # The remaining seeders are independent of each other; apply them in turn.
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(seed)

# Hyperparameters

In [None]:
class Config:
  """Notebook-wide hyperparameters.

  The class body calls ``seed_everything`` directly, so the random number
  generators are seeded once when this cell runs (at class-definition
  time), not when ``Config()`` is instantiated.
  """
  # Number of folds used by the cross-validation splitters.
  n_splits = 5
  # Seed passed to seed_everything; kept here so it is tunable alongside the
  # other settings instead of being hidden in a function default.
  seed = 123

  seed_everything(seed)

In [None]:
# Shared configuration instance; later cells read cfg.n_splits from it.
cfg = Config()

# Building the KFold DataFrame

In [None]:
# Directory on the mounted Drive that holds the competition CSV files.
data_dir = r"/content/gdrive/MyDrive/Kaggle Competitions/RSNA-brain-tumor"

# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids mixed-type columns.
# NOTE(review): the stored output of this cell looks like the tail of a pandas
# DtypeWarning; confirm which columns are affected, or pin them with `dtype=`.
det_train_meta = pd.read_csv(os.path.join(data_dir, "detailed_train_meta.csv"), low_memory=False)
train_kfold = pd.read_csv(os.path.join(data_dir, "train_kfold.csv"), low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [None]:
# Show (rows, columns) of both frames right after loading.
det_train_meta.shape, train_kfold.shape

((348641, 51), (253856, 13))

In [None]:
# Patient IDs whose rows are removed from the fold table.
excluded_patient_ids = [109, 123, 709]

# Keep every row that does not belong to an excluded patient, then renumber
# the index so it is contiguous again.
keep_mask = ~train_kfold.PatientID.isin(excluded_patient_ids)
train_kfold = train_kfold[keep_mask].reset_index(drop=True)

In [None]:
# Re-check the shapes: only train_kfold should have lost rows after the
# patient filter above (det_train_meta is not filtered).
det_train_meta.shape, train_kfold.shape

((348641, 51), (253129, 13))

In [None]:
# Add the same "<PatientID>_<SeriesDescription>" key to both frames so rows of
# one can be looked up in the other.
for frame in (train_kfold, det_train_meta):
  patient_as_text = frame.PatientID.astype("string")
  frame["patientid_sd"] = patient_as_text + "_" + frame.SeriesDescription

In [None]:
# Map each patient/series key to the Orientation of its first row in
# det_train_meta. Taking the first row per key in a single pass replaces a
# full-frame boolean scan per unique key, which was quadratic in practice.
first_rows = (
  det_train_meta
  .dropna(subset=["patientid_sd"])  # a missing key cannot be matched later
  .drop_duplicates(subset="patientid_sd", keep="first")
)
orientation_dict = dict(zip(first_rows.patientid_sd, first_rows.Orientation))

In [None]:
# Look up the plane (orientation) of every row through its patient/series
# key. A vectorised map replaces one full-frame .loc assignment per
# dictionary entry.
row_keys = train_kfold.patientid_sd
has_orientation = row_keys.isin(list(orientation_dict))

# Rows whose key is absent from orientation_dict keep the placeholder 0.
train_kfold["plane"] = row_keys.map(orientation_dict).where(has_orientation, 0)

In [None]:
# The previous fold assignments are recomputed per plane/sequence subset
# further down, and the lookup key has served its purpose.
obsolete_columns = [
  "fold_gkf_patientid",
  "fold_skf_patientid",
  "fold_skf_seriesdescription",
  "fold_skf_MGMT_value",
  "fold_gkf_png_filepath",
  "patientid_sd",
]
train_kfold = train_kfold.drop(columns=obsolete_columns)

In [None]:
def _plane_series_subset(plane, series_description):
  """Return the rows of train_kfold for one plane and one series description.

  The result has a fresh 0..n-1 index.
  """
  wanted = (train_kfold.plane == plane) & (train_kfold.SeriesDescription == series_description)
  return train_kfold[wanted].reset_index(drop=True)


# One frame per (plane, series description) combination.
axial_FLAIR = _plane_series_subset("axial", "FLAIR")
coronal_FLAIR = _plane_series_subset("coronal", "FLAIR")
sagittal_FLAIR = _plane_series_subset("sagittal", "FLAIR")

axial_T1w = _plane_series_subset("axial", "T1w")
coronal_T1w = _plane_series_subset("coronal", "T1w")
sagittal_T1w = _plane_series_subset("sagittal", "T1w")

axial_T1wCE = _plane_series_subset("axial", "T1wCE")
coronal_T1wCE = _plane_series_subset("coronal", "T1wCE")
sagittal_T1wCE = _plane_series_subset("sagittal", "T1wCE")

axial_T2w = _plane_series_subset("axial", "T2w")
coronal_T2w = _plane_series_subset("coronal", "T2w")
sagittal_T2w = _plane_series_subset("sagittal", "T2w")

In [None]:
group_kfold = GroupKFold(n_splits=cfg.n_splits)
strat_kfold = StratifiedKFold(n_splits=cfg.n_splits)

# One entry per fold column, listed in the order the columns are created:
# (output column, splitter, column handed to the splitter, pass it as `groups`?)
# GroupKFold receives the column as `groups`; StratifiedKFold receives it as
# the target `y` to stratify on.
fold_specs = [
  ('fold_gkf_patientid', group_kfold, "PatientID", True),
  ('fold_gkf_png_filepath', group_kfold, "png_filepath", True),
  ('fold_skf_patientid', strat_kfold, "PatientID", False),
  ('fold_skf_seriesdescription', strat_kfold, "SeriesDescription", False),
  ('fold_skf_MGMT_value', strat_kfold, "MGMT_value", False),
]

subset_frames = [
  axial_FLAIR, coronal_FLAIR, sagittal_FLAIR,
  axial_T1w, coronal_T1w, sagittal_T1w,
  axial_T1wCE, coronal_T1wCE, sagittal_T1wCE,
  axial_T2w, coronal_T2w, sagittal_T2w,
]

for train_df in subset_frames:
  for fold_column, splitter, key_column, as_groups in fold_specs:
    split_key = train_df[key_column]
    if as_groups:
      splits = splitter.split(train_df, groups=split_key)
    else:
      splits = splitter.split(train_df, split_key)

    # Each row is tagged with the fold in which it is part of the validation
    # set. val_idx holds positions; they coincide with index labels because
    # every subset frame was built with reset_index(drop=True).
    for fold, (train_idx, val_idx) in enumerate(splits):
      train_df.loc[val_idx, fold_column] = fold

In [None]:
save_path = r"/content/gdrive/MyDrive/Kaggle Competitions/RSNA-brain-tumor"

# Output file name -> frame to write; written in this order, without the index.
frames_to_save = {
  "axial_FLAIR.csv": axial_FLAIR,
  "coronal_FLAIR.csv": coronal_FLAIR,
  "sagittal_FLAIR.csv": sagittal_FLAIR,
  "axial_T1w.csv": axial_T1w,
  "coronal_T1w.csv": coronal_T1w,
  "sagittal_T1w.csv": sagittal_T1w,
  "axial_T1wCE.csv": axial_T1wCE,
  "coronal_T1wCE.csv": coronal_T1wCE,
  "sagittal_T1wCE.csv": sagittal_T1wCE,
  "axial_T2w.csv": axial_T2w,
  "coronal_T2w.csv": coronal_T2w,
  "sagittal_T2w.csv": sagittal_T2w,
}

for file_name, frame in frames_to_save.items():
  frame.to_csv(os.path.join(save_path, file_name), index=False)