## This Kernel is a fork of [Efficientnet3D with one MRI type](https://www.kaggle.com/rluethy/efficientnet3d-with-one-mri-type)

As many others have pointed out during this competition, most models aren't learning anything. The evidence shows up in several ways:
1. The gap between the [**Public Leaderboard and Local CV AUC**](https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/255352)
2. [**Models failing to generalize**](https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/266173)
3. [**Models not training**](https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/265777) in the first place

## [**Weights and Biases Report ⭐️**](https://wandb.ai/sauravmaheshkar/RSNA-MICCAI/reports/The-Fluke--VmlldzoxMDA2MDQy) | [**Weights and Biases Project**](https://wandb.ai/sauravmaheshkar/RSNA-MICCAI)

The motivation for these experiments comes from the [**Chai Time Kaggle Talks with Anjum Sayed (Datasaurus)**](https://youtu.be/udw-uSV66EQ) video on the [**Weights and Biases Channel**](https://www.youtube.com/WeightsBiases). Anjum mentioned that a good way to check whether models are learning anything is to change only the random seed and see how much the performance changes.

[![Video Title](https://api.wandb.ai/files/sauravmaheshkar/images/projects/436131/cc7a6207.png)](https://youtu.be/udw-uSV66EQ)

# Models Don't Learn 🤷🏻

![](https://raw.githubusercontent.com/SauravMaheshkar/RSNA-MICCAI/main/assets/Fluke-Training-Loss.svg)

![](https://raw.githubusercontent.com/SauravMaheshkar/RSNA-MICCAI/main/assets/Fluke-Validation-Loss.svg)

In [None]:
%%capture
import os
import sys 
import json
import glob
import random
import collections
import time
import re

import numpy as np
import pandas as pd
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional
import torch.nn.functional as F

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

# Install Weights and Biases 
!pip3 install -r ../input/rsnawrapper/requirements.txt

# Weights and Biases setup: read the API key from the Kaggle secret named
# "WANDB_API_KEY" and log in with it, so no key is hard-coded in the notebook.
import wandb
# NOTE(review): WandbCallback is not referenced in any visible cell of this
# notebook — confirm whether this import is still needed.
from wandb.keras import WandbCallback
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
api_key = user_secrets.get_secret("WANDB_API_KEY")
wandb.login(key=api_key);

# Smoke test of the DICOM loader: load case "00000" and print the array shape
# plus its min / max / mean / median values.
# NOTE(review): `rsna` is imported here before `wrapperdir` is appended to
# sys.path in a later cell; presumably the requirements install above makes
# the package importable — confirm.
from rsna.utils import load_dicom_images_3d
a = load_dicom_images_3d("00000")
print(a.shape)
print(np.min(a), np.max(a), np.mean(a), np.median(a))

In [None]:
# Input locations used throughout the notebook (Kaggle dataset mounts).
data_directory = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'
pytorch3dpath = "../input/efficientnetpyttorch3d/EfficientNet-PyTorch-3D"
wrapperdir = "../input/rsnawrapper"

# Fail early with an explicit message. Previously a missing competition
# directory left the three path variables undefined, and the notebook only
# crashed later with a NameError at the sys.path lines below.
if not os.path.exists(data_directory):
    raise FileNotFoundError(
        f"Competition data not found at {data_directory!r}; "
        "attach the dataset or adjust the paths in this cell."
    )

# MRI sequences; one model is trained per entry.
mri_types = ['FLAIR','T1w','T1wCE','T2w']
SIZE = 256
NUM_IMAGES = 64

# Make the vendored EfficientNet3D implementation and the rsna wrapper
# package importable.
sys.path.append(pytorch3dpath)
sys.path.append(wrapperdir)
from efficientnet_pytorch_3d import EfficientNet3D

# Random Seed 🌱

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU), and exports ``seed``
    as ``PYTHONHASHSEED``. When CUDA is available, all CUDA devices are
    seeded too and cuDNN is switched to deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # The three CPU-side generators take the seed the same way.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

# Seed all generators once for this run. Changing this value is how the
# "different random seed" experiment described in the introduction is repeated.
set_seed(21)

# ✂️ Train/Validation Split

In [None]:
# Load the training labels; the BraTS21ID and MGMT_value columns are used
# by the cells below.
train_df = pd.read_csv(f"{data_directory}/train_labels.csv")
display(train_df)

# Hold out 20% of the rows for validation, stratified on MGMT_value.
# random_state is fixed here independently of set_seed(), so the split stays
# the same when only the global seed is changed.
df_train, df_valid = sk_model_selection.train_test_split(
    train_df, 
    test_size=0.2, 
    random_state=12, 
    stratify=train_df["MGMT_value"],
)

# 💿 Dataset

In [None]:
# Boiler Plate code from the library
from rsna.dataloader import Dataset

# 🏠 Model Class

In [None]:
# Boiler Plate code from the library
from rsna.nn import Model

# 💪🏻 Trainer Class

In [None]:
# Boiler Plate code from the library
from rsna.engine import Trainer

# 🔥 Training

In [None]:
# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Hyperparameters and run metadata; the whole dict is also handed to
# wandb.init(config=...) by the training function.
CONFIG = {
    "BATCH_SIZE": 4,
    "EPOCHS": 10,
    "PATIENCE": 10,
    "SIZE": 256,
    "NUM_IMAGES": 64,
    "competition": 'rsna-miccai-brain',
    "_wandb_kernel": 'sauravm',
}

def train_mri_type(df_train, df_valid, mri_type):
    """Train one ``Model("efficientnet-b0")`` for a given MRI type and log it to W&B.

    Args:
        df_train: Training rows; must provide the ``BraTS21ID`` and
            ``MGMT_value`` columns. Not modified.
        df_valid: Validation rows with the same columns. Not modified.
        mri_type: An MRI type name (see ``mri_types``), or ``"all"`` to train
            on one copy of every row per entry of ``mri_types``.

    Returns:
        ``trainer.lastmodel`` as left by ``Trainer.fit``. The notebook later
        passes this value to ``torch.load`` as the checkpoint to open.
    """
    # Remember the requested name for Trainer.fit. The "all" branch used to
    # reuse `mri_type` as its loop variable, so an "all" run was handed the
    # last entry of `mri_types` as its name instead of "all".
    run_name = mri_type

    # DataFrame.assign returns new frames, so the caller's DataFrames (which
    # are slices produced by train_test_split) are never written to.
    if mri_type == "all":
        df_train = pd.concat([df_train.assign(MRI_Type=t) for t in mri_types])
        df_valid = pd.concat([df_valid.assign(MRI_Type=t) for t in mri_types])
    else:
        df_train = df_train.assign(MRI_Type=mri_type)
        df_valid = df_valid.assign(MRI_Type=mri_type)

    print(df_train.shape, df_valid.shape)
    display(df_train.head())

    train_data_retriever = Dataset(
        df_train["BraTS21ID"].values,
        df_train["MGMT_value"].values,
        df_train["MRI_Type"].values,
        augment=False
    )

    valid_data_retriever = Dataset(
        df_valid["BraTS21ID"].values,
        df_valid["MGMT_value"].values,
        df_valid["MRI_Type"].values
    )

    train_loader = torch_data.DataLoader(
        train_data_retriever,
        batch_size=CONFIG['BATCH_SIZE'],
        shuffle=True,
        num_workers=8,
    )

    valid_loader = torch_data.DataLoader(
        valid_data_retriever,
        batch_size=CONFIG['BATCH_SIZE'],
        shuffle=False,
        num_workers=8,
    )

    run = wandb.init(project='RSNA-MICCAI',
                     entity='sauravmaheshkar',
                     group='4-types',
                     job_type='train',
                     config=CONFIG)

    # Close the W&B run even when training raises, so a failed cell does not
    # leave a dangling run behind.
    try:
        model = Model(model_name = "efficientnet-b0")
        model.to(device)

        wandb.watch(model)

        optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

        criterion = torch_functional.binary_cross_entropy_with_logits

        trainer = Trainer(
            model,
            device,
            optimizer,
            criterion
        )

        # The return value of fit() was stored but never used, so it is
        # no longer bound to a name.
        trainer.fit(
            CONFIG['EPOCHS'],
            train_loader,
            valid_loader,
            f"{run_name}",
            CONFIG['PATIENCE'],
        )
    finally:
        run.finish()

    return trainer.lastmodel

# Set `modelfiles` to a list of existing checkpoints (one per entry of
# `mri_types`, in the same order) to skip training. Left as None, one model
# is trained per MRI type and the values returned by train_mri_type are kept.
modelfiles = None


if not modelfiles:
    modelfiles = [train_mri_type(df_train, df_valid, m) for m in mri_types]
    print(modelfiles)

# ⚙️ Prediction

In [None]:
def predict(modelfile, df, mri_type, split):
    """Score every row of ``df`` with one trained checkpoint.

    Args:
        modelfile: Checkpoint readable by ``torch.load``; it must contain a
            ``"model_state_dict"`` entry.
        df: DataFrame whose index values are passed to ``Dataset`` as the
            case ids. Not modified.
        mri_type: MRI type name applied to every row.
        split: Forwarded to ``Dataset`` as ``split`` (the notebook passes
            ``"train"`` or ``"test"``).

    Returns:
        DataFrame indexed by ``BraTS21ID`` with one ``MGMT_value`` column
        holding the sigmoid of the model outputs.
    """
    print("Predict:", modelfile, mri_type, df.shape)

    # Build the per-row MRI type array on a copy instead of writing an
    # "MRI_Type" column into the caller's frame.
    mri_type_values = df.assign(MRI_Type=mri_type)["MRI_Type"].values

    data_retriever = Dataset(
        df.index.values,
        mri_type=mri_type_values,
        split=split
    )

    data_loader = torch_data.DataLoader(
        data_retriever,
        batch_size=CONFIG['BATCH_SIZE'],
        shuffle=False,
        num_workers=8,
    )

    # Use the same architecture name the training cell passes to Model, so
    # the checkpoint's state dict matches regardless of Model's default.
    model = Model(model_name="efficientnet-b0")
    model.to(device)

    # map_location lets a checkpoint saved on a GPU be opened on a CPU-only
    # machine (and vice versa).
    checkpoint = torch.load(modelfile, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    y_pred = []
    ids = []
    n_batches = len(data_loader)

    with torch.no_grad():
        for e, batch in enumerate(data_loader, 1):
            print(f"{e}/{n_batches}", end="\r")
            probs = torch.sigmoid(model(batch["X"].to(device))).cpu().numpy().squeeze()
            # squeeze() turns a batch of one into a 0-d array; atleast_1d
            # restores a length-1 vector so every batch contributes plain
            # floats rather than an occasional ndarray object.
            y_pred.extend(np.atleast_1d(probs).tolist())
            ids.extend(batch["id"].numpy().tolist())

    preddf = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
    preddf = preddf.set_index("BraTS21ID")
    return preddf

## Ensemble for validation

In [None]:
# Index the validation rows by BraTS21ID so the per-model predictions (which
# predict() returns indexed by BraTS21ID) line up row by row when added.
# NOTE: this cell cannot be re-run as-is — after the first run BraTS21ID is
# the index, not a column, so set_index raises.
df_valid = df_valid.set_index("BraTS21ID")
df_valid["MGMT_pred"] = 0
# Sum the predictions of each (checkpoint, MRI type) pair ...
for m, mtype in zip(modelfiles,  mri_types):
    pred = predict(m, df_valid, mtype, "train")
    df_valid["MGMT_pred"] += pred["MGMT_value"]
# ... and divide by the number of models to get the ensemble mean.
df_valid["MGMT_pred"] /= len(modelfiles)
auc = roc_auc_score(df_valid["MGMT_value"], df_valid["MGMT_pred"])
print(f"Validation ensemble AUC: {auc:.4f}")
sns.displot(df_valid["MGMT_pred"])

## Ensemble for submission

In [None]:
# Start from the sample submission, indexed by BraTS21ID so the per-model
# predictions returned by predict() align on addition.
submission = pd.read_csv(f"{data_directory}/sample_submission.csv", index_col="BraTS21ID")

# Reset the target column, then accumulate one prediction per
# (checkpoint, MRI type) pair.
submission["MGMT_value"] = 0
for m, mtype in zip(modelfiles, mri_types):
    pred = predict(m, submission, mtype, split="test")
    submission["MGMT_value"] += pred["MGMT_value"]

# Average over the models and write only the MGMT_value column; the
# BraTS21ID index is written along with it.
submission["MGMT_value"] /= len(modelfiles)
submission["MGMT_value"].to_csv("submission.csv")

In [None]:
submission

In [None]:
sns.displot(submission["MGMT_value"])