# Importing libraries 

In [None]:
import os
import sys 
import json 
import random 
import collections 
import time
import re 
import math 
import numpy as np 
import pandas as pd 
import cv2
import glob
import matplotlib.pyplot as plt 
import seaborn as sns 

import pydicom 
from  pydicom.pixel_data_handlers.util import apply_voi_lut

from random import shuffle 
from sklearn import model_selection as sk_model_selection 

from tensorflow import keras 
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.metrics import AUC 
from tensorflow.keras.utils import Sequence
import tensorflow as tf 

## Loading data

In [None]:
# Root folder of the competition data.
data_direc = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'

# MRI sequence names; each id is simply the position of the name in the list.
mri_types = ['FLAIR', 'T1w', 'T1wCE', 'T2w']
mri_types_id = list(range(len(mri_types)))

# Image edge length used when resizing, images-per-scan setting, and batch size.
image_size, num_images, batch_size = 128, 64, 4

# Number of folds.
num_folds = 4


In [None]:
# Locations of the image folders and the CSV files.
train_folder = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train'
test_folder = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/test'
train_label_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv'
sample_sub_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv'

sample_sheet = pd.read_csv(sample_sub_path)

# Recursively collect every DICOM file below the train and test folders.
train_files = glob.glob(train_folder + '/**/*.dcm', recursive=True)
test_files = glob.glob(test_folder + '/**/*.dcm', recursive=True)

# Read the labels from the path declared above rather than repeating the literal.
train_labels = pd.read_csv(train_label_path)

# One row per DICOM file. `columns` has to be an ordered list-like: the
# previous set literal ({'FilePath'}) is rejected by recent pandas versions.
train_df = pd.DataFrame(train_files, columns=['FilePath'])
test_df = pd.DataFrame(test_files, columns=['FilePath'])

train_df.head()

In [None]:
def parseString(filepath, indx_val):
    """Return one '/'-separated component of ``filepath``.

    Args:
        filepath: Path string using '/' as the separator.
        indx_val: Index of the component to return; negative values count
            from the end of the path.

    Returns:
        The path component found at position ``indx_val``.

    Raises:
        IndexError: If ``indx_val`` is outside the range of components.
    """
    return filepath.split('/')[indx_val]

In [None]:
# Derive per-file metadata from the path: the parent folder becomes 'Type'
# and the folder one level above it becomes 'BraTS21ID'.
for frame in (train_df, test_df):
    frame['Type'] = frame.apply(lambda row: parseString(row['FilePath'], -2), axis=1)
    frame['BraTS21ID'] = frame.apply(lambda row: parseString(row['FilePath'], -3), axis=1)

# Build a lookup from the zero-padded, five-digit id to its MGMT label so the
# keys match the folder names parsed above.
train_labels = train_labels.astype({'BraTS21ID': 'int'})
train_labels['BraTS21ID_5'] = train_labels['BraTS21ID'].map(lambda x: format(x, '05d'))
label_dict = train_labels.set_index('BraTS21ID_5')['MGMT_value'].to_dict()

# Attach the label to every file row; ids missing from the lookup become NaN
# (presumably every test id, since the lookup is built from the train labels).
for frame in (train_df, test_df):
    frame['MGMT_value'] = frame['BraTS21ID'].map(label_dict)

In [None]:
# Preview the first rows of the test frame to check the derived columns.
test_df.head()

# Custom data generators 

In [None]:
# reference: 🧠Brain Tumor 3D [Training]
def load_dicom(path, img_size=image_size, voi_lut=True, rotate=0):
    """Read one DICOM file and return its pixel data resized to a square.

    Args:
        path: Path of the DICOM file to read.
        img_size: Edge length, in pixels, of the returned square image.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``
            before resizing; otherwise use the raw ``pixel_array``.
        rotate: 0 for no rotation, or 1/2/3 to rotate by 90 degrees clockwise,
            90 degrees counter-clockwise, or 180 degrees respectively.

    Returns:
        The image as returned by ``cv2.resize`` with size
        ``(img_size, img_size)``.

    Raises:
        IndexError: If ``rotate`` is greater than 3.
    """
    # `dcmread` replaces `read_file`, which is deprecated and no longer
    # available in pydicom 3.
    dicom = pydicom.dcmread(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    if rotate > 0:
        # Index 0 is only a placeholder so that `rotate` indexes the list directly.
        rot_choices = [0, cv2.ROTATE_90_CLOCKWISE,
                       cv2.ROTATE_90_COUNTERCLOCKWISE,
                       cv2.ROTATE_180]
        data = cv2.rotate(data, rot_choices[rotate])

    return cv2.resize(data, (img_size, img_size))

In [None]:
# reference: 🧠Brain Tumor 3D [Training]
class Dataset(Sequence):
    """Keras ``Sequence`` yielding batches of resized DICOM images.

    Args:
        df_test: DataFrame with a 'FilePath' column (DICOM paths) and an
            'MGMT_value' column (labels).
        is_train: When true, ``__getitem__`` returns ``(images, labels)``;
            otherwise it returns only the images.
        batch_size: Number of files per batch; the final batch may be smaller.
        shuffle: When true (and ``is_train`` is true), the samples are
            re-ordered at the end of every epoch.
    """

    def __init__(self, df_test, is_train=True, batch_size=batch_size, shuffle=True):
        super().__init__()
        self.y = df_test['MGMT_value'].values
        self.paths = df_test['FilePath'].values
        self.is_train = is_train
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        """Return the number of batches per epoch."""
        return math.ceil(len(self.paths) / self.batch_size)

    def __getitem__(self, ids):
        """Load batch number ``ids`` and stack its images along axis 0."""
        batch = slice(ids * self.batch_size, (ids + 1) * self.batch_size)
        batch_x = np.stack([load_dicom(p) for p in self.paths[batch]], axis=0)
        if self.is_train:
            return batch_x, self.y[batch]
        return batch_x

    def on_epoch_end(self):
        """Re-order paths and labels together when shuffling is enabled."""
        if self.shuffle and self.is_train:
            # Apply a single permutation to both arrays. The previous code
            # stored the shuffled paths in an unused attribute, so only the
            # labels moved and they no longer matched their images.
            order = np.random.permutation(len(self.paths))
            self.paths = self.paths[order]
            self.y = self.y[order]
            

In [None]:
# The training generator yields (images, labels); the test generator yields images only.
train_dataset = Dataset(train_df, is_train=True, batch_size=batch_size)
test_dataset = Dataset(test_df, is_train=False, batch_size=batch_size)

In [None]:
# Sanity check: fetch the first training batch and display one image from it.
# The single-iteration loop and the duplicated shape print were removed, and
# the message now names the data correctly (an MRI batch, not a CT scan).
images, label = train_dataset[0]
print("Dimension of the MRI batch is:", images.shape)
print("label=", label)
# Index 0 always exists, even when a batch holds a single image.
plt.imshow(images[0, :, :], cmap="gray")
plt.show()

In [None]:
# Reset the global Keras state before the model is (re)built in this cell.
tf.keras.backend.clear_session()
def get_model(width=image_size, height=image_size):
    """Build a 2D convolutional network with a single sigmoid output.

    The network has four stages of Conv2D -> MaxPool2D -> BatchNormalization
    (followed by Dropout in all but the first stage), then global average
    pooling and a one-unit sigmoid Dense layer.

    Args:
        width: First spatial dimension of the single-channel input.
        height: Second spatial dimension of the single-channel input.

    Returns:
        An uncompiled ``keras.Model`` named "2DCNN".
    """
    # (filters, pool size, dropout rate or None) for each convolutional stage.
    stages = (
        (32, 2, None),
        (64, 2, 0.6),
        (128, 3, 0.6),
        (256, 2, 0.4),
    )

    inputs = tf.keras.Input(shape=(width, height, 1))

    features = inputs
    for n_filters, pool, drop_rate in stages:
        features = layers.Conv2D(
            filters=n_filters, kernel_size=3, activation="relu", padding='same'
        )(features)
        features = layers.MaxPool2D(pool_size=pool)(features)
        features = layers.BatchNormalization()(features)
        if drop_rate is not None:
            features = layers.Dropout(drop_rate)(features)

    pooled = layers.GlobalAveragePooling2D()(features)
    outputs = layers.Dense(units=1, activation="sigmoid")(pooled)

    return keras.Model(inputs, outputs, name="2DCNN")

# Instantiate the network for square image_size x image_size inputs and print
# its layer table.
model = get_model(image_size, image_size)
model.summary()

In [None]:
# Compile model.
initial_learning_rate = 0.0001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
)
model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
    metrics=[AUC(name='auc'), "acc"],
)

# Hold out 20% of the patients for validation. The split is grouped by
# BraTS21ID so that images of one patient never land on both sides.
# Without validation data the 'val_auc' metric monitored below never exists.
splitter = sk_model_selection.GroupShuffleSplit(
    n_splits=1, test_size=0.2, random_state=42
)
fit_rows, valid_rows = next(
    splitter.split(train_df, groups=train_df['BraTS21ID'])
)
fit_dataset = Dataset(train_df.iloc[fit_rows], batch_size=batch_size)
valid_dataset = Dataset(
    train_df.iloc[valid_rows], batch_size=batch_size, shuffle=False
)

# Define callbacks.
# NOTE(review): Keras 3 accepts only '.keras' / '.h5' checkpoint paths;
# confirm the installed version accepts the '.hdf5' suffix.
filepath = '../output/model.epoch{epoch:02d}.hdf5'

model_save = ModelCheckpoint(filepath,
                             save_best_only=True,
                             monitor='val_auc',
                             mode='max', verbose=1)

# Train the model, doing validation at the end of each epoch. The checkpoint
# callback is now passed to fit(); it was previously created but never used.
epochs = 5
model.fit(
    fit_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    shuffle=True,
    callbacks=[model_save],
    verbose=1,
)

In [None]:
# Run inference over every test batch, showing a progress bar.
predictions = model.predict(
    test_dataset,
    verbose=True,
)

In [None]:
# The model emits one value per image in an (N, 1) array; flatten it so the
# column holds plain scalars.
test_df['MGMT_value'] = np.asarray(predictions).ravel()

# Average the per-image scores for each patient. Only the score column is
# aggregated: averaging the whole frame would include the string columns
# ('FilePath', 'Type'), which raises a TypeError on pandas >= 2.
average_values = (
    test_df.groupby('BraTS21ID')['MGMT_value'].mean().reset_index()
)
label_dict_submission = average_values.set_index('BraTS21ID')['MGMT_value'].to_dict()

In [None]:
# Write the per-patient averages to submission.csv, leaving out the row index.
average_values.to_csv("submission.csv", index=False)