# Developing a model architecture for glioma classification

## Packages to use

In [21]:
## Imports go here
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import nibabel as nib
import scikeras

## Working with data

In [22]:
## TODO: data import and preprocessing go here (this cell is still empty)

## Model architecture - CNN

In [31]:
# Shape of one input sample (batch axis excluded); initialize_model passes it to the first Conv3D layer.
# NOTE(review): presumably (height, width, slices, channels) of a single scan volume — confirm against the data loader.
input_shape = (240, 240, 155, 1)
# dataset size: around 270 observations

In [41]:
# Open question: we need 1 kernel, as we have 1 label for segmentation; kernel size = 3x3x3?

#initialize model
def initialize_model(dropout=0.5, dense_1=50, learning_rate=0.01,
                     kernel_size=(3, 3, 3), pool_size=(2, 2, 2)):
    """Build and compile the 3D CNN used for binary classification.

    Three Conv3D + MaxPooling3D stages (32, 64 and 128 filters) are followed
    by a Flatten layer and a small dense head that ends in a single sigmoid
    unit. The input shape is taken from the notebook-level ``input_shape``.

    Parameters
    ----------
    dropout : rate of the Dropout layer placed after the first dense layer.
    dense_1 : number of units in the first dense layer.
    learning_rate : learning rate given to the Adam optimizer.
    kernel_size : kernel size shared by all three Conv3D layers.
    pool_size : pool size shared by all three MaxPooling3D layers.

    Returns
    -------
    The compiled ``Sequential`` model (binary cross-entropy loss, accuracy metric).
    """
    network = Sequential()

    # Convolutional stages; only the very first layer declares the input shape.
    for stage, n_filters in enumerate((32, 64, 128)):
        first_layer_args = {'input_shape': input_shape} if stage == 0 else {}
        network.add(Conv3D(n_filters, kernel_size=kernel_size,
                           activation='relu', **first_layer_args))
        network.add(MaxPooling3D(pool_size=pool_size))

    # Collapse the feature maps into one vector per sample.
    network.add(Flatten())

    # maybe add age here for the second model

    # Dense head: tunable layer with dropout, then a fixed 16-unit layer.
    network.add(Dense(dense_1, activation='relu'))
    network.add(Dropout(dropout))
    network.add(Dense(16, activation='relu'))

    # Single sigmoid unit for the binary output.
    network.add(Dense(1, activation='sigmoid'))

    network.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=learning_rate),
        metrics=['accuracy'],
    )
    return network


In [33]:
# instantiate a model with the default hyperparameters and print its layer-by-layer summary
model_seg = initialize_model()
model_seg.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_6 (Conv3D)           (None, 238, 238, 153, 32  896       
                             )                                   
                                                                 
 max_pooling3d_6 (MaxPooling  (None, 119, 119, 76, 32)  0        
 3D)                                                             
                                                                 
 conv3d_7 (Conv3D)           (None, 117, 117, 74, 64)  55360     
                                                                 
 max_pooling3d_7 (MaxPooling  (None, 58, 58, 37, 64)   0         
 3D)                                                             
                                                                 
 conv3d_8 (Conv3D)           (None, 56, 56, 35, 128)   221312    
                                                      

In [26]:
#better to write it down as a function

In [34]:
# Baseline model score: train the default-hyperparameter model once.
# NOTE(review): X_train / y_train are not defined in any visible cell (the data
# cell is still empty), so this cell currently fails with NameError — load the data first.

# Stop after 3 epochs without improvement and restore the best weights seen.
es = EarlyStopping(patience=3, restore_best_weights = True)
# validation_split holds out 20% of the training data for validation.
history = model_seg.fit(X_train, y_train,
                        epochs = 30,
                        batch_size = 16,
                        callbacks = [es],
                        validation_split = 0.2,
                        shuffle =True,
                        verbose = 1)

NameError: name 'X_train' is not defined

In [35]:
#plot the learning curve
def plot_loss(history):
    """Plot the loss and accuracy learning curves of a training run side by side.

    Parameters
    ----------
    history : object exposing a ``history`` dict (as returned by ``model.fit``)
        with per-epoch lists under 'loss' and 'accuracy', and optionally
        'val_loss' and 'val_accuracy'.

    Returns
    -------
    None. The figure is rendered with ``plt.show()``.

    Raises
    ------
    KeyError
        If 'loss' or 'accuracy' is missing from ``history.history``.
    """
    logs = history.history
    _, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(13, 4))

    panels = (
        (ax_loss, 'loss', 'Model loss', 'Loss'),
        (ax_acc, 'accuracy', 'Accuracy', 'Accuracy'),
    )
    for ax, key, title, ylabel in panels:
        ax.plot(logs[key], label='Train')
        # Validation series are only recorded when fit() had validation data.
        val_key = 'val_' + key
        if val_key in logs:
            ax.plot(logs[val_key], label='Validation')
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Epoch')
        ax.legend(loc='best')
        ax.grid(axis="x", linewidth=0.5)
        ax.grid(axis="y", linewidth=0.5)

    # Anchor the loss axis at 0 and let the upper limit follow the data, so the
    # curve stays readable whatever the scale of the loss.
    ax_loss.set_ylim(bottom=0)
    # Accuracy is a fraction in [0, 1]; cap the axis there so the curves fill the panel.
    ax_acc.set_ylim(0, 1)

    plt.show()

In [36]:
# Wrap the model factory so scikit-learn tools (Pipeline, GridSearchCV) can drive it.
# NOTE(review): this uses the legacy tensorflow.keras.wrappers.scikit_learn wrapper, which
# looks deprecated in favour of scikeras (already imported above) — confirm before migrating,
# since scikeras may name/route the hyperparameters differently.
keras_estimator = KerasClassifier(build_fn = initialize_model, verbose = 1)

  keras_estimator = KerasClassifier(build_fn = initialize_model, verbose = 1)


In [37]:
# Single-step pipeline; the step name 'kc' becomes the prefix of the hyperparameter names (e.g. kc__dropout).
estimator = Pipeline([('kc', keras_estimator)])

In [53]:
# display the wrapper to check which arguments it stores
keras_estimator

{'verbose': 1,
 'build_fn': <function __main__.initialize_model(dropout=0.5, dense_1=50, learning_rate=0.01, kernel_size=(3, 3, 3), pool_size=(2, 2, 2))>}

In [54]:
# list every parameter the pipeline exposes; the 'kc__'-prefixed names belong to the wrapped classifier
estimator.get_params()  # e.g. .get("kc__verbose") reads a single entry


{'memory': None,
 'steps': [('kc',
   <keras.wrappers.scikit_learn.KerasClassifier at 0x15feb1bd0>)],
 'verbose': False,
 'kc': <keras.wrappers.scikit_learn.KerasClassifier at 0x15feb1bd0>,
 'kc__verbose': 1,
 'kc__build_fn': <function __main__.initialize_model(dropout=0.5, dense_1=50, learning_rate=0.01, kernel_size=(3, 3, 3), pool_size=(2, 2, 2))>}

In [39]:
# Hyperparameter tuning: candidate values for the grid search.
# Keys follow the '<pipeline step>__<parameter>' convention, so every 'kc__*' entry
# targets the KerasClassifier step of the pipeline.
# The full grid holds 4 * 4 * 2 * 3 * 6 * 3 = 1728 combinations.
param_grid = {
    'kc__dense_1': [20, 30, 50, 100],  # units in the first dense layer
    'kc__kernel_size': [(2,2,2),(3,3,3), (5,5,5), (7,7,7)],  # Conv3D kernel size
    'kc__pool_size': [(2,2,2),(3,3,3)],  # MaxPooling3D pool size
    'kc__batch_size':[8, 16, 32],  # not an initialize_model argument — presumably consumed by the wrapper's fit; confirm
    'kc__dropout': [0.5, 0.4, 0.3, 0.2, 0.1, 0],  # Dropout rate after the first dense layer
    'kc__learning_rate': [0.001, 0.01, 0.1]  # Adam learning rate
}


In [40]:
# Exhaustive search over param_grid, scored with 5-fold cross-validation.
kfold_splits = 5
# NOTE(review): n_jobs=-1 requests all CPU cores; parallel fits of Keras models may fail
# or exhaust memory with volumes of this size — confirm, or fall back to n_jobs=1.
grid = GridSearchCV(estimator=estimator,  
                    n_jobs=-1, 
                    verbose=1,
                    return_train_score=True,
                    cv=kfold_splits,  # alternative: StratifiedKFold(n_splits=kfold_splits, shuffle=True)
                    param_grid=param_grid,)

In [None]:
# Run the grid search.
# NOTE(review): X and y are not defined in any visible cell — confirm the data-loading step creates them.
grid_result = grid.fit(X, y) # optionally: callbacks=[tbCallBack]

# summarize results: best score and parameters first, then mean (std) test score per combination
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Predict on the test data with the trained network.
# `model` is not defined at notebook level (it only exists inside initialize_model),
# so use model_seg — the same model the evaluation cell relies on.
# The last layer is a sigmoid, so y_pred holds probabilities rather than hard labels.
y_pred = model_seg.predict(X_test)

In [None]:
# evaluate the final model on the test data (reports the compiled loss and the accuracy metric)
model_seg.evaluate(X_test, y_test)

## Model architecture - UNET

In [None]:
#maybe we don't need it for now, will check later

## Model architecture - 2D CNN

In [None]:
#maybe we don't need it for now, will check later