## Step 1: Import libraries

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, GlobalAveragePooling2D, BatchNormalization, Dropout, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

%matplotlib inline 

## Step 2: Set Constants

In [3]:
SEED = 137

MIN_WIDTH = 29
MAX_WIDTH = 215

MIN_HEIGHT = 45
MAX_HEIGHT = 239

MIN_DEPTH = 15
MAX_DEPTH = 143

## Step 3: Load Data

In [4]:
df = pd.read_pickle("../raw_data/Grade_ID_nii.pkl")

df['Grade'] = df['Grade'].apply(lambda x: 0 if x == 'HGG' else 1)

FileNotFoundError: [Errno 2] No such file or directory: '../raw_data/Grade_ID_nii.pkl'

## Step 4: EDA before slicing

In [82]:
print(f"Number of samples: {df['nii'].shape[0]}")
print(f"Negative examples: {df[df['Grade'] == 0].shape}")
print(f"Positive examples: {df[df['Grade'] == 1].shape}")

Number of samples: 369
Negative examples: (293, 3)
Positive examples: (76, 3)

Single example Shape:
Height: 240
Width: 240
Depth: 155


In [None]:
# code to calculate crop parameters for depth
non_zero = []
min_value = []
max_value = []

for n_image in range(df['nii'].shape[0]):
    image = df['nii'][n_image]
    image_depth_sum = [np.sum(image[:,:,i]) for i in range(image.shape[2])]
    non_zero.append(np.nonzero(image_depth_sum)[0].shape[0])
    min_value.append(np.min(np.nonzero(image_depth_sum)[0]))
    max_value.append(np.max(np.nonzero(image_depth_sum)[0]))
    plt.plot(image_depth_sum)
plt.show()
# here we calculate limits for the depth parameter, but similarly 
# limits can be found for other parameters as well
print(f"Defined limits: min - {np.min(min_value)}; max - {np.max(max_value)}")

In [None]:
# code to calculate crop parameters for depth
non_zero = []
min_value = []
max_value = []

for n_image in range(df['nii'].shape[0]):
    image = df['nii'][n_image]
    image_depth_sum = [np.sum(image[:,:,i]) for i in range(image.shape[2])]
    non_zero.append(np.nonzero(image_depth_sum)[0].shape[0])
    min_value.append(np.min(np.nonzero(image_depth_sum)[0]))
    max_value.append(np.max(np.nonzero(image_depth_sum)[0]))
    plt.plot(image_depth_sum)
plt.show()
# here we calculate limits for the depth parameter, but similarly 
# limits can be found for other parameters as well
print(f"Defined limits: min - {np.min(min_value)}; max - {np.max(max_value)}")

## Step 5: Crop Images and run Augmentation

In [83]:
# crop images
df['nii'] = df['nii'].apply(lambda x: np.array(x[MIN_HEIGHT:MAX_HEIGHT,MIN_WIDTH:MAX_WIDTH,MIN_DEPTH:MAX_DEPTH]))
X = df['nii']
y = df['Grade']

In [84]:
X = np.array([np.array(val) for val in X])
X = X.reshape(len(X), X[0].shape[0], X[0].shape[1], X[0].shape[2], 1)

In [85]:
def rotate_image(X, y, i):
    angles = [-30, -20, -15, -10, -5, 5, 10, 15, 20, 30]
    angle = random.choice(angles)
    rotated_example = ndimage.rotate(X[i], angle, reshape=False)
    X = np.append(X, np.expand_dims(rotated_example, axis=0), axis=0)
    y = y.append(pd.Series([y[i]]), ignore_index=True)
    
    return X, y

In [86]:
for i in range(len(X)):
    if y[i] == 0:
        pass
    else:
        X,y = rotate_image(X, y, i)
        X,y = rotate_image(X, y, i)

  y = y.append(pd.Series([y[i]]), ignore_index=True) # append label


In [87]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=SEED)


In [88]:
X_train.shape

(416, 194, 186, 128, 1)

In [89]:
y_train.value_counts()

0    234
1    182
dtype: int64

## Step 6: Define Model

In [90]:
def initialize_model(input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])):
    model = Sequential()
    model.add(Conv2D(filters=16, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=2))
    model.add(BatchNormalization())
    model.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(BatchNormalization())
    model.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(BatchNormalization())
    model.add(Conv2D(filters=128, kernel_size=3, activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(BatchNormalization())
    model.add(Flatten())
    model.add(Dense(units=64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(units=1, activation='sigmoid'))

    model.compile(optimizer=Adam(lr=0.0001), 
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model


In [91]:
model = initialize_model()
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_29 (Conv3D)           (None, 192, 184, 126, 64) 1792      
_________________________________________________________________
max_pooling3d_27 (MaxPooling (None, 96, 92, 63, 64)    0         
_________________________________________________________________
batch_normalization_27 (Batc (None, 96, 92, 63, 64)    256       
_________________________________________________________________
conv3d_30 (Conv3D)           (None, 94, 90, 61, 64)    110656    
_________________________________________________________________
max_pooling3d_28 (MaxPooling (None, 47, 45, 30, 64)    0         
_________________________________________________________________
batch_normalization_28 (Batc (None, 47, 45, 30, 64)    256       
_________________________________________________________________
conv3d_31 (Conv3D)           (None, 45, 43, 28, 128) 

In [92]:
print(f" Size of X_train: {(X_train.size * X_train.itemsize) / 1e9} Gb")

 Size of X_train: 7.685603328 Gb


## Step 7: Train Model

In [93]:
es = EarlyStopping(patience=5, restore_best_weights=True)
    
history = model.fit(X_train, y_train,
                        epochs=30,
                        batch_size=2,
                        callbacks=[es],
                        validation_data=(X_test, y_test),
                        shuffle=True,
                        verbose=1)


2023-03-12 22:59:11.111464: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 7685603328 exceeds 10% of free system memory.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
 38/208 [====>.........................] - ETA: 1:51 - loss: 0.3645 - accuracy: 0.8828

KeyboardInterrupt: 

In [None]:
#plot the learning curve
def plot_loss(history):
    fig, (ax1, ax2) = plt.subplots(1,2, figsize=(13,4))
    ax1.plot(history.history['loss'])
    ax1.plot(history.history['val_loss'])
    ax1.set_title('Model loss')
    ax1.set_ylabel('Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylim(ymin=0, ymax=1)
    ax1.legend(['Train', 'Validation'], loc='best')
    ax1.grid(axis="x",linewidth=0.2)
    ax1.grid(axis="y",linewidth=0.2)    
    
    ax2.plot(history.history['accuracy'])
    ax2.plot(history.history['val_accuracy'])
    ax2.set_title('Accuracy')
    ax2.set_ylabel('Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylim(ymin=0, ymax=1)
    ax2.legend(['Train', 'Validation'], loc='best')
    ax2.grid(axis="x",linewidth=0.2)
    ax2.grid(axis="y",linewidth=0.2)    

    plt.show()    

In [None]:
plot_loss(history)