In [14]:
import os
import pandas as pd
import numpy as np
import nibabel as nib
import pickle

from sklearn.model_selection import train_test_split

from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras import utils
from tensorflow.python.keras.layers import Dense, Dropout, Flatten, Activation, Conv3D, MaxPooling3D

## T1 MRI

In [2]:
t1_hgg = []
counter = 0

for file in os.listdir('./data/bet_processed/'):
    counter += 1
    if counter%100 == 0 or counter == len(os.listdir('./data/bet_processed/'))-1:
        print(f"{counter} out of {len(os.listdir('./data/bet_processed/'))-1}")
              
    if file.endswith('t1_n4_bet.nii.gz'):
        file_path = './data/bet_processed/' + file
        img = nib.load(file_path)
        array_flat = img.get_data() / 255
        t1_hgg.append(array_flat)

100 out of 1032
200 out of 1032
300 out of 1032
400 out of 1032
500 out of 1032
600 out of 1032
700 out of 1032
800 out of 1032
900 out of 1032
1000 out of 1032
1032 out of 1032


In [3]:
t1_lgg = []
counter = 0

for file in os.listdir('./data/lgg_bet_processed/'):
    counter += 1
    if counter%100 == 0 or counter == len(os.listdir('./data/lgg_bet_processed/'))-1:
        print(f"{counter} out of {len(os.listdir('./data/lgg_bet_processed/'))-1}")
              
    if file.endswith('t1_n4_bet.nii.gz'):
        file_path = './data/lgg_bet_processed/' + file
        img = nib.load(file_path)
        array_flat = img.get_data() / 255
        t1_lgg.append(array_flat)

100 out of 303
200 out of 303
300 out of 303
303 out of 303


In [4]:
t1_hgg_y = np.ones(shape=len(t1_hgg), dtype=int)
t1_lgg_y = np.zeros(shape=len(t1_lgg), dtype=int)

X = np.array(t1_hgg + t1_lgg)
y = np.array(list(t1_hgg_y) + list(t1_lgg_y))

In [5]:
# % of Data that's Target: 1
sum(y) / len(y)

0.7724550898203593

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X,y,
                                                   stratify=y)

In [7]:
X_train.shape

(250, 240, 240, 155)

In [8]:
X_test.shape

(84, 240, 240, 155)

In [9]:
X_train = X_train.reshape(X_train.shape[0], 240, 240, 155, 1) 
X_test = X_test.reshape(X_test.shape[0], 240, 240, 155, 1)

In [10]:
y_train = utils.to_categorical(y_train) 
y_test = utils.to_categorical(y_test)

In [11]:
y_train

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.

In [12]:
# REALLY BASIC 3D-CNN JUST TO SEE

cnn = Sequential()

cnn.add(Conv3D(
    filters=6,
    kernel_size=(3,3,3), # height/width of filter
    activation='relu',
    input_shape=(240, 240, 155, 1)))

cnn.add(MaxPooling3D(
    pool_size=(2,2,2)
))

cnn.add(Conv3D(
    filters= 16,
    kernel_size= (3,3,3),
    activation='relu'
))

cnn.add(MaxPooling3D(
    pool_size=(2,2,2)
))

cnn.add(Flatten())

cnn.add(Dense(
    units=128,
    activation='relu'
))

# Output Layer
cnn.add(Dense(
    units=2,
    activation='softmax'
))

cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [13]:
history = cnn.fit(np.array(X_train), y_train,
                 batch_size=32,
                 validation_data=(np.array(X_test), y_test),
                 epochs=3,
                 verbose=1)

Train on 250 samples, validate on 84 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [18]:
cnn.save(filepath='./pickles/01_model.h5')

## T1 & T2 MRI
- If improved, can go ahead and include all 5 different MRI types into model

In [2]:
t1_t2_hgg = []
counter = 0

for file in os.listdir('./data/bet_processed/'):
    counter += 1
    if counter%100 == 0 or counter == len(os.listdir('./data/bet_processed/'))-1:
        print(f"{counter} out of {len(os.listdir('./data/bet_processed/'))-1}")
              
    if file.endswith('t1_n4_bet.nii.gz') or file.endswith('t2_n4_bet.nii.gz'):
        file_path = './data/bet_processed/' + file
        img = nib.load(file_path)
        array_flat = img.get_data() / 255
        t1_hgg.append(array_flat)

100 out of 1032
200 out of 1032
300 out of 1032
400 out of 1032
500 out of 1032
600 out of 1032
700 out of 1032
800 out of 1032
900 out of 1032
1000 out of 1032
1032 out of 1032


In [3]:
t1_t2_lgg = []
counter = 0

for file in os.listdir('./data/lgg_bet_processed/'):
    counter += 1
    if counter%100 == 0 or counter == len(os.listdir('./data/lgg_bet_processed/'))-1:
        print(f"{counter} out of {len(os.listdir('./data/lgg_bet_processed/'))-1}")
              
    if file.endswith('t1_n4_bet.nii.gz') or file.endswith('t2_n4_bet.nii.gz'):
        file_path = './data/lgg_bet_processed/' + file
        img = nib.load(file_path)
        array_flat = img.get_data() / 255
        t1_lgg.append(array_flat)

100 out of 303
200 out of 303
300 out of 303
303 out of 303


In [4]:
t1_t2_hgg_y = np.ones(shape=len(t1_t2_hgg), dtype=int)
t1_t2_lgg_y = np.zeros(shape=len(t1_t2_lgg), dtype=int)

X = np.array(t1_t2_hgg + t1_t2_lgg)
y = np.array(list(t1_t2_hgg_y) + list(t1_t2_lgg_y))

In [5]:
# % of Data that's Target: 1
sum(y) / len(y)

0.7724550898203593

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X,y,
                                                   stratify=y)

In [7]:
X_train.shape

(250, 240, 240, 155)

In [8]:
X_test.shape

(84, 240, 240, 155)

In [9]:
X_train = X_train.reshape(X_train.shape[0], 240, 240, 155, 1) 
X_test = X_test.reshape(X_test.shape[0], 240, 240, 155, 1)

In [10]:
y_train = utils.to_categorical(y_train) 
y_test = utils.to_categorical(y_test)

In [12]:
# REALLY BASIC 3D-CNN JUST TO SEE

cnn = Sequential()

cnn.add(Conv3D(
    filters=6,
    kernel_size=(3,3,3), # height/width of filter
    activation='relu',
    input_shape=(240, 240, 155, 1)))

cnn.add(MaxPooling3D(
    pool_size=(2,2,2)
))

cnn.add(Conv3D(
    filters= 16,
    kernel_size= (3,3,3),
    activation='relu'
))

cnn.add(MaxPooling3D(
    pool_size=(2,2,2)
))

cnn.add(Flatten())

cnn.add(Dense(
    units=128,
    activation='relu'
))

# Output Layer
cnn.add(Dense(
    units=2,
    activation='softmax'
))

cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [None]:
history = cnn.fit(np.array(X_train), y_train,
                 batch_size=32,
                 validation_data=(np.array(X_test), y_test),
                 epochs=3,
                 verbose=1)

Train on 250 samples, validate on 84 samples
Epoch 1/3
 32/250 [==>...........................] - ETA: 3:36:28 - loss: 0.6826 - acc: 0.5625

## All images compiled together
- Need to see if model gets better w/ 2 different types of data first

In [2]:
hgg_img_data_train = []
counter = 0

for file in os.listdir('./data/bet_processed/'):
    if file.endswith('.nii.gz'):
        file_path = './data/bet_processed/' + file
        img = nib.load(file_path)
        array_flat = img.get_data() / 255
        hgg_img_data_train.append(array_flat)
        
        counter += 1
        if counter%100 == 0:
            print(f"{counter} out of {len(os.listdir('./data/bet_processed/'))-1}")

100 out of 1032
200 out of 1032
300 out of 1032
400 out of 1032
500 out of 1032
600 out of 1032
700 out of 1032
800 out of 1032
900 out of 1032
1000 out of 1032


In [3]:
hgg_y = np.ones(shape=len(hgg_img_data_train), dtype=int)

In [4]:
hgg_y

array([1, 1, 1, ..., 1, 1, 1])

In [5]:
lgg_img_data_train = []
counter = 0

for file in os.listdir('./data/lgg_bet_processed/'):
    if file.endswith('.nii.gz'):
        file_path = './data/lgg_bet_processed/' + file
        img = nib.load(file_path)
        array_flat = img.get_data() / 255
        lgg_img_data_train.append(array_flat)
        
        counter += 1
        if counter%100 == 0:
            print(f"{counter} out of {len(os.listdir('./data/lgg_bet_processed/'))-1}")

100 out of 303
200 out of 303
300 out of 303


In [6]:
lgg_y = np.zeros(shape=len(lgg_img_data_train), dtype=int)

In [7]:
X = np.array(hgg_img_data_train + lgg_img_data_train)

In [8]:
y = np.array(list(hgg_y) + list(lgg_y))

In [9]:
# % of Data that's Target: 1
sum(y) / len(y)

0.7724550898203593

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y,
                                                   stratify=y)

In [35]:
X_train.shape

(240, 240, 155)

In [40]:
np.array(X_train).shape

(1002, 240, 240, 155)

In [None]:
X

In [43]:
# REALLY BASIC 3D-CNN JUST TO SEE

cnn = Sequential()

cnn.add(Conv3D(
    filters=6,
    kernel_size=(3,3,3), # height/width of filter
    activation='relu',
    input_shape=(240, 240, 155, 1)))

cnn.add(MaxPooling3D(
    pool_size=(2,2,2)
))

cnn.add(Conv3D(
    filters= 16,
    kernel_size= (3,3,3),
    activation='relu'
))

cnn.add(MaxPooling3D(
    pool_size=(2,2,2)
))

cnn.add(Flatten())

cnn.add(Dense(
    units=128,
    activation='relu'
))

# Output Layer
cnn.add(Dense(
    units=2,
    activation='softmax'
))

cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
history = cnn.fit(np.array(X_train), y_train,
                 batch_size=512,
                 validation_data=(np.array(X_test), y_test),
                 epochs=5,
                 verbose=2)

In [63]:
test = np.ones(10)

In [64]:
test

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [65]:
test/10

array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])

In [38]:
test_train = []
counter = 0

for file in os.listdir('./data/bet_processed/')[:5]:
    if file.endswith('.nii.gz'):
        file_path = './data/bet_processed/' + file
        img = nib.load(file_path)
        array_flat = img.get_data() / 0
        print(array_flat)
#         test_train.append(array_flat)
        
#         counter += 1
#         if counter%100 == 0:
#             print(f"{counter} out of {len(os.listdir('./data/bet_processed/')[:5])-1}")

  
  


[[[nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  ...
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]]

 [[nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  ...
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]]

 [[nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  ...
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]]

 ...

 [[nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  ...
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]]

 [[nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  ...
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan nan]
  [nan nan nan ... nan nan n