# Train Model

## Import Modules

In [0]:
import numpy as np
from google.colab import drive
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from scipy.io import loadmat, savemat
from sklearn.model_selection import KFold
import skimage.filters as filters
from skimage.transform import rescale

## Get a List of File Names of the Dataset

In [2]:
BASE_DIR = os.getcwd()

# Mount the google drive to the virtue machine running this script.
# This will prompt for authorization.
DRIVE_MOUNT_DIR = os.path.join(os.getcwd(), 'drive')
drive.mount(DRIVE_MOUNT_DIR, force_remount=True)

Mounted at /content/drive


In [7]:
# Now the google drive can be accessed at 'DRIVE_MOUNT_DIR/My Drive'.
# Specify the directory to the data set.
REPOSITORY_NAME = 'CS168-Automatic-TICI'
REPOSITORY = os.path.join(DRIVE_MOUNT_DIR, 'My Drive', REPOSITORY_NAME)
DATA_DIR_NAME = 'augmented_images'
DATA_DIR = os.path.join(REPOSITORY, DATA_DIR_NAME)

print(DATA_DIR)

# Get a list of full paths of all mat files in the data directory.
for root, _dirs, files in os.walk(DATA_DIR):
    files = list(filter(lambda fname: fname.lower().endswith('.mat'), sorted(files)))
nfiles = len(files)
print('{} files found in the data directory \'{}\'.'.format(nfiles, DATA_DIR))

/content/drive/My Drive/CS168-Automatic-TICI/augmented_images
1450 files found in the data directory '/content/drive/My Drive/CS168-Automatic-TICI/augmented_images'.


In [8]:
sample_content = loadmat(os.path.join(DATA_DIR, files[0]))
image_shape = np.shape(sample_content['image'])
image_shape = (image_shape[0] // 2, image_shape[1] // 2, image_shape[2])
print(image_shape)

(512, 512, 1)


In [9]:
images = np.empty((nfiles, ) + image_shape)
TICIs = []

# Extract the image set and TICI information for all mat files.
for n in range(nfiles):
    # Print the progress.
    if n % 100 == 0:
        print('{} / {} done'.format(n, nfiles))
    content = loadmat(os.path.join(DATA_DIR, files[n]))
    images[n] = rescale(content['image'], 1.0 / 2.0, anti_aliasing=True)

    TICIs.append(str(content['TICI'][0]))

0 / 1450 done


  warn('The default multichannel argument (None) is deprecated.  Please '


100 / 1450 done
200 / 1450 done
300 / 1450 done
400 / 1450 done
500 / 1450 done
600 / 1450 done
700 / 1450 done
800 / 1450 done
900 / 1450 done
1000 / 1450 done
1100 / 1450 done
1200 / 1450 done
1300 / 1450 done
1400 / 1450 done


## Reformat TICI scores

In [10]:
# The number of different TICI scores.
# Including 0, 1, 2a, 2b, 3.
num_TICI_classes = 5

# Convert a TICI string to a number
def map_TICI_str_to_num(TICI):
    relation = {
        '0': 0,
        '1': 1,
        '2a': 2,
        '2b': 3,
        '3': 4,
        '0 (bilateral MCA)': 0,
        '2a?': 2
    }
    return relation[TICI]

# Convert a numerical encoded TICI to a string
def map_TICI_num_to_str(label):
    relation = ['0', '1', '2a', '2b', '3']
    return relation[label]



# Convert TICI scores in the form of strings to numeric labels before fed to the model.
TICI_nums = list(map(map_TICI_str_to_num, TICIs))

# Convert the array of integer labels (0 ~ num_TICI_classes-1) to an array of 
# one-hot (aka one-of-K) encoded labels, for better accuracy.
TICI_one_hot = tf.keras.utils.to_categorical(TICI_nums, num_TICI_classes)

print(np.shape(TICI_one_hot))

(1450, 5)


## Train the Model

In [11]:
# Build the model with tensorflow.keras.
# The general idea is to reduce the size by maxpooling and 
# extract more features with convolutions of an increasing 
# number of filters.


# another try from stackoverflow
# model = tf.keras.Sequential([
#     tf.keras.layers.Conv2D(32, 5, padding='same', activation='relu', 
#                         input_shape=(image_shape[0], image_shape[1], 1)),
#     tf.keras.layers.Conv2D(32, 5, padding='same', activation='relu'),
#     tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
#     tf.keras.layers.BatchNormalization(),
    
#     tf.keras.layers.Conv2D(64, 5, padding='same', activation='relu'),
#     tf.keras.layers.Conv2D(64, 5, padding='same', activation='relu'),
#     tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),

#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.Dropout(0.5),
    
#     tf.keras.layers.Dense(num_TICI_classes, activation='softmax')
# ])


# from last correct version
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 5, padding='same', activation='relu', 
                        input_shape=(image_shape[0], image_shape[1], 1)),
    tf.keras.layers.MaxPooling2D(pool_size=(4, 4), strides=(4, 4), padding='same'),
    tf.keras.layers.BatchNormalization(),
    
    tf.keras.layers.Conv2D(64, 5, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(4, 4), strides=(4, 4), padding='same'),

    tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(num_TICI_classes, activation='softmax')
])


model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 512, 512, 32)      832       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 32)      0         
_________________________________________________________________
batch_normalization_v1 (Batc (None, 128, 128, 32)      128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 64)      51264     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 64)        0         
_________________________________________________________________
flatten (Flatten)    

In [0]:
model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'])

# opt = tf.keras.optimizers.SGD(lr=0.01)
# model.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy'])

In [13]:
BATCH_SIZE = 32
EPOCHS = 15

# K-fold cross validation
kf = KFold(n_splits=5)

for train_index, val_index in kf.split(images):
    x_train, x_val = images[train_index], images[val_index]
    y_train, y_val = TICI_one_hot[train_index], TICI_one_hot[val_index]
    
    model.fit(
        x=x_train,
        y=y_train,
        batch_size=32,
        epochs=10,
        verbose=1,
        validation_data=(x_val, y_val)
    )

    print (model.evaluate(x_val, y_val))


Train on 1160 samples, validate on 290 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[1.4155483114308325, 0.45172414]
Train on 1160 samples, validate on 290 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[1.039480725701513, 0.6827586]
Train on 1160 samples, validate on 290 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.23360731082743613, 0.93448275]
Train on 1160 samples, validate on 290 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.028648257461087456, 1.0]
Train on 1160 samples, validate on 290 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.038307159728017445, 0.9896552]
