# MATH 4570 Project


In [1]:
from glob import glob
from pathlib import Path

import matplotlib as mpl
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

## Data Preparation

In [2]:
# Load the hemorrhage annotation table (an `Image` id column plus one 0/1 column per subtype).
labelsAll = pd.read_csv('BrainDataCT/hemorrhage-labels.csv')

# pick rows that we have images for
# types = ['brain_bone_window', 'brain_window', 'max_contrast_window', 'subdural_window']
imageType = 'max_contrast_window'
# Path(...).stem strips the directory and the extension using the platform's own path
# separator, so the image ids are recovered on Windows as well as on POSIX systems.
files = [Path(file).stem for file in glob(f'BrainDataCT/renders/**/{imageType}/*.jpg', recursive=True)]
labels = labelsAll.loc[labelsAll.Image.isin(files)]
display(labels)

# filter out rows that have multiple labels
subtypeColumns = ['epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']
# skipna=False keeps the semantics of adding the columns one by one (a missing flag drops the row)
labels = labels[labels[subtypeColumns].sum(axis=1, skipna=False) <= 1]
display(labels)

# take a random sample for local development
# (capped at the number of rows available so a smaller local dataset does not raise)
sampleSize = 5000
labels = labels.sample(min(sampleSize, len(labels)), random_state = 1)
display(labels)

Unnamed: 0,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
0,ID_000012eaf,0,0,0,0,0,0
1,ID_000039fa0,0,0,0,0,0,0
2,ID_00005679d,0,0,0,0,0,0
3,ID_00008ce3c,0,0,0,0,0,0
4,ID_0000950d7,0,0,0,0,0,0
...,...,...,...,...,...,...,...
752755,ID_fffc60817,1,0,1,1,0,0
752769,ID_fffd00949,1,0,0,0,1,0
752783,ID_fffe2edb8,1,0,1,1,0,0
752799,ID_ffff922b9,1,0,0,1,0,0


Unnamed: 0,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
0,ID_000012eaf,0,0,0,0,0,0
1,ID_000039fa0,0,0,0,0,0,0
2,ID_00005679d,0,0,0,0,0,0
3,ID_00008ce3c,0,0,0,0,0,0
4,ID_0000950d7,0,0,0,0,0,0
...,...,...,...,...,...,...,...
752737,ID_fffaebafd,1,0,1,0,0,0
752740,ID_fffb16e96,1,0,0,0,0,1
752769,ID_fffd00949,1,0,0,0,1,0
752799,ID_ffff922b9,1,0,0,1,0,0


Unnamed: 0,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
120900,ID_29167fe2d,1,0,0,0,0,1
295348,ID_642b8b0e2,1,0,0,0,1,0
204785,ID_4593898a9,1,0,0,0,0,1
726427,ID_f70771536,1,0,0,1,0,0
199255,ID_43abedac6,1,0,0,0,1,0
...,...,...,...,...,...,...,...
642952,ID_da856f276,1,0,0,0,1,0
648956,ID_dc89acbe4,1,0,1,0,0,0
541265,ID_b7b7929f7,1,1,0,0,0,0
27401,ID_094532ebd,1,0,0,0,1,0


In [3]:
# Integer class id for every category; ids follow the order in which the names are listed.
labelDict = dict(zip(
    ('normal', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural'),
    range(6),
))


def getLabel(row):
    """Return the name of the first hemorrhage subtype flagged in ``row``.

    ``row`` is anything indexable by subtype name (for example one row of the labels
    table). Subtypes are checked in ``labelDict`` order, skipping ``'normal'``; when no
    subtype has the value 1 the result is ``'normal'``.
    """
    for name in labelDict:
        if name == 'normal':
            continue
        if row[name] == 1:
            return name
    return 'normal'

# Name the class of every row first, then translate the names into their integer ids.
y = labels.apply(getLabel, axis = 1).map(labelDict)
# Samples per class: one unit-wide bin for each class id 0..5.
np.histogram(y, bins=list(range(7)))

(array([ 518,   94,  918,  568,  959, 1943]), array([0, 1, 2, 3, 4, 5, 6]))

In [4]:
def importImages(labels, imageType):
    """Load the rendered JPEG of every row in ``labels`` into one flat pixel matrix.

    Parameters
    ----------
    labels : pandas.DataFrame
        Needs an ``Image`` column (file name without extension) and the subtype columns
        read by ``getLabel``; the resulting class name selects the sub-directory.
    imageType : str
        Name of the render sub-directory, e.g. ``'max_contrast_window'``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(labels), 512 * 512)``. Row ``n`` holds the n-th image,
        channel-averaged, flattened, cut to the first 512 * 512 values and zero-padded
        when shorter. A row whose file could not be loaded stays all zeros; the failure
        is printed rather than raised.
    """
    total = len(labels)
    pixels = 512 * 512
    data = np.zeros([total, pixels])
    # Walk the rows once; looking each image up again by name would rescan the whole
    # frame for every file.
    for n, (_, row) in enumerate(labels.iterrows()):
        file_name = row['Image']
        label = getLabel(row)
        path = f'BrainDataCT/renders/{label}/{imageType}/{file_name}.jpg'
        print(f'importing {n + 1}/{total}', end='\r')
        try:
            img = mpl.image.imread(path)
            # Average the channel axis of 3-D (presumably colour) images; an image that
            # is already 2-D has no channel axis to reduce.
            if img.ndim == 3:
                img = img.mean(axis=2)
            # select the first 512 * 512 pixels
            img = img.reshape(-1)[:pixels]
            # pad with zeros up to 512 * 512
            data[n, :] = np.pad(img, (0, pixels - len(img)), 'constant')
        except Exception as err:
            # Exception (not BaseException) so Ctrl-C / kernel interrupt still stops the loop.
            print(f'\nFailed to import {path}. {err}')
    print('\nDone')
    return data

# Full-resolution pixel matrix, one flattened 512 * 512 image per selected label row.
originalImages = importImages(labels=labels, imageType=imageType)

importing 5000/5000
Done


In [None]:
DS = 16                          # overall reduction factor in pixel count
im_size = int(512 * 512 / DS)    # number of pixels in one downsampled image
# Subsample rows AND columns by sqrt(DS). Taking every DS-th value of the flattened image
# would keep every row but only every DS-th column, so the result could not be reshaped
# into a square thumbnail of the original.
stride = int(round(np.sqrt(DS)))
assert stride * stride == DS, 'DS must be a perfect square to keep the image square'
data = (originalImages
        .reshape(-1, 512, 512)[:, ::stride, ::stride]
        .reshape(len(originalImages), -1))
assert data.shape == (len(labels), im_size)

In [None]:
# Preview one full-resolution image (row 1) restored to its 512 x 512 layout.
plt.imshow(np.reshape(originalImages[1], (512, 512)))

In [None]:
# Preview the downsampled version of the same image (row 1) as a 128 x 128 picture.
plt.imshow(np.reshape(data[1], (128, 128)))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation. The class counts are very uneven, so the
# split is stratified on y to give the train and test sets the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    data, y, test_size=0.2, random_state=1, stratify=y)
# Number of features (pixels) per sample; later cells use it as the model input size.
input_shape = X_train.shape[1]
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

## Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline, fitted on the first 1000 training samples only.
n_fit = 1000
clf = LogisticRegression(solver='lbfgs', max_iter=1000)
clf.fit(X_train[:n_fit], y_train[:n_fit])

# Mean accuracy on the held-out set.
test_accuracy = clf.score(X_test, y_test)
print(f"Logistic Regression Score: {test_accuracy:.3f}")

## Artificial Neural Network Model

In [None]:
import tensorflow as tf
from tensorflow import keras

K = keras.backend

class ExponentialLearningRate(keras.callbacks.Callback):
    """Callback that multiplies the optimizer's learning rate by ``factor`` after each batch.

    After every batch it records the learning rate that was in effect (``rates``) and the
    loss reported in ``logs`` (``losses``), then raises the rate. The two lists always have
    the same length, so they can be plotted against each other.
    """

    def __init__(self, factor):
        # Let the base class set up its own state before adding ours.
        super().__init__()
        self.factor = factor
        self.rates = []
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        # Read the rate once and reuse the plain value for both the record and the update.
        # `learning_rate` is the optimizer's documented attribute; `lr` is a legacy alias.
        current_rate = K.get_value(self.model.optimizer.learning_rate)
        loss = (logs or {}).get("loss")
        if loss is not None:
            self.rates.append(current_rate)
            self.losses.append(loss)
        K.set_value(self.model.optimizer.learning_rate, current_rate * self.factor)
        
# Reset Keras' global state and pin the NumPy / TensorFlow seeds before building the model.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

# TODO: Implement Design - Temporary Design from Lab 3 Example
# Fully connected classifier: flat pixel vector -> 300 -> 100 -> 6-way softmax.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[input_shape]))
for hidden_units in (300, 100):
    model.add(keras.layers.Dense(hidden_units, activation="relu"))
model.add(keras.layers.Dense(6, activation="softmax"))

model.summary()

In [None]:
# Plain SGD with a small starting rate; targets are integer class ids, hence the sparse loss.
sgd = keras.optimizers.SGD(learning_rate=1e-3)
model.compile(optimizer=sgd,
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

### Training the Neural Network model

In [None]:
# Single-epoch sweep: the callback scales the learning rate by 1.005 after every batch and
# records the rate / loss pairs for the plot that follows.
expon_lr = ExponentialLearningRate(factor=1.005)
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=1,
    callbacks=[expon_lr],
)

In [None]:
# Loss as a function of the learning rate recorded during the sweep (log-scaled x axis).
fig, ax = plt.subplots()
ax.plot(expon_lr.rates, expon_lr.losses)
ax.set_xscale('log')
# nanmin: once training diverges the recorded loss can be NaN, and the builtin min()
# gives order-dependent results when NaN values are present.
lowest_loss = np.nanmin(expon_lr.losses)
ax.hlines(lowest_loss, min(expon_lr.rates), max(expon_lr.rates))
ax.axis([min(expon_lr.rates), max(expon_lr.rates), 0, expon_lr.losses[0]])
ax.grid()
ax.set_title("Learning-rate sweep")
ax.set_xlabel("Learning rate")
ax.set_ylabel("Loss");

In [None]:
# Lowest loss seen during the sweep; NaN entries (diverged batches) are ignored.
float(np.nanmin(expon_lr.losses))

In [None]:
# Learning rate in effect at the lowest recorded loss. nanargmin skips NaN losses,
# whereas argmin would return the position of the first NaN.
expon_lr.rates[int(np.nanargmin(expon_lr.losses))]

In [None]:
# Rebuild the dense network from scratch (fresh session, fixed seeds) for the full training run.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[input_shape]),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    # One output unit per class id in labelDict (0..5). With fewer units the sparse
    # cross-entropy loss has no output slot for the highest class id.
    keras.layers.Dense(len(labelDict), activation="softmax")
])

# TODO: Update with new learning rate
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=keras.optimizers.SGD(learning_rate=1e-3),
              metrics=["accuracy"])
history = model.fit(X_train, y_train, batch_size=8, epochs=30,
                    validation_data=(X_test, y_test))

## Convolutional Neural Network Model

In [None]:
## Conv2D consumes images shaped (height, width, channels), so restore the square image
## layout and append a single channel axis.
img_side = int(round(np.sqrt(input_shape)))
assert img_side * img_side == input_shape, 'flattened images must be square'
X_train = X_train.reshape(-1, img_side, img_side, 1)
X_test = X_test.reshape(-1, img_side, img_side, 1)

# One-hot encode the integer class ids for the categorical cross-entropy loss.
# keras.utils.to_categorical is used because `np_utils` is not imported in the visible notebook.
num_classes = len(labelDict)
Y_train = keras.utils.to_categorical(y_train, num_classes)
Y_test = keras.utils.to_categorical(y_test, num_classes)

In [None]:
# Fresh session and fixed seeds so the CNN starts from repeatable weights.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

# Side length of the square input image; `input_shape` holds the flattened pixel count.
img_side = int(round(np.sqrt(input_shape)))
assert img_side * img_side == input_shape, 'flattened images must be square'

# Layer classes are referenced through `keras` because the bare names (Sequential, Conv2D, ...)
# are not imported in the visible notebook.
model = keras.models.Sequential()
# Two convolution + max-pooling stages; Conv2D expects (height, width, channels) inputs.
model.add(keras.layers.Conv2D(32, (7, 7),
                              padding='valid',
                              input_shape=(img_side, img_side, 1),
                              activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.Conv2D(32, (3,3),
                              activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(keras.layers.Dropout(0.25))

# Classifier head: flatten the feature maps, one hidden layer, softmax over all classes.
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(units=128, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(units=len(labelDict.keys()), activation='softmax'))
model.summary()

In [None]:
# RMSprop optimizer; categorical cross-entropy loss, with accuracy reported as the metric.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the CNN for a handful of epochs, validating on the held-out split after each one.
epochs = 5
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=128,
    epochs=epochs,
    verbose=1,
    validation_data=(X_test, Y_test),
)