In [None]:
from itertools import chain
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras.utils.np_utils import to_categorical
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, GlobalAveragePooling2D, LSTM, TimeDistributed, Dropout, Dense
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
import numpy as np
import cv2
from PIL import Image
from tqdm import tqdm
import pandas as pd

from data import Data

In [None]:
# Maps each slice label string to its integer class id. The id is simply the
# position of the label in the tuple below.
LABEL_INDEX = {
    label: index
    for index, label in enumerate(('ap', 'bs', 'mid', 'oap', 'obs'))
}

In [None]:
# Dataset accessor from the project-local `data` module. Later cells read its
# `data`, `labels` and `paths` attributes.
data = Data()

In [None]:
dataset = 'CAT'
phase = 0


def dict_to_ordered_list(d):
    """Return the values of ``d`` as a list, in the order of its sorted items."""
    return [value for _key, value in sorted(d.items())]


# (key, record) pairs of the selected dataset in sorted order.
# presumably keys are patient ids — verify against the Data class.
patients = sorted(data.data[dataset].items())

# For LSTM training: one ordered list of slices per patient, taken from the
# selected phase of each record.
phases = [dict_to_ordered_list(record[phase]) for _key, record in patients]

# For CNN training: every slice of every patient in one flat list, plus the
# label looked up for each slice.
slices = list(chain.from_iterable(phases))
slice_labels = [data.labels[s] for s in slices]

In [None]:
def path_to_array(path, target_size=(224, 224)):
    """Load one slice image as a float32 array ready for the backbone.

    The image found at ``data.paths[path]`` is resized to ``target_size`` with
    nearest-neighbour sampling, converted with ``img_to_array``, has its
    channel axis (axis 2) repeated three times, and is finally divided by
    256 * 256.

    NOTE(review): the channel repeat and the 65536 divisor suggest the inputs
    are single-channel 16-bit images — confirm against the dataset.

    Args:
        path: key into ``data.paths``.
        target_size: size passed to ``Image.resize``.

    Returns:
        The processed image as a ``float32`` NumPy array.
    """
    resized = Image.open(data.paths[path]).resize(target_size, Image.NEAREST)
    pixels = np.repeat(img_to_array(resized), 3, axis=2)
    return pixels.astype('float32') / 256 / 256

In [None]:
# Stack every slice image into a single batch for the backbone.
slice_x = np.stack([path_to_array(path) for path in slices])

# One-hot encode the labels. num_classes is pinned to the size of LABEL_INDEX
# so the width always matches the 5-unit softmax head, even when the highest
# class id does not occur in the selected dataset.
slice_y = to_categorical(
    np.array([LABEL_INDEX[label] for label in slice_labels]),
    num_classes=len(LABEL_INDEX),
)

# 70 / 30 train / validation split by position (the slices are not shuffled).
cut70 = slice_x.shape[0] * 7 // 10

slice_tx, slice_vx = slice_x[:cut70], slice_x[cut70:]
slice_ty, slice_vy = slice_y[:cut70], slice_y[cut70:]

In [None]:
# ImageNet-pretrained ResNet50 without its classification head. With
# pooling='avg' the output is global-average-pooled to one vector per image.
# NOTE(review): inputs are scaled by path_to_array rather than by
# keras.applications.resnet50.preprocess_input — confirm this is intended.
backbone = keras.applications.resnet50.ResNet50(include_top=False, pooling='avg', weights='imagenet', input_shape=(224, 224, 3))

In [None]:
# Compute the backbone embedding of every slice once. The backbone is only
# used for inference in this notebook; the classifiers train on these vectors.
slice_xe = backbone.predict(slice_x)

In [None]:
# Sanity check: display the shape of the embedding array.
slice_xe.shape

In [None]:
# Split the embeddings at `cut70`, the same index used for slice_x / slice_y,
# so features and labels stay aligned.
slice_txe, slice_vxe = slice_xe[:cut70], slice_xe[cut70:]

In [None]:
# Classification head trained on the pre-computed backbone embeddings:
# two 512-unit ReLU layers, each followed by 50% dropout, then a 5-way softmax.
cnn_model = Sequential([
    Dense(512, activation="relu"),
    Dropout(0.5),
    Dense(512, activation="relu"),
    Dropout(0.5),
    Dense(5, activation="softmax"),
])

cnn_model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the head on the training embeddings and report validation metrics
# after every epoch; the returned History object is kept for plotting later.
cnn_history = cnn_model.fit(slice_txe, slice_ty, validation_data=(slice_vxe, slice_vy), batch_size=32, epochs=100)

In [None]:
# NOTE(review): `array` is not referenced by any other cell shown in this
# notebook — looks like leftover scratch; confirm and consider removing.
array = np.ones((12, 224, 224, 3))

In [None]:
def slice_list_to_array(slices, max_len=25):
    """Embed one patient's slices and zero-pad the sequence to ``max_len`` rows.

    Args:
        slices: slice keys accepted by ``path_to_array``, in sequence order.
        max_len: number of rows (timesteps) in the returned array. The default
            of 25 matches the sequence length the LSTM model is built with.

    Returns:
        A 2-D array with ``max_len`` rows: the backbone embedding of each
        slice, followed by all-zero rows for the unused timesteps.

    Raises:
        ValueError: if more than ``max_len`` slices are given.
    """
    stack = np.stack([path_to_array(path) for path in slices])
    stack = backbone.predict(stack)
    remaining = max_len - stack.shape[0]
    if remaining < 0:
        # np.pad would reject the negative width anyway; fail with a message
        # that names the actual problem instead.
        raise ValueError(
            f"sequence has {stack.shape[0]} slices, more than max_len={max_len}"
        )
    # Pad only along the time axis (axis 0); the feature axis is untouched.
    return np.pad(stack, [(0, remaining), (0, 0)], constant_values=0)

def slice_list_to_label_array(slices, max_len=25):
    """One-hot encode one patient's slice labels and zero-pad to ``max_len`` rows.

    Args:
        slices: slice keys present in ``data.labels``, in sequence order.
        max_len: number of rows (timesteps) in the returned array. The default
            of 25 matches the sequence length the LSTM model is built with.

    Returns:
        An array of shape ``(max_len, 5)``: one one-hot row per slice, followed
        by all-zero rows for the unused timesteps.

    Raises:
        ValueError: if more than ``max_len`` slices are given.
    """
    class_ids = np.array([LABEL_INDEX[data.labels[s]] for s in slices])
    labels = to_categorical(class_ids, num_classes=5)
    remaining = max_len - labels.shape[0]
    if remaining < 0:
        # np.pad would reject the negative width anyway; fail with a message
        # that names the actual problem instead.
        raise ValueError(
            f"sequence has {labels.shape[0]} slices, more than max_len={max_len}"
        )
    # Padding rows are all-zero, i.e. they carry no class at all.
    return np.pad(labels, [(0, remaining), (0, 0)], constant_values=0)

# Build the per-patient sequence tensors. Comprehensions are used instead of a
# `for phase in ...` loop so that the notebook-level `phase` setting is not
# overwritten by the loop variable.
patient_x = np.stack(
    [slice_list_to_array(slice_seq) for slice_seq in tqdm(phases)]
)
patient_y = np.stack(
    [slice_list_to_label_array(slice_seq) for slice_seq in phases]
)

In [None]:
# Sanity check: feature tensor shape on the first line, label tensor shape on
# the second.
print(patient_x.shape, patient_y.shape, sep='\n')

In [None]:
# Patient-level 70 / 30 train / validation split by position. A dedicated
# index name is used so the slice-level `cut70` is not overwritten; re-running
# the slice split cells afterwards would otherwise cut at the wrong position.
patient_cut70 = patient_x.shape[0] * 7 // 10

patient_tx, patient_vx = patient_x[:patient_cut70], patient_x[patient_cut70:]
patient_ty, patient_vy = patient_y[:patient_cut70], patient_y[patient_cut70:]

In [None]:
# Sequence model over per-patient backbone embeddings: 25 timesteps of 2048
# features in, one 5-way softmax per timestep out.
#
# The sequences are zero-padded to 25 timesteps and the padded label rows are
# all-zero. The Masking layer marks the all-zero input timesteps so the LSTMs
# skip them and Keras leaves them out of the loss and the accuracy metric;
# without it the reported accuracy also counts the padding rows.
rnn_model = Sequential([
    keras.layers.Masking(mask_value=0.0, input_shape=(25, 2048)),
    LSTM(512, return_sequences=True),
    Dropout(0.5),
    # input_shape is only meaningful on the first layer, so it is not
    # repeated here.
    LSTM(512, return_sequences=True),
    Dropout(0.5),
    TimeDistributed(Dense(5, activation="softmax")),
])
rnn_model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the sequence model on the patient-level training split and report
# validation metrics after every epoch; the History object is kept for plotting.
rnn_history = rnn_model.fit(patient_tx, patient_ty, validation_data=(patient_vx, patient_vy), batch_size=4, epochs=100)

In [None]:
# Per-epoch validation accuracy of both models, keyed by the column names
# used in the comparison plot.
history = {
    "cnn_val_acc": cnn_history.history["val_accuracy"],
    "rnn_val_acc": rnn_history.history["val_accuracy"],
}

In [None]:
# Plot both validation-accuracy curves against the epoch index. The y-axis is
# limited to 0.5–0.9, so values outside that range are not visible.
pd.DataFrame(history).plot(xlim=[0, 100], ylim=[0.5, 0.9])

In [None]:
# Per-timestep class probabilities of the sequence model for the validation
# patients.
vp = rnn_model.predict(patient_vx)

In [None]:
# Accuracy of the sequence model over real (non-padding) timesteps only.
# Sizes come from the arrays themselves rather than fixed loop bounds, so the
# cell stays correct whatever the number of validation patients or timesteps.
predicted = np.argmax(vp, axis=-1)  # predicted class per (patient, timestep)

# A real timestep has exactly one non-zero label entry; padding rows are all-zero.
is_real = np.count_nonzero(patient_vy, axis=-1) == 1

# A hit is a timestep whose label is 1 at the predicted class. Padding rows
# can never be hits because they contain no 1.
is_hit = np.take_along_axis(
    patient_vy, predicted[..., np.newaxis], axis=-1
)[..., 0] == 1

valid = int(is_real.sum())
correct = int(is_hit.sum())
correct / valid

In [None]:
# Class probabilities of the slice-level head for the validation embeddings.
slice_vp = cnn_model.predict(slice_vxe)

In [None]:
# Accuracy of the slice-level head, computed the same way as for the sequence
# model. The number of slices comes from the prediction array rather than a
# fixed loop bound, so the cell stays correct if the validation set changes.
predicted = np.argmax(slice_vp, axis=-1)  # predicted class per slice

# A valid slice has exactly one non-zero label entry.
is_real = np.count_nonzero(slice_vy, axis=-1) == 1

# A hit is a slice whose label is 1 at the predicted class.
is_hit = slice_vy[np.arange(predicted.shape[0]), predicted] == 1

valid = int(is_real.sum())
correct = int(is_hit.sum())
correct / valid