In [3]:
import os

import numpy as np
import seaborn as sns
import pandas as pd
from PIL import Image
from matplotlib import pyplot
import matplotlib.pylab as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn import metrics

import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Reshape
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Concatenate, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.applications.vgg16 import VGG16

In [5]:
# load image data

def load_labeled_images(directory, label):
    """Read every file in `directory` and tag it with `label`.

    Parameters
    ----------
    directory : str
        Folder whose entries are all passed to ``plt.imread``.
    label : int
        Class label attached to every image read from ``directory``.

    Returns
    -------
    (images, names, labels) : tuple of three parallel lists
        The arrays returned by ``plt.imread``, the file names, and the labels.
    """
    images, names, labels = [], [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the later seeded train/validation split) stable.
    for name in sorted(os.listdir(directory)):
        # The second positional argument of imread is `format`; the 0 is kept
        # exactly as in the original call.
        images.append(plt.imread(os.path.join(directory, name), 0))
        names.append(name)
        labels.append(label)
    return images, names, labels


data = []
file_list = []
y = []

# Images under CT_COVID get label 1, images under CT_NonCOVID get label 0.
for directory, label in (("data/CT_COVID", 1), ("data/CT_NonCOVID", 0)):
    images, names, labels = load_labeled_images(directory, label)
    data.extend(images)
    file_list.extend(names)
    y.extend(labels)

In [6]:
# Load the metadata spreadsheet. header=None tells pandas not to use the
# first row as column labels, so the four column names are supplied here.
metadata = pd.read_excel(
    "data/COVID-CT-MetaInfo.xlsx",
    header=None,
    names=['file', 'patient', 'column3', 'note'],
)

In [7]:
image_dim = (300, 400) #use the averages for the dimensions; order is (height, width)

data_cleaned = []

for img in data:
    # plt.imread is documented to return PNGs as floats in [0, 1] and other
    # formats as integer arrays. Remember which kind this image is before the
    # channel mean below turns everything into floats.
    is_integer_image = img.dtype.kind in "iu"

    #First, take the mean of the 3rd dimension (channels) if it exists
    if len(img.shape) == 3:
        img = np.mean(img, axis = 2)

    #Using PIL Image processor, resize using high quality down-sampling filter.
    #Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
    #PIL's resize takes (width, height), hence the swapped indices.
    img = Image.fromarray(img)
    img = img.resize((image_dim[1], image_dim[0]), Image.LANCZOS)
    img = np.array(img)

    #Normalize image values to [0, 1]; float images are already on that scale,
    #so only integer-typed images are divided by 255.
    if is_integer_image:
        img = img/255

    data_cleaned.append(img)

# Stack into one array and add a trailing channel axis: (n_images, height, width, 1)
data_cleaned = np.array(data_cleaned)
data_cleaned = np.expand_dims(data_cleaned,axis = 3)

In [8]:
# Split images and labels into 75% training / 25% validation.
# random_state is fixed so the same split is produced for the same input order.
data_train, data_val, y_train, y_val = train_test_split(
    data_cleaned,
    np.array(y),
    train_size=0.75,
    random_state=10,
)

In [9]:
# Augmentation: add a left-right mirrored and an up-down mirrored copy of
# every training image, tripling the training set.
horiz_flip = tf.image.flip_left_right(data_train)
vert_flip = tf.image.flip_up_down(data_train)

# Originals first, then horizontal flips, then vertical flips.
data_train_augmented = np.concatenate([data_train, horiz_flip, vert_flip], axis=0)

# Flipping does not change the class, so the labels are repeated in the same order.
y_train_augmented = np.concatenate([y_train] * 3, axis=0)

In [16]:
# Build the VGG16 convolutional base (ImageNet weights, no classifier top).
# VGG takes RGB images, so the single-channel input is concatenated with
# itself three times to form a 3-channel tensor.
img_input = Input(shape=(300, 400, 1))
three_channel_input = Concatenate()([img_input] * 3)
model = VGG16(
    weights="imagenet",
    include_top=False,
    input_tensor=three_channel_input,
)

In [17]:
# Freeze every layer of the VGG base so only the new head is trained.
for vgg_layer in model.layers:
    vgg_layer.trainable = False

# Classification head: global spatial average pooling over the VGG feature
# maps, followed by a 2-unit softmax layer (one unit per class).
x = GlobalAveragePooling2D()(model.output)
predictions = Dense(2, activation='softmax')(x)

# Model to train: same input as the VGG base, new head as output.
new_model = Model(inputs=model.input, outputs=predictions)

In [18]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections) of the full model.
new_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 300, 400, 1) 0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 300, 400, 3)  0           input_2[0][0]                    
                                                                 input_2[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 300, 400, 64) 1792        concatenate_1[0][0]              
____________________________________________________________________________________________

In [19]:
# Plain SGD with a small step size. `learning_rate` is the supported keyword;
# `lr` is a deprecated alias that newer Keras versions reject.
sgd = SGD(learning_rate=5e-4)
new_model.compile(optimizer=sgd, loss=tf.keras.losses.binary_crossentropy, metrics=['accuracy'])

# Labels are one-hot encoded to match the 2-unit softmax output. num_classes
# is pinned to 2 so the encoding width does not depend on which labels happen
# to be present in a given split.
history = new_model.fit(
        data_train_augmented,
        tf.keras.utils.to_categorical(y_train_augmented, num_classes=2),
        epochs=10,
        batch_size = 16,
        validation_data=(data_val, tf.keras.utils.to_categorical(y_val, num_classes=2)))

Train on 1677 samples, validate on 187 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# next to try: instead of replicating 3 channels use different rescalings