In [1]:
import argparse, os
import numpy as np
import tensorflow as tf
from keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.callbacks import Callback, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow.keras.metrics as keras_metrics
from tensorflow.python.training.rmsprop import RMSPropOptimizer
import subprocess
import sys
from tensorflow.keras.applications import efficientnet as efn
from tensorflow.keras.applications import ResNet50


In [2]:
# Check that TensorFlow sees the GPU: a v1 session created with
# log_device_placement=True reports the device mapping when it starts.
device_logging_config = tf.compat.v1.ConfigProto(log_device_placement=True)
sess = tf.compat.v1.Session(config=device_logging_config)

Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce RTX 3090, pci bus id: 0000:01:00.0, compute capability: 8.6



In [9]:
# Hyper-parameters and output location shared by the training cells below.
lr = 0.0001
epochs = 10
model_dir = "models"

# Random augmentation is applied to the training images only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Both generators multiply pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options common to the training and validation directory iterators:
# 256x256 single-channel images, batches of 32, one 0/1 label per image.
flow_options = dict(
    target_size=(256, 256),
    batch_size=32,
    color_mode="grayscale",
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory('train', **flow_options)
test_generator = test_datagen.flow_from_directory('val', **flow_options)

Found 22396 images belonging to 2 classes.
Found 5506 images belonging to 2 classes.


In [183]:
# Create the numbered destination folders for the 5-way split:
# train1..train5 and val1..val5, each with 'negative' and 'positive' sub-folders.
def make_numbered_dirs(n_splits=5, splits=('train', 'val'),
                       labels=('negative', 'positive'), root='.'):
    """Create ``<root>/<split><i>/<label>`` for every split, label and i in 1..n_splits.

    Uses ``os.makedirs(..., exist_ok=True)`` so the cell can be re-run safely;
    the previous ``os.mkdir`` calls raised FileExistsError on a second run.

    Args:
        n_splits: number of numbered folders per split (folders are 1-based).
        splits: base folder names that receive a numeric suffix.
        labels: class sub-folders created inside every numbered folder.
        root: directory under which everything is created.

    Returns:
        The list of leaf directories, in creation order.
    """
    leaf_dirs = []
    for i in range(1, n_splits + 1):
        for split in splits:
            for label in labels:
                leaf = os.path.join(root, split + str(i), label)
                os.makedirs(leaf, exist_ok=True)
                leaf_dirs.append(leaf)
    return leaf_dirs


numbered_dirs = make_numbered_dirs()
    
    


In [212]:
import shutil
import glob

src_dir_list = ['train/positive', 'train/negative', 'val/positive', 'val/negative']


def clear_numbered_dirs(source_dirs, n_splits=5, root='.'):
    """Delete every ``*.jpg`` from the numbered copies of each source directory.

    For a source such as ``'train/positive'`` the folders cleaned are
    ``train1/positive`` ... ``train<n_splits>/positive`` (relative to ``root``);
    the source directory itself is never touched.

    Args:
        source_dirs: iterable of ``'<split>/<label>'`` strings.
        n_splits: number of numbered folders per split (1-based).
        root: directory that contains the numbered folders.

    Returns:
        The number of files successfully removed.
    """
    removed = 0
    for source_dir in source_dirs:
        print("Removing files from: ", source_dir)
        parts = str(source_dir).split("/")
        split_name, label = parts[0], parts[1]
        for i in range(1, n_splits + 1):
            dest_dir = os.path.join(root, split_name + str(i), label)
            # A plain (non-recursive) glob is enough: the pattern has no '**'.
            for f in glob.glob(os.path.join(dest_dir, '*.jpg')):
                try:
                    os.remove(f)
                    removed += 1
                except OSError as e:
                    # Best effort: report the failure and continue with the remaining files.
                    print("Error: %s : %s" % (f, e.strerror))
    return removed


removed_count = clear_numbered_dirs(src_dir_list)


Removing files from:  train/positive
Removing files from:  train/negative
Removing files from:  val/positive
Removing files from:  val/negative


In [213]:
#split files into folders: the i-th image of every patient is copied to <split><i>/<label>

src_dir_list = ['train/positive', 'train/negative', 'val/positive', 'val/negative']
dest_dir_dict = {}      # destination folder -> number of files copied into it so far
images_per_patient = 5  # one image per numbered folder

for source_dir in src_dir_list:
    print("Doing files from: ", source_dir)
    not_enough_images = 0  # counts patient prefixes skipped for having too few images
    path_parts = str(source_dir).split("/")
    split_name, label = path_parts[0], path_parts[1]

    # Destination folders for this source; created up front so shutil.copy2 always
    # copies INTO a directory instead of creating a file named after a missing one.
    dest_dirs = [split_name + str(i) + '/' + label for i in range(1, images_per_patient + 1)]
    for dest_dir in dest_dirs:
        os.makedirs(dest_dir, exist_ok=True)

    #get unique patient prefixes (the text before 'well') in first-seen order
    file_prefixes = list(dict.fromkeys(
        filename.split('well')[0] for filename in os.listdir(source_dir)))

    for file_prefix in file_prefixes:
        # Anchor the pattern on 'well' so a prefix such as 'p1' cannot also pick up
        # 'p10...' files, and escape it so glob metacharacters in names are literal.
        pattern = source_dir + '/' + glob.escape(str(file_prefix)) + 'well*.jpg'
        # glob order is unspecified; sort so the folder assignment is reproducible.
        # NOTE(review): assumes file names sort in time-point order - confirm.
        patient_files = sorted(glob.glob(pattern))

        #make sure we have 5 per patient
        if len(patient_files) < images_per_patient:
            not_enough_images += 1
            continue

        #copy the individual files to the numbered folders (extra files are ignored)
        for patient_file, dest_dir in zip(patient_files, dest_dirs):
            shutil.copy2(patient_file, dest_dir)
            dest_dir_dict[dest_dir] = dest_dir_dict.get(dest_dir, 0) + 1

    print("couldn't do ", not_enough_images, "images in ",source_dir )
    print(dest_dir_dict)

Doing files from:  train/positive
couldn't do  66 images in  train/positive
{'train1/positive': 1595, 'train2/positive': 1595, 'train3/positive': 1595, 'train4/positive': 1595, 'train5/positive': 1595}
Doing files from:  train/negative
couldn't do  133 images in  train/negative
{'train1/positive': 1595, 'train2/positive': 1595, 'train3/positive': 1595, 'train4/positive': 1595, 'train5/positive': 1595, 'train1/negative': 2040, 'train2/negative': 2040, 'train3/negative': 2040, 'train4/negative': 2040, 'train5/negative': 2040}
Doing files from:  val/positive
couldn't do  21 images in  val/positive
{'train1/positive': 1595, 'train2/positive': 1595, 'train3/positive': 1595, 'train4/positive': 1595, 'train5/positive': 1595, 'train1/negative': 2040, 'train2/negative': 2040, 'train3/negative': 2040, 'train4/negative': 2040, 'train5/negative': 2040, 'val1/positive': 404, 'val2/positive': 404, 'val3/positive': 404, 'val4/positive': 404, 'val5/positive': 404}
Doing files from:  val/negative
could

In [3]:
# input_imgen = ImageDataGenerator(rescale = 1./255, 
#                                    shear_range = 0.2, 
#                                    zoom_range = 0.2,
#                                    rotation_range=5.,
#                                    horizontal_flip = True)



# This data generator streams from several separate folders in lock-step;
# each folder is a time point (the notebook uses 5 of them).
def generate_generator_multiple(generator, batch_size, img_height, img_width, folders):
    """Yield aligned batches drawn from one directory iterator per folder.

    One ``flow_from_directory`` iterator is opened per entry of ``folders``
    (grayscale, binary labels, ``shuffle=False`` so that the iterators advance
    through their files in the same order). Every step pulls one batch from each
    iterator and yields ``([images_folder_0, ..., images_folder_n], labels)``,
    where ``labels`` are taken from the LAST folder's batch.

    Any number of folders is supported; with five folders the output is the
    same as the earlier hard-coded version.

    Args:
        generator: object exposing ``flow_from_directory`` (e.g. an ImageDataGenerator).
        batch_size: batch size passed to every iterator.
        img_height: target image height.
        img_width: target image width.
        folders: sequence of directory paths, one per time point.

    Yields:
        ``(list_of_image_batches, labels)`` indefinitely, for as long as the
        underlying iterators keep producing batches.

    Raises:
        ValueError: if ``folders`` is empty (raised on the first ``next()``).
    """
    generators_list = []

    for n_gen, folder in enumerate(folders):
        print(n_gen, ':', folder)
        gen = generator.flow_from_directory(folder,
                                            target_size=(img_height, img_width),
                                            class_mode='binary',
                                            color_mode="grayscale",
                                            batch_size=batch_size,
                                            shuffle=False,
                                            seed=7)
        generators_list.append(gen)

    if not generators_list:
        raise ValueError("folders must contain at least one directory")

    while True:
        # Builtin next() works for any iterator, including ones without a .next() method.
        batches = [next(gen) for gen in generators_list]
        # Yield one image batch per folder and their mutual label (from the last folder).
        yield [batch[0] for batch in batches], batches[-1][1]

    


In [6]:
# Rescale-only generators (no augmentation) for the multi-folder stream.
input_imgen = ImageDataGenerator(rescale = 1./255)
test_imgen = ImageDataGenerator(rescale = 1./255)

# Image size and batch size used by every per-folder iterator.
image_height = image_width = 256
batch_size = 32

# train1 ... train5: the five numbered training folders.
numbered_train_folders = ['train' + str(k) for k in range(1, 6)]
traingenerator = generate_generator_multiple(
    input_imgen,
    batch_size,
    image_height,
    image_width,
    numbered_train_folders,
)

In [7]:
# Display the generator object. generate_generator_multiple is a generator function,
# so none of its body (including opening the folders) runs until the first batch is requested.
traingenerator

<generator object generate_generator_multiple at 0x000001847732B1C8>

In [8]:
# CNN + LSTM model: the same small CNN is applied to every time point
# (TimeDistributed) and an LSTM summarises the resulting feature sequence.
# Layers come from tensorflow.keras, like the rest of the notebook; importing the
# same layer names from the standalone `keras` package here would rebind them and
# mix two Keras implementations in later cells.
from tensorflow.keras.layers import TimeDistributed, Conv2D, Dense, MaxPooling2D, Flatten, LSTM, Dropout, BatchNormalization
from tensorflow.keras import models

# Keras' input_shape excludes the batch dimension, so for TimeDistributed it is
# (timesteps, height, width, channels). The sequence axis is the 5 time points
# (one per numbered folder), not the batch size of 32.
sequence_length = 5
# NOTE: generate_generator_multiple yields a LIST of per-time-point batches; stack
# them on axis 1 (e.g. np.stack(frames, axis=1)) before feeding this single-input model.

model_cnlst = models.Sequential()
model_cnlst.add(TimeDistributed(Conv2D(128, (3, 3), strides=(1,1),activation='relu'),input_shape=(sequence_length, 256, 256, 1)))
model_cnlst.add(TimeDistributed(Conv2D(64, (3, 3), strides=(1,1),activation='relu')))
model_cnlst.add(TimeDistributed(MaxPooling2D(2,2)))
model_cnlst.add(TimeDistributed(Conv2D(64, (3, 3), strides=(1,1),activation='relu')))
model_cnlst.add(TimeDistributed(Conv2D(32, (3, 3), strides=(1,1),activation='relu')))
model_cnlst.add(TimeDistributed(MaxPooling2D(2,2)))
model_cnlst.add(TimeDistributed(BatchNormalization()))

# One flat feature vector per time point, then temporal modelling.
model_cnlst.add(TimeDistributed(Flatten()))
model_cnlst.add(Dropout(0.2))

# NOTE(review): the NotImplementedError recorded under this cell ("Cannot convert a
# symbolic Tensor (lstm/strided_slice:0) to a numpy array") is raised while building
# the LSTM; it is commonly reported for some TensorFlow 2.x / NumPy >= 1.20
# combinations - check the installed versions if it persists.
model_cnlst.add(LSTM(32,return_sequences=False,dropout=0.2)) # used 32 units

# Classification head: single sigmoid unit for the binary label.
model_cnlst.add(Dense(64,activation='relu'))
model_cnlst.add(Dense(32,activation='relu'))
model_cnlst.add(Dropout(0.2))
model_cnlst.add(Dense(1, activation='sigmoid'))
model_cnlst.summary()


NotImplementedError: Cannot convert a symbolic Tensor (lstm/strided_slice:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported

Found 22396 images belonging to 2 classes.
Found 22396 images belonging to 2 classes.
Found 22396 images belonging to 2 classes.
Found 22396 images belonging to 2 classes.
Found 22396 images belonging to 2 classes.


In [80]:
# NOTE(review): `batch_np` is not defined in any visible cell, so this only works with
# leftover kernel state. Presumably it is one batch pulled from a generator and index 1
# is its label array (the recorded output is 32 float32 values) - TODO confirm.
batch_np[1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)

In [60]:
 # Model.fit accepts Python generators directly; fit_generator is deprecated in TF 2.x.
 # Step counts must be whole numbers, so a partial final batch is rounded up.
 # use_multiprocessing is no longer requested: a plain Python generator cannot be
 # shared safely between worker processes (Keras warns about duplicated batches).
 # NOTE(review): inputgenerator, testgenerator, trainsetsize and testsetsize are not
 # defined in the visible cells (the recorded run stopped with a NameError on
 # trainsetsize) - define them before running this cell.
 history=model.fit(inputgenerator,
                   steps_per_epoch=int(np.ceil(trainsetsize/batch_size)),
                   epochs = epochs,
                   validation_data = testgenerator,
                   validation_steps = int(np.ceil(testsetsize/batch_size)),
                   shuffle=False)

NameError: name 'trainsetsize' is not defined

In [19]:
# NOTE(review): neither `my_gen` nor the `JoinedGen` class named in the recorded output
# is defined in the visible cells; this display depends on leftover kernel state.
my_gen

<__main__.JoinedGen at 0x18b4adcef48>

In [8]:

#####################################efficientnet model
# EfficientNetB7 backbone on 256x256 single-channel images, followed by a small
# dense head with one sigmoid output for the binary label.
backbone_weights = None  # random initialisation: no pretrained weights are loaded
base_model = efn.EfficientNetB7(input_shape = (256, 256, 1),
                                weights = backbone_weights,
                                include_top = False)

# Freezing is only meaningful for a pretrained backbone. With randomly initialised
# weights a frozen backbone produces fixed random features and only the head can
# learn, so the layers stay trainable whenever no weights were loaded.
for layer in base_model.layers:
    layer.trainable = backbone_weights is None

# NOTE(review): the tf.keras EfficientNet models document that they rescale their
# inputs internally and expect pixel values in [0, 255], while train_generator already
# multiplies by 1/255 - confirm whether that double scaling is intended.

x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)

# Add a final sigmoid layer with 1 node for classification output
predictions = Dense(1, activation="sigmoid")(x)
model = Model(inputs = base_model.input, outputs = predictions)


model.compile(loss="binary_crossentropy",
              optimizer=tf.keras.optimizers.Nadam(),
              metrics=['accuracy'])

model.fit(train_generator, epochs=epochs, validation_data=test_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 

In [42]:
# save Keras model in SavedModel format for Tensorflow Serving
model.save(os.path.join(model_dir, 'efficientnet7'))
# Hand-recorded validation accuracy for this run. It is a literal note, not a value
# computed here; it has to be a string, because the unquoted form is a SyntaxError
# that prevents the whole cell (including the save above) from running.
print("val_accuracy: 0.6099")


INFO:tensorflow:Assets written to: models\efficientnet7\assets


In [11]:
#####################################simple model
# Two convolution blocks followed by a dense classifier with a single sigmoid output.
input_shape = (256, 256, 1)
batch_norm_axis=-1

model = Sequential([
    # 1st convolution block: 64 filters, 'same' padding, 2x2 max-pooling
    Conv2D(64, kernel_size=(3,3), padding='same', input_shape=input_shape),
    BatchNormalization(axis=batch_norm_axis),
    Activation('relu'),
    MaxPooling2D(pool_size=(2,2), strides=2),

    # 2nd convolution block: 128 filters, 'valid' padding, 2x2 max-pooling
    Conv2D(128, kernel_size=(3,3), padding='valid'),
    BatchNormalization(axis=batch_norm_axis),
    Activation('relu'),
    MaxPooling2D(pool_size=(2,2), strides=2),

    # Fully connected block
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.2),

    # Output layer
    Dense(1, activation='sigmoid'),
])

#####################################

# Nadam with its default settings.
nadam = tf.keras.optimizers.Nadam()
model.compile(loss="binary_crossentropy", optimizer=nadam, metrics='accuracy')

model.fit(train_generator, epochs=epochs, validation_data=test_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
 47/700 [=>............................] - ETA: 1:15 - loss: 0.6601 - accuracy: 0.6240

KeyboardInterrupt: 

In [None]:
# Export the current Keras model to <model_dir>/custom (SavedModel format, for Tensorflow Serving)
custom_export_dir = os.path.join(model_dir, 'custom')
model.save(custom_export_dir)


In [14]:
#####################################ResNet50 model
# ResNet50 backbone on 256x256 single-channel images with a small dense head.
# ResNet50 is taken from the notebook's top-level import; the previous
# `from tensorflow.keras.applications.ResNet import InceptionV3` named a module that
# does not exist and a different architecture than the one this cell is labelled
# (and saved) as.
base_model = ResNet50(input_shape = (256, 256, 1),
                      weights = None,
                      include_top = False)

# The backbone is randomly initialised (weights=None), so all of its layers are
# left trainable rather than frozen.

x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)

# Add a final sigmoid layer with 1 node for classification output
predictions = Dense(1, activation="sigmoid")(x)
model = Model(inputs = base_model.input, outputs = predictions)


model.compile(loss="binary_crossentropy",
              optimizer=tf.keras.optimizers.Nadam(),
              metrics=['accuracy'])

model.fit(train_generator, epochs=epochs, validation_data=test_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x191ec2949c8>

In [None]:
# Export the current Keras model to <model_dir>/resnet50 (SavedModel format, for Tensorflow Serving)
resnet50_export_dir = os.path.join(model_dir, 'resnet50')
model.save(resnet50_export_dir)
