# Models ResNet and VGG16 for binary classification

In [14]:
# global imports for vgg16

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import skimage.io
import os 
import tqdm
import glob
import tensorflow 

from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

from skimage.io import imread, imshow
from skimage.transform import resize
from skimage.color import grey2rgb

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, BatchNormalization, Dropout, Flatten, Dense, Activation, MaxPool2D, Conv2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.utils import to_categorical
from keras import optimizers
from tensorflow.keras.optimizers import Adam

from keras.callbacks import Callback,ModelCheckpoint,ReduceLROnPlateau
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
import keras.backend as K

#import tensorflow_addons as tfa
#from tensorflow.keras.metrics import Metric
#from tensorflow_addons.utils.types import AcceptableDTypes, FloatTensorLike
from typeguard import typechecked
from typing import Optional

In [4]:
# code for dataset generation - autotune
AUTOTUNE = tf.data.experimental.AUTOTUNE

# generating train/test/valid
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   validation_split = 0.2,
                                  
        rotation_range=5,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        #zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest')

valid_datagen = ImageDataGenerator(rescale = 1./255,
                                  validation_split = 0.2)

test_datagen  = ImageDataGenerator(rescale = 1./255
                                  )

In [15]:
# with vgg16 models the target size is always 224x224
train_dataset  = train_datagen.flow_from_directory(directory = './Desktop/CTscns2C/train',
                                                   target_size = (224,224),
                                                   class_mode = 'categorical',
                                                   batch_size = 64)

# validation data
valid_dataset = valid_datagen.flow_from_directory(directory = './Desktop/CTscns2C/val',
                                                  target_size = (224,224),
                                                  class_mode = 'categorical',
                                                  batch_size = 64)

# test data
test_dataset = test_datagen.flow_from_directory(directory = './Desktop/CTscns2C/test',
                                                  target_size = (224,224),
                                                  class_mode = 'categorical',
                                                  batch_size = 64)

Found 567 images belonging to 2 classes.
Found 25 images belonging to 2 classes.
Found 68 images belonging to 2 classes.


In [6]:
# loading in the same vgg model used for the other data set

base_model_v2 = tf.keras.applications.VGG16(input_shape=(224,224,3),include_top=False,weights="imagenet")
# using the weights used in the imagenet competition

# Freezing layers

for layer in base_model_v2.layers[:-8]:
    layer.trainable=False

2022-08-16 15:33:23.600913: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Adding sequential layers/ building model - just as in the first iteration

model=Sequential()
model.add(base_model_v2)
model.add(Dropout(0.5))
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(32,kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(32,kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(32,kernel_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(2,activation='softmax')) # must use softmax at last layer

In [7]:
# defing f1 score using predictions

def f1_score(y_true, y_pred): 
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2*(precision*recall)/(precision+recall+K.epsilon())
    return f1_val

METRICS = [
      'accuracy',f1_score
]

In [10]:
# compile the model and run over ttraining data

# adding early stopping and other call backs to avoid massive training times
reduce = ReduceLROnPlateau(monitor = 'val_loss',patience = 3,verbose = 1,factor = 0.50, min_lr = 1e-7)# reducing learning rate, stopping overfitting

early_stopping = EarlyStopping(verbose=1, patience=3)


# compiling the model using adam as opt. and using x entropy for loss
model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = METRICS)

# training the model over the training data, using validation data also
%time
history=model.fit(train_dataset,validation_data=valid_dataset,epochs = 40,verbose = 1, callbacks=[reduce, early_stopping])

CPU times: user 2 µs, sys: 2 µs, total: 4 µs
Wall time: 8.82 µs
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 00020: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 00020: early stopping


In [45]:
# save the model

model.save("./Desktop/Diss_saved_models/binarymodels/vgg16binary")

2022-08-16 14:14:02.236652: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /Users/elliotttrott/Desktop/Diss_saved_models/binarymodels/vgg16binary/assets


In [14]:
# evaluate over the test data
model.evaluate(test_dataset)



[0.1120179146528244, 0.9700176119804382, 0.9699178338050842]

In [1]:
# necessary imports for resnet model
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, Sequential
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# best results from designing generator with some augmentation
datagen = ImageDataGenerator(
    horizontal_flip=True, 
    validation_split=0.2, 
    width_shift_range=0.2,
    height_shift_range=0.2,
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
    dtype=tf.float32,
    rotation_range=25)

In [5]:
# paths for training - setting class mode to sparse is the only difference with how these sets are compiled
train_datasetRes  = train_datagen.flow_from_directory(directory = './Desktop/CTscns2C/train',
                                                   target_size = (224,224),
                                                   class_mode = 'sparse',
                                                   batch_size = 64)

# validation data
valid_datasetRes = valid_datagen.flow_from_directory(directory = './Desktop/CTscns2C/val',
                                                  target_size = (224,224),
                                                  class_mode = 'sparse',
                                                  batch_size = 64)

# test data
test_datasetRes = test_datagen.flow_from_directory(directory = './Desktop/CTscns2C/test',
                                                  target_size = (224,224),
                                                  class_mode = 'sparse',
                                                  batch_size = 64)

Found 567 images belonging to 2 classes.
Found 25 images belonging to 2 classes.
Found 68 images belonging to 2 classes.


In [6]:
class_names = ["Cancerous",
"normal"]

In [7]:
# loading in the base model
IMG_SHAPE = (224,224, 3)
base_model_ResBinary = tf.keras.applications.ResNet50(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

2022-09-04 18:18:40.799823: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
# freezing layers
base_model_ResBinary.trainable = False

In [15]:
# defining metrics
def f1_score(y_true, y_pred): 
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2*(precision*recall)/(precision+recall+K.epsilon())
    return f1_val

METRICS = [
      'accuracy',f1_score
]

In [16]:
# adding additional layers
modelResBinary = Sequential([
    base_model_ResBinary,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(2, activation='softmax')
])

# compiling the model, adam optimizer
modelResBinary.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=[METRICS])

In [17]:
modelResBinary.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_average_pooling2d_1   (None, 2048)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 dense_2 (Dense)             (None, 256)               524544    
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_3 (Dense)             (None, 2)                 514       
                                                                 
Total params: 24,112,770
Trainable params: 525,058
Non-trainable params: 23,587,712
____________________________________

In [18]:
# Early stopping callback to prevent overfitting
early_stopping = EarlyStopping(min_delta=0.01, patience=2, restore_best_weights=True)

In [19]:
# training model over training data
historyResBinary = modelResBinary.fit(train_datasetRes, epochs=15, batch_size=36, validation_data=valid_datasetRes, 
                             callbacks=[early_stopping])

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15


In [20]:
# evaluating over the test data

modelResBinary.evaluate(test_datasetRes)



[0.21695712208747864, 0.9264705777168274, 0.5333333015441895]

In [25]:
# test over unseen data as well as holdout
# testing against unseen data

unseentest_dataset = test_datagen.flow_from_directory(directory = './Desktop/CTbenignremoved/test',
                                                  target_size = (224,224),
                                                  class_mode = 'sparse',
                                                  batch_size = 64)

Found 100 images belonging to 2 classes.


In [26]:
modelResBinary.evaluate(unseentest_dataset)



[3.1718082427978516, 0.46000000834465027, 0.6043955683708191]