## Imports

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf


from keras import models, layers, optimizers, regularizers
from keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications import EfficientNetB6
import os
from pathlib import Path
from PIL import Image
print(os.listdir("train"))

['Benign', 'Malignant']


## Extract and preprocess data:
Extract dataset for training and testing from folders: train and test   
  
Be careful! Run the only one of the three next cells in this section:
* First cell for sample centering  
* Second Cell for feature Centering  
* Third cell for using a center crop of (180,180), the model will also need input images of (180,180)  

Sample Centering

In [5]:
train_datagen = ImageDataGenerator(rotation_range=40,
                                   shear_range=20,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   validation_split=0.15,
                                   samplewise_center=True,
                                   fill_mode='nearest',
                                   rescale=1.0/255.0)


test_datagen = ImageDataGenerator(samplewise_center=True,
                                  rescale=1.0/255.0)



train_generator = train_datagen.flow_from_directory(
    "train", target_size=(224, 224), batch_size=16, class_mode='categorical', shuffle=True, subset='training')


val_generator = train_datagen.flow_from_directory(
    "train", target_size=(224, 224), batch_size=8, class_mode='categorical', shuffle=True, subset='validation')

test_generator = test_datagen.flow_from_directory(
    "test", target_size=(224, 224), batch_size=8, class_mode='categorical')

Found 10098 images belonging to 2 classes.
Found 1781 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


Feature Centering

In [None]:
def read_pil_image(img_path, height, width):
        with open(img_path, 'rb') as f:
            return np.array(Image.open(f).convert('RGB').resize((width, height)))

def load_all_images(dataset_path, height, width, img_ext='jpg'):
    return np.array([read_pil_image(str(p), height, width) for p in 
                                    Path(dataset_path).rglob("*."+img_ext)]) 

all_images = load_all_images('train/', 224, 224)

train_datagen = ImageDataGenerator(rotation_range=40,
                                   shear_range=20,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   validation_split=0.15,
                                   featurewise_center=True,
                                   fill_mode='nearest',
                                   rescale=1.0/255.0)


test_datagen = ImageDataGenerator(featurewise_center=True,
                                  rescale=1.0/255.0)

train_datagen.fit(np.array([all_images.mean(axis=0)]))
test_datagen.fit(np.array([all_images.mean(axis=0)]))

train_generator = train_datagen.flow_from_directory(
    "train", target_size=(224, 224), batch_size=16, class_mode='categorical', shuffle=True, subset='training')


val_generator = train_datagen.flow_from_directory(
    "train", target_size=(224, 224), batch_size=8, class_mode='categorical', shuffle=True, subset='validation')

test_generator = test_datagen.flow_from_directory(
    "test", target_size=(224, 224), batch_size=8, class_mode='categorical')

Center crop of (180,180)  

In [None]:


train_datagen = ImageDataGenerator(preprocessing_function=tf.keras.layers.CenterCrop(180, 180),
                                   rotation_range=40,
                                   shear_range=20,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   samplewise_center=True,
                                   fill_mode='nearest',
                                   validation_split=0.15,
                                   rescale=1.0/255.0)


test_datagen = ImageDataGenerator(preprocessing_function=tf.keras.layers.CenterCrop(180, 180),
                                  samplewise_center=True,
                                  rescale=1.0/255.0)


train_generator = train_datagen.flow_from_directory(
    "train", target_size=(180, 180), batch_size=16, class_mode='categorical', shuffle=True, subset='training')


val_generator = train_datagen.flow_from_directory(
    "train", target_size=(180, 180), batch_size=8, class_mode='categorical', shuffle=True, subset='validation')

test_generator = test_datagen.flow_from_directory(
    "test", target_size=(180, 180), batch_size=8, class_mode='categorical', shuffle=True)

## Model
Create model:  

 * EfficientNetB6 base layer with top fully connected layer removed
 * a global average pooling 2D layer for feature extraction from EfficientNet
 * a dense layer with ReLu activation (because it is less susceptible to vanishing gradients)
 * a dense layer with softmax activation for classification (or sigmoid activation)   
   
__Use input shape according to the datagenerator chosen__

If you want pretrained weights on ImageNet dataset use:
```
EfficientNetB6(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
```

If you want a random initialisation:
```
EfficientNetB6(weights=None, include_top=False, input_shape=(224, 224, 3))
```



In [6]:
# EfficientNetV2 Model
base = EfficientNetB6(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model= Sequential()
model.add(base)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(2, activation='softmax')) #binary output layer


model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(learning_rate=0.0001), metrics=['accuracy', 'AUC'])

Train and save model weights

In [8]:
history = model.fit(train_generator, epochs=10, steps_per_epoch=train_generator.n//train_generator.batch_size,
                    validation_data=val_generator, validation_steps=val_generator.n//val_generator.batch_size)

model.save("models/efficient_net_b6_samplewise_10.h5")

Epoch 1/10


  self._warn_if_super_not_called()
I0000 00:00:1723908898.385156  133228 service.cc:146] XLA service 0x791874003ef0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1723908898.385173  133228 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce GTX 1060, Compute Capability 6.1
2024-08-17 17:35:00.975779: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-08-17 17:35:10.333584: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907
E0000 00:00:1723908935.479379  133228 gpu_timer.cc:183] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1723908935.628459  133228 gpu_timer.cc:183] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, ple

[1m590/631[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m25s[0m 621ms/step - AUC: 0.9138 - accuracy: 0.8384 - loss: 0.3643

E0000 00:00:1723909382.159801  133228 gpu_timer.cc:183] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1723909382.295400  133228 gpu_timer.cc:183] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1723909382.434657  133228 gpu_timer.cc:183] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1723909382.572645  133228 gpu_timer.cc:183] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1723909382.933184  133228 gpu_timer.cc:183] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000

[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m662s[0m 800ms/step - AUC: 0.9158 - accuracy: 0.8408 - loss: 0.3602 - val_AUC: 0.6934 - val_accuracy: 0.6115 - val_loss: 0.8481
Epoch 2/10
[1m  1/631[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:19[0m 602ms/step - AUC: 0.9648 - accuracy: 0.8750 - loss: 0.2704

2024-08-17 17:44:53.009859: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-08-17 17:44:53.011623: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
  self.gen.throw(typ, value, traceback)


[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 16ms/step - AUC: 0.9648 - accuracy: 0.8750 - loss: 0.2704 - val_AUC: 0.9600 - val_accuracy: 0.8000 - val_loss: 0.3657
Epoch 3/10


2024-08-17 17:45:02.953621: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m406s[0m 643ms/step - AUC: 0.9672 - accuracy: 0.9062 - loss: 0.2323 - val_AUC: 0.8978 - val_accuracy: 0.7945 - val_loss: 0.4419
Epoch 4/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 155us/step - AUC: 0.9727 - accuracy: 0.9375 - loss: 0.1962 - val_AUC: 1.0000 - val_accuracy: 1.0000 - val_loss: 0.1188
Epoch 5/10


2024-08-17 17:51:50.076811: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m405s[0m 639ms/step - AUC: 0.9785 - accuracy: 0.9244 - loss: 0.1870 - val_AUC: 0.9005 - val_accuracy: 0.8232 - val_loss: 0.4320
Epoch 6/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 143us/step - AUC: 0.9727 - accuracy: 0.9375 - loss: 0.2165 - val_AUC: 0.7200 - val_accuracy: 0.8000 - val_loss: 0.5448
Epoch 7/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m433s[0m 685ms/step - AUC: 0.9807 - accuracy: 0.9281 - loss: 0.1776 - val_AUC: 0.9088 - val_accuracy: 0.8238 - val_loss: 0.4494
Epoch 8/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 144us/step - AUC: 0.9727 - accuracy: 0.9375 - loss: 0.2096 - val_AUC: 1.0000 - val_accuracy: 1.0000 - val_loss: 0.0368
Epoch 9/10


2024-08-17 18:05:49.520796: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m447s[0m 707ms/step - AUC: 0.9865 - accuracy: 0.9459 - loss: 0.1465 - val_AUC: 0.9148 - val_accuracy: 0.8350 - val_loss: 0.4420
Epoch 10/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 186us/step - AUC: 0.9961 - accuracy: 0.9375 - loss: 0.1235 - val_AUC: 1.0000 - val_accuracy: 1.0000 - val_loss: 0.0157




Evaluate Model

In [17]:
test_AUC, test_acc, test_loss = model.evaluate(test_generator, steps=test_generator.samples // test_generator.batch_size)

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 93ms/step - AUC: 0.9681 - accuracy: 0.9053 - loss: 0.2303


To generate list of predictions from test set images

In [8]:
test_predictions = model.predict(test_generator)


[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 48ms/step


Test Models

Be careful to use same activation function for final layer (softmax or sigmoid depending on ckpt)

In [None]:
ckpt ='efficient_net_b6_samplewise_10.h5'

# # Use if using featurewise centering
# test_datagen = ImageDataGenerator(featurewise_center=True, rescale=1.0/255.0)
# test_datagen.fit(np.array([all_images.mean(axis=0)]))
# test_generator = test_datagen.flow_from_directory("test", target_size=(224, 224), batch_size=8, class_mode='categorical')



# #Use if using samplewise centering
# test_datagen = ImageDataGenerator(samplewise_center=True, rescale=1.0/255.0)
# test_generator = test_datagen.flow_from_directory("test", target_size=(224, 224), batch_size=8, class_mode='categorical')

#Use if using samplewise centering and the (180,180) center crop model
test_datagen = ImageDataGenerator(preprocessing_function=tf.keras.layers.CenterCrop(180, 180), samplewise_center=True, rescale=1.0/255.0)
test_generator = test_datagen.flow_from_directory("test", target_size=(180, 180), batch_size=8, class_mode='categorical', shuffle=True)
   
   
test_model = tf.keras.models.load_model(filepath='models/' + ckpt)

print(f"Model:{ckpt}")
test_model.evaluate(test_generator, steps=test_generator.samples // test_generator.batch_size)