In [1]:
import matplotlib.pyplot as plt
import numpy as np

import os

import tensorflow as tf
from tensorflow import keras
from keras.utils import image_dataset_from_directory
from keras import Sequential, Input
from keras.layers import RandomFlip, RandomContrast, RandomRotation, RandomZoom, Rescaling
from keras.layers import GlobalAveragePooling2D, Dense, Conv2D, BatchNormalization, Dropout, Flatten
from keras.applications import EfficientNetV2L as base
from keras.optimizers import Adam
from keras.losses import SparseCategoricalCrossentropy as scc
from keras.callbacks import EarlyStopping, LearningRateScheduler

from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

2024-05-02 21:16:28.160467: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
strategy = tf.distribute.MirroredStrategy()
print('DEVICES AVAILABLE: {}'.format(strategy.num_replicas_in_sync))

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
DEVICES AVAILABLE: 1


In [2]:
image_size = 256
batch_size = 32
epochs = 5
lr_init = .002

train_ds = tf.keras.utils.image_dataset_from_directory(
  '/Users/dragxn/Desktop/projhealth/Oral Cancer',
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(image_size, image_size),
  batch_size=batch_size)


val_ds = tf.keras.utils.image_dataset_from_directory(
  '/Users/dragxn/Desktop/projhealth/Oral Cancer',
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(image_size, image_size),
  batch_size=batch_size)

Found 10002 files belonging to 2 classes.
Using 8002 files for training.
Found 10002 files belonging to 2 classes.
Using 2000 files for validation.


In [3]:
class_names = train_ds.class_names
val_batches = tf.data.experimental.cardinality(val_ds)
test_ds = val_ds.take(val_batches // 2)
val_ds = val_ds.skip(val_batches // 2)
class_names

['Normal', 'Squamous Cell Carcinoma']

In [4]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

In [16]:
es = EarlyStopping(monitor='val_accuracy', patience=10, min_delta=0.001, restore_best_weights=True)
def create_model(base_model):
    input = Input(shape=(image_size, image_size, 3))
    x = RandomFlip('horizontal')(input)
    x = RandomRotation(0.2)(x)
    x = RandomZoom(0.2,0.2)(x)
    x = RandomContrast(0.2)(x)
    b_model = base_model(x)
    flatten = Flatten()(b_model)
    x = Dense(1024,activation='relu')(flatten)
    norm = BatchNormalization()(x)
    x = Dense(512,activation='swish')(flatten)
    norm = BatchNormalization()(x)
    x = Dense(128,activation='relu')(norm)
    norm = BatchNormalization()(x)
    dropout = Dropout(0.2)(norm)
    x = Dense(len(class_names))(dropout)
    model = tf.keras.models.Model(inputs=input, outputs=x)

    model.compile(optimizer=Adam(lr_init),
              loss=scc(from_logits=True),
              metrics=['accuracy'])
    return model
def schedule(epoch,lr):
    if epoch < 5:
        return ((epoch)+1)*lr_init/5
    if epoch < 15:
        return lr
    else:
        return lr * tf.math.exp(-0.1)
lr_scheduler = LearningRateScheduler(schedule,verbose=1)

def plot_history(history):
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']

    loss=history.history['loss']
    val_loss=history.history['val_loss']
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(1, 2, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()

In [14]:
with strategy.scope():
    base_model = base(input_shape=(image_size,image_size,3), include_top=False, weights='imagenet')
    base_model.trainable = False
    model = create_model(base_model)

In [17]:
history = model.fit(train_ds,
                    epochs=epochs,
                    validation_data=val_ds,
                   callbacks=[es,lr_scheduler])


Epoch 1: LearningRateScheduler setting learning rate to 0.0004.
Epoch 1/5


2024-05-02 21:27:17.694576: W tensorflow/core/framework/dataset.cc:959] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m250/251[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m14s[0m 15s/step - accuracy: 0.6653 - loss: 0.7711

2024-05-02 22:29:43.970339: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]


[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15s/step - accuracy: 0.6654 - loss: 0.7705 

2024-05-02 22:29:45.771926: W tensorflow/core/framework/dataset.cc:959] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.
2024-05-02 22:36:59.480106: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]


[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4191s[0m 16s/step - accuracy: 0.6656 - loss: 0.7700 - val_accuracy: 0.7808 - val_loss: 0.5378 - learning_rate: 4.0000e-04

Epoch 2: LearningRateScheduler setting learning rate to 0.0008.
Epoch 2/5


2024-05-02 22:37:09.013648: W tensorflow/core/framework/dataset.cc:959] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m138/251[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m27:03[0m 14s/step - accuracy: 0.7335 - loss: 0.5584