In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, losses
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.applications import VGG19
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import datetime, re, pickle

In [None]:
# --- Configuration -----------------------------------------------------------
# Single place to change the project root; the three dataset directories below
# are derived from it (resulting paths are unchanged, including the trailing '/').
PROJECT_DIR = '/home/meredithc/Transfer-Learning-for-Cancer-Detection'
data_dir_train = f'{PROJECT_DIR}/Normalized-Train/'
data_dir_validation = f'{PROJECT_DIR}/Normalized-Validation/'
data_dir_test = f'{PROJECT_DIR}/Normalized-Test/'

BATCH_SIZE = 48              # images per batch yielded by each generator
IMG_SIZE = 224               # images are resized to IMG_SIZE x IMG_SIZE on load
BASE_LEARNING_RATE = 0.001   # initial Adam learning rate used by both model cells
SEED = 51432
# Seeds Python's `random`, NumPy and TensorFlow in one call for reproducibility.
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Shared image loader: multiplies every pixel value by 1/255 as images are read
# (maps 0-255 inputs into [0, 1]). No augmentation is configured here.
# NOTE(review): every generator built from `datagen` yields rescaled pixels, so
# any model that applies its own ImageNet preprocessing (e.g.
# vgg19.preprocess_input, which expects 0-255 input) must account for this scaling.
datagen = ImageDataGenerator(rescale=1.0/255)

In [None]:
# Options common to all three directory iterators: resize on load, fixed batch
# size, and a single 0/1 label per image (class_mode='binary').
flow_kwargs = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

# Training and validation batches are shuffled.
train_generator = datagen.flow_from_directory(data_dir_train, shuffle=True, **flow_kwargs)

validation_generator = datagen.flow_from_directory(data_dir_validation, shuffle=True, **flow_kwargs)

# The test iterator is NOT shuffled so that predictions come back in the same
# order as `test_generator.classes`.
test_generator = datagen.flow_from_directory(data_dir_test, shuffle=False, **flow_kwargs)

### AlexNet

In [None]:
# AlexNet-style CNN for binary classification (single sigmoid output).
# NOTE: running the VGGNet cell afterwards rebinds `model`, so only the most
# recently executed model cell is the one that gets trained.
# NOTE(review): the Conv2D layers have no activation argument, so the only
# non-linearities between them are LRN and max-pooling; the reference AlexNet
# applies ReLU after every convolution -- confirm this is intentional.
model = models.Sequential(
    [
        # Resize to the 224x224 input the network is laid out for
        # (a no-op while IMG_SIZE == 224).
        layers.Resizing(
            224,
            224,
            interpolation="bilinear",
            input_shape=(IMG_SIZE, IMG_SIZE, 3),
        ),
        layers.Conv2D(96, 11, strides=4, padding="same"),
        layers.Lambda(tf.nn.local_response_normalization),
        layers.Conv2D(256, 5, strides=3, padding="same"),
        layers.MaxPooling2D(3, strides=2),
        layers.Lambda(tf.nn.local_response_normalization),
        layers.Conv2D(384, 3, strides=4, padding="same"),
        layers.MaxPooling2D(2, strides=2),
        # Classifier head
        layers.Flatten(),
        layers.Dense(4096, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(4096, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(1, activation="sigmoid"),
    ],
    # Passed through the public constructor argument instead of assigning the
    # private `_name` attribute after construction; `model.name` is unchanged.
    name="AlexNet-48BSize-BalancedData-001Learn-BinaryFocal",
)
model.compile(
    optimizer=tf.keras.optimizers.Adam(BASE_LEARNING_RATE),
    loss=tf.keras.losses.BinaryFocalCrossentropy(),
    metrics=[tf.keras.metrics.AUC(name="roc_auc"), "binary_accuracy"],
)

### VGGNet

In [None]:
# Transfer-learning model: ImageNet-pretrained VGG19 convolutional base with a
# small dense head ending in a single sigmoid unit (binary classification).
# pooling="avg" makes the base emit one global-average-pooled feature vector
# per image, so no Flatten layer is needed.
base = VGG19(
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    weights="imagenet",
    pooling="avg",
)
# NOTE(review): with the line below commented out the whole VGG19 base stays
# trainable and is fine-tuned at BASE_LEARNING_RATE; uncomment to freeze it.
#base.trainable = False
model = tf.keras.Sequential(
    [
        layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
        # Data augmentation
        #layers.RandomBrightness(0.2, seed=SEED),
        #layers.RandomFlip(seed=SEED),
        #layers.RandomRotation(0.2, seed=SEED),
        # VGG19
        # The generators already scale pixels to [0, 1] (rescale=1/255), but
        # vgg19.preprocess_input expects 0-255 RGB values (it flips to BGR and
        # subtracts the ImageNet channel means). Undo the scaling first so the
        # pretrained base sees inputs in the range it was trained on.
        layers.Rescaling(255.0),
        layers.Lambda(tf.keras.applications.vgg19.preprocess_input),
        base,
        layers.Dropout(0.4),
        # Fully connected layers
        layers.Dense(384, activation="relu"),
        layers.Dropout(0.3),
        layers.Dense(64, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(1, activation="sigmoid"),
    ],
    name="VGG19",
)
model.compile(
    optimizer=tf.keras.optimizers.Adam(BASE_LEARNING_RATE),
    # Binary cross-entropy is the matching loss for a sigmoid probability
    # output; the previous MeanAbsoluteError is a regression loss.
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.AUC(name="roc_auc"), "binary_accuracy"],
)

### Train model

In [None]:
# Stop training once validation loss has not improved by at least 1e-4 for
# 5 consecutive epochs, and roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=5,
    verbose=1,
    restore_best_weights=True,
)

# Halve the learning rate after 4 epochs without validation-loss improvement.
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=4,
    verbose=1,
)

In [None]:
# Print the layer-by-layer summary of whichever model cell was executed last
# (both model cells bind the same name `model`).
model.summary()

In [None]:
# Upper bound on training epochs; early stopping may end the run sooner.
EPOCHS = 25

# `history` keeps the per-epoch training/validation metrics returned by fit().
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Ground-truth class indices, in directory order. The test generator was built
# with shuffle=False, so this order matches the order of the predictions below.
true_labels = test_generator.classes

# Sigmoid outputs of the model for every test image.
predictions = model.predict(test_generator)

# Hard 0/1 decisions: scores strictly above 0.5 are assigned class 1.
predicted_labels = np.greater(predictions, 0.5).astype(int)

In [None]:
# Sanity check: show how many test images were assigned to each class rather
# than dumping the raw label array (quickly reveals a model that collapses to
# a single class).
label_values, label_counts = np.unique(predicted_labels, return_counts=True)
print(dict(zip(label_values.tolist(), label_counts.tolist())))

In [None]:
# Three-panel evaluation figure: ROC curve, precision-recall curve, confusion matrix.
fig, axes = plt.subplots(ncols=3, figsize=(15, 4), dpi=160)

# ROC and PR curves sweep the decision threshold, so they must be drawn from
# the continuous sigmoid scores. Feeding them the already-thresholded 0/1
# labels would collapse each curve to a single operating point.
# .ravel() flattens the (N, 1) model output to the 1-D shape sklearn expects.
curves = [metrics.RocCurveDisplay, metrics.PrecisionRecallDisplay]
for ax, curve in zip(axes[:2], curves):
    curve.from_predictions(true_labels, predictions.ravel(), ax=ax, name=model.name)

# The confusion matrix, by contrast, is defined on hard class decisions.
metrics.ConfusionMatrixDisplay.from_predictions(
    true_labels,
    predicted_labels.ravel(),
    ax=axes[2],
    colorbar=False,
)

titles = ["ROC-AUC Curve", "Precision-Recall Curve", "Confusion Matrix"]
for ax, title in zip(axes.flat, titles):
    ax.set_title(title, size=14, pad=10)
print(f"Finished building plots for {model.name}.")


In [None]:
# Persist the trained model and the evaluation figure, both named after model.name.
# NOTE(review): the model is serialised with pickle. Keras' documented way to
# persist a model is model.save(...) / tf.keras.models.load_model(...); pickle
# files are also unsafe to load from untrusted sources. Left unchanged here
# because downstream code may expect the .pkl artifact -- consider migrating.
# NOTE(review): assumes the Trained-Models/ directory already exists.
model_path = f'/home/meredithc/Transfer-Learning-for-Cancer-Detection/Trained-Models/{model.name}.pkl'
with open(model_path, 'wb') as model_file:
    pickle.dump(model, model_file)
# Save the figure created in the plotting cell next to the model file.
fig.savefig(f'/home/meredithc/Transfer-Learning-for-Cancer-Detection/Trained-Models/{model.name}_plots.png')