In [1]:
import os
import h5py
import numpy as np
import tensorflow as tf
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ModelCheckpoint

In [2]:
def hdf5_batch_generator(file_path, dataset, batch_size):
    with h5py.File(file_path, 'r') as file:
        dataset = file[dataset]
        total_size = dataset.shape[0]

        for start_idx in range(0, total_size, batch_size):
            end_idx = np.min([start_idx + batch_size, total_size])
            batch_x = np.array(dataset[start_idx:end_idx])
            yield batch_x.astype('float32')

In [3]:
datasetLen = 15000

In [4]:
x_generator = hdf5_batch_generator('camelyonpatch_level_2_split_train_x.h5', 'x', datasetLen)
y_generator = hdf5_batch_generator('camelyonpatch_level_2_split_train_y.h5', 'y', datasetLen)

In [5]:
x_batch = next(x_generator)
y_batch = next(y_generator)

In [6]:
len(x_batch)

15000

In [7]:
len(y_batch)

15000

In [8]:
print(f'Number of cancerous images {sum(y_batch)}')

Number of cancerous images [[[7484.]]]


In [9]:
trainSplit, valSplit, testSplit = int(0.70 * datasetLen), int(0.20 * datasetLen), int(0.1 * datasetLen)

In [10]:
Y_train, Y_validation, Y_test = y_batch[ : trainSplit], y_batch[trainSplit : trainSplit + valSplit], y_batch[trainSplit + valSplit : ]
X_train, X_validation, X_test = x_batch[ : trainSplit], x_batch[trainSplit : trainSplit + valSplit], x_batch[trainSplit + valSplit : ]

In [11]:
print(X_train.shape)
print(X_validation.shape)
print(X_test.shape)

(10500, 96, 96, 3)
(3000, 96, 96, 3)
(1500, 96, 96, 3)


In [12]:
print(len(Y_train))
print(len(Y_validation))
print(len(Y_test))

10500
3000
1500


In [13]:
print(f'Percentage of cancerous images in train: {sum(Y_train) / trainSplit * 100}%')
print(f'Percentage of cancerous images in validation: {sum(Y_validation) / valSplit * 100}%')
print(f'Percentage of cancerous images in test: {sum(Y_test) / testSplit * 100}%')

Percentage of cancerous images in train: [[[50.438095]]]%
Percentage of cancerous images in validation: [[[51.133335]]]%
Percentage of cancerous images in test: [[[43.6]]]%


In [35]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import keras
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization


In [36]:
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(96, 96, 3))

x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=x)

for layer in base_model.layers[:160]:
    layer.trainable = False
for layer in base_model.layers[160:]:
    layer.trainable = True

model.compile(optimizer=Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

checkpoint_path = 'resnet50_model_fine_tuned_datagen.keras'
checkpoint_dir = os.path.dirname(checkpoint_path)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True
)

cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                                                 monitor='val_loss',
                                                 save_weights_only=False,
                                                 save_best_only=True,
                                                 verbose=1)

In [20]:
Y_train, Y_validation, Y_test = y_batch[ : trainSplit], y_batch[trainSplit : trainSplit + valSplit], y_batch[trainSplit + valSplit : ]
X_train, X_validation, X_test = x_batch[ : trainSplit], x_batch[trainSplit : trainSplit + valSplit], x_batch[trainSplit + valSplit : ]

Y_train = np.squeeze(Y_train)
Y_train = Y_train.astype('float32')
Y_validation = np.squeeze(Y_validation)
Y_validation = Y_validation.astype('float32')
Y_test = np.squeeze(Y_test)
Y_test = Y_test.astype('float32')

X_train = X_train.astype('float32') / 255.
X_validation = X_validation.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.


# datagen = ImageDataGenerator(
#     width_shift_range=4,
#     height_shift_range=4,
#     horizontal_flip=True,
#     vertical_flip=True,
#     validation_split=0.2,
# )

# model.fit(train_generator, epochs=100, batch_size=32, validation_data=val_generator, callbacks=[early_stopping, cp_callback], verbose=1)
model.fit(X_train, Y_train, epochs=300, batch_size=32, validation_data=(X_validation, Y_validation), callbacks=[cp_callback], verbose=1)

Epoch 1/300
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step - accuracy: 0.6258 - loss: 5.6238
Epoch 1: val_loss improved from inf to 4.86598, saving model to resnet50_model_fine_tuned_100ep.keras
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 185ms/step - accuracy: 0.6259 - loss: 5.6230 - val_accuracy: 0.6720 - val_loss: 4.8660
Epoch 2/300
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - accuracy: 0.7000 - loss: 4.7236
Epoch 2: val_loss improved from 4.86598 to 4.26935, saving model to resnet50_model_fine_tuned_100ep.keras
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 185ms/step - accuracy: 0.7000 - loss: 4.7232 - val_accuracy: 0.7233 - val_loss: 4.2693
Epoch 3/300
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step - accuracy: 0.7236 - loss: 4.1754
Epoch 3: val_loss improved from 4.26935 to 3.84727, saving model to resnet50_model_fine_tuned_100ep.keras
[1m329/3

<keras.src.callbacks.history.History at 0x1761c02b650>

In [22]:
model.save('resnet50_model_fine_tuned_100ep.keras')

In [23]:
model = keras.models.load_model('resnet50_model_fine_tuned_100ep.keras')

  saveable.load_own_variables(weights_store.get(inner_path))


In [24]:
model.evaluate(X_test, Y_test)

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 133ms/step - accuracy: 0.7502 - loss: 1.3080


[1.331390142440796, 0.7486666440963745]

In [25]:
truePositives = 0
trueNegatives = 0
falsePositives = 0
falseNegatives = 0

for i in range(len(X_test)):
    prediction = model.predict(X_test[i].reshape(1, 96, 96, 3))
    if prediction >= 0.5:
        if Y_test[i] == 1:
            truePositives += 1
        else:
            falsePositives += 1
    else:
        if Y_test[i] == 0:
            trueNegatives += 1
        else:
            falseNegatives += 1

print(f'True Positives: {truePositives}')
print(f'True Negatives: {trueNegatives}')

print(f'False Positives: {falsePositives}')
print(f'False Negatives: {falseNegatives}')

print(f'Accuracy: {(truePositives + trueNegatives) / (truePositives + trueNegatives + falsePositives + falseNegatives)}')
print(f'Precision: {truePositives / (truePositives + falsePositives)}')
print(f'Recall: {truePositives / (truePositives + falseNegatives)}')
print(f'Specificity: {trueNegatives / (trueNegatives + falsePositives)}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 859ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [37]:
Y_train, Y_validation, Y_test = y_batch[ : trainSplit], y_batch[trainSplit : trainSplit + valSplit], y_batch[trainSplit + valSplit : ]
X_train, X_validation, X_test = x_batch[ : trainSplit], x_batch[trainSplit : trainSplit + valSplit], x_batch[trainSplit + valSplit : ]

Y_train = np.squeeze(Y_train)
Y_train = Y_train.astype('float32')
Y_validation = np.squeeze(Y_validation)
Y_validation = Y_validation.astype('float32')
Y_test = np.squeeze(Y_test)
Y_test = Y_test.astype('float32')

X_train = X_train.astype('float32') / 255.
X_validation = X_validation.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.

# training again, but with data generator
datagen = ImageDataGenerator(
    width_shift_range=4,
    height_shift_range=4,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2,
)

train_generator = datagen.flow(X_train, Y_train, batch_size=32, subset='training')
val_generator = datagen.flow(X_train, Y_train, batch_size=32, subset='validation')

model.fit(train_generator, epochs=300, validation_data=val_generator, callbacks=[cp_callback], verbose=1)

Epoch 1/300


  self._warn_if_super_not_called()


[1m263/263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step - accuracy: 0.6187 - loss: 5.6363
Epoch 1: val_loss improved from inf to 5.03394, saving model to resnet50_model_fine_tuned_datagen.keras
[1m263/263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 180ms/step - accuracy: 0.6188 - loss: 5.6354 - val_accuracy: 0.5467 - val_loss: 5.0339
Epoch 2/300
[1m263/263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step - accuracy: 0.6745 - loss: 4.8652
Epoch 2: val_loss improved from 5.03394 to 4.36978, saving model to resnet50_model_fine_tuned_datagen.keras
[1m263/263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 180ms/step - accuracy: 0.6745 - loss: 4.8646 - val_accuracy: 0.7205 - val_loss: 4.3698
Epoch 3/300
[1m263/263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - accuracy: 0.6771 - loss: 4.3775
Epoch 3: val_loss improved from 4.36978 to 3.98302, saving model to resnet50_model_fine_tuned_datagen.keras
[1m263/263[0m

<keras.src.callbacks.history.History at 0x176740e7830>

In [38]:
model.save('resnet50_datagen_fine_tuned.keras')

In [39]:
model = keras.models.load_model('resnet50_datagen_fine_tuned.keras')

In [40]:
preds = model.predict(X_test)

truePositives = 0
trueNegatives = 0
falsePositives = 0
falseNegatives = 0

for i in range(len(X_test)):
    prediction = preds[i]
    if prediction >= 0.5:
        if Y_test[i] == 1:
            truePositives += 1
        else:
            falsePositives += 1
    else:
        if Y_test[i] == 0:
            trueNegatives += 1
        else:
            falseNegatives += 1

print(f'True Positives: {truePositives}')
print(f'True Negatives: {trueNegatives}')

print(f'False Positives: {falsePositives}')
print(f'False Negatives: {falseNegatives}')

print(f'Accuracy: {(truePositives + trueNegatives) / (truePositives + trueNegatives + falsePositives + falseNegatives)}')
print(f'Precision: {truePositives / (truePositives + falsePositives)}')
print(f'Recall: {truePositives / (truePositives + falseNegatives)}')
print(f'Specificity: {trueNegatives / (trueNegatives + falsePositives)}')

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 130ms/step
True Positives: 480
True Negatives: 682
False Positives: 164
False Negatives: 174
Accuracy: 0.7746666666666666
Precision: 0.7453416149068323
Recall: 0.7339449541284404
Specificity: 0.806146572104019


# VGG-16

In [14]:
import cv2
import keras
from tensorflow.keras.callbacks import EarlyStopping
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import tensorflow as tf
model = keras.Sequential()

In [15]:
# Block 1
model.add(Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu", input_shape=(96, 96, 3)))
model.add(Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# Block 2
model.add(Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# Block 3
model.add(Conv2D(256, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(Conv2D(256, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(Conv2D(256, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# Block 4
model.add(Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# Block 5
model.add(Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(Conv2D(512, kernel_size=(3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# Dense layers
model.add(Flatten())
model.add(Dense(4096, activation='relu'))
model.add(Dense(4096, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.build()
model.summary()
import matplotlib.pyplot as plt


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# setting another learning rate
model.optimizer.learning_rate = 1e-5


checkpoint_path = 'vgg-16-full.keras'
checkpoint_dir = os.path.dirname(checkpoint_path)

early_stopping = EarlyStopping(
    monitor='val_loss',     # Monitor validation loss
    patience=35,             # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restore the weights of the best epoch
)

cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                                                 monitor='val_loss',
                                                 save_weights_only=False,
                                                 save_best_only=True,
                                                 verbose=1)

In [18]:
X_train, Y_validation, Y_test = y_batch[ : trainSplit], y_batch[trainSplit : trainSplit + valSplit], y_batch[trainSplit + valSplit : ]
X_train, X_validation, X_test = x_batch[ : trainSplit], x_batch[trainSplit : trainSplit + valSplit], x_batch[trainSplit + valSplit : ]

Y_train = np.squeeze(Y_train)
Y_train = Y_train.astype('float32')
Y_validation = np.squeeze(Y_validation)
Y_validation = Y_validation.astype('float32')
Y_test = np.squeeze(Y_test)
Y_test = Y_test.astype('float32')

X_train = X_train.astype('float32') / 255.
X_validation = X_validation.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.

datagen = ImageDataGenerator(
    width_shift_range=4,
    height_shift_range=4,
    horizontal_flip=True,
    vertical_flip=True,
)

train_generator = datagen.flow(X_train, Y_train, batch_size=32)
val_generator = datagen.flow(X_validation, Y_validation, batch_size=32)

model.fit(train_generator, epochs=300, validation_data=val_generator, callbacks=[cp_callback, early_stopping], verbose=1)

# model.fit(train_generator, epochs=300, validation_data=val_generator, callbacks=[cp_callback], verbose=1)
# model.fit(X_train, Y_train, epochs=300, batch_size=32, validation_data=(X_validation, Y_validation), callbacks=[cp_callback], verbose=1)

Epoch 1/300


  self._warn_if_super_not_called()


[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 809ms/step - accuracy: 0.6144 - loss: 0.6327
Epoch 1: val_loss improved from inf to 0.51973, saving model to vgg-16-full.keras
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m293s[0m 883ms/step - accuracy: 0.6147 - loss: 0.6325 - val_accuracy: 0.7593 - val_loss: 0.5197
Epoch 2/300
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 822ms/step - accuracy: 0.7692 - loss: 0.5017
Epoch 2: val_loss improved from 0.51973 to 0.49709, saving model to vgg-16-full.keras
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m295s[0m 896ms/step - accuracy: 0.7692 - loss: 0.5016 - val_accuracy: 0.7650 - val_loss: 0.4971
Epoch 3/300
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 817ms/step - accuracy: 0.7680 - loss: 0.4946
Epoch 3: val_loss did not improve from 0.49709
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m291s[0m 886ms/step - accuracy: 0.7680 - loss: 0.4946 - val

<keras.src.callbacks.history.History at 0x1bf12f967b0>

In [19]:
model.save('vgg-16-full_model.keras')

In [22]:
model = keras.models.load_model('vgg-16-full.keras')

In [28]:
preds = model.predict(X_test)

truePositives = 0
trueNegatives = 0
falsePositives = 0
falseNegatives = 0

for i in range(len(X_test)):
    prediction = preds[i]
    if prediction >= 0.75:
        if Y_test[i] == 1:
            truePositives += 1
        else:
            falsePositives += 1
    else:
        if Y_test[i] == 0:
            trueNegatives += 1
        else:
            falseNegatives += 1

print(f'True Positives: {truePositives}')
print(f'True Negatives: {trueNegatives}')

print(f'False Positives: {falsePositives}')
print(f'False Negatives: {falseNegatives}')

print(f'Accuracy: {(truePositives + trueNegatives) / (truePositives + trueNegatives + falsePositives + falseNegatives)}')
print(f'Precision: {truePositives / (truePositives + falsePositives)}')
print(f'Recall: {truePositives / (truePositives + falseNegatives)}')
print(f'Specificity: {trueNegatives / (trueNegatives + falsePositives)}')

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 245ms/step
True Positives: 580
True Negatives: 803
False Positives: 43
False Negatives: 74
Accuracy: 0.922
Precision: 0.9309791332263242
Recall: 0.8868501529051988
Specificity: 0.9491725768321513
