In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
import cv2
import matplotlib.pyplot as plt
import lime
from lime import lime_image
import shap
from tf_explain.core.grad_cam import GradCAM
from tf_explain.core.integrated_gradients import IntegratedGradients
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


In [14]:
# Set paths
base_dir = 'Dataset_preprocessed'
train_dir = os.path.join(base_dir, 'Training_set')
test_dir = os.path.join(base_dir, 'Test_set')
classes = ['Abnormal', 'Normal']

# Image parameters
IMG_SIZE = 224
BATCH_SIZE = 32

In [15]:
# Data generators
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    # preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)
test_datagen = ImageDataGenerator(
    rescale=1./255,
    # preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    color_mode='rgb'  # ResNet50 expects 3 channels
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    color_mode='rgb',
    shuffle=False
)


Found 1640 images belonging to 2 classes.
Found 322 images belonging to 2 classes.


In [16]:
# Build pretrained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_model.trainable = False  # Freeze base model

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



# Train model
model.fit(train_generator, epochs=5, validation_data=test_generator)#, callbacks=[early_stop, checkpoint])

Epoch 1/5
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 4s/step - accuracy: 0.5116 - loss: 0.7753 - val_accuracy: 0.6429 - val_loss: 0.6516
Epoch 2/5
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 4s/step - accuracy: 0.5257 - loss: 0.7110 - val_accuracy: 0.6429 - val_loss: 0.6846
Epoch 3/5
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 4s/step - accuracy: 0.4991 - loss: 0.6921 - val_accuracy: 0.4627 - val_loss: 0.6933
Epoch 4/5
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 3s/step - accuracy: 0.5086 - loss: 0.6937 - val_accuracy: 0.3571 - val_loss: 0.6935
Epoch 5/5
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m308s[0m 6s/step - accuracy: 0.4902 - loss: 0.6932 - val_accuracy: 0.3571 - val_loss: 0.6935


<keras.src.callbacks.history.History at 0x28f9d4363c0>

In [17]:
# Unfreeze some layers for fine-tuning
base_model.trainable = True
for layer in base_model.layers[:143]:  # Freeze first 143 layers
    layer.trainable = False

model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_generator, epochs=5, validation_data=test_generator)#, callbacks=[early_stop, checkpoint])

# Save model after fine-tuning
model.save('pretrained_model/resnet50_finetuned_preprocessed.keras')
print("Model saved as 'resnet50_finetuned.keras'")

# Evaluate model
test_loss, test_accuracy = model.evaluate(test_generator)
print(f'Test Accuracy: {test_accuracy:.4f}')

Epoch 1/5


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m408s[0m 7s/step - accuracy: 0.5508 - loss: 0.7043 - val_accuracy: 0.6149 - val_loss: 0.6913
Epoch 2/5
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.6567 - loss: 0.6319

KeyboardInterrupt: 

In [None]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=test_generator
    # callbacks=[early_stop, checkpoint]
)

# Plot accuracy
plt.plot(history.history['accuracy'], label='Train Acc')
plt.plot(history.history['val_accuracy'], label='Val Acc')
plt.legend()
plt.title('Accuracy over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.grid(True)
plt.show()

# Plot loss
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.legend()
plt.title('Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.show()



Epoch 1/10
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 4s/step - accuracy: 0.7766 - loss: 0.4656 - val_accuracy: 0.4627 - val_loss: 1.1071
Epoch 2/10
[1m36/52[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m1:37[0m 6s/step - accuracy: 0.8195 - loss: 0.4391

KeyboardInterrupt: 

# Final code

In [3]:
import tensorflow as tf

data_dir = 'C:/Users/Anne/OneDrive - National University of Ireland, Galway/Documents/Data Analytics/PROJECT/Capstone2025_Anne/kaggle/working/merged_images'  # Update with the dataset path

# Create a dataset for the entire data to use for split
full_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    labels='inferred',
    label_mode='categorical',
    # image_size=(224, 224),
    image_size=(224, 224),
    seed=50,
    shuffle=True,
    batch_size=13
)
# Calculate the total number of samples
total_samples = tf.data.experimental.cardinality(full_dataset).numpy()

train_size = int(0.8 * total_samples)                 # 70% for training
val_size   = int(0.15 * total_samples)                # 20% for validation
test_size = total_samples - train_size - val_size     # 10% for testing

# Create train, validation, and test datasets
train_dataset       = full_dataset.take(train_size)
validation_dataset  = full_dataset.skip(train_size).take(val_size)
test_dataset        = full_dataset.skip(train_size + val_size)

train_dataset      = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
test_dataset       = test_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Print the number of samples in each dataset
print(f"Train samples:      {train_size}     batches(13) ==> {train_size*13}")
print(f"Validation samples: {val_size}       batches(13) ==> {val_size*13}")
print(f"Test samples:       {test_size}      batches(13) ==> {test_size*13}")


Found 23575 files belonging to 2 classes.
Train samples:      1451     batches(13) ==> 18863
Validation samples: 272       batches(13) ==> 3536
Test samples:       91      batches(13) ==> 1183


In [4]:
from tensorflow.keras.applications import ResNet50

def try_model():
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze all layers initially
    for layer in base_model.layers:
        layer.trainable = False

    # Calculate the index to start unfreezing layers
    from_index = int(np.round((len(base_model.layers) - 1) * (1.0 - 50.0 / 100.0)))

    # Unfreeze layers from the calculated index onwards
    for layer in base_model.layers[from_index:]:
        layer.trainable = True

    # Add custom layers on top (Upper-Layers)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(2, activation='softmax')(x)  # Assuming binary classification

    model = Model(inputs=base_model.input, outputs=predictions)
    
    # # Clear the base model from memory if needed (optional)
    # del model_dict, base_model, from_index, x, predictions;    gc.collect()
    return model

In [5]:
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.optimizers import Adam

trymodel = try_model()

trymodel.compile(optimizer=Adam(learning_rate=1e-4),
                      loss='categorical_crossentropy',
                      metrics=['accuracy', Precision(name='precision'), Recall(name='recall')])  # Compile the model
# trymodel.summary()

In [6]:
history = trymodel.fit(
            train_dataset,
            validation_data=validation_dataset,
            batch_size=13,
            epochs=7
        )

Epoch 1/7
[1m1451/1451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1981s[0m 1s/step - accuracy: 0.7788 - loss: 0.4840 - precision: 0.7788 - recall: 0.7788 - val_accuracy: 0.9265 - val_loss: 0.1930 - val_precision: 0.9265 - val_recall: 0.9265
Epoch 2/7
[1m1451/1451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1989s[0m 1s/step - accuracy: 0.9551 - loss: 0.1215 - precision: 0.9551 - recall: 0.9551 - val_accuracy: 0.9033 - val_loss: 0.3054 - val_precision: 0.9033 - val_recall: 0.9033
Epoch 3/7
[1m1451/1451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9767 - loss: 0.0669 - precision: 0.9767 - recall: 0.9767

KeyboardInterrupt: 

In [None]:
trymodel.save("ResNet50_test.keras")

In [None]:
# Define the second model with adjusted hyperparameters
def try_model_v2():
    base_model_v2 = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze all layers initially
    for layer in base_model_v2.layers:
        layer.trainable = False

    # Unfreeze 60% of layers instead of 50%
    from_index_v2 = int(np.round((len(base_model_v2.layers) - 1) * (1.0 - 60.0 / 100.0)))

    # Unfreeze layers from the calculated index onwards
    for layer in base_model_v2.layers[from_index_v2:]:
        layer.trainable = True

    # Add custom layers on top (Upper-Layers)
    x_v2 = base_model_v2.output
    x_v2 = GlobalAveragePooling2D()(x_v2)
    x_v2 = Dense(512, activation='relu')(x_v2)  # Reduced Dense layer size from 1024 to 512
    x_v2 = Dropout(0.3)(x_v2)  # Lowered dropout rate from 0.5 to 0.3
    predictions_v2 = Dense(2, activation='softmax')(x_v2)  # Assuming binary classification

    model_v2 = Model(inputs=base_model_v2.input, outputs=predictions_v2)
    
    return model_v2

# Create and compile the second model
trymodel_v2 = try_model_v2()
trymodel_v2.compile(optimizer=Adam(learning_rate=5e-5),  # Adjusted learning rate to 5e-5
                    loss='categorical_crossentropy',
                    metrics=['accuracy', Precision(name='precision_v2'), Recall(name='recall_v2')])


In [None]:
# Train the second model
history_v2 = trymodel_v2.fit(
            train_dataset,
            validation_data=validation_dataset,
            batch_size=13,
            epochs=7
        )

# Summarize the second model
# trymodel_v2.summary()

In [None]:
trymodel_v2.save("Resnet50_v2.keras")

In [None]:
import matplotlib.pyplot as plt

# Plot the validation loss
plt.plot(history_v2.history['val_loss'], label='Validation Loss')
plt.plot(history_v2.history['loss'], label='Training Loss')
plt.title('Validation and Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot the validation accuracy
plt.plot(history_v2.history['val_accuracy'], label='Validation Accuracy')
plt.plot(history_v2.history['accuracy'], label='Training Accuracy')
plt.title('Validation and Training Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()


In [None]:
# Define the third model with different hyperparameters
def try_model_v3():
    base_model_v3 = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze all layers initially
    for layer in base_model_v3.layers:
        layer.trainable = False

    # Unfreeze 70% of layers (more layers unfreezed compared to v1 and v2)
    from_index_v3 = int(np.round((len(base_model_v3.layers) - 1) * (1.0 - 70.0 / 100.0)))

    # Unfreeze layers from the calculated index onwards
    for layer in base_model_v3.layers[from_index_v3:]:
        layer.trainable = True

    # Add custom layers on top (Upper-Layers)
    x_v3 = base_model_v3.output
    x_v3 = GlobalAveragePooling2D()(x_v3)
    x_v3 = Dense(1024, activation='relu')(x_v3)  # Return Dense layer size to 1024
    x_v3 = Dropout(0.4)(x_v3)  # Moderate dropout rate (between v1 and v2)
    predictions_v3 = Dense(2, activation='softmax')(x_v3)  # Assuming binary classification

    model_v3 = Model(inputs=base_model_v3.input, outputs=predictions_v3)
    
    return model_v3

# Create and compile the third model
trymodel_v3 = try_model_v3()
trymodel_v3.compile(optimizer=RMSprop(learning_rate=1e-4),  # Change optimizer to RMSprop and learning rate back to 1e-4
                    loss='categorical_crossentropy',
                    metrics=['accuracy', Precision(name='precision_v3'), Recall(name='recall_v3')])



In [None]:
# Train the third model
history_v3 = trymodel_v3.fit(
            train_dataset,
            validation_data=validation_dataset,
            batch_size=13,
            epochs=7
        )

# Summarize the third model
# trymodel_v3.summary()

In [None]:
trymodel_v3.save("Resnet50_v3.keras")

In [None]:
# Plot Precision
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.plot(history.history['precision'], label='Model 1 Precision')
plt.plot(history_v2.history['precision_v2'], label='Model 2 Precision')
plt.plot(history_v3.history['precision_v3'], label='Model 3 Precision')
plt.title('Training Precision')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['val_precision'], label='Model 1 Val Precision')
plt.plot(history_v2.history['val_precision_v2'], label='Model 2 Val Precision')
plt.plot(history_v3.history['val_precision_v3'], label='Model 3 Val Precision')
plt.title('Validation Precision')
plt.legend()

plt.tight_layout()
plt.show()

# Plot Recall
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.plot(history.history['recall'], label='Model 1 Recall')
plt.plot(history_v2.history['recall_v2'], label='Model 2 Recall')
plt.plot(history_v3.history['recall_v3'], label='Model 3 Recall')
plt.title('Training Recall')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['val_recall'], label='Model 1 Val Recall')
plt.plot(history_v2.history['val_recall_v2'], label='Model 2 Val Recall')
plt.plot(history_v3.history['val_recall_v3'], label='Model 3 Val Recall')
plt.title('Validation Recall')
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Compare the validation precision and recall to determine the best model
best_model = "Model 1"
best_val_precision = max(history.history['val_precision'], history_v2.history['val_precision_v2'], history_v3.history['val_precision_v3'])
best_val_recall = max(history.history['val_recall'], history_v2.history['val_recall_v2'], history_v3.history['val_recall_v3'])

# Display the best model based on precision and recall
if best_val_precision > best_val_recall:
    best_model = "Model 1"
elif best_val_precision < best_val_recall:
    best_model = "Model 2"
else:
    best_model = "Model 3"

print(f"The best model is: {best_model}")


In [None]:
import os
import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image

# --- Load model ---
model = tf.keras.models.load_model("Resnet50_v3.keras")  # <- Update path

# --- Base directory ---
base_dir = r"C:\Users\Anne\OneDrive - National University of Ireland, Galway\Documents\Data Analytics\PROJECT\Capstone2025_Anne\kaggle\working\merged_images"

# --- Parameters ---
img_size = (224, 224)
num_samples = 12

# --- OPTIONAL: Uncomment for reproducible results
# random.seed(42)

# --- Collect image paths and labels ---
image_paths, labels = [], []
for label_folder in ["0", "1"]:
    folder_path = os.path.join(base_dir, label_folder)
    for fname in os.listdir(folder_path):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_paths.append(os.path.join(folder_path, fname))
            labels.append(int(label_folder))

# --- Random sample ---
sample = random.sample(list(zip(image_paths, labels)), num_samples)
sample_paths, sample_labels = zip(*sample)

# --- Preprocessing + Prediction ---
def preprocess(img_path):
    img = image.load_img(img_path, target_size=img_size)
    img_array = image.img_to_array(img)
    img_array = tf.keras.applications.resnet50.preprocess_input(img_array)
    return np.expand_dims(img_array, axis=0), img

predictions = []
original_imgs = []

for img_path in sample_paths:
    input_arr, orig = preprocess(img_path)
    pred = model.predict(input_arr, verbose=0)[0][0]
    predictions.append(pred)
    original_imgs.append(orig)

# --- Plotting ---
plt.figure(figsize=(16, 10))
for i in range(num_samples):
    plt.subplot(3, 4, i + 1)
    plt.imshow(original_imgs[i])
    actual = "Malignant" if sample_labels[i] == 1 else "Benign"
    predicted = "Malignant" if predictions[i] < 0.5 else "Benign"
    confidence = f"{predictions[i]:.2f}"
    title_color = 'green' if actual == predicted else 'red'
    plt.title(f"Actual: {actual}\nPredicted: {predicted} ({confidence})", color=title_color)
    plt.axis('off')

plt.tight_layout()
plt.show()
