In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os, shutil
import cv2
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline
plt.style.use('ggplot')

In [None]:
# Dataset
import zipfile

z = zipfile.ZipFile('archive imbalanced.zip')

z.extractall()

In [3]:
folder = 'brain_tumor_dataset/yes/'
count = 1

for filename in os.listdir(folder):
    source = folder + filename
    destination = folder + "Y_" +str(count)+".jpg"
    os.rename(source, destination)
    count+=1
print("All files are renamed in the yes dir.")

All files are renamed in the yes dir.


In [4]:
folder = 'brain_tumor_dataset/no/'
count = 1

for filename in os.listdir(folder):
    source = folder + filename
    destination = folder + "N_" +str(count)+".jpg"
    os.rename(source, destination)
    count+=1
print("All files are renamed in the no dir.")

All files are renamed in the no dir.


In [5]:
# EDA(Exploratory Data Analysis)

In [6]:
listyes = os.listdir("brain_tumor_dataset/yes/")
number_files_yes = len(listyes)
print(number_files_yes)

listno = os.listdir("brain_tumor_dataset/no/")
number_files_no = len(listno)
print(number_files_no)

155
98


In [7]:
# Data Augmentation

In [8]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

In [9]:
import os
import cv2
import time
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def timing(sec_elapsed):
    h = int(sec_elapsed / 3600)
    m = int((sec_elapsed % 3600) / 60)
    s = sec_elapsed % 60
    return f"{h}:{m}:{s}"

def augmented_data(file_dir, target_count, save_to_dir):
    data_gen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        brightness_range=(0.3, 1.0),
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest'
    )

    image_list = os.listdir(file_dir)
    original_count = len(image_list)
    os.makedirs(save_to_dir, exist_ok=True)

    existing_count = len(os.listdir(save_to_dir))
    total_count = existing_count  # Count only images in augmented_data

    if total_count >= target_count:
        print(f"✅ {total_count} images already in {save_to_dir}. No augmentation needed.")
        return

    additional_needed = target_count - total_count
    print(f"🛠 Augmenting {additional_needed} images for {save_to_dir}...")

    n_generated_samples = additional_needed // original_count
    extra_needed = additional_needed % original_count

    generated_count = 0
    for idx, filename in enumerate(image_list):
        if generated_count >= additional_needed:
            break

        img_path = os.path.join(file_dir, filename)
        image = cv2.imread(img_path)
        if image is None:
            continue  # Skip unreadable images

        image = image.reshape((1,) + image.shape)
        save_prefix = 'aug_' + os.path.splitext(filename)[0]

        samples_to_generate = n_generated_samples + (1 if idx < extra_needed else 0)

        i = 0
        for batch in data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir, save_prefix=save_prefix, save_format="jpg"):
            i += 1
            generated_count += 1
            if i >= samples_to_generate or generated_count >= additional_needed:
                break

def data_summary(path):
    yes_path = os.path.join(path, 'yes')
    no_path = os.path.join(path, 'no')

    n_pos = len(os.listdir(yes_path)) if os.path.exists(yes_path) else 0
    n_neg = len(os.listdir(no_path)) if os.path.exists(no_path) else 0
    n_total = n_pos + n_neg

    pos_per = (n_pos * 100) / n_total if n_total > 0 else 0
    neg_per = (n_neg * 100) / n_total if n_total > 0 else 0

    print(f" **Data Summary**")
    print(f" Number of samples: {n_total}")
    print(f" {n_pos} positive samples ({pos_per:.2f}%)")
    print(f" {n_neg} negative samples ({neg_per:.2f}%)")
    print(f" **Total images: {n_total}**\n")

# Define directories
yes_path = 'brain_tumor_dataset/yes'
no_path = 'brain_tumor_dataset/no'

augmented_data_path = 'augmented_data'
yes_aug_path = os.path.join(augmented_data_path, 'yes')
no_aug_path = os.path.join(augmented_data_path, 'no')

os.makedirs(yes_aug_path, exist_ok=True)
os.makedirs(no_aug_path, exist_ok=True)

print(" **Original Data Summary:**")
data_summary('brain_tumor_dataset')

start_time = time.time()

augmented_data(file_dir=yes_path, target_count=310, save_to_dir=yes_aug_path)
augmented_data(file_dir=no_path, target_count=196, save_to_dir=no_aug_path)

end_time = time.time()
print(f" Augmentation took: {timing(int(end_time - start_time))}\n")

print(" **Augmented Data Summary:**")
data_summary(augmented_data_path)


 **Original Data Summary:**
 **Data Summary**
 Number of samples: 253
 155 positive samples (61.26%)
 98 negative samples (38.74%)
 **Total images: 253**

🛠 Augmenting 310 images for augmented_data/yes...
🛠 Augmenting 196 images for augmented_data/no...
 Augmentation took: 0:0:23

 **Augmented Data Summary:**
 **Data Summary**
 Number of samples: 506
 310 positive samples (61.26%)
 196 negative samples (38.74%)
 **Total images: 506**



In [10]:
listyes = os.listdir("augmented_data/yes/")
number_files_yes = len(listyes)
print(number_files_yes)

listno = os.listdir("augmented_data/no/")
number_files_no = len(listno)
print(number_files_no)

310
196


In [11]:
# Data Preprocessing
# Convert BGR TO GRAY
# GaussianBlur
# Threshold
# Erode
# Dilate
# Find Contours

In [12]:
import imutils
def crop_brain_tumor(image, plot=False):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5,5), 0)

    thres = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thres =cv2.erode(thres, None, iterations = 2)
    thres = cv2.dilate(thres, None, iterations = 2)

    cnts = cv2.findContours(thres.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    c = max(cnts, key = cv2.contourArea)

    extLeft = tuple(c[c[:,:,0].argmin()][0])
    extRight = tuple(c[c[:,:,0].argmax()][0])
    extTop = tuple(c[c[:,:,1].argmin()][0])
    extBot = tuple(c[c[:,:,1].argmax()][0])

    new_image = image[extTop[1]:extBot[1], extLeft[0]:extRight[0]]

    if plot:
        plt.figure()
        plt.subplot(1, 2, 1)
        plt.imshow(image)

        plt.tick_params(axis='both', which='both',
                        top=False, bottom=False, left=False, right=False,
                        labelbottom=False, labeltop=False, labelleft=False, labelright=False)

        plt.title('Original Image')

        plt.subplot(1, 2, 2)
        plt.imshow(new_image)

        plt.tick_params(axis='both', which='both',
                        top=False, bottom=False, left=False, right=False,
                        labelbottom=False, labeltop=False, labelleft=False, labelright=False)

        plt.title('Cropped Image')
        plt.show()
    return new_image

In [13]:
folder1 = 'augmented_data/no/'
folder2 = 'augmented_data/yes/'

for filename in os.listdir(folder1):
    img = cv2.imread(folder1 + filename)
    img = crop_brain_tumor(img, False)
    cv2.imwrite(folder1 + filename, img)
for filename in os.listdir(folder2):
    img = cv2.imread(folder2 + filename)
    img = crop_brain_tumor(img, False)
    cv2.imwrite(folder2 + filename, img)

In [14]:
# image loading

In [15]:
from sklearn.utils import shuffle
def load_data(dir_list, image_size):
    X=[]
    y=[]

    image_width, image_height=image_size

    for directory in dir_list:
        for filename in os.listdir(directory):
            image = cv2.imread(directory + '/' + filename)
            image = crop_brain_tumor(image, plot=False)
            image = cv2.resize(image, dsize=(image_width, image_height), interpolation = cv2.INTER_CUBIC)
            image = image/255.00
            X.append(image)
            if directory[-3:] == "yes":
                y.append(1)
            else:
                y.append(0)
    X=np.array(X)
    y=np.array(y)

    X,y = shuffle(X,y)
    print(f"Number of example is : {len(X)}")
    print(f"X SHAPE is : {X.shape}")
    print(f"y SHAPE is : {y.shape}")
    return X,y


In [16]:
augmented_path = 'augmented_data/'
augmeneted_yes = augmented_path + 'yes'
augmeneted_no = augmented_path + 'no'

IMAGE_WIDTH, IMAGE_HEIGHT = (240,240)

X,y = load_data([augmeneted_yes, augmeneted_no], (IMAGE_WIDTH, IMAGE_HEIGHT))

Number of example is : 506
X SHAPE is : (506, 240, 240, 3)
y SHAPE is : (506,)


In [17]:
def plot_sample_images(X, y, n=50):

    for label in [0,1]:
        images = X[np.argwhere(y == label)]
        n_images = images[:n]

        columns_n = 10
        rows_n = int(n/ columns_n)

        plt.figure(figsize=(20, 10))

        i = 1
        for image in n_images:
            plt.subplot(rows_n, columns_n, i)
            plt.imshow(image[0])

            plt.tick_params(axis='both', which='both',
                            top=False, bottom=False, left=False, right=False,
                            labelbottom=False, labeltop=False, labelleft=False,
                            labelright=False)

            i += 1

        label_to_str = lambda label: "Yes" if label == 1 else "No"
        plt.suptitle(f"Brain Tumor: {label_to_str(label)}")
        plt.show()

In [18]:
# Data Spliting
# Train
# Test
# Validation

In [19]:
if not os.path.isdir('tumorous_and_nontumorous'):
    base_dir = 'tumorous_and_nontumorous'
    os.mkdir(base_dir)

In [20]:
if not os.path.isdir('tumorous_and_nontumorous/train'):
    train_dir = os.path.join(base_dir , 'train')
    os.mkdir(train_dir)
if not os.path.isdir('tumorous_and_nontumorous/test'):
    test_dir = os.path.join(base_dir , 'test')
    os.mkdir(test_dir)
if not os.path.isdir('tumorous_and_nontumorous/valid'):
    valid_dir = os.path.join(base_dir , 'valid')
    os.mkdir(valid_dir)

if not os.path.isdir('tumorous_and_nontumorous/train/tumorous'):
    infected_train_dir = os.path.join(train_dir, 'tumorous')
    os.mkdir(infected_train_dir)
if not os.path.isdir('tumorous_and_nontumorous/test/tumorous'):
    infected_test_dir = os.path.join(test_dir, 'tumorous')
    os.mkdir(infected_test_dir)
if not os.path.isdir('tumorous_and_nontumorous/valid/tumorous'):
    infected_valid_dir = os.path.join(valid_dir, 'tumorous')
    os.mkdir(infected_valid_dir)

if not os.path.isdir('tumorous_and_nontumorous/train/nontumorous'):
    healthy_train_dir = os.path.join(train_dir, 'nontumorous')
    os.mkdir(healthy_train_dir)
if not os.path.isdir('tumorous_and_nontumorous/test/nontumorous'):
    healthy_test_dir = os.path.join(test_dir, 'nontumorous')
    os.mkdir(healthy_test_dir)
if not os.path.isdir('tumorous_and_nontumorous/valid/nontumorous'):
    healthy_valid_dir = os.path.join(valid_dir, 'nontumorous')
    os.mkdir(healthy_valid_dir)

In [21]:
original_dataset_tumorours = os.path.join('augmented_data','yes/')
original_dataset_nontumorours = os.path.join('augmented_data','no/')

In [22]:
# 80% 10% 10% - YES

In [23]:
files = os.listdir('augmented_data/yes/')
fnames = []
for i in range(0,247):
    fnames.append(files[i])
for fname in fnames:
    src = os.path.join(original_dataset_tumorours, fname)
    dst = os.path.join(infected_train_dir, fname)
    shutil.copyfile(src, dst)

files = os.listdir('augmented_data/yes/')
fnames = []
for i in range(248,278):
    fnames.append(files[i])
for fname in fnames:
    src = os.path.join(original_dataset_tumorours, fname)
    dst = os.path.join(infected_test_dir, fname)
    shutil.copyfile(src, dst)

files = os.listdir('augmented_data/yes/')
fnames = []
for i in range(279,310):
    fnames.append(files[i])
for fname in fnames:
    src = os.path.join(original_dataset_tumorours, fname)
    dst = os.path.join(infected_valid_dir, fname)
    shutil.copyfile(src, dst)

In [24]:
# 80% 10% 10% - NO

In [25]:
files = os.listdir('augmented_data/no/')
fnames = []
for i in range(0,155):
    fnames.append(files[i])
for fname in fnames:
    src = os.path.join(original_dataset_nontumorours, fname)
    dst = os.path.join(healthy_train_dir, fname)
    shutil.copyfile(src, dst)


files = os.listdir('augmented_data/no/')
fnames = []
for i in range(156,175):
    fnames.append(files[i])
for fname in fnames:
    src = os.path.join(original_dataset_nontumorours, fname)
    dst = os.path.join(healthy_test_dir, fname)
    shutil.copyfile(src, dst)

files = os.listdir('augmented_data/no/')
fnames = []
for i in range(176,196):
    fnames.append(files[i])
for fname in fnames:
    src = os.path.join(original_dataset_nontumorours, fname)
    dst = os.path.join(healthy_valid_dir, fname)
    shutil.copyfile(src, dst)

In [26]:
# Model Buliding

In [27]:
train_datagen = ImageDataGenerator(rescale = 1./255,
                  horizontal_flip=0.4,
                  vertical_flip=0.4,
                  rotation_range=40,
                  shear_range=0.2,
                  width_shift_range=0.4,
                  height_shift_range=0.4,
                  fill_mode='nearest')
test_data_gen = ImageDataGenerator(rescale=1.0/255)
valid_data_gen = ImageDataGenerator(rescale=1.0/255)

In [28]:
train_generator = train_datagen.flow_from_directory('tumorous_and_nontumorous/train/', batch_size=32, target_size=(240,240), class_mode='categorical',shuffle=True, seed = 42, color_mode = 'rgb')

Found 402 images belonging to 2 classes.


In [29]:
test_generator = train_datagen.flow_from_directory('tumorous_and_nontumorous/test/', batch_size=32, target_size=(240,240), class_mode='categorical',shuffle=True, seed = 42, color_mode = 'rgb')

Found 49 images belonging to 2 classes.


In [30]:
valid_generator = train_datagen.flow_from_directory('tumorous_and_nontumorous/valid/', batch_size=32, target_size=(240,240), class_mode='categorical',shuffle=True, seed = 42, color_mode = 'rgb')

Found 51 images belonging to 2 classes.


In [31]:
class_labels = train_generator.class_indices
class_name = {value: key for (key,value) in class_labels.items()}

In [32]:
class_name

{0: 'nontumorous', 1: 'tumorous'}

In [33]:
"""
#Best Hyperparameters: {'learning_rate': 2.8706295492775003e-05, 'dense_1': 3584, 'dense_2': 1024, 'optimizer': 'Adam'}

import optuna
import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.optimizers import Adam, AdamW, SGD
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from optuna.integration import TFKerasPruningCallback
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data Augmentation to prevent overfitting
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

valid_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/content/tumorous_and_nontumorous/train',
    target_size=(240, 240),
    batch_size=32,
    class_mode='categorical'
)

valid_generator = valid_datagen.flow_from_directory(
    '/content/tumorous_and_nontumorous/valid',
    target_size=(240, 240),
    batch_size=32,
    class_mode='categorical'
)

# Optuna Objective Function
def objective(trial):
    # Hyperparameter Search Space
    lr = trial.suggest_loguniform('learning_rate', 1e-5, 5e-3)
    dropout_rate = 0.5  # Fixed
    dense_1 = trial.suggest_int('dense_1', 1024, 4608, step=512)
    dense_2 = trial.suggest_int('dense_2', 256, 2048, step=256)
    l2_lambda = 1e-4  # Fixed
    optimizer_choice = trial.suggest_categorical('optimizer', ['SGD', 'Adam', 'AdamW'])

    # Load Pretrained Model
    base_model = VGG19(input_shape=(240,240,3), include_top=False, weights='imagenet')

    # Unfreeze last 3 layers for fine-tuning
    for layer in base_model.layers[-3:]:
        layer.trainable = True

    # Model Definition
    x = Flatten()(base_model.output)
    x = Dense(dense_1, activation='relu', kernel_regularizer=l2(l2_lambda))(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(dense_2, activation='relu', kernel_regularizer=l2(l2_lambda))(x)
    x = Dropout(dropout_rate / 2)(x)
    output = Dense(2, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=output)

    # Choose Optimizer
    if optimizer_choice == "SGD":
        optimizer = SGD(learning_rate=lr, momentum=0.9, nesterov=True)
    elif optimizer_choice == "Adam":
        optimizer = Adam(learning_rate=lr)
    else:
        optimizer = AdamW(learning_rate=lr)

    # Compile Model with metrics
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy', 'Recall'])

    # Callbacks
    es = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True, verbose=1)
    lr_reduce = ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.3, min_lr=1e-6, verbose=1)
    pruning_cb = TFKerasPruningCallback(trial, 'val_accuracy')

    # Train Model
    history = model.fit(
        train_generator,
        epochs=10,
        validation_data=valid_generator,
        callbacks=[es, lr_reduce, pruning_cb]
    )

    # Debug: Print available metrics
    print("Logged metrics:", history.history.keys())

    val_acc = max(history.history.get('val_accuracy', [0]))
    val_recall = max(history.history.get('val_recall', [0]))  # Prevent KeyError

    print(f"Trial {trial.number}: Accuracy = {val_acc}, Recall = {val_recall}")

    return val_recall  # Optimizing for recall

# Run Optuna Study
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=2)

# Best Hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)
Trial 0: Accuracy = 0.686274528503418, Recall = 0
Trial 1: Accuracy = 0.7058823704719543, Recall = 0
Best Hyperparameters: {'learning_rate': 2.8706295492775003e-05, 'dense_1': 3584, 'dense_2': 1024, 'optimizer': 'Adam'}

"""

'\n#Best Hyperparameters: {\'learning_rate\': 2.8706295492775003e-05, \'dense_1\': 3584, \'dense_2\': 1024, \'optimizer\':\xa0\'Adam\'}\n\nimport optuna\nimport tensorflow as tf\nfrom tensorflow.keras.applications import VGG19\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization\nfrom tensorflow.keras.optimizers import Adam, AdamW, SGD\nfrom tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau\nfrom tensorflow.keras.regularizers import l2\nfrom optuna.integration import TFKerasPruningCallback\nfrom tensorflow.keras.preprocessing.image import ImageDataGenerator\n\n# Data Augmentation to prevent overfitting\ntrain_datagen = ImageDataGenerator(\n    rescale=1./255,\n    rotation_range=20,\n    width_shift_range=0.2,\n    height_shift_range=0.2,\n    shear_range=0.2,\n    zoom_range=0.2,\n    horizontal_flip=True,\n    fill_mode=\'nearest\'\n)\n\nvalid_datagen = ImageDataGenerator(rescale=1./255)\n\ntrain_

In [34]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.metrics import Recall

# Best Hyperparameters from Optuna
best_lr = 2.8706295492775003e-05
best_dropout = 0.5  # Assuming dropout remains the same
best_dense_1 = 3584
best_dense_2 = 1024

# Load base model (VGG19) without the top classification layer
base_model = VGG19(input_shape=(240, 240, 3), include_top=False, weights='imagenet')

# Freeze Base Model Layers
for layer in base_model.layers:
    layer.trainable = False

# Add Custom Top Layers
x = Flatten()(base_model.output)
x = Dense(best_dense_1, activation='relu')(x)
x = Dropout(best_dropout)(x)
x = Dense(best_dense_2, activation='relu')(x)
output = Dense(2, activation='softmax')(x)

# Create final model
model_01 = Model(inputs=base_model.input, outputs=output)
model_01.summary()

# Compile model using Adam optimizer with best learning rate
adam = Adam(learning_rate=best_lr)
model_01.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy', Recall()])

# Checkpoint to save best model
filepath = 'model.h5'
cp = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', save_freq='epoch')

# Train the model
history_01 = model_01.fit(
    train_generator,
    steps_per_epoch=32,
    epochs=20,
    callbacks=[cp],
    validation_data=valid_generator
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m80134624/80134624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m23s[0m 1s/step - accuracy: 0.5158 - loss: 1.3442 - recall: 0.5158




Epoch 1: val_loss improved from inf to 1.06431, saving model to model.h5




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 1s/step - accuracy: 0.5094 - loss: 1.2762 - recall: 0.5094 - val_accuracy: 0.6078 - val_loss: 1.0643 - val_recall: 0.6078
Epoch 2/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m10s[0m 558ms/step - accuracy: 0.5834 - loss: 0.8630 - recall: 0.5834
Epoch 2: val_loss improved from 1.06431 to 0.60614, saving model to model.h5




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 610ms/step - accuracy: 0.5782 - loss: 0.8003 - recall: 0.5782 - val_accuracy: 0.6863 - val_loss: 0.6061 - val_recall: 0.6863
Epoch 3/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m9s[0m 482ms/step - accuracy: 0.6793 - loss: 0.6412 - recall: 0.6793
Epoch 3: val_loss did not improve from 0.60614
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 269ms/step - accuracy: 0.6511 - loss: 0.6668 - recall: 0.6511 - val_accuracy: 0.5098 - val_loss: 0.6831 - val_recall: 0.5098
Epoch 4/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m8s[0m 469ms/step - accuracy: 0.5997 - loss: 0.6587 - recall: 0.5997
Epoch 4: val_loss did not improve from 0.60614
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 211ms/step - accuracy: 0.6217 - loss: 0.6542 - recall: 0.6217 - val_accuracy: 0.6078 - val_loss:



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 491ms/step - accuracy: 0.6517 - loss: 0.6172 - recall: 0.6517 - val_accuracy: 0.6863 - val_loss: 0.5946 - val_recall: 0.6863
Epoch 7/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m8s[0m 467ms/step - accuracy: 0.6599 - loss: 0.6441 - recall: 0.6599
Epoch 7: val_loss did not improve from 0.59464
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 224ms/step - accuracy: 0.6595 - loss: 0.6384 - recall: 0.6595 - val_accuracy: 0.6863 - val_loss: 0.5970 - val_recall: 0.6863
Epoch 8/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m8s[0m 462ms/step - accuracy: 0.7035 - loss: 0.5444 - recall: 0.7035
Epoch 8: val_loss improved from 0.59464 to 0.56795, saving model to model.h5




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 657ms/step - accuracy: 0.6993 - loss: 0.5619 - recall: 0.6993 - val_accuracy: 0.7647 - val_loss: 0.5680 - val_recall: 0.7647
Epoch 9/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m10s[0m 540ms/step - accuracy: 0.6829 - loss: 0.5838 - recall: 0.6829
Epoch 9: val_loss did not improve from 0.56795
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 239ms/step - accuracy: 0.6984 - loss: 0.5703 - recall: 0.6984 - val_accuracy: 0.5686 - val_loss: 0.6405 - val_recall: 0.5686
Epoch 10/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m10s[0m 566ms/step - accuracy: 0.7028 - loss: 0.5736 - recall: 0.7028
Epoch 10: val_loss improved from 0.56795 to 0.56394, saving model to model.h5




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 570ms/step - accuracy: 0.6946 - loss: 0.5826 - recall: 0.6946 - val_accuracy: 0.7255 - val_loss: 0.5639 - val_recall: 0.7255
Epoch 11/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m9s[0m 476ms/step - accuracy: 0.6890 - loss: 0.6088 - recall: 0.6890
Epoch 11: val_loss did not improve from 0.56394
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 213ms/step - accuracy: 0.6846 - loss: 0.6084 - recall: 0.6846 - val_accuracy: 0.5686 - val_loss: 0.6383 - val_recall: 0.5686
Epoch 12/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m10s[0m 544ms/step - accuracy: 0.6859 - loss: 0.5764 - recall: 0.6859
Epoch 12: val_loss did not improve from 0.56394
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 240ms/step - accuracy: 0.6893 - loss: 0.5801 - recall: 0.6893 - val_accuracy: 0.6863 - val



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 652ms/step - accuracy: 0.7016 - loss: 0.5524 - recall: 0.7016 - val_accuracy: 0.6863 - val_loss: 0.4902 - val_recall: 0.6863
Epoch 19/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m10s[0m 560ms/step - accuracy: 0.7074 - loss: 0.5325 - recall: 0.7074
Epoch 19: val_loss did not improve from 0.49024
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 300ms/step - accuracy: 0.7113 - loss: 0.5319 - recall: 0.7113 - val_accuracy: 0.7059 - val_loss: 0.5161 - val_recall: 0.7059
Epoch 20/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m9s[0m 479ms/step - accuracy: 0.7245 - loss: 0.5124 - recall: 0.7245
Epoch 20: val_loss did not improve from 0.49024
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 216ms/step - accuracy: 0.7212 - loss: 0.5191 - recall: 0.7212 - val_accuracy: 0.6667 - val

In [35]:
import os

# Ensure the directory exists
if not os.path.isdir("model_weights/"):
    os.mkdir("model_weights/")

# Save weights with correct filename extension
model_01.save_weights(filepath="model_weights/vgg19_model_01.weights.h5", overwrite=True)

In [36]:
# Load model weights
model_01.load_weights("model_weights/vgg19_model_01.weights.h5")

# Evaluate on validation set
vgg_val_eval_01 = model_01.evaluate(valid_generator)
print(f"Validation - Loss: {vgg_val_eval_01[0]}, Accuracy: {vgg_val_eval_01[1]}, Recall: {vgg_val_eval_01[2]}")

# Evaluate on test set
vgg_test_eval_01 = model_01.evaluate(test_generator)
print(f"Test - Loss: {vgg_test_eval_01[0]}, Accuracy: {vgg_test_eval_01[1]}, Recall: {vgg_test_eval_01[2]}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 270ms/step - accuracy: 0.6814 - loss: 0.5770 - recall: 0.6814
Validation - Loss: 0.6052923202514648, Accuracy: 0.6470588445663452, Recall: 0.6470588445663452
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7s/step - accuracy: 0.7262 - loss: 0.4728 - recall: 0.7262
Test - Loss: 0.49257320165634155, Accuracy: 0.7142857313156128, Recall: 0.7142857313156128


In [37]:
import numpy as np

# Get filenames from test generator
filenames = test_generator.filenames
nb_sample = len(filenames)

# Ensure the correct number of steps (batches) for prediction
steps_per_epoch = len(test_generator)

# Debugging information
print(f"Number of test samples: {nb_sample}")
print(f"Batch size: {test_generator.batch_size}")
print(f"Total batches in test_generator: {steps_per_epoch}")

# Run predictions with corrected steps
vgg_prediction_01 = model_01.predict(test_generator, steps=steps_per_epoch, verbose=1)

# Convert softmax probabilities to class labels
y_pred = np.argmax(vgg_prediction_01, axis=1)

# Output predicted class indices
print(f"Predicted class indices: {y_pred}")

Number of test samples: 49
Batch size: 32
Total batches in test_generator: 2
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 729ms/step
Predicted class indices: [0 1 1 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 1 1 0 0 1 1 1 1 1 0
 1 0 1 0 1 1 1 0 1 1 0 1]


In [39]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Recall
from tensorflow.keras.callbacks import ModelCheckpoint

# Best Hyperparameters from Optuna
best_lr = 2.8706295492775003e-05
best_dropout = 0.6
best_dense_1 = 3584
best_dense_2 = 1024

# Load VGG19 base model
base_model = VGG19(input_shape=(240, 240, 3), include_top=False, weights='imagenet')

# Add Custom Top Layers for Model_02
x = Flatten()(base_model.output)
x = Dense(best_dense_1, activation='relu')(x)
x = Dropout(best_dropout)(x)
x = Dense(best_dense_2, activation='relu')(x)
output = Dense(2, activation='softmax')(x)

model_02 = Model(inputs=base_model.input, outputs=output)

# Load pre-trained weights from Model_01
model_02.load_weights('model_weights/vgg19_model_01.weights.h5')

# ✅ Unfreeze only specific layers for fine-tuning
for layer in model_02.layers:
    if layer.name in ['block5_conv1', 'block5_conv2', 'block5_conv3', 'block5_conv4']:
        layer.trainable = True
    else:
        layer.trainable = False

# Compile model using Adam optimizer and best learning rate
adam = Adam(learning_rate=best_lr)
model_02.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy', Recall()])

model_02.summary()

# ModelCheckpoint callback
cp = ModelCheckpoint(
    'model_weights/vgg19_model_02.weights.h5',
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)

# Train Model_02
history_02 = model_02.fit(
    train_generator,
    steps_per_epoch=32,
    epochs=20,
    validation_data=valid_generator,
    callbacks=[cp]
)

Epoch 1/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m13s[0m 687ms/step - accuracy: 0.6710 - loss: 0.6480 - recall_1: 0.6710
Epoch 1: val_loss improved from inf to 0.59008, saving model to model_weights/vgg19_model_02.weights.h5




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 458ms/step - accuracy: 0.6699 - loss: 0.6399 - recall_1: 0.6699 - val_accuracy: 0.7647 - val_loss: 0.5901 - val_recall_1: 0.7647
Epoch 2/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m10s[0m 569ms/step - accuracy: 0.7593 - loss: 0.4883 - recall_1: 0.7593
Epoch 2: val_loss improved from 0.59008 to 0.52038, saving model to model_weights/vgg19_model_02.weights.h5




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 552ms/step - accuracy: 0.7560 - loss: 0.4790 - recall_1: 0.7560 - val_accuracy: 0.7843 - val_loss: 0.5204 - val_recall_1: 0.7843
Epoch 3/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m9s[0m 504ms/step - accuracy: 0.7667 - loss: 0.4741 - recall_1: 0.7667 
Epoch 3: val_loss did not improve from 0.52038
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 224ms/step - accuracy: 0.7664 - loss: 0.4620 - recall_1: 0.7664 - val_accuracy: 0.7647 - val_loss: 0.5589 - val_recall_1: 0.7647
Epoch 4/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m9s[0m 490ms/step - accuracy: 0.8105 - loss: 0.4080 - recall_1: 0.8105
Epoch 4: val_loss did not improve from 0.52038
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 218ms/step - accuracy: 0.8078 - loss: 0.4187 - recall_1: 0.8078 - val_accuracy:



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 595ms/step - accuracy: 0.8841 - loss: 0.2897 - recall_1: 0.8841 - val_accuracy: 0.7843 - val_loss: 0.4946 - val_recall_1: 0.7843
Epoch 10/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m10s[0m 559ms/step - accuracy: 0.9139 - loss: 0.2185 - recall_1: 0.9139
Epoch 10: val_loss did not improve from 0.49462
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 257ms/step - accuracy: 0.9000 - loss: 0.2492 - recall_1: 0.9000 - val_accuracy: 0.8431 - val_loss: 0.5891 - val_recall_1: 0.8431
Epoch 11/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m9s[0m 515ms/step - accuracy: 0.8570 - loss: 0.3208 - recall_1: 0.8570 
Epoch 11: val_loss did not improve from 0.49462
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 228ms/step - accuracy: 0.8680 - loss: 0.3080 - recall_1: 0.8680 - val_accur



[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 404ms/step - accuracy: 0.8890 - loss: 0.2304 - recall_1: 0.8890 - val_accuracy: 0.9412 - val_loss: 0.3906 - val_recall_1: 0.9412
Epoch 15/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m9s[0m 489ms/step - accuracy: 0.9056 - loss: 0.2258 - recall_1: 0.9056
Epoch 15: val_loss did not improve from 0.39063
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 218ms/step - accuracy: 0.8952 - loss: 0.2450 - recall_1: 0.8952 - val_accuracy: 0.8627 - val_loss: 0.6959 - val_recall_1: 0.8627
Epoch 16/20
[1m13/32[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m10s[0m 557ms/step - accuracy: 0.8803 - loss: 0.2810 - recall_1: 0.8803
Epoch 16: val_loss did not improve from 0.39063
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 245ms/step - accuracy: 0.8834 - loss: 0.2709 - recall_1: 0.8834 - val_accurac

In [40]:
if not os.path.isdir('model_weights/'):
    os.mkdir('model_weights/')
model_02.save_weights(filepath="model_weights/vgg19_model_02.weights.h5", overwrite=True)

In [41]:
model_02.load_weights("model_weights/vgg19_model_02.weights.h5")
vgg_val_eval_02 = model_02.evaluate(valid_generator)
vgg_test_eval_02 = model_02.evaluate(test_generator)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 268ms/step - accuracy: 0.8538 - loss: 0.4982 - recall_1: 0.8538
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 681ms/step - accuracy: 0.8287 - loss: 0.3566 - recall_1: 0.8287


In [42]:
#unfreezing the entire network

In [43]:
import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Recall
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.losses import CategoricalCrossentropy

# Best Hyperparameters from Optuna
best_lr = 2.8706295492775003e-05
best_dropout = 0.6
best_dense_1 = 3584
best_dense_2 = 1024

# Load base model
base_model = VGG19(include_top=False, input_shape=(240, 240, 3))

# Add custom layers
x = base_model.output
x = Flatten()(x)
x = Dense(best_dense_1, activation='relu')(x)
x = Dropout(best_dropout)(x)
x = Dense(best_dense_2, activation='relu')(x)
output = Dense(2, activation='softmax')(x)

# Create final model
model_03 = Model(inputs=base_model.input, outputs=output)

# Load weights from Model 02
model_03.load_weights('model_weights/vgg19_model_02.weights.h5')

# Unfreeze block5 layers only
set_trainable = False
for layer in model_03.layers:
    if layer.name in ['block5_conv1', 'block5_conv2', 'block5_conv3', 'block5_pool']:
        set_trainable = True
    layer.trainable = set_trainable

# Compile model
adam = Adam(learning_rate=best_lr)
loss_fn = CategoricalCrossentropy(label_smoothing=0.1)
model_03.compile(loss=loss_fn, optimizer=adam, metrics=['accuracy', Recall(name='recall')])

# Callbacks
checkpoint = ModelCheckpoint("model_weights/vgg19_model_03.weights.h5", save_best_only=True, save_weights_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3)

# Train model
history_03 = model_03.fit(
    train_generator,
    steps_per_epoch=32,
    epochs=20,
    validation_data=valid_generator,
    callbacks=[checkpoint,reduce_lr]
)

Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 978ms/step - accuracy: 0.8769 - loss: 0.4201 - recall: 0.8769 - val_accuracy: 0.8824 - val_loss: 0.5185 - val_recall: 0.8824 - learning_rate: 2.8706e-05
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 594ms/step - accuracy: 0.8995 - loss: 0.3677 - recall: 0.8995 - val_accuracy: 0.8627 - val_loss: 0.5102 - val_recall: 0.8627 - learning_rate: 2.8706e-05
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 251ms/step - accuracy: 0.9055 - loss: 0.3576 - recall: 0.9055 - val_accuracy: 0.8235 - val_loss: 0.5356 - val_recall: 0.8235 - learning_rate: 2.8706e-05
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 798ms/step - accuracy: 0.9384 - loss: 0.3363 - recall: 0.9384 - val_accuracy: 0.8431 - val_loss: 0.4875 - val_recall: 0.8431 - learning_rate: 2.8706e-05
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 687ms/ste

In [44]:
if not os.path.isdir('model_weights/'):
    os.mkdir('model_weights/vgg_unfrozen.h5')
model_03.save_weights(filepath="model_weights/vgg19_unfrozen.weights.h5", overwrite=True)

In [45]:
model_03.load_weights("model_weights/vgg19_unfrozen.weights.h5")
vgg_val_eval_03 = model_03.evaluate(valid_generator)
vgg_test_eval_03 = model_03.evaluate(test_generator)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 247ms/step - accuracy: 0.8799 - loss: 0.4736 - recall: 0.8799
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step - accuracy: 0.9520 - loss: 0.2816 - recall: 0.9520


In [46]:
from tensorflow.keras.models import load_model
import os

# Ensure directory exists
if not os.path.isdir('model_weights/'):
    os.mkdir('model_weights/')

# Save the entire model along with weights
model_03.save("model_weights/vgg19_model_03.h5", overwrite=True)



