## Importing Libraries

In [None]:
import pandas as pd
import matplotlib as mat
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
%matplotlib inline

import random
import os

from numpy.random import seed
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import glob
import cv2

from tensorflow.random import set_seed

import warnings

In [None]:
pd.options.display.max_colwidth = 100

seed(42)

random.seed(42)
os.environ['PYTHONHASHSEED'] = str(42)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

set_seed(42)

warnings.filterwarnings('ignore')

In [None]:
IMG_SIZE = 224
BATCH = 32
SEED = 42

# Loading Dataset

In [None]:
main_path = "/content/drive/MyDrive/Datasets/Pneumonia_Detection"

train_path = os.path.join(main_path,"train")
test_path=os.path.join(main_path,"test")

train_normal = glob.glob(train_path+"/NORMAL/*.jpeg")
train_pneumonia = glob.glob(train_path+"/PNEUMONIA/*.jpeg")

test_normal = glob.glob(test_path+"/NORMAL/*.jpeg")
test_pneumonia = glob.glob(test_path+"/PNEUMONIA/*.jpeg")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
train_list = [x for x in train_normal]
train_list.extend([x for x in train_pneumonia])

df_train = pd.DataFrame(np.concatenate([['Normal']*len(train_normal) , ['PNEUMONIA']*len(train_pneumonia)]), columns = ['class'])
df_train['image'] = [x for x in train_list]

test_list = [x for x in test_normal]
test_list.extend([x for x in test_pneumonia])

df_test = pd.DataFrame(np.concatenate([['Normal']*len(test_normal) , ['PNEUMONIA']*len(test_pneumonia)]), columns = ['class'])
df_test['image'] = [x for x in test_list]

In [None]:
train_df, val_df = train_test_split(df_train, test_size = 0.20, random_state = SEED, stratify = df_train['class'])

In [None]:
train_df

Unnamed: 0,class,image
589,Normal,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/NORMAL/NORMAL-7288289-0001.jpeg
4894,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/BACTERIA-3582443-0001.jpeg
2875,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/BACTERIA-9450861-0001.jpeg
5096,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/BACTERIA-4070068-0001.jpeg
1857,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/VIRUS-7435285-0006.jpeg
...,...,...
4601,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/BACTERIA-1718141-0004.jpeg
3556,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/BACTERIA-5023591-0005.jpeg
3048,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/VIRUS-1135060-0001.jpeg
678,Normal,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/NORMAL/NORMAL-7725506-0001.jpeg


In [None]:
val_df

Unnamed: 0,class,image
2323,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/VIRUS-8995871-0002.jpeg
4876,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/BACTERIA-345112-0001.jpeg
2045,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/VIRUS-853038-0003.jpeg
2673,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/BACTERIA-8303986-0001.jpeg
2588,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/BACTERIA-8302026-0001.jpeg
...,...,...
508,Normal,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/NORMAL/NORMAL-6943766-0001.jpeg
4875,PNEUMONIA,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/PNEUMONIA/BACTERIA-3482207-0001.jpeg
878,Normal,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/NORMAL/NORMAL-9211217-0002.jpeg
200,Normal,/content/drive/MyDrive/Datasets/Pneumonia_Detection/train/NORMAL/NORMAL-4656588-0001.jpeg


In [None]:
train_datagen = ImageDataGenerator(rescale=1/255.,
                                  zoom_range = 0.1,
                                  #rotation_range = 0.1,
                                  width_shift_range = 0.1,
                                  height_shift_range = 0.1)

val_datagen = ImageDataGenerator(rescale=1/255.)

ds_train = train_datagen.flow_from_dataframe(train_df,
                                             #directory=train_path, #dataframe contains the full paths
                                             x_col = 'image',
                                             y_col = 'class',
                                             target_size = (IMG_SIZE, IMG_SIZE),
                                             class_mode = 'binary',
                                             batch_size = BATCH,
                                             seed = SEED)

ds_val = val_datagen.flow_from_dataframe(val_df,
                                            #directory=train_path,
                                            x_col = 'image',
                                            y_col = 'class',
                                            target_size = (IMG_SIZE, IMG_SIZE),
                                            class_mode = 'binary',
                                            batch_size = BATCH,
                                            seed = SEED)

ds_test = val_datagen.flow_from_dataframe(df_test,
                                            #directory=test_path,
                                            x_col = 'image',
                                            y_col = 'class',
                                            target_size = (IMG_SIZE, IMG_SIZE),
                                            class_mode = 'binary',
                                            batch_size = 1,
                                            shuffle = False)

Found 4196 validated image filenames belonging to 2 classes.
Found 1050 validated image filenames belonging to 2 classes.
Found 651 validated image filenames belonging to 2 classes.


## Transfer Learning
Model 1: ResNet152V2

In [None]:
base_model = tf.keras.applications.ResNet152V2(
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False)

base_model.trainable = False

def get_pretrained():

    #Input shape = [width, height, color channels]
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    x = base_model(inputs)

    # Head
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.1)(x)

    #Final Layer (Output)
    output = layers.Dense(1, activation='sigmoid')(x)

    model = keras.Model(inputs=[inputs], outputs=output)

    return model

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m234545216/234545216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
keras.backend.clear_session()

model_pretrained = get_pretrained()
model_pretrained.compile(loss='binary_crossentropy', optimizer = keras.optimizers.Adam(learning_rate=5e-5), metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
# Define early stopping callback
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Stop training when validation loss stops improving for 3 epochs

# Define learning rate reduction callback
plateau = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, verbose=1, min_lr=1e-7)

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=len(train_df)//BATCH,
          validation_steps=len(val_df)//BATCH);

Epoch 1/5
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2004s[0m 15s/step - binary_accuracy: 0.7153 - loss: 0.5388 - val_binary_accuracy: 0.9307 - val_loss: 0.2115 - learning_rate: 5.0000e-05
Epoch 2/5
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 373ms/step - binary_accuracy: 0.8438 - loss: 0.3850 - val_binary_accuracy: 0.9268 - val_loss: 0.2151 - learning_rate: 5.0000e-05
Epoch 3/5
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 1s/step - binary_accuracy: 0.9185 - loss: 0.2201 - val_binary_accuracy: 0.9443 - val_loss: 0.1715 - learning_rate: 5.0000e-05
Epoch 4/5
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 156ms/step - binary_accuracy: 0.9375 - loss: 0.2520 - val_binary_accuracy: 0.9443 - val_loss: 0.1690 - learning_rate: 5.0000e-05
Epoch 5/5
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 1s/step - binary_accuracy: 0.9344 - loss: 0.1717 - val_binary_accuracy: 0.9502 - val_loss: 0.1509 - 

In [None]:
score = model_pretrained.evaluate(ds_val, steps = len(val_df)//BATCH, verbose = 0)
print('train loss:', score[0])
print('train accuracy:', score[1])

train loss: 0.15188048779964447
train accuracy: 0.9501953125


In [None]:
score = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.3022271692752838
Test accuracy: 0.8709677457809448


### ResNet152V2 Fine Tuning

In [None]:
base_model.trainable = True

# Freeze all layers except for the 13 
for layer in base_model.layers[:-13]:
    layer.trainable = False

In [None]:
# Check which layers are tuneable (trainable)
for layer_number, layer in enumerate(base_model.layers):
    print(layer_number, layer.name, layer.trainable)

0 input_layer False
1 conv1_pad False
2 conv1_conv False
3 pool1_pad False
4 pool1_pool False
5 conv2_block1_preact_bn False
6 conv2_block1_preact_relu False
7 conv2_block1_1_conv False
8 conv2_block1_1_bn False
9 conv2_block1_1_relu False
10 conv2_block1_2_pad False
11 conv2_block1_2_conv False
12 conv2_block1_2_bn False
13 conv2_block1_2_relu False
14 conv2_block1_0_conv False
15 conv2_block1_3_conv False
16 conv2_block1_out False
17 conv2_block2_preact_bn False
18 conv2_block2_preact_relu False
19 conv2_block2_1_conv False
20 conv2_block2_1_bn False
21 conv2_block2_1_relu False
22 conv2_block2_2_pad False
23 conv2_block2_2_conv False
24 conv2_block2_2_bn False
25 conv2_block2_2_relu False
26 conv2_block2_3_conv False
27 conv2_block2_out False
28 conv2_block3_preact_bn False
29 conv2_block3_preact_relu False
30 conv2_block3_1_conv False
31 conv2_block3_1_bn False
32 conv2_block3_1_relu False
33 conv2_block3_2_pad False
34 conv2_block3_2_conv False
35 conv2_block3_2_bn False
36 conv2_

In [None]:
model_pretrained.compile(loss='binary_crossentropy', optimizer = keras.optimizers.Adam(learning_rate=2e-6), metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
#Train
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 2,
          validation_data=ds_val,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=(len(train_df)//BATCH),
          validation_steps=(len(val_df)//BATCH));

Epoch 1/2
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 1s/step - binary_accuracy: 0.9252 - loss: 0.2632 - val_binary_accuracy: 0.9473 - val_loss: 0.1956 - learning_rate: 2.0000e-06
Epoch 2/2
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 312ms/step - binary_accuracy: 1.0000 - loss: 0.1606 - val_binary_accuracy: 0.9473 - val_loss: 0.1967 - learning_rate: 2.0000e-06


In [None]:
#Evaluation
score = model_pretrained.evaluate(ds_val, steps = len(val_df)//BATCH, verbose = 0)
print('train loss:', score[0])
print('train accuracy:', score[1])

train loss: 0.19741755723953247
train accuracy: 0.9462890625


In [None]:
score = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.35417553782463074
Test accuracy: 0.8479262590408325


In [None]:
ds_test.reset()
predictions = model_pretrained.predict(ds_test, steps=len(ds_test), verbose=0)
pred_labels= np.where(predictions>0.5, 1, 0)

### ResNet152V2 Metrics

In [None]:
from sklearn.metrics import classification_report, roc_auc_score,roc_curve,confusion_matrix
true_labels = ds_test.classes  # Extract true labels
cm_df = pd.DataFrame(confusion_matrix(true_labels,pred_labels))
cm_df.columns = ['Predicted 0','Predicted 1']
cm_df = cm_df.rename(index={0: 'Actual 0',1:'Actual 1'})
cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,170,91
Actual 1,8,382


In [None]:
roc_auc=roc_auc_score(true_labels, pred_labels)
print('ROC AUC=%.3f' % roc_auc)

ROC AUC=0.815


### Model Improving to avoid overfitting

In [None]:
IMG_SIZE = 128
BATCH = 16
SEED = 42

In [None]:
train_df, val_df = train_test_split(df_train, test_size = 0.20, random_state = SEED, stratify = df_train['class'])

### Data Augmentation

In [None]:
train_datagen = ImageDataGenerator( rescale=1/255.,
                                    zoom_range=0.2,  # Increased from 0.1
                                    rotation_range=20,  # Enable rotations
                                    width_shift_range=0.2,  # Increased from 0.1
                                    height_shift_range=0.2,  # Increased from 0.1
                                    horizontal_flip=True,  # Add horizontal flip
                                    brightness_range=[0.8, 1.2]  # Vary brightness
)

val_datagen = ImageDataGenerator(rescale=1/255.)

ds_train = train_datagen.flow_from_dataframe(train_df,
                                             #directory=train_path, #dataframe contains the full paths
                                             x_col = 'image',
                                             y_col = 'class',
                                             target_size = (IMG_SIZE, IMG_SIZE),
                                             class_mode = 'binary',
                                             batch_size = BATCH,
                                             seed = SEED)

ds_val = val_datagen.flow_from_dataframe(val_df,
                                            #directory=train_path,
                                            x_col = 'image',
                                            y_col = 'class',
                                            target_size = (IMG_SIZE, IMG_SIZE),
                                            class_mode = 'binary',
                                            batch_size = BATCH,
                                            seed = SEED)

ds_test = val_datagen.flow_from_dataframe(df_test,
                                            #directory=test_path,
                                            x_col = 'image',
                                            y_col = 'class',
                                            target_size = (IMG_SIZE, IMG_SIZE),
                                            class_mode = 'binary',
                                            batch_size = 1,
                                            shuffle = False)

Found 4196 validated image filenames belonging to 2 classes.
Found 1050 validated image filenames belonging to 2 classes.
Found 651 validated image filenames belonging to 2 classes.


## Transfer Learning
### ResNet50V2

In [None]:
from tensorflow.keras.regularizers import l2

base_model = tf.keras.applications.ResNet50V2( # simpler model than ResNet152V2
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False)

base_model.trainable = False

def get_pretrained():

    #Input shape = [width, height, color channels]
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    x = base_model(inputs)

    # Head
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(128, activation='relu', kernel_regularizer=l2(0.001))(x) # Added L2 regularization
    x = layers.Dropout(0.3)(x)  # Increased dropout from 0.1 to 0.3

    #Final Layer (Output)
    output = layers.Dense(1, activation='sigmoid')(x)

    model = keras.Model(inputs=[inputs], outputs=output)

    return model

In [None]:
keras.backend.clear_session()

model_pretrained = get_pretrained()
model_pretrained.compile(loss='binary_crossentropy'
                       , optimizer = keras.optimizers.Adam(learning_rate=tf.keras.optimizers.schedules.CosineDecayRestarts(
                                                                                                          1e-4,
                                                                                                          first_decay_steps=10,
                                                                                                          t_mul=2.0,
                                                                                                          m_mul=0.8,
                                                                                                          alpha=1e-6
                                                                                                      ))
                       , metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
# Define early stopping callback
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True) #Increase patience from 5 to 10
# Stop training when validation loss stops improving for 3 epochs

# Define learning rate reduction callback
plateau = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, verbose=1, min_lr=1e-7)

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=len(train_df)//BATCH,
          validation_steps=len(val_df)//BATCH);

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 549ms/step - binary_accuracy: 0.7615 - loss: 0.7789 - val_binary_accuracy: 0.8808 - val_loss: 0.4929 - learning_rate: 8.4422e-06
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 55ms/step - binary_accuracy: 0.8125 - loss: 0.5867 - val_binary_accuracy: 0.8817 - val_loss: 0.4923 - learning_rate: 8.1193e-06
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 358ms/step - binary_accuracy: 0.8412 - loss: 0.5885 - val_binary_accuracy: 0.8913 - val_loss: 0.4564 - learning_rate: 7.9610e-06
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 78ms/step - binary_accuracy: 1.0000 - loss: 0.3499 - val_binary_accuracy: 0.8923 - val_loss: 0.4551 - learning_rate: 7.8235e-06
Epoch 5/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 357ms/step - binary_accuracy: 0.8840 - loss: 0.4846 - val_binary_accuracy: 0.8981 - val_loss: 0.449

In [None]:
score = model_pretrained.evaluate(ds_val, steps = len(val_df)//BATCH, verbose = 0)
print('train loss:', score[0])
print('train accuracy:', score[1])

train loss: 0.4480469524860382
train accuracy: 0.8999999761581421


In [None]:
score = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.5567795038223267
Test accuracy: 0.8602150678634644


### ResNEt50V2 Fine Tuning

#### Balance the data

In [None]:
df_train['class'].value_counts()

Unnamed: 0_level_0,count
class,Unnamed: 1_level_1
PNEUMONIA,3897
Normal,1349


In [None]:
from sklearn.utils.class_weight import compute_class_weight

class_weights = compute_class_weight('balanced', classes=np.unique(df_train['class']), y=df_train['class'])
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

Instead of unfreezing 25 layers at once, fine-tune incrementally

#### Step 1: Unfreeze last 10 layers and train for 5 epochs

In [None]:
base_model.trainable = True

# Step 1: Unfreeze last 10 layers and train for 5 epochs
for layer in base_model.layers[-10:]:  # Instead of -13
    layer.trainable = False

In [None]:
model_pretrained.compile(loss='binary_crossentropy', optimizer = keras.optimizers.Adam(learning_rate=2e-6), metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          class_weight=class_weight_dict,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=(len(train_df)//BATCH),
          validation_steps=(len(val_df)//BATCH));

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 437ms/step - binary_accuracy: 0.7739 - loss: 0.7347 - val_binary_accuracy: 0.8750 - val_loss: 0.5235 - learning_rate: 2.0000e-06
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 73ms/step - binary_accuracy: 1.0000 - loss: 0.4660 - val_binary_accuracy: 0.8740 - val_loss: 0.5238 - learning_rate: 2.0000e-06
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 424ms/step - binary_accuracy: 0.8074 - loss: 0.6377 - val_binary_accuracy: 0.8913 - val_loss: 0.4660 - learning_rate: 2.0000e-06
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 55ms/step - binary_accuracy: 0.9375 - loss: 0.5879 - val_binary_accuracy: 0.8923 - val_loss: 0.4633 - learning_rate: 2.0000e-06
Epoch 5/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 420ms/step - binary_accuracy: 0.8620 - loss: 0.5414 - val_binary_accuracy: 0.9288 - val_loss: 0.39

#### Step 2: Unfreeze last 30 layers and train again


In [None]:
# Step 2: Unfreeze last 30 layers and train again
for layer in base_model.layers[-30:]:
    layer.trainable = True

In [None]:
model_pretrained.compile(loss='binary_crossentropy', optimizer = keras.optimizers.Adam(learning_rate=2e-6), metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          class_weight=class_weight_dict,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=(len(train_df)//BATCH),
          validation_steps=(len(val_df)//BATCH));

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 432ms/step - binary_accuracy: 0.8640 - loss: 0.5963 - val_binary_accuracy: 0.9019 - val_loss: 0.4785 - learning_rate: 2.0000e-06
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 76ms/step - binary_accuracy: 0.8750 - loss: 0.5647 - val_binary_accuracy: 0.9000 - val_loss: 0.4809 - learning_rate: 2.0000e-06
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 371ms/step - binary_accuracy: 0.8751 - loss: 0.5334
Epoch 3: ReduceLROnPlateau reducing learning rate to 3.999999989900971e-07.
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 424ms/step - binary_accuracy: 0.8752 - loss: 0.5333 - val_binary_accuracy: 0.8913 - val_loss: 0.4899 - learning_rate: 2.0000e-06
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 53ms/step - binary_accuracy: 0.9375 - loss: 0.4351 - val_binary_accuracy: 0.8894 - val_loss: 0.4920 - learning

In [None]:
score = model_pretrained.evaluate(ds_val, steps = len(val_df)//BATCH, verbose = 0)
print('train loss:', score[0])
print('train accuracy:', score[1])

train loss: 0.47975170612335205
train accuracy: 0.9009615182876587


In [None]:
score = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.48896491527557373
Test accuracy: 0.8847926259040833


In [None]:
ds_test.reset()
predictions = model_pretrained.predict(ds_test, steps=len(ds_test), verbose=0)
pred_labels= np.where(predictions>0.5, 1, 0)

### ResNet50V2 Metrics

In [None]:
from sklearn.metrics import classification_report, roc_auc_score,roc_curve,confusion_matrix
true_labels = ds_test.classes  # Extract true labels
cm_df = pd.DataFrame(confusion_matrix(true_labels,pred_labels))
cm_df.columns = ['Predicted 0','Predicted 1']
cm_df = cm_df.rename(index={0: 'Actual 0',1:'Actual 1'})
cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,222,39
Actual 1,36,354


In [None]:
roc_auc=roc_auc_score(true_labels, pred_labels)
print('ROC AUC=%.3f' % roc_auc)

ROC AUC=0.879


In [None]:
model_pretrained.save('/content/drive/MyDrive/TransferLearningModel.keras')  # Saves as a directory

#### Model Improving to increase accuracy

In [None]:
IMG_SIZE = 128
BATCH = 16
SEED = 42

In [None]:
train_df, val_df = train_test_split(df_train, test_size = 0.20, random_state = SEED, stratify = df_train['class'])

Instead of just augmenting during training, create additional augmented images and add them to the dataset:

In [None]:
train_datagen = ImageDataGenerator( rescale=1/255.,
                                    zoom_range=0.2,  # Increased from 0.1
                                    rotation_range=20,  # Enable rotations
                                    width_shift_range=0.2,  # Increased from 0.1
                                    height_shift_range=0.2,  # Increased from 0.1
                                    horizontal_flip=True,  # Add horizontal flip
                                    brightness_range=[0.8, 1.2]  # Vary brightness
)

val_datagen = ImageDataGenerator(rescale=1/255.)

ds_train = train_datagen.flow_from_dataframe(train_df,
                                             #directory=train_path, #dataframe contains the full paths
                                             x_col = 'image',
                                             y_col = 'class',
                                             target_size = (IMG_SIZE, IMG_SIZE),
                                             class_mode = 'binary',
                                             batch_size = BATCH,
                                             seed = SEED)

ds_val = val_datagen.flow_from_dataframe(val_df,
                                            #directory=train_path,
                                            x_col = 'image',
                                            y_col = 'class',
                                            target_size = (IMG_SIZE, IMG_SIZE),
                                            class_mode = 'binary',
                                            batch_size = BATCH,
                                            seed = SEED)

ds_test = val_datagen.flow_from_dataframe(df_test,
                                            #directory=test_path,
                                            x_col = 'image',
                                            y_col = 'class',
                                            target_size = (IMG_SIZE, IMG_SIZE),
                                            class_mode = 'binary',
                                            batch_size = 1,
                                            shuffle = False)

Found 4196 validated image filenames belonging to 2 classes.
Found 1050 validated image filenames belonging to 2 classes.
Found 651 validated image filenames belonging to 2 classes.


## Transfer Learning
### EfficientNetB3

In [None]:
 #EfficientNetB3 or EfficientNetB4, which often generalize better than ResNet50V2; 
 # It scales width, depth, and resolution together, making it more parameter-efficient.
from tensorflow.keras.regularizers import l2

base_model = tf.keras.applications.EfficientNetB3(
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False)

base_model.trainable = False

def get_pretrained():

    #Input shape = [width, height, color channels]
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    x = base_model(inputs)

    # Head
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(128, activation='relu', kernel_regularizer=l2(0.001))(x) # Added L2 regularization
    x = layers.Dropout(0.3)(x)  # Increased dropout from 0.1 to 0.3

    #Final Layer (Output)
    output = layers.Dense(1, activation='sigmoid')(x)

    model = keras.Model(inputs=[inputs], outputs=output)

    return model

In [None]:
keras.backend.clear_session()

model_pretrained = get_pretrained()
model_pretrained.compile(loss='binary_crossentropy'
                       , optimizer = keras.optimizers.Adam(learning_rate=tf.keras.optimizers.schedules.CosineDecayRestarts(
                                                                                                          1e-4,
                                                                                                          first_decay_steps=10,
                                                                                                          t_mul=2.0,
                                                                                                          m_mul=0.8,
                                                                                                          alpha=1e-6
                                                                                                      ))
                       , metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
# Define early stopping callback
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True) #Increase patience from 5 to 10
# Stop training when validation loss stops improving for 3 epochs

# Define learning rate reduction callback
plateau = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, verbose=1, min_lr=1e-7)

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=len(train_df)//BATCH,
          validation_steps=len(val_df)//BATCH);

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 361ms/step - binary_accuracy: 0.7268 - loss: 0.8158 - val_binary_accuracy: 0.7433 - val_loss: 0.7562 - learning_rate: 8.4422e-06
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 54ms/step - binary_accuracy: 0.5000 - loss: 0.9692 - val_binary_accuracy: 0.7442 - val_loss: 0.7551 - learning_rate: 8.1193e-06
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 544ms/step - binary_accuracy: 0.7402 - loss: 0.7602 - val_binary_accuracy: 0.7462 - val_loss: 0.7257 - learning_rate: 7.9610e-06
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 77ms/step - binary_accuracy: 0.4375 - loss: 1.0858 - val_binary_accuracy: 0.7423 - val_loss: 0.7296 - learning_rate: 7.8235e-06
Epoch 5/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 392ms/step - binary_accuracy: 0.7430 - loss: 0.7330 - val_binary_accuracy: 0.7433 - val_loss: 0.70

In [None]:
score = model_pretrained.evaluate(ds_val, steps = len(val_df)//BATCH, verbose = 0)
print('train loss:', score[0])
print('train accuracy:', score[1])

train loss: 0.7080068588256836
train accuracy: 0.7442307472229004


In [None]:
score = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.8772689700126648
Test accuracy: 0.599078357219696


### EfficientNetB3 Fine Tuning

Balance the data

In [None]:
df_train['class'].value_counts()

Unnamed: 0_level_0,count
class,Unnamed: 1_level_1
PNEUMONIA,3897
Normal,1349


In [None]:
from sklearn.utils.class_weight import compute_class_weight

class_weights = compute_class_weight('balanced', classes=np.unique(df_train['class']), y=df_train['class'])
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

Instead of unfreezing 25 layers at once, fine-tune incrementally

#### Step 1: Unfreeze last 10 layers and train for 5 epochs

In [None]:
base_model.trainable = True

# Step 1: Unfreeze last 10 layers and train for 5 epochs
for layer in base_model.layers[-10:]:  # Instead of -13
    layer.trainable = False

In [None]:
model_pretrained.compile(loss='binary_crossentropy', optimizer = keras.optimizers.Adam(learning_rate=tf.keras.optimizers.schedules.CosineDecayRestarts(
                                                                                                          1e-4,
                                                                                                          first_decay_steps=10,
                                                                                                          t_mul=2.0,
                                                                                                          m_mul=0.8,
                                                                                                          alpha=1e-6
                                                                                                      ))
                                                  , metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          class_weight=class_weight_dict,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=(len(train_df)//BATCH),
          validation_steps=(len(val_df)//BATCH));

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 465ms/step - binary_accuracy: 0.9283 - loss: 0.3563 - val_binary_accuracy: 0.5250 - val_loss: 1.0501 - learning_rate: 6.9286e-06
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 75ms/step - binary_accuracy: 0.7500 - loss: 0.4105 - val_binary_accuracy: 0.5952 - val_loss: 0.9400 - learning_rate: 6.8719e-06
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 458ms/step - binary_accuracy: 0.9242 - loss: 0.3515 - val_binary_accuracy: 0.5375 - val_loss: 0.8265 - learning_rate: 2.0913e-05
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 77ms/step - binary_accuracy: 0.9375 - loss: 0.3506 - val_binary_accuracy: 0.5317 - val_loss: 0.8299 - learning_rate: 2.0911e-05
Epoch 5/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 462ms/step - binary_accuracy: 0.9268 - loss: 0.3419 - val_binary_accuracy: 0.9038 - val_loss: 0.42

  #### Step 2: Unfreeze last 30 layers and train again

In [None]:
  # Step 2: Unfreeze last 30 layers and train again
for layer in base_model.layers[-30:]:
    layer.trainable = True

In [None]:
model_pretrained.compile(loss='binary_crossentropy', optimizer = keras.optimizers.Adam(learning_rate=tf.keras.optimizers.schedules.CosineDecayRestarts(
                                                                                                          1e-4,
                                                                                                          first_decay_steps=10,
                                                                                                          t_mul=2.0,
                                                                                                          m_mul=0.8,
                                                                                                          alpha=1e-6
                                                                                                      ))
                                                  , metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          class_weight=class_weight_dict,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=(len(train_df)//BATCH),
          validation_steps=(len(val_df)//BATCH));

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 475ms/step - binary_accuracy: 0.9113 - loss: 0.3502 - val_binary_accuracy: 0.5913 - val_loss: 0.8373 - learning_rate: 8.4422e-06
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 55ms/step - binary_accuracy: 0.9375 - loss: 0.2943 - val_binary_accuracy: 0.6202 - val_loss: 0.8069 - learning_rate: 8.1193e-06
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 465ms/step - binary_accuracy: 0.9427 - loss: 0.3143 - val_binary_accuracy: 0.8817 - val_loss: 0.4381 - learning_rate: 7.9610e-06
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 54ms/step - binary_accuracy: 0.9375 - loss: 0.2919 - val_binary_accuracy: 0.8760 - val_loss: 0.4434 - learning_rate: 7.8235e-06
Epoch 5/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 462ms/step - binary_accuracy: 0.9304 - loss: 0.3211 - val_binary_accuracy: 0.8990 - val_loss: 0.39

In [None]:
score = model_pretrained.evaluate(ds_val, steps = len(val_df)//BATCH, verbose = 0)
print('train loss:', score[0])
print('train accuracy:', score[1])

train loss: 0.4012756049633026
train accuracy: 0.8971154093742371


In [None]:
score = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.3469134569168091
Test accuracy: 0.9231950640678406


In [None]:
ds_test.reset()
predictions = model_pretrained.predict(ds_test, steps=len(ds_test), verbose=0)
pred_labels= np.where(predictions>0.5, 1, 0)

### EfficientNetB3 Metrics

In [None]:
from sklearn.metrics import classification_report, roc_auc_score,roc_curve,confusion_matrix
true_labels = ds_test.classes  # Extract true labels
cm_df = pd.DataFrame(confusion_matrix(true_labels,pred_labels))
cm_df.columns = ['Predicted 0','Predicted 1']
cm_df = cm_df.rename(index={0: 'Actual 0',1:'Actual 1'})
cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,249,12
Actual 1,38,352


In [None]:
roc_auc=roc_auc_score(true_labels, pred_labels)
print('ROC AUC=%.3f' % roc_auc)

ROC AUC=0.928


#### Model Improving 2 to increase accuracy

In [None]:
IMG_SIZE = 128
BATCH = 16
SEED = 42

In [None]:
train_df, val_df = train_test_split(df_train, test_size = 0.20, random_state = SEED, stratify = df_train['class'])

In [None]:
train_datagen = ImageDataGenerator( rescale=1/255.,
                                    zoom_range=0.2,
                                    rotation_range=30,  # Increased
                                    width_shift_range=0.2,
                                    height_shift_range=0.2,
                                    horizontal_flip=True,
                                    shear_range=0.2,  # New
                                    brightness_range=[0.8, 1.2]
)

val_datagen = ImageDataGenerator(rescale=1/255.)

ds_train = train_datagen.flow_from_dataframe(train_df,
                                             #directory=train_path, #dataframe contains the full paths
                                             x_col = 'image',
                                             y_col = 'class',
                                             target_size = (IMG_SIZE, IMG_SIZE),
                                             class_mode = 'binary',
                                             batch_size = BATCH,
                                             seed = SEED)

ds_val = val_datagen.flow_from_dataframe(val_df,
                                            #directory=train_path,
                                            x_col = 'image',
                                            y_col = 'class',
                                            target_size = (IMG_SIZE, IMG_SIZE),
                                            class_mode = 'binary',
                                            batch_size = BATCH,
                                            seed = SEED)

ds_test = val_datagen.flow_from_dataframe(df_test,
                                            #directory=test_path,
                                            x_col = 'image',
                                            y_col = 'class',
                                            target_size = (IMG_SIZE, IMG_SIZE),
                                            class_mode = 'binary',
                                            batch_size = 1,
                                            shuffle = False)

Found 4196 validated image filenames belonging to 2 classes.
Found 1050 validated image filenames belonging to 2 classes.
Found 651 validated image filenames belonging to 2 classes.


**Transfer Learning**

Back to ResNet152V2 but optimize fine-tuning

In [None]:
from tensorflow.keras.regularizers import l2

base_model = tf.keras.applications.ResNet152V2(
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False)

base_model.trainable = False

def get_pretrained():

    #Input shape = [width, height, color channels]
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    x = base_model(inputs)

    # Head
    #increase neurons in the dense layer and lower dropout slightly
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=l2(0.001))(x) # Increased from 128 to 256
    x = layers.Dropout(0.2)(x)  # Lower dropout

    #Final Layer (Output)
    output = layers.Dense(1, activation='sigmoid')(x)

    model = keras.Model(inputs=[inputs], outputs=output)

    return model

In [None]:
keras.backend.clear_session()

model_pretrained = get_pretrained()
model_pretrained.compile(loss='binary_crossentropy'
                       , optimizer = keras.optimizers.Adam(learning_rate=2e-6)
                       , metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
# Define early stopping callback
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True) #Increase patience from 5 to 10
# Stop training when validation loss stops improving for 3 epochs

# Define learning rate reduction callback
plateau = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, verbose=1, min_lr=1e-7)

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=len(train_df)//BATCH,
          validation_steps=len(val_df)//BATCH);

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1621s[0m 6s/step - binary_accuracy: 0.7036 - loss: 1.1140 - val_binary_accuracy: 0.8173 - val_loss: 0.8585 - learning_rate: 2.0000e-06
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 156ms/step - binary_accuracy: 0.6250 - loss: 1.0203 - val_binary_accuracy: 0.8173 - val_loss: 0.8616 - learning_rate: 2.0000e-06
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 414ms/step - binary_accuracy: 0.7570 - loss: 0.9847 - val_binary_accuracy: 0.8625 - val_loss: 0.8039 - learning_rate: 2.0000e-06
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 75ms/step - binary_accuracy: 0.7500 - loss: 0.9619 - val_binary_accuracy: 0.8654 - val_loss: 0.8013 - learning_rate: 2.0000e-06
Epoch 5/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 489ms/step - binary_accuracy: 0.7785 - loss: 0.9306 - val_binary_accuracy: 0.8712 - val_loss: 0.766

In [None]:
score = model_pretrained.evaluate(ds_val, steps = len(val_df)//BATCH, verbose = 0)
print('train loss:', score[0])
print('train accuracy:', score[1])

train loss: 0.7668096423149109
train accuracy: 0.870192289352417


In [None]:
score = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.9669387936592102
Test accuracy: 0.7342550158500671


**Fine Tuning**

Balance the data

In [None]:
df_train['class'].value_counts()

Unnamed: 0_level_0,count
class,Unnamed: 1_level_1
PNEUMONIA,3897
Normal,1349


In [None]:
from sklearn.utils.class_weight import compute_class_weight

class_weights = compute_class_weight('balanced', classes=np.unique(df_train['class']), y=df_train['class'])
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

Instead of unfreezing 25 layers at once, fine-tune incrementally

In [None]:
base_model.trainable = True

# Step 1: Unfreeze last 10 layers and train for 5 epochs
for layer in base_model.layers[-20:]:  # Instead of -10
    layer.trainable = False

In [None]:
model_pretrained.compile(loss='binary_crossentropy', optimizer = keras.optimizers.Adam(learning_rate=2e-6), metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          class_weight=class_weight_dict,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=(len(train_df)//BATCH),
          validation_steps=(len(val_df)//BATCH));

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 613ms/step - binary_accuracy: 0.6504 - loss: 1.1558 - val_binary_accuracy: 0.8385 - val_loss: 0.9237 - learning_rate: 2.0000e-06
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 70ms/step - binary_accuracy: 0.8750 - loss: 0.8488 - val_binary_accuracy: 0.8413 - val_loss: 0.9228 - learning_rate: 2.0000e-06
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 587ms/step - binary_accuracy: 0.8051 - loss: 0.9093 - val_binary_accuracy: 0.8933 - val_loss: 0.8062 - learning_rate: 2.0000e-06
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 62ms/step - binary_accuracy: 0.8750 - loss: 0.7959 - val_binary_accuracy: 0.8913 - val_loss: 0.8088 - learning_rate: 2.0000e-06
Epoch 5/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 775ms/step - binary_accuracy: 0.8696 - loss: 0.7798 - val_binary_accuracy: 0.9202 - val_loss: 0.70

In [None]:
# Step 2: Unfreeze last 50 layers and train again
for layer in base_model.layers[-50:]:
    layer.trainable = True

In [None]:
model_pretrained.compile(loss='binary_crossentropy', optimizer = keras.optimizers.Adam(learning_rate=2e-6), metrics=['binary_accuracy'])

model_pretrained.summary()

In [None]:
history = model_pretrained.fit(ds_train,
          batch_size = BATCH, epochs = 5,
          validation_data=ds_val,
          class_weight=class_weight_dict,
          callbacks=[early_stopping, plateau],
          steps_per_epoch=(len(train_df)//BATCH),
          validation_steps=(len(val_df)//BATCH));

Epoch 1/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 614ms/step - binary_accuracy: 0.8709 - loss: 0.8324 - val_binary_accuracy: 0.8923 - val_loss: 0.7621 - learning_rate: 2.0000e-06
Epoch 2/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 72ms/step - binary_accuracy: 0.9375 - loss: 0.8293 - val_binary_accuracy: 0.8923 - val_loss: 0.7616 - learning_rate: 2.0000e-06
Epoch 3/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 598ms/step - binary_accuracy: 0.8768 - loss: 0.7762 - val_binary_accuracy: 0.8913 - val_loss: 0.7598 - learning_rate: 2.0000e-06
Epoch 4/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 66ms/step - binary_accuracy: 0.8125 - loss: 1.0672 - val_binary_accuracy: 0.8904 - val_loss: 0.7609 - learning_rate: 2.0000e-06
Epoch 5/5
[1m262/262[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 770ms/step - binary_accuracy: 0.8941 - loss: 0.7116 - val_binary_accuracy: 0.8923 - val_loss: 0.72

### Improved ResNet152V2 Metrics

In [None]:
score = model_pretrained.evaluate(ds_val, steps = len(val_df)//BATCH, verbose = 0)
print('train loss:', score[0])
print('train accuracy:', score[1])

train loss: 0.7267483472824097
train accuracy: 0.8913461565971375


In [None]:
score = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.6729220151901245
Test accuracy: 0.9155145883560181


In [None]:
ds_test.reset()
predictions = model_pretrained.predict(ds_test, steps=len(ds_test), verbose=0)
pred_labels= np.where(predictions>0.5, 1, 0)

In [None]:
from sklearn.metrics import classification_report, roc_auc_score,roc_curve,confusion_matrix
true_labels = ds_test.classes  # Extract true labels
cm_df = pd.DataFrame(confusion_matrix(true_labels,pred_labels))
cm_df.columns = ['Predicted 0','Predicted 1']
cm_df = cm_df.rename(index={0: 'Actual 0',1:'Actual 1'})
cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,238,23
Actual 1,32,358


In [None]:
roc_auc=roc_auc_score(true_labels, pred_labels)
print('ROC AUC=%.3f' % roc_auc)

ROC AUC=0.915
