# 1. Setup and Data Loading for Pneumonia Classifier

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import numpy as np
import os
import matplotlib.pyplot as plt
from google.colab import drive
from sklearn.utils import class_weight
import shutil

# Report the runtime environment. An empty GPU list means TensorFlow found no
# GPU device and will run on CPU.
print(f"TensorFlow Version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

# --- Step 0: Mount Google Drive and Set Paths ---
print("\nMounting Google Drive...")
drive.mount('/content/drive')
print("Google Drive mounted successfully!")

# Root of the dataset inside Google Drive.
# IMPORTANT: CHANGE THIS PATH to wherever your copy of the dataset lives.
GOOGLE_DRIVE_DATASET_ROOT = '/content/drive/MyDrive/Dataset/chest_xray'
BASE_DIR = GOOGLE_DRIVE_DATASET_ROOT

# One directory per split; each is later read with flow_from_directory.
train_dir = os.path.join(BASE_DIR, 'train')
val_dir = os.path.join(BASE_DIR, 'val')
test_dir = os.path.join(BASE_DIR, 'test')

# Fail early, with a message that points at the setting to edit, instead of
# failing later inside the data-loading code.
missing_dirs = [d for d in (train_dir, val_dir, test_dir) if not os.path.isdir(d)]
if missing_dirs:
    raise FileNotFoundError(
        "Dataset split directories not found: "
        + ", ".join(missing_dirs)
        + ". Update GOOGLE_DRIVE_DATASET_ROOT to point at your dataset."
    )

# Output locations for models, the best-model checkpoint, and visualizations.
MODEL_SAVE_DIR = '/content/drive/MyDrive/pneumonia_project_models'
CLASSIFIER_PATH = '/content/drive/MyDrive/my_model_checkpoints/pneumonia_resnet_best_model.h5'
VISUALIZATIONS_DIR = '/content/drive/MyDrive/pneumonia_project_visualizations'

# The checkpoint file lives outside MODEL_SAVE_DIR, so its parent directory
# has to be created explicitly as well.
for output_dir in (MODEL_SAVE_DIR, os.path.dirname(CLASSIFIER_PATH), VISUALIZATIONS_DIR):
    os.makedirs(output_dir, exist_ok=True)

# Image parameters
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32
NUM_CLASSES = 2

# --- Step 1: Prepare Data Loaders ---
print("\n--- Preparing Data Loaders ---")

# Seed for the training shuffle so the batch order is repeatable across runs.
SEED = 42

# Training images are rescaled to [0, 1] and lightly augmented; validation and
# test images are only rescaled so evaluation sees unmodified data.
# NOTE(review): the ImageNet ResNet50 weights are normally paired with
# tf.keras.applications.resnet50.preprocess_input rather than 1/255 scaling.
# The rescale is kept because changing it would invalidate any checkpoint
# already trained with it - confirm before switching.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
val_test_datagen = ImageDataGenerator(rescale=1./255)


def make_directory_iterator(datagen, directory, shuffle, seed=None):
    """Create a binary-label batch iterator over an image directory.

    Args:
        datagen: ImageDataGenerator that defines rescaling/augmentation.
        directory: Path to a split directory with one sub-folder per class.
        shuffle: Whether to shuffle the sample order.
        seed: Optional seed for the shuffle; None leaves it unseeded.

    Returns:
        The iterator produced by ``datagen.flow_from_directory`` using the
        notebook-wide IMG_HEIGHT, IMG_WIDTH and BATCH_SIZE settings.
    """
    return datagen.flow_from_directory(
        directory,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=shuffle,
        seed=seed,
    )


train_generator = make_directory_iterator(train_datagen, train_dir, shuffle=True, seed=SEED)
# Validation/test order is fixed (shuffle=False) so predictions can be aligned
# with the iterator's file order.
validation_generator = make_directory_iterator(val_test_datagen, val_dir, shuffle=False)
test_generator = make_directory_iterator(val_test_datagen, test_dir, shuffle=False)

print("\nClass Indices Mapping:", train_generator.class_indices)

# --- Step 1.5: Address Class Imbalance with Class Weights ---
print("\n--- Calculating Class Weights for Imbalanced Data ---")

# Integer class label of every training image, in the iterator's file order.
y_train = train_generator.classes
class_labels = np.unique(y_train)

# 'balanced' weights each class inversely to its frequency, so the rarer
# class contributes more to the loss per sample.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=y_train
)

# Use plain Python int/float rather than NumPy scalars: the mapping passed to
# model.fit(class_weight=...) is then a simple {class_index: weight} dict and
# prints readably.
class_weights_dict = {
    int(label): float(weight)
    for label, weight in zip(class_labels, class_weights)
}
print("Class Weights:", class_weights_dict)

TensorFlow Version: 2.19.0
GPU Available: []

Mounting Google Drive...
Mounted at /content/drive
Google Drive mounted successfully!

--- Preparing Data Loaders ---
Found 5277 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.

Class Indices Mapping: {'NORMAL': 0, 'PNEUMONIA': 1}

--- Calculating Class Weights for Imbalanced Data ---
Class Weights: {np.int32(0): np.float64(1.9529977794226498), np.int32(1): np.float64(0.6720580743759552)}


# 2. Build and/or Train the Classifier Model

In [None]:
# --- Step 2: Locate the Checkpoint Used to Resume Training ---
print("\n--- Locating Model Checkpoint ---")

# Same file as CLASSIFIER_PATH from the setup cell; reuse it so the path is
# defined in one place.
checkpoint_filepath = CLASSIFIER_PATH

# Make sure the checkpoint's directory exists before anything tries to write
# the best model there.
os.makedirs(os.path.dirname(checkpoint_filepath), exist_ok=True)

# The model is loaded or built in Step 4 below, so nothing is constructed
# here. Building one at this point would only be overwritten by Step 4, and
# inspecting model.layers[0] to guess the training phase is unreliable: for a
# functional Model created from base_model.input, layers[0] is the input
# layer rather than the ResNet50 backbone.
if os.path.exists(checkpoint_filepath):
    print(f"Found a saved model at {checkpoint_filepath}; Step 4 will load it to resume training.")
else:
    print("No previously saved model found; Step 4 will build a new model from scratch.")

# --- Step 3: Define Callbacks ---
print("\n--- Defining Callbacks ---")

# Floor for the learning-rate schedule. It must be BELOW the rate the model is
# compiled with (1e-6 in this notebook): ReduceLROnPlateau only lowers the
# rate while the current rate is above min_lr, so a floor of 1e-5 would make
# the callback a no-op.
MIN_LEARNING_RATE = 1e-8

# Save the full model (not just weights) to Google Drive whenever validation
# accuracy improves, so training can be resumed with load_model.
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)

# Stop after 5 epochs without a val_loss improvement and roll the in-memory
# model back to the weights of its best epoch.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

# Cut the learning rate by 10x after 3 stagnant epochs. Patience is shorter
# than early stopping's, so a reduction is tried before training is abandoned.
reduce_lr_on_plateau_callback = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
    min_lr=MIN_LEARNING_RATE,
    verbose=1
)

callbacks = [model_checkpoint_callback, early_stopping_callback, reduce_lr_on_plateau_callback]

# --- Step 4: Build, Compile, and Train the Model (Combined Approach) ---
print("\n--- Building and Training the Model (Combined Approach) ---")

# Training hyperparameters for the combined (head + partial backbone) run.
FINE_TUNE_LEARNING_RATE = 1e-6       # deliberately tiny: pretrained weights are being updated
NUM_TRAINABLE_BACKBONE_LAYERS = 50   # only the last N ResNet50 layers are trained
MAX_EPOCHS = 30                      # upper bound; the callbacks may stop earlier

# Resume from the best saved model if one exists, otherwise build a new one.
if os.path.exists(checkpoint_filepath):
    print(f"Loading previously best saved model from {checkpoint_filepath} to resume training...")
    model = tf.keras.models.load_model(checkpoint_filepath)
else:
    print("No previously saved model found. Building a new model from scratch...")

    # ResNet50 pre-trained on ImageNet, without its 1000-class top layer.
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

    # Selective fine-tuning from the start: mark the whole backbone trainable,
    # then freeze everything except the last NUM_TRAINABLE_BACKBONE_LAYERS layers.
    base_model.trainable = True
    for layer in base_model.layers[:-NUM_TRAINABLE_BACKBONE_LAYERS]:
        layer.trainable = False

    # Classification head: pooled features -> dense -> dropout -> sigmoid
    # probability for the positive class.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

# (Re)compile in both branches so a resumed model also trains at the low
# fine-tuning rate. This creates a fresh optimizer, so optimizer state from
# an earlier run is not carried over.
model.compile(optimizer=Adam(learning_rate=FINE_TUNE_LEARNING_RATE),
              loss='binary_crossentropy',
              metrics=['accuracy',
                       tf.keras.metrics.Precision(),
                       tf.keras.metrics.Recall(),
                       tf.keras.metrics.AUC()])

print("\nModel Summary (Combined Training):")
model.summary()

print("\n--- Starting Combined Training ---")

# Number of batches in one full pass, including the final partial batch.
# It is NOT passed to fit(): Keras infers the epoch length from the generator.
# Forcing `samples // BATCH_SIZE` stops one batch short of a full pass, which
# skips images each epoch and, on Keras 3, can leave the iterator mid-pass so
# the following epoch ends early with an "input ran out of data" warning.
steps_per_epoch_train = len(train_generator)
print(f"Batches per epoch: {steps_per_epoch_train}")

history_combined = model.fit(
    train_generator,
    epochs=MAX_EPOCHS,
    validation_data=validation_generator,
    callbacks=callbacks,
    class_weight=class_weights_dict
)
print("\nCombined training complete. Best model saved to Google Drive.")

# 3. Evaluate the Classifier Model

In [5]:
# --- Final Evaluation ---

# Evaluate the best checkpoint written during training (same file as
# CLASSIFIER_PATH from the setup cell).
checkpoint_filepath = CLASSIFIER_PATH

# The checkpoint is a full saved model, so load_model restores architecture
# and weights in one step; a separate load_weights call on the same file is
# unnecessary and would only re-read it from Drive.
model = tf.keras.models.load_model(checkpoint_filepath)

print("\n--- Final Evaluation on Test Set ---")
# evaluate() returns the loss followed by the compiled metrics in the order
# they were given to compile(): accuracy, precision, recall, AUC.
test_loss, test_acc, test_precision, test_recall, test_auc = model.evaluate(test_generator)

print("\n--- Test Set Results ---")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")
print(f"Test AUC: {test_auc:.4f}")




--- Final Evaluation on Test Set ---


  self._warn_if_super_not_called()


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 9s/step - accuracy: 0.7753 - auc_1: 0.6196 - loss: 0.4785 - precision_1: 0.4512 - recall_1: 0.6200

--- Test Set Results ---
Test Loss: 0.3447
Test Accuracy: 0.8446
Test Precision: 0.8337
Test Recall: 0.9385
Test AUC: 0.9302
