<a href="https://colab.research.google.com/github/asheta66/CNN/blob/main/Chest%20X_Ray/Chest_XRay_TL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chest X-Ray Classification using Transfer Learning

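Cleaned and fixed pipeline: trains ImageNet-pretrained ResNet50, InceptionV3, MobileNetV2 and VGG16 backbones with a new classification head on NORMAL vs. PNEUMONIA chest X-rays and compares their metrics.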

In [2]:
# ============================================
# COMPLETE CHEST X-RAY CLASSIFICATION PIPELINE
# ResNet50 / InceptionV3 / MobileNetV2 / VGG16 - Train, Evaluate, Collect Results
# ============================================

import os
import shutil
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import label_binarize
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# ----------------------------
# 0. Hyperparameters / Summary
# ----------------------------
# Tunable settings for the whole notebook. Several of them are not wired into
# the visible cells yet; see the NOTE(review) markers.
IMG_SIZE = (224, 224)  # used as flow_from_directory(target_size=...) and in each backbone's input_shape
BATCH_SIZE = 64  # batch size of the training generator (the test generator uses 1)
EPOCHS = 10  # number of epochs passed to model.fit in train_model
LEARNING_RATE = 0.01  # NOTE(review): build_model defaults to lr=1e-4 and no visible call passes this value
FINE_TUNE_LAYERS = 20  # NOTE(review): not referenced in the visible cells
# Augmentation settings kept for reference only: the Data Generators cell
# hard-codes rotation_range=15, zoom_range=0.1 and horizontal_flip=True and
# applies no width/height shifts.
# ROTATION_RANGE = 15
# ZOOM_RANGE = 0.1
# WIDTH_SHIFT_RANGE = 0.1
# HEIGHT_SHIFT_RANGE = 0.1
# HORIZONTAL_FLIP = True
MAX_IMAGES_PER_CLASS = 500  # cap on the number of images taken from each class folder
TRAIN_TEST_SPLIT = 0.8  # training fraction of the per-class split (0.8 train / 0.2 test)

In [4]:
# ----------------------------
# 1. Mount Google Drive
# ----------------------------
# Colab-only step: mounts Drive at /content/drive, which is the root of the
# dataset path used by the dataset-split step.
from google.colab import drive
drive.mount('/content/drive')

# ----------------------------
# 2. Dataset Split
# ----------------------------
# Copies up to MAX_IMAGES_PER_CLASS images per class from Drive into a local
# train/test directory tree that flow_from_directory can read.
dataset_dir = '/content/drive/MyDrive/Chest X_Ray'
output_dir = '/content/Chest_XRay_split'
train_dir = os.path.join(output_dir, 'train')
test_dir = os.path.join(output_dir, 'test')

CLASS_NAMES = ['NORMAL', 'PNEUMONIA']
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Start from an empty split directory: copies left over from an earlier run
# (e.g. with a different MAX_IMAGES_PER_CLASS) would otherwise stay in place
# and could put the same image in both train and test.
shutil.rmtree(output_dir, ignore_errors=True)

for folder in [train_dir, test_dir]:
    for cls in CLASS_NAMES:
        os.makedirs(os.path.join(folder, cls), exist_ok=True)

for cls in CLASS_NAMES:
    cls_dir = os.path.join(dataset_dir, cls)
    # os.listdir returns entries in arbitrary order; sorting makes both the
    # capped subset and the seeded split reproducible across runs.
    images = sorted(
        img for img in os.listdir(cls_dir)
        if img.lower().endswith(IMAGE_EXTENSIONS)
    )[:MAX_IMAGES_PER_CLASS]
    # Use the configured training fraction instead of a hard-coded test size.
    train_imgs, test_imgs = train_test_split(
        images, train_size=TRAIN_TEST_SPLIT, random_state=42
    )
    for split_dir, split_imgs in ((train_dir, train_imgs), (test_dir, test_imgs)):
        for img in split_imgs:
            shutil.copy(os.path.join(cls_dir, img), os.path.join(split_dir, cls, img))

print("Dataset split completed successfully!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset split completed successfully!


In [15]:
# ============================
# Data Generators
# ============================

# Training images: rescaled by 1/255 and randomly rotated, zoomed and mirrored.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.1,
    rotation_range=15,
    rescale=1.0 / 255,
)

# Test images: rescaled only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Shuffled batches of BATCH_SIZE images with one-hot labels.
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    class_mode='categorical',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# One image per batch, served in a fixed (unshuffled) order.
test_generator = test_datagen.flow_from_directory(
    directory=test_dir,
    class_mode='categorical',
    target_size=IMG_SIZE,
    batch_size=1,
    shuffle=False,
)


Found 800 images belonging to 2 classes.
Found 200 images belonging to 2 classes.


In [16]:
# ===== Model Builder =====
def build_model(base_model, lr=1e-4):
    """Put a softmax classification head on a frozen backbone and compile it.

    Args:
        base_model: Keras model whose ``input`` and ``output`` tensors are
            reused. It is frozen in place (``trainable = False``).
        lr: Learning rate given to the Adam optimizer.

    Returns:
        The compiled Keras ``Model`` mapping ``base_model.input`` to a
        ``NUM_CLASSES``-way softmax.
    """
    # Freeze the backbone before compiling so only the new head is trainable.
    base_model.trainable = False

    pooled = GlobalAveragePooling2D()(base_model.output)
    regularized = Dropout(0.3)(pooled)
    class_probs = Dense(NUM_CLASSES, activation='softmax')(regularized)

    classifier = Model(inputs=base_model.input, outputs=class_probs)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=lr),
        metrics=['accuracy'],
    )
    return classifier


In [17]:
# ===== Training =====
def train_model(model):
    """Fit ``model`` on ``train_generator`` for ``EPOCHS`` epochs.

    ``test_generator`` is supplied as the validation data, so the per-epoch
    validation metrics are computed on the test split.

    Returns:
        Whatever ``model.fit`` returns.
    """
    fit_options = {
        'validation_data': test_generator,
        'epochs': EPOCHS,
        'verbose': 1,
    }
    return model.fit(train_generator, **fit_options)

In [18]:
# ===== Evaluation =====
def _collect_predictions(model, gen):
    """Predict every batch of ``gen`` and return labels aligned with the scores.

    Labels are taken from the same batches that are predicted on, so the
    pairing stays correct even when the generator shuffles its samples
    (``gen.classes`` is in directory order and does not follow the shuffle).

    Returns:
        (y_true, y_prob): class indices (argmax of the one-hot batch labels)
        and the model outputs, concatenated over all batches.
    """
    true_batches, prob_batches = [], []
    for batch_idx in range(len(gen)):
        images, onehot = gen[batch_idx]
        prob_batches.append(np.asarray(model.predict_on_batch(images)))
        true_batches.append(np.argmax(onehot, axis=1))
    return np.concatenate(true_batches), np.concatenate(prob_batches)


def _plot_confusion_matrix(y_true, y_pred, title):
    """Show the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap."""
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(4, 4))
    sns.heatmap(cm, annot=True, fmt='d', square=True, cmap='Blues', ax=ax)
    ax.set(title=title, xlabel='Predicted', ylabel='True')
    plt.show()


def _plot_roc(y_true, y_score, title):
    """Show the ROC curve (with AUC in the legend) for binary labels and scores."""
    y_bin = label_binarize(y_true, classes=[0, 1])
    fpr, tpr, _ = roc_curve(y_bin.ravel(), y_score)
    fig, ax = plt.subplots(figsize=(4, 4))
    ax.plot(fpr, tpr, label=f"AUC={auc(fpr,tpr):.3f}")
    ax.plot([0, 1], [0, 1], '--')
    ax.grid(True)
    ax.legend()
    ax.set(title=title, xlabel='False positive rate', ylabel='True positive rate')
    plt.show()


def evaluate_model(model, name):
    """Score ``model`` on the train and test generators and plot diagnostics.

    For each of ``train_generator`` ('Train') and ``test_generator`` ('Test')
    this computes accuracy and macro-averaged precision, recall and F1, and
    shows a confusion matrix and a ROC curve. The ROC uses the score of class
    index 1, so it assumes a two-class problem.

    Train metrics are computed on the batches ``train_generator`` yields,
    i.e. including whatever augmentation that generator applies.

    Args:
        model: Trained Keras model producing per-class scores.
        name: Model label used in plot titles and in the 'Model' column.

    Returns:
        DataFrame with columns Model, Dataset, Accuracy, Precision, Recall, F1
        and one row per dataset.
    """
    results = []
    for label, gen in [('Train', train_generator), ('Test', test_generator)]:
        gen.reset()
        # Do not use gen.classes here: with shuffle=True it is not in the
        # order in which the generator serves the samples.
        y_true, y_prob = _collect_predictions(model, gen)
        y_pred = np.argmax(y_prob, axis=1)

        acc = accuracy_score(y_true, y_pred)
        prec = precision_score(y_true, y_pred, average='macro')
        rec = recall_score(y_true, y_pred, average='macro')
        f1 = f1_score(y_true, y_pred, average='macro')

        results.append([name, label, acc, prec, rec, f1])

        _plot_confusion_matrix(y_true, y_pred, f"{name} - {label} CM")
        _plot_roc(y_true, y_prob[:, 1], f"{name} - {label} ROC")

    return pd.DataFrame(results, columns=['Model','Dataset','Accuracy','Precision','Recall','F1'])

In [19]:
import pandas as pd
from tensorflow.keras.applications import VGG16, ResNet50, InceptionV3, MobileNetV2

# Accumulates one metrics DataFrame per trained model; the final cell
# concatenates this list into the comparison table, so it must be defined
# before any training cell runs.
all_results = []

In [20]:
import os

# Take the class count from the generator rather than from os.listdir:
# listdir also counts stray entries (e.g. .DS_Store), which would make the
# Dense head width disagree with the generator's one-hot labels.
NUM_CLASSES = len(train_generator.class_indices)
print(f"Number of classes: {NUM_CLASSES}")

Number of classes: 2


In [21]:
from tensorflow.keras.applications import VGG16, ResNet50, InceptionV3, MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
import tensorflow as tf

In [None]:
print("\n===== Training ResNet50 =====")

# ImageNet-pretrained convolutional base without its classification head.
# NOTE(review): the generators rescale pixels to [0, 1], whereas Keras
# documents resnet50.preprocess_input (caffe-style channel centering) for
# these weights; confirm the input scaling is intended.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(*IMG_SIZE, 3)
)

model_resnet = build_model(base_model)
history_resnet = train_model(model_resnet)

# evaluate_model accepts only (model, name); the extra keyword arguments
# previously passed here raised a TypeError.
df_resnet = evaluate_model(model_resnet, "ResNet50")

all_results.append(df_resnet)
# Last expression of the cell so the metrics table is actually displayed.
df_resnet



===== Training ResNet50 =====


  self._warn_if_super_not_called()


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13s/step - accuracy: 0.5076 - loss: 0.8140 

In [None]:
print("\n===== Training InceptionV3 =====")

# ImageNet-pretrained convolutional base without its classification head.
# NOTE(review): the generators rescale pixels to [0, 1], whereas Keras
# documents inception_v3.preprocess_input (scaling to [-1, 1]) for these
# weights; confirm the input scaling is intended.
base_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(*IMG_SIZE, 3)
)

model_inception = build_model(base_model)
history_inception = train_model(model_inception)

# evaluate_model accepts only (model, name); the extra keyword arguments
# previously passed here raised a TypeError.
df_inception = evaluate_model(model_inception, "InceptionV3")

all_results.append(df_inception)
# Last expression of the cell so the metrics table is actually displayed.
df_inception


In [12]:
print("\n===== Training MobileNetV2 =====")

# ImageNet-pretrained convolutional base without its classification head.
# NOTE(review): the generators rescale pixels to [0, 1], whereas Keras
# documents mobilenet_v2.preprocess_input (scaling to [-1, 1]) for these
# weights; confirm the input scaling is intended.
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(*IMG_SIZE, 3)
)

model_mobilenet = build_model(base_model)
history_mobilenet = train_model(model_mobilenet)

# evaluate_model accepts only (model, name); the extra keyword arguments
# previously passed here raised a TypeError.
df_mobilenet = evaluate_model(model_mobilenet, "MobileNetV2")

all_results.append(df_mobilenet)
# Last expression of the cell so the metrics table is actually displayed.
df_mobilenet



===== Training MobileNetV2 =====
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


NameError: name 'test_generator' is not defined

In [None]:
print("\n===== Training VGG16 =====")

# ImageNet-pretrained convolutional base without its classification head.
# NOTE(review): the generators rescale pixels to [0, 1], whereas Keras
# documents vgg16.preprocess_input (caffe-style channel centering) for these
# weights; confirm the input scaling is intended.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(*IMG_SIZE, 3)
)

model = build_model(base_model)
history = train_model(model)

# evaluate_model accepts only (model, name); the extra keyword arguments
# previously passed here raised a TypeError.
df_vgg16 = evaluate_model(model, 'VGG16')

all_results.append(df_vgg16)
# Last expression of the cell so the metrics table is actually displayed.
df_vgg16


In [None]:
# Combine the per-model metric tables into one comparison table.
# Re-running a training cell appends that model's rows to all_results again,
# so keep only the most recent rows for each (Model, Dataset) pair.
final_results = (
    pd.concat(all_results, ignore_index=True)
    .drop_duplicates(subset=['Model', 'Dataset'], keep='last')
    .reset_index(drop=True)
)
final_results