# Chest Pneumonia Classification with TensorFlow/Keras

This notebook reproduces the work from **bhavya1600**. It builds a convolutional neural network with TensorFlow, applies extensive image augmentation, and evaluates the trained model on validation and test splits.


## 1. Imports and environment checks


In [None]:
import os
from pathlib import Path
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np

# Report the runtime up front so results can be tied to a TensorFlow build
# and to whichever GPU devices TensorFlow reports (if any).
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"Using GPU: {gpu_devices}")


## 2. Dataset paths

Confirm that the dataset follows the expected directory layout.


In [None]:
# Root of the dataset, relative to the notebook's working directory.
data_root = Path('..', 'data', 'chest_xray')
train_dir, val_dir, test_dir = (data_root / name for name in ('train', 'val', 'test'))

# Every split must contain one sub-folder per class; fail fast on the first
# one that is missing, checking splits in train -> val -> test order.
expected_dirs = [
    split / label
    for split in (train_dir, val_dir, test_dir)
    for label in ('NORMAL', 'PNEUMONIA')
]
for path in expected_dirs:
    if not path.exists():
        raise FileNotFoundError(f'Missing expected folder: {path}')

print('All dataset folders are present.')


## 3. Data generators

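Set up image data generators for the training, validation, and test splits. The augmentations (matching the original project) are applied to the training split only; validation and test images are only rescaled.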


In [None]:
# Sizing shared by every split.
image_size = (224, 224)
batch_size = 32

# Keyword arguments common to all three directory iterators.
flow_kwargs = dict(
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary',
)

# Random augmentations applied to training images only.
augmentation_kwargs = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255, **augmentation_kwargs)

# Validation and test images are rescaled but never augmented.
val_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(train_dir, **flow_kwargs)
val_generator = val_datagen.flow_from_directory(val_dir, **flow_kwargs)

# The test iterator is the only one created with shuffle=False.
test_generator = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_kwargs)


## 4. Visualise augmented samples


In [None]:
# Pull one augmented batch from the training iterator to inspect visually.
images, labels = next(train_generator)

# Invert the generator's own folder-name -> index mapping so each title comes
# from the labels actually assigned, rather than from a hard-coded assumption.
index_to_class = {index: name for name, index in train_generator.class_indices.items()}

# Show at most a 3x3 grid; a batch can hold fewer than 9 images on tiny datasets.
n_samples = min(9, len(images))

plt.figure(figsize=(8, 8))
for i in range(n_samples):
    plt.subplot(3, 3, i + 1)
    plt.imshow(images[i])
    plt.title(index_to_class[int(labels[i])])
    plt.axis('off')
plt.tight_layout()
plt.show()


## 5. Model architecture

Define the CNN that bhavya1600 used, with batch normalisation and dropout for regularisation.


In [None]:
def build_model(input_shape):
    """Assemble the CNN used for binary chest X-ray classification.

    The network stacks four convolutional stages (32, 64, 128 and 256
    filters). Each stage is a 3x3 same-padded ReLU convolution followed by
    batch normalisation and max pooling. The flattened features then pass
    through dropout, a 256-unit ReLU dense layer, batch normalisation and a
    second dropout before a single sigmoid output unit.

    Args:
        input_shape: Shape handed to ``keras.Input``.

    Returns:
        A ``keras.Model`` named ``'bhavya1600_cnn'``.
    """
    inputs = keras.Input(shape=input_shape)

    # Convolutional feature extractor: identical stages, doubling the filters.
    features = inputs
    for n_filters in (32, 64, 128, 256):
        features = layers.Conv2D(n_filters, 3, activation='relu', padding='same')(features)
        features = layers.BatchNormalization()(features)
        features = layers.MaxPooling2D()(features)

    # Classification head with dropout on both sides of the dense layer.
    head = layers.Flatten()(features)
    head = layers.Dropout(0.5)(head)
    head = layers.Dense(256, activation='relu')(head)
    head = layers.BatchNormalization()(head)
    head = layers.Dropout(0.5)(head)

    outputs = layers.Dense(1, activation='sigmoid')(head)
    return keras.Model(inputs, outputs, name='bhavya1600_cnn')

# Extend the (height, width) image size with a trailing dimension of 3 to get
# the full input shape, then print the layer-by-layer summary.
model = build_model(input_shape=(*image_size, 3))
model.summary()


## 6. Compile the model


In [None]:
# Adam at a learning rate of 1e-4, with binary cross-entropy as the loss for
# the single sigmoid output; accuracy is tracked as the reported metric.
adam = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])


## 7. Callbacks


In [None]:
checkpoint_path = 'bhavya1600_cnn.h5'

# Write the model to disk only when val_accuracy is the best seen so far.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    save_best_only=True,
)

# Scale the learning rate by 0.2 once val_loss has stalled for 3 epochs.
plateau_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    verbose=1,
)

callbacks = [best_checkpoint, plateau_lr]


## 8. Training


In [None]:
# Upper bound on the number of passes over the training iterator.
epochs = 25

# Train on the augmented stream, validating after every epoch; `history`
# keeps the per-epoch metrics used by the plotting cell.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=val_generator,
    callbacks=callbacks,
)


## 9. Plot training curves


In [None]:
# One panel per tracked quantity: (history key, legend suffix, panel title).
panels = (
    ('accuracy', 'Acc', 'Accuracy over epochs'),
    ('loss', 'Loss', 'Loss over epochs'),
)

plt.figure(figsize=(12, 5))
for position, (metric, short_name, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    # Training curve first, then its validation counterpart.
    plt.plot(history.history[metric], label=f'Train {short_name}')
    plt.plot(history.history[f'val_{metric}'], label=f'Val {short_name}')
    plt.legend()
    plt.title(panel_title)
plt.show()


## 10. Evaluation on validation and test sets


In [None]:
# After fit() the in-memory model holds the final-epoch weights, which are not
# necessarily the best ones. Restore the checkpoint selected on val_accuracy
# so the reported metrics describe the same weights that are later saved.
model.load_weights(checkpoint_path)

val_loss, val_acc = model.evaluate(val_generator)
print(f'Validation accuracy: {val_acc:.4f}')

test_loss, test_acc = model.evaluate(test_generator)
print(f'Test accuracy: {test_acc:.4f}')


## 11. Classification metrics


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# The test iterator was created with shuffle=False, so prediction order lines
# up with test_generator.classes.
pred_probs = model.predict(test_generator)

# Threshold the sigmoid outputs at 0.5 and flatten the (N, 1) column so the
# predictions have the same 1-D shape as the label vector.
preds = (pred_probs > 0.5).astype(int).ravel()

report = classification_report(test_generator.classes, preds, target_names=['NORMAL', 'PNEUMONIA'])
print(report)

cm = confusion_matrix(test_generator.classes, preds)
# The newline must be an escape sequence; a literal line break inside the
# quotes is a syntax error.
print('Confusion matrix:\n', cm)


## 12. Load best weights and save


In [None]:
# Swap the in-memory weights for the checkpointed ones before exporting.
model.load_weights(checkpoint_path)

# Export twice: first to the extension-less target, then to the .h5 file.
for export_target in ('bhavya1600_cnn_saved_model', 'bhavya1600_cnn.h5'):
    model.save(export_target)

print('Saved trained model and weights.')
