# Chest X-Ray Pneumonia Classification with EfficientNet

This notebook captures the **nikoneri** TensorFlow implementation that leverages EfficientNetB3 with staged fine-tuning, cosine decay learning rate schedules, and rich evaluation on the held-out test set.


## 1. Setup


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from pathlib import Path
import math
import matplotlib.pyplot as plt
import numpy as np

# Report the runtime up front so an accidental CPU-only session is obvious.
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"TensorFlow version: {tf.__version__}")
print('GPUs:', gpu_devices)


## 2. Data pipeline

Use `tf.data` pipelines with caching and prefetching to stream the dataset efficiently.


In [None]:
# Dataset root, resolved relative to the notebook's working directory.
data_root = Path('..') / 'data' / 'chest_xray'
train_dir = data_root / 'train'
val_dir = data_root / 'val'
test_dir = data_root / 'test'

# Every split must contain one sub-folder per class; fail fast on the first
# missing one so later cells do not fail with a less obvious error.
expected_dirs = [
    split_dir / class_name
    for split_dir in (train_dir, val_dir, test_dir)
    for class_name in ('NORMAL', 'PNEUMONIA')
]
for folder in expected_dirs:
    if not folder.exists():
        raise FileNotFoundError(f'Missing expected folder: {folder}')

print('Dataset directories verified.')


In [None]:
IMG_SIZE = 300
BATCH_SIZE = 16
SEED = 123

# Seed Python, NumPy and TensorFlow globally. Passing `seed=SEED` to the
# training loader only fixes the file shuffle; without a global seed the
# augmentation layers, dropout, the new Dense head's initialisation and the
# later `shuffle(1000)` buffer all differ between runs.
tf.keras.utils.set_random_seed(SEED)

# Arguments shared by all three splits: class labels are inferred from the
# sub-folder names and emitted as binary labels; images are resized to
# IMG_SIZE x IMG_SIZE and grouped into batches of BATCH_SIZE.
_loader_kwargs = dict(
    labels='inferred',
    label_mode='binary',
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
)

# Only the training split is shuffled.
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    seed=SEED,
    **_loader_kwargs
)

# Validation and test keep directory order so predictions can be aligned with
# labels by position later on.
val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    shuffle=False,
    **_loader_kwargs
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    shuffle=False,
    **_loader_kwargs
)

# Capture the class names now: the transformed datasets created in later cells
# no longer expose `class_names`.
class_names = train_ds.class_names
print('Classes:', class_names)


In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache the decoded batches, reshuffle them each epoch, and
# overlap input preparation with training. The dataset is already batched by
# the loader, so `shuffle(1000)` reorders whole batches, not single images.
train_ds = (
    train_ds
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)

# Evaluation pipelines: cache and prefetch only, so element order is preserved.
val_ds, test_ds = (
    eval_ds.cache().prefetch(buffer_size=AUTOTUNE)
    for eval_ds in (val_ds, test_ds)
)


## 3. Data augmentation


In [None]:
# Random geometric perturbations, applied inside the model in the order listed.
augmentation_layers = [
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2),
    layers.RandomTranslation(0.1, 0.1),
]
data_augmentation = keras.Sequential(augmentation_layers)


## 4. Model definition

Initialise EfficientNetB3 with ImageNet weights and append a custom classification head.


In [None]:
input_shape = (IMG_SIZE, IMG_SIZE, 3)

# ImageNet-pretrained backbone without its classification top, frozen for the
# first training stage.
base_model = keras.applications.EfficientNetB3(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape
)
base_model.trainable = False

# Functional graph: augmentation -> preprocessing -> backbone -> pooled head.
inputs = keras.Input(shape=input_shape)
augmented = data_augmentation(inputs)
preprocessed = keras.applications.efficientnet.preprocess_input(augmented)
# `training=False` is fixed at call time, so the backbone is always invoked in
# inference mode, including after its layers are unfrozen for fine-tuning.
features = base_model(preprocessed, training=False)
pooled = layers.GlobalAveragePooling2D()(features)
regularised = layers.Dropout(0.4)(pooled)
# Single sigmoid unit to match the binary labels.
outputs = layers.Dense(1, activation='sigmoid')(regularised)

model = keras.Model(inputs, outputs, name='nikoneri_efficientnet')
model.summary()


## 5. Compile and initial training


In [None]:
initial_epochs = 10

# Number of batches per epoch; the cosine schedule is defined in optimizer
# steps, so it must span steps_per_epoch * epochs to decay over the whole stage.
steps_per_epoch = math.ceil(train_ds.cardinality().numpy())
frozen_decay_steps = steps_per_epoch * initial_epochs
learning_rate = tf.keras.optimizers.schedules.CosineDecay(
    1e-3,
    decay_steps=frozen_decay_steps
)

# Stage 1: only the new classification head is trainable.
frozen_optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(
    optimizer=frozen_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy']
)

history_frozen = model.fit(
    train_ds,
    epochs=initial_epochs,
    validation_data=val_ds
)


## 6. Fine-tuning


In [None]:
# Stage 2: unfreeze the backbone, then re-freeze its first 300 layers so only
# the layers after them (and the head) are updated.
base_model.trainable = True
for early_layer in base_model.layers[:300]:
    early_layer.trainable = False

fine_tune_epochs = 15
total_epochs = initial_epochs + fine_tune_epochs

# Fresh cosine schedule with a 10x smaller peak rate, spanning only the
# fine-tuning epochs.
fine_decay_steps = steps_per_epoch * fine_tune_epochs
fine_lr = tf.keras.optimizers.schedules.CosineDecay(1e-4, decay_steps=fine_decay_steps)

# Re-compile so the changed `trainable` flags take effect.
fine_optimizer = keras.optimizers.Adam(learning_rate=fine_lr)
model.compile(
    optimizer=fine_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Continue the epoch numbering where the frozen stage stopped: training runs
# from `initial_epoch` up to (but excluding) `total_epochs`.
resume_epoch = history_frozen.epoch[-1] + 1
history_fine = model.fit(
    train_ds,
    validation_data=val_ds,
    initial_epoch=resume_epoch,
    epochs=total_epochs
)


## 7. Learning curves


In [None]:
def plot_history(histories, metric='accuracy'):
    """Plot training and validation curves of one metric for several stages.

    Args:
        histories: Mapping of stage label -> `History` object returned by
            `model.fit`.
        metric: Key in `History.history` to plot; the matching
            `val_<metric>` series is plotted alongside it.

    Raises:
        KeyError: If `metric` or `val_<metric>` was not recorded for a stage.
    """
    plt.figure(figsize=(12, 5))
    for label, hist in histories.items():
        # `hist.epoch` holds the absolute epoch indices of the run, so a stage
        # started with `initial_epoch=k` is drawn after the previous stage
        # instead of being overlaid on it from x=0.
        epochs = hist.epoch
        plt.plot(epochs, hist.history[metric], label=f'{label} {metric}')
        plt.plot(epochs, hist.history[f'val_{metric}'], label=f'{label} val_{metric}')
    plt.xlabel('epoch')
    plt.ylabel(metric)
    plt.legend()
    plt.title(f'{metric} over epochs')
    plt.show()

# One figure per metric, each showing both training stages.
for curve_metric in ('accuracy', 'loss'):
    stage_histories = {'frozen': history_frozen, 'fine': history_fine}
    plot_history(stage_histories, metric=curve_metric)


## 8. Evaluation


In [None]:
# `evaluate` returns [loss, accuracy], matching the compiled loss and metrics.
val_metrics = model.evaluate(val_ds)
val_loss, val_acc = val_metrics
print(f'Validation accuracy: {val_acc:.4f}')

test_metrics = model.evaluate(test_ds)
test_loss, test_acc = test_metrics
print(f'Test accuracy: {test_acc:.4f}')


## 9. Detailed metrics


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# The model ends in a single sigmoid unit, so `predict` returns one column per
# sample; flatten it so scikit-learn receives 1-D vectors.
probabilities = model.predict(test_ds).ravel()
predictions = (probabilities > 0.5).astype(int)

# `test_ds` was created with shuffle=False, so iterating it yields labels in
# the same order `predict` consumed the images. Collect them batch by batch,
# then flatten and cast to int to match `predictions`.
true_labels = np.concatenate([y.numpy() for _, y in test_ds]).ravel().astype(int)

report = classification_report(true_labels, predictions, target_names=class_names)
print(report)

cm = confusion_matrix(true_labels, predictions)
# The newline must be an escape sequence: a single-quoted string cannot span
# two physical lines.
print('Confusion matrix:\n', cm)


## 10. Save model


In [None]:
# Persist the trained model twice: to an extension-less path and to an HDF5 file.
# NOTE(review): Keras 3 (TF >= 2.16) appears to reject extension-less paths in
# `model.save` and to require `.keras`/`.h5` — confirm against the TensorFlow
# version printed in the setup cell.
for export_path in ('nikoneri_efficientnet_saved_model', 'nikoneri_efficientnet.h5'):
    model.save(export_path)
print('Saved EfficientNet model.')
