In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print("TensorFlow version:", tf.__version__)


TensorFlow version: 2.20.0


In [12]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models, optimizers

# Base paths (relative to notebook location in notebooks/)
BASE_DIR = os.path.abspath("..")  # D:\HealthAI-Project
DATA_DIR = os.path.join(BASE_DIR, "datasets", "chest_xray", "chest_xray")  # D:\HealthAI-Project\datasets\chest_xray\chest_xray

TRAIN_DIR = os.path.join(DATA_DIR, "train")
VAL_DIR   = os.path.join(DATA_DIR, "test")

IMG_SIZE = (224, 224)
BATCH_SIZE = 32

print("BASE_DIR:", BASE_DIR)
print("DATA_DIR:", DATA_DIR)
print("TRAIN_DIR:", TRAIN_DIR)
print("VAL_DIR:", VAL_DIR)


BASE_DIR: d:\HealthAI-Project
DATA_DIR: d:\HealthAI-Project\datasets\chest_xray\chest_xray
TRAIN_DIR: d:\HealthAI-Project\datasets\chest_xray\chest_xray\train
VAL_DIR: d:\HealthAI-Project\datasets\chest_xray\chest_xray\test


Set Up ImageDataGenerator

In [13]:
train_datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=10,
    width_shift_range=0.05,
    height_shift_range=0.05,
    zoom_range=0.1,
    horizontal_flip=True,
)

val_datagen = ImageDataGenerator(
    rescale=1.0/255.0
)

train_gen = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

val_gen = val_datagen.flow_from_directory(
    VAL_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)


Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


Model Skeleton (Transfer Learning)

In [14]:
base_model = MobileNetV2(
    input_shape=IMG_SIZE + (3,),
    include_top=False,
    weights='imagenet'
)

base_model.trainable = False  # we will fine-tune later

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')  # binary: NORMAL vs PNEUMONIA
])

model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()


Train the Pneumonia Model (Light Training Pass)

In [15]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import os

EPOCHS = 5  # start with 5

checkpoint_path = os.path.join(BASE_DIR, "models", "xray_disease_model.h5")
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True
)

checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    save_best_only=True
)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS,
    callbacks=[early_stop, checkpoint]
)


Epoch 1/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.8086 - loss: 0.4023



[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 1s/step - accuracy: 0.8779 - loss: 0.2833 - val_accuracy: 0.8237 - val_loss: 0.3620
Epoch 2/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9318 - loss: 0.1737



[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 1s/step - accuracy: 0.9350 - loss: 0.1672 - val_accuracy: 0.8782 - val_loss: 0.2730
Epoch 3/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 1s/step - accuracy: 0.9436 - loss: 0.1446 - val_accuracy: 0.8702 - val_loss: 0.2925
Epoch 4/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 1s/step - accuracy: 0.9492 - loss: 0.1288 - val_accuracy: 0.8750 - val_loss: 0.2856


In [16]:
val_loss, val_acc = model.evaluate(val_gen)
print("Validation loss:", val_loss)
print("Validation accuracy:", val_acc)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 784ms/step - accuracy: 0.8782 - loss: 0.2730
Validation loss: 0.27300092577934265
Validation accuracy: 0.8782051205635071


Get class mapping (NORMAL vs PNEUMONIA)

In [17]:
# Check class indices used by the generator
class_indices = train_gen.class_indices
print(class_indices)

# Reverse mapping: 0 -> NORMAL, 1 -> PNEUMONIA (or as per your dataset)
idx_to_class = {v: k for k, v in class_indices.items()}
print(idx_to_class)


{'NORMAL': 0, 'PNEUMONIA': 1}
{0: 'NORMAL', 1: 'PNEUMONIA'}


In [22]:
import json
import os
from pathlib import Path

BASE_DIR = Path("..").resolve()  # D:\HealthAI-Project
MODELS_DIR = BASE_DIR / "models"
MODELS_DIR.mkdir(exist_ok=True)

mapping_path = MODELS_DIR / "xray_class_mapping.json"

# idx_to_class should look like {0: 'NORMAL', 1: 'PNEUMONIA'}
print("idx_to_class:", idx_to_class)

# Save as JSON (keys as strings)
with open(mapping_path, "w") as f:
    json.dump({str(k): v for k, v in idx_to_class.items()}, f, indent=2)

print("Saved class mapping to:", mapping_path)


idx_to_class: {0: 'NORMAL', 1: 'PNEUMONIA'}
Saved class mapping to: D:\HealthAI-Project\models\xray_class_mapping.json


Helper: predict on a single X-ray image

In [18]:
import tensorflow as tf
import numpy as np
import os

def predict_xray_image(img_path, model, idx_to_class, img_size=(224, 224)):
    # Load image
    img = tf.keras.utils.load_img(img_path, target_size=img_size, color_mode='rgb')
    img_array = tf.keras.utils.img_to_array(img)
    img_array = img_array / 255.0  # same normalization as training
    img_array = np.expand_dims(img_array, axis=0)  # (1, H, W, 3)

    # Predict
    prob = model.predict(img_array)[0][0]  # single scalar between 0 and 1

    # Threshold at 0.5
    predicted_class_index = 1 if prob >= 0.5 else 0
    predicted_label = idx_to_class[predicted_class_index]

    # For convenience, give pneumonia probability as "prob of class 1"
    pneumonia_prob = float(prob)
    
    return {
        "predicted_label": predicted_label,
        "pneumonia_probability": pneumonia_prob
    }


Test with a real image from your dataset

PNEUMONIA SET

In [19]:
test_example_path = os.path.join(
    VAL_DIR,  # this is pointing to .../chest_xray/test
    "PNEUMONIA",
    os.listdir(os.path.join(VAL_DIR, "PNEUMONIA"))[0]  # first pneumonia image
)

print("Sample test image:", test_example_path)

result = predict_xray_image(test_example_path, model, idx_to_class, IMG_SIZE)
print(result)


Sample test image: d:\HealthAI-Project\datasets\chest_xray\chest_xray\test\PNEUMONIA\person100_bacteria_475.jpeg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 964ms/step
{'predicted_label': 'PNEUMONIA', 'pneumonia_probability': 0.5602346658706665}


NORMAL SET

In [20]:
test_normal_path = os.path.join(
    VAL_DIR,
    "NORMAL",
    os.listdir(os.path.join(VAL_DIR, "NORMAL"))[0]
)

print("Sample NORMAL image:", test_normal_path)

result_normal = predict_xray_image(test_normal_path, model, idx_to_class, IMG_SIZE)
print(result_normal)


Sample NORMAL image: d:\HealthAI-Project\datasets\chest_xray\chest_xray\test\NORMAL\IM-0001-0001.jpeg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
{'predicted_label': 'NORMAL', 'pneumonia_probability': 0.06982818245887756}


SAVE THE MODEL

In [21]:
checkpoint_path = os.path.join(BASE_DIR, "models", "xray_disease_model.h5")
