In [20]:
import os
import random
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# -----------------------------
# CONFIG
# -----------------------------
DATASET_DIR = "/kaggle/input/chest-xray-pneumonia/chest_xray"
IMG_SIZE = 224  # MobileNetV3 input size
BATCH_SIZE = 16
EPOCHS = 10
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# -----------------------------
# DATA PREPARATION
# -----------------------------
def collect_image_paths(dataset_root):
    import glob
    train_path = os.path.join(dataset_root, "train")
    test_path  = os.path.join(dataset_root, "test")

    def make_df(path):
        normal = glob.glob(os.path.join(path, "NORMAL", "*"))
        pneumonia = glob.glob(os.path.join(path, "PNEUMONIA", "*"))
        return pd.DataFrame({
            "image": normal + pneumonia,
            "class": ["Normal"] * len(normal) + ["Pneumonia"] * len(pneumonia)
        })

    return make_df(train_path), make_df(test_path)

def build_generator(df, img_size, batch_size, shuffle=True, augment=False):
    datagen = ImageDataGenerator(
        rescale=1./255,
        zoom_range=0.1 if augment else 0.0,
        width_shift_range=0.1 if augment else 0.0,
        height_shift_range=0.1 if augment else 0.0,
        horizontal_flip=augment
    )
    return datagen.flow_from_dataframe(
        df, x_col="image", y_col="class",
        target_size=(img_size, img_size),
        class_mode="binary",
        batch_size=batch_size, shuffle=shuffle
    )

# -----------------------------
# LOAD DATA
# -----------------------------
df_train_all, df_test = collect_image_paths(DATASET_DIR)
train_df, val_df = train_test_split(df_train_all, test_size=0.2,
                                    stratify=df_train_all["class"], random_state=SEED)

train_gen = build_generator(train_df, IMG_SIZE, BATCH_SIZE, augment=True)
val_gen   = build_generator(val_df, IMG_SIZE, BATCH_SIZE)
test_gen  = build_generator(df_test, IMG_SIZE, 1, shuffle=False)


Found 4172 validated image filenames belonging to 2 classes.
Found 1044 validated image filenames belonging to 2 classes.
Found 624 validated image filenames belonging to 2 classes.


In [None]:
print("Train:", df_train_all['class'].value_counts())
print("Val:", val_df['class'].value_counts())
print("Test:", df_test['class'].value_counts())


In [23]:
print("Train:", len(train_df), "Val:", len(val_df), "Test:", len(df_test))
print("Example train labels:", train_df['class'].unique())
print("Class indices:", train_gen.class_indices)
print("Validation accuracy last epoch:", history.history['val_binary_accuracy'][-1])


Train: 4172 Val: 1044 Test: 624
Example train labels: ['Normal' 'Pneumonia']
Class indices: {'Normal': 0, 'Pneumonia': 1}
Validation accuracy last epoch: 0.7432950139045715


In [24]:
# -----------------------------
# MODEL (MobileNetV3 Small)
# -----------------------------
base_model = tf.keras.applications.MobileNetV3Small(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights="imagenet"
)

base_model.trainable = False  # freeze base layers

inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.4)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer=keras.optimizers.Adam(1e-4),
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])
model.summary()



In [None]:
# -----------------------------
# MODEL (MobileNetV3 Small)
# -----------------------------
base_model = tf.keras.applications.MobileNetV3Small(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights=None
)

base_model.trainable = False  # freeze base layers

inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer=keras.optimizers.Adam(1e-4),
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])
model.summary()



In [2]:
# -----------------------------
# TRAINING
# -----------------------------
es = callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
rlrop = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=2)

history = model.fit(train_gen,
                    validation_data=val_gen,
                    epochs=EPOCHS,
                    callbacks=[es, rlrop],
                    verbose=1)

model.save("pneumonia_mobilenetv3.h5")
print("✅ Model saved as pneumonia_mobilenetv3.h5")



NameError: name 'callbacks' is not defined

In [3]:
# -----------------------------
# EVALUATION
# -----------------------------
preds = model.predict(test_gen, verbose=1)
pred_labels = (preds > 0.5).astype(int).reshape(-1)
y_true = df_test["class"].map({"Normal":0, "Pneumonia":1}).values

print("Test Accuracy:", accuracy_score(y_true, pred_labels))
print(classification_report(y_true, pred_labels, target_names=["Normal","Pneumonia"]))
print("ROC-AUC:", roc_auc_score(y_true, preds))


NameError: name 'model' is not defined

In [None]:
# -----------------------------
# TFLITE INT8 QUANTIZATION
# -----------------------------
TFLITE_MODEL_PATH = "pneumonia_mobilenetv3_int8.tflite"
N_REPRESENTATIVE = 100

def representative_dataset_gen_from_folder(img_folder, img_size, n=N_REPRESENTATIVE):
    input_name = model.inputs[0].name
    images = [os.path.join(img_folder, f) for f in os.listdir(img_folder) 
              if f.lower().endswith((".png", ".jpg", ".jpeg"))]
    images = random.sample(images, min(n, len(images)))
    
    for img_path in images:
        img = cv2.imread(img_path)
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (img_size, img_size))
        img = np.expand_dims(img, axis=0).astype(np.uint8)  # INT8 input
        yield {input_name: img}

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = lambda: representative_dataset_gen_from_folder(DATASET_DIR, IMG_SIZE)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model = converter.convert()

with open(TFLITE_MODEL_PATH, "wb") as f:
    f.write(tflite_model)

print(f"✅ Fully INT8 quantized MobileNetV3 model saved as {TFLITE_MODEL_PATH}")


In [25]:
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# -----------------------------
# CONFIG
# -----------------------------
DATASET_DIR = "/kaggle/input/chest-xray-pneumonia/chest_xray"
IMG_SIZE = 224
BATCH_SIZE = 16
SEED = 42
EPOCHS_FEATURE_EXTRACTION = 5
EPOCHS_FINE_TUNE = 15

# -----------------------------
# DATA PREPARATION
# -----------------------------
def collect_image_paths(dataset_root):
    import glob
    train_path = os.path.join(dataset_root, "train")
    test_path  = os.path.join(dataset_root, "test")

    def make_df(path):
        normal = glob.glob(os.path.join(path, "NORMAL", "*"))
        pneumonia = glob.glob(os.path.join(path, "PNEUMONIA", "*"))
        return pd.DataFrame({
            "image": normal + pneumonia,
            "class": ["Normal"] * len(normal) + ["Pneumonia"] * len(pneumonia)
        })

    return make_df(train_path), make_df(test_path)

train_df_all, test_df = collect_image_paths(DATASET_DIR)
train_df, val_df = train_test_split(train_df_all, test_size=0.2,
                                    stratify=train_df_all["class"], random_state=SEED)

# Generators
train_gen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
).flow_from_dataframe(
    train_df, x_col="image", y_col="class",
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode="binary",
    batch_size=BATCH_SIZE,
    shuffle=True
)

val_gen = ImageDataGenerator(rescale=1./255).flow_from_dataframe(
    val_df, x_col="image", y_col="class",
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode="binary",
    batch_size=BATCH_SIZE,
    shuffle=False
)

test_gen = ImageDataGenerator(rescale=1./255).flow_from_dataframe(
    test_df, x_col="image", y_col="class",
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode="binary",
    batch_size=1,
    shuffle=False
)

# -----------------------------
# MODEL (MobileNetV3Small)
# -----------------------------
base_model = tf.keras.applications.MobileNetV3Small(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights="imagenet"
)

# 1️⃣ Feature Extraction Stage
base_model.trainable = False

inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.4)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer=keras.optimizers.Adam(1e-4),
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])

# -----------------------------
# CALLBACKS
# -----------------------------
es = keras.callbacks.EarlyStopping(monitor='val_binary_accuracy', patience=5, restore_best_weights=True)
rlrop = keras.callbacks.ReduceLROnPlateau(monitor='val_binary_accuracy', factor=0.3, patience=3, min_lr=1e-6)

# -----------------------------
# TRAINING - Feature Extraction
# -----------------------------
history_1 = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS_FEATURE_EXTRACTION,
    callbacks=[es, rlrop],
    verbose=1
)

# -----------------------------
# FINE-TUNING STAGE
# -----------------------------
base_model.trainable = True

# Freeze lower layers (optional: keep first 100 layers frozen)
for layer in base_model.layers[:100]:
    layer.trainable = False

model.compile(
    optimizer=keras.optimizers.Adam(1e-5),
    loss='binary_crossentropy',
    metrics=['binary_accuracy']
)

history_2 = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS_FINE_TUNE,
    callbacks=[es, rlrop],
    verbose=1
)

# -----------------------------
# EVALUATION
# -----------------------------
preds = model.predict(test_gen, verbose=1)
pred_labels = (preds > 0.5).astype(int).reshape(-1)
y_true = test_df["class"].map({"Normal":0, "Pneumonia":1}).values

from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

print("Test Accuracy:", accuracy_score(y_true, pred_labels))
print(classification_report(y_true, pred_labels, target_names=["Normal","Pneumonia"]))
print("ROC-AUC:", roc_auc_score(y_true, preds))

# -----------------------------
# SAVE MODEL
# -----------------------------
model.save("pneumonia_mobilenetv3_finetuned.h5")
print("✅ Model saved as pneumonia_mobilenetv3_finetuned.h5")


Found 4172 validated image filenames belonging to 2 classes.
Found 1044 validated image filenames belonging to 2 classes.
Found 624 validated image filenames belonging to 2 classes.


  self._warn_if_super_not_called()


Epoch 1/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 472ms/step - binary_accuracy: 0.7315 - loss: 0.5923 - val_binary_accuracy: 0.7433 - val_loss: 0.5666 - learning_rate: 1.0000e-04
Epoch 2/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 478ms/step - binary_accuracy: 0.7429 - loss: 0.5767 - val_binary_accuracy: 0.7433 - val_loss: 0.5649 - learning_rate: 1.0000e-04
Epoch 3/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 467ms/step - binary_accuracy: 0.7517 - loss: 0.5632 - val_binary_accuracy: 0.7433 - val_loss: 0.5658 - learning_rate: 1.0000e-04
Epoch 4/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 450ms/step - binary_accuracy: 0.7410 - loss: 0.5742 - val_binary_accuracy: 0.7433 - val_loss: 0.5628 - learning_rate: 1.0000e-04
Epoch 5/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 456ms/step - binary_accuracy: 0.7440 - loss: 0.5694 - val_binary_accuracy: 0.7433 - val_loss: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


✅ Model saved as pneumonia_mobilenetv3_finetuned.h5


In [31]:
import tensorflow as tf
import numpy as np
import cv2
import os
import random

# -----------------------------
IMG_SIZE = 224
N_REPRESENTATIVE = 100
TRAIN_IMG_FOLDER = "/kaggle/input/chest-xray-pneumonia/chest_xray/train"
EXPORT_DIR = "pneumonia_mobilenetv3_savedmodel"
TFLITE_MODEL_PATH = "pneumonia_mobilenetv3_int8.tflite"

# -----------------------------
# 1️⃣ EXPORT MODEL FOR TFLITE
# -----------------------------
model.export(EXPORT_DIR)
print(f"✅ Model exported for TFLite at {EXPORT_DIR}")

# -----------------------------
# 2️⃣ REPRESENTATIVE DATASET GENERATOR (float32)
# -----------------------------
def representative_dataset_gen(img_folder, img_size, n=N_REPRESENTATIVE):
    images = []
    for root, dirs, files in os.walk(img_folder):
        for file in files:
            if file.lower().endswith((".png", ".jpg", ".jpeg")):
                images.append(os.path.join(root, file))
    images = random.sample(images, min(n, len(images)))

    for img_path in images:
        img = cv2.imread(img_path)
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (img_size, img_size))
        img = img.astype(np.float32) / 255.0   # <-- IMPORTANT: normalize to [0,1]
        img = np.expand_dims(img, axis=0)
        yield [img]

# -----------------------------
# 3️⃣ CONVERT TO FULLY INT8 TFLITE
# -----------------------------
converter = tf.lite.TFLiteConverter.from_saved_model(EXPORT_DIR)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = lambda: representative_dataset_gen(TRAIN_IMG_FOLDER, IMG_SIZE)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8   # input will be uint8 on device
converter.inference_output_type = tf.uint8  # output will be uint8 on device

tflite_model = converter.convert()

# -----------------------------
# 4️⃣ SAVE TFLITE MODEL
# -----------------------------
with open(TFLITE_MODEL_PATH, "wb") as f:
    f.write(tflite_model)

print(f"✅ Fully INT8 quantized TFLite model saved as {TFLITE_MODEL_PATH}")


Saved artifact at 'pneumonia_mobilenetv3_savedmodel'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_1250')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  134829961982800: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134829961983568: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134829961984528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134829961984144: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134829961983952: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134829961985872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134829961980496: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134829924041360: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134829961985488: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134829961984720: TensorSpec(shape=(), dtype=tf.resource, name=N

W0000 00:00:1760369159.532597      37 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1760369159.532637      37 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.


✅ Fully INT8 quantized TFLite model saved as pneumonia_mobilenetv3_int8.tflite


fully_quantize: 0, inference_type: 6, input_inference_type: UINT8, output_inference_type: UINT8
