In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [6]:
import os
import random
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# -----------------------------
# CONFIG
# -----------------------------
DATASET_DIR = "/kaggle/input/chest-xray-pneumonia/chest_xray"
IMG_SIZE = 224  # EfficientNetV2-S input size
BATCH_SIZE = 16
EPOCHS = 10
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# -----------------------------
# DATA PREPARATION
# -----------------------------
def collect_image_paths(dataset_root):
    import glob
    train_path = os.path.join(dataset_root, "train")
    test_path  = os.path.join(dataset_root, "test")

    def make_df(path):
        normal = glob.glob(os.path.join(path, "NORMAL", "*"))
        pneumonia = glob.glob(os.path.join(path, "PNEUMONIA", "*"))
        return pd.DataFrame({
            "image": normal + pneumonia,
            "class": ["Normal"] * len(normal) + ["Pneumonia"] * len(pneumonia)
        })

    return make_df(train_path), make_df(test_path)

def build_generator(df, img_size, batch_size, shuffle=True, augment=False):
    datagen = ImageDataGenerator(
        rescale=1./255,
        zoom_range=0.1 if augment else 0.0,
        width_shift_range=0.1 if augment else 0.0,
        height_shift_range=0.1 if augment else 0.0,
        horizontal_flip=augment
    )
    return datagen.flow_from_dataframe(
        df, x_col="image", y_col="class",
        target_size=(img_size, img_size),
        class_mode="binary",
        batch_size=batch_size, shuffle=shuffle
    )


In [7]:
# -----------------------------
# LOAD DATA
# -----------------------------
df_train_all, df_test = collect_image_paths(DATASET_DIR)
train_df, val_df = train_test_split(df_train_all, test_size=0.2,
                                    stratify=df_train_all["class"], random_state=SEED)

train_gen = build_generator(train_df, IMG_SIZE, BATCH_SIZE, augment=True)
val_gen   = build_generator(val_df, IMG_SIZE, BATCH_SIZE)
test_gen  = build_generator(df_test, IMG_SIZE, 1, shuffle=False)


Found 4172 validated image filenames belonging to 2 classes.
Found 1044 validated image filenames belonging to 2 classes.
Found 624 validated image filenames belonging to 2 classes.


In [8]:
# -----------------------------
# MODEL (EfficientNetV2-S)
# -----------------------------
base_model = tf.keras.applications.EfficientNetV2S(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False  # freeze base layers initially

inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer=keras.optimizers.Adam(1e-4),
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])
model.summary()



2025-10-13 15:33:54.644468: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-s_notop.h5
[1m82420632/82420632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [9]:
# -----------------------------
# TRAINING
# -----------------------------
es = callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
rlrop = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=2)

history = model.fit(train_gen,
                    validation_data=val_gen,
                    epochs=EPOCHS,
                    callbacks=[es, rlrop],
                    verbose=1)

model.save("pneumonia_efficientnetv2s.h5")
print("✅ Model saved as pneumonia_efficientnetv2s.h5")



  self._warn_if_super_not_called()


Epoch 1/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m526s[0m 2s/step - binary_accuracy: 0.7311 - loss: 0.6256 - val_binary_accuracy: 0.7433 - val_loss: 0.5817 - learning_rate: 1.0000e-04
Epoch 2/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m469s[0m 2s/step - binary_accuracy: 0.7265 - loss: 0.5727 - val_binary_accuracy: 0.7500 - val_loss: 0.5328 - learning_rate: 1.0000e-04
Epoch 3/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m471s[0m 2s/step - binary_accuracy: 0.7348 - loss: 0.5394 - val_binary_accuracy: 0.7749 - val_loss: 0.5036 - learning_rate: 1.0000e-04
Epoch 4/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m473s[0m 2s/step - binary_accuracy: 0.7533 - loss: 0.5112 - val_binary_accuracy: 0.7835 - val_loss: 0.4820 - learning_rate: 1.0000e-04
Epoch 5/10
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m526s[0m 2s/step - binary_accuracy: 0.7686 - loss: 0.4921 - val_binary_accuracy: 0.7912 - val_loss: 0.4656 - l

In [10]:
# -----------------------------
# EVALUATION
# -----------------------------
preds = model.predict(test_gen, verbose=1)
pred_labels = (preds > 0.5).astype(int).reshape(-1)
y_true = df_test["class"].map({"Normal":0, "Pneumonia":1}).values

print("Test Accuracy:", accuracy_score(y_true, pred_labels))
print(classification_report(y_true, pred_labels, target_names=["Normal","Pneumonia"]))
print("ROC-AUC:", roc_auc_score(y_true, preds))

[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 127ms/step
Test Accuracy: 0.6426282051282052
              precision    recall  f1-score   support

      Normal       0.59      0.15      0.24       234
   Pneumonia       0.65      0.94      0.77       390

    accuracy                           0.64       624
   macro avg       0.62      0.54      0.50       624
weighted avg       0.63      0.64      0.57       624

ROC-AUC: 0.721137409598948


In [None]:
# -----------------------------
# TFLITE INT8 QUANTIZATION
# -----------------------------
TFLITE_MODEL_PATH = "pneumonia_efficientnetv2s_int8.tflite"
N_REPRESENTATIVE = 100

def representative_dataset_gen_from_folder(img_folder, img_size, n=N_REPRESENTATIVE):
    input_name = model.inputs[0].name
    images = [os.path.join(img_folder, f) for f in os.listdir(img_folder) 
              if f.lower().endswith((".png", ".jpg", ".jpeg"))]
    images = random.sample(images, min(n, len(images)))
    
    for img_path in images:
        img = cv2.imread(img_path)
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (img_size, img_size))
        img = np.expand_dims(img, axis=0).astype(np.uint8)  # INT8 input
        yield {input_name: img}

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = lambda: representative_dataset_gen_from_folder(DATASET_DIR, IMG_SIZE)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model = converter.convert()

with open(TFLITE_MODEL_PATH, "wb") as f:
    f.write(tflite_model)

print(f"✅ Fully INT8 quantized EfficientNetV2-S model saved as {TFLITE_MODEL_PATH}")


In [11]:
import os
import random
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# -----------------------------
# CONFIG
# -----------------------------
DATASET_DIR = "/kaggle/input/chest-xray-pneumonia/chest_xray"
IMG_SIZE = 224
BATCH_SIZE = 16
EPOCHS_TOP = 5        # phase 1: top layers
EPOCHS_FINE = 5       # phase 2: fine-tuning
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# -----------------------------
# DATA PREPARATION
# -----------------------------
def collect_image_paths(dataset_root):
    import glob
    train_path = os.path.join(dataset_root, "train")
    test_path  = os.path.join(dataset_root, "test")

    def make_df(path):
        normal = glob.glob(os.path.join(path, "NORMAL", "*"))
        pneumonia = glob.glob(os.path.join(path, "PNEUMONIA", "*"))
        return pd.DataFrame({
            "image": normal + pneumonia,
            "class": ["Normal"] * len(normal) + ["Pneumonia"] * len(pneumonia)
        })

    return make_df(train_path), make_df(test_path)

def build_generator(df, img_size, batch_size, shuffle=True, augment=False):
    datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=15 if augment else 0,
        zoom_range=0.1 if augment else 0,
        width_shift_range=0.1 if augment else 0,
        height_shift_range=0.1 if augment else 0,
        horizontal_flip=augment
    )
    return datagen.flow_from_dataframe(
        df, x_col="image", y_col="class",
        target_size=(img_size, img_size),
        class_mode="binary",
        batch_size=batch_size, shuffle=shuffle
    )

# -----------------------------
# LOAD DATA
# -----------------------------
df_train_all, df_test = collect_image_paths(DATASET_DIR)
train_df, val_df = train_test_split(df_train_all, test_size=0.2,
                                    stratify=df_train_all["class"], random_state=SEED)

train_gen = build_generator(train_df, IMG_SIZE, BATCH_SIZE, augment=True)
val_gen   = build_generator(val_df, IMG_SIZE, BATCH_SIZE)
test_gen  = build_generator(df_test, IMG_SIZE, 1, shuffle=False)

Found 4172 validated image filenames belonging to 2 classes.
Found 1044 validated image filenames belonging to 2 classes.
Found 624 validated image filenames belonging to 2 classes.


In [12]:

# -----------------------------
# CLASS WEIGHTS
# -----------------------------
classes = train_df['class'].map({'Normal':0,'Pneumonia':1}).values
class_weights = compute_class_weight('balanced', classes=np.unique(classes), y=classes)
class_weights = dict(enumerate(class_weights))
print("Class weights:", class_weights)

# -----------------------------
# MODEL (EfficientNetV2-S)
# -----------------------------
base_model = tf.keras.applications.EfficientNetV2S(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False  # Phase 1: freeze

inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer=keras.optimizers.Adam(1e-4),
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])
model.summary()



Class weights: {0: 1.9440820130475303, 1: 0.6731203614069055}


In [13]:
# -----------------------------
# CALLBACKS
# -----------------------------
es = callbacks.EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)
rlrop = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=2)

# -----------------------------
# PHASE 1: TRAIN TOP LAYERS
# -----------------------------
history_top = model.fit(train_gen,
                        validation_data=val_gen,
                        epochs=EPOCHS_TOP,
                        class_weight=class_weights,
                        callbacks=[es, rlrop],
                        verbose=1)

# -----------------------------
# PHASE 2: FINE-TUNE TOP BLOCKS
# -----------------------------
# Unfreeze top layers
base_model.trainable = True
for layer in base_model.layers[:-50]:  # freeze bottom layers
    layer.trainable = False

model.compile(optimizer=keras.optimizers.Adam(1e-5),  # smaller LR
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])

history_ft = model.fit(train_gen,
                       validation_data=val_gen,
                       epochs=EPOCHS_FINE,
                       class_weight=class_weights,
                       callbacks=[es, rlrop],
                       verbose=1)


  self._warn_if_super_not_called()


Epoch 1/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m582s[0m 2s/step - binary_accuracy: 0.5944 - loss: 0.6838 - val_binary_accuracy: 0.7011 - val_loss: 0.6615 - learning_rate: 1.0000e-04
Epoch 2/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m473s[0m 2s/step - binary_accuracy: 0.6766 - loss: 0.6333 - val_binary_accuracy: 0.7241 - val_loss: 0.6398 - learning_rate: 1.0000e-04
Epoch 3/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m475s[0m 2s/step - binary_accuracy: 0.7111 - loss: 0.5936 - val_binary_accuracy: 0.7308 - val_loss: 0.6284 - learning_rate: 1.0000e-04
Epoch 4/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m470s[0m 2s/step - binary_accuracy: 0.7288 - loss: 0.5788 - val_binary_accuracy: 0.7443 - val_loss: 0.6146 - learning_rate: 1.0000e-04
Epoch 5/5
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m467s[0m 2s/step - binary_accuracy: 0.7419 - loss: 0.5545 - val_binary_accuracy: 0.7471 - val_loss: 0.6169 - learni

In [14]:
# -----------------------------
# EVALUATION
# -----------------------------
preds = model.predict(test_gen, verbose=1)
pred_labels = (preds > 0.5).astype(int).reshape(-1)
y_true = df_test["class"].map({"Normal":0, "Pneumonia":1}).values

print("Test Accuracy:", accuracy_score(y_true, pred_labels))
print(classification_report(y_true, pred_labels, target_names=["Normal","Pneumonia"]))
print("ROC-AUC:", roc_auc_score(y_true, preds))


[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 125ms/step
Test Accuracy: 0.7916666666666666
              precision    recall  f1-score   support

      Normal       0.73      0.70      0.71       234
   Pneumonia       0.82      0.85      0.84       390

    accuracy                           0.79       624
   macro avg       0.78      0.77      0.78       624
weighted avg       0.79      0.79      0.79       624

ROC-AUC: 0.8254985754985755


In [16]:
# -----------------------------
# TFLITE INT8 QUANTIZATION
# -----------------------------
TFLITE_MODEL_PATH = "pneumonia_efficientnetv2s_int8.tflite"
N_REPRESENTATIVE = 100

def representative_dataset_gen_from_folder(img_folder, img_size, n=N_REPRESENTATIVE):
    input_name = model.inputs[0].name
    images = [os.path.join(img_folder, f) for f in os.listdir(img_folder) 
              if f.lower().endswith((".png", ".jpg", ".jpeg"))]
    images = random.sample(images, min(n, len(images)))
    
    for img_path in images:
        img = cv2.imread(img_path)
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (img_size, img_size))
        img = np.expand_dims(img, axis=0).astype(np.uint8)  # INT8 input
        yield {input_name: img}

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = lambda: representative_dataset_gen_from_folder(DATASET_DIR, IMG_SIZE)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

tflite_model = converter.convert()

with open(TFLITE_MODEL_PATH, "wb") as f:
    f.write(tflite_model)

print(f"✅ Fully INT8 quantized EfficientNetV2-S model saved as {TFLITE_MODEL_PATH}")


Saved artifact at '/tmp/tmpzx1qi2vw'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_1031')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  136221016588944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136220608735056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136220608735632: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136221016589136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136221016588752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136220608736208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136220608735824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136220608732560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136220608736016: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136220608734096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136220608

W0000 00:00:1760380732.956885      37 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1760380732.956932      37 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
fully_quantize: 0, inference_type: 6, input_inference_type: UINT8, output_inference_type: UINT8


✅ Fully INT8 quantized EfficientNetV2-S model saved as pneumonia_efficientnetv2s_int8.tflite
