# ResNet Model: 
1. Multi-Label Stratified K-Fold
    - Multi-label imbalance handled: All rare diseases are proportionally represented.
    - View imbalance handled: Each fold has close to the same Frontal/Lateral distribution.
    - Sex & Age: Since they’re balanced already, we don’t need to augment stratification with them.

2. Data Generator: (train ML model in batches to increase time efficiency, since training data >100gb)

3. ResNet Model: fine tuned and pre-trained

4. Train Model & Evaluate

Problems and Solutions:
- No internet pip loading in kaggle, couldn't load iterstat or imagenet pretrained weights for MultiLabelKfold Stratify and using pretrained weights for my ResNet model. **Solution**: Load iterstat package manually from github and use. Load resnet50 pretrained weights manually from keras into kaggle and load weights into resnet model.
- fixed by verifying phone number on kaggle, now i can load iterstrat and imagenet pretrained weights


In [2]:
# Imports
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.image as mpimg
import random
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence
import cv2
import os
import sys
from io import StringIO
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence
import tensorflow.keras.applications.resnet50 as resnet
import warnings
warnings.filterwarnings('ignore')

In [3]:
print(os.listdir("/kaggle/input"))

# Path to competition dataset
data_dir = "/kaggle/input/grand-xray-slam-division-b"
# Check what files are inside
print('Filenames of the data', os.listdir(data_dir))

['newresnet', 'iterstat', 'resnet-pretrainedmodel', 'grand-xray-slam-division-b']
Filenames of the data ['test2', 'sample_submission_2.csv', 'train2.csv', 'train2']


In [4]:
# Load the training CSV metadata with labels
train = pd.read_csv("/kaggle/input/grand-xray-slam-division-b/train2.csv")

print('Metadata shape:',train.shape)
train.head()

Metadata shape: (108494, 21)


Unnamed: 0,Image_name,Patient_ID,Study,Sex,Age,ViewCategory,ViewPosition,Atelectasis,Cardiomegaly,Consolidation,...,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,00000003_001_001.jpg,3,1,Male,41.0,Frontal,AP,0,1,0,...,1,0,0,1,0,0,0,0,0,0
1,00000004_001_001.jpg,4,1,Female,20.0,Frontal,PA,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,00000004_001_002.jpg,4,1,Female,20.0,Lateral,Lateral,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,00000006_001_001.jpg,6,1,Female,42.0,Frontal,AP,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,00000010_001_001.jpg,10,1,Female,50.0,Frontal,PA,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [5]:
# 1. Feature & Target Preperation
# Define labels
conditions = [
    'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Enlarged Cardiomediastinum',
    'Fracture', 'Lung Lesion', 'Lung Opacity', 'No Finding', 'Pleural Effusion',
    'Pleural Other', 'Pneumonia', 'Pneumothorax', 'Support Devices'
]
# Features you want
features = ["ViewCategory", "ViewPosition", "Age", "Sex"]

# Encode categorical features
from sklearn.preprocessing import LabelEncoder

train_enc = train.copy()   # train data encoded
for col in ["ViewCategory", "ViewPosition", "Sex"]:  # features that can be encoded
    le = LabelEncoder()
    train_enc[col] = le.fit_transform(train[col].astype(str))

X = train_enc[features].values
y = train[conditions].values

In [6]:
print(X.shape) # 4 features (ViewCategory, ViewPosition, Age, Sex)
print(y.shape)  # 14 conditions

(108494, 4)
(108494, 14)


In [7]:
# 2. Adding ViewBalancing for Stratification: ViewCategory= Frontal, Lateral; since ViewCategory is unbalanced

# One-hot encode ViewCategory and append to 
view_onehot = pd.get_dummies(train["ViewCategory"], prefix="view").values

y_aug = np.hstack([y, view_onehot])  # augmented target matrix (added ViewCategory as y to stratify and reduce bias)

# **Multi-Label Stratified Kfold**
- Split to train and val data using multlabelKfold
    - Multi-label imbalance handled: All rare diseases are proportionally represented.
    - View imbalance handled: Each fold has close to the same Frontal/Lateral distribution.
    - Sex & Age: Since they’re balanced already, we don’t need to augment stratification with them

**Note**: cannot dowwnload iterstat pakcage on kaggle.
1. Solution fetch and load Multi-Kfold stratification function in script manually, this way we get the same behavoir of stratification without the need of internet
 https://github.com/trent-b/iterative-stratification/blob/master/iterstrat/ml_stratifiers.py

In [10]:
!pip install iterative-stratification

In [9]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

# 3. Multilabel Stratified K-Fold Split
mskf = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_idx, val_idx) in enumerate(mskf.split(X, y_aug)):
    print(f"Fold {fold}")
    print(" Train:", len(train_idx), " Val:", len(val_idx))

    train_df = train.iloc[train_idx].reset_index(drop=True)
    val_df   = train.iloc[val_idx].reset_index(drop=True)

    # Check condition + view balance
    print("  Train views:", train_df["ViewCategory"].value_counts().to_dict())
    print("  Val views:", val_df["ViewCategory"].value_counts().to_dict())
    print("  Train labels sum:", train_df[conditions].sum().to_dict())
    print("  Val labels sum:", val_df[conditions].sum().to_dict())
    print("-"*60)

# Use first fold for training
train_df, val_df = folds[0]
print(f"✅ Selected Fold 1 — Train {train_df.shape}, Val {val_df.shape}")

Fold 0
 Train: 86795  Val: 21699
  Train views: {'Frontal': 76011, 'Lateral': 10784}
  Val views: {'Frontal': 19003, 'Lateral': 2696}
  Train labels sum: {'Atelectasis': 30890, 'Cardiomegaly': 27984, 'Consolidation': 23715, 'Edema': 21254, 'Enlarged Cardiomediastinum': 30053, 'Fracture': 11662, 'Lung Lesion': 9885, 'Lung Opacity': 39217, 'No Finding': 27391, 'Pleural Effusion': 27655, 'Pleural Other': 5544, 'Pneumonia': 11453, 'Pneumothorax': 6991, 'Support Devices': 29908}
  Val labels sum: {'Atelectasis': 7723, 'Cardiomegaly': 6996, 'Consolidation': 5929, 'Edema': 5313, 'Enlarged Cardiomediastinum': 7513, 'Fracture': 2916, 'Lung Lesion': 2472, 'Lung Opacity': 9805, 'No Finding': 6974, 'Pleural Effusion': 6914, 'Pleural Other': 1387, 'Pneumonia': 2863, 'Pneumothorax': 1747, 'Support Devices': 7477}
------------------------------------------------------------
Fold 1
 Train: 86795  Val: 21699
  Train views: {'Frontal': 76011, 'Lateral': 10784}
  Val views: {'Frontal': 19003, 'Lateral': 

In [11]:
print(train_df.shape)
print(val_df.shape)

(86796, 21)
(21698, 21)


# Data Generator
To handle large datasets, we use a custom generator to load images in batches.

Each image is preprocessed with ResNet-50’s preprocessing function and resized to 224×224 (the default input size for ResNet-50).

In [12]:
class XRayDataGenerator(Sequence):
    def __init__(self, dataframe, batch_size=32, img_size=(224, 224), is_test=False, **kwargs):
        super().__init__(**kwargs)
        self.dataframe = dataframe.reset_index(drop=True)
        self.batch_size = batch_size
        self.img_size = img_size
        self.is_test = is_test
        self.image_dir = '/kaggle/input/grand-xray-slam-division-b/train2/' if not is_test else '/kaggle/input/grand-xray-slam-division-b/test2/'
        self.conditions = conditions
        
        if not os.path.exists(self.image_dir):
            print(f"Error: Directory {self.image_dir} not found.")
            raise FileNotFoundError(f"Directory {self.image_dir} missing.")
    
    def __len__(self):
        return (len(self.dataframe) + self.batch_size - 1) // self.batch_size
    
    def __getitem__(self, idx):
        start = idx * self.batch_size
        end = min(start + self.batch_size, len(self.dataframe))
        batch_data = self.dataframe.iloc[start:end]
        
        images, labels = [], []
        
        for _, row in batch_data.iterrows():
            img_path = os.path.join(self.image_dir, row['Image_name'])
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            
            if img is not None and img.shape[0] > 0 and img.shape[1] > 0:
                img = cv2.resize(img, self.img_size)
                img = resnet.preprocess_input(img)
                images.append(img)
                
                if not self.is_test:
                    labels.append(row[self.conditions].values.astype(np.float32))
        
        if not images:
            dummy_img = np.zeros((*self.img_size, 3), dtype=np.float32)
            images.append(dummy_img)
            if not self.is_test:
                labels.append(np.zeros(len(self.conditions), dtype=np.float32))
        
        if not self.is_test:
            return np.array(images), np.array(labels)
        else:
            return np.array(images)

# Create generators
batch_size = 32
train_generator = XRayDataGenerator(train_df, batch_size=batch_size)
val_generator = XRayDataGenerator(val_df, batch_size=batch_size)
print("Data generators created.")

# Also define test_df (important for submission)
test_df = pd.read_csv("/kaggle/input/grand-xray-slam-division-b/sample_submission_2.csv")
test_df["Image_name"] = test_df["Image_name"].astype(str)

print("✅ Data generators ready — Train, Val, and Test loaded.")

Data generators created.


# Build ResNet-50 Model¶
We load a pretrained ResNet-50 model with weights from ImageNet.
The convolutional base is frozen to retain pretrained features, and we add a custom classifier head:

- Global Average Pooling to reduce feature maps.
- Dense layer for feature learning.
- Dropout to reduce overfitting.
- Sigmoid output for multi-label classification across 14 chest conditions.

In [14]:
# from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import ResNet50

def build_resnet_model(num_classes=14):
    # Load Resnet with cached ImageNet weights
    base_model = ResNet50(weights="imagenet",  include_top=False, input_shape=(224, 224, 3))
    
    print("✅ Weights loaded successfully.")
    base_model.trainable = False   # freeze backbone for now
    
    # add custom head
    inputs = base_model.input
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation="sigmoid")(x)  # multilabel
    
    model = Model(inputs, outputs)
    return model
    
model = build_resnet_model()
model.compile(
    optimizer=Adam(learning_rate=0.0001), loss="binary_crossentropy", metrics=[tf.keras.metrics.AUC(name="auc")]
)

print("Model Architecture: ResNet50 + Custom Head")
print(f"Total parameters: {model.count_params():,}")
trainable_params = sum([tf.size(v).numpy() for v in model.trainable_variables])
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {model.count_params() - trainable_params:,}")
print("Model compiled successfully!")

I0000 00:00:1759705911.666022      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
✅ Weights loaded successfully.
Model Architecture: ResNet50 + Custom Head
Total parameters: 24,115,854
Trainable parameters: 528,142
Non-trainable parameters: 23,587,712
Model compiled successfully!


# Train the Model
- now we train the ResNet model for 3 epoch using the training and validation generators
- the performance is tracked using AUC-ROC, which evaluates each of the 14 conditions

In [16]:
# ----------------------------------------
# ⚙️ GPU Optimization
# ----------------------------------------
tf.config.optimizer.set_jit(True)  # Enable XLA (compiles graph for speed)
tf.config.experimental.enable_tensor_float_32_execution(True)
mixed_precision.set_global_policy('mixed_float16')
print("✅ Mixed precision + XLA enabled for faster GPU performance.")


✅ Mixed precision + XLA enabled for faster GPU performance.


In [18]:
# # Train for 3 epochs takes a long time
# history = model.fit(
#     train_generator,
#     validation_data=val_generator,
#     epochs=3,
#     verbose=1
# )

# # Display final validation AUC
# val_auc = history.history['val_AUC'][-1] if 'val_AUC' in history.history else 0.0
# print(f"Final Validation AUC-ROC: {val_auc:.4f}")

# Make Preditions or try better hyperparemeters and submit

In [19]:
import tensorflow as tf
print("GPUs Available:", len(tf.config.list_physical_devices('GPU')))


GPUs Available: 1


In [21]:
# ============================================
# 🧠 GRAND X-RAY SLAM - RESNET50 FINAL TRAINING CELL . AUC=0.9021
# ============================================
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import mixed_precision
import os


# 🧠 Compile model (from my build_resnet_model function)
print("✅ Model compiled successfully.")
print(f"Total parameters: {model.count_params():,}")

# ----------------------------------------
# 🚀 Train model (resume safe)
# ----------------------------------------
EPOCHS = 3  # Adjust for longer runs later

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    verbose=1,
)

# ----------------------------------------
# 🧾 Evaluate and save results
# ----------------------------------------
val_auc = history.history.get('val_AUC', [0])[-1]
print(f"\n✅ Final Validation AUC: {val_auc:.4f}")

# Save final model
model.save("final_resnet_model.h5")
print("✅ Model saved as final_resnet_model.h5")

✅ Model compiled successfully.
Total parameters: 24,115,854
⚡ Starting fresh training run...
Epoch 1/3
[1m2713/2713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5464s[0m 2s/step - AUC: 0.8324 - loss: 0.4167 - val_AUC: 0.8907 - val_loss: 0.3494 - learning_rate: 1.0000e-04
Epoch 2/3
[1m2713/2713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4773s[0m 2s/step - AUC: 0.8813 - loss: 0.3587 - val_AUC: 0.8994 - val_loss: 0.3346 - learning_rate: 1.0000e-04
Epoch 3/3
[1m2713/2713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5001s[0m 2s/step - AUC: 0.8926 - loss: 0.3439 - val_AUC: 0.9021 - val_loss: 0.3299 - learning_rate: 1.0000e-04

✅ Final Validation AUC: 0.0000
✅ Model saved as final_resnet_model.h5


In [34]:
val_auc = history.history.get('val_AUC', [0])[-1]
print(f"\n✅ Final Validation AUC: {val_auc:.4f}")


✅ Final Validation AUC: 0.9021


In [47]:
# ----------------------------------------
# 🔮 Generate predictions and submission
# ----------------------------------------
print("\nGenerating predictions for submission...")
# model.load_weights("best_model.h5")  # load best checkpoint for inference
# model = tf.keras.models.load_model("final_resnet_model.h5", compile=False)
#  define test_df (important for submission)
test_df = pd.read_csv("/kaggle/input/grand-xray-slam-division-b/sample_submission_2.csv")
test_df["Image_name"] = test_df["Image_name"].astype(str)

test_generator = XRayDataGenerator(test_df, batch_size=32, is_test=True)
preds = model.predict(test_generator, verbose=1)

submission = pd.DataFrame(preds, columns=conditions)
submission.insert(0, "Image_name", test_df["Image_name"].values) # start, column, 
submission.to_csv("ResNet1submission.csv", index=False)

print("✅ Submission file created successfully: submission.csv")
print(submission.head())


Generating predictions for submission...
[1m1498/1498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2426s[0m 2s/step
✅ Submission file created successfully: submission.csv
             Image_name  Atelectasis  Cardiomegaly  Consolidation     Edema  \
0  00000002_002_001.jpg     0.655762      0.536621       0.580566  0.392090   
1  00000002_001_001.jpg     0.737793      0.547363       0.604980  0.290039   
2  00000002_001_002.jpg     0.739258      0.609863       0.702148  0.498047   
3  00000008_001_001.jpg     0.861816      0.810059       0.675293  0.532715   
4  00000008_002_001.jpg     0.543945      0.569824       0.519043  0.528320   

   Enlarged Cardiomediastinum  Fracture  Lung Lesion  Lung Opacity  \
0                    0.717773  0.463867     0.219604      0.791504   
1                    0.669434  0.640137     0.112061      0.774414   
2                    0.815430  0.603516     0.176758      0.879883   
3                    0.900391  0.189819     0.065613      0.943848   

In [48]:
submission.shape

(47927, 15)

In [51]:
submission.to_csv("submission.csv", index=False)

The AUC for the above resnet model that uses 1 MultiLabelStratify kfold and above resnet model + custom head and feature stratification.
**AUC= 0.9021**


# Improve the AUC + Runtime for model

In [None]:
# # ============================================================
# # ⚡ GPU-Optimized GRAND X-RAY SLAM — ResNet50 AUC Pipeline
# # ============================================================
# import os, gc
# import numpy as np
# import pandas as pd
# import tensorflow as tf
# from tensorflow.keras.applications import ResNet50
# from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
# from tensorflow.keras.optimizers.schedules import CosineDecayRestarts
# from tensorflow.keras import mixed_precision
# from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

# # Enable GPU optimizations
# tf.config.optimizer.set_jit(True)
# tf.config.experimental.enable_tensor_float_32_execution(True)
# mixed_precision.set_global_policy('mixed_float16')
# print("✅ Mixed precision + XLA enabled for faster GPU performance.")

# # ============================================================
# # 🪄 Dataset Preprocessing (tf.data pipeline)
# # ============================================================
# IMG_SIZE = (224, 224)
# BATCH_SIZE = 32

# def decode_image(filename, label=None, augment=False):
#     img = tf.io.read_file(filename)
#     img = tf.image.decode_jpeg(img, channels=3)
#     img = tf.image.resize(img, IMG_SIZE)
#     img = tf.keras.applications.resnet50.preprocess_input(img)

#     if augment:
#         img = tf.image.random_flip_left_right(img)
#         img = tf.image.random_brightness(img, 0.1)
#     return (img, label) if label is not None else img

# def make_dataset(df, is_test=False, augment=False):
#     img_paths = ("/kaggle/input/grand-xray-slam-division-b/test2/" + df["Image_name"]) if is_test else ("/kaggle/input/grand-xray-slam-division-b/train2/" + df["Image_name"])
#     labels = None if is_test else df[conditions].values.astype(np.float32)
    
#     ds = tf.data.Dataset.from_tensor_slices((img_paths, labels) if not is_test else img_paths)
#     if not is_test:
#         ds = ds.map(lambda x, y: decode_image(x, y, augment), num_parallel_calls=tf.data.AUTOTUNE)
#     else:
#         ds = ds.map(lambda x: decode_image(x, None, False), num_parallel_calls=tf.data.AUTOTUNE)
    
#     return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# # ============================================================
# # 🧱 Build ResNet50 model
# # ============================================================
# def build_resnet_model(num_classes=14, unfreeze_layers=None):
#     base = ResNet50(weights="imagenet", include_top=False, input_shape=(224,224,3))
#     if unfreeze_layers:
#         for layer in base.layers[-unfreeze_layers:]:
#             layer.trainable = True
#     else:
#         base.trainable = False

#     x = tf.keras.layers.GlobalAveragePooling2D()(base.output)
#     x = tf.keras.layers.Dense(256, activation="relu")(x)
#     x = tf.keras.layers.Dropout(0.4)(x)
#     out = tf.keras.layers.Dense(num_classes, activation="sigmoid")(x)
#     return tf.keras.Model(inputs=base.input, outputs=out)

# # ============================================================
# # ⚙️ Callbacks
# # ============================================================
# def get_callbacks(fold):
#     return [
#         ModelCheckpoint(f"fold_{fold}_best.h5", monitor="val_AUC", mode="max", save_best_only=True, verbose=1),
#         EarlyStopping(monitor="val_AUC", mode="max", patience=5, restore_best_weights=True, verbose=1),
#     ]

# # ============================================================
# # 🚀 Cross-validation training loop
# # ============================================================
# mskf = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# fold_aucs = []

# for fold, (train_idx, val_idx) in enumerate(mskf.split(X, y_aug)):
#     print(f"\n================ FOLD {fold+1} ================")
#     train_df = train.iloc[train_idx].reset_index(drop=True)
#     val_df   = train.iloc[val_idx].reset_index(drop=True)

#     # tf.data datasets instead of Sequence
#     train_ds = make_dataset(train_df, augment=True)
#     val_ds   = make_dataset(val_df)

#     # Learning rate schedule
#     lr_schedule = CosineDecayRestarts(
#         initial_learning_rate=1e-4,
#         first_decay_steps=len(train_ds)*2,
#         t_mul=2.0, m_mul=0.9, alpha=1e-6
#     )

#     # Stage 1 — Train top layers
#     model = build_resnet_model()
#     model.compile(optimizer=tf.keras.optimizers.Adam(lr_schedule),
#                   loss="binary_crossentropy",
#                   metrics=[tf.keras.metrics.AUC(name="auc")])
#     history = model.fit(train_ds, validation_data=val_ds, epochs=3, callbacks=get_callbacks(fold), verbose=1)
    
#     # Stage 2 — Fine-tune last 20 layers
#     for layer in model.layers[-20:]: layer.trainable = True
#     model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),
#                   loss="binary_crossentropy",
#                   metrics=[tf.keras.metrics.AUC(name="auc")])
#     history_ft = model.fit(train_ds, validation_data=val_ds, epochs=3, callbacks=get_callbacks(fold), verbose=1)

#     # Save final model per fold
#     model.save(f"fold_{fold}_final.h5")
#     best_auc = max(history.history['val_auc'] + history_ft.history['val_AUC'])
#     fold_aucs.append(best_auc)
#     print(f"✅ Fold {fold+1} Best AUC: {best_auc:.4f}")
#     gc.collect()

# # ============================================================
# # 🧾 CV Summary
# # ============================================================
# print(f"\n📊 Cross-validation AUCs: {fold_aucs}")
# print(f"🏆 Mean CV AUC: {np.mean(fold_aucs):.4f}")

# # ============================================================
# # 🔮 Ensemble predictions (mean of 5 folds)
# # ============================================================
# print("\nGenerating final ensemble predictions...")
# test_ds = make_dataset(test_df, is_test=True)
# fold_preds = []

# for fold in range(5):
#     m = tf.keras.models.load_model(f"fold_{fold}_best.h5", compile=False)
#     fold_preds.append(m.predict(test_ds, verbose=1))

# final_preds = np.mean(fold_preds, axis=0)
# submission = pd.DataFrame(final_preds, columns=conditions)
# submission.insert(0, "Image_name", test_df["Image_name"].values)
# submission.to_csv("submission.csv", index=False)
# print("✅ submission.csv created successfully!")
