# ResNet continued
- Load the previously trained model, train for more epochs, and decrease the learning rate

In [None]:
# Install the package that provides `iterstrat` (MultilabelStratifiedKFold),
# which is imported further down in this notebook.
!pip install iterative-stratification

In [None]:
import tensorflow as tf
# Report how many GPU devices TensorFlow can see in this session.
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPUs Available:", len(gpu_devices))


In [None]:
# Imports
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.image as mpimg
import random
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence
import cv2
import os
import sys
from io import StringIO
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence
import tensorflow.keras.applications.resnet50 as resnet
import warnings
warnings.filterwarnings('ignore')

In [None]:
# List everything mounted under the Kaggle input directory.
input_root = "/kaggle/input"
print(os.listdir(input_root))

# Location of the competition dataset, followed by a listing of its contents.
data_dir = "/kaggle/input/grand-xray-slam-division-b"
dataset_files = os.listdir(data_dir)
print('Filenames of the data', dataset_files)

In [None]:
# Read the training metadata (one row per image, including the label columns).
train = pd.read_csv("/kaggle/input/grand-xray-slam-division-b/train2.csv")

# Report the table size, then show the first rows as the cell output.
print(f"Metadata shape: {train.shape}")
train.head()

In [None]:
# 1. Feature & Target Preparation
from sklearn.preprocessing import LabelEncoder

# The 14 label columns (one binary column per condition).
conditions = [
    'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Enlarged Cardiomediastinum',
    'Fracture', 'Lung Lesion', 'Lung Opacity', 'No Finding', 'Pleural Effusion',
    'Pleural Other', 'Pneumonia', 'Pneumothorax', 'Support Devices'
]

# Metadata columns used as the feature matrix.
features = ["ViewCategory", "ViewPosition", "Age", "Sex"]

# Columns that hold categories and therefore need integer encoding.
categorical_features = ["ViewCategory", "ViewPosition", "Sex"]

# Work on a copy so the raw metadata in `train` stays untouched.
train_enc = train.copy()
for cat_col in categorical_features:
    # Cast to str first so every value in the column is encoded as a string.
    as_text = train[cat_col].astype(str)
    train_enc[cat_col] = LabelEncoder().fit_transform(as_text)

# Feature matrix from the encoded copy; label matrix from the original table.
X = train_enc[features].values
y = train[conditions].values

In [None]:
# Print the shape of the feature matrix (4 features: ViewCategory,
# ViewPosition, Age, Sex) and then of the label matrix (14 conditions).
for matrix in (X, y):
    print(matrix.shape)

In [None]:
# 2. View balancing for stratification.
# ViewCategory (Frontal / Lateral) is unbalanced, so it is treated as extra
# target columns when splitting.

# One-hot encode ViewCategory ...
view_dummies = pd.get_dummies(train["ViewCategory"], prefix="view")
view_onehot = view_dummies.values

# ... and append the resulting columns to the right of the label matrix.
# The augmented matrix is only used to stratify the split.
y_aug = np.concatenate([y, view_onehot], axis=1)

# MSKF (Multilabel Stratified K-Fold)

In [None]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

# 3. Multilabel Stratified K-Fold Split
# Split on the augmented target matrix so that both the condition labels and
# the view category are balanced across folds.
mskf = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=42)
folds = []
for fold, (train_idx, val_idx) in enumerate(mskf.split(X, y_aug)):
    print(f"Fold {fold}")
    print(" Train:", len(train_idx), " Val:", len(val_idx))

    fold_train_df = train.iloc[train_idx].reset_index(drop=True)
    fold_val_df = train.iloc[val_idx].reset_index(drop=True)

    # Check condition + view balance
    print("  Train views:", fold_train_df["ViewCategory"].value_counts().to_dict())
    print("  Val views:", fold_val_df["ViewCategory"].value_counts().to_dict())
    print("  Train labels sum:", fold_train_df[conditions].sum().to_dict())
    print("  Val labels sum:", fold_val_df[conditions].sum().to_dict())
    print("-"*60)

    # Keep the split; without this `folds` stays empty and `folds[0]` below
    # raises IndexError.
    folds.append((fold_train_df, fold_val_df))

# Use first fold for training
train_df, val_df = folds[0]
print(f"✅ Selected Fold 1 — Train {train_df.shape}, Val {val_df.shape}")

# data generator

In [None]:
class XRayDataGenerator(Sequence):
    """Keras ``Sequence`` that loads X-ray images from disk batch by batch.

    Every image named in the ``Image_name`` column is read with OpenCV,
    resized to ``img_size`` and passed through ``resnet.preprocess_input``.

    Args:
        dataframe: Table with an ``Image_name`` column and, unless
            ``is_test`` is true, one column per entry of the module-level
            ``conditions`` list.
        batch_size: Number of dataframe rows per batch.
        img_size: Target size handed to ``cv2.resize``, i.e. (width, height).
        is_test: When true, images are read from the test directory and
            batches contain images only (no labels).
        **kwargs: Forwarded to ``Sequence.__init__``.

    Raises:
        FileNotFoundError: If the image directory does not exist.
    """

    def __init__(self, dataframe, batch_size=32, img_size=(224, 224), is_test=False, **kwargs):
        super().__init__(**kwargs)
        self.dataframe = dataframe.reset_index(drop=True)
        self.batch_size = batch_size
        self.img_size = img_size
        self.is_test = is_test
        self.image_dir = '/kaggle/input/grand-xray-slam-division-b/train2/' if not is_test else '/kaggle/input/grand-xray-slam-division-b/test2/'
        self.conditions = conditions

        if not os.path.exists(self.image_dir):
            print(f"Error: Directory {self.image_dir} not found.")
            raise FileNotFoundError(f"Directory {self.image_dir} missing.")

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return (len(self.dataframe) + self.batch_size - 1) // self.batch_size

    def _blank_image(self):
        """Return an all-zero float32 image shaped like a resized image."""
        # cv2.resize takes (width, height) but returns (height, width, 3),
        # so the placeholder must use the swapped order to stack correctly.
        width, height = self.img_size
        return np.zeros((height, width, 3), dtype=np.float32)

    def __getitem__(self, idx):
        """Build batch ``idx``.

        Returns:
            ``(images, labels)`` as NumPy arrays, or only ``images`` when
            ``is_test`` is true.
        """
        start = idx * self.batch_size
        end = min(start + self.batch_size, len(self.dataframe))
        batch_data = self.dataframe.iloc[start:end]

        images, labels = [], []

        for _, row in batch_data.iterrows():
            img_path = os.path.join(self.image_dir, row['Image_name'])
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)

            if img is not None and img.shape[0] > 0 and img.shape[1] > 0:
                img = cv2.resize(img, self.img_size)
                img = resnet.preprocess_input(img)
                images.append(img)

                if not self.is_test:
                    labels.append(row[self.conditions].values.astype(np.float32))
            elif self.is_test:
                # Dropping a test image would shift every later prediction
                # relative to the dataframe rows, so keep the slot with a
                # blank placeholder instead.
                print(f"Warning: could not read {img_path}; using a blank image.")
                images.append(self._blank_image())
            # Unreadable training/validation images are skipped.

        if not images:
            # Nothing could be loaded: emit one blank sample so the batch is
            # never empty.
            images.append(self._blank_image())
            if not self.is_test:
                labels.append(np.zeros(len(self.conditions), dtype=np.float32))

        if not self.is_test:
            return np.array(images), np.array(labels)
        else:
            return np.array(images)

# Build the training and validation generators with a shared batch size.
batch_size = 32
train_generator, val_generator = (
    XRayDataGenerator(split_df, batch_size=batch_size)
    for split_df in (train_df, val_df)
)
print("Data generators created.")

# Load the sample submission as the test table (needed for the submission)
# and make sure the image names are strings.
test_df = pd.read_csv("/kaggle/input/grand-xray-slam-division-b/sample_submission_2.csv")
image_names = test_df["Image_name"].astype(str)
test_df["Image_name"] = image_names

print("✅ Data generators ready — Train, Val, and Test loaded.")

# resnet improved 

In [None]:
# from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import ResNet50

def build_resnet_model(num_classes=14):
    """Build a ResNet50 backbone with a small multilabel classification head.

    The backbone is created with ImageNet weights, without its top layers,
    for 224x224x3 inputs, and is frozen. The head is global average pooling,
    a 256-unit ReLU layer, 50% dropout and a sigmoid output layer.

    Args:
        num_classes: Number of sigmoid output units (one per label).

    Returns:
        The assembled, uncompiled Keras ``Model``.
    """
    backbone = ResNet50(weights="imagenet",  include_top=False, input_shape=(224, 224, 3))

    print("✅ Weights loaded successfully.")
    # Freeze the backbone so only the new head has trainable weights.
    backbone.trainable = False

    # Custom head stacked on the backbone's feature maps.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(256, activation="relu")(pooled)
    regularized = Dropout(0.5)(hidden)
    # Sigmoid (not softmax): each label is predicted independently.
    predictions = Dense(num_classes, activation="sigmoid")(regularized)

    return Model(backbone.input, predictions)
    
# model = build_resnet_model()
from tensorflow.keras.models import load_model

# Load the saved model instead of building a new one. compile=False skips
# compilation at load time; the model is compiled explicitly just below.
saved_model_path = "/kaggle/input/newresnet/final_resnet_model.h5"
model = load_model(saved_model_path, compile=False)

auc_metric = tf.keras.metrics.AUC(name="auc")
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss="binary_crossentropy",
    metrics=[auc_metric],
)

# Parameter summary: total, trainable and the remainder.
print("Model Architecture: ResNet50 + Custom Head")
total_params = model.count_params()
print(f"Total parameters: {total_params:,}")
trainable_params = sum(tf.size(var).numpy() for var in model.trainable_variables)
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {total_params - trainable_params:,}")
print("Model compiled successfully!")

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

# Save weights only, and only when validation AUC improves.
checkpoint_cb = ModelCheckpoint(
    "resnet_finetune.weights.h5",
    save_weights_only=True,
    save_best_only=True,
    monitor="val_auc",
    mode="max",
    verbose=1,
)
# Halve the learning rate after 2 epochs without a validation AUC gain.
reduce_lr_cb = ReduceLROnPlateau(monitor="val_auc", factor=0.5, patience=2, mode="max", verbose=1)
# Stop after 5 epochs without improvement, restoring the best weights.
early_stop_cb = EarlyStopping(monitor="val_auc", patience=5, mode="max", restore_best_weights=True)

callbacks = [checkpoint_cb, reduce_lr_cb, early_stop_cb]

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,
    verbose=1,
    callbacks=callbacks,
)

# Validation AUC of the last recorded epoch (0 if the metric was not logged).
val_auc_history = history.history.get("val_auc", [0])
val_auc = val_auc_history[-1]
print(f"✅ Continued fine-tuning done — new Validation AUC ≈ {val_auc:.4f}")
model.save("final_resnet_finetuned.h5")
