In [5]:
# Imports
import numpy as np
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications import VGG16, ResNet50, DenseNet121, EfficientNetB3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import KFold
import pickle
import collections
from sklearn.metrics import (
    accuracy_score, roc_auc_score, precision_score, recall_score,
    f1_score, confusion_matrix, roc_curve
)

In [6]:
# Use mixed precision to save VRAM: the global "mixed_float16" policy applies to
# Keras layers created after this cell (build_improved_model pins its final
# sigmoid layer to float32 explicitly).
# NOTE(review): the device log in this notebook reports a Tesla P100
# (compute capability 6.0) -- confirm float16 is a net win on that GPU.
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy("mixed_float16")
print("mixed precision enabled.")

mixed precision enabled.


In [7]:
# Load the preprocessed train/val/test splits (balance checked)
DATA_PATH = "/kaggle/input/preprocessed-mammo-splits"
train, val, test = (
    np.load(os.path.join(DATA_PATH, split_file))
    for split_file in ("train_data.npz", "val_data.npz", "test_data.npz")
)

# Each archive stores the images under "X" and the labels under "y"
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    (archive["X"], archive["y"]) for archive in (train, val, test)
)

In [8]:
# Compute 'balanced' class weights from the training labels; the resulting
# {class: weight} mapping is passed to model.fit as class_weight.
train_classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=train_classes, y=y_train)
class_weight_dict = {cls: weight for cls, weight in zip(train_classes, class_weights)}
print("Class Weights:", class_weight_dict)

Class Weights: {0: 1.1308917197452228, 1: 0.8962645128722867}


In [9]:
# Expand dims because TF expects (H, W, 1) from (H, W).
# The ndim guard makes this cell idempotent: re-running it no longer appends an
# extra trailing axis on every execution.
def _with_channel_axis(images):
    """Return `images` as a float32 array with a trailing channel axis.

    A 3-D array (assumed to be (N, H, W)) gains a size-1 channel axis; an array
    of any other rank is only cast to float32.
    """
    images = np.asarray(images)
    if images.ndim == 3:
        images = images[..., np.newaxis]
    return images.astype("float32")


X_train = _with_channel_axis(X_train)
X_val = _with_channel_axis(X_val)
X_test = _with_channel_axis(X_test)

In [10]:

# Grayscale -> RGB conversion (the models below are built with a 3-channel input)
def convert_to_rgb(image, label):
    """Replicate the gray channel into three channels; the label passes through.

    tf.squeeze is called without an axis, so every size-1 dimension that remains
    after the conversion is dropped.
    """
    return tf.squeeze(tf.image.grayscale_to_rgb(image)), label

In [11]:
def _rotate_image(image, angle):
    """Rotate one (H, W, C) float image by `angle` radians about its centre.

    Uses a projective transform so that small, arbitrary angles are possible
    (tf.image.rot90 only rotates by multiples of 90 degrees). Pixels that map
    outside the source image are filled by reflection. The output keeps the
    input's height and width.
    """
    static_shape = image.shape
    dims = tf.shape(image)
    height = tf.cast(dims[0], tf.float32)
    width = tf.cast(dims[1], tf.float32)
    cos_a = tf.cos(angle)
    sin_a = tf.sin(angle)
    zero = tf.zeros_like(cos_a)

    # Translation terms that keep the image centre fixed under the rotation
    x_offset = ((width - 1.0) - (cos_a * (width - 1.0) - sin_a * (height - 1.0))) / 2.0
    y_offset = ((height - 1.0) - (sin_a * (width - 1.0) + cos_a * (height - 1.0))) / 2.0

    # Row-major 3x3 projective matrix with the final entry (fixed at 1) omitted;
    # it maps output coordinates to input coordinates.
    transform = tf.stack([cos_a, -sin_a, x_offset, sin_a, cos_a, y_offset, zero, zero])

    rotated = tf.raw_ops.ImageProjectiveTransformV3(
        images=image[tf.newaxis, ...],
        transforms=transform[tf.newaxis, :],
        output_shape=dims[:2],
        fill_value=0.0,
        interpolation="BILINEAR",
        fill_mode="REFLECT",
    )[0]
    # The op receives a dynamic output_shape; restore the static shape we know.
    rotated.set_shape(static_shape)
    return rotated


def augment(image, label):
    """Randomly augment one RGB training image; the label is returned unchanged.

    Steps, in order: small random rotation, random horizontal/vertical flips,
    random brightness and contrast jitter, and a random zoom implemented as a
    random crop of 80-100% of each side followed by a resize to 224x224.

    The previous implementation derived an integer `k` for tf.image.rot90 from
    the sampled angle; because |angle * 2 / pi| < 1 the integer cast always
    produced 0, so no rotation was ever applied. The rotation is now real.

    NOTE(review): brightness/contrast outputs are not clipped; the +/-0.2
    brightness delta presumably assumes pixel values scaled to [0, 1] --
    confirm against the preprocessing that produced the .npz files.
    """
    # Random rotation within roughly +/-15 degrees (0.26 rad)
    angle = tf.random.uniform([], -0.26, 0.26)
    image = _rotate_image(image, angle)

    # Random flips
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)

    # Random brightness/contrast adjustments
    image = tf.image.random_brightness(image, 0.2)
    image = tf.image.random_contrast(image, 0.8, 1.2)

    # Random zoom (crop and resize)
    zoom_factor = tf.random.uniform([], 0.8, 1.0, dtype=tf.float32)
    h, w = tf.shape(image)[0], tf.shape(image)[1]
    crop_size_h = tf.cast(tf.cast(h, tf.float32) * zoom_factor, tf.int32)
    crop_size_w = tf.cast(tf.cast(w, tf.float32) * zoom_factor, tf.int32)

    # Ensure crop dimensions don't exceed image dimensions
    crop_size_h = tf.minimum(crop_size_h, h)
    crop_size_w = tf.minimum(crop_size_w, w)

    image = tf.image.random_crop(image, size=[crop_size_h, crop_size_w, 3])
    image = tf.image.resize(image, [224, 224])

    return image, label

In [12]:
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE


def _batched(dataset):
    """Batch and prefetch a mapped dataset (the shared tail of every pipeline)."""
    return dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)


# Wrap the in-memory arrays as tf.data datasets
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val))
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test))

# Training pipeline: shuffle -> grayscale-to-RGB -> augmentation
# NOTE(review): the shuffle buffer (1024) is smaller than the training set (the
# training log shows 555 steps per epoch at this batch size), so shuffling is
# only partial -- confirm the stored sample order is not grouped by class.
train_ds = train_ds.shuffle(1024)
train_ds = train_ds.map(convert_to_rgb, num_parallel_calls=AUTOTUNE)
train_ds = train_ds.map(augment, num_parallel_calls=AUTOTUNE)
train_ds = _batched(train_ds)

# Evaluation pipelines: RGB conversion only, no shuffling or augmentation
val_ds = _batched(val_ds.map(convert_to_rgb, num_parallel_calls=AUTOTUNE))
test_ds = _batched(test_ds.map(convert_to_rgb, num_parallel_calls=AUTOTUNE))

I0000 00:00:1745486477.846563      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [13]:
def build_improved_model(base_model_fn, name="model", lr=1e-4):
    """Build and compile a binary classifier on top of a frozen ImageNet backbone.

    Args:
        base_model_fn: backbone constructor (e.g. DenseNet121); it is called with
            include_top=False, weights='imagenet' and a (224, 224, 3) input shape.
        name: name given to the resulting Model.
        lr: learning rate for the Adam optimizer.

    Returns:
        (model, base_model): the compiled classifier and its backbone, so the
        caller can unfreeze the backbone later.
    """
    image_shape = (224, 224, 3)
    base_model = base_model_fn(include_top=False, weights='imagenet', input_shape=image_shape)
    base_model.trainable = False  # the backbone starts out frozen

    inputs = Input(shape=image_shape)
    # The backbone is always called with training=False
    features = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(features)

    # Classification head: two Dense -> BatchNorm -> Dropout stages
    for units, drop_rate in ((512, 0.5), (256, 0.3)):
        x = Dense(units, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(drop_rate)(x)

    # The sigmoid output is explicitly created with dtype float32
    outputs = Dense(1, activation='sigmoid', dtype='float32')(x)

    model = Model(inputs, outputs, name=name)

    # Plain float learning rate (no schedule object)
    optimizer = Adam(learning_rate=lr)
    tracked_metrics = [
        'accuracy',
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ]
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=tracked_metrics)

    return model, base_model

In [14]:
# unfreeze_model function --- finetuning
def unfreeze_model(model, base_model, lr=1e-5, num_trainable_layers=30):
    """Unfreeze the tail of the backbone and recompile the model for fine-tuning.

    Args:
        model: the full classifier that wraps `base_model`.
        base_model: the backbone whose last layers should become trainable.
        lr: learning rate for the fresh Adam optimizer used during fine-tuning.
        num_trainable_layers: how many of the backbone's last layers are left
            trainable (default 30, the previously hard-coded value). Values
            larger than the backbone depth unfreeze every layer; 0 or a
            negative value keeps every layer frozen.

    Returns:
        The same `model` object, recompiled with the same loss and metrics that
        build_improved_model uses.
    """
    # Unfreeze the base model as a whole, then re-freeze the early layers
    base_model.trainable = True

    backbone_layers = base_model.layers
    # An explicit cut-off avoids the `layers[:-0]` pitfall, which would select
    # nothing and therefore freeze no layers at all.
    freeze_until = max(len(backbone_layers) - max(num_trainable_layers, 0), 0)
    for layer in backbone_layers[:freeze_until]:
        layer.trainable = False

    # Recompile so the new trainable flags take effect; plain float learning rate
    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.AUC(name='auc'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall')
        ]
    )

    return model

In [15]:
# Function to find the optimal threshold using the validation set
def find_optimal_threshold(model, val_ds):
    """Pick the decision threshold that maximises Youden's J (TPR - FPR).

    Args:
        model: object exposing `predict(dataset)` that returns one score per sample.
        val_ds: iterable of (inputs, labels) batches. It is iterated twice (once
            by `predict`, once to collect labels), so it must yield samples in
            the same order on every pass.

    Returns:
        The chosen threshold as a Python float. It is always one of the real
        score values: the leading sentinel entry that roc_curve prepends
        (larger than every score) is never selected, so a classifier that is
        no better than chance no longer yields an unusable threshold.
    """
    # Flatten (N, 1) model outputs and label batches to 1-D vectors
    val_pred = np.asarray(model.predict(val_ds)).ravel()
    val_true = np.concatenate([np.asarray(y).ravel() for _, y in val_ds], axis=0)

    # Calculate ROC curve and Youden's J for every candidate threshold
    fpr, tpr, thresholds = roc_curve(val_true, val_pred)
    j_scores = tpr - fpr

    # Index 0 is roc_curve's sentinel threshold (TPR = FPR = 0); skip it
    if len(thresholds) > 1:
        best_idx = 1 + int(np.argmax(j_scores[1:]))
    else:
        best_idx = 0
    best_threshold = float(thresholds[best_idx])

    print(f"Optimal threshold: {best_threshold:.4f} (Youden's J = {j_scores[best_idx]:.4f})")
    print(f"At threshold {best_threshold:.4f}: TPR={tpr[best_idx]:.4f}, FPR={fpr[best_idx]:.4f}")

    return best_threshold

In [16]:
def evaluate_with_threshold(model, ds, threshold=0.5):
    """Evaluate a binary classifier on `ds` at a given decision threshold.

    Args:
        model: object exposing `predict(dataset)` that returns one score per sample.
        ds: iterable of (inputs, labels) batches with 0/1 labels. It is iterated
            twice (once by `predict`, once to collect labels), so it must yield
            samples in the same order on every pass.
        threshold: a sample is predicted positive when its score is >= threshold.
            This matches the convention of sklearn's roc_curve thresholds, so a
            threshold tuned with find_optimal_threshold reproduces the TPR/FPR
            it was selected for.

    Returns:
        dict with accuracy, auc, precision, recall, specificity, f1,
        confusion_matrix (always 2x2, rows/columns ordered [0, 1]), the raw
        predictions exactly as returned by `model.predict`, and the threshold.
        AUC is NaN when only one class is present in the labels; precision,
        recall, F1 and specificity fall back to 0 when their denominator is 0.
    """
    # Raw predictions are kept as-is for the caller; metrics use a 1-D view
    pred = model.predict(ds)
    scores = np.asarray(pred).ravel()

    # Get true labels as a flat vector
    true = np.concatenate([np.asarray(y).ravel() for _, y in ds], axis=0)

    # Apply threshold (inclusive, consistent with roc_curve)
    pred_binary = (scores >= threshold).astype(int)

    # Calculate metrics
    acc = accuracy_score(true, pred_binary)
    # AUC is undefined with a single class; report NaN instead of raising
    auc = roc_auc_score(true, scores) if np.unique(true).size > 1 else float("nan")
    precision = precision_score(true, pred_binary, zero_division=0)
    recall = recall_score(true, pred_binary, zero_division=0)
    f1 = f1_score(true, pred_binary, zero_division=0)
    # Fixing the label set guarantees a 2x2 matrix even if a class is absent
    cm = confusion_matrix(true, pred_binary, labels=[0, 1])

    # Calculate specificity
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0

    print(f"Accuracy: {acc:.4f}")
    print(f"AUC: {auc:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall (Sensitivity): {recall:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix:\n{cm}")

    return {
        'accuracy': acc,
        'auc': auc,
        'precision': precision,
        'recall': recall,
        'specificity': specificity,
        'f1': f1,
        'confusion_matrix': cm,
        'predictions': pred,
        'threshold': threshold
    }

In [17]:
# Prepare Callbacks: create the directory that get_callbacks' ModelCheckpoint
# writes into; exist_ok=True makes re-running this cell harmless.
os.makedirs("/kaggle/working/models", exist_ok=True)

def get_callbacks(name):
    """Return the callback list for one training run.

    Args:
        name: run identifier; the best checkpoint is written to
            /kaggle/working/models/<name>.keras.

    Returns:
        [EarlyStopping, ModelCheckpoint, ReduceLROnPlateau]. Early stopping and
        the LR reduction both watch val_loss, while the checkpoint keeps the
        epoch with the highest val_auc.
    """
    # Stop after 15 epochs without val_loss improvement, restoring the best weights
    early_stop = EarlyStopping(
        monitor='val_loss', patience=15, restore_best_weights=True, verbose=1
    )

    # Keep only the checkpoint with the best validation AUC
    checkpoint = ModelCheckpoint(
        f"/kaggle/working/models/{name}.keras",
        save_best_only=True, monitor='val_auc', mode='max', verbose=1
    )

    # Multiply the LR by 0.2 after 5 stagnant epochs, never going below 1e-6
    lr_decay = ReduceLROnPlateau(
        monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=1
    )

    return [early_stop, checkpoint, lr_decay]

In [18]:
# Backbones to train, keyed by the name used for logs and saved files.
# Only DenseNet121 is enabled; uncomment an entry to train it as well
# (every entry carries a trailing comma so toggling lines stays valid syntax).
models_to_train = {
    # "VGG16": VGG16,
    # "ResNet50": ResNet50,
    "DenseNet121": DenseNet121,
    # "EfficientNetB3": EfficientNetB3,
}

In [19]:
# Per-model bookkeeping, keyed by model name:
#   history_dict       -> training histories
#   model_results      -> test-set evaluation results
#   all_trained_models -> the trained model objects
history_dict, model_results, all_trained_models = {}, {}, {}

In [20]:
# Train, fine-tune, threshold-tune, evaluate and persist every selected backbone
for name, model_fn in models_to_train.items():
    print(f"\n{'='*50}")
    print(f"Training {name}...")
    print(f"{'='*50}")

    model, base_model = build_improved_model(model_fn, name=name)

    # Arguments shared by both training phases (30 epochs each)
    fit_kwargs = dict(
        validation_data=val_ds,
        epochs=30,
        class_weight=class_weight_dict,
        verbose=2,
    )

    # Phase 1: only the new classification head is trained
    print(f"Initial training with frozen base layers...")
    history1 = model.fit(train_ds, callbacks=get_callbacks(f"{name}_phase1"), **fit_kwargs)

    # Phase 2: unfreeze the tail of the backbone and continue at a lower LR
    print(f"\nFine-tuning with unfrozen layers...")
    model = unfreeze_model(model, base_model, lr=1e-5)
    history2 = model.fit(train_ds, callbacks=get_callbacks(f"{name}_phase2"), **fit_kwargs)

    # The decision threshold is tuned on the validation split ...
    print("\nFinding optimal threshold...")
    optimal_threshold = find_optimal_threshold(model, val_ds)

    # ... and then applied, unchanged, to the test split
    print("\nEvaluating on test set...")
    test_results = evaluate_with_threshold(model, test_ds, threshold=optimal_threshold)
    model_results[name] = test_results

    # Persist the in-memory model as it stands after phase 2
    # NOTE(review): the callbacks checkpoint to .keras while this final save
    # uses .h5 -- confirm the mixed formats are intended.
    model_path = f"{name}_trained_model.h5"
    model.save(model_path)
    print(f"Saved model: {model_path}")

    # Persist both phases' histories side by side
    combined_history = {'phase1': history1.history, 'phase2': history2.history}
    history_dict[name] = combined_history
    history_path = f"{name}_history.pkl"
    with open(history_path, "wb") as f:
        pickle.dump(combined_history, f)
    print(f"Saved training history: {history_path}")

    # Keep the trained model around for the ensemble
    all_trained_models[name] = model


Training DenseNet121...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Initial training with frozen base layers...
Epoch 1/30


I0000 00:00:1745486626.509503      95 service.cc:148] XLA service 0x7e87a8003220 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745486626.510138      95 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1745486629.724857      95 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1745486643.278648      95 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.



Epoch 1: val_auc improved from -inf to 0.88923, saving model to /kaggle/working/models/DenseNet121_phase1.keras
555/555 - 115s - 206ms/step - accuracy: 0.6679 - auc: 0.7290 - loss: 0.6524 - precision: 0.7271 - recall: 0.6480 - val_accuracy: 0.7856 - val_auc: 0.8892 - val_loss: 0.4375 - val_precision: 0.8587 - val_recall: 0.7368 - learning_rate: 1.0000e-04
Epoch 2/30

Epoch 2: val_auc improved from 0.88923 to 0.92608, saving model to /kaggle/working/models/DenseNet121_phase1.keras
555/555 - 32s - 58ms/step - accuracy: 0.7641 - auc: 0.8491 - loss: 0.4872 - precision: 0.8081 - recall: 0.7568 - val_accuracy: 0.8408 - val_auc: 0.9261 - val_loss: 0.3593 - val_precision: 0.8935 - val_recall: 0.8112 - learning_rate: 1.0000e-04
Epoch 3/30

Epoch 3: val_auc improved from 0.92608 to 0.94576, saving model to /kaggle/working/models/DenseNet121_phase1.keras
555/555 - 31s - 57ms/step - accuracy: 0.7973 - auc: 0.8855 - loss: 0.4262 - precision: 0.8353 - recall: 0.7930 - val_accuracy: 0.8647 - val_auc