In [None]:
############################
# 1) SETUP AND IMPORTS
############################
!pip install focal-loss --quiet  # (Optional) if you want to try focal loss later

from google.colab import drive
drive.mount('/content/drive')

import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.densenet import DenseNet121, preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow as tf


############################
# 2) UNZIP CHEXPERT DATASET
############################
# Make sure you have "archive.zip" (the CheXpert dataset) in your Drive
!unzip -q "/content/drive/MyDrive/archive.zip" -d /content/chexpert

# Check folder
print("Files in /content/chexpert:")
print(os.listdir('/content/chexpert'))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
replace /content/chexpert/train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: Files in /content/chexpert:
['valid.csv', 'valid', 'train.csv', 'train']


In [None]:
############################
# 3) LOAD & INSPECT DATA
############################
train_csv_path = "/content/chexpert/train.csv"  # Adjust if needed
df = pd.read_csv(train_csv_path)
print("Total rows in train.csv:", len(df))

# The 14 CheXpert observation columns, kept here for reference.
all_labels = [
    "No Finding", "Enlarged Cardiomediastinum", "Cardiomegaly", "Lung Opacity",
    "Lung Lesion", "Edema", "Consolidation", "Pneumonia", "Atelectasis",
    "Pneumothorax", "Pleural Effusion", "Pleural Other", "Fracture",
    "Support Devices",
]

# OPTIONAL sanity check: for every label column, count how many rows hold
# each of the four possible states (positive, negative, uncertain, missing).
print("Label distribution (raw counts of 1, 0, -1, NaN) for each label:")
label_counts = {
    name: {
        "1.0": (df[name] == 1.0).sum(),
        "0.0": (df[name] == 0.0).sum(),
        "-1.0": (df[name] == -1.0).sum(),
        "NaN": df[name].isna().sum(),
    }
    for name in all_labels
}

# One row per label, one column per state.
label_counts_df = pd.DataFrame(label_counts).T
label_counts_df.columns = ["Count_1.0", "Count_0.0", "Count_-1.0", "Count_NaN"]
display(label_counts_df)

Total rows in train.csv: 223414
Label distribution (raw counts of 1, 0, -1, NaN) for each label:


Unnamed: 0,Count_1.0,Count_0.0,Count_-1.0,Count_NaN
No Finding,22381,0,0,201033
Enlarged Cardiomediastinum,10798,21638,12403,178575
Cardiomegaly,27000,11116,8087,177211
Lung Opacity,105581,6599,5598,105636
Lung Lesion,9186,1270,1488,211470
Edema,52246,20726,12984,137458
Consolidation,14783,28097,27742,152792
Pneumonia,6039,2799,18770,195806
Atelectasis,33376,1328,33739,154971
Pneumothorax,19448,56341,3145,144480


In [None]:


############################
# 4) FILTER DATA & DEFINE LABELS
############################
# Load metadata (re-read from disk so this cell is idempotent on re-run)
csv_path = "/content/chexpert/train.csv"
df = pd.read_csv(csv_path)

# Add full image path: strip the leading "CheXpert-v1.0-small" component of the
# CSV's "Path" column and re-root the remainder under the unzip directory.
df["filepath"] = df["Path"].apply(lambda x: str(Path("/content/chexpert") / Path(x).relative_to("CheXpert-v1.0-small")))

# Filter to frontal images only.
# .copy() gives an independent frame; assigning into the bare boolean-mask
# slice below is what raised pandas' SettingWithCopyWarning.
df = df[df["Frontal/Lateral"] == "Frontal"].copy()

# Select diagnoses
target_labels = ["Pleural Effusion", "Edema", "Cardiomegaly"] # Tried a few times, going for this combo due to fair amount of data and balance.

# Replace -1 with 0 (treat uncertainty as negative)
df[target_labels] = df[target_labels].replace(-1.0, 0.0)

# Drop rows where any of the selected labels are NaN
df = df.dropna(subset=target_labels)

# Drop rows where image doesn't exist
df = df[df["filepath"].apply(lambda x: Path(x).is_file())]

# Show filtered shape and label distributions
print("Filtered shape:", df.shape)
display(df[target_labels].apply(pd.Series.value_counts).fillna(0).astype(int))

############################
# 5) SPLIT INTO TRAIN/VAL
############################
# NOTE(review): this is a row-level random split. If the same patient can
# appear in several rows (presumably the case, given the patient/study
# structure of the paths -- confirm), images of one patient can land in both
# sets and inflate validation metrics. Consider a group-aware split.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42
)

# train_test_split returns a list; calling .copy() on that list does not copy
# the DataFrames inside it, so copy each frame explicitly.
train_df = train_df.copy()
val_df = val_df.copy()

print(f"Train set shape: {train_df.shape}")
print(f"Validation set shape: {val_df.shape}")



Filtered shape: (13180, 20)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[target_labels] = df[target_labels].replace(-1.0, 0.0)


Unnamed: 0,Pleural Effusion,Edema,Cardiomegaly
1.0,8745,8009,9278
0.0,4435,5171,3902


Train set shape: (10544, 20)
Validation set shape: (2636, 20)


In [None]:
############################
# 6) IMAGE DATA GENERATORS
############################
# Images are loaded with color_mode="rgb" so the ImageNet-pretrained DenseNet
# receives the 3 channels it expects; Keras handles grayscale files when
# loading them as RGB, so no manual channel replication is needed.
IMG_SIZE = (256, 256)
BATCH_SIZE = 16

# Mild geometric augmentation, applied to the training stream only.
augmentation = dict(
    rotation_range=10,
    width_shift_range=0.05,
    height_shift_range=0.05,
    zoom_range=0.05,
    horizontal_flip=True,
)

# Both streams go through DenseNet's own preprocess_input.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, **augmentation)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings common to the training and validation iterators.
flow_options = dict(
    x_col='filepath',
    y_col=target_labels,
    target_size=IMG_SIZE,
    color_mode='rgb',
    class_mode='raw',  # multi-label => 'raw'
    batch_size=BATCH_SIZE,
)

# Training batches are shuffled; validation order is kept fixed so that
# predictions line up with the iterator's labels.
train_generator = train_datagen.flow_from_dataframe(dataframe=train_df, shuffle=True, **flow_options)
val_generator = val_datagen.flow_from_dataframe(dataframe=val_df, shuffle=False, **flow_options)

Found 10544 validated image filenames.
Found 2636 validated image filenames.


In [None]:
############################
# 7) BUILD PRETRAINED DENSENET
############################
def create_densenet_model(input_shape=(256, 256, 3), num_classes=3, freeze_until=None):
    """
    Build and compile a DenseNet121-based multi-label classifier.

    An ImageNet-pretrained DenseNet121 (without its classification top) is
    followed by global average pooling, dropout (0.3) and a sigmoid Dense layer
    with one unit per label.

    input_shape: shape of the input images passed to DenseNet121.
    num_classes: number of output units (one independent sigmoid per label).
    freeze_until: layer name or index until which layers are frozen (optional).
        Backbone layers before this layer are frozen; the layer itself and all
        later ones stay trainable. None (default) leaves the whole backbone
        trainable.

    Returns the compiled model (Adam, lr=1e-4, binary cross-entropy).
    Raises ValueError if freeze_until is a name that is not a backbone layer.
    """
    base_model = DenseNet121(include_top=False, weights='imagenet', input_shape=input_shape)

    # OPTIONAL partial fine-tuning: freeze everything before `freeze_until`.
    if freeze_until is not None:
        if isinstance(freeze_until, int):
            boundary = freeze_until
        else:
            layer_names = [layer.name for layer in base_model.layers]
            if freeze_until not in layer_names:
                raise ValueError(f"Layer '{freeze_until}' not found in DenseNet121 backbone.")
            boundary = layer_names.index(freeze_until)
        for layer in base_model.layers[:boundary]:
            layer.trainable = False

    # Add new top layers
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=outputs)
    # If you want to try focal loss, do:
    # from focal_loss import BinaryFocalLoss
    # model.compile(optimizer=Adam(1e-4), loss=BinaryFocalLoss(gamma=2), metrics=['accuracy'])
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        # The bare string 'accuracy' is resolved from the output shape; with
        # more than one output unit it becomes categorical (argmax) accuracy,
        # which is meaningless for independent sigmoid labels. Use per-label
        # binary accuracy, keeping the name 'accuracy' so history keys
        # ('accuracy' / 'val_accuracy') stay the same.
        metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')]
    )
    return model

# Derive the input shape from IMG_SIZE (the generators' target_size) instead of
# repeating the literal, so the model and the data pipeline cannot drift apart.
model = create_densenet_model(input_shape=(*IMG_SIZE, 3), num_classes=len(target_labels))
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
############################
# 7.5) RESUME FROM LATEST CHECKPOINT (if exists)
############################
from tensorflow.keras.models import load_model
import glob

checkpoint_dir = "/content/drive/MyDrive/chexpert_checkpoints"

# Collect every saved model in the checkpoint folder, oldest first
# (ordered by file modification time).
checkpoint_files = glob.glob(os.path.join(checkpoint_dir, "*.keras"))
checkpoint_files.sort(key=os.path.getmtime)

# The most recently written file, or None when nothing has been saved yet.
latest_checkpoint = checkpoint_files[-1] if checkpoint_files else None

if latest_checkpoint is None:
    print("🆕 No checkpoint found, starting training from scratch.")
else:
    print(f"🔁 Resuming from latest checkpoint:\n{latest_checkpoint}")
    # Replaces the freshly built model with the saved one.
    model = load_model(latest_checkpoint)


In [None]:

############################
# 8) TRAIN MODEL (WITH CHECKPOINTING)
############################
import os
import re

from tensorflow.keras.callbacks import ModelCheckpoint

# Create a folder in your Google Drive to store the checkpoints
checkpoint_dir = "/content/drive/MyDrive/chexpert_checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)

# Index of the final epoch, counted across all (resumed) runs.
TOTAL_EPOCHS = 10

# If a checkpoint was loaded earlier in the notebook, continue the epoch count
# from the number embedded in its file name ("epoch_NN-..."). Otherwise a
# resumed run would start again at epoch 1 and write new "epoch_01-..." files
# that are indistinguishable from the earlier ones.
# globals().get tolerates the resume cell not having been run.
resumed_checkpoint = globals().get("latest_checkpoint")
epoch_match = (
    re.search(r"epoch_(\d+)-", os.path.basename(resumed_checkpoint))
    if resumed_checkpoint else None
)
initial_epoch = int(epoch_match.group(1)) if epoch_match else 0
print(f"Completed epochs so far: {initial_epoch} / {TOTAL_EPOCHS}")

# Save model after every epoch
checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, "epoch_{epoch:02d}-val_loss_{val_loss:.4f}.keras"),
    monitor='val_loss',
    save_best_only=False,       # Save every epoch
    save_weights_only=False,    # Save full model
    verbose=1
)

# Full callbacks list
callbacks = [
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1),
    checkpoint_callback  # Added checkpointing here
]

# Start training. With initial_epoch set, `epochs` is the index of the last
# epoch to run, not the number of additional epochs; nothing is trained if
# initial_epoch >= TOTAL_EPOCHS.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=TOTAL_EPOCHS,
    initial_epoch=initial_epoch,
    callbacks=callbacks
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m659/659[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15s/step - accuracy: 0.4159 - loss: 0.5929 
Epoch 1: saving model to /content/drive/MyDrive/chexpert_checkpoints/epoch_01-val_loss_0.4826.keras
[1m659/659[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10681s[0m 16s/step - accuracy: 0.4160 - loss: 0.5929 - val_accuracy: 0.4431 - val_loss: 0.4826 - learning_rate: 1.0000e-04
Epoch 2/10
[1m659/659[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15s/step - accuracy: 0.5156 - loss: 0.4998 
Epoch 2: saving model to /content/drive/MyDrive/chexpert_checkpoints/epoch_02-val_loss_0.4749.keras
[1m659/659[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10497s[0m 16s/step - accuracy: 0.5156 - loss: 0.4998 - val_accuracy: 0.7204 - val_loss: 0.4749 - learning_rate: 1.0000e-04
Epoch 3/10
[1m659/659[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15s/step - accuracy: 0.5129 - loss: 0.4769 
Epoch 3: saving model to /content/drive/MyDrive/chexpert_checkpoints/ep

In [None]:
############################
# 9) EVALUATE MODEL
############################
# Evaluate final model on val set (returns the loss followed by the single
# compiled metric).
val_loss, val_acc = model.evaluate(val_generator)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_acc:.4f}")

# Predict probabilities. val_generator was built with shuffle=False, so the
# prediction rows line up with val_generator.labels.
y_pred_prob = model.predict(val_generator)
y_true = val_generator.labels  # ground truth

# Binarize the per-label probabilities at a fixed decision threshold.
DECISION_THRESHOLD = 0.5
y_pred = (y_pred_prob > DECISION_THRESHOLD).astype(int)

print("\nClassification Report:")
# zero_division=0 scores undefined precision/recall as 0 -- the same value the
# default produces -- without emitting a warning for each affected average.
print(classification_report(y_true, y_pred, target_names=target_labels, zero_division=0))

# AUC for each class, computed from the raw probabilities (threshold-free).
print("AUC Scores per Label:")
for i, label in enumerate(target_labels):
    auc_score = roc_auc_score(y_true[:, i], y_pred_prob[:, i])
    print(f"{label}: {auc_score:.4f}")

[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m489s[0m 3s/step - accuracy: 0.6367 - loss: 0.4491
Validation Loss: 0.4488
Validation Accuracy: 0.6396
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m499s[0m 3s/step

Classification Report:
                  precision    recall  f1-score   support

Pleural Effusion       0.85      0.94      0.90      1749
           Edema       0.78      0.91      0.84      1628
    Cardiomegaly       0.81      0.88      0.85      1887

       micro avg       0.82      0.91      0.86      5264
       macro avg       0.82      0.91      0.86      5264
    weighted avg       0.82      0.91      0.86      5264
     samples avg       0.68      0.77      0.70      5264

AUC Scores per Label:
Pleural Effusion: 0.8982
Edema: 0.8390
Cardiomegaly: 0.7810


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
############################
# 10) PLOT ROC CURVES
############################
# One ROC curve per target label on a shared axis, plus the chance diagonal.
fig, ax = plt.subplots(figsize=(7, 6))

# Walk the label columns of the ground truth and the predicted probabilities
# side by side (transposing yields one column per iteration).
for label, truth_col, prob_col in zip(target_labels, y_true.T, y_pred_prob.T):
    fpr, tpr, _ = roc_curve(truth_col, prob_col)
    auc_score = roc_auc_score(truth_col, prob_col)
    ax.plot(fpr, tpr, label=f"{label} (AUC={auc_score:.2f})")

# Reference line for a random classifier.
ax.plot([0, 1], [0, 1], 'k--')

ax.set_title("ROC Curves")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.legend(loc='lower right')
ax.grid(True)
plt.show()

In [None]:
############################
# 11) CONFUSION MATRIX
############################
# Draw one confusion-matrix heatmap per target label, each in its own figure.
for i, label in enumerate(target_labels):
    # Rows are the actual classes, columns the thresholded predictions.
    cm = confusion_matrix(y_true[:, i], y_pred[:, i])

    fig, ax = plt.subplots(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt='d', cmap="Blues", cbar=False, ax=ax)
    ax.set_title(f"Confusion Matrix for {label}")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    plt.show()


In [None]:

############################
# 12) GRAD-CAM DEMO
############################
# Example Grad-CAM for a single image from the validation set
import cv2
import numpy as np

def plot_gradcam(model, img_path, layer_name, label_index=0):
    """
    Generates Grad-CAM heatmap for a single label_index on a single image and
    displays it next to the original image.

    model: your trained model
    img_path: path to an image
    layer_name: name of the last conv layer in DenseNet (e.g., 'conv5_block16_concat')
    label_index: which label (0.. num_classes-1) to visualize

    Reads the notebook-level IMG_SIZE (used as Keras target_size, i.e.
    (height, width)) and target_labels (for the plot title).
    Raises ValueError if OpenCV cannot read img_path.
    Returns None; the figure is shown with plt.show().
    """
    height, width = IMG_SIZE

    # 1) Load & preprocess image
    img = tf.keras.utils.load_img(img_path, target_size=IMG_SIZE)
    x = tf.keras.utils.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)  # DenseNet's preprocess

    # 2) Forward pass through a model that exposes both the chosen conv
    #    feature map and the final predictions.
    #    model.inputs is already a list; wrapping it in another list would
    #    declare a nested input structure that a plain array does not match.
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(layer_name).output, model.output]
    )
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(x)
        loss = predictions[:, label_index]  # focusing on specific label

    # 3) Gradient of the label score w.r.t. the feature map, averaged over the
    #    batch and spatial axes -> one weight per channel.
    grads = tape.gradient(loss, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # 4) Weight the channels by their gradients and average them -> (H, W)
    heatmap = tf.reduce_mean(conv_outputs[0] * pooled_grads, axis=-1)

    # 5) Keep only positive evidence, then scale by the maximum of the clamped
    #    map. Clamping first keeps the denominator non-negative, so an
    #    all-negative map becomes all zeros instead of dividing by a
    #    non-positive (possibly near-zero) value.
    heatmap = tf.nn.relu(heatmap)
    heatmap = (heatmap / (tf.reduce_max(heatmap) + 1e-8)).numpy()

    # 6) Superimpose on original image (OpenCV loads it as BGR)
    img_orig = cv2.imread(img_path)
    if img_orig is None:
        raise ValueError(f"OpenCV could not read image: {img_path}")
    # cv2.resize takes (width, height), the reverse of Keras' target_size.
    img_orig = cv2.resize(img_orig, (width, height))

    heatmap = cv2.resize(heatmap, (width, height))
    heatmap = np.uint8(255 * heatmap)

    heatmap_color = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    superimposed_img = cv2.addWeighted(heatmap_color, 0.4, img_orig, 0.6, 0)

    # 7) Display side by side (convert BGR -> RGB for matplotlib)
    fig, axes = plt.subplots(1, 2, figsize=(10, 4))
    axes[0].imshow(cv2.cvtColor(img_orig, cv2.COLOR_BGR2RGB))
    axes[0].set_title("Original Image")
    axes[0].axis("off")

    axes[1].imshow(cv2.cvtColor(superimposed_img, cv2.COLOR_BGR2RGB))
    axes[1].set_title(f"Grad-CAM for label: {target_labels[label_index]}")
    axes[1].axis("off")

    plt.tight_layout()
    plt.show()

# Draw one validation row at random and look up its image file.
sample_idx = np.random.randint(len(val_df))
sample_path = val_df["filepath"].iloc[sample_idx]
print(f"Showing Grad-CAM for:\n{sample_path}")

# "conv5_block16_concat" is typically the last DenseNet layer you can target
# (check model.summary() if unsure). label_index=0 selects the first entry of
# target_labels.
plot_gradcam(
    model,
    sample_path,
    layer_name='conv5_block16_concat',
    label_index=0,
)
