In [1]:
# resnet_angle_gradcam_pipeline.py
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix

# -------------------------
# 0. Settings
# -------------------------
SEED = 42                                # seed reused for NumPy, TensorFlow, the split and the shuffle
IMG_SMALL = (75, 75)                     # (height, width) each flattened band is reshaped to
IMG_SIZE = (224, 224)                    # (height, width) images are resized to for ResNet
BATCH = 32                               # default batch size of the tf.data pipelines
EPOCHS = 20                              # epoch budget of the first (frozen-backbone) training stage
PATIENCE = 5                             # EarlyStopping patience, in epochs
MODEL_PATH = "best_resnet_angle.keras"   # checkpoint file, native Keras format

# Seed the global NumPy and TensorFlow random generators up front.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# -------------------------
# 1. Load data (train.json + test.json expected)
# -------------------------
train_df = pd.read_json("train.json")
test_df = pd.read_json("test.json")

# Coerce incidence angles to numbers; values that cannot be parsed become NaN.
train_df["inc_angle"] = pd.to_numeric(train_df["inc_angle"], errors="coerce")
test_df["inc_angle"] = pd.to_numeric(test_df["inc_angle"], errors="coerce")

# Impute missing angles in BOTH frames with the median of the training angles,
# so no statistic is ever computed from the test set.
angle_median = train_df["inc_angle"].median()
for _frame in (train_df, test_df):
    _frame["inc_angle"] = _frame["inc_angle"].fillna(angle_median)

def build_images(df, img_shape=None):
    """Turn the flattened ``band_1`` / ``band_2`` columns into 3-channel images.

    Channels are ``[band_1, band_2, 0.5 * (band_1 + band_2)]``.

    Args:
        df: DataFrame with ``band_1`` and ``band_2`` columns; each cell holds a
            flat sequence of ``height * width`` numbers.
        img_shape: optional ``(height, width)`` to reshape each band to.
            Defaults to the module-level ``IMG_SMALL``.

    Returns:
        float32 array of shape ``(len(df), height, width, 3)``. An empty frame
        yields an array of shape ``(0, height, width, 3)`` instead of raising.

    Raises:
        ValueError: if a band cannot be reshaped to ``(height, width)``.
    """
    height, width = IMG_SMALL if img_shape is None else img_shape

    # Convert each column in a single call rather than row by row with iterrows().
    band_1 = np.asarray(df["band_1"].tolist(), dtype=np.float32).reshape(-1, height, width)
    band_2 = np.asarray(df["band_2"].tolist(), dtype=np.float32).reshape(-1, height, width)
    band_mean = 0.5 * (band_1 + band_2)

    return np.stack([band_1, band_2, band_mean], axis=-1)   # (N, height, width, 3)

X = build_images(train_df)
X_test = build_images(test_df)

# Incidence angles as float32 column vectors of shape (N, 1), one per frame.
angles, angles_test = (
    frame["inc_angle"].values.astype(np.float32).reshape(-1, 1)
    for frame in (train_df, test_df)
)
y = train_df["is_iceberg"].values.astype(np.int32)

# -------------------------
# 2. Train/val split (stratified)
# -------------------------
# Images, angles and labels are split together so rows stay aligned;
# stratifying on y keeps the class ratio the same in both parts.
_split_parts = train_test_split(
    X, angles, y,
    test_size=0.2,
    random_state=SEED,
    stratify=y,
)
X_train, X_val, ang_train, ang_val, y_train, y_val = _split_parts

print("Shapes:", X_train.shape, X_val.shape, y_train.sum(), y_val.sum())

# -------------------------
# 3. tf.data pipeline
#    - Per-sample min-max scaling -> [0,255] -> resize -> preprocess_input
# -------------------------
# Shorthand for tf.data.AUTOTUNE; passed below as `num_parallel_calls` to
# Dataset.map and as the buffer size to Dataset.prefetch.
AUTOTUNE = tf.data.AUTOTUNE

def preprocess_image(image):
    """Scale one image for the ResNet backbone.

    The image is cast to float32, min-max scaled over its first two axes
    (per channel for an (H, W, C) image), stretched to the 0..255 range,
    bilinearly resized to ``IMG_SIZE`` and finally passed through the
    ResNetV2 ``preprocess_input`` imported at the top of the file.
    """
    pixels = tf.cast(image, tf.float32)

    spatial_axes = (0, 1)
    lowest = tf.reduce_min(pixels, axis=spatial_axes, keepdims=True)
    highest = tf.reduce_max(pixels, axis=spatial_axes, keepdims=True)

    # The small constant keeps the division finite for a constant channel.
    unit_range = (pixels - lowest) / (highest - lowest + 1e-6)
    byte_range = unit_range * 255.0

    resized = tf.image.resize(byte_range, IMG_SIZE, method='bilinear')
    return preprocess_input(resized)

def preprocess_fn(image, angle, label=None, augment=False):
    """Map one raw sample to the structure the model consumes.

    Args:
        image: raw image, forwarded to ``preprocess_image``.
        angle: incidence angle; cast to float32.
        label: optional target. When given it is returned next to the inputs.
        augment: when true, apply random flips, a random multiple-of-90-degree
            rotation and a small random brightness shift to the processed image.

    Returns:
        ``(image, angle)`` when ``label`` is None, otherwise
        ``((image, angle), label)``.
    """
    processed = preprocess_image(image)

    if augment:
        processed = tf.image.random_flip_left_right(processed)
        processed = tf.image.random_flip_up_down(processed)
        # Quarter turns only (0, 90, 180 or 270 degrees), so no interpolation is needed.
        quarter_turns = tf.random.uniform([], 0, 4, dtype=tf.int32)
        processed = tf.image.rot90(processed, k=quarter_turns)
        processed = tf.image.random_brightness(processed, max_delta=0.03)

    model_inputs = (processed, tf.cast(angle, tf.float32))
    if label is not None:
        return model_inputs, label
    return model_inputs

def make_dataset(Xa, angles_a, ya=None, batch=BATCH, shuffle=False, augment=False):
    """Build a batched, prefetched ``tf.data`` pipeline over images and angles.

    Args:
        Xa: raw images, sliced along the first axis.
        angles_a: incidence angles aligned with ``Xa``.
        ya: optional labels. Without labels the dataset yields ``(image, angle)``
            and both ``shuffle`` and ``augment`` are ignored; with labels it
            yields ``((image, angle), label)``.
        batch: batch size.
        shuffle: shuffle the labelled samples (buffer of 2048, seeded with SEED).
        augment: forwarded to ``preprocess_fn`` for labelled samples.

    NOTE(review): the unlabelled dataset yields a bare 2-tuple. Keras appears to
    unpack dataset tuples as (x, y), so confirm how it is consumed before
    handing it straight to ``model.predict``.
    """
    if ya is not None:
        ds = tf.data.Dataset.from_tensor_slices((Xa, angles_a, ya))
        if shuffle:
            ds = ds.shuffle(2048, seed=SEED)

        def _prepare(image, angle, label):
            return preprocess_fn(image, angle, label=label, augment=augment)
    else:
        ds = tf.data.Dataset.from_tensor_slices((Xa, angles_a))

        def _prepare(image, angle):
            return preprocess_fn(image, angle, label=None, augment=False)

    ds = ds.map(_prepare, num_parallel_calls=AUTOTUNE)
    return ds.batch(batch).prefetch(AUTOTUNE)

# Training data is shuffled and augmented; validation and test data are neither.
train_ds = make_dataset(X_train, ang_train, ya=y_train, batch=BATCH, shuffle=True, augment=True)
val_ds = make_dataset(X_val, ang_val, ya=y_val, batch=BATCH, shuffle=False, augment=False)
test_ds = make_dataset(X_test, angles_test, batch=BATCH)

# -------------------------
# 4. Build model: ResNet50V2 backbone (include_top=False) + angle MLP fusion
# -------------------------
def find_last_conv_layer(model):
    """Return the name of the last ``Conv2D`` layer in ``model.layers``.

    Layers are scanned from the end of ``model.layers``; ``None`` is returned
    when no layer is a ``Conv2D`` instance.
    """
    conv_names = (
        layer.name
        for layer in reversed(model.layers)
        if isinstance(layer, tf.keras.layers.Conv2D)
    )
    return next(conv_names, None)

def build_resnet_angle_model(freeze_backbone=True, angle_mlp_size=32):
    """Build the two-input classifier: ResNet50V2 image branch + angle MLP.

    Args:
        freeze_backbone: when true, mark the ResNet50V2 backbone as non-trainable.
        angle_mlp_size: width of the second dense layer of the angle branch.

    Returns:
        ``(model, backbone, last_conv_name)``: the full Keras model with inputs
        ``[image, inc_angle]`` and one sigmoid output, the backbone model, and
        the name of the backbone's last Conv2D layer (as found by
        ``find_last_conv_layer``).
    """
    image_input = layers.Input(shape=IMG_SIZE + (3,), name="image")
    angle_input = layers.Input(shape=(1,), name="inc_angle")

    # The backbone is wired directly onto `image_input` via `input_tensor`.
    backbone = ResNet50V2(include_top=False, weights="imagenet", input_tensor=image_input)
    if freeze_backbone:
        backbone.trainable = False

    # Image branch: pool the final feature map into one vector per image.
    image_features = layers.GlobalAveragePooling2D(name="backbone_gap")(backbone.output)

    # Angle branch: two ReLU dense layers (64 units, then `angle_mlp_size`).
    angle_features = layers.Dense(64, activation="relu", name="angle_dense1")(angle_input)
    angle_features = layers.Dense(
        angle_mlp_size, activation="relu", name="angle_dense2"
    )(angle_features)

    # Fusion head: concatenate both branches, then 128 -> dropout -> 64 -> 1.
    fused = layers.Concatenate(name="concat_features")([image_features, angle_features])
    hidden = layers.Dense(128, activation="relu", name="head_dense1")(fused)
    hidden = layers.Dropout(0.4, name="head_dropout")(hidden)
    hidden = layers.Dense(64, activation="relu", name="head_dense2")(hidden)
    probability = layers.Dense(1, activation="sigmoid", name="out")(hidden)

    model = models.Model(
        inputs=[image_input, angle_input],
        outputs=probability,
        name="resnet50v2_angle",
    )
    return model, backbone, find_last_conv_layer(backbone)

model, backbone, last_conv_name = build_resnet_angle_model(freeze_backbone=True)
print("Built model; last conv layer inside backbone:", last_conv_name)
model.summary()

# -------------------------
# 5. Compile + callbacks (native Keras .keras format)
# -------------------------
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss="binary_crossentropy",
    metrics=[
        tf.keras.metrics.BinaryAccuracy(name="accuracy"),
        tf.keras.metrics.AUC(name="auc"),
    ],
)

# All three callbacks track validation AUC, where higher is better.
_monitor = dict(monitor="val_auc", mode="max")
checkpoint = callbacks.ModelCheckpoint(
    MODEL_PATH, save_best_only=True, save_weights_only=False, **_monitor
)
es = callbacks.EarlyStopping(patience=PATIENCE, restore_best_weights=True, **_monitor)
rlr = callbacks.ReduceLROnPlateau(factor=0.5, patience=3, min_lr=1e-7, **_monitor)

# "balanced" class weights computed from the training labels only.
class_weights = compute_class_weight("balanced", classes=np.unique(y_train), y=y_train)
class_weight_dict = {label: class_weights[label] for label in (0, 1)}
print("class weights:", class_weight_dict)

# -------------------------
# 6. Train (short; you may increase epochs)
# -------------------------
# Stage 1: train the angle MLP and the dense head on top of the frozen backbone.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    class_weight=class_weight_dict,
    callbacks=[checkpoint, es, rlr],
)

# optional fine-tuning
FINE_TUNE_EPOCHS = 5
FINE_TUNE_LR = 1e-5

# Stage 2: unfreeze the backbone but keep its BatchNormalization layers frozen.
# Unfreezing them as well would make them update their moving statistics on
# this small dataset, which the Keras fine-tuning guide warns can undo what
# the pretrained backbone has learned.
backbone.trainable = True
for _layer in backbone.layers:
    if isinstance(_layer, layers.BatchNormalization):
        _layer.trainable = False

# Changing `trainable` only takes effect after the model is compiled again.
model.compile(
    optimizer=tf.keras.optimizers.Adam(FINE_TUNE_LR),
    loss="binary_crossentropy",
    metrics=[
        tf.keras.metrics.BinaryAccuracy(name="accuracy"),
        tf.keras.metrics.AUC(name="auc"),
    ],
)

# NOTE(review): the stage-1 callback instances are reused here; confirm that the
# checkpoint carrying over its best val_auc from stage 1 is the intended behaviour.
history_ft = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=FINE_TUNE_EPOCHS,
    class_weight=class_weight_dict,
    callbacks=[checkpoint, es, rlr],
)

# -------------------------
# 7. Evaluate on validation
# -------------------------
# Score the checkpointed model (not the in-memory one) on the validation set.
best = tf.keras.models.load_model(MODEL_PATH, compile=False)

_prob_chunks, _label_chunks = [], []
for (val_images, val_angles), val_labels in val_ds:
    batch_probs = best.predict_on_batch([val_images, val_angles])
    _prob_chunks.append(batch_probs.ravel())
    _label_chunks.append(val_labels.numpy())

y_prob = np.concatenate(_prob_chunks)
y_true = np.concatenate(_label_chunks)
# Hard predictions at the 0.5 probability threshold.
y_pred = (y_prob >= 0.5).astype(int)

print("Val ROC AUC:", roc_auc_score(y_true, y_prob))
print(classification_report(y_true, y_pred, digits=4))
print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))

2025-12-03 20:29:17.867334: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Shapes: (1283, 75, 75, 3) (321, 75, 75, 3) 602 151


I0000 00:00:1764811776.690093  257100 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22320 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:c1:00.0, compute capability: 8.6


Built model; last conv layer inside backbone: conv5_block3_3_conv


class weights: {0: np.float64(0.9419970631424376), 1: np.float64(1.0656146179401993)}
Epoch 1/20


2025-12-03 20:29:46.043982: I external/local_xla/xla/service/service.cc:163] XLA service 0x7ff96c012fa0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-12-03 20:29:46.044012: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 3090, Compute Capability 8.6
2025-12-03 20:29:46.239422: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-12-03 20:29:47.406216: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91002
2025-12-03 20:29:47.649553: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-12-03 20:29:47.649665: I e

[1m 1/41[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9:53[0m 15s/step - accuracy: 0.6562 - auc: 0.6190 - loss: 0.6960

I0000 00:00:1764811796.646926  257212 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step - accuracy: 0.6033 - auc: 0.6101 - loss: 0.6846

2025-12-03 20:30:06.707459: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.



[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 422ms/step - accuracy: 0.6267 - auc: 0.6825 - loss: 0.6368 - val_accuracy: 0.7508 - val_auc: 0.8316 - val_loss: 0.5272 - learning_rate: 1.0000e-04
Epoch 2/20
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - accuracy: 0.7054 - auc: 0.7997 - loss: 0.5342 - val_accuracy: 0.7477 - val_auc: 0.8471 - val_loss: 0.4819 - learning_rate: 1.0000e-04
Epoch 3/20
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - accuracy: 0.7334 - auc: 0.8368 - loss: 0.4914 - val_accuracy: 0.7664 - val_auc: 0.8610 - val_loss: 0.4569 - learning_rate: 1.0000e-04
Epoch 4/20
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - accuracy: 0.7451 - auc: 0.8529 - loss: 0.4666 - val_accuracy: 0.7757 - val_auc: 0.8665 - val_loss: 0.4467 - learning_rate: 1.0000e-04
Epoch 5/20
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - accuracy: 0.7568 - auc: 0.8677 - loss:

2025-12-03 20:32:15.269804: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [3]:
def make_gradcam_heatmap(image_tensor, angle_tensor, model, last_conv_layer_name, eps=1e-8):
    """
    Compute a Grad-CAM heatmap for the model's output score.

    image_tensor: np array shape (1, H, W, 3) already preprocessed with preprocess_input
    angle_tensor: np array shape (1,1)
    model: the full model
    last_conv_layer_name: name of conv layer to use
    eps: small constant guarding the final normalisation against division by zero
    returns: heatmap with values in [0, 1], resized to the height/width of
             image_tensor (equal to IMG_SIZE for images prepared for this model)
    raises: ValueError if no gradient reaches the chosen layer
    """
    # Build a model that maps model inputs -> (last_conv_output, model.output)
    last_conv_layer = model.get_layer(last_conv_layer_name)
    grad_model = tf.keras.models.Model(
        inputs=model.inputs,
        outputs=[last_conv_layer.output, model.output]
    )

    image_tensor = tf.convert_to_tensor(image_tensor)
    angle_tensor = tf.convert_to_tensor(angle_tensor)

    with tf.GradientTape() as tape:
        # Watch the input image so the ops that produce the conv feature map are
        # recorded even when every backbone weight is non-trainable (the tape
        # only watches trainable variables on its own).
        tape.watch(image_tensor)
        conv_outputs, predictions = grad_model([image_tensor, angle_tensor])
        # we want the score for the positive class (scalar)
        score = predictions[:, 0]

    grads = tape.gradient(score, conv_outputs)  # shape (1, h, w, c)
    if grads is None:
        raise ValueError(
            f"No gradient flows from the model output to layer {last_conv_layer_name!r}; "
            "check that the layer lies on the path to the output."
        )

    # Grad-CAM channel weights: gradients averaged over the spatial axes.
    weights = tf.reduce_mean(grads, axis=(1, 2), keepdims=True)  # (1, 1, 1, channels)
    cam = tf.reduce_sum(weights * conv_outputs, axis=-1)         # (1, h, w)
    cam = tf.squeeze(cam).numpy()

    # Keep only positive evidence, then normalise to [0, 1].
    cam = np.maximum(cam, 0)
    cam = cam - cam.min()
    cam = cam / (cam.max() + eps)

    # Resize to the spatial size of the input image instead of a module-level constant.
    target_size = image_tensor.shape.as_list()[1:3]
    cam_resized = tf.image.resize(cam[..., np.newaxis], target_size).numpy().squeeze()
    return cam_resized

def overlay_heatmap_on_image(orig_image_preprocessed, heatmap, alpha=0.4, cmap="jet"):
    """Blend a colour-mapped heatmap over an image for display.

    Args:
        orig_image_preprocessed: preprocessed input image, (H, W, 3) float array.
        heatmap: 2-D array passed to the matplotlib colormap.
        alpha: blend weight of the heatmap; the image gets ``1 - alpha``.
        cmap: name of the matplotlib colormap.

    Returns:
        The blended image, clipped to the range 0..1.
    """
    base = orig_image_preprocessed.copy()

    # Rescale the image by its global min/max to 0..1 for visualisation.
    lowest, highest = base.min(), base.max()
    base_unit = (base - lowest) / (highest - lowest + 1e-9)

    # The colormap returns RGBA; drop the alpha channel.
    heat_rgb = plt.get_cmap(cmap)(heatmap)[..., :3]

    blended = base_unit * (1-alpha) + heat_rgb * alpha
    return np.clip(blended, 0, 1)

# -------------------------------------------------------
# Create a Grad-CAM for an example validation image
# -------------------------------------------------------
# pick an index that exists
idx = 5
# Fail early with a clear message: an out-of-range idx would otherwise produce
# empty slices and only fail later when the first element is read.
assert 0 <= idx < len(X_val), f"idx={idx} is outside the validation set (size {len(X_val)})"
raw_img = X_val[idx:idx+1]         # shape (1,75,75,3)
raw_ang = ang_val[idx:idx+1]       # shape (1,1)

# preprocess exactly as in pipeline (min-max -> 0..255 -> resize -> preprocess_input)
def preprocess_for_model(single_img):
    """Preprocess one raw (H, W, 3) image exactly as the tf.data pipeline does.

    Delegates to ``preprocess_image`` (per-channel min-max -> 0..255 -> resize
    to IMG_SIZE -> preprocess_input) rather than repeating those steps by hand,
    so this path cannot drift from what the model saw during training.

    Returns:
        NumPy float32 array with spatial size IMG_SIZE.
    """
    return preprocess_image(single_img).numpy()

# Model-ready inputs for the selected validation sample.
img_proc = preprocess_for_model(raw_img[0])        # (224,224,3)
img_proc_batch = img_proc[np.newaxis, ...]         # add batch axis -> (1,224,224,3)
ang_batch = raw_ang.astype(np.float32)             # (1,1)

# Grad-CAM from the checkpointed model, blended over the preprocessed image.
heatmap = make_gradcam_heatmap(img_proc_batch, ang_batch, best, last_conv_name)
overlay = overlay_heatmap_on_image(img_proc, heatmap)

plt.imsave("gradcam_overlay.png", overlay)
print("Saved gradcam_overlay.png")

Saved gradcam_overlay.png


In [4]:
def draw_resnet_angle_diagram(savepath="resnet_angle_diagram.png"):
    """Draw a block diagram of the two-branch model and save it to ``savepath``.

    Boxes: ResNet50V2 backbone -> global average pooling, the incidence-angle
    MLP, and the concatenate + dense head. Arrows follow the data flow:
    backbone -> pooling, angle input -> angle MLP, and both branches -> head.

    Args:
        savepath: output image path handed to ``Figure.savefig``.
    """
    fig, ax = plt.subplots(figsize=(9, 4))
    ax.axis("off")

    def add_box(corner, width, height, color, label, label_xy, fontsize=10):
        # One filled rectangle with a black edge and a centred caption.
        ax.add_patch(plt.Rectangle(corner, width, height, fill=True, color=color, ec="k"))
        ax.text(label_xy[0], label_xy[1], label, ha="center", va="center", fontsize=fontsize)

    def add_arrow(tail, head):
        # annotate() draws from `xytext` to `xy`, so the arrowhead sits at `xy`.
        ax.annotate("", xy=head, xytext=tail, arrowprops=dict(arrowstyle="->", lw=2))

    add_box((0.05, 0.25), 0.36, 0.55, "#c6def6",
            "ResNet50V2\n(backbone convs)", (0.225, 0.6), fontsize=12)
    add_box((0.44, 0.62), 0.18, 0.18, "#d5f5e3",
            "Global\nAvgPool\n(features)", (0.53, 0.71))
    add_box((0.44, 0.28), 0.18, 0.18, "#fdebd0",
            "Incidence\nAngle\nMLP", (0.53, 0.37))
    add_box((0.66, 0.36), 0.30, 0.30, "#f9e79f",
            "Concatenate\nDense head\n(128 -> 64 -> 1)", (0.81, 0.5))

    add_arrow((0.41, 0.71), (0.44, 0.71))   # backbone -> global average pooling
    add_arrow((0.36, 0.20), (0.53, 0.28))   # angle input label -> angle MLP
    add_arrow((0.62, 0.71), (0.66, 0.60))   # pooled image features -> head
    add_arrow((0.62, 0.37), (0.66, 0.42))   # angle features -> head

    ax.text(0.02, 0.82, "Input: 75×75 SAR chips\n(band1, band2, avg)", fontsize=9)
    ax.text(0.02, 0.18, "Input: Incidence angle (scalar)", fontsize=9)

    fig.savefig(savepath, dpi=200, bbox_inches="tight")
    plt.close(fig)
    print("Saved diagram to", savepath)

draw_resnet_angle_diagram()

Saved diagram to resnet_angle_diagram.png
