In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import os
os.environ["TF_ENABLE_LAYOUT_OPTIMIZER"] = "0"
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, BatchNormalization, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import AUC, Precision, Recall

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

def __len__(self):
    return len(self.indices) // self.batch_size 
strategy = tf.distribute.MirroredStrategy()
print("Number of devices:", strategy.num_replicas_in_sync)
print("Done")
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# =========================
# Dual-GPU ConvLSTM Training (Kaggle T4 x2)
# =========================

# --- MUST be set before importing TensorFlow ---
import os
os.environ["TF_ENABLE_LAYOUT_OPTIMIZER"] = "0"   # avoid ConvLSTM graph layout cycle
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"         # quieter logs

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, BatchNormalization, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import AUC

# -------------------------
# Config
# -------------------------
TIME_STEPS = 5
HEIGHT = 224
WIDTH  = 168
BANDS  = 6               # physical bands (before mask)
CHANNELS = 7             # 6 bands + 1 mask
BATCH_SIZE = 8           # effective global batch = BATCH_SIZE; each GPU gets BATCH_SIZE/num_gpus
EPOCHS = 50
LR = 5e-5

X_PATH = "/kaggle/input/cyclone-features-and-labels/Features_data.npy"
Y_PATH = "/kaggle/input/cyclone-features-and-labels/Label_data.npy"

OUT_DIR = "/kaggle/working"
MODEL_PATH = os.path.join(OUT_DIR, "best_convlstm.keras")
NORM_PATH  = os.path.join(OUT_DIR, "norm_stats.npz")

# -------------------------
# Load arrays
# -------------------------
X = np.load(X_PATH, mmap_mode="r")  # (num_days, 6, H, W)
Y = np.load(Y_PATH, mmap_mode="r")  # (num_days,)
num_days = X.shape[0]
print("X:", X.shape, "Y:", Y.shape)

# -------------------------
# Build sequence indices & labels (label = last day of each window)
# -------------------------
seq_start_idxs = np.arange(0, num_days - TIME_STEPS + 1)
seq_labels = Y[TIME_STEPS-1:]   # aligns with window end

# Stratified train/val split at SEQUENCE level (prevents leakage)
train_idx, val_idx, y_train_seq, y_val_seq = train_test_split(
    seq_start_idxs, seq_labels, test_size=0.2, random_state=42, stratify=seq_labels
)
print(f"Train sequences: {len(train_idx)} | Val sequences: {len(val_idx)}")
print(f"Positives in train: {int(y_train_seq.sum())} | Positives in val: {int(y_val_seq.sum())}")

# -------------------------
# Compute per-band normalization stats on TRAINING DAYS ONLY
# -------------------------
train_day_mask = np.zeros(num_days, dtype=bool)
for s in train_idx:
    train_day_mask[s:s+TIME_STEPS] = True

train_days = X[train_day_mask]  # (N_train_days, 6, H, W)
band_means = np.nanmean(train_days, axis=(0,2,3)).astype(np.float32)            # (6,)
band_stds  = np.nanstd(train_days, axis=(0,2,3)).astype(np.float32)
band_stds[band_stds < 1e-3] = 1.0
np.savez(NORM_PATH, band_means=band_means, band_stds=band_stds)
print("Saved normalization stats ->", NORM_PATH)
print("Band means:", band_means)
print("Band stds :", band_stds)

# -------------------------
# Data Generator (sequence level) - multi-GPU safe
# -------------------------
class SeqDataGenerator(Sequence):
    """
    Generates ConvLSTM sequences with:
      - NaN -> 0 (safe replace)
      - z-score per band (using provided means/stds; computed from train only)
      - mask channel (from first band: valid=1, NaN=0)
      - output: (batch, TIME_STEPS, H, W, CHANNELS)
    """
    def __init__(self, X, Y, seq_starts, time_steps, band_means, band_stds,
                 batch_size=8, shuffle=True, drop_remainder=True):
        self.X = X
        self.Y = Y
        self.seq_starts = np.array(seq_starts, dtype=np.int64)
        self.time_steps = time_steps
        self.band_means = band_means.astype(np.float32)
        self.band_stds  = band_stds.astype(np.float32)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_remainder = drop_remainder
        self.indexes = np.arange(len(self.seq_starts))
        self.on_epoch_end()

    def __len__(self):
        if self.drop_remainder:
            return len(self.indexes) // self.batch_size
        return int(np.ceil(len(self.indexes) / self.batch_size))

    def __getitem__(self, idx):
        batch_ids = self.indexes[idx*self.batch_size:(idx+1)*self.batch_size]
        # drop short last batch (important for multi-GPU all-reduce)
        if self.drop_remainder and len(batch_ids) < self.batch_size:
            batch_ids = self.indexes[-self.batch_size:]

        X_batch, Y_batch = [], []
        for bi in batch_ids:
            s = self.seq_starts[bi]
            # seq_x: (T, 6, H, W)
            seq_x = self.X[s:s+self.time_steps].astype(np.float32)

            # mask from FIRST band (True where not NaN)
            m = ~np.isnan(seq_x[:, 0, :, :])                   # (T, H, W) bool
            m = m.astype(np.float32)[:, None, :, :]            # (T, 1, H, W)

            # replace NaNs with 0 (safe)
            seq_x = np.nan_to_num(seq_x, nan=0.0, posinf=0.0, neginf=0.0)

            # z-score per band (broadcast across T,H,W)
            seq_x = (seq_x - self.band_means[None, :, None, None]) / self.band_stds[None, :, None, None]

            # concat mask
            seq_x = np.concatenate([seq_x, m], axis=1)         # (T, 7, H, W)

            # to (T, H, W, C)
            seq_x = np.transpose(seq_x, (0, 2, 3, 1)).astype(np.float32)

            X_batch.append(seq_x)
            Y_batch.append(self.Y[s + self.time_steps - 1])

        Xb = np.stack(X_batch, axis=0)                         # (B, T, H, W, C)
        Yb = np.array(Y_batch, dtype=np.float32)               # (B,)
        # safety checks (catch NaNs before the model)
        if not np.isfinite(Xb).all():
            raise ValueError("Non-finite values in X batch")
        if not np.isfinite(Yb).all():
            raise ValueError("Non-finite values in Y batch")
        return Xb, Yb

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)

# -------------------------
# Build Generators
# -------------------------
train_gen = SeqDataGenerator(X, Y, train_idx, TIME_STEPS, band_means, band_stds,
                             batch_size=BATCH_SIZE, shuffle=True,  drop_remainder=True)
val_gen   = SeqDataGenerator(X, Y, val_idx,   TIME_STEPS, band_means, band_stds,
                             batch_size=BATCH_SIZE, shuffle=False, drop_remainder=True)

# -------------------------
# Class Weights (sequence-level) — optional clipping to avoid instability
# -------------------------
cw = compute_class_weight(class_weight='balanced', classes=np.unique(y_train_seq), y=y_train_seq)
class_weights = dict(enumerate(np.minimum(cw, 5.0)))   # clip huge weights
print("Class weights (sequence-level):", class_weights)

# -------------------------
# Multi-GPU Strategy
# -------------------------
strategy = tf.distribute.OneDeviceStrategy("GPU:0")  # uses all visible GPUs
print("GPUs in sync:", strategy.num_replicas_in_sync)

with strategy.scope():
    model = Sequential([
        ConvLSTM2D(filters=32, kernel_size=(3,3), padding="same",
                   return_sequences=True, input_shape=(TIME_STEPS, HEIGHT, WIDTH, CHANNELS)),
        BatchNormalization(),

        ConvLSTM2D(filters=32, kernel_size=(3,3), padding="same",
                   return_sequences=False),
        BatchNormalization(),

        GlobalAveragePooling2D(),
        Dense(128, activation="relu"),
        Dropout(0.3),
        Dense(1, activation="sigmoid")
    ])

    optimizer = Adam(learning_rate=LR, clipnorm=1.0) # grad clipping helps stability
    model.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        metrics=[
            "accuracy",
            AUC(curve="ROC", name="auc")
        ]
    )

model.summary()

# -------------------------
# Callbacks (EarlyStopping + Best Model)
# -------------------------
callbacks = [
    EarlyStopping(monitor="val_auc", mode="max", patience=8, restore_best_weights=True, verbose=1),
    ModelCheckpoint(MODEL_PATH, monitor="val_auc", mode="max", save_best_only=True, verbose=1)
]

# -------------------------
# Train
# -------------------------
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS,
    class_weight=class_weights,
    callbacks=callbacks,
    verbose=1
)

print("Best model saved ->", MODEL_PATH)


X: (5479, 6, 224, 168) Y: (5479,)
Train sequences: 4380 | Val sequences: 1095
Positives in train: 345 | Positives in val: 86
Saved normalization stats -> /kaggle/working/norm_stats.npz
Band means: [ 2.8866348e+03  8.2428616e-01  6.8315333e-01 -1.4377959e-02
  9.9217078e+04  1.7229028e-02]
Band stds : [1.20197723e+02 3.43794990e+00 3.12991667e+00 1.15119345e-01
 3.58106348e+03 3.90403694e-03]
Class weights (sequence-level): {0: 0.5427509293680297, 1: 5.0}
GPUs in sync: 1


  super().__init__(**kwargs)


  self._warn_if_super_not_called()


Epoch 1/50


E0000 00:00:1755660845.817249      36 meta_optimizer.cc:966] layout failed: INVALID_ARGUMENT: MutableGraphView::SortTopologically error: detected edge(s) creating cycle(s) {'StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/body/_248/sequential_1/conv_lstm2d_1_2/while/conv_lstm_cell_1/mul_1' -> 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/body/_248/sequential_1/conv_lstm2d_1_2/while/conv_lstm_cell_1/add_7', 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/body/_248/sequential_1/conv_lstm2d_1_2/while/conv_lstm_cell_1/Sigmoid_1' -> 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/body/_248/sequential_1/conv_lstm2d_1_2/while/conv_lstm_cell_1/mul', 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/body/_248/sequential_1/conv_lstm2d_1_2/while/conv_lstm_cell_1/Sigmoid_2' -> 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/body/_248/sequential_1/conv_lstm2d_1_2/while/conv_lstm_cell_1/mul_2', 'StatefulPartitionedCall/sequential

[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 778ms/step - accuracy: 0.8416 - auc: 0.6710 - loss: 0.5837

E0000 00:00:1755661279.353431      36 meta_optimizer.cc:966] layout failed: INVALID_ARGUMENT: MutableGraphView::SortTopologically error: detected edge(s) creating cycle(s) {'Func/StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/body/_76/input/_172' -> 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/body/_76/sequential_1/conv_lstm2d_1_2/while/conv_lstm_cell_1/mul', 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/zeros_switch/_99-0-TransposeNHWCToNCHW-LayoutOptimizer' -> 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/while/StatefulPartitionedCall/sequential_1/conv_lstm2d_1_2/zeros_switch/_99', 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1/while/StatefulPartitionedCall/sequential_1/conv_lstm2d_1/zeros_switch/_56-0-TransposeNHWCToNCHW-LayoutOptimizer' -> 'StatefulPartitionedCall/sequential_1/conv_lstm2d_1/while/StatefulPartitionedCall/sequential_1/conv_lstm2d_1/zeros_switch/_56', 'StatefulPa


Epoch 1: val_auc improved from -inf to 0.88373, saving model to /kaggle/working/best_convlstm.keras
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m471s[0m 844ms/step - accuracy: 0.8416 - auc: 0.6711 - loss: 0.5837 - val_accuracy: 0.9274 - val_auc: 0.8837 - val_loss: 0.3565
Epoch 2/50
[1m359/547[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m2:27[0m 783ms/step - accuracy: 0.8802 - auc: 0.7964 - loss: 0.5315