# Hypoxia Forecasting with LSTM (t+1, t+7, t+10)

Sequence classification using LSTM/GRU for short-horizon hypoxia prediction. This notebook focuses on end-to-end deep learning (TensorFlow/Keras) with sliding windows and horizon-specific targets.

In [None]:
# Imports & Config
import os, numpy as np, pandas as pd
from datetime import timedelta

# Location of the pre-processed time-series CSV that the notebook loads.
# NOTE(review): this is a raw string that *also* doubles every backslash, so
# the value literally contains `\\` pairs between path components — confirm
# that is intended.
DATA_CSV = r"C:\\Users\\hafez\\MSU\\Research\\msGOM\\mssound\\bloom\\data\\processed\\hypoxia_timeseries.csv"

# Column names used throughout the notebook.
DATE_COL = 'date'             # timestamp column
TARGET_COL = 'label'          # classification target
GROUP_COLS = ['lat', 'lon']   # keys identifying one pixel's time series

# Raw predictor columns read from the CSV.
FEATURE_COLS = [
    'chlor_a', 'nflh', 'poc', 'sst',
    'Rrs_412', 'Rrs_443', 'Rrs_469', 'Rrs_488', 'Rrs_531',
    'Rrs_547', 'Rrs_555', 'Rrs_645', 'Rrs_667', 'Rrs_678',
]

# TensorFlow is required by everything below; stop early with an actionable
# message if it cannot be imported.
try:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
except ImportError as e:
    # Only import failures are turned into the install hint; any other error
    # propagates with its own traceback. The underlying error is included and
    # chained so a broken installation is distinguishable from a missing one.
    raise SystemExit(
        'Please install TensorFlow (pip install tensorflow) to run this notebook'
        f' (import failed: {e})'
    ) from e
else:
    print('TensorFlow', tf.__version__)

# Load the CSV and order rows by pixel, then by date, so that each pixel's
# observations are contiguous and chronological.
df = pd.read_csv(DATA_CSV)
df[DATE_COL] = pd.to_datetime(df[DATE_COL])
df = df.sort_values([*GROUP_COLS, DATE_COL]).reset_index(drop=True)

# Basic imputation per pixel: forward-fill, then back-fill whatever is still
# missing at the start of a pixel's series. The groupby fill methods return
# frames aligned on df's own index, so the result can be assigned back
# directly without depending on how `apply` labels its output.
df[FEATURE_COLS] = df.groupby(GROUP_COLS)[FEATURE_COLS].ffill()
df[FEATURE_COLS] = df.groupby(GROUP_COLS)[FEATURE_COLS].bfill()

# A feature that is entirely missing for a pixel cannot be filled from that
# pixel alone; surface it here because NaNs would flow into the model inputs.
n_missing = int(df[FEATURE_COLS].isna().sum().sum())
if n_missing:
    print(f'Warning: {n_missing} feature values are still NaN after per-pixel imputation')

# Normalization (global min-max)
# NOTE(review): the scaler is fitted on every row, including rows that later
# end up in the validation/test slices — confirm this leakage is acceptable.
from sklearn.preprocessing import MinMaxScaler
FEATS = [f'{c}_scaled' for c in FEATURE_COLS]
scaler = MinMaxScaler()
df[FEATS] = scaler.fit_transform(df[FEATURE_COLS])

# Build sequences
# Forecast horizon h -> look-back window length k used for that horizon.
H_K = {
    1: 2,
    7: 7,
    10: 10,
}

def build_sequences(df_in: pd.DataFrame, k: int, h: int, *,
                    group_cols=None, date_col=None,
                    feature_cols=None, target_col=None):
    """Turn per-group time series into fixed-length windows and labels.

    Each group is sorted by date; every run of ``k`` consecutive rows
    ``[t-k, t)`` becomes one input window and is paired with the target of
    row ``t + h``. Groups with ``k + h`` rows or fewer contribute nothing.

    NOTE(review): the label row is ``h + 1`` rows after the last row inside
    the window, and offsets count rows rather than calendar days — confirm
    both match the intended "t+h" definition.

    Args:
        df_in: Frame holding the grouping, date, feature and target columns.
        k: Window length in rows (>= 1).
        h: Offset in rows from the end of the window to the label row (>= 0).
        group_cols: Grouping columns; defaults to the module-level GROUP_COLS.
        date_col: Sort column; defaults to the module-level DATE_COL.
        feature_cols: Input columns; defaults to the module-level FEATS.
        target_col: Label column; defaults to the module-level TARGET_COL.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_windows, k, n_features)`` and
        ``y`` is an int32 array of shape ``(n_windows,)``. Windows appear
        group by group, in date order within each group.

    Raises:
        ValueError: If ``k < 1``, ``h < 0``, or no group has more than
            ``k + h`` rows (nothing to return).
    """
    if k < 1 or h < 0:
        raise ValueError(f'k must be >= 1 and h must be >= 0 (got k={k}, h={h})')

    # Fall back to the notebook-level configuration only when not overridden.
    group_cols = GROUP_COLS if group_cols is None else group_cols
    date_col = DATE_COL if date_col is None else date_col
    feature_cols = FEATS if feature_cols is None else feature_cols
    target_col = TARGET_COL if target_col is None else target_col

    X_parts, y_parts = [], []
    for _, g in df_in.groupby(group_cols):
        n_win = len(g) - k - h
        if n_win <= 0:
            continue  # series too short for even one (window, label) pair
        g = g.sort_values(date_col)
        V = g[feature_cols].values
        y_arr = g[target_col].values.astype('int32')
        # All length-k windows at once: (n - k + 1, n_feat, k). Keep the first
        # n_win (those whose label row exists) and move time to axis 1.
        windows = np.lib.stride_tricks.sliding_window_view(V, k, axis=0)
        X_parts.append(windows[:n_win].transpose(0, 2, 1))
        # The window starting at row i is labelled by row i + k + h.
        y_parts.append(y_arr[k + h:])

    if not X_parts:
        raise ValueError(
            f'No sequences could be built: every group has {k + h} rows or fewer '
            f'(k={k}, h={h})'
        )
    return np.concatenate(X_parts, axis=0), np.concatenate(y_parts)

# Model factory
def make_lstm(seq_len, n_feat):
    """Build and compile a single-layer LSTM binary classifier.

    Architecture: Input(seq_len, n_feat) -> Masking(0.0) -> LSTM(64) ->
    Dropout(0.2) -> Dense(32, relu) -> Dropout(0.2) -> Dense(1, sigmoid).
    Compiled with Adam (learning rate 1e-3), binary cross-entropy loss and
    the accuracy metric.

    Args:
        seq_len: Number of timesteps in each input window.
        n_feat: Number of features per timestep.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # NOTE(review): Masking is configured with mask_value=0.0, but the windows
    # produced by build_sequences are always full length (no padding) —
    # confirm this layer is needed.
    recurrent_stack = [
        layers.Input(shape=(seq_len, n_feat)),
        layers.Masking(mask_value=0.0),
        layers.LSTM(64, return_sequences=False),
        layers.Dropout(0.2),
    ]
    classifier_head = [
        layers.Dense(32, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(1, activation='sigmoid'),
    ]
    net = keras.Sequential(recurrent_stack + classifier_head)

    optimizer = keras.optimizers.Adam(1e-3)
    net.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Train, evaluate and save one LSTM per forecast horizon.
# Loop-invariant setup is done once, up front.
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

results = {}
out_dir = os.path.dirname(DATA_CSV)  # models are written next to the input CSV

for h, k in H_K.items():
    # A new model is created on every iteration; clear Keras' global state so
    # state from the previous horizon's model does not accumulate.
    keras.backend.clear_session()

    print(f"\n=== LSTM t+{h} with k={k} ===")
    X, y = build_sequences(df, k=k, h=h)

    # Positional 60/20/20 split into train / validation / test.
    # NOTE(review): build_sequences emits windows group by group, so these
    # slices partition the data mostly by pixel, not by date; the training
    # slice can contain dates later than those in the test slice. Confirm
    # this is the intended evaluation protocol for a forecasting task.
    n = len(y)
    i_val, i_test = int(n*0.6), int(n*0.8)
    X_tr, y_tr = X[:i_val], y[:i_val]
    X_va, y_va = X[i_val:i_test], y[i_val:i_test]
    X_te, y_te = X[i_test:], y[i_test:]

    model = make_lstm(seq_len=k, n_feat=X.shape[-1])
    # Stop after 5 epochs without improvement and keep the best weights.
    cb = [keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)]
    model.fit(X_tr, y_tr, validation_data=(X_va, y_va),
              epochs=30, batch_size=256, callbacks=cb, verbose=1)

    y_prob = model.predict(X_te, verbose=0).ravel()

    # Metrics at a fixed 0.5 decision threshold.
    y_pred = (y_prob >= 0.5).astype(int)
    metrics = {
        'accuracy': float(accuracy_score(y_te, y_pred)),
        'precision': float(precision_score(y_te, y_pred, zero_division=0)),
        'recall': float(recall_score(y_te, y_pred, zero_division=0)),
        'f1': float(f1_score(y_te, y_pred, zero_division=0)),
        # ROC AUC is only computed when both classes occur in the test labels.
        'roc_auc': float(roc_auc_score(y_te, y_prob)) if len(np.unique(y_te))>1 else None
    }
    print('Test metrics:', metrics)

    # Save
    save_path = os.path.join(out_dir, f'lstm_t+{h}.keras')
    model.save(save_path)
    results[h] = metrics

print('\nSummary:', results)