In [1]:
import rasterio
import pandas as pd
from datetime import datetime, timedelta

# ---- Input files ----
# Every dataset lives under one root folder; the individual paths are derived
# from it so the location only has to be spelled out once.
_DATA_ROOT = r"C:\Users\Ankit\Datasets_Forest_fire"
_ERA5_VARIABLES = ("t2m", "d2m", "tp", "u10", "v10")

# One multi-band GeoTIFF stack per ERA5 variable, keyed by variable name.
ERA5_FILES = {
    var: rf"{_DATA_ROOT}\ERA5_fast_tif_stacks\ERA5_{var}_2015_2016_stack.tif"
    for var in _ERA5_VARIABLES
}
VIIRS_FILE = rf"{_DATA_ROOT}\VIIRS_fire_time_stack.tif"
DEM_FILE = rf"{_DATA_ROOT}\merged_DEM_30m_32644_aligned_filled.tif"
# Land-use/land-cover maps, one per calendar year.
LULC_2015 = rf"{_DATA_ROOT}\lulc_maps_tif\LULC_2015_clipped_30m_filled_categorical.tif"
LULC_2016 = rf"{_DATA_ROOT}\lulc_maps_tif\LULC_2016_clipped_30m_filled_categorical.tif"

# ---- Config ----
# Timestamp that band index 0 is mapped to when center times are computed.
START_TIME = datetime(year=2015, month=1, day=1, hour=0)
# Number of consecutive band indices in each input sequence.
SEQ_LEN = 6
# Offsets (in bands) past the center index that are recorded as targets.
HORIZONS = [1, 2, 3]
# Destination of the generated sequence index.
OUT_CSV = rf"{_DATA_ROOT}\sequence0_index.csv"

# ---- Read the stack dimensions from one ERA5 variable ----
# Each band of the multi-band TIFF is treated as one time step, so the band
# count of the t2m stack is taken as the number of available time steps.
with rasterio.open(ERA5_FILES["t2m"]) as era5_stack:
    n_bands = era5_stack.count
    height = era5_stack.height
    width = era5_stack.width

print(f"ERA5 variables cover {n_bands} timesteps")

# ---- Build index ----
# One row per center band: the SEQ_LEN band indices ending at the center form
# the input sequence, and center + h (for each h in HORIZONS) are the targets.

# The ERA5 stacks are hourly: (365 + 366) days * 24 h = 17544, which is the
# band count of the 2015-2016 stack. The previous 30-minute step confused the
# 30 m *spatial* resolution of the DEM/LULC rasters with the time step; it
# compressed two years into one, so center_time was wrong and LULC_2016 was
# almost never selected.
TIME_STEP = timedelta(hours=1)

# File columns that are identical for every row; built once outside the loop.
static_file_columns = {
    "era5_t2m_file": ERA5_FILES["t2m"],
    "era5_d2m_file": ERA5_FILES["d2m"],
    "era5_tp_file":  ERA5_FILES["tp"],
    "era5_u10_file": ERA5_FILES["u10"],
    "era5_v10_file": ERA5_FILES["v10"],
    "viirs_file": VIIRS_FILE,
    "dem_file": DEM_FILE,
}

# Farthest look-ahead; the center must stop early enough for it to exist.
max_horizon = max(HORIZONS)

# NOTE(review): this range leaves one usable window out (either the first one
# if the indices are 0-based, or the last one if they are 1-based rasterio band
# numbers). It is kept unchanged so the row count stays the same -- confirm the
# index convention against the code that consumes this CSV.
rows = []
for center_idx in range(SEQ_LEN, n_bands - max_horizon):
    center_time = START_TIME + center_idx * TIME_STEP

    # Input sequence indices: SEQ_LEN consecutive bands ending at the center.
    seq_idxs = list(range(center_idx - SEQ_LEN + 1, center_idx + 1))

    # Target indices: one band per forecast horizon.
    tgt_idxs = [center_idx + h for h in HORIZONS]

    # LULC map is chosen by the calendar year of the center time.
    lulc_file = LULC_2015 if center_time.year == 2015 else LULC_2016

    rows.append({
        "center_time": center_time,
        "seq_band_idxs": seq_idxs,
        "target_band_idxs": tgt_idxs,
        **static_file_columns,
        "lulc_file": lulc_file,
    })

# ---- Save index ----
# One DataFrame row per sequence; the positional index is not written out.
df = pd.DataFrame(data=rows)
df.to_csv(path_or_buf=OUT_CSV, index=False)
print(f"✅ Saved {len(df)} sequences to {OUT_CSV}")


ERA5 variables cover 17544 timesteps
✅ Saved 17535 sequences to C:\Users\Ankit\Datasets_Forest_fire\sequence0_index.csv
