In [None]:
import rasterio
import pandas as pd
from datetime import datetime, timedelta

# ---- Input files ----
# Multi-band stack; its band count defines the range of indices in the output.
ERA5_FILE = r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_30m_stack_filled_final.tif"
# Must expose the same number of bands as ERA5_FILE (checked before indexing).
VIIRS_FILE = r"C:\Users\Ankit\Datasets_Forest_fire\VIIRS_fire_label_2015_2016_30m_filled.tif"
# Recorded unchanged in every index row.
DEM_FILE = r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM_30m_32644_aligned.tif"
# One LULC raster per year; the row's center_time year selects which is used.
LULC_2015 = r"C:\Users\Ankit\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m_categorical.tif"
# NOTE(review): relative path, unlike the other inputs — it resolves against
# the current working directory; confirm this is intended.
LULC_2016 = r"lulc_2016.tif"

# ---- Config ----
# Timestamp of the first band; later bands are stamped in whole-hour steps.
START_TIME = datetime(year=2015, month=1, day=1, hour=0)
# Number of consecutive bands that make up one input sequence.
SEQ_LEN = 6
# Band offsets, counted from the last band of a sequence, used as targets.
HORIZONS = [1, 2, 3]
# Destination of the generated index table.
OUT_CSV = "sequence_index.csv"

# ---- Check band counts ----
# The index built later addresses both stacks with the same band indices, so
# the two rasters must expose the same number of bands.
with rasterio.open(ERA5_FILE) as src:
    n_bands = src.count
with rasterio.open(VIIRS_FILE) as src:
    viirs_bands = src.count

# Raise explicitly rather than `assert`: assertions are removed when Python
# runs with -O, which would let a mismatched pair of stacks through silently.
if viirs_bands != n_bands:
    raise ValueError(
        "ERA5 and VIIRS must have the same # of bands "
        f"(ERA5={n_bands}, VIIRS={viirs_bands})"
    )

# One band per hour is assumed here: the last band is n_bands-1 hours after
# START_TIME.
print(f"ERA5/VIIRS bands: {n_bands} → covers {START_TIME} to {START_TIME + timedelta(hours=n_bands-1)}")

# ---- Build index ----
# Each row describes one sample: SEQ_LEN consecutive input bands ending at
# `center_idx`, plus one target band per entry of HORIZONS after it. Band
# index i is stamped as START_TIME + i hours.
# NOTE(review): with that stamping the window ending at
# center_idx = SEQ_LEN - 1 (bands 0..SEQ_LEN-1) would also fit, but the loop
# starts at SEQ_LEN. Left unchanged because consumers may depend on the
# smallest emitted band index being 1 — confirm the intended convention.
index_columns = [
    "center_time",
    "seq_band_idxs",
    "target_band_idxs",
    "era5_file",
    "viirs_file",
    "dem_file",
    "lulc_file",
]

# Furthest look-ahead; the last usable center must leave room for it.
max_horizon = max(HORIZONS)

rows = []
for center_idx in range(SEQ_LEN, n_bands - max_horizon):
    center_time = START_TIME + timedelta(hours=center_idx)

    # Input sequence bands: the SEQ_LEN indices ending at center_idx (inclusive)
    seq_idxs = list(range(center_idx - SEQ_LEN + 1, center_idx + 1))

    # Target bands: one per forecast horizon after the center
    tgt_idxs = [center_idx + h for h in HORIZONS]

    # Pick LULC depending on year; any year other than 2015 uses LULC_2016
    lulc_file = LULC_2015 if center_time.year == 2015 else LULC_2016

    rows.append({
        "center_time": center_time,
        "seq_band_idxs": seq_idxs,
        "target_band_idxs": tgt_idxs,
        "era5_file": ERA5_FILE,
        "viirs_file": VIIRS_FILE,
        "dem_file": DEM_FILE,
        "lulc_file": lulc_file,
    })

if not rows:
    # Too few bands for even one window; say so instead of silently
    # reporting success on an empty table.
    print(
        f"WARNING: no sequences fit in {n_bands} bands "
        f"(need more than {SEQ_LEN + max_horizon}); writing a header-only CSV"
    )

# Explicit columns keep the header and column order even when `rows` is empty.
df = pd.DataFrame(rows, columns=index_columns)
df.to_csv(OUT_CSV, index=False)
print(f"✅ Saved {len(df)} sequences to {OUT_CSV}")
