In [1]:
import xarray as xr
import rasterio
from rasterio.transform import from_origin
import numpy as np
import os

# Export every ERA5 variable in the NetCDF file to its own multi-band GeoTIFF
# (one band per timestep), georeferenced in EPSG:4326.

# ---- Input / Output ----
ERA5_NC = r"C:\Users\Ankit\Datasets_Forest_fire\compressed_cleaned_era5_2015_2016.nc"
OUT_DIR = r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks"
os.makedirs(OUT_DIR, exist_ok=True)

# ---- Load dataset ----
ds = xr.open_dataset(ERA5_NC)

time_dim = "valid_time"
lat_dim = "latitude"
lon_dim = "longitude"

# ---- Ensure north-up orientation ----
# The affine transform below puts row 0 at the NORTHERN edge and column 0 at
# the WESTERN edge, so the array must have latitude descending and longitude
# ascending. (The previous version re-ordered latitude to ascending, which
# wrote every band upside-down relative to the transform.)
ds = ds.sortby(lat_dim, ascending=False).sortby(lon_dim, ascending=True)

times = ds[time_dim].values
lat = ds[lat_dim].values
lon = ds[lon_dim].values

# ---- Create transform (affine) ----
res_lon = float(abs(lon[1] - lon[0]))
res_lat = float(abs(lat[1] - lat[0]))
# from_origin() expects the outer corner of the upper-left pixel. The grid
# coordinates are treated here as cell centres (assumption — confirm for this
# product), hence the half-pixel shift to the north-west.
transform = from_origin(
    float(lon.min()) - res_lon / 2.0,
    float(lat.max()) + res_lat / 2.0,
    res_lon,
    res_lat,
)

# Axis order required by rasterio's dst.write(): (band, row, col).
expected_dims = (time_dim, lat_dim, lon_dim)

# ---- Loop through all variables ----
for var in ds.data_vars:
    da = ds[var]  # DataArray
    print(f"Processing {var} with dims {da.dims}")

    # Variables without exactly (time, lat, lon) cannot be written as a
    # band stack with this profile; skip them instead of writing garbage.
    if set(da.dims) != set(expected_dims):
        print(f"Skipping {var}: expected dims {expected_dims}")
        continue

    # Load all timesteps at once → shape: (time, lat, lon).
    # transpose() makes the axis order explicit rather than assumed.
    arr = da.transpose(*expected_dims).values.astype("float32")

    n_bands = arr.shape[0]
    profile = {
        "driver": "GTiff",
        "height": arr.shape[1],
        "width": arr.shape[2],
        "count": n_bands,
        "dtype": "float32",
        "crs": "EPSG:4326",
        "transform": transform,
        "tiled": True,            # enable tiling for faster IO
        "compress": "deflate"     # compression to save space
    }

    out_tif = os.path.join(OUT_DIR, f"ERA5_{var}_2015_2016_stack.tif")

    with rasterio.open(out_tif, "w", **profile) as dst:
        dst.write(arr)  # write full time series in one shot

    print(f"✅ Saved {var} to {out_tif} ({n_bands} time bands)")


Processing t2m with dims ('valid_time', 'latitude', 'longitude')
✅ Saved t2m to C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_t2m_2015_2016_stack.tif (17544 time bands)
Processing d2m with dims ('valid_time', 'latitude', 'longitude')
✅ Saved d2m to C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_d2m_2015_2016_stack.tif (17544 time bands)
Processing u10 with dims ('valid_time', 'latitude', 'longitude')
✅ Saved u10 to C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_u10_2015_2016_stack.tif (17544 time bands)
Processing v10 with dims ('valid_time', 'latitude', 'longitude')
✅ Saved v10 to C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_v10_2015_2016_stack.tif (17544 time bands)
Processing tp with dims ('valid_time', 'latitude', 'longitude')
✅ Saved tp to C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_tp_2015_2016_stack.tif (17544 time bands)


In [3]:
# Show the dimension order and coordinate names of `da`, i.e. the last
# DataArray left behind by the export loop in the first cell.
for label, value in (("Dims:", da.dims), ("Coords:", list(da.coords))):
    print(label, value)


Dims: ('valid_time', 'latitude', 'longitude')
Coords: ['number', 'valid_time', 'latitude', 'longitude', 'expver']


In [2]:
import rasterio
import numpy as np

# Sanity-check the exported ERA5 stacks: band count, raster size, nodata
# metadata, and the number of missing pixels in a few sample bands.

# List of ERA5 stacked files
era5_files = [
    r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_t2m_2015_2016_stack.tif",
    r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_d2m_2015_2016_stack.tif",
    r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_u10_2015_2016_stack.tif",
    r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_v10_2015_2016_stack.tif",
    r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_tp_2015_2016_stack.tif",
]

for f in era5_files:
    with rasterio.open(f) as src:
        print(f"\n📂 File: {f}")
        print(f"   Bands: {src.count}")
        print(f"   Shape: {src.height} x {src.width}")

        # Check for nodata value
        nodata = src.nodata
        print(f"   NoData metadata: {nodata}")

        # Read a sample band (1st, middle, last). Band numbers are 1-based,
        # so clamp the middle band to >= 1 and drop duplicates for tiny stacks.
        sample_bands = sorted({1, max(1, src.count // 2), src.count})
        for b in sample_bands:
            arr = src.read(b, masked=True)
            # masked=True always returns a MaskedArray, but when the file
            # declares no nodata value the mask is empty, so NaN pixels must
            # be counted explicitly as well.
            is_missing = np.ma.getmaskarray(arr) | np.isnan(np.ma.getdata(arr))
            missing = int(np.count_nonzero(is_missing))
            print(f"   Band {b}: missing pixels = {missing}")



📂 File: C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_t2m_2015_2016_stack.tif
   Bands: 17544
   Shape: 13 x 17
   NoData metadata: None
   Band 1: missing pixels = 0
   Band 8772: missing pixels = 0
   Band 17544: missing pixels = 0

📂 File: C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_d2m_2015_2016_stack.tif
   Bands: 17544
   Shape: 13 x 17
   NoData metadata: None
   Band 1: missing pixels = 0
   Band 8772: missing pixels = 0
   Band 17544: missing pixels = 0

📂 File: C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_u10_2015_2016_stack.tif
   Bands: 17544
   Shape: 13 x 17
   NoData metadata: None
   Band 1: missing pixels = 0
   Band 8772: missing pixels = 0
   Band 17544: missing pixels = 0

📂 File: C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_v10_2015_2016_stack.tif
   Bands: 17544
   Shape: 13 x 17
   NoData metadata: None
   Band 1: missing pixels = 0
   Band 8772: missing pixels = 0
   Band 17544: missing pixels 

In [5]:
import rasterio
import pandas as pd
from datetime import datetime, timedelta

# Build a CSV index of training samples: for every centre timestep, the ERA5
# band indices of the input sequence and of the forecast targets, plus the
# paths of all rasters needed to assemble that sample.

# ---- Input files ----
ERA5_FILES = {
    "t2m": r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_t2m_2015_2016_stack.tif",
    "d2m": r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_d2m_2015_2016_stack.tif",
    "tp":  r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_tp_2015_2016_stack.tif",
    "u10": r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_u10_2015_2016_stack.tif",
    "v10": r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_fast_tif_stacks\ERA5_v10_2015_2016_stack.tif",
}
VIIRS_FILE  = r"C:\Users\Ankit\Datasets_Forest_fire\VIIRS_fire_time_stack.tif"
DEM_FILE    = r"C:\Users\Ankit\Datasets_Forest_fire\merged_DEM_30m_32644_aligned_filled.tif"
LULC_2015   = r"C:\Users\Ankit\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m_filled_categorical.tif"
LULC_2016   = r"C:\Users\Ankit\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m_filled_categorical.tif"

# ---- Config ----
START_TIME  = datetime(2015, 1, 1, 0)
# Temporal spacing between consecutive ERA5 bands. The stacks hold 17544
# bands = 731 days x 24 h (2015 plus leap-year 2016), i.e. HOURLY data.
# "30m" in the other file names is a spatial resolution (metres), not minutes.
TIME_STEP   = timedelta(hours=1)
SEQ_LEN     = 6
HORIZONS    = [1, 2, 3]
OUT_CSV     = r"C:\Users\Ankit\Datasets_Forest_fire\sequence0_index.csv"

# ---- Get number of time steps from one ERA5 variable (multi-band TIFF) ----
with rasterio.open(ERA5_FILES["t2m"]) as src:
    n_bands = src.count
    height, width = src.height, src.width

print(f"ERA5 variables cover {n_bands} timesteps")

# ---- Build index ----
# Indices are 0-based offsets from START_TIME (index 0 <-> START_TIME).
# NOTE(review): rasterio band numbers are 1-based, so consumers must add 1
# before calling src.read(); confirm the loader does this.
rows = []
for center_idx in range(SEQ_LEN, n_bands - max(HORIZONS)):
    center_time = START_TIME + TIME_STEP * center_idx

    # Input sequence indices: the SEQ_LEN steps ending at center_idx (inclusive)
    seq_idxs = list(range(center_idx - SEQ_LEN + 1, center_idx + 1))

    # Target indices: one per forecast horizon after the centre step
    tgt_idxs = [center_idx + h for h in HORIZONS]

    # LULC depending on year
    lulc_file = LULC_2015 if center_time.year == 2015 else LULC_2016

    rows.append({
        "center_time": center_time,
        "seq_band_idxs": seq_idxs,
        "target_band_idxs": tgt_idxs,
        "era5_t2m_file": ERA5_FILES["t2m"],
        "era5_d2m_file": ERA5_FILES["d2m"],
        "era5_tp_file":  ERA5_FILES["tp"],
        "era5_u10_file": ERA5_FILES["u10"],
        "era5_v10_file": ERA5_FILES["v10"],
        "viirs_file": VIIRS_FILE,
        "dem_file": DEM_FILE,
        "lulc_file": lulc_file,
    })

df = pd.DataFrame(rows)
# The list-valued columns are written as their string repr ("[1, 2, ...]"),
# so they must be parsed (e.g. ast.literal_eval) when the CSV is read back.
df.to_csv(OUT_CSV, index=False)
print(f"✅ Saved {len(df)} sequences to {OUT_CSV}")


ERA5 variables cover 17544 timesteps


PermissionError: [Errno 13] Permission denied: 'C:\\Users\\Ankit\\Datasets_Forest_fire\\sequence_index.csv'

In [1]:
import rasterio

# Report how many bands the final ERA5 and VIIRS rasters contain.
# Note: VIIRS_FILE here points at a different raster than the VIIRS_FILE
# assigned in the sequence-index cell; whichever cell ran last wins.
ERA5_FILE = r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_30m_stack_filled_final.tif"
VIIRS_FILE = r"C:\Users\Ankit\Datasets_Forest_fire\VIIRS_fire_label_2015_2016_30m_filled.tif"

for label, raster_path in (("ERA5 bands:", ERA5_FILE), ("VIIRS bands:", VIIRS_FILE)):
    with rasterio.open(raster_path) as src:
        print(label, src.count)


ERA5 bands: 5
VIIRS bands: 1
