# In [1]:
import rasterio
import numpy as np
import pandas as pd
from pathlib import Path

# -----------------------------
# 1. Define file paths
# -----------------------------
# Input locations for the per-pixel dataset assembled below (absolute Windows paths).
#
# NOTE(review): era5_dir and viirs_dir both end in ".tif", yet the time-series
# loop further down uses them as folders: era5_dir.glob("*.tif"),
# era5_dir / "<var>_<date>.tif" and viirs_dir / "viirs_<date>.tif".
# Confirm whether they should instead point at the directories that hold the
# per-date rasters.
era5_dir = Path(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\ERA5_30m_stack_filled.tif")
viirs_dir = Path(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\VIIRS_fire_label_30m_filled.tif")
# DEM raster; note it sits directly on the Desktop, not inside Datasets_Forest_fire.
dem_path = Path(r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM.tif")

# LULC rasters, one file per year (2015 and 2016); the loop picks one by the
# year parsed from each date string.
lulc_2015_path = Path(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m.tif")
lulc_2016_path = Path(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m.tif")

# -----------------------------
# 2. Load DEM and LULC maps
# -----------------------------
def _read_first_band(path):
    """Return band 1 of the raster at *path* as a 2-D array.

    Args:
        path: Location of a raster file readable by rasterio.

    Returns:
        The array produced by ``read(1)`` on the opened dataset.

    Raises:
        rasterio.errors.RasterioIOError: If the file cannot be opened or read.
            The original driver error is kept as ``__cause__`` and the message
            is extended with the usual things to check.
    """
    # Local import keeps this block self-contained; rasterio itself is already
    # imported at the top of the file.
    from rasterio.errors import RasterioIOError

    try:
        with rasterio.open(path) as src:
            return src.read(1)
    except RasterioIOError as err:
        # Same exception type as before, but the bare driver message ("not
        # recognized as being in a supported file format") gives no hint about
        # what to do next.
        raise RasterioIOError(
            f"Could not read raster {path}: {err}. Check that the file exists, "
            "is fully downloaded/synced, and is a valid raster (e.g. GeoTIFF)."
        ) from err


# These rasters do not change with date, so they are read once up front.
dem_data = _read_first_band(dem_path)
lulc_2015_data = _read_first_band(lulc_2015_path)
lulc_2016_data = _read_first_band(lulc_2016_path)

# -----------------------------
# 3. Loop through time series rasters
# -----------------------------
# Both locations are used as folders of per-date GeoTIFFs ("<var>_<date>.tif"
# and "viirs_<date>.tif"). If either is not a directory the glob below finds
# nothing and an empty CSV would be written without any error, so fail loudly.
for label, folder in (("era5_dir", era5_dir), ("viirs_dir", viirs_dir)):
    if not folder.is_dir():
        raise NotADirectoryError(
            f"{label} must be a folder of per-date .tif files, got: {folder}"
        )


def _check_grid(name, array, shape):
    """Raise ValueError unless *array* has exactly the pixel grid *shape*.

    Every layer is flattened and placed side by side, one row per pixel, so a
    layer with a different shape would either fail late or pair values from
    different pixels.
    """
    if array.shape != shape:
        raise ValueError(
            f"{name} has shape {array.shape}, expected {shape}; all rasters "
            "must share one pixel grid (resample/clip them to match first)"
        )


lulc_by_year = {2015: lulc_2015_data, 2016: lulc_2016_data}
era5_names = ("t2m", "d2m", "u10", "v10", "tp")

frames = []  # one DataFrame per processed date

# The last "_"-separated token of each ERA5 file stem is taken as the date
# (expected form: YYYYMMDD); the set removes repeats across variables.
dates = sorted({f.stem.split("_")[-1] for f in era5_dir.glob("*.tif")})

for date in dates:
    try:
        year = int(date[:4])
    except ValueError:
        continue  # file name does not end in a date; not part of the series

    # Select LULC for this year; dates outside the study years are skipped.
    lulc_data = lulc_by_year.get(year)
    if lulc_data is None:
        continue

    # Load the VIIRS target first: without it the date is skipped, so there is
    # no point reading the ERA5 rasters beforehand.
    viirs_file = viirs_dir / f"viirs_{date}.tif"
    if not viirs_file.exists():
        continue
    with rasterio.open(viirs_file) as src:
        viirs_data = src.read(1)

    grid = viirs_data.shape
    _check_grid("DEM", dem_data, grid)
    _check_grid(f"LULC {year}", lulc_data, grid)

    # Flatten every layer in row-major order so position i in each column
    # refers to the same pixel (row_idx[i], col_idx[i]).
    row_idx, col_idx = np.indices(grid)
    columns = {
        "time": date,  # scalar, broadcast to every row by the DataFrame constructor
        "row": row_idx.ravel(),
        "col": col_idx.ravel(),
        "DEM": dem_data.ravel(),
        "LULC": lulc_data.ravel(),
        "VIIRS": viirs_data.ravel(),
    }

    # ERA5 variables are optional per date: a missing file just leaves that
    # column out for this date (it shows up as NaN after concatenation).
    for var in era5_names:
        var_file = era5_dir / f"{var}_{date}.tif"
        if not var_file.exists():
            continue
        with rasterio.open(var_file) as src:
            band = src.read(1)
        _check_grid(f"ERA5 {var} ({date})", band, grid)
        columns[var] = band.ravel()

    frames.append(pd.DataFrame(columns))

# -----------------------------
# 4. Save DataFrame
# -----------------------------
# pd.concat rejects an empty list, so fall back to an empty frame when no date
# qualified.
# NOTE(review): the full table for all dates is still held in memory at once;
# if that is too large, consider writing each per-date frame to disk instead.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print("DataFrame shape:", df.shape)
print(df.head())

df.to_csv("combined_raster_dataset.csv", index=False)


# Output of the cell above:
# RasterioIOError: 'C:\Users\Ankit\OneDrive\Desktop\merged_DEM.tif' not recognized as being in a supported file format.