In [2]:
import rasterio
import numpy as np
import pandas as pd
from pathlib import Path

# -----------------------------
# 1. Define file paths
# -----------------------------
era5_dir = Path(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\ERA5_30m_stack_filled.tif")
viirs_dir = Path(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\VIIRS_fire_label_30m_filled.tif")
dem_path = Path(r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM_30m_32644_aligned.tif")

lulc_2015_path = Path(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m_aligned.tif")
lulc_2016_path = Path(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m_aligned.tif")

# -----------------------------
# 2. Load DEM and LULC maps
# -----------------------------
with rasterio.open(dem_path) as dem_src:
    dem_data = dem_src.read(1)

with rasterio.open(lulc_2015_path) as lulc_src:
    lulc_2015_data = lulc_src.read(1)

with rasterio.open(lulc_2016_path) as lulc_src:
    lulc_2016_data = lulc_src.read(1)

# -----------------------------
# 3. Loop through time series rasters
# -----------------------------
records = []

dates = sorted(set([f.stem.split("_")[-1] for f in era5_dir.glob("*.tif")]))  # extract dates like YYYYMMDD

for date in dates:
    year = int(date[:4])  # extract year from date string

    # Select LULC for this year
    if year == 2015:
        lulc_data = lulc_2015_data
    elif year == 2016:
        lulc_data = lulc_2016_data
    else:
        continue  # skip if outside study years

    # Load ERA5 variables for this date
    era5_vars = {}
    for var in ["t2m", "d2m", "u10", "v10", "tp"]:
        f = era5_dir / f"{var}_{date}.tif"
        if f.exists():
            with rasterio.open(f) as src:
                era5_vars[var] = src.read(1)

    # Load VIIRS target
    viirs_file = viirs_dir / f"viirs_{date}.tif"
    if not viirs_file.exists():
        continue
    with rasterio.open(viirs_file) as src:
        viirs_data = src.read(1)

    # Flatten pixel values
    rows, cols = viirs_data.shape
    for r in range(rows):
        for c in range(cols):
            record = {
                "time": date,
                "row": r, "col": c,
                "DEM": dem_data[r, c],
                "LULC": lulc_data[r, c],
                "VIIRS": viirs_data[r, c]
            }
            for var, data in era5_vars.items():
                record[var] = data[r, c]
            records.append(record)

# -----------------------------
# 4. Save DataFrame
# -----------------------------
df = pd.DataFrame(records)
print("DataFrame shape:", df.shape)
print(df.head())

df.to_csv("combined_raster_dataset.csv", index=False)


DataFrame shape: (0, 0)
Empty DataFrame
Columns: []
Index: []


In [10]:
import rasterio

# Quick function to check metadata
def check_extent(file):
    with rasterio.open(file) as src:
        bounds = src.bounds
        crs = src.crs
        print(f"File: {file}")
        print(f" CRS: {crs}")
        print(f" Bounds: {bounds}")
        print(f" Resolution: {src.res[0]} x {src.res[1]}")
        print("-"*50)

# Paths to your datasets
era5_tif  = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\ERA5_30m_stack_filled.tif"
lulc_2015 = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m.tif"
lulc_2016 = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m.tif"
dem_tif   = r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM.tif"
viirs_tif = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\VIIRS_fire_label_30m_filled.tif"

# Check all
for f in [era5_tif, lulc_2015, lulc_2016, dem_tif, viirs_tif]:
    check_extent(f)


File: C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\ERA5_30m_stack_filled.tif
 CRS: EPSG:32644
 Bounds: BoundingBox(left=95639.47984440275, bottom=3149146.0089508956, right=503759.47984440275, top=3506266.0089508956)
 Resolution: 30.0 x 30.0
--------------------------------------------------
File: C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m.tif
 CRS: EPSG:32644
 Bounds: BoundingBox(left=108365.73527523666, bottom=3152714.2869458776, right=499835.73527523666, top=3491954.2869458776)
 Resolution: 30.0 x 30.0
--------------------------------------------------
File: C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m.tif
 CRS: EPSG:32644
 Bounds: BoundingBox(left=108365.73527524032, bottom=3152714.2869458767, right=499835.7352752403, top=3491954.2869458767)
 Resolution: 30.0 x 30.0
--------------------------------------------------
File: C:\Users\Ankit\OneDrive\Desktop\merged_DEM.tif
 CRS: EPSG:4326
 B

In [9]:
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

# Input DEM (EPSG:4326)
input_dem = r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM.tif"
# Output DEM in meters (EPSG:32644)
output_dem = r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM_30m_32644.tif"

dst_crs = "EPSG:32644"  # UTM Zone 44N (meters)

with rasterio.open(input_dem) as src:
    transform, width, height = calculate_default_transform(
        src.crs, dst_crs, src.width, src.height, *src.bounds, resolution=30
    )

    kwargs = src.meta.copy()
    kwargs.update({
        "crs": dst_crs,
        "transform": transform,
        "width": width,
        "height": height,
        "dtype": src.dtypes[0]
    })

    with rasterio.open(output_dem, "w", **kwargs) as dst:
        for i in range(1, src.count + 1):
            reproject(
                source=rasterio.band(src, i),
                destination=rasterio.band(dst, i),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                resampling=Resampling.bilinear  # bilinear works well for DEM
            )

print("✅ DEM successfully reprojected to EPSG:32644 (meters, 30m resolution)")

✅ DEM successfully reprojected to EPSG:32644 (meters, 30m resolution)


In [12]:
import rasterio

# Quick function to check metadata
def check_extent(file):
    with rasterio.open(file) as src:
        bounds = src.bounds
        crs = src.crs
        print(f"File: {file}")
        print(f" CRS: {crs}")
        print(f" Bounds: {bounds}")
        print(f" Resolution: {src.res[0]} x {src.res[1]}")
        print("-"*50)

# Paths to your datasets
era5_tif  = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\ERA5_30m_stack_filled.tif"
lulc_2015 = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m.tif"
lulc_2016 = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m.tif"
dem_tif   = r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM_30m_32644.tif"
viirs_tif = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\VIIRS_fire_label_30m_filled.tif"

# Check all
for f in [era5_tif, lulc_2015, lulc_2016, dem_tif, viirs_tif]:
    check_extent(f)


File: C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\ERA5_30m_stack_filled.tif
 CRS: EPSG:32644
 Bounds: BoundingBox(left=95639.47984440275, bottom=3149146.0089508956, right=503759.47984440275, top=3506266.0089508956)
 Resolution: 30.0 x 30.0
--------------------------------------------------
File: C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m.tif
 CRS: EPSG:32644
 Bounds: BoundingBox(left=108365.73527523666, bottom=3152714.2869458776, right=499835.73527523666, top=3491954.2869458776)
 Resolution: 30.0 x 30.0
--------------------------------------------------
File: C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m.tif
 CRS: EPSG:32644
 Bounds: BoundingBox(left=108365.73527524032, bottom=3152714.2869458767, right=499835.7352752403, top=3491954.2869458767)
 Resolution: 30.0 x 30.0
--------------------------------------------------
File: C:\Users\Ankit\OneDrive\Desktop\merged_DEM_30m_32644.tif
 CRS: EP

In [1]:
import rasterio
from rasterio.warp import reproject, Resampling

# Reference raster (VIIRS)
ref_path = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\VIIRS_fire_label_30m_filled.tif"
with rasterio.open(ref_path) as ref:
    ref_meta = ref.meta.copy()
    ref_crs = ref.crs
    ref_transform = ref.transform
    ref_width = ref.width
    ref_height = ref.height

# Files to align (LULC + DEM only)
input_files = [
    r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m.tif",
    r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m.tif",
    r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM_30m_32644.tif"
]

for in_file in input_files:
    out_file = in_file.replace(".tif", "_aligned.tif")
    with rasterio.open(in_file) as src:
        out_meta = ref_meta.copy()
        out_meta.update({"dtype": src.dtypes[0], "count": src.count})

        with rasterio.open(out_file, "w", **out_meta) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=ref_transform,
                    dst_crs=ref_crs,
                    dst_width=ref_width,
                    dst_height=ref_height,
                    resampling=Resampling.nearest if "LULC" in in_file else Resampling.bilinear
                )

    print(f"✅ Saved aligned raster: {out_file}")


✅ Saved aligned raster: C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m_aligned.tif
✅ Saved aligned raster: C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m_aligned.tif
✅ Saved aligned raster: C:\Users\Ankit\OneDrive\Desktop\merged_DEM_30m_32644_aligned.tif


In [1]:
import rasterio
import numpy as np
import pandas as pd

# File paths
viirs_file = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\VIIRS_fire_label_30m_filled.tif"
era5_file  = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\ERA5_30m_stack_filled.tif"
lulc2015   = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m_aligned.tif"
lulc2016   = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m_aligned.tif"
dem_file   = r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM_30m_32644_aligned.tif"

# Load VIIRS (labels)
with rasterio.open(viirs_file) as src:
    viirs = src.read(1).flatten()  # flatten to 1D
    mask = viirs != src.nodata      # valid pixels only

# Load ERA5 stack
with rasterio.open(era5_file) as src:
    era5 = src.read().reshape(src.count, -1).T  # shape: (pixels, bands)

# Load LULC maps
with rasterio.open(lulc2015) as src:
    lulc15 = src.read(1).flatten()
with rasterio.open(lulc2016) as src:
    lulc16 = src.read(1).flatten()

# Load DEM
with rasterio.open(dem_file) as src:
    dem = src.read(1).flatten()

# Combine into DataFrame
data = pd.DataFrame(era5, columns=[f"ERA5_var{i+1}" for i in range(era5.shape[1])])
data["LULC_2015"] = lulc15
data["LULC_2016"] = lulc16
data["DEM"] = dem
data["VIIRS_label"] = viirs

# Apply mask (remove invalid pixels)
data = data[mask]

print("✅ DataFrame shape:", data.shape)
print(data.head())

# Save to CSV for ML
out_csv = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\final_dataset.csv"
data.to_csv(out_csv, index=False)
print(f"Saved dataset → {out_csv}")

✅ DataFrame shape: (157359134, 9)
     ERA5_var1  ERA5_var2  ERA5_var3  ERA5_var4  ERA5_var5  LULC_2015  \
433        NaN        0.0        0.0        0.0        0.0        255   
434        NaN        0.0        0.0        0.0        0.0        255   
435        NaN        0.0        0.0        0.0        0.0        255   
436        NaN        0.0        0.0        0.0        0.0        255   
437        NaN        0.0        0.0        0.0        0.0        255   

     LULC_2016    DEM  VIIRS_label  
433        255  255.0          0.0  
434        255  255.0          0.0  
435        255  255.0          0.0  
436        255  255.0          0.0  
437        255  255.0          0.0  
Saved dataset → C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\final_dataset.csv


In [1]:
import pandas as pd 
df = pd.read_csv(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\final_dataset.csv")

In [1]:
import pandas as pd

# Load your existing dataset
df = pd.read_csv(r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\final_dataset.csv")

# Mapping of old names to actual ERA5 variables
rename_dict = {
    "ERA5_var1": "t2m",   # 2m temperature
    "ERA5_var2": "d2m",   # 2m dewpoint
    "ERA5_var3": "u10",   # 10m U-wind
    "ERA5_var4": "v10",   # 10m V-wind
    "ERA5_var5": "tp"     # total precipitation
}

# Create new DataFrame with renamed columns
df1 = df.rename(columns=rename_dict)

In [2]:
df1.sample(5)

Unnamed: 0,t2m,d2m,u10,v10,tp,LULC_2015,LULC_2016,DEM,VIIRS_label
113007791,10.660814,0.0,0.0,0.0,0.0,29,29,189.51651,0.0
139847101,11.671098,0.0,0.0,0.0,0.0,31,31,162.18091,0.0
885409,,0.0,0.0,0.0,0.0,255,255,5044.266,0.0
14932034,1.36567,0.0,0.0,0.0,0.0,22,22,826.35944,0.0
21784486,2.959463,0.0,0.0,0.0,0.0,0,0,255.0,0.0


In [None]:
import rasterio
from rasterio.mask import mask
import numpy as np
import pandas as pd
from shapely.geometry import box
import geopandas as gpd

# ----------------------------
# File paths
# ----------------------------
viirs_file = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\VIIRS_fire_label_30m_filled.tif"
era5_file  = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\ERA5_30m_stack_filled.tif"
lulc2015   = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2015_clipped_30m_aligned.tif"
lulc2016   = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\lulc_maps_tif\LULC_2016_clipped_30m_aligned.tif"
dem_file   = r"C:\Users\Ankit\OneDrive\Desktop\merged_DEM_30m_32644_aligned.tif"

# ----------------------------
# Step 1: Find common extent
# ----------------------------
files = [viirs_file, era5_file, lulc2015, lulc2016, dem_file]
bounds = []

for f in files:
    with rasterio.open(f) as src:
        bounds.append(src.bounds)

# Compute intersection of bounding boxes
xmin = max(b.left for b in bounds)
ymin = max(b.bottom for b in bounds)
xmax = min(b.right for b in bounds)
ymax = min(b.top for b in bounds)

common_extent = box(xmin, ymin, xmax, ymax)
print("🟢 Common extent:", common_extent.bounds)

# ----------------------------
# Step 2: Crop rasters to common extent
# ----------------------------
def crop_to_common(src_file, extent_geom):
    with rasterio.open(src_file) as src:
        out_img, out_transform = mask(src, [extent_geom], crop=True)
        out_img = out_img.astype(float)
        if src.nodata is not None:
            out_img[out_img == src.nodata] = np.nan
    return out_img

viirs = crop_to_common(viirs_file, common_extent)[0].flatten()
era5  = crop_to_common(era5_file, common_extent).reshape(-1, crop_to_common(era5_file, common_extent).shape[0]).T
lulc15 = crop_to_common(lulc2015, common_extent)[0].flatten()
lulc16 = crop_to_common(lulc2016, common_extent)[0].flatten()
dem    = crop_to_common(dem_file, common_extent)[0].flatten()

# ----------------------------
# Step 3: Build DataFrame
# ----------------------------
era5_df = pd.DataFrame(era5, columns=["t2m", "d2m", "u10", "v10", "tp"])
df2 = era5_df.copy()
df["LULC_2015"] = lulc15
df["LULC_2016"] = lulc16
df["DEM"] = dem
df["VIIRS_label"] = viirs

print("🔹 DataFrame shape after cropping:", df.shape)

# ----------------------------
# Step 4: Missing value check
# ----------------------------
print("\nMissing values before filling:")
print(df.isna().sum())

# ----------------------------
# Step 5: Fill missing values (if any)
# ----------------------------
if df.isna().sum().sum() > 0:
    print("\n⚠️ Missing values detected → filling...")
    for col in df.columns:
        if "LULC" in col:  # categorical
            df[col].fillna(df[col].mode()[0], inplace=True)
        else:  # continuous
            df[col].fillna(df[col].mean(), inplace=True)
else:
    print("\n✅ No missing values detected")

# ----------------------------
# Step 6: Save dataset
# ----------------------------
out_csv = r"C:\Users\Ankit\OneDrive\Desktop\Datasets_Forest_fire\final_dataset_common_extent.csv"
df.to_csv(out_csv, index=False)

print(f"\n💾 Final dataset saved → {out_csv}")
print("Final shape:", df.shape)
