In [None]:
import os
import glob
import numpy as np
import pandas as pd
import rasterio
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

Configure Paths

In [3]:
PROCESSED_DIR = "../data/processed"
OUTPUT_DIR = "../data/outputs"

os.makedirs(OUTPUT_DIR, exist_ok=True)

Raster Visualization Helper Function

In [4]:
def visualize_raster(path, title=None, cmap="viridis", vmin=None, vmax=None):
    with rasterio.open(path) as src:
        img = src.read(1)
        plt.figure(figsize=(6, 6))
        plt.imshow(img, cmap=cmap, vmin=vmin, vmax=vmax)
        plt.title(title or os.path.basename(path))
        plt.colorbar()
        plt.show()

Raster to Dataframe Helper Function

In [5]:
def raster_to_dataframe(path, band_name):
    with rasterio.open(path) as src:
        data = src.read(1).flatten()
    return pd.DataFrame({band_name: data})

Collect Processed Files

In [6]:
raster_files = glob.glob(os.path.join(PROCESSED_DIR, "*.tif"))
print(f"Found {len(raster_files)} raster files.")

from collections import defaultdict
fire_groups = defaultdict(dict)
for f in raster_files:
    fname = os.path.basename(f)
    fire, layer = fname.split("_")[0], fname.split("_")[1].split(".")[0]
    fire_groups[fire][layer] = f

Found 18 raster files.


Build Clean Dataset

In [7]:
all_data = []
for fire, layers in fire_groups.items():
    print(f"Processing fire: {fire}")

# Required layers
expected_layers = ["NDVI", "EVI", "NBR", "VCI", "SPI", "dNBR"]
available_layers = {k: v for k, v in layers.items() if k in expected_layers}

# Load available rasters
dfs = []
for lyr in expected_layers:
    if lyr in available_layers:
        df = raster_to_dataframe(available_layers[lyr], lyr)
        dfs.append(df)
    else:
        print(f" Warning: Missing {lyr} for {fire}, filling with NaN.")
        # Fill with NaN if layer missing
        dfs.append(pd.DataFrame({lyr: [np.nan] * len(dfs[0]) if dfs else [np.nan]}))

    fire_df = pd.concat(dfs, axis=1)
    fire_df["fire"] = fire

    # Drop rows where everything is NaN
    fire_df = fire_df.dropna(how="all", subset=expected_layers)

    all_data.append(fire_df)

dataset = pd.concat(all_data, ignore_index=True)
print(f"Final dataset shape: {dataset.shape}")

Processing fire: Bootleg
Processing fire: Camp
Processing fire: Creek
Processing fire: CZU
Processing fire: Dixie


KeyError: ['EVI', 'NBR', 'VCI', 'SPI', 'dNBR']