In [1]:
import os
import tempfile
import warnings
from os.path import join as pjoin

import dask
import dask.dataframe as dd
import dask_geopandas as dgpd
import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
from scipy.fft import dst
import tqdm
import xarray as xr
from dask.diagnostics import ProgressBar
from rasterio.crs import CRS

from raster_tools import Raster, Vector, open_vectors, clipping, zonal, warp
from raster_tools.dtypes import F32, U8, U16

import matplotlib.pyplot as plt

# change pandas max col display
pd.set_option('display.max_columns', 500)

In [4]:
# Location for temporary storage
TMP_LOC = "/home/jake/FireLab/Project/data/temp/"
TMP_LOC2 = "/home/jake/FireLab/Project/data/temp2/"
TMP_LOC3 = "/home/jake/FireLab/Project/data/temp3/"
DATA_LOC = "/home/jake/FireLab/Project/data/"

STATE = "OR"

# Location of clipped DEM files
DEM_DATA_DIR = pjoin(TMP_LOC, "dem_data")

# location of feature data files
FEATURE_DIR = pjoin(DATA_LOC, "FeatureData")
EDNA_DIR = pjoin(DATA_LOC, "terrain")
MTBS_DIR = pjoin(DATA_LOC, "MTBS_Data")
VIIRS_DIR = pjoin(DATA_LOC, "viirs_data")

PATHS = {
    "states": pjoin(EDNA_DIR, "state_borders/cb_2018_us_state_5m.shp"),
    "dem": pjoin(EDNA_DIR, "us_orig_dem/us_orig_dem/orig_dem/hdr.adf"),
    "dem_slope": pjoin(EDNA_DIR, "us_slope/us_slope/slope/hdr.adf"),
    "dem_aspect": pjoin(EDNA_DIR, "us_aspect/aspect/hdr.adf"),
    "dem_flow_acc": pjoin(EDNA_DIR, "us_flow_acc/us_flow_acc/flow_acc/hdr.adf"),
    "gm_srad": pjoin(FEATURE_DIR, "gridmet/srad_1986_2020_weekly.nc"),
    "gm_vpd": pjoin(FEATURE_DIR, "gridmet/vpd_1986_2020_weekly.nc"),
    "aw_mat": pjoin(FEATURE_DIR, "adaptwest/Normal_1991_2020_MAT.tif"),
    "aw_mcmt": pjoin(FEATURE_DIR, "adaptwest/Normal_1991_2020_MCMT.tif"),
    "aw_mwmt": pjoin(FEATURE_DIR, "adaptwest/Normal_1991_2020_MWMT.tif"),
    "aw_td": pjoin(FEATURE_DIR, "adaptwest/Normal_1991_2020_TD.tif"),
    "dm_tmax": pjoin(FEATURE_DIR, "daymet/tmax_1986_2020.nc"),
    "dm_tmin": pjoin(FEATURE_DIR, "daymet/tmin_1986_2020.nc"),
    "biomass_afg": pjoin(
        FEATURE_DIR, "biomass/biomass_afg_1986_2020_{}.nc".format(STATE)
    ),
    "biomass_pfg": pjoin(
        FEATURE_DIR, "biomass/biomass_pfg_1986_2020_{}.nc".format(STATE)
    ),
    "landfire_fvt": pjoin(
        FEATURE_DIR, "landfire/LF2020_FVT_200_CONUS/Tif/LC20_FVT_200.tif"
    ),
    "landfire_fbfm40": pjoin(
        FEATURE_DIR, "landfire/LF2020_FBFM40_200_CONUS/Tif/LC20_F40_200.tif"
    ),
    "ndvi": pjoin(FEATURE_DIR, "ndvi/access/weekly/ndvi_1986_2020_weekavg.nc"),
    "mtbs_root": pjoin(MTBS_DIR, "MTBS_BSmosaics/"),
    "mtbs_perim": pjoin(MTBS_DIR, "mtbs_perimeter_data/mtbs_perims_DD.shp"),
    "viirs_root": VIIRS_DIR,
    "viirs_perim": pjoin(VIIRS_DIR, "viirs_perims_shapefile.shp"),
}

In [5]:
import os
from os.path import join as pjoin

import rasterio as rio
from rasterio.warp import calculate_default_transform, reproject, Resampling

from raster_tools import Raster, open_dataset

def preprocess_features(reference_raster_path, feature_paths, output_dir):
    """
    Preprocesses feature files to match the grid and CRS of a reference raster.

    Args:
        reference_raster_path (str): Path to the reference raster file.
        feature_paths (list): List of paths to feature files (netcdfs, tifs, etc.).
        output_dir (str): Directory to save the preprocessed feature files.
    """
    # Open reference raster and get grid information
    with rio.open(reference_raster_path) as src:
        reference_crs = src.crs
        reference_transform = src.transform
        reference_width = src.width
        reference_height = src.height

    # Process each feature file
    for feature_path in feature_paths:
        # Get output path
        filename = os.path.basename(feature_path)
        output_path = pjoin(output_dir, filename)

        # Handle netCDF files differently
        if feature_path.endswith(".nc"):
            # Open netCDF dataset
            dataset = open_dataset(feature_path)

            # Process each variable in the dataset
            for var_name, raster in dataset.items():
                # Reproject and resample
                reproject(
                    source=raster.xdata,
                    destination=rio.open(output_path, "w", driver="GTiff", 
                                        width=reference_width, height=reference_height,
                                        count=1, dtype=raster.dtype,
                                        crs=reference_crs, transform=reference_transform),
                    src_transform=raster.affine,
                    src_crs=raster.crs,
                    dst_transform=reference_transform,
                    dst_crs=reference_crs,
                    resampling=Resampling.nearest
                )
        else:
            # Open raster file
            with rio.open(feature_path) as src:
                transform, width, height = calculate_default_transform(
                    src.crs, reference_crs, src.width, src.height, *src.bounds
                )

                # Reproject and resample
                reproject(
                    source=rio.band(src, 1),
                    destination=rio.open(output_path, "w", driver="GTiff", 
                                        width=reference_width, height=reference_height,
                                        count=1, dtype=src.dtypes[0],
                                        crs=reference_crs, transform=transform),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=reference_crs,
                    resampling=Resampling.nearest
                )

# Example usage
reference_raster_path = PATHS["dm_tmax"]
feature_paths = [
    PATHS["biomass_afg"],
]
output_dir = pjoin(DATA_LOC, "TransformedFeatureData")
preprocess_features(reference_raster_path, feature_paths, output_dir)



RasterioIOError: Attempt to create new tiff file '/home/jake/FireLab/Project/data/TransformedFeatureData/biomass_afg_1986_2020_OR.nc' failed: No such file or directory