In [14]:
import os
import time
import tempfile
import warnings
from os.path import join as pjoin

import dask
import dask.dataframe as dd
import dask_geopandas as dgpd
import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
from scipy.fft import dst
import tqdm
import xarray as xr
from dask.diagnostics import ProgressBar
from rasterio.crs import CRS

from raster_tools import Raster, Vector, open_vectors, clipping, zonal, warp
from raster_tools.dtypes import F32, U8, U16

# Filter out warnings from dask_geopandas and dask
warnings.filterwarnings(
    "ignore", message=".*initial implementation of Parquet.*"
)
warnings.filterwarnings(
    "ignore", message=".*Slicing is producing a large chunk.*"
)


# Root of the project data tree. Defaults to the original workstation path;
# set the FIRELAB_DATA_LOC environment variable to run the notebook on another
# machine. pjoin(..., "") guarantees the trailing separator, so code that
# concatenates onto DATA_LOC as a plain string keeps working.
DATA_LOC = pjoin(
    os.environ.get("FIRELAB_DATA_LOC", "/home/jake/FireLab/Project/data/"), ""
)

# Locations for temporary storage (all live under DATA_LOC, trailing
# separator preserved).
TMP_LOC = pjoin(DATA_LOC, "temp", "")
TMP_LOC2 = pjoin(DATA_LOC, "temp2", "")
TMP_LOC3 = pjoin(DATA_LOC, "temp3", "")

# Two-letter postal code of the state being processed; it is substituted into
# several file names below.
STATE = "OR"

# Location of clipped DEM files
DEM_DATA_DIR = pjoin(TMP_LOC, "dem_data")

# Locations of the input data sets
FEATURE_DIR = pjoin(DATA_LOC, "FeatureData")
EDNA_DIR = pjoin(DATA_LOC, "terrain")
MTBS_DIR = pjoin(DATA_LOC, "MTBS_Data")
VIIRS_DIR = pjoin(DATA_LOC, "viirs_data")

# Every input file or directory, keyed by a short name. The key prefix
# ("dem", "gm_", "aw_", ...) identifies the data family and is what the
# *_KEYS lists below select on, so insertion order matters.
PATHS = {
    "states": pjoin(EDNA_DIR, "state_borders/cb_2018_us_state_5m.shp"),
    "dem": pjoin(EDNA_DIR, "us_orig_dem/us_orig_dem/orig_dem/hdr.adf"),
    "dem_slope": pjoin(EDNA_DIR, "us_slope/us_slope/slope/hdr.adf"),
    "dem_aspect": pjoin(EDNA_DIR, "us_aspect/aspect/hdr.adf"),
    "dem_flow_acc": pjoin(EDNA_DIR, "us_flow_acc/us_flow_acc/flow_acc/hdr.adf"),
    "gm_srad": pjoin(FEATURE_DIR, "gridmet/srad_1986_2020_weekly.nc"),
    "gm_vpd": pjoin(FEATURE_DIR, "gridmet/vpd_1986_2020_weekly.nc"),
    "aw_mat": pjoin(FEATURE_DIR, "adaptwest/Normal_1991_2020_MAT.tif"),
    "aw_mcmt": pjoin(FEATURE_DIR, "adaptwest/Normal_1991_2020_MCMT.tif"),
    "aw_mwmt": pjoin(FEATURE_DIR, "adaptwest/Normal_1991_2020_MWMT.tif"),
    "aw_td": pjoin(FEATURE_DIR, "adaptwest/Normal_1991_2020_TD.tif"),
    "dm_tmax": pjoin(FEATURE_DIR, "daymet/tmax_1986_2020.nc"),
    "dm_tmin": pjoin(FEATURE_DIR, "daymet/tmin_1986_2020.nc"),
    "biomass_afg": pjoin(
        FEATURE_DIR, f"biomass/biomass_afg_1986_2020_{STATE}.nc"
    ),
    "biomass_pfg": pjoin(
        FEATURE_DIR, f"biomass/biomass_pfg_1986_2020_{STATE}.nc"
    ),
    "landfire_fvt": pjoin(
        FEATURE_DIR, "landfire/LF2020_FVT_200_CONUS/Tif/LC20_FVT_200.tif"
    ),
    "landfire_fbfm40": pjoin(
        FEATURE_DIR, "landfire/LF2020_FBFM40_200_CONUS/Tif/LC20_F40_200.tif"
    ),
    "ndvi": pjoin(FEATURE_DIR, "ndvi/access/weekly/ndvi_1986_2020_weekavg.nc"),
    "mtbs_root": pjoin(MTBS_DIR, "MTBS_BSmosaics/"),
    "mtbs_perim": pjoin(MTBS_DIR, "mtbs_perimeter_data/mtbs_perims_DD.shp"),
    "viirs_root": VIIRS_DIR,
    "viirs_perim": pjoin(VIIRS_DIR, "viirs_perims_shapefile.shp"),
}

# Years covered by the analysis (2018 through 2020 inclusive).
YEARS = list(range(2018, 2021))


def _keys_with_prefix(prefix):
    """Return the PATHS keys that start with ``prefix``, in PATHS order."""
    return [key for key in PATHS if key.startswith(prefix)]


GM_KEYS = _keys_with_prefix("gm_")
AW_KEYS = _keys_with_prefix("aw_")
DM_KEYS = _keys_with_prefix("dm_")
BIOMASS_KEYS = _keys_with_prefix("biomass_")
LANDFIRE_KEYS = _keys_with_prefix("landfire_")
NDVI_KEYS = _keys_with_prefix("ndvi")
DEM_KEYS = _keys_with_prefix("dem")

# Despite the "SET" suffix these are lists of key groups (one inner list per
# data family), split by file format: NetCDF vs GeoTIFF.
NC_KEYSET = [DM_KEYS, GM_KEYS, BIOMASS_KEYS, NDVI_KEYS]
TIF_KEYSET = [AW_KEYS, LANDFIRE_KEYS]

# Intermediate parquet outputs and checkpoint locations, all under TMP_LOC.
MTBS_DF_PATH = pjoin(TMP_LOC, f"{STATE}_mtbs.parquet")
MTBS_DF_PARQUET_PATH_NEW = pjoin(TMP_LOC, f"{STATE}_mtbs_new.parquet")
MTBS_DF_TEMP_PATH = pjoin(TMP_LOC, f"{STATE}_mtbs_temp.parquet")
MTBS_DF_TEMP_PATH_2 = pjoin(TMP_LOC, f"{STATE}_mtbs_temp_2.parquet")
CHECKPOINT_1_PATH = pjoin(TMP_LOC, "check1")
CHECKPOINT_2_PATH = pjoin(TMP_LOC, "check2")
CHECKPOINT_3_PATH = pjoin(TMP_LOC, "check3")
CHECKPOINT_4_PATH = pjoin(TMP_LOC, "check4")

In [None]:
# Purpose of this cell: bring the static rasters (DEM derivatives, AdaptWest
# normals, LANDFIRE layers) onto one common grid. Each raster is clipped to
# the outline of STATE and then reprojected to the target CRS and cell size
# with raster_tools.
crs = CRS.from_epsg(5070)
grid_size = 30  # meters

# Outline of the selected state, expressed in the target CRS. `state` and
# `state_bounds` are NOT rebound per raster any more, so after this cell they
# still describe the state in `crs`.
state_gdf = gpd.read_file(PATHS["states"])
state_gdf = state_gdf[state_gdf["STUSPS"] == STATE]
state = Vector(state_gdf).to_crs(crs)
state_bounds = state.bounds.compute()


def _clip_and_reproject(path, state_vec, target_crs, resolution, resample_method):
    """Clip the raster at ``path`` to ``state_vec`` and warp it to the target grid.

    Parameters
    ----------
    path : str
        Raster file to open with ``Raster``.
    state_vec : Vector
        State outline; it is reprojected to the raster's own CRS before
        clipping so that the outline and the bounds passed to ``clip`` are in
        the same coordinates as the raster.
    target_crs
        CRS handed to ``warp.reproject``.
    resolution
        Output cell size handed to ``warp.reproject``.
    resample_method : str
        Resampling method name handed to ``warp.reproject``.

    Returns
    -------
    The clipped and reprojected raster returned by ``warp.reproject``.
    """
    raster = Raster(path)
    state_in_raster_crs = state_vec.to_crs(raster.crs)
    bounds_in_raster_crs = state_in_raster_crs.bounds.compute()
    clipped = clipping.clip(state_in_raster_crs, raster, bounds_in_raster_crs)
    return warp.reproject(clipped, target_crs, resample_method, resolution)


# DEM and its derivatives. Previously this loop clipped with the outline in
# the target CRS instead of the raster's CRS, unlike the loops below.
# NOTE(review): "dem_aspect" is presumably a circular quantity (degrees);
# bilinear resampling is kept from the original but should be confirmed.
dem_files = [PATHS[key] for key in DEM_KEYS]
dems_fixed = [
    _clip_and_reproject(path, state, crs, grid_size, "bilinear")
    for path in dem_files
]

# AdaptWest climate normals (continuous values, so bilinear is kept).
aw_files = [PATHS[key] for key in AW_KEYS]
aw_fixed = [
    _clip_and_reproject(path, state, crs, grid_size, "bilinear")
    for path in aw_files
]

# LANDFIRE vegetation type / fuel model layers hold class codes, so they are
# resampled with nearest-neighbour; bilinear would blend unrelated codes.
landfire_files = [PATHS[key] for key in LANDFIRE_KEYS]
landfire_fixed = [
    _clip_and_reproject(path, state, crs, grid_size, "nearest")
    for path in landfire_files
]

In [None]:
# Clip and reproject the biomass NetCDF layers onto the common grid.
# The daymet (dm), gridMET (gm) and NDVI files are only listed here; they are
# not processed yet, so dm_fixed, gm_fixed and ndvi_fixed stay empty.
from raster_tools.io import open_dataset

biomass_files = [PATHS[key] for key in BIOMASS_KEYS]
dm_files = [PATHS[key] for key in DM_KEYS]
gm_files = [PATHS[key] for key in GM_KEYS]
ndvi_files = [PATHS[key] for key in NDVI_KEYS]
biomass_fixed = []
dm_fixed = []
gm_fixed = []
ndvi_fixed = []
for file in biomass_files:
    biomass_ds = open_dataset(file)
    # Only the first entry of the opened dataset is used.
    biomass = biomass_ds[list(biomass_ds.keys())[0]]
    # Clip with the state outline expressed in this raster's CRS; local names
    # are used so the module-level `state` / `state_bounds` are left untouched.
    state_in_raster_crs = state.to_crs(biomass.crs)
    bounds_in_raster_crs = state_in_raster_crs.bounds.compute()
    biomass = clipping.clip(state_in_raster_crs, biomass, bounds_in_raster_crs)
    # Fixed: this previously passed the undefined name `dm` (NameError).
    biomass = warp.reproject(biomass, crs, "bilinear", grid_size)
    biomass_fixed.append(biomass)
# TODO: process dm_files, gm_files and ndvi_files the same way (open, clip to
# the state, reproject) and fill dm_fixed, gm_fixed and ndvi_fixed.

In [None]:
# Scratch check: show the first key of `biomass`.
# NOTE(review): this only works while `biomass` still holds the object
# returned by open_dataset; the preceding cell rebinds the name inside its
# loop - confirm this cell is still needed.
biomass_keys = list(biomass.keys())
biomass_keys[0]


In [7]:
        # Build, for STATE, a dict of VIIRS fire-perimeter rows per year
        # (year_to_perims) and a dict of MTBS raster paths per year
        # (year_to_mtbs_file).
        # NOTE(review): the perimeters use EPSG:5071 while the raster cell
        # targets EPSG:5070 - confirm the difference is intended.
        perim_crs = "EPSG:5071"

        # Select the requested state directly instead of building a geometry
        # selection for every state and then discarding all but one.
        stdf = open_vectors(PATHS["states"], 0).data.to_crs(perim_crs)
        state_shape = stdf[stdf.STUSPS == STATE].geometry
        stdf = None
        state_geom = state_shape.compute()
        if state_geom.empty:
            # Same exception type the former dict lookup raised for an
            # unknown state code.
            raise KeyError(STATE)

        perimdf = open_vectors(PATHS["viirs_perim"]).data.to_crs(perim_crs)
        # perimdf = dgpd.read_parquet(DATA_LOC + "viirs_perims.parquet").compute().to_crs("EPSG:5071")
        # perimdf = perimdf.rename(columns={"t": "Ig_Date"})
        state_fire_perims = perimdf.clip(state_geom)
        # Parse the ignition date, order chronologically, then materialise.
        state_fire_perims = (
            state_fire_perims.assign(
                Ig_Date=lambda frame: dd.to_datetime(
                    frame.Ig_Date, format="%Y-%m-%d"
                )
            )
            .sort_values("Ig_Date")
            .compute()
        )
        # Keep only the analysis years; driven by YEARS rather than a
        # hard-coded 2018-2020 range so the two cannot drift apart.
        state_fire_perims = state_fire_perims[
            state_fire_perims.Ig_Date.dt.year.isin(YEARS)
        ]
        year_to_perims = {
            y: state_fire_perims[state_fire_perims.Ig_Date.dt.year == y]
            for y in YEARS
        }
        # Drop the reference to the combined frame; only the per-year views
        # are used from here on.
        state_fire_perims = None

        year_to_mtbs_file = {
            y: pjoin(PATHS["mtbs_root"], f"mtbs_{STATE}_{y}.tif")
            for y in YEARS
        }

In [None]:
# Boolean mask over the 2018 perimeter rows: True where fireid is "F4693".
year_to_perims[2018]["fireid"] == "F4693"

In [15]:
# Pull the 2018 perimeter rows belonging to fire F4693 and show their CRS.
perims_2018 = year_to_perims[2018]
fire = perims_2018.loc[perims_2018["fireid"] == "F4693"]
fire.crs

<Projected CRS: EPSG:5071>
Name: NAD83(HARN) / Conus Albers
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: United States (USA) - CONUS onshore - Alabama; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming.
- bounds: (-124.79, 24.41, -66.91, 49.38)
Coordinate Operation:
- name: Conus Albers
- method: Albers Equal Area
Datum: NAD83 (High Accuracy Reference Network)
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [31]:
# Display the perimeter rows whose ignition date falls in 2018.
perims_2018 = year_to_perims[2018]
perims_2018

Unnamed: 0,duration,fireid,Ig_Date,geometry
1,0.0,F4693,2018-04-24,"POLYGON ((-2009341.124 2399604.254, -2009337.2..."
0,0.0,F4734,2018-04-24,"POLYGON ((-2040826.093 2451916.315, -2040808.6..."
2,1.0,F4693,2018-04-25,"POLYGON ((-2009053.374 2399913.203, -2009038.6..."
3,1.0,F4734,2018-04-25,"POLYGON ((-2041844.983 2451282.932, -2041845.2..."
4,1.5,F4734,2018-04-26,"POLYGON ((-2041845.246 2451285.527, -2041845.3..."
...,...,...,...,...
1130,0.0,F19437,2018-12-12,"POLYGON ((-2089307.891 2476931.802, -2089306.3..."
1131,1.0,F19385,2018-12-12,"MULTIPOLYGON (((-1959289.579 2398569.998, -195..."
1134,1.5,F19385,2018-12-12,"MULTIPOLYGON (((-1958544.085 2403209.619, -195..."
1136,2.5,F19385,2018-12-13,"MULTIPOLYGON (((-1958550.542 2403192.381, -195..."


In [32]:
def _final_perimeter_per_fire(perims):
    """Return, for each fireid in ``perims``, the row with the largest duration."""
    last_row_labels = perims.groupby("fireid")["duration"].idxmax()
    return perims.loc[last_row_labels]


# One "final" (longest-duration) perimeter per fire, for each analysis year.
viirs_final_perimeters_2018 = _final_perimeter_per_fire(year_to_perims[2018])
viirs_final_perimeters_2019 = _final_perimeter_per_fire(year_to_perims[2019])
viirs_final_perimeters_2020 = _final_perimeter_per_fire(year_to_perims[2020])

In [46]:
# Count the 2018 final perimeters whose geometry type name starts with
# "Multi" (e.g. MultiPolygon).
is_multipart = viirs_final_perimeters_2018.geometry.apply(
    lambda geom: geom.geom_type.startswith("Multi")
)
len(viirs_final_perimeters_2018[is_multipart])

23

In [44]:
# Geometry type of the 2018 final perimeter of fire F11067.
f11067_final = viirs_final_perimeters_2018.loc[
    viirs_final_perimeters_2018["fireid"] == "F11067"
]
f11067_final.geometry.geom_type

595    MultiPolygon
dtype: object

In [26]:
# All 2018 perimeter rows recorded for fire F8994.
perims_2018 = year_to_perims[2018]
perims_2018.loc[perims_2018["fireid"] == "F8994"]

Unnamed: 0,duration,fireid,Ig_Date,geometry
68,0.0,F8994,2018-07-16,"POLYGON ((-2247657.566 2484333.518, -2247656.9..."
83,1.5,F8994,2018-07-17,"POLYGON ((-2247362.914 2480379.876, -2247363.3..."
80,1.0,F8994,2018-07-17,"POLYGON ((-2247363.641 2480370.800, -2247366.3..."
92,2.0,F8994,2018-07-18,"POLYGON ((-2247363.025 2480378.808, -2247363.0..."
115,2.5,F8994,2018-07-18,"POLYGON ((-2248044.706 2480036.541, -2248047.3..."
...,...,...,...,...
877,74.5,F8994,2018-09-28,"POLYGON ((-2253173.218 2487389.193, -2253164.6..."
883,75.5,F8994,2018-09-29,"POLYGON ((-2253096.264 2487516.364, -2253091.2..."
880,75.0,F8994,2018-09-29,"POLYGON ((-2253164.684 2487404.721, -2253096.2..."
885,77.0,F8994,2018-10-01,"POLYGON ((-2253091.227 2487524.145, -2252749.2..."
