# Introduction

Describe the analysis and why useful

In [None]:
# Import standard packages
import os                           # Reproducible file names
import warnings                     # View warnings

# Import third-party packages
import earthpy as et                # File organization
import geopandas as gpd             # Enables work in geodataframes
import geoviews as gv               # Enables work with geographic data
import holoviews as hv              # For use with interactive plotting
import hvplot.pandas                # Plotting maps and plots
import pandas as pd                 # Work with dataframes
import pyogrio                       # Help with import of geodatabase

# warnings.simplefilter('ignore')   # Suppress warnings

### Watershed Dataset Description

The watershed data used in this analysis comes from the [USGS Watershed Boundary Dataset (WBD)](https://www.usgs.gov/national-hydrography/watershed-boundary-dataset) for the United States. All watersheds in this dataset have gauges for stream monitoring. The data used herein are the **HUC-2 Watersheds (Regional Scale)**. 

Describe the dataset and add citation

In [None]:
# Fetch the national Watershed Boundary Dataset (WBD) geodatabase archive.
# The archive holds watersheds for the entire nation, so it is large;
# the codespace machine type may need to be changed for the download.
wbd_url = (
    "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD"
    "/National/GDB/WBD_National_GDB.zip"
)

# earthpy returns the path of the directory holding the downloaded data
wbd_dir = et.data.get_data(url=wbd_url)
wbd_dir

In [None]:
# Load the 2-digit hydrologic unit (HU2, regional watersheds) layer
# Build the path to the geodatabase inside the download directory
wbd_path = os.path.join(wbd_dir, 'WBD_National_GDB.gdb')

# Read only the layer of interest from the geodatabase
wbd_hu2_gdf = gpd.read_file(
    wbd_path,
    layer='WBDHU2',
    from_disk=True,
)
wbd_hu2_gdf

In [None]:
# Quick test plot of the HU2 watershed geodataframe to confirm that the
# layer was imported successfully

wbd_hu2_gdf.plot()

### Wildfire Dataset Description

The wildfire data used in this analysis comes from the [USDA's wildfire occurrence data for the United States, 1992-2020](https://www.fs.usda.gov/rds/archive/Catalog/RDS-2013-0009.6). This is the sixth edition of the dataset and was generated from reporting systems of federal, state, and local fire organizations.

Data Citation: 
Short, Karen C. 2022. Spatial wildfire occurrence data for the United States, 1992-2020 [FPA_FOD_20221014]. 6th Edition. Fort Collins, CO: Forest Service Research Data Archive. https://doi.org/10.2737/RDS-2013-0009.6

In [None]:
# Fetch the wildfire occurrence geodatabase archive
fire_url = (
    "https://www.fs.usda.gov/rds/archive/products/RDS-2013-0009.6"
    "/RDS-2013-0009.6_Data_Format2_GDB.zip")

# earthpy returns the path of the directory holding the downloaded data
fire_dir = et.data.get_data(url=fire_url)
fire_dir

In [None]:
# Import fires layer from geodatabase (caching data)
# Reading the layer is only done when `fire_gdf` is not already defined in
# this session, so re-running the cell does not reload the data
fire_path = os.path.join(fire_dir, 'Data', 'FPA_FOD_20221014.gdb')
if 'fire_gdf' not in globals():
    print('fire_gdf does not exist. Loading...')
    fire_gdf = pyogrio.read_dataframe(fire_path, layer='Fires')

fire_gdf.head()                     # Shows only first few rows of data

In [None]:
# Build a trimmed copy of the fire data (geometry is already populated)
# Outer brackets index the geodataframe; inner brackets are the column list
fire_clean_gdf = fire_gdf[
    ['FOD_ID', 'DISCOVERY_DATE', 'FIRE_SIZE', 'geometry']
].set_index('FOD_ID')

# Parse the discovery date column into datetime values
fire_clean_gdf['DISCOVERY_DATE'] = pd.to_datetime(
    fire_clean_gdf['DISCOVERY_DATE']
)

# Reproject so the fires share the CRS of the watershed boundaries,
# reporting the CRS before and after
print(f'Geodetic CRS before reprojection: {fire_clean_gdf.crs}')
fire_clean_gdf = fire_clean_gdf.to_crs(wbd_hu2_gdf.crs)
print(f'Geodetic CRS after reprojection: {fire_clean_gdf.crs}')

fire_clean_gdf                  # Use .info() to see data types

In [None]:
# Spatially join the watershed with the fire history geodataframes
# (inner join keeps one row per watershed/fire pair that intersects)
fire_region_gdf = (
    wbd_hu2_gdf[['name', 'geometry']]
    .sjoin(fire_clean_gdf, how='inner', predicate='intersects')
)

# Calculate max fire size and number of fires for each watershed/year
# combination.
# NOTE: fires are counted with 'size' on FIRE_SIZE (row count per group)
# instead of counting the 'index_right' column. geopandas >= 1.0 names the
# joined right-index column after the right index (here 'FOD_ID'), so
# 'index_right' is not guaranteed to exist across geopandas versions.
fire_region_gdf = (fire_region_gdf
    .groupby(['name', fire_region_gdf.DISCOVERY_DATE.dt.year])
    .agg(
        fire_size=('FIRE_SIZE', 'max'),         # New name = (old, how)
        num_fires=('FIRE_SIZE', 'size'))        # New name = (old, how)
)

print('Total number of fires: ' + str(fire_region_gdf.num_fires.sum()))
fire_region_gdf

In [None]:
# Calculate the area of each watershed using an equal-area projection.
# EPSG:5070 (NAD83 / Conus Albers) is a fully defined Albers Equal Area CRS.
# The previously used code 9822 is the EPSG identifier of the Albers Equal
# Area projection *method*, not a coordinate reference system.
# NOTE: despite the column name, 'area_ha' holds MILLIONS of hectares
# (sq. m / 10,000 = ha; ha / 1,000,000 = millions of ha).
wbd_hu2_gdf['area_ha'] = (        # Add an area column to gdf
    wbd_hu2_gdf.to_crs(5070)      # Temporary reprojected copy (Albers)
    .area / 10000 / 1000000       # Watershed area in millions of ha
)

# Calculate total number of fires in each watershed region
fire_density_gdf = (fire_region_gdf
    .reset_index()              # Reset because name is currently in index
    [['name', 'num_fires']]
    .groupby('name')            # Group by the name of the region
    .sum()                      # Sum of the number of fires over all years
    .join(wbd_hu2_gdf.set_index('name'))   # Join wbd columns on region name
    [['num_fires', 'area_ha']]             # Keep only fire count and area
)

# Calculate fire density (number of fires per unit area).
# Because 'area_ha' is in millions of hectares, the resulting value is the
# number of fires per million hectares.
fire_density_gdf['fire_density_per_ha'] = (
    fire_density_gdf.num_fires / fire_density_gdf.area_ha
)

# Display the fire density for each watershed region
fire_density_gdf.fire_density_per_ha