# Introduction

Describe the analysis and why it is useful.

In [None]:
# Import standard packages
import os                           # Reproducible file names
import warnings                     # View warnings

# Import third-party packages
import earthpy as et                # File organization
import geopandas as gpd             # Enables work in geodataframes
import geoviews as gv               # Enables work with geographic data
import holoviews as hv              # For use with interactive plotting
import hvplot.pandas                # Plotting maps and plots
import pandas as pd                 # Work with dataframes
import pyogrio                       # Help with import of geodatabase

# warnings.simplefilter('ignore')   # Suppress warnings

### Watershed Dataset Description

The watershed data used in this analysis comes from the [USGS Watershed Boundary Dataset (WBD)](https://www.usgs.gov/national-hydrography/watershed-boundary-dataset) for the United States. All watersheds in this dataset have gauges for stream monitoring. The data used herein are the **HUC-2 Watersheds (Regional Scale)**. 

Describe the dataset and add citation

In [None]:
# Fetch the national Watershed Boundary Dataset (WBD) geodatabase archive.
# The archive covers watersheds for the entire nation, so it is large; the
# codespace machine type may need to be changed before downloading it.
wbd_url = (
    "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD"
    "/National/GDB/WBD_National_GDB.zip"
)

# Path to the directory holding the downloaded files
wbd_dir = et.data.get_data(url=wbd_url)
wbd_dir

In [None]:
# Import 2-digit HU layer
# Build the path to the file geodatabase inside the download directory
wbd_path = os.path.join(wbd_dir, 'WBD_National_GDB.gdb')

# Read the layer and bind it to a name: later cells plot this GeoDataFrame
# and reuse its CRS, so the result must not be discarded
wbd_hu2_gdf = gpd.read_file(wbd_path, layer='WBDHU2')
wbd_hu2_gdf    # Show the GeoDataFrame as the cell output

In [None]:
# Test plot to demonstrate successful import of data
# Calls the GeoDataFrame's plot method with default settings; the returned
# object is left as the last expression so the notebook renders the figure

wbd_hu2_gdf.plot()

### Wildfire Dataset Description

The wildfire data used in this analysis comes from the [USDA's wildfire occurrence data for the United States, 1992-2020](https://www.fs.usda.gov/rds/archive/Catalog/RDS-2013-0009.6). This is the sixth edition of the dataset and was generated from the reporting systems of federal, state, and local fire organizations.

Data Citation: 
Short, Karen C. 2022. Spatial wildfire occurrence data for the United States, 1992-2020 [FPA_FOD_20221014]. 6th Edition. Fort Collins, CO: Forest Service Research Data Archive. https://doi.org/10.2737/RDS-2013-0009.6

In [None]:
# Download wildfire occurrence data (distributed as a zipped geodatabase)

# The URL is split across two literals only to keep the lines short
fire_url = (
    "https://www.fs.usda.gov/rds/archive/products/RDS-2013-0009.6"
    "/RDS-2013-0009.6_Data_Format2_GDB.zip"
)

fire_dir = et.data.get_data(url=fire_url)   # Path to downloaded fire directory
fire_dir

In [None]:
# Import fires layer from geodatabase (caching the data in memory)
# The read only happens when this session has no `fire_gdf` yet, so
# re-running the cell reuses the GeoDataFrame that is already loaded
fire_path = os.path.join(fire_dir, 'Data', 'FPA_FOD_20221014.gdb')
if 'fire_gdf' not in globals():
    print('fire_gdf does not exist. Loading...')
    # NOTE(review): from_disk is not a named read_dataframe parameter; it is
    # presumably forwarded to GDAL as a dataset open option - confirm it is
    # needed, or drop it
    fire_gdf = pyogrio.read_dataframe(fire_path, layer='Fires', from_disk=True)

fire_gdf

In [None]:
# Clean up the data
# Create a trimmed GeoDataFrame holding only the variables of interest.
# The geometry column is kept on purpose: selecting columns without it
# returns a plain DataFrame, which cannot be reprojected with to_crs below.
fire_clean_gdf = (
    fire_gdf
    [['FOD_ID', 'LATITUDE', 'LONGITUDE', 'DISCOVERY_DATE',
      'NWCG_GENERAL_CAUSE', 'FIRE_SIZE', fire_gdf.geometry.name]]
    .set_index('FOD_ID')            # Set the index to a unique identifier
)

# Convert the existing date to a datetime format
# (bracket assignment makes it explicit that a column is being replaced)
fire_clean_gdf['DISCOVERY_DATE'] = pd.to_datetime(
    fire_clean_gdf['DISCOVERY_DATE'])

# Reproject the dataframe to match the CRS of the watershed boundary
fire_clean_gdf = fire_clean_gdf.to_crs(wbd_hu2_gdf.crs)
fire_clean_gdf