# Fire stats data

Process raw annual severity and structural data to generate tabular data of area burned. Specifically, calculate the stratified area burned in each westside ecoregion and in LSRs in each ecoregion from 1985-2020, as well as the total area of old growth in each ecoregion and in LSRs in each ecoregion.

Strata
- Severity
- Structural condition
- Year
- Ecoregion
- State

In [None]:
import os
import time

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio as rio
import rasterio.mask  # noqa: F401 - submodule must be imported explicitly for `rio.mask.mask`

## Constants

In [None]:
# CRS that every vector layer is reprojected to before areas are measured.
# NOTE(review): geometry areas are divided by SQM_PER_HA, so this CRS is assumed to use metres - confirm.
TARGET_CRS = "EPSG:5070"

# Fill value written to raster pixels outside a mask geometry; also the "masked" severity code.
NODATA = 0
# Area covered by one raster pixel; assumes 30 x 30 m pixels - TODO confirm against the rasters.
PIXEL_AREA = 30 * 30
# Square metres per hectare, used to convert areas to hectares.
SQM_PER_HA = 10000

In [None]:
# Structural condition raster codes mapped to their human-readable labels.
# Code 0 is deliberately absent: it is treated as no data when pixels are counted.
struc_classes = {
    -1: "Nonforest",
    1: "Sparse",
    2: "Open",
    3: "Sapling/pole - moderate/closed", 
    4: "Small/medium tree - moderate/closed", 
    5: "Large tree - moderate/closed", 
    6: "Large/giant tree - moderate/closed"
}

# Burn severity raster codes mapped to their labels.
# The labels are assigned positionally to per-code pixel counts, so the keys
# must stay contiguous from 0 and in ascending order.
sev_classes = {
    0: "masked",
    1: "Very low / unburned",
    2: "Low", 
    3: "Low / moderate",
    4: "Moderate / high",
    5: "High", 
    6: "Very high"
}

## Fire Area
Creates a table showing the final fire areas of major and other fires, from NIFC data.

In [None]:
# Root directory of the input data, relative to the current working directory.
data_dir = os.path.join("..", "data")

In [None]:
# Read the fire perimeter shapefile, then reproject it to the analysis CRS.
fire_path = os.path.join(
    data_dir,
    "NIFC",
    "Public_NIFS_Perimeters_westside_ecoregions_complete.shp",
)
fires = gpd.read_file(fire_path)
fires = fires.to_crs(TARGET_CRS)

In [None]:
fires["fire_name"] = fires.IncidentNa.str.title()
# Exclude irrelevant fires
exclude_fires = ["S. Obenchain", "White River", "Grizzly Creek"]
# Copy so the column assignments below act on an independent frame, not a slice
fires = fires[~fires.fire_name.isin(exclude_fires)].copy()

# Calculate fire area in hectares.
# Item assignment is required here: attribute assignment (`fires.ha = ...`)
# does not create a new column, so "ha" could be missing when sorting below.
fires["ha"] = fires.area.divide(SQM_PER_HA)

# Every individual fire, largest first
fire_area_all = fires.sort_values("ha", ascending=False)[["fire_name", "ha"]]

# Fires smaller than this threshold will be grouped into "Other"
other_threshold_ha = 10_000
fires["fire_name"] = fires["fire_name"].mask(fires["ha"] < other_threshold_ha, "Other")

# Group and sum the "Other" fires. Only numeric columns are summed; geometry
# and text attributes cannot be meaningfully added together.
fires = fires.groupby("fire_name").sum(numeric_only=True).reset_index()

# Major fires plus a single aggregated "Other" row, largest first
fire_area_other = fires.sort_values("ha", ascending=False)[["fire_name", "ha"]]

In [None]:
# Write both fire-area tables to CSV in the working directory, without the index column.
for fire_table, csv_name in (
    (fire_area_all, "fire_area_all.csv"),
    (fire_area_other, "fire_area_other.csv"),
):
    fire_table.to_csv(csv_name, index=False)

## Pre-processing

### Ecoregion data

In [None]:
# Read the study-area polygons, reproject them to the analysis CRS and preview them.
eco_path = os.path.join(data_dir, "study_area.gpkg")
eco = gpd.read_file(eco_path)
eco = eco.to_crs(TARGET_CRS)
eco.plot()

### Severity and struccond data

In [None]:
# Directory holding the annual structural condition rasters (`struccond_<year>.tif`).
# Available from https://lemma.forestry.oregonstate.edu/data/structure-maps
struc_dir = os.path.join(data_dir, "struccond")
# Directory holding the annual severity rasters (`severity_<year>.tif`).
# The severity grids can be generated using `scripts/severity.js`
sev_dir = os.path.join(data_dir, "severity")

### LSR data

In [None]:
# LSR polygons pre-clipped to the Westside ecoregions.
# Built from `data_dir` like every other input path so the data root is defined in one place.
lsr_path = os.path.join(data_dir, "boundary", "LSR_westside_ecoregions_dissolve.shp")
lsr = gpd.read_file(lsr_path).to_crs(TARGET_CRS)

# Clip the ecoregions to the LSRs so that LSRs can be iterated by ecoregion.
# The result keeps the ecoregion attributes and index labels of `eco`.
eco_lsr = gpd.clip(eco, lsr, keep_geom_type=True)

eco_lsr.plot()

## Ecoregions

In [None]:
def get_sev_distribution(struc_arr, sev_arr, year, ecoregion, state):
    """
    Return a dataframe of the distribution of burn severity pixels, stratified by structural class.

    Parameters
    ----------
    struc_arr : numpy.ndarray
        Integer array of structural condition codes (the keys of ``struc_classes``).
    sev_arr : numpy.ndarray
        Array of non-negative integer severity codes (the keys of ``sev_classes``),
        with the same shape as ``struc_arr``.
    year, ecoregion, state
        Labels copied unchanged onto every output row.

    Returns
    -------
    pandas.DataFrame
        One row per (structural class, non-masked severity class) with the columns
        ``n_burned``, ``hectares_burned``, ``severity``, ``struccond``, ``year``,
        ``ecoregion`` and ``state``. The index holds the severity code, so index
        values repeat once per structural class.

    Raises
    ------
    ValueError
        If a pixel carries a severity code beyond the classes in ``sev_classes``.
    """
    # Labels are matched to bincount positions, which assumes the severity
    # codes are contiguous integers starting at 0.
    sev_labels = list(sev_classes.values())
    n_sev_classes = len(sev_labels)

    df_list = []

    # For each structural condition class
    for struc_level, struc_label in struc_classes.items():
        # Keep only the severity pixels that fall in this structural class.
        # Boolean indexing avoids building a full-size mask and product array per class.
        class_sev = sev_arr[struc_arr == struc_level]

        # Count the occurrence of every possible severity class
        sev_table = np.bincount(class_sev, minlength=n_sev_classes)
        if sev_table.size != n_sev_classes:
            raise ValueError(
                f"Severity codes up to {sev_table.size - 1} found, but only "
                f"{n_sev_classes} severity classes are defined."
            )

        sev_df = pd.DataFrame({
            "n_burned": sev_table,
            "hectares_burned": sev_table * PIXEL_AREA / SQM_PER_HA,
            "severity": sev_labels,
            "struccond": struc_label,
            "year": year,
            "ecoregion": ecoregion,
            "state": state,
        })
        # Remove masked pixels
        df_list.append(sev_df[sev_df["severity"] != "masked"])

    return pd.concat(df_list)

### Processing

Calculate the stratified area burned in each westside ecoregion and in LSRs in each ecoregion from 1985-2020, as well as the total area of old growth in each ecoregion and in LSRs in each ecoregion.

Strata
- Severity
- Structural condition
- Year
- Ecoregion
- State

In [None]:
from datetime import date

# Date stamp (YYYYMMDD) of the current run, used to name the output CSV.
today = f"{date.today():%Y%m%d}"

In [None]:
# Reset indexes so we can index LSRs using iterrows
eco.reset_index(drop=True, inplace=True)
eco_lsr.reset_index(drop=True, inplace=True)

In [None]:
all_yr_dfs = []
working_files = []

yr_range = range(1986, 2021)

# Structural condition codes counted as old growth when totalling OG area
OLD_GROWTH_CLASSES = (5, 6)


def _mask_pair(struc, sev, geom):
    """Clip the open structure and severity rasters to `geom`; return (struc_arr, sev_arr) for band 1."""
    # rasterio expects an iterable of shapes, so the single geometry is wrapped in a list
    struc_arr, _ = rio.mask.mask(struc, [geom], crop=True, indexes=1, nodata=NODATA)
    sev_arr, _ = rio.mask.mask(sev, [geom], crop=True, indexes=1, nodata=NODATA)
    if struc_arr.shape != sev_arr.shape:
        raise ValueError(
            f"Structure and severity grids are misaligned: {struc_arr.shape} vs {sev_arr.shape}"
        )
    return struc_arr, sev_arr


def _old_growth_hectares(struc_arr):
    """Return the total (burned or not) hectares of old-growth pixels in `struc_arr`."""
    return sum(
        np.count_nonzero(struc_arr == struc_id) * PIXEL_AREA / SQM_PER_HA
        for struc_id in OLD_GROWTH_CLASSES
    )


if len(eco_lsr) != len(eco):
    raise ValueError(
        f"Expected one LSR row per ecoregion, got {len(eco_lsr)} LSR rows for {len(eco)} ecoregions."
    )

# 30 minutes to run
for year in yr_range:
    print(year)
    # Severity is stratified by the prefire structural conditions
    struc_year = year - 1

    struc_path = os.path.join(struc_dir, f"struccond_{struc_year}.tif")
    sev_path = os.path.join(sev_dir, f"severity_{year}.tif")

    # Context managers close both datasets once the year is done
    with rio.open(struc_path) as struc, rio.open(sev_path) as sev:
        # Enumerate so the LSR lookup is positional regardless of the index labels of `eco`
        for pos, (_, row) in enumerate(eco.iterrows()):
            ecoregion = row.NA_L3NAME
            state = row.STATE_NAME
            geom = row.geometry
            print(f"\t{state} - {ecoregion}")

            # Calculate area by structure and severity throughout the ecoregion
            struc_arr, sev_arr = _mask_pair(struc, sev, geom)
            region_data = get_sev_distribution(struc_arr, sev_arr, year, ecoregion, state)

            # Total (not burned) OG area in the region
            og_hectares = _old_growth_hectares(struc_arr)

            # Calculate area by structure and severity within LSRs in the ecoregion.
            # Named `lsr_row` so the `lsr` GeoDataFrame is not overwritten.
            lsr_row = eco_lsr.iloc[pos]
            if lsr_row.NA_L3NAME != ecoregion or lsr_row.STATE_NAME != state:
                raise ValueError(
                    f"LSR row {pos} ({lsr_row.STATE_NAME} - {lsr_row.NA_L3NAME}) does not "
                    f"match ecoregion row ({state} - {ecoregion})."
                )
            lsr_geom = lsr_row.geometry

            lsr_struc_arr, lsr_sev_arr = _mask_pair(struc, sev, lsr_geom)
            lsr_data = get_sev_distribution(lsr_struc_arr, lsr_sev_arr, year, ecoregion, state)

            # Total (not burned) OG area within LSRs in the region
            lsr_og_hectares = _old_growth_hectares(lsr_struc_arr)

            region_data = region_data.assign(
                # Both tables list the same class combinations in the same order, so
                # copy by position instead of aligning on the repeated index labels.
                lsr_hectares_burned=lsr_data["hectares_burned"].to_numpy(),
                # Store the region areas to allow calculating proportion of area
                region_hectares=geom.area / SQM_PER_HA,
                region_lsr_hectares=lsr_geom.area / SQM_PER_HA,
                region_og_hectares=og_hectares,
                region_lsr_og_hectares=lsr_og_hectares,
            )

            # Subset and re-arrange the columns
            region_data = region_data[[
                "year",
                "ecoregion",
                "state",
                "severity",
                "struccond",
                "hectares_burned",
                "lsr_hectares_burned",
                "region_hectares",
                "region_lsr_hectares",
                "region_og_hectares",
                "region_lsr_og_hectares",
            ]]

            all_yr_dfs.append(region_data)

    # Save a working file in case something breaks. Store the path for cleanup once finished.
    working_df = pd.concat(all_yr_dfs)
    working_file = f"working_data_{int(time.time())}.csv"
    working_df.to_csv(working_file, index=False)
    working_files.append(working_file)

all_dfs = pd.concat(all_yr_dfs, ignore_index=True)
# Assign year groups
all_dfs = all_dfs.assign(yr_group=np.where(all_dfs["year"] == 2020, "2020", "1985 - 2019"))

all_dfs.to_csv(f"fire_stats_data_{today}.csv", index=False)

# The final table is written, so the per-year working files are no longer needed
for working_path in working_files:
    if os.path.isfile(working_path):
        os.remove(working_path)