# Process Historical Fire Perimeters

## Scope

In [1]:
import geopandas as gpd
import fiona
import os
import zipfile
import rasterio
import numpy as np
from rasterio.features import rasterize
from rasterio.transform import from_origin
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import contextily as ctx
import ipywidgets as widgets
from IPython.display import display

# Prepare data for processing

## Extract and read in fire perimeter vector data

In [2]:
# File paths
# NOTE(review): these are absolute paths on one user's Windows machine; update them before running elsewhere.
# Zip archive that unzip_folder extracts below.
fire_perimeters_zip_path = r"C:\Users\imire\OneDrive - UW\Documents\GDA567\disturbance_interaction_analysis\downloaded_data\historic_fire_perimeters\InterAgencyFirePerimeterHistory_All_Years_View.zip"
# Folder the archive is extracted into and the shapefile is read from.
output_shp_folder_path = r"C:\Users\imire\OneDrive - UW\Documents\GDA567\disturbance_interaction_analysis\downloaded_data\historic_fire_perimeters"
# NOTE(review): not referenced in the visible cells; presumably the output folder for the MNF subset - confirm.
output_mnf_subset_folder_path = r"C:\Users\imire\OneDrive - UW\Documents\GDA567\disturbance_interaction_analysis\extracted_mnf_subset"

In [3]:
# Define unzip to output location function
def unzip_folder(zip_path, extract_to):
    """
    Extracts every member of a zip archive into a folder and prints the destination.

    :param zip_path: path of the .zip archive to open for reading
    :param extract_to: folder that receives the extracted members
    :return: None
    """
    with zipfile.ZipFile(zip_path, mode='r') as archive:
        archive.extractall(path=extract_to)

    # Report only after the archive has been closed successfully
    destination_message = f"Extracted to: {extract_to}"
    print(destination_message)

In [4]:
# Extract the fire perimeter archive into the shapefile folder defined above
unzip_folder(fire_perimeters_zip_path, output_shp_folder_path) # Unzip shp

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\imire\\OneDrive - UW\\Documents\\GDA567\\disturbance_interaction_analysis\\downloaded_data\\historic_fire_perimeters\\InterAgencyFirePerimeterHistory_All_Years_View.zip'

In [None]:
# Read the extracted shapefile. os.path.join supplies the platform's path
# separator instead of a hard-coded "\\".
fires_gdf = gpd.read_file(os.path.join(output_shp_folder_path, "InterAgencyFirePerimeterHistory_All_Years_View.shp"))

In [None]:
# Preview the first rows of the full fire perimeter dataset
fires_gdf.head()

## Clean fire perimeter gdf

* Ensure "FIRE_YEAR" is numeric to allow arithmetic operations.
* Remove records associated with invalid years "0", "1", and "9999".
* Check that invalid records were dropped correctly.

In [None]:
# Convert FIRE_YEAR to numeric; values that cannot be parsed become NaN
fires_gdf["FIRE_YEAR"] = pd.to_numeric(fires_gdf["FIRE_YEAR"], errors="coerce")

# Get unique values as a sorted list (NaN is dropped first so the sort is well-defined)
unique_years = sorted(fires_gdf["FIRE_YEAR"].dropna().unique())

# Exclude records from invalid years.
# NaN must be excluded explicitly: isin() is False for NaN, so negating it alone
# would keep every record whose year failed to parse.
excluded_years = {9999, 1, 0}
fire_year = fires_gdf["FIRE_YEAR"]
has_valid_year = fire_year.notna() & ~fire_year.isin(excluded_years)
valid_incidents = fires_gdf[has_valid_year].sort_values(by="FIRE_YEAR", ascending=True)

In [None]:
# Preview the first rows of the year-filtered records (sorted by FIRE_YEAR ascending)
valid_incidents.head()

In [None]:
# Check valid incidents gdf
# valid_incidents is sorted by FIRE_YEAR ascending, so the first row is the
# oldest record and the last row is the newest.
oldest_record = valid_incidents.iloc[0]
newest_record = valid_incidents.iloc[-1]

data_summary = {
    "all_records_count": len(fires_gdf),
    "valid_records_count": len(valid_incidents),
    "first_incident": oldest_record["INCIDENT"],
    "first_fire_year": oldest_record["FIRE_YEAR"],
    "last_incident": newest_record["INCIDENT"],
    "last_fire_year": newest_record["FIRE_YEAR"],
}

# The second sentence previously lacked "in" before the year.
summary_text = '''The NIFC Historic Fire Perimeter dataset contains {all_records_count} records of which {valid_records_count} are labeled with a valid year.
The oldest valid record is for the {first_incident} incident in {first_fire_year}. 
The newest valid record is for the {last_incident} incident in {last_fire_year}.
'''.format(**data_summary)

print(summary_text)

## Prepare to subset fires_gdf

* Extract all perimeters intersecting the boundary of Malheur National Forest (MNF).
* Write perimeter features intersecting MNF to a GeoJSON.

In [None]:
# NOTE(review): absolute path on one user's machine; adjust before running elsewhere.
mnf_bounds_gdf = gpd.read_file(r"C:\Users\imire\OneDrive - UW\Documents\GDA567\boundary_geoJSONs\malheur_bounds.geojson") # Read in gdf with Malheur National Forest boundary

In [None]:
# Preview the first rows of the forest boundary data
mnf_bounds_gdf.head()

In [None]:
# List the boundary columns before trimming them to FORESTNAME and geometry
mnf_bounds_gdf.columns

In [None]:
### Put this step in the initial forest admin boundary extraction
# Keep only the forest name and the geometry; every other column is dropped
columns_to_keep = ["FORESTNAME", "geometry"]
mnf_bounds_gdf = mnf_bounds_gdf[columns_to_keep]

In [None]:
# Make sure CRS of mnf_bounds_gdf and fires_gdf are the same.
# The spatial join that follows compares raw coordinates, so a mismatch is
# corrected here by reprojecting the boundary instead of only being reported.
if mnf_bounds_gdf.crs == fires_gdf.crs:
    print("Both datasets are in " + str(mnf_bounds_gdf.crs) + ".")
elif mnf_bounds_gdf.crs is None or fires_gdf.crs is None:
    # A dataset without a CRS cannot be reprojected automatically
    print("Datasets are not in same CRS.")
    print("At least one dataset has no CRS defined; assign it before running the spatial join.")
else:
    print("Datasets are not in same CRS.")
    mnf_bounds_gdf = mnf_bounds_gdf.to_crs(fires_gdf.crs)
    print("Reprojected mnf_bounds_gdf to " + str(fires_gdf.crs) + ".")

# Both already in Web Mercator

## Filter fires_gdf to include only fires that overlap the Malheur NF boundary

In [None]:
# Attach the forest name to every valid fire whose perimeter intersects a boundary polygon
# (left join: fires with no intersecting boundary get a missing FORESTNAME)
fires_with_forest = gpd.sjoin(valid_incidents, mnf_bounds_gdf, how = "left", predicate = "intersects")

# Keep only the fires matched to Malheur, ordered from oldest to newest
in_malheur = fires_with_forest["FORESTNAME"] == "Malheur National Forest"
mnf_fires_all = fires_with_forest[in_malheur].sort_values(by = "FIRE_YEAR", ascending = True)

In [None]:
# List the columns of the joined result
mnf_fires_all.columns

In [None]:
# mnf_fires_all is sorted by FIRE_YEAR ascending, so the end rows are the oldest and newest fires
oldest_mnf_fire = mnf_fires_all.iloc[0]
newest_mnf_fire = mnf_fires_all.iloc[-1]

mnf_record_count = len(mnf_fires_all)
mnf_first_incident = oldest_mnf_fire["INCIDENT"]
mnf_first_fire_year = oldest_mnf_fire["FIRE_YEAR"]
mnf_last_incident = newest_mnf_fire["INCIDENT"]
mnf_last_fire_year = newest_mnf_fire["FIRE_YEAR"]

In [None]:
# Values substituted into the summary text below
data_summary = {
    "mnf_record_count": mnf_record_count,
    "mnf_first_incident": mnf_first_incident,
    "mnf_first_fire_year": mnf_first_fire_year,
    "mnf_last_incident": mnf_last_incident,
    "mnf_last_fire_year": mnf_last_fire_year,
}

# Format the text (forest name corrected from "Malneur" to "Malheur")
summary_text = '''The NIFC Historic Fire Perimeter dataset contains {mnf_record_count} valid records of fires with perimeters overlapping the Malheur National Forest boundary.
The oldest record is for the {mnf_first_incident} incident in {mnf_first_fire_year}. 
The newest record is for the {mnf_last_incident} incident in {mnf_last_fire_year}.
'''.format(**data_summary)

# Print the formatted text
print(summary_text)

In [None]:
# Group by year and count occurrences
fires_per_year = mnf_fires_all.groupby('FIRE_YEAR').size().reset_index(name='count')

# Plotted period, defined once so the year range, title and ticks cannot drift apart
first_plot_year = 1910
last_plot_year = 2023

# Left-merge onto every year in the period so years without fires appear with a count of 0
year_range = pd.DataFrame({'FIRE_YEAR': range(first_plot_year, last_plot_year + 1)})
fires_per_year = year_range.merge(fires_per_year, on="FIRE_YEAR", how="left").fillna(0)
fires_per_year["count"] = fires_per_year["count"].astype(int)

plt.figure(figsize=(12, 6))
plt.bar(fires_per_year["FIRE_YEAR"], fires_per_year["count"])
plt.xlabel("Year")
plt.ylabel("Fire Count")
plt.title(f"Number of Fires Per Year ({first_plot_year}-{last_plot_year})")
tick_positions = np.arange(first_plot_year, last_plot_year + 2, 5)  # Every 5 years
plt.xticks(tick_positions, rotation=45)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

The fires in 1910 and 1939 are well recorded and have sizable overlap with later fires, so they will be kept in the dataset. However, the absence of records for the intervening years, especially prior to the 1950s, should not be assumed to indicate a lack of fire but instead a potential lack of *recorded* fire. While the larger dataset does include fires going back to the 8th century mapped using carbon dating, there very well may be fires that occurred within Malheur that went unrecorded or even unnoticed. While Malheur was designated a National Forest in 1908, it was not exactly a hot destination for recreators at the time. Future studies of historical sources or further investigation of natural markers of fire history (e.g., fire scars, pollen record, soil cores) may reveal unknown fires. 

However, it is also likely that there were in fact few to zero fires in the early 20th century due to the aggressive fire exclusion and suppression approaches to forest management employed by the Forest Service. Ultimately, it may not be possible to ever consider this dataset truly complete or to know how close to complete it actually is. That being said, it is a rich source of spatially explicit data that allows us to begin untangling complex interactions between disturbance agents on a landscape. 

## QA/QC fire perimeters in mnf_fires_all

In [None]:
'''Since we are planning to calculate spatial statistics based on the frequency of and time between fires, we need to make sure that there are no duplicate records skewing the stats.
This dataset has a known issue with duplicate perimeters submitted by different agencies (usually USFS and BLM) with slightly different incident names, calculated areas, and shapes. 
The QA/QC steps in this section are aimed at weeding out as many of those duplicates as possible without manual review of all 216 records. First, we identify all features in 
mnf_fires_all that overlap another feature in the dataset. From these, records with the same "INCIDENT" and "FIRE_YEAR" are culled, keeping the first record; preference is given to records submitted by the USFS
where available as they are the primary fire managers in the region of interest and maintain many of the current fire perimeter datasets that this one is derived from. Next, the remaining overlapping features 
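are reviewed visually in pairs, and the incidents judged to be duplicates are dropped from the dataset.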
 '''


In [None]:
# Define colors
fill_color = "#CC5500"  # Burnt Orange (fire perimeters)
edge_color = "#993300"  # Dark Orange (fire outlines)
boundary_edge_color = "#006400"  # Dark Green (Malheur boundary)

# Ensure GeoDataFrames are in Web Mercator (EPSG:3857) for basemap compatibility
mnf_fires_all = mnf_fires_all.to_crs(epsg=3857)
mnf_bounds_gdf = mnf_bounds_gdf.to_crs(epsg=3857)

# Create the figure and axis
fig, ax = plt.subplots(figsize=(10, 8))

# Plot the fire perimeters
mnf_fires_all.plot(ax=ax, color=fill_color, edgecolor=edge_color, alpha=0.6, linewidth=0.8)

# Plot Malheur National Forest boundary with no fill, only dark green outline
mnf_bounds_gdf.plot(ax=ax, color="none", edgecolor=boundary_edge_color, linewidth=2)

# Add basemap
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, zoom=10, alpha=0.8)

# Add a custom legend.
# Use facecolor rather than color: on a matplotlib Patch, `color` overrides
# `edgecolor`, which left the boundary swatch (color="none") invisible and
# dropped the dark outline from the perimeter swatch.
legend_patches = [
    mpatches.Patch(facecolor=fill_color, edgecolor=edge_color, alpha=0.6, label="Incident Perimeter"),
    mpatches.Patch(facecolor="none", edgecolor=boundary_edge_color, linewidth=2, label="Malheur National Forest Boundary"),
]
ax.legend(handles=legend_patches, loc="upper right")

# Title
ax.set_title("All Malheur National Forest Fire Incidents (1910-2023)", fontsize=14)

# Remove axis ticks (optional, since basemap provides context)
ax.set_xticks([])
ax.set_yticks([])

# Show plot
plt.show()

In [None]:
# Drop records with identical name and year, retaining the record with the largest area (GIS_ACRES).
# Sorting largest-first means keep="first" selects the biggest perimeter in each (INCIDENT, FIRE_YEAR) group.
largest_area_first = mnf_fires_all.sort_values("GIS_ACRES", ascending=False)
mnf_fires_cleaned = largest_area_first.drop_duplicates(subset=["INCIDENT", "FIRE_YEAR"], keep="first")

In [None]:
def find_overlapping_fires(df):
    """
    Identifies overlapping fire records where 'FIRE_YEAR' matches 
    and geometries intersect, returning a GeoDataFrame.

    Each overlapping pair is reported twice, once from each record's point
    of view (A vs B and B vs A).

    :param df: GeoDataFrame with 'FIRE_YEAR', 'INCIDENT', 'AGENCY', 'GIS_ACRES', and 'geometry' columns
    :return: GeoDataFrame of overlapping fire pairs with 'geometry_1' as the
             active geometry; empty (with the same columns) when no pairs are found
    """
    # Output columns are fixed up front so an empty result still has a
    # 'geometry_1' column for GeoDataFrame to use as its geometry.
    pair_columns = [
        "FIRE_YEAR",
        "INCIDENT_1", "AGENCY_1", "GIS_ACRES_1",
        "INCIDENT_2", "AGENCY_2", "GIS_ACRES_2",
        "geometry_1", "geometry_2",
    ]
    potential_overlaps = []

    # Work on a copy so the caller's frame is left untouched
    df = df.copy()
    df = df.set_geometry("geometry")

    # Exclude each record from its own comparison by row position, which stays
    # correct even if the index contains repeated labels.
    positions = np.arange(len(df))

    for position, (_, fire) in enumerate(df.iterrows()):
        same_year = df['FIRE_YEAR'] == fire['FIRE_YEAR']
        is_other_record = positions != position
        overlapping_fires = df[same_year & is_other_record & df.intersects(fire.geometry)]

        for _, overlap in overlapping_fires.iterrows():
            potential_overlaps.append({
                "FIRE_YEAR": fire['FIRE_YEAR'],
                "INCIDENT_1": fire['INCIDENT'],
                "AGENCY_1": fire['AGENCY'],
                "GIS_ACRES_1": fire['GIS_ACRES'],
                "INCIDENT_2": overlap['INCIDENT'],
                "AGENCY_2": overlap['AGENCY'],
                "GIS_ACRES_2": overlap['GIS_ACRES'],
                "geometry_1": fire['geometry'],
                "geometry_2": overlap['geometry']
            })

    # Convert to GeoDataFrame
    gdf_overlaps = gpd.GeoDataFrame(potential_overlaps, columns=pair_columns, geometry="geometry_1", crs=df.crs)
    return gdf_overlaps

# Collect the same-year intersecting pairs among the de-duplicated fires
overlapping_fires_gdf = find_overlapping_fires(mnf_fires_cleaned)
# Display the pairs table as the cell output
overlapping_fires_gdf 

In [None]:
# Define color mapping for agencies
# plot_feature_pair looks agencies up here and falls back to "gray" for any agency not listed.
agency_colors = {
    "USFS": "lightblue",
    "CAL FIRE": "lightcoral",
    "NPS": "lightgreen",
    "BLM": "gold",
    "BIA": "purple"
}

def plot_feature_pair(geo1, geo2, incident1, incident2, agency1, agency2):
    """
    Plots two overlapping fire features with:
    - Fill colors based on AGENCY (gray when the agency is not in agency_colors)
    - Cross-hatching on the first incident so the two features can be told apart
    - Legends positioned outside the plot for better clarity

    :param geo1: geometry of the first incident (drawn hatched)
    :param geo2: geometry of the second incident (drawn without hatching)
    :param incident1: name of the first incident
    :param incident2: name of the second incident
    :param agency1: agency of the first incident
    :param agency2: agency of the second incident
    :return: None (the figure is shown with plt.show())
    """
    fig, ax = plt.subplots(figsize=(8, 8), constrained_layout=True)  # Increased figure size & auto-layout

    # Get colors for each agency
    color1 = agency_colors.get(agency1, "gray")
    color2 = agency_colors.get(agency2, "gray")

    # Plot both geometries; only the first is hatched.
    # No label= is passed here: both legends are built from explicit handles,
    # so labels on the plotted geometries would never be displayed.
    gpd.GeoSeries(geo1).plot(ax=ax, facecolor=color1, edgecolor="black", alpha=0.7, hatch="xx")
    gpd.GeoSeries(geo2).plot(ax=ax, facecolor=color2, edgecolor="black", alpha=0.7)

    # Set title with padding
    ax.set_title(f"Overlapping Fires: {incident1} vs {incident2}", pad=20)

    # Create agency color legend
    agency_patches = [mpatches.Patch(color=color, label=agency) for agency, color in agency_colors.items()]
    agency_legend = ax.legend(handles=agency_patches, title="AGENCY Colors", loc="upper left", bbox_to_anchor=(1.05, 1))  # Move legend outside

    # Incident legend: one swatch per plotted feature, drawn as it appears on
    # the map, so it is clear which incident is hatched and what a gray fill means.
    incident_patches = [
        mpatches.Patch(facecolor=color1, edgecolor="black", alpha=0.7, hatch="xx", label=f"{incident1} ({agency1})"),
        mpatches.Patch(facecolor=color2, edgecolor="black", alpha=0.7, label=f"{incident2} ({agency2})"),
    ]
    ax.legend(handles=incident_patches, title="INCIDENT Hatching", loc="lower left", bbox_to_anchor=(1.05, 0))  # Move incident legend outside

    # The second ax.legend call replaces the first, so re-attach the agency legend
    ax.add_artist(agency_legend)

    plt.show()

def display_feature_pairs(gdf):
    """
    Creates a widget to interactively view overlapping fire features in pairs.

    :param gdf: GeoDataFrame of pairs with 'INCIDENT_1', 'INCIDENT_2', 'AGENCY_1',
                'AGENCY_2', 'geometry_1', and 'geometry_2' columns
    :return: None (the dropdown and plot output are displayed in the notebook)
    """
    # With no pairs the dropdown would have no value and the row lookup would fail
    if gdf.empty:
        print("No overlapping fire pairs to display.")
        return

    # Dropdown values are row positions (not index labels) because the selected
    # row is fetched with .iloc below.
    pair_options = [
        (f"Pair {position + 1}: {row['INCIDENT_1']} vs {row['INCIDENT_2']}", position)
        for position, (_, row) in enumerate(gdf.iterrows())
    ]

    dropdown = widgets.Dropdown(
        options=pair_options,
        description="Feature Pair:",
        style={'description_width': 'initial'},
        layout=widgets.Layout(width="50%")
    )

    output = widgets.Output()

    def update_plot(change):
        # change['new'] carries the newly selected row position
        output.clear_output()
        with output:
            selected_pair = gdf.iloc[change['new']]
            plot_feature_pair(
                selected_pair['geometry_1'], selected_pair['geometry_2'],
                selected_pair['INCIDENT_1'], selected_pair['INCIDENT_2'],
                selected_pair['AGENCY_1'], selected_pair['AGENCY_2']
            )

    dropdown.observe(update_plot, names='value')

    display(dropdown, output)

    # Show the initially selected pair by default
    update_plot({'name': 'value', 'new': dropdown.value})

# Run the widget to review each overlapping pair visually
display_feature_pairs(overlapping_fires_gdf)

In [None]:
# Incident names flagged for removal; every record whose INCIDENT matches one of these is dropped below.
drop_list = ['Canyon Creek Complex','Pine Springs Basin', 'South Fork', 'Egley Complex', 'Rail', 'Buck Spr', 
             'IRONSIDE', 'POWDER', 'Whiting Sp', '0848', 'Wolf Creek EC 81', 'Slide Mountain', 'BOX', 'OR-MAF-2011-11297', 'LITTLE WOLF', '#7176']
             

# Default to USFS polygon unless significantly smaller than other polygon.
# Corral Basin and Snowshoe have different shapes but are from the same year and agency (1990, USFS). 
# Egley and Bear Canyon are a great example of why visual review is important; they intersect, are close to the same size, and are from the same year but are in fact two separate incidents.

In [None]:
# Drop all records for incidents in drop list
# NOTE(review): matching is on INCIDENT name only, so a same-named fire from a
# different year would be removed as well - confirm none exist in the subset.
is_flagged_duplicate = mnf_fires_cleaned["INCIDENT"].isin(drop_list)
mnf_fires_cleaned = mnf_fires_cleaned[~is_flagged_duplicate]

In [None]:
# Display the cleaned records to check the result of the drop
mnf_fires_cleaned

In [None]:
# 163 incidents remain after cleaning

In [None]:
# Define colors
fill_color = "#CC5500"  # Burnt Orange (fire perimeters)
edge_color = "#993300"  # Dark Orange (fire outlines)
boundary_edge_color = "#006400"  # Dark Green (Malheur boundary)

# Create the figure and axis
fig, ax = plt.subplots(figsize=(10, 8))

# Plot the fire perimeters
mnf_fires_cleaned.plot(ax=ax, color=fill_color, edgecolor=edge_color, alpha=0.6, linewidth=0.8)

# Plot Malheur National Forest boundary with no fill, only dark green outline
mnf_bounds_gdf.plot(ax=ax, color="none", edgecolor=boundary_edge_color, linewidth=2)

# Add basemap
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, zoom=10, alpha=0.8)

# Add a custom legend.
# Use facecolor rather than color: on a matplotlib Patch, `color` overrides
# `edgecolor`, which left the boundary swatch (color="none") invisible and
# dropped the dark outline from the perimeter swatch.
legend_patches = [
    mpatches.Patch(facecolor=fill_color, edgecolor=edge_color, alpha=0.6, label="Incident Perimeter"),
    mpatches.Patch(facecolor="none", edgecolor=boundary_edge_color, linewidth=2, label="Malheur National Forest Boundary"),
]
ax.legend(handles=legend_patches, loc="upper right")

# Title (closing bracket corrected to match the opening one)
ax.set_title("All Malheur National Forest Fire Incidents (1910-2023) (duplicates removed)", fontsize=14)

# Remove axis ticks (optional, since basemap provides context)
ax.set_xticks([])
ax.set_yticks([])

# Show plot
plt.show()

In [None]:
# Write the cleaned fire perimeters to a GeoJSON file
# NOTE(review): absolute path on one user's machine; adjust before running elsewhere.
mnf_fires_cleaned.to_file(r"C:\Users\imire\OneDrive - UW\Documents\GDA567\mnf_fires_cleaned.geojson", driver="GeoJSON")