In [19]:
#Check which ones are needed
import geopandas as gpd
import fiona
import os
import zipfile
import rasterio
import numpy as np
from rasterio.features import rasterize
from rasterio.transform import from_origin
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import contextily as ctx
import ipywidgets as widgets
from IPython.display import display

In [20]:
# Update with cleaned fires
# Folder holding the transferred input layers. Defaults to the original local path;
# set the MNF_TRANSFER_DIR environment variable to run the notebook on another machine.
TRANSFER_DIR = os.environ.get(
    "MNF_TRANSFER_DIR",
    r"C:\Users\imire\OneDrive - UW\Documents\GDA567\disturbance_interaction_analysis\0_transfer",
)

# Fire polygons and MTBS point records, both read from GeoJSON in the same folder.
mnf_fires_all = gpd.read_file(os.path.join(TRANSFER_DIR, "mnf_fires_all.geojson"))
mtbs_points = gpd.read_file(os.path.join(TRANSFER_DIR, "mnf_mtbs_pts.geojson"))

In [21]:
# Display the loaded fire polygon records as a quick sanity check of the read.
mnf_fires_all

Unnamed: 0,OBJECTID,MAP_METHOD,DATE_CUR,COMMENTS,GEO_ID,IRWINID,UNQE_FIRE_,FIRE_YEAR,LOCAL_NUM,INCIDENT,...,SOURCE,AGENCY,FIRE_YEAR_,Shape__Are,Shape__Len,FORID,OTHERID,index_right,FORESTNAME,geometry
0,32469,Digitized-Other,200805290000,Added from 1MM Scale recitifed image,{61D7987C-830C-4F28-BB5E-37F57D6EB9A6},,1910-ORMAF-000028,1910,,1910 Fire 28,...,USFS,USFS,1910,1.658106e+08,53060.785589,,,0.0,Malheur National Forest,"POLYGON ((-13171332.211 5500377.617, -13171139..."
1,4509,Digitized-Other,200805290000,Added from 1MM Scale recitifed image,{41ECC8F1-8906-440D-8339-CA43F745789D},,1910-ORMAF-000029,1910,,1910 Fire 29,...,USFS,USFS,1910,4.130851e+08,84394.520424,,,0.0,Malheur National Forest,"POLYGON ((-13301937.82 5454895.413, -13309798...."
2,3335,Digitized-Other,200805290000,Added from 1MM Scale recitifed image,{05EBF502-3A99-4DAD-90F1-334E43A9BB2A},,1910-ORMAF-000027,1910,,1910 Fire 27,...,USFS,USFS,1910,1.524486e+08,48956.458053,,,0.0,Malheur National Forest,"POLYGON ((-13204840.04 5513395.884, -13204789...."
3,3326,Digitized-Other,200805290000,Added from 1MM Scale recitifed image,{C8DD94E6-A1D3-4862-8BDD-7E0D26B16DF5},,1910-ORMAF-000031,1910,,1910 Fire 31,...,USFS,USFS,1910,9.766016e+07,36910.349331,,,0.0,Malheur National Forest,"POLYGON ((-13220752.497 5441600.566, -13220881..."
4,32192,Digitized-Other,200805290000,Added from 1MM Scale recitifed image,{E70A5124-AAFE-437E-A824-690872FB9F18},,1910-ORMAF-000025,1910,,1910 Fire 25,...,USFS,USFS,1910,1.765018e+08,51860.411857,,,0.0,Malheur National Forest,"POLYGON ((-13264432.376 5547406.561, -13267574..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,31393,Other,202303081935,,{A5773E46-548B-4332-AB2E-BB2B0374975C},{A4C9C635-54A3-4FDF-A8DE-5C8CBB748A2B},2022-ORMAF-022252,2022,,Wickiup,...,USFS,USFS,2022,1.081425e+05,1451.628264,{5CFE250F-7AEE-4FC9-AB16-77A0FF552A82},,0.0,Malheur National Forest,"POLYGON ((-13227252.361 5498701.761, -13226895..."
212,80841,IR Image Interpretation,2022,DRTI 09/15/2022 @ 1200,{EB946ACE-4C19-4487-94AA-28CF942C72F6},{87FDD0AB-7B6E-4941-A5AA-6174BE19C02B},2022-ORPRD-000918,2022,000918,COUGAR GULCH 0918 PR,...,WFIGS,BLM,2022,1.039576e+06,6283.640593,{443FED13-F520-4538-819C-C20BC93BDDD9},,0.0,Malheur National Forest,"POLYGON ((-13299370.024 5500692.172, -13299302..."
213,4237,Other,202210172012,,{3AFD39B3-6A7E-4F58-8C8F-3BE55DF54D05},{8C60E4DA-62F7-428A-ABE5-A661FCBC7D6E},2022-ORMAF-002365,2022,,Myrtle,...,USFS,USFS,2022,1.157398e+03,148.727279,{EE441DBA-82BA-4CA0-9113-DD4DA0EB0379},,0.0,Malheur National Forest,"POLYGON ((-13260955.114 5448711.897, -13260953..."
214,12304,Other,202210172024,,{1B812AB1-E415-40A5-B2E6-282DABC2FFE8},{9579F4ED-281F-407D-99C0-9B28F8E7B423},2022-ORMAF-002300,2022,,Zolgmann,...,USFS,USFS,2022,1.411641e+02,54.632114,{C734A54A-A7A7-4A22-94DD-A6BDFE103300},,0.0,Malheur National Forest,"POLYGON ((-13303668.246 5432299.531, -13303666..."


In [24]:
# Parse the ignition dates once; values that cannot be parsed become NaT
ignition_dates = pd.to_datetime(mtbs_points["Ig_Date"], errors="coerce")
mtbs_points["Ig_Date"] = ignition_dates

# Ignition year as a nullable integer, so rows with NaT dates carry <NA> instead of failing
mtbs_points["Ig_Year"] = ignition_dates.dt.year.astype("Int64")

MTBS data are only available from 1984 onward, so the lower date bound is effectively 1984. We set the upper bound at 2005 because that is 15–25 years post-outbreak, by which time any direct effects of the outbreaks on fire (if they exist at all) are likely to have diminished.

In [25]:
# Reproject the MTBS points into the fire polygons' CRS when the two differ
if mnf_fires_all.crs != mtbs_points.crs:
    mtbs_points = mtbs_points.to_crs(mnf_fires_all.crs)

# Remove any 'index_right' column left over from an earlier join (no-op when absent)
mnf_fires_all = mnf_fires_all.drop(columns='index_right', errors='ignore')
mtbs_points = mtbs_points.drop(columns='index_right', errors='ignore')

# Spatial join: attach to each fire polygon the MTBS points it contains.
# how="left" keeps polygons that contain no point (their MTBS columns are missing).
joined = gpd.sjoin(mnf_fires_all, mtbs_points, predicate="contains", how="left")

# Keep only polygon/point pairs whose MTBS ignition year equals the polygon's FIRE_YEAR
same_year = joined["Ig_Year"] == joined["FIRE_YEAR"]
matched_years = joined[same_year]

# Preview result
print(f"Joined features with matching years: {len(matched_years)}")

Joined features with matching years: 36


In [26]:
# Inspect the polygon/point matches whose Ig_Year equals the polygon's FIRE_YEAR.
matched_years

Unnamed: 0,OBJECTID,MAP_METHOD,DATE_CUR,COMMENTS,GEO_ID,IRWINID,UNQE_FIRE_,FIRE_YEAR,LOCAL_NUM,INCIDENT,...,dNBR_offst,dNBR_stdDv,NoData_T,IncGreen_T,Low_T,Mod_T,High_T,Comment,ORIG_FID,Ig_Year
31,3675,Other,200805290000,,{5F63E1F0-39AC-4DA1-AA71-D76149C9D9BC},,,1986,,Deardorff,...,53.0,-9999.0,-970.0,-150.0,100.0,331.0,600.0,,7814.0,1986
38,4512,Other,200805290000,,{71BBAD01-679F-4340-A397-CBAB397E7ED3},,,1989,,Glacier,...,-22.0,-9999.0,-970.0,-150.0,100.0,304.0,550.0,,1408.0,1989
41,65010,Hand Sketch,201508120000,,{4A2C1AD3-2BC0-4977-B2B7-77422177221A},,1990-ORBUD-000000,1990,0,Buck Spr,...,-5.0,-9999.0,-970.0,-150.0,55.0,166.0,325.0,,8726.0,1990
45,32223,Other,200805290000,From old Snow Mountain district library,{C197950F-6E11-477F-82CF-CB3E91C2EB6A},,,1990,,Pine Springs,...,-16.0,-9999.0,-970.0,-150.0,55.0,193.0,375.0,,8661.0,1990
46,65077,Unknown,201308060000,,{7F2C13D7-370D-4C96-9895-AFEA82FB7EE8},,1990-ORBUD-000000,1990,0,Pine Springs Basin,...,-16.0,-9999.0,-970.0,-150.0,55.0,193.0,375.0,,8661.0,1990
47,3339,Other,200805290000,,{5DA44F9E-C7B2-47B2-A9A9-5D7F0A39C1B4},,1990-ORMAF-000069,1990,,Snowshoe,...,32.0,0.0,-970.0,-150.0,120.0,258.0,450.0,,8723.0,1990
48,3312,Other,200805290000,From old Snow Mountain district library,{5F283D30-BBE5-4B96-B3F5-0C1DC9940897},,1990-ORMAF-000604,1990,,604,...,-27.0,-9999.0,-970.0,-150.0,65.0,165.0,315.0,,8725.0,1990
49,3311,Other,200805290000,From old Snow Mountain district library,{D74FBB06-25A8-4955-87B2-F74D34D53A43},,,1990,,Buck Springs,...,-5.0,-9999.0,-970.0,-150.0,55.0,166.0,325.0,,8726.0,1990
52,32473,Other,200805290000,,{4F0FDFBF-E45C-4ACC-B3EA-7F0A2174EDF2},,1990-ORMAF-000135,1990,,Sheep Mountain,...,-34.0,4.0,-970.0,-150.0,70.0,293.0,550.0,,8724.0,1990
59,65210,Unknown,201308060000,,{FCC63174-FB76-4C92-ACEB-2DF6A2B4043B},,1994-OR952S-000000,1994,0,Jordan Springs,...,-34.0,34.0,-970.0,-150.0,15.0,260.0,525.0,,10089.0,1994


In [None]:
# Add widget here

In [None]:
# Identify and reconcile overlapping fires
def _choose_duplicate_to_drop(i, fire, j, overlap):
    """Return the index label (``i`` or ``j``) of the record to discard from a duplicate pair."""
    fire_is_usfs = fire["AGENCY"] == "USFS"
    overlap_is_usfs = overlap["AGENCY"] == "USFS"

    # Agency only breaks the tie when exactly one of the two records is USFS.
    if fire_is_usfs != overlap_is_usfs:
        return j if fire_is_usfs else i

    # Same agency class: keep the larger record; on equal acreage keep `fire` (drop `overlap`).
    return i if fire["GIS_ACRES"] < overlap["GIS_ACRES"] else j


def process_overlapping_fires(df):
    """Remove duplicate fire records and collect ambiguous overlaps for manual review.

    Every pair of records that shares a FIRE_YEAR and whose geometries intersect is
    examined exactly once:

    * Same INCIDENT name: treated as duplicates. The USFS record is kept when exactly
      one of the two is USFS; otherwise the record with the larger GIS_ACRES is kept.
    * Different names but each geometry covers >90% of the other: treated as
      duplicates and resolved with the same rule.
    * Anything else: the pair is appended to the manual-review list.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Fire polygons with FIRE_YEAR, INCIDENT, AGENCY and GIS_ACRES columns.
        Index labels are assumed to be unique. The input is not modified.

    Returns
    -------
    (GeoDataFrame, GeoDataFrame)
        The input with discarded duplicates removed (plus a helper "area" column),
        and one row per unresolved pair with FIRE_YEAR, INCIDENT_1/2, AGENCY_1/2 and
        geometry_1/2 columns (geometry_1 is the active geometry). The second frame is
        empty, but has the same columns, when nothing needs review.
    """
    review_columns = [
        "FIRE_YEAR",
        "INCIDENT_1", "AGENCY_1", "geometry_1",
        "INCIDENT_2", "AGENCY_2", "geometry_2",
    ]

    df = df.copy()
    df["area"] = df.geometry.area  # Compute area for overlap calculations
    to_review = []
    to_remove = set()
    seen_pairs = set()

    for i, fire in df.iterrows():
        # Subset all incidents that share a FIRE_YEAR and that intersect
        overlapping_fires = df[
            (df['FIRE_YEAR'] == fire['FIRE_YEAR'])
            & (df.index != i)
            & (df.intersects(fire.geometry))
        ]

        for j, overlap in overlapping_fires.iterrows():
            # Handle each unordered pair once. Visiting (i, j) and later (j, i) lets
            # the two records remove each other (e.g. both USFS, or equal acreage)
            # and lists every review pair twice.
            pair = frozenset((i, j))
            if pair in seen_pairs:
                continue
            seen_pairs.add(pair)

            intersection_area = fire.geometry.intersection(overlap.geometry).area
            # A zero-area geometry cannot be "mostly covered"; treat its share as 0.
            fire_overlap_pct = intersection_area / fire["area"] if fire["area"] else 0.0
            overlap_overlap_pct = intersection_area / overlap["area"] if overlap["area"] else 0.0

            same_name = fire["INCIDENT"] == overlap["INCIDENT"]
            near_identical = fire_overlap_pct > 0.9 and overlap_overlap_pct > 0.9

            if same_name or near_identical:
                to_remove.add(_choose_duplicate_to_drop(i, fire, j, overlap))
            else:
                # Different names and not mutually >90% overlapping: needs a human decision
                to_review.append({
                    "FIRE_YEAR": fire['FIRE_YEAR'],
                    "INCIDENT_1": fire['INCIDENT'], "AGENCY_1": fire['AGENCY'], "geometry_1": fire['geometry'],
                    "INCIDENT_2": overlap['INCIDENT'], "AGENCY_2": overlap['AGENCY'], "geometry_2": overlap['geometry']
                })

    df_cleaned = df.drop(index=list(to_remove))
    # Passing the columns explicitly keeps the constructor valid when to_review is empty.
    review_gdf = gpd.GeoDataFrame(to_review, columns=review_columns, geometry="geometry_1", crs=df.crs)
    return df_cleaned, review_gdf

# Run the reconciliation: the first result is the fire table with duplicates removed,
# the second holds the overlapping pairs that still need manual review.
mnf_fires_cleaned, overlapping_mnf_fires = process_overlapping_fires(mnf_fires_all)

In [None]:
# These are the remaining incidents requiring manual review
# (the second value returned by process_overlapping_fires).
overlapping_mnf_fires

In [None]:
# Widget for manual review
def plot_feature_pair(row):
    """Plot the two geometries of one review pair on a shared axis, with a legend.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide "geometry_1", "geometry_2", "INCIDENT_1", "INCIDENT_2",
        "AGENCY_1" and "AGENCY_2".
    """
    fig, ax = plt.subplots(figsize=(10, 10))

    label_1 = f"{row['INCIDENT_1']} ({row['AGENCY_1']})"
    label_2 = f"{row['INCIDENT_2']} ({row['AGENCY_2']})"

    # Distinct fills (plus the hatch on the first polygon) keep the two shapes
    # distinguishable where they overlap.
    style_1 = dict(facecolor="tab:blue", edgecolor="black", alpha=0.7, hatch="xx")
    style_2 = dict(facecolor="tab:orange", edgecolor="black", alpha=0.7)

    gpd.GeoSeries([row["geometry_1"]]).plot(ax=ax, **style_1)
    gpd.GeoSeries([row["geometry_2"]]).plot(ax=ax, **style_2)

    # Create legend from proxy patches that reuse the plotting styles, so each
    # legend entry matches the polygon it describes.
    ax.legend(handles=[
        mpatches.Patch(label=label_1, **style_1),
        mpatches.Patch(label=label_2, **style_2),
    ])
    ax.set_title(f"{row['INCIDENT_1']} vs {row['INCIDENT_2']}")

    plt.show()
    # Release the figure so repeated dropdown changes do not accumulate open figures.
    plt.close(fig)

def display_feature_pairs(gdf):
    """Show a dropdown of review pairs and plot whichever pair is selected.

    Parameters
    ----------
    gdf : GeoDataFrame
        One row per pair, with the columns plot_feature_pair reads
        (INCIDENT_1/2, AGENCY_1/2, geometry_1/2).
    """
    if len(gdf) == 0:
        # Nothing to choose from; an empty dropdown would have value None and
        # the initial plot call would fail.
        print("No overlapping fire pairs to review.")
        return

    # Store row *positions* as the dropdown values so the .iloc lookup below is
    # correct whatever the frame's index labels are.
    options = [
        (f"{row['INCIDENT_1']} vs {row['INCIDENT_2']}", position)
        for position, (_, row) in enumerate(gdf.iterrows())
    ]
    dropdown = widgets.Dropdown(options=options, description="Feature Pair:")
    output = widgets.Output()

    def update_plot(change=None):
        # `change` is supplied by the observer but unused: the current selection
        # is read from the dropdown itself.
        output.clear_output()
        with output:
            plot_feature_pair(gdf.iloc[dropdown.value])

    dropdown.observe(update_plot, names='value')
    display(dropdown, output)
    update_plot()  # draw the initially selected pair

# Launch the interactive viewer on the pairs flagged for manual review.
display_feature_pairs(overlapping_mnf_fires)

In [None]:
# Outcome of the manual review of overlapping pairs:
# - Default to the USFS polygon unless it is significantly smaller than the other polygon.
# - Corral Basin and Snowshoe have different shapes but are from the same year and agency
#   (1990, USFS). Corral Basin is dropped because the area is very unlikely to have burned
#   twice in the same year; it would register basically as one fire rather than two.
# - Egley and Bear Canyon are a great example of why visual review is important: they
#   intersect, are close to the same size, and are from the same year, but are in fact two
#   separate incidents.
# NOTE(review): the filter matches on INCIDENT name only, so a same-named fire from another
# year would be dropped as well - confirm that is intended.
drop_list = ['Whiting Sp', 'Corral Basin', 'Buck Spr', 'Slide Mountain', 'Egley Complex']

# Keep every record whose INCIDENT name is not on the drop list
mnf_fires_cleaned = mnf_fires_cleaned.loc[lambda fires: ~fires["INCIDENT"].isin(drop_list)]
mnf_fires_cleaned