In [15]:
import pandas as pd
import re

In [2]:
filter_df = pd.read_csv('/Users/aviva/Desktop/MSDS/quarter_4/Human Centered Data Science/Part 1/GeoJSON Exports/intermediate_data/filter.csv')

In [11]:
print(filter_df['OverlapFlags'].unique())

[nan
 'Caution, this Wildfire in 1963 overlaps with a Wildfire that occurred in 1964 (1 year difference). The overlapping fire overlaps by 11.9% (592.0 acres). Overlapping fire USGS Assigned ID: 14618.'
 'Caution, this Wildfire in 1963 overlaps with a Wildfire that occurred in 1962 (1 year difference). The overlapping fire overlaps by 69.4% (220.0 acres). Overlapping fire USGS Assigned ID: 14005.'
 ...
 'Caution, this Prescribed Fire in 2020 overlaps with a Prescribed Fire that occurred in 2018 (2 year difference). The overlapping fire overlaps by 100.0% (20.0 acres). Overlapping fire USGS Assigned ID: 124961.'
 'Caution, this Prescribed Fire in 2020 overlaps with a Prescribed Fire that occurred in 2019 (1 year difference). The overlapping fire overlaps by 100.0% (7.0 acres). Overlapping fire USGS Assigned ID: 135002.'
 'Caution, this Prescribed Fire in 2020 overlaps with a Prescribed Fire that occurred in 2019 (1 year difference). The overlapping fire overlaps by 100.0% (9.0 acres). O

In [3]:
filter_df.head()

Unnamed: 0,OBJECTID,FireType,FireYear,GISAcres,OverlapFlags,shortest_dist
0,14299,Wildfire,1963,40992.458271,,1045.62
1,14300,Wildfire,1963,25757.090203,,1074.55
2,14301,Wildfire,1963,45527.210986,,1038.72
3,14302,Wildfire,1963,10395.010334,,990.02
4,14303,Wildfire,1963,9983.605738,,1034.55


The objective is now to estimate the impact of smoke from a wildfire on Salina.

We have two primary pieces of information: the number of acres burned by the wildfire `GISAcres` and the distance between the wildfire and the city `shortest_dist`.  

Now, smoke production and dispersion are influenced by various complex factors, including vegetation type, fire intensity, wind direction, weather conditions, and topography. To simplify the problem, we introduce a proportional relationship by assuming that the amount of smoke reaching the city is directly proportional to the size of the fire and inversely proportional to the distance from the fire to the city.   

In order to account for the variation in smoke production per acre, we introduce a constant factor `Smoke_Factor`. This factor is a simplification of various factors that affect the amount of smoke produced per acre, such as vegetation type and fire intensity.  

Based on the proportional relationship and the introduction of the constant factor, we construct the formula:    

`Smoke_Estimate = (Burned_Acres / Distance_from_City) * Smoke_Factor * (1 + Overlap_Factor)` 

Overlap Factor is computed as such:
`Overlap_Factor = (Years_Since_Previous_Burn + 1) * (1 + (Overlap_Percentage / 100))`


After understanding heat release (see [this](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7336523/#:~:text=Fire%20behavior%20and%20smoke%20models%20are%20numerical%20tools%20that%20provide,of%20wildland%20fires%20and%20develop), I am estimating the smoke factor to be 

In [20]:
def calculate_overlap_factor(overlap_flags):
    if pd.isna(overlap_flags):
        return 0  

    if "No overlap information available" in overlap_flags:
        return 0 
    year_pattern = r'Wildfire in (\d{4}) overlaps with a Wildfire that occurred in (\d{4})'
    overlap_pattern = r'overlaps by ([\d.]+)%'

    years_match = re.search(year_pattern, overlap_flags)
    overlap_match = re.search(overlap_pattern, overlap_flags)

    if years_match and overlap_match:
        current_year = int(years_match.group(1))
        previous_year = int(years_match.group(2))
        overlap_percentage = float(overlap_match.group(1))

        years_since_previous_burn = current_year - previous_year

        if years_since_previous_burn < 0:
            years_since_previous_burn = 0  

        overlap_factor = (years_since_previous_burn + 1) * (1 + (overlap_percentage / 100))
        return years_since_previous_burn, overlap_percentage, overlap_factor
    else:
        return 0, 0, 0  

In [21]:
filter_df['Years_Since_Previous_Burn'] = filter_df['OverlapFlags'].apply(calculate_overlap_factor)
filter_df['Overlap_Percentage'] = filter_df['OverlapFlags'].apply(calculate_overlap_factor)
filter_df['Overlap_Factor'] = filter_df['OverlapFlags'].apply(calculate_overlap_factor)

filter_df.head()

Unnamed: 0,OBJECTID,FireType,FireYear,GISAcres,OverlapFlags,shortest_dist,Years_Since_Previous_Burn,Overlap_Percentage,Overlap_Factor
0,14299,Wildfire,1963,40992.458271,,1045.62,0,0,0
1,14300,Wildfire,1963,25757.090203,,1074.55,0,0,0
2,14301,Wildfire,1963,45527.210986,,1038.72,0,0,0
3,14302,Wildfire,1963,10395.010334,,990.02,0,0,0
4,14303,Wildfire,1963,9983.605738,,1034.55,0,0,0


In [33]:
filter_df.count()

OBJECTID                     91781
FireType                     91781
FireYear                     91781
GISAcres                     91781
OverlapFlags                 13475
shortest_dist                91781
Years_Since_Previous_Burn    91781
Overlap_Percentage           91781
Overlap_Factor               91781
dtype: int64

In [32]:
filter_df['Years_Since_Previous_Burn'] = filter_df['Years_Since_Previous_Burn'].apply(lambda x: x[0] if isinstance(x, tuple) else x)
filter_df['Overlap_Percentage'] = filter_df['Overlap_Percentage'].apply(lambda x: x[0] if isinstance(x, tuple) else x)
filter_df['Overlap_Factor'] = filter_df['Overlap_Factor'].apply(lambda x: x[0] if isinstance(x, tuple) else x)

print("Number of rows with Overlap_Factor equal to 0:", len(filter_df[filter_df['Overlap_Factor'] == 0]))
print("Number of rows with negative Overlap_Factor:", len(filter_df[filter_df['Overlap_Factor'] < 0]))
print("Number of rows with positive Overlap_Factor:", len(filter_df[filter_df['Overlap_Factor'] > 0]))

Number of rows with Overlap_Factor equal to 0: 90164
Number of rows with negative Overlap_Factor: 0
Number of rows with positive Overlap_Factor: 1617


Proximity to the City: For wildfires that are within a few miles of the city, you might assume a higher "Smoke Factor" in the range of 0.1 to 0.5.

Size of the Fire: Larger wildfires could have a "Smoke Factor" ranging from 0.01 to 0.1, with the larger end of the range for massive fires.

In [35]:
gisacres_range = (filter_df['GISAcres'].min(), filter_df['GISAcres'].max())
shortest_distance_range = (filter_df['shortest_dist'].min(), filter_df['shortest_dist'].max())

print("Range of GISAcres: ({}, {})".format(gisacres_range[0], gisacres_range[1]))
print("Range of Shortest_Distance: ({}, {})".format(shortest_distance_range[0], shortest_distance_range[1]))

Range of GISAcres: (2.2753461981788584e-08, 1566273.1853343395)
Range of Shortest_Distance: (5.51, 1249.99)


In [41]:
acre_boundaries = [0, 50000, 500000, filter_df['GISAcres'].max()]
dist_boundaries = [0, 200, 600, filter_df['shortest_dist'].max()]

acre_labels = ['small fire', 'medium fire', 'large fire']
dist_labels = ['close fire', 'intermediate fire', 'far fire']

filter_df['acre classification'] = pd.cut(filter_df['GISAcres'], bins=acre_boundaries, labels=acre_labels)
filter_df['dist classification'] = pd.cut(filter_df['shortest_dist'], bins=dist_boundaries, labels=dist_labels)

filter_df.head()

Unnamed: 0,OBJECTID,FireType,FireYear,GISAcres,OverlapFlags,shortest_dist,Years_Since_Previous_Burn,Overlap_Percentage,Overlap_Factor,acre classification,dist classification
0,14299,Wildfire,1963,40992.458271,,1045.62,0,0,0,small fire,far fire
1,14300,Wildfire,1963,25757.090203,,1074.55,0,0,0,small fire,far fire
2,14301,Wildfire,1963,45527.210986,,1038.72,0,0,0,small fire,far fire
3,14302,Wildfire,1963,10395.010334,,990.02,0,0,0,small fire,far fire
4,14303,Wildfire,1963,9983.605738,,1034.55,0,0,0,small fire,far fire


In [54]:
smoke_factor_mapping = {
    ('small fire', 'close fire'): 0.1,             # Smaller fires close to the city
    ('small fire', 'intermediate fire'): 0.2,      # Smaller fires at an intermediate distance
    ('small fire', 'far fire'): 0.01,              # Smaller fires far from the city (minimum value)
    ('medium fire', 'close fire'): 0.5,           # Medium-sized fires close to the city
    ('medium fire', 'intermediate fire'): 0.4,    # Medium-sized fires at an intermediate distance
    ('medium fire', 'far fire'): 0.3,            # Medium-sized fires far from the city
    ('large fire', 'close fire'): 0.9,            # Larger fires close to the city (maximum value)
    ('large fire', 'intermediate fire'): 0.8,     # Larger fires at an intermediate distance
    ('large fire', 'far fire'): 0.7,             # Larger fires far from the city
}

filter_df['Smoke_Factor'] = filter_df.apply(lambda row: smoke_factor_mapping.get((row['acre classification'], row['dist classification'])), axis=1)

In [55]:
filter_df.head()

Unnamed: 0,OBJECTID,FireType,FireYear,GISAcres,OverlapFlags,shortest_dist,Years_Since_Previous_Burn,Overlap_Percentage,Overlap_Factor,acre classification,dist classification,Smoke Factor,Smoke_Factor,Smoke_Estimate
0,14299,Wildfire,1963,40992.458271,,1045.62,0,0,0,small fire,far fire,0.3,0.01,11.761192
1,14300,Wildfire,1963,25757.090203,,1074.55,0,0,0,small fire,far fire,0.3,0.01,7.191035
2,14301,Wildfire,1963,45527.210986,,1038.72,0,0,0,small fire,far fire,0.3,0.01,13.149033
3,14302,Wildfire,1963,10395.010334,,990.02,0,0,0,small fire,far fire,0.3,0.01,3.149939
4,14303,Wildfire,1963,9983.605738,,1034.55,0,0,0,small fire,far fire,0.3,0.01,2.895057


In [56]:
filter_df['Smoke_Estimate'] = (filter_df['GISAcres'] / filter_df['shortest_dist']) * filter_df['Smoke_Factor'] * (1 + filter_df['Overlap_Factor'])
filter_df

Unnamed: 0,OBJECTID,FireType,FireYear,GISAcres,OverlapFlags,shortest_dist,Years_Since_Previous_Burn,Overlap_Percentage,Overlap_Factor,acre classification,dist classification,Smoke Factor,Smoke_Factor,Smoke_Estimate
0,14299,Wildfire,1963,40992.458271,,1045.62,0,0,0,small fire,far fire,0.3,0.01,0.392040
1,14300,Wildfire,1963,25757.090203,,1074.55,0,0,0,small fire,far fire,0.3,0.01,0.239701
2,14301,Wildfire,1963,45527.210986,,1038.72,0,0,0,small fire,far fire,0.3,0.01,0.438301
3,14302,Wildfire,1963,10395.010334,,990.02,0,0,0,small fire,far fire,0.3,0.01,0.104998
4,14303,Wildfire,1963,9983.605738,,1034.55,0,0,0,small fire,far fire,0.3,0.01,0.096502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91776,135052,Prescribed Fire,2020,60.879054,,1115.61,0,0,0,small fire,far fire,0.3,0.01,0.000546
91777,135056,Prescribed Fire,2020,14.545208,,1111.90,0,0,0,small fire,far fire,0.3,0.01,0.000131
91778,135058,Prescribed Fire,2020,7.050837,"Caution, this Prescribed Fire in 2020 overlaps...",1117.25,0,0,0,small fire,far fire,0.3,0.01,0.000063
91779,135059,Prescribed Fire,2020,9.342668,"Caution, this Prescribed Fire in 2020 overlaps...",1117.31,0,0,0,small fire,far fire,0.3,0.01,0.000084


In [57]:
smoke_est_df = filter_df.groupby('FireYear')['Smoke_Estimate'].sum().reset_index()
smoke_est_df

Unnamed: 0,FireYear,Smoke_Estimate
0,1963,6.808
1,1964,37.571759
2,1965,35.805272
3,1966,35.523419
4,1967,35.597942
5,1968,2.976105
6,1969,2.734763
7,1970,100.763601
8,1971,104.668
9,1972,158.951806


In [58]:
smoke_est_df.to_csv('/Users/aviva/Desktop/MSDS/quarter_4/Human Centered Data Science/Part 1/GeoJSON Exports/intermediate_data/smoke_est.csv', index=False)