## Acknowledgement

Utitilzed chatGPT for assistance with some code in this notebook. Reference sections below for specific prompts used

## Calculation Explanation

## Update to include info about calculation steps

In [55]:
import json
import geojson
import pandas as pd
import matplotlib.pyplot as plt

In [56]:
with open('intermediate_file_paths.json') as output_path_file:
    output_paths = json.load(output_path_file)


FIRE_INPUT_PATH = output_paths.get('stage0_fire_json')
#FIRE_INPUT_PATH = output_paths.get('POC_stage0_fire_json')

In [57]:
SI_PER_FIRE_OUTPUT_PATH = output_paths.get('stage1_si_per_fire_csv')
SI_PER_YEAR_OUTPUT_PATH = output_paths.get("stage1_si_per_year_csv")

In [58]:
with open(FIRE_INPUT_PATH) as fire_file:
    fire_data = geojson.load(fire_file)

In [59]:
# Function to check the number of records and ensure all have the same keys
def check_records(data, record_type="Record"):
    # Get the set of keys from the first record
    first_record_keys = set(data[0].get('attributes', {}).keys())

    # Initialize a list to collect error records and a counter for mismatches
    error_records = []
    mismatched_count = 0

    # Iterate through all records to compare the keys
    for index, feature in enumerate(data):
        current_keys = set(feature.get('attributes', {}).keys())

        # Check if the keys match with the first record's keys
        if current_keys != first_record_keys:
            mismatched_count += 1
            print(f"{record_type} {index} has a different set of keys.")
            print(f"Expected keys: {first_record_keys}")
            print(f"Found keys: {current_keys}")
            
            # Add the mismatched record to the error list
            error_records.append(feature)

    # Final report
    total_records = len(data)
    print(f"\nTotal number of {record_type.lower()}s: {total_records}")
    
    if mismatched_count == 0:
        print(f"All {record_type.lower()}s contain the same set of keys.")
    else:
        print(f"{mismatched_count} {record_type.lower()}s have mismatched keys.")

    # Return the error records if there are any mismatches
    return error_records, first_record_keys


In [60]:
# Check the fire_data and features_with_smoke_impact
print("\nChecking fire_data...")
fire_data_errors, fire_data_keys = check_records(fire_data['features'], record_type="Fire data record")

# Optional: You can save the error records for later analysis
if fire_data_errors:
    with open('fire_data_error_records.json', 'w') as fire_error_file:
        json.dump({"features": fire_data_errors}, fire_error_file, indent=4)
    print("Fire data error records have been written to 'fire_data_error_records.json'.")


Checking fire_data...

Total number of fire data records: 101906
All fire data records contain the same set of keys.


## Convert to Pandas Dataframe

In [61]:
# Extract the list of fire attributes from each feature
fire_attributes = [feature['attributes'] for feature in fire_data['features']]

# Convert the list of attributes dictionaries to a DataFrame
fire_df = pd.DataFrame(fire_attributes)

In [62]:
print(fire_df.columns)
print(f"Number of rows: {fire_df.shape[0]}")

Index(['OBJECTID', 'USGS_Assigned_ID', 'Assigned_Fire_Type', 'Fire_Year',
       'Fire_Polygon_Tier', 'Fire_Attribute_Tiers', 'GIS_Acres',
       'GIS_Hectares', 'Source_Datasets', 'Listed_Fire_Types',
       'Listed_Fire_Names', 'Listed_Fire_Codes', 'Listed_Fire_IDs',
       'Listed_Fire_IRWIN_IDs', 'Listed_Fire_Dates', 'Listed_Fire_Causes',
       'Listed_Fire_Cause_Class', 'Listed_Rx_Reported_Acres',
       'Listed_Map_Digitize_Methods', 'Listed_Notes', 'Processing_Notes',
       'Wildfire_Notice', 'Prescribed_Burn_Notice', 'Wildfire_and_Rx_Flag',
       'Overlap_Within_1_or_2_Flag', 'Circleness_Scale', 'Circle_Flag',
       'Exclude_From_Summary_Rasters', 'Shape_Length', 'Shape_Area',
       'distance'],
      dtype='object')
Number of rows: 101906


In [63]:
# Display the first few rows to verify the structure
print(fire_df.head())

   OBJECTID  USGS_Assigned_ID Assigned_Fire_Type  Fire_Year  \
0     14600             14600           Wildfire       1964   
1     14602             14602           Wildfire       1964   
2     14605             14605           Wildfire       1964   
3     14606             14606           Wildfire       1964   
4     14607             14607           Wildfire       1964   

   Fire_Polygon_Tier Fire_Attribute_Tiers     GIS_Acres  GIS_Hectares  \
0                  1         1 (1), 3 (3)  65338.877636  26441.705659   
1                  1         1 (2), 3 (3)  19218.105903   7777.291530   
2                  1         1 (2), 3 (4)  14101.443662   5706.651785   
3                  1         1 (2), 3 (3)  11365.328284   4599.385176   
4                  1         1 (1), 3 (1)  11131.171732   4504.625381   

                                     Source_Datasets  \
0  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
1  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
2  Comb_Nation

## Smoke Impact Calculations per Fire

prompt used for chatgpt (model 4o): Need some code to get the smoke impact for each fire based on its size and distance away from memphis. also want to use the fire duration to amortize the fire over the fire season. Only reliable data fields to use are fire size (GIS_ACRES) and distance from memphis. Other variables are not reliable. Want to normalize the size and distance factors. Working on this assignment requirement: It seems reasonable that a large fire, that burns a large number of acres, and that is close to a city would put more smoke into a city than a small fire that is much further away. One task is to define your smoke estimate and then apply it to every fire within the specified distance to your city. Should your smoke estimate be cumulative during each year or somehow amortized over the fire season? 

In [67]:
# Constants for distance and fire duration
MIN_FIRE_DURATION = 10  # Minimum duration of a fire in days
MAX_FIRE_DURATION = 150  # Maximum duration of a fire in days
MAX_DISTANCE = 650       # Maximum distance for considering impact

# Define function for estimating fire duration based on size
def estimate_fire_duration(gis_acres, max_acres):
    normalized_size = gis_acres / max_acres
    fire_duration = MIN_FIRE_DURATION + (normalized_size * (MAX_FIRE_DURATION - MIN_FIRE_DURATION))
    return max(MIN_FIRE_DURATION, min(fire_duration, MAX_FIRE_DURATION))

# Function to calculate smoke impact for each fire
def get_smoke_impact(gis_acres, dist_from_memphis, max_acres):
    if gis_acres is None or dist_from_memphis is None:
        return 0  # Ignore fires with missing data
    
    # Calculate normalized size and distance factors
    size_factor = gis_acres / max_acres
    distance_factor = 1 - (dist_from_memphis / MAX_DISTANCE)

    # Smoke impact score for this fire
    smoke_impact = size_factor * distance_factor
    
    return max(smoke_impact, 0)

In [65]:
# Load and preprocess fire data
filtered_fire_df = fire_df[fire_df['distance'] <= MAX_DISTANCE]
max_acres_within_radius = filtered_fire_df['GIS_Acres'].max()

# Apply smoke impact calculation for each fire
filtered_fire_df['smoke_impact'] = filtered_fire_df.apply(
    lambda row: get_smoke_impact(row['GIS_Acres'], row['distance'], max_acres_within_radius),
    axis=1
)
filtered_fire_df['fire_duration'] = filtered_fire_df['GIS_Acres'].apply(
    lambda acres: estimate_fire_duration(acres, max_acres_within_radius)
)


# Daily smoke impact for a single fire
filtered_fire_df['amortized_smoke_impact'] = filtered_fire_df['smoke_impact'] / filtered_fire_df['fire_duration']

filtered_fire_df = filtered_fire_df.rename(columns={'GIS_Acres': 'total_acres_burned', 'Fire_Year': 'fire_year'})

# Save per-fire data to CSV
filtered_fire_df.to_csv(SI_PER_FIRE_OUTPUT_PATH, index=False)

print(filtered_fire_df[['USGS_Assigned_ID','fire_year', 'distance','total_acres_burned','amortized_smoke_impact']].head(5))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_fire_df['smoke_impact'] = filtered_fire_df.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_fire_df['fire_duration'] = filtered_fire_df['GIS_Acres'].apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_fire_df['amortized_smoke_impact'] = filtered_fire_df['smoke_impa

     USGS_Assigned_ID  fire_year    distance  total_acres_burned  \
157             14783       1964  384.142162           28.468121   
340             15001       1965  381.724329           29.902511   
352             15017       1965  357.252742           11.047444   
370             15040       1965  374.861207            2.831538   
683             15383       1966  256.234293           12.781395   

     amortized_smoke_impact  
157            7.432192e-07  
340            7.877565e-07  
352            3.176369e-07  
370            7.652131e-08  
683            4.942944e-07  


In [66]:
# Calculate total and average daily smoke impact
yearly_smoke_impact_df = filtered_fire_df.groupby('fire_year').agg(
    total_amortized_smoke_impact=('amortized_smoke_impact', 'sum'),
    total_fire_duration=('fire_duration', 'sum'),  # Total days of impact for all fires in the year
    total_acres_burned=('total_acres_burned', 'sum')  # Sum of acres burned for each year
).reset_index()

# Calculate the raw average daily smoke impact per year
yearly_smoke_impact_df['avg_daily_smoke_impact'] = (
    yearly_smoke_impact_df['total_amortized_smoke_impact'] / yearly_smoke_impact_df['total_fire_duration']
)

# Save per-year data to CSV
yearly_smoke_impact_df.to_csv(SI_PER_YEAR_OUTPUT_PATH, index=False)

# Display the results
print(yearly_smoke_impact_df[['fire_year', 'avg_daily_smoke_impact', 'total_amortized_smoke_impact', 'total_acres_burned']].head(5))

   fire_year  avg_daily_smoke_impact  total_amortized_smoke_impact  \
0       1964            7.430301e-08                  7.432192e-07   
1       1965            3.939202e-08                  1.181915e-06   
2       1966            3.440880e-08                  1.032347e-06   
3       1967            5.019618e-07                  1.508150e-05   
4       1968            3.645253e-08                  1.093709e-06   

   total_acres_burned  
0           28.468121  
1           43.781494  
2           26.964910  
3          504.821511  
4           40.723316  
