In [154]:
import json
import geojson
import pandas as pd
import matplotlib.pyplot as plt

In [155]:
# Load the JSON file that maps logical names to intermediate file paths.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open('intermediate_file_paths.json', encoding='utf-8') as output_path_file:
    output_paths = json.load(output_path_file)


# Index directly rather than using .get(): a missing key then fails right here
# with a KeyError naming the key, instead of yielding None and failing later
# when the path is opened.
FIRE_INPUT_PATH = output_paths['stage0_fire_json']
#FIRE_INPUT_PATH = output_paths['POC_stage0_fire_json']

In [156]:
# Destinations for the two CSV files written by this notebook.
# Index directly rather than using .get(): with a missing key .get() returns
# None, and DataFrame.to_csv(None) returns the CSV as a string instead of
# writing a file, so the output would be silently skipped.
SI_PER_FIRE_OUTPUT_PATH = output_paths['stage1_si_per_fire_csv']
SI_PER_YEAR_OUTPUT_PATH = output_paths['stage1_si_per_year_csv']

In [157]:
# Read the fire features produced upstream. The file is decoded as UTF-8
# explicitly so the result does not depend on the platform's default encoding.
with open(FIRE_INPUT_PATH, encoding='utf-8') as fire_file:
    fire_data = geojson.load(fire_file)

In [158]:
def check_records(data, record_type="Record"):
    """
    Report the record count and whether every record has the same attribute keys.

    The keys of the first record's 'attributes' mapping are the reference.
    Each record whose 'attributes' keys differ from that reference is
    reported on stdout and collected. A record without an 'attributes' entry
    is treated as having no keys.

    Args:
        data: Sequence of dict-like records, each optionally holding an
            'attributes' mapping. May be empty.
        record_type: Label used in the printed messages.

    Returns:
        A tuple (error_records, first_record_keys): the list of records whose
        keys differ from the reference, and the reference key set (an empty
        set when `data` is empty).
    """
    total_records = len(data)

    # Nothing to compare against: report and return instead of failing on data[0]
    if total_records == 0:
        print(f"\nTotal number of {record_type.lower()}s: {total_records}")
        print(f"No {record_type.lower()}s to check.")
        return [], set()

    # Get the set of keys from the first record
    first_record_keys = set(data[0].get('attributes', {}).keys())

    # Records whose key set differs from the reference
    error_records = []

    # Iterate through all records to compare the keys
    for index, feature in enumerate(data):
        current_keys = set(feature.get('attributes', {}).keys())

        # Check if the keys match with the first record's keys
        if current_keys != first_record_keys:
            print(f"{record_type} {index} has a different set of keys.")
            print(f"Expected keys: {first_record_keys}")
            print(f"Found keys: {current_keys}")

            # Add the mismatched record to the error list
            error_records.append(feature)

    # Final report
    mismatched_count = len(error_records)
    print(f"\nTotal number of {record_type.lower()}s: {total_records}")

    if mismatched_count == 0:
        print(f"All {record_type.lower()}s contain the same set of keys.")
    else:
        print(f"{mismatched_count} {record_type.lower()}s have mismatched keys.")

    # Return the error records (empty when everything matches) and the reference keys
    return error_records, first_record_keys


In [159]:
# Verify that every fire feature carries the same set of attribute keys
print("\nChecking fire_data...")
fire_data_errors, fire_data_keys = check_records(
    fire_data['features'],
    record_type="Fire data record",
)

# Optional: keep any mismatching records on disk for later analysis
if len(fire_data_errors) > 0:
    with open('fire_data_error_records.json', 'w') as fire_error_file:
        fire_error_file.write(json.dumps({"features": fire_data_errors}, indent=4))
    print("Fire data error records have been written to 'fire_data_error_records.json'.")


Checking fire_data...

Total number of fire data records: 101906
All fire data records contain the same set of keys.


## Convert to a pandas DataFrame

In [160]:
# Collect the 'attributes' mapping carried by each loaded feature
fire_attributes = [
    fire_feature['attributes']
    for fire_feature in fire_data['features']
]

# Build a DataFrame with one row per attributes mapping
fire_df = pd.DataFrame(data=fire_attributes)

In [161]:
# Column names and row count of the full fire table
print(fire_df.columns)
print(f"Number of rows: {len(fire_df)}")

Index(['OBJECTID', 'USGS_Assigned_ID', 'Assigned_Fire_Type', 'Fire_Year',
       'Fire_Polygon_Tier', 'Fire_Attribute_Tiers', 'GIS_Acres',
       'GIS_Hectares', 'Source_Datasets', 'Listed_Fire_Types',
       'Listed_Fire_Names', 'Listed_Fire_Codes', 'Listed_Fire_IDs',
       'Listed_Fire_IRWIN_IDs', 'Listed_Fire_Dates', 'Listed_Fire_Causes',
       'Listed_Fire_Cause_Class', 'Listed_Rx_Reported_Acres',
       'Listed_Map_Digitize_Methods', 'Listed_Notes', 'Processing_Notes',
       'Wildfire_Notice', 'Prescribed_Burn_Notice', 'Wildfire_and_Rx_Flag',
       'Overlap_Within_1_or_2_Flag', 'Circleness_Scale', 'Circle_Flag',
       'Exclude_From_Summary_Rasters', 'Shape_Length', 'Shape_Area',
       'distance'],
      dtype='object')
Number of rows: 101906


In [162]:
# Print the first five rows as a quick structural check
print(fire_df.head(n=5))

   OBJECTID  USGS_Assigned_ID Assigned_Fire_Type  Fire_Year  \
0     14600             14600           Wildfire       1964   
1     14602             14602           Wildfire       1964   
2     14605             14605           Wildfire       1964   
3     14606             14606           Wildfire       1964   
4     14607             14607           Wildfire       1964   

   Fire_Polygon_Tier Fire_Attribute_Tiers     GIS_Acres  GIS_Hectares  \
0                  1         1 (1), 3 (3)  65338.877636  26441.705659   
1                  1         1 (2), 3 (3)  19218.105903   7777.291530   
2                  1         1 (2), 3 (4)  14101.443662   5706.651785   
3                  1         1 (2), 3 (3)  11365.328284   4599.385176   
4                  1         1 (1), 3 (1)  11131.171732   4504.625381   

                                     Source_Datasets  \
0  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
1  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
2  Comb_Nation

## Smoke Impact Calculations per Fire

In [163]:
def get_smoke_impact(gis_acres, dist_from_memphis, max_acres, max_distance=650):
    """
    Compute a smoke impact score in the range [0, 500] for a single fire.

    The score is 500 * (gis_acres / max_acres) * (1 - (distance / max_distance)**2),
    clamped to [0, 500]. It grows with fire size and falls off quadratically
    with distance.

    Args:
        gis_acres: Fire size in acres. None or NaN is treated as missing.
        dist_from_memphis: Distance of the fire from the city, in the same
            unit as `max_distance`. None or NaN is treated as missing.
        max_acres: Fire size that maps to the maximum size weight; must be a
            positive number.
        max_distance: Largest distance for which a score is computed.

    Returns:
        A tuple (score, message). On success, `score` is the clamped smoke
        impact and `message` is "Successful calculation". On invalid input,
        `score` is -1 and `message` describes the problem.
    """
    # pd.isna covers None as well as the NaN pandas uses for missing numbers.
    # A plain `is None` test lets NaN through, and the clamp below would then
    # turn it into a score of 0 reported as a success.
    if pd.isna(gis_acres):
        return -1, "Error: Fire size (GIS_Acres) is missing."
    if pd.isna(dist_from_memphis):
        return -1, "Error: Distance from city is missing."
    if dist_from_memphis > max_distance:
        return -1, f"Error: Distance from city exceeds max distance of {max_distance} miles."
    # Checked after the original validations so their precedence is unchanged;
    # guards the division below.
    if pd.isna(max_acres) or max_acres <= 0:
        return -1, "Error: Maximum fire size (max_acres) must be a positive number."

    # Calculate the smoke impact score, giving more weight to distance
    smoke_impact = 500 * (gis_acres / max_acres) * (1 - (dist_from_memphis / max_distance) ** 2)
    smoke_impact = max(0, min(smoke_impact, 500))

    return smoke_impact, "Successful calculation"

In [164]:
# Keep only the fires whose distance from Memphis is at most 650 miles
filtered_fire_df = fire_df.loc[fire_df['distance'].le(650)]

In [165]:
# Column names and row count after the distance filter
print(filtered_fire_df.columns)
print(f"Number of rows: {len(filtered_fire_df)}")

Index(['OBJECTID', 'USGS_Assigned_ID', 'Assigned_Fire_Type', 'Fire_Year',
       'Fire_Polygon_Tier', 'Fire_Attribute_Tiers', 'GIS_Acres',
       'GIS_Hectares', 'Source_Datasets', 'Listed_Fire_Types',
       'Listed_Fire_Names', 'Listed_Fire_Codes', 'Listed_Fire_IDs',
       'Listed_Fire_IRWIN_IDs', 'Listed_Fire_Dates', 'Listed_Fire_Causes',
       'Listed_Fire_Cause_Class', 'Listed_Rx_Reported_Acres',
       'Listed_Map_Digitize_Methods', 'Listed_Notes', 'Processing_Notes',
       'Wildfire_Notice', 'Prescribed_Burn_Notice', 'Wildfire_and_Rx_Flag',
       'Overlap_Within_1_or_2_Flag', 'Circleness_Scale', 'Circle_Flag',
       'Exclude_From_Summary_Rasters', 'Shape_Length', 'Shape_Area',
       'distance'],
      dtype='object')
Number of rows: 28270


In [166]:
# Largest GIS_Acres value among the fires kept by the distance filter
max_acres_within_radius = filtered_fire_df.loc[:, 'GIS_Acres'].max()
print(f"Maximum acres within 650 mi radius of Memphis: {max_acres_within_radius}")

Maximum acres within 650 mi radius of Memphis: 1566273.1853343395


In [167]:
# Keep only the columns needed for the smoke impact table and give them
# snake_case names
si_per_fire_df = (
    filtered_fire_df
    .loc[:, ['OBJECTID', 'USGS_Assigned_ID', 'Source_Datasets', 'Fire_Year', 'distance', 'GIS_Acres']]
    .rename(
        columns={
            'OBJECTID': 'object_id',
            'USGS_Assigned_ID': 'usgs_id',
            'Source_Datasets': 'source_dataset',
            'Fire_Year': 'fire_year',
            'distance': 'distance_from_city',
            'GIS_Acres': 'gis_acres',
        }
    )
)

In [168]:
# Column names and row count of the per-fire table before scoring
print(si_per_fire_df.columns)
print(f"Number of rows: {len(si_per_fire_df)}")

Index(['object_id', 'usgs_id', 'source_dataset', 'fire_year',
       'distance_from_city', 'gis_acres'],
      dtype='object')
Number of rows: 28270


In [169]:
# Score every fire once, pairing its size with its distance from the city.
# Iterating the two columns directly avoids building a Series for every row
# (as apply(axis=1) does). Splitting the (score, message) pairs with
# comprehensions also works when the frame has no rows, where unpacking
# zip(*...) into two names fails.
_smoke_results = [
    get_smoke_impact(acres, distance, max_acres_within_radius)
    for acres, distance in zip(si_per_fire_df['gis_acres'], si_per_fire_df['distance_from_city'])
]
si_per_fire_df['smoke_impact'] = [score for score, _message in _smoke_results]
si_per_fire_df['smoke_impact_message'] = [message for _score, message in _smoke_results]

In [170]:
# Column names and row count of the per-fire table after scoring
print(si_per_fire_df.columns)
print(f"Number of rows: {len(si_per_fire_df)}")

Index(['object_id', 'usgs_id', 'source_dataset', 'fire_year',
       'distance_from_city', 'gis_acres', 'smoke_impact',
       'smoke_impact_message'],
      dtype='object')
Number of rows: 28270


In [171]:
# Print the first five scored rows as a sanity check
print(si_per_fire_df.head())

     object_id  usgs_id                                     source_dataset  \
157      14783    14783  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
340      15001    15001  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
352      15017    15017  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
370      15040    15040  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
683      15383    15383  Comb_National_NIFC_Interagency_Fire_Perimeter_...   

     fire_year  distance_from_city  gis_acres  smoke_impact  \
157       1964          384.142162  28.468121      0.005914   
340       1965          381.724329  29.902511      0.006254   
352       1965          357.252742  11.047444      0.002461   
370       1965          374.861207   2.831538      0.000603   
683       1966          256.234293  12.781395      0.003446   

       smoke_impact_message  
157  Successful calculation  
340  Successful calculation  
352  Successful calculation  
370  Successful calculation  
68

In [172]:
# Persist the per-fire smoke impact table as CSV, without the index column
si_per_fire_df.to_csv(path_or_buf=SI_PER_FIRE_OUTPUT_PATH, index=False)
print(f"DataFrame written to {SI_PER_FIRE_OUTPUT_PATH}")

DataFrame written to intermediate/stage1-output/smoke_impacts_per_FIRE.csv


## Amortized Smoke Impact Per Year

In [173]:
# Weighted Distribution Based on Fire Size
min_fire_duration = 10  # Smallest fires burn for 10 days
max_fire_duration = 150  # Largest fires burn for 150 days

def estimate_fire_duration(gis_acres, max_acres):
    """
    Estimate the duration of a fire, in days, from its size.

    The duration grows linearly with gis_acres / max_acres, from
    min_fire_duration up to max_fire_duration, and is clamped to that range.

    Args:
        gis_acres: Size of the fire in acres.
        max_acres: Fire size that maps to max_fire_duration.

    Returns:
        The estimated duration, between min_fire_duration and
        max_fire_duration inclusive.
    """
    size_ratio = gis_acres / max_acres
    fire_duration = min_fire_duration + (size_ratio * (max_fire_duration - min_fire_duration))
    return max(min_fire_duration, min(fire_duration, max_fire_duration))  # Ensure within bounds

def calculate_weighted_smoke_impact(si_per_fire_df, max_acres_within_radius, start_year=1961, end_year=2021):
    """
    Aggregate per-fire smoke impact into one row per fire year.

    Each fire's smoke impact is divided by its estimated duration (see
    estimate_fire_duration) and the results are summed per year, together
    with the acres burned.

    Args:
        si_per_fire_df: DataFrame with 'fire_year', 'gis_acres' and
            'smoke_impact' columns. It is not modified.
        max_acres_within_radius: Fire size passed to estimate_fire_duration
            as the size that maps to the longest duration.
        start_year: First year of the output range (inclusive).
        end_year: Last year of the output range (inclusive).

    Returns:
        DataFrame with columns 'fire_year', 'total_weighted_smoke_impact' and
        'total_acres_burned', one row per year from start_year to end_year.
        Years without qualifying fires hold 0; fires whose year lies outside
        the range are left out by the left merge.
    """
    # Keep only fires with a positive size and a positive smoke impact
    # (this also drops rows where either value is missing)
    has_acres = si_per_fire_df['gis_acres'] > 0
    has_impact = si_per_fire_df['smoke_impact'] > 0
    # Work on an explicit copy so the new columns are not written onto a
    # slice of the caller's frame (avoids pandas' SettingWithCopyWarning)
    filtered_df = si_per_fire_df[has_acres & has_impact].copy()

    # Calculate fire duration using max_acres_within_radius and amortized smoke impact for each fire
    filtered_df['fire_duration'] = filtered_df['gis_acres'].apply(
        lambda acres: estimate_fire_duration(acres, max_acres_within_radius)
    )
    filtered_df['amortized_smoke_impact'] = filtered_df['smoke_impact'] / filtered_df['fire_duration']

    # Group by fire year and calculate total weighted smoke impact and total acres burned
    grouped_df = filtered_df.groupby('fire_year').agg(
        total_weighted_smoke_impact=('amortized_smoke_impact', 'sum'),
        total_acres_burned=('gis_acres', 'sum')
    ).reset_index()

    # Ensure every year in the requested range is included, filling missing years with 0
    all_years = pd.DataFrame({'fire_year': range(start_year, end_year + 1)})
    grouped_df = all_years.merge(grouped_df, on='fire_year', how='left').fillna(0)

    return grouped_df

In [174]:
# Aggregate the per-fire results into yearly smoke impact and acres burned
si_per_year_df = calculate_weighted_smoke_impact(
    si_per_fire_df=si_per_fire_df,
    max_acres_within_radius=max_acres_within_radius,
)

In [175]:
print(f"Number of row in si_per_year_df: {si_per_year_df.shape[0]}\n")

# Show the five years with the highest total weighted smoke impact
print(si_per_year_df.sort_values(by='total_weighted_smoke_impact', ascending=False).iloc[:5])


Number of row in si_per_year_df: 61

    fire_year  total_weighted_smoke_impact  total_acres_burned
50       2011                    58.811352        4.859570e+06
55       2016                    57.827190        3.800291e+06
53       2014                    49.698368        3.049966e+06
56       2017                    39.752341        3.867709e+06
57       2018                    39.397955        2.453311e+06


In [176]:
# Persist the yearly smoke impact table as CSV, without the index column
si_per_year_df.to_csv(path_or_buf=SI_PER_YEAR_OUTPUT_PATH, index=False)

print(f"Smoke impact results have been saved to '{SI_PER_YEAR_OUTPUT_PATH}'")

Smoke impact results have been saved to 'intermediate/stage1-output/smoke_impacts_per_YEAR.csv'
