In [9]:
import json
import geojson
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
with open('intermediate_file_paths.json') as output_path_file:
    output_paths = json.load(output_path_file)


FIRE_INPUT_PATH = output_paths.get('stage0_fire_json')
#FIRE_INPUT_PATH = output_paths.get('POC_stage0_fire_json')

In [3]:
SI_PER_FIRE_OUTPUT_PATH = output_paths.get('stage1_si_per_fire_csv')
SI_PER_YEAR_OUTPUT_PATH = output_paths.get("stage1_si_per_year_csv")

In [4]:
with open(FIRE_INPUT_PATH) as fire_file:
    fire_data = json.load(fire_file)

In [5]:
# Function to check the number of records and ensure all have the same keys
def check_records(data, record_type="Record"):
    # Get the set of keys from the first record
    first_record_keys = set(data[0].get('attributes', {}).keys())

    # Initialize a list to collect error records and a counter for mismatches
    error_records = []
    mismatched_count = 0

    # Iterate through all records to compare the keys
    for index, feature in enumerate(data):
        current_keys = set(feature.get('attributes', {}).keys())

        # Check if the keys match with the first record's keys
        if current_keys != first_record_keys:
            mismatched_count += 1
            print(f"{record_type} {index} has a different set of keys.")
            print(f"Expected keys: {first_record_keys}")
            print(f"Found keys: {current_keys}")
            
            # Add the mismatched record to the error list
            error_records.append(feature)

    # Final report
    total_records = len(data)
    print(f"\nTotal number of {record_type.lower()}s: {total_records}")
    
    if mismatched_count == 0:
        print(f"All {record_type.lower()}s contain the same set of keys.")
    else:
        print(f"{mismatched_count} {record_type.lower()}s have mismatched keys.")

    # Return the error records if there are any mismatches
    return error_records, first_record_keys


In [6]:
def get_smoke_impact(gis_acres, dist_from_memphis, max_acres=3614020.62856015, max_distance=650):
    # Check for any potential issues with the inputs
    if gis_acres is None:
        return -1, "Error: Fire size (GIS_Acres) is missing."
    if dist_from_memphis is None:
        return -1, "Error: Distance from city is missing."
    if dist_from_memphis > max_distance:
        return -1, f"Error: Distance from city exceeds max distance of {max_distance} miles."
    
    # Calculate the smoke impact score, giving more weight to the distance
    smoke_impact = 500 * (gis_acres / max_acres) * (1 - (dist_from_memphis / max_distance) ** 2)
    
    # Ensure the smoke impact is within the range 0 to 100
    smoke_impact = max(0, min(smoke_impact, 100))

    # Return the score along with a success message
    return smoke_impact, "Successful calculation"


In [7]:
# Check the fire_data and features_with_smoke_impact
print("\nChecking fire_data...")
fire_data_errors, fire_data_keys = check_records(fire_data['features'], record_type="Fire data record")

# Optional: You can save the error records for later analysis
if fire_data_errors:
    with open('fire_data_error_records.json', 'w') as fire_error_file:
        json.dump({"features": fire_data_errors}, fire_error_file, indent=4)
    print("Fire data error records have been written to 'fire_data_error_records.json'.")


Checking fire_data...

Total number of fire data records: 28270
All fire data records contain the same set of keys.


## Smoke Impact Calculations per Fire

In [8]:
si_per_fire = []

for feature in fire_data['features']:

    dist_from_memphis = feature.get('attributes', {}).get('distance')

    #print(feature)
    if dist_from_memphis <= 650:
   
        # Extract distance and fire size from attributes (properties in GeoJSON)
        object_id = feature.get('attributes', {}).get('OBJECTID')
        usgs_id = feature.get('attributes', {}).get('USGS_Assigned_ID')
        source_dataset = feature.get('attributes', {}).get('Source_Datasets')
        fire_year = feature.get('attributes', {}).get('Fire_Year')
        
        gis_acres = feature.get('attributes', {}).get('GIS_Acres')


        # Calculate smoke impact using the new formula, now returning both a score and a message
        smoke_impact, message = get_smoke_impact(gis_acres, dist_from_memphis)

        # Ensure that 'properties' exist and add smoke impact score and message to the 'properties'
        # if 'properties' not in feature:
        #     feature['properties'] = feature.get('attributes', {})  # Use 'attributes' as the base for properties
        
        # feature['properties']['smoke_impact_score'] = smoke_impact
        # feature['properties']['calculation_message'] = message
        
        # Append the modified feature to the new list
        new_si_per_fire_record = {
            "object_id": object_id,
            "usgs_id": usgs_id,
            "source_dataset": source_dataset,
            "fire_year": fire_year,
            "distance_from_city": dist_from_memphis,
            "gis_acres": gis_acres,
            "smoke_impact": smoke_impact,
            "smoke_impact_calc_message": message
        }

        print(new_si_per_fire_record)

        si_per_fire.append(new_si_per_fire_record)

{'object_id': 14783, 'usgs_id': 14783, 'source_dataset': 'Comb_National_NIFC_Interagency_Fire_Perimeter_History (1), Comb_National_NPS_Wildland_Fire_Perimeters (1)', 'fire_year': 1964, 'distance_from_city': 384.14216230819386, 'gis_acres': 28.468121417707913, 'smoke_impact': 0.0025629562118909977, 'smoke_impact_calc_message': 'Successful calculation'}
{'object_id': 15001, 'usgs_id': 15001, 'source_dataset': 'Comb_National_NIFC_Interagency_Fire_Perimeter_History (1), Comb_National_NPS_Wildland_Fire_Perimeters (1)', 'fire_year': 1965, 'distance_from_city': 381.7243293694474, 'gis_acres': 29.90251148046571, 'smoke_impact': 0.00271022464874557, 'smoke_impact_calc_message': 'Successful calculation'}
{'object_id': 15017, 'usgs_id': 15017, 'source_dataset': 'Comb_National_NIFC_Interagency_Fire_Perimeter_History (1), Comb_National_NPS_Wildland_Fire_Perimeters (1)', 'fire_year': 1965, 'distance_from_city': 357.2527421670821, 'gis_acres': 11.047444041327319, 'smoke_impact': 0.0010667085311137404

In [9]:
# Convert si_per_fire list to a DataFrame
si_per_fire_df = pd.DataFrame(si_per_fire)

print(si_per_fire_df.head(5))

# Write the DataFrame to a CSV file
si_per_fire_df.to_csv(SI_PER_FIRE_OUTPUT_PATH, index=False)
print(f"DataFrame written to {SI_PER_FIRE_OUTPUT_PATH}")

   object_id  usgs_id                                     source_dataset  \
0      14783    14783  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
1      15001    15001  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
2      15017    15017  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
3      15040    15040  Comb_National_NIFC_Interagency_Fire_Perimeter_...   
4      15383    15383  Comb_National_NIFC_Interagency_Fire_Perimeter_...   

   fire_year  distance_from_city  gis_acres  smoke_impact  \
0       1964          384.142162  28.468121      0.002563   
1       1965          381.724329  29.902511      0.002710   
2       1965          357.252742  11.047444      0.001067   
3       1965          374.861207   2.831538      0.000261   
4       1966          256.234293  12.781395      0.001494   

  smoke_impact_calc_message  
0    Successful calculation  
1    Successful calculation  
2    Successful calculation  
3    Successful calculation  
4    Successful calculatio

## Amortized Smoke Impact Per Year

In [10]:
# Weighted Distribution Based on Fire Size
min_fire_duration = 10  # Smallest fires burn for 10 days
max_fire_duration = 150  # Largest fires burn for 150 days

def estimate_fire_duration(gis_acres, max_acres=3614020.62856015):
    """
    Estimate the duration of the fire based on its size.
    Larger fires will last longer.
    """
    fire_duration = min_fire_duration + ((gis_acres / max_acres) * (max_fire_duration - min_fire_duration))
    return max(min_fire_duration, min(fire_duration, max_fire_duration))  # Ensure within bounds

def calculate_weighted_smoke_impact(si_per_fire_df):
    """
    Calculate total weighted smoke impact and total acres burned, grouped by fire year.
    Returns a DataFrame with columns for year, total weighted smoke impact, and total acres burned.
    """
    # Filter out rows with missing or zero values in 'gis_acres' or 'smoke_impact'
    filtered_df = si_per_fire_df[(si_per_fire_df['gis_acres'] > 0) & (si_per_fire_df['smoke_impact'] > 0)]

    # Calculate fire duration and amortized smoke impact for each fire
    filtered_df['fire_duration'] = filtered_df['gis_acres'].apply(estimate_fire_duration)
    filtered_df['amortized_smoke_impact'] = filtered_df['smoke_impact'] / filtered_df['fire_duration']

    # Group by fire year and calculate total weighted smoke impact and total acres burned
    grouped_df = filtered_df.groupby('fire_year').agg(
        total_weighted_smoke_impact=('amortized_smoke_impact', 'sum'),
        total_acres_burned=('gis_acres', 'sum')
    ).reset_index()

    # Ensure all years from 1961 to 2021 are included, filling missing years with 0
    all_years = pd.DataFrame({'fire_year': range(1961, 2022)})
    grouped_df = all_years.merge(grouped_df, on='fire_year', how='left').fillna(0)

    return grouped_df

In [11]:
# Calculate smoke impact and total acres burned for all years
si_per_year_df = calculate_weighted_smoke_impact(si_per_fire_df)

In [12]:
print(f"Number of row in si_per_year_df: {len(si_per_year_df)}\n")

print(si_per_year_df.sort_values(by='total_weighted_smoke_impact', ascending=False).head(5))


Number of row in si_per_year_df: 61

    fire_year  total_weighted_smoke_impact  total_acres_burned
50       2011                    28.343604        4.859570e+06
55       2016                    27.516200        3.800291e+06
53       2014                    23.576570        3.049966e+06
56       2017                    19.908641        3.867709e+06
54       2015                    18.046823        2.303847e+06


In [14]:
# Write the results to a CSV file
si_per_year_df.to_csv(SI_PER_YEAR_OUTPUT_PATH, index=False)

print(f"Smoke impact results have been saved to '{SI_PER_YEAR_OUTPUT_PATH}'")

Smoke impact results have been saved to 'intermediate/stage1-output/smoke_impacts_per_YEAR.csv'
