In [1]:
import json
import geojson
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Load the registry that maps pipeline stage names to intermediate file paths.
with open('intermediate_file_paths.json') as output_path_file:
    output_paths = json.load(output_path_file)


# Index the registry directly (instead of .get) so a missing entry fails right
# here with a KeyError naming the key, rather than later as open(None).
FIRE_INPUT_PATH = output_paths['stage0_fire_json']
#FIRE_INPUT_PATH = output_paths['POC_stage0_fire_json']

In [3]:
# Index the registry directly (instead of .get) so a missing entry raises a
# KeyError here. With .get a missing key yields None, and DataFrame.to_csv(None)
# returns the CSV text instead of writing a file, so the per-year export would
# be skipped without any error.
SMOKE_IMPACT_OUTPUT_PATH = output_paths['stage1_smoke_impact_json']
#SMOKE_IMPACT_OUTPUT_PATH = output_paths['POC_stage1_smoke_impact_json']
SMOKE_PER_YEAR_OUTPUT_PATH = output_paths["smoke_impacts_per_year_csv"]


In [4]:
# Parse the JSON file at FIRE_INPUT_PATH into memory; later cells read its
# 'features' list.
with open(FIRE_INPUT_PATH) as fire_file:
    fire_data = json.load(fire_file)

In [5]:
# Function to check the number of records and ensure all have the same keys
def check_records(data, record_type="Record"):
    """Report the record count and flag records whose attribute keys differ.

    The key set of the first record's 'attributes' dict is the reference.
    Every record whose 'attributes' keys differ from it is printed and
    collected. A record without an 'attributes' entry counts as having no keys.

    Args:
        data: Sequence of feature dicts.
        record_type: Label used in the printed messages.

    Returns:
        A tuple (error_records, first_record_keys): the list of mismatching
        features and the reference key set. For empty input both are empty.
    """
    # An empty input has no reference record; fall back to an empty key set
    # instead of failing with IndexError on data[0].
    first_record_keys = set(data[0].get('attributes', {}).keys()) if data else set()

    # Collect every record whose key set differs from the reference
    error_records = []
    for index, feature in enumerate(data):
        current_keys = set(feature.get('attributes', {}).keys())

        if current_keys != first_record_keys:
            print(f"{record_type} {index} has a different set of keys.")
            print(f"Expected keys: {first_record_keys}")
            print(f"Found keys: {current_keys}")
            error_records.append(feature)

    # Final report
    print(f"\nTotal number of {record_type.lower()}s: {len(data)}")

    if error_records:
        print(f"{len(error_records)} {record_type.lower()}s have mismatched keys.")
    else:
        print(f"All {record_type.lower()}s contain the same set of keys.")

    return error_records, first_record_keys


In [6]:
def get_smoke_impact(gis_acres, dist_from_memphis, max_acres=3614020.62856015, max_distance=650):
    """Score a fire's smoke impact from its size and its distance from the city.

    The raw score is 500 * (gis_acres / max_acres) * (1 - (distance / max_distance) ** 2),
    then clamped to the range 0 to 100.

    Returns:
        A tuple (score, message). When an input is missing or the distance
        exceeds max_distance, score is -1 and message describes the problem.
        Otherwise score is the clamped value and message reports success.
    """
    # Validate the inputs in order; the first failing check decides the message
    failure = None
    if gis_acres is None:
        failure = "Error: Fire size (GIS_Acres) is missing."
    elif dist_from_memphis is None:
        failure = "Error: Distance from city is missing."
    elif dist_from_memphis > max_distance:
        failure = f"Error: Distance from city exceeds max distance of {max_distance} miles."

    if failure is not None:
        return -1, failure

    # Size contributes linearly; distance is squared, so the score falls off
    # faster as the fire gets farther away
    size_ratio = gis_acres / max_acres
    proximity = 1 - (dist_from_memphis / max_distance) ** 2
    raw_score = 500 * size_ratio * proximity

    # Clamp into the 0 to 100 range
    clamped_score = max(0, min(raw_score, 100))
    return clamped_score, "Successful calculation"


In [7]:
# Validate the raw fire records: report the record count and collect any record
# whose attribute keys differ from the first record's
print("\nChecking fire_data...")
fire_data_errors, fire_data_keys = check_records(fire_data['features'], record_type="Fire data record")

# Optional: save the mismatching records for later analysis
# (the file is only written when at least one mismatch was found)
if fire_data_errors:
    with open('fire_data_error_records.json', 'w') as fire_error_file:
        json.dump({"features": fire_data_errors}, fire_error_file, indent=4)
    print("Fire data error records have been written to 'fire_data_error_records.json'.")


Checking fire_data...

Total number of fire data records: 28270
All fire data records contain the same set of keys.


In [8]:
# Attach a smoke impact score and a status message to every fire feature.
# NOTE: features are modified in place, so fire_data['features'] and
# features_with_smoke_impact end up holding the same dict objects.
features_with_smoke_impact = []

for feature in fire_data['features']:
    # Extract distance and fire size from attributes (properties in GeoJSON)
    dist_from_memphis = feature.get('attributes', {}).get('distance')
    gis_acres = feature.get('attributes', {}).get('GIS_Acres')

    # get_smoke_impact returns a (score, message) pair; the score is -1 when an
    # input is missing or the distance exceeds the maximum
    smoke_impact, message = get_smoke_impact(gis_acres, dist_from_memphis)

    # When a feature has no 'properties', the existing 'attributes' dict itself
    # (not a copy) is assigned, so both keys refer to one dict and the score and
    # message written below also appear under 'attributes'.
    if 'properties' not in feature:
        feature['properties'] = feature.get('attributes', {})  # Use 'attributes' as the base for properties
    
    feature['properties']['smoke_impact_score'] = smoke_impact
    feature['properties']['calculation_message'] = message
    
    # Append the modified feature to the new list
    features_with_smoke_impact.append(feature)

In [9]:
# Re-run the key-consistency check on the scored features. check_records
# compares the keys of each record's 'attributes' dict, not 'properties'.
print("\nChecking features_with_smoke_impact...")
smoke_impact_errors, smoke_impact_keys = check_records(features_with_smoke_impact, record_type="Smoke impact record")

# Save mismatching records for later analysis (only written when any were found)
if smoke_impact_errors:
    with open('smoke_impact_error_records.json', 'w') as smoke_error_file:
        json.dump({"features": smoke_impact_errors}, smoke_error_file, indent=4)
    print("Smoke impact error records have been written to 'smoke_impact_error_records.json'.")


Checking features_with_smoke_impact...

Total number of smoke impact records: 28270
All smoke impact records contain the same set of keys.


In [10]:
# Wrap the scored features in a FeatureCollection and write it to the stage-1 output path
output_path = SMOKE_IMPACT_OUTPUT_PATH
geojson_data = geojson.FeatureCollection(features_with_smoke_impact)
with open(output_path, 'w') as output_file:
    geojson.dump(geojson_data, output_file)

print(f"GeoJSON file written to '{output_path}'.")

# Read the file back and spot-check the first record only
with open(output_path, 'r') as input_file:
    geojson_data_loaded = geojson.load(input_file)

first_record = geojson_data_loaded['features'][0]
properties = first_record.get('properties', {})

# Collect whichever of the two added fields did not survive the round trip
missing_fields = [
    field_name
    for field_name in ('smoke_impact_score', 'calculation_message')
    if field_name not in properties
]

if missing_fields:
    print(f"Verification failed: The following fields are missing in the first record: {missing_fields}")
else:
    print("Verification passed: 'smoke_impact_score' and 'calculation_message' are present in the first record.")


GeoJSON file written to 'intermediate/stage1-output/stage1_fire_with_smoke_impact.json'.
Verification passed: 'smoke_impact_score' and 'calculation_message' are present in the first record.


In [11]:
# Spot check: print the first scored feature in full
print(features_with_smoke_impact[0])

{'attributes': {'OBJECTID': 14783, 'USGS_Assigned_ID': 14783, 'Assigned_Fire_Type': 'Wildfire', 'Fire_Year': 1964, 'Fire_Polygon_Tier': 1, 'Fire_Attribute_Tiers': '1 (1), 3 (1)', 'GIS_Acres': 28.468121417707913, 'GIS_Hectares': 11.520639999291427, 'Source_Datasets': 'Comb_National_NIFC_Interagency_Fire_Perimeter_History (1), Comb_National_NPS_Wildland_Fire_Perimeters (1)', 'Listed_Fire_Types': 'Wildfire (2)', 'Listed_Fire_Names': 'LODGE BALD (2)', 'Listed_Fire_Codes': 'No code provided (2)', 'Listed_Fire_IDs': '1964-TNGSP-4011 (2)', 'Listed_Fire_IRWIN_IDs': '', 'Listed_Fire_Dates': 'Listed Wildfire Discovery Date(s): 1964-04-10 (1) | Listed Wildfire Controlled Date(s): 1964-04-10 (1) | Listed Wildfire Out Date(s): 1964-04-13 (1) | Listed Other Fire Date(s): 2017-03-13 - NIFC DATE_CUR field (1) | Listed Upload Date(s): 2012-07-24 (1)', 'Listed_Fire_Causes': 'Natural (1)', 'Listed_Fire_Cause_Class': 'Undetermined (1), Natural (1)', 'Listed_Rx_Reported_Acres': None, 'Listed_Map_Digitize_M

## Smoke Impact per Year

In [14]:
# Fire duration model: duration grows linearly with fire size between two bounds
min_fire_duration = 10  # Days assigned to the smallest fires
max_fire_duration = 150  # Days assigned to the largest fires

def estimate_fire_duration(gis_acres, max_acres=3614020.62856015):
    """
    Estimate how many days a fire burned from its size.
    The estimate scales linearly with gis_acres / max_acres between
    min_fire_duration and max_fire_duration and is clamped to that range.
    """
    size_fraction = gis_acres / max_acres
    duration_span = max_fire_duration - min_fire_duration
    fire_duration = min_fire_duration + (size_fraction * duration_span)

    # Keep the estimate inside [min_fire_duration, max_fire_duration]
    return max(min_fire_duration, min(fire_duration, max_fire_duration))

def calculate_weighted_smoke_impact(features_with_smoke_impact, year):
    """
    Calculate total weighted smoke impact and total acres burned for a given year.

    Only features whose attributes' 'Fire_Year' equals `year` are considered.
    Each contributing fire adds smoke_impact_score / estimated duration to the
    impact total and its GIS_Acres to the acreage total.

    Fires are skipped when GIS_Acres or smoke_impact_score is missing, zero or
    negative. This excludes the -1 score that marks a failed calculation, which
    would otherwise be summed as negative impact.

    Returns the total smoke impact and total acres burned for the year.
    """
    total_weighted_smoke_impact = 0
    total_acres_burned = 0

    for feature in features_with_smoke_impact:
        # Keep only fires from the requested year
        if feature.get('attributes', {}).get('Fire_Year') != year:
            continue

        properties = feature['properties']
        gis_acres = properties.get('GIS_Acres', 0)
        smoke_impact = properties.get('smoke_impact_score', 0)

        # Skip missing (None), zero and negative values; a negative score is
        # the error sentinel, not a real measurement
        if gis_acres is None or smoke_impact is None:
            continue
        if gis_acres <= 0 or smoke_impact <= 0:
            continue

        # Accumulate total acres burned for the year
        total_acres_burned += gis_acres

        # Amortize the smoke impact over the estimated fire duration
        fire_duration = estimate_fire_duration(gis_acres)
        total_weighted_smoke_impact += smoke_impact / fire_duration

    # Return both the total weighted smoke impact and the total acres burned for the year
    return total_weighted_smoke_impact, total_acres_burned

# %%
# Calculate smoke impact and total acres burned for every year in a range (1961 to 2021 by default)
def calculate_smoke_impact_all_years(features=None, start_year=1961, end_year=2021):
    """
    Build one summary row per year from start_year through end_year (inclusive).

    Args:
        features: Scored fire features to aggregate. When None, the notebook's
            global features_with_smoke_impact list is used.
        start_year: First year to report.
        end_year: Last year to report (inclusive).

    Returns a list of dicts with the keys 'year', 'total_weighted_smoke_impact'
    and 'total_acres_burned', in ascending year order.
    """
    if features is None:
        # Default to the notebook-level list so existing no-argument calls keep working
        features = features_with_smoke_impact

    smoke_impact_per_year = []
    for year in range(start_year, end_year + 1):
        weighted_smoke_impact, total_acres_burned = calculate_weighted_smoke_impact(features, year)
        smoke_impact_per_year.append({
            'year': year,
            'total_weighted_smoke_impact': weighted_smoke_impact,
            'total_acres_burned': total_acres_burned
        })
    return smoke_impact_per_year

# Call the function to calculate smoke impact for all years
# (the result is a list with one dict per year)
smoke_impact_results = calculate_smoke_impact_all_years()

# %%
# Convert the results to a DataFrame and save to CSV
# Columns: 'year', 'total_weighted_smoke_impact', 'total_acres_burned'
df_smoke_impact = pd.DataFrame(smoke_impact_results)
# index=False keeps the DataFrame's row index out of the file
df_smoke_impact.to_csv(SMOKE_PER_YEAR_OUTPUT_PATH, index=False)

print(f"Smoke impact results have been saved to '{SMOKE_PER_YEAR_OUTPUT_PATH}'")


Smoke impact results have been saved to 'intermediate/stage1-output/smoke_impacts_per_year.csv'
