In [37]:
import json
import geojson
import pandas as pd

In [38]:
# Load the registry of intermediate file paths shared by the pipeline stages.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the read
# does not depend on the platform's default locale encoding.
with open('intermediate_file_paths.json', encoding='utf-8') as output_path_file:
    output_paths = json.load(output_path_file)


# Index the registry directly rather than using .get(): a missing key raises a
# KeyError naming the key right here, instead of producing None and failing
# later with an opaque TypeError inside open().
FIRE_INPUT_PATH = output_paths['stage0_fire_json']
# Alternative input (presumably a smaller proof-of-concept dataset -- confirm):
#FIRE_INPUT_PATH = output_paths['POC_stage0_fire_json']

In [39]:
# Path the scored features are written to at the end of this notebook.
# Indexed directly (not .get()) so a missing registry key fails immediately
# with a KeyError instead of yielding None and breaking the later open() call.
SMOKE_IMPACT_OUTPUT_PATH = output_paths['stage1_smoke_impact_json']
# Alternative output (presumably for the proof-of-concept dataset -- confirm):
#SMOKE_IMPACT_OUTPUT_PATH = output_paths['POC_stage1_smoke_impact_json']


In [40]:
# Read the fire records produced by the previous stage. The encoding is pinned
# to UTF-8 so decoding does not vary with the platform's default locale.
with open(FIRE_INPUT_PATH, encoding='utf-8') as fire_file:
    fire_data = json.load(fire_file)

In [41]:
# Function to check the number of records and ensure all have the same keys
def check_records(data, record_type="Record"):
    """Count the records and verify that all of them share the same attribute keys.

    The key set of the first record's 'attributes' dict is the reference; every
    record whose 'attributes' key set differs is reported on stdout and collected.
    A record without an 'attributes' entry is treated as having no keys.

    Args:
        data: Sequence of dict-like records, each optionally holding an
            'attributes' dict.
        record_type: Label used in the printed report (lower-cased and
            pluralised with a trailing "s" in the summary lines).

    Returns:
        A tuple ``(error_records, first_record_keys)``: the list of records whose
        keys differ from the reference, and the reference key set. For empty
        input this is ``([], set())``.
    """
    label = record_type.lower()

    # With no records there is no first record to use as the reference;
    # report the empty input instead of raising IndexError on data[0].
    if not data:
        print(f"\nTotal number of {label}s: 0")
        print(f"No {label}s to check.")
        return [], set()

    # The first record defines the expected key set.
    first_record_keys = set(data[0].get('attributes', {}).keys())

    error_records = []
    for index, feature in enumerate(data):
        current_keys = set(feature.get('attributes', {}).keys())

        if current_keys != first_record_keys:
            print(f"{record_type} {index} has a different set of keys.")
            print(f"Expected keys: {first_record_keys}")
            print(f"Found keys: {current_keys}")
            error_records.append(feature)

    # Final report. The mismatch count is simply the number of collected records.
    mismatched_count = len(error_records)
    print(f"\nTotal number of {label}s: {len(data)}")

    if mismatched_count == 0:
        print(f"All {label}s contain the same set of keys.")
    else:
        print(f"{mismatched_count} {label}s have mismatched keys.")

    return error_records, first_record_keys


In [42]:
def filter_features(fire_data, filter_variable_name, filter_value):
    """Select the features whose named attribute equals a given value.

    Args:
        fire_data: Mapping expected to hold a 'features' list of dict-like
            features, each optionally carrying an 'attributes' dict.
        filter_variable_name: Attribute key to compare (e.g. 'Fire_Year').
        filter_value: Value the attribute must equal for a feature to be kept.

    Returns:
        List of matching features in their original order; an empty list when
        ``fire_data`` has no 'features' key. A feature lacking the attribute
        compares as ``None``.
    """
    # Without a 'features' entry there is nothing to filter.
    if 'features' not in fire_data:
        return []

    matches = []
    for candidate in fire_data['features']:
        candidate_attributes = candidate.get('attributes', {})
        if candidate_attributes.get(filter_variable_name) == filter_value:
            matches.append(candidate)

    return matches

In [43]:
def get_smoke_impact(gis_acres, dist_from_memphis, max_acres=3614020.62856015, max_distance=650):
    """Estimate a 0-100 smoke impact score from fire size and distance.

    The score is
    ``100 * (gis_acres / max_acres) * (1 - (dist_from_memphis / max_distance) ** 2)``,
    clamped to the range 0..100. The squared distance term makes the score fall
    off faster as the fire gets farther away.

    Problems are reported through the return value, not by raising: the score
    is ``-1`` and the message starts with ``"Error:"``.

    Args:
        gis_acres: Fire size in acres, or None when unknown.
        dist_from_memphis: Distance from the city (the error message states
            miles), or None when unknown.
        max_acres: Normaliser for fire size; must be positive. The default is
            presumably the largest fire in the dataset -- confirm.
        max_distance: Distance cut-off and normaliser; must be positive.

    Returns:
        A tuple ``(score, message)``.
    """
    # Missing inputs are data problems, reported per record.
    if gis_acres is None:
        return -1, "Error: Fire size (GIS_Acres) is missing."
    if dist_from_memphis is None:
        return -1, "Error: Distance from city is missing."
    if dist_from_memphis > max_distance:
        return -1, f"Error: Distance from city exceeds max distance of {max_distance} miles."

    # Both normalisers are divisors below. A zero value would raise
    # ZeroDivisionError and a negative one would give a meaningless score,
    # so report them through the same (-1, message) channel as other errors.
    if max_acres <= 0:
        return -1, "Error: max_acres must be greater than zero."
    if max_distance <= 0:
        return -1, "Error: max_distance must be greater than zero."

    # Size contributes linearly, distance quadratically.
    size_factor = gis_acres / max_acres
    distance_factor = 1 - (dist_from_memphis / max_distance) ** 2
    smoke_impact = 100 * size_factor * distance_factor

    # Clamp to the documented 0..100 range.
    smoke_impact = max(0, min(smoke_impact, 100))

    return smoke_impact, "Successful calculation"


In [44]:
# Validate the raw fire records: print the record count and flag every feature
# whose 'attributes' keys differ from those of the first feature.
print("\nChecking fire_data...")
fire_data_errors, fire_data_keys = check_records(
    fire_data['features'],
    record_type="Fire data record",
)

# Keep any inconsistent records on disk so they can be inspected later.
if len(fire_data_errors) > 0:
    with open('fire_data_error_records.json', 'w') as error_sink:
        json.dump({"features": fire_data_errors}, error_sink, indent=4)
    print("Fire data error records have been written to 'fire_data_error_records.json'.")


Checking fire_data...

Total number of fire data records: 28270
All fire data records contain the same set of keys.


In [45]:
# Score every fire feature and collect the scored features. The features are
# modified in place, so the entries of fire_data['features'] change as well.
features_with_smoke_impact = []

for feature in fire_data['features']:
    # Inputs to the score come from the feature's 'attributes' dict.
    feature_attributes = feature.get('attributes', {})
    dist_from_memphis = feature_attributes.get('distance')
    gis_acres = feature_attributes.get('GIS_Acres')

    # The score is -1 with an explanatory message when it cannot be computed.
    smoke_impact, message = get_smoke_impact(gis_acres, dist_from_memphis)

    # GeoJSON keeps per-feature data under 'properties'. When that key is
    # absent it is bound to the SAME dict object as 'attributes' (not a copy),
    # so the two keys added below also become visible under 'attributes'.
    feature_properties = feature.setdefault('properties', feature_attributes)
    feature_properties.update(
        smoke_impact_score=smoke_impact,
        calculation_message=message,
    )

    features_with_smoke_impact.append(feature)

In [46]:
# features_with_smoke_impact = []

# for feature in fire_data['features']:
#     #print(feature)

#     dist_from_memphis = feature.get('attributes').get('distance')
#     gis_acres = feature.get('attributes').get('GIS_Acres')

#     #print(f'distance from memphis: {dist_from_memphis}')
#     #print(f'gis_acres: {gis_acres}')

#     # Calculate smoke impact using the new formula, now returning both a score and a message
#     smoke_impact, message = get_smoke_impact(gis_acres, dist_from_memphis)

#     #print(f'smoke impact score: {smoke_impact}')
#     #print(f'message: {message}')

#     # Add smoke impact score and message to the feature
#     feature['smoke_impact_score'] = smoke_impact
#     feature['calculation_message'] = message
#     features_with_smoke_impact.append(feature)

In [47]:
# Re-run the key consistency check on the scored features. check_records
# compares each feature's 'attributes' keys against those of the first feature.
print("\nChecking features_with_smoke_impact...")
smoke_impact_errors, smoke_impact_keys = check_records(
    features_with_smoke_impact,
    record_type="Smoke impact record",
)

# Keep any inconsistent records on disk so they can be inspected later.
if len(smoke_impact_errors) > 0:
    with open('smoke_impact_error_records.json', 'w') as error_sink:
        json.dump({"features": smoke_impact_errors}, error_sink, indent=4)
    print("Smoke impact error records have been written to 'smoke_impact_error_records.json'.")


Checking features_with_smoke_impact...

Total number of smoke impact records: 28270
All smoke impact records contain the same set of keys.


In [48]:
# # Prepare the FeatureCollection for GeoJSON
# geojson_data = geojson.FeatureCollection(features_with_smoke_impact)

# # Output features_with_smoke_impact to a GeoJSON file
# with open("intermediate/stage1-output/GEOJSON_stage1_fire_with_smoke_impact.json", 'w') as output_file:
#     geojson.dump(geojson_data, output_file)

In [49]:
# Wrap the scored features in a GeoJSON FeatureCollection and write it to the
# configured output path.
geojson_data = geojson.FeatureCollection(features_with_smoke_impact)

output_path = SMOKE_IMPACT_OUTPUT_PATH
with open(output_path, 'w') as output_file:
    geojson.dump(geojson_data, output_file)

print(f"GeoJSON file written to '{output_path}'.")

# Read the file back and spot-check the first record, to confirm the new
# fields survived serialisation. Only the first record is inspected.
with open(output_path, 'r') as input_file:
    geojson_data_loaded = geojson.load(input_file)

loaded_features = geojson_data_loaded['features']

if not loaded_features:
    # An empty collection has no first record; report that instead of
    # raising IndexError on loaded_features[0].
    print("Verification failed: the written GeoJSON file contains no features.")
else:
    first_record = loaded_features[0]
    properties = first_record.get('properties', {})

    # Fields this stage is expected to have added to every feature.
    missing_fields = [
        field_name
        for field_name in ('smoke_impact_score', 'calculation_message')
        if field_name not in properties
    ]

    if not missing_fields:
        print("Verification passed: 'smoke_impact_score' and 'calculation_message' are present in the first record.")
    else:
        print(f"Verification failed: The following fields are missing in the first record: {missing_fields}")


GeoJSON file written to 'intermediate/stage1-output/stage1_fire_with_smoke_impact.json'.
Verification passed: 'smoke_impact_score' and 'calculation_message' are present in the first record.


In [50]:
# # Output features_with_smoke_impact to a JSON file
# with open(SMOKE_IMPACT_OUTPUT_PATH, 'w') as output_file:
#     json.dump({"features": features_with_smoke_impact}, output_file, indent=4)

# print(f'Features with smoke impact json has been saved here: {SMOKE_IMPACT_OUTPUT_PATH}')
    

In [51]:
# for feature in features_with_smoke_impact:
#     print(feature['smoke_impact_score'], feature['calculation_message'])


In [52]:
# # Example: Filter features where Fire_Year is 2009
# filtered_features = filter_features(fire_data,'Fire_Year' , 2009)

# # Output the filtered features
# for feature in filtered_features:
#     print(json.dumps(feature, indent=4))

# num_filtered_features = len(filtered_features)

# # Output the number of filtered features
# print(f"Number of features with Fire_Year 2009: {num_filtered_features}")