In [None]:
import requests
import os
from datetime import datetime
from pathlib import Path
import pyarrow
import geopandas as gpd 
import pandas as pd 
import matplotlib.pyplot as plt

# Base directory that holds the project repos.
# open() and os.path.join() do not expand "~" on their own, so resolve it to
# the user's home directory once here. The trailing slash is kept because the
# data paths are built with plain `root_dir + "..."` concatenation.
root_dir = os.path.expanduser("~/Desktop/Desktop/epidemiology_PhD/00_repos/")
# root_dir = "/Volumes/squirrel-utopia 1/los_angeles_2025_fire_disasters_exp/"

In [None]:
# Project data folders. The sub-paths are appended by string concatenation,
# so root_dir must end with a path separator.
data_dir = root_dir + "los_angeles_2025_fire_disasters_exp/data"
output_dir = root_dir + "los_angeles_2025_fire_disasters_exp/data/calfire_boundaries"

# Latest CalFire fire boundaries, served by the ArcGIS Hub download API
# (GeoJSON output, spatial reference 4326, no row filter: where=1=1)
url = "https://hub.arcgis.com/api/v3/datasets/025fb2ea05f14890b2b11573341b5b18_0/downloads/data?format=geojson&spatialRefId=4326&where=1%3D1"

In [None]:
# Name the download after today's date (pattern: data_YYYY_MM_DD.geojson)
today_stamp = f"{datetime.now():%Y_%m_%d}"
filename = "data_" + today_stamp + ".geojson"
output_path = os.path.join(output_dir, filename)

# Sanity check: show the generated file name and its full path
for value in (filename, output_path):
    print(value)

In [None]:
# Download the file.
# requests has no default timeout, so without one a stalled connection would
# block the kernel forever; allow 10 s to connect and 120 s between bytes read.
response = requests.get(url, timeout=(10, 120))
# Raise requests.HTTPError if the server answered with a 4xx/5xx status
response.raise_for_status()

In [None]:
# Save
# with open(output_path, "wb") as file:
#     file.write(response.content)

# NOTE (lbw): this doesn't work for me! I did it manually.

In [None]:
# Read in data - contains wildfire data going back to 2024 or 2023 - want only
# records dated on or after January 7th, 2025.
fires = gpd.read_file(output_path)
# Compare against the first wanted day with >=. The previous `> '2025-01-06'`
# also kept any record carrying a time of day on January 6th.
fires = fires[fires['poly_DateCurrent'] >= '2025-01-07']

In [None]:
# Plot the date-filtered fire perimeters as a quick visual check
fires.plot()

In [None]:
# Let's list them: 
# - Palisades Fire
# - Eaton Fire
# - Hurst Fire 
# - Auto Fire
# - Sunset Fire
# - Lidia Fire
# - Archer Fire 
# - Kenneth Fire
# as of Jan 14th, 2025

In [None]:
# Inspect the filtered perimeter records before merging them.
# Goal: capture the largest burned area for each fire, so the next step
# unions all of the perimeter geometries together.
print(fires)

In [None]:
# Dissolve every perimeter polygon into a single geometry so that overlapping
# perimeters are not counted twice.
# NOTE(review): newer GeoPandas releases are understood to deprecate
# `unary_union` in favor of `union_all()` - confirm against the installed version.
fire_union = fires.geometry.unary_union

# Wrap the dissolved geometry in a one-row GeoDataFrame that keeps the source CRS
merged_fires = gpd.GeoDataFrame(geometry=[fire_union], crs=fires.crs)
print(merged_fires)

In [None]:
# Plot the merged (unioned) fire footprint as a visual check
merged_fires.plot()

In [None]:
# Load the 2020 ZCTA polygons from the project data folder
zcta_path = os.path.join(data_dir, "zctas_2020.parquet")
zctas = gpd.read_parquet(zcta_path)

# Store each ZCTA's total area; it is the denominator of the burned-share
# calculation further down.
# NOTE(review): the units are whatever the layer's CRS uses - confirm the CRS
# if absolute areas (not just ratios) are ever needed.
zctas["zcta_area"] = zctas.geometry.area

# Echo the path that was read
zcta_path

In [None]:
# Intersect zctas with fires.
# gpd.overlay needs both layers in the same CRS to give meaningful results.
# Reproject the fire footprint onto the ZCTA CRS (not the other way round) so
# that intersection areas stay comparable with the zcta_area column computed
# from `zctas`.
fires_for_overlay = merged_fires
if (
    zctas.crs is not None
    and merged_fires.crs is not None
    and merged_fires.crs != zctas.crs
):
    fires_for_overlay = merged_fires.to_crs(zctas.crs)

zctas_fires = gpd.overlay(zctas, fires_for_overlay, how='intersection')
# Plot the ZCTA x fire intersection pieces
zctas_fires.plot()

In [None]:
# Inspect the ZCTA x fire intersection table
print(zctas_fires)

In [None]:
# Measure each ZCTA x fire intersection polygon and keep it as a column
# (units follow the CRS of zctas_fires)
overlap_area = zctas_fires.geometry.area
zctas_fires['zcta_fire_intersection_area'] = overlap_area
print(zctas_fires)

In [None]:
# From zctas_fires, keep only the ZCTA code, the ZCTA area, the ZCTA x fire
# intersection area, and the geometry. No fire name is selected here.
# The rename is chained instead of using inplace=True: renaming in place on a
# column-subset result can trigger pandas' SettingWithCopyWarning.
zctas_fires = (
    zctas_fires[['ZCTA5CE20', 'zcta_area', 'zcta_fire_intersection_area', 'geometry']]
    .rename(columns={'ZCTA5CE20': 'zcta'})  # ZCTA5CE20 -> zcta
)

In [None]:
# Calculate the burned area within each zcta.
# errors='ignore' keeps this cell re-runnable: on a second run 'geometry' has
# already been dropped and a plain drop would raise KeyError.
zctas_fires = zctas_fires.drop(columns='geometry', errors='ignore')

# Group by zcta: sum the intersection pieces, keep one zcta_area per group.
# NOTE(review): 'first' assumes every row of a zcta carries the same zcta_area;
# if a zcta spans several source rows with different areas, the ratio below can
# exceed 100 - confirm against the ZCTA file.
zctas_fires_sum = (
    zctas_fires
    .groupby('zcta')
    .agg({
        'zcta_fire_intersection_area': 'sum',
        'zcta_area': 'first'  # or 'mean', 'max', etc., depending on your needs
    })
    .reset_index()
)

# Share of each zcta's area that burned, expressed as a percentage (x 100)
zctas_fires_sum['fire_area_ratio'] = zctas_fires_sum['zcta_fire_intersection_area'] / zctas_fires_sum['zcta_area'] * 100


In [None]:
# Inspect the per-zcta burned-area summary
print(zctas_fires_sum)

In [None]:
# Next step: bring in the Kaiser data so it can be merged with the fire summary.
# The counts are stored per zcta in a CSV inside the project data folder.
kaiser_zips_path = os.path.join(data_dir, "kpsc_zcta_counts.csv")
kaiser_zips = pd.read_csv(filepath_or_buffer=kaiser_zips_path)

In [None]:
# Preview the first rows of the Kaiser zcta table
kaiser_zips.head()

In [None]:
# Make the join key a string in both tables (Kaiser first, then the fire
# summary) so the merge compares like with like
for frame in (kaiser_zips, zctas_fires_sum):
    frame['zcta'] = frame['zcta'].astype(str)

# Left join: keep every fire-affected zcta and attach the Kaiser columns.
# No `on=` is given, so pandas joins on all column names the two tables share.
zctas_fires_sum = pd.merge(zctas_fires_sum, kaiser_zips, how='left')

In [None]:
# Cap the burned-area percentage at 100. Values above 100 occur because of
# the way the ZCTA file is set up.
capped_pct = zctas_fires_sum['fire_area_ratio'].clip(upper=100)
zctas_fires_sum['fire_area_ratio'] = capped_pct

# Estimated people affected = burned percentage x Kaiser age-60+ count / 100
zctas_fires_sum['ppl_affected'] = (
    zctas_fires_sum['fire_area_ratio']
    .mul(zctas_fires_sum['kpsc_pop_age_60p'])
    .div(100)
)

In [None]:
# Remove column 'classification'. errors='ignore' keeps the cell re-runnable
# after the column has already been removed.
zctas_fires_sum = zctas_fires_sum.drop(columns='classification', errors='ignore')

# Sort descending by ppl affected (rows with missing values sort last)
zctas_fires_sum = zctas_fires_sum.sort_values(by='ppl_affected', ascending=False)

# Round ppl affected to the nearest integer.
# The vectorized round is NaN-safe: the built-in round() raises ValueError on
# NaN, which the left join produces for zctas with no Kaiser count. The
# nullable 'Int64' dtype keeps whole-number output and stores those rows as <NA>.
# NOTE(review): this rounds to nearest, as the original round() did - switch to
# a ceiling if rounding *up* was the intent.
zctas_fires_sum['ppl_affected'] = zctas_fires_sum['ppl_affected'].round().astype('Int64')

In [None]:
# Save the final per-zcta table next to the other project data, without the
# pandas index column
zctas_fires_sum_path = os.path.join(
    data_dir,
    'num_kaiser_pop_age_60_affected_la_wf.csv',
)
zctas_fires_sum.to_csv(path_or_buf=zctas_fires_sum_path, index=False)
