In [None]:
import requests
import os
from datetime import datetime
from pathlib import Path
import pyarrow
import pandas as pd

In [None]:
# Download endpoint for the perimeter dataset: an ArcGIS Hub export requested
# as GeoJSON, in spatial reference 4326, with a match-all filter
# (`where=1%3D1` is the URL-encoded form of `1=1`).
url = (
    "https://hub.arcgis.com/api/v3/datasets/"
    "025fb2ea05f14890b2b11573341b5b18_0/downloads/data"
    "?format=geojson&spatialRefId=4326&where=1%3D1"
)


In [None]:
# Directory to save the downloaded file.
# The default is the original location on the external volume; set the
# CALFIRE_OUTPUT_DIR environment variable to redirect the download on a
# machine where that volume is not mounted, without editing the notebook.
output_dir = os.environ.get(
    "CALFIRE_OUTPUT_DIR",
    "/Volumes/squirrel-utopia 1/los_angeles_2025_fire_disasters_exp/los_angeles_2025_fire_disasters_exp/data/calfire_boundaries",
)

In [None]:
# Name the download after today's date (local clock, YYYY_MM_DD) and build
# the full destination path inside output_dir. Both values are echoed, one
# per line, so the reader can confirm where the file will land.
filename = "data_{:%Y_%m_%d}.geojson".format(datetime.now())
output_path = os.path.join(output_dir, filename)
print(filename, output_path, sep="\n")

In [None]:
# Download the file.
# requests applies no timeout unless one is given, so a stalled server would
# block this cell indefinitely. The tuple is (connect timeout, read timeout)
# in seconds; the read timeout is generous because the export can be large.
response = requests.get(url, timeout=(10, 300))
# Raise requests.HTTPError on a 4xx/5xx status instead of saving an error page.
response.raise_for_status()

In [None]:
# Inspect the HTTP headers of the download response (the body itself is
# written to disk in a later cell).
print(response.headers)

In [None]:
# Save the downloaded bytes to the date-stamped path.
# The destination directory is created first (no-op when it already exists)
# so that open() does not fail with FileNotFoundError on a fresh machine.
target_dir = os.path.dirname(output_path)
if target_dir:
    os.makedirs(target_dir, exist_ok=True)

with open(output_path, "wb") as file:
    file.write(response.content)

print(f"File downloaded and saved as {output_path}")

In [None]:
# Load the file saved at output_path into a GeoDataFrame named `gdf`.
# (Plotting happens in a later cell; this cell only reads.)
# matplotlib.pyplot is imported here but `plt` is not referenced by name in
# the cells visible in this notebook.
import geopandas as gpd
import matplotlib.pyplot as plt

gdf = gpd.read_file(output_path)
 

In [None]:
   

# Quick-look map of every geometry in the downloaded file (no date filter yet).
# A title makes the figure self-explanatory; the trailing `;` keeps the
# notebook from echoing the matplotlib object repr under the plot.
ax = gdf.plot()
ax.set_title("Fire perimeters (all dates)");

In [None]:
# Preview the first rows with the notebook's rich table display instead of
# printing the whole GeoDataFrame as plain text.
gdf.head()


In [None]:
# Inspect the schema before filtering: the available columns and the dtype of
# poly_DateCurrent, the column the date filter in the next cell relies on.
print(gdf.columns)
print(gdf['poly_DateCurrent'].dtype)
# A few sample rows are enough here; the full frame was already shown above.
gdf.head()

In [None]:
# Treat poly_DateCurrent as the relevant date for each perimeter and keep only
# perimeters dated January 7th, 2025 or later.
# The comparison is inclusive against the start of Jan 7: the earlier
# `> '2025-01-06'` form also kept any perimeter stamped after midnight on
# Jan 6, which contradicts the "after January 6th" intent.
# NOTE(review): this relies on poly_DateCurrent being either a datetime column
# or ISO-8601 text (check the dtype printed in the inspection cell) — confirm.
gdf = gdf[gdf['poly_DateCurrent'] >= '2025-01-07']


In [None]:
# Map the perimeters that survived the date filter.
# A title makes the figure self-explanatory; the trailing `;` keeps the
# notebook from echoing the matplotlib object repr under the plot.
ax = gdf.plot()
ax.set_title("Fire perimeters after date filter");

In [None]:
# pyarrow is imported again here (it is already imported in the first cell);
# the name itself is not used below — presumably it is imported to make sure
# the parquet engine is available before reading. TODO confirm.
import pyarrow
# Read the ZCTA polygons (file name indicates 2020 vintage) from the data
# folder into a GeoDataFrame.
zctas = gpd.read_parquet('/Volumes/squirrel-utopia 1/los_angeles_2025_fire_disasters_exp/los_angeles_2025_fire_disasters_exp/data/zctas_2020.parquet')

In [None]:
# Quick-look map of the ZCTA polygons.
# A title makes the figure self-explanatory; the trailing `;` keeps the
# notebook from echoing the matplotlib object repr under the plot.
ax = zctas.plot()
ax.set_title("ZCTA boundaries");

In [None]:
# Show the coordinate reference system of each layer, one per line:
# first the ZCTAs, then the fire perimeters. They must match before overlay.
print(zctas.crs, gdf.crs, sep="\n")

In [None]:
# Reproject the fire perimeters into the ZCTA layer's CRS so both layers share
# one coordinate system for the overlay below.
zcta_crs = zctas.crs
gdf = gdf.to_crs(zcta_crs)


In [None]:
# Store each ZCTA's full polygon area as `zcta_area`; it is the denominator of
# the burned-share calculation later on.
# NOTE(review): the area is expressed in the units of zctas.crs. If that CRS
# is geographic (degrees) these are not true areas — confirm against the CRS
# printed above.
zctas['zcta_area'] = zctas.geometry.area

In [None]:
# Intersect the ZCTA polygons with the fire perimeters. The result holds the
# pieces of ZCTAs that fall inside a perimeter, carrying columns from both
# layers.
# NOTE(review): if perimeters in gdf overlap one another, the overlapping
# ground would appear once per perimeter here and be counted more than once
# when intersection areas are summed per ZCTA later — verify.
zctas_fires = gpd.overlay(zctas, gdf, how='intersection')
# Map the intersected pieces as a visual sanity check.
zctas_fires.plot()

In [None]:
# Record the area of every ZCTA-by-fire intersection piece; this is the
# numerator of the burned-share calculation. Units follow the layer's CRS,
# the same as for zcta_area.
zctas_fires['zcta_fire_intersection_area'] = zctas_fires.geometry.area

In [None]:
# Preview the intersection result with the notebook's rich table display
# instead of printing the whole GeoDataFrame as plain text.
zctas_fires.head()

In [None]:
# Narrow zctas_fires to the fields used downstream: the ZCTA code, both area
# measures, the fire's name, and the geometry.
keep_columns = [
    'ZCTA5CE20',
    'zcta_area',
    'zcta_fire_intersection_area',
    'incident_name',
    'geometry',
]
zctas_fires = zctas_fires[keep_columns]

In [None]:
# Collapse to one row per ZCTA: add up the intersection areas of all pieces in
# the ZCTA, and carry the ZCTA's own area along by taking the first value seen
# in each group. Geometry and fire name are not part of the aggregated result.
area_aggregations = {
    'zcta_fire_intersection_area': 'sum',
    'zcta_area': 'first',
}
pieces_by_zcta = zctas_fires.groupby('ZCTA5CE20')
zctas_fires_sum = pieces_by_zcta.agg(area_aggregations).reset_index()



In [None]:
# Preview the per-ZCTA totals with the notebook's rich table display.
zctas_fires_sum.head()

In [None]:
# Share of each ZCTA's area that lies inside a fire perimeter.
# Despite the column name, the value is a percentage (0-100 scale), because
# the area quotient is multiplied by 100.
area_quotient = zctas_fires_sum['zcta_fire_intersection_area'] / zctas_fires_sum['zcta_area']
zctas_fires_sum['fire_area_ratio'] = area_quotient * 100

In [None]:
# Preview the table again now that fire_area_ratio has been added.
zctas_fires_sum.head()

In [None]:
# Give the ZCTA code column the short name `zcta`, the key name used by the
# Kaiser table it is joined to later.
zctas_fires_sum = zctas_fires_sum.rename({'ZCTA5CE20': 'zcta'}, axis='columns')

In [None]:
# Read the Kaiser (KPSC) per-ZCTA counts.
# `zcta` is read as text from the start: letting pandas infer a numeric dtype
# would drop leading zeros, and would turn codes into floats such as
# "90210.0" if the column contains any blank, which then silently fail to
# match in the join. pandas is already imported in the first cell.
kaiser_zips = pd.read_csv(
    '/Volumes/squirrel-utopia 1/los_angeles_2025_fire_disasters_exp/los_angeles_2025_fire_disasters_exp/data/kpsc_zcta_counts.csv',
    dtype={'zcta': str},
)

In [None]:
# Cast the join key to str in both tables (Kaiser counts first, then the fire
# totals) so the merge compares values of the same type.
for table in (kaiser_zips, zctas_fires_sum):
    table['zcta'] = table['zcta'].astype(str)

In [None]:
# Preview the Kaiser counts with the notebook's rich table display.
kaiser_zips.head()

In [None]:
# Left-join the Kaiser counts onto the per-ZCTA fire totals: every fire-affected
# ZCTA is kept, and ZCTAs absent from the Kaiser table get NaN counts.
# The key is named explicitly; without `on`, merge joins on every column name
# the two tables happen to share, which changes silently if either gains a column.
zctas_fires_sum = zctas_fires_sum.merge(kaiser_zips, how='left', on='zcta')

In [None]:
# Preview the joined table with the notebook's rich table display.
zctas_fires_sum.head()

In [None]:
# Cap the burned share at 100 percent. Per the original author, values above
# 100 arise from the way the ZCTA file is set up.
zctas_fires_sum['fire_area_ratio'] = zctas_fires_sum['fire_area_ratio'].clip(upper=100)

# Estimated number of Kaiser members aged 60+ affected in each ZCTA:
# burned share of the ZCTA times its member count.
# fire_area_ratio is a percentage (0-100), so it is converted back to a
# fraction first; multiplying the percentage directly by the population
# overstated the count by a factor of 100.
zctas_fires_sum['ppl_affected'] = (
    zctas_fires_sum['fire_area_ratio'] / 100 * zctas_fires_sum['kpsc_pop_age_60p']
)

In [None]:
# Order ZCTAs from most to fewest people affected (rows with a missing
# estimate sort last).
zctas_fires_sum = zctas_fires_sum.sort_values(by='ppl_affected', ascending=False)

# Round the estimate to the nearest whole person.
# The vectorised round plus the nullable Int64 dtype keeps missing estimates
# as <NA>; the previous per-element `round(x)` raised ValueError on NaN, which
# occurs for any ZCTA without a match in the Kaiser table after the left join.
zctas_fires_sum['ppl_affected'] = zctas_fires_sum['ppl_affected'].round().astype('Int64')

# Write the final table as CSV (without the index column).
zctas_fires_sum.to_csv('/Volumes/squirrel-utopia 1/los_angeles_2025_fire_disasters_exp/los_angeles_2025_fire_disasters_exp/data/num_kaiser_pop_age_60_affected_la_wf.csv', index=False)