# Sourcing fire data
This notebook stitches together downloaded jsons from fire.ca.gov to create several dataframes used in the main notebook.

The underlying JSON files are not provided, but you can download them yourself from fire.ca.gov and run this notebook to create updated files.

In [1]:
import json  # for raw fire data

import numpy as np  # for nan
import pandas as pd  # to convert json data to dataframe
import pyarrow.feather as feather  # lightweight export of dataframe
from scipy.spatial.distance import cdist

In [2]:
# Locations of the raw CAL FIRE JSON exports (input) and the derived feather
# files (output). The JSON can be downloaded from fire.ca.gov; replace the
# paths as appropriate.
path_to_read_data = path_to_write_data = "./data"

# Year range of the incident files to load. `start` is read first; the loading
# loop then walks range(start + 1, end), so `end` itself is NOT loaded.
start, end = 2016, 2021

In [3]:
# Start the combined incident list with the records of the first year.
first_year_file = f"{path_to_read_data}/calfire-{start}.json"
with open(first_year_file) as f:
    start_data = json.load(f)

# Each yearly export keeps its incidents under a "firedata<year>" key.
annual_data = start_data[f"firedata{start}"]
print(f"Number of fires in {start} : {len(annual_data)}")

Number of fires in 2016 : 154


In [4]:
# Append the incidents of every later year to annual_data. `end` is exclusive,
# so the last file read is calfire-<end - 1>.json.
# NOTE: this cell mutates annual_data in place; re-running it without first
# re-running the cell that initialises annual_data appends the same records again.
for year in range(start + 1, end):
    year_file = f"{path_to_read_data}/calfire-{year}.json"
    with open(year_file) as f:
        additional_data = json.load(f)
    new_data = additional_data[f"firedata{year}"]
    print(f"Number of fires in {year} : {len(new_data)}")
    annual_data.extend(new_data)

Number of fires in 2017 : 427
Number of fires in 2018 : 302
Number of fires in 2019 : 262
Number of fires in 2020 : 255


In [5]:
# Turn the list of incident records into a dataframe, then keep only the
# incidents whose burned acreage is not 0 (zero-acre entries are trivial).
all_incidents = pd.DataFrame.from_dict(annual_data)
has_burned_area = all_incidents["AcresBurnedDisplay"] != 0
df = all_incidents.loc[has_burned_area]

Creating a dataframe that relates each solar data station to each wildfire location, and a dataframe with the fire latitudes/longitudes

In [6]:
# Name, coordinates and start date of every retained fire
fire_latlon = df.filter(
    items=["Name", "Latitude", "Longitude", "StartedDate"], axis=1
)

# Pair the coordinates into (lat, lon) tuples, one per fire
fire_latlon["LatLon"] = list(zip(fire_latlon["Latitude"], fire_latlon["Longitude"]))

# Keep the start date only as a YYYY-MM-DD string in "When"
started = pd.to_datetime(fire_latlon["StartedDate"])
fire_latlon["When"] = started.dt.strftime("%Y-%m-%d")
fire_latlon = fire_latlon.drop(columns="StartedDate")
fire_latlon.head()

Unnamed: 0,Name,Latitude,Longitude,LatLon,When
0,Creek Fire,38.40958,-122.43172,"(38.40958, -122.43172)",2016-10-10
1,Taglio Fire,37.2171,-121.08036,"(37.2171, -121.08036)",2016-04-24
3,Tulloch Fire,37.927613,-120.528836,"(37.927613, -120.528836)",2016-05-30
4,Metz Fire,36.38123,-121.20059,"(36.38123, -121.20059)",2016-05-22
5,Wheatland Fire,34.276,-118.354,"(34.276, -118.354)",2016-05-23


In [7]:
# Prefix each fire name with its start date (no separator) and drop the helper
# column. pop() removes "When", so this cell cannot be re-run on its own.
date_prefix = fire_latlon.pop("When").astype(str)
fire_latlon["Name"] = date_prefix + fire_latlon["Name"]

In [8]:
# Station names and coordinates, read from a feather file hosted on S3
station_source = "s3://data.atoti.io/notebooks/ca-solar/nsrdb_station_lat_lon.feather"
solar_df = pd.read_feather(station_source)

# (lat, lon) tuples, the same layout used for the fires
solar_df["LatLon"] = list(zip(solar_df["Latitude"], solar_df["Longitude"]))
solar_df.head()

Unnamed: 0,Station,Latitude,Longitude,LatLon
0,station_000071236,32.650002,-117.059998,"(32.650001525878906, -117.05999755859375)"
1,station_000071239,32.650002,-116.940002,"(32.650001525878906, -116.94000244140625)"
2,station_000071242,32.650002,-116.82,"(32.650001525878906, -116.81999969482422)"
3,station_000071245,32.650002,-116.699997,"(32.650001525878906, -116.69999694824219)"
4,station_000071248,32.650002,-116.580002,"(32.650001525878906, -116.58000183105469)"


In [9]:
# Pairwise distance matrix: one row per station, one column per fire.
# cdist is called with its default metric on raw (lat, lon) pairs, so the
# values are Euclidean distances in coordinate degrees, not geodesic distances.
station_points = solar_df["LatLon"].tolist()
fire_points = fire_latlon["LatLon"].tolist()
fire_station_dist = cdist(station_points, fire_points)

# Label every column with the date-prefixed fire name
fs_dist = pd.DataFrame(fire_station_dist, columns=fire_latlon["Name"])
fs_dist.head()

Name,2016-10-10Creek Fire,2016-04-24Taglio Fire,2016-05-30Tulloch Fire,2016-05-22Metz Fire,2016-05-23Wheatland Fire,2016-05-25DeWolf Fire,2016-06-07Coe Fire,2016-05-27Dunstone Fire,2016-05-2970 Fire,2016-05-10Shedd Fire,...,2020-10-26Blue Ridge Fire,2020-11-01Cypress Fire,2020-11-06San Dimas Fire,2020-11-17257 Fire,2020-12-01Airport Fire,2020-12-03Cerritos Fire,2020-12-02Bond Fire,2020-12-03Thomas Fire,2020-12-07Cornell Fire,2020-12-13Sanderson Fire
0,7.875795,6.084546,6.315538,5.573739,2.078055,4.964468,6.282299,8.185195,8.318893,4.469197,...,1.372759,1.456257,1.659798,9.583624,1.36409,1.123785,1.25486,9.536479,2.629766,1.28269
1,7.958122,6.164491,6.382234,5.663451,2.154823,5.028208,6.366208,8.255114,8.384913,4.559569,...,1.430551,1.498072,1.71516,9.631965,1.417674,1.129267,1.317824,9.578845,2.722933,1.285922
2,8.041402,6.245723,6.450477,5.754271,2.235403,5.093982,6.451263,8.32618,8.452124,4.651288,...,1.495759,1.548085,1.776917,9.681554,1.479075,1.147343,1.388329,9.622525,2.818137,1.300268
3,8.125601,6.328188,6.520214,5.846144,2.319396,5.161708,6.537415,8.398359,8.520496,4.744268,...,1.567456,1.605526,1.844423,9.732371,1.547362,1.177433,1.465283,9.667496,2.915173,1.325368
4,8.210685,6.411834,6.591394,5.939015,2.406437,5.231305,6.624615,8.471619,8.589996,4.838432,...,1.644787,1.669625,1.917065,9.784392,1.62166,1.218645,1.54772,9.713739,3.013856,1.360624


In [10]:
# Put the station identifier next to its row of distances (joined on row index)
dist = pd.concat(
    objs=[solar_df["Station"], fs_dist],
    axis="columns",
)

In [11]:
# Preview the first rows of the station-by-fire distance table
dist.head(n=5)

Unnamed: 0,Station,2016-10-10Creek Fire,2016-04-24Taglio Fire,2016-05-30Tulloch Fire,2016-05-22Metz Fire,2016-05-23Wheatland Fire,2016-05-25DeWolf Fire,2016-06-07Coe Fire,2016-05-27Dunstone Fire,2016-05-2970 Fire,...,2020-10-26Blue Ridge Fire,2020-11-01Cypress Fire,2020-11-06San Dimas Fire,2020-11-17257 Fire,2020-12-01Airport Fire,2020-12-03Cerritos Fire,2020-12-02Bond Fire,2020-12-03Thomas Fire,2020-12-07Cornell Fire,2020-12-13Sanderson Fire
0,station_000071236,7.875795,6.084546,6.315538,5.573739,2.078055,4.964468,6.282299,8.185195,8.318893,...,1.372759,1.456257,1.659798,9.583624,1.36409,1.123785,1.25486,9.536479,2.629766,1.28269
1,station_000071239,7.958122,6.164491,6.382234,5.663451,2.154823,5.028208,6.366208,8.255114,8.384913,...,1.430551,1.498072,1.71516,9.631965,1.417674,1.129267,1.317824,9.578845,2.722933,1.285922
2,station_000071242,8.041402,6.245723,6.450477,5.754271,2.235403,5.093982,6.451263,8.32618,8.452124,...,1.495759,1.548085,1.776917,9.681554,1.479075,1.147343,1.388329,9.622525,2.818137,1.300268
3,station_000071245,8.125601,6.328188,6.520214,5.846144,2.319396,5.161708,6.537415,8.398359,8.520496,...,1.567456,1.605526,1.844423,9.732371,1.547362,1.177433,1.465283,9.667496,2.915173,1.325368
4,station_000071248,8.210685,6.411834,6.591394,5.939015,2.406437,5.231305,6.624615,8.471619,8.589996,...,1.644787,1.669625,1.917065,9.784392,1.62166,1.218645,1.54772,9.713739,3.013856,1.360624


In [12]:
# Reshape from wide (one column per fire) to long (one row per station/fire
# pair). ignore_index=False keeps each station's original row label.
dist_df = dist.melt(
    id_vars="Station",
    var_name="Fire",
    value_name="Distance",
    ignore_index=False,
)

dist_df.head()

Unnamed: 0,Station,Fire,Distance
0,station_000071236,2016-10-10Creek Fire,7.875795
1,station_000071239,2016-10-10Creek Fire,7.958122
2,station_000071242,2016-10-10Creek Fire,8.041402
3,station_000071245,2016-10-10Creek Fire,8.125601
4,station_000071248,2016-10-10Creek Fire,8.210685


In [13]:
# Fire coordinates without the tuple column, keyed by "Fire" instead of "Name"
fire_loc = fire_latlon.drop(columns="LatLon").rename(columns={"Name": "Fire"})

In [14]:
# Persist the long-format distances and the fire coordinates as zstd-compressed feather files
distance_file = f"{path_to_write_data}/distance.feather"
fire_loc_file = f"{path_to_write_data}/fire_loc.feather"

feather.write_feather(dist_df, distance_file, compression="zstd")
feather.write_feather(fire_loc, fire_loc_file, compression="zstd")

Creating a dataframe holding the key fire data

In [15]:
# Preview the filtered incident dataframe before extracting the key columns
df.head(n=5)

Unnamed: 0,IncidentId,UniqueId,Name,Location,Latitude,Longitude,AcresBurnedDisplay,PercentContainedDisplay,ExternalIncidentLink,CountiesList,UpdatedDate,StartedDate,AdminUnit,IncidentTypeDisplay,Url,IsActive
0,998,3135c367-1274-4cc4-9152-661f6fd7707e,Creek Fire,Dry Creek Road 3 miles west of Yountville,38.40958,-122.43172,65,100%,,Napa,2016-10-12 18:00:00,2016-10-10,CAL FIRE Sonoma-Lake-Napa Unit,,https://www.fire.ca.gov/incidents/2016/10/10/c...,N
1,1075,7106ba43-100b-41f9-acbf-9fb9e9ab9ef7,Taglio Fire,"I-5 & Taglio Road, east of Gustine",37.2171,-121.08036,30,100%,,Merced,2016-04-24 15:30:00,2016-04-24,CAL FIRE Madera-Mariposa-Merced Unit,,https://www.fire.ca.gov/incidents/2016/4/24/ta...,N
3,1088,ffec404e-dea9-4175-8c50-8f2b0ca3efdd,Tulloch Fire,"off New Peoria Flat Road and Old Melones Road,...",37.927613,-120.528836,85,100%,,Tuolumne,2016-06-01 19:45:00,2016-05-30,CAL FIRE Tuolumne-Calaveras Unit,,https://www.fire.ca.gov/incidents/2016/5/30/tu...,N
4,1090,55fa95e6-f09e-42c4-889b-a4b5b63dccf2,Metz Fire,"Metz Rd & Blue Jay Ln, southeast of Soledad",36.38123,-121.20059,3876,100%,,Monterey,2016-05-25 18:15:00,2016-05-22,CAL FIRE San Benito-Monterey Unit,,https://www.fire.ca.gov/incidents/2016/5/22/me...,N
5,1091,ace1f464-7a70-4dcc-b8c5-d64fb1021d0e,Wheatland Fire,"Hwy 210 and Wheatland Ave, southeast of Lake V...",34.276,-118.354,156,100%,,Los Angeles,2016-05-25 18:45:00,2016-05-23,LA County Fire Department/Angeles National Forest,,https://www.fire.ca.gov/incidents/2016/5/23/wh...,N


In [16]:
# Size and timeline columns for every retained fire
fire_data = df.filter(
    items=["Name", "AcresBurnedDisplay", "StartedDate", "UpdatedDate"], axis=1
)

# Parse the start date once; the month and the name prefix both derive from it.
# The StartedDate column itself is left as loaded.
started = pd.to_datetime(fire_data["StartedDate"])
fire_data["StartedMonth"] = started.dt.strftime("%m")

# Reduce the last-update timestamp to a calendar date (renamed to EndedDate below)
fire_data["UpdatedDate"] = pd.to_datetime(fire_data["UpdatedDate"]).dt.date

# Prefix the fire name with its YYYY-MM-DD start date (no separator), the same
# key format used for the distance and location outputs
fire_data["Name"] = started.dt.strftime("%Y-%m-%d").astype(str) + fire_data["Name"]

fire_data = fire_data.rename(
    columns={
        "UpdatedDate": "EndedDate",
        "AcresBurnedDisplay": "AcresBurned",
        "Name": "Fire",
    }
)

fire_data.head()

Unnamed: 0,Fire,AcresBurned,StartedDate,EndedDate,StartedMonth
0,2016-10-10Creek Fire,65,2016-10-10,2016-10-12,10
1,2016-04-24Taglio Fire,30,2016-04-24,2016-04-24,4
3,2016-05-30Tulloch Fire,85,2016-05-30,2016-06-01,5
4,2016-05-22Metz Fire,3876,2016-05-22,2016-05-25,5
5,2016-05-23Wheatland Fire,156,2016-05-23,2016-05-25,5


In [17]:
# Persist the key fire attributes as a zstd-compressed feather file
fire_data_file = f"{path_to_write_data}/fire_data.feather"
feather.write_feather(fire_data, fire_data_file, compression="zstd")