In [38]:
# Dependencies
import requests
import pandas as pd
import os

## API Request
---
Set API variables and retrieve json

In [39]:
# Save config information.
url = "https://data.ca.gov/api/3/action/datastore_search?resource_id=d73ee828-c2c6-485c-91dc-c26bd9ce3991&"
# Build partial query URL
limit = 1000
year = 2013
query_url = f"{url}&limit={limit}&q={year}"
response = requests.get(query_url)
response

<Response [200]>

## Parse Data
---
Store response in a dataframe then reduce data set by removing irrelevant data and invalid data

In [40]:
# Parse API reponse and create dataframe
df = pd.DataFrame(response.json()['result']['records'])
# Filter data by year
yrdf = df.loc[df['Year'] == f'{str(year)}']
# Filter data and cull unnecessary data
columns_to_keep = ['Year', 'Alarm Date','Containment Date','Cause','GIS Calculated Acres']
columns_to_drop = [col for col in yrdf.columns if col not in columns_to_keep]
yrdf = yrdf.drop(columns=columns_to_drop)
# Remove invalid data entries
yrdf = yrdf.dropna (subset = ["Containment Date"])
yrdf.tail(1)

Unnamed: 0,Year,Alarm Date,Containment Date,Cause,GIS Calculated Acres
299,2013,8/1/2013 12:00:00 AM,8/10/2013 12:00:00 AM,9,69.25648


## Reformat Data
---
### Refactor Cause Number to Words
Adjust date entries for greater legibility

In [41]:
yrdf = yrdf.rename(columns={'Cause': 'Cause #'})
yrdf['Cause'] = ''
yrdf['Cause #'] = yrdf['Cause #'].astype(int)
def map_cause_to_meaning(i):
    if i == 1:
        return 'Lightning'
    elif i == 2:
        return 'Equipment Use'
    elif i == 3:
        return 'Smoking'
    elif i == 4:
        return 'Campfire'
    elif i == 5:
        return 'Debris'
    elif i == 6:
        return 'Railroad'
    elif i == 7:
        return 'Arson'
    elif i == 8:
        return 'Playing with fire'
    elif i == 9:
        return 'Miscellaneous'
    elif i == 10:
        return 'Vehicle'
    elif i == 11:
        return 'Powerline'
    elif i == 12:
        return 'Firefighter Training'
    elif i == 13:
        return 'Non-Firefighter Training'
    elif i == 14:
        return 'Unknown / Unidentified'
    elif i == 15:
        return 'Structure'
    elif i == 16:
        return 'Aircraft'
    elif i == 18:
        return 'Escaped Prescribed Burn'
    elif i == 19:
        return 'Illegal Alien Campfire'
    else:
        return 'N/A'
yrdf['Cause'] = yrdf['Cause #'].apply(map_cause_to_meaning)
yrdf.tail(1)

Unnamed: 0,Year,Alarm Date,Containment Date,Cause #,GIS Calculated Acres,Cause
299,2013,8/1/2013 12:00:00 AM,8/10/2013 12:00:00 AM,9,69.25648,Miscellaneous


### Reformat Date
Change formating of date to more accessible format

In [42]:
# Strip date and time data to date
yrdf['Alarm Date'] = [dt.replace(' 12:00:00 AM', '') for dt in yrdf['Alarm Date']]
yrdf['Containment Date'] = [str(item) for item in yrdf['Containment Date']]
yrdf['Containment Date'] = [dt.replace(' 12:00:00 AM', '') for dt in yrdf['Containment Date']]
yrdf.tail(1)

Unnamed: 0,Year,Alarm Date,Containment Date,Cause #,GIS Calculated Acres,Cause
299,2013,8/1/2013,8/10/2013,9,69.25648,Miscellaneous


In [33]:
# Attempt to convert 'Alarm Date' to datetime, coercing errors
yrdf['Alarm Date'] = pd.to_datetime(yrdf['Alarm Date'], errors='coerce')

# Display rows with invalid dates
invalid_dates = yrdf[yrdf['Alarm Date'].isna()]
print(invalid_dates)

# Drop rows where 'Alarm Date' is in 2017
yrdf = yrdf[yrdf['Alarm Date'].dt.year != 2001]

# Continue with the processing
df_byalarm = yrdf.sort_values(by='Alarm Date').reset_index()
df_byalarm.tail(1)

Empty DataFrame
Columns: [Year, Alarm Date, Containment Date, Cause #, GIS Calculated Acres, Cause]
Index: []


Unnamed: 0,index,Year,Alarm Date,Containment Date,Cause #,GIS Calculated Acres,Cause
295,100,2013,2013-12-29,2013-12-29,14,12.89286,Unknown / Unidentified


In [30]:
earliest_fire = df_byalarm['Alarm Date'][0]
earliest_fire

Timestamp('2013-01-22 00:00:00')

In [34]:
# Convert 'Containment Date' to datetime, coercing errors
yrdf['Containment Date'] = pd.to_datetime(yrdf['Containment Date'], errors='coerce')

# Drop rows with invalid 'Containment Date'
yrdf_filtered = yrdf.dropna(subset=['Containment Date'])

# Now you can sort and process the DataFrame as needed
df_bycontainment = yrdf_filtered.sort_values(by='Containment Date', ascending=False).reset_index(drop=True)
latest_containment = df_bycontainment['Containment Date'].iloc[0]

df_bycontainment.tail(1)

Unnamed: 0,Year,Alarm Date,Containment Date,Cause #,GIS Calculated Acres,Cause
295,2013,2013-01-22,2013-01-24,5,318.5916,Debris


In [32]:
latest_containment

Timestamp('2013-12-29 00:00:00')

## Export Data as `csv`
---

In [6]:
yrdf.to_csv(f"outputs/fires_{year}.csv",
                  encoding="utf-8", index=False, header=True)

In [12]:
output_path = os.path.join("outputs", "timeline_dates_2013.csv")
with open(output_path, 'w') as csvfile:

    # Initialize csv.writer
    csvwriter = csv.writer(csvfile, delimiter=',')

    # Write the first row (column headers)
    csvwriter.writerow(['Title', 'Date'])
    csvwriter.writerow(['Earliest Fire', earliest_fire])

    # Write the second row
    csvwriter.writerow(['Latest Containment', latest_containment])