In [14]:
# Dependencies
import matplotlib.pyplot as plt
import requests
import pandas as pd
from pprint import pprint

In [15]:
# initialize variables
years = [2020, 2021, 2022, 2023]

# Save config information.
url = "https://data.ca.gov/api/3/action/datastore_search?resource_id=d73ee828-c2c6-485c-91dc-c26bd9ce3991" # base url

# Build partial query URL
query_url = f"{url}&limit=1000"
response = requests.get(query_url).json()
records = response['result']['records']

# put records in dataframe
records_df = pd.DataFrame(records)
records_df.head()

Unnamed: 0,_id,OBJECTID,Year,State,Agency,Unit ID,Fire Name,Local Incident Number,Alarm Date,Containment Date,...,Collection Method,Management Objective,GIS Calculated Acres,Comments,Complex Name,IRWIN ID,Fire Number (historical use),Complex ID,Shape__Area,Shape__Length
0,1,1,2023,CA,CDF,SKU,WHITWORTH,4808,6/17/2023 12:00:00 AM,6/17/2023 12:00:00 AM,...,1,1,5.729125,,,{7985848C-0AC2-4BA4-8F0E-29F778652E61},,,41407.83984375,1247.16603398886
1,2,2,2023,CA,LRA,BTU,KAISER,10225,6/2/2023 12:00:00 AM,6/2/2023 12:00:00 AM,...,1,1,13.60238,,,{43EBCC88-B3AC-48EB-8EF5-417FE0939CCF},,,93455.87890625,1285.51454971661
2,3,3,2023,CA,CDF,AEU,JACKSON,17640,7/1/2023 12:00:00 AM,7/2/2023 12:00:00 AM,...,1,1,27.81446,,,{B64E1355-BF1D-441A-95D0-BC1FBB93483B},,,183028.5,2697.58742917456
3,4,4,2023,CA,CDF,AEU,CARBON,18821,7/11/2023 12:00:00 AM,7/11/2023 12:00:00 AM,...,1,1,58.76023,,,{CB41DB0A-E4B1-489D-A4EA-738F2CD6DB3B},,,388221.953125,2548.73877855158
4,5,5,2023,CA,CDF,AEU,LIBERTY,18876,7/11/2023 12:00:00 AM,7/12/2023 12:00:00 AM,...,1,1,70.979,,,{F83F70A4-07A7-40B8-BD51-10CCC1C30D63},,,466456.0078125,5106.93652582734


In [16]:
# remove first two columns
records_df = records_df.drop(columns=['_id', 'OBJECTID'])

# rename columns
renamed_df = records_df.rename(columns={'GIS Calculated Acres': 'Acres', 'Shape__Area': 'Area', 'Shape__Length':'Perimeter'})

# reorder columns
new_order = ['Year', 'Fire Name', 'State', 'Alarm Date', 'Containment Date', 'Cause',
       'Collection Method', 'Management Objective', 'Acres', 'Comments',
       'Complex Name', 'IRWIN ID', 'Fire Number (historical use)',
       'Complex ID', 'Area', 'Perimeter', 'Local Incident Number', 'Agency', 'Unit ID', ]

df_reordered = renamed_df[new_order]
df_reordered.head()

Unnamed: 0,Year,Fire Name,State,Alarm Date,Containment Date,Cause,Collection Method,Management Objective,Acres,Comments,Complex Name,IRWIN ID,Fire Number (historical use),Complex ID,Area,Perimeter,Local Incident Number,Agency,Unit ID
0,2023,WHITWORTH,CA,6/17/2023 12:00:00 AM,6/17/2023 12:00:00 AM,5,1,1,5.729125,,,{7985848C-0AC2-4BA4-8F0E-29F778652E61},,,41407.83984375,1247.16603398886,4808,CDF,SKU
1,2023,KAISER,CA,6/2/2023 12:00:00 AM,6/2/2023 12:00:00 AM,5,1,1,13.60238,,,{43EBCC88-B3AC-48EB-8EF5-417FE0939CCF},,,93455.87890625,1285.51454971661,10225,LRA,BTU
2,2023,JACKSON,CA,7/1/2023 12:00:00 AM,7/2/2023 12:00:00 AM,2,1,1,27.81446,,,{B64E1355-BF1D-441A-95D0-BC1FBB93483B},,,183028.5,2697.58742917456,17640,CDF,AEU
3,2023,CARBON,CA,7/11/2023 12:00:00 AM,7/11/2023 12:00:00 AM,9,1,1,58.76023,,,{CB41DB0A-E4B1-489D-A4EA-738F2CD6DB3B},,,388221.953125,2548.73877855158,18821,CDF,AEU
4,2023,LIBERTY,CA,7/11/2023 12:00:00 AM,7/12/2023 12:00:00 AM,14,1,1,70.979,,,{F83F70A4-07A7-40B8-BD51-10CCC1C30D63},,,466456.0078125,5106.93652582734,18876,CDF,AEU


In [17]:
# Convert 'Alarm Dates' column to datetime format
df_reordered['Alarm Date'] = pd.to_datetime(df_reordered['Alarm Date'])
df_reordered['Containment Date'] = pd.to_datetime(df_reordered['Containment Date'])

# Extract date part only
df_reordered['Alarm Date'] = df_reordered['Alarm Date'].dt.date
df_reordered['Containment Date'] = df_reordered['Containment Date'].dt.date

# Convert 'Alarm Date' and 'Containment Date' columns to datetime format
df_reordered['Alarm Date'] = pd.to_datetime(df_reordered['Alarm Date'])
df_reordered['Containment Date'] = pd.to_datetime(df_reordered['Containment Date'])


# Calculate duration in days and add as a new column
df_reordered['Fire Duration (Days)'] = (df_reordered['Containment Date'] - df_reordered['Alarm Date']).dt.days

new_order = ['Year', 'Fire Name', 'State', 'Alarm Date', 'Containment Date', 'Fire Duration (Days)', 'Cause',
       'Collection Method', 'Management Objective', 'Acres', 'Comments',
       'Complex Name', 'IRWIN ID', 'Fire Number (historical use)',
       'Complex ID', 'Area', 'Perimeter', 'Local Incident Number', 'Agency', 'Unit ID', ]

df_reordered = df_reordered[new_order]

df_reordered.head()

  df_reordered['Alarm Date'] = pd.to_datetime(df_reordered['Alarm Date'])
  df_reordered['Containment Date'] = pd.to_datetime(df_reordered['Containment Date'])


Unnamed: 0,Year,Fire Name,State,Alarm Date,Containment Date,Fire Duration (Days),Cause,Collection Method,Management Objective,Acres,Comments,Complex Name,IRWIN ID,Fire Number (historical use),Complex ID,Area,Perimeter,Local Incident Number,Agency,Unit ID
0,2023,WHITWORTH,CA,2023-06-17,2023-06-17,0.0,5,1,1,5.729125,,,{7985848C-0AC2-4BA4-8F0E-29F778652E61},,,41407.83984375,1247.16603398886,4808,CDF,SKU
1,2023,KAISER,CA,2023-06-02,2023-06-02,0.0,5,1,1,13.60238,,,{43EBCC88-B3AC-48EB-8EF5-417FE0939CCF},,,93455.87890625,1285.51454971661,10225,LRA,BTU
2,2023,JACKSON,CA,2023-07-01,2023-07-02,1.0,2,1,1,27.81446,,,{B64E1355-BF1D-441A-95D0-BC1FBB93483B},,,183028.5,2697.58742917456,17640,CDF,AEU
3,2023,CARBON,CA,2023-07-11,2023-07-11,0.0,9,1,1,58.76023,,,{CB41DB0A-E4B1-489D-A4EA-738F2CD6DB3B},,,388221.953125,2548.73877855158,18821,CDF,AEU
4,2023,LIBERTY,CA,2023-07-11,2023-07-12,1.0,14,1,1,70.979,,,{F83F70A4-07A7-40B8-BD51-10CCC1C30D63},,,466456.0078125,5106.93652582734,18876,CDF,AEU


In [18]:
# create a column that breaks down causes into fewer categories