In [1]:
# dependencies
import pandas as pd

# import the fire data csv
FireFile = "./Data/CA_Fires.csv"

# read the file and store in a data frame
FireData = pd.read_csv(FireFile)

# see all columns
FireData.columns

# remove extraneous columns
FireData = FireData[["incident_name","incident_county","incident_latitude",
            "incident_longitude","incident_acres_burned","incident_id",
            "incident_dateonly_created","incident_dateonly_extinguished"]]

# rename the columns
FireData = FireData.rename(columns={"incident_name":"Name","incident_county":"County",
            "incident_acres_burned":"Acres Burned","incident_latitude":"Lat",
            "incident_longitude":"Long","incident_id":"ID","incident_dateonly_extinguished":
            "Date Extinguished","incident_dateonly_created":"Date Started"})

# make sure there are no incident repeats then drop ID field
FireData.drop_duplicates(subset=["ID"])
FireData = FireData[["Name","County","Lat","Long","Acres Burned",
                     "Date Started","Date Extinguished"]]

In [2]:
# create a column that contains the duration of each fire
# first convert the date columns to datetime
FireData["Date Started"] = pd.to_datetime(FireData["Date Started"])
FireData["Date Extinguished"] = pd.to_datetime(FireData["Date Extinguished"])

# subtract the two dates
FireData["Duration (Days)"] = FireData["Date Extinguished"] - FireData["Date Started"]

# convert duration to string, remove "days", and convert back to 
FireData["Duration (Days)"] = FireData["Duration (Days)"].astype(str)
FireData["Duration (Days)"] = FireData["Duration (Days)"].str.replace("days","")

# create a column that holds the year of each start date
FireData["Year"] = FireData["Date Started"].dt.year

# remove the date started and date extinguished columns
FireData = FireData[["Name","Year","County","Lat","Long","Acres Burned","Duration (Days)"]]

In [14]:
# remove any years that aren't 2013-2020
FireData = FireData.loc[(FireData["Year"]>=2013)&(FireData["Year"]<=2020),:]

# remove any NaNs
FireData = FireData.dropna(subset=["Acres Burned"])

In [15]:
# reset the index
FireData.reset_index(inplace=True,drop=True)

In [16]:
FireData

Unnamed: 0,Name,Year,County,Lat,Long,Acres Burned,Duration (Days)
0,Bridge Fire,2017,Shasta,40.774000,-122.309000,37.0,70
1,River Fire,2013,Inyo,36.602575,-118.016510,406.0,4
2,Fawnskin Fire,2013,San Bernardino,34.288877,-116.941311,30.0,2
3,Gold Fire,2013,Madera,37.116295,-119.635004,274.0,1
4,Panther Fire,2013,Tehama,40.190062,-121.595555,6965.0,8
...,...,...,...,...,...,...,...
1701,Cerritos Fire,2020,Riverside,33.773754,-117.051463,200.0,NaT
1702,Bond Fire,2020,Orange,33.743842,-117.674967,6686.0,8
1703,Thomas Fire,2020,Lassen,41.591948,-120.374514,24.0,0
1704,Cornell Fire,2020,Ventura,34.336220,-119.078000,174.0,1


In [None]:
# convert NaTs and remove them after separating acres and duration into two dataframes
# FireData = FireData.dropna(subset=["Duration (Days)"])