# Cleaning California wildfire perimeters data

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import json
import jenkspy
import altair_latimes as lat
import altair as alt
# Register the theme object from altair_latimes under the name 'latimes',
# then make it the active Altair theme for charts rendered in this notebook.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

#### Download perimeters from National Interagency Fire Center

In [2]:
# # Current
# active_2020_url = 'https://opendata.arcgis.com/datasets/5da472c6d27b4b67970acc7b5044c862_0.geojson'
# inactive_2020_url = 'https://opendata.arcgis.com/datasets/bf373b4ff85e4f0299036ecc31a1bcbb_0.geojson'
# historic_url = 'https://opendata.arcgis.com/datasets/4454e5d8e8c44b0280258b51bcf24794_0.geojson'

In [3]:
# historic = gpd.read_file(historic_url)

#### Read data from CalFire

In [4]:
# metadata: https://frap.fire.ca.gov/frap-projects/fire-perimeters/
# Load the FRAP fire perimeters GeoJSON into a GeoDataFrame.
# NOTE(review): this is an absolute path on the author's machine; it will not
# resolve elsewhere without editing.
frap_input_path = '/Users/mhustiles/data/data/GIS/wildfires/FRAP/input/wildfires_frap.geojson'
wildfires = gpd.read_file(frap_input_path)

In [5]:
# Preview the rows with the greatest ALARM_DATE values (descending sort).
latest_first = wildfires.sort_values(by='ALARM_DATE', ascending=False)
latest_first.head()

Unnamed: 0,OBJECTID,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,REPORT_AC,GIS_ACRES,C_METHOD,OBJECTIVE,FIRE_NUM,Shape_Leng,Shape_Area,geometry
9506,20991.0,2018,CA,CCO,LAC,OAK,389425,2018-12-25T00:00:00,2018-12-25T00:00:00,11.0,,3.2,2.18085,1.0,1.0,,479.605646,8825.584829,"MULTIPOLYGON (((-118.69256 34.47384, -118.6925..."
9411,20895.0,2018,CA,CCO,VNC,KUEHNER,104479,2018-12-14T00:00:00,2018-12-14T00:00:00,14.0,Cause undetermined after investigation 18-0104479,0.0,0.009721,6.0,1.0,,25.15895,39.340153,"MULTIPOLYGON (((-118.65989 34.27775, -118.6598..."
9462,20946.0,2018,CA,CCO,ORC,AMENO,18130289,2018-11-26T00:00:00,2018-11-26T00:00:00,14.0,,,0.792964,1.0,1.0,,319.518001,3209.012151,"MULTIPOLYGON (((-117.59664 33.43143, -117.5966..."
9354,20838.0,2018,CA,CCO,VNC,VISTA,96450,2018-11-16T00:00:00,2018-11-16T00:00:00,2.0,Equipment 18-0096450,1.0,1.401421,6.0,1.0,,286.809668,5671.349189,"MULTIPOLYGON (((-119.31402 34.36335, -119.3139..."
9641,21126.0,2018,CA,BIA,HIA,SIGNBOARD,174,2018-11-16T00:00:00,2018-11-17T00:00:00,,,,81.154282,3.0,1.0,,3195.933523,328419.733216,"MULTIPOLYGON (((-123.56902 41.03555, -123.5690..."


In [6]:
# Normalize column names: trim whitespace, lower-case, turn spaces into
# underscores and drop parentheses.
# regex=False is passed explicitly because '(' and ')' are regex
# metacharacters and the default of `regex` differs between pandas versions;
# every replacement here is meant to be a literal one.
wildfires.columns = (
    wildfires.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

In [7]:
# Columns that hold codes/identifiers but were read as floats (e.g. 14.0).
# They are stored as strings with the float artifact removed.
code_columns = ['cause', 'objectid', 'c_method', 'objective', 'report_ac']

for column in code_columns:
    # Strip only a *trailing* '.0'. A plain literal replace of '.0' would also
    # hit the middle of fractional values (e.g. '3.05' -> '35'), which
    # corrupts report_ac. Missing values become the string 'nan', as before.
    wildfires[column] = (
        wildfires[column]
        .astype(str)
        .str.replace(r'\.0$', '', regex=True)
    )

# Burned area in square miles (640 acres per square mile), rounded to 2 places.
wildfires['sqmiles'] = (wildfires['gis_acres'] / 640).round(2)

# Drop the shape length/area columns. errors='ignore' keeps the cell safe to
# re-run after the columns are already gone.
wildfires = wildfires.drop(columns=['shape_area', 'shape_leng'], errors='ignore')

In [8]:
# Remove the 'T00:00:00' time suffix from the alarm date strings.
alarm_date_text = wildfires['alarm_date']
wildfires['alarm_date'] = alarm_date_text.str.replace('T00:00:00', '')

In [9]:
# Remove the 'T00:00:00' time suffix from the containment date strings.
cont_date_text = wildfires['cont_date']
wildfires['cont_date'] = cont_date_text.str.replace('T00:00:00', '')

In [10]:
# Parse the alarm date strings into datetimes.
# The values are dash-separated ('YYYY-MM-DD') once the 'T00:00:00' suffix has
# been removed, so the format string must use dashes as well. A
# slash-separated format does not describe this data and fails wherever
# pandas enforces the format strictly.
wildfires['alarm_date'] = pd.to_datetime(wildfires['alarm_date'], format='%Y-%m-%d')

# Derive calendar parts from the parsed date.
alarm_parts = wildfires['alarm_date'].dt
wildfires['alarm_year'] = alarm_parts.year
wildfires['alarm_quarter'] = alarm_parts.quarter
wildfires['alarm_day'] = alarm_parts.day
wildfires['alarm_month'] = alarm_parts.month
wildfires['alarm_monthname'] = alarm_parts.month_name()

In [11]:
# Store the numeric date parts as strings.
date_part_columns = ['alarm_year', 'alarm_quarter', 'alarm_day', 'alarm_month']
for part_column in date_part_columns:
    wildfires[part_column] = wildfires[part_column].astype(str)

In [12]:
# Lookup from cause code (as a string, '1' through '19') to its description.
# The labels are listed in code order; enumerate() supplies the numeric code.
cause_codes = {
    str(code): description
    for code, description in enumerate(
        [
            'Lightning',
            'Equipment Use',
            'Smoking',
            'Campfire',
            'Debris',
            'Railroad',
            'Arson',
            'Playing with Fire',
            'Miscellaneous',
            'Vehicle',
            'Power Line',
            'Firefighter Training',
            'Non-Firefighter Training',
            'Unknown/Unidentified',
            'Structure',
            'Aircraft',
            'Volcanic',
            'Escaped Prescribed Burn',
            'Illegal Alien Campfire',
        ],
        start=1,
    )
}

In [13]:
# Show the first five rows to check the cleaned columns and date parts.
wildfires.head(n=5)

Unnamed: 0,objectid,year_,state,agency,unit_id,fire_name,inc_num,alarm_date,cont_date,cause,...,c_method,objective,fire_num,geometry,sqmiles,alarm_year,alarm_quarter,alarm_day,alarm_month,alarm_monthname
0,1,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21,2007-10-23,14,...,8,1,233414,"MULTIPOLYGON (((-118.49851 34.38242, -118.4979...",0.04,2007,4,21,10,October
1,2,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22,2007-10-25,14,...,8,1,233077,"MULTIPOLYGON (((-118.58448 34.41978, -118.5842...",4.41,2007,4,22,10,October
2,3,2007,CA,USF,ANF,RANCH,166,2007-10-20,2007-11-15,2,...,7,1,166,"MULTIPOLYGON (((-118.75645 34.59651, -118.7545...",91.27,2007,4,20,10,October
3,4,2007,CA,CCO,LAC,EMMA,201384,2007-09-11,2007-09-11,14,...,8,1,201384,"MULTIPOLYGON (((-118.07277 34.50198, -118.0727...",0.27,2007,3,11,9,September
4,5,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24,2007-11-27,14,...,8,1,259483,"MULTIPOLYGON (((-118.74408 34.08130, -118.7438...",7.36,2007,4,24,11,November


In [14]:
# Translate each cause code into its description; codes that are not keys of
# cause_codes come out as missing values.
cause_labels = wildfires['cause'].map(cause_codes)
wildfires['cause_description'] = cause_labels

In [15]:
# Share of fires by cause, in percent with one decimal place.
# normalize=True is passed by keyword (the original relied on a truthy string
# passed positionally), and scaling happens before rounding so the result does
# not pick up float artifacts such as 2.9000000000000004.
wildfires['cause_description'].value_counts(normalize=True).mul(100).round(1)

Unknown/Unidentified        27.6
Lightning                   22.2
Miscellaneous               12.9
Equipment Use               10.5
Arson                        7.6
Debris                       4.0
Vehicle                      3.9
Power Line                   3.5
Campfire                     2.9
Playing with Fire            1.6
Smoking                      1.4
Escaped Prescribed Burn      0.8
Railroad                     0.6
Illegal Alien Campfire       0.2
Structure                    0.2
Aircraft                     0.1
Non-Firefighter Training     0.1
Firefighter Training         0.0
Name: cause_description, dtype: float64

In [16]:
# Lookup from agency code to the agency's full name.
agencies = dict(
    BIA='USDI Bureau of Indian Affairs',
    BLM='Bureau of Land Management',
    CDF='California Department of Forestry and Fire Protection',
    CCO='Contract Counties',
    DOD='Department of Defense',
    FWS='USDI Fish and Wildlife Service',
    LRA='Local Response Area',
    NOP='No Protection',
    NPS='National Park Service',
    PVT='Private',
    USF='United States Forest Service',
    OTH='Other',
)

In [17]:
# Lookup from unit code to the unit's name.
# Spelling fixes relative to the previous version (these strings end up in the
# exported unit_description column):
#   HTF: 'Humbolt-Toiyabe'  -> 'Humboldt-Toiyabe'
#   TMU: 'Lake Taho Basin'  -> 'Lake Tahoe Basin'
#   RRS: 'Rouge River'      -> 'Rogue River'
# NOTE(review): 'Carson City District or - BLM' (CCD) looks garbled but the
# intended wording is not evident from this file, so it is left unchanged.
units = {
'AFV':'Vandenberg Air Force Base',
'ANF':'Angeles National Forest',
'BDF':'San Bernardino National Forest',
'BNP':'Lava Beds National Monument',
'BRR':'Bitter Creek National Wildlife Refuge',
'CDD':'CA Desert District - BLM',
'CLR':'Clear Lake National Wildlife Refuge',
'CNF':'Cleveland National Forest',
'CNP':'Channel Islands - NPS',
'CRB':'Camp Roberts Military Base',
'ENF':'Eldorado National Forest',
'GNP':'Golden Gate National Recreation Area - NPS',
'HIA':'Hoopa Valley Tribe',
'HPR':'Hopper Mountain National Wildlife Refuge',
'HTF':'Humboldt-Toiyabe National Forest',
'INF':'Inyo National Forest',
'JTP':'Joshua Tree NP',
'KNF':'Klamath National Forest',
'KNP':'Sequoia - Kings Canyon NP',
'KRN':'Kern County',
'KRR':'Kern National Wildlife Refuge',
'LAC':'Los Angeles County',
'LKR':'Lower Klamath National Wildlife Refuge',
'LNF':'Lassen National Forest',
'LNP':'Lassen Volcanic NP',
'LPF':'Los Padres National Forest',
'LUR':'San Luis National Wildlife Refuge',
'MDF':'Modoc National Forest',
'MNF':'Mendocino National Forest',
'MNP':'Mojave - NPS',
'MRN':'Marin County',
'NOD':'Northern CA District - BLM',
'ORC':'Orange County',
'PIP':'Pinnacles National Park',
'PLR':'Pixley National Wildlife Refuge',
'PNF':'Plumas National Forest',
'RNP':'Point Reyes National Seashore',
'RWP':'Redwood National and State Parks',
'SBC':'Santa Barbara County',
'SHF':'Shasta-Trinity National Forest',
'SJR':'San Joaquin River National Wildlife Refuge',
'SMP':'Santa Monica Mtns National Recreation Area',
'SNF':'Sierra National Forest',
'SNU':'Sonoma CDF (old)',
'SOR':'Sonny Bono Salton Sea National Wildlife Refuge',
'SQF':'Sequoia National Forest',
'SRF':'Six Rivers National Forest',
'STF':'Stanislaus National Forest',
'SWR':'Sacramento National Wildlife Refuge',
'TMU':'Lake Tahoe Basin Management Unit',
'TNF':'Tahoe National Forest',
'TNR':'San Diego National Wildlife Refuge',
'VNC':'Ventura County',
'WED':'City of Weed Vol. Fire Dept.',
'WNP':'Whiskeytown National Recreation Area',
'YNP':'Yosemite National Park',
'TOI':'Toiyabe National Forest',
'DVP':'Death Valley NP',
'AEU':'Amador - El Dorado CAL FIRE',
'BDU':'San Bernardino CAL FIRE',
'BEU':'Monterey - San Benito CAL FIRE',
'BTU':'Butte CAL FIRE',
'CZU':'San Mateo - Santa Cruz CAL FIRE',
'FKU':'Fresno-Kings CAL FIRE',
'HUU':'Humboldt - Del Norte CAL FIRE',
'LMU':'Lassen - Modoc CAL FIRE',
'LNU':'Sonoma - Lake - Napa CAL FIRE',
'MEU':'Mendocino CAL FIRE',
'MMU':'Madera - Mariposa CAL FIRE',
'NEU':'Nevada - Yuba - Placer CAL FIRE',
'RRU':'Riverside CAL FIRE',
'SCU':'Santa Clara CAL FIRE',
'SHU':'Shasta - Trinity CAL FIRE',
'SKU':'Siskiyou CAL FIRE',
'SLU':'San Luis Obispo CAL FIRE',
'TCU':'Tuolumne - Calaveras CAL FIRE',
'LDF':'Los Angeles City FD',
'MCP':'Camp Pendleton Marine Corps Base',
'CCD':'Carson City District or - BLM',
'MVU':'San Diego CAL FIRE',
'TGU':'Tehama - Glenn CAL FIRE',
'FHL':'Fort Hunter Liggett',
'BBD':'Bakersfield District - BLM  (old)',
'RRS':'Rogue River- Siskiyou',
'FNF':'Fremont National Forest',
'CND':'Central CA District BLM',
'TUU':'Tulare CAL FIRE',
'VLJ':'Vallejo Fire Dept.',
}

In [18]:
# Add readable labels for the agency and unit codes using the lookup dicts.
for label_column, code_column, lookup in (
    ('agency_description', 'agency', agencies),
    ('unit_description', 'unit_id', units),
):
    wildfires[label_column] = wildfires[code_column].map(lookup)

# Display name for the fire: first character upper-cased, the rest lower-cased.
# NOTE(review): str.capitalize lower-cases every later word too, so multi-word
# names come out like 'Holy jim' — confirm that is the intended presentation.
wildfires['fire'] = wildfires['fire_name'].str.capitalize()

In [19]:
# Show the first five rows to check the new description and name columns.
wildfires.head(n=5)

Unnamed: 0,objectid,year_,state,agency,unit_id,fire_name,inc_num,alarm_date,cont_date,cause,...,sqmiles,alarm_year,alarm_quarter,alarm_day,alarm_month,alarm_monthname,cause_description,agency_description,unit_description,fire
0,1,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21,2007-10-23,14,...,0.04,2007,4,21,10,October,Unknown/Unidentified,Contract Counties,Los Angeles County,October
1,2,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22,2007-10-25,14,...,4.41,2007,4,22,10,October,Unknown/Unidentified,Contract Counties,Los Angeles County,Magic
2,3,2007,CA,USF,ANF,RANCH,166,2007-10-20,2007-11-15,2,...,91.27,2007,4,20,10,October,Equipment Use,United States Forest Service,Angeles National Forest,Ranch
3,4,2007,CA,CCO,LAC,EMMA,201384,2007-09-11,2007-09-11,14,...,0.27,2007,3,11,9,September,Unknown/Unidentified,Contract Counties,Los Angeles County,Emma
4,5,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24,2007-11-27,14,...,7.36,2007,4,24,11,November,Unknown/Unidentified,Contract Counties,Los Angeles County,Corral


---

### Woolsey fire

In [20]:
# Select the rows whose display name is exactly 'Woolsey'.
is_woolsey = wildfires['fire'] == 'Woolsey'
woolsey = wildfires[is_woolsey]

In [21]:
# Display the row(s) selected for the Woolsey fire.
woolsey

Unnamed: 0,objectid,year_,state,agency,unit_id,fire_name,inc_num,alarm_date,cont_date,cause,...,sqmiles,alarm_year,alarm_quarter,alarm_day,alarm_month,alarm_monthname,cause_description,agency_description,unit_description,fire
9501,20986,2018,CA,CCO,LAC,WOOLSEY,338981,2018-11-08,2018-11-08,14,...,151.48,2018,4,8,11,November,Unknown/Unidentified,Contract Counties,Los Angeles County,Woolsey


### Aggregates by year

In [22]:
# Per-year summary: number of fire records and total area in square miles,
# most recent year first.
years = (
    wildfires
    .groupby(['year_'])
    .agg({'objectid': 'size', 'sqmiles': 'sum'})
    .reset_index()
    .rename(columns={'year_': 'year', 'objectid': 'count'})
    .sort_values('year', ascending=False)
)

In [23]:
# Show the five most recent years of the summary.
years.head(n=5)

Unnamed: 0,year,count,sqmiles
38,2018,411,2484.34
37,2017,607,2225.83
36,2016,347,830.75
35,2015,311,1232.99
34,2014,238,891.96


---

### Export cleaned dataframe to GeoJSON

In [24]:
# Final look at the first five rows before writing the file.
wildfires.head(n=5)

Unnamed: 0,objectid,year_,state,agency,unit_id,fire_name,inc_num,alarm_date,cont_date,cause,...,sqmiles,alarm_year,alarm_quarter,alarm_day,alarm_month,alarm_monthname,cause_description,agency_description,unit_description,fire
0,1,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21,2007-10-23,14,...,0.04,2007,4,21,10,October,Unknown/Unidentified,Contract Counties,Los Angeles County,October
1,2,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22,2007-10-25,14,...,4.41,2007,4,22,10,October,Unknown/Unidentified,Contract Counties,Los Angeles County,Magic
2,3,2007,CA,USF,ANF,RANCH,166,2007-10-20,2007-11-15,2,...,91.27,2007,4,20,10,October,Equipment Use,United States Forest Service,Angeles National Forest,Ranch
3,4,2007,CA,CCO,LAC,EMMA,201384,2007-09-11,2007-09-11,14,...,0.27,2007,3,11,9,September,Unknown/Unidentified,Contract Counties,Los Angeles County,Emma
4,5,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24,2007-11-27,14,...,7.36,2007,4,24,11,November,Unknown/Unidentified,Contract Counties,Los Angeles County,Corral


In [25]:
# Write the cleaned GeoDataFrame to disk as GeoJSON.
# NOTE(review): this is an absolute path on the author's machine; it will not
# resolve elsewhere without editing.
cleaned_output_path = '/Users/mhustiles/data/data/GIS/wildfires/FRAP/output/wildfires.geojson'
wildfires.to_file(cleaned_output_path, driver='GeoJSON')