In [1]:
# increase cell width
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import gc
import time
import re
import s2_py as s2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import shapefile as shp
import geopandas as gpd
from shapely.geometry import Polygon, mapping, box

### Wildfire Perimeters Data

Source: http://frap.fire.ca.gov/data/frapgisdata-sw-fireperimeters_download

Data Description: http://frap.fire.ca.gov/projects/fire_data/fire_perimeters_data_description

Incident data: http://cdfdata.fire.ca.gov/incidents/incidents_archived

In [3]:
cal_fire_hist_df = gpd.read_file("./Data/fire18_1.gdb", layer=1)
cal_fire_hist_df.shape

(20508, 18)

In [4]:
cal_fire_hist_df = cal_fire_hist_df.to_crs({'init': 'epsg:4326'})
cal_fire_hist_df.head()

Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,REPORT_AC,GIS_ACRES,C_METHOD,OBJECTIVE,FIRE_NUM,Shape_Length,Shape_Area,geometry
0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,,,25.736713,8.0,1.0,233414,1902.439051,104152.8,(POLYGON ((-118.4985124819225 34.3824189370446...
1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,,,2824.877197,8.0,1.0,233077,20407.965662,11431870.0,(POLYGON ((-118.5844782794717 34.4197766738174...
2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,,54716.0,58410.335938,7.0,1.0,166,169150.71569,236378200.0,(POLYGON ((-118.7564468802518 34.5965130520924...
3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,,,172.214951,8.0,1.0,201384,6117.777086,696929.2,(POLYGON ((-118.0727716195954 34.5019812566583...
4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,,,4707.99707,8.0,1.0,259483,22907.182174,19052590.0,(POLYGON ((-118.7440804532698 34.0812976688129...


In [5]:
cal_fire_hist_df['YEAR'] = pd.to_numeric(cal_fire_hist_df.YEAR_)
cal_fire_hist_df[cal_fire_hist_df.YEAR >= 2000].YEAR.value_counts().sort_index()

2000.0    191
2001.0    207
2002.0    243
2003.0    341
2004.0    277
2005.0    306
2006.0    315
2007.0    349
2008.0    438
2009.0    254
2010.0    209
2011.0    317
2012.0    351
2013.0    298
2014.0    238
2015.0    318
2016.0    352
2017.0    607
2018.0    413
Name: YEAR, dtype: int64

In [6]:
cal_fire_hist_2000p_df = cal_fire_hist_df[cal_fire_hist_df.YEAR >= 2000]
cal_fire_hist_2000p_df['YEAR'] = cal_fire_hist_2000p_df.YEAR.astype(int)
cal_fire_hist_2000p_df.shape

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


(6024, 19)

### Checking Widfire data quality

In [7]:
def extract_date(x):
    """Extracts year_mo_date for each wildfire"""
    if x:
        date_part = x[:10]
        return date_part.replace('-','_')
    else:
        return 'NA'
cal_fire_hist_2000p_df['ALARM_DATE_DT'] = cal_fire_hist_2000p_df.ALARM_DATE.apply(extract_date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [8]:
# cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT == '2106_09_26']
# cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT == '0208_11_13']
# cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.INC_NUM == '030251']
# cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.FIRE_NAME=='DARK HOLE']

In [9]:
# fixing fire start dates
## obvious typos
cal_fire_hist_2000p_df.loc[19127, 'ALARM_DATE_DT'] = '2016_09_26'
cal_fire_hist_2000p_df.loc[20154, 'ALARM_DATE_DT'] = '2018_11_13'
## negative fire duration made me double check these numbers
cal_fire_hist_2000p_df.loc[18906, 'ALARM_DATE_DT'] = '2015_06_19'
cal_fire_hist_2000p_df.loc[18725, 'ALARM_DATE_DT'] = '2014_07_17'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [10]:
# how many wildfires without start date?
cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT=='NA'].shape

(46, 20)

In [11]:
cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE.isna()].sort_values(by='GIS_ACRES', ascending=False).head()

Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,REPORT_AC,GIS_ACRES,C_METHOD,OBJECTIVE,FIRE_NUM,Shape_Length,Shape_Area,geometry,YEAR,ALARM_DATE_DT
12697,2016,CA,DOD,AFV,CANYON,,,2016-09-28T00:00:00,14.0,2016-CAAFV-003151 geomac,,12713.62207,8.0,1.0,,42096.094909,51450200.0,(POLYGON ((-120.5491337172958 34.6332596225124...,2016,
9362,2004,NV,USF,HTF,GATES COMPLEX,A5TO,,,,,,8905.458984,,,A5TO,37358.742867,36039110.0,(POLYGON ((-119.5536878800478 38.6921777759196...,2004,
18412,2001,CA,NPS,YNP,HOOVER,00001965,,,1.0,,7233.0,7230.623535,7.0,2.0,00001965,108315.518842,29261300.0,(POLYGON ((-119.4739094520905 37.7051893498334...,2001,
9363,2003,CA,USF,CNF,,,,,14.0,,2714.0,2713.937988,8.0,,,13963.115941,10982920.0,(POLYGON ((-116.6539116366962 33.3411918901142...,2003,
12699,2016,CA,DOD,MCP,ROBLAR,,,2016-07-25T00:00:00,14.0,2016-CAMCP-002125 geomac,,1244.5,8.0,1.0,,16578.901384,5036313.0,"(POLYGON ((-117.35411079992 33.41205605265164,...",2016,


In [12]:
# source: https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/
## CANYON 
cal_fire_hist_2000p_df.loc[12697, 'ALARM_DATE_DT'] = '2016_09_19'
## ROBLAR 
cal_fire_hist_2000p_df.loc[12699, 'ALARM_DATE_DT'] = '2016_07_23'
## SKYLINE LRA 
cal_fire_hist_2000p_df.loc[20350, 'ALARM_DATE_DT'] = '2018_07_25'
## WASHINGTON
cal_fire_hist_2000p_df.loc[12700, 'ALARM_DATE_DT'] = '2016_09_28'

# source: http://cdfdata.fire.ca.gov/incidents/incidents_archived
## SCOTT
cal_fire_hist_2000p_df.loc[19361, 'ALARM_DATE_DT'] = '2016_07_10'
## BELL
cal_fire_hist_2000p_df.loc[19364, 'ALARM_DATE_DT'] = '2016_09_23'
## AVOCADO
cal_fire_hist_2000p_df.loc[16432, 'ALARM_DATE_DT'] = '2008_05_20'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [13]:
cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT=='NA'].shape

(39, 20)

In [14]:
cal_fire_hist_2000p_df['ALARM_DATE_DT_DT'] = pd.to_datetime(cal_fire_hist_2000p_df.ALARM_DATE_DT, format='%Y_%m_%d', errors='coerce')
cal_fire_hist_2000p_df['CONT_DATE_DT_DT'] = pd.to_datetime(cal_fire_hist_2000p_df.CONT_DATE, errors='coerce')
cal_fire_hist_2000p_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,...,C_METHOD,OBJECTIVE,FIRE_NUM,Shape_Length,Shape_Area,geometry,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT
0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,,...,8.0,1.0,233414,1902.439051,104152.8,(POLYGON ((-118.4985124819225 34.3824189370446...,2007,2007_10_21,2007-10-21,2007-10-23
1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,,...,8.0,1.0,233077,20407.965662,11431870.0,(POLYGON ((-118.5844782794717 34.4197766738174...,2007,2007_10_22,2007-10-22,2007-10-25
2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,,...,7.0,1.0,166,169150.71569,236378200.0,(POLYGON ((-118.7564468802518 34.5965130520924...,2007,2007_10_20,2007-10-20,2007-11-15
3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,,...,8.0,1.0,201384,6117.777086,696929.2,(POLYGON ((-118.0727716195954 34.5019812566583...,2007,2007_09_11,2007-09-11,2007-09-11
4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,,...,8.0,1.0,259483,22907.182174,19052590.0,(POLYGON ((-118.7440804532698 34.0812976688129...,2007,2007_11_24,2007-11-24,2007-11-27


In [15]:
# fixing fire containment dates because of negative fire duration
cal_fire_hist_2000p_df.loc[18906, 'CONT_DATE_DT_DT'] = pd.to_datetime('2015-07-04', errors='coerce')
cal_fire_hist_2000p_df.loc[18725, 'CONT_DATE_DT_DT'] = pd.to_datetime('2014-08-12', errors='coerce')
cal_fire_hist_2000p_df.loc[123, 'CONT_DATE_DT_DT'] = pd.to_datetime('2006-09-17', errors='coerce')
cal_fire_hist_2000p_df.loc[123, 'YEAR'] = 2006

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [16]:
cal_fire_hist_2000p_df['FIRE_DUR'] = 1 + (cal_fire_hist_2000p_df['CONT_DATE_DT_DT'] - cal_fire_hist_2000p_df['ALARM_DATE_DT_DT'])/ np.timedelta64(1, 'D')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [17]:
cal_fire_hist_2000p_df['FIRE_DUR'].describe()

count    5621.000000
mean       10.022772
std        26.479822
min      -264.000000
25%         1.000000
50%         2.000000
75%         6.000000
max       367.000000
Name: FIRE_DUR, dtype: float64

In [18]:
cal_fire_hist_2000p_df.OBJECTIVE.value_counts()

1.0    5782
2.0      95
Name: OBJECTIVE, dtype: int64

In [19]:
cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.OBJECTIVE==2].CAUSE.value_counts()

1.0     93
9.0      1
14.0     1
Name: CAUSE, dtype: int64

**Create final wildfire dataset:**

1. Drop wildfires without start date
2. Drop wildfires not assigned to CA
3. Drop wildfires with negative duration
4. Drop wildfires with duration over 300 days
5. Create new name to account for wildfires without names
6. For wildfires with the same name/start date create unique id
7. Create custom wildfire ID

In [20]:
cal_fire_hist_2000p_clean_df = cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT!='NA']
cal_fire_hist_2000p_clean_df = cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.STATE=='CA']
cal_fire_hist_2000p_clean_df = cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.FIRE_DUR>0]
cal_fire_hist_2000p_clean_df = cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.FIRE_DUR<300]
cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'] = ['UNKNOWN' if x in ['', ' '] else x for x in cal_fire_hist_2000p_clean_df.FIRE_NAME]
cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'] = [x if x else 'UNKNOWN' for x in cal_fire_hist_2000p_clean_df.FIRE_NAME_NEW]
cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'] = cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'].apply(lambda x: re.sub(r'[^\w\s]','',x.strip().replace(' ', '_')))
cal_fire_hist_2000p_clean_df['DUP_ID'] = cal_fire_hist_2000p_clean_df.groupby(['FIRE_NAME_NEW','ALARM_DATE_DT']).cumcount()
cal_fire_hist_2000p_clean_df['FIRE_CUSTOM_ID'] = cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'] + '_' + cal_fire_hist_2000p_clean_df['ALARM_DATE_DT'] + '_' + cal_fire_hist_2000p_clean_df['DUP_ID'].astype(str)
cal_fire_hist_2000p_clean_df.reset_index(inplace=True)
cal_fire_hist_2000p_clean_df.shape

(5584, 27)

In [21]:
cal_fire_hist_2000p_clean_df.head()

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,Shape_Area,geometry,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID
0,0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,...,104152.8,(POLYGON ((-118.4985124819225 34.3824189370446...,2007,2007_10_21,2007-10-21,2007-10-23,3.0,OCTOBER,0,OCTOBER_2007_10_21_0
1,1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,...,11431870.0,(POLYGON ((-118.5844782794717 34.4197766738174...,2007,2007_10_22,2007-10-22,2007-10-25,4.0,MAGIC,0,MAGIC_2007_10_22_0
2,2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,...,236378200.0,(POLYGON ((-118.7564468802518 34.5965130520924...,2007,2007_10_20,2007-10-20,2007-11-15,27.0,RANCH,0,RANCH_2007_10_20_0
3,3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,...,696929.2,(POLYGON ((-118.0727716195954 34.5019812566583...,2007,2007_09_11,2007-09-11,2007-09-11,1.0,EMMA,0,EMMA_2007_09_11_0
4,4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,...,19052590.0,(POLYGON ((-118.7440804532698 34.0812976688129...,2007,2007_11_24,2007-11-24,2007-11-27,4.0,CORRAL,0,CORRAL_2007_11_24_0


In [22]:
cal_fire_hist_2000p_clean_df.FIRE_CUSTOM_ID.value_counts().sort_index()

09_SHU_LIGHTNING_COMPLEX_2009_08_01_0     1
09_SHU_LIGHTNING_COMPLEX_2009_08_02_0     1
10_2003_09_03_0                           1
111_RAY_SPRINGS_LIGHTNING_2008_06_26_0    1
118_FWY_2015_06_04_0                      1
121_PORK_LIGHTNING_2008_07_04_0           1
128_2002_09_04_0                          1
128_2006_07_07_0                          1
132_2004_07_23_0                          1
132_2009_06_19_0                          1
13_2013_04_29_0                           1
152_2003_06_16_0                          1
152_2004_05_01_0                          1
152_2004_05_01_1                          1
152_FIRE_2005_06_14_0                     1
155_2003_07_11_0                          1
155_FIRE_2012_02_09_0                     1
156_FIRE_2005_11_19_0                     1
165_2001_06_13_0                          1
166_2010_07_12_0                          1
16_SKUNK_2_LIGHTNING_2008_06_25_0         1
198_2010_06_06_0                          1
198_2012_01_09_0                

In [23]:
cal_fire_hist_2000p_clean_df.FIRE_CUSTOM_ID.nunique()

5584

### Loading another wildfire DB for double checking

In [24]:
hist_df = gpd.read_file("./Data/US_HIST_FIRE_PERIMTRS_DD83/US_HIST_FIRE_PERIMTRS_DD83.shp")
hist_df['state'] = hist_df.unit_id.str[:2]
hist = hist_df[hist_df.state=='CA']
hist_df.head()

Unnamed: 0,objectid,year_,acres,fire_name,unit_id,irwinid,fire_num,st_area_sh,st_length_,geometry,state
0,21999,2001,11059.673693,BACON POND,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G290,0.004912,0.464703,POLYGON ((-114.2894247559472 42.60646789757629...,ID
1,22000,2001,72.801837,S WALCOTT,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G296,3.2e-05,0.03365,POLYGON ((-113.4203092376806 42.57474289596877...,ID
2,22001,2001,15.452375,ROCK CREEK,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G303,7e-06,0.0128,POLYGON ((-114.3107468093172 42.38745396363043...,ID
3,22002,2001,101.092962,I84MP246,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G308,4.5e-05,0.051676,POLYGON ((-113.1928317623754 42.31456795155066...,ID
4,22003,2001,191.213671,HWY84MP18,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G312,8.5e-05,0.037007,POLYGON ((-113.4726872483172 42.54493802247265...,ID


In [25]:
hist_df[hist_df.fire_name=='RALSTON']

Unnamed: 0,objectid,year_,acres,fire_name,unit_id,irwinid,fire_num,st_area_sh,st_length_,geometry,state
14640,37784,2006,8447.807089,RALSTON,CA-KNP,{00000000-0000-0000-0000-000000000000},2006-CA-KNP-C0TT,0.003556,0.474796,POLYGON ((-120.7471438786865 39.00449101684785...,CA
21962,47788,2018,63.191321,RALSTON,COLSD,{C7DA352A-6CCA-4FF0-95EE-4975662CD5D2},2018-COLSD-000496,2.7e-05,0.027205,POLYGON ((-107.7267003351517 40.40124067621713...,CO


### Wildfire EDA

In [26]:
cal_fire_hist_2000p_clean_df.GIS_ACRES.describe()

count      5578.000000
mean       2227.786551
std       15381.533061
min           0.001357
25%          14.062096
50%          53.260897
75%         297.765724
max      501082.031250
Name: GIS_ACRES, dtype: float64

In [27]:
cal_fire_hist_2000p_clean_df.FIRE_DUR.describe()

count    5584.000000
mean       10.197708
std        23.915083
min         1.000000
25%         1.000000
50%         2.000000
75%         6.000000
max       223.000000
Name: FIRE_DUR, dtype: float64

In [28]:
cal_fire_hist_2000p_clean_df['ALARM_DATE_MONTH'] = cal_fire_hist_2000p_clean_df.ALARM_DATE_DT_DT.dt.month

In [29]:
cal_fire_hist_2000p_clean_df.ALARM_DATE_MONTH.value_counts().sort_index()

1       73
2       55
3       67
4      138
5      416
6     1039
7     1428
8     1031
9      775
10     342
11     153
12      67
Name: ALARM_DATE_MONTH, dtype: int64

In [30]:
cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.ALARM_DATE_MONTH==12].GIS_ACRES.describe()

count        67.000000
mean       4935.566501
std       34438.425411
min           0.001357
25%           9.684571
50%          19.825445
75%          61.843996
max      281790.875000
Name: GIS_ACRES, dtype: float64

In [31]:
cal_fire_hist_2000p_clean_df[(cal_fire_hist_2000p_clean_df.ALARM_DATE_MONTH==12) & (cal_fire_hist_2000p_clean_df.GIS_ACRES>200000)]

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,geometry,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH
5016,19929,2017,CA,USF,VNC,THOMAS,3583,2017-12-04T00:00:00,2018-01-12T00:00:00,9.0,...,(POLYGON ((-119.6204504308018 34.4438152012489...,2017,2017_12_04,2017-12-04,2018-01-12,40.0,THOMAS,0,THOMAS_2017_12_04_0,12


In [32]:
cal_fire_hist_2000p_clean_df.groupby(['ALARM_DATE_MONTH']).GIS_ACRES.mean()

ALARM_DATE_MONTH
1      278.592930
2      610.646942
3      161.534270
4      179.245391
5      571.839480
6     1672.387259
7     3138.584176
8     2390.907592
9     1554.019401
10    4566.768270
11    2122.960667
12    4935.566501
Name: GIS_ACRES, dtype: float64

### Extracting S2 Cells

In [33]:
def extract_max_polygon(fire_poly):
    """Return the largest polygon for each wildfire multipolygon"""
    fire_map = mapping(fire_poly)
    if 'coordinates' in fire_map:
        coords = fire_map['coordinates']
    elif 'features' in fire_map:
        coords = fire_map['features'][0]['geometry']['coordinates']
    
    max_poly = coords[0][0]
    for i in range(len(coords)):
        if len(coords[i][0]) > len(max_poly):
            max_poly = coords[i][0]
    return max_poly

In [34]:
cal_fire_hist_2000p_clean_df['FIRE_POLY'] = cal_fire_hist_2000p_clean_df.geometry.apply(extract_max_polygon)
cal_fire_hist_2000p_clean_df.head()

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH,FIRE_POLY
0,0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,...,2007,2007_10_21,2007-10-21,2007-10-23,3.0,OCTOBER,0,OCTOBER_2007_10_21_0,10,"((-118.4985124819225, 34.38241893704466), (-11..."
1,1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,...,2007,2007_10_22,2007-10-22,2007-10-25,4.0,MAGIC,0,MAGIC_2007_10_22_0,10,"((-118.5844782794717, 34.41977667381746), (-11..."
2,2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,...,2007,2007_10_20,2007-10-20,2007-11-15,27.0,RANCH,0,RANCH_2007_10_20_0,10,"((-118.7564468802518, 34.596513052092455), (-1..."
3,3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,...,2007,2007_09_11,2007-09-11,2007-09-11,1.0,EMMA,0,EMMA_2007_09_11_0,9,"((-118.07277161959536, 34.5019812566583), (-11..."
4,4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,...,2007,2007_11_24,2007-11-24,2007-11-27,4.0,CORRAL,0,CORRAL_2007_11_24_0,11,"((-118.7440804532698, 34.08129766881295), (-11..."


In [35]:
def create_S2_loop(max_poly):
    """Converts Polygon into S2 Loop"""
    points = []
    for coord in tuple(reversed(max_poly)):
        long, lat = coord
        latlng = s2.S2LatLng.FromDegrees(lat, long)
        points.append(latlng.ToPoint())
    return s2.S2Loop(points)

In [36]:
cal_fire_hist_2000p_clean_df['FIRE_S2_LOOP'] = cal_fire_hist_2000p_clean_df.FIRE_POLY.apply(create_S2_loop)
cal_fire_hist_2000p_clean_df.head()

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH,FIRE_POLY,FIRE_S2_LOOP
0,0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,...,2007_10_21,2007-10-21,2007-10-23,3.0,OCTOBER,0,OCTOBER_2007_10_21_0,10,"((-118.4985124819225, 34.38241893704466), (-11...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
1,1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,...,2007_10_22,2007-10-22,2007-10-25,4.0,MAGIC,0,MAGIC_2007_10_22_0,10,"((-118.5844782794717, 34.41977667381746), (-11...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
2,2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,...,2007_10_20,2007-10-20,2007-11-15,27.0,RANCH,0,RANCH_2007_10_20_0,10,"((-118.7564468802518, 34.596513052092455), (-1...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
3,3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,...,2007_09_11,2007-09-11,2007-09-11,1.0,EMMA,0,EMMA_2007_09_11_0,9,"((-118.07277161959536, 34.5019812566583), (-11...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
4,4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,...,2007_11_24,2007-11-24,2007-11-27,4.0,CORRAL,0,CORRAL_2007_11_24_0,11,"((-118.7440804532698, 34.08129766881295), (-11...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...


In [37]:
def create_S2_coverer(loop, lvl):
    """Generates a list of S2 Cells of specified level"""
    coverer = s2.S2RegionCoverer()
    coverer.set_min_level(lvl)
    coverer.set_max_level(lvl)
    return coverer.GetCovering(loop)

In [38]:
# coordinates create incorrect loops
trouble_loops = [
    'MILLERTON_2008_06_18_0',
    'OLSEN_2015_07_30_0',
    'POTRERO_2015_05_01_0',
    'RIVER_2016_06_26_0',
    'FREEDOM_2017_07_04_0'
]
trouble_loops_old_index = [
    16181,
    19004,
    19037,
    19163,
    19854
]

In [39]:
cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.FIRE_CUSTOM_ID.isin(trouble_loops)]

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH,FIRE_POLY,FIRE_S2_LOOP
1994,16181,2008,CA,CDF,MMU,MILLERTON,7784,2008-06-18T00:00:00,2008-06-19T00:00:00,14.0,...,2008_06_18,2008-06-18,2008-06-19,2.0,MILLERTON,0,MILLERTON_2008_06_18_0,6,"((-119.69404901416097, 37.03096503161547), (-1...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
4194,19004,2015,CA,USF,SHF,OLSEN,2108,2015-07-30T00:00:00,2015-08-13T00:00:00,1.0,...,2015_07_30,2015-07-30,2015-08-13,15.0,OLSEN,0,OLSEN_2015_07_30_0,7,"((-123.42975081422335, 40.724486208815804), (-...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
4225,19037,2015,CA,CCO,VNC,POTRERO,28865,2015-05-01T00:00:00,2015-05-01T00:00:00,14.0,...,2015_05_01,2015-05-01,2015-05-01,1.0,POTRERO,0,POTRERO_2015_05_01_0,5,"((-118.87445906614587, 34.14524176133166), (-1...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
4344,19163,2016,CA,CDF,SLU,RIVER,6897,2016-06-26T00:00:00,2016-06-26T00:00:00,9.0,...,2016_06_26,2016-06-26,2016-06-26,1.0,RIVER,0,RIVER_2016_06_26_0,6,"((-120.68454700020713, 35.676515000013474), (-...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
4942,19854,2017,CA,CDF,FKU,FREEDOM,9398,2017-07-04T00:00:00,2017-07-04T00:00:00,2.0,...,2017_07_04,2017-07-04,2017-07-04,1.0,FREEDOM,0,FREEDOM_2017_07_04_0,7,"((-119.51218452463861, 36.82090004224125), (-1...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...


In [40]:
trouble_loops_new_index = cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.FIRE_CUSTOM_ID.isin(trouble_loops)].index
cal_fire_hist_2000p_clean2_df = cal_fire_hist_2000p_clean_df.drop(index=trouble_loops_new_index).reset_index()
cal_fire_hist_2000p_clean2_df.drop(columns=['level_0', 'index'], inplace=True)
cal_fire_hist_2000p_clean2_df.shape

(5579, 29)

In [41]:
cal_fire_hist_2000p_clean2_df.tail()

Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,...,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH,FIRE_POLY,FIRE_S2_LOOP
5574,2018,CA,CDF,SLU,RESERVOIR,10073,2018-09-16T00:00:00,2018-09-16T00:00:00,11.0,Electrical Power,...,2018_09_16,2018-09-16,2018-09-16,1.0,RESERVOIR,0,RESERVOIR_2018_09_16_0,9,"((-120.63711999984076, 35.304379999726656), (-...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
5575,2018,CA,CDF,SLU,COLONY,10007,2018-09-14T00:00:00,2018-09-14T00:00:00,11.0,Electrical Power,...,2018_09_14,2018-09-14,2018-09-14,1.0,COLONY,0,COLONY_2018_09_14_0,9,"((-120.6971999995147, 35.328551999973335), (-1...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
5576,2018,CA,CDF,SLU,CAMINO,11247,2018-10-19T00:00:00,2018-10-19T00:00:00,10.0,No damage to structures or infrastructure,...,2018_10_19,2018-10-19,2018-10-19,1.0,CAMINO,0,CAMINO_2018_10_19_0,10,"((-120.48996000052175, 35.178769999849386), (-...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
5577,2018,CA,BIA,HIA,SIGNBOARD,174,2018-11-16T00:00:00,2018-11-17T00:00:00,,,...,2018_11_16,2018-11-16,2018-11-17,2.0,SIGNBOARD,0,SIGNBOARD_2018_11_16_0,11,"((-123.56902470487695, 41.03555498079958), (-1...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
5578,2018,CA,BIA,HIA,MILL CREEK 1,115,2018-09-05T00:00:00,2018-09-06T00:00:00,,,...,2018_09_05,2018-09-05,2018-09-06,2.0,MILL_CREEK_1,0,MILL_CREEK_1_2018_09_05_0,9,"((-123.63136870902643, 41.161147784378954), (-...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...


## S2 Cell Level 14: 0.32 sq km or 74 acres

In [58]:
start = time.time()
cal_fire_hist_2000p_clean2_df['S2_Cells'] = cal_fire_hist_2000p_clean2_df.FIRE_S2_LOOP.apply(create_S2_coverer, args=[13])
print(time.time()-start)

0.20633697509765625


In [59]:
cal_fire_hist_2000p_clean2_df.head()

Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,...,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH,FIRE_POLY,FIRE_S2_LOOP,S2_Cells
0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,,...,2007-10-21,2007-10-23,3.0,OCTOBER,0,OCTOBER_2007_10_21_0,10,"((-118.4985124819225, 34.38241893704466), (-11...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...,"(4/0012011003000�, 4/0012011003001�, 4/0012011..."
1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,,...,2007-10-22,2007-10-25,4.0,MAGIC,0,MAGIC_2007_10_22_0,10,"((-118.5844782794717, 34.41977667381746), (-11...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...,"(4/0012010333322�, 4/0012010333323�, 4/0012010..."
2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,,...,2007-10-20,2007-11-15,27.0,RANCH,0,RANCH_2007_10_20_0,10,"((-118.7564468802518, 34.596513052092455), (-1...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...,"(4/0012010330100�, 4/0012010330102�, 4/0012010..."
3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,,...,2007-09-11,2007-09-11,1.0,EMMA,0,EMMA_2007_09_11_0,9,"((-118.07277161959536, 34.5019812566583), (-11...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...,"(4/0012011330201�, 4/0012011330310�)"
4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,,...,2007-11-24,2007-11-27,4.0,CORRAL,0,CORRAL_2007_11_24_0,11,"((-118.7440804532698, 34.08129766881295), (-11...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...,"(4/0013100030111�, 4/0013100030112�, 4/0013100..."


In [44]:
# for i in range(4703,5960):
#     temp = cal_fire_hist_2000p_clean_df.loc[i, 'FIRE_S2_LOOP']
#     print(i)
#     create_S2_coverer(temp, 11)

In [45]:
def split_data_frame_list(df, target_column, row_id):
    """
    Splits a column with lists into rows
    
    Keyword arguments:
        df -- dataframe
        target_column -- name of column that contains lists        
    """
    
    # create a new dataframe with each item in a seperate column, dropping rows with missing values
    col_df = pd.DataFrame(df[target_column].tolist())\
                .join(df)\
                .drop(columns=[target_column])\
                .set_index(row_id)

    # create a series with columns stacked as rows         
    stacked = col_df.stack()\
                    .reset_index()\
                    .drop(columns='level_1')
    stacked.columns = [row_id, target_column]

    return stacked

In [60]:
cal_fire_hist_2000p_clean2_df.shape

(5579, 30)

In [61]:
cal_fire_s2_df = cal_fire_hist_2000p_clean2_df[['FIRE_CUSTOM_ID', 'S2_Cells']]
cal_fire_s2_df = split_data_frame_list(cal_fire_s2_df, 'S2_Cells', 'FIRE_CUSTOM_ID')
cal_fire_s2_df['S2_Cells_ID'] = cal_fire_s2_df.S2_Cells.apply(lambda x: x.ToToken())
cal_fire_s2_df.shape

(75642, 3)

In [62]:
cal_fire_s2_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells,S2_Cells_ID
0,OCTOBER_2007_10_21_0,4/0012011003000�,80c28604
1,OCTOBER_2007_10_21_0,4/0012011003001�,80c2860c
2,OCTOBER_2007_10_21_0,4/0012011003002�,80c28614
3,OCTOBER_2007_10_21_0,4/0012011003003�,80c2861c
4,MAGIC_2007_10_22_0,4/0012010333322�,80c27fd4


In [63]:
cal_fire_s2_df.groupby(['FIRE_CUSTOM_ID']).count().describe()

Unnamed: 0,S2_Cells,S2_Cells_ID
count,5579.0,5579.0
mean,13.558344,13.558344
std,67.717759,67.717759
min,1.0,1.0
25%,1.0,1.0
50%,3.0,3.0
75%,5.0,5.0
max,2087.0,2087.0


In [64]:
len(set(cal_fire_s2_df.S2_Cells_ID))

63603

In [45]:
# cal_fire_hist_2000p_clean2_df['S2_Cells_2lvldown'] = cal_fire_hist_2000p_clean2_df.FIRE_S2_LOOP.apply(create_S2_coverer, args=[16])
# cal_fire_s2_df2 = cal_fire_hist_2000p_clean2_df[['FIRE_CUSTOM_ID', 'S2_Cells_2lvldown']]
# cal_fire_s2_df2 = split_data_frame_list(cal_fire_s2_df2, 'S2_Cells_2lvldown', 'FIRE_CUSTOM_ID')
# cal_fire_s2_df2.shape

In [65]:
keep_cols = [
    'AGENCY',
    'UNIT_ID',
    'CAUSE',
#     'COMMENTS',
    'GIS_ACRES',
    'C_METHOD',
    'OBJECTIVE',
    'Shape_Length',
    'Shape_Area',
#     'geometry',
    'YEAR',
    'ALARM_DATE_DT_DT',
    'CONT_DATE_DT_DT',
    'FIRE_DUR',
    'FIRE_CUSTOM_ID',
    'ALARM_DATE_MONTH'
]

In [66]:
cal_fire_s2id_df = cal_fire_s2_df[['FIRE_CUSTOM_ID', 'S2_Cells_ID']].set_index('FIRE_CUSTOM_ID').join(cal_fire_hist_2000p_clean2_df[keep_cols].set_index('FIRE_CUSTOM_ID'), on='FIRE_CUSTOM_ID', how='left').reset_index()
cal_fire_s2id_df.shape

(75642, 15)

In [68]:
cal_fire_s2id_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells_ID,AGENCY,UNIT_ID,CAUSE,GIS_ACRES,C_METHOD,OBJECTIVE,Shape_Length,Shape_Area,YEAR,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,ALARM_DATE_MONTH
0,OCTOBER_2007_10_21_0,80c28604,CCO,LAC,14.0,25.736713,8.0,1.0,1902.439051,104152.8,2007,2007-10-21,2007-10-23,3.0,10
1,OCTOBER_2007_10_21_0,80c2860c,CCO,LAC,14.0,25.736713,8.0,1.0,1902.439051,104152.8,2007,2007-10-21,2007-10-23,3.0,10
2,OCTOBER_2007_10_21_0,80c28614,CCO,LAC,14.0,25.736713,8.0,1.0,1902.439051,104152.8,2007,2007-10-21,2007-10-23,3.0,10
3,OCTOBER_2007_10_21_0,80c2861c,CCO,LAC,14.0,25.736713,8.0,1.0,1902.439051,104152.8,2007,2007-10-21,2007-10-23,3.0,10
4,MAGIC_2007_10_22_0,80c27fd4,CCO,LAC,14.0,2824.877197,8.0,1.0,20407.965662,11431870.0,2007,2007-10-22,2007-10-25,4.0,10


In [69]:
cal_fire_s2id_df.to_csv('./Data/Processed/WildFire_S2Cells13_nogeom.csv', index=False)

In [56]:
for i in list(cal_fire_s2id_df):
    print(i, ":")

FIRE_CUSTOM_ID :
S2_Cells_ID :
AGENCY :
UNIT_ID :
CAUSE :
GIS_ACRES :
C_METHOD :
OBJECTIVE :
Shape_Length :
Shape_Area :
YEAR :
ALARM_DATE_DT_DT :
CONT_DATE_DT_DT :
FIRE_DUR :
ALARM_DATE_MONTH :


In [165]:
1+1

2