In [105]:
# increase cell width
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [106]:
import gc
import time
import re
import s2_py as s2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import shapefile as shp
import geopandas as gpd
from shapely.geometry import Polygon, mapping, box
from datetime import date, timedelta

### Wildfire Perimeters Data

Source: https://frap.fire.ca.gov/media/2525/fire18_1.zip

Data Description: https://frap.fire.ca.gov/frap-projects/fire-perimeters/

Incident data: https://www.fire.ca.gov/incidents/

In [107]:
cal_fire_hist_df = gpd.read_file("./Data/fire18_1.gdb", layer=1)
cal_fire_hist_df.shape

(20508, 18)

In [108]:
cal_fire_hist_df = cal_fire_hist_df.to_crs({'init': 'epsg:4326'})
cal_fire_hist_df.head()

Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,REPORT_AC,GIS_ACRES,C_METHOD,OBJECTIVE,FIRE_NUM,Shape_Length,Shape_Area,geometry
0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,,,25.736713,8.0,1.0,233414,1902.439051,104152.8,(POLYGON ((-118.4985124819225 34.3824189370446...
1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,,,2824.877197,8.0,1.0,233077,20407.965662,11431870.0,(POLYGON ((-118.5844782794717 34.4197766738174...
2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,,54716.0,58410.335938,7.0,1.0,166,169150.71569,236378200.0,(POLYGON ((-118.7564468802518 34.5965130520924...
3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,,,172.214951,8.0,1.0,201384,6117.777086,696929.2,(POLYGON ((-118.0727716195954 34.5019812566583...
4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,,,4707.99707,8.0,1.0,259483,22907.182174,19052590.0,(POLYGON ((-118.7440804532698 34.0812976688129...


In [109]:
cal_fire_hist_df['YEAR'] = pd.to_numeric(cal_fire_hist_df.YEAR_)
cal_fire_hist_df[cal_fire_hist_df.YEAR >= 2000].YEAR.value_counts().sort_index()

2000.0    191
2001.0    207
2002.0    243
2003.0    341
2004.0    277
2005.0    306
2006.0    315
2007.0    349
2008.0    438
2009.0    254
2010.0    209
2011.0    317
2012.0    351
2013.0    298
2014.0    238
2015.0    318
2016.0    352
2017.0    607
2018.0    413
Name: YEAR, dtype: int64

In [110]:
cal_fire_hist_2000p_df = cal_fire_hist_df[cal_fire_hist_df.YEAR >= 2000]
cal_fire_hist_2000p_df['YEAR'] = cal_fire_hist_2000p_df.YEAR.astype(int)
cal_fire_hist_2000p_df.shape

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


(6024, 19)

### Checking Widfire data quality

In [111]:
def extract_date(x):
    """Extracts year_mo_date for each wildfire"""
    if x:
        date_part = x[:10]
        return date_part.replace('-','_')
    else:
        return 'NA'
cal_fire_hist_2000p_df['ALARM_DATE_DT'] = cal_fire_hist_2000p_df.ALARM_DATE.apply(extract_date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [112]:
# cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT == '2106_09_26']
# cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT == '0208_11_13']
# cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.INC_NUM == '030251']
# cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.FIRE_NAME=='DARK HOLE']

In [113]:
# fixing fire start dates
## obvious typos
cal_fire_hist_2000p_df.loc[19127, 'ALARM_DATE_DT'] = '2016_09_26'
cal_fire_hist_2000p_df.loc[20154, 'ALARM_DATE_DT'] = '2018_11_13'
## negative fire duration made me double check these numbers
cal_fire_hist_2000p_df.loc[18906, 'ALARM_DATE_DT'] = '2015_06_19'
cal_fire_hist_2000p_df.loc[18725, 'ALARM_DATE_DT'] = '2014_07_17'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [114]:
# how many wildfires without start date?
cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT=='NA'].shape

(46, 20)

In [115]:
cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE.isna()].sort_values(by='GIS_ACRES', ascending=False).head()

Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,REPORT_AC,GIS_ACRES,C_METHOD,OBJECTIVE,FIRE_NUM,Shape_Length,Shape_Area,geometry,YEAR,ALARM_DATE_DT
12697,2016,CA,DOD,AFV,CANYON,,,2016-09-28T00:00:00,14.0,2016-CAAFV-003151 geomac,,12713.62207,8.0,1.0,,42096.094909,51450200.0,(POLYGON ((-120.5491337172958 34.6332596225124...,2016,
9362,2004,NV,USF,HTF,GATES COMPLEX,A5TO,,,,,,8905.458984,,,A5TO,37358.742867,36039110.0,(POLYGON ((-119.5536878800478 38.6921777759196...,2004,
18412,2001,CA,NPS,YNP,HOOVER,00001965,,,1.0,,7233.0,7230.623535,7.0,2.0,00001965,108315.518842,29261300.0,(POLYGON ((-119.4739094520905 37.7051893498334...,2001,
9363,2003,CA,USF,CNF,,,,,14.0,,2714.0,2713.937988,8.0,,,13963.115941,10982920.0,(POLYGON ((-116.6539116366962 33.3411918901142...,2003,
12699,2016,CA,DOD,MCP,ROBLAR,,,2016-07-25T00:00:00,14.0,2016-CAMCP-002125 geomac,,1244.5,8.0,1.0,,16578.901384,5036313.0,"(POLYGON ((-117.35411079992 33.41205605265164,...",2016,


In [116]:
# source: https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/
## CANYON 
cal_fire_hist_2000p_df.loc[12697, 'ALARM_DATE_DT'] = '2016_09_19'
## ROBLAR 
cal_fire_hist_2000p_df.loc[12699, 'ALARM_DATE_DT'] = '2016_07_23'
## SKYLINE LRA 
cal_fire_hist_2000p_df.loc[20350, 'ALARM_DATE_DT'] = '2018_07_25'
## WASHINGTON
cal_fire_hist_2000p_df.loc[12700, 'ALARM_DATE_DT'] = '2016_09_28'

# source: http://cdfdata.fire.ca.gov/incidents/incidents_archived
## SCOTT
cal_fire_hist_2000p_df.loc[19361, 'ALARM_DATE_DT'] = '2016_07_10'
## BELL
cal_fire_hist_2000p_df.loc[19364, 'ALARM_DATE_DT'] = '2016_09_23'
## AVOCADO
cal_fire_hist_2000p_df.loc[16432, 'ALARM_DATE_DT'] = '2008_05_20'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [117]:
cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT=='NA'].shape

(39, 20)

In [118]:
cal_fire_hist_2000p_df['ALARM_DATE_DT_DT'] = pd.to_datetime(cal_fire_hist_2000p_df.ALARM_DATE_DT, format='%Y_%m_%d', errors='coerce')
cal_fire_hist_2000p_df['CONT_DATE_DT_DT'] = pd.to_datetime(cal_fire_hist_2000p_df.CONT_DATE, errors='coerce')
cal_fire_hist_2000p_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,...,C_METHOD,OBJECTIVE,FIRE_NUM,Shape_Length,Shape_Area,geometry,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT
0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,,...,8.0,1.0,233414,1902.439051,104152.8,(POLYGON ((-118.4985124819225 34.3824189370446...,2007,2007_10_21,2007-10-21,2007-10-23
1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,,...,8.0,1.0,233077,20407.965662,11431870.0,(POLYGON ((-118.5844782794717 34.4197766738174...,2007,2007_10_22,2007-10-22,2007-10-25
2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,,...,7.0,1.0,166,169150.71569,236378200.0,(POLYGON ((-118.7564468802518 34.5965130520924...,2007,2007_10_20,2007-10-20,2007-11-15
3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,,...,8.0,1.0,201384,6117.777086,696929.2,(POLYGON ((-118.0727716195954 34.5019812566583...,2007,2007_09_11,2007-09-11,2007-09-11
4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,,...,8.0,1.0,259483,22907.182174,19052590.0,(POLYGON ((-118.7440804532698 34.0812976688129...,2007,2007_11_24,2007-11-24,2007-11-27


In [119]:
# fixing fire containment dates because of negative fire duration
cal_fire_hist_2000p_df.loc[18906, 'CONT_DATE_DT_DT'] = pd.to_datetime('2015-07-04', errors='coerce')
cal_fire_hist_2000p_df.loc[18725, 'CONT_DATE_DT_DT'] = pd.to_datetime('2014-08-12', errors='coerce')
cal_fire_hist_2000p_df.loc[123, 'CONT_DATE_DT_DT'] = pd.to_datetime('2006-09-17', errors='coerce')
cal_fire_hist_2000p_df.loc[123, 'YEAR'] = 2006

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [120]:
cal_fire_hist_2000p_df['FIRE_DUR'] = 1 + (cal_fire_hist_2000p_df['CONT_DATE_DT_DT'] - cal_fire_hist_2000p_df['ALARM_DATE_DT_DT'])/ np.timedelta64(1, 'D')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [121]:
cal_fire_hist_2000p_df['FIRE_DUR'].describe()

count    5621.000000
mean       10.022772
std        26.479822
min      -264.000000
25%         1.000000
50%         2.000000
75%         6.000000
max       367.000000
Name: FIRE_DUR, dtype: float64

In [122]:
cal_fire_hist_2000p_df.OBJECTIVE.value_counts()

1.0    5782
2.0      95
Name: OBJECTIVE, dtype: int64

In [123]:
cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.OBJECTIVE==2].CAUSE.value_counts()

1.0     93
9.0      1
14.0     1
Name: CAUSE, dtype: int64

**Create final wildfire dataset:**

1. Drop wildfires without start date
2. Drop wildfires not assigned to CA
3. Drop wildfires with negative duration
4. Drop wildfires with duration over 300 days
5. Create new name to account for wildfires without names
6. For wildfires with the same name/start date create unique id
7. Create custom wildfire ID

In [124]:
cal_fire_hist_2000p_clean_df = cal_fire_hist_2000p_df[cal_fire_hist_2000p_df.ALARM_DATE_DT!='NA']
cal_fire_hist_2000p_clean_df = cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.STATE=='CA']
cal_fire_hist_2000p_clean_df = cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.FIRE_DUR>0]
cal_fire_hist_2000p_clean_df = cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.FIRE_DUR<300]
cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'] = ['UNKNOWN' if x in ['', ' '] else x for x in cal_fire_hist_2000p_clean_df.FIRE_NAME]
cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'] = [x if x else 'UNKNOWN' for x in cal_fire_hist_2000p_clean_df.FIRE_NAME_NEW]
cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'] = cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'].apply(lambda x: re.sub(r'[^\w\s]','',x.strip().replace(' ', '_')))
cal_fire_hist_2000p_clean_df['DUP_ID'] = cal_fire_hist_2000p_clean_df.groupby(['FIRE_NAME_NEW','ALARM_DATE_DT']).cumcount()
cal_fire_hist_2000p_clean_df['FIRE_CUSTOM_ID'] = cal_fire_hist_2000p_clean_df['FIRE_NAME_NEW'] + '_' + cal_fire_hist_2000p_clean_df['ALARM_DATE_DT'] + '_' + cal_fire_hist_2000p_clean_df['DUP_ID'].astype(str)
cal_fire_hist_2000p_clean_df.reset_index(inplace=True)
cal_fire_hist_2000p_clean_df.shape

(5584, 27)

In [125]:
cal_fire_hist_2000p_clean_df.head()

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,Shape_Area,geometry,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID
0,0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,...,104152.8,(POLYGON ((-118.4985124819225 34.3824189370446...,2007,2007_10_21,2007-10-21,2007-10-23,3.0,OCTOBER,0,OCTOBER_2007_10_21_0
1,1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,...,11431870.0,(POLYGON ((-118.5844782794717 34.4197766738174...,2007,2007_10_22,2007-10-22,2007-10-25,4.0,MAGIC,0,MAGIC_2007_10_22_0
2,2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,...,236378200.0,(POLYGON ((-118.7564468802518 34.5965130520924...,2007,2007_10_20,2007-10-20,2007-11-15,27.0,RANCH,0,RANCH_2007_10_20_0
3,3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,...,696929.2,(POLYGON ((-118.0727716195954 34.5019812566583...,2007,2007_09_11,2007-09-11,2007-09-11,1.0,EMMA,0,EMMA_2007_09_11_0
4,4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,...,19052590.0,(POLYGON ((-118.7440804532698 34.0812976688129...,2007,2007_11_24,2007-11-24,2007-11-27,4.0,CORRAL,0,CORRAL_2007_11_24_0


In [126]:
cal_fire_hist_2000p_clean_df.FIRE_CUSTOM_ID.value_counts().sort_index()

09_SHU_LIGHTNING_COMPLEX_2009_08_01_0     1
09_SHU_LIGHTNING_COMPLEX_2009_08_02_0     1
10_2003_09_03_0                           1
111_RAY_SPRINGS_LIGHTNING_2008_06_26_0    1
118_FWY_2015_06_04_0                      1
121_PORK_LIGHTNING_2008_07_04_0           1
128_2002_09_04_0                          1
128_2006_07_07_0                          1
132_2004_07_23_0                          1
132_2009_06_19_0                          1
13_2013_04_29_0                           1
152_2003_06_16_0                          1
152_2004_05_01_0                          1
152_2004_05_01_1                          1
152_FIRE_2005_06_14_0                     1
155_2003_07_11_0                          1
155_FIRE_2012_02_09_0                     1
156_FIRE_2005_11_19_0                     1
165_2001_06_13_0                          1
166_2010_07_12_0                          1
16_SKUNK_2_LIGHTNING_2008_06_25_0         1
198_2010_06_06_0                          1
198_2012_01_09_0                

In [127]:
cal_fire_hist_2000p_clean_df.FIRE_CUSTOM_ID.nunique()

5584

### Loading another wildfire DB for double checking

In [128]:
hist_df = gpd.read_file("./Data/US_HIST_FIRE_PERIMTRS_DD83/US_HIST_FIRE_PERIMTRS_DD83.shp")
hist_df['state'] = hist_df.unit_id.str[:2]
hist = hist_df[hist_df.state=='CA']
hist_df.head()

Unnamed: 0,objectid,year_,acres,fire_name,unit_id,irwinid,fire_num,st_area_sh,st_length_,geometry,state
0,21999,2001,11059.673693,BACON POND,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G290,0.004912,0.464703,POLYGON ((-114.2894247559472 42.60646789757629...,ID
1,22000,2001,72.801837,S WALCOTT,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G296,3.2e-05,0.03365,POLYGON ((-113.4203092376806 42.57474289596877...,ID
2,22001,2001,15.452375,ROCK CREEK,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G303,7e-06,0.0128,POLYGON ((-114.3107468093172 42.38745396363043...,ID
3,22002,2001,101.092962,I84MP246,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G308,4.5e-05,0.051676,POLYGON ((-113.1928317623754 42.31456795155066...,ID
4,22003,2001,191.213671,HWY84MP18,ID-SID,{00000000-0000-0000-0000-000000000000},2001-ID-SID-G312,8.5e-05,0.037007,POLYGON ((-113.4726872483172 42.54493802247265...,ID


In [129]:
hist_df[hist_df.fire_name=='RALSTON']

Unnamed: 0,objectid,year_,acres,fire_name,unit_id,irwinid,fire_num,st_area_sh,st_length_,geometry,state
14640,37784,2006,8447.807089,RALSTON,CA-KNP,{00000000-0000-0000-0000-000000000000},2006-CA-KNP-C0TT,0.003556,0.474796,POLYGON ((-120.7471438786865 39.00449101684785...,CA
21962,47788,2018,63.191321,RALSTON,COLSD,{C7DA352A-6CCA-4FF0-95EE-4975662CD5D2},2018-COLSD-000496,2.7e-05,0.027205,POLYGON ((-107.7267003351517 40.40124067621713...,CO


### Wildfire EDA

In [130]:
cal_fire_hist_2000p_clean_df.GIS_ACRES.describe()

count      5578.000000
mean       2227.786551
std       15381.533061
min           0.001357
25%          14.062096
50%          53.260897
75%         297.765724
max      501082.031250
Name: GIS_ACRES, dtype: float64

In [131]:
cal_fire_hist_2000p_clean_df.FIRE_DUR.describe()

count    5584.000000
mean       10.197708
std        23.915083
min         1.000000
25%         1.000000
50%         2.000000
75%         6.000000
max       223.000000
Name: FIRE_DUR, dtype: float64

In [132]:
cal_fire_hist_2000p_clean_df['ALARM_DATE_MONTH'] = cal_fire_hist_2000p_clean_df.ALARM_DATE_DT_DT.dt.month

In [133]:
cal_fire_hist_2000p_clean_df.ALARM_DATE_MONTH.value_counts().sort_index()

1       73
2       55
3       67
4      138
5      416
6     1039
7     1428
8     1031
9      775
10     342
11     153
12      67
Name: ALARM_DATE_MONTH, dtype: int64

In [134]:
cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.ALARM_DATE_MONTH==12].GIS_ACRES.describe()

count        67.000000
mean       4935.566501
std       34438.425411
min           0.001357
25%           9.684571
50%          19.825445
75%          61.843996
max      281790.875000
Name: GIS_ACRES, dtype: float64

In [135]:
cal_fire_hist_2000p_clean_df[(cal_fire_hist_2000p_clean_df.ALARM_DATE_MONTH==12) & (cal_fire_hist_2000p_clean_df.GIS_ACRES>200000)]

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,geometry,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH
5016,19929,2017,CA,USF,VNC,THOMAS,3583,2017-12-04T00:00:00,2018-01-12T00:00:00,9.0,...,(POLYGON ((-119.6204504308018 34.4438152012489...,2017,2017_12_04,2017-12-04,2018-01-12,40.0,THOMAS,0,THOMAS_2017_12_04_0,12


In [136]:
cal_fire_hist_2000p_clean_df.groupby(['ALARM_DATE_MONTH']).GIS_ACRES.mean()

ALARM_DATE_MONTH
1      278.592930
2      610.646942
3      161.534270
4      179.245391
5      571.839480
6     1672.387259
7     3138.584176
8     2390.907592
9     1554.019401
10    4566.768270
11    2122.960667
12    4935.566501
Name: GIS_ACRES, dtype: float64

### Extracting S2 Cells

In [137]:
def create_S2_loop(geometry):
    """Creates a list of S2 Loop for polygons/multipolygons"""
    s2_loops = []
    mapping_dict = mapping(geometry)
    coords = mapping_dict['coordinates']
    if mapping_dict['type']=='Polygon':
        points = []
        for coord in tuple(reversed(coords[0])):
            long, lat = coord
            latlng = s2.S2LatLng.FromDegrees(lat, long)
            points.append(latlng.ToPoint())        
        s2_loops.append(s2.S2Loop(points))
    elif mapping_dict['type']=='MultiPolygon':
        for coord in coords:
            points = []
            for subcoord in tuple(reversed(coord[0])):
                long, lat = subcoord
                latlng = s2.S2LatLng.FromDegrees(lat, long)
                points.append(latlng.ToPoint())
            s2_loops.append(s2.S2Loop(points))
    return s2_loops

def split_data_frame_list(df, target_column, row_id):
    """
    Splits a column with lists into rows
    
    Keyword arguments:
        df -- dataframe
        target_column -- name of column that contains lists        
    """
    
    # create a new dataframe with each item in a seperate column, dropping rows with missing values
    col_df = pd.DataFrame(df[target_column].tolist())\
                .join(df)\
                .drop(columns=[target_column])\
                .set_index(row_id)

    # create a series with columns stacked as rows         
    stacked = col_df.stack()\
                    .reset_index()\
                    .drop(columns='level_1')
    stacked.columns = [row_id, target_column]

    return stacked

def create_S2_coverer(loop, lvl):
    """Generates a list of S2 Cells of specified level"""
    coverer = s2.S2RegionCoverer()
    coverer.set_min_level(lvl)
    coverer.set_max_level(lvl)
    return coverer.GetCovering(loop)

In [138]:
cal_fire_hist_2000p_clean_df['FIRE_S2_LOOP'] = cal_fire_hist_2000p_clean_df.geometry.apply(create_S2_loop)
cal_fire_hist_2000p_clean_df.head()

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH,FIRE_S2_LOOP
0,0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,...,2007,2007_10_21,2007-10-21,2007-10-23,3.0,OCTOBER,0,OCTOBER_2007_10_21_0,10,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
1,1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,...,2007,2007_10_22,2007-10-22,2007-10-25,4.0,MAGIC,0,MAGIC_2007_10_22_0,10,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
2,2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,...,2007,2007_10_20,2007-10-20,2007-11-15,27.0,RANCH,0,RANCH_2007_10_20_0,10,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
3,3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,...,2007,2007_09_11,2007-09-11,2007-09-11,1.0,EMMA,0,EMMA_2007_09_11_0,9,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
4,4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,...,2007,2007_11_24,2007-11-24,2007-11-27,4.0,CORRAL,0,CORRAL_2007_11_24_0,11,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...


In [139]:
cal_fire_hist_2000p_clean_df.FIRE_CUSTOM_ID.nunique(), cal_fire_hist_2000p_clean_df.shape[0]

(5584, 5584)

In [140]:
cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.YEAR>=2016].head()

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH,FIRE_S2_LOOP
1033,11962,2016,CA,NPS,BNP,INDIAN WELLS,970,2016-09-29T00:00:00,2016-09-30T00:00:00,14.0,...,2016,2016_09_29,2016-09-29,2016-09-30,2.0,INDIAN_WELLS,0,INDIAN_WELLS_2016_09_29_0,9,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
1034,11963,2016,CA,NPS,RNP,PELICAN,1608,2016-07-07T00:00:00,2016-07-07T00:00:00,14.0,...,2016,2016_07_07,2016-07-07,2016-07-07,1.0,PELICAN,0,PELICAN_2016_07_07_0,7,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
1035,11964,2016,CA,NPS,SMP,COAST,896,2016-12-02T00:00:00,2016-12-02T00:00:00,14.0,...,2016,2016_12_02,2016-12-02,2016-12-02,1.0,COAST,0,COAST_2016_12_02_0,12,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
1036,11965,2016,CA,NPS,SMP,LIBERTY CANYON,843,2016-11-05T00:00:00,2016-11-05T00:00:00,14.0,...,2016,2016_11_05,2016-11-05,2016-11-05,1.0,LIBERTY_CANYON,0,LIBERTY_CANYON_2016_11_05_0,11,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
1037,11966,2016,CA,NPS,SMP,MULHOLLAND,210,2016-02-25T00:00:00,2016-02-28T00:00:00,14.0,...,2016,2016_02_25,2016-02-25,2016-02-28,4.0,MULHOLLAND,0,MULHOLLAND_2016_02_25_0,2,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...


In [141]:
cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.YEAR>=2016].ALARM_DATE_DT_DT.nunique()

531

In [142]:
cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.YEAR>=2016].FIRE_CUSTOM_ID.nunique()

1353

In [143]:
cal_fire_2016p_df = cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.YEAR>=2016].reset_index()
cal_fire_2016p_df = cal_fire_2016p_df[['FIRE_CUSTOM_ID', 'FIRE_S2_LOOP']]
cal_fire_2016p_df = split_data_frame_list(cal_fire_2016p_df, 'FIRE_S2_LOOP', 'FIRE_CUSTOM_ID')
cal_fire_2016p_df.shape

(2101, 2)

In [144]:
# coordinates create incorrect loops
trouble_loops = [
    'MILLERTON_2008_06_18_0',
    'OLSEN_2015_07_30_0',
    'POTRERO_2015_05_01_0',
    'RIVER_2016_06_26_0',
    'FREEDOM_2017_07_04_0'
]

In [145]:
cal_fire_2016p_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,FIRE_S2_LOOP
0,INDIAN_WELLS_2016_09_29_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
1,INDIAN_WELLS_2016_09_29_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
2,PELICAN_2016_07_07_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
3,COAST_2016_12_02_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
4,LIBERTY_CANYON_2016_11_05_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...


In [146]:
cal_fire_2016p_df[cal_fire_2016p_df.FIRE_CUSTOM_ID.isin(trouble_loops)].shape

(2, 2)

In [147]:
trouble_loops_new_index = cal_fire_2016p_df[cal_fire_2016p_df.FIRE_CUSTOM_ID.isin(trouble_loops)].index
cal_fire_2016p_df = cal_fire_2016p_df.drop(index=trouble_loops_new_index).reset_index()
cal_fire_2016p_df.drop(columns=['index'], inplace=True)
cal_fire_2016p_df.shape

(2099, 2)

In [148]:
cal_fire_2016p_df.tail()

Unnamed: 0,FIRE_CUSTOM_ID,FIRE_S2_LOOP
2094,COLONY_2018_09_14_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
2095,COLONY_2018_09_14_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
2096,CAMINO_2018_10_19_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
2097,SIGNBOARD_2018_11_16_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...
2098,MILL_CREEK_1_2018_09_05_0,<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...


In [149]:
cal_fire_2016p_df['S2_Cells_low'] = cal_fire_2016p_df.FIRE_S2_LOOP.apply(create_S2_coverer, args=[9])
cal_fire_2016p_df['S2_Cells_med'] = cal_fire_2016p_df.FIRE_S2_LOOP.apply(create_S2_coverer, args=[10])
cal_fire_2016p_df['S2_Cells_high'] = cal_fire_2016p_df.FIRE_S2_LOOP.apply(create_S2_coverer, args=[11])

In [150]:
cal_fire_s2_low_df = cal_fire_2016p_df[['FIRE_CUSTOM_ID', 'S2_Cells_low']]
cal_fire_s2_low_df = split_data_frame_list(cal_fire_s2_low_df, 'S2_Cells_low', 'FIRE_CUSTOM_ID')
cal_fire_s2_low_df['S2_Cells_ID'] = cal_fire_s2_low_df.S2_Cells_low.apply(lambda x: x.ToToken())
## Some cells are duplicated mostly because there are usually multiple polygons per wildfire
cal_fire_s2_low_df = cal_fire_s2_low_df.drop_duplicates(subset=['FIRE_CUSTOM_ID', 'S2_Cells_ID']).reset_index().drop(columns='index')
cal_fire_s2_low_df['FZ_grp'] = 'low'
cal_fire_s2_low_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells_low,S2_Cells_ID,FZ_grp
0,INDIAN_WELLS_2016_09_29_0,2/221213110�,54cea4,low
1,PELICAN_2016_07_07_0,4/001002322�,8085d4,low
2,COAST_2016_12_02_0,4/001310013�,80e83c,low
3,LIBERTY_CANYON_2016_11_05_0,4/001310010�,80e824,low
4,MULHOLLAND_2016_02_25_0,4/001310013�,80e83c,low


In [151]:
cal_fire_s2_low_df.S2_Cells_ID.nunique(), cal_fire_s2_low_df.shape

(692, (1705, 4))

In [152]:
# pd.merge(cal_fire_s2_low_df[cal_fire_s2_low_df.S2_Cells_ID=='80dcf5'], cal_fire_hist_2000p_clean_df, on='FIRE_CUSTOM_ID', how='left').T

In [153]:
cal_fire_s2_med_df = cal_fire_2016p_df[['FIRE_CUSTOM_ID', 'S2_Cells_med']]
cal_fire_s2_med_df = split_data_frame_list(cal_fire_s2_med_df, 'S2_Cells_med', 'FIRE_CUSTOM_ID')
cal_fire_s2_med_df['S2_Cells_ID'] = cal_fire_s2_med_df.S2_Cells_med.apply(lambda x: x.ToToken())
## Some cells are duplicated mostly because there are usually multiple polygons per wildfire
cal_fire_s2_med_df = cal_fire_s2_med_df.drop_duplicates(subset=['FIRE_CUSTOM_ID', 'S2_Cells_ID']).reset_index().drop(columns='index')
cal_fire_s2_med_df['FZ_grp'] = 'med'
cal_fire_s2_med_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells_med,S2_Cells_ID,FZ_grp
0,INDIAN_WELLS_2016_09_29_0,2/2212131103�,54cea7,med
1,PELICAN_2016_07_07_0,4/0010023221�,8085d3,med
2,COAST_2016_12_02_0,4/0013100132�,80e83d,med
3,LIBERTY_CANYON_2016_11_05_0,4/0013100100�,80e821,med
4,MULHOLLAND_2016_02_25_0,4/0013100132�,80e83d,med


In [154]:
cal_fire_s2_med_df.S2_Cells_ID.nunique(), cal_fire_s2_med_df.shape

(1423, (2185, 4))

In [155]:
cal_fire_s2_high_df = cal_fire_2016p_df[['FIRE_CUSTOM_ID', 'S2_Cells_high']]
cal_fire_s2_high_df = split_data_frame_list(cal_fire_s2_high_df, 'S2_Cells_high', 'FIRE_CUSTOM_ID')
cal_fire_s2_high_df['S2_Cells_ID'] = cal_fire_s2_high_df.S2_Cells_high.apply(lambda x: x.ToToken())
## Some cells are duplicated mostly because there are usually multiple polygons per wildfire
cal_fire_s2_high_df = cal_fire_s2_high_df.drop_duplicates(subset=['FIRE_CUSTOM_ID', 'S2_Cells_ID']).reset_index().drop(columns='index')
cal_fire_s2_high_df['FZ_grp'] = 'high'
cal_fire_s2_high_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells_high,S2_Cells_ID,FZ_grp
0,INDIAN_WELLS_2016_09_29_0,2/22121311030�,54cea64,high
1,PELICAN_2016_07_07_0,4/00100232211�,8085d2c,high
2,COAST_2016_12_02_0,4/00131001323�,80e83dc,high
3,LIBERTY_CANYON_2016_11_05_0,4/00131001002�,80e8214,high
4,MULHOLLAND_2016_02_25_0,4/00131001320�,80e83c4,high


In [156]:
cal_fire_s2_high_df.S2_Cells_ID.nunique(), cal_fire_s2_high_df.shape

(2917, (3500, 4))

In [157]:
cal_fire_s2_low_ids_df = cal_fire_s2_low_df.groupby('S2_Cells_ID').FIRE_CUSTOM_ID.count().reset_index()
cal_fire_s2_med_ids_df = cal_fire_s2_med_df.groupby('S2_Cells_ID').FIRE_CUSTOM_ID.count().reset_index()
cal_fire_s2_high_ids_df = cal_fire_s2_high_df.groupby('S2_Cells_ID').FIRE_CUSTOM_ID.count().reset_index()
cal_fire_s2_low_ids_df.shape, cal_fire_s2_med_ids_df.shape, cal_fire_s2_high_ids_df.shape

((692, 2), (1423, 2), (2917, 2))

#### Checking CA S2 Cells and WIldFIre overlap

In [158]:
ca_s2_df = pd.read_csv('./Data/Processed/CA_FZ_S2Cells.csv')
ca_s2_df.shape

(10643, 8)

In [159]:
ca_s2_df.head()

Unnamed: 0,CWA,NAME,STATE_ZONE,FE_AREA,AREA,WF_cum_area,FZ_grp,S2_Cells_ID
0,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80b8a4
1,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80b8ac
2,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80b8b4
3,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80be44
4,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80be4c


In [160]:
ca_s2_df.S2_Cells_ID.nunique()

10643

In [161]:
cal_fire_fz_s2_low_df = pd.merge(ca_s2_df[ca_s2_df.FZ_grp=='low'], cal_fire_s2_low_ids_df, on='S2_Cells_ID', how='left')
cal_fire_fz_s2_med_df = pd.merge(ca_s2_df[ca_s2_df.FZ_grp=='med'], cal_fire_s2_med_ids_df, on='S2_Cells_ID', how='left')
cal_fire_fz_s2_high_df = pd.merge(ca_s2_df[ca_s2_df.FZ_grp=='high'], cal_fire_s2_high_ids_df, on='S2_Cells_ID', how='left')
cal_fire_fz_s2_low_df.shape, cal_fire_fz_s2_med_df.shape, cal_fire_fz_s2_high_df.shape

((607, 9), (3490, 9), (6546, 9))

In [162]:
cal_fire_s2_df = cal_fire_fz_s2_low_df.append(cal_fire_fz_s2_med_df).append(cal_fire_fz_s2_high_df)
cal_fire_s2_df['WildFire_count'] = cal_fire_s2_df.FIRE_CUSTOM_ID.apply(lambda x: 0 if np.isnan(x) else x)
cal_fire_s2_df['WildFire'] = cal_fire_s2_df.FIRE_CUSTOM_ID.apply(lambda x: 0 if np.isnan(x) else 1)
cal_fire_s2_df.shape

(10643, 11)

In [163]:
cal_fire_s2_df.head()

Unnamed: 0,CWA,NAME,STATE_ZONE,FE_AREA,AREA,WF_cum_area,FZ_grp,S2_Cells_ID,FIRE_CUSTOM_ID,WildFire_count,WildFire
0,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80b8a4,,0.0,0
1,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80b8ac,,0.0,0
2,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80b8b4,,0.0,0
3,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80be44,,0.0,0
4,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80be4c,,0.0,0


In [164]:
cal_fire_s2_df.WildFire.mean()

0.26073475523818473

In [165]:
cal_fire_s2_df.WildFire.sum()

2775

In [166]:
cal_fire_s2_df.WildFire_count.sum()

3717.0

In [167]:
cal_fire_s2_df.WildFire_count.sum() - cal_fire_s2_df.WildFire.sum()

942.0

In [168]:
cal_fire_s2_df.WildFire_count.sum()/(cal_fire_s2_df.shape[0]*3*365)

0.00031894395827729077

In [169]:
cal_fire_s2_df.WildFire_count.value_counts()

0.0     7868
1.0     2170
2.0      423
3.0      110
4.0       34
5.0       16
6.0       10
7.0        7
8.0        2
9.0        1
11.0       1
10.0       1
Name: WildFire_count, dtype: int64

In [170]:
cal_fire_s2_df.sort_values(by='WildFire_count', ascending=False).head(10)

Unnamed: 0,CWA,NAME,STATE_ZONE,FE_AREA,AREA,WF_cum_area,FZ_grp,S2_Cells_ID,FIRE_CUSTOM_ID,WildFire_count,WildFire
494,SGX,San Diego County Coastal Areas,CA243,sw,0.145622,31.252567,low,80dcf4,11.0,11.0,1
2418,LOX,Ventura County Coastal Valleys,CA245,sw,0.086956,4932.531543,med,80e9b5,10.0,10.0,1
2706,SGX,Orange County Inland,CA554,sw,0.138871,9673.438832,med,80dce9,9.0,9.0,1
574,MFR,Siskiyou County from the Cascade Mountains Eas...,CA284,nn,0.641824,507.997167,low,54cefc,8.0,8.0,1
6276,HNX,Central Sierra,CA592,cc,0.647504,142650.261459,high,8096edc,8.0,8.0,1
2477,LOX,Santa Monica Mountains Recreational Area,CA246,sw,0.029418,490.051569,med,80e821,7.0,7.0,1
836,STO,Central Sacramento Valley including Glenn/Colu...,CA216,cc,1.003569,33012.001532,med,809b43,7.0,7.0,1
2444,LOX,Ventura County Mountains / Los Padres National...,CA253,sw,0.248142,2374.702105,med,80e9a5,7.0,7.0,1
3351,LOX,Ventura County Interior Valleys,CA244,sw,0.074788,282050.245514,high,80e9b54,7.0,7.0,1
2685,SGX,Orange County Coastal,CA552,sw,0.036114,235.769974,med,80dcef,7.0,7.0,1


#### Checking date distribution

In [171]:
fire_dates = pd.DataFrame(cal_fire_hist_2000p_clean_df[cal_fire_hist_2000p_clean_df.YEAR>=2016].groupby('ALARM_DATE_DT_DT').FIRE_CUSTOM_ID.count()).reset_index()
fire_dates['ALARM_DATE_DT_MO'] = fire_dates.ALARM_DATE_DT_DT.dt.month
fire_dates.shape

(531, 3)

In [172]:
fire_dates.head()

Unnamed: 0,ALARM_DATE_DT_DT,FIRE_CUSTOM_ID,ALARM_DATE_DT_MO
0,2016-02-07,1,2
1,2016-02-25,1,2
2,2016-02-26,1,2
3,2016-03-01,1,3
4,2016-03-31,1,3


In [173]:
fire_dates.ALARM_DATE_DT_MO.value_counts().sort_index()

1      8
2     17
3     10
4     27
5     58
6     82
7     89
8     74
9     70
10    52
11    27
12    17
Name: ALARM_DATE_DT_MO, dtype: int64

### Saving WildFire Dataset

In [174]:
keep_cols = [
    'AGENCY',
    'UNIT_ID',
    'CAUSE',
#     'COMMENTS',
    'GIS_ACRES',
    'C_METHOD',
    'OBJECTIVE',
    'Shape_Length',
    'Shape_Area',
#     'geometry',
    'YEAR',
    'ALARM_DATE_DT_DT',
    'CONT_DATE_DT_DT',
    'FIRE_DUR',
    'FIRE_CUSTOM_ID',
    'ALARM_DATE_MONTH'
]

In [175]:
cal_fire_hist_2000p_clean_df.head()

Unnamed: 0,index,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,...,YEAR,ALARM_DATE_DT,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,FIRE_NAME_NEW,DUP_ID,FIRE_CUSTOM_ID,ALARM_DATE_MONTH,FIRE_S2_LOOP
0,0,2007,CA,CCO,LAC,OCTOBER,246393,2007-10-21T00:00:00,2007-10-23T00:00:00,14.0,...,2007,2007_10_21,2007-10-21,2007-10-23,3.0,OCTOBER,0,OCTOBER_2007_10_21_0,10,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
1,1,2007,CA,CCO,LAC,MAGIC,233077,2007-10-22T00:00:00,2007-10-25T00:00:00,14.0,...,2007,2007_10_22,2007-10-22,2007-10-25,4.0,MAGIC,0,MAGIC_2007_10_22_0,10,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
2,2,2007,CA,USF,ANF,RANCH,166,2007-10-20T00:00:00,2007-11-15T00:00:00,2.0,...,2007,2007_10_20,2007-10-20,2007-11-15,27.0,RANCH,0,RANCH_2007_10_20_0,10,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
3,3,2007,CA,CCO,LAC,EMMA,201384,2007-09-11T00:00:00,2007-09-11T00:00:00,14.0,...,2007,2007_09_11,2007-09-11,2007-09-11,1.0,EMMA,0,EMMA_2007_09_11_0,9,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...
4,4,2007,CA,CCO,LAC,CORRAL,259483,2007-11-24T00:00:00,2007-11-27T00:00:00,14.0,...,2007,2007_11_24,2007-11-24,2007-11-27,4.0,CORRAL,0,CORRAL_2007_11_24_0,11,[<s2_py.pywraps2.S2Loop; proxy of <Swig Object...


In [176]:
cal_fire_s2_low_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells_low,S2_Cells_ID,FZ_grp
0,INDIAN_WELLS_2016_09_29_0,2/221213110�,54cea4,low
1,PELICAN_2016_07_07_0,4/001002322�,8085d4,low
2,COAST_2016_12_02_0,4/001310013�,80e83c,low
3,LIBERTY_CANYON_2016_11_05_0,4/001310010�,80e824,low
4,MULHOLLAND_2016_02_25_0,4/001310013�,80e83c,low


In [177]:
cal_fire_s2_med_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells_med,S2_Cells_ID,FZ_grp
0,INDIAN_WELLS_2016_09_29_0,2/2212131103�,54cea7,med
1,PELICAN_2016_07_07_0,4/0010023221�,8085d3,med
2,COAST_2016_12_02_0,4/0013100132�,80e83d,med
3,LIBERTY_CANYON_2016_11_05_0,4/0013100100�,80e821,med
4,MULHOLLAND_2016_02_25_0,4/0013100132�,80e83d,med


In [178]:
cal_fire_s2_high_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells_high,S2_Cells_ID,FZ_grp
0,INDIAN_WELLS_2016_09_29_0,2/22121311030�,54cea64,high
1,PELICAN_2016_07_07_0,4/00100232211�,8085d2c,high
2,COAST_2016_12_02_0,4/00131001323�,80e83dc,high
3,LIBERTY_CANYON_2016_11_05_0,4/00131001002�,80e8214,high
4,MULHOLLAND_2016_02_25_0,4/00131001320�,80e83c4,high


In [179]:
cal_fire_s2_stacked_df = cal_fire_s2_low_df.drop(columns='S2_Cells_low').append(cal_fire_s2_med_df.drop(columns='S2_Cells_med')).append(cal_fire_s2_high_df.drop(columns='S2_Cells_high')).reset_index().drop(columns='index')
cal_fire_s2_stacked_df = pd.merge(cal_fire_s2_stacked_df, cal_fire_hist_2000p_clean_df[keep_cols], on='FIRE_CUSTOM_ID', how='left')
cal_fire_s2_stacked_df.shape

(7390, 16)

In [180]:
cal_fire_s2_stacked_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells_ID,FZ_grp,AGENCY,UNIT_ID,CAUSE,GIS_ACRES,C_METHOD,OBJECTIVE,Shape_Length,Shape_Area,YEAR,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,ALARM_DATE_MONTH
0,INDIAN_WELLS_2016_09_29_0,54cea4,low,NPS,BNP,14.0,0.053234,8.0,1.0,67.420823,215.429882,2016,2016-09-29,2016-09-30,2.0,9
1,PELICAN_2016_07_07_0,8085d4,low,NPS,RNP,14.0,0.498426,8.0,1.0,227.100239,2017.058906,2016,2016-07-07,2016-07-07,1.0,7
2,COAST_2016_12_02_0,80e83c,low,NPS,SMP,14.0,1.167387,8.0,1.0,461.384327,4724.249576,2016,2016-12-02,2016-12-02,1.0,12
3,LIBERTY_CANYON_2016_11_05_0,80e824,low,NPS,SMP,14.0,0.077735,8.0,1.0,63.357001,314.580697,2016,2016-11-05,2016-11-05,1.0,11
4,MULHOLLAND_2016_02_25_0,80e83c,low,NPS,SMP,14.0,11.32529,8.0,1.0,1032.685562,45831.821744,2016,2016-02-25,2016-02-28,4.0,2


In [181]:
cal_fire_s2_stacked_df.to_csv('./Data/Processed/WildFire_S2Cells_stacked.csv', index=False)

In [182]:
ca_s2_df.shape

(10643, 8)

In [183]:
ca_s2_df.head()

Unnamed: 0,CWA,NAME,STATE_ZONE,FE_AREA,AREA,WF_cum_area,FZ_grp,S2_Cells_ID
0,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80b8a4
1,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80b8ac
2,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80b8b4
3,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80be44
4,VEF,Death Valley National Park,CA227,ee,2.001602,37.487592,low,80be4c


In [184]:
cal_fire_s2_stacked_df['WildFire'] = 1
ca_s2_wf_df = pd.merge(ca_s2_df[['S2_Cells_ID']], cal_fire_s2_stacked_df.drop(columns='FZ_grp'), on='S2_Cells_ID', how='inner')
# ca_s2_wf_df.WildFire = ca_s2_wf_df.WildFire.fillna(0)
ca_s2_wf_df.shape

(3717, 16)

In [185]:
ca_s2_wf_df.head()

Unnamed: 0,S2_Cells_ID,FIRE_CUSTOM_ID,AGENCY,UNIT_ID,CAUSE,GIS_ACRES,C_METHOD,OBJECTIVE,Shape_Length,Shape_Area,YEAR,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,ALARM_DATE_MONTH,WildFire
0,80bf84,MOFFAT_2018_04_19_0,CDF,BDU,4.0,1253.924683,2.0,1.0,19468.811184,5074453.0,2018,2018-04-19,2018-04-23,5.0,4,1
1,80bf9c,MOFFAT_2018_04_19_0,CDF,BDU,4.0,1253.924683,2.0,1.0,19468.811184,5074453.0,2018,2018-04-19,2018-04-23,5.0,4,1
2,80c13c,GREAT_2016_05_13_0,BLM,CDD,4.0,10.494216,1.0,1.0,1460.354604,42468.59,2016,2016-05-13,2016-05-14,2.0,5,1
3,80c664,TECOPA_2017_09_11_0,BLM,CDD,1.0,26.993376,1.0,1.0,3280.927943,109238.3,2017,2017-09-11,2017-09-14,4.0,9,1
4,54d024,LITTLE_BUCK_2017_09_07_0,USF,KNF,1.0,113.962486,3.0,1.0,5158.41838,461189.8,2017,2017-09-07,2017-10-10,34.0,9,1


In [186]:
ca_s2_wf_df.FIRE_CUSTOM_ID.isna().sum()

0

In [187]:
ca_s2_wf_df.WildFire.sum()

3717

In [188]:
ca_s2_wf_df.shape[0] - ca_s2_df.shape[0]

-6926

In [189]:
ca_s2_wf_df.AGENCY.value_counts()

CDF    1773
USF    1065
CCO     560
NPS     126
BLM     123
LRA      34
DOD      28
BIA       8
Name: AGENCY, dtype: int64

In [190]:
agency = pd.get_dummies(ca_s2_wf_df.AGENCY)
agency.columns = ['AGENCY_'+i for i in agency.columns]
agency.head()

Unnamed: 0,AGENCY_BIA,AGENCY_BLM,AGENCY_CCO,AGENCY_CDF,AGENCY_DOD,AGENCY_LRA,AGENCY_NPS,AGENCY_USF
0,0,0,0,1,0,0,0,0
1,0,0,0,1,0,0,0,0
2,0,1,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,1


In [191]:
ca_s2_wf_df.UNIT_ID.nunique()

63

In [192]:
ca_s2_wf_df.CAUSE.value_counts()

14.0    1280
9.0      654
1.0      555
10.0     325
2.0      306
7.0      177
11.0     169
4.0      108
5.0       95
8.0       14
3.0       11
15.0       5
18.0       5
16.0       5
6.0        2
Name: CAUSE, dtype: int64

In [193]:
cause = pd.get_dummies(ca_s2_wf_df.CAUSE)
cause.columns = ['CAUSE_'+str(int(i)) for i in cause.columns]
cause.head()

Unnamed: 0,CAUSE_1,CAUSE_2,CAUSE_3,CAUSE_4,CAUSE_5,CAUSE_6,CAUSE_7,CAUSE_8,CAUSE_9,CAUSE_10,CAUSE_11,CAUSE_14,CAUSE_15,CAUSE_16,CAUSE_18
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [194]:
ca_s2_wf_df.C_METHOD.value_counts()

1.0    1819
7.0     573
3.0     438
6.0     280
2.0     266
8.0     244
4.0      75
5.0      22
Name: C_METHOD, dtype: int64

In [195]:
c_method = pd.get_dummies(ca_s2_wf_df.C_METHOD)
c_method.columns = ['C_METHOD_'+str(int(i)) for i in c_method.columns]
c_method.head()

Unnamed: 0,C_METHOD_1,C_METHOD_2,C_METHOD_3,C_METHOD_4,C_METHOD_5,C_METHOD_6,C_METHOD_7,C_METHOD_8
0,0,1,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0


In [196]:
ca_s2_wf_df.OBJECTIVE.nunique()

1

In [49]:
cal_fire_hist_df = cal_fire_hist_2000p_clean_df[['FIRE_CUSTOM_ID', 'FIRE_S2_LOOP']]
cal_fire_hist_df = split_data_frame_list(cal_fire_hist_df, 'FIRE_S2_LOOP', 'FIRE_CUSTOM_ID')
trouble_loops_old_index = cal_fire_hist_df[cal_fire_hist_df.FIRE_CUSTOM_ID.isin(trouble_loops)].index
cal_fire_hist_df = cal_fire_hist_df.drop(index=trouble_loops_old_index).reset_index()
cal_fire_hist_df.drop(columns=['index'], inplace=True)
cal_fire_hist_df['S2_Cells_low'] = cal_fire_hist_df.FIRE_S2_LOOP.apply(create_S2_coverer, args=[9])
cal_fire_hist_df['S2_Cells_med'] = cal_fire_hist_df.FIRE_S2_LOOP.apply(create_S2_coverer, args=[10])
cal_fire_hist_df['S2_Cells_high'] = cal_fire_hist_df.FIRE_S2_LOOP.apply(create_S2_coverer, args=[11])
cal_fire_hist_df.shape

(7668, 5)

In [94]:
cal_fire_s2_hist_low_df = cal_fire_hist_df[['FIRE_CUSTOM_ID', 'S2_Cells_low']]
cal_fire_s2_hist_low_df = split_data_frame_list(cal_fire_s2_hist_low_df, 'S2_Cells_low', 'FIRE_CUSTOM_ID')
cal_fire_s2_hist_low_df['S2_Cells_ID'] = cal_fire_s2_hist_low_df.S2_Cells_low.apply(lambda x: x.ToToken())
## Some cells are duplicated mostly because there are usually multiple polygons per wildfire
cal_fire_s2_hist_low_df = cal_fire_s2_hist_low_df.drop_duplicates(subset=['FIRE_CUSTOM_ID', 'S2_Cells_ID']).reset_index().drop(columns='index')
cal_fire_s2_hist_low_df = pd.merge(cal_fire_s2_hist_low_df, cal_fire_hist_2000p_clean_df[['FIRE_CUSTOM_ID', 'YEAR']], on='FIRE_CUSTOM_ID', how='left')
cal_fire_s2_hist_low_df = cal_fire_s2_hist_low_df.groupby(['S2_Cells_ID', 'YEAR']).FIRE_CUSTOM_ID.count().reset_index()
cal_fire_s2_hist_low_df = cal_fire_s2_hist_low_df[cal_fire_s2_hist_low_df.YEAR>2010]
cal_fire_s2_hist_low_df = pd.pivot_table(data=cal_fire_s2_hist_low_df,
                                         values='FIRE_CUSTOM_ID',
                                         index='S2_Cells_ID',
                                         columns='YEAR',
                                         fill_value=0).reset_index()
cal_fire_s2_hist_low_df.columns = ['S2_Cells_ID'] + ['YEAR_'+str(i) for i in cal_fire_s2_hist_low_df.columns[1:]]
cal_fire_s2_hist_low_df.head()

Unnamed: 0,S2_Cells_ID,YEAR_2011,YEAR_2012,YEAR_2013,YEAR_2014,YEAR_2015,YEAR_2016,YEAR_2017,YEAR_2018
0,54c944,0,0,0,0,0,0,1,0
1,54c94c,0,0,0,0,0,0,1,0
2,54c954,0,0,0,0,1,0,3,0
3,54c95c,0,0,0,0,0,0,3,0
4,54c964,0,0,0,0,1,0,5,0


In [96]:
cal_fire_s2_hist_low_df.sum()

S2_Cells_ID    54c94454c94c54c95454c95c54c96454c96c54c97c54ca...
YEAR_2011                                                    375
YEAR_2012                                                    418
YEAR_2013                                                    360
YEAR_2014                                                    291
YEAR_2015                                                    384
YEAR_2016                                                    413
YEAR_2017                                                    764
YEAR_2018                                                    528
dtype: object

In [97]:
cal_fire_s2_hist_low_df.shape

(923, 9)

In [98]:
cal_fire_s2_hist_med_df = cal_fire_hist_df[['FIRE_CUSTOM_ID', 'S2_Cells_med']]
cal_fire_s2_hist_med_df = split_data_frame_list(cal_fire_s2_hist_med_df, 'S2_Cells_med', 'FIRE_CUSTOM_ID')
cal_fire_s2_hist_med_df['S2_Cells_ID'] = cal_fire_s2_hist_med_df.S2_Cells_med.apply(lambda x: x.ToToken())
## Some cells are duplicated mostly because there are usually multiple polygons per wildfire
cal_fire_s2_hist_med_df = cal_fire_s2_hist_med_df.drop_duplicates(subset=['FIRE_CUSTOM_ID', 'S2_Cells_ID']).reset_index().drop(columns='index')
cal_fire_s2_hist_med_df = pd.merge(cal_fire_s2_hist_med_df, cal_fire_hist_2000p_clean_df[['FIRE_CUSTOM_ID', 'YEAR']], on='FIRE_CUSTOM_ID', how='left')
cal_fire_s2_hist_med_df = cal_fire_s2_hist_med_df.groupby(['S2_Cells_ID', 'YEAR']).FIRE_CUSTOM_ID.count().reset_index()
cal_fire_s2_hist_med_df = cal_fire_s2_hist_med_df[cal_fire_s2_hist_med_df.YEAR>2010]
cal_fire_s2_hist_med_df = pd.pivot_table(data=cal_fire_s2_hist_med_df,
                                         values='FIRE_CUSTOM_ID',
                                         index='S2_Cells_ID',
                                         columns='YEAR',
                                         fill_value=0).reset_index()
cal_fire_s2_hist_med_df.columns = ['S2_Cells_ID'] + ['YEAR_'+str(i) for i in cal_fire_s2_hist_med_df.columns[1:]]
cal_fire_s2_hist_med_df.head()

Unnamed: 0,S2_Cells_ID,YEAR_2011,YEAR_2012,YEAR_2013,YEAR_2014,YEAR_2015,YEAR_2016,YEAR_2017,YEAR_2018
0,54c943,0,0,0,0,0,0,1,0
1,54c94d,0,0,0,0,0,0,1,0
2,54c951,0,0,0,0,1,0,0,0
3,54c953,0,0,0,0,0,0,1,0
4,54c955,0,0,0,0,0,0,1,0


In [99]:
cal_fire_s2_hist_med_df.sum()

S2_Cells_ID    54c94354c94d54c95154c95354c95554c95754c95954c9...
YEAR_2011                                                    438
YEAR_2012                                                    532
YEAR_2013                                                    444
YEAR_2014                                                    366
YEAR_2015                                                    512
YEAR_2016                                                    506
YEAR_2017                                                    966
YEAR_2018                                                    713
dtype: object

In [100]:
cal_fire_s2_hist_med_df.shape

(2249, 9)

In [101]:
cal_fire_s2_hist_high_df = cal_fire_hist_df[['FIRE_CUSTOM_ID', 'S2_Cells_high']]
cal_fire_s2_hist_high_df = split_data_frame_list(cal_fire_s2_hist_high_df, 'S2_Cells_high', 'FIRE_CUSTOM_ID')
cal_fire_s2_hist_high_df['S2_Cells_ID'] = cal_fire_s2_hist_high_df.S2_Cells_high.apply(lambda x: x.ToToken())
## Some cells are duplicated mostly because there are usually multiple polygons per wildfire
cal_fire_s2_hist_high_df = cal_fire_s2_hist_high_df.drop_duplicates(subset=['FIRE_CUSTOM_ID', 'S2_Cells_ID']).reset_index().drop(columns='index')
cal_fire_s2_hist_high_df = pd.merge(cal_fire_s2_hist_high_df, cal_fire_hist_2000p_clean_df[['FIRE_CUSTOM_ID', 'YEAR']], on='FIRE_CUSTOM_ID', how='left')
cal_fire_s2_hist_high_df = cal_fire_s2_hist_high_df.groupby(['S2_Cells_ID', 'YEAR']).FIRE_CUSTOM_ID.count().reset_index()
cal_fire_s2_hist_high_df = cal_fire_s2_hist_high_df[cal_fire_s2_hist_high_df.YEAR>2010]
cal_fire_s2_hist_high_df = pd.pivot_table(data=cal_fire_s2_hist_high_df,
                                         values='FIRE_CUSTOM_ID',
                                         index='S2_Cells_ID',
                                         columns='YEAR',
                                         fill_value=0).reset_index()
cal_fire_s2_hist_high_df.columns = ['S2_Cells_ID'] + ['YEAR_'+str(i) for i in cal_fire_s2_hist_high_df.columns[1:]]
cal_fire_s2_hist_high_df.head()

Unnamed: 0,S2_Cells_ID,YEAR_2011,YEAR_2012,YEAR_2013,YEAR_2014,YEAR_2015,YEAR_2016,YEAR_2017,YEAR_2018
0,54c942c,0,0,0,0,0,0,1,0
1,54c94d4,0,0,0,0,0,0,1,0
2,54c94dc,0,0,0,0,0,0,1,0
3,54c951c,0,0,0,0,1,0,0,0
4,54c9524,0,0,0,0,0,0,1,0


In [102]:
cal_fire_s2_hist_high_df.sum()

S2_Cells_ID    54c942c54c94d454c94dc54c951c54c952454c952c54c9...
YEAR_2011                                                    596
YEAR_2012                                                    824
YEAR_2013                                                    651
YEAR_2014                                                    563
YEAR_2015                                                    849
YEAR_2016                                                    726
YEAR_2017                                                   1557
YEAR_2018                                                   1217
dtype: object

In [103]:
cal_fire_s2_hist_high_df.shape

(5070, 9)

In [202]:
cal_fire_s2_hist_high_df.S2_Cells_ID.nunique()

5070

In [271]:
ca_s2_wf_final_df = ca_s2_wf_df.join(agency).join(cause).join(c_method)
cal_fire_s2_hist_stacked_df = cal_fire_s2_hist_low_df.append(cal_fire_s2_hist_med_df).append(cal_fire_s2_hist_high_df).reset_index().drop(columns='index')
ca_s2_wf_final_df = pd.merge(ca_s2_wf_final_df, cal_fire_s2_hist_stacked_df, on='S2_Cells_ID', how='left')
ca_s2_wf_final_df.shape

(3717, 55)

In [272]:
cal_fire_s2_hist_stacked_df.S2_Cells_ID.nunique(), cal_fire_s2_hist_stacked_df.shape

(8242, (8242, 9))

In [275]:
conditions = [
    (ca_s2_wf_final_df['YEAR'] == 2016),
    (ca_s2_wf_final_df['YEAR'] == 2017),
    (ca_s2_wf_final_df['YEAR'] == 2018)]
choices_1yr_ago = [ca_s2_wf_final_df['YEAR_2015'], ca_s2_wf_final_df['YEAR_2016'], ca_s2_wf_final_df['YEAR_2017']]
choices_2yr_ago = [ca_s2_wf_final_df['YEAR_2014'], ca_s2_wf_final_df['YEAR_2015'], ca_s2_wf_final_df['YEAR_2016']]
choices_3yr_ago = [ca_s2_wf_final_df['YEAR_2013'], ca_s2_wf_final_df['YEAR_2014'], ca_s2_wf_final_df['YEAR_2015']]
choices_4yr_ago = [ca_s2_wf_final_df['YEAR_2012'], ca_s2_wf_final_df['YEAR_2013'], ca_s2_wf_final_df['YEAR_2014']]
choices_5yr_ago = [ca_s2_wf_final_df['YEAR_2011'], ca_s2_wf_final_df['YEAR_2012'], ca_s2_wf_final_df['YEAR_2013']]
ca_s2_wf_final_df['WildFire_COUNT_1YR_AGO'] = np.select(conditions, choices_1yr_ago)
ca_s2_wf_final_df['WildFire_COUNT_2YR_AGO'] = np.select(conditions, choices_2yr_ago)
ca_s2_wf_final_df['WildFire_COUNT_3YR_AGO'] = np.select(conditions, choices_3yr_ago)
ca_s2_wf_final_df['WildFire_COUNT_4YR_AGO'] = np.select(conditions, choices_4yr_ago)
ca_s2_wf_final_df['WildFire_COUNT_5YR_AGO'] = np.select(conditions, choices_5yr_ago)
ca_s2_wf_final_df = ca_s2_wf_final_df.drop(columns=['YEAR_2011','YEAR_2012','YEAR_2013','YEAR_2014','YEAR_2015','YEAR_2016','YEAR_2017','YEAR_2018'])

In [276]:
ca_s2_wf_final_df.shape

(3717, 52)

In [277]:
ca_s2_wf_final_df[list(ca_s2_wf_final_df)[-5:]].describe()

Unnamed: 0,WildFire_COUNT_1YR_AGO,WildFire_COUNT_2YR_AGO,WildFire_COUNT_3YR_AGO,WildFire_COUNT_4YR_AGO,WildFire_COUNT_5YR_AGO
count,3717.0,3717.0,3717.0,3717.0,3717.0
mean,0.229217,0.17541,0.168953,0.138015,0.146624
std,0.606013,0.543489,0.560615,0.45408,0.49079
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0
max,6.0,6.0,6.0,4.0,5.0


In [278]:
ca_s2_wf_final_df[ca_s2_wf_final_df.duplicated(subset=['S2_Cells_ID','ALARM_DATE_DT_DT'])].shape

(44, 52)

In [279]:
ca_s2_wf_final_grp1_df = ca_s2_wf_final_df.groupby(['S2_Cells_ID', 'ALARM_DATE_DT_DT']).agg({'FIRE_CUSTOM_ID': 'count', 'YEAR': 'max', 'ALARM_DATE_MONTH': 'max', 'WildFire': 'max'}).reset_index()
ca_s2_wf_final_grp1_df = ca_s2_wf_final_grp1_df.rename(columns={'FIRE_CUSTOM_ID': 'WildFire_COUNT'})
ca_s2_wf_final_grp1_df.shape

(3673, 6)

In [280]:
ca_s2_wf_final_grp1_df.head()

Unnamed: 0,S2_Cells_ID,ALARM_DATE_DT_DT,WildFire_COUNT,YEAR,ALARM_DATE_MONTH,WildFire
0,54c942c,2017-07-24,1,2017,7,1
1,54c94c,2017-06-28,1,2017,6,1
2,54c94d4,2017-06-28,1,2017,6,1
3,54c94dc,2017-06-28,1,2017,6,1
4,54c9524,2017-06-28,1,2017,6,1


In [283]:
sum_vars = ['GIS_ACRES',
 'Shape_Length',
 'Shape_Area',
 'FIRE_DUR',
 'AGENCY_BIA',
 'AGENCY_BLM',
 'AGENCY_CCO',
 'AGENCY_CDF',
 'AGENCY_DOD',
 'AGENCY_LRA',
 'AGENCY_NPS',
 'AGENCY_USF',
 'CAUSE_1',
 'CAUSE_2',
 'CAUSE_3',
 'CAUSE_4',
 'CAUSE_5',
 'CAUSE_6',
 'CAUSE_7',
 'CAUSE_8',
 'CAUSE_9',
 'CAUSE_10',
 'CAUSE_11',
 'CAUSE_14',
 'CAUSE_15',
 'CAUSE_16',
 'CAUSE_18',
 'C_METHOD_1',
 'C_METHOD_2',
 'C_METHOD_3',
 'C_METHOD_4',
 'C_METHOD_5',
 'C_METHOD_6',
 'C_METHOD_7',
 'C_METHOD_8',
 'WildFire_COUNT_1YR_AGO',
 'WildFire_COUNT_2YR_AGO',
 'WildFire_COUNT_3YR_AGO',
 'WildFire_COUNT_4YR_AGO',
 'WildFire_COUNT_5YR_AGO']

In [284]:
ca_s2_wf_final_grp2_df = ca_s2_wf_final_df[['S2_Cells_ID', 'ALARM_DATE_DT_DT']+sum_vars].groupby(['S2_Cells_ID', 'ALARM_DATE_DT_DT']).sum().reset_index()
ca_s2_wf_final_grp2_df.shape

(3673, 42)

In [285]:
ca_s2_wf_final_grp2_df.head()

Unnamed: 0,S2_Cells_ID,ALARM_DATE_DT_DT,GIS_ACRES,Shape_Length,Shape_Area,FIRE_DUR,AGENCY_BIA,AGENCY_BLM,AGENCY_CCO,AGENCY_CDF,...,C_METHOD_4,C_METHOD_5,C_METHOD_6,C_METHOD_7,C_METHOD_8,WildFire_COUNT_1YR_AGO,WildFire_COUNT_2YR_AGO,WildFire_COUNT_3YR_AGO,WildFire_COUNT_4YR_AGO,WildFire_COUNT_5YR_AGO
0,54c942c,2017-07-24,1492.397827,19322.129952,6039520.0,129.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,54c94c,2017-06-28,653.507935,14766.199729,2644653.0,4.0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,54c94d4,2017-06-28,653.507935,14766.199729,2644653.0,4.0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,54c94dc,2017-06-28,653.507935,14766.199729,2644653.0,4.0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,54c9524,2017-06-28,653.507935,14766.199729,2644653.0,4.0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [293]:
ca_s2_wf_final_grp_df = pd.merge(ca_s2_wf_final_grp1_df, ca_s2_wf_final_grp2_df, on=['S2_Cells_ID', 'ALARM_DATE_DT_DT'], how='left')
ca_s2_wf_final_grp_df.ALARM_DATE_DT_DT = ca_s2_wf_final_grp_df.ALARM_DATE_DT_DT.dt.date
ca_s2_wf_final_grp_df.columns = ['S2_Cells_ID'] + ['WF_'+i for i in ca_s2_wf_final_grp_df.columns if i != 'S2_Cells_ID']
ca_s2_wf_final_grp_df.shape

(3673, 46)

In [294]:
ca_s2_wf_final_grp_df.WF_WildFire_COUNT.sum()

3717

In [295]:
ca_s2_wf_final_grp_df.head().T

Unnamed: 0,0,1,2,3,4
S2_Cells_ID,54c942c,54c94c,54c94d4,54c94dc,54c9524
WF_ALARM_DATE_DT_DT,2017-07-24,2017-06-28,2017-06-28,2017-06-28,2017-06-28
WF_WildFire_COUNT,1,1,1,1,1
WF_YEAR,2017,2017,2017,2017,2017
WF_ALARM_DATE_MONTH,7,6,6,6,6
WF_WildFire,1,1,1,1,1
WF_GIS_ACRES,1492.4,653.508,653.508,653.508,653.508
WF_Shape_Length,19322.1,14766.2,14766.2,14766.2,14766.2
WF_Shape_Area,6.03952e+06,2.64465e+06,2.64465e+06,2.64465e+06,2.64465e+06
WF_FIRE_DUR,129,4,4,4,4


In [296]:
ca_s2_wf_final_grp_df.to_csv('./Data/Processed/WildFire_S2Cells.csv', index=False)

In [297]:
list(ca_s2_wf_final_grp_df)

['S2_Cells_ID',
 'WF_ALARM_DATE_DT_DT',
 'WF_WildFire_COUNT',
 'WF_YEAR',
 'WF_ALARM_DATE_MONTH',
 'WF_WildFire',
 'WF_GIS_ACRES',
 'WF_Shape_Length',
 'WF_Shape_Area',
 'WF_FIRE_DUR',
 'WF_AGENCY_BIA',
 'WF_AGENCY_BLM',
 'WF_AGENCY_CCO',
 'WF_AGENCY_CDF',
 'WF_AGENCY_DOD',
 'WF_AGENCY_LRA',
 'WF_AGENCY_NPS',
 'WF_AGENCY_USF',
 'WF_CAUSE_1',
 'WF_CAUSE_2',
 'WF_CAUSE_3',
 'WF_CAUSE_4',
 'WF_CAUSE_5',
 'WF_CAUSE_6',
 'WF_CAUSE_7',
 'WF_CAUSE_8',
 'WF_CAUSE_9',
 'WF_CAUSE_10',
 'WF_CAUSE_11',
 'WF_CAUSE_14',
 'WF_CAUSE_15',
 'WF_CAUSE_16',
 'WF_CAUSE_18',
 'WF_C_METHOD_1',
 'WF_C_METHOD_2',
 'WF_C_METHOD_3',
 'WF_C_METHOD_4',
 'WF_C_METHOD_5',
 'WF_C_METHOD_6',
 'WF_C_METHOD_7',
 'WF_C_METHOD_8',
 'WF_WildFire_COUNT_1YR_AGO',
 'WF_WildFire_COUNT_2YR_AGO',
 'WF_WildFire_COUNT_3YR_AGO',
 'WF_WildFire_COUNT_4YR_AGO',
 'WF_WildFire_COUNT_5YR_AGO']