In [1]:
import pandas as pd
import geopandas as gpd
import xarray as xr
import pyproj
from tqdm import tqdm
import numpy as np
import os
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [2]:
# check python version and all packages version
def check_python_version():
    import sys
    print("Python version")
    print (sys.version)
    print("Pandas version")
    print(pd.__version__)
    print("Geopandas version")
    print(gpd.__version__)
    print("Xarray version")
    print(xr.__version__)
    print("Pyproj version")
    print(pyproj.__version__)

check_python_version()

Python version
3.11.9 | packaged by Anaconda, Inc. | (main, Apr 19 2024, 16:40:41) [MSC v.1916 64 bit (AMD64)]
Pandas version
2.2.2
Geopandas version
0.14.2
Xarray version
2023.6.0
Pyproj version
3.6.1


In [284]:
path = '../New_Data/calfire_2001_2023_MTBS_match_TableToExcel.csv'
fod_fpa = pd.read_csv(path, encoding='latin1')

In [25]:
fod_fpa.columns

Index(['OBJECTID', 'FOD_ID', 'FPA_ID', 'SOURCE_SYSTEM_TYPE', 'SOURCE_SYSTEM',
       'NWCG_REPORTING_AGENCY', 'NWCG_REPORTING_UNIT_ID',
       'NWCG_REPORTING_UNIT_NAME', 'SOURCE_REPORTING_UNIT_NAME',
       'LOCAL_FIRE_REPORT_ID', 'FIRE_CODE', 'FIRE_NAME',
       'ICS_209_PLUS_INCIDENT_JOIN_ID', 'ICS_209_PLUS_COMPLEX_JOIN_ID',
       'MTBS_ID', 'MTBS_FIRE_NAME', 'COMPLEX_NAME', 'FIRE_YEAR',
       'DISCOVERY_DATE', 'DISCOVERY_DOY', 'NWCG_CAUSE_CLASSIFICATION',
       'NWCG_GENERAL_CAUSE', 'NWCG_CAUSE_AGE_CATEGORY', 'CONT_DATE',
       'CONT_DOY', 'FIRE_SIZE', 'FIRE_SIZE_CLASS', 'LATITUDE', 'LONGITUDE',
       'OWNER_DESCR', 'STATE', 'COUNTY', 'FIPS_NAME'],
      dtype='object')

In [237]:
cols = ['MTBS_FIRE_NAME','DISCOVERY_DATE','CONT_DATE','FIRE_SIZE','LONGITUDE','LATITUDE','FIRE_YEAR']
fod_fpa_sub = fod_fpa[cols]
fod_fpa_sub.rename(columns={'MTBS_FIRE_NAME':'FIRE_NAME'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fod_fpa_sub.rename(columns={'MTBS_FIRE_NAME':'FIRE_NAME'}, inplace=True)


In [238]:
# CHECK IF fod_fpa_sub HAS DUPLICATES
fod_fpa_sub.duplicated().sum()

0

In [239]:
# add index column to be row number + fod_fpa
fod_fpa_sub['index'] = fod_fpa_sub.index
fod_fpa_sub['index'] = 'fod_fpa' + fod_fpa_sub['index'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fod_fpa_sub['index'] = fod_fpa_sub.index
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fod_fpa_sub['index'] = 'fod_fpa' + fod_fpa_sub['index'].astype(str)


In [240]:
fod_fpa_sub.shape

(1056, 8)

In [241]:
fod_fpa_sub.head()

Unnamed: 0,FIRE_NAME,DISCOVERY_DATE,CONT_DATE,FIRE_SIZE,LONGITUDE,LATITUDE,FIRE_YEAR,index
0,POWER,10/6/2004,10/21/2004,16823.0,-120.211667,38.523333,2004,fod_fpa0
1,FREDS,10/13/2004,10/17/2004,7700.0,-120.26,38.78,2004,fod_fpa1
2,SLINKARD 2,7/21/2005,7/24/2005,89.0,-119.554722,38.573611,2005,fod_fpa2
3,NINE,7/9/2005,7/10/2005,1149.0,-118.438333,35.688056,2005,fod_fpa3
4,MUNZ,6/27/2005,6/27/2005,800.0,-118.388889,34.683056,2005,fod_fpa4


In [35]:
fire_data = pd.read_parquet('../Clean_Data/fire_data_map_weather_lon_lat.parquet')

In [95]:
fire_data.columns

Index(['YEAR_', 'STATE', 'AGENCY', 'UNIT_ID', 'FIRE_NAME', 'INC_NUM',
       'IRWINID', 'ALARM_DATE', 'CONT_DATE', 'C_METHOD', 'CAUSE', 'OBJECTIVE',
       'GIS_ACRES', 'COMPLEX_NA', 'COMPLEX_ID', 'COMMENTS', 'FIRE_NUM',
       'Shape_Leng', 'Shape_Area', 'lon', 'lat', 'distance'],
      dtype='object')

In [242]:
cols = ['YEAR_','ALARM_DATE','FIRE_NAME','GIS_ACRES','Shape_Area','lon','lat']
fire_data_sub = fire_data[cols]

In [243]:
# check if fire_data_sub has duplicates
fire_data_sub.duplicated().sum()

0

In [244]:
fire_data_sub['index'] = fire_data_sub.index
fire_data_sub['index'] = 'fire_data' + fire_data_sub['index'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fire_data_sub['index'] = fire_data_sub.index
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fire_data_sub['index'] = 'fire_data' + fire_data_sub['index'].astype(str)


In [245]:
fire_data_sub.head()

Unnamed: 0,YEAR_,ALARM_DATE,FIRE_NAME,GIS_ACRES,Shape_Area,lon,lat,index
0,2023,2023-06-17,WHITWORTH,5.72913,23184.946763,-122.891667,41.525,fire_data0
1,2023,2023-06-02,KAISER,13.6024,55046.898792,-121.975,39.816667,fire_data1
2,2023,2023-07-01,JACKSON,27.8145,112561.147688,-120.891667,38.275,fire_data2
3,2023,2023-07-11,CARBON,58.7602,237794.210367,-120.975,38.441667,fire_data3
4,2023,2023-07-11,LIBERTY,70.979,287241.810389,-121.016667,38.233333,fire_data4


In [246]:
# check fire names that is in fod_fpa but not in matched_df
fod_fpa_names = fod_fpa_sub['FIRE_NAME'].unique()
fire_names_full = fire_data_sub['FIRE_NAME'].unique()
# get fire names that are in both fod_fpa and fire_data
fire_names = set(fod_fpa_names).intersection(fire_names_full)
# convert to list
fire_names = list(fire_names)

In [247]:
len(fire_names), len(fod_fpa_names), len(fire_names_full)

(638, 834, 4190)

In [248]:
# print sentence saying the # of rows in fod_fpa_sub
print(f'There are {fod_fpa_sub.shape[0]} rows in fod_fpa_sub')
# in fod_fpa_sub data, only keep the fire names that are in fire_data_sub
fod_fpa_sub = fod_fpa_sub[fod_fpa_sub['FIRE_NAME'].isin(fire_names)]
# print sentence saying the # of rows in fod_fpa_sub after filtering fire names in fire_data_sub
print(f'After filtering fire names in fire_data_sub, there are {fod_fpa_sub.shape[0]} rows in fod_fpa_sub')

There are 1056 rows in fod_fpa_sub
After filtering fire names in fire_data_sub, there are 809 rows in fod_fpa_sub


In [249]:
merged_df = pd.merge(fod_fpa_sub, fire_data_sub, on='FIRE_NAME', suffixes=('_fpa_fod', ''), how='inner')
merged_df.head()

Unnamed: 0,FIRE_NAME,DISCOVERY_DATE,CONT_DATE,FIRE_SIZE,LONGITUDE,LATITUDE,FIRE_YEAR,index_fpa_fod,YEAR_,ALARM_DATE,GIS_ACRES,Shape_Area,lon,lat,index
0,POWER,10/6/2004,10/21/2004,16823.0,-120.211667,38.523333,2004,fod_fpa0,2023,2023-09-07,60.4371,243103.799966,-120.683333,36.566667,fire_data181
1,POWER,10/6/2004,10/21/2004,16823.0,-120.211667,38.523333,2004,fod_fpa0,2022,2022-09-06,133.754,541281.652317,-119.516667,37.15,fire_data353
2,POWER,10/6/2004,10/21/2004,16823.0,-120.211667,38.523333,2004,fod_fpa0,2022,2022-10-23,21.0131,85036.984789,-121.141667,38.358333,fire_data516
3,POWER,10/6/2004,10/21/2004,16823.0,-120.211667,38.523333,2004,fod_fpa0,2020,2020-07-11,151.549,613298.938547,-119.516667,37.15,fire_data1055
4,POWER,10/6/2004,10/21/2004,16823.0,-120.211667,38.523333,2004,fod_fpa0,2020,2020-05-29,17.0457,68981.33137,-114.6,32.733333,fire_data1330


In [250]:
merged_df.shape

(4463, 15)

In [251]:
# assign DISCOVERY_DATE to datetime
merged_df['DISCOVERY_DATE'] = pd.to_datetime(merged_df['DISCOVERY_DATE'])
merged_df['ALARM_DATE'] = pd.to_datetime(merged_df['ALARM_DATE'])

In [252]:
# filter rows where DISCOVERY_DATE and ALARM_DATE are the same or only 1 day apart
merged_df['date_diff'] = (merged_df['ALARM_DATE'] - merged_df['DISCOVERY_DATE']).dt.days
merged_df['date_diff'] = merged_df['date_diff'].abs()
# calcuate the difference between FIRE_SIZE and GIS_ACRES
merged_df['size_diff'] = (merged_df['FIRE_SIZE'] - merged_df['GIS_ACRES']).abs()

In [260]:
# for each index_fpa_fod, only keep the row with the smallest date_diff
matched_df = merged_df.sort_values('date_diff', ascending=True).groupby('index_fpa_fod').head(1)

In [261]:
matched_df.shape

(809, 17)

In [263]:
matched_df['YEAR_'] = matched_df['YEAR_'].astype(int)
matched_df['FIRE_YEAR'] = matched_df['FIRE_YEAR'].astype(int)
matched_df['year_diff'] = (matched_df['YEAR_'] - matched_df['FIRE_YEAR']).abs()

In [264]:
# value counts of year_diff
matched_df['year_diff'].value_counts()

year_diff
0     748
1      15
3       9
2       8
8       6
4       4
6       4
9       3
15      3
7       2
13      2
16      2
10      1
11      1
19      1
Name: count, dtype: int64

In [266]:
# check the range of size_diff when year_diff > 0
matched_df[matched_df['year_diff'] > 0]['size_diff'].describe()

count        61.000000
mean      10760.656847
std       31809.716276
min          47.275300
25%         987.717500
50%        1843.605000
75%        4834.271440
max      192556.200000
Name: size_diff, dtype: float64

In [267]:
# only keep rows where year_diff = 0
matched_df = matched_df[matched_df['year_diff'] == 0]

In [268]:
matched_df.shape

(748, 18)

In [269]:
# check the range of size_diff
matched_df['size_diff'].describe()

count    7.480000e+02
mean     1.185271e+04
std      9.724764e+04
min      0.000000e+00
25%      1.707500e+00
50%      2.310000e+01
75%      1.786500e+02
max      1.031480e+06
Name: size_diff, dtype: float64

In [270]:
matched_df['index'].duplicated().sum() # in original fire data, each row should only be matched once

22

In [271]:
duplicated_index = matched_df[matched_df['index'].duplicated()]['index'].tolist()
duplicated_index

['fire_data3412',
 'fire_data1271',
 'fire_data1197',
 'fire_data1271',
 'fire_data3579',
 'fire_data2114',
 'fire_data6153',
 'fire_data1271',
 'fire_data4930',
 'fire_data1271',
 'fire_data1271',
 'fire_data1271',
 'fire_data2665',
 'fire_data1271',
 'fire_data3220',
 'fire_data2665',
 'fire_data3344',
 'fire_data5441',
 'fire_data5441',
 'fire_data2054',
 'fire_data6182',
 'fire_data2662']

In [272]:
matched_df[matched_df['index'] == "fire_data3412"]

Unnamed: 0,FIRE_NAME,DISCOVERY_DATE,CONT_DATE,FIRE_SIZE,LONGITUDE,LATITUDE,FIRE_YEAR,index_fpa_fod,YEAR_,ALARM_DATE,GIS_ACRES,Shape_Area,lon,lat,index,date_diff,size_diff,year_diff
2629,SHIELL,2015-07-30,10/30/2015,8669.0,-123.160556,40.495,2015,fod_fpa708,2015,2015-07-30,15390.6,62283750.0,-123.016667,40.441667,fire_data3412,0.0,6721.6,0
2649,SHIELL,2015-07-30,10/30/2015,15378.0,-123.053889,40.465278,2015,fod_fpa710,2015,2015-07-30,15390.6,62283750.0,-123.016667,40.441667,fire_data3412,0.0,12.6,0


In [273]:
matched_df.shape

(748, 18)

In [274]:
# for each index, only keep the row with the smallest size_diff
matched_df = matched_df.sort_values('size_diff', ascending=True).groupby('index').head(1)
matched_df.shape

(726, 18)

In [275]:
matched_df['size_diff'].describe()

count       726.000000
mean       1267.312960
std       16743.956465
min           0.000000
25%           1.632500
50%          20.435000
75%         158.972500
max      443332.000000
Name: size_diff, dtype: float64

In [278]:
# confirm both index are unique
matched_df['index'].duplicated().sum()

0

In [279]:
matched_df['index_fpa_fod'].duplicated().sum()

0

In [276]:
# order by size_diff
matched_df = matched_df.sort_values('size_diff', ascending=False)

In [287]:
fire_data.columns

Index(['YEAR_', 'STATE', 'AGENCY', 'UNIT_ID', 'FIRE_NAME', 'INC_NUM',
       'IRWINID', 'ALARM_DATE', 'CONT_DATE', 'C_METHOD', 'CAUSE', 'OBJECTIVE',
       'GIS_ACRES', 'COMPLEX_NA', 'COMPLEX_ID', 'COMMENTS', 'FIRE_NUM',
       'Shape_Leng', 'Shape_Area', 'lon', 'lat', 'distance'],
      dtype='object')

In [300]:
# in fire_data, show rows w FIRE_NAME  == AUGUST COMPLEX
col_to_show  = ['AGENCY', 'UNIT_ID', 'FIRE_NAME', 'INC_NUM',
       'IRWINID', 'ALARM_DATE', 'CONT_DATE', 'GIS_ACRES', 
       'Shape_Leng', 'Shape_Area', 'lon', 'lat', 'distance']	
fire_data[fire_data['FIRE_NAME'] == 'AUGUST COMPLEX'][col_to_show]

Unnamed: 0,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,IRWINID,ALARM_DATE,CONT_DATE,GIS_ACRES,Shape_Leng,Shape_Area,lon,lat,distance
1271,USF,MNF,AUGUST COMPLEX,753,{EE343B88-0ADB-4C25-B744-6B4CE9DCC6DA},2020-08-16,2020-11-11,1032700.0,1298407.0,4179187000.0,-123.016667,39.941667,1.983027


In [291]:
fod_fpa.columns

Index(['OBJECTID', 'FOD_ID', 'FPA_ID', 'SOURCE_SYSTEM_TYPE', 'SOURCE_SYSTEM',
       'NWCG_REPORTING_AGENCY', 'NWCG_REPORTING_UNIT_ID',
       'NWCG_REPORTING_UNIT_NAME', 'SOURCE_REPORTING_UNIT_NAME',
       'LOCAL_FIRE_REPORT_ID', 'FIRE_CODE', 'FIRE_NAME',
       'ICS_209_PLUS_INCIDENT_JOIN_ID', 'ICS_209_PLUS_COMPLEX_JOIN_ID',
       'MTBS_ID', 'MTBS_FIRE_NAME', 'COMPLEX_NAME', 'FIRE_YEAR',
       'DISCOVERY_DATE', 'DISCOVERY_DOY', 'NWCG_CAUSE_CLASSIFICATION',
       'NWCG_GENERAL_CAUSE', 'NWCG_CAUSE_AGE_CATEGORY', 'CONT_DATE',
       'CONT_DOY', 'FIRE_SIZE', 'FIRE_SIZE_CLASS', 'LATITUDE', 'LONGITUDE',
       'OWNER_DESCR', 'STATE', 'COUNTY', 'FIPS_NAME'],
      dtype='object')

In [299]:
col_to_show  = [ 'NWCG_REPORTING_UNIT_ID',
       'NWCG_REPORTING_UNIT_NAME', 'SOURCE_REPORTING_UNIT_NAME',
       'FIRE_NAME',
       'ICS_209_PLUS_INCIDENT_JOIN_ID', 
        'MTBS_FIRE_NAME', 
       'DISCOVERY_DATE', 
       'NWCG_GENERAL_CAUSE', 'CONT_DATE',
       'CONT_DOY', 'FIRE_SIZE', 'FIRE_SIZE_CLASS', 'LATITUDE', 'LONGITUDE',
       'OWNER_DESCR']
fod_fpa[fod_fpa['MTBS_FIRE_NAME'] == 'AUGUST COMPLEX'][col_to_show]

Unnamed: 0,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT_NAME,FIRE_NAME,ICS_209_PLUS_INCIDENT_JOIN_ID,MTBS_FIRE_NAME,DISCOVERY_DATE,NWCG_GENERAL_CAUSE,CONT_DATE,CONT_DOY,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,OWNER_DESCR
1019,USCASHF,Shasta-Trinity National Forest,Shasta-Trinity National Forest,SHF ELKHORN,2020_11923345_SHF ELKHORN,AUGUST COMPLEX,8/18/2020,Natural,,,51576.7,G,40.096175,-122.734014,PRIVATE
1020,USCAMNF,Mendocino National Forest,Mendocino National Forest,DOE,2020_11885771_DOE,AUGUST COMPLEX,8/16/2020,Natural,11/11/2020,316.0,589368.0,G,39.765255,-122.672914,USFS
1021,USCAMNF,Mendocino National Forest,Mendocino National Forest,GLADE,2020_11843929_AUGUST COMPLEX,AUGUST COMPLEX,8/19/2020,Natural,,,26196.7,G,39.944075,-122.916814,USFS
1023,USCAMNF,Mendocino National Forest,Mendocino National Forest,CORBIN,2020_11843929_AUGUST COMPLEX,AUGUST COMPLEX,8/16/2020,Natural,,,6061.25,G,39.544815,-122.742813,USFS
1024,USCAMNF,Mendocino National Forest,Mendocino National Forest,HULL,2020_11873463_HULL,AUGUST COMPLEX,8/19/2020,Natural,,,12283.8,G,39.536375,-122.938913,USFS
1025,USCAMNF,Mendocino National Forest,Mendocino National Forest,PINE KOP,2020_11843929_AUGUST COMPLEX,AUGUST COMPLEX,8/16/2020,Natural,11/11/2020,316.0,1221.9,F,39.596394,-122.747513,USFS
1026,USCAMNF,Mendocino National Forest,Mendocino National Forest,HOPKINS,2020_11923345_SHF ELKHORN,AUGUST COMPLEX,8/17/2020,Natural,,,328363.0,G,40.125375,-123.072714,USFS
1027,USCAMNF,Mendocino National Forest,Mendocino National Forest,WILLOW BASIN,2020_11923345_SHF ELKHORN,AUGUST COMPLEX,8/19/2020,Natural,,,1220.0,F,40.087235,-122.984014,USFS


In [277]:
matched_df.head()

Unnamed: 0,FIRE_NAME,DISCOVERY_DATE,CONT_DATE,FIRE_SIZE,LONGITUDE,LATITUDE,FIRE_YEAR,index_fpa_fod,YEAR_,ALARM_DATE,GIS_ACRES,Shape_Area,lon,lat,index,date_diff,size_diff,year_diff
4328,AUGUST COMPLEX,2020-08-16,11/11/2020,589368.0,-122.672914,39.765255,2020,fod_fpa1020,2020,2020-08-16,1032700.0,4179187000.0,-123.016667,39.941667,fire_data1271,0.0,443332.0,0
3035,OAK,2017-08-11,,45403.0,-123.475,41.786389,2017,fod_fpa781,2017,2017-08-11,91125.3,368771100.0,-123.6,41.733333,fire_data2665,0.0,45722.3,0
2146,NORTH PASS,2012-08-18,10/16/2012,41983.0,-123.129167,39.866111,2012,fod_fpa589,2012,2012-08-18,5.3697,21730.39,-123.016667,39.9,fire_data4243,0.0,41977.6303,0
274,WITCH,2007-10-21,10/31/2007,197990.0,-116.711111,33.074444,2007,fod_fpa61,2007,2007-10-21,162070.0,655875900.0,-116.85,33.066667,fire_data5879,0.0,35920.0,0
2669,LAKE,2015-06-17,,51000.0,-116.900833,34.156944,2015,fod_fpa719,2015,2015-06-17,31284.4,126603600.0,-116.766667,34.15,fire_data3354,0.0,19715.6,0


In [301]:
matched_df.shape

(726, 18)

In [303]:
# check # of rows in fire_data that have GIS_ACRES > 100
fire_data[fire_data['GIS_ACRES'] > 100].shape

(2818, 22)

In [304]:
# check # of rows in matched_df that have GIS_ACRES > 100
matched_df[matched_df['GIS_ACRES'] > 100].shape

(711, 18)

In [321]:
# check description of GIS_ACRES in fire_data, get quantile 0, 0.1, 0.25, 0.5, 0.75, 0.9, 1
matched_df['GIS_ACRES'].describe(percentiles=[0, 0.1, 0.25, 0.5, 0.75, 0.9, 1])

count    7.260000e+02
mean     1.712262e+04
std      5.526208e+04
min      2.579980e+00
0%       2.579980e+00
10%      1.020160e+03
25%      1.508438e+03
50%      3.118740e+03
75%      1.114065e+04
90%      3.702410e+04
100%     1.032700e+06
max      1.032700e+06
Name: GIS_ACRES, dtype: float64

In [322]:
711/len(matched_df)

0.9793388429752066

In [312]:
def big_fire_pct(cutoff):
    # check # of rows in fire_data that have GIS_ACRES > cutoff
    n_fire_data_big = fire_data[fire_data['GIS_ACRES'] > cutoff].shape[0]
    # check # of rows in matched_df that have GIS_ACRES > cutoff
    n_matched_big = matched_df[matched_df['GIS_ACRES'] > cutoff].shape[0]
    # calculate the percentage
    pct = n_matched_big / n_fire_data_big * 100
    # print the sentence to summary n_matched_big and n_fire_data_big
    print(f'{n_matched_big} fires in matched_df have GIS_ACRES > {cutoff}')
    print(f'{n_fire_data_big} fires in fire_data have GIS_ACRES > {cutoff}')
    # return the percentage in a sentence
    return f'{pct:.2f}% of fires in fire_data have GIS_ACRES > {cutoff}'

In [313]:
big_fire_pct(100)

711 fires in matched_df have GIS_ACRES > 100
2818 fires in fire_data have GIS_ACRES > 100


'25.23% of fires in fire_data have GIS_ACRES > 100'

In [314]:
big_fire_pct(1000)

663 fires in matched_df have GIS_ACRES > 1000
1023 fires in fire_data have GIS_ACRES > 1000


'64.81% of fires in fire_data have GIS_ACRES > 1000'

In [315]:
big_fire_pct(10000)

194 fires in matched_df have GIS_ACRES > 10000
294 fires in fire_data have GIS_ACRES > 10000


'65.99% of fires in fire_data have GIS_ACRES > 10000'

In [316]:
big_fire_pct(50000)

50 fires in matched_df have GIS_ACRES > 50000
82 fires in fire_data have GIS_ACRES > 50000


'60.98% of fires in fire_data have GIS_ACRES > 50000'

In [317]:
big_fire_pct(100000)

22 fires in matched_df have GIS_ACRES > 100000
34 fires in fire_data have GIS_ACRES > 100000


'64.71% of fires in fire_data have GIS_ACRES > 100000'