In [1]:
import warnings
# NOTE(review): this hides every warning category for the whole session, including
# deprecation and pandas/geopandas data warnings; consider narrowing the filter.
warnings.filterwarnings('ignore')
import sys
sys.path.append('..') # make sure modules can be found from project home directory

In [2]:
from src.data.data_loading import load_processed_dataset, load_raw_datasets

import pandas as pd
import geopandas as gpd
import numpy as np 
import matplotlib.pyplot as plt
# etc...

In [3]:
from tqdm import tqdm
import fiona
# Registers the `progress_apply` method on pandas objects; the cells below call it on `firep`.
tqdm.pandas()

### Load raw `ca_fire_perimeters` data (only the `firep` layer, which is layer 0)

In [4]:
# Load the raw `ca_fire_perimeters` dataset(s).
# NOTE: judging by the recorded output, this call prompts for the layer names to keep
# (`firep20_1` was entered here), so the cell needs user input on every run.
df_list, df_names = load_raw_datasets('ca_fire_perimeters')


 fire20_1 has the following layers: 
 ['firep20_1', 'rxburn20_1', 'Non_RXFire_Legacy13_2'] 



What layers you would like to keep? Enter the layer names with 1 space in between, or enter `all` to load all layers. 
 firep20_1



 Loading fire20_1_firep20_1 

The list of dfs contains the following datasets (in this order): 
 ['fire20_1_firep20_1']


In [5]:
# Work on a copy of the first loaded dataset so the object held in `df_list` is not
# affected by the in-place edits made in the following cells.
firep = df_list[0].copy()

In [6]:
# Raw column name -> name used for the rest of this notebook.
column_renames = {
    'YEAR_': 'FIRE_YEAR',
    'CAUSE': 'SPECIFIC_CAUSE',
    'GIS_ACRES': 'FIRE_SIZE',
    'ALARM_DATE': 'FIRE_DATE',
}
# Renames in place, so `firep` keeps referring to the same object.
firep.rename(columns=column_renames, inplace=True)

In [9]:
def fire_size_class(size):
    """Return the fire size class letter ('A'-'G') for a fire size in acres.

    Bins follow the fpa_fod dataset splits:

        A: 0 < size <= 0.25
        B: 0.25 < size < 10
        C: 10 <= size < 100
        D: 100 <= size < 300
        E: 300 <= size < 1000
        F: 1000 <= size < 5000
        G: size >= 5000

    Parameters
    ----------
    size : float
        Fire size in acres.

    Returns
    -------
    str or None
        The class letter, or None when `size` is zero, negative or NaN.
    """
    # `not size > 0` (rather than `size <= 0`) also rejects NaN, which compares
    # False against every number.
    if not size > 0:
        return None
    if size <= 0.25:
        return 'A'
    if size < 10:
        return 'B'
    if size < 100:
        return 'C'
    if size < 300:
        return 'D'
    if size < 1000:
        return 'E'
    if size < 5000:
        return 'F'
    # Everything from 5000 acres upwards is class G. A fire of exactly 5000 acres
    # used to fall between the F and G conditions and was left unclassified.
    return 'G'

In [10]:
# Derive the size class of every perimeter from its acreage; `progress_apply`
# behaves like `apply` but also prints a tqdm progress bar.
firep['FIRE_SIZE_CLASS'] = firep['FIRE_SIZE'].progress_apply(fire_size_class)

100%|█████████████████████████████████████████████████████| 21318/21318 [00:00<00:00, 593632.89it/s]


### Change/update feature dtypes

In [11]:
# Columns to convert to numeric, grouped by the downcast applied to them.
year_columns = ['FIRE_YEAR']
float_columns = ['FIRE_SIZE', 'Shape_Length', 'Shape_Area']

# NOTE: the `firep.dtypes` output further down still reports FIRE_YEAR as float64,
# i.e. the unsigned downcast does not take effect on this data.
firep[year_columns] = firep[year_columns].progress_apply(
    pd.to_numeric, downcast="unsigned"
)
firep[float_columns] = firep[float_columns].progress_apply(
    pd.to_numeric, downcast='float'
)

100%|█████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 50.00it/s]
100%|████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 978.53it/s]


### Change/update feature values

`CAUSE` (renamed to `SPECIFIC_CAUSE` above)

In [12]:
# Specific cause labels in code order: the label at (1-based) position i belongs to
# numeric cause code i. The codes are contiguous, running from 1 to 19.
_specific_cause_labels = [
    'Lightning',                 # 1
    'Equipment Use',             # 2
    'Smoking',                   # 3
    'Campfire',                  # 4
    'Debris',                    # 5
    'Railroad',                  # 6
    'Arson',                     # 7
    'Playing with Fire',         # 8
    'Miscellaneous',             # 9
    'Vehicle',                   # 10
    'Power Line',                # 11
    'Firefighter Training',      # 12
    'Non-Firefighter Training',  # 13
    'Unknown/Unidentified',      # 14
    'Structure',                 # 15
    'Aircraft',                  # 16
    'Volcanic',                  # 17
    'Escaped Prescribed Burn',   # 18
    'Illegal Alien Campfire',    # 19
]

# Numeric cause code -> specific cause label.
map_val_to_cause = dict(enumerate(_specific_cause_labels, start=1))

# Broader cause category -> the numeric cause codes that fall under it.
_fpa_fod_cause_codes = {
    'Natural': (1, 17),
    'Equipment and vehicle use': (2, 10),
    'Smoking': (3,),
    'Recreation and ceremony': (4, 19),
    'Debris and open burning': (5,),
    'Railroad operations and maintenance': (6,),
    'Arson/incendiarism': (7,),
    'Misuse of fire by a minor': (8,),
    'Other causes': (9, 12, 13, 15, 16, 18),
    'Power generation/transmission/distribution': (11,),
    'Missing data/not specified/undetermined': (14,),
}

# Invert the grouping into numeric cause code -> category, ordered by code (1..19).
map_val_to_fpa_fod_cause = dict(sorted(
    (code, category)
    for category, codes in _fpa_fod_cause_codes.items()
    for code in codes
))

In [13]:
# Order matters: the broader category must be derived from the numeric codes first,
# because the second line overwrites those codes with their specific labels.
# Re-running this cell once the codes have been replaced no longer yields the
# categories from `map_val_to_fpa_fod_cause`.
# NOTE(review): despite the `_ORIG` suffix, SPECIFIC_CAUSE_ORIG holds the values of
# `map_val_to_fpa_fod_cause`, not the original codes — confirm the intended name.
firep['SPECIFIC_CAUSE_ORIG'] = firep['SPECIFIC_CAUSE'].replace(map_val_to_fpa_fod_cause)
firep['SPECIFIC_CAUSE'] = firep['SPECIFIC_CAUSE'].replace(map_val_to_cause)

`C_METHOD`

In [14]:
# Labels in code order: the label at (1-based) position i belongs to C_METHOD code i.
_c_method_labels = [
    'GPS Ground',                # 1
    'GPS Air',                   # 2
    'Infrared',                  # 3
    'Other Imagery',             # 4
    'Photo Interpretation',      # 5
    'Hand Drawn',                # 6
    'Mixed Collection Methods',  # 7
    'Unknown',                   # 8
]

# Numeric C_METHOD code -> label.
map_val_to_c_method = dict(enumerate(_c_method_labels, start=1))

In [15]:
# Swap the numeric C_METHOD codes for their labels; values without an entry in the
# mapping are left as they are.
firep['C_METHOD'] = firep['C_METHOD'].replace(map_val_to_c_method)

`OBJECTIVE`

In [16]:
# Labels in code order, starting at code 0.
_objective_labels = [
    'Unknown',           # 0
    'Suppression',       # 1
    'Resource Benefit',  # 2
]

# Numeric OBJECTIVE code -> label.
map_val_to_objective = dict(enumerate(_objective_labels))

In [17]:
# Swap the numeric OBJECTIVE codes for their labels; values without an entry in the
# mapping are left as they are.
firep['OBJECTIVE'] = firep['OBJECTIVE'].replace(map_val_to_objective)

### Convert to categorical variables as needed

In [18]:
# Label-valued columns that are stored with the pandas `category` dtype.
categorical_columns = (
    'SPECIFIC_CAUSE',
    'SPECIFIC_CAUSE_ORIG',
    'C_METHOD',
    'OBJECTIVE',
    'FIRE_SIZE_CLASS',
)
for column_name in categorical_columns:
    firep[column_name] = firep[column_name].astype('category')

In [19]:
firep.head()

Unnamed: 0,FIRE_YEAR,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,FIRE_DATE,CONT_DATE,SPECIFIC_CAUSE,COMMENTS,REPORT_AC,FIRE_SIZE,C_METHOD,OBJECTIVE,FIRE_NUM,Shape_Length,Shape_Area,geometry,FIRE_SIZE_CLASS,SPECIFIC_CAUSE_ORIG
0,2020.0,CA,CDF,NEU,NELSON,13212,2020-06-18T00:00:00+00:00,2020-06-23T00:00:00+00:00,Power Line,,110.0,109.602501,GPS Ground,Suppression,,3252.523193,443544.7,"MULTIPOLYGON (((-116841.251 97942.565, -116836...",D,Power generation/transmission/distribution
1,2020.0,CA,CDF,NEU,AMORUSO,11799,2020-06-01T00:00:00+00:00,2020-06-04T00:00:00+00:00,Equipment Use,,670.0,685.585022,GPS Ground,Suppression,,9653.760742,2774464.0,"MULTIPOLYGON (((-117328.400 90212.407, -117321...",E,Equipment and vehicle use
2,2020.0,CA,CDF,NEU,ATHENS,18493,2020-08-10T00:00:00+00:00,2020-03-01T00:00:00+00:00,Unknown/Unidentified,,26.0,27.30048,GPS Ground,Suppression,,1649.643188,110481.1,"MULTIPOLYGON (((-115605.059 92988.787, -115585...",C,Missing data/not specified/undetermined
3,2020.0,CA,CDF,NEU,FLEMING,7619,2020-03-31T00:00:00+00:00,2020-04-01T00:00:00+00:00,Miscellaneous,,13.0,12.93155,GPS Ground,Suppression,,1577.155884,52332.11,"MULTIPOLYGON (((-110213.270 105975.579, -11020...",C,Other causes
4,2020.0,CA,CDF,NEU,MELANESE,8471,2020-04-14T00:00:00+00:00,2020-04-19T00:00:00+00:00,Escaped Prescribed Burn,,10.3,10.31596,GPS Ground,Suppression,,1035.787598,41747.22,"MULTIPOLYGON (((-111793.600 164243.615, -11177...",C,Other causes


In [20]:
firep.dtypes

FIRE_YEAR               float64
STATE                    object
AGENCY                   object
UNIT_ID                  object
FIRE_NAME                object
INC_NUM                  object
FIRE_DATE                object
CONT_DATE                object
SPECIFIC_CAUSE         category
COMMENTS                 object
REPORT_AC               float64
FIRE_SIZE               float32
C_METHOD               category
OBJECTIVE              category
FIRE_NUM                 object
Shape_Length            float32
Shape_Area              float32
geometry               geometry
FIRE_SIZE_CLASS        category
SPECIFIC_CAUSE_ORIG    category
dtype: object

In [25]:
# Keep only fires from 1992 through 2018 (both bounds inclusive); rows with a
# missing FIRE_YEAR are dropped as well, since NaN never satisfies the range check.
year_in_range = firep['FIRE_YEAR'].between(1992, 2018)
firep = firep[year_in_range]

In [28]:
# Load the processed `fpa_fod` dataset and display it.
# NOTE(review): presumably loaded for comparison with `firep` — confirm against the
# cells that follow.
fpafod = load_processed_dataset('fpa_fod')
fpafod