# Main script to Join Data on Instrument 2 Fire Location

Modules: N/A <br>
Author: Jordan Meyer <br>
Email: jordan.meyer@berkeley.edu <br>
Date created: Feb 18, 2023 <br>

**Citations (data sources)**


**Citations (persons)**
1. Cornelia Ilin 

**Preferred environment**
1. Code written in Jupyter Notebooks

### Step 1: Import packages

In [1]:
import pandas as pd
import numpy as np
import netCDF4 as ncdf
import os
from datetime import date, timedelta
from math import pi
import fiona

import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import matplotlib.ticker as mticker
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable

# geography
import geopandas as gpd
import osmnx as ox
import shapely
from shapely.geometry import Point
import contextily as ctx 

#Moved from sklearn.neighbors to sklearn.metrics following their package change
import sklearn.metrics
# Great-circle (haversine) distance metric object, looked up by name from
# scikit-learn's DistanceMetric factory.
dist = sklearn.metrics.DistanceMetric.get_metric('haversine')

# ignore warnings
import warnings
# Install a global "ignore" filter so no warnings are shown in the notebook.
warnings.filterwarnings(action='ignore')


### Step 2: Define working directories

In [11]:
# Data locations, relative to the notebook's working directory on the local
# machine (not gdrive). Each value keeps its trailing slash because file names
# are appended to it with plain string concatenation.
fire_dir = "fire/"          # fire data
wind_dir = "wind/clean/"    # cleaned wind data
pm_dir = "aqi/"             # air-quality data
health_dir = "health/"      # health data
out_dir = "./"              # output location

In [12]:
ls

[1m[36m2021 Data Documentation[m[m/ [1m[36mdata_clean_scripts[m[m/      join_instruments.ipynb
6. fires_cleaning.ipynb  [1m[36mfire[m[m/                    [1m[36mschools_data[m[m/
__init__.py              [1m[36mhealth[m[m/                  [1m[36mwind[m[m/
[1m[36maqi[m[m/                     join_data.ipynb


### Step 3: Define functions

### Step 4: Read data

``wind``

In [28]:
# Load the combined wind table, order it by month and then by ZCTA, and
# renumber the rows from 0 so the index reflects the sorted order.
wind_df = (
    pd.read_csv(wind_dir + "all_years_wind_data.csv", index_col=0)
    .sort_values(["year_month", "ZCTA10"])
    .reset_index(drop=True)
)

# Preview the first rows.
wind_df.head()

Unnamed: 0.1,Unnamed: 0,lat,lon,ZCTA10,u,v,wdir,wspd,year_month,year,month
0,0,37.465,-117.936,89010,0.504258,-0.719008,305.042938,0.878208,199101,1991,1
1,1,35.396,-116.322,89019,-0.172753,-0.94694,259.661102,0.962568,199101,1991,1
2,2,36.161,-116.139,89060,-0.435964,-0.812957,241.796738,0.922477,199101,1991,1
3,3,35.957,-115.897,89061,-0.560538,-1.176384,244.522552,1.303105,199101,1991,1
4,4,39.52,-120.032,89439,0.042253,0.205661,78.390099,0.209957,199101,1991,1


In [300]:
# Load the fire table and derive per-fire year/month fields.
# ALARM_DATE and CONT_DATE are sliced as 'YYYY-MM-DD' strings.
fire_df = pd.read_csv(fire_dir + "fire_zipcodes_compressed.csv", index_col=0)

# Extract the alarm year and keep only fires in scope (1991 onwards).
# .copy() so the column assignments below modify an independent frame rather
# than a slice of the unfiltered one.
fire_df['year'] = fire_df['ALARM_DATE'].str[:4]
fire_df = fire_df[fire_df['year'].astype(int) > 1990].copy()

# Extract the alarm month and the containment (end) year/month.
# 'year' and 'year_month' stay strings; 'month' and 'end_month' are integers.
fire_df['month'] = fire_df['ALARM_DATE'].str[5:7]
fire_df['year_month'] = fire_df['year'] + fire_df['month']
fire_df['month'] = fire_df['month'].astype(int)
fire_df['end_year'] = fire_df['CONT_DATE'].str[:4]
fire_df['end_month'] = fire_df['CONT_DATE'].str[5:7].astype(int)

# Duration on a monthly basis. The +1 acts as a ceiling: a fire burning at any
# time in a month counts for that month regardless of its start date.
fire_df['duration_months'] = (fire_df['end_month'] - fire_df['month']) + 1
fire_df = fire_df.set_index(['ZCTA', 'ALARM_DATE'])

# Fires whose containment year differs from their alarm year are split in two:
# the original row covers the rest of the alarm year, and a duplicated
# "<name> CONT" row covers the following year.
# A positional boolean mask is used because (ZCTA, ALARM_DATE) is not
# guaranteed to be unique, so label-based selection could hit several rows.
spans_years = (fire_df['end_year'] != fire_df['year']).to_numpy()

# Continuation rows start in January of the next year. Column types are kept
# consistent with the original rows (year as str, month as int).
continued = fire_df[spans_years].copy()
continued['FIRE_NAME'] = continued['FIRE_NAME'] + " CONT"
continued['year'] = (continued['year'].astype(int) + 1).astype(str)
continued['month'] = 1
continued['year_month'] = continued['year'] + "01"
# Months burned in the continuation year. This equals 1 for fires contained in
# January (the only case the original author observed in this data).
# NOTE(review): assumes the fire ends in the year right after it started.
continued['duration_months'] = continued['end_month']

# Replace the total duration with the duration inside the alarm year; this
# also clears the negative durations produced when end_month < month.
fire_df.loc[spans_years, 'duration_months'] = (
    13 - fire_df.loc[spans_years, 'month']
).to_numpy()

# Append the continuation rows after the original rows. DataFrame.append was
# removed in pandas 2.0, so pd.concat is used.
fire_df = pd.concat([fire_df, continued])

fire_df = fire_df.reset_index()

In [301]:
fire_df.shape

(6868, 18)

In [302]:
# For every fire that lasts longer than one month, add one extra row per
# additional month. Each extra row advances 'month' by one, recomputes
# 'year_month', and carries the remaining duration in 'duration_months'.
# The original row is left unchanged.
multi_month = fire_df[fire_df['duration_months'] > 1]
print(multi_month.shape)

# Collect the new rows first and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and growing the frame inside the
# loop copies the whole table on every iteration.
extra_rows = []
for _, fire in multi_month.iterrows():
    # `fire` is a per-row copy, so mutating it does not touch fire_df.
    while fire['duration_months'] > 1:
        fire['duration_months'] -= 1
        fire['month'] += 1
        # Zero-pad the month so year_month is always YYYYMM.
        fire['year_month'] = f"{fire['year']}{int(fire['month']):02d}"
        # Snapshot the current state; the Series keeps being mutated.
        extra_rows.append(fire.copy())

if extra_rows:
    # Each new row keeps the index label of the fire it was derived from,
    # as the original append-based code did.
    fire_df = pd.concat([fire_df, pd.DataFrame(extra_rows)])

(1419, 18)


In [303]:
# Plain pandas copy of the fire table without the 'fire_centroid' column.
pd_fire = pd.DataFrame(fire_df.drop(columns=['fire_centroid']))

In [304]:
# Display the fire table with exact duplicate rows removed (first occurrence
# kept). The result is only shown, not assigned, so fire_df is unchanged.
fire_df.drop_duplicates(keep='first')

Unnamed: 0,ZCTA,ALARM_DATE,FIRE_NAME,CONT_DATE,CAUSE,GIS_ACRES,Shape_Length,Shape_Area,index_right,fire_centroid,DURATION,FIRE_AREA_KM2,year,month,year_month,end_year,end_month,duration_months
0,95648.0,2020-06-18,NELSON,2020-06-23,11.0,109.602280,3252.523280,4.435447e+05,405.0,POINT (-121.3480590211847 38.88804091206984),5 days,0.443546,2020,6,202006,2020,6,1
1,95747.0,2020-06-01,AMORUSO,2020-06-04,2.0,685.585022,9653.760308,2.774464e+06,1217.0,POINT (-121.3708983104108 38.82756661261951),3 days,2.774464,2020,6,202006,2020,6,1
2,95648.0,2020-03-31,FLEMING,2020-04-01,9.0,12.931545,1577.155857,5.233211e+04,405.0,POINT (-121.2734135751918 38.9623284462546),1 days,0.052332,2020,3,202003,2020,4,2
3,95966.0,2020-04-14,MELANESE,2020-04-19,18.0,10.315964,1035.787625,4.174722e+04,933.0,POINT (-121.3006534927401 39.48636412414794),5 days,0.041747,2020,4,202004,2020,4,1
4,95747.0,2020-07-05,PFE,2020-07-05,14.0,36.701931,2348.114043,1.485274e+05,1217.0,POINT (-121.3810176618852 38.73133921489409),0 days,0.148527,2020,7,202007,2020,7,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6822,95546.0,2016-08-22,TULLEY,2016-09-04,7.0,607.445740,13095.080608,2.458246e+06,277.0,POINT (-123.8140318155857 41.28541623395056),13 days,2.458246,2016,9,201609,2016,9,1
6847,96130.0,2016-07-30,DOCKERY,2016-08-17,10.0,27.963249,2655.210701,1.131633e+05,51.0,POINT (-120.6873444598079 40.42539576204376),18 days,0.113163,2016,8,201608,2016,8,1
6848,96015.0,2016-09-11,HOWARD,2016-10-17,6.0,379.622528,7898.270122,1.536278e+06,1021.0,POINT (-120.9157313359543 41.47071744410114),36 days,1.536278,2016,10,201610,2016,10,1
6849,96130.0,2016-09-11,WILLARD,2016-11-28,14.0,2573.955322,27364.478495,1.041643e+07,51.0,POINT (-120.7510806570093 40.37593783404107),78 days,10.416427,2016,10,201610,2016,11,2


In [305]:
# Display the de-duplicated fire table keyed by (ZCTA, year_month).
# The result is only shown, not assigned, so pd_fire is unchanged.
(
    pd_fire
    .drop_duplicates()
    .set_index(['ZCTA', 'year_month'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,ALARM_DATE,FIRE_NAME,CONT_DATE,CAUSE,GIS_ACRES,Shape_Length,Shape_Area,index_right,DURATION,FIRE_AREA_KM2,year,month,end_year,end_month,duration_months
ZCTA,year_month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
95648.0,202006,2020-06-18,NELSON,2020-06-23,11.0,109.602280,3252.523280,4.435447e+05,405.0,5 days,0.443546,2020,6,2020,6,1
95747.0,202006,2020-06-01,AMORUSO,2020-06-04,2.0,685.585022,9653.760308,2.774464e+06,1217.0,3 days,2.774464,2020,6,2020,6,1
95648.0,202003,2020-03-31,FLEMING,2020-04-01,9.0,12.931545,1577.155857,5.233211e+04,405.0,1 days,0.052332,2020,3,2020,4,2
95966.0,202004,2020-04-14,MELANESE,2020-04-19,18.0,10.315964,1035.787625,4.174722e+04,933.0,5 days,0.041747,2020,4,2020,4,1
95747.0,202007,2020-07-05,PFE,2020-07-05,14.0,36.701931,2348.114043,1.485274e+05,1217.0,0 days,0.148527,2020,7,2020,7,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95546.0,201609,2016-08-22,TULLEY,2016-09-04,7.0,607.445740,13095.080608,2.458246e+06,277.0,13 days,2.458246,2016,9,2016,9,1
96130.0,201608,2016-07-30,DOCKERY,2016-08-17,10.0,27.963249,2655.210701,1.131633e+05,51.0,18 days,0.113163,2016,8,2016,8,1
96015.0,201610,2016-09-11,HOWARD,2016-10-17,6.0,379.622528,7898.270122,1.536278e+06,1021.0,36 days,1.536278,2016,10,2016,10,1
96130.0,201610,2016-09-11,WILLARD,2016-11-28,14.0,2573.955322,27364.478495,1.041643e+07,51.0,78 days,10.416427,2016,10,2016,11,2
