# Main script to Join Data on Instrument 2 Fire Location

Modules: `modules.deep_ols` (project-local, imported in the setup cell) <br>
Author: Jordan Meyer <br>
Email: jordan.meyer@berkeley.edu <br>
Date created: Feb 18, 2023 <br>

**Citations (data sources)**


**Citations (persons)**
1. Cornelia Ilin 

**Preferred environment**
1. Code written in Jupyter Notebooks

In [1]:
# Environment setup: choose the data directory and import the project-local
# `deep_ols` module, with one code path for Google Colab and one for a local
# checkout.
import sys

# True when the google.colab package is already loaded in this interpreter;
# that is how this notebook detects a Colab runtime.
IN_COLAB = "google.colab" in sys.modules

if IN_COLAB:
    # Colab branch: install geopandas, then mount Google Drive.
    !pip install geopandas --quiet
    from google.colab import drive

    drive.mount("/content/drive")
    # in_dir and in_instrument are both bound to the same Drive folder string.
    in_dir = (
        in_instrument
    ) = "/content/drive/MyDrive/capstone_fire/notebooks/instrument_2/"
    # NOTE(review): this dotted import is resolved by the import system as a
    # top-level package named `drive`, not through the `drive` object imported
    # above. Presumably it relies on the mounted /content/drive directory being
    # importable from the working directory -- confirm it works in Colab.
    import drive.MyDrive.capstone_fire.modules.deep_ols as deep_ols
else:
    # Local branch: data is read relative to the notebook's working directory.
    # NOTE(review): this points at ../data/instrument_2/ while the Colab branch
    # points at notebooks/instrument_2/ -- confirm both hold the same files.
    in_dir = in_instrument = "../data/instrument_2/"
    # Step up one directory so `modules` can be imported, then step back into
    # 'notebooks'. This assumes the kernel starts inside the notebooks folder.
    %cd '..'
    import modules.deep_ols as deep_ols
    %cd 'notebooks'

/Users/jordan/Documents/GitHub/fire_capstone
/Users/jordan/Documents/GitHub/fire_capstone/notebooks


In [2]:
import calendar
import os
from datetime import date, timedelta

# geography
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import shapely

# Moved from sklearn.neighbors to sklearn.metrics following their package change
import sklearn.metrics
from shapely.geometry import Point

# import cartopy.crs as ccrs
# import contextily as ctx
# import fiona
# import netCDF4 as ncdf
# import osmnx as ox
# from cartopy.mpl.gridliner import LATITUDE_FORMATTER, LONGITUDE_FORMATTER
# from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable


# Haversine distance metric object obtained from scikit-learn. It is not used
# in the cells shown in this notebook.
# NOTE(review): scikit-learn's haversine metric is understood to expect
# (latitude, longitude) pairs in radians -- confirm at any call site.
dist = sklearn.metrics.DistanceMetric.get_metric("haversine")

# Silence every warning for the rest of the session. This also hides pandas
# deprecation and dtype warnings, so remove it while debugging.
import warnings

# Notebook progress-bar helpers.
from tqdm.notebook import tqdm, trange

warnings.filterwarnings("ignore")

# START HERE FOR AGGREGATION

## Aggregation of the dataframe for OLS

In [3]:
# Load the aggregated instrument table from the instrument directory.
# The recorded output shows columns such as ZCTA, year_month, the ins_* fields,
# zip_pm25, zip_elevation, elevation_difference and zip_wspd.
instrument_csv_path = in_instrument + "aggregated_draft_3.csv"
instrument_df = pd.read_csv(instrument_csv_path)

# Bare last expression so the notebook renders the (truncated) frame.
instrument_df

Unnamed: 0,ZCTA,year_month,ins_1_no_bin_raw,ins_2_add_acres_raw,ins_3_norms,ins_4_add_acres_norms,ins_5_norm_bins_acres,ins_6_bins_raw,zip_pm25,zip_elevation,elevation_difference,zip_wspd
0,89010,199101,0.014313,2.171619,2.193307,0.002237,0.024999,24.261588,12.450976,4784.0,-3523.444444,0.878208
1,89010,199102,0.003401,1.473353,0.522705,0.001519,0.017402,16.875655,11.255854,4784.0,-3412.000000,0.283772
2,89010,199103,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.130000,4784.0,,1.129854
3,89010,199104,0.024555,4.317860,3.803710,0.004475,0.053075,51.209927,7.899268,4784.0,-3777.333333,1.547416
4,89010,199105,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.004146,4784.0,,1.658761
...,...,...,...,...,...,...,...,...,...,...,...,...
547646,97635,201808,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,18.950000,7370.0,,0.775684
547647,97635,201809,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.200000,7370.0,,0.599688
547648,97635,201810,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.453125,7370.0,,0.094439
547649,97635,201811,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.415625,7370.0,,0.914967


In [4]:
# Load the demographics table that sits next to the instrument CSV.
demographics_csv_path = in_instrument + 'demographics.csv'
demographics_df = pd.read_csv(demographics_csv_path)

In [5]:
# Prepare the demographics merge keys:
#   * 'year' becomes the last four characters of its string form, matching the
#     string 'year' column built on the instrument table;
#   * 'zip_code' is renamed to 'ZCTA' so both frames share the key name.
def _last_four_chars(value):
    """Return the final four characters of ``value`` rendered as a string."""
    return str(value)[-4:]


demographics_df['year'] = demographics_df['year'].map(_last_four_chars)
demographics_df = demographics_df.rename(columns={'zip_code': 'ZCTA'})

In [6]:
# Derive a string 'year' column from the first four characters of year_month
# (e.g. 199101 -> "1991") so monthly instrument rows can be joined to data
# keyed by year.
#
# The value is computed directly from instrument_df's own column. Going through
# a reset_index() copy would duplicate the whole frame and make the assignment
# depend on index alignment between the copy and the original, which would
# silently misplace or drop values if the index were ever not 0..n-1.
instrument_df['year'] = instrument_df['year_month'].apply(
    lambda year_month: str(year_month)[:4]
)

In [17]:
# Left-join demographics onto the instrument rows: every instrument row is
# kept, and demographic columns are attached where (year, ZCTA) matches.
merge_keys = ['year', 'ZCTA']
final_df = instrument_df.merge(
    demographics_df,
    how='left',
    on=merge_keys,
)

# Display only: the filled frame is rendered but not assigned, so final_df
# itself still contains NaN.
# NOTE(review): confirm whether the zero-filled frame was meant to be saved.
final_df.fillna(0)

Unnamed: 0,ZCTA,year_month,ins_1_no_bin_raw,ins_2_add_acres_raw,ins_3_norms,ins_4_add_acres_norms,ins_5_norm_bins_acres,ins_6_bins_raw,zip_pm25,zip_elevation,...,percent_pop_female_age_under_5,percent_pop_male_age_5to9,percent_pop_male_age_under_5,total_population,percent_bach_deg_grad_new,percent_high_school_grad_new,percent_pop_age_15to19_new,percent_pop_female_age_15to19_new,percent_pop_male_age_10to14_new,percent_pop_male_age_15to19_new
0,89010,199101,0.014313,2.171619,2.193307,0.002237,0.024999,24.261588,12.450976,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
1,89010,199102,0.003401,1.473353,0.522705,0.001519,0.017402,16.875655,11.255854,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
2,89010,199103,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.130000,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
3,89010,199104,0.024555,4.317860,3.803710,0.004475,0.053075,51.209927,7.899268,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
4,89010,199105,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.004146,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547646,97635,201808,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,18.950000,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300
547647,97635,201809,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.200000,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300
547648,97635,201810,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.453125,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300
547649,97635,201811,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.415625,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300


In [18]:
# Persist the merged frame next to the input data. No `index` argument is
# passed, so pandas' default for writing the row index applies.
final_csv_path = in_instrument + 'finalish_df_4.csv'
final_df.to_csv(final_csv_path)

In [16]:
# Count missing values per column of the instrument table.
#
# Row selection needs a boolean mask. Indexing the frame with the integer
# year Series itself makes pandas look the values up as column labels, which
# raises KeyError, so the counts are taken over the whole frame here.
# NOTE(review): if a year filter was intended, add an explicit comparison,
# e.g. instrument_df[instrument_df['year'].astype(int) >= <first_year>];
# the intended threshold cannot be recovered from this notebook.
instrument_df.isna().sum()

ZCTA                         0
year_month                   0
ins_1_no_bin_raw             0
ins_2_add_acres_raw          0
ins_3_norms                  0
ins_4_add_acres_norms        0
ins_5_norm_bins_acres        0
ins_6_bins_raw               0
zip_pm25                   667
zip_elevation              235
elevation_difference     40806
zip_wspd                     0
year                         0
dtype: int64

In [9]:
# List the distinct year labels present after the merge, in ascending order,
# as a quick check on coverage.
distinct_years = final_df['year'].unique()
sorted(distinct_years)

['1991',
 '1992',
 '1993',
 '1994',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017',
 '2018']