# Main script to Join Data on Instrument 2 Fire Location

Modules: N/A <br>
Author: Jordan Meyer <br>
Email: jordan.meyer@berkeley.edu <br>
Date created: Feb 18, 2023 <br>

**Citations (data sources)**


**Citations (persons)**
1. Cornelia Ilin 

**Preferred environment**
1. Code written in Jupyter Notebooks

In [22]:
import sys

IN_COLAB = "google.colab" in sys.modules

if IN_COLAB:
    !pip install geopandas --quiet
    from google.colab import drive

    drive.mount("/content/drive")
    in_dir = (
        in_instrument
    ) = "/content/drive/MyDrive/capstone_fire/notebooks/instrument_2/"
    import drive.MyDrive.capstone_fire.modules.deep_ols as deep_ols
else:
    in_dir = in_instrument = "../data/instrument_2/"
    %cd '..'
    import modules.deep_ols as deep_ols
    %cd 'notebooks'

/Users/jordan/Documents/GitHub/fire_capstone
/Users/jordan/Documents/GitHub/fire_capstone/notebooks


In [35]:
import calendar
import os
from datetime import date, timedelta

# geography
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import shapely

# Moved from sklearn.neighbors to sklearn.metrics following their package change
import sklearn.metrics
from shapely.geometry import Point

# import cartopy.crs as ccrs
# import contextily as ctx
# import fiona
# import netCDF4 as ncdf
# import osmnx as ox
# from cartopy.mpl.gridliner import LATITUDE_FORMATTER, LONGITUDE_FORMATTER
# from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable


dist = sklearn.metrics.DistanceMetric.get_metric("haversine")

# ignore warnings
import warnings

from tqdm.notebook import tqdm, trange

warnings.filterwarnings("ignore")

# START HERE FOR AGGREGATION

## Aggregation of the dataframe for OLS

In [24]:
instrument_df = pd.read_csv(in_instrument + "aggregated_draft_5.csv",)
instrument_df

Unnamed: 0,ZCTA,year_month,ins_1_no_bin_raw,ins_2_add_acres_raw,ins_3_norms,ins_4_add_acres_norms,ins_5_norm_bins_acres,ins_6_bins_raw,zip_pm25,zip_elevation,elevation_difference,zip_wspd,treatment
0,89010,199101,0.014313,2.171619,2.095820,0.001013,0.011318,24.261588,12.450976,4784.0,-3523.444444,0.878208,1
1,89010,199102,0.003401,1.473353,0.499474,0.000688,0.007878,16.875655,11.255854,4784.0,-3412.000000,0.283772,0
2,89010,199103,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.130000,4784.0,,1.129854,0
3,89010,199104,0.024555,4.317860,3.633868,0.002025,0.024022,51.209927,7.899268,4784.0,-3777.333333,1.547416,1
4,89010,199105,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.004146,4784.0,,1.658761,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
548327,97635,201808,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,18.950000,7370.0,,0.775684,0
548328,97635,201809,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.200000,7370.0,,0.599688,0
548329,97635,201810,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.453125,7370.0,,0.094439,0
548330,97635,201811,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.415625,7370.0,,0.914967,0


In [25]:
demographics_df = pd.read_csv(in_instrument+'demographics.csv')

In [26]:
demographics_df['year']=demographics_df['year'].map(lambda x: str(x)[-4:])
demographics_df = demographics_df.rename(columns={'zip_code':'ZCTA'})

In [27]:
temp = instrument_df.reset_index()
instrument_df['year'] = temp['year_month'].apply(lambda x:str(x)[:4])

In [28]:
final_df = instrument_df.merge(demographics_df, on=['year','ZCTA'], how='left')
final_df.fillna(0)

Unnamed: 0,ZCTA,year_month,ins_1_no_bin_raw,ins_2_add_acres_raw,ins_3_norms,ins_4_add_acres_norms,ins_5_norm_bins_acres,ins_6_bins_raw,zip_pm25,zip_elevation,...,percent_pop_female_age_under_5,percent_pop_male_age_5to9,percent_pop_male_age_under_5,total_population,percent_bach_deg_grad_new,percent_high_school_grad_new,percent_pop_age_15to19_new,percent_pop_female_age_15to19_new,percent_pop_male_age_10to14_new,percent_pop_male_age_15to19_new
0,89010,199101,0.014313,2.171619,2.095820,0.001013,0.011318,24.261588,12.450976,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
1,89010,199102,0.003401,1.473353,0.499474,0.000688,0.007878,16.875655,11.255854,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
2,89010,199103,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.130000,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
3,89010,199104,0.024555,4.317860,3.633868,0.002025,0.024022,51.209927,7.899268,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
4,89010,199105,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.004146,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548327,97635,201808,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,18.950000,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300
548328,97635,201809,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.200000,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300
548329,97635,201810,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.453125,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300
548330,97635,201811,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.415625,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300


In [29]:
final_df.to_csv(in_instrument + 'finalish_df_6.csv')

In [36]:
final_df

Unnamed: 0,ZCTA,year_month,ins_1_no_bin_raw,ins_2_add_acres_raw,ins_3_norms,ins_4_add_acres_norms,ins_5_norm_bins_acres,ins_6_bins_raw,zip_pm25,zip_elevation,...,percent_pop_female_age_under_5,percent_pop_male_age_5to9,percent_pop_male_age_under_5,total_population,percent_bach_deg_grad_new,percent_high_school_grad_new,percent_pop_age_15to19_new,percent_pop_female_age_15to19_new,percent_pop_male_age_10to14_new,percent_pop_male_age_15to19_new
0,89010,199101,0.014313,2.171619,2.095820,0.001013,0.011318,24.261588,12.450976,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
1,89010,199102,0.003401,1.473353,0.499474,0.000688,0.007878,16.875655,11.255854,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
2,89010,199103,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.130000,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
3,89010,199104,0.024555,4.317860,3.633868,0.002025,0.024022,51.209927,7.899268,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
4,89010,199105,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.004146,4784.0,...,1.8,4.5,2.4,275.6,14.272763,56.603842,13.526364,14.514991,3.99958,8.781446
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548327,97635,201808,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,18.950000,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300
548328,97635,201809,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.200000,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300
548329,97635,201810,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.453125,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300
548330,97635,201811,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.415625,7370.0,...,9.6,3.4,15.7,138.0,8.472573,43.360501,2.356591,5.758929,12.83365,11.093300


In [38]:
final_df.iloc[:,:9].groupby(['year_month']).sum()

Unnamed: 0_level_0,ZCTA,ins_1_no_bin_raw,ins_2_add_acres_raw,ins_3_norms,ins_4_add_acres_norms,ins_5_norm_bins_acres,ins_6_bins_raw,zip_pm25
year_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
199101,152937606,26.864811,5736.485628,3943.334096,2.67910,29.325210,62791.308838,35026.407429
199102,152937606,8.043872,4326.408206,1203.372300,2.02121,22.076365,47254.620748,26172.689642
199103,152480043,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,16444.378421
199104,152937606,25.976775,2790.972714,3846.035418,1.30774,14.257673,30428.809189,20464.641418
199105,152480043,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,21994.465460
...,...,...,...,...,...,...,...,...
201808,152480043,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,27211.137765
201809,152480043,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,15027.243822
201810,152480043,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,12527.645153
201811,152480043,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,31103.239456


In [39]:
final_df.groupby('year').sum()

Unnamed: 0_level_0,ZCTA,year_month,ins_1_no_bin_raw,ins_2_add_acres_raw,ins_3_norms,ins_4_add_acres_norms,ins_5_norm_bins_acres,ins_6_bins_raw,zip_pm25,zip_elevation,...,percent_pop_female_age_under_5,percent_pop_male_age_5to9,percent_pop_male_age_under_5,total_population,percent_bach_deg_grad_new,percent_high_school_grad_new,percent_pop_age_15to19_new,percent_pop_female_age_15to19_new,percent_pop_male_age_10to14_new,percent_pop_male_age_15to19_new
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1991,1834336146,3899699934,194.002846,87024.74,28556.186083,40.5424,438.611515,941523.2,302638.946485,25174260.0,...,163006.6,146384.4,171530.4,366501425.4,398767.887787,1447693.0,183455.06273,170752.967709,138438.29626,142722.74113
1992,1833878583,3900662489,353.984096,250848.9,52285.086927,116.322391,1267.009947,2732526.0,258377.326868,25174260.0,...,161328.9,145671.6,169620.6,368905308.9,404755.851021,1450388.0,180780.773864,168553.584276,138189.87741,140621.109878
1993,1833421020,3901624124,428.008405,1174820.0,63215.354664,549.786632,5974.092763,12765780.0,250860.892488,25174260.0,...,159837.2,144931.2,167682.0,371309175.6,410784.130331,1453095.0,178297.403636,166334.919814,137957.41688,138480.041115
1994,1833330274,3903382263,729.402317,2141063.0,107863.707449,1005.497417,10903.48483,23218170.0,239303.258439,25174185.0,...,158281.1,144207.6,165778.7,373692906.9,416819.741191,1455827.0,175599.565,164217.888737,137719.17196,136534.198025
1995,1833421020,3905539324,716.222826,823072.3,115858.98621,391.496691,4261.259234,8962164.0,230092.185989,25174260.0,...,156719.6,143504.4,163879.2,376123345.6,422893.025745,1458773.0,173147.669089,162111.491061,137503.74464,134439.192355
1996,1833878583,3908494889,1047.523175,3059471.0,154712.682438,1432.391173,15583.628506,33286880.0,214400.519046,25174260.0,...,155165.7,142857.6,161982.6,378533735.1,428989.996485,1461699.0,170684.895,160081.43148,137264.29485,132488.959542
1997,1833421020,3909454444,724.85406,1265302.0,107265.978615,593.423177,6415.997634,13680340.0,213317.821198,25174260.0,...,153618.8,142083.6,160075.2,380937532.8,435020.938177,1464407.0,168012.201821,157873.9111,137023.85516,130383.65652
1998,1832505894,3909414004,331.880346,90474.01,49087.12333,42.163275,460.95161,989083.1,207947.838626,25174260.0,...,151979.4,141382.8,158131.2,383337940.8,440967.519397,1466950.0,165295.748181,155625.066872,136756.13334,128170.084337
1999,1835251272,3917367774,888.928173,4316271.0,131167.678559,2011.065668,21702.546468,46580800.0,230696.377261,25174260.0,...,150517.2,140740.8,156325.2,385765184.4,447344.805885,1470688.0,163316.847275,153974.271284,136563.04368,126650.500329
2000,1834336146,3917327304,554.329262,1603397.0,82057.536044,749.525488,8314.148296,17785450.0,206277.942497,25174260.0,...,148936.6,139978.8,154352.4,388165598.2,453300.515276,1473213.0,160607.151819,151671.121788,136336.35754,124432.57642
