# Main script to Join Data on Instrument 2 Fire Location

Modules: N/A <br>
Author: Jordan Meyer <br>
Email: jordan.meyer@berkeley.edu <br>
Date created: Feb 18, 2023 <br>

**Citations (data sources)**


**Citations (persons)**
1. Cornelia Ilin 

**Preferred environment**
1. Code written in Jupyter Notebooks

### Step 1: Import packages

In [113]:
# !pip install geopandas --quiet

In [114]:
import calendar
import os
from datetime import date, timedelta

# geography
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import shapely

# Moved from sklearn.neighbors to sklearn.metrics following their package change
import sklearn.metrics
from shapely.geometry import Point

# import cartopy.crs as ccrs
# import contextily as ctx
# import fiona
# import netCDF4 as ncdf
# import osmnx as ox
# from cartopy.mpl.gridliner import LATITUDE_FORMATTER, LONGITUDE_FORMATTER
# from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable


dist = sklearn.metrics.DistanceMetric.get_metric("haversine")

# ignore warnings
import warnings

from tqdm.notebook import tqdm, trange

warnings.filterwarnings("ignore")

### Step 2: Define working directories

In [115]:
# from google.colab import drive
# drive.mount('/content/drive')
# in_dir = '/content/drive/MyDrive/capstone_fire/data/'
# in_instrument = '/content/drive/MyDrive/capstone_fire/notebooks/instrument_2/'


# # Local directories on my machine (not gdrive)
in_dir = in_instrument = "../data/instrument_2/"

### Step 3: Define functions

### Step 4: Read data

## Wind

Wind load from other file

In [116]:
wind_df = (
    (
        pd.read_csv(in_dir + "all_years_wind_data.csv", index_col=0)
        .sort_values(["year_month", "ZCTA10"])
        .reset_index(drop=True)
    )
    .drop("Unnamed: 0", axis=1)
    .rename(columns={"ZCTA10": "ZCTA"})
    .reset_index(drop=True)
    .drop_duplicates()
)


wind_df["year_month"] = wind_df["year_month"].astype(str)
wind_df["ZCTA"] = wind_df["ZCTA"].astype(str)


wind_df[wind_df.ZCTA == "97635"].lat = 41.993
# wind_ready.loc['97635','ZCTA' == '97635']#['lon'] = -120.250
# wind_ready = wind_ready.drop_duplicates()
ind_fix = wind_df[wind_df.ZCTA == "97635"].index
for ind in ind_fix:
    wind_df.iloc[ind, 0] = 41.993
    wind_df.iloc[ind, 1] = -120.250

wind_dates = wind_df["year_month"].unique()
wind_zips = wind_df["ZCTA"].unique()
wind_ready = wind_df
wind_ready

Unnamed: 0,lat,lon,ZCTA,u,v,wdir,wspd,year_month,year,month
0,37.465,-117.936,89010,0.504258,-0.719008,305.042938,0.878208,199101,1991,1
1,35.396,-116.322,89019,-0.172753,-0.946940,259.661102,0.962568,199101,1991,1
2,36.161,-116.139,89060,-0.435964,-0.812957,241.796738,0.922477,199101,1991,1
3,35.957,-115.897,89061,-0.560538,-1.176384,244.522552,1.303105,199101,1991,1
4,39.520,-120.032,89439,0.042253,0.205661,78.390099,0.209957,199101,1991,1
...,...,...,...,...,...,...,...,...,...,...
627067,39.149,-120.248,96146,0.636599,0.953782,56.279018,1.146716,202212,2022,12
627068,39.236,-120.062,96148,0.636599,0.953782,56.279018,1.146716,202212,2022,12
627069,38.732,-120.033,96150,0.071517,0.472164,81.387100,0.477549,202212,2022,12
627070,39.184,-120.427,96161,-0.217010,0.239125,132.224274,0.322915,202212,2022,12


## Fire

Finsh cleaning some fire things and then join fire to wind 

In [117]:
fire_df = pd.read_csv(in_instrument + "fire_wind_processed.csv", index_col=0)

# Extract year and month, filter for fires in scope 1991+
# fire_df["year"] = fire_df["ALARM_DATE"].str[:4]
fire_df = fire_df[fire_df["year"].astype(int) > 1990]
fire_df = fire_df[fire_df["year"].astype(int) < 2020]

# Extract Month and End Year/Month
fire_df["month"] = fire_df["month"].astype(int)
fire_df["end_year"] = fire_df["CONT_DATE"].str[:4]
fire_df["end_month"] = fire_df["CONT_DATE"].str[5:7].astype(int)

# Give each fire a unique index code for later
fire_df = (
    fire_df.drop_duplicates().reset_index().rename(columns={"Index": "fire_index"})
)

fire_df["geometry"] = gpd.GeoSeries.from_wkt(fire_df["fire_centroid"])
fire_df["lon"] = fire_df["geometry"].apply(lambda p: p.x)
fire_df["lat"] = fire_df["geometry"].apply(lambda p: p.y)

fire_df["sq_mi"] = fire_df["GIS_ACRES"] / 640
fire_df["radius_mi"] = np.sqrt(fire_df["sq_mi"] / np.pi)

# Give each fire a unique index code for later
fire_df = (
    fire_df.drop_duplicates()
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={"index": "fire_index"})
)

fire_dates = fire_df["year_month"].unique()
fire_indexes = fire_df.index

fire_df.year = fire_df.year.astype(int)
fire_df.month = fire_df.month.astype(int)
fire_df["max_days_in_mo"] = fire_df[["year", "month"]].apply(
    lambda x: calendar.monthrange(*x)[1], axis=1
)

fire_df["sq_mi"] = fire_df["GIS_ACRES"] / 640
fire_df["radius_mi"] = np.sqrt(fire_df["sq_mi"] / np.pi)

fire_df["start_month"] = fire_df["ALARM_DATE"].apply(lambda x: int(x[5:7]))
fire_df["start_day"] = fire_df["ALARM_DATE"].apply(lambda x: int(x[8:10]))

fire_df["end_month"] = fire_df["CONT_DATE"].apply(lambda x: int(x[5:7]))
fire_df["end_day"] = fire_df["CONT_DATE"].apply(lambda x: int(x[8:10]))


fire_df["start_mid_end"] = fire_df[["month", "start_month", "end_month"]].apply(
    lambda x: (
        "complete"
        if x[1] == x[2]
        else ("start" if x[0] == x[1] else ("end" if x[0] == x[2] else "mid"))
    ),
    axis=1,
)

fire_df["fire_days_in_mo"] = fire_df[
    [
        "start_month",
        "end_month",
        "max_days_in_mo",
        "start_day",
        "end_day",
        "start_mid_end",
    ]
].apply(
    lambda x: (
        1
        if (x["end_day"] - x["start_day"] == 0)
        else (
            x["end_day"] - x["start_day"]
            if (x["start_mid_end"] == "complete")
            else (
                x["max_days_in_mo"] - x["start_day"]
                if (x["start_mid_end"] == "start")
                else (
                    x["end_day"]
                    if x["start_mid_end"] == "end"
                    else (x["max_days_in_mo"])
                )
            )
        )
    )
    / x["max_days_in_mo"],
    axis=1,
)

fire_backup = fire_df

fire_ready = fire_df[
    [
        "fire_lat_lon",
        "GIS_ACRES",
        "lat",
        "lon",
        "fire_days_in_mo",
        "fire_wdir",
        "fire_wspd",
        "year_month",
        "year",
        "month",
    ]
]
fire_ready = fire_ready.reset_index(drop=True).rename(
    columns={"GIS_ACRES": "acres", "lat": "fire_lat", "lon": "fire_lon"}
)

fire_ready

Unnamed: 0,fire_lat_lon,acres,fire_lat,fire_lon,fire_days_in_mo,fire_wdir,fire_wspd,year_month,year,month
0,"(35.25539508507167, -119.5812970773511)",238.031830,35.255395,-119.581297,0.032258,342.554565,1.595909,199305,1993,5
1,"(39.45165114943642, -120.0093608630314)",19.139362,39.451651,-120.009361,0.032258,0.700786,1.000503,199307,1993,7
2,"(38.63102591471577, -120.8402934705149)",70.179840,38.631026,-120.840293,0.033333,58.361294,0.271346,199309,1993,9
3,"(35.42095813876129, -119.7656494555765)",23.423927,35.420958,-119.765649,0.033333,331.157288,1.023493,199309,1993,9
4,"(35.32190356232154, -118.4169237147862)",151.780167,35.321904,-118.416924,0.032258,7.345666,1.831764,199308,1993,8
...,...,...,...,...,...,...,...,...,...,...
6632,"(37.87494989081461, -119.7281350256987)",331.624695,37.874950,-119.728135,0.166667,73.448456,0.113857,200709,2007,9
6633,"(37.56860595616687, -119.6600328204804)",23.633701,37.568606,-119.660033,0.233333,158.267761,0.253292,200711,2007,11
6634,"(37.74854288820926, -119.7035432531625)",239.260773,37.748543,-119.703543,1.000000,215.334213,0.138538,200711,2007,11
6635,"(37.74854288820926, -119.7035432531625)",239.260773,37.748543,-119.703543,0.258065,178.254211,0.474964,200712,2007,12


## PM2.5

Load PM2.5 from lawis files downloaded from gdrive

In [118]:
aqi1 = pd.read_csv(in_instrument + "jordan_firepm25.csv", index_col=0)
aqi2 = pd.read_csv(in_instrument + "finalpm25.csv", index_col=0)
aqi2

Unnamed: 0,year_month,ZIP10,pm25
204,2017-01,89010,1.842857
205,2017-02,89010,3.528571
206,2017-03,89010,3.242857
207,2017-04,89010,3.700000
208,2017-05,89010,5.242857
...,...,...,...
585307,2016-12,96148,1.713333
585308,2016-12,96150,1.719288
585309,2016-12,96155,0.771287
585310,2016-12,96161,1.416996


In [119]:
pm25_df = aqi2.reset_index(drop=True).rename(
    columns={"ZIP10": "ZCTA", "year_month": "old_ym"}
)
pm25_df["year_month"] = pm25_df["old_ym"].str[:4] + pm25_df["old_ym"].str[5:]
pm25_df["ZCTA"] = pm25_df["ZCTA"].astype(str)
pm25_ready = pm25_df.drop("old_ym", axis=1)

In [120]:
wfp_df = wind_ready.merge(pm25_ready, on=["year_month", "ZCTA"], how="left")
wind_final = wfp_df.rename(
    columns={
        "lat": "zip_lat",
        "lon": "zip_lon",
        "wdir": "zip_wdir",
        "wspd": "zip_wspd",
        "pm25": "zip_pm25",
    }
).drop(["u", "v"], axis=1)[wfp_df["year"] < 2019]
wind_final.to_csv(in_instrument + "wind_zip_pm25.csv")
wind_final

Unnamed: 0,zip_lat,zip_lon,ZCTA,zip_wdir,zip_wspd,year_month,year,month,zip_pm25
0,37.465,-117.936,89010,305.042938,0.878208,199101,1991,1,12.450976
1,35.396,-116.322,89019,259.661102,0.962568,199101,1991,1,10.846541
2,36.161,-116.139,89060,241.796738,0.922477,199101,1991,1,12.385000
3,35.957,-115.897,89061,244.522552,1.303105,199101,1991,1,10.150000
4,39.520,-120.032,89439,78.390099,0.209957,199101,1991,1,8.389565
...,...,...,...,...,...,...,...,...,...
548683,39.149,-120.248,96146,76.585587,0.437017,201812,2018,12,4.626316
548684,39.236,-120.062,96148,76.585587,0.437017,201812,2018,12,4.528572
548685,38.732,-120.033,96150,107.369316,0.231856,201812,2018,12,4.286667
548686,39.184,-120.427,96161,151.071350,0.243283,201812,2018,12,5.053509


In [121]:
fire_pm25 = aqi1.rename(
    columns={"lat": "fire_lat", "lon": "fire_lon", "pm25": "fire_pm25"}
).reset_index(drop=True)
fire_pm25.to_csv("fire_pm25.csv")
fire_pm25

Unnamed: 0,year,month,fire_lat,fire_lon,fire_pm25
0,1991,1,39.155351,-122.775965,0.10
1,1991,1,39.567560,-121.002336,11.24
2,1991,1,39.084997,-122.744482,1.74
3,1991,1,39.756806,-121.331523,10.59
4,1991,1,39.694403,-121.355888,15.02
...,...,...,...,...,...
8267,2016,11,40.375938,-120.751081,3.61
8268,2016,12,34.047386,-118.948054,4.52
8269,2016,12,34.234554,-118.800483,5.81
8270,2016,12,34.122593,-118.722263,6.09


In [122]:
fire_final = (
    fire_ready.merge(
        fire_pm25, on=["fire_lat", "fire_lon", "year", "month"], how="left"
    )
    .drop_duplicates()
    .reset_index(drop=True)
)
fire_final

Unnamed: 0,fire_lat_lon,acres,fire_lat,fire_lon,fire_days_in_mo,fire_wdir,fire_wspd,year_month,year,month,fire_pm25
0,"(35.25539508507167, -119.5812970773511)",238.031830,35.255395,-119.581297,0.032258,342.554565,1.595909,199305,1993,5,9.38
1,"(39.45165114943642, -120.0093608630314)",19.139362,39.451651,-120.009361,0.032258,0.700786,1.000503,199307,1993,7,5.53
2,"(38.63102591471577, -120.8402934705149)",70.179840,38.631026,-120.840293,0.033333,58.361294,0.271346,199309,1993,9,14.24
3,"(35.42095813876129, -119.7656494555765)",23.423927,35.420958,-119.765649,0.033333,331.157288,1.023493,199309,1993,9,11.99
4,"(35.32190356232154, -118.4169237147862)",151.780167,35.321904,-118.416924,0.032258,7.345666,1.831764,199308,1993,8,9.19
...,...,...,...,...,...,...,...,...,...,...,...
6632,"(37.87494989081461, -119.7281350256987)",331.624695,37.874950,-119.728135,0.166667,73.448456,0.113857,200709,2007,9,4.76
6633,"(37.56860595616687, -119.6600328204804)",23.633701,37.568606,-119.660033,0.233333,158.267761,0.253292,200711,2007,11,4.38
6634,"(37.74854288820926, -119.7035432531625)",239.260773,37.748543,-119.703543,1.000000,215.334213,0.138538,200711,2007,11,1.70
6635,"(37.74854288820926, -119.7035432531625)",239.260773,37.748543,-119.703543,0.258065,178.254211,0.474964,200712,2007,12,3.80


In [123]:
def distance(wind, fire):
    """
    params:
    -------
    wind: wind cleaned data
    fire: fire semi-cleaned data

    return:
    -------
    df3: dataframe with distances from each fire during time period
    """
    # create labels
    wind["wind_lat_lon"] = [str(xy) for xy in zip(wind.zip_lat, wind.zip_lon)]
    fire["fire_lat_lon"] = [str(xy) for xy in zip(fire.fire_lat, fire.fire_lon)]

    ## for each point in wind data find the nearest point in the census data ##
    ###############
    # keep only unique points in fire data
    df1_unique = wind.drop_duplicates(["wind_lat_lon"])
    df2_unique = fire.drop_duplicates(["fire_lat_lon"])

    df1_unique.reset_index(drop=True, inplace=True)
    df2_unique.reset_index(drop=True, inplace=True)

    # transform to radians
    df1_unique["lat_r"] = np.radians(df1_unique.zip_lat)
    df1_unique["lon_r"] = np.radians(df1_unique.zip_lon)
    df2_unique["lat_r"] = np.radians(df2_unique.fire_lat)
    df2_unique["lon_r"] = np.radians(df2_unique.fire_lon)

    # compute pairwise distance (in miles)
    dist_matrix = (
        dist.pairwise(
            df2_unique[["lat_r", "lon_r"]],
            df1_unique[["lat_r", "lon_r"]],
        )
    ) * 3959

    # create a df from dist_matrix
    dist_matrix = pd.DataFrame(
        dist_matrix, index=df2_unique["fire_lat_lon"], columns=df1_unique["ZCTA"]
    )
    return dist_matrix

In [None]:
temp_df = distance(wind_final, fire_final)
dist_df = (
    fire_final.merge(
        temp_df, left_on="fire_lat_lon", right_on=temp_df.index, how="left"
    )
    .drop_duplicates()
    .reset_index(drop=True)
)
dist_df

Unnamed: 0,fire_lat_lon,acres,fire_lat,fire_lon,fire_days_in_mo,fire_wdir,fire_wspd,year_month,year,month,...,96140,96141,96142,96143,96145,96146,96148,96150,96161,97635
0,"(35.25539508507167, -119.5812970773511)",238.031830,35.255395,-119.581297,0.032258,342.554565,1.595909,199305,1993,5,...,273.902165,262.986709,255.763437,275.094422,265.225259,271.526768,276.316781,241.513929,275.412483,466.944962
1,"(39.45165114943642, -120.0093608630314)",19.139362,39.451651,-120.009361,0.032258,0.700786,1.000503,199307,1993,7,...,18.155774,30.531542,36.890962,15.868377,29.049318,24.497956,15.164144,49.742314,28.990727,176.052420
2,"(38.63102591471577, -120.8402934705149)",70.179840,38.631026,-120.840293,0.033333,58.361294,0.271346,199309,1993,9,...,55.923517,44.852443,42.502675,60.493373,45.091172,47.913347,59.138386,44.100498,44.201396,234.375215
3,"(35.42095813876129, -119.7656494555765)",23.423927,35.420958,-119.765649,0.033333,331.157288,1.023493,199309,1993,9,...,261.622226,250.503922,243.318349,262.985454,252.701931,258.958683,264.112075,229.258884,262.543176,454.860185
4,"(35.32190356232154, -118.4169237147862)",151.780167,35.321904,-118.416924,0.032258,7.345666,1.831764,199308,1993,8,...,283.319934,274.116537,266.888233,283.351792,276.555557,282.960181,285.162251,251.917314,288.832200,471.414260
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6632,"(37.87494989081461, -119.7281350256987)",331.624695,37.874950,-119.728135,0.166667,73.448456,0.113857,200709,2007,9,...,93.579473,83.601955,76.330517,94.315845,85.999136,92.411551,95.759703,61.483932,98.022876,285.885768
6633,"(37.56860595616687, -119.6600328204804)",23.633701,37.568606,-119.660033,0.233333,158.267761,0.253292,200711,2007,11,...,115.058673,104.965884,97.687268,115.806581,107.344223,113.752581,117.250544,82.902744,119.099573,307.313468
6634,"(37.74854288820926, -119.7035432531625)",239.260773,37.748543,-119.703543,1.000000,215.334213,0.138538,200711,2007,11,...,102.401609,92.355699,85.079677,103.152564,94.742784,101.153523,104.591706,70.267223,106.628801,294.707998
6635,"(37.74854288820926, -119.7035432531625)",239.260773,37.748543,-119.703543,0.258065,178.254211,0.474964,200712,2007,12,...,102.401609,92.355699,85.079677,103.152564,94.742784,101.153523,104.591706,70.267223,106.628801,294.707998


In [None]:
def fire_bearings(wind, fire):
    """
    params:
    -------
    wind: wind cleaned data
    fire: fire semi-cleaned data

    return:
    -------
    df3: dataframe with distances from each fire during time period
    """

    ## for each point in wind data find the angle##
    ###############
    # keep only unique points in fire data
    df1_unique = wind.drop_duplicates(["wind_lat_lon"])
    df2_unique = fire.drop_duplicates(["fire_lat_lon"])

    df1_unique.reset_index(drop=True, inplace=True)
    df2_unique.reset_index(drop=True, inplace=True)

    # transform to radians
    df1_unique["lat_r"] = np.radians(df1_unique.zip_lat)
    df1_unique["lon_r"] = np.radians(df1_unique.zip_lon)
    df2_unique["lat_r"] = np.radians(df2_unique.fire_lat)
    df2_unique["lon_r"] = np.radians(df2_unique.fire_lon)

    # compute pairwise distance (in miles)
    bearing_matrix = np.zeros((df1_unique.shape[0], df2_unique.shape[0]))
    for idx in range(df1_unique.shape[0]):
        pa_lat = df1_unique["zip_lat"][idx]
        pa_lon = df1_unique["zip_lon"][idx]
        wdir_ba = df2_unique["fire_wdir"][idx]
        for idy in range(df2_unique.shape[0]):
            pb_lat = df2_unique["fire_lat"][idy]
            pb_lon = df2_unique["fire_lon"][idy]

            if pb_lon > pa_lon:
                d_X = -np.cos(pa_lat) * np.sin(pa_lon - pb_lon)
            else:
                d_X = np.cos(pa_lat) * np.sin(pa_lon - pb_lon)

            if pb_lat > pa_lat:
                d_Y = -np.cos(pb_lat) * np.sin(pa_lat) - np.sin(pb_lat) * np.cos(
                    pa_lat
                ) * np.cos(pa_lon - pb_lon)
            else:
                d_Y = np.cos(pb_lat) * np.sin(pa_lat) - np.sin(pb_lat) * np.cos(
                    pa_lat
                ) * np.cos(pa_lon - pb_lon)

            B_ba = np.arctan2(d_X, d_Y) * (180 / np.pi) % 360

            angle_diff = np.max([B_ba, wdir_ba]) - np.min([B_ba, wdir_ba])

            if angle_diff > 180:
                upwind_effect = (angle_diff) / 360
            else:
                upwind_effect = 1 - (angle_diff) / 360

            bearing_matrix[idx, idy] = upwind_effect

    # create a df from dist_matrix
    bearing_matrix = pd.DataFrame(
        bearing_matrix, index=df1_unique["ZCTA"], columns=df2_unique["fire_lat_lon"]
    )

    return bearing_matrix

In [None]:
temp_df = fire_bearings(wind_final, fire_final)

In [None]:
temp_df = temp_df.fillna(0)
merge_temp_df = temp_df.T
bearings = fire_ready.merge(
    merge_temp_df, left_on="fire_lat_lon", right_on=merge_temp_df.index, how="left"
)
bearings_fire = bearings

In [None]:
bearings_fire

In [None]:
treatment_df = (bearings_fire.drop_duplicates().iloc[:, 11:] >= 0.75)[
    dist_df.drop_duplicates().iloc[:, 11:] < 200
].fillna(False)

treatment_df["fire_lat_lon"] = bearings.drop_duplicates()["fire_lat_lon"].astype(str)
treatment_df["year_month"] = bearings_fire.drop_duplicates()["year_month"]
treatment_df["fire_wdir"] = bearings_fire.drop_duplicates()["fire_wdir"]
treatment_df["fire_wspd"] = bearings_fire.drop_duplicates()["fire_wspd"]

treatment_df["fire_days_in_mo"] = bearings_fire.drop_duplicates()["fire_days_in_mo"]
treatment_df = treatment_df.reset_index(drop=True)

In [21]:
fire_bear = bearings_fire[
    (bearings_fire.drop_duplicates().iloc[:, 11:] >= 0.75)[
        dist_df.drop_duplicates().iloc[:, 11:] < 200
    ]
].fillna(0)

fire_bear["fire_lat_lon"] = bearings.drop_duplicates()["fire_lat_lon"].astype(str)
fire_bear["year_month"] = bearings_fire.drop_duplicates()["year_month"]
fire_bear["fire_wdir"] = bearings_fire.drop_duplicates()["fire_wdir"]
fire_bear["fire_wspd"] = bearings_fire.drop_duplicates()["fire_wspd"]

fire_bear["fire_days_in_mo"] = bearings_fire.drop_duplicates()["fire_days_in_mo"]
fire_bear = fire_bear.reset_index(drop=True)

fire_bear.to_csv("fire_bear.csv")

In [22]:
fire_dist = dist_df[
    (bearings_fire.drop_duplicates().iloc[:, 11:] >= 0.75)[
        dist_df.drop_duplicates().iloc[:, 11:] < 200
    ]
].fillna(0)

fire_dist["fire_lat_lon"] = bearings.drop_duplicates()["fire_lat_lon"].astype(str)
fire_dist["year_month"] = bearings_fire.drop_duplicates()["year_month"]
fire_dist["fire_wdir"] = bearings_fire.drop_duplicates()["fire_wdir"]
fire_dist["fire_wspd"] = bearings_fire.drop_duplicates()["fire_wspd"]

fire_dist["fire_days_in_mo"] = bearings_fire.drop_duplicates()["fire_days_in_mo"]
fire_dist = fire_dist.reset_index(drop=True)

fire_dist.to_csv("fire_dist.csv")

In [23]:
days_mo = fire_dist["fire_days_in_mo"][2]
wspd = fire_dist["fire_wspd"][2]
distance = fire_dist["96142"][2]
bearing = fire_bear["96142"][2]

instrument = np.sum(days_mo * wspd * distance * bearing)
instrument

0.3746602309176257

In [77]:
import multiprocessing

from tqdm import tqdm

iters = int(np.ceil(fire_dist.shape[0] / 100))
instrument_df = pd.DataFrame(
    columns=[
        "ZCTA",
        "fire_lat_lon",
        "instrument",
        "instrument_norm",
        "year_month",
        "bearing",
        "distance",
        "fire_wspd",
    ]
)

In [79]:
def get_instrument(distance_df, bearing_df, instrument_df):
    bear_max = 1
    bear_min = 0.75
    dist_max = 199.9998830889509
    dist_min = 0
    wspd_max = 5.554734706878662
    wspd_min = 0.0069808503612875

    for ym in tqdm(distance_df.index):
        days_mo = distance_df["fire_days_in_mo"][ym]
        wspd = distance_df["fire_wspd"][ym]
        wspd_norm = (wspd - wspd_min) / (wspd_max - wspd_min)
        for zcta in distance_df.columns[11:]:
            if distance_df.loc[ym, zcta] == 0:
                pass
            else:
                distance = distance_df[zcta][ym]
                dist_norm = (distance - dist_min) / (dist_max - dist_min)
                bearing = bearing_df[zcta][ym]
                bear_norm = (bearing - bear_min) / (bear_max - bear_min)

                instrument = days_mo * wspd * bearing / distance
                instrument_norm = days_mo * wspd_norm * bear_norm / dist_norm

                instrument_df = instrument_df.append(
                    {
                        "ZCTA": zcta,
                        "fire_lat_lon": distance_df["fire_lat_lon"][ym],
                        "instrument": instrument,
                        "instrument_norm": instrument_norm,
                        "year_month": distance_df["year_month"][ym],
                        "bearing": bearing,
                        "distance": distance,
                        "fire_wspd": wspd,
                    },
                    ignore_index=True,
                )
    return instrument_df

In [None]:
# import multiprocessing as mp

# # Step 1: Use multiprocessing.Pool() and specify number of cores to use (here I use 4).
# pool = mp.Pool(4)

# # Step 2: Use pool.starmap which takes a multiple iterable arguments
# results = pool.starmap([get_instrument(fire_dist[p[0]:p[1]],fire_bear[p[0]:p[1]],instrument_df) for p in params])

# # Step 3: Don't forget to close
# pool.close()

In [80]:
iters = int(np.ceil(fire_dist.shape[0] / 100))

for i in range(iters):
    print(f"iteration {i}/{iters}")

    instrument_df = pd.DataFrame(
        columns=[
            "ZCTA",
            "fire_lat_lon",
            "instrument",
            "instrument_norm",
            "year_month",
            "bearing",
            "distance",
            "fire_wspd",
        ]
    )

    instrument_df = get_instrument(
        fire_dist[i * 100 : i * 100 + 100],
        fire_bear[i * 100 : i * 100 + 100],
        instrument_df,
    )
    instrument_df.to_csv(in_instrument + f"instrument_iter_{i}.csv")

iteration 0/67


100%|█████████████████████████████████████████| 100/100 [01:10<00:00,  1.42it/s]


iteration 1/67


100%|█████████████████████████████████████████| 100/100 [01:05<00:00,  1.52it/s]


iteration 2/67


100%|█████████████████████████████████████████| 100/100 [01:22<00:00,  1.21it/s]


iteration 3/67


100%|█████████████████████████████████████████| 100/100 [01:13<00:00,  1.36it/s]


iteration 4/67


100%|█████████████████████████████████████████| 100/100 [01:27<00:00,  1.14it/s]


iteration 5/67


100%|█████████████████████████████████████████| 100/100 [01:26<00:00,  1.16it/s]


iteration 6/67


100%|█████████████████████████████████████████| 100/100 [01:00<00:00,  1.65it/s]


iteration 7/67


100%|█████████████████████████████████████████| 100/100 [01:18<00:00,  1.27it/s]


iteration 8/67


100%|█████████████████████████████████████████| 100/100 [01:11<00:00,  1.41it/s]


iteration 9/67


100%|█████████████████████████████████████████| 100/100 [01:09<00:00,  1.43it/s]


iteration 10/67


100%|█████████████████████████████████████████| 100/100 [01:05<00:00,  1.52it/s]


iteration 11/67


100%|█████████████████████████████████████████| 100/100 [01:11<00:00,  1.41it/s]


iteration 12/67


100%|█████████████████████████████████████████| 100/100 [01:21<00:00,  1.23it/s]


iteration 13/67


100%|█████████████████████████████████████████| 100/100 [01:16<00:00,  1.31it/s]


iteration 14/67


100%|█████████████████████████████████████████| 100/100 [01:10<00:00,  1.41it/s]


iteration 15/67


100%|█████████████████████████████████████████| 100/100 [01:05<00:00,  1.52it/s]


iteration 16/67


100%|█████████████████████████████████████████| 100/100 [01:09<00:00,  1.44it/s]


iteration 17/67


100%|█████████████████████████████████████████| 100/100 [01:17<00:00,  1.28it/s]


iteration 18/67


100%|█████████████████████████████████████████| 100/100 [01:15<00:00,  1.32it/s]


iteration 19/67


100%|█████████████████████████████████████████| 100/100 [01:05<00:00,  1.52it/s]


iteration 20/67


100%|█████████████████████████████████████████| 100/100 [01:08<00:00,  1.45it/s]


iteration 21/67


100%|█████████████████████████████████████████| 100/100 [01:10<00:00,  1.41it/s]


iteration 22/67


100%|█████████████████████████████████████████| 100/100 [01:06<00:00,  1.51it/s]


iteration 23/67


100%|█████████████████████████████████████████| 100/100 [01:02<00:00,  1.60it/s]


iteration 24/67


100%|█████████████████████████████████████████| 100/100 [01:18<00:00,  1.27it/s]


iteration 25/67


100%|█████████████████████████████████████████| 100/100 [01:13<00:00,  1.36it/s]


iteration 26/67


100%|█████████████████████████████████████████| 100/100 [01:05<00:00,  1.53it/s]


iteration 27/67


100%|█████████████████████████████████████████| 100/100 [01:12<00:00,  1.37it/s]


iteration 28/67


100%|█████████████████████████████████████████| 100/100 [01:17<00:00,  1.30it/s]


iteration 29/67


100%|█████████████████████████████████████████| 100/100 [01:11<00:00,  1.40it/s]


iteration 30/67


100%|█████████████████████████████████████████| 100/100 [01:08<00:00,  1.47it/s]


iteration 31/67


100%|█████████████████████████████████████████| 100/100 [01:09<00:00,  1.44it/s]


iteration 32/67


100%|█████████████████████████████████████████| 100/100 [01:00<00:00,  1.65it/s]


iteration 33/67


100%|█████████████████████████████████████████| 100/100 [00:56<00:00,  1.78it/s]


iteration 34/67


100%|█████████████████████████████████████████| 100/100 [00:59<00:00,  1.68it/s]


iteration 35/67


100%|█████████████████████████████████████████| 100/100 [00:55<00:00,  1.81it/s]


iteration 36/67


100%|█████████████████████████████████████████| 100/100 [01:09<00:00,  1.44it/s]


iteration 37/67


100%|█████████████████████████████████████████| 100/100 [00:54<00:00,  1.84it/s]


iteration 38/67


100%|█████████████████████████████████████████| 100/100 [01:15<00:00,  1.32it/s]


iteration 39/67


100%|█████████████████████████████████████████| 100/100 [01:03<00:00,  1.57it/s]


iteration 40/67


100%|█████████████████████████████████████████| 100/100 [00:51<00:00,  1.94it/s]


iteration 41/67


100%|█████████████████████████████████████████| 100/100 [01:23<00:00,  1.20it/s]


iteration 42/67


100%|█████████████████████████████████████████| 100/100 [01:00<00:00,  1.65it/s]


iteration 43/67


100%|█████████████████████████████████████████| 100/100 [01:03<00:00,  1.56it/s]


iteration 44/67


100%|█████████████████████████████████████████| 100/100 [01:01<00:00,  1.62it/s]


iteration 45/67


100%|█████████████████████████████████████████| 100/100 [01:00<00:00,  1.66it/s]


iteration 46/67


100%|█████████████████████████████████████████| 100/100 [00:58<00:00,  1.70it/s]


iteration 47/67


100%|█████████████████████████████████████████| 100/100 [00:59<00:00,  1.67it/s]


iteration 48/67


100%|█████████████████████████████████████████| 100/100 [01:08<00:00,  1.45it/s]


iteration 49/67


100%|█████████████████████████████████████████| 100/100 [00:53<00:00,  1.87it/s]


iteration 50/67


100%|█████████████████████████████████████████| 100/100 [00:59<00:00,  1.68it/s]


iteration 51/67


100%|█████████████████████████████████████████| 100/100 [00:46<00:00,  2.15it/s]


iteration 52/67


100%|█████████████████████████████████████████| 100/100 [00:48<00:00,  2.04it/s]


iteration 53/67


100%|█████████████████████████████████████████| 100/100 [01:09<00:00,  1.43it/s]


iteration 54/67


100%|█████████████████████████████████████████| 100/100 [01:07<00:00,  1.49it/s]


iteration 55/67


100%|█████████████████████████████████████████| 100/100 [00:54<00:00,  1.84it/s]


iteration 56/67


100%|█████████████████████████████████████████| 100/100 [01:15<00:00,  1.32it/s]


iteration 57/67


100%|█████████████████████████████████████████| 100/100 [00:45<00:00,  2.18it/s]


iteration 58/67


100%|█████████████████████████████████████████| 100/100 [01:11<00:00,  1.39it/s]


iteration 59/67


100%|█████████████████████████████████████████| 100/100 [01:07<00:00,  1.47it/s]


iteration 60/67


100%|█████████████████████████████████████████| 100/100 [00:54<00:00,  1.84it/s]


iteration 61/67


100%|█████████████████████████████████████████| 100/100 [00:51<00:00,  1.93it/s]


iteration 62/67


100%|█████████████████████████████████████████| 100/100 [01:08<00:00,  1.46it/s]


iteration 63/67


100%|█████████████████████████████████████████| 100/100 [00:59<00:00,  1.67it/s]


iteration 64/67


100%|█████████████████████████████████████████| 100/100 [01:02<00:00,  1.61it/s]


iteration 65/67


100%|█████████████████████████████████████████| 100/100 [01:04<00:00,  1.56it/s]


iteration 66/67


100%|███████████████████████████████████████████| 37/37 [00:14<00:00,  2.50it/s]


In [81]:
print("woo")

woo


In [82]:
import glob

files = glob.glob(in_instrument + "instrument_iter" + "*.csv")
df_all_instruments = pd.DataFrame()

total = 0
for f in files:
    total += 1
    csv = pd.read_csv(f, index_col=0)
    df_all_instruments = df_all_instruments.append(csv)
print(f"{total} files consoidated.")

df_all_instruments = df_all_instruments.reset_index(drop=True).drop_duplicates()
df_all_instruments.to_csv(in_instrument + "final_instrument_df.csv")

67 files consoidated.


In [33]:
instruments = df_all_instruments.reset_index(drop=True)


instrument_final = (
    instruments.groupby(["ZCTA", "year_month"]).aggregate(sum).drop_duplicates()
)
instrument_final.to_csv(in_instrument + "instrument_final.csv")

In [26]:
instrument_final = pd.read_csv(in_instrument + "instrument_final.csv")
instrument_final

Unnamed: 0,ZCTA,year_month,instrument
0,89019,199104,8.959256
1,89019,199107,5.524780
2,89019,199108,174.513702
3,89019,199109,71.185512
4,89019,199110,109.824039
...,...,...,...
318025,97635,201607,43.174540
318026,97635,201608,149.598601
318027,97635,201609,147.283876
318028,97635,201610,146.902261


0.0

In [34]:
df_all_instruments

Unnamed: 0,ZCTA,fire_lat_lon,instrument,year_month
0,90001,"(34.78121497430767, -120.4856276252864)",15.158099,200009
1,90002,"(34.78121497430767, -120.4856276252864)",12.881696,200009
2,90003,"(34.78121497430767, -120.4856276252864)",14.472301,200009
3,90004,"(34.78121497430767, -120.4856276252864)",11.393226,200009
4,90005,"(34.78121497430767, -120.4856276252864)",14.283857,200009
...,...,...,...,...
46010,96134,"(41.55849847497173, -123.1157014096472)",0.808206,200708
46011,96135,"(41.55849847497173, -123.1157014096472)",1.716934,200708
46012,96136,"(41.55849847497173, -123.1157014096472)",1.440254,200708
46013,96137,"(41.55849847497173, -123.1157014096472)",1.264808,200708


In [83]:
wind_elevation = pd.read_csv("all_elevations.csv")
wind_elevation["zips"] = wind_elevation["zips"].astype(str)
wind_elevation["elevation"] = (
    wind_elevation["elevation"].map(lambda x: x[:-2]).astype(float)
)
wind_elevation = wind_final.merge(
    wind_elevation, left_on="ZCTA", right_on="zips", how="left"
)
wind_elevation = wind_elevation.rename(columns={"elevation": "zip_elevation"})

In [84]:
fire_elevation = pd.read_csv(in_instrument + "elevation_final.csv", index_col=0)
fire_elevation = fire_final.merge(
    fire_elevation, on=["fire_lat", "fire_lon"], how="left"
)

In [85]:
fire_elevation["year_month"] = fire_elevation["year_month"].astype(str)
df_all_instruments["ZCTA"] = df_all_instruments["ZCTA"].astype(str)
wind_elevation["ZCTA"] = wind_elevation["ZCTA"].astype(str)
df_all_instruments["year_month"] = df_all_instruments["year_month"].astype(str)

In [86]:
temp_instrument = df_all_instruments.drop_duplicates().merge(
    fire_elevation, on=["fire_lat_lon", "year_month"], how="left"
)

# Max and min pm 2.5 of whole dataset for normalization
pm_25min = wind_final.zip_pm25.min()
pm_25max = wind_final.zip_pm25.max()

temp_instrument["fire_pm25_norm"] = (temp_instrument["fire_pm25"] - pm_25min) / (
    pm_25max - pm_25min
)
temp_instrument["instrument_2"] = temp_instrument["instrument"] * (
    temp_instrument["fire_pm25"]
)
temp_instrument["instrument_2_norm"] = temp_instrument["instrument"] * (
    temp_instrument["fire_pm25_norm"] + 1
)

In [91]:
temp_instrument

Unnamed: 0,ZCTA,fire_lat_lon,instrument,instrument_norm,year_month,bearing,distance,fire_wspd_x,acres,fire_lat,...,fire_days_in_mo,fire_wdir,fire_wspd_y,year,month,fire_pm25,fire_elevation,fire_pm25_norm,instrument_2,instrument_2_norm
0,90001,"(34.78121497430767, -120.4856276252864)",0.000776,0.023991,200009,0.955673,139.742150,1.702547,9329.916992,34.781215,...,0.066667,325.456421,1.702547,2000,9,7.76,95.0,0.087733,0.006024,0.000844
1,90002,"(34.78121497430767, -120.4856276252864)",0.000645,0.006125,200009,0.803104,141.316634,1.702547,9329.916992,34.781215,...,0.066667,325.456421,1.702547,2000,9,7.76,95.0,0.087733,0.005006,0.000702
2,90003,"(34.78121497430767, -120.4856276252864)",0.000753,0.019996,200009,0.920015,138.590876,1.702547,9329.916992,34.781215,...,0.066667,325.456421,1.702547,2000,9,7.76,95.0,0.087733,0.005847,0.000820
3,90004,"(34.78121497430767, -120.4856276252864)",0.000655,0.001343,200009,0.760872,131.925083,1.702547,9329.916992,34.781215,...,0.066667,325.456421,1.702547,2000,9,7.76,95.0,0.087733,0.005080,0.000712
4,90005,"(34.78121497430767, -120.4856276252864)",0.000810,0.024230,200009,0.947446,132.825955,1.702547,9329.916992,34.781215,...,0.066667,325.456421,1.702547,2000,9,7.76,95.0,0.087733,0.006283,0.000881
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4960767,96136,"(41.55849847497173, -123.1157014096472)",0.000042,0.000796,200708,0.872197,186.231827,0.137437,12.925918,41.558498,...,0.064516,219.183105,0.137437,2007,8,0.85,2380.0,0.009610,0.000035,0.000042
4960768,96137,"(41.55849847497173, -123.1157014096472)",0.000061,0.002036,200708,0.991349,143.888868,0.137437,12.925918,41.558498,...,0.064516,219.183105,0.137437,2007,8,0.85,2380.0,0.009610,0.000052,0.000062
4960769,96137,"(41.55849847497173, -123.1157014096472)",0.000061,0.002036,200708,0.991349,143.888868,0.137437,12.925918,41.558498,...,0.064516,219.183105,0.137437,2007,8,0.85,2380.0,0.009610,0.000052,0.000062
4960770,97635,"(41.55849847497173, -123.1157014096472)",0.000052,0.001067,200708,0.882441,150.683061,0.137437,12.925918,41.558498,...,0.064516,219.183105,0.137437,2007,8,0.85,2380.0,0.009610,0.000044,0.000052


In [87]:
temp_wind_df = wind_elevation[
    ["ZCTA", "zip_elevation", "zip_pm25", "year_month"]
].drop_duplicates()

In [103]:
temp_instrument.columns

Index(['ZCTA', 'fire_lat_lon', 'instrument', 'instrument_norm', 'year_month',
       'bearing', 'distance', 'fire_wspd_x', 'acres', 'fire_lat', 'fire_lon',
       'fire_days_in_mo', 'fire_wdir', 'fire_wspd_y', 'year', 'month',
       'fire_pm25', 'fire_elevation', 'fire_pm25_norm', 'instrument_2',
       'instrument_2_norm'],
      dtype='object')

In [104]:
temp_instrument[
    [
        "ZCTA",
        "fire_lat_lon",
        "instrument",
        "instrument_norm",
        "year_month",
        "bearing",
        "distance",
        "fire_wspd_x",
        "acres",
        "fire_days_in_mo",
        "fire_pm25",
        "fire_elevation",
        "fire_pm25_norm",
        "instrument_2",
        "instrument_2_norm",
    ]
]

Unnamed: 0,ZCTA,fire_lat_lon,instrument,instrument_norm,year_month,bearing,distance,fire_wspd_x,acres,fire_days_in_mo,fire_pm25,fire_elevation,fire_pm25_norm,instrument_2,instrument_2_norm
0,90001,"(34.78121497430767, -120.4856276252864)",0.000776,0.023991,200009,0.955673,139.742150,1.702547,9329.916992,0.066667,7.76,95.0,0.087733,0.006024,0.000844
1,90002,"(34.78121497430767, -120.4856276252864)",0.000645,0.006125,200009,0.803104,141.316634,1.702547,9329.916992,0.066667,7.76,95.0,0.087733,0.005006,0.000702
2,90003,"(34.78121497430767, -120.4856276252864)",0.000753,0.019996,200009,0.920015,138.590876,1.702547,9329.916992,0.066667,7.76,95.0,0.087733,0.005847,0.000820
3,90004,"(34.78121497430767, -120.4856276252864)",0.000655,0.001343,200009,0.760872,131.925083,1.702547,9329.916992,0.066667,7.76,95.0,0.087733,0.005080,0.000712
4,90005,"(34.78121497430767, -120.4856276252864)",0.000810,0.024230,200009,0.947446,132.825955,1.702547,9329.916992,0.066667,7.76,95.0,0.087733,0.006283,0.000881
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4960767,96136,"(41.55849847497173, -123.1157014096472)",0.000042,0.000796,200708,0.872197,186.231827,0.137437,12.925918,0.064516,0.85,2380.0,0.009610,0.000035,0.000042
4960768,96137,"(41.55849847497173, -123.1157014096472)",0.000061,0.002036,200708,0.991349,143.888868,0.137437,12.925918,0.064516,0.85,2380.0,0.009610,0.000052,0.000062
4960769,96137,"(41.55849847497173, -123.1157014096472)",0.000061,0.002036,200708,0.991349,143.888868,0.137437,12.925918,0.064516,0.85,2380.0,0.009610,0.000052,0.000062
4960770,97635,"(41.55849847497173, -123.1157014096472)",0.000052,0.001067,200708,0.882441,150.683061,0.137437,12.925918,0.064516,0.85,2380.0,0.009610,0.000044,0.000052


In [105]:
temp_fire_df =temp_instrument[
    [
        "ZCTA",
        "fire_lat_lon",
        "instrument",
        "instrument_norm",
        "year_month",
        "bearing",
        "distance",
        "fire_wspd_x",
        "acres",
        "fire_days_in_mo",
        "fire_pm25",
        "fire_elevation",
        "fire_pm25_norm",
        "instrument_2",
        "instrument_2_norm",
    ]
].rename(columns={"fire_wspd_x": "fire_wspd"})

In [106]:
temp_temp = temp_fire_df.merge(
    temp_wind_df, on=["ZCTA", "year_month"]
).drop_duplicates()

In [107]:
temp_temp["elevation_difference"] = (
    temp_temp["fire_elevation"] - temp_temp["zip_elevation"]
)

In [109]:
temp_temp.to_csv(in_instrument+"non_agg_instrument.csv")

In [108]:
temp_temp

Unnamed: 0,ZCTA,fire_lat_lon,instrument,instrument_norm,year_month,bearing,distance,fire_wspd,acres,fire_days_in_mo,fire_pm25,fire_elevation,fire_pm25_norm,instrument_2,instrument_2_norm,zip_elevation,zip_pm25,elevation_difference
0,90001,"(34.78121497430767, -120.4856276252864)",0.000776,0.023991,200009,0.955673,139.742150,1.702547,9329.916992,0.066667,7.76,95.0,0.087733,0.006024,0.000844,149.0,17.923000,-54.0
1,90001,"(34.79772989935934, -120.5889820313821)",0.000738,0.021942,200009,0.945971,145.585280,1.702547,978.644958,0.066667,8.44,55.0,0.095421,0.006225,0.000808,149.0,17.923000,-94.0
2,90001,"(34.57684347224777, -120.6407594942807)",0.000724,0.022395,200009,0.955551,143.198394,3.252706,0.166916,0.033333,8.02,56.0,0.090673,0.005802,0.000789,149.0,17.923000,-93.0
3,90001,"(36.72741370585582, -119.0274821211544)",0.000038,0.000119,200009,0.767375,196.281243,0.146893,238.249573,0.066667,7.34,1291.0,0.082985,0.000281,0.000041,149.0,17.923000,1142.0
4,90001,"(34.62990741226313, -120.6103725832152)",0.000725,0.022322,200009,0.954103,142.651362,3.252706,23.520237,0.033333,7.85,140.0,0.088751,0.005693,0.000790,149.0,17.923000,-9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4960767,90066,"(33.93274727774168, -117.6323445395605)",0.000653,0.006294,200701,0.804066,45.635401,1.149009,208.714447,0.032258,4.29,164.0,0.048502,0.002802,0.000685,51.0,15.335833,113.0
4960768,91752,"(33.93274727774168, -117.6323445395605)",0.006693,0.045293,200701,0.787166,4.359097,1.149009,208.714447,0.032258,4.29,164.0,0.048502,0.028714,0.007018,696.0,10.056389,-532.0
4960769,91902,"(33.93274727774168, -117.6323445395605)",0.000289,0.000093,200701,0.751684,96.274283,1.149009,208.714447,0.032258,4.29,164.0,0.048502,0.001241,0.000303,93.0,12.032800,71.0
4960770,92313,"(33.93274727774168, -117.6323445395605)",0.001575,0.004315,200701,0.764619,17.996277,1.149009,208.714447,0.032258,4.29,164.0,0.048502,0.006756,0.001651,1060.0,12.165000,-896.0


In [110]:
model_df2 = (
    temp_instrument.groupby(  # [['ZCTA','year_month','fire_lat_lon','instrument','instrument_2','instrument_2_norm']]
        ["ZCTA", "year_month"]
    )
    .aggregate(sum)
    .drop_duplicates()
)

In [111]:
model_df2

Unnamed: 0_level_0,Unnamed: 1_level_0,instrument,instrument_norm,bearing,distance,fire_wspd_x,acres,fire_lat,fire_lon,fire_days_in_mo,fire_wdir,fire_wspd_y,year,month,fire_pm25,fire_elevation,fire_pm25_norm,instrument_2,instrument_2_norm
ZCTA,year_month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
89019,199104,0.000261,0.006595,0.910217,185.292714,1.593638,207.908157,37.368992,-118.578197,0.033333,356.694611,1.593638,1991,4,10.71,1630.0,0.121085,0.002795,0.000293
89019,199107,0.000712,0.016371,5.377310,921.508343,2.220699,733.828024,219.863048,-711.415978,0.354839,539.071667,2.220699,11946,42,25.43,14253.0,0.287507,0.003166,0.000748
89019,199108,0.031378,0.717908,8.021400,1264.961710,8.900734,3778.423931,325.520290,-1064.638511,6.161290,434.880981,8.900734,17919,72,12.88,22620.0,0.145619,0.064362,0.032106
89019,199109,0.011910,0.275090,15.282616,2500.767308,4.960312,7088.643047,617.857124,-2014.177914,9.066667,3306.518982,4.960312,33847,153,70.09,41888.0,0.792425,0.048156,0.012454
89019,199110,0.018859,0.453440,12.722091,2185.207157,3.562893,6874.610214,510.727792,-1661.366877,13.096774,3704.847351,3.562893,27874,140,64.56,34152.0,0.729904,0.084305,0.019812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97635,201607,0.002553,0.036612,10.788656,1845.938462,6.050591,1481.088247,529.755001,-1587.936493,1.096774,1940.639033,6.050591,26208,91,71.50,7937.0,0.808366,0.013238,0.002703
97635,201608,0.010405,0.152898,12.858277,2378.383437,9.851180,3532.768287,611.579388,-1837.366152,3.096774,3243.913956,9.851180,30240,120,68.91,9088.0,0.779084,0.069987,0.011196
97635,201609,0.019936,0.275592,18.298730,3080.280585,10.709470,12670.919912,894.747648,-2682.674963,5.666667,5492.342896,10.709470,44352,198,113.96,16837.0,1.288412,0.078531,0.020824
97635,201610,0.063282,1.022041,4.274471,443.813040,7.398554,8481.111023,204.069248,-604.084705,4.096774,277.967331,7.398554,10080,50,14.69,7816.0,0.166083,0.195177,0.065489


In [112]:
model_df2.to_csv(in_instrument + 'aggregated_instrument_cols_useful_only.csv')

In [None]:
model_df = fire_4d.drop(columns=["lat", "lon", "u", "v", "wind_lat_lon"])
model_df = model_df.drop(model_df[model_df["year"] > 2019].index)
model_df = model_df.drop(columns=["year", "month"])
model_df["fire_index"] = model_df["fire_index"].fillna(-1)
model_df["radius_mi"] = model_df["radius_mi"].fillna(-1)
model_df["treatment"] = model_df["treatment"].fillna(False)
model_df.fire_index = model_df.fire_index.astype(int)
model_df.fire_index = model_df.fire_index.astype(str)
model_df = model_df.drop_duplicates()
model_df["real_interaction"] = np.nan
model_df["real_distance"] = np.nan
model_df["interaction"] = np.nan
model_df["distance"] = np.nan
model_df["duration_days"] = model_df["duration_days"].fillna(-1)
model_df.groupby(["ZCTA", "year_month", "fire_index"]).first()
model_df

In [None]:
model_df.to_csv("progress.csv")

In [None]:
bearings.T.head()

In [None]:
fire_later = (
    fire_indexed.merge(bearings.T, on="fire_index", how="left")
    .fillna(0)
    .drop_duplicates()
)

fire_later.columns = fire_later.columns.astype(str)
fire_later.fire_index = fire_later.fire_index.astype(int)
fire_later.fire_index = fire_later.fire_index.astype(str)
fire_later = fire_later.set_index(["fire_index", "year_month", "ZCTA"])

fire_later.head()

In [None]:
fire_later2 = (
    fire_indexed.merge(dist_df.T, on="fire_index", how="left")
    .fillna(0)
    .drop_duplicates()
)
fire_later2.columns = fire_later2.columns.astype(str)
fire_later2.fire_index = fire_later2.fire_index.astype(int)
fire_later2.fire_index = fire_later2.fire_index.astype(str)
fire_later2 = fire_later2.set_index(["fire_index", "year_month", "ZCTA"])

fire_later2

In [None]:
dist_df = dist_df.fillna(0)

In [None]:
dist_df.head()

In [None]:
dist_df.loc[89010, :]

In [None]:
model_df.ZCTA = model_df.ZCTA.astype(str)
model_df.fire_index = model_df.fire_index.astype(str)
model_df.year_month = model_df.year_month.astype(str)

model_df2 = model_df.set_index(["fire_index", "year_month", "ZCTA"]).drop_duplicates()

In [None]:
# errors = []
# for entry in model_df2.iterrows():
#     if entry[0][0] == "-1":
#         model_df2.loc[(entry[0]), "real_interaction"] = 0
#         model_df2.loc[(entry[0]), "real_distance"] = 0

#     elif (
#         entry[0][2] in fire_later.columns
#         and entry[0][2] in fire_later2.columns
#         and entry[0] in fire_later.index
#         and entry[0] in fire_later2.index
#     ):
#         try:
#             model_df2.loc[entry[0], "real_interaction"] = fire_later.loc[
#                 entry[0], entry[0][2]
#             ]
#             model_df2.loc[entry[0], "real_distance"] = fire_later2.loc[
#                 entry[0], entry[0][2]
#             ]
#         except:
#             errors.append(entry)
#             continue
#     # else:
#     #     model_df2.loc[entry[0], "real_interaction"] = 0
#     #     model_df2.loc[entry[0], "real_distance"] = -1

In [None]:
errors = []
for entry in model_df2.head().iterrows():
    print(entry[0][2])
    if entry[0][0] == "-1":
        model_df2.loc[(entry[0]), "real_interaction"] = 0
        model_df2.loc[(entry[0]), "real_distance"] = 0

    else:
        try:
            model_df2.loc[entry[0], "real_interaction"] = bearings.loc[
                int(entry[0][2]), entry[0][0]
            ]
            model_df2.loc[entry[0], "real_distance"] = dist_df.loc[
                int(entry[0][2]), entry[0][0]
            ]
        except:
            errors.append(entry)
            continue

In [None]:
errors[0][0]

In [None]:
dist_df.head()

In [None]:
dist_df.loc[int(errors[0][0][2]), errors[0][0][0]]

In [None]:
bearings.loc[int(errors[0][0][2]), errors[0][0][0]]

In [None]:
model_df2.loc[

###### model_df2.to_csv("instrument2b_with_int_and_dist.csv")

In [None]:
# model_df2 = pd.read_csv("instrument2b_with_int_and_dist.csv", index_col=[0, 1, 2])
# model_df2

In [None]:
def ins_interaction(df):
    if df["real_interaction"] >= 0.75 and df["real_distance"] <= 100:
        return df["real_interaction"]
    else:
        return 0


def ins_distance(df):
    if df["real_interaction"] >= 0.75 and df["real_distance"] <= 100:
        return df["real_distance"]
    else:
        return 0


def ins_treatment(df):
    if df["real_interaction"] >= 0.75 and df["real_distance"] <= 100:
        return True
    else:
        return False

In [None]:
model_df2.real_distance.max()

In [None]:
model_df2["distance"] = model_df2.apply(ins_distance, axis=1)

In [None]:
model_df2["interaction"] = model_df2.apply(ins_interaction, axis=1)

In [None]:
model_df2["treatment"] = model_df2.apply(ins_treatment, axis=1)

In [None]:
model_df2

In [None]:
elevation = pd.read_csv("all_elevations.csv")

In [1]:
elev_df = pd.read_csv("all_elevations.csv")
elev_df["temp"] = elev_df["elevation"].apply(lambda x: int(x[: (len(x) - 3)]))
elev_df = elev_df.drop("elevation", axis=1)
elev_df = elev_df.rename(columns={"zips": "ZCTA", "temp": "elevation"})
elev_df["ZCTA"] = elev_df["ZCTA"].astype(str)
elev_df

NameError: name 'pd' is not defined

In [None]:
model_df3 = model_df2.reset_index()

In [None]:
model_final = model_df3.merge(elev_df, on="ZCTA", how="left")
model_final["year"] = model_final["year_month"].apply(lambda x: str(x[:4]))
model_final

In [None]:
model_final.to_csv("wind-fire-pm-elev-ins.csv")

In [None]:
model_final.real_distance.max()

In [None]:
model_df2.to_csv("modeling_data_int_days_rad.csv")

In [None]:
np.sum(~model_df2["treatment"])

In [None]:
model_df2.real_distance.max()

In [None]:
np.sum(model_df2.distance.isna())

In [None]:
model_df2.real_distance

In [None]:
np.sum(model_df2.real_distance.isna())

In [None]:
bearings.to_csv("bearings.csv")

In [None]:
bearings

In [None]:
dist_df.to_csv("dist.csv")

In [None]:
errors

In [None]:
dist_df

In [None]:
model_df2.real_distance.max()

In [None]:
model_df2 = pd.read_csv("instrument2b_with_int_and_dist.csv", index_col=[0, 1, 2])

In [None]:
model_df2.to_csv("instrument1_with_val_tuesday.csv")

In [None]:
model_df2.distance.max()

In [None]:
model_df2 = pd.read_csv("instrument2b_with_int_and_dist.csv", index_col=[0, 1, 2])
try:
    model_df2 = model_df2.drop((4126, 200508, 97635), axis=0)
    model_df2["treatment_2"] = np.where(model_df2["distance"] > 0, True, False)
    print("success")
except:
    pass