# Main script to Join Data on Instrument 2 Fire Location

Modules: N/A <br>
Author: Jordan Meyer <br>
Email: jordan.meyer@berkeley.edu <br>
Date created: Feb 18, 2023 <br>

**Citations (data sources)**


**Citations (persons)**
1. Cornelia Ilin 

**Preferred environment**
1. Code written in Jupyter Notebooks

### Step 1: Import packages

In [2]:
import os
from datetime import date, timedelta
from math import pi

import cartopy.crs as ccrs
import contextily as ctx
import fiona

# geography
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import netCDF4 as ncdf
import numpy as np
import osmnx as ox
import pandas as pd
import shapely

# Moved from sklearn.neighbors to sklearn.metrics following their package change
import sklearn.metrics
from cartopy.mpl.gridliner import LATITUDE_FORMATTER, LONGITUDE_FORMATTER
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from shapely.geometry import Point

dist = sklearn.metrics.DistanceMetric.get_metric("haversine")

# ignore warnings
import warnings

from tqdm.notebook import tqdm, trange

warnings.filterwarnings("ignore")

### Step 2: Define working directories

In [3]:
# Local directories on my machine (not gdrive)
in_dir = "../data/interim/"
out_dir = "../data/processed/"

In [4]:
ls

1.0-jam-wind-cleaning.ipynb
1.1-jam-wind-direction.ipynb
1.2-jam-wind-functions.ipynb
3.0-jmm-fires_cleaning.ipynb
3.1-jmm-fires_clean_eda.ipynb
3.2-jmm-fires_dropped_eda.ipynb
3.3-jam-fire_zcta_add.ipynb
3.4-jam-fires_final_stage_cleaning_zctas.ipynb
3.5-jam-fire-trim-multipolygons.ipynb
4.0-jam-join_data.ipynb
4.1-jam-join_data_instrument_1.ipynb
4.2-jam-join_data_instrument_2.ipynb
4.3-jam-join_data_instrument_troubleshooting.ipynb
4.4-jam-lookups.ipynb
5.0-phd-fake_med_data_zipcode.ipynb
6.0-lk-cross_sectional_models.ipynb
all_elevations.csv
backup_instrument2b_with_int_and_dist.csv
bearings.csv
dist.csv
instrument1.csv
instrument1_with_val_tuesday.csv
instrument2_with_int_and_dist.csv
instrument2b_with_int_and_dist.csv
interactions.csv
modeling_data_int_days_rad.csv
progress.csv
progress?.csv
wind-fire-pm-elev-ins.csv


### Step 3: Define functions

### Step 4: Read data

## Wind

Wind load from other file

In [75]:
wind_df = (
    pd.read_csv(in_dir + "all_years_wind_data.csv", index_col=0)
    .sort_values(["year_month", "ZCTA10"])
    .reset_index(drop=True)
)

wind_df.ZCTA10.unique().shape

(1633,)

## Fire

Finsh cleaning some fire things and then join fire to wind 

In [6]:
fire_df = pd.read_csv(in_dir + "fire_zipcodes_compressed.csv", index_col=0)

# Extract year and month, filter for fires in scope 1991+
fire_df["year"] = fire_df["ALARM_DATE"].str[:4]
fire_df = fire_df[fire_df["year"].astype(int) > 1990]

# Extract Month and End Year/Month
fire_df["month"] = fire_df["ALARM_DATE"].str[5:7]
fire_df["year_month"] = fire_df["year"] + fire_df["month"]
fire_df["month"] = fire_df["month"].astype(int)
fire_df["end_year"] = fire_df["CONT_DATE"].str[:4]
fire_df["end_month"] = fire_df["CONT_DATE"].str[5:7].astype(int)

# Give each fire a unique index code for later
fire_df = (
    fire_df.drop_duplicates().reset_index().rename(columns={"Index": "fire_index"})
)

# Calculate duration with monthly basis, add 1 as proxy for ceiling calculation ie fire at any time in month counts regardless of start date
fire_df["duration_months"] = (fire_df["end_month"] - fire_df["month"]) + 1
# fire_df = fire_df.set_index(['ZCTA','ALARM_DATE'])

# For fires that span multiple years replace the values of the duration on a yearly basis and duplciate the entry for the next year
for idx, fire in fire_df[fire_df["end_year"] != fire_df["year"]].iterrows():
    fire["FIRE_NAME"] = fire["FIRE_NAME"] + " CONT"
    fire["year"] = int(fire["year"]) + 1
    fire["month"] = "01"
    fire["year_month"] = str(fire["year"]) + fire["month"]
    # Hard coding duration = 1 as all fires that spanned across years happened to be put out in January
    fire["duration_months"] = 1

    # Replace the total durations with the duration in year, clear up negatives durations from month 2 < month 1
    fire_df.loc[idx, "duration_months"] = 13 - fire_df.loc[idx, "month"]

    # Append the new year's entry for the fire's continuation
    fire_df = fire_df.append(fire)

In [7]:
# For all fires that are longer than 1 month insert another entry with remaing duration into the dataframe
print(fire_df[fire_df["duration_months"] > 1].shape)
for idx, fire in fire_df[fire_df["duration_months"] > 1].iterrows():
    month = 2
    duration = fire["duration_months"]
    while duration > 1:
        # print(idx, fire_df.iloc[idx]['duration_months'], duration-1)
        # fire['FIRE_NAME'] = fire_df.iloc[idx]['FIRE_NAME'] + f"_{month}"
        fire["duration_months"] -= 1
        fire["month"] += 1
        fire["year_month"] = (
            (str(fire["year"]) + str(fire["month"]))
            if len(str(fire["month"])) > 1
            else (str(fire["year"]) + "0" + str(fire["month"]))
        )
        fire_df = fire_df.append(fire)
        duration = fire["duration_months"]
        month += 1

(1419, 19)


In [8]:
pd_fire = pd.DataFrame(fire_df)  # .drop('fire_centroid',axis=1))
pd_fire = pd_fire.drop("ZCTA", axis=1)
pd_fire["year_month"] = pd_fire["year_month"].str[:]
fire_ready = pd_fire.reset_index(drop=True).drop_duplicates()

fire_dates = fire_ready["year_month"].unique()
fire_indexes = fire_ready["fire_index"].unique()
# fire_ready = pd_fire[~pd_fire["ZCTA"].isna()].reset_index(drop=True).drop_duplicates()

In [106]:
wind_ready = (
    wind_df.drop("Unnamed: 0", axis=1)
    .rename(columns={"ZCTA10": "ZCTA"})
    .reset_index(drop=True)
    .drop_duplicates()
)
wind_ready["year_month"] = wind_ready["year_month"].astype(str)
wind_ready["ZCTA"] = wind_ready["ZCTA"].astype(str)

wind_dates = wind_ready["year_month"].unique()
wind_zips = wind_ready["ZCTA"].unique()
wind_ready.dtypes[["year_month", "ZCTA"]]

year_month    object
ZCTA          object
dtype: object

In [140]:
wind_ready[wind_ready.ZCTA == "97635"].lat = 41.993
# wind_ready.loc['97635','ZCTA' == '97635']#['lon'] = -120.250
# wind_ready = wind_ready.drop_duplicates()
ind_fix = wind_ready[wind_ready.ZCTA == "97635"].index
for ind in ind_fix:
    wind_ready.iloc[ind, 0] = 41.993
    wind_ready.iloc[ind, 1] = -120.250
wind_ready[wind_ready.ZCTA == "97635"]

Unnamed: 0,lat,lon,ZCTA,u,v,wdir,wspd,year_month,year,month
1632,41.993,-120.25,97635,0.266725,0.492897,61.580482,0.560437,199101,1991,1
3265,41.993,-120.25,97635,0.382091,1.256239,73.082626,1.313062,199102,1991,2
4898,41.993,-120.25,97635,1.138832,1.177601,45.958839,1.638194,199103,1991,3
6531,41.993,-120.25,97635,1.574920,-0.570827,340.077026,1.675177,199104,1991,4
8164,41.993,-120.25,97635,1.208124,-1.199607,315.202698,1.702534,199105,1991,5
...,...,...,...,...,...,...,...,...,...,...
620539,41.993,-120.25,97635,0.825993,0.237262,16.026415,0.859393,202208,2022,8
622172,41.993,-120.25,97635,0.562525,0.747856,53.050083,0.935801,202209,2022,9
623805,41.993,-120.25,97635,-0.165442,0.297311,119.094231,0.340243,202210,2022,10
625438,41.993,-120.25,97635,0.534377,0.960378,60.907417,1.099038,202211,2022,11


## PM2.5

Load PM2.5 from lawis files downloaded from gdrive

In [77]:
aqi1 = pd.read_csv(in_dir + "finalpm25.csv", index_col=0)
aqi1

Unnamed: 0,year_month,ZIP10,pm25
204,2017-01,89010,1.842857
205,2017-02,89010,3.528571
206,2017-03,89010,3.242857
207,2017-04,89010,3.700000
208,2017-05,89010,5.242857
...,...,...,...
585307,2016-12,96148,1.713333
585308,2016-12,96150,1.719288
585309,2016-12,96155,0.771287
585310,2016-12,96161,1.416996


In [78]:
pm25_df = aqi1.reset_index(drop=True).rename(
    columns={"ZIP10": "ZCTA", "year_month": "old_ym"}
)

In [79]:
pm25_df.shape

(626400, 3)

In [80]:
pm25_df["year_month"] = pm25_df["old_ym"].str[:4] + pm25_df["old_ym"].str[5:]
pm25_df["ZCTA"] = pm25_df["ZCTA"].astype(str)
pm25_ready = pm25_df.drop("old_ym", axis=1)

In [81]:
pm25_ready.dtypes

ZCTA           object
pm25          float64
year_month     object
dtype: object

In [15]:
wfp_df = wind_ready.merge(pm25_ready, on=["year_month", "ZCTA"], how="left")

In [102]:
wfp_df.head()

Unnamed: 0,lat,lon,ZCTA,u,v,wdir,wspd,year_month,year,month,pm25
0,37.465,-117.936,89010,0.504258,-0.719008,305.042938,0.878208,199101,1991,1,12.450976
1,35.396,-116.322,89019,-0.172753,-0.94694,259.661102,0.962568,199101,1991,1,10.846541
2,36.161,-116.139,89060,-0.435964,-0.812957,241.796738,0.922477,199101,1991,1,12.385
3,35.957,-115.897,89061,-0.560538,-1.176384,244.522552,1.303105,199101,1991,1,10.15
4,39.52,-120.032,89439,0.042253,0.205661,78.390099,0.209957,199101,1991,1,8.389565


In [17]:
fire_ready["geometry"] = gpd.GeoSeries.from_wkt(fire_ready["fire_centroid"])
fire_ready["lon"] = fire_ready["geometry"].apply(lambda p: p.x)
fire_ready["lat"] = fire_ready["geometry"].apply(lambda p: p.y)
fire_ready["sq_mi"] = fire_ready["GIS_ACRES"] / 640
fire_ready["radius_mi"] = np.sqrt(fire_ready["sq_mi"] / np.pi)

In [150]:
wind_locs = wind_ready[["ZCTA", "lon", "lat", "wdir"]].drop_duplicates()
wind_locs.ZCTA.unique().shape

(1633,)

In [144]:
fire_ready["CONT_DATE"] = pd.to_datetime(fire_ready["CONT_DATE"])
fire_ready["ALARM_DATE"] = pd.to_datetime(fire_ready["ALARM_DATE"])

In [145]:
fire_ready["duration_days"] = (
    fire_ready["CONT_DATE"] - fire_ready["ALARM_DATE"]
) / np.timedelta64(1, "D")

In [146]:
fire_locs = fire_ready[["fire_index", "lon", "lat"]].drop_duplicates()

In [126]:
fire_locs[["fire_index", "lon", "lat"]]

Unnamed: 0,fire_index,lon,lat
0,0,-121.348059,38.888041
1,1,-121.370898,38.827567
2,3,-121.273414,38.962328
3,4,-121.300653,39.486364
4,5,-121.381018,38.731339
...,...,...,...
6850,19726,-117.516283,35.344953
6851,19727,-118.496065,35.667384
6852,19728,-118.318674,35.572593
6855,19729,-117.381469,35.856800


In [151]:
def distance(wind, fire):
    """
    params:
    -------
    wind: wind cleaned data
    fire: fire semi-cleaned data

    return:
    -------
    df3: dataframe with distances from each fire during time period
    """
    # create labels
    wind["wind_lat_lon"] = [str(xy) for xy in zip(wind.lat, wind.lon)]
    fire["fire_lat_lon"] = [str(xy) for xy in zip(fire.lat, fire.lon)]

    ## for each point in wind data find the nearest point in the census data ##
    ###############
    # keep only unique points in fire data
    df1_unique = wind.drop_duplicates(["wind_lat_lon"])
    df2_unique = fire.drop_duplicates(["fire_lat_lon"])

    df1_unique.reset_index(drop=True, inplace=True)
    df2_unique.reset_index(drop=True, inplace=True)

    # transform to radians
    df1_unique["lat_r"] = np.radians(df1_unique.lat)
    df1_unique["lon_r"] = np.radians(df1_unique.lon)
    df2_unique["lat_r"] = np.radians(df2_unique.lat)
    df2_unique["lon_r"] = np.radians(df2_unique.lon)

    # compute pairwise distance (in miles)
    dist_matrix = (
        dist.pairwise(
            df2_unique[["lat_r", "lon_r"]],
            df1_unique[["lat_r", "lon_r"]],
        )
    ) * 3959

    # create a df from dist_matrix
    dist_matrix = pd.DataFrame(
        dist_matrix, index=df2_unique["fire_index"], columns=df1_unique["ZCTA"]
    )
    return dist_matrix

In [152]:
temp_df = distance(wind_locs, fire_locs)
dist_df = temp_df.merge(
    fire_ready[["year_month", "fire_index"]],
    left_on=temp_df.index,
    right_on="fire_index",
)

In [153]:
dist_df.iloc[:-2]

Unnamed: 0,89010,89019,89060,89061,89439,90001,90002,90003,90004,90005,...,96142,96143,96145,96146,96148,96150,96161,97635,year_month,fire_index
0,209.784488,367.121833,341.951585,361.133956,82.898204,381.673640,383.492028,382.132994,372.560594,373.680651,...,64.479235,75.412718,62.291262,61.747473,73.067539,71.622004,53.497243,222.174584,202006,0
1,209.035336,365.432157,340.818891,359.955902,86.213298,378.562149,380.382581,379.002401,369.438252,370.559046,...,66.045967,77.970525,64.438846,64.267602,75.710378,72.367993,56.348971,226.528324,202006,1
2,208.627307,367.354761,341.312496,360.559467,76.800081,384.392649,386.206767,384.888810,375.302234,376.420656,...,60.415906,70.090270,57.500033,56.511591,67.656088,68.622895,47.918170,216.205084,202003,3
3,208.627307,367.354761,341.312496,360.559467,76.800081,384.392649,386.206767,384.888810,375.302234,376.420656,...,60.415906,70.090270,57.500033,56.511591,67.656088,68.622895,47.918170,216.205084,202004,3
4,229.394707,392.914821,363.431339,382.880297,67.677560,417.624410,419.426869,418.205547,408.595169,409.708739,...,72.653312,71.336098,65.324574,60.908483,68.396747,85.651339,51.152100,181.722762,202004,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9265,148.337551,67.382649,95.623900,100.271388,319.769862,104.384126,105.385048,106.493189,99.757474,100.352986,...,286.979295,300.903043,296.457155,302.725644,303.066354,272.076400,309.773782,482.382906,201605,19726
9266,128.039928,123.677381,136.240025,146.999697,279.159159,118.887688,120.521531,120.117554,110.807372,111.818538,...,243.002176,259.265396,252.662422,259.060952,261.103024,228.017596,265.067364,447.134669,201606,19727
9267,132.475798,113.000379,128.641709,138.346534,288.438167,111.569579,113.130117,112.966683,103.900438,104.863356,...,252.832003,268.698745,262.475330,268.860526,270.598795,237.835182,275.060915,455.631160,201606,19728
9268,132.475798,113.000379,128.641709,138.346534,288.438167,111.569579,113.130117,112.966683,103.900438,104.863356,...,252.832003,268.698745,262.475330,268.860526,270.598795,237.835182,275.060915,455.631160,201606,19728


In [86]:
def fire_bearings(wind, fire):
    """
    params:
    -------
    wind: wind cleaned data
    fire: fire semi-cleaned data

    return:
    -------
    df3: dataframe with distances from each fire during time period
    """

    # create labels
    wind["wind_lat_lon"] = [str(xy) for xy in zip(wind.lat, wind.lon)]
    fire["fire_lat_lon"] = [str(xy) for xy in zip(fire.lat, fire.lon)]

    ###############
    # keep only unique points in fire data
    df1_unique = wind.drop_duplicates(["wind_lat_lon"])
    df2_unique = fire.drop_duplicates(["fire_lat_lon"])

    df1_unique.reset_index(drop=True, inplace=True)

    df2_unique.reset_index(drop=True, inplace=True)

    # compute pairwise distance (in miles)
    bearing_matrix = np.zeros((df1_unique.shape[0], df2_unique.shape[0]))
    for idx in range(df1_unique.shape[0]):
        pa_lat = df1_unique["lat"][idx]
        pa_lon = df1_unique["lon"][idx]
        wdir_ba = df1_unique["wdir"][idx]
        for idy in range(df2_unique.shape[0]):
            pb_lat = df2_unique["lat"][idy]
            pb_lon = df2_unique["lon"][idy]

            if pb_lon > pa_lon:
                d_X = -np.cos(pa_lat) * np.sin(pa_lon - pb_lon)
            else:
                d_X = np.cos(pa_lat) * np.sin(pa_lon - pb_lon)

            if pb_lat > pa_lat:
                d_Y = -np.cos(pb_lat) * np.sin(pa_lat) - np.sin(pb_lat) * np.cos(
                    pa_lat
                ) * np.cos(pa_lon - pb_lon)
            else:
                d_Y = np.cos(pb_lat) * np.sin(pa_lat) - np.sin(pb_lat) * np.cos(
                    pa_lat
                ) * np.cos(pa_lon - pb_lon)

            B_ba = np.arctan2(d_X, d_Y) * (180 / np.pi) % 360

            angle_diff = np.max([B_ba, wdir_ba]) - np.min([B_ba, wdir_ba])

            if angle_diff > 180:
                upwind_effect = (angle_diff) / 360
            else:
                upwind_effect = 1 - (angle_diff) / 360

            bearing_matrix[idx, idy] = upwind_effect

    # create a df from dist_matrix
    bearing_matrix = pd.DataFrame(
        bearing_matrix, index=df1_unique["ZCTA"], columns=df2_unique["fire_index"]
    )

    return bearing_matrix

In [127]:
wind_locs[["ZCTA", "lat", "lon"]]

Unnamed: 0,ZCTA,lat,lon
0,89010,37.465,-117.936
1,89019,35.396,-116.322
2,89060,36.161,-116.139
3,89061,35.957,-115.897
4,89439,39.520,-120.032
...,...,...,...
13063,97635,41.993,-120.258
14696,97635,41.993,-120.263
16329,97635,41.993,-120.272
17962,97635,41.993,-120.277


In [154]:
temp_df = fire_bearings(wind_locs, fire_locs)
temp_df = temp_df.fillna(0)
temp_df = temp_df.T
temp_df
bearings = temp_df.merge(
    fire_ready[["year_month", "fire_index"]],
    left_on=temp_df.index,
    right_on="fire_index",
)

In [171]:
bearings

Unnamed: 0,89010,89019,89060,89061,89439,90001,90002,90003,90004,90005,...,96142,96143,96145,96146,96148,96150,96161,97635,year_month,fire_index
0,0.889575,0.572339,0.684403,0.637840,0.699592,0.847698,0.845244,0.852177,0.860826,0.859312,...,0.779805,0.654119,0.726955,0.685446,0.647946,0.941487,0.583209,0.509476,202006,0
1,0.893569,0.580642,0.682568,0.642698,0.706343,0.844596,0.842201,0.849045,0.857528,0.856028,...,0.762434,0.651479,0.714120,0.678345,0.646130,0.946963,0.578757,0.510795,202006,1
2,0.877717,0.554791,0.687970,0.627505,0.692368,0.858049,0.855466,0.862550,0.871689,0.870125,...,0.964765,0.658708,0.750876,0.698110,0.651018,0.939242,0.590482,0.545047,202003,3
3,0.877717,0.554791,0.687970,0.627505,0.692368,0.858049,0.855466,0.862550,0.871689,0.870125,...,0.964765,0.658708,0.750876,0.698110,0.651018,0.939242,0.590482,0.545047,202004,3
4,0.885649,0.525731,0.848731,0.678458,0.550954,0.859091,0.855759,0.865785,0.875208,0.873511,...,0.848255,0.669465,0.793862,0.728418,0.658561,0.814500,0.612066,0.712649,202004,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9267,0.785607,0.988393,0.663323,0.707385,0.642813,0.865451,0.872265,0.854896,0.847645,0.844893,...,0.948279,0.880447,0.919038,0.897951,0.876232,0.952176,0.970675,0.739495,201606,19728
9268,0.785607,0.988393,0.663323,0.707385,0.642813,0.865451,0.872265,0.854896,0.847645,0.844893,...,0.948279,0.880447,0.919038,0.897951,0.876232,0.952176,0.970675,0.739495,201606,19728
9269,0.785607,0.988393,0.663323,0.707385,0.642813,0.865451,0.872265,0.854896,0.847645,0.844893,...,0.948279,0.880447,0.919038,0.897951,0.876232,0.952176,0.970675,0.739495,201606,19728
9270,0.761760,0.913633,0.654034,0.774174,0.531704,0.991401,0.993163,0.984196,0.982189,0.983112,...,0.900795,0.878378,0.888578,0.880014,0.874325,0.979413,0.953702,0.801187,201605,19729


In [156]:
fire_dates.shape

(279,)

In [157]:
wind_zips.shape[0]

1633

In [172]:
dist_df

Unnamed: 0,89010,89019,89060,89061,89439,90001,90002,90003,90004,90005,...,96142,96143,96145,96146,96148,96150,96161,97635,year_month,fire_index
0,209.784488,367.121833,341.951585,361.133956,82.898204,381.673640,383.492028,382.132994,372.560594,373.680651,...,64.479235,75.412718,62.291262,61.747473,73.067539,71.622004,53.497243,222.174584,202006,0
1,209.035336,365.432157,340.818891,359.955902,86.213298,378.562149,380.382581,379.002401,369.438252,370.559046,...,66.045967,77.970525,64.438846,64.267602,75.710378,72.367993,56.348971,226.528324,202006,1
2,208.627307,367.354761,341.312496,360.559467,76.800081,384.392649,386.206767,384.888810,375.302234,376.420656,...,60.415906,70.090270,57.500033,56.511591,67.656088,68.622895,47.918170,216.205084,202003,3
3,208.627307,367.354761,341.312496,360.559467,76.800081,384.392649,386.206767,384.888810,375.302234,376.420656,...,60.415906,70.090270,57.500033,56.511591,67.656088,68.622895,47.918170,216.205084,202004,3
4,229.394707,392.914821,363.431339,382.880297,67.677560,417.624410,419.426869,418.205547,408.595169,409.708739,...,72.653312,71.336098,65.324574,60.908483,68.396747,85.651339,51.152100,181.722762,202004,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9267,132.475798,113.000379,128.641709,138.346534,288.438167,111.569579,113.130117,112.966683,103.900438,104.863356,...,252.832003,268.698745,262.475330,268.860526,270.598795,237.835182,275.060915,455.631160,201606,19728
9268,132.475798,113.000379,128.641709,138.346534,288.438167,111.569579,113.130117,112.966683,103.900438,104.863356,...,252.832003,268.698745,262.475330,268.860526,270.598795,237.835182,275.060915,455.631160,201606,19728
9269,132.475798,113.000379,128.641709,138.346534,288.438167,111.569579,113.130117,112.966683,103.900438,104.863356,...,252.832003,268.698745,262.475330,268.860526,270.598795,237.835182,275.060915,455.631160,201606,19728
9270,115.294666,67.487325,72.558188,83.368515,291.637846,140.003070,141.079674,142.045160,134.883714,135.547134,...,261.047902,273.481013,270.312701,276.439556,275.801483,246.373404,284.057088,451.089407,201605,19729


In [369]:
treatment_df = (bearings.drop_duplicates().iloc[:, :-2] >= 0.75)[
    dist_df.drop_duplicates().iloc[:, :-2] < 100
].fillna(False)

In [370]:
treatment_df["fire_index"] = (
    bearings.drop_duplicates()["fire_index"].astype(int).astype(str)
)
treatment_df["year_month"] = bearings.drop_duplicates()["year_month"]
treatment_df = treatment_df.reset_index(drop=True)
treatment_df

Unnamed: 0,89010,89019,89060,89061,89439,90001,90002,90003,90004,90005,...,96142,96143,96145,96146,96148,96150,96161,97635,fire_index,year_month
0,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,True,False,False,0,202006
1,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,True,False,False,1,202006
2,False,False,False,False,False,False,False,False,False,False,...,True,False,True,False,False,True,False,False,3,202003
3,False,False,False,False,False,False,False,False,False,False,...,True,False,True,False,False,True,False,False,3,202004
4,False,False,False,False,False,False,False,False,False,False,...,True,False,True,False,False,True,False,False,4,202004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7246,False,True,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,19726,201605
7247,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,19727,201606
7248,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,19728,201606
7249,False,True,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,19729,201605


In [368]:
wind_zips

array(['89010', '89019', '89060', ..., '96150', '96161', '97635'],
      dtype=object)

In [158]:
df = pd.DataFrame()

In [159]:
df["fire_index"] = fire_indexes

In [371]:
impacted = dict()
for fire in treatment_df.iterrows():
    impacted[treatment_df.iloc[fire[0], -2]] = wind_zips[
        np.asarray(fire[1][:-2], dtype=bool)
    ]

# Data stored in the impacted dictionary uses the fire_index as the key vs the index of the dataframe which was used for checking

# impacted

treatment_df.sum(axis=1)
# for zcta in wind_zips[fire[1]]:
#     impacted[fire[0]].append(zcta)

0       123
1       122
2       125
3       125
4       126
       ... 
7246    234
7247    104
7248    154
7249     56
7250    200
Length: 7251, dtype: int64

In [373]:
treatment_df.iloc[:, :-2].sum(axis=0)

89010    147
89019    201
89060      0
89061     15
89439    410
        ... 
96146    155
96148     25
96150    500
96161     65
97635    209
Length: 1633, dtype: int64

In [314]:
treatment_df.iloc[7249, :]

89010          False
89019           True
89060          False
89061           True
89439          False
               ...  
96150          False
96161          False
97635          False
fire_index     18167
year_month    201307
Name: 7249, Length: 1635, dtype: object

In [350]:
len(impacted["18167"])

56

In [307]:
x = treatment_df.iloc[7249, :-2]
# impacted[fire[0]] = []
# for zcta in wind_zips[fire[1]]:
#     impacted[fire[0]].append(zcta)

mask = [i for i in x]
len(wind_zips[mask])

56

In [513]:
bear_export = bearings.drop_duplicates()
bear_export.to_csv('bearings_new.csv')

In [514]:
dist_export = dist_df.drop_duplicates()
dist_export.to_csv('dist_new.csv')

In [None]:
bearings.shape

In [320]:
# initialize empty lists
fires = []
impacted_zips = []
zip_distances = []
winds = []
zip_bearings = []

In [399]:
small_dist = dist_df
small_dist.fire_index = small_dist.fire_index.astype(int)
small_dist.year_month = small_dist.year_month.astype(int)
small_dist = small_dist.set_index(["fire_index", "year_month"])

small_bear = bearings
small_bear.fire_index = small_bear.fire_index.astype(int)
small_bear.year_month = small_bear.year_month.astype(int)
small_bear = small_bear.set_index(["fire_index", "year_month"])

In [400]:
small_bear

Unnamed: 0_level_0,Unnamed: 1_level_0,89010,89019,89060,89061,89439,90001,90002,90003,90004,90005,...,96140,96141,96142,96143,96145,96146,96148,96150,96161,97635
fire_index,year_month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,202006,0.889575,0.572339,0.684403,0.637840,0.699592,0.847698,0.845244,0.852177,0.860826,0.859312,...,0.664306,0.740132,0.779805,0.654119,0.726955,0.685446,0.647946,0.941487,0.583209,0.509476
1,202006,0.893569,0.580642,0.682568,0.642698,0.706343,0.844596,0.842201,0.849045,0.857528,0.856028,...,0.660292,0.725857,0.762434,0.651479,0.714120,0.678345,0.646130,0.946963,0.578757,0.510795
3,202003,0.877717,0.554791,0.687970,0.627505,0.692368,0.858049,0.855466,0.862550,0.871689,0.870125,...,0.671318,0.766712,0.964765,0.658708,0.750876,0.698110,0.651018,0.939242,0.590482,0.545047
3,202004,0.877717,0.554791,0.687970,0.627505,0.692368,0.858049,0.855466,0.862550,0.871689,0.870125,...,0.671318,0.766712,0.964765,0.658708,0.750876,0.698110,0.651018,0.939242,0.590482,0.545047
4,202004,0.885649,0.525731,0.848731,0.678458,0.550954,0.859091,0.855759,0.865785,0.875208,0.873511,...,0.687997,0.809382,0.848255,0.669465,0.793862,0.728418,0.658561,0.814500,0.612066,0.712649
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19728,201606,0.785607,0.988393,0.663323,0.707385,0.642813,0.865451,0.872265,0.854896,0.847645,0.844893,...,0.886903,0.926355,0.948279,0.880447,0.919038,0.897951,0.876232,0.952176,0.970675,0.739495
19728,201606,0.785607,0.988393,0.663323,0.707385,0.642813,0.865451,0.872265,0.854896,0.847645,0.844893,...,0.886903,0.926355,0.948279,0.880447,0.919038,0.897951,0.876232,0.952176,0.970675,0.739495
19728,201606,0.785607,0.988393,0.663323,0.707385,0.642813,0.865451,0.872265,0.854896,0.847645,0.844893,...,0.886903,0.926355,0.948279,0.880447,0.919038,0.897951,0.876232,0.952176,0.970675,0.739495
19729,201605,0.761760,0.913633,0.654034,0.774174,0.531704,0.991401,0.993163,0.984196,0.982189,0.983112,...,0.880485,0.892362,0.900795,0.878378,0.888578,0.880014,0.874325,0.979413,0.953702,0.801187


In [406]:
small_dist.loc[small_dist.index[0], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,89010,89019,89060,89061,89439,90001,90002,90003,90004,90005,...,96140,96141,96142,96143,96145,96146,96148,96150,96161,97635
fire_index,year_month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,202006,209.784488,367.121833,341.951585,361.133956,82.898204,381.67364,383.492028,382.132994,372.560594,373.680651,...,70.421092,63.319238,64.479235,75.412718,62.291262,61.747473,73.067539,71.622004,53.497243,222.174584


In [423]:
small_bear.loc[small_bear.index[0], ['89019','89010']].values

array([[0.57233906, 0.88957524]])

In [490]:
import pandas as pd

In [503]:
small_dist.loc[small_dist.index[0]][small_dist.loc[fire_id] < 100]

Unnamed: 0_level_0,Unnamed: 1_level_0,89010,89019,89060,89061,89439,90001,90002,90003,90004,90005,...,96140,96141,96142,96143,96145,96146,96148,96150,96161,97635
fire_index,year_month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,202006,,,,,82.898204,,,,,,...,70.421092,63.319238,64.479235,75.412718,62.291262,61.747473,73.067539,71.622004,53.497243,


In [None]:
list(small_bear.loc[(,list(wind_zips[np.asarray(~tmp.isna()[:-2], dtype=bool)]))])

SyntaxError: invalid syntax (1486907527.py, line 1)

In [481]:
for i in small_dist.index:
    fire_id = i
    # return zip and distance data for relevant ZCTAs
    tmp = small_dist.loc[fire_id][small_dist.loc[fire_id] < 100]
    # add data to list
    impacted_zips += list(tmp.index)
    zip_distances += list(tmp.values)
    fires += [fire_id] * tmp.shape[0]
    winds += list(small_bear.loc[fire_id,list(wind_zips[
        np.asarray(~tmp.isna()[:-2], dtype=bool)
    ])].values)

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [None]:
small_dist.loc[(0,202006),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,89010,89019,89060,89061,89439,90001,90002,90003,90004,90005,...,96140,96141,96142,96143,96145,96146,96148,96150,96161,97635
fire_index,year_month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,202006,209.784488,367.121833,341.951585,361.133956,82.898204,381.67364,383.492028,382.132994,372.560594,373.680651,...,70.421092,63.319238,64.479235,75.412718,62.291262,61.747473,73.067539,71.622004,53.497243,222.174584


In [393]:
wind_locs

Unnamed: 0,ZCTA,lon,lat,wdir,wind_lat_lon
0,89010,-117.936,37.465,305.042938,"(37.465, -117.936)"
1,89019,-116.322,35.396,259.661102,"(35.396, -116.322)"
2,89060,-116.139,36.161,241.796738,"(36.161, -116.139)"
3,89061,-115.897,35.957,244.522552,"(35.957, -115.897)"
4,89439,-120.032,39.520,78.390099,"(39.52, -120.032)"
...,...,...,...,...,...
627067,96146,-120.248,39.149,56.279018,"(39.149, -120.248)"
627068,96148,-120.062,39.236,56.279018,"(39.236, -120.062)"
627069,96150,-120.033,38.732,81.387100,"(38.732, -120.033)"
627070,96161,-120.427,39.184,132.224274,"(39.184, -120.427)"


In [None]:
fire_impact_df = pd.DataFrame(
    {
        "fire_num": fires,
        "impacted_zip": impacted_zips,
        "zip_dist": zip_distances,
        "wind": winds,
    }
)

In [652]:
model_df = fire_4d.drop(columns=["lat", "lon", "u", "v", "wind_lat_lon"])
model_df = model_df.drop(model_df[model_df["year"] > 2019].index)
model_df = model_df.drop(columns=["year", "month"])
model_df["fire_index"] = model_df["fire_index"].fillna(-1)
model_df["radius_mi"] = model_df["radius_mi"].fillna(-1)
model_df["treatment"] = model_df["treatment"].fillna(False)
model_df.fire_index = model_df.fire_index.astype(int)
model_df.fire_index = model_df.fire_index.astype(str)
model_df = model_df.drop_duplicates()
model_df["real_interaction"] = np.nan
model_df["real_distance"] = np.nan
model_df["interaction"] = np.nan
model_df["distance"] = np.nan
model_df["duration_days"] = model_df["duration_days"].fillna(-1)
model_df.groupby(["ZCTA", "year_month", "fire_index"]).first()
model_df

Unnamed: 0,ZCTA,wdir,wspd,year_month,fire_index,treatment,radius_mi,duration_days,pm25,real_interaction,real_distance,interaction,distance
0,89010,305.042938,0.878208,199101,-1,False,-1.000000,-1.0,12.450976,,,,
1,89019,259.661102,0.962568,199101,-1,False,-1.000000,-1.0,10.846541,,,,
2,89060,241.796738,0.922477,199101,-1,False,-1.000000,-1.0,12.385000,,,,
3,89061,244.522552,1.303105,199101,-1,False,-1.000000,-1.0,10.150000,,,,
4,89439,78.390099,0.209957,199101,-1,False,-1.000000,-1.0,8.389565,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
630193,,,,201503,19404,True,0.132465,36.0,,,,,
630194,,,,201503,19405,True,0.104751,143.0,,,,,
630195,,,,201503,19470,True,0.322894,7.0,,,,,
630196,93667,,,201608,19645,True,1.055740,10.0,14.126020,,,,


In [653]:
model_df.to_csv("progress.csv")

In [654]:
bearings.T.head()

ZCTA,89010,89019,89060,89061,89439,90001,90002,90003,90004,90005,...,97635,97635,97635,97635,97635,97635,97635,97635,97635,97635
fire_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.889575,0.572339,0.684403,0.63784,0.699592,0.847698,0.845244,0.852177,0.860826,0.859312,...,0.552358,0.735334,0.80497,0.736642,0.706208,0.605404,0.91907,0.664107,0.630411,0.547616
1,0.893569,0.580642,0.682568,0.642698,0.706343,0.844596,0.842201,0.849045,0.857528,0.856028,...,0.532135,0.715195,0.784699,0.716349,0.685848,0.584955,0.898509,0.643341,0.609531,0.568705
3,0.877717,0.554791,0.68797,0.627505,0.692368,0.858049,0.855466,0.86255,0.871689,0.870125,...,0.587864,0.770698,0.840541,0.772248,0.741918,0.641255,0.955098,0.700456,0.666942,0.510756
4,0.885649,0.525731,0.848731,0.678458,0.550954,0.859091,0.855759,0.865785,0.875208,0.873511,...,0.75592,0.93836,0.991857,0.939839,0.909478,0.808771,0.877442,0.867809,0.834233,0.656417
5,0.896248,0.589362,0.680653,0.647635,0.715738,0.843239,0.840886,0.847645,0.856105,0.854599,...,0.507942,0.691094,0.760434,0.692056,0.661474,0.56047,0.873884,0.618463,0.584511,0.593983


In [655]:
fire_later = (
    fire_indexed.merge(bearings.T, on="fire_index", how="left")
    .fillna(0)
    .drop_duplicates()
)

fire_later.columns = fire_later.columns.astype(str)
fire_later.fire_index = fire_later.fire_index.astype(int)
fire_later.fire_index = fire_later.fire_index.astype(str)
fire_later = fire_later.set_index(["fire_index", "year_month", "ZCTA"])

fire_later.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,radius_mi,duration_days,89010,89019,89060,89061,89439,90001,90002,90003,...,97635,97635,97635,97635,97635,97635,97635,97635,97635,97635
fire_index,year_month,ZCTA,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0,202006,95648,0.233477,5.0,0.889575,0.572339,0.684403,0.63784,0.699592,0.847698,0.845244,0.852177,...,0.552358,0.735334,0.80497,0.736642,0.706208,0.605404,0.91907,0.664107,0.630411,0.547616
1,202006,95747,0.583937,3.0,0.893569,0.580642,0.682568,0.642698,0.706343,0.844596,0.842201,0.849045,...,0.532135,0.715195,0.784699,0.716349,0.685848,0.584955,0.898509,0.643341,0.609531,0.568705
3,202003,95648,0.080197,1.0,0.877717,0.554791,0.68797,0.627505,0.692368,0.858049,0.855466,0.86255,...,0.587864,0.770698,0.840541,0.772248,0.741918,0.641255,0.955098,0.700456,0.666942,0.510756
4,202004,95966,0.071629,5.0,0.885649,0.525731,0.848731,0.678458,0.550954,0.859091,0.855759,0.865785,...,0.75592,0.93836,0.991857,0.939839,0.909478,0.808771,0.877442,0.867809,0.834233,0.656417
5,202007,95747,0.135108,0.0,0.896248,0.589362,0.680653,0.647635,0.715738,0.843239,0.840886,0.847645,...,0.507942,0.691094,0.760434,0.692056,0.661474,0.56047,0.873884,0.618463,0.584511,0.593983


In [656]:
fire_later2 = (
    fire_indexed.merge(dist_df.T, on="fire_index", how="left")
    .fillna(0)
    .drop_duplicates()
)
fire_later2.columns = fire_later2.columns.astype(str)
fire_later2.fire_index = fire_later2.fire_index.astype(int)
fire_later2.fire_index = fire_later2.fire_index.astype(str)
fire_later2 = fire_later2.set_index(["fire_index", "year_month", "ZCTA"])

fire_later2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,radius_mi,duration_days,89010,89019,89060,89061,89439,90001,90002,90003,...,97635,97635,97635,97635,97635,97635,97635,97635,97635,97635
fire_index,year_month,ZCTA,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0,202006,95648,0.233477,5.0,209.784488,367.121833,341.951585,361.133956,82.898204,381.673640,383.492028,382.132994,...,222.268524,222.256737,222.174584,222.160933,222.120047,222.065696,221.998019,221.876932,221.810069,221.690449
1,202006,95747,0.583937,3.0,209.035336,365.432157,340.818891,359.955902,86.213298,378.562149,380.382581,379.002401,...,226.622309,226.610641,226.528324,226.514644,226.473674,226.419205,226.351376,226.230003,226.162974,226.043041
3,202003,95648,0.080197,1.0,208.627307,367.354761,341.312496,360.559467,76.800081,384.392649,386.206767,384.888810,...,216.298057,216.283698,216.205084,216.192023,216.152914,216.100936,216.036233,215.920524,215.856662,215.742469
4,202004,95966,0.071629,5.0,229.394707,392.914821,363.431339,382.880297,67.677560,417.624410,419.426869,418.205547,...,181.820185,181.818040,181.722762,181.706930,181.659517,181.596492,181.518019,181.377633,181.300123,181.161474
5,202007,95747,0.135108,0.0,206.737358,361.723879,337.903974,356.971181,90.544632,372.981273,374.804029,373.398352,...,233.194055,233.181302,233.100474,233.087041,233.046810,232.993324,232.926717,232.807525,232.741698,232.623912
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19698,201609,95546,0.549653,13.0,410.017119,574.422027,544.177415,563.677889,233.382274,590.530005,592.360809,590.843004,...,190.528494,190.711742,190.412346,190.362450,190.212767,190.013203,189.763769,189.314845,189.065476,188.616670
19720,201608,96130,0.117931,18.0,252.367462,421.059804,384.153075,403.602940,71.540285,466.509951,468.255942,467.372564,...,110.766256,110.741757,110.677283,110.666619,110.634767,110.592627,110.540478,110.448089,110.397586,110.308163
19721,201610,96015,0.434521,36.0,319.123999,487.679355,447.872863,467.100306,142.562148,539.070350,540.804893,539.976080,...,49.928597,50.021368,49.807632,49.772107,49.665701,49.524223,49.348017,49.032666,48.858496,48.546866
19722,201610,96130,1.131451,78.0,251.680688,420.275310,383.835959,403.305903,70.347517,464.278552,466.029679,465.120451,...,114.823485,114.803841,114.732592,114.720795,114.685539,114.638845,114.580981,114.478240,114.421949,114.322045


In [657]:
dist_df = dist_df.fillna(0)

In [658]:
dist_df.head()

fire_index,0,1,3,4,5,6,7,8,9,10,...,19721,19722,19723,19724,19725,19726,19727,19728,19729,19730
ZCTA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89010,209.784488,209.035336,208.627307,229.394707,206.737358,218.838665,216.038569,226.58834,226.303575,208.957379,...,319.123999,251.680688,264.825828,338.002187,111.184487,148.337551,128.039928,132.475798,115.294666,139.923583
89019,367.121833,365.432157,367.354761,392.914821,361.723879,378.837568,376.467808,389.356902,388.796593,363.863147,...,487.679355,420.27531,400.885469,191.324011,101.117394,67.382649,123.677381,113.000379,67.487325,124.146145
89060,341.951585,340.818891,341.312496,363.431339,337.903974,351.999965,349.323211,360.493392,360.152663,340.104959,...,447.872863,383.835959,386.676246,245.140578,107.777834,95.6239,136.240025,128.641709,72.558188,141.293224
89061,361.133956,359.955902,360.559467,382.880297,356.971181,371.306402,368.647617,379.914802,379.563656,359.168395,...,467.100306,403.305903,404.649097,233.975195,119.597674,100.271388,146.999697,138.346534,83.368515,150.762093
89439,82.898204,86.213298,76.800081,67.67756,90.544632,76.410696,73.061286,69.067264,70.107323,91.681319,...,142.562148,70.347517,173.474108,506.709376,275.687789,319.769862,279.159159,288.438167,291.637846,290.063782


In [689]:
dist_df.loc[89010, :]

fire_index
0        209.784488
1        209.035336
3        208.627307
4        229.394707
5        206.737358
            ...    
19726    148.337551
19727    128.039928
19728    132.475798
19729    115.294666
19730    139.923583
Name: 89010, Length: 5640, dtype: float64

In [659]:
model_df.ZCTA = model_df.ZCTA.astype(str)
model_df.fire_index = model_df.fire_index.astype(str)
model_df.year_month = model_df.year_month.astype(str)

model_df2 = model_df.set_index(["fire_index", "year_month", "ZCTA"]).drop_duplicates()

In [660]:
# errors = []
# for entry in model_df2.iterrows():
#     if entry[0][0] == "-1":
#         model_df2.loc[(entry[0]), "real_interaction"] = 0
#         model_df2.loc[(entry[0]), "real_distance"] = 0

#     elif (
#         entry[0][2] in fire_later.columns
#         and entry[0][2] in fire_later2.columns
#         and entry[0] in fire_later.index
#         and entry[0] in fire_later2.index
#     ):
#         try:
#             model_df2.loc[entry[0], "real_interaction"] = fire_later.loc[
#                 entry[0], entry[0][2]
#             ]
#             model_df2.loc[entry[0], "real_distance"] = fire_later2.loc[
#                 entry[0], entry[0][2]
#             ]
#         except:
#             errors.append(entry)
#             continue
#     # else:
#     #     model_df2.loc[entry[0], "real_interaction"] = 0
#     #     model_df2.loc[entry[0], "real_distance"] = -1

In [736]:
errors = []
for entry in model_df2.head().iterrows():
    print(entry[0][2])
    if entry[0][0] == "-1":
        model_df2.loc[(entry[0]), "real_interaction"] = 0
        model_df2.loc[(entry[0]), "real_distance"] = 0

    else:
        try:
            model_df2.loc[entry[0], "real_interaction"] = bearings.loc[
                int(entry[0][2]), entry[0][0]
            ]
            model_df2.loc[entry[0], "real_distance"] = dist_df.loc[
                int(entry[0][2]), entry[0][0]
            ]
        except:
            errors.append(entry)
            continue

In [728]:
errors[0][0]

('4126', '200508', '97635')

In [721]:
dist_df.head()

fire_index,0,1,3,4,5,6,7,8,9,10,...,19721,19722,19723,19724,19725,19726,19727,19728,19729,19730
ZCTA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89010,209.784488,209.035336,208.627307,229.394707,206.737358,218.838665,216.038569,226.58834,226.303575,208.957379,...,319.123999,251.680688,264.825828,338.002187,111.184487,148.337551,128.039928,132.475798,115.294666,139.923583
89019,367.121833,365.432157,367.354761,392.914821,361.723879,378.837568,376.467808,389.356902,388.796593,363.863147,...,487.679355,420.27531,400.885469,191.324011,101.117394,67.382649,123.677381,113.000379,67.487325,124.146145
89060,341.951585,340.818891,341.312496,363.431339,337.903974,351.999965,349.323211,360.493392,360.152663,340.104959,...,447.872863,383.835959,386.676246,245.140578,107.777834,95.6239,136.240025,128.641709,72.558188,141.293224
89061,361.133956,359.955902,360.559467,382.880297,356.971181,371.306402,368.647617,379.914802,379.563656,359.168395,...,467.100306,403.305903,404.649097,233.975195,119.597674,100.271388,146.999697,138.346534,83.368515,150.762093
89439,82.898204,86.213298,76.800081,67.67756,90.544632,76.410696,73.061286,69.067264,70.107323,91.681319,...,142.562148,70.347517,173.474108,506.709376,275.687789,319.769862,279.159159,288.438167,291.637846,290.063782


In [730]:
dist_df.loc[int(errors[0][0][2]), errors[0][0][0]]

499.6381850526929

In [731]:
bearings.loc[int(errors[0][0][2]), errors[0][0][0]]

0.7178215546534799

In [704]:
model_df2.loc[

547059

###### model_df2.to_csv("instrument2b_with_int_and_dist.csv")

In [699]:
# model_df2 = pd.read_csv("instrument2b_with_int_and_dist.csv", index_col=[0, 1, 2])
# model_df2

In [700]:
def ins_interaction(df):
    if df["real_interaction"] >= 0.75 and df["real_distance"] <= 100:
        return df["real_interaction"]
    else:
        return 0


def ins_distance(df):
    if df["real_interaction"] >= 0.75 and df["real_distance"] <= 100:
        return df["real_distance"]
    else:
        return 0


def ins_treatment(df):
    if df["real_interaction"] >= 0.75 and df["real_distance"] <= 100:
        return True
    else:
        return False

In [665]:
model_df2.real_distance.max()

81.96208812044381

In [666]:
model_df2["distance"] = model_df2.apply(ins_distance, axis=1)

In [667]:
model_df2["interaction"] = model_df2.apply(ins_interaction, axis=1)

In [668]:
model_df2["treatment"] = model_df2.apply(ins_treatment, axis=1)

In [669]:
model_df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,wdir,wspd,treatment,radius_mi,duration_days,pm25,real_interaction,real_distance,interaction,distance
fire_index,year_month,ZCTA,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
-1,199101,89010,305.042938,0.878208,False,-1.0,-1.0,12.450976,0.0,0.0,0.0,0.0
-1,199101,89019,259.661102,0.962568,False,-1.0,-1.0,10.846541,0.0,0.0,0.0,0.0
-1,199101,89060,241.796738,0.922477,False,-1.0,-1.0,12.385000,0.0,0.0,0.0,0.0
-1,199101,89061,244.522552,1.303105,False,-1.0,-1.0,10.150000,0.0,0.0,0.0,0.0
-1,199101,89439,78.390099,0.209957,False,-1.0,-1.0,8.389565,0.0,0.0,0.0,0.0
-1,...,...,...,...,...,...,...,...,...,...,...,...
-1,201912,96108,100.429756,1.599154,False,-1.0,-1.0,,0.0,0.0,0.0,0.0
-1,201912,96109,101.103188,0.458200,False,-1.0,-1.0,,0.0,0.0,0.0,0.0
-1,201912,96110,101.118813,1.063629,False,-1.0,-1.0,,0.0,0.0,0.0,0.0
-1,201912,96117,110.966415,0.430399,False,-1.0,-1.0,,0.0,0.0,0.0,0.0


In [670]:
elevation = pd.read_csv("all_elevations.csv")

In [671]:
elev_df = pd.read_csv("all_elevations.csv")
elev_df["temp"] = elev_df["elevation"].apply(lambda x: int(x[: (len(x) - 3)]))
elev_df = elev_df.drop("elevation", axis=1)
elev_df = elev_df.rename(columns={"zips": "ZCTA", "temp": "elevation"})
elev_df["ZCTA"] = elev_df["ZCTA"].astype(str)
elev_df

Unnamed: 0,ZCTA,elevation
0,89010,4784
1,89019,4821
2,89060,2621
3,89061,2709
4,89439,5951
...,...,...
1618,96148,6473
1619,96150,6548
1620,96161,6042
1621,97635,7370


In [672]:
model_df3 = model_df2.reset_index()

In [673]:
model_final = model_df3.merge(elev_df, on="ZCTA", how="left")
model_final["year"] = model_final["year_month"].apply(lambda x: str(x[:4]))
model_final

Unnamed: 0,fire_index,year_month,ZCTA,wdir,wspd,treatment,radius_mi,duration_days,pm25,real_interaction,real_distance,interaction,distance,elevation,year
0,-1,199101,89010,305.042938,0.878208,False,-1.0,-1.0,12.450976,0.0,0.0,0.0,0.0,4784,1991
1,-1,199101,89019,259.661102,0.962568,False,-1.0,-1.0,10.846541,0.0,0.0,0.0,0.0,4821,1991
2,-1,199101,89060,241.796738,0.922477,False,-1.0,-1.0,12.385000,0.0,0.0,0.0,0.0,2621,1991
3,-1,199101,89061,244.522552,1.303105,False,-1.0,-1.0,10.150000,0.0,0.0,0.0,0.0,2709,1991
4,-1,199101,89439,78.390099,0.209957,False,-1.0,-1.0,8.389565,0.0,0.0,0.0,0.0,5951,1991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547054,-1,201912,96108,100.429756,1.599154,False,-1.0,-1.0,,0.0,0.0,0.0,0.0,5089,2019
547055,-1,201912,96109,101.103188,0.458200,False,-1.0,-1.0,,0.0,0.0,0.0,0.0,6482,2019
547056,-1,201912,96110,101.118813,1.063629,False,-1.0,-1.0,,0.0,0.0,0.0,0.0,4583,2019
547057,-1,201912,96117,110.966415,0.430399,False,-1.0,-1.0,,0.0,0.0,0.0,0.0,6258,2019


In [674]:
model_final.to_csv("wind-fire-pm-elev-ins.csv")

In [675]:
model_final.real_distance.max()

81.96208812044381

In [475]:
model_df2.to_csv("modeling_data_int_days_rad.csv")

In [518]:
np.sum(~model_df2["treatment"])

543771

In [522]:
model_df2.real_distance.max()

81.96208812044381

In [523]:
np.sum(model_df2.distance.isna())

0

In [524]:
model_df2.real_distance

fire_index  year_month  ZCTA 
-1          199101      89010    0.0
                        89019    0.0
                        89060    0.0
                        89061    0.0
                        89439    0.0
                                ... 
            201912      96108    0.0
                        96109    0.0
                        96110    0.0
                        96117    0.0
                        97635    0.0
Name: real_distance, Length: 547059, dtype: float64

In [422]:
np.sum(model_df2.real_distance.isna())

547059

In [None]:
bearings.to_csv("bearings.csv")

In [525]:
bearings

fire_index,0,1,3,4,5,6,7,8,9,10,...,19721,19722,19723,19724,19725,19726,19727,19728,19729,19730
ZCTA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89010,0.889575,0.893569,0.877717,0.885649,0.896248,0.892742,0.882706,0.886995,0.889156,0.902112,...,0.619994,0.690069,0.932567,0.516846,0.832709,0.773576,0.759675,0.785607,0.761760,0.751960
89019,0.572339,0.580642,0.554791,0.525731,0.589362,0.546271,0.535974,0.506763,0.500158,0.592444,...,0.639431,0.665894,0.733366,0.848007,0.997566,0.889570,0.968686,0.988393,0.913633,0.993551
89060,0.684403,0.682568,0.687970,0.848731,0.680653,0.707593,0.709682,0.860313,0.866081,0.680654,...,0.834776,0.834141,0.669989,0.818841,0.652564,0.664471,0.662997,0.663323,0.654034,0.665415
89061,0.637840,0.642698,0.627505,0.678458,0.647635,0.607169,0.600085,0.591666,0.559808,0.648518,...,0.789865,0.790277,0.693607,0.908853,0.741259,0.717145,0.703654,0.707385,0.774174,0.698158
89439,0.699592,0.706343,0.692368,0.550954,0.715738,0.639374,0.644971,0.504444,0.508473,0.713270,...,0.739221,0.755190,0.760240,0.814885,0.565905,0.676121,0.636038,0.642813,0.531704,0.654185
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97635,0.605404,0.584955,0.641255,0.808771,0.560470,0.696171,0.708958,0.789826,0.782196,0.561482,...,0.856495,0.893216,0.534237,0.603652,0.830652,0.896753,0.813046,0.835226,0.897207,0.829555
97635,0.919070,0.898509,0.955098,0.877442,0.873884,0.989787,0.976977,0.896297,0.903900,0.874922,...,0.829981,0.793275,0.779243,0.709087,0.855615,0.789698,0.873369,0.851231,0.789069,0.856968
97635,0.664107,0.643341,0.700456,0.867809,0.618463,0.755929,0.768779,0.849117,0.841563,0.619548,...,0.914807,0.951479,0.523940,0.547547,0.889540,0.955128,0.871522,0.893584,0.956069,0.887729
97635,0.630411,0.609531,0.666942,0.834233,0.584511,0.722613,0.735485,0.815630,0.808104,0.585622,...,0.880977,0.917627,0.509946,0.582171,0.855911,0.921315,0.837745,0.859766,0.922429,0.853846


In [None]:
dist_df.to_csv("dist.csv")

In [None]:
errors

[(('4126', '200508', '97635'),
  wdir           352.434692
  wspd             0.904916
  treatment           False
  radius_mi        0.318846
  pm25                2.725
  interaction           NaN
  distance              NaN
  Name: (4126, 200508, 97635), dtype: object)]

In [579]:
dist_df

fire_index,0,1,3,4,5,6,7,8,9,10,...,19721,19722,19723,19724,19725,19726,19727,19728,19729,19730
ZCTA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
89010,209.784488,209.035336,208.627307,229.394707,206.737358,218.838665,216.038569,226.588340,226.303575,208.957379,...,319.123999,251.680688,264.825828,338.002187,111.184487,148.337551,128.039928,132.475798,115.294666,139.923583
89019,367.121833,365.432157,367.354761,392.914821,361.723879,378.837568,376.467808,389.356902,388.796593,363.863147,...,487.679355,420.275310,400.885469,191.324011,101.117394,67.382649,123.677381,113.000379,67.487325,124.146145
89060,341.951585,340.818891,341.312496,363.431339,337.903974,351.999965,349.323211,360.493392,360.152663,340.104959,...,447.872863,383.835959,386.676246,245.140578,107.777834,95.623900,136.240025,128.641709,72.558188,141.293224
89061,361.133956,359.955902,360.559467,382.880297,356.971181,371.306402,368.647617,379.914802,379.563656,359.168395,...,467.100306,403.305903,404.649097,233.975195,119.597674,100.271388,146.999697,138.346534,83.368515,150.762093
89439,82.898204,86.213298,76.800081,67.677560,90.544632,76.410696,73.061286,69.067264,70.107323,91.681319,...,142.562148,70.347517,173.474108,506.709376,275.687789,319.769862,279.159159,288.438167,291.637846,290.063782
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97635,222.065696,226.419205,216.100936,181.596492,232.993324,206.263021,205.004177,187.615588,189.450894,232.692648,...,49.524223,114.638845,296.953250,671.848872,440.474585,482.514582,447.225512,455.729426,451.236155,458.522867
97635,221.998019,226.351376,216.036233,181.518019,232.926717,206.188974,204.934024,187.538329,189.373250,232.623699,...,49.348017,114.580981,296.837848,671.925586,440.547513,482.597056,447.282489,455.791037,451.328055,458.577682
97635,221.876932,226.230003,215.920524,181.377633,232.807525,206.056463,204.808534,187.400105,189.234325,232.500289,...,49.032666,114.478240,296.630606,672.063929,440.679166,482.745853,447.385437,455.902316,451.493831,458.676731
97635,221.810069,226.162974,215.856662,181.300123,232.741698,205.983277,204.739255,187.323783,189.157608,232.432117,...,48.858496,114.421949,296.515738,672.140931,440.752519,482.828708,447.442845,455.964348,451.586125,458.731970


In [703]:
model_df2.real_distance.max()

81.96208812044381

In [53]:
model_df2 = pd.read_csv("instrument2b_with_int_and_dist.csv", index_col=[0, 1, 2])

In [419]:
model_df2.to_csv("instrument1_with_val_tuesday.csv")

In [526]:
model_df2.distance.max()

81.96208812044381

In [115]:
model_df2 = pd.read_csv("instrument2b_with_int_and_dist.csv", index_col=[0, 1, 2])
try:
    model_df2 = model_df2.drop((4126, 200508, 97635), axis=0)
    model_df2["treatment_2"] = np.where(model_df2["distance"] > 0, True, False)
    print("success")
except:
    pass