# Code for constructing the distance matrix as described in eq. (3.1)

In [1]:
## Modules

import geopandas as gpd
import pandas as pd
import numpy as np

import json
import urllib.request
import geopandas as gpd
import pandas as pd
import numpy as np
import geopy.distance

In [2]:
## Utils
# Zip-code-level lookup tables. The first CSV column is used as the row index;
# get_pop / get_cars look rows up by labels of the form 'ZCTA5 <zip>'.
over_18_pop_by_zip = pd.read_csv(
    r"C:\Users\luosk\Dropbox\Research Stuff\coverage project\Full US Data\over_18_pop_data.csv",
    index_col=0,
)
cars_by_zip = pd.read_csv(
    r"C:\Users\luosk\Dropbox\Research Stuff\coverage project\Full US Data\full_us_car_data.csv",
    index_col=0,
)

def get_pop(zipcode):
    """Return the over-18 population recorded for a zip code.

    Args:
        zipcode: Zip code as a string (e.g. '84115'). It is prefixed with
            'ZCTA5 ' to form the row label looked up in
            ``over_18_pop_by_zip``.

    Returns:
        The first data value of the matching row, converted to ``int``.

    Raises:
        KeyError: If ``over_18_pop_by_zip`` has no row with that label.
    """
    label = 'ZCTA5 ' + zipcode
    # Extract the scalar explicitly: calling int() directly on a one-element
    # Series is deprecated in pandas >= 2.1 and slated to raise TypeError.
    return int(over_18_pop_by_zip.loc[label].iloc[0])

def get_cars(zipcode):
    """Return the car-ownership figure recorded for a zip code.

    Args:
        zipcode: Zip code as a string (e.g. '84115'). It is prefixed with
            'ZCTA5 ' to form the row label looked up in ``cars_by_zip``.

    Returns:
        The first data value of the matching row, converted to ``int``.

    Raises:
        KeyError: If ``cars_by_zip`` has no row with that label.
    """
    label = 'ZCTA5 ' + zipcode
    # Extract the scalar explicitly: calling int() directly on a one-element
    # Series is deprecated in pandas >= 2.1 and slated to raise TypeError.
    return int(cars_by_zip.loc[label].iloc[0])

def get_zip(site, polls):
    """Return the zip code stored for a polling site.

    Args:
        site: Index label of the site's row in ``polls``.
        polls: Table of polling sites with an 'address.zip' column.

    Returns:
        The 'address.zip' value of the row labelled ``site``.

    Raises:
        KeyError: If ``site`` is not a label in ``polls``.
    """
    # Scalar label lookup: avoids building a full row Series (and the dtype
    # coercion that comes with it) on every call; this helper is called
    # many times from the matrix-construction loops.
    return polls.at[site, 'address.zip']

def get_car_prop(site, polls):
    """Return the cars-per-person ratio for a site's zip code, capped at 1.

    Args:
        site: Index label of the site's row in ``polls``.
        polls: Table of polling sites with an 'address.zip' column.

    Returns:
        0 when the zip code's population is 0; otherwise the smaller of
        cars / population and 1.
    """
    site_zip = get_zip(site, polls)
    population = get_pop(site_zip)
    vehicles = get_cars(site_zip)
    # Guard against dividing by zero for zip codes with no population.
    if population == 0:
        return 0
    ratio = vehicles / population
    return min(ratio, 1)

def calc_dtilde_matrix(polls, t_car, t_pub, t_walk):
    """Construct the d-tilde matrix from the three per-mode matrices.

    For each ordered pair of distinct sites (i, j), every mode's cost is the
    sum of its two directed entries, ``t[i, j] + t[j, i]``. The entry is then
    a blend weighted by the car proportion ``c_i`` of site i::

        dtilde[i, j] = c_i * min(walk, pub, car) + (1 - c_i) * min(walk, pub)

    The diagonal is left at 0.

    Args:
        polls: Table of polling sites, one row per site. Rows are matched to
            matrix rows/columns by position (identical to the previous
            label-based indexing when the index is 0..N-1).
        t_car: Square matrix for the car mode (presumably travel times --
            confirm units against the code that produced it).
        t_pub: Square matrix for the public-transport mode.
        t_walk: Square matrix for the walking mode.

    Returns:
        An (N, N) float array, N being the number of rows of ``polls``.
        NaN entries in the inputs propagate to the result.
    """
    n_sites = polls.shape[0]
    # With fewer than two sites there is no off-diagonal entry to fill.
    if n_sites < 2:
        return np.zeros((n_sites, n_sites))

    def _both_directions(matrix):
        # Only the leading N x N block is used, as in an entry-wise loop.
        block = np.asarray(matrix)[:n_sites, :n_sites]
        return block + block.T

    walk = _both_directions(t_walk)
    pub = _both_directions(t_pub)
    car = _both_directions(t_car)

    non_driver_min = np.minimum(walk, pub)
    driver_min = np.minimum(non_driver_min, car)

    # The car proportion depends only on the row site, so look it up once
    # per site instead of once per (i, j) pair.
    car_prop = np.array(
        [get_car_prop(site, polls) for site in polls.index], dtype=float
    ).reshape(-1, 1)

    dtilde_matrix = car_prop * driver_min + (1 - car_prop) * non_driver_min
    np.fill_diagonal(dtilde_matrix, 0.0)
    return dtilde_matrix

def calc_d_matrix(dtilde_matrix, polls):
    """Construct the matrix d (eq. (3.1)) from d-tilde and zip populations.

    For distinct sites i and j with zip-code populations p_i and p_j::

        d[i, j] = (p_i * dtilde[i, j] + p_j * dtilde[j, i]) / (p_i + p_j)

    with these special cases: 0 when both populations are 0,
    ``dtilde[j, i]`` when only p_i is 0, and ``dtilde[i, j]`` when only
    p_j is 0. The diagonal is 0.

    Args:
        dtilde_matrix: Square array as returned by ``calc_dtilde_matrix``.
        polls: Table of polling sites, one row per site. Rows are matched to
            matrix rows/columns by position (identical to the previous
            label-based indexing when the index is 0..N-1).

    Returns:
        An (N, N) float array, N being the number of rows of ``polls``.
    """
    n_sites = polls.shape[0]
    # With fewer than two sites there is no off-diagonal entry to fill.
    if n_sites < 2:
        return np.zeros((n_sites, n_sites))

    dtilde = np.asarray(dtilde_matrix, dtype=float)[:n_sites, :n_sites]

    # One population lookup per site instead of one per (i, j) pair.
    pops = np.array(
        [get_pop(get_zip(site, polls)) for site in polls.index], dtype=float
    )
    pop_i = pops.reshape(-1, 1)
    pop_j = pops.reshape(1, -1)

    # Pairs whose populations are both 0 divide by zero here; those entries
    # are overwritten by the special cases below, so silence the warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        weighted = (1 / (pop_i + pop_j)) * (pop_i * dtilde + pop_j * dtilde.T)

    i_empty = pop_i == 0
    j_empty = pop_j == 0
    # Conditions are tested in order, mirroring an if/elif chain.
    d_matrix = np.select(
        [i_empty & j_empty, i_empty, j_empty],
        [0.0, dtilde.T, dtilde],
        default=weighted,
    )
    np.fill_diagonal(d_matrix, 0.0)
    return d_matrix

Note: Some of the zip codes had to be manually adjusted, as described in the paper. 

Salt Lake City (SLC)

In [4]:
# Load the Salt Lake City polling sites. A forward slash is used as the
# separator: a backslash in a non-raw string forms the invalid escape '\s'
# (SyntaxWarning on Python 3.12+) and only works as a path on Windows.
slc_sites = gpd.read_file('Salt Lake City/slc_polls.geojson')
# Override the zip code of the site labelled 10.
slc_sites.at[10, 'address.zip'] = '84115'

In [5]:
# Load the Salt Lake City car / public-transport / walking matrices.
# Forward slashes replace the backslash separators, which formed the invalid
# escape '\s' in a non-raw string and only worked as paths on Windows.
slc_tcar = np.load("Salt Lake City/slc_tcar_matrix_completed.npy")
slc_tpub = np.load("Salt Lake City/slc_tpub_matrix_completed.npy")
slc_twalk = np.load("walk_matrix_complete/slc_walk.npy")

In [6]:
# Build d-tilde for Salt Lake City, then the final d matrix from it.
slc_dtilde = calc_dtilde_matrix(
    polls=slc_sites,
    t_car=slc_tcar,
    t_pub=slc_tpub,
    t_walk=slc_twalk,
)
slc_d_matrix = calc_d_matrix(dtilde_matrix=slc_dtilde, polls=slc_sites)

In [7]:
# Persist the Salt Lake City d matrix in the working directory.
np.save(file="slc_d_matrix_updated.npy", arr=slc_d_matrix)

Atlanta

In [8]:
# Load the Atlanta polling sites.
atl_sites = gpd.read_file("Atlanta/atl_polls.geojson")

In [10]:
# Load the Atlanta car / public-transport / walking matrices, in that order.
atl_tcar, atl_tpub, atl_twalk = (
    np.load(matrix_path)
    for matrix_path in (
        "Atlanta/atl_tcar_matrix_completed.npy",
        "Atlanta/atl_tpub_matrix_completed.npy",
        "walk_matrix_complete/atl_walk.npy",
    )
)

In [11]:
# Normalise the zip codes to digit strings: float -> int drops any
# fractional part (e.g. 30312.0 -> 30312) before converting to str.
atl_sites['address.zip'] = (
    atl_sites['address.zip'].astype(float).astype(int).astype(str)
)

In [12]:
# Override the zip code of the site labelled 16.
atl_sites.at[16, "address.zip"] = "30312"

In [13]:
# Build d-tilde for Atlanta, derive the d matrix from it, and save it.
atl_dtilde = calc_dtilde_matrix(
    polls=atl_sites,
    t_car=atl_tcar,
    t_pub=atl_tpub,
    t_walk=atl_twalk,
)
atl_d_matrix = calc_d_matrix(dtilde_matrix=atl_dtilde, polls=atl_sites)
np.save(file="atl_d_matrix_updated.npy", arr=atl_d_matrix)

Los Angeles (LAC)

In [14]:
# Load the Los Angeles polling sites and remap three zip codes to the
# replacement zip codes listed below.
lac_sites = gpd.read_file('Los Angeles/fixed_files/lac_NoCatalina_polls.geojson')
lac_zip_overrides = {
    '90095': '90024',
    '90506': '90249',
    '91371': '91367',
}
for old_zip, new_zip in lac_zip_overrides.items():
    # Boolean-mask assignment rewrites every site carrying the old zip.
    lac_sites.loc[lac_sites['address.zip'] == old_zip, 'address.zip'] = new_zip

In [15]:
# Load the Los Angeles car / public-transport / walking matrices.
# The walking-matrix path now uses '/' like the others: the backslash formed
# the invalid escape '\l' in a non-raw string and only worked on Windows.
lac_tcar = np.load("Los Angeles/fixed_files/lac_tcar_matrix_completed.npy")
lac_tpub = np.load("Los Angeles/fixed_files/lac_tpub_matrix_completed.npy")
lac_twalk = np.load("walk_matrix_complete/lac_walk.npy")

In [16]:
# Build d-tilde for Los Angeles, derive the d matrix from it, and save it.
lac_dtilde = calc_dtilde_matrix(
    polls=lac_sites,
    t_car=lac_tcar,
    t_pub=lac_tpub,
    t_walk=lac_twalk,
)
lac_d_matrix = calc_d_matrix(dtilde_matrix=lac_dtilde, polls=lac_sites)
np.save(file="lac_d_matrix_updated.npy", arr=lac_d_matrix)

Jacksonville

In [17]:
# Load the Jacksonville polling sites.
jax_sites = gpd.read_file("Jacksonville/jax_polls.geojson")

In [18]:
# Load the Jacksonville car / public-transport / walking matrices.
# The walking-matrix path now uses '/' like the others: the backslash formed
# the invalid escape '\j' in a non-raw string and only worked on Windows.
jax_tcar = np.load("Jacksonville/jax_tcar_matrix_completed.npy")
jax_tpub = np.load("Jacksonville/jax_tpub_matrix_completed.npy")
jax_twalk = np.load("walk_matrix_complete/jax_walk.npy")

In [19]:
# Build d-tilde for Jacksonville, derive the d matrix from it, and save it.
jax_dtilde = calc_dtilde_matrix(
    polls=jax_sites,
    t_car=jax_tcar,
    t_pub=jax_tpub,
    t_walk=jax_twalk,
)
jax_d_matrix = calc_d_matrix(dtilde_matrix=jax_dtilde, polls=jax_sites)
np.save(file="jax_d_matrix_updated.npy", arr=jax_d_matrix)

Chicago

In [24]:
# Load the Chicago polling sites and override the zip codes of the sites
# labelled 397 and 453.
chc_sites = gpd.read_file('Chicago/chc_polls.geojson')
chc_sites.at[397, 'address.zip'] = '60656'
chc_sites.at[453, 'address.zip'] = '60827'

# Load the Chicago car / public-transport / walking matrices.
# The walking-matrix path now uses '/' like the others: the backslash formed
# the invalid escape '\c' in a non-raw string and only worked on Windows.
chc_tcar = np.load("Chicago/chc_tcar_matrix_completed.npy")
chc_tpub = np.load("Chicago/chc_tpub_matrix_completed.npy")
chc_twalk = np.load("walk_matrix_complete/chc_walk.npy")

In [27]:
# Build d-tilde for Chicago, derive the d matrix from it, and save it.
chc_dtilde = calc_dtilde_matrix(
    polls=chc_sites,
    t_car=chc_tcar,
    t_pub=chc_tpub,
    t_walk=chc_twalk,
)
chc_d_matrix = calc_d_matrix(dtilde_matrix=chc_dtilde, polls=chc_sites)
np.save(file="chc_d_matrix_updated.npy", arr=chc_d_matrix)

NYC

In [28]:
# Load the 'manbronx' polling sites and remap zip code 10103 to 10019 for
# every site that carries it.
manbronx_sites = gpd.read_file('NYC/manbronx_polls.geojson')
manbronx_sites.loc[manbronx_sites['address.zip'] == '10103', 'address.zip'] = '10019'

# Load the car / public-transport / walking matrices.
# The walking-matrix path now uses '/' like the others: the backslash formed
# the invalid escape '\m' in a non-raw string and only worked on Windows.
manbronx_tcar = np.load("NYC/manbronx_tcar_matrix_completed.npy")
manbronx_tpub = np.load("NYC/manbronx_tpub_matrix_completed.npy")
manbronx_twalk = np.load("walk_matrix_complete/manbronx_walk.npy")

# Build d-tilde, derive the d matrix from it, and save it.
manbronx_dtilde = calc_dtilde_matrix(manbronx_sites, manbronx_tcar, manbronx_tpub, manbronx_twalk)
manbronx_d_matrix = calc_d_matrix(manbronx_dtilde, manbronx_sites)
np.save("manbronx_d_matrix_updated.npy", manbronx_d_matrix)

In [32]:
# Load the 'queensbrook' polling sites.
queensbrook_sites = gpd.read_file('NYC/queensbrook_polls.geojson')

# Load the car / public-transport / walking matrices.
# The walking-matrix path now uses '/' like the others: the backslash formed
# the invalid escape '\q' in a non-raw string and only worked on Windows.
queensbrook_tcar = np.load("NYC/queensbrook_tcar_matrix_completed.npy")
queensbrook_tpub = np.load("NYC/queensbrook_tpub_matrix_completed.npy")
queensbrook_twalk = np.load("walk_matrix_complete/queensbrook_walk.npy")

In [34]:
# Build d-tilde for 'queensbrook', derive the d matrix from it, and save it.
queensbrook_dtilde = calc_dtilde_matrix(
    polls=queensbrook_sites,
    t_car=queensbrook_tcar,
    t_pub=queensbrook_tpub,
    t_walk=queensbrook_twalk,
)
queensbrook_d_matrix = calc_d_matrix(
    dtilde_matrix=queensbrook_dtilde,
    polls=queensbrook_sites,
)
np.save(file="queensbrook_d_matrix_updated.npy", arr=queensbrook_d_matrix)

In [35]:
# Load the 'stat' polling sites.
stat_sites = gpd.read_file('NYC/stat_polls.geojson')

# Load the car / public-transport / walking matrices.
# The walking-matrix path now uses '/' like the others: the backslash formed
# the invalid escape '\s' in a non-raw string and only worked on Windows.
stat_tcar = np.load("NYC/stat_tcar_matrix_completed.npy")
stat_tpub = np.load("NYC/stat_tpub_matrix_completed.npy")
stat_twalk = np.load("walk_matrix_complete/stat_walk.npy")

# Build d-tilde, derive the d matrix from it, and save it.
stat_dtilde = calc_dtilde_matrix(stat_sites, stat_tcar, stat_tpub, stat_twalk)
stat_d_matrix = calc_d_matrix(stat_dtilde, stat_sites)
np.save("stat_d_matrix_updated.npy", stat_d_matrix)

In [None]:
# Write every region's walking matrix to the working directory, in the
# order listed (dicts preserve insertion order).
walk_matrix_outputs = {
    "slc_twalk_matrix_completed.npy": slc_twalk,
    "atl_twalk_matrix_completed.npy": atl_twalk,
    "lac_twalk_matrix_completed.npy": lac_twalk,
    "chc_twalk_matrix_completed.npy": chc_twalk,
    "jax_twalk_matrix_completed.npy": jax_twalk,
    "queensbrook_twalk_matrix_completed.npy": queensbrook_twalk,
    "stat_twalk_matrix_completed.npy": stat_twalk,
    "manbronx_twalk_matrix_completed.npy": manbronx_twalk,
}
for out_name, walk_matrix in walk_matrix_outputs.items():
    np.save(out_name, walk_matrix)