In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import networkx as nx

Load city shapefile dataframes

In [9]:
# Per-city GeoDataFrames read from zipped shapefiles under data/.
# Presumably one row per zip code with its geometry (see create_pollplace_shapefile's docstring) -- TODO confirm.
df_nyc = gpd.read_file('data/nyc zip codes with data.zip')
df_atl = gpd.read_file('data/atl zip codes with data.zip')

Create GeoJSON files with the polling place locations that fall inside NYC and Atlanta

In [56]:
# Master table of polling places; create_pollplace_shapefile reads this module-level
# frame directly and uses its 'address.state', 'longitude' and 'latitude' columns.
df_poll = pd.read_csv('data/polling_pk_master_post.csv')

def create_pollplace_shapefile(city_df, city_name, state_abbv):
    '''
    Write a GeoJSON file with the polling places located inside a city.

    Input:
    city_df is the geopandas dataframe with the zip code geometries (e.g. df_nyc),
    city_name is string used as the prefix of the output file name (e.g. 'nyc'),
    state_abbv is abbreviation of the state in polling_pk_master_post.csv (e.g. 'NY')

    Reads the module-level df_poll table, keeps the rows whose 'address.state'
    equals state_abbv, turns their longitude/latitude into Points (x = longitude,
    y = latitude) and keeps only the polls contained in at least one geometry of city_df.

    Output: saves the result as '<city_name> polling locations.geojson'. Returns nothing.
    '''
    # Restrict the master table to the requested state first.
    in_state = df_poll.loc[df_poll['address.state'] == state_abbv]

    # Attach a point geometry to every poll.
    points = gpd.points_from_xy(in_state.longitude, in_state.latitude)
    state_polls = gpd.GeoDataFrame(in_state, geometry=points)

    # One flag per poll: True when any geometry of the city contains the point.
    inside_city = [city_df.contains(location).any() for location in state_polls['geometry']]
    city_polls = state_polls.iloc[inside_city]

    out_path = city_name + ' polling locations.geojson'
    city_polls.to_file(out_path, driver='GeoJSON')

In [58]:
# Writes 'nyc polling locations.geojson': NY polls that lie inside the df_nyc geometries.
create_pollplace_shapefile(df_nyc, 'nyc', 'NY')

In [57]:
# Writes 'atl polling locations.geojson': GA polls that lie inside the df_atl geometries.
create_pollplace_shapefile(df_atl, 'atl', 'GA')

Calculate shortest paths (by time)

In [15]:
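# NOTE: THE FUNCTIONS BELOW ARE UNTESTED BECAUSE I HAVEN'T GOTTEN OSMNX WORKING YET.
# They reference `ox`, so OSMnx must be imported as `ox` before they can be called.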

def calc_shortest_time(G, a, b):
    '''
    Estimate the travel time between two points along the road network.

    Input:
    G is digraph representing road network, with edge travel times already stored
    in the 'travel_time' edge attribute.
    a, b are shapely Points (see https://shapely.readthedocs.io/en/stable/manual.html#points).
    Each point's x is passed to OSMnx as X and its y as Y, so x is expected to be the
    longitude and y the latitude (as produced by points_from_xy(longitude, latitude)).

    Returns: the 'travel_time'-weighted shortest path length from the graph node
    nearest to a to the graph node nearest to b.

    NOTE: relies on the name `ox` (OSMnx) being available in the notebook namespace.
    '''
    # Snap both endpoints to their closest graph nodes (a first, then b).
    origin, destination = (ox.distance.nearest_nodes(G, pt.x, pt.y) for pt in (a, b))
    return nx.shortest_path_length(G, origin, destination, weight='travel_time')

def calc_time_by_car_matrix(G, polls, fname = None):
    '''
    Build the matrix of pairwise travel times by car between polling places.

    Input:
    G is nx.Digraph representing road network, with edge travel times already added as an attribute.
    polls is geopandas dataframe whose 'geometry' column stores the polling locations as Points
    (x = longitude, y = latitude, as expected by calc_shortest_time). Its index may be arbitrary:
    rows are addressed by position, not by index label.
    fname (optional) is string for file name to save to.

    Returns: Numpy array t_car of shape (N, N) s.t. t_car[i, j] = estimate of travel time by car
    from the ith poll to the jth poll (in row order). The diagonal is 0. t_car is asymmetric.
    If fname is given, saves t_car as .npy file inside the "Distance_Marix_Files/" folder
    (created if it does not exist yet).
    '''
    import os  # only needed for creating the output folder

    # Materialise the geometries once so the loops below use positions 0..N-1.
    # iterrows() yields index *labels*, which are not valid array positions once
    # the frame has been filtered.
    locations = list(polls['geometry'])
    N = len(locations)
    t_car = np.zeros((N, N))
    for i, a in enumerate(locations):
        for j, b in enumerate(locations):
            if i != j:
                t_car[i, j] = calc_shortest_time(G, a, b)
    if fname is not None:
        out_dir = "Distance_Marix_Files"
        # Avoid losing the whole computation because the folder is missing.
        os.makedirs(out_dir, exist_ok=True)
        # np.save takes the target file first and the array second.
        np.save(out_dir + "/" + fname, t_car)
    return t_car