In [13]:
import numpy as np
import pandas as pd
import geopandas as gpd

import osmnx as ox
import taxicab.distance as tc

from multiprocessing import Pool

In [15]:
def trim_centroids(od_matrix, buffered_boundary, bdry_as_gdf=True):
    """Keep only the OD rows whose origin centroid lies within a boundary.

    Parameters
    ----------
    od_matrix : pandas.DataFrame
        Must expose ``intptlon`` and ``intptlat`` columns; they are read as the
        x / y coordinates of the origin centroids in EPSG:4326.
    buffered_boundary : GeoDataFrame-like or geometry
        When ``bdry_as_gdf`` is True, an object whose ``.geometry`` holds the
        boundary polygon as its first entry. Otherwise the geometry itself.
    bdry_as_gdf : bool, default True
        Whether ``buffered_boundary`` has to be unwrapped to a single geometry.

    Returns
    -------
    pandas.DataFrame
        The rows of ``od_matrix`` whose centroid is within the boundary, with
        the index reset to 0..n-1.
    """
    if bdry_as_gdf:
        # Take the first geometry by POSITION. The boundary frame is usually
        # indexed by FUA code, so a label lookup with 0 only works through the
        # deprecated integer-positional fallback of Series.__getitem__.
        buffered_boundary = buffered_boundary.geometry.iloc[0]

    # NOTE(review): the boundary is assumed to be in EPSG:4326 like the
    # centroids; nothing here reprojects it -- confirm against the shapefile.
    centroids_pt = gpd.points_from_xy(x=od_matrix.intptlon, y=od_matrix.intptlat, crs='EPSG:4326')
    rows_to_keep = centroids_pt.within(buffered_boundary)
    trimmed_od_matrix = od_matrix[rows_to_keep].reset_index(drop=True)

    return trimmed_od_matrix

# Network Distance in Taxicab Sense

In this notebook we compute the network distance for the rows of the OD matrices using the taxicab package (https://github.com/nathanrooy/taxicab). Here, the amenities and centroids are snapped to the closest edge of the network, not necessarily to the closest node. I wrap the routing function so that it can run in parallel:

In [9]:
def shortest_path_taxicab_distance(G, origin_yx_list, destination_yx_list, cpus):
    """Run ``tc.shortest_path`` for many origin/destination pairs in parallel.

    Parameters
    ----------
    G : graph
        Street network passed unchanged as the first argument of every
        ``tc.shortest_path`` call.
    origin_yx_list, destination_yx_list : iterable
        Origins and destinations, paired up element by element with ``zip``
        (so the shorter of the two decides how many routes are computed).
        The parameter names suggest (y, x) tuples.
    cpus : int
        Number of worker processes in the pool.

    Returns
    -------
    list
        One ``tc.shortest_path`` result per pair, in input order. The callers
        in this notebook unpack each result into
        (distance, route, origin_edge, dest_edge).
    """
    # G is part of every task tuple, so it is shipped to the workers once per
    # task; keep that in mind for very large graphs.
    args = ((G, origin, destination) for origin, destination in zip(origin_yx_list, destination_yx_list))

    # The context manager tears the pool down even when a worker raises, so a
    # failed run does not leave orphaned worker processes behind.
    with Pool(cpus) as pool:
        outputs = pool.starmap(tc.shortest_path, args)

    return outputs

Below we test this process on the first 10 rows of one FUA:

In [20]:
#Parameters of this test run:
threshold=2000       # largest straight-line origin-destination distance (EPSG:5070 units) still labelled 'walk'
number_of_cores=2    # worker processes handed to shortest_path_taxicab_distance
fua_code='USA80'     # code of the functional urban area being processed

#Get the FUA boundary (kept as a one-row frame, which is what trim_centroids unwraps by default):
fua_buffered_boundary = gpd.read_file('../data/d03_intermediate/FUA-buffered-shapefile/FUA-buffered.shp').set_index('fuacode').loc[[fua_code]]

print('  got the boundary')

#Get the commutes within that FUA (only the first 10 rows, to keep the test quick):
full_od_matrix = pd.read_csv('../data/d02_processed-safegraph/weeks_od_us_fua.csv')
fua_raw_od_matrix = full_od_matrix[full_od_matrix.fuacode==fua_code][:10].reset_index(drop=True)
fua_raw_od_matrix['fuacode'] = fua_code

print('  got the SafeGraph od matrix')

#Trim rows for which centroids lie outside the FUA:
od_matrix = trim_centroids(fua_raw_od_matrix, fua_buffered_boundary)
print('  trimmed the od matrix')

#Get the graphs (projected to EPSG:5070, the same CRS as the points below):
# NOTE(review): the distances in the recorded output are several orders of
# magnitude larger than the straight-line separation of the points. Check
# whether taxicab expects an unprojected (lat/lon) graph -- TODO confirm.
walk_graph = ox.project_graph(ox.load_graphml('../data/d03_intermediate/FUA-networks/walk/'+fua_code+'.graphml'), to_crs='EPSG:5070')
drive_graph = ox.project_graph(ox.load_graphml('../data/d03_intermediate/FUA-networks/drive/'+fua_code+'.graphml'), to_crs='EPSG:5070')
print('  got the street networks')

#Get the geometries of origin and destinations:
centroids_pt = gpd.points_from_xy(x=od_matrix.intptlon, y=od_matrix.intptlat, crs='EPSG:4326').to_crs('EPSG:5070')
od_matrix['origin_x'], od_matrix['origin_y'] = centroids_pt.x, centroids_pt.y

places_pt = gpd.points_from_xy(x= od_matrix.longitude, y=od_matrix.latitude, crs='EPSG:4326').to_crs('EPSG:5070')
od_matrix['dest_x'], od_matrix['dest_y'] = places_pt.x, places_pt.y

print('  georeferenced origin and destination')

#Get the preferred commute mode (walk when the straight-line distance is within the threshold):
od_matrix['mode'] = places_pt.distance(centroids_pt) <= threshold
od_matrix['mode'] = od_matrix['mode'].map({True: 'walk', False:'drive'})

print('  got preferred mode of commute')

#Now we split the dataframe into two (one for walking and one for driving):
od_matrix_dict = {mode: df for mode, df in od_matrix.groupby('mode')}
G = {'drive': drive_graph, 'walk': walk_graph}

#For each of those dataframes, we route every row on the appropriate graph:
result_cols = ['distance', 'route', 'origin_edge', 'dest_edge']
full_dfs = []
for mode, df in od_matrix_dict.items():
    outputs = shortest_path_taxicab_distance(G[mode],
                                             zip(df['origin_y'].values, df['origin_x'].values),
                                             zip(df['dest_y'].values, df['dest_x'].values),
                                             cpus=number_of_cores)
    # Build the result columns as their own frame, aligned on the group's index.
    # Assigning the raw list of tuples to four columns makes NumPy coerce ragged
    # data (routes have different lengths), which warns on old NumPy versions and
    # fails on recent ones; it also wrote into a groupby slice.
    results = pd.DataFrame(outputs, columns=result_cols, index=df.index)
    full_dfs.append(df.join(results))

#Merge these dataframes to obtain the OD matrix with naive network distance:
od_matrix_naivedistance = pd.concat(full_dfs, ignore_index=True)
print('  got naive network distance')

  got the boundary
  got the SafeGraph od matrix
  trimmed the od matrix
  got the street networks
  georeferenced origin and destination
  got preferred mode of commute
  got naive network distance


  return asarray(a).ndim


In [21]:
# Preview the routed OD matrix; head() keeps the output bounded once the
# 10-row test slice is replaced by the full data.
od_matrix_naivedistance.head(10)

Unnamed: 0,safegraph_place_id,census_block_group,top_category,latitude,longitude,fuacode,intptlat,intptlon,origin_x,origin_y,dest_x,dest_y,mode,distance,route,origin_edge,dest_edge
0,sg:001855be013b421688f5f8c2e724d076,550250114021,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,43.126509,-89.27113,544188.014181,2256107.0,544257.01835,2261226.0,drive,98387827.199471,"[53499317, 53588396, 7037047764, 53402574, 533...",LINESTRING (544575.3057382132 2256129.58650420...,LINESTRING (544307.3510307875 2261179.83151095...
1,sg:001855be013b421688f5f8c2e724d076,550250022003,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,43.120499,-89.366947,536498.451002,2254893.0,544257.01835,2261226.0,drive,8945067.360424,"[53638087, 53516675, 53516676, 53463305, 53688...",LINESTRING (536472.0791405647 2254869.11136427...,LINESTRING (544307.3510307875 2261179.83151095...
2,sg:001855be013b421688f5f8c2e724d076,550250115032,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,43.20122,-89.244345,545757.939989,2264584.0,544257.01835,2261226.0,drive,30761761.493928,"[53419992, 53446225, 53591544, 53462544, 53691...",LINESTRING (545725.4737712108 2264351.59491817...,LINESTRING (544307.3510307875 2261179.83151095...
3,sg:001855be013b421688f5f8c2e724d076,550250118001,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,43.214106,-89.098972,557377.057688,2266863.0,544257.01835,2261226.0,drive,11767617.137245,"[53436059, 53436084, 53486384, 53486753, 53580...",LINESTRING (558243.0942216004 2267218.09009147...,LINESTRING (544307.3510307875 2261179.83151095...
4,sg:001855be013b421688f5f8c2e724d076,551050029003,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,42.783193,-89.312406,543552.19849,2217612.0,544257.01835,2261226.0,drive,11289032.854993,"[232256855, 206942603, 232276988, 370504076, 2...",LINESTRING (543612.3546742649 2217686.32456645...,LINESTRING (544307.3510307875 2261179.83151095...
5,sg:001855be013b421688f5f8c2e724d076,550250122021,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,42.935872,-89.244755,547829.890909,2235017.0,544257.01835,2261226.0,drive,5201251.863442,"[2612690949, 2869264964, 53582982, 6538574537,...",[],LINESTRING (544307.3510307875 2261179.83151095...
6,sg:001855be013b421688f5f8c2e724d076,550250114023,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,43.178295,-89.295503,541812.433687,2261737.0,544257.01835,2261226.0,drive,62200745.813049,"[53604645, 53471418, 496906382, 496673615, 534...",LINESTRING (541090.3641622717 2261523.01964497...,LINESTRING (544307.3510307875 2261179.83151095...
7,sg:001855be013b421688f5f8c2e724d076,550250132005,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,43.245704,-89.281518,542409.029419,2269326.0,544257.01835,2261226.0,drive,63089527.977888,"[53428914, 53523504, 53523514, 53414613, 53394...",LINESTRING (543779.0373387826 2268628.55701660...,LINESTRING (544307.3510307875 2261179.83151095...
8,sg:001855be013b421688f5f8c2e724d076,550250024021,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,43.137981,-89.359067,536998.405173,2256885.0,544257.01835,2261226.0,drive,25224371.67682,"[7183201235, 53606473, 53445186, 8997363242, 2...",LINESTRING (537029.3540927828 2256708.78558509...,LINESTRING (544307.3510307875 2261179.83151095...
9,sg:001855be013b421688f5f8c2e724d076,550250006001,Restaurants and Other Eating Places,43.172181,-89.265802,USA80,43.03281,-89.452007,530303.853059,2244644.0,544257.01835,2261226.0,drive,5784511.647953,"[53475763, 53475766, 53706624, 5596394465, 540...",LINESTRING (530230.9315564209 2244684.37740012...,LINESTRING (544307.3510307875 2261179.83151095...


In [None]:
#Get the rows that need reworking (labelled 'walk' although the network distance exceeds the threshold):
bad_rows = (od_matrix_naivedistance['mode']=='walk') & (od_matrix_naivedistance['distance'] > threshold)
print('  got bad rows')

#Relabel the bad rows as driving trips:
od_matrix_naivedistance.loc[bad_rows, 'mode'] = 'drive'

#Route only those rows again, this time on the driving graph:
rework = od_matrix_naivedistance.loc[bad_rows]
if not rework.empty:  # nothing to reroute -> do not spin up a worker pool
    outputs = shortest_path_taxicab_distance(drive_graph,
                                             zip(rework['origin_y'].values, rework['origin_x'].values),
                                             zip(rework['dest_y'].values, rework['dest_x'].values),
                                             cpus=number_of_cores)
    # Each output is a (distance, route, origin_edge, dest_edge) tuple. Refresh
    # all four columns so the route and edges match the new driving distance.
    result_cols = ['distance', 'route', 'origin_edge', 'dest_edge']
    reworked = pd.DataFrame(outputs, columns=result_cols, index=rework.index)
    for col in result_cols:
        od_matrix_naivedistance.loc[bad_rows, col] = reworked[col]
print('  got final network distance')

#We need to drop some columns (and potentially a few more created by merges and dropping indices):
cols_to_drop = ['origin_x', 'origin_y', 'dest_x', 'dest_y']
cols_to_drop += [col for col in od_matrix_naivedistance.columns if 'Unnamed' in str(col)]

#Drop and save once, after the full list of columns is known:
od_matrix_finaldistance = od_matrix_naivedistance.drop(cols_to_drop, axis=1)
# NOTE(review): `outpath` is not defined anywhere in the visible notebook;
# set it to the desired CSV location before running this cell.
od_matrix_finaldistance.to_csv(outpath)
print('  saved')