Skip to content

Commit

Permalink
vectorize all great circle calculations
Browse files Browse the repository at this point in the history
  • Loading branch information
gboeing committed Feb 22, 2017
1 parent ab7f12a commit 75a7dc3
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 32 deletions.
64 changes: 38 additions & 26 deletions osmnx/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
from dateutil import parser as date_parser
from shapely.geometry import Point, LineString, Polygon, MultiPolygon
from shapely.ops import unary_union
from geopy.distance import great_circle, vincenty
from geopy.distance import vincenty
from geopy.geocoders import Nominatim

from . import globals
from .utils import log, make_str, get_largest_component
from .utils import log, make_str, get_largest_component, great_circle_vec
from .simplify import simplify_graph
from .projection import project_geometry, project_gdf
from .stats import count_streets_per_node
Expand Down Expand Up @@ -994,22 +994,26 @@ def add_edge_lengths(G):
Returns
-------
G : networkx multidigraph
"""

start_time = time.time()
"""

for u, v, key, data in G.edges(keys=True, data=True):

#geopy points are (lat, lon) so that's (y, x) for this great_circle calculation
u_point = (G.node[u]['y'], G.node[u]['x'])
v_point = (G.node[v]['y'], G.node[v]['x'])
edge_length = great_circle(u_point, v_point).m
data['length'] = edge_length
# first load all the edges' origin and destination coordinates as a dataframe indexed by u, v, key
coords = np.array([[u, v, k, G.node[u]['y'], G.node[u]['x'], G.node[v]['y'], G.node[v]['x']] for u, v, k in G.edges(keys=True)])
df_coords = pd.DataFrame(coords, columns=['u', 'v', 'k', 'u_y', 'u_x', 'v_y', 'v_x'])
df_coords[['u', 'v', 'k']] = df_coords[['u', 'v', 'k']].astype(np.int64)
df_coords = df_coords.set_index(['u', 'v', 'k'])

# then calculate the great circle distance with the vectorized function
gc_distances = great_circle_vec(lat1=df_coords['u_y'],
lng1=df_coords['u_x'],
lat2=df_coords['v_y'],
lng2=df_coords['v_x'])

gc_distances = gc_distances.fillna(value=0)
nx.set_edge_attributes(G, 'length', gc_distances.to_dict())

log('Added edge lengths to graph in {:,.2f} seconds'.format(time.time()-start_time))
return G


def get_nearest_node(G, point, return_dist=False):
"""
Return the graph node nearest to some specified point.
Expand All @@ -1026,21 +1030,29 @@ def get_nearest_node(G, point, return_dist=False):
-------
networkx multidigraph or tuple
multidigraph or optionally (multidigraph, float)
"""
start_time = time.time()
nodes = G.nodes(data=True)
"""
# dump graph node coordinates into a pandas dataframe indexed by node id with x and y columns
coords = np.array([[node, data['x'], data['y']] for node, data in G.nodes(data=True)])
df = pd.DataFrame(coords, columns=['node', 'x', 'y']).set_index('node')

# add columns to the dataframe representing the (constant) coordinates of the reference point
df['reference_y'] = point[0]
df['reference_x'] = point[1]

# calculate the distance between each node and the reference point
distances = great_circle_vec(lat1=df['reference_y'],
lng1=df['reference_x'],
lat2=df['y'],
lng2=df['x'])

# the nearest node is the one that minimizes great circle distance between its coordinates and the passed-in point
# geopy points are (lat, lon) so that's (y, x)
nearest_node = min(nodes, key=lambda node: great_circle((node[1]['y'], node[1]['x']), point).m)
log('Found nearest node ({}) to point {} in {:,.2f} seconds'.format(nearest_node[0], point, time.time()-start_time))
# nearest node's ID is the index label of the minimum distance
nearest_node = int(distances.idxmin())

# if caller requested return_dist, return distance between the point and the nearest node as well
if return_dist:
# if caller requested return_dist, calculate the great circle distance between the point and the nearest node and return it as well
distance = great_circle((nearest_node[1]['y'], nearest_node[1]['x']), point).m
return nearest_node[0], distance
return nearest_node, distances.loc[nearest_node]
else:
return nearest_node[0]
return nearest_node


def add_path(G, data, one_way):
Expand Down
21 changes: 16 additions & 5 deletions osmnx/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
from itertools import chain
from collections import Counter
import time
import warnings
import networkx as nx
from geopy.distance import great_circle
import numpy as np
import pandas as pd

from .utils import log, get_largest_component
from .utils import log, get_largest_component, great_circle_vec


def basic_stats(G, area=None):
Expand Down Expand Up @@ -114,10 +116,19 @@ def basic_stats(G, area=None):
street_density_km = None

# average circuity: sum of edge lengths divided by sum of great circle distance between edge endpoints
points = [((G.node[u]['y'], G.node[u]['x']), (G.node[v]['y'], G.node[v]['x'])) for u, v in G.edges()]
great_circle_distances = [great_circle(p1, p2).m for p1, p2 in points]
# first load all the edges origin and destination coordinates as a dataframe, then calculate the great circle distance with the vectorized function
coords = np.array([[G.node[u]['y'], G.node[u]['x'], G.node[v]['y'], G.node[v]['x']] for u, v, k in G.edges(keys=True)])
df_coords = pd.DataFrame(coords, columns=['u_y', 'u_x', 'v_y', 'v_x'])
# ignore warnings during this calculation because numpy warns it cannot calculate arccos for self-loops since u==v
warnings.filterwarnings('ignore')
gc_distances = great_circle_vec(lat1=df_coords['u_y'],
lng1=df_coords['u_x'],
lat2=df_coords['v_y'],
lng2=df_coords['v_x'])
warnings.filterwarnings('default')
gc_distances = gc_distances.fillna(value=0)
try:
circuity_avg = edge_length_total / sum(great_circle_distances)
circuity_avg = edge_length_total / gc_distances.sum()
except ZeroDivisionError:
circuity_avg = np.nan

Expand Down
34 changes: 33 additions & 1 deletion osmnx/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import logging as lg
import datetime as dt
import networkx as nx
import numpy as np

from . import globals

Expand Down Expand Up @@ -233,5 +234,36 @@ def get_largest_component(G, strongly=False):
log('Graph was not connected, retained only the largest weakly connected component ({:,} of {:,} total nodes) in {:.2f} seconds'.format(len(list(G.nodes())), original_len, time.time()-start_time))

return G


def great_circle_vec(lat1, lng1, lat2, lng2, earth_radius=6371009):
"""
Vectorized function to calculate the great-circle distance between two points or between vectors of points.
Parameters
----------
lat1 : float or array of float
lng1 : float or array of float
lat2 : float or array of float
lng2 : float or array of float
earth_radius : numeric
radius of earth in units in which distance will be returned (default is meters)
Returns
-------
distance : float or array of float
distance or vector of distances from (lat1, lng1) to (lat2, lng2) in units of earth_radius
"""

phi1 = np.deg2rad(90 - lat1)
phi2 = np.deg2rad(90 - lat2)

theta1 = np.deg2rad(lng1)
theta2 = np.deg2rad(lng2)

cos = (np.sin(phi1) * np.sin(phi2) * np.cos(theta1 - theta2) + np.cos(phi1) * np.cos(phi2))
arc = np.arccos(cos)


# return distance in units of earth_radius
distance = arc * earth_radius
return distance

1 comment on commit 75a7dc3

@gboeing
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolves #30

Please sign in to comment.