In [1]:
import cudf
import cuspatial
import nvstrings
from collections import OrderedDict
import numpy as np
import datetime as dt
import cugraph

%load_ext autotime

In [2]:
dtypes = OrderedDict([
    ('OccupancyDateTime', 'date'),
    ('PaidOccupancy', 'int64'),
    ('BlockfaceName', 'str'),
    ('SideOfStreet', 'str'),
    ('SourceElementKey', 'int64'),
    ('ParkingTimeLimitCategory', 'int64'),
    ('ParkingSpaceCount', 'int64'),
    ('PaidParkingArea', 'str'),
    ('PaidParkingSubArea', 'str'),
    ('PaidParkingRate', 'int8'),
    ('ParkingCategory', 'str'),
    ('Location', 'str'),
    ('dow', 'int8')
])

df = cudf.read_csv(
    '../data/parking_MayJun2019.csv'
    , skiprows=1
    , dtype=list(dtypes.values())
    , names=list(dtypes.keys())
)

df = df[['SourceElementKey', 'Location']]

time: 6.61 s


In [3]:
def extractLon(location):
    lon = location.str.extract('([0-9\.\-]+) ([0-9\.]+)')[0]
    return lon.str.stod()

def extractLat(location):
    lon = location.str.extract('([0-9\.\-]+) ([0-9\.]+)')[1]
    return lon.str.stod()
    
df['longitude'] = extractLon(df['Location'])
df['latitude'] = extractLat(df['Location'])

# df[['Location', 'longitude', 'latitude']].head().to_pandas()

time: 9.98 s


In [4]:
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="todrabas_test")
location = geolocator.geocode("400 Broad St, Seattle, WA 98109") # SPACE NEEDLE

df['LON_Ref'] = location.longitude
df['LAT_Ref'] = location.latitude

time: 915 ms


## Comparing cuspatial with vanilla Python Haversine

In [52]:
from math import sin, cos, sqrt, atan2, pi

def calculateDistance(latitude, longitude, LAT_Ref, LON_Ref, Distance_Python):
    R = 3958.8 # Earth's radius in miles
    
    for i, (lt, ln, lt_r, ln_r) in enumerate(zip(latitude, longitude, LAT_Ref, LON_Ref)):
        lt_rad = lt / 180.0 * pi
        ln_rad = ln / 180.0 * pi
        
        dlon = (ln_r - ln) / 180.0 * pi
        dlat = (lt_r - lt) / 180.0 * pi
        a = (sin(dlat/2.0))**2 + cos(lt_rad) * cos(lt_rad) * (sin(dlon/2.0))**2
        c = 2 * atan2(sqrt(a), sqrt(1-a))
        distance = R * c
        Distance_Python[i] = distance * 5280 # in feet
        
df = df.apply_rows(
    calculateDistance
    , incols=['latitude', 'longitude', 'LAT_Ref', 'LON_Ref']
    , outcols={'Distance_Python':np.float64}
    , kwargs={}
)

time: 619 ms


In [13]:
distances = cuspatial.haversine_distance(df['longitude'], df['latitude'], df['LAT_Ref'], df['LON_Ref'])

time: 45.2 ms


## As crow flies vs as people walk

In [5]:
locations = df[['SourceElementKey', 'longitude', 'latitude']].drop_duplicates()
locations.head()

Unnamed: 0,SourceElementKey,longitude,latitude
4080,1001,-122.334694,47.602873
1336,1002,-122.334513,47.602949
4455,1006,-122.335143,47.603674
1026,1009,-122.336658,47.605018
3208,1010,-122.336447,47.605101


time: 734 ms


In [6]:
del df

time: 17.4 ms


In [7]:
!head ../data/king_county_road_graph_20190909.csv

node1,node2,LENGTH
89108,27652,5.02825
27652,89108,5.02825
27652,122930,112.417
122930,27652,112.417
36778,36779,48.2475
36779,36778,48.2475
26559,26559,48.3425
26559,26559,48.3425
2634,78382,372.325
time: 529 ms


In [8]:
!head ../data/king_county_road_nodes_20190909.csv

NodeID,Lon,Lat
1,-121.431505,47.331052
2,-121.430642,47.334726
3,-121.404812,47.288757
4,-121.408953,47.289157
5,-121.432249,47.292963
6,-121.432702,47.294297
7,-121.432724,47.329542
8,-121.367462,47.290593
9,-121.360285,47.29045
time: 508 ms


In [150]:
road_graph_data = cudf.read_csv('../data/king_county_road_graph_20190909.csv')
road_graph_data['node1'] = road_graph_data['node1'].astype('int32')
road_graph_data['node2'] = road_graph_data['node2'].astype('int32')
road_graph_data['LENGTH'] = road_graph_data['LENGTH'] * 3.28084 # convert to feet

time: 44.1 ms


In [151]:
road_nodes = cudf.read_csv('../data/king_county_road_nodes_20190909.csv')
road_nodes['NodeID'] = road_nodes['NodeID'].astype('int32')

time: 26.7 ms


In [152]:
nodeId = road_nodes['NodeID'].max()
nodeId

127380

time: 2.8 ms


In [153]:
parking_locations = locations.to_pandas().to_dict('records')
parking_locations_nodes = cudf.DataFrame(columns=road_nodes.columns)

parking_locations_nodes.columns

Index(['NodeID', 'Lon', 'Lat'], dtype='object')

time: 21.2 ms


In [154]:
# loc = parking_locations[0]

for loc in parking_locations:
    nodeId = nodeId + 1

    road_nodes['Lon_REF'] = loc['longitude']
    road_nodes['Lat_REF'] = loc['latitude']
    road_nodes['Distance'] = cuspatial.haversine_distance(
          road_nodes['Lon']
        , road_nodes['Lat']
        , road_nodes['Lon_REF']
        , road_nodes['Lat_REF']) * 0.621371 * 5280 # distance in feet as cuspatial returns kilometers

    nearest = road_nodes.nsmallest(1, 'Distance')
    nearest['node2'] = nodeId

    nearest = nearest[['NodeID', 'node2', 'Distance']].rename({'NodeID': 'node1', 'Distance': 'LENGTH'}) # get nearest road intersection
    road_graph_data = cudf.concat([road_graph_data, nearest])
    # nearest

    rec = {'NodeID': nodeId, 'Lon': loc['longitude'], 'Lat': loc['latitude']}
    parking_locations_tmp = cudf.DataFrame(rec)
    parking_locations_nodes = cudf.concat([parking_locations_nodes, parking_locations_tmp])

time: 1min 2s


In [155]:
road_graph_data = road_graph_data.reset_index(drop=True)
road_graph_data['node1'] = road_graph_data['node1'].astype('int32')
road_graph_data['node2'] = road_graph_data['node2'].astype('int32')

road_graph_data

sources = cudf.Series(road_graph_data['node1'])
destinations = cudf.Series(road_graph_data['node2'])
distances = cudf.Series(road_graph_data['LENGTH'])

g = cugraph.Graph()
g.add_edge_list(sources, destinations, distances)

time: 3.65 ms


In [157]:
road_graph_data.query('node2 == 127381')

Unnamed: 0,node1,node2,LENGTH
315112,49081,127381,154.029908


time: 21.3 ms


In [158]:
cugraph.sssp(g, 49081).nsmallest(10, "distance")

Unnamed: 0,vertex,distance,predecessor
49081,49081,0.0,-1
128443,128443,150.627597,49081
127381,127381,154.029908,49081
127382,127382,154.646241,49081
127618,127618,165.073143,49081
127617,127617,194.373414,49081
49079,49079,336.732294,49081
49082,49082,340.301848,49081
49084,49084,351.099093,49081
49083,49083,415.626654,49082


time: 78.7 ms


In [159]:
road_nodes = cudf.concat([road_nodes[['NodeID', 'Lon', 'Lat']], parking_locations_nodes]).reset_index(drop=True)

time: 6.95 ms


In [160]:
road_nodes['Lon_REF'] = location.longitude
road_nodes['Lat_REF'] = location.latitude

time: 6.09 ms


In [161]:
road_nodes['Distance'] = cuspatial.haversine_distance(
          road_nodes['Lon']
        , road_nodes['Lat']
        , road_nodes['Lon_REF']
        , road_nodes['Lat_REF']) * 0.621371 * 5280 # distance in feet as cuspatial returns miles

i = road_nodes.nsmallest(1, 'Distance')
# space_needle_to_nearest_intersection = i['Distance'].to_array()[0]
i['NodeID'].to_array()[0], space_needle_to_nearest_intersection

(47757, 175.90639145955663)

time: 24.2 ms


In [172]:
all_distances = cugraph.sssp(g, i['NodeID'].to_array()[0])
distances = all_distances.query('vertex > 127380 and distance  < 1000')#.rename({'vertex': 'NodeID'}).merge(road_nodes, on=['NodeID'])
distances['distance_complete'] = distances['distance'] + space_needle_to_nearest_intersection
distances
# distances.sort_values(by='distance')#.nsmallest(10, 'distance')

Unnamed: 0,vertex,distance,predecessor,distance_complete
127585,127585,961.587486,47829,1137.493878
127666,127666,853.64252,47799,1029.548911
127912,127912,969.183233,47829,1145.089625
127913,127913,967.686292,47829,1143.592683
128506,128506,968.573076,47829,1144.479467


time: 128 ms


In [171]:
distances

Unnamed: 0,vertex,distance,predecessor
127585,127585,961.587486,47829
127666,127666,853.64252,47799
127912,127912,969.183233,47829
127913,127913,967.686292,47829
128506,128506,968.573076,47829


time: 42.3 ms


In [60]:
road_graph_data.query('node1 == 47799 and node2 == 127666')

Unnamed: 0,node1,node2,LENGTH
315397,47799,127666,156.316054


time: 21.1 ms


In [65]:
all_distances.query('vertex == 47799')

Unnamed: 0,vertex,distance,predecessor
47799,47799,212.5451,47797


time: 24 ms


In [79]:
all_distances.query('vertex == 47797')

Unnamed: 0,vertex,distance,predecessor
47797,47797,125.6606,47756


time: 31.2 ms


In [111]:
road_graph_data.query('node1 == 47756 and node2 == 47797')

Unnamed: 0,node1,node2,LENGTH
146782,47756,47797,20.0819


time: 33.9 ms


In [114]:
all_distances.query('vertex == 47756')

Unnamed: 0,vertex,distance,predecessor
47756,47756,105.5787,80449


time: 32.9 ms


In [115]:
all_distances.query('vertex == 80449')

Unnamed: 0,vertex,distance,predecessor
80449,80449,12.7052,47757


time: 111 ms


In [117]:
road_graph_data.query('node1 == 80449 and node2 == 47757')

Unnamed: 0,node1,node2,LENGTH
112109,80449,47757,12.7052


time: 34.3 ms


In [135]:
road_graph_data_joined = (
    road_graph_data
    .rename({'node1': 'NodeID'})
    .merge(road_nodes[['NodeID', 'Lon', 'Lat']], on=['NodeID'])
    .rename({'NodeID': 'node1', 'node2': 'NodeID'})
    .merge(road_nodes[['NodeID', 'Lon', 'Lat']], on=['NodeID'])
)

road_graph_data_joined['distance'] = cuspatial.haversine_distance(
    road_graph_data_joined['Lat_x']
    , road_graph_data_joined['Lon_x']
    , road_graph_data_joined['Lat_y']
    , road_graph_data_joined['Lon_y']
) * 0.621371 * 5280

road_graph_data_joined

Unnamed: 0,node1,NodeID,LENGTH,Lon_x,Lat_x,Lon_y,Lat_y,distance
0,10448,81832,37.1379,-122.173648,47.701303,-122.173207,47.701231,161.489195
1,81832,10448,37.1379,-122.173207,47.701231,-122.173648,47.701303,161.489195
2,10425,10492,24.5553,-122.173635,47.699438,-122.173335,47.699435,109.445343
3,10492,10425,24.5553,-122.173335,47.699435,-122.173635,47.699438,109.445343
4,10261,10335,35.2449,-122.187822,47.679216,-122.187393,47.679192,156.574102
5,10335,10261,35.2449,-122.187393,47.679192,-122.187822,47.679216,156.574102
6,81934,81935,89.3989,-122.178910,47.666713,-122.177860,47.666772,383.224741
7,81935,81934,89.3989,-122.177860,47.666772,-122.178910,47.666713,383.224741
8,10633,92309,139.4120,-122.185581,47.661069,-122.183881,47.661011,620.283885
9,92309,10633,139.4120,-122.183881,47.661011,-122.185581,47.661069,620.283885


time: 109 ms
