In [1]:
import cudf
import cuspatial
import nvstrings
from collections import OrderedDict
import numpy as np
import datetime as dt
import cugraph

%load_ext autotime

In [2]:
# Column name -> dtype for the parking-occupancy CSV, listed in file column order.
# The file's own header row is skipped on load and these names are used instead.
dtypes = OrderedDict()
dtypes['OccupancyDateTime'] = 'date'
dtypes['PaidOccupancy'] = 'int64'
dtypes['BlockfaceName'] = 'str'
dtypes['SideOfStreet'] = 'str'
dtypes['SourceElementKey'] = 'int64'
dtypes['ParkingTimeLimitCategory'] = 'int64'
dtypes['ParkingSpaceCount'] = 'int64'
dtypes['PaidParkingArea'] = 'str'
dtypes['PaidParkingSubArea'] = 'str'
dtypes['PaidParkingRate'] = 'int8'
dtypes['ParkingCategory'] = 'str'
dtypes['Location'] = 'str'
dtypes['dow'] = 'int8'

column_names = list(dtypes.keys())
column_types = list(dtypes.values())

df = cudf.read_csv(
    '../data/parking_MayJun2019.csv',
    skiprows=1,
    dtype=column_types,
    names=column_names,
)

# Keep just the blockface key and its point string; no other column is read later on.
df = df[['SourceElementKey', 'Location']]

time: 6.6 s


In [3]:
def extractLon(location):
    """Parse the longitude out of a column of point strings.

    Parameters
    ----------
    location : string column exposing ``.str.extract`` and ``.str.stod``
        Each value must contain two space-separated decimal numbers in
        ``<lon> <lat>`` order (presumably WKT such as ``POINT (lon lat)`` --
        confirm against the CSV).

    Returns
    -------
    The first captured number of every row, converted from string to a
    floating-point column by ``stod``.
    """
    # Raw string avoids relying on invalid escape sequences ('\.' and '\-')
    # in a normal literal; both groups accept a leading minus so points with
    # a negative latitude still match.
    coords = location.str.extract(r'([0-9\.\-]+) ([0-9\.\-]+)')
    return coords[0].str.stod()

def extractLat(location):
    """Parse the latitude out of a column of point strings.

    Parameters
    ----------
    location : string column exposing ``.str.extract`` and ``.str.stod``
        Each value must contain two space-separated decimal numbers in
        ``<lon> <lat>`` order (presumably WKT such as ``POINT (lon lat)`` --
        confirm against the CSV).

    Returns
    -------
    The second captured number of every row, converted from string to a
    floating-point column by ``stod``.
    """
    # Raw string avoids relying on invalid escape sequences ('\.' and '\-')
    # in a normal literal; the latitude group now accepts a leading minus so
    # southern-hemisphere points are parsed instead of failing to match.
    coords = location.str.extract(r'([0-9\.\-]+) ([0-9\.\-]+)')
    return coords[1].str.stod()
    
# Split the point string into two numeric columns.
raw_points = df['Location']
df['longitude'] = extractLon(raw_points)
df['latitude'] = extractLat(raw_points)

# To eyeball the parse: df[['Location', 'longitude', 'latitude']].head().to_pandas()

time: 10.1 s


In [4]:
from geopy.geocoders import Nominatim

# Look up the destination once and broadcast its coordinates to every row so
# that row-wise distance functions can read them as ordinary columns.
geolocator = Nominatim(user_agent="todrabas_test")
location = geolocator.geocode("400 Broad St, Seattle, WA 98109") # SPACE NEEDLE

# geocode() yields None when the service finds no match; fail here with a clear
# message rather than with an AttributeError on the lines below.
if location is None:
    raise RuntimeError("Geocoding returned no result for the destination address")

df['LON_Ref'] = location.longitude
df['LAT_Ref'] = location.latitude

time: 957 ms


## Comparing cuspatial with vanilla Python Haversine

In [5]:
from math import sin, cos, sqrt, atan2, pi

def calculateDistance(latitude, longitude, LAT_Ref, LON_Ref, Distance_Python):
    """Row-wise haversine distance, in feet, between each point and its reference.

    Written in the kernel form used with ``DataFrame.apply_rows``: the four
    inputs are equal-length sequences of degrees and the result is written
    in place into ``Distance_Python`` (nothing is returned).

    Parameters
    ----------
    latitude, longitude : point coordinates in decimal degrees.
    LAT_Ref, LON_Ref : reference-point coordinates in decimal degrees.
    Distance_Python : output sequence; element ``i`` receives the distance
        for row ``i`` in feet.
    """
    R = 3958.8 # Earth's radius in miles

    for i, (lt, ln, lt_r, ln_r) in enumerate(zip(latitude, longitude, LAT_Ref, LON_Ref)):
        lt_rad = lt / 180.0 * pi
        lt_r_rad = lt_r / 180.0 * pi

        dlon = (ln_r - ln) / 180.0 * pi
        dlat = (lt_r - lt) / 180.0 * pi
        # Haversine needs the cosine of BOTH latitudes; the previous code
        # squared cos(lt_rad), which is only approximately right when the two
        # points share nearly the same latitude.
        a = (sin(dlat/2.0))**2 + cos(lt_rad) * cos(lt_r_rad) * (sin(dlon/2.0))**2
        c = 2 * atan2(sqrt(a), sqrt(1-a))
        distance = R * c
        Distance_Python[i] = distance * 5280 # in feet
        
# Run the pure-Python haversine kernel over every row; the result of
# apply_rows (with the Distance_Python output column) replaces df.
haversine_inputs = ['latitude', 'longitude', 'LAT_Ref', 'LON_Ref']
haversine_outputs = {'Distance_Python': np.float64}

df = df.apply_rows(
    calculateDistance,
    incols=haversine_inputs,
    outcols=haversine_outputs,
    kwargs={},
)

time: 555 ms


In [6]:
# haversine_distance takes (lon1, lat1, lon2, lat2) -- the same order used by the
# other calls in this notebook. The reference columns were previously passed
# as (lat, lon), which silently swaps the destination's coordinates.
distances = cuspatial.haversine_distance(df['longitude'], df['latitude'], df['LON_Ref'], df['LAT_Ref'])

time: 33 ms


## As the crow flies vs. as people walk

In [5]:
# One row per distinct (key, longitude, latitude) combination.
location_columns = ['SourceElementKey', 'longitude', 'latitude']
locations = df[location_columns].drop_duplicates()
locations.head()

Unnamed: 0,SourceElementKey,longitude,latitude
4080,1001,-122.334694,47.602873
1336,1002,-122.334513,47.602949
4455,1006,-122.335143,47.603674
1026,1009,-122.336658,47.605018
3208,1010,-122.336447,47.605101


time: 739 ms


In [6]:
# The full occupancy frame is not referenced again below; drop the name
# (presumably to release GPU memory before the road graph is loaded).
del df

time: 18.1 ms


In [7]:
# Peek at the raw edge list: one row per directed edge (node1, node2, LENGTH).
!head ../data/king_county_road_graph_20190909.csv

node1,node2,LENGTH
89108,27652,5.02825
27652,89108,5.02825
27652,122930,112.417
122930,27652,112.417
36778,36779,48.2475
36779,36778,48.2475
26559,26559,48.3425
26559,26559,48.3425
2634,78382,372.325
time: 519 ms


In [8]:
# Peek at the raw node list: one row per node (NodeID, Lon, Lat).
!head ../data/king_county_road_nodes_20190909.csv

NodeID,Lon,Lat
1,-121.431505,47.331052
2,-121.430642,47.334726
3,-121.404812,47.288757
4,-121.408953,47.289157
5,-121.432249,47.292963
6,-121.432702,47.294297
7,-121.432724,47.329542
8,-121.367462,47.290593
9,-121.360285,47.29045
time: 516 ms


In [9]:
# 3.28084 is the number of feet in one meter, so LENGTH is presumably stored
# in meters in the CSV -- confirm against the data source.
FEET_PER_METER = 3.28084

road_graph_data = cudf.read_csv('../data/king_county_road_graph_20190909.csv')

# Both edge endpoints are cast to int32.
for endpoint_col in ('node1', 'node2'):
    road_graph_data[endpoint_col] = road_graph_data[endpoint_col].astype('int32')

road_graph_data['LENGTH'] = road_graph_data['LENGTH'] * FEET_PER_METER # convert to feet

time: 22.5 ms


In [10]:
# Node table of the road graph; NodeID is cast to int32, the same dtype the
# edge endpoints are cast to.
road_nodes = cudf.read_csv('../data/king_county_road_nodes_20190909.csv')
node_ids_int32 = road_nodes['NodeID'].astype('int32')
road_nodes['NodeID'] = node_ids_int32

time: 27.3 ms


In [11]:
# Highest node id present in the road network. `nodeId` is used as a running
# counter further down: every new node added to the graph gets nodeId + 1.
nodeId = road_nodes['NodeID'].max()
nodeId

127380

time: 8.43 ms


In [12]:
# Host-side list of dicts, one per distinct parking location, for plain-Python iteration.
locations_host = locations.to_pandas()
parking_locations = locations_host.to_dict('records')

# Empty frame that will collect one graph node per parking location.
parking_node_columns = ['NodeID', 'Lon', 'Lat', 'SourceElementKey']
parking_locations_nodes = cudf.DataFrame(columns=parking_node_columns)

parking_locations_nodes.columns

Index(['NodeID', 'Lon', 'Lat', 'SourceElementKey'], dtype='object')

time: 14 ms


In [13]:
# Attach every parking location to the road graph: give it a fresh node id and
# add an edge from each of its 5 nearest road nodes, weighted by the
# straight-line distance in feet.
#
# The per-location frames are collected in lists and concatenated once after
# the loop; concatenating inside the loop re-copies the ever-growing frames on
# every iteration.
new_edges = []          # one 5-row (node1, node2, LENGTH) frame per parking location
new_parking_nodes = []  # one 1-row node frame per parking location

for loc in parking_locations:
    nodeId = nodeId + 1  # id of the node created for this parking location

    # haversine_distance works on equal-length columns, so the single parking
    # point is broadcast to every road node.
    road_nodes['Lon_REF'] = loc['longitude']
    road_nodes['Lat_REF'] = loc['latitude']
    road_nodes['Distance'] = cuspatial.haversine_distance(
          road_nodes['Lon']
        , road_nodes['Lat']
        , road_nodes['Lon_REF']
        , road_nodes['Lat_REF']) * 0.621371 * 5280 # kilometers -> miles -> feet

    nearest = road_nodes.nsmallest(5, 'Distance') # connect to the nearest 5 intersections
    nearest['node2'] = nodeId

    nearest = nearest[['NodeID', 'node2', 'Distance']].rename({'NodeID': 'node1', 'Distance': 'LENGTH'})
    new_edges.append(nearest)

    rec = {'NodeID': nodeId, 'Lon': loc['longitude'], 'Lat': loc['latitude'], 'SourceElementKey': loc['SourceElementKey']}
    new_parking_nodes.append(cudf.DataFrame(rec))

# Same row order as appending one frame at a time, but a single copy each.
road_graph_data = cudf.concat([road_graph_data] + new_edges)
parking_locations_nodes = cudf.concat([parking_locations_nodes] + new_parking_nodes)

time: 1min 5s


In [14]:
# Sanity check: the first parking nodes and the ids they were assigned.
parking_locations_nodes.head()

Unnamed: 0,NodeID,Lon,Lat,SourceElementKey
0,127381,-122.334694,47.602873,1001
0,127382,-122.334513,47.602949,1002
0,127383,-122.335143,47.603674,1006
0,127384,-122.336658,47.605018,1009
0,127385,-122.336447,47.605101,1010


time: 21 ms


In [15]:
# Drop the scratch columns (Lon_REF / Lat_REF / Distance) from the road nodes,
# then append the parking nodes so both kinds of node live in one table.
intersection_nodes = road_nodes[['NodeID', 'Lon', 'Lat']]
road_nodes = cudf.concat([intersection_nodes, parking_locations_nodes]).reset_index(drop=True)

time: 62 ms


In [16]:
# Broadcast the geocoded destination (`location`) to every node so it can be
# passed to a column-wise distance function.
road_nodes['Lon_REF'] = location.longitude
road_nodes['Lat_REF'] = location.latitude

time: 4.82 ms


In [17]:
# Straight-line distance from every node to the destination. The 0.621371
# factor converts kilometers to miles and 5280 converts miles to feet.
distance_km = cuspatial.haversine_distance(
    road_nodes['Lon'],
    road_nodes['Lat'],
    road_nodes['Lon_REF'],
    road_nodes['Lat_REF'],
)
road_nodes['Distance'] = distance_km * 0.621371 * 5280 # distance in feet

# The five closest nodes, and the distance of the very closest one.
space_needle_to_nearest_intersection = road_nodes.nsmallest(5, 'Distance')
nearest_distances = space_needle_to_nearest_intersection['Distance'].to_array()
space_needle_to_nearest_intersection_dist = nearest_distances[0]

time: 25.9 ms


In [18]:
# The destination becomes one more graph node (id nodeId + 1) with an outgoing
# edge to each of its five nearest nodes.
space_needle_node = nodeId + 1
space_needle_to_nearest_intersection['node1'] = space_needle_node

space_needle_edges = space_needle_to_nearest_intersection.rename({'NodeID': 'node2', 'Distance': 'LENGTH'})
space_needle_to_nearest_intersection = space_needle_edges[['node1', 'node2', 'LENGTH']]

road_graph_data = cudf.concat([space_needle_to_nearest_intersection, road_graph_data])
space_needle_to_nearest_intersection

Unnamed: 0,node1,node2,LENGTH
47756,128854,47757,175.906391
80448,128854,80449,200.062128
96739,128854,96740,261.056715
108797,128854,108798,277.221141
47827,128854,47828,301.71549


time: 24.4 ms


In [19]:
# The concatenations left duplicate index labels; start again from a clean
# index and cast both endpoint columns to int32.
road_graph_data = road_graph_data.reset_index(drop=True)
for endpoint_col in ('node1', 'node2'):
    road_graph_data[endpoint_col] = road_graph_data[endpoint_col].astype('int32')

# Edge list as three parallel series: source, destination, weight.
sources, destinations, distances = (
    cudf.Series(road_graph_data[edge_col]) for edge_col in ('node1', 'node2', 'LENGTH')
)

g = cugraph.Graph()
g.add_edge_list(sources, destinations, distances)

time: 15.3 ms


In [20]:
# Single-source shortest paths from the destination node (nodeId + 1) to every vertex.
all_distances = cugraph.sssp(g, nodeId + 1)

# Parking nodes were numbered consecutively after the original road nodes, one
# per entry of parking_locations, so their ids fall in
# (nodeId - len(parking_locations), nodeId]. Deriving the lower bound avoids
# hard-coding the road network's largest node id.
last_road_node = nodeId - len(parking_locations)

# Keep parking nodes within 1000 of the destination along the road graph
# (edge weights were converted to feet when the graph was assembled).
distances = all_distances.query('vertex > @last_road_node and vertex < @nodeId + 1 and distance < 1000')

distances

Unnamed: 0,vertex,distance,predecessor
127433,127433,987.687911,96769
127585,127585,471.997029,47828
127586,127586,451.645186,47828
127666,127666,986.392815,47797
127667,127667,992.953966,47797
127822,127822,991.465776,47830
127823,127823,960.587564,47830
127912,127912,801.219246,47829
127913,127913,799.722305,47829
127979,127979,998.735532,47829


time: 175 ms


In [21]:
# Unfold the shortest-path tree into explicit paths: for every parking node in
# `distances`, follow the `predecessor` links in `all_distances` back to the
# source node and collect every visited row in `paths`.
# NOTE(review): `paths` is only assigned inside the loop, so it stays undefined
# when `distances` is empty and the following cell would then fail.
# vertex = distances.pop('vertex')

# The source of the SSSP run is the node created for the destination.
closest_node = nodeId + 1 #space_needle_to_nearest_intersection['NodeID'].to_array()[0]
parking_cnt = distances['vertex'].count()
parking_cnt

for i in range(parking_cnt):
    print('Processing record: {0}'.format(i))
    parking_node = distances.iloc[i]
    # Positional access: column 0 is 'vertex', column 2 is 'predecessor'.
    vertex = int(parking_node[0])
    predecessor = int(parking_node[2])
    
    # Seed `paths` with the parking node's own row (first record) or prepend it.
    if i == 0:
        paths = all_distances.query('vertex == @vertex')
    else:
        paths = cudf.concat([all_distances.query('vertex == @vertex'), paths])

    # Walk towards the source one predecessor at a time; each step runs a
    # query over `all_distances` and prepends the row that was found.
    while vertex != closest_node:
        temp = all_distances.query('vertex == @predecessor')
        paths = cudf.concat([temp, paths])
        predecessor = temp['predecessor'].to_array()[0]
        vertex = temp['vertex'].to_array()[0]

Processing record: 0
Processing record: 1
Processing record: 2
Processing record: 3
Processing record: 4
Processing record: 5
Processing record: 6
Processing record: 7
Processing record: 8
Processing record: 9
Processing record: 10
Processing record: 11
Processing record: 12
Processing record: 13
Processing record: 14
Processing record: 15
Processing record: 16
time: 1.27 s


In [22]:
# Cast both id columns to int64 before joining on NodeID, then drop the rows
# that several paths have in common.
for id_col in ('vertex', 'predecessor'):
    paths[id_col] = paths[id_col].astype('int64')
paths = paths.drop_duplicates()

node_coords = road_nodes[['NodeID', 'Lat', 'Lon']]
segment_columns = ['vertex', 'Lat_x', 'Lon_x', 'Lat_y', 'Lon_y']

# First join: coordinates of the vertex itself (these become Lat_x / Lon_x
# once the second join adds columns with the same names).
with_vertex_coords = (
    paths
    .rename({'vertex': 'NodeID'})
    .merge(node_coords, on='NodeID', how='left')
)

# Second join: coordinates of the predecessor (Lat_y / Lon_y). A predecessor
# with no row in road_nodes gets the geocoded destination's coordinates.
with_both_coords = (
    with_vertex_coords
    .rename({'NodeID': 'vertex', 'predecessor': 'NodeID'})
    .merge(node_coords, on='NodeID', how='left')
    .fillna({'Lat_y': location.latitude, 'Lon_y': location.longitude})
)

# Keep one segment per vertex with an id up to nodeId, and move the result to the host.
paths_host = with_both_coords[segment_columns].query('vertex < @nodeId + 1').to_pandas()

paths_host

Unnamed: 0,vertex,Lat_x,Lon_x,Lat_y,Lon_y
0,47756,47.620906,-122.347603,47.620906,-122.348736
1,47797,47.620905,-122.347358,47.620906,-122.347603
2,47829,47.619735,-122.347581,47.619742,-122.34886
3,47830,47.618574,-122.348924,47.619742,-122.34886
4,96769,47.620082,-122.351528,47.620082,-122.350232
5,127433,47.620365,-122.352939,47.620082,-122.351528
6,127585,47.619804,-122.348174,47.619742,-122.34886
7,127586,47.619679,-122.348258,47.619742,-122.34886
8,127666,47.620966,-122.34567,47.620905,-122.347358
9,127667,47.620826,-122.345645,47.620905,-122.347358


time: 137 ms


In [23]:
# Attach coordinates and the original SourceElementKey to every reachable
# parking node, then move the small result to the host for plotting.
distances['vertex'] = distances['vertex'].astype('int64')

parking_node_info = road_nodes[['NodeID', 'Lat', 'Lon', 'SourceElementKey']]
distances_with_info = distances.rename({'vertex': 'NodeID'}).merge(parking_node_info, on='NodeID')
distances_host = distances_with_info[['SourceElementKey', 'Lat', 'Lon', 'distance']].to_pandas()

distances_host

Unnamed: 0,SourceElementKey,Lat,Lon,distance
0,11133,47.619804,-122.348174,471.997029
1,11134,47.619679,-122.348258,451.645186
2,54985,47.61806,-122.349479,999.813503
3,54986,47.617998,-122.349296,997.333559
4,2793,47.620365,-122.352939,987.687911
5,51494,47.619145,-122.348791,520.101226
6,75173,47.62015,-122.347713,620.706202
7,33741,47.619808,-122.346958,801.219246
8,33742,47.619666,-122.346963,799.722305
9,51493,47.619064,-122.34904,552.982577


time: 23.2 ms


In [27]:
# HTML snippet shown when a parking marker is clicked; the fields are filled
# from one row of distances_host.
info_box_template = """
<dl>
<dt>SourceElementKey</dt><dd>{SourceElementKey}</dd>
<dt>Distance</dt><dd>{distance:.0f} ft.</dd>
</dl>
"""

# One rendered info box per row, in row order.
parking_info = []
for parking_record in distances_host.to_dict('records'):
    parking_info.append(info_box_template.format(**parking_record))

time: 1.14 ms


In [26]:
import os

import gmaps

# The Google API key is read from the environment so that it is never saved
# with the notebook. Create one at https://console.developers.google.com and
# export it as GOOGLE_API_KEY before starting Jupyter. The key that used to be
# embedded here has been exposed and should be revoked.
google_api_key = os.environ.get("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError("Set the GOOGLE_API_KEY environment variable to a valid Google Maps API key")
gmaps.configure(api_key=google_api_key)

# Green markers: parking locations, each with its info box.
parking_layer = gmaps.symbol_layer(
    distances_host[['Lat', 'Lon']], fill_color="green", stroke_color="green", scale=3, info_box_content=parking_info
)

# Red marker: the geocoded destination.
destinations_layer = gmaps.symbol_layer(
    [[location.latitude, location.longitude]]
    , info_box_content=['DESTINATION']
    , scale=5
    , fill_color="red"
    , stroke_color="red"
)

# Red lines: one segment per row of paths_host, from a vertex to its predecessor.
lines_layer = gmaps.drawing_layer(features=[
    gmaps.Line(
          start = (path['Lat_x'], path['Lon_x'])
        , end   = (path['Lat_y'], path['Lon_y'])
        , stroke_weight=2
        , stroke_color="red"
    )
    for path in paths_host.to_dict('records')]
)

fig = gmaps.figure(layout={'height': '500px'})
fig.add_layer(parking_layer)
fig.add_layer(destinations_layer)
fig.add_layer(lines_layer)
fig

Figure(layout=FigureLayout(height='500px'))

time: 104 ms
