In [1]:
import cudf
import cuspatial
import nvstrings
from collections import OrderedDict
import numpy as np
import datetime as dt
import cugraph

%load_ext autotime

In [2]:
# Schema of the parking CSV: column name -> dtype, in file order.
dtypes = OrderedDict(
    OccupancyDateTime='date',
    PaidOccupancy='int64',
    BlockfaceName='str',
    SideOfStreet='str',
    SourceElementKey='int64',
    ParkingTimeLimitCategory='int64',
    ParkingSpaceCount='int64',
    PaidParkingArea='str',
    PaidParkingSubArea='str',
    PaidParkingRate='int8',
    ParkingCategory='str',
    Location='str',
    dow='int8',
)

# skiprows=1 drops the first line of the file (presumably its header row);
# column names and dtypes are supplied from `dtypes` instead.
df = cudf.read_csv(
    '../data/parking_MayJun2019.csv',
    skiprows=1,
    dtype=list(dtypes.values()),
    names=list(dtypes.keys()),
)

# Keep only the two columns the following cells use.
df = df[['SourceElementKey', 'Location']]

time: 6.59 s


In [3]:
def extractLon(location):
    """Return the longitude parsed from a column of location strings.

    The pattern captures two numeric tokens separated by a single space and
    the first token is used as the longitude (presumably the strings look
    like ``POINT (<lon> <lat>)`` -- TODO confirm against the raw data).

    Parameters
    ----------
    location : string column exposing ``.str.extract``.

    Returns
    -------
    The first captured token converted with ``.str.stod()``.
    """
    # Raw string: `\.` and `\-` are regex escapes, not Python string escapes.
    # The second group accepts '-' as well, so a row whose latitude is
    # negative still matches instead of yielding a null longitude.
    coords = location.str.extract(r'([0-9\.\-]+) ([0-9\.\-]+)')
    return coords[0].str.stod()

def extractLat(location):
    """Return the latitude parsed from a column of location strings.

    The pattern captures two numeric tokens separated by a single space and
    the second token is used as the latitude (presumably the strings look
    like ``POINT (<lon> <lat>)`` -- TODO confirm against the raw data).

    Parameters
    ----------
    location : string column exposing ``.str.extract``.

    Returns
    -------
    The second captured token converted with ``.str.stod()``.
    """
    # Raw string: `\.` and `\-` are regex escapes, not Python string escapes.
    # The latitude group accepts '-' so negative latitudes are parsed
    # instead of failing to match.
    coords = location.str.extract(r'([0-9\.\-]+) ([0-9\.\-]+)')
    lat = coords[1]
    return lat.str.stod()
    
# Parse both coordinates out of the raw location strings into new columns.
location_text = df['Location']
df['longitude'] = extractLon(location_text)
df['latitude'] = extractLat(location_text)

# df[['Location', 'longitude', 'latitude']].head().to_pandas()

time: 9.97 s


In [58]:
from geopy.geocoders import Nominatim

# Reference point for the distance comparison, resolved through the
# Nominatim geocoder (this performs a network request).
geolocator = Nominatim(user_agent="todrabas_test")
location = geolocator.geocode("400 Broad St, Seattle, WA 98109") # SPACE NEEDLE
if location is None:
    # geocode() yields None when the address cannot be resolved; fail here
    # with a clear message rather than on the attribute access below.
    raise RuntimeError("Geocoding the reference address returned no result")

# Broadcast the reference coordinates to every row: the distance cells that
# follow read the 'LON_Ref' and 'LAT_Ref' columns, so they have to exist
# when the notebook is run top to bottom on a fresh kernel.
df['LON_Ref'] = location.longitude
df['LAT_Ref'] = location.latitude

time: 841 ms


## Comparing cuspatial with vanilla Python Haversine

In [52]:
from math import sin, cos, sqrt, atan2, pi

def calculateDistance(latitude, longitude, LAT_Ref, LON_Ref, Distance_Python):
    """Row-wise haversine distance, in feet, between each point and its reference.

    Written in the kernel shape used with ``DataFrame.apply_rows``: every
    argument is an equal-length sequence and the result is written into
    ``Distance_Python`` in place (nothing is returned).

    Parameters
    ----------
    latitude, longitude : point coordinates in degrees.
    LAT_Ref, LON_Ref : reference-point coordinates in degrees.
    Distance_Python : output sequence; element ``i`` receives the distance
        of row ``i`` in feet.
    """
    R = 3958.8 # Earth's radius in miles

    for i, (lt, ln, lt_r, ln_r) in enumerate(zip(latitude, longitude, LAT_Ref, LON_Ref)):
        lt_rad = lt / 180.0 * pi
        lt_r_rad = lt_r / 180.0 * pi

        dlon = (ln_r - ln) / 180.0 * pi
        dlat = (lt_r - lt) / 180.0 * pi
        # Haversine uses the cosine of BOTH latitudes (point and reference);
        # squaring the point's cosine is only correct when the two coincide.
        a = (sin(dlat/2.0))**2 + cos(lt_rad) * cos(lt_r_rad) * (sin(dlon/2.0))**2
        c = 2 * atan2(sqrt(a), sqrt(1-a))
        distance = R * c
        Distance_Python[i] = distance * 5280 # in feet
        
# Run the kernel over the four coordinate columns; its output is stored in a
# new float64 column named 'Distance_Python'.
df = df.apply_rows(
    calculateDistance,
    incols=['latitude', 'longitude', 'LAT_Ref', 'LON_Ref'],
    outcols=dict(Distance_Python=np.float64),
    kwargs=dict(),
)

time: 619 ms


In [13]:
# Argument order is (lon, lat) of the first point followed by (lon, lat) of
# the second point, so the reference columns go longitude first.
distances = cuspatial.haversine_distance(df['longitude'], df['latitude'], df['LON_Ref'], df['LAT_Ref'])

time: 45.2 ms


## As the crow flies vs. as people walk

In [5]:
# One row per distinct (key, longitude, latitude) combination.
location_columns = ['SourceElementKey', 'longitude', 'latitude']
locations = df[location_columns].drop_duplicates()
locations.head()

Unnamed: 0,SourceElementKey,longitude,latitude
4080,1001,-122.334694,47.602873
1336,1002,-122.334513,47.602949
4455,1006,-122.335143,47.603674
1026,1009,-122.336658,47.605018
3208,1010,-122.336447,47.605101


time: 735 ms


In [6]:
# Drop the full parking frame; the cells shown below work from `locations`
# instead (presumably this is done to release memory -- TODO confirm).
del df

time: 19.7 ms


In [7]:
!head ../data/king_county_road_graph_20190909.csv

node1,node2,LENGTH
89108,27652,5.02825
27652,89108,5.02825
27652,122930,112.417
122930,27652,112.417
36778,36779,48.2475
36779,36778,48.2475
26559,26559,48.3425
26559,26559,48.3425
2634,78382,372.325
time: 523 ms


In [8]:
!head ../data/king_county_road_nodes_20190909.csv

NodeID,Lon,Lat
1,-121.431505,47.331052
2,-121.430642,47.334726
3,-121.404812,47.288757
4,-121.408953,47.289157
5,-121.432249,47.292963
6,-121.432702,47.294297
7,-121.432724,47.329542
8,-121.367462,47.290593
9,-121.360285,47.29045
time: 515 ms


In [45]:
# Road-graph edges (node1, node2, LENGTH); both endpoint columns are cast to int32.
road_graph_data = cudf.read_csv('../data/king_county_road_graph_20190909.csv')
for endpoint in ('node1', 'node2'):
    road_graph_data[endpoint] = road_graph_data[endpoint].astype('int32')

time: 29.2 ms


In [46]:
# Road-graph nodes (NodeID, Lon, Lat); the id column is cast to int32.
road_nodes = cudf.read_csv('../data/king_county_road_nodes_20190909.csv')
node_ids_int32 = road_nodes['NodeID'].astype('int32')
road_nodes['NodeID'] = node_ids_int32

time: 27.6 ms


In [47]:
# Largest node id currently in the road network; the loop further down
# numbers the parking nodes it adds starting from nodeId + 1.
existing_node_ids = road_nodes['NodeID']
nodeId = existing_node_ids.max()
nodeId

127380

time: 2.68 ms


In [48]:
# Host-side list of dicts, one per row of `locations`.
locations_host = locations.to_pandas()
parking_locations = locations_host.to_dict('records')

# Empty frame that takes its column labels from `road_nodes`.
parking_locations_nodes = cudf.DataFrame(columns=road_nodes.columns)

parking_locations_nodes.columns

Index(['NodeID', 'Lon', 'Lat'], dtype='object')

time: 15.1 ms


In [51]:
# loc = parking_locations[0]

# Attach every parking location to the road graph: give it a fresh node id,
# find the closest existing road node and add one edge parking -> road node
# whose LENGTH is the straight-line distance between the two.
#
# The per-location results are collected in Python lists and concatenated
# once after the loop; concatenating inside the loop re-copies the whole
# edge list on every iteration.
# NOTE: this cell advances `nodeId`, appends to `road_graph_data` and leaves
# helper columns (Lon_REF, Lat_REF, Distance) on `road_nodes`, so it is not
# idempotent -- re-run the cells that create those objects before re-running it.
new_edges = []
new_node_ids = []
new_lons = []
new_lats = []

for loc in parking_locations:
    nodeId = nodeId + 1

    road_nodes['Lon_REF'] = loc['longitude']
    road_nodes['Lat_REF'] = loc['latitude']
    road_nodes['Distance'] = cuspatial.haversine_distance(
          road_nodes['Lon']
        , road_nodes['Lat']
        , road_nodes['Lon_REF']
        , road_nodes['Lat_REF']) * 0.621371 * 5280 # km -> miles -> feet (0.621371 is the km-to-mile factor)

    nearest = road_nodes.nsmallest(1, 'Distance') # nearest road node
    nearest['node1'] = nodeId

    # `columns=` makes the intent explicit: column labels are renamed, not index labels.
    nearest = nearest[['NodeID', 'node1', 'Distance']].rename(columns={'NodeID': 'node2', 'Distance': 'LENGTH'})
    new_edges.append(nearest)

    new_node_ids.append(nodeId)
    new_lons.append(loc['longitude'])
    new_lats.append(loc['latitude'])

if new_edges:
    road_graph_data = cudf.concat([road_graph_data] + new_edges)

    parking_locations_tmp = cudf.DataFrame({'NodeID': new_node_ids, 'Lon': new_lons, 'Lat': new_lats})
    parking_locations_nodes = cudf.concat([parking_locations_nodes, parking_locations_tmp])

time: 1min 1s


In [52]:
# Renumber the rows of the extended edge list and make sure both endpoint
# columns are int32 again before handing them to cugraph.
road_graph_data = road_graph_data.reset_index(drop=True)
road_graph_data['node1'] = road_graph_data['node1'].astype('int32')
road_graph_data['node2'] = road_graph_data['node2'].astype('int32')

# Edge list as three parallel series: source, destination, weight.
sources = cudf.Series(road_graph_data['node1'])
destinations = cudf.Series(road_graph_data['node2'])
distances = cudf.Series(road_graph_data['LENGTH'])

# Weighted graph with LENGTH as the edge weight.
g = cugraph.Graph()
g.add_edge_list(sources, destinations, distances)

time: 3.18 ms


In [57]:
# Single-source shortest paths from vertex 128833 (presumably one of the
# parking nodes appended above, since its id exceeds the largest road node
# id -- TODO confirm), then the ten vertices with the smallest path length.
shortest_paths = cugraph.sssp(g, 128833)
shortest_paths.nsmallest(10, "distance")

Unnamed: 0,vertex,distance,predecessor
128833,128833,0.0,-1
47611,47611,328.661119,128833
47510,47510,416.219319,47611
47512,47512,451.571219,47510
47509,47509,459.074919,47510
47508,47508,526.358019,47509
47507,47507,587.577319,47510
47506,47506,612.799919,47509
80499,80499,612.880719,47508
47505,47505,652.109019,47508


time: 90.5 ms


In [60]:
# Display the geocoding result obtained earlier for the reference address.
location

Location(Space Needle, 400, Broad Street, South Lake Union, Belltown, Seattle, King County, Washington, 98109, USA, (47.6205131, -122.349303598832, 0.0))

time: 3.01 ms


(0       0
 1       0
 2      16
 3      25
 4      35
 5      41
 6      44
 7      48
 8      52
 9      56
 10     61
 11     63
 12     66
 13     67
 14     69
 15     74
 16     76
 17     78
 18     80
 19     82
 20     84
 21     87
 22     89
 23     91
 24     93
 25     98
 26    101
 27    104
 28    106
 29    110
 30    113
 31    117
 32    121
 33    127
 34    139
 35    156
 dtype: int32, 0       2
 1       3
 2       4
 3       5
 4       6
 5       7
 6       8
 7       9
 8      11
 9      12
 10     13
 11     14
 12     18
 13     20
 14     22
 15     32
 16      1
 17      3
 18      4
 19      8
 20     14
 21     18
 22     20
 23     22
 24     31
 25      1
 26      2
 27      4
 28      8
 29      9
        ..
 126    34
 127     3
 128     9
 129    15
 130    16
 131    19
 132    21
 133    23
 134    24
 135    30
 136    31
 137    32
 138    34
 139     9
 140    10
 141    14
 142    15
 143    16
 144    19
 145    20
 146    21
 147    23
 148   

time: 25.2 ms
