In [1]:
from pathlib import Path
import sys
import os

import gtfstk as gt
import pandas as pd
import numpy as np
import shapely.geometry as sg
import folium

# Put the parent directory on the import path so that the local
# gtfs_map_matcher package can be imported without being installed
# (presumably the parent directory is the repository root -- confirm).
sys.path.append('../')

import gtfs_map_matcher as mm

# Reload edited modules automatically before each cell execution,
# so changes to gtfs_map_matcher are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2



# Load sample GTFS feed

In [2]:
# Locate the sample feed bundled with the package
# (assumes mm.DATA_DIR is a pathlib.Path -- confirm).
path = mm.DATA_DIR.joinpath('auckland_gtfs_sample.zip')

# Read the feed, declaring its distance units as kilometres
feed = gt.read_gtfs(
    path,
    dist_units='km',
)

# Show a summary table of the feed
feed.describe()

Unnamed: 0,indicator,value
0,agencies,"[Go West, AT Metro, Fullers Ferries, Pavlovich..."
1,timezone,Pacific/Auckland
2,start_date,20171012
3,end_date,20171220
4,num_routes,15
5,num_trips,4675
6,num_stops,1022
7,num_shapes,195
8,sample_date,20171019
9,num_routes_active_on_sample_date,15


# Create sample points

In [5]:
t = feed.trips.merge(feed.routes)
cond = t['route_type'] == 3
t = t[cond].copy()

%time spoints_1 = mm.sample_trip_points(feed, t.trip_id, 5)
%time spoints_2 = mm.sample_trip_points(feed, t.trip_id, 100)
%time spoints_3 = mm.sample_trip_points(feed, t.trip_id, point_dist=0.1)

print('num stop patterns = ', len(spoints_2))
pattern = list(spoints_2.keys())[0]


CPU times: user 548 ms, sys: 0 ns, total: 548 ms
Wall time: 547 ms
CPU times: user 740 ms, sys: 8 ms, total: 748 ms
Wall time: 745 ms
CPU times: user 932 ms, sys: 4 ms, total: 936 ms
Wall time: 930 ms
num stop patterns =  50


# Compare sample points with one actual feed shape

In [6]:
# One line per sampling of the chosen stop pattern
l1, l2, l3 = (
    sg.LineString(samples[pattern])
    for samples in (spoints_1, spoints_2, spoints_3)
)

# Get actual trip shape: take the first shape ID listed for the stop pattern
# and build its geometry without UTM projection
stop_patterns = mm.get_stop_patterns(feed)
shid = stop_patterns.loc[
    stop_patterns['stop_pattern'] == pattern, 'shape_id'
].iloc[0]
la = feed.build_geometry_by_shape(shape_ids=[shid], use_utm=False)[shid]

# Centre the map on the first sample point; the coordinate pair is reversed
# because folium takes (lat, lon) (presumably the samples are (lon, lat) -- confirm)
mappy = folium.Map(location=spoints_1[pattern][0][::-1], zoom_start=14)

# Draw the sampled lines and the feed shape, each in its own colour.
# The colour is bound as a default argument so every layer keeps its own value.
layers = [(l1, 'red'), (l2, 'orange'), (l3, 'yellow'), (la, 'green')]
for geometry, color in layers:
    folium.GeoJson(
        sg.mapping(geometry),
        style_function=lambda feature, color=color: {'color': color},
    ).add_to(mappy)
mappy

# Match one set of sample points using various matching services and compare to GTFS shape

In [7]:
# Map match with Mapzen (API key read from the MAPZEN_KEY environment variable)

matched = mm.match_with_mapzen(
    {pattern: spoints_2[pattern]},
    os.environ['MAPZEN_KEY'],
)
mpoints = matched[pattern]
lm = sg.LineString(mpoints)

# Overlay the matched line (red) on the feed shape (green)
mappy = folium.Map(location=spoints_1[pattern][0][::-1], zoom_start=14)
for geometry, color in ((la, 'green'), (lm, 'red')):
    folium.GeoJson(
        sg.mapping(geometry),
        style_function=lambda feature, color=color: {'color': color},
    ).add_to(mappy)
mappy


In [8]:
# Map match with OSRM (no API key argument is passed)

matched = mm.match_with_osrm({pattern: spoints_2[pattern]})
mpoints = matched[pattern]
lm = sg.LineString(mpoints)

# Overlay the matched line (red) on the feed shape (green)
mappy = folium.Map(location=spoints_1[pattern][0][::-1], zoom_start=14)
for geometry, color in ((la, 'green'), (lm, 'red')):
    folium.GeoJson(
        sg.mapping(geometry),
        style_function=lambda feature, color=color: {'color': color},
    ).add_to(mappy)
mappy


In [9]:
# Map match with Mapbox, which uses OSRM
# (API key read from the MAPBOX_KEY environment variable)

matched = mm.match_with_mapbox(
    {pattern: spoints_2[pattern]},
    os.environ['MAPBOX_KEY'],
)
mpoints = matched[pattern]
lm = sg.LineString(mpoints)

# Overlay the matched line (red) on the feed shape (green)
mappy = folium.Map(location=spoints_1[pattern][0][::-1], zoom_start=14)
for geometry, color in ((la, 'green'), (lm, 'red')):
    folium.GeoJson(
        sg.mapping(geometry),
        style_function=lambda feature, color=color: {'color': color},
    ).add_to(mappy)
mappy


In [11]:
# Map match with Google (API key read from the GOOGLE_KEY environment variable)

matched = mm.match_with_google(
    {pattern: spoints_2[pattern]},
    os.environ['GOOGLE_KEY'],
)
mpoints = matched[pattern]
lm = sg.LineString(mpoints)

# Overlay the matched line (red) on the feed shape (green)
mappy = folium.Map(location=spoints_1[pattern][0][::-1], zoom_start=14)
for geometry, color in ((la, 'green'), (lm, 'red')):
    folium.GeoJson(
        sg.mapping(geometry),
        style_function=lambda feature, color=color: {'color': color},
    ).add_to(mappy)
mappy


# Match the feed, restricted to a single trip

In [15]:
t = feed.trips.merge(feed.routes)
trip_id, shape_id = t.loc[t['route_type'] == 3, ['trip_id', 'shape_id']].iloc[0].values
print('Matching trip {!s} whose shape ID is {!s}'.format(trip_id, shape_id))
trip_ids = [trip_id]

%time feed2 = mm.match_feed(feed, 'mapzen', os.environ['MAPZEN_KEY'], trip_ids=trip_ids)

Matching trip 14010099857-20171013114012_v59.18 whose shape ID is 2-20171013114012_v59.18
CPU times: user 632 ms, sys: 12 ms, total: 644 ms
Wall time: 1.87 s


In [16]:
# Print the rows of the selected shape, first from the original feed
# and then from the matched feed, so the two tables can be compared

for shapes in (feed.shapes, feed2.shapes):
    print(shapes.loc[shapes['shape_id'] == shape_id])

                      shape_id  shape_pt_sequence  shape_pt_lon  shape_pt_lat
54520  2-20171013114012_v59.18                  0     174.75564     -36.84050
54521  2-20171013114012_v59.18                  1     174.75563     -36.84051
54522  2-20171013114012_v59.18                  2     174.75601     -36.84061
54523  2-20171013114012_v59.18                  3     174.75622     -36.84066
54524  2-20171013114012_v59.18                  4     174.75816     -36.84117
54525  2-20171013114012_v59.18                  5     174.75825     -36.84123
54526  2-20171013114012_v59.18                  6     174.75826     -36.84135
54527  2-20171013114012_v59.18                  7     174.75787     -36.84232
54528  2-20171013114012_v59.18                  8     174.75593     -36.84181
54529  2-20171013114012_v59.18                  9     174.75557     -36.84171
54530  2-20171013114012_v59.18                 10     174.75449     -36.84142
54531  2-20171013114012_v59.18                 11     174.75392 

In [18]:
# Summarize the matched feed so its indicators can be compared with
# the summary of the original feed shown earlier
feed2.describe()

Unnamed: 0,indicator,value
0,agencies,"[Go West, AT Metro, Fullers Ferries, Pavlovich..."
1,timezone,Pacific/Auckland
2,start_date,20171012
3,end_date,20171220
4,num_routes,15
5,num_trips,4675
6,num_stops,1022
7,num_shapes,195
8,sample_date,20171019
9,num_routes_active_on_sample_date,15


In [17]:
# How many map-matching calls would be needed to match the entire feed?
# NOTE(review): presumably one call per stop pattern -- confirm against
# the implementation of get_num_match_calls.

mm.get_num_match_calls(feed)

50