In [1]:
from pathlib import Path
import json

import gtfstk as gt
import pandas as pd
import numpy as np


DATA_DIR = Path('../data')
OUT_DIR = Path('../output')

In [2]:
path = DATA_DIR/'wellington_gtfs_20171016.zip'
feed = gt.read_gtfs(path, dist_units='km')
feed.assess_quality()

Unnamed: 0,indicator,value
0,num_route_short_names_duplicated,0
1,frac_route_short_names_duplicated,0
2,num_stop_time_dists_missing,340007
3,frac_stop_time_dists_missing,1
4,num_direction_ids_missing,0
5,frac_direction_ids_missing,0
6,num_trips_missing_shapes,0
7,frac_trips_missing_shapes,0
8,num_departure_times_missing,0
9,frac_departure_times_missing,0


In [8]:
shapes_g = feed.shapes_to_geojson()
path = Path('../wellington_shapes_20171016.geojson')
with path.open('w') as tgt:
    json.dump(shapes_g, tgt)

In [3]:
"""
If no shapes, use stops only.
If shapes, then add distances to stop times and to shapes
"""
feed = feed.append_dist_to_shapes()
trip_stats = feed.compute_trip_stats(compute_dist_from_shapes=True)
feed = feed.append_dist_to_stop_times(trip_stats)


In [36]:

def get_trip_points(feed, trip_ids, max_sample_points=100):
    """
    """
    # Filter trips to given trip IDs
    t = feed.trips
    t = t[t['trip_id'].isin(trip_ids)].copy()
    
    # Many trips can have the same shape, so just choose one trip for each shape
    t = t.groupby('shape_id').agg('first').reset_index()

    # Get shape geometries
    geom_by_shape = feed.build_geometry_by_shape(shape_ids=t.shape_id)

    # Get stops times for the whittled down list of trips
    st = feed.stop_times
    st = st[st['trip_id'].isin(t.trip_id)].copy()
    
    # Join in stop locations
    st = st.merge(t).merge(feed.stops)
    
    n = max_sample_points
    # Build dictionary shape_id -> list of (lon, lat) sample points
    points_by_shape = {}
    for trip_id, group in st.groupby('trip_id'):
        # Scale stop distances to interval [0, 1]
        d = group['shape_dist_traveled'].iat[-1]
        group['shape_dist_traveled'] /= d
        stop_points = group[['stop_lon', 'stop_lat', 'shape_dist_traveled']].values

        k = group.shape[0]
        # Add n - k more points from trip shape in an even fashion
        assert k <= n
        
        delta = 1/(n - 1)
        ticks = [i*delta for i in range(n)]
        stop_bins = np.digitize(group['shape_dist_traveled'].values, ticks) - 1
        added_ticks = [i*delta/2 for i in range(n-1) if i not in stop_bins]
        shape = group.shape_id.iat[0]
        geom = geom_by_shape[shape]
        shape_points = [
          list(geom.interpolate(tick, normalized=True).coords[0]) + [tick]
          for tick in added_ticks]
        points = sorted(stop_points.tolist() + shape_points, key=lambda x: x[2])
        points = [x[:2] for x in points]
        points_by_shape[shape] = points

    
    return points_by_shape

In [37]:
tids = feed.trips['trip_id'].sample(10)
get_trip_points(feed, tids)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


{'1:22#887#916': [[174.7702496, -41.34267473],
  [174.770493264855, -41.34248078165424],
  [174.77066397839252, -41.342097098288725],
  [174.77098415143365, -41.341320730071885],
  [174.77114107138925, -41.34093120194145],
  [174.7712704475722, -41.34061007211699],
  [174.77122968, -41.34061258],
  [174.7714306776336, -41.340221898389665],
  [174.77175308411202, -41.33944634812687],
  [174.77191263545086, -41.33905789708279],
  [174.7722262690685, -41.33827875775621],
  [174.77253624251122, -41.3374981727578],
  [174.77269234994472, -41.33710833698369],
  [174.772829202245, -41.336777395358624],
  [174.77276674, -41.33684747],
  [174.7729867540935, -41.33638818661835],
  [174.77327795310563, -41.335601024487616],
  [174.77337794552477, -41.33476741012256],
  [174.77341232202878, -41.33443506306372],
  [174.77337230000003, -41.33443014],
  [174.77346571062492, -41.33402384272954],
  [174.77349744559913, -41.3336050958749],
  [174.77359136448868, -41.332770550388325],
  [174.773602816676