In [19]:
from pathlib import Path
import tempfile
import shutil

import pandas as pd
import numpy as np

# Directory holding the input data files, given relative to the current working directory
DATA_DIR = Path('../data')


# Problem 3

In [17]:
# File stems that read_gtfs recognises as GTFS tables; any other file is ignored
GTFS_TABLES = [
    'agency', 'stops', 'routes', 'trips', 'stop_times',
    'calendar', 'calendar_dates',
    'fare_attributes', 'fare_rules',
    'shapes', 'frequencies', 'transfers', 'feed_info',
]

# ID columns that must be parsed as strings (never as numbers) when reading
# GTFS tables, so that values such as '001' keep their leading zeros.
# NOTE: every entry needs its trailing comma; adjacent string literals are
# silently concatenated by Python ('agency_id' 'trip_id' -> 'agency_idtrip_id').
STR_FIELDS = [
  'agency_id',
  'trip_id',
  'service_id',
  'shape_id',
  'block_id',
  'route_id',
  'stop_id',
  'fare_id',
  'origin_id',
  'destination_id',
  'contains_id',
  'from_stop_id',
  'to_stop_id',
]

def read_gtfs(path):
    """
    Read a zipped GTFS feed into a dictionary of Pandas data frames.

    Given a path (string or pathlib object) to a zipped GTFS feed,
    unzip the feed into a temporary directory and load every recognised
    table into a data frame.
    Return a dictionary whose keys are GTFS table names ('stops',
    'routes', etc.) and whose values are the corresponding data frames.

    NOTES:
        - Only regular files named ``<table>.txt`` with ``<table>`` in
          ``GTFS_TABLES`` are read; everything else in the archive
          (directories, other extensions, unknown names) is ignored.
          See https://developers.google.com/transit/gtfs/reference/.
        - All columns listed in ``STR_FIELDS`` ('stop_id', 'route_id',
          etc.) are parsed as strings and not as numbers.
        - The temporary directory is always removed, even when
          unpacking or parsing raises.
    """
    path = Path(path)

    feed = {}
    dtype = {field: str for field in STR_FIELDS}  # ensure some string types

    # The context manager deletes the directory on normal exit AND on error
    with tempfile.TemporaryDirectory() as tmp_dir:
        shutil.unpack_archive(str(path), tmp_dir, 'zip')

        # Read valid GTFS files into Pandas data frames
        for p in Path(tmp_dir).iterdir():
            # Skip directories and stray files (e.g. 'stops.bak') whose stem
            # happens to match a table name; GTFS tables are '.txt' files
            if not p.is_file() or p.suffix.lower() != '.txt':
                continue
            if p.stem in GTFS_TABLES:
                feed[p.stem] = pd.read_csv(p, dtype=dtype)

    return feed

In [18]:
# Load the Auckland feed, then show each table's column types and first rows
path = DATA_DIR / 'auckland_gtfs_20161017.zip'
feed = read_gtfs(path)

for name, table in feed.items():
    # One print call with newline separators emits the same four lines
    print('-'*40, name, table.dtypes, table.head(), sep='\n')


----------------------------------------
routes
route_short_name     object
route_long_name      object
route_type            int64
route_text_color    float64
agency_id            object
route_id             object
route_color         float64
dtype: object
  route_short_name                         route_long_name  route_type  \
0              005  Britomart To Pt Chevalier Via Westmere           3   
1              007              St Heliers To Pt Chevalier           3   
2              008                     New Lynn To Otahuhu           3   
3              009                    Onehunga To New Lynn           3   
4              010  Wynyard Quarter To Onehunga Via Unitec           3   

   route_text_color agency_id   route_id  route_color  
0               NaN     NZBML  route_001          NaN  
1               NaN     NZBML  route_002          NaN  
2               NaN        UE  route_003          NaN  
3               NaN        UE  route_004          NaN  
4               N

# Problem 4

In [53]:
def compute_trip_stats(feed):
    """
    Return a data frame of trip stats, one row per trip.

    ``feed`` is a dictionary of data frames with at least the keys
    'stop_times', 'trips' and 'routes'.

    The result has the columns

    - ``trip_id``
    - ``start_time``: departure time at the trip's first stop
    - ``end_time``: departure time at the trip's last stop
    - ``distance``: ``shape_dist_traveled`` at the trip's last stop;
      NaN for every trip if the stop times table lacks that column
    - ``route_id`` plus all remaining columns of ``feed['routes']``

    First and last stops are determined by ``stop_sequence``.
    Trips without a matching row in ``feed['trips']`` or
    ``feed['routes']`` are dropped (inner joins).
    """
    st = feed['stop_times']

    # Ensure stop times are properly sorted; rows without a trip ID cannot
    # be attributed to any trip and are discarded
    st = (
        st.dropna(subset=['trip_id'])
        .sort_values(['trip_id', 'stop_sequence'])
    )

    # After sorting, the first/last row per trip_id is the first/last stop.
    # Both frames hold exactly one row per trip in the same trip_id order,
    # so their columns can be combined positionally.
    first = st.drop_duplicates('trip_id', keep='first')
    last = st.drop_duplicates('trip_id', keep='last')

    f = first[['trip_id']].reset_index(drop=True)
    f['start_time'] = first['departure_time'].values
    f['end_time'] = last['departure_time'].values
    if 'shape_dist_traveled' in last.columns:
        f['distance'] = last['shape_dist_traveled'].values
    else:
        # shape_dist_traveled is optional in GTFS
        f['distance'] = float('nan')

    # Append some extra route information, joining on explicit keys
    trip_routes = feed['trips'][['trip_id', 'route_id']]
    f = f.merge(trip_routes, on='trip_id').merge(feed['routes'], on='route_id')

    return f

In [54]:
ts = compute_trip_stats(feed)
print(ts.head())

# Restrict to buses
f = ts[ts['route_type'] == 3].copy()

# Find shortest and longest bus trips.
# idxmin/idxmax return index LABELS, which is what .loc expects; f is a
# filtered subset of ts, so positions (argmin/argmax) would not match labels.
# to_numeric guards against the distance column having object dtype.
distance = pd.to_numeric(f['distance'])
i, j = distance.idxmin(), distance.idxmax()

print('-'*40)
print(f.loc[i])

print('-'*40)
print(f.loc[j])


                             trip_id start_time  end_time   distance  \
0   1000046829-20161011151756_v46.25   07:00:00  08:12:00  28.398145   
1   1000046830-20161011151756_v46.25   07:25:00  08:45:00  28.398145   
2   1000046831-20161011151756_v46.25   07:10:00  08:20:00  28.398145   
3  18220046548-20161011151756_v46.25   06:30:00  07:39:00  29.063917   
4  18220046549-20161011151756_v46.25   07:45:00  09:05:00  29.063917   

    route_id route_short_name  \
0  route_157             457X   
1  route_157             457X   
2  route_157             457X   
3  route_157             457X   
4  route_157             457X   

                                     route_long_name  route_type  \
0  Manukau City Centre To Britomart Express Via O...           3   
1  Manukau City Centre To Britomart Express Via O...           3   
2  Manukau City Centre To Britomart Express Via O...           3   
3  Manukau City Centre To Britomart Express Via O...           3   
4  Manukau City Centre To Br