In [4]:
from pathlib import Path
import tempfile
import shutil

import pandas as pd
import numpy as np

# Relative path to the directory holding the data files used in this notebook
DATA_DIR = Path("../data")
# IPython line magic: list the contents of DATA_DIR
# ({DATA_DIR} is interpolated from the Python variable by IPython)
%ls {DATA_DIR}

auckland_gtfs_20190524.zip                      nz_crashes_2011--2016.csv
auckland_new-zealand_roads_gen1.geojson         nz_crashes_metadata.csv
auckland_transit_delays_20170217--20170226.csv  words.txt
bicycle_counts_2017.csv


# Problem 3

In [5]:
# Names (file stems) of the GTFS tables that read_gtfs() will load;
# any other file found in a feed is ignored.
GTFS_TABLES = [
    # Agencies, stops, routes, trips and their schedules
    "agency", "stops", "routes", "trips", "stop_times",
    # Service calendars
    "calendar", "calendar_dates",
    # Fares
    "fare_attributes", "fare_rules",
    # Shapes, frequencies, transfers and feed metadata
    "shapes", "frequencies", "transfers", "feed_info",
]

# GTFS ID columns that read_gtfs() forces to string dtype, so that IDs made
# only of digits are not parsed as numbers.
# NOTE: every entry needs its trailing comma. Adjacent string literals are
# silently concatenated by Python, which previously merged "agency_id" and
# "trip_id" into the single bogus entry "agency_idtrip_id".
STR_FIELDS = [
    "agency_id",
    "trip_id",
    "service_id",
    "shape_id",
    "block_id",
    "route_id",
    "stop_id",
    "fare_id",
    "origin_id",
    "destination_id",
    "contains_id",
    "from_stop_id",
    "to_stop_id",
]

def read_gtfs(path):
    """
    Read a zipped GTFS feed into a dictionary of Pandas data frames.

    Given a path (string or pathlib object) to a (zipped) GTFS feed,
    unzip the feed into a temporary directory and load its tables into a
    dictionary whose keys are named after GTFS tables ("stops", "routes",
    etc.) and whose corresponding values are Pandas data frames
    representing the tables.
    Return the resulting dictionary; it only contains the tables that are
    actually present in the archive.

    Raises ``shutil.ReadError`` if the file cannot be unpacked as a zip
    archive.

    NOTES:
        - Files that are not valid GTFS are ignored: only top-level
          ``<table>.txt`` files whose table name is listed in GTFS_TABLES
          are read; see https://developers.google.com/transit/gtfs/reference/.
        - All ID fields listed in STR_FIELDS ("stop_id", "route_id", etc.)
          are parsed as strings and not as numbers.
    """
    path = Path(path)

    # Force ID columns to strings. The same mapping is passed for every
    # table; each table only contains some of these columns.
    dtype = {field: str for field in STR_FIELDS}

    feed = {}
    # The context manager removes the temporary directory even when
    # unpacking or parsing raises, so nothing is left behind on failure.
    with tempfile.TemporaryDirectory() as tmp_dir:
        shutil.unpack_archive(str(path), tmp_dir, "zip")

        # Read valid GTFS files into Pandas data frames
        for p in Path(tmp_dir).iterdir():
            # Skip directories and stray files (e.g. "stops.bak") whose
            # stem happens to match a table name
            if not p.is_file() or p.suffix.lower() != ".txt":
                continue
            if p.stem in GTFS_TABLES:
                feed[p.stem] = pd.read_csv(p, dtype=dtype)

    return feed

In [8]:
# Load the Auckland feed
path = DATA_DIR / "auckland_gtfs_20190524.zip"
feed = read_gtfs(path)

# Preview every table: separator line, table name, column dtypes, first rows
for name, table in feed.items():
    print("-" * 40, name, table.dtypes, sep="\n")
    display(table.head())


----------------------------------------
calendar
service_id    object
start_date     int64
end_date       int64
monday         int64
tuesday        int64
wednesday      int64
thursday       int64
friday         int64
saturday       int64
sunday         int64
dtype: object


Unnamed: 0,service_id,start_date,end_date,monday,tuesday,wednesday,thursday,friday,saturday,sunday
0,440164642-20190412121818_v78.16,20190506,20190525,0,0,0,0,0,1,1
1,458137419-20190412121818_v78.16,20190506,20190525,1,1,1,1,1,0,0
2,7004186127-20190522094814_v80.15,20190526,20190831,1,1,1,1,1,0,0
3,51358161725-20190412121818_v78.16,20190506,20190525,0,0,0,0,0,1,1
4,14323129576-20190522094814_v80.15,20190526,20190831,1,1,1,1,1,0,0


----------------------------------------
trips
block_id         object
route_id         object
direction_id      int64
trip_headsign    object
shape_id         object
service_id       object
trip_id          object
dtype: object


Unnamed: 0,block_id,route_id,direction_id,trip_headsign,shape_id,service_id,trip_id
0,,route_124,1,Point Chevalier,884-20190412121818_v78.16,440164642-20190412121818_v78.16,440164642-20190412121818_v78.16
1,,route_148,1,Crossfield Rd,873-20190412121818_v78.16,458137419-20190412121818_v78.16,458137419-20190412121818_v78.16
2,,route_207,0,Auckland,1072-20190522094814_v80.15,7004186127-20190522094814_v80.15,7004186127-20190522094814_v80.15
3,,route_219,1,Albany Station,998-20190412121818_v78.16,51358161725-20190412121818_v78.16,51358161725-20190412121818_v78.16
4,,route_076,0,Panmure Station,544-20190522094814_v80.15,14323129576-20190522094814_v80.15,14323129576-20190522094814_v80.15


----------------------------------------
stops
stop_lat          float64
zone_id            object
stop_lon          float64
stop_id            object
parent_station     object
stop_desc         float64
stop_name          object
location_type       int64
stop_code           int64
dtype: object


Unnamed: 0,stop_lat,zone_id,stop_lon,stop_id,parent_station,stop_desc,stop_name,location_type,stop_code
0,-36.89323,merged_17,174.8336,7485-20190412121818_v78.16,31464-20190412121818_v78.16,,201 Marua Rd,0,7485
1,-36.95897,merged_18,174.82667,51555-20190522094814_v80.15,,,Vine St/Cleek Rd,1,51555
2,-36.89932,merged_17,174.88728,6088-20190412121818_v78.16,31570-20190412121818_v78.16,,98 Glenmore Rd,0,6088
3,-36.72485,merged_17,174.71598,4292-20190412121818_v78.16,21936-20190412121818_v78.16,,McClymonts Rd near Lilac Ln,0,4292
4,-36.67829,merged_17,174.45257,4574-20190412121818_v78.16,21791-20190412121818_v78.16,,20 Rata St,0,4574


----------------------------------------
stop_times
trip_id                 object
arrival_time            object
departure_time          object
stop_id                 object
stop_sequence            int64
stop_headsign          float64
pickup_type            float64
drop_off_type          float64
shape_dist_traveled    float64
dtype: object


Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
0,1028107823-20190412121818_v78.16,10:05:00,10:05:00,1555-20190412121818_v78.16,1,,,,0.0
1,1028107823-20190412121818_v78.16,10:05:38,10:05:38,1590-20190412121818_v78.16,2,,,,0.269261
2,1028107823-20190412121818_v78.16,10:06:26,10:06:26,1588-20190412121818_v78.16,3,,,,0.610504
3,1028107823-20190412121818_v78.16,10:07:11,10:07:11,1586-20190412121818_v78.16,4,,,,0.933893
4,1028107823-20190412121818_v78.16,10:08:04,10:08:04,1526-20190412121818_v78.16,5,,,,1.312085


----------------------------------------
routes
route_short_name     object
route_long_name      object
route_type            int64
route_text_color    float64
agency_id            object
route_id             object
route_color         float64
dtype: object


Unnamed: 0,route_short_name,route_long_name,route_type,route_text_color,agency_id,route_id,route_color
0,1,Matiatia Wharf To Onetangi,3,,WBC,route_001,
1,101,Auckland University To Pt Chevalier Via Jervoi...,3,,NZB,route_002,
2,105,Westmere To Britomart Via Richmond Rd,3,,NZB,route_003,
3,106,Freemans Bay Loop,3,,NZB,route_004,
4,110,City Centre To Westgate Via Northwestern Motorway,3,,NZB,route_005,


----------------------------------------
shapes
shape_id              object
shape_pt_sequence      int64
shape_pt_lon         float64
shape_pt_lat         float64
dtype: object


Unnamed: 0,shape_id,shape_pt_sequence,shape_pt_lon,shape_pt_lat
0,10-20190412121818_v78.16,0,174.70142,-36.92213
1,10-20190412121818_v78.16,1,174.70136,-36.92214
2,10-20190412121818_v78.16,2,174.70143,-36.92254
3,10-20190412121818_v78.16,3,174.70145,-36.92259
4,10-20190412121818_v78.16,4,174.70146,-36.92263


----------------------------------------
calendar_dates
service_id        object
date               int64
exception_type     int64
dtype: object


Unnamed: 0,service_id,date,exception_type
0,7004186127-20190522094814_v80.15,20190603,2
1,14323129576-20190522094814_v80.15,20190603,2
2,442139632-20190522094814_v80.15,20190603,1
3,1080164355-20190522094814_v80.15,20190603,2
4,1065164096-20190522094814_v80.15,20190603,2


----------------------------------------
agency
agency_phone       object
agency_url         object
agency_id          object
agency_name        object
agency_timezone    object
agency_lang        object
dtype: object


Unnamed: 0,agency_phone,agency_url,agency_id,agency_name,agency_timezone,agency_lang
0,(09)355-3553,http://www.aucklandtransport.govt.nz,SLPH,SeaLink Pine Harbour,Pacific/Auckland,en
1,(09)355-3553,http://www.aucklandtransport.govt.nz,PC,Pavlovich Transport Solutions,Pacific/Auckland,en
2,(09)355-3553,http://www.aucklandtransport.govt.nz,ABEXP,SkyBus,Pacific/Auckland,en
3,(09)355-3553,http://www.aucklandtransport.govt.nz,AM,AT Metro,Pacific/Auckland,en
4,(09)355-3553,http://www.aucklandtransport.govt.nz,RTH,Ritchies Transport,Pacific/Auckland,en


# Problem 4

In [14]:
def compute_trip_stats(feed):
    """
    Return a data frame of trip stats, one row per trip, sorted by trip ID.

    ``feed`` is a dictionary of GTFS tables (data frames) that must contain:

    - "stop_times" with columns "trip_id", "stop_sequence",
      "departure_time" and "shape_dist_traveled"
    - "trips" with columns "trip_id" and "route_id"
    - "routes" with column "route_id"

    The result has the columns

    - "trip_id"
    - "start_time": departure time at the first stop of the trip
    - "end_time": departure time at the last stop of the trip
    - "distance": "shape_dist_traveled" at the last stop of the trip
    - "route_id" and all remaining columns of the routes table

    Both joins are inner joins, so trips missing from the trips table, or
    whose route is missing from the routes table, are dropped.
    """
    # Ensure stop times are properly sorted within each trip. Rows without
    # a trip ID cannot be attributed to a trip and are left out.
    st = (
        feed["stop_times"]
        .dropna(subset=["trip_id"])
        .sort_values(["trip_id", "stop_sequence"])
    )

    # After sorting, the first/last row of each trip is its first/last stop.
    # Picking those rows is vectorised (no per-trip Python callback) and
    # keeps the source column dtypes, e.g. a float "distance".
    first = st.drop_duplicates("trip_id", keep="first").set_index("trip_id")
    last = st.drop_duplicates("trip_id", keep="last").set_index("trip_id")
    f = pd.DataFrame(
        {
            "start_time": first["departure_time"],
            "end_time": last["departure_time"],
            "distance": last["shape_dist_traveled"],
        }
    ).reset_index()

    # Append some extra route information; join keys are spelled out so an
    # unexpected shared column name can never silently change the join
    f = (
        f
        .merge(feed["trips"][["trip_id", "route_id"]], on="trip_id")
        .merge(feed["routes"], on="route_id")
    )

    return f

In [16]:
# Trip stats for the whole feed
ts = compute_trip_stats(feed)
display(ts.head())

# Restrict to buses (route_type 3)
f = ts[ts["route_type"] == 3].copy()

# Row positions of the shortest and longest bus trips by distance
i = np.argmin(f["distance"].values)
j = np.argmax(f["distance"].values)

# Show the shortest trip first, then the longest
for pos in (i, j):
    print("-"*40)
    display(f.iloc[pos])


Unnamed: 0,trip_id,start_time,end_time,distance,route_id,route_short_name,route_long_name,route_type,route_text_color,agency_id,route_color
0,1028107823-20190412121818_v78.16,10:05:00,10:55:00,20.387226,route_005,110,City Centre To Westgate Via Northwestern Motorway,3,,NZB,
1,1028107823-20190522094814_v80.15,10:05:00,10:55:00,20.374492,route_005,110,City Centre To Westgate Via Northwestern Motorway,3,,NZB,
2,1028107891-20190412121818_v78.16,10:35:00,11:25:00,20.387226,route_005,110,City Centre To Westgate Via Northwestern Motorway,3,,NZB,
3,1028107891-20190522094814_v80.15,10:35:00,11:25:00,20.374492,route_005,110,City Centre To Westgate Via Northwestern Motorway,3,,NZB,
4,1028107960-20190412121818_v78.16,11:05:00,11:55:00,20.387226,route_005,110,City Centre To Westgate Via Northwestern Motorway,3,,NZB,


----------------------------------------


trip_id                        475135152-20190412121818_v78.16
start_time                                            06:20:00
end_time                                              06:26:00
distance                                              0.952319
route_id                                             route_214
route_short_name                                           INN
route_long_name     Inner Link Clockwise K Rd To Victoria Park
route_type                                                   3
route_text_color                                           NaN
agency_id                                                  NZB
route_color                                                NaN
Name: 30145, dtype: object

----------------------------------------


trip_id                            1120106985-20190412121818_v78.16
start_time                                                 05:05:00
end_time                                                   06:25:00
distance                                                    58.7884
route_id                                                  route_012
route_short_name                                               125X
route_long_name     City Centre To Helensville Via Westgate Express
route_type                                                        3
route_text_color                                                NaN
agency_id                                                       NZB
route_color                                                     NaN
Name: 6692, dtype: object