In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import re
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so

import helpers as hp

%load_ext autoreload
%autoreload 2


# Compute Auckland and Wellington fares through their web APIs


In [2]:
def get_journey_auckland(orig, dest, departure_time=None, max_walk=1600, timeout=60):
    """
    Query the Auckland Transport journey planner web API for a public
    transport journey between two points.

    INPUT
    ------
    orig : list
        WGS84 longitude-latitude pair
    dest : list
        WGS84 longitude-latitude pair
    departure_time : string
        ISO 8601 datetime in Auckland local time and without a UTC offset;
        e.g. '2017-06-01T07:30:00';
        defaults to the current Auckland time
    max_walk : float
        Maximum walking distance in meters for the journey
    timeout : float
        Number of seconds to wait for the server before giving up

    OUTPUT
    ------
    dictionary
        Decoded JSON response of journey

    Raise a ``requests`` exception if the request times out or
    returns an HTTP error status.
    """
    url = 'https://api.at.govt.nz/v2/public-restricted/journeyplanner/silverRailIVU/plan'
    # Points arrive as longitude-latitude but are sent as 'latitude,longitude'
    fromLoc ='{!s},{!s}'.format(orig[1], orig[0])
    toLoc ='{!s},{!s}'.format(dest[1], dest[0])

    tz = pytz.timezone('Pacific/Auckland')
    if departure_time is None:
        # Use Auckland's clock rather than the clock of the machine running this
        departure_time = dt.datetime.now(tz).strftime('%Y-%m-%dT%H:%M:%S')

    # Add the UTC offset in force in Auckland at the departure time:
    # +12:00 during standard time and +13:00 during daylight saving time
    try:
        naive = dt.datetime.strptime(departure_time, '%Y-%m-%dT%H:%M:%S')
        offset = tz.localize(naive).strftime('%z')  # E.g. '+1200'
        offset = offset[:3] + ':' + offset[3:]
    except ValueError:
        # Datetime not in the expected format, so fall back to standard time
        offset = '+12:00'
    date = departure_time + offset

    params = {
        'from': 'from',
        'to': 'to',
        'fromLoc': fromLoc,
        'toLoc': toLoc,
        'timeMode': 'A',
        'date': date,
        'modes': 'BUS,TRAIN,FERRY',
        'operators': '',
        'optimize': 'QUICK',
        'maxWalk': max_walk,
        'maxChanges': '-1',
        'routes': '',
        # NOTE(review): hard-coded API subscription key; consider loading it
        # from configuration instead of keeping it in the notebook
        'subscription-key': '323741614c1c4b9083299adefe100aa6',
    }
    # Without a timeout a stalled connection would block the caller forever
    r = requests.get(url, params=params, timeout=timeout)

    # Raise an error if bad request
    r.raise_for_status()

    return r.json()

def get_fare_auckland(journey):
    """
    Given a journey of the form output by :func:`get_journey_auckland`,
    return the adult Hop card fare (float) of the journey's first itinerary.

    The response lists the fare as a whole number (e.g. 315), which is
    divided by 100 here.
    Return ``None`` if the journey reports an error or contains no
    itineraries, and return 0 if the first itinerary lists no adult Hop fare.
    """
    if journey.get('error') is not None:
        return None

    # Tolerate a missing or null response instead of raising
    itineraries = (journey.get('response') or {}).get('itineraries')
    if not itineraries:
        return None

    f = itineraries[0].get('fareHopAdult')
    if f is None:
        return 0
    return f/100

def get_journey_wellington(orig, dest, departure_time=None, max_walk=1600, timeout=60):
    """
    Query the Metlink (Wellington) journey planner web form for a public
    transport journey between two points.

    INPUT
    ------
    orig : list
        WGS84 longitude-latitude pair
    dest : list
        WGS84 longitude-latitude pair
    departure_time : string
        ISO 8601 datetime in local time; e.g. '2017-06-01T07:30:00';
        defaults to the current New Zealand time
    max_walk : float
        Maximum walking distance in meters for the journey
    timeout : float
        Number of seconds to wait for the server before giving up

    OUTPUT
    ------
    text
        HTML response of journey query

    Raise a ``requests`` exception if the request times out or
    returns an HTTP error status.
    """
    url = 'https://www.metlink.org.nz/journeyplanner/JourneyPlannerForm'
    # Points arrive as longitude-latitude but are sent as 'latitude,longitude'
    from_coords ='{!s},{!s}'.format(orig[1], orig[0])
    to_coords ='{!s},{!s}'.format(dest[1], dest[0])

    if departure_time is None:
        # Use New Zealand's clock rather than the clock of the machine running this
        now = dt.datetime.now(pytz.timezone('Pacific/Auckland'))
        departure_time = now.strftime('%Y-%m-%dT%H:%M:%S')
    # The form takes the date and the time of day as separate fields.
    # Avoid the name ``time``, which would shadow the imported module.
    date, time_of_day = departure_time.split('T')

    params = {
        'From': 'from',
        'To': 'to',
        'Via': '',
        'When': 'LeaveAfter',
        'Date': date,
        'Time': time_of_day,
        'MaxChanges': 5,
        'WalkingSpeed': 4,
        'MaxWalking': max_walk,
        'Modes[Train]': 'Train',
        'Modes[Bus]': 'Bus',
        'Modes[Ferry]': 'Ferry',
        'Modes[Cable+Car]': 'Cable Car',
        'ShowAdvanced': '',
        'FromCoords': from_coords,
        'ToCoords': to_coords,
        'ViaCoords': '',
        'action_doForm': 'Go',
    }
    # Without a timeout a stalled connection would block the caller forever
    r = requests.get(url, params=params, timeout=timeout)
    # Raise an error if bad request
    r.raise_for_status()
    return r.text

# Estimate the Wellington card fare discount rate as the mean ratio
# of card fare to cash fare over the rows of the fare table
path = hp.DATA_DIR/'processed'/'wellington'/'transit_fares.csv'
f = pd.read_csv(path)
ratio = f['card_fare']/f['cash_fare']
f['card/cash'] = ratio
r = ratio.mean()
print('estimated Wellington card discount rate=', r)

def get_fare_wellington(journey, card_discount=r):
    """
    Given a journey of the form output by :func:`get_journey_wellington`,
    extract the journey's adult cash fare (float), multiply it by the given
    discount rate to estimate the adult card fare, and return the result
    rounded to two decimal places.
    Return ``None`` if no adult fare is found in the journey.
    """
    # Raw string so that the regex escapes are not treated as string escapes
    pattern = r'Total adult fare </span><strong>&#36;(\d+\.\d\d)</strong>'
    m = re.search(pattern, journey)
    if m is None:
        return None
    # Use the given rate rather than whatever the global name ``r`` holds now
    return round(card_discount*float(m.group(1)), 2)

def collect_fares(rental_points, departure_time, region):
    """
    Query the given region's journey planner for a journey between every
    ordered pair of distinct rental areas and collect the card fares.

    INPUT
    ------
    rental_points : DataFrame
        Has the columns 'rental_area' and 'geometry', where every geometry
        exposes its coordinates through ``coords``
        (presumably Shapely points; verify against the data)
    departure_time : string
        ISO 8601 datetime; e.g. '2017-06-01T07:30:00';
        passed on to the region's journey function
    region : string
        'auckland' or 'wellington'

    OUTPUT
    ------
    DataFrame
        Has the columns 'orig_name', 'dest_name', 'card_fare';
        the fare is missing for the pairs whose query or fare extraction failed

    Raise a ``ValueError`` if the region is not supported.
    """
    # Fail fast on an unsupported region, before doing any work
    if region == 'auckland':
        get_journey = get_journey_auckland
        get_fare = get_fare_auckland
        time_per_call = 3.6
    elif region == 'wellington':
        get_journey = get_journey_wellington
        get_fare = get_fare_wellington
        time_per_call = 2.4
    else:
        raise ValueError(
            "Unsupported region {!r}; use 'auckland' or 'wellington'".format(region))

    # Get all pairs of points excluding equal points
    f = rental_points[['rental_area', 'geometry']].copy()
    rows = [[o[0], o[1].coords[0], d[0], d[1].coords[0]] for o, d in product(f.values, f.values) if o[0] != d[0]]
    f = pd.DataFrame(rows, columns=['orig_name', 'orig', 'dest_name', 'dest'])

    print('This will take about {:.2f} minutes'.format(f.shape[0]*time_per_call/60))

    # Get journeys for each pair
    rows = []
    for __, row in f.iterrows():
        try:
            j = get_journey(row['orig'], row['dest'], departure_time=departure_time)
            fare = get_fare(j)
        except Exception:
            # Best effort: record a missing fare and carry on.
            # Catching Exception instead of everything keeps this long loop
            # interruptible, because KeyboardInterrupt still propagates.
            fare = None
        rows.append([row['orig_name'], row['dest_name'], fare])

    g = pd.DataFrame(rows, columns=['orig_name', 'dest_name', 'card_fare'])
    return g


estimated Wellington card discount rate= 0.782401654073


In [5]:
# Test the Auckland journey planner query on one origin-destination pair.
# Points are WGS84 longitude-latitude pairs; `orig` (Maraetai) is defined
# but not used by the query below.
orig = [175.01092026711063, -36.93134386721964]  # Maraetai
orig2 = [174.76864676675842, -36.84997406733503]  # Central East
dest = [174.8151970336325, -36.89546015048722]  # Ellerslie
%time j = get_journey_auckland(orig2, dest, departure_time='2017-07-14T07:30:00')
j
#get_fare_auckland(j)

# Disabled Wellington equivalent of the test above.
# NOTE: if re-enabled as written, it rebinds the module-level name `r`,
# which holds the estimated Wellington card discount rate.
# orig = (174.7708511352539,-41.28394744513899)
# dest = (174.78861808776855,-41.297458248607995)
# %time r = get_journey_wellington(orig, dest, departure_time='2017-06-01T07:30:00')
# r
# get_fare_wellington(r)

CPU times: user 8 ms, sys: 0 ns, total: 8 ms
Wall time: 1.36 s


{'error': None,
 'response': {'engine': 'silverRailIVU',
  'itineraries': [{'duration': 2460000,
    'durationStr': '41 minutes',
    'endTime': '2017-07-14T08:11',
    'endTimeStr': '8:11 am',
    'fareAdult': 550,
    'fareChild': 300,
    'fareError': None,
    'fareHopAdult': 315,
    'fareHopChild': 180,
    'fareHopTertiary': 235,
    'legs': [{'distance': '800 metres',
      'distanceExact': 800,
      'distanceStr': '800 metres',
      'duration': 720000,
      'durationStr': '12 min',
      'endLat': '-36.84429',
      'endLon': '174.76848',
      'endTime': '2017-07-14T07:42',
      'endTimeStr': '7:42 am',
      'fareAdult': 0,
      'fareChild': 0,
      'fareHopAdult': 0,
      'fareHopChild': 0,
      'fareHopTertiary': 0,
      'from': 'from',
      'isFastLeg': False,
      'isFirst': True,
      'isLast': False,
      'legGeometry': {'length': 47,
       'points': 'jg|_F_oui`@c@OEd@BP^bAiAQ{@P_@PQRKF]NY`@[PgAz@EN}@TOA{A[sBYQj@KEEACM}@WD]GAUB{DkAWSWUyBk@Fa@KE'},
      '

In [7]:
# Collect the card fares between all rental points of each region,
# save them to the region's transit costs file, and preview them
departure_time = '2017-07-13T07:30:00'
regions = ['auckland']  # 'wellington' is left out for now
for region in regions:
    rp = hp.get_data(region, 'rental_points')
    g = collect_fares(rp, departure_time, region)
    path = hp.get_path(region, 'transit_costs')
    g.to_csv(path, index=False)

    print('* ', region)
    print(g.head())


This will take about 570.360000 minutes
*  auckland
  orig_name                   dest_name  card_fare
0    Albany                    Avondale       6.10
1    Albany                    Balmoral       6.10
2    Albany         Beachhaven/Birkdale       3.15
3    Albany  Blockhouse Bay/New Windsor       6.10
4    Albany                Botony Downs       7.50


In [9]:
# Fill holes (missing fares) in the saved transit costs by querying the journey planners again

def get_transit_costs_with_coords(region):
    """
    Load the given region's transit costs and attach to every row the
    coordinates of its origin and destination rental areas as the columns
    'orig_coords' and 'dest_coords'.
    The coordinates are the first coordinate of each rental point geometry.
    Return the result sorted by origin name, then destination name.
    """
    costs = hp.get_data(region, 'transit_costs')
    points = hp.get_data(region, 'rental_points')
    points['coords'] = points['geometry'].map(lambda geom: geom.coords[0])

    # Merge the coordinates in twice: once keyed on the origin name
    # and once keyed on the destination name
    renamings = [
        {'rental_area': 'orig_name', 'coords': 'orig_coords'},
        {'rental_area': 'dest_name', 'coords': 'dest_coords'},
    ]
    for renaming in renamings:
        lookup = points[['rental_area', 'coords']].rename(columns=renaming)
        costs = costs.merge(lookup)

    return costs.sort_values(['orig_name', 'dest_name'])

def fill_holes(region, departure_time=None):
    """
    Query the given region's journey planner again for every
    origin-destination pair whose card fare is missing in the region's
    saved transit costs.

    INPUT
    ------
    region : string
        'auckland' or 'wellington'
    departure_time : string
        ISO 8601 datetime; e.g. '2017-06-01T07:30:00';
        passed on to the region's journey function

    OUTPUT
    ------
    DataFrame
        The transit costs with the columns 'orig_name', 'dest_name',
        'card_fare', sorted by origin name then destination name;
        the fare stays missing for the pairs whose query failed again

    Raise a ``ValueError`` if the region is not supported.
    """
    # Fail fast on an unsupported region, before loading any data
    if region == 'auckland':
        get_journey = get_journey_auckland
        get_fare = get_fare_auckland
        time_per_call = 3.6
    elif region == 'wellington':
        get_journey = get_journey_wellington
        get_fare = get_fare_wellington
        time_per_call = 2.4
    else:
        raise ValueError(
            "Unsupported region {!r}; use 'auckland' or 'wellington'".format(region))

    f = get_transit_costs_with_coords(region)
    cond = f['card_fare'].isnull()
    holes = f[cond]

    print('This will take about {:.2f} minutes'.format(holes.shape[0]*time_per_call/60))

    new_rows = []
    for __, row in holes.iterrows():
        try:
            journey = get_journey(row['orig_coords'], row['dest_coords'], departure_time=departure_time)
            fare = get_fare(journey)
        except Exception:
            # Best effort: keep the hole and carry on.
            # Catching Exception instead of everything keeps this long loop
            # interruptible, because KeyboardInterrupt still propagates.
            fare = np.nan
        new_rows.append([row['orig_name'], row['dest_name'], fare])

    f = f[~cond].drop(['orig_coords', 'dest_coords'], axis=1)
    if new_rows:
        # Convert None fares to NaN before combining with the known fares
        g = pd.DataFrame(new_rows, columns=['orig_name', 'dest_name', 'card_fare']).fillna(value=np.nan)
        f = pd.concat([f, g])
    return f.sort_values(['orig_name', 'dest_name'])



In [20]:
# Retry the missing fares of each region and save the result
# beside the region's transit costs file with the suffix '.new'
departure_time = '2017-07-13T07:30:00'
for region in ['auckland']:  # 'wellington' is left out for now
    f = fill_holes(region, departure_time=departure_time)
    costs_path = hp.get_path(region, 'transit_costs')
    path = costs_path.parent/(costs_path.name + '.new')
    f.to_csv(path, index=False)

This will take about 68.040000 minutes


In [29]:
# Compare the saved transit costs with the re-queried ones ('.new' file)
# and keep the re-queried ones if they have fewer missing fares
region = 'auckland'
path1 = hp.get_path(region, 'transit_costs')
f1 = pd.read_csv(path1)
path2 = path1.parent/(path1.name + '.new')
f2 = pd.read_csv(path2)

# Number of rows and number of missing fares in each file
holes1 = f1['card_fare'].isnull().sum()
holes2 = f2['card_fare'].isnull().sum()
print(f1.shape[0], holes1)
print(f2.shape[0], holes2)

# Compare the missing fares, not the row counts: filling holes keeps
# the number of rows, so the row counts never differ
if holes2 < holes1:
    # Overwrite old costs with new.
    # The move itself removes the '.new' file, so no unlink is needed.
    path2.replace(path1)

9506 1134
9506 1134


# Canterbury has no fare calculator API, so estimate Canterbury fares from its fare zones and fare table

In [None]:
# Fare table: card fare by number of fare zones traveled
fares = pd.DataFrame([[1, 2.55], [2, 3.75]], columns=['#zones_traveled', 'card_fare'])

# Zones
# DATA_DIR lives in the helpers module; the bare name is undefined here
path = hp.DATA_DIR/'processed'/'canterbury'/'fare_zones.geojson'
zones = gpd.read_file(str(path))

# Attach zones to rental points
rp = hp.get_data('canterbury', 'rental_points')
g = gpd.sjoin(rp, zones, op='within')
g = g[['rental_area', 'zone']].copy()

# Compute origin and destination zones
f = hp.get_data('canterbury', 'commutes_transit')
f = f.merge(g.rename(columns={'rental_area': 'orig_name', 'zone': 'orig_zone'}))
f = f.merge(g.rename(columns={'rental_area': 'dest_name', 'zone': 'dest_zone'}))

# Compute #zones traveled, then card fare
f['#zones_traveled'] = abs(f['orig_zone'] - f['dest_zone']) + 1
# NOTE: this is an inner merge on the shared column(s), so commutes whose
# #zones_traveled is not in the fare table (more than 2 zones) are dropped
f = f.merge(fares)

# Cut down and save
f = f[['orig_name', 'dest_name', 'card_fare']].copy()
path = hp.get_path('canterbury', 'transit_costs')
f.to_csv(path, index=False)
f