In [1]:
import pandas as pd
import requests
import numpy as np
from pandas.io.json import json_normalize 
# Show every column when a DataFrame is displayed (no column truncation)
pd.options.display.max_columns = None

In [2]:
def get_data(url, timeout=30):
    """Fetch a JSON API response and return its ``results`` payload.

    Parameters
    ----------
    url : str
        Full request URL, including any query-string filters.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up; forwarded to
        ``requests.get``. Defaults to 30 so a stalled request cannot hang
        the notebook indefinitely.

    Returns
    -------
    The value stored under the ``'results'`` key of the decoded JSON body.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    KeyError
        If the decoded body has no ``'results'`` key.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error body as data.
    response.raise_for_status()
    return response.json()['results']

Replicate the basic disturbance-delay analysis for the SE Madison corridor, comparing September 2017 with September 2018.  
For a similar analysis on the raw data, see cells 30-55 of this notebook: https://github.com/hackoregon/2019-transportation-data-science/blob/master/notebooks/1.0-bll-madison-corridor-travel-and-delay-analysis.ipynb  
  
Define the API call in the `url` variable. For examples, navigate to http://service.civicpdx.org/transportation2019/v1/schema/ and click "Try it out."  
Then paste the resulting "Request URL" here.

In [3]:
# disturbanceStops request; query parameters are listed one per line for readability
url = (
    'http://service.civicpdx.org/transportation2019/v1/toad/disturbanceStops/'
    '?limit=10000'
    '&months=9'
    '&time_range=9,10'
    '&years=2017,2018'
    '&lines=10,14'
    '&service_key=W'
)

In [4]:
# Run the request and keep the payload found under the response's 'results' key
data = get_data(url=url)

In [5]:
# Flatten the nested feature records into a table with dotted column names
# (e.g. 'properties.duration'). json_normalize already returns a DataFrame,
# so no extra DataFrame.from_dict() wrapper is needed.
disturbance_stops = json_normalize(data['features'])
disturbance_stops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5773 entries, 0 to 5772
Data columns (total 19 columns):
geometry.coordinates             5773 non-null object
geometry.type                    5773 non-null object
id                               5773 non-null int64
properties.act_arr_time          5773 non-null object
properties.act_dep_time          5773 non-null object
properties.day                   5773 non-null int64
properties.day_of_week           5773 non-null int64
properties.duration              5773 non-null object
properties.end_quarter_hour      5773 non-null float64
properties.latitude              5773 non-null float64
properties.line_id               5773 non-null int64
properties.longitude             5773 non-null float64
properties.month                 5773 non-null int64
properties.opd_date              5773 non-null object
properties.pattern_direction     5773 non-null object
properties.service_key           5773 non-null object
properties.start_quarter_hour  

In [6]:
# No stop ID parameter is available, so use longitude to keep only the stretch
# from 11th and SE Madison to SE Grand & SE Madison (both bounds inclusive).
# .copy() makes the subset an independent frame, so assigning columns to it
# later does not raise pandas' SettingWithCopyWarning.
disturbance_stops = disturbance_stops[
    disturbance_stops['properties.longitude'].between(-122.660740, -122.654728)
].copy()
disturbance_stops.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 233 entries, 33 to 5762
Data columns (total 19 columns):
geometry.coordinates             233 non-null object
geometry.type                    233 non-null object
id                               233 non-null int64
properties.act_arr_time          233 non-null object
properties.act_dep_time          233 non-null object
properties.day                   233 non-null int64
properties.day_of_week           233 non-null int64
properties.duration              233 non-null object
properties.end_quarter_hour      233 non-null float64
properties.latitude              233 non-null float64
properties.line_id               233 non-null int64
properties.longitude             233 non-null float64
properties.month                 233 non-null int64
properties.opd_date              233 non-null object
properties.pattern_direction     233 non-null object
properties.service_key           233 non-null object
properties.start_quarter_hour    233 non-null f

In [7]:
# Total disturbance stops in 2017 = number of rows whose year is 2017
len(disturbance_stops.loc[disturbance_stops['properties.year'] == 2017])

137

In [8]:
# Total disturbance stops in 2018 = number of rows whose year is 2018
len(disturbance_stops.loc[disturbance_stops['properties.year'] == 2018])

96

In [9]:
# Parse duration into a timedelta column (total seconds are extracted later via .dt).
# The column is rebuilt with assign() instead of item assignment: disturbance_stops
# is a filtered subset at this point, and assigning into it in place triggers
# pandas' SettingWithCopyWarning.
disturbance_stops = disturbance_stops.assign(
    **{'properties.duration': pd.to_timedelta(disturbance_stops['properties.duration'], unit='s')}
)

In [10]:
# Total disturbance delay, in seconds, across the corridor for 2017
(
    disturbance_stops
    .loc[disturbance_stops['properties.year'] == 2017, 'properties.duration']
    .dt.total_seconds()
    .sum()
)

3607.0

In [11]:
# Total disturbance delay, in seconds, across the corridor for 2018
(
    disturbance_stops
    .loc[disturbance_stops['properties.year'] == 2018, 'properties.duration']
    .dt.total_seconds()
    .sum()
)

2375.0

Use new data from the /transportation2019/v1/toad/busPassengerStops/ endpoint to analyze late arrivals and passenger boardings (ons) and alightings (offs) in the Madison corridor for September 2017 vs. September 2018.

In [12]:
# busPassengerStops request; query parameters are listed one per line for readability
url = (
    'http://service.civicpdx.org/transportation2019/v1/toad/busPassengerStops/'
    '?limit=10000'
    '&lines=10,14'
    '&stops=3637,3641,3633'
    '&time_range=9,10'
    '&service_key=W'
)

In [13]:
# Run the request and keep the payload found under the response's 'results' key
data = get_data(url=url)

In [14]:
# Flatten the nested feature records into a table with dotted column names
# (e.g. 'properties.ons'). json_normalize already returns a DataFrame,
# so no extra DataFrame.from_dict() wrapper is needed.
passenger_stops = json_normalize(data['features'])
passenger_stops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6522 entries, 0 to 6521
Data columns (total 23 columns):
geometry.coordinates              6522 non-null object
geometry.type                     6522 non-null object
id                                6522 non-null int64
properties.arrive_quarter_hour    6522 non-null float64
properties.arrive_time            6522 non-null object
properties.arriving_load          6522 non-null int64
properties.direction              6522 non-null int64
properties.door                   6522 non-null int64
properties.dwell                  6522 non-null int64
properties.estimated_load         6522 non-null int64
properties.leave_time             6522 non-null object
properties.lift                   6522 non-null int64
properties.location_id            6522 non-null int64
properties.offs                   6522 non-null int64
properties.ons                    6522 non-null int64
properties.route_number           6522 non-null int64
properties.seconds_late

In [15]:
# Parse properties.service_date into datetime values so it can be range-filtered
passenger_stops = passenger_stops.assign(
    **{'properties.service_date': pd.to_datetime(passenger_stops['properties.service_date'])}
)

In [16]:
# Keep only rows whose service date falls in September 2017 or September 2018
# (each window is start-inclusive, end-exclusive)
passenger_stops = passenger_stops.loc[
    (
        (passenger_stops['properties.service_date'] >= '2017-09-01')
        & (passenger_stops['properties.service_date'] < '2017-10-01')
    )
    | (
        (passenger_stops['properties.service_date'] >= '2018-09-01')
        & (passenger_stops['properties.service_date'] < '2018-10-01')
    )
]
passenger_stops.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 993 entries, 0 to 3200
Data columns (total 23 columns):
geometry.coordinates              993 non-null object
geometry.type                     993 non-null object
id                                993 non-null int64
properties.arrive_quarter_hour    993 non-null float64
properties.arrive_time            993 non-null object
properties.arriving_load          993 non-null int64
properties.direction              993 non-null int64
properties.door                   993 non-null int64
properties.dwell                  993 non-null int64
properties.estimated_load         993 non-null int64
properties.leave_time             993 non-null object
properties.lift                   993 non-null int64
properties.location_id            993 non-null int64
properties.offs                   993 non-null int64
properties.ons                    993 non-null int64
properties.route_number           993 non-null int64
properties.seconds_late           993 no

In [17]:
# Sum of seconds late across the Madison corridor for September 2017
passenger_stops.loc[
    (passenger_stops['properties.service_date'] >= '2017-09-01')
    & (passenger_stops['properties.service_date'] < '2017-10-01'),
    'properties.seconds_late'
].sum()

91292

In [18]:
# Sum of seconds late across the Madison corridor for September 2018
passenger_stops.loc[
    (passenger_stops['properties.service_date'] >= '2018-09-01')
    & (passenger_stops['properties.service_date'] < '2018-10-01'),
    'properties.seconds_late'
].sum()

63098

In [19]:
# Sum of ons across the Madison corridor for September 2017
passenger_stops.loc[
    (passenger_stops['properties.service_date'] >= '2017-09-01')
    & (passenger_stops['properties.service_date'] < '2017-10-01'),
    'properties.ons'
].sum()

289

In [20]:
# Sum of offs across the Madison corridor for September 2017
passenger_stops.loc[
    (passenger_stops['properties.service_date'] >= '2017-09-01')
    & (passenger_stops['properties.service_date'] < '2017-10-01'),
    'properties.offs'
].sum()

678

In [21]:
# Sum of ons across the Madison corridor for September 2018
passenger_stops.loc[
    (passenger_stops['properties.service_date'] >= '2018-09-01')
    & (passenger_stops['properties.service_date'] < '2018-10-01'),
    'properties.ons'
].sum()

372

In [22]:
# Sum of offs across the Madison corridor for September 2018
passenger_stops.loc[
    (passenger_stops['properties.service_date'] >= '2018-09-01')
    & (passenger_stops['properties.service_date'] < '2018-10-01'),
    'properties.offs'
].sum()

480