In [96]:
from pathlib import Path

import pandas as pd
import pygal as pg
from highcharts import Highchart
import spectra

from IPython.core.display import HTML

# Relative path of the directory that holds the input CSV files
DATA_DIR = Path('../data')
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

In [13]:
# Read the Auckland transit delay statistics into a DataFrame.
#
# The route identifier columns are forced to str so that pandas keeps
# values such as the short name '005' as text instead of parsing numbers.
text_columns = ['route_id', 'route_short_name', 'route_type']
path = DATA_DIR / 'auckland_transit_delays_20170217--20170226.csv'
delays = pd.read_csv(path, dtype=dict.fromkeys(text_columns, str))

# Show the frame transposed: one column per route, one row per field
delays.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,239,240,241,242,243,244,245,246,247,248
route_short_name,005,007,008,009,010,011,020,020X,027,030,...,986,987,988,991X,992X,CTY,INN,NEX,OUT,SKY
delay_p000,-779,-1510,-1787,-1019,-1432,-334,-1431,-1177,-379,-1106,...,-537,-263,-696,-765,-1754,-1793,-1747,-1357,-1792,-1268
delay_p010,-124.5,-65,-53,-16,-225,-7,-218,-30,112.2,-407.1,...,-116,-157,-383.8,-370.6,-800.8,-299,-152.5,-384,-130,-216
delay_p025,30.25,66,46,70,17,41,-65,36,212.5,-149,...,-17,-27.75,-153,-179,-472,-143,-12,-198,6,-44
delay_p050,144,221,176,174,175,114.5,39,117,362,32,...,73.5,19,48,37,-137,2,123,-33,136,91
delay_p075,376.25,424,369,315,356,219.25,150,275,618.5,153,...,183,60,111,164,38,78,332,81,333,286
delay_p090,641.5,677,585,502,561,330,337,759,790,274.1,...,297.3,147.1,179.6,316,162,173,659,211,627,547
delay_p100,1662,1781,1656,1738,1730,1070,1768,1794,1041,1800,...,1011,990,500,797,1296,1742,1799,1750,1798,1792
end_delay_p000,-779,-1510,-907,-1019,-1424,-334,-903,-1177,-379,-961,...,-440,-263,-696,-765,-1617,-1793,-1406,-1357,-1699,-1242
end_delay_p010,-556.4,-611,-287.2,-458,-708,-191.7,-390.9,-467.5,-153,-641,...,-76.6,-216.9,-616.6,-574.6,-919,-549.5,-350,-641,-331,-547.4


# Problem 3

In [116]:
# python-highcharts box plot: prepare the per-route data
#
# Only keep routes with a large enough fraction of delay samples.
# Select and copy in a single step; `delays` itself is left untouched.
MIN_SAMPLE_FRACTION = 0.4
cond = delays['num_delays']/delays['num_stop_times'] >= MIN_SAMPLE_FRACTION
f = delays.loc[cond].copy()

# Convert the delay percentile columns to minutes (divide by 60).
# cols is ordered by percentile: 0, 10, 25, 50, 75, 90, 100.
cols = ['delay_p{:03d}'.format(p) for p in [0, 10, 25, 50, 75, 90, 100]]
f[cols] = f[cols] / 60

# Create the display name 'short name, long name'.
# Built column-wise instead of row by row; a route without a long name
# falls back to its short name instead of raising inside str.join.
short_name = f['route_short_name']
long_name = f['route_long_name']
f['name'] = (short_name + ', ' + long_name.astype(str)).where(
    long_name.notna(), short_name)

# Custom color routes by interquartile range (75th minus 25th percentile):
# blue for a small spread, through pale yellow, to red at 20 minutes or more.
IQR_COLOR_DOMAIN = [0, 10, 20, 30]
scale = (
    spectra.scale(['#2b83ba', '#ffffbf', '#d7191c', '#d7191c'])
    .domain(IQR_COLOR_DOMAIN)
    .colorspace('lch')
)
f['iqr'] = f[cols[4]] - f[cols[2]]
# Clamp to the top of the color domain so that a route with a spread
# above 30 minutes gets the last color of the scale.
# NOTE(review): spectra appears to reject values outside the scale's
# domain -- confirm against the spectra docs; the clamp is harmless either way.
f['color'] = (
    f['iqr']
    .abs()
    .clip(upper=IQR_COLOR_DOMAIN[-1])
    .map(lambda x: scale(x).hexcode)
)

# Plot: create the chart and assemble its options from named pieces
chart = Highchart()
N = f.shape[0]  # number of rows (routes) in f; drives the chart height

# Tooltip header: the point key in bold, then the opening of a
# two-column table. Whitespace is spelled out so the markup stays exact.
tooltip_header = (
    '\n'
    '           <b>{point.key}</b> \n'
    '           <table><tr><td>Percentile</td><td>Delay</td></tr>\n'
    '           '
)

# One tooltip table row per box plot statistic:
# (percentile label shown to the user, Highcharts point attribute)
box_stats = [
    ('10th', 'low'),
    ('25th', 'q1'),
    ('50th', 'median'),
    ('75th', 'q3'),
    ('90th', 'high'),
]
row_template = (
    '            <tr>\n'
    '              <td style="padding-right:0.5em">%s</td>\n'
    '              <td style="text-align:right">{point.%s} min</td>\n'
    '            </tr>\n'
)
tooltip_rows = '\n' + ''.join(row_template % stat for stat in box_stats) + ' '*12

# Route names label the category axis
route_axis = {
    'title': {'text': 'Route'},
    'type': 'category',
    'categories': f['name'].tolist(),
}

# Delay axis, with a purple reference line at zero delay
zero_line = {'value': 0, 'color': 'purple', 'width': 2}
delay_axis = {
    'title': {'text': 'Delay (min)'},
    'opposite': True,
    'tickInterval': 5,
    'plotLines': [zero_line],
}

options = {
    # The height grows with the number of routes
    'chart': {'width': 650, 'height': 15*N, 'inverted': True},
    'title': {'text': 'Box plots of stop delays'},
    'subtitle': {'text': 'Study period is 2017-02-17 to 2017-02-26'},
    'legend': {'enabled': False},
    'plotOptions': {'boxplot': {'color': '#666'}},
    'xAxis': route_axis,
    'yAxis': delay_axis,
    'tooltip': {
        'valueDecimals': 1,
        'useHTML': True,
        'headerFormat': tooltip_header,
        'pointFormat': tooltip_rows,
        'footerFormat' : '</table>',
    },
}
chart.set_dict_options(options)

# Central values: one box per route. The whiskers sit at the 10th and 90th
# percentiles, the box spans the 25th to 75th, and the fill is the route color.
box_fields = [
    ('fillColor', 'color'),
    ('low', cols[1]),
    ('q1', cols[2]),
    ('median', cols[3]),
    ('q3', cols[4]),
    ('high', cols[5]),
]
box_keys = [key for key, __ in box_fields]
# Pull each column out once instead of iterating over the frame row by row
box_columns = [f[col].tolist() for __, col in box_fields]
data = [dict(zip(box_keys, values)) for values in zip(*box_columns)]
chart.add_data_set(data, 'boxplot')


def _extreme_series_options(point_format):
    """Return a fresh options dict for a scatter series of extreme values.

    A new dict is built on every call, so the options handed to one series
    are never modified when the next series is configured.

    point_format: HTML table row used as the series' tooltip pointFormat.
    """
    return {
        'marker': {
            'fillColor': 'white',
            'lineWidth': 1,
            'lineColor': '#666',
            'radius': 2,
        },
        'tooltip': {
            'pointFormat': point_format,
        },
    }


# Min and max values, drawn as small white markers beyond the whiskers.
# Each entry pairs a percentile column with its tooltip row; the markup
# whitespace is spelled out so both strings stay exactly as before.
extremes = [
    (cols[0], (
        '\n'
        '            <tr>\n'
        '              <td style="padding-right:0.5em">0th</td>\n'
        '              <td style="text-align:right">{point.y} min</td>\n'
        '            </tr>\n'
        '            '
    )),
    (cols[-1], (
        '\n'
        '    <tr>\n'
        '      <td style="padding-right:0.5em">100th</td>\n'
        '      <td style="text-align:right">{point.y} min</td>\n'
        '    </tr>\n'
        '    '
    )),
]
for col, point_format in extremes:
    opts = _extreme_series_options(point_format)
    data = [{'y': y} for y in f[col].tolist()]
    chart.add_data_set(data, 'scatter', **opts)

# Last expression of the cell: display the chart
chart