## MTA Self Assessment data



In [1]:
import pandas as pd
import re
import string
import warnings

from bokeh.io import output_notebook
from bokeh.layouts import gridplot
from bokeh.models import HoverTool, ColumnDataSource, FuncTickFormatter, DatetimeTickFormatter
from bokeh.palettes import Category10, gray
from bokeh.plotting import Figure, show, figure
from datetime import datetime, timedelta

In [2]:
# load general variables
# Every subway route identifier handled by this notebook, one character each.
subway_lines = 'ABCDEFGJLMNQRSWZ1234567'

# Routes that share a trunk line share a color, so each hex code is declared
# once per group of routes instead of once per route.
_trunk_line_colors = {
    'ACE': '#0039A6',
    'BDFM': '#FF6319',
    'G': '#6CBE45',
    'JZ': '#996633',
    'L': '#A7A9AC',
    'NQRW': '#FCCC0A',
    'S': '#808183',
    '123': '#EE352E',
    '456': '#00933C',
    '7': '#B933AD',
}

# Route -> hex color. The iteration string fixes the key order of the mapping.
subway_colors = {
    route: next(
        color for routes, color in _trunk_line_colors.items() if route in routes
    )
    for route in 'ABCDEFGJLMNQRSZW1234567'
}

In [15]:
# load visualization tools & settings
# Configure Bokeh so that show() renders plots inline in notebook cells.
output_notebook()
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# pandas/bokeh in later cells; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [3]:
# Read ../data/MTA_Performance_NYCT.csv into a DataFrame, decoding it as latin1.
mta_data = pd.read_csv('../data/MTA_Performance_NYCT.csv', encoding='latin1')

In [4]:
# Preview the first two rows to check the column layout.
mta_data.head(2)

Unnamed: 0,INDICATOR_SEQ,PARENT_SEQ,AGENCY_NAME,INDICATOR_NAME,DESCRIPTION,CATEGORY,FREQUENCY,DESIRED_CHANGE,INDICATOR_UNIT,DECIMAL_PLACES,PERIOD_YEAR,PERIOD_MONTH,YTD_TARGET,YTD_ACTUAL,MONTHLY_TARGET,MONTHLY_ACTUAL
0,67816,0,NYC Transit,Mean Distance Between Failures - Subways,Average number of miles a subway car travels i...,Service Indicators,M,U,-,0,2008,1,148244.0,148244.0,155000.0,148420.0
1,67816,0,NYC Transit,Mean Distance Between Failures - Subways,Average number of miles a subway car travels i...,Service Indicators,M,U,-,0,2008,2,148476.0,148476.0,155000.0,140993.0


In [5]:
# Build a month-start timestamp for every row from PERIOD_YEAR / PERIOD_MONTH.
# pd.to_datetime assembles dates from a frame whose columns are named
# year / month / day, so the whole column is converted in one vectorized call.
# This replaces a row-wise apply that indexed each row Series with the integers
# 0 and 1: positional fallback on a label-indexed Series is deprecated in pandas.
mta_data['date'] = pd.to_datetime(
    mta_data[['PERIOD_YEAR', 'PERIOD_MONTH']]
    .rename(columns={'PERIOD_YEAR': 'year', 'PERIOD_MONTH': 'month'})
    .assign(day=1)
)

In [6]:
# List the distinct indicator names present in the data.
mta_data['INDICATOR_NAME'].unique()

array(['Mean Distance Between Failures - Subways',
       'Wait Assessment - Subways (Inactive, Historic Calculations)',
       'Customer Injury Rate - Subways',
       'Mean Distance Between Failures - Staten Island Railway ',
       'On-Time Performance - Staten Island Railway',
       'Employee Lost Time and Restricted Duty Rate ',
       'Total Ridership - Subways',
       'Mean Distance Between Failures - NYCT Bus',
       'Total Paratransit Ridership - NYCT Bus',
       'Customer Accident Injury Rate - NYCT Bus',
       'Total Ridership - NYCT Bus ', 'Elevator Availability - Subways',
       'Escalator Availability - Subways',
       'Collisions with Injury Rate - NYCT Bus',
       '% of Completed Trips - NYCT Bus',
       'East New York Depot - % of Completed Trips',
       'Flatbush Depot - % of Completed Trips',
       'Jackie Gleason Depot - % of Completed Trips',
       'Ulmer Park Depot - % of Completed Trips',
       'Fresh Pond Depot - % of Completed Trips',
       'Yukon

In [7]:
# Keep only the rows whose indicator name contains 'OTP'.
otp_data = mta_data.loc[
    mta_data['INDICATOR_NAME'].str.contains('OTP')
]

In [8]:
# On Time Performance by line
# One small time-series panel per subway line, laid out five to a row.

plot_list = []
for line_id in subway_lines:
    # J, S and W are left out of the grid.
    if line_id in ('J', 'S', 'W'):
        continue

    # Rows whose indicator name mentions this line (e.g. "A Line"); filtered
    # once and reused for both axes.
    line_rows = otp_data[otp_data['INDICATOR_NAME'].str.contains(line_id + ' Line')]

    panel = figure(
        title=line_id,
        plot_width=250,
        plot_height=150,
        y_range=(0, 100),
        y_minor_ticks=2,
        x_axis_type='datetime',
    )
    panel.line(
        x=line_rows['date'],
        y=line_rows['MONTHLY_ACTUAL'],
        line_color=subway_colors[line_id],
        line_width=3,
    )

    # Two-digit year labels keep the narrow panels readable.
    panel.xaxis.formatter = DatetimeTickFormatter(years='%y')
    plot_list.append(panel)

grid = gridplot(plot_list, ncols=5, plot_width=250, plot_height=150)
show(grid)


In [49]:
# wait assessment
# Keep only the rows whose indicator name contains 'Assessment - ', then draw
# one small time-series panel per subway line, laid out five to a row.
assessment_data = mta_data[mta_data['INDICATOR_NAME'].str.contains('Assessment - ')]

plot_list = []
for line_id in subway_lines:
    # J, S and W are left out of the grid.
    if line_id in ('J', 'S', 'W'):
        continue

    # Rows whose indicator name mentions this line (e.g. "A Line"); filtered
    # once and reused for both axes.
    line_rows = assessment_data[
        assessment_data['INDICATOR_NAME'].str.contains(line_id + ' Line')
    ]

    panel = figure(
        title=line_id,
        plot_width=250,
        plot_height=150,
        y_range=(50, 100),
        y_minor_ticks=2,
        x_axis_type='datetime',
    )
    panel.line(
        x=line_rows['date'],
        y=line_rows['MONTHLY_ACTUAL'],
        line_color=subway_colors[line_id],
        line_width=3,
    )

    # Two-digit year labels keep the narrow panels readable.
    panel.xaxis.formatter = DatetimeTickFormatter(years='%y')
    plot_list.append(panel)

grid = gridplot(plot_list, ncols=5, plot_width=250, plot_height=150)
show(grid)

In [12]:
# performance stats vs alerts
# Read ../data/my_mta_data_for_analysis.csv into a DataFrame.
alert_data = pd.read_csv('../data/my_mta_data_for_analysis.csv')
# Preview the first two rows to check the column layout.
alert_data.head(2)

Unnamed: 0.1,Unnamed: 0,count,hex_x,title,body,msg,system,time,hex_y,planned work,...,unauthorized person,event,sick customer,update,non service,construction,weather,other agencies,other,estimated
0,190003,182417,2c891,"UPDATED: BKLYN, 2,3 & 4 Trains, Sick Customer",Following an earlier incident at Ber...,"Following an earlier incident at Bergen St, 2...",NYC,2014-09-01 00:10:00,2c891,False,...,False,False,True,True,False,False,False,False,False,end disruption
1,190006,182420,2c894,"UPDATED: MANH, 1, 2 and 3 Trains, Signal Probl...","b/d 1, 2 and 3 trains are running wi...","b/d 1, 2 and 3 trains are running with delays...",NYC,2014-09-01 00:26:00,2c894,False,...,False,False,False,True,False,False,False,False,False,signal


In [13]:
# Find subway lines mentioned
# Matches any single ASCII punctuation character so it can be turned into a
# token separator before splitting.
regex = re.compile('[%s]' % re.escape(string.punctuation))
def extract_subway_lines(txt, subway_lines=subway_lines):
    """Return the subway line identifiers mentioned in a piece of text.

    Punctuation is replaced by spaces and the text is split on whitespace;
    every resulting token that is exactly one of the known line identifiers
    is kept, in order of appearance (duplicates are preserved).

    Parameters
    ----------
    txt : str
        Text to scan. Any non-string value (e.g. NaN from a missing cell)
        yields an empty list.
    subway_lines : iterable of str
        Known line identifiers. A plain string is treated as a collection of
        one-character identifiers.

    Returns
    -------
    list of str
        Every known line if the token 'All' is present, otherwise the matching
        tokens.
    """
    if not isinstance(txt, str):
        return []

    tokens = regex.sub(' ', txt).split()
    if 'All' in tokens:
        return list(subway_lines)

    # Exact membership: `token in 'ABC...'` on a string is a substring test,
    # which wrongly accepted tokens such as '34' (from "34 St") or 'EF'.
    known_lines = set(subway_lines)
    return [token for token in tokens if token in known_lines]
    
# Tag every alert with the subway lines named in its title.
alert_data['lines'] = alert_data['title'].apply(extract_subway_lines)

In [15]:
# Truncate each alert timestamp to the first day of its month.
# The column is parsed once as a whole instead of calling pd.to_datetime twice
# per row inside apply; missing timestamps come out as NaT rather than raising.
alert_data['month'] = (
    pd.to_datetime(alert_data['time'])
    .dt.to_period('M')
    .dt.to_timestamp()
)

In [None]:
# 