# {agency_name} Vehicle Position Data Available for Scheduled Data

An analysis of {agency_name} measuring how much GTFS Realtime Vehicle Positions data is available for the trips in their GTFS Schedule data. For each service date and route, the analysis compares the Schedule and Vehicle Positions data by aggregating the number of trips present in each. Note: trip_updates data is a pending addition to the data, intended to account for any missing trips. This measure does not aim to rate an agency's reliability; rather, it measures GTFS feed publication. (Text subject to change.)

In [1]:
%%capture

import utils
import shared_utils
from dla_utils import _dla_utils as dla_utils
from shared_utils import styleguide, altair_utils, portfolio_utils

from siuba import *
import pandas as pd

from IPython.display import display, Markdown, HTML

import altair as alt


In [2]:
#agency is defined as the single operator (a unique calitp_itp_id) used to subset the data

In [3]:
# Parameter cell: `calitp_id` is the calitp_itp_id of the single operator this
# report covers. It is used below to subset `rt_sched` into `agency`.
calitp_id = 300

In [4]:
# Load the trip-level table used throughout the notebook. Later cells read the
# columns calitp_itp_id, num_sched, num_vp and caltrans_district from it.
rt_sched = utils.read_data()


In [5]:
# Convert the deletion timestamp column to pandas datetimes.
rt_sched = rt_sched.assign(
    calitp_deleted_at=lambda frame: pd.to_datetime(frame['calitp_deleted_at'])
)


In [6]:
# Category orderings passed to the chart helpers so that weekdays and months
# sort chronologically rather than alphabetically.
cats_day = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]
cats_month = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

In [7]:
## For now, filter these rows down so that we do not keep the bad feeds
## (TODO confirm which feeds are meant). This filter is moving into utils.read_data().

# rt_sched = rt_sched[((rt_sched["calitp_itp_id"]==290) & (rt_sched["calitp_url_number"]==1)) | ((rt_sched["calitp_itp_id"]==300))]

In [8]:
# Daily aggregate of scheduled vs. vehicle-position trips across every operator
# in rt_sched.
# NOTE(review): rt_sched_date is not referenced again in the visible notebook —
# confirm whether this cell is still needed.
rt_sched_date = utils.agg_by_date(rt_sched, 'num_sched', 'num_vp')

In [9]:
# Subset the data to the single operator chosen in the parameter cell.
agency = rt_sched >> filter(_.calitp_itp_id == calitp_id)

# Fail early with a clear message: later cells read agency.iloc[0], which would
# otherwise raise an opaque IndexError when the id matches no rows.
if agency.empty:
    raise ValueError(f"No rows found in rt_sched for calitp_itp_id={calitp_id}")

In [10]:
# Daily aggregate for the selected operator only. Later cells read the columns
# agency_name, calitp_itp_id, service_date, total_num_sched, total_num_vp,
# pct_w_vp and weekday from this frame.
agency_date = utils.agg_by_date(agency, 'num_sched', 'num_vp')

In [11]:
# Bold heading for the monthly summary table, prefixed with the operator name.
monthly_heading = (
    f"<strong>{agency.iloc[0]['agency_name']} "
    f"Monthly Average Percent Scheduled Trips vs. Vehicle Position </strong>"
)
display(HTML(monthly_heading))

In [12]:
# Monthly share of scheduled trips that have vehicle position data.
agency_month = utils.get_agg_pct(
    agency,
    groupings=['month'],
    sum_sched='num_sched',
    sum_vp='num_vp',
)
# Human-readable percent string (two decimals) for the rendered table.
agency_month['average_percent'] = agency_month['avg'].astype(float).map(
    lambda share: "{:.2%}".format(share)
)
monthly_table = agency_month[['month', 'average_percent']]
display(HTML(dla_utils.pretify_tables(monthly_table)))

Month,Average Percent
May,82.88%
June,84.64%


In [13]:
# Per-route share of scheduled trips that have vehicle position data.
avg_route = utils.get_agg_pct(
    agency,
    groupings=['route_short_name'],
    sum_sched='num_sched',
    sum_vp='num_vp',
)

In [14]:
# Add a formatted percent column, then switch to display-friendly column names.
avg_route = avg_route.assign(
    avg_pct=avg_route['avg'].astype(float).map(lambda share: "{:.2%}".format(share))
).rename(
    columns={
        'route_short_name': 'Route Short Name',
        'avg_pct': 'Route Average Percent with VP Trip Data',
    }
)

In [15]:
# Count all routes, and those whose average share is at or above 90%.
n_routes_total = len(avg_route)
n_routes_high = len(avg_route >> filter(_.avg >= .90))

display(HTML(
    f"Out of the <strong>{n_routes_total} routes operating</strong>, there are "
    f"<strong>{n_routes_high} routes</strong> "
    f"with over 90% of vehicle position data for scheduled trips"
))

In [16]:
# Render only the two display columns of the per-route summary.
route_table = avg_route[['Route Short Name', 'Route Average Percent with VP Trip Data']]
display(HTML(dla_utils.pretify_tables(route_table)))

Route Short Name,Route Average Percent With Vp Trip Data
1,86.09%
3,83.75%
7,84.36%
9,87.09%
R12,86.92%
14,89.35%
18,88.72%
41,93.93%
44,0.00%
R10,90.70%


In [17]:
# Section heading, emitted as HTML so it appears in the rendered output.
display(HTML("<h2>Agency Performance</h2>"))

In [18]:
# Overall share for the selected operator (one row, keyed by agency_name).
agency_avg = utils.get_agg_pct(
    agency,
    groupings=['agency_name'],
    sum_sched='num_sched',
    sum_vp='num_vp',
)

# Overall share across every operator in rt_sched: total VP trips / total scheduled trips.
overall_avg = (
    rt_sched
    >> summarize(tot_sched=_.num_sched.sum(), tot_vp=_.num_vp.sum())
    >> mutate(avg=_.tot_vp / _.tot_sched)
)

In [19]:
# Format both averages as percents, then render the comparison sentence.
overall_pct = "{:.2%}".format(overall_avg.iloc[0]["avg"])
agency_pct = "{:.2%}".format(agency_avg.iloc[0]["avg"])

display(HTML(
    f'Compared to the total operator average of <strong>{overall_pct}</strong>, '
    f'{agency.iloc[0]["agency_name"]} has a total average of '
    f'<strong>{agency_pct}</strong> for vehicle position data for scheduled trips'
))

In [20]:
# Heading for the operator-vs-overall comparison chart.
display(HTML(f"<strong>{(agency.iloc[0]['agency_name'])} Average Compared to Overall Average</strong>"))

# Pass the notebook parameter rather than a hard-coded operator id, so the chart
# always matches the operator the rest of the report describes.
(utils.total_average_with_1op_chart(rt_sched, calitp_id)).mark_trail().encode(size='Percent with Vehicle Position Data:Q')

In [21]:
# Reshape the daily totals to long form: one row per service date and measure,
# so scheduled and vehicle-position trip counts can share one chart.
sched_label = 'Total Number Scheduled Trips'
vp_label = 'Total Number Vehicle Position Trips'

agency_day_long = (
    agency_date.rename(columns={'total_num_sched': sched_label,
                                'total_num_vp': vp_label})
    >> select(_.agency_name, _.calitp_itp_id, _.service_date,
              _[sched_label], _[vp_label])
    >> gather('measure', 'value', _[sched_label], _[vp_label])
)

In [22]:
# Heading for the daily trip-count chart (fixes the "Postion" typo in the title).
display(HTML(f"<strong>Number of {(agency.iloc[0]['agency_name'])} Scheduled and Vehicle Position Trips</strong>"))

# Daily bars coloured by measure; stack=None overlays the two measures instead
# of stacking them.
(utils.bar_chart_over_time((agency_day_long),
                           'service_date','value','measure',',f', 'x', '')).encode(y=alt.Y('value', stack = None,)).properties(width=800)

In [90]:
# Heading and legend note for the daily percent chart.
display(HTML(f"<strong>{(agency.iloc[0]['agency_name'])} Percent of Scheduled Trips with RT Vehicle Position Data</strong>"))
display(Markdown(f"<strong><span style='color:red'>Red Line </span></strong> represents "
                 f" {(agency.iloc[0]['agency_name'])} average"))

# One bar per service date, coloured by weekday.
bar = utils.bar_chart_over_time(
    agency_date, 'service_date', 'pct_w_vp', 'weekday', '%', 'x', ''
)
bar = bar.mark_bar(size=8).properties(width=800)

# Horizontal rule at the mean of the daily percentages.
rule = (
    alt.Chart(agency_date)
    .mark_rule(color='red')
    .encode(y=alt.Y('mean(pct_w_vp):Q', axis=alt.Axis(format='%')))
)

bar + rule

<strong><span style='color:red'>Red Line </span></strong> represents  Big Blue Bus average

In [24]:
# avg_week_month = utils.get_agg_pct((agency_date),
#                groupings = ['agency_name', 'calitp_itp_id', 'month', 'weekday'],
#                sum_sched ='total_num_sched',
#                sum_vp = 'total_num_vp')

In [25]:
# display(HTML(f"<strong>{(agency.iloc[0]['agency_name'])} Average Percent of Scheduled Trips with RT Vehicle Position Data by Day</strong>"))
# utils.bar_chart_over_time(avg_week_month,
#                           'avg',
#                           'weekday',
#                           'weekday',
#                           '', 
#                           cats_day, 
#                           '').encode(x=alt.X('avg:Q', title=utils.labeling('avg'), axis=alt.Axis(format='%')),
#                                                                                                                          row =alt.Row("month", sort=cats_month)).mark_bar(size=30)

In [26]:
# Sub-section heading, emitted as HTML so it appears in the rendered output.
display(HTML("<h3>Route Analysis</h3>"))

In [27]:
# Keep only the columns the route-over-time line chart needs.
route_chart_columns = [
    'agency_name',
    'route_id',
    'route_short_name',
    'service_date',
    'pct_w_vp',
]
agency_short = agency[route_chart_columns]

In [28]:
# Heading (fixes the "Postions" typo) and usage hint for the interactive legend.
display(HTML(f"<strong>{(agency.iloc[0]['agency_name'])} "
             f"Average Percent of Scheduled Trips with Vehicle Positions Data "
             f"by Route Over Time</strong>"))
display(Markdown("To utilize the multi-select, use `shift` when clicking routes in the legend"))

# One line per route; colour and dash pattern both encode the route name.
line = alt.Chart(agency_short).mark_line().encode(
    x=alt.X('service_date', title=utils.labeling('service_date')),
    y=alt.Y('pct_w_vp', title=('Percent with Vehicle Positions'), axis=alt.Axis(format='%')),
    color=alt.Color('route_short_name', title=['Route Name']),
    strokeDash='route_short_name')

# Add the tooltip and the legend selection on route_short_name via the shared helpers.
line_chart = utils.add_tooltip(line, 'route_short_name','service_date', 'pct_w_vp')
line_chart = utils.add_chart_selection(line_chart, 'route_short_name')
line_chart.properties(width=700)

To utilize the multi-select, use `shift` when clicking routes in the legend

In [29]:
# Average share of scheduled trips with vehicle position data, per route and day of week.
route_weekday_groupings = ['route_short_name', 'weekday']
agency_route_weekday = utils.get_agg_pct(
    agency, groupings=route_weekday_groupings, sum_sched='num_sched', sum_vp='num_vp'
)

In [30]:
# Routes with any weekend service. Both weekend days are checked so that a route
# running on Sunday but not Saturday is still classed as having weekend service.
weekend_routes = (
    agency_route_weekday.loc[
        agency_route_weekday['weekday'].isin(['Saturday', 'Sunday']),
        'route_short_name',
    ]
    .dropna()
    .unique()
    .tolist()
)

In [31]:
# Flag each route/weekday row by whether its route appears in weekend_routes.
agency_route_weekday = agency_route_weekday.assign(
    weekend_service=agency_route_weekday['route_short_name'].isin(weekend_routes)
)

In [32]:
# Category orderings used to sort weekdays and months in the charts below.
# NOTE(review): these repeat, with identical values, the cats_day / cats_month
# definitions from the earlier cell (In [6]); one of the two cells is redundant.
cats_day = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
cats_month = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

In [33]:
# Heading and usage hint for the paired day-of-week charts.
display(HTML(f"<strong>{(agency.iloc[0]['agency_name'])} Weekday Average Percent of Scheduled Trips with RT Vehicle Position Data</strong>"))
display(Markdown("To utilize the multi-select, use `shift` when clicking routes in the legend"))


def _route_weekday_lines(has_weekend_service):
    """Build the per-route day-of-week line chart for one group of routes.

    Parameters
    ----------
    has_weekend_service : bool
        Selects rows of agency_route_weekday whose weekend_service flag equals
        this value.

    Returns
    -------
    The chart returned by utils.add_tooltip, after the legend selection has
    been added via utils.add_chart_selection.
    """
    subset = agency_route_weekday >> filter(_.weekend_service == has_weekend_service)
    chart = utils.bar_chart_over_time(
        subset, 'weekday', 'avg', 'route_short_name', '%', cats_day, ""
    )
    # Lines with hollow white point markers, replacing the helper's default mark.
    chart = chart.mark_line(point={"filled": False, "fill": "white"}).properties(width=500)
    chart = utils.add_chart_selection(chart, 'route_short_name')
    return utils.add_tooltip(chart, "route_short_name", "weekday", "avg")


# Left: routes without weekend service. Right: routes with weekend service.
weekday = _route_weekday_lines(False)
weekend = _route_weekday_lines(True)

(weekday | weekend)

To utilize the multi-select, use `shift` when clicking routes in the legend

In [34]:
# Average share of scheduled trips with vehicle position data, per route and month.
route_month_groupings = ['route_short_name', 'month']
agency_route_month = utils.get_agg_pct(
    agency, groupings=route_month_groupings, sum_sched='num_sched', sum_vp='num_vp'
)

In [35]:
# display(HTML(f"<strong>{(agency.iloc[0]['agency_name'])} Monthly Average Percent of Scheduled Trips with RT Vehicle Position Data by Route</strong>"))
# utils.bar_chart_over_time(agency_route_month,
#                           'month', 
#                           'avg',
#                           'month',
#                           '%', 
#                           cats_month,
#                           '').encode(column='route_short_name').mark_bar().encode(
#     color=alt.Color('month', scale=alt.Scale(range=altair_utils.CALITP_CATEGORY_BRIGHT_COLORS))).properties(width=450)

In [36]:
#OR

In [78]:
# display(HTML(f"<strong>{(agency.iloc[0]['agency_name'])} Monthly Average Percent of Scheduled Trips with RT Vehicle Position Data by Route</strong>"))
# utils.bar_chart_over_time(agency_route_month,
#                           'route_short_name', 
#                           'avg',
#                           'month',
#                           '%', 
#                           cats_month,
#                           '').encode(column='month').mark_bar().encode(
#     color=alt.Color('route_short_name', scale=alt.Scale(range=altair_utils.CALITP_CATEGORY_BRIGHT_COLORS))).properties(width=450)

In [75]:
# Format each route/month average as a percent string with two decimals.
agency_route_month['avg_pct'] = agency_route_month['avg'].astype(float).map("{:.2%}".format)
# Pivot to one row per route with one column per month.
# NOTE(review): the trailing `.apply(lambda x: x)` looks like a way to turn the
# grouped result back into a plain DataFrame — confirm; siuba's `ungroup()` may
# state that intent more directly.
route_stat = (agency_route_month>> select(_.route_short_name,
                           _.month,
                           _.avg_pct) 
 >> group_by(_.route_short_name)
 >> spread(_.month, _.avg_pct)).apply(lambda x: x) 

In [77]:
# Bold heading for the route-by-month table rendered in the next cell.
display(HTML("<strong>Monthly Average Percent of Scheduled Trips with RT Vehicle Position Data by Route</strong>"))

In [76]:
# Render the route-by-month pivot as an HTML table.
# NOTE(review): in the saved output the month columns appear alphabetically
# (June before May) and routes sort as strings (1, 14, 15, ..., 2, 3) —
# confirm whether chronological / numeric ordering is wanted.
display(HTML(dla_utils.pretify_tables(route_stat)))

Route Short Name,June,May
1,86.14%,86.04%
14,89.54%,89.15%
15,93.10%,95.24%
16,95.29%,94.62%
17,91.31%,89.92%
18,87.49%,89.90%
2,84.54%,83.73%
3,84.44%,83.08%
41,94.33%,93.52%
43,94.94%,94.56%


In [38]:
# Sub-section heading, emitted as HTML so it appears in the rendered output.
display(HTML("<h3>By District</h3>"))

In [39]:
# Average share per Caltrans district across all operators, with display-friendly
# column names for the chart below.
dist_avg = utils.get_agg_pct(
    rt_sched,
    groupings='caltrans_district',
    sum_sched='num_sched',
    sum_vp='num_vp',
).rename(columns={'caltrans_district': 'Caltrans District', 'avg': 'Average'})

In [79]:
# Show the district-level table (raw DataFrame display) that feeds the bar chart below.
dist_avg

Unnamed: 0,Caltrans District,num_sched,num_vp,Average
0,04 - Oakland,75860,55245,0.73
1,07 - Los Angeles,78765,65968,0.84


In [40]:
# Show the operator's overall average (raw DataFrame display); the red rule in the
# chart below is drawn from this frame's `avg` column.
agency_avg

Unnamed: 0,agency_name,num_sched,num_vp,avg
0,Big Blue Bus,78765,65968,0.84


In [88]:
# Heading and legend note for the district comparison chart.
display(HTML("<strong>Average Percent of Scheduled Trips with RT Vehicle Position Data by District</strong>"))
display(Markdown(f"<strong><span style='color:red'>Red Line </span></strong> represents "
                 f" {(agency.iloc[0]['agency_name'])} average"))

# One wide bar per Caltrans district.
bar = utils.bar_chart_over_time(
    dist_avg, 'Caltrans District', 'Average', 'Caltrans District', '%', 'x', ''
)
bar = bar.mark_bar(size=200)

# Horizontal rule at the selected operator's overall average.
rule = (
    alt.Chart(agency_avg)
    .mark_rule(color='red', size=2)
    .encode(y=alt.Y('avg:Q'))
)

bar + rule

<strong><span style='color:red'>Red Line </span></strong> represents  Big Blue Bus average