# Big Blue Bus Route Analysis

In [1]:
import os
# Set ahead of the calitp imports below. 800_000_000_000 bytes is 800 GB (decimal);
# presumably this caps the bytes a BigQuery query may scan -- TODO confirm against calitp.
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)

from calitp.tables import tbl
from calitp import query_sql, magics
import calitp.magics
import branca

import shared_utils

from siuba import *
import pandas as pd

import datetime as dt
import time
from zoneinfo import ZoneInfo

import importlib

import gcsfs
fs = gcsfs.GCSFileSystem()

from tqdm import tqdm_notebook
from tqdm.notebook import trange, tqdm

from IPython.display import display, Markdown




In [2]:
import utils

In [3]:
import altair as alt
from dla_utils import _dla_utils

from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

In [4]:
# Cal-ITP ITP ID for Big Blue Bus; used below to filter on `calitp_itp_id`.
bbb = 300

In [5]:
# Load the table through the local `utils` helper. Judging by the samples shown
# further down, it appears to hold one row per route and service date with
# num_sched, num_vp and pct_w_vp columns -- confirm in utils.read_data.
rt_sched = utils.read_data()

In [9]:
# Filter the data down to one operator (ITP ID `bbb`) for the route analysis.
# NOTE: this rebinds `rt_sched`, so the unfiltered frame is only recoverable by
# calling utils.read_data() again.
rt_sched = rt_sched>>filter(_.calitp_itp_id == bbb)

In [10]:
# Spot-check five random rows; no random_state is passed, so the rows differ on each run.
rt_sched.sample(5)

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,num_sched,num_vp,pct_w_vp,weekday,month
8920,300,Big Blue Bus,0,3487,9,2022-06-19,38,24,0.63,Sunday,June
11105,300,Big Blue Bus,0,3481,3,2022-05-04,155,150,0.97,Wednesday,May
8919,300,Big Blue Bus,0,3483,5,2022-05-17,26,24,0.92,Tuesday,May
10909,300,Big Blue Bus,0,3485,7,2022-05-09,155,110,0.71,Monday,May
10719,300,Big Blue Bus,0,3480,2,2022-05-19,88,86,0.98,Thursday,May


In [17]:
# Roll the route-level rows up with utils.agg_by_date; the result displayed below
# carries total_num_sched, total_num_vp and pct_w_vp for each service_date.
rt_sched_agg = utils.agg_by_date(rt_sched,'num_sched', 'num_vp')

In [18]:
rt_sched_agg

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,service_date,weekday,month,total_num_sched,total_num_vp,pct_w_vp
0,300,Big Blue Bus,0,2022-05-01,Sunday,May,771,527,0.68
1,300,Big Blue Bus,0,2022-05-02,Monday,May,1519,1069,0.70
2,300,Big Blue Bus,0,2022-05-03,Tuesday,May,1624,1473,0.91
3,300,Big Blue Bus,0,2022-05-04,Wednesday,May,1622,1487,0.92
4,300,Big Blue Bus,0,2022-05-05,Thursday,May,1628,1455,0.89
...,...,...,...,...,...,...,...,...,...
56,300,Big Blue Bus,0,2022-06-26,Sunday,June,772,500,0.65
57,300,Big Blue Bus,0,2022-06-27,Monday,June,1465,1017,0.69
58,300,Big Blue Bus,0,2022-06-28,Tuesday,June,1559,1457,0.93
59,300,Big Blue Bus,0,2022-06-29,Wednesday,June,1548,1454,0.94


In [19]:
## Looking at Routes

In [20]:
# Number of rows recorded for each route, largest count first.
(
    rt_sched
    >> count(_.route_id)
    >> arrange(-_.n)
)

Unnamed: 0,route_id,n
0,3479,61
1,3480,61
2,3481,61
4,3485,61
5,3486,61
6,3487,61
8,3489,61
9,3490,61
12,3493,61
13,3494,61


In [24]:
# How many distinct routes appear on one example service date.
(
    rt_sched
    >> filter(_.service_date == '2022-06-01')
    >> summarize(n_routes=_.route_id.nunique())
)

Unnamed: 0,n_routes
0,19


In [25]:
# Distribution of the number of distinct routes observed per service date.
(
    rt_sched
    >> group_by(_.service_date)
    >> summarize(n_routes=_.route_id.nunique())
    >> arrange(-_.n_routes)
).n_routes.describe()

count   61.00
mean    16.43
std      3.92
min     10.00
25%     11.00
50%     19.00
75%     19.00
max     19.00
Name: n_routes, dtype: float64

In [26]:
# Spot-check a single route (route_id '3488') by viewing its first rows.
route_3488_rows = rt_sched >> filter(_.route_id == '3488')
route_3488_rows.head()

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,num_sched,num_vp,pct_w_vp,weekday,month
6824,300,Big Blue Bus,0,3488,R10,2022-05-02,6,4,0.67,Monday,May
6825,300,Big Blue Bus,0,3488,R10,2022-06-13,6,4,0.67,Monday,June
6826,300,Big Blue Bus,0,3488,R10,2022-06-06,6,4,0.67,Monday,June
6827,300,Big Blue Bus,0,3488,R10,2022-05-23,6,4,0.67,Monday,May
6828,300,Big Blue Bus,0,3488,R10,2022-05-16,6,4,0.67,Monday,May


In [28]:
# Interactive line chart of pct_w_vp per route over service dates.
# - `highlight`: hovering thickens the nearest line.
# - `selection`: clicking legend entries dims the routes that are not selected.
highlight = alt.selection(
    type='single',
    on="mouseover",
    fields=['route_short_name', "pct_w_vp"],
    nearest=True,
)
selection = alt.selection_multi(fields=['route_short_name'], bind='legend')

line = (
    alt.Chart(rt_sched)
    .mark_line()
    .encode(
        x=alt.X('service_date', title=_dla_utils.labeling('service_date')),
        y=alt.Y('pct_w_vp', title='Percent with Vehicle Positions'),
        color='route_short_name',
        strokeDash='route_short_name',
        tooltip=alt.Tooltip(["route_short_name", "service_date", "pct_w_vp"]),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        size=alt.condition(~highlight, alt.value(2), alt.value(5)),
    )
    .properties(
        # Title typo fixed: "Postions" -> "Positions".
        title={
            "text": [
                "Big Blue Bus: Average Percent of Scheduled Trips with Vehicle Positions Data",
                "by Route",
            ],
        },
        width=600,
    )
    .add_selection(selection, highlight)
)

# Apply the shared style preset and the project tooltip helper, then re-assert
# the width on the displayed chart in case the preset changed it.
line_chart = styleguide.preset_chart_config(line)
line_chart = _dla_utils.add_tooltip(line_chart, 'route_short_name', 'pct_w_vp')
line_chart.properties(width=600)

In [29]:
# Routes with rows where fewer than half of scheduled trips had vehicle
# positions, and how many such rows each route has.
(
    rt_sched
    >> filter(_.pct_w_vp < .5)
    >> count(_.route_id)
)

Unnamed: 0,route_id,n
0,3489,1
1,3491,1
2,3501,40


* Something odd is happening with route 3501: it has 40 rows with `pct_w_vp` below 0.5.
* Checking trip updates.

In [30]:
# All rows for route 3501 in chronological order.
(
    rt_sched
    >> filter(_.route_id == '3501')
    >> arrange(_.service_date)
)

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,num_sched,num_vp,pct_w_vp,weekday,month
272,300,Big Blue Bus,0,3501,44,2022-05-02,104,0,0.0,Monday,May
276,300,Big Blue Bus,0,3501,44,2022-05-03,104,0,0.0,Tuesday,May
286,300,Big Blue Bus,0,3501,44,2022-05-04,104,0,0.0,Wednesday,May
278,300,Big Blue Bus,0,3501,44,2022-05-05,104,0,0.0,Thursday,May
263,300,Big Blue Bus,0,3501,44,2022-05-06,52,0,0.0,Friday,May
265,300,Big Blue Bus,0,3501,44,2022-05-09,104,0,0.0,Monday,May
277,300,Big Blue Bus,0,3501,44,2022-05-10,104,0,0.0,Tuesday,May
300,300,Big Blue Bus,0,3501,44,2022-05-11,104,0,0.0,Wednesday,May
301,300,Big Blue Bus,0,3501,44,2022-05-12,104,0,0.0,Thursday,May
281,300,Big Blue Bus,0,3501,44,2022-05-13,52,0,0.0,Friday,May


In [31]:

## Seeing what the daily pct_w_vp would look like without route 3501

In [32]:
# Daily aggregate with route 3501 excluded.
no_3501 = utils.agg_by_date(
    rt_sched >> filter(_.route_id != '3501'),
    'num_sched',
    'num_vp',
)

In [33]:
no_3501.sample()

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,service_date,weekday,month,total_num_sched,total_num_vp,pct_w_vp
30,300,Big Blue Bus,0,2022-05-31,Tuesday,May,1412,1067,0.76


In [34]:
# Daily aggregate over every route (the same call that produced `rt_sched_agg`),
# kept under a name that pairs with `no_3501`.
with_3501 = utils.agg_by_date(
    rt_sched,
    'num_sched',
    'num_vp',
)
with_3501.sample()

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,service_date,weekday,month,total_num_sched,total_num_vp,pct_w_vp
53,300,Big Blue Bus,0,2022-06-23,Thursday,June,1558,1481,0.95


In [35]:

# Compare the daily pct_w_vp with and without route 3501.
#
# The earlier version bound opacity to `with_3501:Q` / `no_3501:Q`, but neither
# aggregated frame has such a column, so the legend was not backed by any data.
# Each frame now gets a real `series` label column, and colour is encoded from it
# with an explicit scale so both layers share one legend.
SERIES_COLORS = {"With 3501": "navy", "Without 3501": "orange"}


def _pct_w_vp_line(frame, label):
    """Return a line chart of pct_w_vp by service_date for one labelled series.

    Parameters
    ----------
    frame : pandas.DataFrame
        Aggregated frame with `service_date` and `pct_w_vp` columns.
    label : str
        Legend label for this series; must be a key of SERIES_COLORS.

    Returns
    -------
    alt.Chart
        Line chart coloured by the constant `series` column.
    """
    return (
        # assign() returns a copy, so the caller's frame is left untouched.
        alt.Chart(frame.assign(series=label))
        .mark_line()
        .encode(
            x=alt.X('service_date', title=_dla_utils.labeling('service_date')),
            y=alt.Y('pct_w_vp', title=_dla_utils.labeling('Percent with Vehicle Positions')),
            color=alt.Color(
                'series:N',
                scale=alt.Scale(
                    domain=list(SERIES_COLORS),
                    range=list(SERIES_COLORS.values()),
                ),
                legend=alt.Legend(title="Route 3501"),
            ),
        )
    )


line_1 = _pct_w_vp_line(with_3501, "With 3501")
line_2 = _pct_w_vp_line(no_3501, "Without 3501")
chart = line_1 + line_2
chart


In [36]:
rt_sched.sample()

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,num_sched,num_vp,pct_w_vp,weekday,month
10742,300,Big Blue Bus,0,3493,17,2022-05-03,95,87,0.92,Tuesday,May


In [38]:
# Per-month summary of pct_w_vp; the helper's output has `month` and `avg` columns
# (presumably the mean of the route-level pct_w_vp values -- confirm in utils.groupby_onecol).
utils.groupby_onecol((rt_sched), 'month', 'pct_w_vp')

Unnamed: 0,month,avg
1,May,0.85
0,June,0.86


In [39]:
rt_sched.sample()

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,num_sched,num_vp,pct_w_vp,weekday,month
10425,300,Big Blue Bus,0,3487,9,2022-06-29,68,68,1.0,Wednesday,June


In [40]:
# Summary of pct_w_vp for each route/weekday pair; the output columns are
# route_short_name, weekday and avg.
# NOTE(review): the role of the trailing 'weekday' argument is defined in
# utils.groupby_twocol -- confirm there.
route_weekday = (utils.groupby_twocol((rt_sched), 'route_short_name', 'weekday', 'pct_w_vp', 'weekday'))

In [41]:
route_weekday.sample()

Unnamed: 0,route_short_name,weekday,avg
74,7,Monday,0.72


In [42]:
# Monday-first weekday order, passed as the x-axis `sort` in the weekday chart
# so the days are not ordered alphabetically.
cats_day = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']


In [43]:
# Interactive line chart of the `avg` column of `route_weekday` per route across weekdays.
# - `highlight`: hovering thickens the nearest line.
# - `selection`: clicking legend entries dims the routes that are not selected.
highlight = alt.selection(
    type='single',
    on="mouseover",
    fields=['route_short_name', "avg"],
    nearest=True,
)
selection = alt.selection_multi(fields=['route_short_name'], bind='legend')

line = (
    alt.Chart(route_weekday)
    # Hollow point markers make the individual weekday values visible on each line.
    .mark_line(point={"filled": False, "fill": "white"})
    .encode(
        # `cats_day` forces Monday-to-Sunday ordering on the x-axis.
        x=alt.X('weekday', title=_dla_utils.labeling('weekday'), sort=cats_day),
        y=alt.Y('avg:Q', title='Average Percent with Vehicle Positions'),
        color='route_short_name',
        tooltip=alt.Tooltip(["route_short_name", "weekday", "avg"]),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        size=alt.condition(~highlight, alt.value(2), alt.value(5)),
    )
    .properties(
        # Title typo fixed: "Postions" -> "Positions".
        title={
            "text": [
                "BBB Average Percent of Scheduled Trips with Vehicle Positions Data",
                "by Route",
            ],
        },
        width=600,
    )
    .add_selection(selection, highlight)
)
line
#line_chart = styleguide.preset_chart_config(line)