## Round 1 
* https://github.com/cal-itp/data-analyses/issues/1059
* cd rt_segment_speeds && pip install -r requirements.txt && cd ../_shared_utils && make setup_env
* https://docs.google.com/document/d/1I1WiqlmU06W6iLCi7cZQrOCLILkrEfABEkcU0Jys7f0/edit
* https://route-speeds--cal-itp-data-analyses.netlify.app/name_bay-area-511-muni-schedule/0__report__name_bay-area-511-muni-schedule
* https://posit-dev.github.io/great-tables/get-started/nanoplots.html
* https://docs.pola.rs/py-polars/html/reference/api/polars.from_pandas.html

In [1]:
%%capture
# import warnings
# warnings.filterwarnings('ignore')

import altair as alt
import calitp_data_analysis.magics
import geopandas as gpd
import great_tables as gt
import pandas as pd
from calitp_data_analysis import calitp_color_palette as cp
from great_tables import md
from IPython.display import HTML, Markdown, display
from segment_speed_utils.project_vars import RT_SCHED_GCS
from shared_utils import rt_dates, rt_utils

# Use Altair's "html" renderer for chart display in this notebook.
alt.renderers.enable("html")
# max_rows=None lifts the default data transformer's row limit.
alt.data_transformers.enable("default", max_rows=None)

In [2]:
# Notebook display settings: show up to 100 columns, print floats with two
# decimals, and never truncate rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [3]:
name = "SBMTD Schedule"

In [4]:
# %%capture_parameters
# name

### Data

In [5]:
# The digest lives under
# calitp-analytics-data/data-analyses/rt_vs_schedule/digest.
# The parquet filter keeps only the rows whose `name` equals the operator
# selected in `name`.
digest_path = f"{RT_SCHED_GCS}digest/schedule_vp_metrics.parquet"
operator_filter = [[("name", "==", name)]]
df = pd.read_parquet(digest_path, filters=operator_filter)

In [6]:
most_recent_date = df.service_date.max()

In [7]:
most_recent_date

Timestamp('2024-03-13 00:00:00')

In [8]:
df.service_date.min()

Timestamp('2023-03-15 00:00:00')

In [9]:
df.head(2)

Unnamed: 0,schedule_gtfs_dataset_key,direction_id,time_period,avg_scheduled_service_minutes,avg_stop_miles,n_scheduled_trips,frequency,road_freq_category,road_typology,pct_typology,service_date,minutes_atleast1_vp,minutes_atleast2_vp,rt_service_minutes,scheduled_service_minutes,total_vp,vp_in_shape,n_trips,vp_per_minute,pct_in_shape,pct_rt_journey_vp,pct_rt_journey_atleast2_vp,pct_sched_journey_atleast1_vp,pct_sched_journey_atleast2_vp,n_vp_trips,pct_rt_journey_atleast1_vp,sched_rt_category,speed_mph,name,route_id,route_combined_name,base64_url,organization_source_record_id,organization_name,caltrans_district
14844,239f3baf3dd3b9e9464f66a777f9897d,0.0,all_day,15.0,0.18,54.0,2.25,very_high,downtown_local,0.44,2023-09-13,863.0,258.0,900.19,795.0,1136.0,925.0,53.0,1.26,0.81,0.96,0.29,1.0,0.33,,,schedule_and_vp,8.25,SBMTD Schedule,1,1 West Santa Barbara,aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZmVlZC56aXA=,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,05 - San Luis Obispo
14845,239f3baf3dd3b9e9464f66a777f9897d,0.0,all_day,15.0,0.18,54.0,2.25,very_high,downtown_local,0.44,2023-10-11,839.0,242.0,945.11,735.0,1093.0,848.0,49.0,1.16,0.78,0.89,0.26,1.0,0.33,,,schedule_and_vp,8.39,SBMTD Schedule,1,1 West Santa Barbara,aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZmVlZC56aXA=,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,05 - San Luis Obispo


### Test out Altair `extract_data`
* https://github.com/altair-viz/altair-transform/blob/master/altair_transform/core.py#L55
* Why do you want to use this?

In [10]:
from typing import List, Union

import altair as alt
import pandas as pd
from altair_transform.extract import extract_transform
from altair_transform.transform import visit
from altair_transform.utils import to_dataframe

In [11]:
# Export list for the altair_transform-style helpers defined in this notebook.
# NOTE(review): "transform_chart" is listed, but no definition of it is
# visible in this notebook -- confirm it exists or drop it from the list.
__all__ = ["apply", "extract_data", "transform_chart"]

In [12]:
def reverse_snakecase(df):
    """
    Turn snake_case column labels into Title Case labels.

    Underscores become spaces, leading and trailing whitespace is
    trimmed, and every word is capitalized. The labels are rewritten on
    the frame that was passed in, and that same frame is returned.
    """
    spaced = df.columns.str.replace("_", " ")
    trimmed = spaced.str.strip()
    df.columns = trimmed.str.title()
    return df

In [13]:
def apply(
    df: pd.DataFrame,
    transform: Union[alt.Transform, List[alt.Transform]],
    inplace: bool = False,
) -> pd.DataFrame:
    """Run one or more Altair transforms against a dataframe.

    Parameters
    ----------
    df : pd.DataFrame
        The data to transform.
    transform : list|dict
        One transform specification, or a list of them. Every
        specification has to be valid under Altair's transform schema.
    inplace : bool
        When False (the default) the transforms work on a copy of ``df``.
        When True, ``df`` itself is handed over and may be modified.

    Returns
    -------
    df_transformed : pd.DataFrame
        The transformed dataframe. If ``transform`` is ``alt.Undefined``
        the working frame is returned without any transform applied.

    Example
    -------
    >>> import pandas as pd
    >>> data = pd.DataFrame({'x': range(5), 'y': list('ABCAB')})
    >>> chart = alt.Chart(data).transform_aggregate(sum_x='sum(x)', groupby=['y'])
    >>> apply(data, chart.transform)
       y  sum_x
    0  A      3
    1  B      5
    2  C      2
    """
    frame = df if inplace else df.copy()
    # A chart without transforms exposes alt.Undefined; nothing to run then.
    if transform is alt.Undefined:
        return frame
    return visit(transform, frame)

In [14]:
def extract_data(
    chart: alt.Chart, apply_encoding_transforms: bool = True
) -> pd.DataFrame:
    """Return a chart's data after its transforms have been applied.

    Only data and transforms declared at the top level of the chart
    are taken into account.

    Parameters
    ----------
    chart : alt.Chart
        Chart whose data and transform are read.
    apply_encoding_transforms : bool
        If True (default), transforms written inside an encoding are
        applied in addition to those in the chart's ``transform``
        attribute.

    Returns
    -------
    df_transformed : pd.DataFrame
        The extracted and transformed dataframe.

    Example
    -------
    >>> import pandas as pd
    >>> data = pd.DataFrame({'x': range(5), 'y': list('ABCAB')})
    >>> chart = alt.Chart(data).mark_bar().encode(x='sum(x)', y='y')
    >>> extract_data(chart)
       y  sum_x
    0  A      3
    1  B      5
    2  C      2
    """
    source = extract_transform(chart) if apply_encoding_transforms else chart
    frame = to_dataframe(source.data, source)
    return apply(frame, source.transform)

In [15]:
data = pd.DataFrame({"x": range(5), "y": list("ABCAB")})

In [16]:
chart = alt.Chart(data).mark_bar().encode(x="sum(x)", y="y")

In [17]:
chart

In [18]:
extract_data(chart)

Unnamed: 0,y,sum_x
0,A,3
1,B,5
2,C,2


### Monthly aggregated service hours by day_type, time_of_day
* There doesn't seem to be a `day_type` column (weekend vs. weekday) in the data?
* Are these service hours scheduled, RT, or both?

In [19]:
def tag_weekend(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``day_type`` column labelling each ``service_date``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``service_date`` column holding datetimes, or values
        that ``pd.to_datetime`` can parse (for example ISO date strings).

    Returns
    -------
    pd.DataFrame
        The frame that was passed in, with ``day_type`` set to "Weekday"
        for Monday through Friday and "Weekend" otherwise. The column is
        added in place.
    """
    # weekday numbers Monday as 0, so values below 5 are Monday-Friday.
    day_number = pd.to_datetime(df["service_date"]).dt.weekday
    df["day_type"] = (day_number < 5).map({True: "Weekday", False: "Weekend"})

    return df

In [20]:
df = tag_weekend(df)

In [21]:
# calitp-analytics-data/data-analyses/rt_vs_schedule/digest
df[["service_date", "day_type"]].sample(3)

Unnamed: 0,service_date,day_type
105894,2023-08-15,Weekday
106537,2023-04-12,Weekday
15404,2023-12-13,Weekday


In [22]:
df.day_type.value_counts()

Weekday    1913
Name: day_type, dtype: int64

In [23]:
# "MM/YYYY" label, used as the monthly grouping key and the chart x-axis.
# NOTE(review): the labels are plain strings, so sorting is lexical
# ("01/2024" sorts ahead of "03/2023") rather than chronological.
df["month_year"] = df.service_date.dt.strftime("%m/%Y")

In [24]:
df.sample()

Unnamed: 0,schedule_gtfs_dataset_key,direction_id,time_period,avg_scheduled_service_minutes,avg_stop_miles,n_scheduled_trips,frequency,road_freq_category,road_typology,pct_typology,service_date,minutes_atleast1_vp,minutes_atleast2_vp,rt_service_minutes,scheduled_service_minutes,total_vp,vp_in_shape,n_trips,vp_per_minute,pct_in_shape,pct_rt_journey_vp,pct_rt_journey_atleast2_vp,pct_sched_journey_atleast1_vp,pct_sched_journey_atleast2_vp,n_vp_trips,pct_rt_journey_atleast1_vp,sched_rt_category,speed_mph,name,route_id,route_combined_name,base64_url,organization_source_record_id,organization_name,caltrans_district,day_type,month_year
131554,239f3baf3dd3b9e9464f66a777f9897d,1.0,offpeak,,,,,,,,2023-12-13,264.0,63.0,320.7,291.0,330.0,330.0,9.0,1.03,1.0,0.82,0.2,0.91,0.22,,,vp_only,25.66,SBMTD Schedule,24X,24X UCSB Express,aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZmVlZC56aXA=,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,05 - San Luis Obispo,Weekday,12/2023


In [25]:
# Mean RT and scheduled service minutes for each
# district / month / organization / route / day type / time period group.
route_group_keys = [
    "caltrans_district",
    "month_year",
    "organization_source_record_id",
    "organization_name",
    "route_combined_name",
    "day_type",
    "time_period",
]
service_minute_aggs = dict.fromkeys(
    ["rt_service_minutes", "scheduled_service_minutes"], "mean"
)
monthly_service_routes = (
    df.groupby(route_group_keys).agg(service_minute_aggs).reset_index()
)

In [26]:
monthly_service_routes.shape

(994, 9)

In [27]:
# Convert the averaged RT service minutes to hours.
monthly_service_routes["mean_rt_service_hrs"] = monthly_service_routes[
    "rt_service_minutes"
].div(60)

In [28]:
# Convert the averaged scheduled service minutes to hours.
monthly_service_routes["mean_scheduled_service_hrs"] = monthly_service_routes[
    "scheduled_service_minutes"
].div(60)

In [29]:
monthly_service_routes.shape

(994, 11)

In [30]:
monthly_service_routes.loc[
    (monthly_service_routes.route_combined_name == "24X UCSB Express")
    & (monthly_service_routes.month_year == "01/2024")
]

Unnamed: 0,caltrans_district,month_year,organization_source_record_id,organization_name,route_combined_name,day_type,time_period,rt_service_minutes,scheduled_service_minutes,mean_rt_service_hrs,mean_scheduled_service_hrs
37,05 - San Luis Obispo,01/2024,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,24X UCSB Express,Weekday,all_day,847.2,812.5,14.12,13.54
38,05 - San Luis Obispo,01/2024,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,24X UCSB Express,Weekday,offpeak,431.86,426.0,7.2,7.1
39,05 - San Luis Obispo,01/2024,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,24X UCSB Express,Weekday,peak,415.33,386.5,6.92,6.44


#### Monthly without Routes

In [33]:
# Same averages as the route-level table, without the route key:
# one row per district / month / organization / day type / time period.
operator_group_keys = [
    "caltrans_district",
    "month_year",
    "organization_source_record_id",
    "organization_name",
    "day_type",
    "time_period",
]
operator_minute_aggs = dict.fromkeys(
    ["rt_service_minutes", "scheduled_service_minutes"], "mean"
)
monthly_service = (
    df.groupby(operator_group_keys).agg(operator_minute_aggs).reset_index()
)

In [34]:
# Convert the averaged RT service minutes to hours.
monthly_service["mean_rt_service_hrs"] = monthly_service["rt_service_minutes"].div(60)

In [35]:
# Convert the averaged scheduled service minutes to hours.
monthly_service["mean_scheduled_service_hrs"] = monthly_service[
    "scheduled_service_minutes"
].div(60)

In [36]:
monthly_service.shape

(36, 10)

In [37]:
monthly_service.loc[(monthly_service.month_year == "01/2024")]

Unnamed: 0,caltrans_district,month_year,organization_source_record_id,organization_name,day_type,time_period,rt_service_minutes,scheduled_service_minutes,mean_rt_service_hrs,mean_scheduled_service_hrs
0,05 - San Luis Obispo,01/2024,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,Weekday,all_day,509.62,493.73,8.49,8.23
1,05 - San Luis Obispo,01/2024,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,Weekday,offpeak,383.34,375.38,6.39,6.26
2,05 - San Luis Obispo,01/2024,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,Weekday,peak,273.23,262.25,4.55,4.37


#### Why are there only weekdays?

In [38]:
monthly_service.day_type.value_counts()

Weekday    36
Name: day_type, dtype: int64

In [39]:
# Small bar chart of mean RT service hours per month_year, colored by
# day_type; reused below with a per-time_period filter.
base = (
    alt.Chart(monthly_service)
    .mark_bar()
    .encode(x="month_year", y="mean_rt_service_hrs", color="day_type:N")
    .properties(width=160, height=160)
)

In [40]:
base

In [41]:
chart = alt.hconcat()

In [42]:
# Append one filtered copy of `base` per time_period to the horizontal
# concatenation, then display the combined chart.
for time_period in monthly_service.time_period.unique():
    chart |= base.transform_filter(alt.datum.time_period == time_period)
chart

In [43]:
# Dashed red rule positioned at mean(mean_rt_service_hrs), computed from
# monthly_service.
ruler = (
    alt.Chart(monthly_service)
    .mark_rule(color="red", strokeDash=[10, 7])
    .encode(y="mean(mean_rt_service_hrs):Q")
)

In [44]:
monthly_service.columns

Index(['caltrans_district', 'month_year', 'organization_source_record_id',
       'organization_name', 'day_type', 'time_period', 'rt_service_minutes',
       'scheduled_service_minutes', 'mean_rt_service_hrs',
       'mean_scheduled_service_hrs'],
      dtype='object')

In [45]:
# Bars of mean RT service hours per month, faceted into one column per
# time_period. The y domain is pinned to [0, max(mean_rt_service_hrs)] and
# bars are colored by time_period from the Cal-ITP sequential palette.
# NOTE(review): title="test" and the raw column names used as axis/legend
# titles look like placeholders -- confirm before publishing.
faceted_chart = (
    alt.Chart(monthly_service)
    .mark_bar()
    .encode(
        x=alt.X("month_year", title="Date"),
        y=alt.Y(
            "mean_rt_service_hrs",
            title="mean_rt_service_hrs",
            scale=alt.Scale(domain=[0, monthly_service.mean_rt_service_hrs.max()]),
        ),
        color=alt.Color(
            "time_period",
            title="time_period",
            scale=alt.Scale(range=cp.CALITP_SEQUENTIAL_COLORS),
        ),
        tooltip=[
            "caltrans_district",
            "month_year",
            "organization_source_record_id",
            "organization_name",
            "day_type",
            "time_period",
            "rt_service_minutes",
            "scheduled_service_minutes",
            "mean_rt_service_hrs",
            "mean_scheduled_service_hrs",
        ],
    )
    .facet(
        column=alt.Column("time_period", title="time_period"),
    )
    .interactive()
).properties(title="test")

In [46]:
faceted_chart

In [47]:
# Grouped bars: one group per month_year, with xOffset placing the
# time_period bars side by side inside each group.
bar_chart = (
    alt.Chart(monthly_service)
    .mark_bar()
    .encode(
        x=alt.X("month_year"),
        xOffset="time_period:N",
        y=alt.Y("mean_rt_service_hrs:Q"),
        color=alt.Color(
            "time_period:N", scale=alt.Scale(range=cp.CALITP_SEQUENTIAL_COLORS)
        ),
    )
)

In [48]:
bar_chart + ruler

#### Ruler not working

In [49]:
def fake_faceted_chart(df: pd.DataFrame, column_to_facet: str, ruler_column: str):
    """Display one bar chart per facet value, each layered with a mean rule.

    Stands in for a real Altair facet by looping over the distinct values
    of ``column_to_facet`` and displaying a separate layered chart for each.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot. The bars read the ``month_year``, ``time_period`` and
        ``mean_rt_service_hrs`` columns.
    column_to_facet : str
        Column whose distinct values each get their own chart; the value
        is used as the chart title.
    ruler_column : str
        Column whose mean, taken over all of ``df``, positions the dashed
        red rule.

    Returns
    -------
    None
        Charts are shown through ``display``; nothing is returned.
    """
    # Build the rule from the frame that was passed in rather than from the
    # notebook-level `monthly_service`, so the function honors its argument.
    ruler = (
        alt.Chart(df)
        .mark_rule(color="red", strokeDash=[10, 7])
        .encode(y=f"mean({ruler_column}):Q")
    )

    for facet_value in df[column_to_facet].unique():
        subset = df.loc[df[column_to_facet] == facet_value]
        # NOTE(review): the bars always plot mean_rt_service_hrs, whatever
        # `ruler_column` is -- confirm that is intended.
        bar_chart = (
            alt.Chart(subset)
            .mark_bar()
            .encode(
                x=alt.X("month_year"),
                xOffset="time_period:N",
                y=alt.Y("mean_rt_service_hrs:Q"),
                color=alt.Color(
                    "mean_rt_service_hrs:Q",
                    scale=alt.Scale(range=cp.CALITP_SEQUENTIAL_COLORS),
                ),
            )
            .properties(title=facet_value)
        )

        display(bar_chart + ruler)

In [50]:
fake_faceted_chart(monthly_service, "time_period", "mean_rt_service_hrs")

### Monthly Trends
* https://posit-dev.github.io/great-tables/articles/intro.html

In [51]:
# Sum of n_scheduled_trips for each service_date (rows) and
# sched_rt_category (columns); empty combinations become 0.
trips_by_category = pd.crosstab(
    index=df["service_date"],
    columns=df["sched_rt_category"],
    values=df["n_scheduled_trips"],
    aggfunc="sum",
)
by_date_category = trips_by_category.reset_index().fillna(0)

In [52]:
by_date_category = reverse_snakecase(by_date_category)

In [53]:
by_date_category.columns

Index(['Service Date', 'Schedule Only', 'Vp Only', 'Schedule And Vp'], dtype='object', name='sched_rt_category')

In [54]:
# Table of daily trips per GTFS availability category: Service Date is the
# row label and the three category columns sit under one spanner, formatted
# as integers.
(
    gt.GT(by_date_category, rowname_col="Service Date")
    .tab_spanner(
        label="Daily Trips by GTFS Availability",
        columns=["Schedule Only", "Vp Only", "Schedule And Vp"],
    )
    .fmt_integer(["Schedule Only", "Vp Only", "Schedule And Vp"])
    .tab_options(container_width="100%")
    .tab_options(table_font_size="12px")
)

0,1,2,3
2023-03-15,4,0,1672
2023-04-12,3,0,1673
2023-05-17,18,0,1658
2023-06-14,2,0,1604
2023-07-12,0,0,1598
2023-08-15,0,0,1612
2023-09-13,14,0,1684
2023-10-11,12,0,1716
2023-12-13,18,0,1710
2024-01-17,14,0,1702


In [55]:
# Count the distinct routes in each sched_rt_category, using only the
# all_day rows.
all_day_rows = df.loc[df["time_period"] == "all_day"]
routes_per_category = all_day_rows.groupby("sched_rt_category").agg(
    {"route_combined_name": "nunique"}
)
route_categories = routes_per_category.reset_index()

In [56]:
# Make the category values readable: underscores to spaces, then Title Case.
category_labels = route_categories["sched_rt_category"].str.replace("_", " ")
route_categories["sched_rt_category"] = category_labels.str.title()

In [57]:
# Test with nanoplots

#### GTFS Availability
* Change Category values to something more understandable?

In [70]:
# Table of route counts per GTFS availability category. Rows with missing
# values are dropped first, columns are relabelled, and cells are colored
# from a three-color palette.
(
    gt.GT(data=route_categories.dropna())
    .fmt_integer(columns=["route_combined_name"], compact=True)
    .cols_label(route_combined_name="# Routes", sched_rt_category="Category")
    .tab_options(container_width="100%")
    .tab_header(
        title=md("Routes with GTFS Availability"),
    )
    .tab_options(table_font_size="12px")
).data_color(
    palette=[
        "#2EA8CE",
        "#EB9F3C",
        "#F4D837",
    ],
    na_color="#FFE4C4",
)

Routes with GTFS Availability,Routes with GTFS Availability.1
Schedule Only,16
Vp Only,3
Schedule And Vp,35
Category,# Routes


#### Route Stats

In [59]:
# Keys shared by the all-day, peak and offpeak tables.
route_merge_cols = ["route_combined_name", "direction_id"]

# All-day rows for the most recent service date.
is_latest_all_day = (df["service_date"] == most_recent_date) & (
    df["time_period"] == "all_day"
)
all_day_cols = route_merge_cols + [
    "avg_scheduled_service_minutes",
    "avg_stop_miles",
    "n_scheduled_trips",
    "sched_rt_category",
]
all_day_stats = df.loc[is_latest_all_day, all_day_cols]

In [60]:
all_day_stats.head(2)

Unnamed: 0,route_combined_name,direction_id,avg_scheduled_service_minutes,avg_stop_miles,n_scheduled_trips,sched_rt_category
14849,1 West Santa Barbara,0.0,15.0,0.18,54.0,schedule_and_vp
14867,1 West Santa Barbara,1.0,20.0,0.19,55.0,schedule_and_vp


In [61]:
# Peak rows for the most recent service date, with metric columns prefixed
# so they stay distinct after merging.
is_latest_peak = (df["service_date"] == most_recent_date) & (
    df["time_period"] == "peak"
)
peak_cols = route_merge_cols + ["speed_mph", "n_scheduled_trips", "frequency"]
peak_renames = {
    "speed_mph": "peak_avg_speed",
    "n_scheduled_trips": "peak_scheduled_trips",
    "frequency": "peak_hourly_freq",
}
peak_stats = df.loc[is_latest_peak, peak_cols].rename(columns=peak_renames)

In [62]:
peak_stats.head(2)

Unnamed: 0,route_combined_name,direction_id,peak_avg_speed,peak_scheduled_trips,peak_hourly_freq
14861,1 West Santa Barbara,0.0,9.14,27.0,3.38
14879,1 West Santa Barbara,1.0,8.68,28.0,3.5


In [63]:
# Offpeak rows for the most recent service date, with metric columns
# prefixed so they stay distinct after merging.
is_latest_offpeak = (df["service_date"] == most_recent_date) & (
    df["time_period"] == "offpeak"
)
offpeak_cols = route_merge_cols + ["speed_mph", "n_scheduled_trips", "frequency"]
offpeak_renames = {
    "speed_mph": "offpeak_avg_speed",
    "n_scheduled_trips": "offpeak_scheduled_trips",
    "frequency": "offpeak_hourly_freq",
}
offpeak_stats = df.loc[is_latest_offpeak, offpeak_cols].rename(
    columns=offpeak_renames
)

In [64]:
offpeak_stats.head(2)

Unnamed: 0,route_combined_name,direction_id,offpeak_avg_speed,offpeak_scheduled_trips,offpeak_hourly_freq
14855,1 West Santa Barbara,0.0,9.6,27.0,1.69
14873,1 West Santa Barbara,1.0,10.52,27.0,1.69


In [65]:
# One row per route and direction, combining all-day, peak and offpeak
# metrics. Both merges use pandas' default inner join, so a route/direction
# pair must be present in all three tables to be kept.
table_df = (
    all_day_stats.merge(peak_stats, on=route_merge_cols)
    .merge(offpeak_stats, on=route_merge_cols)
    .sort_values(by=["route_combined_name", "direction_id"])
    .reset_index(drop=True)
)

In [66]:
table_df.columns

Index(['route_combined_name', 'direction_id', 'avg_scheduled_service_minutes',
       'avg_stop_miles', 'n_scheduled_trips', 'sched_rt_category',
       'peak_avg_speed', 'peak_scheduled_trips', 'peak_hourly_freq',
       'offpeak_avg_speed', 'offpeak_scheduled_trips', 'offpeak_hourly_freq'],
      dtype='object')

In [67]:
table_df.sample()

Unnamed: 0,route_combined_name,direction_id,avg_scheduled_service_minutes,avg_stop_miles,n_scheduled_trips,sched_rt_category,peak_avg_speed,peak_scheduled_trips,peak_hourly_freq,offpeak_avg_speed,offpeak_scheduled_trips,offpeak_hourly_freq
12,19X Carp SBCC Express,0.0,37.33,0.53,3.0,schedule_and_vp,22.55,2.0,0.25,23.37,1.0,0.06


In [68]:
# Replace missing values with 0 in the numeric columns only; non-numeric
# columns such as sched_rt_category are left untouched.
numeric_cols = table_df.select_dtypes(include="number").columns
table_df[numeric_cols] = table_df[numeric_cols].fillna(0)

In [86]:
# NOTE(review): this table cell is disabled by wrapping it in a string
# literal, so running it only echoes the text. If it is re-enabled, check
# the label "Avg Stop Distance (meters)": the column is named
# avg_stop_miles, so the unit in the label looks wrong.
"""(
    gt.GT(data=table_df)
    .fmt_integer(
        columns=[
            "peak_scheduled_trips",
            "offpeak_scheduled_trips",
            "n_scheduled_trips",
        ],
        compact=True,
    )
    .fmt_number(
        columns=[
            "avg_scheduled_service_minutes",
            "avg_stop_miles",
            "n_scheduled_trips",
            "peak_avg_speed",
            "peak_scheduled_trips",
            "peak_hourly_freq",
            "offpeak_avg_speed",
            "offpeak_scheduled_trips",
            "offpeak_hourly_freq",
        ],
        decimals=2,
        compact=True,
        sep_mark=",",
    )
    .cols_hide(["sched_rt_category"])
    .cols_label(
        route_combined_name="Route",
        # sched_rt_category = "GTFS Category",
        avg_scheduled_service_minutes="Avg Scheduled Service (min)",
        avg_stop_miles="Avg Stop Distance (meters)",
        n_scheduled_trips="Daily Scheduled Trips",
        peak_scheduled_trips="Scheduled Peak Trips",
        offpeak_scheduled_trips="Scheduled Offpeak Trips",
        direction_id="Direction",
        peak_avg_speed="Peak Avg Speed (mph)",
        offpeak_avg_speed="Offpeak Avg Speed (mph)",
        peak_hourly_freq="Peak Hourly Freq",
        offpeak_hourly_freq="Offpeak Hourly Freq",
    )
    .tab_options(container_width="100%")
    .tab_header(title="Route Stats", subtitle="Testing")
    .tab_options(table_font_size="12px")
)"""

'(\n    gt.GT(data=table_df)\n    .fmt_integer(\n        columns=[\n            "peak_scheduled_trips",\n            "offpeak_scheduled_trips",\n            "n_scheduled_trips",\n        ],\n        compact=True,\n    )\n    .fmt_number(\n        columns=[\n            "avg_scheduled_service_minutes",\n            "avg_stop_miles",\n            "n_scheduled_trips",\n            "peak_avg_speed",\n            "peak_scheduled_trips",\n            "peak_hourly_freq",\n            "offpeak_avg_speed",\n            "offpeak_scheduled_trips",\n            "offpeak_hourly_freq",\n        ],\n        decimals=2,\n        compact=True,\n        sep_mark=",",\n    )\n    .cols_hide(["sched_rt_category"])\n    .cols_label(\n        route_combined_name="Route",\n        # sched_rt_category = "GTFS Category",\n        avg_scheduled_service_minutes="Avg Scheduled Service (min)",\n        avg_stop_miles="Avg Stop Distance (meters)",\n        n_scheduled_trips="Daily Scheduled Trips",\n        peak_sc

### Updating Already Made Charts

In [71]:
# sched_df drops the vp_only rows; vp_df drops the schedule_only rows.
not_vp_only = df["sched_rt_category"] != "vp_only"
not_schedule_only = df["sched_rt_category"] != "schedule_only"
sched_df = df.loc[not_vp_only]
vp_df = df.loc[not_schedule_only]

# All-day subset of sched_df, used for the scheduled-service chart.
sched_service_chart = sched_df.loc[sched_df["time_period"] == "all_day"]

In [72]:
sched_service_chart.sample()

Unnamed: 0,schedule_gtfs_dataset_key,direction_id,time_period,avg_scheduled_service_minutes,avg_stop_miles,n_scheduled_trips,frequency,road_freq_category,road_typology,pct_typology,service_date,minutes_atleast1_vp,minutes_atleast2_vp,rt_service_minutes,scheduled_service_minutes,total_vp,vp_in_shape,n_trips,vp_per_minute,pct_in_shape,pct_rt_journey_vp,pct_rt_journey_atleast2_vp,pct_sched_journey_atleast1_vp,pct_sched_journey_atleast2_vp,n_vp_trips,pct_rt_journey_atleast1_vp,sched_rt_category,speed_mph,name,route_id,route_combined_name,base64_url,organization_source_record_id,organization_name,caltrans_district,day_type,month_year
15040,239f3baf3dd3b9e9464f66a777f9897d,0.0,all_day,20.51,0.17,59.0,2.46,very_high,downtown_local,0.56,2024-02-14,1123.0,383.0,1194.87,1008.0,1536.0,1536.0,48.0,1.29,1.0,0.94,0.32,1.0,0.38,,,schedule_and_vp,10.49,SBMTD Schedule,2,2 East Santa Barbara,aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZmVlZC56aXA=,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,05 - San Luis Obispo,Weekday,02/2024


In [78]:
sched_service_chart.direction_id.value_counts()

1.00    355
0.00    316
Name: direction_id, dtype: int64

In [85]:
# Average scheduled service minutes per month, one facet column per
# direction_id.
# NOTE(review): sched_service_chart only holds all_day rows, so the
# time_period xOffset and color each have a single value here.
(
    alt.Chart(sched_service_chart)
    .mark_bar()
    .encode(
        x=alt.X("month_year"),
        xOffset="time_period:N",
        y=alt.Y("avg_scheduled_service_minutes:Q"),
        color=alt.Color(
            "time_period:N", scale=alt.Scale(range=cp.CALITP_SEQUENTIAL_COLORS)
        ),
    )
).facet(column=alt.Column("direction_id:N", title="direction_id")).interactive()

In [90]:
# Scheduled trips per month, grouped by time_period inside each month and
# faceted into one column per direction_id. Built from sched_df, which
# includes every time_period.
facet_test2 = (
    alt.Chart(sched_df)
    .mark_bar()
    .encode(
        x=alt.X("month_year"),
        xOffset="time_period:N",
        y=alt.Y("n_scheduled_trips:Q"),
        color=alt.Color(
            "time_period:N", scale=alt.Scale(range=cp.CALITP_SEQUENTIAL_COLORS)
        ),
    )
).facet(column=alt.Column("direction_id:N", title="direction_id")).interactive()