In [1]:
import _report_visuals_utils
import altair as alt
import pandas as pd
import viz_data_prep
from omegaconf import OmegaConf
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS

# Chart text/styling config; the cells below read title, subtitle, colors and
# tooltip entries from it, one sub-key per graph.
readable_dict = OmegaConf.load("readable2.yml")

In [2]:
# NOTE(review): this cell repeats the OmegaConf import and the readable2.yml
# load already performed in the first cell; it looks safe to delete -- confirm.
from omegaconf import OmegaConf

readable_dict = OmegaConf.load("readable2.yml")

In [3]:
FILE = GTFS_DATA_DICT.digest_tables.route_schedule_vp

# Part of the portfolio-grain handling can stay here, but the
# renaming/replacing/subsetting should move to a separate script.

# Read only the two City of West Hollywood Cityline routes, then pass the raw
# frame to the shared wrangling helper along with the two viz_data_prep
# objects it takes as extra positional arguments.
df = viz_data_prep.data_wrangling_for_visualizing(
    pd.read_parquet(
        f"{RT_SCHED_GCS}{FILE}.parquet",
        filters=[
            [
                ("portfolio_organization_name", "==", "City of West Hollywood"),
                (
                    "recent_combined_name",
                    "in",
                    ["Cityline Local-East", "Cityline Local-West"],
                ),
            ]
        ],
    ),
    viz_data_prep.route_direction_cols_for_viz,
    viz_data_prep.readable_col_names,
)

In [4]:
# Spot-check the first two rows of the wrangled frame.
df.head(2)

Unnamed: 0,Direction (0/1),Period,Average Scheduled Service (trip minutes),# Scheduled Trips,# Realtime Trips,Date,Route,Direction,# Minutes with 1+ VP per Minute,# Minutes with 2+ VP per Minute,...,Average VP per Minute,% VP within Scheduled Shape,% Scheduled Trip w/ 1+ VP/Minute,% Scheduled Trip w/ 2+ VP/Minute,Realtime versus Scheduled Service Ratio,Speed (MPH),Portfolio Organization Name,Headway (Minutes),GTFS Availability,Average Stop Distance (Miles)
0,0,All Day,52.73,15,11,2023-03-15,Cityline Local-East,Eastbound,455,249,...,1.19,95.0,78.2,42.8,1.14,8.82,City of West Hollywood,96.77,schedule_and_vp,0.45
1,0,Offpeak,52.73,9,7,2023-03-15,Cityline Local-East,Eastbound,252,145,...,1.21,98.0,66.3,38.2,0.98,9.44,City of West Hollywood,107.14,schedule_and_vp,0.45


In [5]:
from IPython.display import HTML, display

# Inject CSS that pins the drop-down menu (the `form.vega-bindings` element)
# to the upper-right corner of the charts.
display(
    HTML(
        """
<style>
form.vega-bindings {
  position: absolute;
  right: 0px;
  top: 0px;
}
</style>
"""
    )
)

In [7]:
# Divider chart titled with the data-quality section heading.
# NOTE(review): route_filter() builds its own local `data_quality`, so this
# module-level copy appears unused in the visible cells -- confirm before removing.
data_quality = _report_visuals_utils.divider_chart(
    df, readable_dict.data_quality_graph.title
)

In [9]:
def route_filter(df):
    """
    Stack the report charts vertically behind a single route drop-down.

    A point selection on "Route" (bound to a select widget listing every
    unique route in df and defaulting to the first one) is attached to
    each metric chart and used as its filter. The three divider charts
    are added without the selection.

    Args:
        df: dataframe with at least "Route" and "Period" columns; it is
            handed to the _report_visuals_utils chart builders.

    Returns:
        The vertically concatenated Altair chart.
    """
    route_names = df["Route"].unique().tolist()

    # One shared selection drives every metric chart.
    route_selector = alt.selection_point(
        fields=["Route"],
        value=route_names[0],
        bind=alt.binding_select(options=route_names, name="Routes: "),
    )

    def _with_route_filter(build_chart, data):
        # Build the chart, register the selection on it, then filter by it.
        built = build_chart(data)
        return built.add_params(route_selector).transform_filter(route_selector)

    def _all_day_rows():
        # A fresh "All Day" slice per chart, so no two builders share a frame.
        return df[df.Period == "All Day"]

    # Metric charts
    spatial_accuracy = _with_route_filter(
        _report_visuals_utils.sample_spatial_accuracy_chart, _all_day_rows()
    )
    avg_scheduled_min = _with_route_filter(
        _report_visuals_utils.sample_avg_scheduled_min_chart, _all_day_rows()
    )
    vp_per_minute = _with_route_filter(
        _report_visuals_utils.vp_per_minute_chart, _all_day_rows()
    )
    # The speed chart is the only one that receives every period.
    speed = _with_route_filter(_report_visuals_utils.speed_chart, df)
    sched_vp_per_min = _with_route_filter(
        _report_visuals_utils.sched_vp_per_min_chart, _all_day_rows()
    )

    # Section dividers
    make_divider = _report_visuals_utils.divider_chart
    data_quality = make_divider(df, readable_dict.data_quality_graph.title)
    rider_quality = make_divider(df, readable_dict.rider_quality_graph.title)
    summary = make_divider(df, readable_dict.summary_graph.title)

    return alt.vconcat(
        summary,
        rider_quality,
        avg_scheduled_min,
        spatial_accuracy,
        data_quality,
        vp_per_minute,
        speed,
        sched_vp_per_min,
    )

In [10]:
# Render the stacked, route-filterable report for the loaded routes.
route_filter(df)

## VP per Minute

In [None]:
def vp_per_minute_chart(df: pd.DataFrame) -> alt.Chart:
    """
    Bar chart of "Average VP per Minute" by date with a ruler layered on
    top, split into one facet column per direction.

    Title, subtitle, colors and tooltip columns are read from
    readable_dict.vp_per_min_graph.
    """
    chart_config = readable_dict.vp_per_min_graph

    # ruler_chart(df, 3): presumably a reference rule at 3 -- confirm in
    # _report_visuals_utils.
    reference_line = _report_visuals_utils.ruler_chart(df, 3)

    bars = _report_visuals_utils.bar_chart(
        x_col="Date",
        y_col="Average VP per Minute",
        color_col="Average VP per Minute",
        color_scheme=list(chart_config.colors),
        tooltip_cols=list(chart_config.tooltip),
        date_format="%b %Y",
    )

    # The data is attached to the layer (not the individual marks) so that
    # .facet can inherit it.
    layered = alt.layer(bars, reference_line, data=df).properties(
        width=200, height=250
    )
    faceted = layered.facet(column=alt.Column("Direction:N"))

    heading = {"text": chart_config.title, "subtitle": chart_config.subtitle}
    return faceted.properties(title=heading)

In [None]:
# Preview the chart on the "All Day" rows only.
vp_per_minute_chart(df.loc[df["Period"] == "All Day"])

## Text Tables

In [None]:
def reshape_route_stats(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarize every route-direction on the most recent date in df.

    The result has one row per route-direction holding the all-day
    schedule figures next to the peak and offpeak speed, trip count and
    headway. Numeric gaps left by the outer merges become 0, column
    names are title-cased with underscores turned into spaces, and the
    date is appended as a "Date" column. reshape_df_text_table melts
    this frame for the text table.
    """
    latest_date = df["Date"].max()
    key_cols = ["Route", "Direction", "Direction (0/1)"]
    period_metric_cols = ["Speed (MPH)", "# Scheduled Trips", "Headway (Minutes)"]

    # Everything below only looks at the latest date.
    latest_rows = df[df["Date"] == latest_date]

    def _period_slice(period: str, metric_cols: list) -> pd.DataFrame:
        # Rows of one period, trimmed to the merge keys plus the given metrics.
        return latest_rows[latest_rows["Period"] == period][key_cols + metric_cols]

    all_day = _period_slice(
        "All Day",
        [
            "Average Scheduled Service (trip minutes)",
            "Average Stop Distance (Miles)",
            "# Scheduled Trips",
            "GTFS Availability",
        ],
    )
    peak = _period_slice("Peak", period_metric_cols).rename(
        columns={
            "Speed (MPH)": "Peak Avg Speed (MPH)",
            "# Scheduled Trips": "peak_scheduled_trips",
            "Headway (Minutes)": "Peak Headway (Minutes)",
        }
    )
    offpeak = _period_slice("Offpeak", period_metric_cols).rename(
        columns={
            "Speed (MPH)": "Offpeak Avg Speed (MPH)",
            "# Scheduled Trips": "offpeak_scheduled_trips",
            "Headway (Minutes)": "Offpeak Headway (Minutes)",
        }
    )

    merged = all_day.merge(peak, on=key_cols, how="outer").merge(
        offpeak, on=key_cols, how="outer"
    )
    table_df = merged.sort_values(["Route", "Direction"]).reset_index(drop=True)

    # A period missing for a route-direction shows up as NaN; report it as 0.
    number_cols = table_df.select_dtypes(include="number").columns
    table_df[number_cols] = table_df[number_cols].fillna(0)

    # Tidy the headers, then put the date back as the last column.
    table_df = table_df.rename(columns=lambda name: name.title().replace("_", " "))
    return table_df.assign(Date=latest_date)

In [None]:
# Build the latest-date route summary so it can be inspected in the next cell.
route_stats_df = reshape_route_stats(df)

In [None]:
# List the title-cased column names; reshape_df_text_table's value_vars must match these.
route_stats_df.columns

In [None]:
def reshape_df_text_table(df: pd.DataFrame) -> pd.DataFrame:
    """
    Melt the route summary into one row per statistic for the text table.

    Starts from reshape_route_stats(df), melts the statistic columns into
    variable/value pairs, adds a constant "Zero" column and a "combo_col"
    label of the form "<statistic>: <value>" that the chart displays.
    """
    id_cols = ["Date", "Route", "Direction", "Direction (0/1)"]
    stat_cols = [
        "Average Scheduled Service (Trip Minutes)",
        "Average Stop Distance (Miles)",
        "# Scheduled Trips",
        "Gtfs Availability",
        "Peak Avg Speed (Mph)",
        "Peak Scheduled Trips",
        "Peak Headway (Minutes)",
        "Offpeak Avg Speed (Mph)",
        "Offpeak Scheduled Trips",
        "Offpeak Headway (Minutes)",
    ]

    long_df = reshape_route_stats(df).melt(id_vars=id_cols, value_vars=stat_cols)

    # Decoy column so all the text will be centered.
    long_df["Zero"] = 0

    # Join the statistic name and its value so they line up as one label,
    # e.g. "Trips Per Hour: 0.56". This label is what shows up on the graph.
    label = long_df["variable"].astype(str) + ": " + long_df["value"].astype(str)

    # Swap the raw availability code and the title-cased acronym for display text.
    label = label.str.replace("schedule_and_vp", "Schedule and Realtime Data")
    long_df["combo_col"] = label.str.replace("Gtfs", "GTFS")

    return long_df

In [None]:
# Build the melted text-table frame so it can be inspected in the next cell.
text_table_df = reshape_df_text_table(df)

In [None]:
# Show one melted row, including the "Zero" and "combo_col" columns.
text_table_df.head(1)

In [None]:
def text_chart(df: pd.DataFrame) -> alt.Chart:
    """
    Text-table chart of the latest route statistics.

    Reshapes df with reshape_df_text_table, draws it with
    _report_visuals_utils.text_table, and applies the size, title and
    subtitle from readable_dict.text_graph via configure_chart.
    """
    chart_config = readable_dict.text_graph

    # Long-format frame with one row per statistic.
    long_stats = reshape_df_text_table(df)
    table = _report_visuals_utils.text_table(long_stats)

    return _report_visuals_utils.configure_chart(
        table,
        width=400,
        height=250,
        title=chart_config.title,
        subtitle=chart_config.subtitle,
    )

In [None]:
# Preview the text table for direction 0 only.
text_chart(df[df["Direction (0/1)"].eq(0)])

## Timeliness

In [None]:
def reshape_timeliness_trips(df: pd.DataFrame) -> pd.DataFrame:
    """
    Turn the early / on-time / late trip counts into long format.

    Each input row yields three output rows (one per trip-count column)
    with the column name in "variable" and the count in "value", plus a
    "Percentage" column equal to value / "# Realtime Trips" * 100.
    """
    id_cols = [
        "Date",
        "Portfolio Organization Name",
        "Route",
        "Period",
        "Direction",
        "Direction (0/1)",
        "# Realtime Trips",
    ]
    trip_count_cols = [
        "# Early Arrival Trips",
        "# On-Time Trips",
        "# Late Trips",
    ]

    long_df = pd.melt(df, id_vars=id_cols, value_vars=trip_count_cols)

    # Share of the realtime trips that fall in each timeliness bucket.
    share_of_realtime = long_df["value"] / long_df["# Realtime Trips"]
    long_df["Percentage"] = share_of_realtime * 100

    return long_df

In [None]:
def timeliness_chart(df) -> alt.Chart:
    """
    Line chart of the "Percentage" of realtime trips per timeliness
    category over time, one facet column per direction.

    df is first melted with reshape_timeliness_trips; title, subtitle,
    colors and tooltip columns come from
    readable_dict.timeliness_trips_graph.
    """
    # Wide -> long: one row per date and timeliness category.
    long_df = reshape_timeliness_trips(df)

    chart_config = readable_dict.timeliness_trips_graph

    lines = _report_visuals_utils.line_chart(
        df=long_df,
        x_col="Date",
        y_col="Percentage",
        color_col="variable",
        color_scheme=list(chart_config.colors),
        tooltip_cols=list(chart_config.tooltip),
    )
    sized = lines.properties(width=200, height=250)

    faceted = sized.facet(column=alt.Column("Direction:N"))
    heading = {"text": chart_config.title, "subtitle": chart_config.subtitle}
    return faceted.properties(title=heading)

In [None]:
# Preview the chart on the "All Day" rows only.
timeliness_chart(df.loc[df["Period"] == "All Day"])

## Total Scheduled Trips

In [None]:
def total_scheduled_trips_chart(df: pd.DataFrame) -> alt.Chart:
    """
    Bar chart of "# Scheduled Trips" by date, colored by period.

    The bars are wrapped in a layer that carries df, then sized and
    titled through configure_chart using readable_dict.n_scheduled_graph.
    """
    chart_config = readable_dict.n_scheduled_graph

    bars = _report_visuals_utils.bar_chart(
        x_col="Date:T",
        y_col="# Scheduled Trips",
        color_col="Period:N",
        color_scheme=list(chart_config.colors),
        tooltip_cols=list(chart_config.tooltip),
        date_format="%b %Y",
    )

    # bar_chart is called without data here, so attach df on the layer.
    layered = alt.layer(bars, data=df)

    return _report_visuals_utils.configure_chart(
        layered,
        width=400,
        height=250,
        title=chart_config.title,
        subtitle=chart_config.subtitle,
    )

In [None]:
# Preview for direction 0, using the per-period rows (everything except "All Day").
total_scheduled_trips_chart(
    df.loc[df["Direction (0/1)"].eq(0) & df["Period"].ne("All Day")]
)

## Frequency

In [None]:
def headway_chart(df: pd.DataFrame) -> alt.Chart:
    """
    Bar chart of "Headway (Minutes)" by date, one facet column per
    direction, with the y axis fixed to the 0-250 range.

    Title, subtitle, colors and tooltip columns are read from
    readable_dict.frequency_graph.
    """
    chart_config = readable_dict.frequency_graph

    bars = _report_visuals_utils.bar_chart(
        x_col="Date:T",
        y_col="Headway (Minutes)",
        color_col="Headway (Minutes):N",
        color_scheme=list(chart_config.colors),
        tooltip_cols=list(chart_config.tooltip),
        date_format="%b %Y",
    )

    layered = alt.layer(bars, data=df)
    # Same y scale in every facet: 0 to 250 minutes.
    y_axis = alt.Y("Headway (Minutes)", scale=alt.Scale(domain=[0, 250]))
    sized = layered.encode(y=y_axis).properties(width=200, height=250)

    faceted = sized.facet(column=alt.Column("Direction:N"))
    heading = {"text": chart_config.title, "subtitle": chart_config.subtitle}
    return faceted.properties(title=heading)

In [None]:
# Preview the chart on the "Peak" rows only.
headway_chart(df.loc[df["Period"].eq("Peak")])

## Speed (MPH)

In [None]:
def speed_chart(df) -> alt.Chart:
    """
    Line chart of "Speed (MPH)" by date, colored by period, with one
    facet column per direction.

    Title, subtitle, colors and tooltip columns are read from
    readable_dict.speed_graph.
    """
    chart_config = readable_dict.speed_graph

    lines = _report_visuals_utils.line_chart(
        df=df,
        x_col="Date",
        y_col="Speed (MPH)",
        color_col="Period",
        color_scheme=list(chart_config.colors),
        tooltip_cols=list(chart_config.tooltip),
    )
    sized = lines.properties(width=200, height=250)

    faceted = sized.facet(column=alt.Column("Direction:N"))
    heading = {"text": chart_config.title, "subtitle": chart_config.subtitle}
    return faceted.properties(title=heading)

In [None]:
# Preview with every period kept, since the chart colors its lines by "Period".
speed_chart(df)

## % of Scheduled Trip with 1+/2+ VPs

In [None]:
def reshape_pct_journey_with_vp(df: pd.DataFrame) -> pd.DataFrame:
    """
    Turn the two "% Scheduled Trip w/ N+ VP/Minute" columns (1+ and 2+)
    into long format.

    The output keeps the date, organization, route, direction and period
    columns, names the melted column "Category" and stores its value in
    "% of Trip Duration".
    """
    id_cols = [
        "Date",
        "Portfolio Organization Name",
        "Route",
        "Direction",
        "Period",
    ]
    pct_cols = [
        "% Scheduled Trip w/ 1+ VP/Minute",
        "% Scheduled Trip w/ 2+ VP/Minute",
    ]

    # Keep only the needed columns, then go from wide to long.
    long_df = df[id_cols + pct_cols].melt(id_vars=id_cols, value_vars=pct_cols)

    return long_df.rename(
        columns={"variable": "Category", "value": "% of Trip Duration"}
    )

In [None]:
def sched_vp_per_min_chart(df) -> alt.Chart:
    """
    Circle chart of "% of Trip Duration" by date, colored by category,
    with a ruler layered on top and one facet column per direction.

    df is first melted with reshape_pct_journey_with_vp; title, subtitle,
    colors and tooltip columns come from
    readable_dict.sched_vp_per_min_graph.
    """
    # Wide -> long: one row per date and VP-per-minute category.
    long_df = reshape_pct_journey_with_vp(df)
    chart_config = readable_dict.sched_vp_per_min_graph

    # ruler_chart(long_df, 100): presumably a reference rule at 100 -- confirm
    # in _report_visuals_utils.
    reference_line = _report_visuals_utils.ruler_chart(long_df, 100)

    circles = _report_visuals_utils.circle_chart(
        df=long_df,
        x_col="Date",
        y_col="% of Trip Duration",
        color_col="Category",
        color_scheme=list(chart_config.colors),
        tooltip_cols=list(chart_config.tooltip),
    )

    layered = alt.layer(circles, reference_line, data=long_df)
    sized = layered.properties(width=200, height=250)

    faceted = sized.facet(column=alt.Column("Direction:N"))
    heading = {"text": chart_config.title, "subtitle": chart_config.subtitle}
    return faceted.properties(title=heading)

In [None]:
# Preview the chart on the "All Day" rows only.
sched_vp_per_min_chart(df.loc[df["Period"] == "All Day"])