In [None]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import calitp_data_analysis.magics
import geopandas as gpd
import great_tables as gt
import pandas as pd

from IPython.display import display, HTML, Markdown
from great_tables import md

import make_charts
from segment_speed_utils.project_vars import RT_SCHED_GCS
from calitp_data_analysis import calitp_color_palette as cp
from shared_utils import rt_utils, rt_dates

# Use Altair's "html" renderer for chart output in this notebook.
alt.renderers.enable("html")
# max_rows=None removes the default data transformer's row limit, so the
# full metrics table can be embedded in each chart.
alt.data_transformers.enable('default', max_rows=None)

In [None]:
#name = "Avalon Schedule"

In [None]:
%%capture_parameters
name

In [None]:
# Read the digest metrics for the operator given by `name`, restricted at
# read time to rows categorized as "schedule_and_vp".
metrics_path = f"{RT_SCHED_GCS}digest/schedule_vp_metrics.parquet"
operator_filters = [[
    ("name", "==", name),
    ("sched_rt_category", "==", "schedule_and_vp"),
]]

df = pd.read_parquet(metrics_path, filters=operator_filters)

# Latest service date present in the filtered data; the summary table below
# is built from this date only.
most_recent_date = df["service_date"].max()

In [None]:
# Display labels for columns whose title-cased name would not read well.
RENAME_COLS = {
    "avg_sched_service_min": "Average Scheduled Service (min)",
    "speed_mph": "Average Route Speed",
    "n_trips": "Daily Trips",
    "direction_id": "Direction",
    "service_date": "Date",
    "route_combined_name": "Route"
}


def labeling(word: str, rename_dict: dict = RENAME_COLS) -> str:
    """
    Turn a column name into a human-readable label.

    Args:
        word: column name to label.
        rename_dict: explicit column -> label overrides; defaults to
            RENAME_COLS.

    Returns:
        The override from `rename_dict` when one exists, otherwise `word`
        with underscores replaced by spaces and title-cased.
    """
    if word not in rename_dict:
        return word.replace('_', ' ').title()

    return rename_dict[word]

# {name}

## Monthly Trends 

In [None]:
# Count distinct routes per GTFS category, using only the all-day rows.
all_day_rows = df[df.time_period == "all_day"]

route_categories = (
    all_day_rows
    .groupby("sched_rt_category")
    .agg({"route_combined_name": "nunique"})
    .reset_index()
)

# Small summary table; it is the last expression so the notebook renders it.
category_table = (
    gt.GT(data=route_categories.dropna())
    .fmt_integer(columns=["route_combined_name"], compact=True)
    .cols_label(
        route_combined_name="# Routes",
        sched_rt_category="Category",
    )
    .tab_header(title=md("Routes with GTFS Availability"))
    .tab_options(container_width="50%", table_font_size="12px")
)
category_table

In [None]:
# Keys shared by the all-day, peak and offpeak tables.
route_merge_cols = ["route_combined_name", "direction_id"]


def _latest_rows(period: str, value_cols: list) -> pd.DataFrame:
    """
    Rows of `df` for `period` on the most recent service date, limited to
    the merge keys plus `value_cols`.
    """
    keep = (df.service_date == most_recent_date) & (df.time_period == period)
    return df.loc[keep, route_merge_cols + value_cols]


# Columns pulled for both the peak and offpeak slices.
period_value_cols = ["speed_mph", "n_trips", "frequency"]

all_day_stats = _latest_rows(
    "all_day",
    ["avg_sched_service_min", "avg_stop_meters", "n_trips", "sched_rt_category"],
)

# Prefix the period-specific columns so they do not collide when merged.
peak_stats = _latest_rows("peak", period_value_cols).rename(
    columns={
        "speed_mph": "peak_avg_speed",
        "n_trips": "peak_trips",
        "frequency": "peak_hourly_freq",
    }
)

offpeak_stats = _latest_rows("offpeak", period_value_cols).rename(
    columns={
        "speed_mph": "offpeak_avg_speed",
        "n_trips": "offpeak_trips",
        "frequency": "offpeak_hourly_freq",
    }
)

In [None]:
# One row per route-direction: all-day stats joined with peak and offpeak.
# NOTE(review): both joins use merge's default inner join, so a
# route-direction missing from either the peak or offpeak slice is dropped,
# and the fillna(0) below only covers nulls already present in the source
# columns — confirm that is intended.
table_df = (
    all_day_stats
    .merge(peak_stats, on=route_merge_cols)
    .merge(offpeak_stats, on=route_merge_cols)
    .sort_values(["route_combined_name", "direction_id"])
    .reset_index(drop=True)
)

# Trip counts are displayed as whole numbers.
cols_to_fill = ["peak_trips", "offpeak_trips", "n_trips"]
trip_counts = table_df[cols_to_fill].fillna(0).astype(int)
table_df[cols_to_fill] = trip_counts

# Remaining numeric measures are shown to one decimal place.
round_me = ["peak_avg_speed", "offpeak_avg_speed",
            "avg_stop_meters", "avg_sched_service_min"]
rounded_measures = table_df[round_me].round(1)
table_df[round_me] = rounded_measures

In [None]:
# Column name -> header text for the route stats table.
route_stat_labels = {
    "route_combined_name": "Route",
    "sched_rt_category": "GTFS Category",
    "avg_sched_service_min": "Avg Scheduled Service (min)",
    "avg_stop_meters": "Avg Stop Distance (meters)",
    "n_trips": "Daily Trips",
    "peak_trips": "Peak Trips",
    "offpeak_trips": "Offpeak Trips",
    "direction_id": "Direction",
    "peak_avg_speed": "Peak Avg Speed (mph)",
    "offpeak_avg_speed": "Offpeak Avg Speed (mph)",
    "peak_hourly_freq": "Peak Hourly Freq",
    "offpeak_hourly_freq": "Offpeak Hourly Freq",
}

# Formatted table of table_df; it is the last expression so the notebook
# renders it.
route_stats_table = (
    gt.GT(data=table_df)
    .fmt_integer(
        columns=["peak_trips", "offpeak_trips", "n_trips"],
        compact=True,
    )
    .fmt_number(
        columns=["avg_stop_meters", "avg_sched_service_min"],
        compact=True,
        sep_mark=",",
    )
    .cols_label(**route_stat_labels)
    .tab_header(title=md("Route Stats"))
    .tab_options(container_width="90%", table_font_size="12px")
)
route_stats_table

In [None]:
def _axis_ceiling(values: pd.Series, step: int = 10) -> int:
    """
    Smallest positive multiple of `step` that is >= the maximum of `values`.

    Falls back to `step` when `values` has no usable maximum (empty or
    all-null) or the maximum is not positive, so the axis domain is always a
    valid, non-degenerate range.
    """
    observed_max = values.max()
    if pd.isna(observed_max) or observed_max <= 0:
        return step
    # -(-x // step) is ceiling division; unlike round(x, -1) it never rounds
    # down (e.g. 14 -> 20 rather than 10), so the tallest point stays on-chart.
    return int(-(-observed_max // step) * step)


def base_route_chart(df: pd.DataFrame, y_col: str) -> alt.Chart:
    """
    Line chart of `y_col` over service_date, colored by time period and
    faceted into one column per direction_id.

    Args:
        df: metrics rows; must contain service_date, time_period,
            direction_id, route_combined_name, route_id and `y_col`.
        y_col: name of the numeric column plotted on the y-axis.

    Returns:
        An interactive faceted Altair chart titled with the label for
        `y_col`. The y-axis runs from 0 to the maximum of `y_col` rounded up
        to the next multiple of 10.
    """
    # Prettify time_period values for the legend, e.g. "all_day" -> "All Day".
    df = df.assign(
        time_period = df.time_period.str.replace('_', ' ').str.title()
    ).reset_index(drop=True)
    
    selected_colors = [
        cp.CALITP_CATEGORY_BOLD_COLORS[4], # blue
        cp.CALITP_CATEGORY_BRIGHT_COLORS[3], # green
        cp.CALITP_CATEGORY_BOLD_COLORS[1], # yellow,
    ]
    
    #https://stackoverflow.com/questions/26454649/python-round-up-to-the-nearest-ten
    max_y = _axis_ceiling(df[y_col])
    
    chart = (
        alt.Chart(df)
        .mark_line()
        .encode(
             x = alt.X("yearmonthdate(service_date):O", title = "Date",
                       axis = alt.Axis(format = '%b %Y')
                      ),
             y = alt.Y(f"{y_col}:Q", title = labeling(y_col),
                       scale = alt.Scale(domain=[0, max_y])
                      ),
             color = alt.Color(
                 "time_period:N", title = labeling("time_period"),
                 scale = alt.Scale(range = selected_colors)
             ),
             tooltip = ["route_combined_name", "route_id", "direction_id", 
                        "time_period", y_col]
         ).facet(
             column = alt.Column("direction_id:N", title=labeling("direction_id")),
         ).interactive()
    ).properties(title = labeling(y_col))
    
    return chart

In [None]:
# https://stackoverflow.com/questions/62103632/altair-change-the-position-of-a-slider
# CSS that positions Vega's generated bindings form (the dropdown widget)
# absolutely at the top-right of its container.
VEGA_BINDINGS_STYLE = """
        <style>
        form.vega-bindings {
            position: absolute;
            right: 0px;
            top: 0px;
            }
        </style>
        """

display(HTML(VEGA_BINDINGS_STYLE))

def filtered_route_charts(
    df: pd.DataFrame,
    control_field: str,
) -> alt.Chart:
    """
    Stack the four route trend charts and drive them all from one dropdown.

    The dropdown lists the sorted unique values of `control_field`; the
    same selection is attached to, and used to filter, every chart.
    https://stackoverflow.com/questions/58919888/multiple-selections-in-altair

    Args:
        df: metrics rows passed through to base_route_chart.
        control_field: column whose values populate the dropdown.

    Returns:
        The vertically concatenated charts, each with its own y scale.
    """
    dropdown_options = sorted(df[control_field].unique().tolist())

    # A single point selection bound to the dropdown controls all charts.
    route_selector = alt.selection_point(
        fields=[control_field],
        bind=alt.binding_select(options=dropdown_options, name='Routes '),
    )

    # Schedule-based charts skip vp_only rows; the speed chart skips
    # schedule_only rows.
    sched_df = df[df.sched_rt_category != "vp_only"]
    vp_df = df[df.sched_rt_category != "schedule_only"]

    # (data, y column) for each panel, in display order from top to bottom.
    panels = [
        (sched_df[sched_df.time_period == "all_day"], "avg_sched_service_min"),
        (sched_df, "n_trips"),
        (sched_df, "frequency"),
        (vp_df, "speed_mph"),
    ]

    chart_list = [
        base_route_chart(panel_df, y_col)
        .add_params(route_selector)
        .transform_filter(route_selector)
        for panel_df, y_col in panels
    ]

    return alt.vconcat(*chart_list).resolve_scale(y="independent")

In [None]:
# Render the stacked trend charts, filterable by route through the dropdown.
filtered_route_charts(df, "route_combined_name")

### Segment Speeds

In [None]:
# NOTE(review): the map is pinned to the "nov2023" entry of rt_dates.DATES
# rather than most_recent_date used by the tables above — confirm intended.
selected_date = pd.to_datetime(rt_dates.DATES["nov2023"])

# Segment speeds for this operator on the selected date, projected to
# EPSG:3310 before the arrow geometries are built.
speeds = gpd.read_parquet(
    f"{RT_SCHED_GCS}digest/segment_speeds.parquet",
    filters = [[("name", "==", name), 
                ("service_date", "==", selected_date)]]
).drop(columns = "service_date").to_crs("EPSG:3310")

# Build the arrow geometry column-wise from the geometry column instead of
# applying a lambda to every row: the helper only needs the geometry, and a
# GeoSeries.apply result stays a GeoSeries even when no rows matched the
# filters, so make_map can still show its "no data" message.
speeds = speeds.assign(
    geometry_arrowized = speeds.geometry.apply(
        rt_utils.arrowize_segment
    ).set_crs("EPSG:3310")
)

In [None]:
def make_map(gdf: gpd.GeoDataFrame, time_period: str):
    """
    Display a map of p50_mph for one time period.

    Rows are limited to `time_period`, switched to the arrowized geometry
    (the original geometry column is dropped), and any row containing a
    null is removed. If nothing remains, a short Markdown message is
    displayed instead of a map.

    Args:
        gdf: segment speeds with time_period, geometry,
            geometry_arrowized and p50_mph columns.
        time_period: value of the time_period column to map.
    """
    in_period = gdf.time_period == time_period
    subset = (
        gdf[in_period]
        .set_geometry("geometry_arrowized")
        .drop(columns="geometry")
        .dropna()
    )

    if len(subset) == 0:
        display(Markdown("No speeds data to display"))
        return

    speed_map = subset.explore(
        "p50_mph",
        tiles="CartoDB Positron",
        cmap=rt_utils.ZERO_THIRTY_COLORSCALE,
    )
    display(speed_map)

In [None]:
# Map of segment speeds for the offpeak time period.
make_map(speeds, "offpeak")

In [None]:
# Map of segment speeds for the peak time period.
make_map(speeds, "peak")