## Section 1: Operator Overview 
* Using the most recent month

In [58]:
import _report_utils
import _section1_utils as section1
import altair as alt
import calitp_data_analysis.magics
import geopandas as gpd
import great_tables as gt
import pandas as pd
from calitp_data_analysis import calitp_color_palette as cp
from calitp_data_analysis.sql import to_snakecase
from great_tables import md
from IPython.display import HTML, Markdown, display
from segment_speed_utils.project_vars import RT_SCHED_GCS
from shared_utils import catalog_utils, rt_dates, rt_utils

In [59]:
alt.renderers.enable("html")
alt.data_transformers.enable("default", max_rows=None)

DataTransformerRegistry.enable('default')

In [60]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [61]:
name = "Bay Area 511 AC Transit Schedule"
organization_name = "Alameda-Contra Costa Transit District"

In [62]:
selected_date = pd.to_datetime(rt_dates.DATES["mar2024"])
year = selected_date.year
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

In [63]:
import yaml

# Readable Dictionary
with open("readable.yml") as f:
    readable_dict = yaml.safe_load(f)

### Map

In [64]:
ac_transit_map = section1.load_operator_map(name)

In [65]:
ac_transit_map.shape

(132, 15)

In [66]:
def plot_route(route):
    filtered_gdf = gdf[gdf["Route"] == route]
    display(
        filtered_gdf.explore(
            column="Route",
            cmap="Spectral",
            tiles="CartoDB positron",
            width=500,
            height=300,
            style_kwds={"weight": 3},
            legend=False,
            tooltip=["Route", "Service Miles"],
        )
    )

### Operator Profiles

In [67]:
ac_transit_profile = section1.load_operator_profiles(organization_name)

In [68]:
ac_transit_profile

Unnamed: 0,schedule_gtfs_dataset_key,# Routes,# Trips,# Shapes,# Stops,# Arrivals,Operator Service Miles,Avg Arrivals per Stop,# Downtown Local Route Types,# Local Route Types,# Rapid Route Types,# Coverage Route Types,Transit Operator,Organization ID,Organization,Date
13,c499f905e33929a641f083dad55c521e,132,5478,346,4733,234582,1459.66,49.56,179,21,92,44,Bay Area 511 AC Transit Schedule,recOZgevYf7Jimm9L,Alameda-Contra Costa Transit District,2024-04-17


#### Original

In [69]:
op_profiles_url = f"{GTFS_DATA_DICT.digest_tables.dir}{GTFS_DATA_DICT.digest_tables.operator_profiles}.parquet"

op_profiles_df = pd.read_parquet(op_profiles_url)

In [70]:
op_profiles_df1 = op_profiles_df.sort_values(by=["service_date"], ascending=False)

In [71]:
op_profiles_df1.columns

Index(['schedule_gtfs_dataset_key', 'operator_n_routes', 'operator_n_trips',
       'operator_n_shapes', 'operator_n_stops', 'operator_n_arrivals',
       'operator_route_length_miles', 'operator_arrivals_per_stop',
       'n_downtown_local_routes', 'n_local_routes', 'n_rapid_routes',
       'n_coverage_routes', 'name', 'organization_source_record_id',
       'organization_name', 'service_date'],
      dtype='object')

In [72]:
op_profiles_df1.drop_duplicates(subset=["organization_name"])[
    ["service_date"]
].describe()

  op_profiles_df1.drop_duplicates(subset=["organization_name"])[


Unnamed: 0,service_date
count,171
unique,11
top,2024-04-17 00:00:00
freq,139
first,2023-03-15 00:00:00
last,2024-04-17 00:00:00


### Operators who don't have profile info from the most recent service_date but have stuff for sched_vp
* City of Torrance	
* Stanislaus Regional Transit Authority

In [73]:
op_profiles2 = op_profiles_df1.drop_duplicates(subset=["organization_name"])[
    ["organization_name", "service_date"]
]

In [74]:
# op_profiles2.loc[op_profiles2.service_date < '2024-04-17'].sort_values(by = ['organization_name'])

In [75]:
op_profiles_df1.loc[
    op_profiles_df1.organization_name.isin(
        ["City of Torrance", "Stanislaus Regional Transit Authority"]
    )
][["organization_name", "service_date"]]

Unnamed: 0,organization_name,service_date
1595,Stanislaus Regional Transit Authority,2024-03-13
348,City of Torrance,2024-02-14
1594,Stanislaus Regional Transit Authority,2024-01-17
347,City of Torrance,2024-01-17
1593,Stanislaus Regional Transit Authority,2023-12-13
346,City of Torrance,2023-12-13
345,City of Torrance,2023-11-15
1592,Stanislaus Regional Transit Authority,2023-11-15
344,City of Torrance,2023-10-11
318,Stanislaus Regional Transit Authority,2023-10-11


### Total Service
* Checking Eric's data.
* Big Blue Bus Schedule
* https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.fct_monthly_route_service_by_timeofday
* TTL_service_hours: Total scheduled service hours that occurred for the route for this month, day_type, and time_of_day.

In [76]:
og_big_blue_bus = section1.load_scheduled_service("Big Blue Bus Schedule")

In [77]:
og_big_blue_bus.head()

Unnamed: 0,key,name,schedule_source_record_id,route_id_x,route_short_name,route_long_name,time_of_day,month,year,day_type,n_trips,ttl_service_hours,day_name,schedule_gtfs_dataset_key,organization_source_record_id,organization_name,route_id_y,route_combined_name,full_date
1127552,131c820d577cabbc588e8ae37d93ef18,Big Blue Bus Schedule,recpN1dPaxhZvZQV0,3554,1,Main St & Santa Monica Blvd/UCLA,Evening,4,2023,5,72,47.07,Thursday,efbbd5293be71f7a5de0cf82b59febe1,recJHFDLpGRMIFgnL,City of Santa Monica,3705,1 Main St & Santa Monica Blvd/UCLA,2023-04
1127553,131c820d577cabbc588e8ae37d93ef18,Big Blue Bus Schedule,recpN1dPaxhZvZQV0,3554,1,Main St & Santa Monica Blvd/UCLA,Evening,4,2023,5,72,47.07,Thursday,dbbe8ee4864a2715a40749605395d584,recJHFDLpGRMIFgnL,City of Santa Monica,3629,1 Main St & Santa Monica Blvd/UCLA,2023-04
1127554,6ea1bdf6b8a09827b1e0ad8770621b1c,Big Blue Bus Schedule,recpN1dPaxhZvZQV0,3555,2,Wilshire Blvd/UCLA,AM Peak,4,2023,6,56,33.47,Friday,efbbd5293be71f7a5de0cf82b59febe1,recJHFDLpGRMIFgnL,City of Santa Monica,3706,2 Wilshire Blvd/UCLA,2023-04
1127555,6ea1bdf6b8a09827b1e0ad8770621b1c,Big Blue Bus Schedule,recpN1dPaxhZvZQV0,3555,2,Wilshire Blvd/UCLA,AM Peak,4,2023,6,56,33.47,Friday,dbbe8ee4864a2715a40749605395d584,recJHFDLpGRMIFgnL,City of Santa Monica,3630,2 Wilshire Blvd/UCLA,2023-04
1127556,99627bfbe7d4443880a2928196b92329,Big Blue Bus Schedule,recpN1dPaxhZvZQV0,3556,3,Lincoln Blvd/LAX,Evening,2,2023,5,76,56.2,Thursday,efbbd5293be71f7a5de0cf82b59febe1,recJHFDLpGRMIFgnL,City of Santa Monica,3707,3 Lincoln Blvd/LAX,2023-02


In [78]:
og_big_blue_bus.loc[
    (og_big_blue_bus.full_date == "2024-01") & (og_big_blue_bus.day_name == "Wednesday")
][["ttl_service_hours"]].sum()

ttl_service_hours   11129.77
dtype: float64

In [79]:
og_big_blue_bus_agg2 = (
    og_big_blue_bus.groupby(["name", "full_date", "time_of_day", "day_name"])
    .agg(
        {
            "ttl_service_hours": "sum",
        }
    )
    .reset_index()
)

In [80]:
og_big_blue_bus_agg.loc[
    (og_big_blue_bus_agg.full_date == "2024-01")
    & (og_big_blue_bus_agg.day_name == "Wednesday")
][["ttl_service_hours"]].sum()

ttl_service_hours   878.16
dtype: float64

In [81]:
og_big_blue_bus_agg2.loc[
    (og_big_blue_bus_agg2.full_date == "2024-01")
    & (og_big_blue_bus_agg2.day_name == "Wednesday")
][["ttl_service_hours"]].sum()

ttl_service_hours   11129.77
dtype: float64

* For example, adding up Big Blue Bus Wednesday service across times of day you get ~876, which is quite a bit lower than what the Reports site shows (1110). 

In [82]:
section1.single_bar_chart_dropdown(
    big_blue_bus,
    "Day of Week",
    "Total Service (hours)",
    "Time of Day",
    readable_dict["total_scheduled_hours"]["title"],
    "full_date",
    readable_dict["total_scheduled_hours"]["subtitle"],
)



#### Original: 2023 and 2024 in separate files

In [83]:
og_big_blue_bus[['day_type','day_name']].drop_duplicates()

Unnamed: 0,day_type,day_name
1127552,5,Thursday
1127554,6,Friday
1127558,7,Saturday
1127560,2,Monday
1127564,3,Tuesday
1127576,4,Wednesday
1127596,1,Sunday


In [84]:
def tag_day(df: pd.DataFrame, col_to_change:str) -> pd.DataFrame:
    # Function to determine if a date is a weekend day or a weekday
    def which_day(date):
        if date == 1:
            return "Sunday"
        elif date == 2:
            return "Monday"
        elif date == 3:
            return "Tuesday"
        elif date == 4:
            return "Wednesday"
        elif date == 5:
            return "Thursday"
        elif date == 6:
            return "Friday"
        else:
            return "Saturday"

    # Apply the function to each value in the "service_date" column
    df[col_to_change] = df[col_to_change].apply(which_day)

    return df

In [85]:
def load_scheduled_service(year: str, name: str) -> pd.DataFrame:
    url = f"{GTFS_DATA_DICT.schedule_tables.gcs_dir}{GTFS_DATA_DICT.schedule_tables.monthly_scheduled_service}_{year}.parquet"
    df = pd.read_parquet(
        url,
        filters=[[("name", "==", name)]],
    )

    df["month"] = df["month"].astype(str).str.zfill(2)
    df["full_date"] = df.year.astype(str) + "-" + df.month.astype(str)
    df = tag_day(df, "day_type")
    return df

In [86]:
bbb_2023 = load_scheduled_service("2023", name)
bbb_2024 = load_scheduled_service("2024", name)

In [89]:
bbb_2024.loc[
    (bbb_2024.full_date == "2024-01") & (bbb_2024.day_type == "Wednesday")
][['ttl_service_hours']].sum()

ttl_service_hours   19991.78
dtype: float64

In [90]:
bbb_2024.loc[
    (bbb_2024.full_date == "2024-01") & (bbb_2024.day_type == "Wednesday")
]

Unnamed: 0,key,name,source_record_id,route_id,route_short_name,route_long_name,time_of_day,month,year,day_type,n_trips,ttl_service_hours,full_date
6,92118cda35de7b35340207a3f51ad38a,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,6,6,Berkeley - Telegraph - Oakland,AM Peak,1,2024,Wednesday,150,81.83,2024-01
24,d70c3e912a39e7dcb4ab4d13053adc25,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,O,O,Santa Clara - Encinal Transbay,PM Peak,1,2024,Wednesday,105,83.5,2024-01
71,4ed1e7a06d08cdf9808abcd6f21890b7,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,1T,1T,Uptown Oakland\ San Leandro BART Southbound OWL,PM Peak,1,2024,Wednesday,280,251.08,2024-01
79,75897c77c80d03c39966d248595db438,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,21,21,Dimond - Fruitvale - Bay Farm,PM Peak,1,2024,Wednesday,80,69.67,2024-01
91,50d3cf2950601becdcbe84a7600a600d,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,28,28,Alvarado - Castro Valley - B St.,AM Peak,1,2024,Wednesday,30,30.25,2024-01
144,e96b548a7b2cb0b1129dd42af9a888e8,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,39,39,Skyline - Dimond - Fruitvale,Midday,1,2024,Wednesday,50,27.08,2024-01
166,644f07742daebe8413502e46d0c43425,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,52,52,UC Village - Cedar - UC Campus,Early AM,1,2024,Wednesday,40,21.67,2024-01
199,3ef5da077836f5b64c703b8507da91a4,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,62,62,7th St. - San Antonio - 23rd Av.,PM Peak,1,2024,Wednesday,155,100.58,2024-01
223,81f7c90116f3d487f97dc16edcd193ca,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,71,71,Parchester - Rumrill - Carlson,AM Peak,1,2024,Wednesday,60,58.92,2024-01
231,c4e5c062c32614848643c89704232cb5,Bay Area 511 AC Transit Schedule,recJjD8JT53sK302o,74,74,San Pablo Dam - 23rd St.,AM Peak,1,2024,Wednesday,60,46.5,2024-01
