## Section 1: Operator Overview 
* Using the most recent month

In [1]:
import calendar

import _report_utils
import _section1_utils as section1
import altair as alt
import calitp_data_analysis.magics
import geopandas as gpd
import great_tables as gt
import pandas as pd
import yaml
from calitp_data_analysis import calitp_color_palette as cp
from calitp_data_analysis.sql import to_snakecase
from great_tables import md
from IPython.display import HTML, Markdown, display
from segment_speed_utils.project_vars import RT_SCHED_GCS
from shared_utils import catalog_utils, rt_dates, rt_utils

In [2]:
# Select Altair's "html" renderer for chart output in this notebook.
alt.renderers.enable("html")
# Use the default data transformer with max_rows=None, i.e. without a row cap
# (NOTE(review): Altair normally refuses large frames -- confirm this is intended).
alt.data_transformers.enable("default", max_rows=None)

DataTransformerRegistry.enable('default')

In [3]:
# Notebook-wide pandas display settings: show up to 100 columns, format floats
# with two decimals, and do not truncate rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [4]:
# Operator name passed to section1.load_operator_map() below; the same string
# appears in the "Transit Operator" column of the operator profile output.
name = "Bay Area 511 AC Transit Schedule"
# Organization name passed to section1.load_operator_profiles() below.
organization_name = "Alameda-Contra Costa Transit District"

In [5]:
# Analysis date, looked up by key in rt_dates.DATES and parsed with pandas.
# NOTE(review): the section intro says "most recent month" and the operator
# profile output further down is dated 2024-04-17, yet this selects "mar2024"
# -- confirm the intended key.
selected_date = pd.to_datetime(rt_dates.DATES["mar2024"])
year = selected_date.year
# Catalog object; its digest_tables entries are used below to build the
# operator-profiles parquet path.
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

In [6]:
# Readable Dictionary
# Parsed contents of readable.yml. `yaml` is imported in the notebook's import
# cell. The file is opened as UTF-8 explicitly so the result does not depend on
# the platform's default encoding.
with open("readable.yml", encoding="utf-8") as f:
    readable_dict = yaml.safe_load(f)

### Map

In [7]:
ac_transit_map = section1.load_operator_map(name)

In [8]:
ac_transit_map.shape

(132, 15)

In [9]:
def plot_route(route):
    """
    Display an interactive map of the rows of `gdf` whose "Route" equals `route`.

    The map is coloured by "Route" with the "Spectral" colormap on CartoDB
    positron tiles, sized 500x300, without a legend, and with a tooltip showing
    "Route" and "Service Miles".

    NOTE(review): `gdf` is read from the notebook's global namespace and is not
    defined in the cells shown here -- confirm it is bound before this is called.
    """
    explore_kwargs = {
        "column": "Route",
        "cmap": "Spectral",
        "tiles": "CartoDB positron",
        "width": 500,
        "height": 300,
        "style_kwds": {"weight": 3},
        "legend": False,
        "tooltip": ["Route", "Service Miles"],
    }
    route_rows = gdf.loc[gdf["Route"] == route]
    display(route_rows.explore(**explore_kwargs))

### Operator Profiles

In [10]:
ac_transit_profile = section1.load_operator_profiles(organization_name)

In [11]:
ac_transit_profile

Unnamed: 0,schedule_gtfs_dataset_key,# Routes,# Trips,# Shapes,# Stops,# Arrivals,Operator Service Miles,Avg Arrivals per Stop,# Downtown Local Route Types,# Local Route Types,# Rapid Route Types,# Coverage Route Types,Transit Operator,Organization ID,Organization,Date
13,c499f905e33929a641f083dad55c521e,132,5478,346,4733,234582,1459.66,49.56,179,21,92,44,Bay Area 511 AC Transit Schedule,recOZgevYf7Jimm9L,Alameda-Contra Costa Transit District,2024-04-17


#### Original

In [12]:
# Build the parquet path from the catalog's digest_tables entry (directory +
# operator_profiles table name), then read the operator profiles from it.
digest_tables = GTFS_DATA_DICT.digest_tables
op_profiles_url = f"{digest_tables.dir}{digest_tables.operator_profiles}.parquet"

op_profiles_df = pd.read_parquet(op_profiles_url)

In [13]:
# Newest service_date first, so the first row per organization is its latest profile.
op_profiles_df1 = op_profiles_df.sort_values("service_date", ascending=False)

In [14]:
op_profiles_df1.columns

Index(['schedule_gtfs_dataset_key', 'operator_n_routes', 'operator_n_trips',
       'operator_n_shapes', 'operator_n_stops', 'operator_n_arrivals',
       'operator_route_length_miles', 'operator_arrivals_per_stop',
       'n_downtown_local_routes', 'n_local_routes', 'n_rapid_routes',
       'n_coverage_routes', 'name', 'organization_source_record_id',
       'organization_name', 'service_date'],
      dtype='object')

In [15]:
# op_profiles_df1 is sorted newest-first, so drop_duplicates (which keeps the
# first occurrence) leaves each organization's most recent row; describe() then
# summarises how those latest service dates are distributed.
# NOTE(review): this cell emits a warning (see the output below) -- probably
# pandas' deprecation of describing datetime columns as categorical; confirm.
op_profiles_df1.drop_duplicates(subset=["organization_name"])[
    ["service_date"]
].describe()

  op_profiles_df1.drop_duplicates(subset=["organization_name"])[


Unnamed: 0,service_date
count,171
unique,11
top,2024-04-17 00:00:00
freq,139
first,2023-03-15 00:00:00
last,2024-04-17 00:00:00


### Operators with no profile info for the most recent `service_date` that still have `sched_vp` data
* City of Torrance	
* Stanislaus Regional Transit Authority

In [23]:
# One row per organization (the first, i.e. most recent, row of the
# newest-first frame), keeping only the organization and its service date.
latest_profile_per_org = op_profiles_df1.drop_duplicates(subset=["organization_name"])
op_profiles2 = latest_profile_per_org.loc[:, ["organization_name", "service_date"]]

In [24]:
# Disabled check: would list, alphabetically by organization, the operators whose
# most recent profile predates 2024-04-17 (the latest service_date seen above).
# op_profiles2.loc[op_profiles2.service_date < '2024-04-17'].sort_values(by = ['organization_name'])

### Total Service
* Checking Eric's data.
* Big Blue Bus Schedule
* https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.fct_monthly_route_service_by_timeofday
* `ttl_service_hours`: total scheduled service hours that occurred for the route for this month, `day_type`, and `time_of_day`.

In [89]:
og_big_blue_bus = section1.load_scheduled_service("Big Blue Bus Schedule")

In [90]:
# Parse the "YYYY-MM" strings in full_date into datetimes. The format has no day
# component, so each value lands on the first of the month (2023-04 -> 2023-04-01).
# Note: this adds the column to og_big_blue_bus in place.
og_big_blue_bus["datetime_date"] = pd.to_datetime(og_big_blue_bus["full_date"], format="%Y-%m")

In [91]:
og_big_blue_bus.head(2)

Unnamed: 0,key,name,schedule_source_record_id,route_id_x,route_short_name,route_long_name,time_of_day,month,year,day_type,n_trips,ttl_service_hours,day_name,schedule_gtfs_dataset_key,organization_source_record_id,organization_name,route_id_y,route_combined_name,full_date,datetime_date
318238,131c820d577cabbc588e8ae37d93ef18,Big Blue Bus Schedule,recpN1dPaxhZvZQV0,3554,1,Main St & Santa Monica Blvd/UCLA,Evening,4,2023,5,72,47.07,Thursday,dbbe8ee4864a2715a40749605395d584,recJHFDLpGRMIFgnL,City of Santa Monica,3629,1 Main St & Santa Monica Blvd/UCLA,2023-04,2023-04-01
318239,6ea1bdf6b8a09827b1e0ad8770621b1c,Big Blue Bus Schedule,recpN1dPaxhZvZQV0,3555,2,Wilshire Blvd/UCLA,AM Peak,4,2023,6,56,33.47,Friday,dbbe8ee4864a2715a40749605395d584,recJHFDLpGRMIFgnL,City of Santa Monica,3630,2 Wilshire Blvd/UCLA,2023-04,2023-04-01


In [92]:
og_big_blue_bus.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7462 entries, 318238 to 706372
Data columns (total 20 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   key                            7462 non-null   object        
 1   name                           7462 non-null   object        
 2   schedule_source_record_id      7462 non-null   object        
 3   route_id_x                     7462 non-null   object        
 4   route_short_name               7462 non-null   object        
 5   route_long_name                7462 non-null   object        
 6   time_of_day                    7462 non-null   object        
 7   month                          7462 non-null   object        
 8   year                           7462 non-null   int64         
 9   day_type                       7462 non-null   int64         
 10  n_trips                        7462 non-null   int64         
 11  ttl_servic

In [94]:
# Example usage: count_days_in_months takes a single list of date-like objects
# (anything exposing .year and .month), so build one date per (year, month) pair
# instead of passing the two lists separately.
# NOTE(review): this cell sits above the cell that defines count_days_in_months;
# on a fresh kernel it has to run after that definition.
years = [2016, 2016, 2016]
months = [3, 4, 5]
day_counts_df = count_days_in_months(
    [pd.Timestamp(year=y, month=m, day=1) for y, m in zip(years, months)]
)

In [101]:
def count_days_in_months(dates: list):
    """
    Count how many times each weekday occurs in the month of every given date.

    Args:
        dates: iterable of date-like objects exposing ``.year`` and ``.month``
            (e.g. ``pd.Timestamp`` or ``datetime.date``). Several dates falling
            in the same month contribute a single row; the month is counted
            once, not once per date.

    Returns:
        pd.DataFrame with one row per distinct (year, month) in first-seen
        order. After ``reset_index`` the year and month appear as ``level_0``
        and ``level_1``, followed by the columns ``Monday`` .. ``Sunday``.
        An empty ``dates`` yields a frame with no rows.
    """
    # English names in calendar order (index 0 = Monday). Kept explicit rather
    # than read from calendar.day_name, which follows the current locale.
    weekday_names = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )

    day_counts = {}

    for date in dates:
        month_key = (date.year, date.month)

        # A month already tallied must not be tallied again, otherwise its
        # weekday counts would be multiplied by the number of dates in it.
        if month_key in day_counts:
            continue

        counts = dict.fromkeys(weekday_names, 0)

        # monthcalendar() returns full Monday-first weeks, padding days that
        # fall outside the month with 0.
        for week in calendar.monthcalendar(*month_key):
            for weekday_idx, day in enumerate(week):
                if day != 0:
                    counts[weekday_names[weekday_idx]] += 1

        day_counts[month_key] = counts

    # Tuple keys become a two-level index; reset_index turns it into columns.
    df = pd.DataFrame.from_dict(day_counts, orient="index")
    df = df.reset_index()
    return df

In [102]:
# One date per month of interest; only the year and month of each date are used.
dates = [pd.to_datetime(day) for day in ("2016-01-01", "2016-02-01", "2016-03-01")]
count_days_in_months(dates)


Unnamed: 0,level_0,level_1,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
0,2016,1,4,4,4,4,5,5,5
1,2016,2,5,4,4,4,4,4,4
2,2016,3,4,5,5,5,4,4,4
