# Refactoring the CT District Portfolio

In [1]:
import altair as alt
import calitp_data_analysis.magics
import geopandas as gpd
import google.auth
import merge_data
import pandas as pd
from IPython.display import HTML, Image, Markdown, display, display_html
from omegaconf import OmegaConf
from shared_utils import portfolio_utils, publish_utils, rt_dates
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS

# Load the "readable2.yml" config file through OmegaConf.
readable_dict = OmegaConf.load("readable2.yml")
# Resolve Google application-default credentials and the project they map to.
credentials, project = google.auth.default()

import _ct_district_data_prep
import _ct_district_visuals

In [2]:
# Notebook-wide pandas display settings: up to 100 columns, floats shown with
# two decimals, and no truncation of rows or of cell text.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)


def formatted(number):
    """Return ``number`` as a string with comma thousands separators.

    Example: ``formatted(1234567)`` gives ``"1,234,567"``.
    """
    return f"{number:,}"

In [3]:
# Caltrans district under review; compared against the `caltrans_district`
# column when the operator tables are filtered further down.
district = "02 - Redding"

In [4]:
# Dates returned for the "apr2025" week with exclude_wed=True; the output
# lists six dates and skips 2025-04-16.
rt_dates.get_week("apr2025", exclude_wed=True)  

['2025-04-14',
 '2025-04-15',
 '2025-04-17',
 '2025-04-18',
 '2025-04-19',
 '2025-04-20']

In [5]:
# Same lookup for the "oct2024" week; the output lists six dates and skips
# 2024-10-16.
rt_dates.get_week("oct2024", exclude_wed=True)  

['2024-10-14',
 '2024-10-15',
 '2024-10-17',
 '2024-10-18',
 '2024-10-19',
 '2024-10-20']

## What's the difference between Operator Profiles Report and Operator Profiles?

In [6]:
# `operator_profiles` entry from the digest_tables section of GTFS_DATA_DICT;
# used as the parquet file stem under RT_SCHED_GCS in the next cell.
OPERATOR_PROFILE = GTFS_DATA_DICT.digest_tables.operator_profiles

In [7]:
# Read the full operator_profiles table.
operator_df = pd.read_parquet(f"{RT_SCHED_GCS}{OPERATOR_PROFILE}.parquet")

# Narrow to the selected district on a single service date.
operator_df2 = operator_df.loc[
    (operator_df["caltrans_district"] == district)
    & (operator_df["service_date"] == "2025-02-12T00:00:00.000000000")
]

In [8]:
# Sanity check: the filtered slice should contain a single service date.
operator_df2["service_date"].unique()

array(['2025-02-12T00:00:00.000000000'], dtype='datetime64[ns]')

In [9]:
# (rows, columns) of the district slice for the single service date.
operator_df2.shape

(12, 31)

In [10]:
# Columns available in the operator_profiles table.
operator_df2.columns

Index(['schedule_gtfs_dataset_key', 'operator_n_routes', 'operator_n_trips',
       'operator_n_shapes', 'operator_n_stops', 'operator_n_arrivals',
       'operator_route_length_miles', 'operator_arrivals_per_stop',
       'n_downtown_local_routes', 'n_local_routes', 'n_coverage_routes',
       'n_rapid_routes', 'n_express_routes', 'n_rail_routes', 'n_ferry_routes',
       'name', 'organization_source_record_id', 'organization_name',
       'service_date', 'vp_per_min_agency', 'spatial_accuracy_agency',
       'caltrans_district', 'service_area_sq_miles', 'hq_city',
       'service_area_pop', 'organization_type', 'primary_uza_name',
       'reporter_type', 'portfolio_organization_name', 'counties_served',
       'sched_rt_category'],
      dtype='object')

In [11]:
# Show the identifying and headline-count columns for each operator row.
# NOTE(review): the output repeats the same schedule_gtfs_dataset_key across
# several rows that differ only in organization_name / hq_city /
# organization_type -- confirm whether this table needs de-duplicating.
operator_df2.loc[
    :,
    [
        "schedule_gtfs_dataset_key",
        "name",
        "organization_name",
        "portfolio_organization_name",
        "operator_n_routes",
        "operator_n_trips",
        "operator_n_shapes",
        "operator_n_stops",
        "hq_city",
        "service_area_pop",
        "organization_type",
        "primary_uza_name",
    ],
]

Unnamed: 0,schedule_gtfs_dataset_key,name,organization_name,portfolio_organization_name,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,hq_city,service_area_pop,organization_type,primary_uza_name
368,0d65d96d07115e28313f207d5ed0d3b2,Sage Stage Schedule,Modoc Transportation Agency,Modoc Transportation Agency,1,2,2,12,Alturas,,"City, County or Local Government Unit or Department of Transportation",
2248,330fd5b796496eddf2ce1d1b9828961d,Siskiyou Schedule,Siskiyou County,Siskiyou County,5,37,29,93,Yreka,,"City, County or Local Government Unit or Department of Transportation",
3384,6a74a120fe2da801e778ac523b354e2b,Trinity Schedule,Trinity County,Trinity County,1,2,2,35,Weaverville,,"City, County or Local Government Unit or Department of Transportation",
3759,73c79ccbfd681df300489226a158b9db,Tehama Schedule,Tehama County,Tehama County,9,71,10,72,Gerber,,"City, County or Local Government Unit or Department of Transportation",
3760,73c79ccbfd681df300489226a158b9db,Tehama Schedule,Tehama County,Tehama County,9,71,10,72,,,,
3761,73c79ccbfd681df300489226a158b9db,Tehama Schedule,Susanville Indian Rancheria,Tehama County,9,71,10,72,Gerber,,"City, County or Local Government Unit or Department of Transportation",
3762,73c79ccbfd681df300489226a158b9db,Tehama Schedule,Susanville Indian Rancheria,Tehama County,9,71,10,72,,,,
4394,91af7482fde58c6261f386b732404e11,Redding Schedule,Shasta County,Redding Area Bus Authority,12,200,27,343,Redding,,"City, County or Local Government Unit or Department of Transportation","Redding, CA"
4395,91af7482fde58c6261f386b732404e11,Redding Schedule,Shasta County,Redding Area Bus Authority,12,200,27,343,Redding,126551.0,Independent Public Agency or Authority of Transit Service,"Redding, CA"
4396,91af7482fde58c6261f386b732404e11,Redding Schedule,Redding Area Bus Authority,Redding Area Bus Authority,12,200,27,343,Redding,,"City, County or Local Government Unit or Department of Transportation","Redding, CA"


In [12]:
# `operator_profiles_report` entry from the digest_tables section of
# GTFS_DATA_DICT; used as the parquet file stem under RT_SCHED_GCS below.
OPERATOR_PROFILE_REPORT = GTFS_DATA_DICT.digest_tables.operator_profiles_report

In [13]:
# Read the full operator_profiles_report table.
operator_df_report = pd.read_parquet(f"{RT_SCHED_GCS}{OPERATOR_PROFILE_REPORT}.parquet")

# Keep only the selected district; no service_date filter is applied here.
operator_df2_report = operator_df_report.loc[
    operator_df_report["caltrans_district"] == district
]

In [14]:
# Columns available in the operator_profiles_report table, for comparison with
# the operator_profiles columns listed earlier.
operator_df2_report.columns

Index(['portfolio_organization_name', 'service_date', 'caltrans_district',
       'operator_n_routes', 'operator_n_trips', 'operator_n_shapes',
       'operator_n_stops', 'operator_n_arrivals',
       'operator_route_length_miles', 'n_downtown_local_routes',
       'n_local_routes', 'n_coverage_routes', 'n_rapid_routes',
       'n_express_routes', 'n_rail_routes', 'n_ferry_routes',
       'vp_per_min_agency', 'spatial_accuracy_agency', 'n_feeds',
       'operator_feeds', 'counties_served', 'service_area_pop',
       'service_area_sq_miles', 'hq_city', 'reporter_type',
       'primary_uza_name'],
      dtype='object')

In [15]:
# (rows, columns) of the report slice; the output shows a single row for this
# district.
operator_df2_report.shape

(1, 26)

In [16]:
# Transpose the district slice so each field reads as its own row.
operator_df2_report.transpose()

Unnamed: 0,80
portfolio_organization_name,Redding Area Bus Authority
service_date,2025-06-11 00:00:00
caltrans_district,02 - Redding
operator_n_routes,12
operator_n_trips,198
operator_n_shapes,27
operator_n_stops,343
operator_n_arrivals,5123
operator_route_length_miles,278.49
n_downtown_local_routes,0


## Understand Summary Tables 1 & 2

In [20]:
# NOTE(review): this import sits mid-notebook and was executed as In [20],
# right before the In [21] cell that first uses it. Move it into the import
# cell at the top so a fresh Restart & Run All does not depend on cell order.
import _report_utils

In [18]:
# Build the operator frame for this district via the data-prep module.
# NOTE(review): this rebinds `operator_df`, which earlier held the raw
# operator_profiles parquet, and the cell ran as In [18] while the cell above
# ran as In [20] -- consider a distinct name and re-running top to bottom.
operator_df = _ct_district_data_prep.data_wrangling_operator_profile(district)

In [21]:
# Summarise the operator frame with `caltrans_district` as the key column; the
# displayed result has one row with counts such as n_operators and
# operator_n_trips.
district_summary = _report_utils.district_stats(operator_df, "caltrans_district")

In [22]:
# Display the district-level summary table.
district_summary

Unnamed: 0,caltrans_district,n_operators,operator_n_routes,operator_n_trips,operator_n_stops,operator_n_arrivals,arrivals_per_stop,trips_per_operator
0,02 - Redding,7,43,470,726,7656,10.55,67.14


## Understand GTFS Stats by Operator 

In [23]:
# Build the per-operator GTFS stats table from the operator frame; the
# displayed result has one row per portfolio organization.
gtfs_table_df = _ct_district_data_prep.create_gtfs_stats(operator_df)

In [24]:
# Display the per-operator stats table.
# NOTE(review): the output carries both `operator_arrivals_per_stop` and
# `Avg Arrivals per Stop` with identical values; one looks like a leftover
# un-renamed column -- confirm in create_gtfs_stats.
gtfs_table_df

Unnamed: 0,Portfolio Organization Name,# Routes,# Trips,# Shapes,# Stops,# Arrivals,Operator Service Miles,operator_arrivals_per_stop,Avg Arrivals per Stop
0,Modoc Transportation Agency,1,2,2,12,22,193.19,1.83,1.83
1,Siskiyou County,5,38,37,91,639,221.43,7.02,7.02
2,Redding Area Bus Authority,16,275,28,345,4924,192.67,14.27,14.27
3,Tehama County,9,71,10,72,934,284.97,12.97,12.97
4,Trinity County,5,17,10,97,299,178.93,3.08,3.08
5,Plumas Transit Systems,3,34,19,70,603,145.43,8.61,8.61
6,Lassen Transit Service Agency,4,33,12,39,235,166.55,6.03,6.03
