# Map of Routes & Service Hours missing for San Francisco

In [20]:
import _report_utils
import _section1_utils as section1
import geopandas as gpd
import merge_data
import merge_operator_data 
import numpy as np
import pandas as pd
from segment_speed_utils import gtfs_schedule_wrangling, helpers, segment_calcs
from segment_speed_utils.project_vars import COMPILED_CACHED_VIEWS, PROJECT_CRS
from shared_utils import (
    catalog_utils,
    portfolio_utils,
    rt_dates,
    rt_utils,
    time_helpers,
)
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS

# Data dictionary: load the "gtfs_analytics_data" catalog.
# NOTE: this rebinds GTFS_DATA_DICT, which is also imported from update_vars above.
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

In [2]:
# Notebook display settings: show up to 100 columns, two-decimal floats,
# and do not truncate rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [3]:
# Organization being investigated; passed to the section1 helpers below.
organization_name = "City and County of San Francisco"

In [6]:
# Map the organization name to the `name` value used to load and filter
# the operator route tables below.
name = section1.organization_name_crosswalk(organization_name)

## Loading the operator map: the original dataframe has only one row!

In [7]:
# Load the route map for this operator through the section1 helper.
operator_route_map = section1.load_operator_map(name)

In [8]:
# Row count of the helper's result.
len(operator_route_map)

1

In [11]:
# Column names of the helper's result.
operator_route_map.columns

Index(['shape_array_key', 'geometry', 'feed_key', 'schedule_gtfs_dataset_key',
       'dir_0_1', 'route_key', 'Service meters', 'Service Miles',
       'is_downtown_local', 'is_local', 'is_coverage', 'is_rapid',
       'is_express', 'is_rail', 'Organization ID', 'Organization',
       'Transit Operator', 'route_long_name', 'route_short_name', 'Route',
       'Route ID'],
      dtype='object')

In [21]:
# Catalog entry for the operator-routes table; used to build the parquet URL below.
GTFS_DATA_DICT.digest_tables.operator_routes_map

'digest/operator_routes'

In [16]:
# Read the operator-routes parquet straight from the catalog location,
# keeping only the rows whose `name` matches this operator.
op_routes_url = (
    f"{GTFS_DATA_DICT.digest_tables.dir}"
    f"{GTFS_DATA_DICT.digest_tables.operator_routes_map}.parquet"
)
op_routes_gdf = gpd.read_parquet(op_routes_url, filters=[[("name", "==", name)]])

In [17]:
# Row count of the table read directly from parquet.
len(op_routes_gdf)

1

In [12]:
# Order rows so the newest service_date comes first.
# NOTE(review): this only sorts. Keeping just the most recent geography for
# each route would still need a de-duplication step afterwards -- confirm
# whether that step was intentionally left out here.
op_routes_gdf = op_routes_gdf.sort_values(by = ["service_date"], ascending = False)
    

In [14]:
# Column names of the table read directly from parquet.
op_routes_gdf.columns

Index(['shape_array_key', 'geometry', 'feed_key', 'schedule_gtfs_dataset_key',
       'direction_id', 'route_key', 'route_length', 'route_length_miles',
       'is_downtown_local', 'is_local', 'is_coverage', 'is_rapid',
       'is_express', 'is_rail', 'organization_source_record_id',
       'organization_name', 'service_date', 'name', 'route_long_name',
       'route_short_name', 'route_combined_name', 'route_id'],
      dtype='object')

In [15]:
# Count distinct route_combined_name values for each service_date.
op_routes_gdf.groupby(["service_date"]).agg({"route_combined_name":"nunique"})

Unnamed: 0_level_0,route_combined_name
service_date,Unnamed: 1_level_1
2025-02-12,1


## The sentence says SF runs only one route.

In [18]:
# Load the operator/NTD profile for the organization.
# Note that this lookup uses organization_name, not the crosswalked `name`.
operator_profiles = section1.load_operator_ntd_profile(organization_name)

In [19]:
# Display the loaded profile rows.
operator_profiles

Unnamed: 0,schedule_gtfs_dataset_key,VP per Minute (All Routes),Spatial Accuracy (All Routes),Date,# Routes,# Trips,# Shapes,# Stops,# Arrivals,Operator Service Miles,Avg Arrivals per Stop,# Downtown Local Route Types,# Local Route Types,# Coverage Route Types,# Rapid Route Types,# Express Route Types,# Rail Route Types,Transit Operator,Organization ID,Organization,District,counties_served,service_area_sq_miles,hq_city,service_area_pop,organization_type,primary_uza_name,reporter_type
23,e6230c3c190508921b7f350020e82ed5,,,2025-02-12,1.0,32.0,2.0,18.0,320.0,2.39,17.78,2.0,0.0,0.0,1.0,0.0,0.0,Bay Area 511 Golden Gate Park Shuttle Schedule,rechaapWbeffO33OX,City and County of San Francisco,04 - Oakland,San Francisco,49,San Francisco,842754,"City, County or Local Government Unit or Department of Transportation","San Francisco--Oakland, CA",Full Reporter


## Check `operator_profiles`

In [22]:
import sys

sys.path.append("../gtfs_funnel")
import crosswalk_gtfs_dataset_key_to_organization