# Looking back: operators present in last month's speedmap index but missing from this month's

In [11]:
# Date of the current run as a 'YYYY-MM-DD' string; build_speedmaps_index reads this
# notebook-level variable when fetching district geography.
analysis_date = '2025-03-12'

In [63]:
import pandas as pd
import datetime as dt
import geopandas as gpd

In [10]:
from shared_utils import rt_dates, catalog_utils, schedule_rt_utils

In [2]:
# Progress table saved for 2025-03-12 (the same date as analysis_date).
this = pd.read_parquet('./_rt_progress_2025-03-12.parquet')

In [3]:
# Progress table saved for the earlier date, 2025-02-12, used as the comparison baseline.
last = pd.read_parquet('./_rt_progress_2025-02-12.parquet')

In [9]:
# Anti-join: rows of `last` whose organization_source_record_id has no match in `this`.
(
    last
    .merge(
        this[['organization_source_record_id']],
        on='organization_source_record_id',
        how='left',
        indicator=True,
    )
    .query('_merge == "left_only"')
)

Unnamed: 0,organization_name,organization_source_record_id,name,base64_url,caltrans_district,status,analysis_date,_merge
45,Madera County,recWLwvtjXhiVWjKt,Madera County Connection Schedule,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,06 - Fresno,speedmap_segs_available,2025-02-12,left_only
59,Susanville Indian Rancheria,recKN0Q28cOQdsy5L,Tehama Schedule,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,02 - Redding,speedmap_segs_available,2025-02-12,left_only
65,City and County of San Francisco,rechaapWbeffO33OX,Bay Area 511 Muni Schedule,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,04 - Oakland,speedmap_segs_available,2025-02-12,left_only
75,Shasta County,recx4ZG2lvZb7kGAL,Redding Schedule,aHR0cHM6Ly9ob3N0ZWQtZ3Rmcy1mZWVkcy5zMy5hbWF6b2...,02 - Redding,speedmap_segs_available,2025-02-12,left_only
112,City of San Luis Obispo,recMM99msxjmc6PPv,SLO Peak Transit Schedule,aHR0cDovL2RhdGEucGVha3RyYW5zaXQuY29tL3N0YXRpY2...,05 - San Luis Obispo,speedmap_segs_available,2025-02-12,left_only
114,City of Duarte,recZm8PD8WIdcDL0M,Foothill Schedule,aHR0cHM6Ly9mb290aGlsbHRyYW5zaXQucmlkZXJhbGVydH...,07 - Los Angeles / Ventura,speedmap_segs_available,2025-02-12,left_only
115,Foothill Transit,recSqgaa8QiQ8CRjl,Foothill Schedule,aHR0cHM6Ly9mb290aGlsbHRyYW5zaXQucmlkZXJhbGVydH...,07 - Los Angeles / Ventura,speedmap_segs_available,2025-02-12,left_only
119,City of Solvang,reckp33bhAuZlmO1M,Santa Ynez Mecatran Schedule,aHR0cDovL2FwcC5tZWNhdHJhbi5jb20vdXJiL3dzL2ZlZW...,05 - San Luis Obispo,speedmap_segs_available,2025-02-12,left_only


In [1]:
import yaml

In [8]:
# Re-key the YAML mapping by ISO date string so it can be indexed with the
# string values held in rt_dates.DATES (keys must support .isoformat()).
with open('../gtfs_funnel/published_operators.yml', 'r') as f:
    operators = {
        seen_date.isoformat(): feed_names
        for seen_date, feed_names in yaml.safe_load(f).items()
    }

In [17]:
# Feed names listed under the Feb 2025 date in published_operators.yml.
operators[rt_dates.DATES['feb2025']]

['Bay Area 511 Muni Schedule',
 'Foothill Schedule',
 'Madera County Connection Schedule']

In [55]:
# Feed names listed under the Jan 2025 date in published_operators.yml.
operators[rt_dates.DATES['jan2025']]

['Auburn Schedule', 'Golden Gate Park Shuttle Schedule']

In [20]:
# Scratch: current local timestamp (output changes on every run).
dt.datetime.now()

datetime.datetime(2025, 3, 27, 15, 48, 55, 513581)

In [23]:
# Scratch: lowercase month-abbreviation + year, the key format used by rt_dates.DATES.
dt.datetime.now().strftime('%b%Y').lower()

'mar2025'

In [24]:
# Captured once so the following scratch cells share one reference time.
now = dt.datetime.now()

In [26]:
# Scratch: 30 days before `now`, a rough stand-in for "previous month" (not calendar-exact).
now-dt.timedelta(days = 30)

datetime.datetime(2025, 2, 25, 15, 50, 26, 343288)

In [61]:
# Catalog object that read_segs uses to build the speedmap segment parquet path.
catalog = catalog_utils.get_catalog('gtfs_analytics_data')

In [98]:
def datetime_to_rt_date_key(datetime: dt.datetime, day_offset: int = 0) -> str:
    '''
    Build an rt_dates.DATES key such as "mar2025" from a datetime.

    datetime: reference point in time
    day_offset: days to shift the reference by before formatting
    (negative values look backwards)
    '''
    shifted = datetime + dt.timedelta(days=day_offset)
    # Abbreviated month name followed by four-digit year, e.g. "Mar2025".
    month_year = f'{shifted:%b%Y}'
    return month_year.lower()

In [53]:
# Keys for offsets of 0, -30 and -60 days from `now`.
# NOTE(review): 30-day steps are not calendar months, so late in a month a key
# can repeat or a month can be skipped.
[datetime_to_rt_date_key(now, x) for x in range(0, -61, -30)]

['mar2025', 'feb2025', 'jan2025']

In [92]:
def append_previous(speedmap_segs: pd.DataFrame, date_key: str, operators: dict) -> pd.DataFrame:
    '''
    Append one date's organization/feed rows to speedmap_segs and return the result.

    Only rows whose feed `name` is listed in operators for that date are kept,
    and each kept row gets an `analysis_date` column holding that date.

    speedmap_segs: rows accumulated so far (may be an empty DataFrame)
    date_key: key to rt_dates.DATES, such as "feb2025"
    operators: dict of the most recent rt_date an operator's feed was seen,
    currently via '../gtfs_funnel/published_operators.yml'

    Raises KeyError if date_key is not in rt_dates.DATES, or if the date it
    resolves to is not a key of operators.
    '''
    previous_date = rt_dates.DATES[date_key]
    # Look the feed list up before reading any data so a missing date fails fast.
    feeds_last_seen = operators[previous_date]
    all_segs = read_segs(previous_date)
    # Boolean mask + assign builds a new frame, so analysis_date is never written
    # onto a filtered selection (which can raise SettingWithCopyWarning).
    previous_segs = all_segs[all_segs['name'].isin(feeds_last_seen)].assign(
        analysis_date=previous_date
    )
    return pd.concat([speedmap_segs, previous_segs])

In [93]:
# Spot check: Feb 2025 rows only, starting from an empty frame.
# NOTE(review): append_previous calls read_segs, which is defined in a later
# cell, so this cell fails on a fresh top-to-bottom run.
df = append_previous(pd.DataFrame(), 'feb2025', operators)

In [94]:
def read_segs(analysis_date: str) -> gpd.GeoDataFrame:
    '''
    Load the time-of-day speedmap segment parquet for analysis_date and
    reduce it to one row per distinct organization x feed.

    The file location comes from the notebook-level `catalog`.
    '''
    seg_cfg = catalog.speedmap_segments
    path = f'{seg_cfg.dir}{seg_cfg.segment_timeofday}_{analysis_date}.parquet'
    id_cols = ['organization_name', 'organization_source_record_id', 'name', 'base64_url']
    segments = gpd.read_parquet(path)
    # Keep only the identifier columns, then collapse repeated rows.
    org_feed_rows = segments[id_cols].drop_duplicates()
    return org_feed_rows.reset_index(drop=True)

In [95]:
# Keys to rt_dates.DATES for the current month and the two before it, newest first.
# Stepping by calendar month (instead of 30-day offsets) cannot repeat or skip a
# month when this runs near the end of a month, e.g. on the 31st.
ANALYSIS_DATE_LIST = [
    month.strftime('%b%Y').lower()
    for month in pd.period_range(end=dt.datetime.now(), periods=3, freq='M')[::-1]
]
# ANALYSIS_DATE is not assigned in any visible cell; tie it to analysis_date so
# this cell also runs on a fresh kernel.
ANALYSIS_DATE = analysis_date
PROGRESS_PATH = f'./_rt_progress_{ANALYSIS_DATE}.parquet'
GEOJSON_SUBFOLDER = f'segment_speeds_{ANALYSIS_DATE}/'

def build_speedmaps_index(analysis_date_list: list, operators: dict, geography_date: str = None) -> pd.DataFrame:
    '''
    An index table for tracking down a given org's schedule/rt feeds.
    Note that in limited cases, multiple orgs may share the same datasets
    (VCTC combined feeds, SD Airport and SDMTS...)

    analysis_date_list: keys to rt_dates.DATES (such as "mar2025"),
    processed in the order given
    operators: dict of the most recent rt_date an operator's feed was seen,
    passed through to append_previous
    geography_date: date handed to schedule_rt_utils.filter_dim_county_geography;
    when omitted, the notebook-level `analysis_date` is used

    Returns the accumulated organization/feed rows joined to their district
    rows, with a constant `status` column.
    '''
    if geography_date is None:
        # Same source the function always used; now overridable by the caller.
        geography_date = analysis_date
    speedmap_segs = pd.DataFrame()
    for date_key in analysis_date_list:
        speedmap_segs = append_previous(speedmap_segs, date_key, operators)
    districts = schedule_rt_utils.filter_dim_county_geography(geography_date)
    # Default (inner) merge: organizations with no match in districts are dropped.
    new_ix = speedmap_segs.merge(districts, on = 'organization_name')
    new_ix['status'] = 'speedmap_segs_available'
    return new_ix

In [96]:
# Build the index for every key in ANALYSIS_DATE_LIST; this rebinds df from the earlier spot check.
df = build_speedmaps_index(ANALYSIS_DATE_LIST, operators)

mar2025
feb2025
jan2025


In [99]:
# Row counts per analysis_date in the rebuilt index.
df.analysis_date.value_counts()

2025-03-12    109
2025-02-12      4
Name: analysis_date, dtype: int64

In [101]:
# Same counts read back from the saved progress parquet, for comparison with the cell above.
pd.read_parquet('./_rt_progress_2025-03-12.parquet').analysis_date.value_counts()

2025-03-12    109
2025-02-12      4
Name: analysis_date, dtype: int64

In [102]:
# Display the index; pandas elides the middle rows of the frame.
df

Unnamed: 0,organization_name,organization_source_record_id,name,base64_url,analysis_date,caltrans_district,status
0,Tulare County Regional Transit Agency,rec4YWVqVwJ9XqDbq,TCRTA TripShot Schedule,aHR0cHM6Ly90Y3J0YS50cmlwc2hvdC5jb20vdjEvZ3Rmcy...,2025-03-12,06 - Fresno / Bakersfield,speedmap_segs_available
1,Marin County Transit District,recNOb7pqBRlQVG5e,Bay Area 511 Marin Schedule,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,2025-03-12,04 - Bay Area / Oakland,speedmap_segs_available
2,Santa Barbara Metropolitan Transit District,recswCrw6a6htmXJ4,SBMTD Schedule,aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...,2025-03-12,05 - San Luis Obispo / Santa Barbara,speedmap_segs_available
3,Los Angeles County Metropolitan Transportation...,recPnGkwdpnr8jmHB,LA Metro Bus Schedule,aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX2J1cy...,2025-03-12,07 - Los Angeles / Ventura,speedmap_segs_available
4,Los Angeles County Metropolitan Transportation...,recPnGkwdpnr8jmHB,LA Metro Rail Schedule,aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX3JhaW...,2025-03-12,07 - Los Angeles / Ventura,speedmap_segs_available
...,...,...,...,...,...,...,...
108,City of Monterey Park,reccAVEcMntkbh6aY,Spirit Bus Passio Schedule,aHR0cHM6Ly9wYXNzaW8zLmNvbS9tb250ZXJleXAvcGFzc2...,2025-03-12,07 - Los Angeles / Ventura,speedmap_segs_available
109,Madera County,recWLwvtjXhiVWjKt,Madera County Connection Schedule,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,2025-02-12,06 - Fresno / Bakersfield,speedmap_segs_available
110,City and County of San Francisco,rechaapWbeffO33OX,Bay Area 511 Muni Schedule,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,2025-02-12,04 - Bay Area / Oakland,speedmap_segs_available
111,City of Duarte,recZm8PD8WIdcDL0M,Foothill Schedule,aHR0cHM6Ly9mb290aGlsbHRyYW5zaXQucmlkZXJhbGVydH...,2025-02-12,07 - Los Angeles / Ventura,speedmap_segs_available
