In [4]:
import yaml
from functools import cache

from calitp_data_analysis.gcs_pandas import GCSPandas
import pandas as pd

@cache
def gcs_pandas():
    """Return a `GCSPandas` helper instance.

    Wrapped in `functools.cache`, so the instance is built on the first call
    and the same object is handed back on every later call.
    """
    helper = GCSPandas()
    return helper

# Exporting Analysis Names to Airtable

## Interim Documentation

- `gtfs_funnel/crosswalk_gtfs_dataset_key_to_organization.py` is a dependency
    - part of the usual GTFS Funnel workflow and Makefile
- Run `../_shared_utils/shared_utils/create_portfolio_display_yaml.py`
- Download a csv of the GTFS Datasets Schedule view from Airtable and save it next to this notebook as `gtfs datasets-Schedule.csv`
- Run this notebook: a very simple update script (read csv -> look up Analysis Name -> write `output.csv`)
- Open `output.csv` in Excel and copy the Analysis Name column
- The top cell of the copied column must contain a value, even when that row has no Analysis Name, or the paste will not align properly
    - The script adds a placeholder when the top cell is empty
- Paste back into same Airtable view and check alignment
    - Delete top cell placeholder value if present

In [5]:
# Load the Airtable export (downloaded manually) from the notebook's directory.
airtable_gtfs_datasets_schedule = pd.read_csv(filepath_or_buffer='./gtfs datasets-Schedule.csv')

In [6]:
# Keep only the two columns involved in the update and trim stray whitespace
# from the feed names so they match the lookup keys exactly.
airtable_gtfs_datasets_schedule = (
    airtable_gtfs_datasets_schedule[['Name', 'Analysis Name']]
    .assign(Name=lambda frame: frame['Name'].str.strip())
)

In [7]:
# Number the rows 1..N in their current order and use that numbering as the index.
airtable_gtfs_datasets_schedule = (
    airtable_gtfs_datasets_schedule
    .assign(airtable_ix=range(1, len(airtable_gtfs_datasets_schedule) + 1))
    .set_index('airtable_ix')
)

In [9]:
# Load the Name -> Analysis Name lookup used further down.
# The encoding is pinned to UTF-8 so names with non-ASCII characters decode the
# same way on every platform instead of depending on the locale default.
with open('./portfolio_organization_name.yml', encoding='utf-8') as f:
    portfolio_organization_name = yaml.safe_load(f)

In [10]:
# portfolio_organization_name

In [11]:
airtable_gtfs_datasets_schedule['Name']

airtable_ix
1                     AC Transit Alerts
2                   AC Transit Schedule
3                AC Transit TripUpdates
4           AC Transit VehiclePositions
5                          ACE Schedule
                     ...               
813           Yolobus Vehicle Positions
814    Yosemite Valley Shuttle Schedule
815                Yuba-Sutter Schedule
816                       Yuma Schedule
817                      Yurok Schedule
Name: Name, Length: 817, dtype: object

In [12]:
# Look up every feed name in the mapping; `dict.get` yields None for names that
# have no entry, which the placeholder step below relies on.
airtable_gtfs_datasets_schedule['Analysis Name'] = (
    airtable_gtfs_datasets_schedule['Name'].map(portfolio_organization_name.get)
)

In [13]:
# The pasted column needs a value in its top cell to line up in Airtable, so
# put a marker there when the first row has no Analysis Name.
analysis_name_col = airtable_gtfs_datasets_schedule.columns.get_loc('Analysis Name')
first_analysis_name = airtable_gtfs_datasets_schedule.iat[0, analysis_name_col]
# pd.isna catches NaN/None (NaN is truthy, so `not value` alone would miss it);
# the falsiness check still covers an empty string.
if pd.isna(first_analysis_name) or not first_analysis_name:
    airtable_gtfs_datasets_schedule.iat[0, analysis_name_col] = '_placeholder, delete after pasting to Airtable'

In [14]:
airtable_gtfs_datasets_schedule.iat[0, 1]

'_placeholder, delete after pasting to Airtable'

In [15]:
# Display the frame to eyeball the Name -> Analysis Name pairing before exporting.
airtable_gtfs_datasets_schedule

Unnamed: 0_level_0,Name,Analysis Name
airtable_ix,Unnamed: 1_level_1,Unnamed: 2_level_1
1,AC Transit Alerts,"_placeholder, delete after pasting to Airtable"
2,AC Transit Schedule,
3,AC Transit TripUpdates,
4,AC Transit VehiclePositions,
5,ACE Schedule,
...,...,...
813,Yolobus Vehicle Positions,
814,Yosemite Valley Shuttle Schedule,Yosemite National Park
815,Yuba-Sutter Schedule,Yuba-Sutter Transit Authority
816,Yuma Schedule,Yuma County Intergovernmental Public Transport...


In [16]:
# Write the result (index included) for copying back into Airtable.
airtable_gtfs_datasets_schedule.to_csv(path_or_buf='output.csv')