# Update the portfolio `analyses.yml` with the districts and operators used for parameterization

In [None]:
import geopandas as gpd
import intake
import pandas as pd

# Open an intake catalog from the glob pattern "./*.yml" (YAML files in the
# current working directory).
catalog = intake.open_catalog("./*.yml")

In [None]:
# Read the `competitive_route_variability` catalog entry.
# Presumably a (Geo)DataFrame, given the column access in later cells;
# it is expected to have caltrans_district, route_group and calitp_itp_id columns.
df = catalog.competitive_route_variability.read()

In [None]:
# Check that every district is represented: for each non-null district, print
# the number of distinct operators and the number of rows that have a
# non-null route_group.
# `districts` is reused later to build the chapters for analyses.yml.
districts = sorted(df.caltrans_district.dropna().unique())

# The route_group filter does not depend on the district, so compute it once.
has_route_group = df.route_group.notna()

for district in districts:
    subset = df[(df.caltrans_district == district) & has_route_group]
    print(f"{district}, # operators: {subset.calitp_itp_id.nunique()}, # obs: {len(subset)}")

In [None]:
# Reference: https://github.com/cal-itp/data-analyses/blob/main/rt_delay/04_generate_all.ipynb

In [None]:
import yaml

# Load the existing portfolio configuration so that only one site's `parts`
# needs to be replaced further down.
# Read explicitly as UTF-8 instead of relying on the platform's default
# locale encoding.
with open('../portfolio/analyses.yml', encoding='utf-8') as analyses:
    # NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
    # presumably fine here because analyses.yml is a repo-controlled file, but
    # consider yaml.safe_load if the file uses no custom tags.
    analyses_data = yaml.load(analyses, yaml.Loader)


In [None]:
# https://stackoverflow.com/questions/2170900/get-first-list-index-containing-sub-string
# Find the index of the first site whose name contains 'parallel_corridors'.
# next() stops at the first match instead of building the full list of matches.
site_index = next(
    (idx for idx, site in enumerate(analyses_data["sites"])
     if 'parallel_corridors' in site['name']),
    None,
)
if site_index is None:
    # Fail with an explicit message rather than an opaque IndexError from `[...][0]`.
    raise ValueError(
        "No site with 'parallel_corridors' in its name was found in analyses.yml"
    )

In [None]:
# itp_ids that are left out of every district's sections
exclude_ids = [0]

# Build one chapter per district. Each chapter carries the keys that go into
# analyses.yml (caption, params, sections); `sections` gets one entry per
# itp_id that has a non-null route_group in that district and is not excluded.
chapters_list = []
for district in districts:
    keep = (
        (df.caltrans_district == district)
        & df.route_group.notna()
        & ~df.calitp_itp_id.isin(exclude_ids)
    )
    itp_ids = df.loc[keep, "calitp_itp_id"].unique().tolist()
    chapters_list.append({
        'caption': f'District {district}',
        'params': {'district': district},
        'sections': [{'itp_id': itp_id} for itp_id in itp_ids],
    })

# Wrap the chapters in a single-item list
parts_list = [{'chapters': chapters_list}]

In [None]:
# Set the `parts` entry of the site found via `site_index` to the freshly
# built chapters; every other site in analyses_data is left untouched.
analyses_data['sites'][site_index]['parts'] = parts_list

In [None]:
# Serialize the updated configuration to a YAML string.
# NOTE(review): with default options yaml.dump sorts mapping keys, and any
# comments from the original file are not carried over - confirm that is acceptable.
output = yaml.dump(analyses_data)

In [None]:
# Overwrite the portfolio configuration file with the regenerated YAML text
with open('../portfolio/analyses.yml', mode='w') as out_file:
    out_file.write(output)