# Update the portfolio YAML with the operators used for parameterization

In [None]:
import geopandas as gpd
import intake
import pandas as pd

# Open the intake catalog defined by the YAML file(s) matching ./*.yml;
# its `competitive_route_variability` entry is read further down.
catalog = intake.open_catalog("./*.yml")

In [None]:
import yaml

In [None]:
def overwrite_yaml(PORTFOLIO_SITE_YAML, SITE_NAME):
    """
    Rebuild the 'parts' entry of one site in the portfolio YAML, in place.

    Reads the `competitive_route_variability` table from the module-level
    `catalog`, builds one chapter per Caltrans district (each chapter holding
    one section per ITP ID found in that district), stores the result under
    the matching site and rewrites the whole YAML file.

    PORTFOLIO_SITE_YAML: str
                        relative path to where the yaml is for portfolio
                        '../portfolio/analyses.yml' or '../portfolio/sites.yml'
    SITE_NAME: str
                name given to this analysis
                'parallel_corridors', 'rt', 'dla'

    Returns: list of dict
                the chapters written to the YAML; each dict has the keys
                'caption', 'params' and 'sections'
    Raises: IndexError
                if no site name in the YAML matches SITE_NAME
    """
    df = catalog.competitive_route_variability.read()

    # .tolist() gives native Python values (same as the itp_id handling below)
    # rather than numpy scalars, which the default YAML dumper does not write
    # as plain scalars.
    districts = sorted(df.caltrans_district.dropna().unique().tolist())

    # Eric's example
    # https://github.com/cal-itp/data-analyses/blob/main/rt_delay/04_generate_all.ipynb

    # NOTE(review): yaml.Loader can construct arbitrary Python objects; that is
    # acceptable only because this file is a trusted, repo-local config.
    with open(PORTFOLIO_SITE_YAML) as analyses:
        analyses_data = yaml.load(analyses, yaml.Loader)

    # Find the index of the site to update. Prefer an exact name match, because
    # short names such as 'rt' can be substrings of unrelated site names; fall
    # back to the first substring match to stay compatible with earlier usage.
    site_names = [site['name'] for site in analyses_data["sites"]]
    if SITE_NAME in site_names:
        site_index = site_names.index(SITE_NAME)
    else:
        partial_matches = [idx for idx, name in enumerate(site_names)
                           if SITE_NAME in name]
        if not partial_matches:
            raise IndexError(
                f"No site matching {SITE_NAME!r} in {PORTFOLIO_SITE_YAML}; "
                f"available sites: {site_names}"
            )
        site_index = partial_matches[0]

    # list any ITP IDs to be excluded, either because of invalid data or just too few results
    exclude_ids = [0]

    # Rows usable in any district: has a route_group and is not an excluded ID.
    # Computed once because it does not depend on the district.
    usable_rows = df.route_group.notna() & ~df.calitp_itp_id.isin(exclude_ids)

    # Loop through each district, grab the valid itp_ids
    # populate each dict key (caption, params, sections) needed to go into analyses.yml
    chapters_list = []
    for district in districts:
        subset = df[(df.caltrans_district == district) & usable_rows]
        chapters_list.append({
            'caption': f'District {district}',
            'params': {'district': district},
            'sections': [{'itp_id': itp_id} for itp_id in
                         subset.calitp_itp_id.unique().tolist()],
        })

    # 'parts' is a list with a single item holding all chapters
    analyses_data['sites'][site_index]['parts'] = [{'chapters': chapters_list}]

    # Serialize before opening the file for writing, so a failure in
    # yaml.dump cannot leave a truncated YAML behind.
    output = yaml.dump(analyses_data)

    with open(PORTFOLIO_SITE_YAML, 'w') as analyses:
        analyses.write(output)

    print("YAML for site generated")

    return chapters_list

In [None]:
# Path of the portfolio YAML that gets rewritten, and the name of the site
# inside it whose 'parts' entry is regenerated by overwrite_yaml.
PORTFOLIO_SITE_YAML = "../portfolio/analyses.yml"
SITE_NAME = "parallel_corridors"

In [None]:
# Regenerate the chapters for SITE_NAME in the portfolio YAML (the file is
# overwritten on disk). Original note: "Use my YAML to grab RT ones".
# Despite its name, itp_dict holds the returned list of chapter dicts.
itp_dict = overwrite_yaml(PORTFOLIO_SITE_YAML, SITE_NAME)

In [None]:
# Empty containers for the ITP IDs of each site.
# NOTE(review): rt_itp_ids is re-initialised and filled in a later cell;
# parallel_corridors_itp_ids is not populated in the code visible here.
parallel_corridors_itp_ids = []
rt_itp_ids = []

In [None]:
# Re-read the portfolio YAML from disk so the lookup below works on the
# current file contents.
# NOTE(review): yaml.Loader can construct arbitrary Python objects; that is
# acceptable only because this file is a trusted, repo-local config.
with open(PORTFOLIO_SITE_YAML) as analyses:
    analyses_data = yaml.load(analyses, yaml.Loader)

RT_SITE = "rt"

# Locate the rt site. Prefer an exact name match, because "rt" is short enough
# to be a substring of unrelated site names; fall back to the first substring
# match to stay compatible with the earlier lookup.
rt_site_names = [s['name'] for s in analyses_data["sites"]]
if RT_SITE in rt_site_names:
    rt_site_index = rt_site_names.index(RT_SITE)
else:
    rt_partial_matches = [idx for idx, name in enumerate(rt_site_names)
                          if RT_SITE in name]
    if not rt_partial_matches:
        raise IndexError(
            f"No site matching {RT_SITE!r} in {PORTFOLIO_SITE_YAML}; "
            f"available sites: {rt_site_names}"
        )
    rt_site_index = rt_partial_matches[0]

In [None]:
# Chapters stored under the first 'parts' item of the rt site
rt_chapters = analyses_data['sites'][rt_site_index]['parts'][0]["chapters"]

# Flatten every chapter's sections into one list of ITP IDs, keeping the
# order in which they appear in the YAML (duplicates are not removed).
rt_itp_ids = [
    section['itp_id']
    for chapter in rt_chapters
    for section in chapter["sections"]
]
