In [1]:
import pandas as pd
import yaml
from pathlib import Path

In [2]:
# Portfolio site YAML path passed to `create_project_priority` below:
# "../portfolio/sites/project_priority_test.yml"

In [3]:
def create_project_priority(PORTFOLIO_SITE_YAML: str,
                            df_file_path: str,
                            df_sheet_name: str) -> None:
    """Rebuild the ``parts`` entry of a portfolio site YAML from a project workbook.

    Reads one worksheet, creates one chapter per district (captioned
    ``District <n>`` and parameterized by the district) holding one section
    per county found in that district, stores the chapters under ``parts`` in
    the YAML's existing contents, overwrites the YAML file and prints the
    resulting YAML text.

    Parameters
    ----------
    PORTFOLIO_SITE_YAML : str
        Path of the site YAML file that is read and then overwritten.
    df_file_path : str
        Path of the Excel workbook. The sheet must contain ``district`` and
        ``full_county_name`` columns.
    df_sheet_name : str
        Name of the worksheet to read.

    Notes
    -----
    Rows without a district are treated as district 0, and district 0 never
    gets a chapter. Missing county names are listed as "Various".
    """
    df = pd.read_excel(df_file_path, sheet_name=df_sheet_name)

    # Unique districts in ascending order. Filtering 0 out (instead of calling
    # list.remove) keeps this working when the sheet has no 0/missing district;
    # list.remove would raise ValueError in that case.
    districts = sorted(
        district
        for district in df.district.fillna(0).unique().tolist()
        if district != 0
    )

    with open(PORTFOLIO_SITE_YAML) as analyses:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects.
        # Only point this at site YAMLs you trust, or consider yaml.safe_load.
        analyses_data = yaml.load(analyses, yaml.Loader)

    # An empty YAML file loads as None; start from an empty mapping so that
    # the 'parts' assignment below still works.
    if analyses_data is None:
        analyses_data = {}

    # One chapter per district, one section per county within that district.
    chapters_list = []
    for district in districts:
        subset = df[df.district == district]
        counties_in_district = sorted(
            subset.full_county_name.fillna("Various").unique().tolist()
        )
        chapters_list.append({
            'caption': f'District {district}',
            'params': {'district': district},
            'sections': [{'county': county} for county in counties_in_district],
        })

    # The site YAML expects 'parts' to be a list of items.
    analyses_data['parts'] = [{'chapters': chapters_list}]

    # Serialize before reopening the file so a failed dump cannot leave the
    # YAML truncated.
    output = yaml.dump(analyses_data)

    with open(PORTFOLIO_SITE_YAML, 'w') as analyses:
        analyses.write(output)

    print(output)

In [14]:
# Regenerate the test site YAML from the "fake" sheet of the sample workbook.
create_project_priority(
    PORTFOLIO_SITE_YAML="../portfolio/sites/project_priority_test.yml",
    df_file_path="gs://calitp-analytics-data/data-analyses/project_prioritization/fake_data.xlsx",
    df_sheet_name="fake",
)

directory: ./project_prioritization/
notebook: ./project_prioritization/county_landing_page.ipynb
parts:
- chapters:
  - caption: District 1
    params:
      district: 1
    sections:
    - county: Del Norte
    - county: Humboldt
    - county: Lake
    - county: Mendocino
    - county: Various
  - caption: District 2
    params:
      district: 2
    sections:
    - county: Lassen
    - county: Modoc
    - county: Shasta
    - county: Siskiyou
    - county: Tehama
    - county: Trinity
  - caption: District 3
    params:
      district: 3
    sections:
    - county: Butte
    - county: Nevada
    - county: Placer
    - county: Sacramento
    - county: Sutter
    - county: Various
    - county: Yolo
    - county: Yuba
  - caption: District 4
    params:
      district: 4
    sections:
    - county: Alameda
    - county: Contra Costa
    - county: Marin
    - county: Napa
    - county: San Francisco
    - county: San Mateo
    - county: Santa Clara
    - county: Solano
    - county: So

In [5]:
# Bucket folder holding the project prioritization workbook, and the workbook's
# file name; the two are joined to form the path that is read with pandas.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/project_prioritization/"
FILE = "fake_data.xlsx"

In [6]:
# Load the "fake" worksheet of the sample workbook into a DataFrame.
df = pd.read_excel(GCS_FILE_PATH + FILE, sheet_name="fake")

In [7]:
# Site YAML that the following cells load, update and overwrite.
# (Original note: "Be outside of data analyses" - presumably about which directory
# this relative path has to be resolved from; TODO confirm.)
MY_SITE_YAML = "../project_prioritization/test1.yml"

In [8]:
# Read the site YAML's current contents so its 'parts' entry can be replaced later.
with open(MY_SITE_YAML) as site_yaml_file:
    analyses_data = yaml.load(site_yaml_file, Loader=yaml.Loader)

In [9]:
# Every distinct district in ascending order; rows without a district count as 0.
districts = df["district"].fillna(0).unique().tolist()
districts.sort()

In [10]:
# Inspect the sorted district values; 0 is what missing districts were filled with.
districts

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 74, 75]

In [11]:
# For every district, collect the counties that appear in its rows and build the
# chapter entry (caption, params, sections) that the site YAML expects.
chapters_list = []
for district in sorted(districts):
    district_rows = df.loc[df["district"] == district]

    # Distinct county names for this district; a missing name is shown as "Various".
    county_names = district_rows["full_county_name"].fillna("Various").unique().tolist()
    county_names.sort()

    chapters_list.append({
        'caption': f'District {district}',
        'params': {'district': district},
        'sections': [{'county': name} for name in county_names],
    })

# 'parts' is a list with a single item that carries all chapters.
parts_list = [{'chapters': chapters_list}]
analyses_data['parts'] = parts_list

# Serialize the updated contents and show the resulting YAML text.
output = yaml.dump(analyses_data)
print(output)

directory: ./project_prioritization/
notebook: ./project_prioritization/county_landing_page.ipynb
parts:
- chapters:
  - caption: District 0
    params:
      district: 0
    sections:
    - county: Various
  - caption: District 1
    params:
      district: 1
    sections:
    - county: Del Norte
    - county: Humboldt
    - county: Lake
    - county: Mendocino
    - county: Various
  - caption: District 2
    params:
      district: 2
    sections:
    - county: Lassen
    - county: Modoc
    - county: Shasta
    - county: Siskiyou
    - county: Tehama
    - county: Trinity
  - caption: District 3
    params:
      district: 3
    sections:
    - county: Butte
    - county: Nevada
    - county: Placer
    - county: Sacramento
    - county: Sutter
    - county: Various
    - county: Yolo
    - county: Yuba
  - caption: District 4
    params:
      district: 4
    sections:
    - county: Alameda
    - county: Contra Costa
    - county: Marin
    - county: Napa
    - county: San Francis

In [12]:
# yaml.dump serializes the Python object into YAML-formatted text; called without a
# stream, as here, it hands that text back so it can be written to a file afterwards.
output = yaml.dump(analyses_data)

In [13]:
# Overwrite the site YAML with the regenerated text ('w' replaces the existing contents).
with open(MY_SITE_YAML, 'w') as analyses:
    analyses.write(output)