# Portfolio Yml

In [148]:
import intake
import pandas as pd
import yaml
from yaml import SafeDumper

In [149]:
# Notebook-wide pandas display settings: show up to 100 columns, never
# truncate rows or cell contents, and render floats with thousands
# separators and two decimals.
pd.set_option("display.max_columns", 100)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None
pd.set_option("display.float_format", "{:,.2f}".format)

In [150]:
# Folder in the GCS bucket that holds the project prioritization workbook
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/project_prioritization/"
# Excel workbook that is loaded into `df` (its "fake" sheet is the one read)
FILE = "fake_data.xlsx"

In [151]:
# Load the "fake" sheet of the workbook on GCS into the working DataFrame
source_path = GCS_FILE_PATH + FILE
df = pd.read_excel(source_path, sheet_name="fake")

In [152]:
# Rows without a county name are grouped under the label "Various"
df = df.assign(full_county_name=df["full_county_name"].fillna("Various"))

In [153]:
# Distinct district values in ascending order; rows with no district are
# counted under 0. `.tolist()` converts the values to plain Python objects.
district_values = df["district"].fillna(0).unique()
districts = sorted(district_values.tolist())

In [154]:
districts

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 74, 75]

In [155]:
# Open my test yml
with open("./test.yml") as test:
    # yaml.load parses the YAML text into plain Python objects (nested
    # dicts and lists), so the `parts` entry can be rebuilt and written back.
    # NOTE(review): yaml.Loader can construct arbitrary Python objects from
    # tagged input; yaml.safe_load would be the safer choice if this file
    # could ever come from an untrusted source.
    analyses_data = yaml.load(test, yaml.Loader)

In [156]:
analyses_data

{'title': 'Project Prioritization - 10 Year Non-SHOPP',
 'directory': './project_prioritization/',
 'readme': './project_prioritization/README.md',
 'notebook': './project_prioritization/county_landing_page.ipynb',
 'parts': [{'chapters': [{'caption': 'District 8',
     'params': {'district': 8},
     'sections': [{'county': 'Los Angeles'},
      {'county': 'Riverside'},
      {'county': 'San Bernardino'},
      {'county': 'Various'}]},
    {'caption': 'District 9',
     'params': {'district': 9},
     'sections': [{'county': 'Inyo'},
      {'county': 'Kern'},
      {'county': 'Mono'}]}]}]}

In [157]:
# Slice list for test districts: take positions 6 and 7 of the sorted list.
# NOTE: this is a positional slice, not a lookup by district number, so the
# result depends on which values are present in `districts`.
test_districts = districts[6:8]

In [158]:
test_districts

[6, 7]

In [159]:
def build_chapters(df, districts):
    """Build one chapter entry per district for the `parts` section of the yml.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain `district` and `full_county_name` columns.
    districts : iterable
        District values to include; chapters are emitted in ascending order.

    Returns
    -------
    list of dict
        One dict per district with the keys `caption`, `params` and
        `sections`, where `sections` holds one `{'county': name}` entry for
        each county found in that district (sorted by name).
    """
    chapters = []
    for district in sorted(districts):
        subset = df[df.district == district]

        # Only the counties that actually occur in this district; rows with
        # no county name are grouped under "Various".
        counties_in_district = sorted(
            subset.full_county_name.fillna("Various").unique().tolist()
        )

        chapters.append(
            {
                'caption': f'District {district}',
                'params': {'district': district},
                'sections': [{'county': county} for county in counties_in_district],
            }
        )
    return chapters


# For each test district, collect its counties and populate the keys
# (caption, params, sections) needed to go into analyses.yml
chapters_list = build_chapters(df, test_districts)

# `parts` is a list holding a single mapping with the `chapters` key
parts_list = [{'chapters': chapters_list}]

analyses_data['parts'] = parts_list

# Serialize before opening the file so a failed dump cannot truncate it
output = yaml.dump(analyses_data)

with open("./test.yml", 'w') as analyses:
    analyses.write(output)

print(output)

directory: ./project_prioritization/
notebook: ./project_prioritization/county_landing_page.ipynb
parts:
- chapters:
  - caption: District 6
    params:
      district: 6
    sections:
    - county: Fresno
    - county: Kern
    - county: Kings
    - county: Madera
    - county: Tulare
    - county: Various
  - caption: District 7
    params:
      district: 7
    sections:
    - county: Los Angeles
    - county: Various
    - county: Ventura
readme: ./project_prioritization/README.md
title: Project Prioritization - 10 Year Non-SHOPP



In [160]:
# Looping over every test district here would only overwrite `test` on each
# pass, so pick the last (highest) district directly and keep its rows for
# the inspection cells that follow.
district = max(test_districts)
test = df[df.district == district]

In [161]:
test.shape

(99, 41)

In [162]:
test.county.unique()

array(['LA', 'LA\nVEN', 'MULTI', 'VEN'], dtype=object)

In [163]:
test[['county','district']].head()

Unnamed: 0,county,district
250,LA,7
251,LA,7
252,LA,7
253,LA,7
254,LA,7


In [164]:
# Distinct county names in the `test` subset, sorted alphabetically; missing
# names are labelled "Various"
county_values = test["full_county_name"].fillna("Various").unique()
counties_test = sorted(county_values.tolist())

In [165]:
counties_test

['Los Angeles', 'Various', 'Ventura']

In [166]:
# Empty list that will collect the chapter dicts built in the cells below
test_list = []

In [167]:
test_list

[]

In [168]:
# Empty dict for a single chapter entry; its keys are filled in cell by cell
test_dict = {}

In [169]:
# Caption for the chapter. `district` is not assigned in this cell: it is
# whatever value an earlier cell left bound (7 in the run shown here).
test_dict['caption'] = f'District {district}'

In [170]:
# Store the same district value under `params`
test_dict['params'] = {'district': district}

In [171]:
# One {'county': ...} entry per county name collected in `counties_test`
test_dict['sections'] = [{'county': name} for name in counties_test]

In [172]:
test_dict

{'caption': 'District 7',
 'params': {'district': 7},
 'sections': [{'county': 'Los Angeles'},
  {'county': 'Various'},
  {'county': 'Ventura'}]}

In [173]:
# Add the finished chapter dict to the list of chapters
test_list.append(test_dict)

In [174]:
test_list

[{'caption': 'District 7',
  'params': {'district': 7},
  'sections': [{'county': 'Los Angeles'},
   {'county': 'Various'},
   {'county': 'Ventura'}]}]

In [175]:
# Wrap the chapters in a one-item list, matching the `parts` layout of the yml
parts_list = [{'chapters': test_list}]


In [176]:
parts_list

[{'chapters': [{'caption': 'District 7',
    'params': {'district': 7},
    'sections': [{'county': 'Los Angeles'},
     {'county': 'Various'},
     {'county': 'Ventura'}]}]}]

In [177]:
# Replace the `parts` entry loaded from test.yml with the rebuilt one
analyses_data['parts'] = parts_list

In [178]:
# Build one chapter per test district: a caption, the district parameter, and
# a section for every county found in that district (caption, params and
# sections are the keys needed in analyses.yml).
chapters_list = []
for district in sorted(test_districts):
    in_district = df.district == district

    # Distinct county names for this district, with missing names shown as
    # "Various"
    county_names = df[in_district].full_county_name.fillna("Various").unique()

    chapters_list.append(
        {
            'caption': f'District {district}',
            'params': {'district': district},
            'sections': [{'county': name} for name in sorted(county_names.tolist())],
        }
    )

# `parts` holds a single list item that carries all chapters
parts_list = [{'chapters': chapters_list}]
analyses_data['parts'] = parts_list

# Serialize first, then overwrite the yml with the result and echo it
output = yaml.dump(analyses_data)
with open("./test.yml", 'w') as analyses:
    analyses.write(output)
print(output)

directory: ./project_prioritization/
notebook: ./project_prioritization/county_landing_page.ipynb
parts:
- chapters:
  - caption: District 6
    params:
      district: 6
    sections:
    - county: Fresno
    - county: Kern
    - county: Kings
    - county: Madera
    - county: Tulare
    - county: Various
  - caption: District 7
    params:
      district: 7
    sections:
    - county: Los Angeles
    - county: Various
    - county: Ventura
readme: ./project_prioritization/README.md
title: Project Prioritization - 10 Year Non-SHOPP

