### Update glossary.csv file using data from schemas.yml

In [83]:
import cea.scripts
import cea.inputlocator
import cea.config
import cea.glossary
from cea.tests.trace_inputlocator import get_csv_schema
import os
import yaml
import json

In [84]:
# schemas: the parsed contents of schemas.yml, keyed by locator method name
schemas = cea.scripts.schemas()
# glossary_df: the current glossary, loaded through cea.glossary
glossary_df = cea.glossary.read_glossary_df()
# names of all locator methods described in schemas.yml
locators = list(schemas)

### start by finding all entries in schemas.yml without a schema

the following three locator methods need "special" treatment:
- get_optimization_checkpoint
  - "special" schema
- get_optimization_disconnected_cooling_capacity
  - only present in projects with cooling network
- get_optimization_connected_cooling_capacity
  - only present in projects with cooling network
  
this code assumes you have a "reference-case-cooling/baseline" scenario next to your current project folder (i.e. in the parent folder of `config.project`) and have run the optimization on it (e.g. run `cea workflow --workflow district-cooling-system`)

In [85]:
# Point the locator at the cooling reference case: the scenario is resolved
# relative to the parent folder of the configured project.
config = cea.config.Configuration()
config.scenario = os.path.join(config.project, "..", "reference-case-cooling", "baseline")
locator = cea.inputlocator.InputLocator(scenario=config.scenario)

# load get_optimization_checkpoint schema
# (the checkpoint is read as JSON; every top-level key becomes a schema entry
# that stores the value as sample data, with no type information)
if not schemas["get_optimization_checkpoint"]["schema"]:
    with open(locator.get_optimization_checkpoint(1), 'r') as fp:
        get_optimization_checkpoint = json.load(fp)
    schemas["get_optimization_checkpoint"]["schema"] = {
        str(key): {"sample_data": get_optimization_checkpoint[key],
                   "types_found": None}
        for key in get_optimization_checkpoint.keys()
    }

# load get_optimization_disconnected_cooling_capacity schema
if not schemas["get_optimization_disconnected_cooling_capacity"]["schema"]:
    schemas["get_optimization_disconnected_cooling_capacity"]["schema"] = get_csv_schema(
        locator.get_optimization_disconnected_cooling_capacity(1, 1))

# load get_optimization_connected_cooling_capacity schema
# The schema must come from the *connected* capacity file; reading the
# disconnected file here would store the wrong columns under this entry.
# NOTE(review): assumes the connected locator method takes the same (1, 1)
# arguments as the disconnected one - confirm against cea.inputlocator.
if not schemas["get_optimization_connected_cooling_capacity"]["schema"]:
    schemas["get_optimization_connected_cooling_capacity"]["schema"] = get_csv_schema(
        locator.get_optimization_connected_cooling_capacity(1, 1))

In [86]:
# each locator method needs a "schema" entry (this should not output anything)
for lm in locators:
    if not "schema" in schemas[lm]:
        print lm

In [87]:
# the "schema" entry should not be `None` (this should not output anything)
for lm in locators:
    if not schemas[lm]["schema"]:
        print lm

if any of the cells above produce printed output, update schemas.yml and re-run the notebook

### make sure the "used_by" and "created_by" lists don't contain duplicates

In [88]:
for lm in locators:
    if not "used_by" in schemas[lm]:
        print lm

In [89]:
for lm in locators:
    if not "created_by" in schemas[lm]:
        print lm

each locator should have a "used_by" and a "created_by" - let's assume they're all lists

In [90]:
# replace both lists with a sorted, duplicate-free copy
for lm in locators:
    locator_entry = schemas[lm]
    for list_name in ("used_by", "created_by"):
        unique_values = set(locator_entry[list_name])
        locator_entry[list_name] = sorted(unique_values)

In [94]:
# save it back
schemas_yml = os.path.join(os.path.dirname(cea.scripts.__file__), 'schemas.yml')
print "saving to:", schemas_yml
with open(schemas_yml, 'w') as fp:
    yaml.dump(schemas, fp)
schemas = cea.scripts.schemas()

saving to: c:\users\darthoma\documents\github\cityenergyanalyst\cea\schemas.yml


### find all schema entries that are not in glossary.csv

### find all glossary entries that are not in schemas.yml