### Update glossary.csv file using data from schemas.yml

In [180]:
import cea.scripts
import cea.inputlocator
import cea.config
import cea.glossary
from cea.tests.trace_inputlocator import get_csv_schema
import os
import yaml
import json

In [181]:
# schemas.yml contents, keyed by locator method name
schemas = cea.scripts.schemas()
# current glossary.csv contents as a DataFrame
glossary_df = cea.glossary.read_glossary_df()
# every locator method name that schemas.yml knows about
locators = list(schemas)

### start by finding all entries in schemas.yml without a schema

the following three locator methods need "special" treatment:
- get_optimization_checkpoint
  - "special" schema
- get_optimization_disconnected_cooling_capacity
  - only present in projects with cooling network
- get_optimization_connected_cooling_capacity
  - only present in projects with cooling network
  
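This code assumes you have a "reference-case-cooling/baseline" scenario next to your project folder (the cell below resolves it as `<config.project>/../reference-case-cooling/baseline`) and that you have run the optimization on it (e.g. run `cea workflow --workflow district-cooling-system`).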

In [182]:
# Point the locator at the cooling reference case, resolved relative to the
# parent folder of the configured project.
config = cea.config.Configuration()
config.scenario = os.path.join(config.project, "..", "reference-case-cooling", "baseline")
locator = cea.inputlocator.InputLocator(scenario=config.scenario)

# load get_optimization_checkpoint schema
# (the checkpoint is a JSON file, so each top-level key becomes a schema field
# with the stored value as sample data and no detected types)
if not schemas["get_optimization_checkpoint"]["schema"]:
    with open(locator.get_optimization_checkpoint(1), 'r') as fp:
        get_optimization_checkpoint = json.load(fp)
    schemas["get_optimization_checkpoint"]["schema"] = {
        str(key): {"sample_data": get_optimization_checkpoint[key],
                   "types_found": None}
        for key in get_optimization_checkpoint.keys()
    }

# load get_optimization_disconnected_cooling_capacity schema
if not schemas["get_optimization_disconnected_cooling_capacity"]["schema"]:
    schemas["get_optimization_disconnected_cooling_capacity"]["schema"] = get_csv_schema(
        locator.get_optimization_disconnected_cooling_capacity(1, 1))

# load get_optimization_connected_cooling_capacity schema
# FIX: this used to read the *disconnected* file, so the connected schema was
# a copy of the disconnected one.
# NOTE(review): assumes the connected locator method takes the same two
# arguments as the disconnected one - confirm against cea.inputlocator.
if not schemas["get_optimization_connected_cooling_capacity"]["schema"]:
    schemas["get_optimization_connected_cooling_capacity"]["schema"] = get_csv_schema(
        locator.get_optimization_connected_cooling_capacity(1, 1))

In [183]:
# list the locator methods that have no "schema" key at all
# (nothing is printed when schemas.yml is complete)
without_schema_key = [lm for lm in locators if "schema" not in schemas[lm]]
for lm in without_schema_key:
    print(lm)

In [184]:
# list the locator methods whose "schema" entry is empty / `None`
# (nothing is printed when schemas.yml is complete)
with_empty_schema = [lm for lm in locators if not schemas[lm]["schema"]]
for lm in with_empty_schema:
    print(lm)

If any of the cells above produces printed output, update schemas.yml and re-run the notebook.

### make sure the "used_by" and "created_by" lists don't contain duplicates

In [185]:
# list the locator methods that have no "used_by" key
without_used_by = [lm for lm in locators if "used_by" not in schemas[lm]]
for lm in without_used_by:
    print(lm)

In [186]:
# list the locator methods that have no "created_by" key
without_created_by = [lm for lm in locators if "created_by" not in schemas[lm]]
for lm in without_created_by:
    print(lm)

each locator should have a "used_by" and a "created_by" - let's assume they're all lists

In [187]:
# replace both script lists of every locator method with a sorted,
# duplicate-free copy
for lm in locators:
    entry = schemas[lm]
    for list_name in ("used_by", "created_by"):
        entry[list_name] = sorted(set(entry[list_name]))

In [188]:
# write the updated schemas to schemas.yml in the same folder as the
# cea.scripts module ...
scripts_folder = os.path.dirname(cea.scripts.__file__)
schemas_yml = os.path.join(scripts_folder, 'schemas.yml')
print("saving to: %s" % schemas_yml)
with open(schemas_yml, 'w') as fp:
    yaml.dump(schemas, fp)
# ... and read it back through cea.scripts.schemas()
schemas = cea.scripts.schemas()

saving to: c:\users\darthoma\documents\github\cityenergyanalyst\cea\schemas.yml


### find all schema entries that are not in glossary.csv

In [189]:
# locator methods known to schemas.yml but absent from glossary.csv,
# printed one per line in alphabetical order
glossary_lms = set(glossary_df["LOCATOR_METHOD"].values)
schemas_lms = set(schemas)
missing_lms = sorted(schemas_lms.difference(glossary_lms))
print("\n".join(missing_lms))

get_building_weekly_schedules
get_database_air_conditioning_systems
get_database_envelope_systems
get_database_lca_buildings
get_database_lca_mobility
get_database_standard_schedules_use
get_database_supply_systems
get_geothermal_potential
get_multi_criteria_analysis
get_network_energy_pumping_requirements_file
get_network_linear_pressure_drop_edges
get_network_linear_thermal_loss_edges_file
get_network_pressure_at_nodes
get_network_temperature_plant
get_network_temperature_return_nodes_file
get_network_temperature_supply_nodes_file
get_network_thermal_loss_edges_file
get_network_total_pressure_drop_file
get_network_total_thermal_loss_file
get_optimization_checkpoint
get_optimization_connected_cooling_capacity
get_optimization_connected_electricity_capacity
get_optimization_connected_heating_capacity
get_optimization_decentralized_folder_building_result_heating
get_optimization_decentralized_folder_building_result_heating_activation
get_optimization_disconnected_cooling_capacity
get_op

For each of those locator methods missing from glossary.csv, we need to append one entry per field of that file. Some of those files are special (the optimization checkpoint comes to mind). Each glossary.csv entry has the following fields:

- SCRIPT (use first "created_by" or "-", if input file)
- LOCATOR_METHOD
- FILE_NAME (get from schemas.yml file_path)
- VARIABLE (this is the field name)
- DESCRIPTION (use "TODO")
- UNIT (use "TODO")
- VALUES (use "TODO")
- TYPE (use the first from schemas.types_found)
- COLOR (use "black") - I'm not really sure we need this at all in glossary.csv?

In [190]:
# Append one glossary row per schema field of every locator method that is
# missing from glossary.csv, write the file and reload it.
#
# The rows are collected in a list and appended in ONE call: growing the
# DataFrame row by row copies the whole frame on every iteration.
already_listed = set(glossary_df["LOCATOR_METHOD"].values)
new_rows = []
for lm in missing_lms:
    if lm in already_listed:
        # `missing_lms` goes stale once this cell has run (glossary_df is
        # reloaded below) - skipping keeps a re-run from adding duplicates
        continue
    created_by = schemas[lm]["created_by"]
    # input files have no creating script; an empty or missing list maps to "-"
    script = created_by[0] if created_by else "-"
    file_name = schemas[lm]["file_path"]
    fields = schemas[lm]["schema"]
    for variable in fields.keys():
        field = fields[variable]
        types_found = field["types_found"] if "types_found" in field else None
        # use the first detected type; "TODO" when none was recorded
        variable_type = types_found[0] if types_found else "TODO"
        new_rows.append({"key": "{lm}!!!{variable}".format(lm=lm, variable=variable),
                         "SCRIPT": script,
                         "LOCATOR_METHOD": lm,
                         "FILE_NAME": file_name,
                         "VARIABLE": variable,
                         "DESCRIPTION": "TODO",
                         "UNIT": "TODO",
                         "VALUES": "TODO",
                         "TYPE": variable_type,
                         "COLOR": "black"})
if new_rows:
    glossary_df = glossary_df.append(new_rows, ignore_index=True)

# glossary.csv is written to the folder that contains the cea.glossary module
glossary_csv = os.path.join(os.path.dirname(cea.glossary.__file__), 'glossary.csv')
glossary_df.to_csv(glossary_csv,
                   columns=["SCRIPT", "LOCATOR_METHOD", "FILE_NAME", "VARIABLE", "DESCRIPTION", "UNIT", "VALUES", "TYPE", "COLOR"],
                   index=False)
print("saved new glossary.csv - reloading")
glossary_df = cea.glossary.read_glossary_df()

saved new glossary.csv - reloading


### find all glossary entries that are not in schemas.yml

In [193]:
# print "<locator method> <variable>" for every row of the glossary
# NOTE(review): this lists ALL glossary rows; it does not filter out the ones
# that are present in schemas.yml - confirm whether a filter was intended.
for locator_method, variable in zip(glossary_df["LOCATOR_METHOD"], glossary_df["VARIABLE"]):
    print("%s %s" % (locator_method, variable))

PVT_metadata_results AREA_m2
PVT_metadata_results BUILDING
PVT_metadata_results B_deg
PVT_metadata_results CATB
PVT_metadata_results CATGB
PVT_metadata_results CATteta_z
PVT_metadata_results SURFACE
PVT_metadata_results TYPE
PVT_metadata_results Xcoor
PVT_metadata_results Xdir
PVT_metadata_results Ycoor
PVT_metadata_results Ydir
PVT_metadata_results Zcoor
PVT_metadata_results Zdir
PVT_metadata_results area_installed_module_m2
PVT_metadata_results array_spacing_m
PVT_metadata_results orientation
PVT_metadata_results surface
PVT_metadata_results surface_azimuth_deg
PVT_metadata_results tilt_deg
PVT_metadata_results total_rad_Whm2
PVT_metadata_results type_orientation
PVT_results Area_PVT_m2
PVT_results Date
PVT_results E_PVT_gen_kWh
PVT_results Eaux_PVT_kWh
PVT_results PVT_roofs_top_E_kWh
PVT_results PVT_roofs_top_Q_kWh
PVT_results PVT_roofs_top_m2
PVT_results PVT_walls_east_E_kWh
PVT_results PVT_walls_east_Q_kWh
PVT_results PVT_walls_east_m2
PVT_results PVT_walls_north_E_kWh
PVT_results

get_data_benchmark code
get_data_benchmark CO2_target_new
get_data_benchmark CO2_target_retrofit
get_data_benchmark CO2_today
get_data_benchmark Description
get_data_benchmark NRE_target_new
get_data_benchmark NRE_target_retrofit
get_data_benchmark NRE_today
get_data_benchmark PEN_target_new
get_data_benchmark PEN_target_retrofit
get_data_benchmark PEN_today
get_data_benchmark code
get_data_benchmark CO2_target_new
get_data_benchmark CO2_target_retrofit
get_data_benchmark CO2_today
get_data_benchmark Description
get_data_benchmark NRE_target_new
get_data_benchmark NRE_target_retrofit
get_data_benchmark NRE_today
get_data_benchmark PEN_target_new
get_data_benchmark PEN_target_retrofit
get_data_benchmark PEN_today
get_data_benchmark code
get_database_air_conditioning_systems controller
get_database_air_conditioning_systems cooling
get_database_air_conditioning_systems dhw
get_database_air_conditioning_systems heating
get_database_air_conditioning_systems ventilation
get_database_envelope

get_network_layout_edges_shapefile Type_mat
get_network_layout_edges_shapefile geometry
get_network_layout_edges_shapefile weight
get_network_layout_nodes_shapefile Building
get_network_layout_nodes_shapefile Name
get_network_layout_nodes_shapefile Type
get_network_layout_nodes_shapefile geometry
get_network_linear_pressure_drop_edges PIPE0
get_network_linear_thermal_loss_edges_file PIPE0
get_network_node_types_csv_file Building
get_network_node_types_csv_file Name
get_network_node_types_csv_file Q_hex_h_ahu
get_network_node_types_csv_file Q_hex_h_aru
get_network_node_types_csv_file Q_hex_h_shu
get_network_node_types_csv_file Q_hex_h_ww
get_network_node_types_csv_file Q_hex_plant_kW
get_network_node_types_csv_file Type
get_network_node_types_csv_file Unnamed: 0
get_network_node_types_csv_file coordinates
get_network_pressure_at_nodes NODE0
get_network_temperature_plant temperature_return_K
get_network_temperature_plant temperature_supply_K
get_network_temperature_return_nodes_file NODE

get_optimization_generation_total_performance_halloffame Capex_a_sys_disconnected_USD
get_optimization_generation_total_performance_halloffame Capex_total_sys_USD
get_optimization_generation_total_performance_halloffame Capex_total_sys_connected_USD
get_optimization_generation_total_performance_halloffame Capex_total_sys_disconnected_USD
get_optimization_generation_total_performance_halloffame GHG_sys_connected_tonCO2
get_optimization_generation_total_performance_halloffame GHG_sys_disconnected_tonCO2
get_optimization_generation_total_performance_halloffame GHG_sys_tonCO2
get_optimization_generation_total_performance_halloffame Opex_a_sys_USD
get_optimization_generation_total_performance_halloffame Opex_a_sys_connected_USD
get_optimization_generation_total_performance_halloffame Opex_a_sys_disconnected_USD
get_optimization_generation_total_performance_halloffame PEN_sys_MJoil
get_optimization_generation_total_performance_halloffame PEN_sys_connected_MJoil
get_optimization_generation_to

get_optimization_slave_electricity_requirements_data E_HP_SC_ET_req_W
get_optimization_slave_electricity_requirements_data E_HP_SC_FP_req_W
get_optimization_slave_electricity_requirements_data E_HP_Server_req_W
get_optimization_slave_electricity_requirements_data E_HP_Sew_req_W
get_optimization_slave_electricity_requirements_data E_PeakBoiler_req_W
get_optimization_slave_electricity_requirements_data E_PeakVCC_AS_req_W
get_optimization_slave_electricity_requirements_data E_PeakVCC_WS_req_W
get_optimization_slave_electricity_requirements_data E_Storage_charging_req_W
get_optimization_slave_electricity_requirements_data E_Storage_discharging_req_W
get_optimization_slave_electricity_requirements_data E_cs_cre_cdata_req_connected_W
get_optimization_slave_electricity_requirements_data E_cs_cre_cdata_req_disconnected_W
get_optimization_slave_electricity_requirements_data E_electricalnetwork_sys_req_W
get_optimization_slave_electricity_requirements_data E_hs_ww_req_connected_W
get_optimizatio

get_optimization_substations_total_file Qhs_sen_sys0_kW
get_optimization_substations_total_file Qhs_sen_sys_MWhyr
get_optimization_substations_total_file Qhs_sys0_kW
get_optimization_substations_total_file Qhs_sys_MWhyr
get_optimization_substations_total_file Qhs_sys_ahu0_kW
get_optimization_substations_total_file Qhs_sys_ahu_MWhyr
get_optimization_substations_total_file Qhs_sys_aru0_kW
get_optimization_substations_total_file Qhs_sys_aru_MWhyr
get_optimization_substations_total_file Qhs_sys_shu0_kW
get_optimization_substations_total_file Qhs_sys_shu_MWhyr
get_optimization_substations_total_file Qww0_kW
get_optimization_substations_total_file Qww_MWhyr
get_optimization_substations_total_file Qww_sys0_kW
get_optimization_substations_total_file Qww_sys_MWhyr
get_optimization_substations_total_file SOLAR_hs0_kW
get_optimization_substations_total_file SOLAR_hs_MWhyr
get_optimization_substations_total_file SOLAR_ww0_kW
get_optimization_substations_total_file SOLAR_ww_MWhyr
get_optimization_s

get_thermal_networks lambda_WmK
get_thermal_networks material
get_thermal_networks rho_kgm3
get_thermal_networks D_ext_m
get_thermal_networks D_ins_m
get_thermal_networks D_int_m
get_thermal_networks Pipe_DN
get_thermal_networks Vdot_max_m3s
get_thermal_networks Vdot_min_m3s
get_total_demand Af_m2
get_total_demand Aocc_m2
get_total_demand Aroof_m2
get_total_demand COAL_hs0_kW
get_total_demand COAL_hs_MWhyr
get_total_demand COAL_ww0_kW
get_total_demand COAL_ww_MWhyr
get_total_demand DC_cdata0_kW
get_total_demand DC_cdata_MWhyr
get_total_demand DC_cre0_kW
get_total_demand DC_cre_MWhyr
get_total_demand DC_cs0_kW
get_total_demand DC_cs_MWhyr
get_total_demand DH_hs0_kW
get_total_demand DH_hs_MWhyr
get_total_demand DH_ww0_kW
get_total_demand DH_ww_MWhyr
get_total_demand E_cdata0_kW
get_total_demand E_cdata_MWhyr
get_total_demand E_cre0_kW
get_total_demand E_cre_MWhyr
get_total_demand E_cs0_kW
get_total_demand E_cs_MWhyr
get_total_demand E_hs0_kW
get_total_demand E_hs_MWhyr
get_total_demand E

### make sure glossary.csv (locator_method, variable) is unique