# 2200 - remove naming.csv

This notebook compares the `naming.csv` file with the `glossary.csv` file to make sure that no changes in `naming.csv` are forgotten when switching to `glossary.csv`.

In [10]:
import cea
import csv
import os
from collections import Counter

# Both CSV files are looked up relative to the directory of the imported ``cea`` package.
CEA_SOURCE = os.path.dirname(cea.__file__)
GLOSSARY_CSV, NAMING_CSV = (
    os.path.join(CEA_SOURCE, "glossary.csv"),
    os.path.join(CEA_SOURCE, "plots", "naming.csv"),
)

# Stop right away if either file is missing, before any comparison cell runs.
assert os.path.exists(GLOSSARY_CSV), "Could not find glossary.csv"
assert os.path.exists(NAMING_CSV), "Could not find naming.csv"

## Make sure variable names are unique

In [20]:
# Step 1: naming.csv -- each VARIABLE name may be defined on one row only.
with open(NAMING_CSV) as naming_csv:
    # Tally how many rows define each variable name.
    naming_variables = Counter()
    for entry in csv.DictReader(naming_csv):
        naming_variables[entry["VARIABLE"]] += 1

# Abort at the first variable name that appears on more than one row.
for variable, count in naming_variables.items():
    assert count == 1, (
        "Multiple definitions of variable {variable} in naming.csv".format(variable=variable)
    )

In [19]:
# Step 2: glossary.csv -- a variable name may be defined more than once here, provided that
#   * each (FILE_NAME, VARIABLE) pair occurs on one row only (verified by this cell), and
#   * all definitions agree in the fields [DESCRIPTION, UNIT, VALUES, TYPE, COLOR]
#     (that agreement is NOT verified by this cell).
with open(GLOSSARY_CSV) as glossary_csv:
    # Tally how many rows exist for each (FILE_NAME, VARIABLE) pair.
    glossary_variables = Counter()
    for entry in csv.DictReader(glossary_csv):
        glossary_variables[entry["FILE_NAME"], entry["VARIABLE"]] += 1

# Note: ``variable`` is a (FILE_NAME, VARIABLE) tuple here, and is formatted as such.
for variable, count in glossary_variables.items():
    assert count == 1, (
        "Multiple definitions of variable {variable} in glossary.csv".format(variable=variable)
    )

In [24]:
# Rows in glossary.csv that share a VARIABLE name must carry identical values in the
# fields listed below. Each row is compared with the previously read row for the same
# variable; the first disagreement aborts the cell.
with open(GLOSSARY_CSV) as glossary_csv:
    glossary_variables = {}  # VARIABLE -> most recently read row for that variable
    for row in csv.DictReader(glossary_csv):
        variable = row["VARIABLE"]
        prev = glossary_variables.get(variable)
        if prev is not None:
            for field in ("DESCRIPTION", "UNIT", "VALUES", "TYPE", "COLOR"):
                assert row[field] == prev[field], (
                    "{variable} differs in field {field}:  {prev} -- {row}".format(
                        variable=variable, field=field, row=row[field], prev=prev[field])
                )
        # Remember this row so the next row for the same variable is compared against it.
        glossary_variables[variable] = row

AssertionError: Excavation differs in field DESCRIPTION:  Typical embodied emissions for site excavation. -- Typical embodied energy for site excavation.

## Make sure file names in glossary.csv are the same for each locator method

(take into account that the FILE_NAME column also indicates Excel worksheet names)

In [None]:
# TODO: placeholder -- the check described in this section has not been written yet,
# so this cell fails with the message below whenever assertions are enabled.
assert False, "Not implemented yet."

## Make sure the variable descriptions are the same in both files

In [9]:
# Build one VARIABLE -> description lookup per file. When a variable name occurs on
# several rows, the description from the last such row is the one that is kept.
with open(NAMING_CSV) as naming_csv:
    naming = dict(
        (entry["VARIABLE"], entry["SHORT_DESCRIPTION"]) for entry in csv.DictReader(naming_csv)
    )

with open(GLOSSARY_CSV) as glossary_csv:
    glossary = dict(
        (entry["VARIABLE"], entry["DESCRIPTION"]) for entry in csv.DictReader(glossary_csv)
    )

# Every variable from naming.csv must exist in glossary.csv with an identical description;
# the first variable that is missing or differs aborts the cell.
for variable, naming_description in naming.items():
    assert variable in glossary, "Could not find variable in glossary.csv: {}".format(variable)
    glossary_description = glossary[variable]
    assert glossary_description == naming_description, (
        "Definitions don't match: {variable}, {naming} != {glossary}".format(
            variable=variable, naming=naming_description, glossary=glossary_description)
    )

AssertionError: Could not find variable in glossary.csv: E_cs_cre_cdata_req_connected_W