## This utility scans configuration scripts and generates a unified representation of changes. ##

In [None]:
from pathlib import Path
from collections import defaultdict
import configparser
import pandas as pd

The git clones that make up the hubmap services are all rooted in a single directory, e.g. `<somepath>/hubmap`.
*root_path* must be set to that directory.

In [None]:
# Directory that holds all of the hubmap git clones; adjust to the local checkout location.
root_path = Path.home() / 'git' / 'hubmap'

These are the repos which comprise the hubmap services.

In [None]:
# Names of the repositories to scan, and the path of each clone under root_path.
repos = [
    'ingest-pipeline',
    'ingest-ui',
    'gateway',
    'entity-api',
    'search-api',
    'uuid-api',
]
repo_path_l = [root_path.joinpath(repo) for repo in repos]

## Locate Configuration Files ##
Find all app.cfg.example and example.env files in the named repos.

In [None]:
# Recursively collect the example config files of every repo.  Within a repo,
# all app.cfg.example matches are listed before any example.env matches.
cfg_path_l = []
for repo_path in repo_path_l:
    for pattern in ('**/app.cfg.example', '**/example.env'):
        cfg_path_l.extend(repo_path.glob(pattern))

In [None]:
# Show the example config files that were found, as a quick sanity check.
print(cfg_path_l)

Build ConfigParsers for all example config files, and print a summary.

In [None]:
def build_parser(path):
    """
    Return a ConfigParser (interpolation disabled) loaded from the file at *path*.

    Files without any section header, such as .env files, are parsed as if all of
    their content belonged to a single section named _root_.
    """
    parser = configparser.ConfigParser(interpolation=None)
    try:
        parser.read(path)
    except configparser.MissingSectionHeaderError:
        # Header-less file: parse it again with a synthetic section header in front.
        body = Path(path).read_text()
        parser.read_string('[_root_]\n' + body)
    return parser

In [None]:
# Summary of the example files: a banner per file, then each section with its option names.
for cfg_path in cfg_path_l:
    print(f'######### {cfg_path} ##########')
    example_cfg = build_parser(cfg_path)
    for sec in example_cfg.sections():
        option_names = list(example_cfg[sec])
        print(f'{sec}: {option_names}')

Next, derive the path of the corresponding instance config file (app.cfg or .env) for each example file.

In [None]:
# For each example file, work out where its instance counterpart lives:
# same directory, with the 'example' marker removed from the file name.
inst_cfg_path_l = []
for cfg_path in cfg_path_l:
    # app.cfg.example -> app.cfg ; example.env -> .env
    inst_name = cfg_path.name.replace('.example', '')
    inst_name = inst_name.replace('example', '')
    inst_cfg_path = cfg_path.parent.joinpath(inst_name)
    inst_cfg_path_l.append(inst_cfg_path)

In [None]:
# Show the derived instance config paths (these files are not checked for existence here).
print(inst_cfg_path_l)

## Utilities ##

In [None]:
def build_pandas_tbl(common_option_counts, common_option_example_values, common_option_cfg_values):
    """
    Build a pandas DataFrame version of the configuration info, for convenience.

    The three arguments are mappings keyed by option name: the number of times the
    option was seen, its example value, and its configured value.

    The result has one row per option in common_option_counts, ordered by descending
    count and then by option name, with these columns:
      opt        - the option name
      occurs     - the count for the option
      changed    - True when the example value differs from the configured value
      example    - the example value
      configured - the configured value

    When no options were recorded, an empty DataFrame with the same columns is returned.
    """
    columns = ['opt', 'occurs', 'changed', 'example', 'configured']
    sorted_opts = sorted(common_option_counts,
                         key=lambda opt: (-common_option_counts[opt], opt))
    recs = [{'opt': opt,
             'occurs': common_option_counts[opt],
             'changed': common_option_example_values[opt] != common_option_cfg_values[opt],
             'example': common_option_example_values[opt],
             'configured': common_option_cfg_values[opt]}
            for opt in sorted_opts]
    # Build the frame once, outside any loop, so that empty input still yields a
    # (column-only) DataFrame instead of leaving the result undefined.
    return pd.DataFrame(recs, columns=columns)

In [None]:
def update_common_options(opt, example_sec, cfg_sec, c_data):
    """
    Record one sighting of option *opt* in the shared bookkeeping tables.

    c_data is the tuple (counts, example values, configured values).  The count for
    opt is incremented.  For each of the two value tables, the first value seen for
    opt is stored (None when the section does not contain opt); if a later sighting
    disagrees with the stored entry, the entry becomes '**mismatch**'.
    """
    counts, example_values, cfg_values = c_data
    # Look up both values before touching any table.
    observed = [section[opt] if opt in section else None
                for section in (example_sec, cfg_sec)]
    counts[opt] += 1
    for table, value in zip((example_values, cfg_values), observed):
        if opt not in table:
            table[opt] = value
        elif table[opt] != value:
            table[opt] = '**mismatch**'

## The Actual Scanning ##
Execute this block repeatedly as needed

In [None]:
# Compare every example config file with its instance counterpart, print a
# per-option report, and then display the options whose values differ.
#
# The accumulators are re-created on every execution so that re-running this
# cell never double-counts options.
common_option_counts = defaultdict(int)
common_option_example_values = {}
common_option_cfg_values = {}
common_data = (common_option_counts, common_option_example_values, common_option_cfg_values)

for example_path, inst_path in zip(cfg_path_l, inst_cfg_path_l):
    inst_rel = inst_path.relative_to(root_path)
    print(f'#### {inst_rel} ####')
    if not inst_path.is_file():
        # ConfigParser.read() silently ignores files it cannot open, which would
        # make a missing instance file look like an empty one; say so explicitly.
        print('  Instance file not found')
    example_cfg = build_parser(example_path)
    inst_cfg = build_parser(inst_path)
    all_sections = set(example_cfg.sections()) | set(inst_cfg.sections())
    # Sorted so that the report order is the same on every run, as it already is for options.
    for sec in sorted(all_sections):
        if sec not in example_cfg:
            print(f'  Instance has section {sec} not found in example')
            continue
        if sec not in inst_cfg:
            print(f'  Instance lacks section {sec} which is present in example')
            continue
        # Build the option sets once per section for constant-time membership tests.
        example_opts = set(example_cfg.options(sec))
        inst_opts = set(inst_cfg.options(sec))
        all_opts = example_opts | inst_opts
        print(f'  [{sec}]')
        for opt in sorted(all_opts):
            print(f'    {opt}:', end=' ')
            update_common_options(opt, example_cfg[sec], inst_cfg[sec], common_data)
            if opt not in example_opts:
                print('added')
            elif opt not in inst_opts:
                print('MISSING')
            elif not example_cfg[sec][opt].strip("'").strip('"'):
                # The example value is blank (ignoring surrounding quotes), so report
                # whether the instance filled it in.
                if not inst_cfg[sec][opt].strip("'").strip('"'):
                    print('left blank')
                else:
                    print('SET')
            elif inst_cfg[sec][opt] == example_cfg[sec][opt]:
                print('match')
            else:
                print('changed')

tbl = build_pandas_tbl(*common_data)
# Show only the options whose example and configured values differ.
display(tbl[tbl.changed == True])