# Software User Accounts (BMC Discovery)

This notebook reproduces the DisMAL `si_user_accounts` report using the raw CSV exports generated by the CLI.
It reads `raw_exports/<appliance>/software_instance_user_accounts.csv` for preview and optional re-export to the standard output folders.


## Requirements

We rely on `pandas` for tabular wrangling and `PyYAML` for configuration. If they are not already installed in your environment, uncomment the `%pip install` line at the top of the next cell before running it.


In [None]:
# %pip install -q pandas pyyaml

from pathlib import Path
import pandas as pd
import yaml


## Configuration (from config.yaml)

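Locates the repository root (the nearest directory at or above the starting path that contains `config.yaml`), reads the configuration, and resolves the raw export and output directories for the selected appliance. The output directory is created if it does not already exist.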


In [None]:
def load_config_params(
    start: Path,
    appliance_name: str = None,
    appliance_index: int = 0,
) -> dict:
    """Resolve config.yaml settings and working directories for one appliance.

    The repository root is the nearest directory at or above ``start`` that
    contains ``config.yaml``. When the file has a non-empty ``appliances``
    list, one entry is picked by ``appliance_name`` (exact match on its
    ``name`` key) or, if no name is given, by ``appliance_index`` (falling
    back to the first entry when the index is unusable). When there is no
    such list, the top-level ``target`` is used and ``appliance_name`` only
    influences the raw-export folder name.

    Side effect: the ``output_<target>`` directory is created under the
    repository root if it does not exist yet.

    Args:
        start: Directory from which the upward search for config.yaml begins.
        appliance_name: Name of the appliance entry to select, or None.
        appliance_index: Position in the ``appliances`` list used when no
            name is supplied.

    Returns:
        A dict with the keys ``repo_root``, ``config_path``, ``cfg``,
        ``selected``, ``target``, ``output_dir`` and ``raw_export_dir``.

    Raises:
        FileNotFoundError: config.yaml was not found at or above ``start``.
        ValueError: The named appliance does not exist, or no ``target``
            could be resolved.
    """
    def _find_repo_root(path: Path) -> Path:
        """Return the first directory at or above *path* holding config.yaml."""
        for candidate in [path, *path.parents]:
            if (candidate / 'config.yaml').exists():
                return candidate
        return path.parent

    def _slugify(value: str) -> str:
        """Lower-case *value*, mapping non-alphanumerics to underscores."""
        return ''.join(ch if ch.isalnum() else '_' for ch in value).strip('_').lower() or 'default'

    repo_root = _find_repo_root(start)
    config_path = repo_root / 'config.yaml'
    if not config_path.exists():
        # Fail with a message that names the search origin instead of the
        # fallback path the search happened to end on.
        raise FileNotFoundError(
            f"config.yaml not found in '{start}' or any of its parent directories"
        )

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(config_path, 'r', encoding='utf-8') as fh:
        cfg = yaml.safe_load(fh) or {}

    appliances = cfg.get('appliances') or []
    selected = None
    if isinstance(appliances, list) and appliances:
        if appliance_name:
            # Skip entries that are not mappings rather than crashing on .get.
            selected = next(
                (
                    a for a in appliances
                    if isinstance(a, dict) and a.get('name') == appliance_name
                ),
                None,
            )
            if selected is None:
                raise ValueError(f"No appliance named '{appliance_name}' in config.yaml")
        else:
            try:
                selected = appliances[int(appliance_index)]
            except (IndexError, TypeError, ValueError, OverflowError):
                # Best-effort: an out-of-range or non-numeric index falls
                # back to the first configured appliance.
                selected = appliances[0]

    # Appliance-level target wins over the top-level one.
    target = ((selected or {}).get('target') or cfg.get('target') or '').strip()
    if not target:
        raise ValueError('config.yaml missing "target"')

    # Make the target safe to embed in a directory name.
    sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
    output_dir = repo_root / f'output_{sanitized}'
    output_dir.mkdir(parents=True, exist_ok=True)

    export_name = ((selected or {}).get('name') or appliance_name or sanitized)
    raw_export_dir = repo_root / 'raw_exports' / _slugify(export_name)

    return {
        'repo_root': repo_root,
        'config_path': config_path,
        'cfg': cfg,
        'selected': selected,
        'target': target,
        'output_dir': output_dir,
        'raw_export_dir': raw_export_dir,
    }


## Initialise Instances


In [None]:
# Resolve settings for the appliance named 'prod' and echo what was found.
twprod = load_config_params(Path.cwd(), appliance_name='prod')
print(f"Prod Target  : {twprod['target']}")
print(f"Prod Exports : {twprod['raw_export_dir']}")
print(f"Prod Output  : {twprod['output_dir']}")

# Same again for the appliance named 'dev'.
twdev = load_config_params(Path.cwd(), appliance_name='dev')
print(f"Dev Target   : {twdev['target']}")
print(f"Dev Exports  : {twdev['raw_export_dir']}")
print(f"Dev Output   : {twdev['output_dir']}")


## Load and Preview


In [None]:
# Columns that are removed from the raw export before it is shown or saved.
BASE_EXPORT_COLUMNS = ['Appliance Target', 'Appliance Name', 'Query Title']
# Report columns, in display order; any that are absent are added as empty.
EXPECTED_COLUMNS = ['Software_Instance', 'Host', 'Type', 'Version', 'User_Name']
# File name looked up inside each appliance's raw export directory.
EXPORT_FILENAME = 'software_instance_user_accounts.csv'


def load_user_accounts(instance: dict) -> pd.DataFrame:
    """Load one appliance's user-account export as a report-shaped frame.

    Reads ``EXPORT_FILENAME`` from ``instance['raw_export_dir']``, drops the
    ``BASE_EXPORT_COLUMNS``, fills in any missing ``EXPECTED_COLUMNS`` with
    ``pd.NA``, and prepends a 'Discovery Instance' column holding
    ``instance['target']``. Expected columns come first in their declared
    order, followed by any other columns in their existing order.

    Raises:
        FileNotFoundError: The export file does not exist.
    """
    source = instance['raw_export_dir'] / EXPORT_FILENAME
    if not source.exists():
        raise FileNotFoundError(f"Missing export {source}")

    frame = pd.read_csv(source)
    bookkeeping = [name for name in BASE_EXPORT_COLUMNS if name in frame.columns]
    frame = frame.drop(columns=bookkeeping, errors='ignore')

    # Guarantee every report column exists so the final selection cannot fail.
    absent = [name for name in EXPECTED_COLUMNS if name not in frame.columns]
    for name in absent:
        frame[name] = pd.NA

    frame.insert(0, 'Discovery Instance', instance['target'])
    leading = ['Discovery Instance', *EXPECTED_COLUMNS]
    trailing = [name for name in frame.columns if name not in leading]
    return frame[leading + trailing]

def _show_preview(instance: dict, rows: int = 10) -> pd.DataFrame:
    """Load an appliance's accounts, print its target and display the top rows."""
    frame = load_user_accounts(instance)
    print(instance['target'])
    display(frame.head(rows))
    return frame


# Keep both frames around; the save step below writes them out.
prod_df = _show_preview(twprod)
dev_df = _show_preview(twdev)


## Save CSV


In [None]:
# Name of the report file written into each appliance's output directory.
OUTPUT_FILE = 'software_usernames.csv'

# Production appliance: write the frame without the index column.
prod_path = twprod['output_dir'] / OUTPUT_FILE
prod_df.to_csv(path_or_buf=prod_path, index=False)
print(f'Saved prod accounts to {prod_path} (rows: {prod_df.shape[0]})')

# Development appliance: same file name, its own output directory.
dev_path = twdev['output_dir'] / OUTPUT_FILE
dev_df.to_csv(path_or_buf=dev_path, index=False)
print(f'Saved dev accounts to {dev_path} (rows: {dev_df.shape[0]})')


---
### Notes
- The export already resolves usernames for discovered processes; adjust the CLI query if additional fields are required.
