# Capture Candidates Report (BMC Discovery)

This notebook reproduces the DisMAL `capture_candidates` report using the raw CSV exports generated by DisMAL.
It loads data from `raw_exports/<appliance>/capture_candidates.csv` for preview and optional re-export to the standard output folders.

In [None]:
# TODO: Fix report headers

## Requirements

Uncomment the `%pip` line in the next cell to install the dependencies (pandas, PyYAML) if they are missing from your environment.

In [None]:
# %pip install -q pandas pyyaml

from pathlib import Path
import pandas as pd
import yaml


## Configuration (from config.yaml)

Loads target, token/token_file, API version, and SSL flag; prepares output folder.

In [None]:
from pathlib import Path
import yaml

def load_config_params(
    start: Path,
    appliance_name: str = None,
    appliance_index: int = 0,
) -> dict:
    """Locate ``config.yaml`` and resolve the settings for one appliance.

    The file is searched for in ``start`` and then in each of its parent
    directories; the directory that contains it is treated as the repo root.
    Values found on the selected entry of the ``appliances`` list take
    precedence over the same keys at the top level of the file.

    Args:
        start: Directory from which the upward search for ``config.yaml``
            begins.
        appliance_name: ``name`` of the entry to pick from ``appliances``.
            Ignored when the config has no non-empty ``appliances`` list, in
            which case only the top-level keys are used.
        appliance_index: Position in ``appliances`` to use when no name is
            given. An unusable index falls back to the first entry.

    Returns:
        A dict with the keys ``repo_root``, ``config_path``, ``cfg``,
        ``selected``, ``target``, ``token`` (``None`` when no token could be
        resolved), ``api_version``, ``verify_ssl`` and ``output_dir``.

    Raises:
        FileNotFoundError: ``config.yaml`` (or a configured token file) does
            not exist.
        ValueError: ``appliance_name`` matches no entry, or no ``target`` is
            configured.

    Side effects:
        Creates ``<repo_root>/output_<sanitized target>`` if it is missing.
    """
    def _find_repo_root(start: Path) -> Path:
        for p in [start] + list(start.parents):
            if (p / 'config.yaml').exists():
                return p
        return start.parent

    repo_root = _find_repo_root(start)
    config_path = repo_root / 'config.yaml'
    if not config_path.exists():
        # Fail with a message that reflects the search that was performed,
        # rather than a bare open() error on the fallback path.
        raise FileNotFoundError(
            f"config.yaml not found in {start} or any of its parent directories"
        )

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(config_path, 'r', encoding='utf-8') as fh:
        cfg = yaml.safe_load(fh) or {}

    apps = cfg.get('appliances') or []
    selected = None
    if isinstance(apps, list) and apps:
        if appliance_name:
            selected = next((a for a in apps if a.get('name') == appliance_name), None)
            if selected is None:
                raise ValueError(f"No appliance named '{appliance_name}' in config.yaml")
        else:
            try:
                selected = apps[int(appliance_index)]
            except (IndexError, TypeError, ValueError, OverflowError):
                # Best effort: an out-of-range or non-numeric index uses the first entry.
                selected = apps[0]

    target = ((selected or {}).get('target') or cfg.get('target') or '').strip()
    if not target:
        raise ValueError('config.yaml missing "target"')

    token = (((selected or {}).get('token') or cfg.get('token') or '').strip())
    token_file = (selected or {}).get('token_file') or cfg.get('token_file') or cfg.get('f_token')
    if not token and token_file:
        tf_path = Path(token_file)
        if not tf_path.is_absolute():
            # Relative token paths are resolved against the repo root, not the CWD.
            tf_path = repo_root / tf_path
        with open(tf_path, 'r', encoding='utf-8') as tf:
            token = tf.read().strip()
    if not token:
        token = None

    api_version = str((selected or {}).get('api_version') or cfg.get('api_version') or 'v1.14')
    verify_ssl = bool((selected or {}).get('verify_ssl', cfg.get('verify_ssl', True)))

    # Make the target usable as a folder name.
    sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
    output_dir = repo_root / f'output_{sanitized}'
    output_dir.mkdir(parents=True, exist_ok=True)

    return {
        "repo_root": repo_root,
        "config_path": config_path,
        "cfg": cfg,
        "selected": selected,
        "target": target,
        "token": token,
        "api_version": api_version,
        "verify_ssl": verify_ssl,
        "output_dir": output_dir,
    }


In [None]:
def init_appliance(appliance_name: str = "prod"):
    """Resolve config for ``appliance_name`` and locate its raw export CSV.

    Prints a short summary of the resolved paths, then returns a dict with
    the keys ``params``, ``target``, ``appliance_name``, ``csv_path`` and
    ``output_dir``.

    Raises:
        FileNotFoundError: the expected
            ``raw_exports/<appliance_name>/capture_candidates.csv`` is missing
            under the repo root.
    """
    params = load_config_params(Path.cwd(), appliance_name=appliance_name)
    target, output_dir = params["target"], params["output_dir"]
    csv_path = params["repo_root"].joinpath(
        'raw_exports', appliance_name, 'capture_candidates.csv'
    )

    # The summary is printed before the existence check so the expected
    # location is visible even when the file turns out to be missing.
    summary = (
        ('Appliance Name :', appliance_name),
        ('Target         :', target),
        ('CSV Source     :', csv_path),
        ('Output folder  :', output_dir),
    )
    for label, value in summary:
        print(label, value)

    if csv_path.exists():
        return {
            "params": params,
            "target": target,
            "appliance_name": appliance_name,
            "csv_path": csv_path,
            "output_dir": output_dir,
        }

    raise FileNotFoundError(f'Expected CSV not found at {csv_path}')


## Initialise Instances

In [None]:
# Resolve configuration and locate the raw export for each appliance.
# Each call raises FileNotFoundError if its CSV is missing.
print("Initialise Prod:")
twprod = init_appliance(appliance_name="prod")

print("Initialise Dev:")
twdev = init_appliance(appliance_name="dev")

## Run and Preview

Load the raw export CSVs and preview the first few entries.

In [None]:
# Columns that are moved to the front of the report, in this order, when
# they are present in the loaded CSV. Also used as the header of an empty report.
COLUMN_ORDER = [
    'Discovery Instance', 'Access Method', 'Request Time',
    'Hostname', 'OS', 'Failure Reason',
    'Syscontact', 'Syslocation', 'Sysdescr', 'Sysobject ID',
]


In [None]:
# Load CSV and Extract Results
def get_results(instance: dict) -> pd.DataFrame:
    """Load an appliance's raw capture-candidates CSV into a DataFrame.

    Args:
        instance: Mapping with at least ``csv_path`` (the CSV to read) and
            ``target`` (used to label rows with their Discovery Instance).

    Returns:
        The CSV contents with a ``Discovery Instance`` column guaranteed to be
        present (missing values filled with ``instance['target']``) and the
        ``COLUMN_ORDER`` columns moved to the front; any other columns follow
        in their original order. When the export holds no rows, or is a
        completely empty file, an empty frame with the ``COLUMN_ORDER``
        columns is returned instead.
    """
    try:
        df = pd.read_csv(instance['csv_path'])
    except pd.errors.EmptyDataError:
        # A zero-byte export has no header row for pandas to parse; treat it
        # the same as an export with a header but no rows.
        return pd.DataFrame(columns=COLUMN_ORDER)

    if df.empty:
        return pd.DataFrame(columns=COLUMN_ORDER)

    if 'Discovery Instance' not in df.columns:
        df.insert(0, 'Discovery Instance', instance['target'])
    else:
        df['Discovery Instance'] = df['Discovery Instance'].fillna(instance['target'])

    # Known columns first (in the standard order), then whatever else the CSV had.
    front = [c for c in COLUMN_ORDER if c in df.columns]
    remainder = [c for c in df.columns if c not in front]
    return df[front + remainder]

# Prod: load the export, then show which target it belongs to and the first rows.
df_prod = get_results(twprod)
print(twprod["target"])
display(df_prod.head(n=5))

# Dev: same preview for the second appliance.
df_dev = get_results(twdev)
print(twdev["target"])
display(df_dev.head(n=5))


## Save to CSV

Writes `capture_candidates.csv` to each appliance's output folder (`output_<target>`), mirroring the raw export.

In [None]:
# Write each appliance's frame to its own output folder, without the index column.
output_prod = str(twprod['output_dir'].joinpath('capture_candidates.csv'))
df_prod.to_csv(output_prod, index=False)

output_dev = str(twdev['output_dir'].joinpath('capture_candidates.csv'))
df_dev.to_csv(output_dev, index=False)

# Report both destinations only after both writes have succeeded.
print('Saved to ' + output_prod)
print('Saved to ' + output_dev)
