# Discovery Run Analysis (BMC Discovery)

This notebook reproduces the DisMAL `discovery_run_analysis` report using the raw CSV exports generated by DisMAL.
It loads data from `raw_exports/<appliance>/discovery_run_analysis.csv` for preview and optional re-export to the standard output folders.


In [None]:
# TODO: Fix report headers

## Requirements

Uncomment the `%pip install` line in the next cell if the dependencies are not already installed in your environment.

In [None]:
# %pip install -q pandas pyyaml

import re
from pathlib import Path

import pandas as pd
import yaml


## Configuration (from config.yaml)

Loads target details, resolves the output folder, and points to the matching raw export directory for each appliance.


In [None]:
from pathlib import Path
import re
import yaml

def load_config_params(
    start: Path,
    appliance_name: str = None,
    appliance_index: int = 0,
) -> dict:
    """Resolve connection and folder settings for one appliance from config.yaml.

    ``config.yaml`` is searched for in ``start`` and each of its parents. An
    appliance entry is picked from the ``appliances`` list either by name or,
    when no name is given, by index (falling back to the first entry when the
    index is unusable). Per-appliance values take precedence over top-level
    values of the same key. When ``appliances`` is missing or empty, only the
    top-level values are used.

    Side effect: the ``output_<sanitized target>`` folder is created under the
    repository root if it does not exist yet.

    Args:
        start: Directory from which the search for config.yaml begins.
        appliance_name: Value of the ``name`` key of the appliance to select.
        appliance_index: Position in ``appliances`` used when no name is given.

    Returns:
        dict with the keys ``repo_root``, ``config_path``, ``cfg``,
        ``selected``, ``target``, ``token``, ``api_version``, ``verify_ssl``,
        ``output_dir`` and ``raw_export_dir``. ``token`` is ``None`` when no
        token could be resolved.

    Raises:
        FileNotFoundError: If config.yaml (or a configured token file) cannot
            be opened.
        ValueError: If the config root is not a mapping, the named appliance
            does not exist, or no ``target`` is configured.
    """
    def _find_repo_root(start: Path) -> Path:
        # First folder (walking upwards) that contains config.yaml wins.
        for p in [start, *start.parents]:
            if (p / 'config.yaml').exists():
                return p
        return start.parent

    def _slugify(value: str) -> str:
        return re.sub(r'[^A-Za-z0-9]+', '_', value).strip('_').lower() or 'default'

    repo_root = _find_repo_root(start)
    config_path = repo_root / 'config.yaml'

    # Explicit encoding so the result does not depend on the platform locale.
    with open(config_path, 'r', encoding='utf-8') as fh:
        cfg = yaml.safe_load(fh) or {}
    if not isinstance(cfg, dict):
        raise ValueError('config.yaml must contain a mapping at the top level')

    apps = cfg.get('appliances') or []
    selected = None
    if isinstance(apps, list) and apps:
        if appliance_name:
            # Skip malformed (non-mapping) entries instead of crashing on them.
            selected = next(
                (
                    a for a in apps
                    if isinstance(a, dict) and a.get('name') == appliance_name
                ),
                None,
            )
            if selected is None:
                raise ValueError(f"No appliance named '{appliance_name}' in config.yaml")
        else:
            try:
                selected = apps[int(appliance_index)]
            except (IndexError, TypeError, ValueError):
                # Best-effort: an out-of-range or non-numeric index falls back
                # to the first configured appliance.
                selected = apps[0]

    entry = selected or {}

    target = (entry.get('target') or cfg.get('target') or '').strip()
    if not target:
        raise ValueError('config.yaml missing "target"')

    token = (entry.get('token') or cfg.get('token') or '').strip()
    token_file = entry.get('token_file') or cfg.get('token_file') or cfg.get('f_token')
    if not token and token_file:
        tf_path = Path(token_file)
        if not tf_path.is_absolute():
            # Relative token paths are resolved against the repository root.
            tf_path = repo_root / tf_path
        with open(tf_path, 'r', encoding='utf-8') as tf:
            token = tf.read().strip()
    if not token:
        token = None

    api_version = str(entry.get('api_version') or cfg.get('api_version') or 'v1.14')
    verify_ssl = bool(entry.get('verify_ssl', cfg.get('verify_ssl', True)))

    sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
    output_dir = repo_root / f'output_{sanitized}'
    output_dir.mkdir(parents=True, exist_ok=True)

    export_name = entry.get('name') or appliance_name or sanitized
    # str() guards against YAML names that parse as numbers.
    raw_export_dir = repo_root / 'raw_exports' / _slugify(str(export_name))

    return {
        "repo_root": repo_root,
        "config_path": config_path,
        "cfg": cfg,
        "selected": selected,
        "target": target,
        "token": token,
        "api_version": api_version,
        "verify_ssl": verify_ssl,
        "output_dir": output_dir,
        "raw_export_dir": raw_export_dir,
    }


In [None]:
def init_appliance(appliance_name: str = "prod"):
    """Load the settings for ``appliance_name`` and print a short summary.

    The configuration is resolved via ``load_config_params`` starting from the
    current working directory. Returns a dict holding the full ``params``
    mapping, the most-used values lifted to the top level, and the appliance
    name that was requested.
    """
    params = load_config_params(Path.cwd(), appliance_name=appliance_name)

    # Values copied from params to the top level of the returned mapping.
    lifted_keys = ("target", "api_version", "verify_ssl", "output_dir", "raw_export_dir")
    instance = {"params": params}
    instance.update((key, params[key]) for key in lifted_keys)
    instance["appliance_name"] = appliance_name

    summary = (
        ('Appliance Name :', appliance_name),
        ('Target         :', instance["target"]),
        ('API Version    :', instance["api_version"]),
        ('Verify SSL     :', instance["verify_ssl"]),
        ('Raw CSV folder :', instance["raw_export_dir"]),
        ('Output folder  :', instance["output_dir"]),
    )
    for label, value in summary:
        print(label, value)

    return instance


## Initialise Instances

In [None]:
# Resolve the settings for the appliance named "prod" in config.yaml;
# init_appliance prints the resolved values below the heading line.
print("Initialise Prod:")
twprod = init_appliance("prod")

# Same for the appliance named "dev".
print("Initialise Dev:")
twdev = init_appliance("dev")

In [None]:
# Columns removed from the loaded export before the report is built.
BASE_EXPORT_COLUMNS = ['Appliance Target', 'Appliance Name', 'Query Title']
# Raw export column name -> display header used in the report.
COLUMN_RENAMES = {
    'DiscoveryRun.valid_ranges': 'Explicit Ranges',
    'DiscoveryRun.label': 'Scan Label',
    'DiscoveryRun.endtime': 'End Time',
    'DiscoveryRun.range_summary': 'Range Summary',
    'DiscoveryRun.outpost_name': 'Outpost Name',
    'ScanRange.label': 'Range Label',
    'ScanRange.scan_kind': 'Scan Kind',
    'ScanRange.range_strings_or_provider': 'Range',
    'ScanRange.schedule': 'Schedule',
    'DiscoveryRun.total': 'Total Endpoints',
    'DiscoveryRun.ActiveEndpoints': 'Active Endpoints',
    'DiscoveryRun.dropped': 'Dropped',
    'DiscoveryAccess.scan_kinds': 'Scan Kinds',
}
# Display columns coerced to numbers; values that do not parse become NaN.
NUMERIC_COLUMNS = ['Total Endpoints', 'Active Endpoints', 'Dropped']

_SLUG_PATTERN = re.compile(r'[^A-Za-z0-9]+')


def slugify_title(value: str) -> str:
    """Return ``value`` lower-cased with non-alphanumeric runs replaced by ``_``.

    Leading/trailing underscores are stripped; an empty result (including a
    ``None`` or empty input) yields ``'unnamed'``.
    """
    slug = _SLUG_PATTERN.sub('_', value or '').strip('_').lower()
    return slug or 'unnamed'


def load_discovery_run_analysis(instance, query_title: str = 'Discovery Run Analysis'):
    """Load the raw CSV export for ``query_title`` and shape it for reporting.

    The file ``<slugified query title>.csv`` is read from
    ``instance['raw_export_dir']``. Columns listed in ``BASE_EXPORT_COLUMNS``
    are dropped, raw column names are mapped to their display headers, missing
    display columns are added as empty, and the numeric columns are coerced to
    numbers. The result starts with a ``Discovery Instance`` column, followed
    by the display columns in ``COLUMN_RENAMES`` order, then any other columns.

    Args:
        instance: Mapping providing ``raw_export_dir`` (a ``Path``) and
            ``target``.
        query_title: Title whose slug determines the CSV file name.

    Returns:
        pandas.DataFrame with the columns described above.

    Raises:
        FileNotFoundError: If the expected CSV file does not exist.
    """
    instance_col = 'Discovery Instance'

    csv_path = instance['raw_export_dir'] / f"{slugify_title(query_title)}.csv"
    if not csv_path.exists():
        raise FileNotFoundError(f'Missing export for {query_title}: {csv_path}')

    df = pd.read_csv(csv_path)
    df = df.drop(columns=[c for c in BASE_EXPORT_COLUMNS if c in df.columns], errors='ignore')

    rename_map = {}
    cols_to_drop = []

    for source_col, display_col in COLUMN_RENAMES.items():
        if source_col not in df.columns:
            continue
        if display_col in df.columns:
            # Both forms present: keep the display column and only use the
            # raw column to fill its gaps.
            df[display_col] = df[display_col].fillna(df[source_col])
            cols_to_drop.append(source_col)
        else:
            rename_map[source_col] = display_col

    if cols_to_drop:
        df = df.drop(columns=cols_to_drop)

    if rename_map:
        df = df.rename(columns=rename_map)

    for display_col in COLUMN_RENAMES.values():
        if display_col not in df.columns:
            df[display_col] = pd.NA

    for col in NUMERIC_COLUMNS:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # DataFrame.insert raises ValueError when the column already exists, so an
    # export that already carries the instance column is handled separately:
    # existing values are kept and only blanks are filled with the target.
    if instance_col in df.columns:
        df[instance_col] = df[instance_col].fillna(instance['target'])
    else:
        df.insert(0, instance_col, instance['target'])

    ordered = [instance_col] + list(COLUMN_RENAMES.values())
    remaining = [c for c in df.columns if c not in ordered]

    return df[[c for c in ordered if c in df.columns] + remaining]


## Run and Preview

Load the rows from each appliance's raw CSV export, add the Discovery Instance column, and preview the first rows.

In [None]:
# Load the prod export, print its target and preview the first five rows.
# NOTE(review): `display` is not imported here; presumably the helper the
# notebook environment provides — confirm when running outside Jupyter.
dra_prod = load_discovery_run_analysis(twprod)
print(twprod['target'])
display(dra_prod.head(5))

# Same preview for the dev appliance.
dra_dev = load_discovery_run_analysis(twdev)
print(twdev['target'])
display(dra_dev.head(5))


## Save to CSV

Writes `discovery_run_analysis.csv` to the standard output folder.

In [None]:
from pathlib import Path
import pandas as pd

def save(df: pd.DataFrame, output_dir: Path, filename: str):
    """
    Save a discovery run DataFrame to CSV in the specified output directory.

    Args:
        df: DataFrame to write (the index is not written).
        output_dir: Destination folder, as a ``Path`` or a string. It is
            created, including missing parents, if it does not exist.
        filename: File name without extension; ``.csv`` is appended.
    """
    # Accept plain strings as well as Path objects, and make sure the folder
    # exists so the write does not fail on a fresh checkout.
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    output_csv = str(target_dir / f"{filename}.csv")
    df.to_csv(output_csv, index=False)
    print(f"Saved to {output_csv}")


# Usage: pass each DataFrame together with the output directory of the same
# appliance, so the prod and dev reports are written to separate folders.
save(dra_prod, twprod['output_dir'], "discovery_run_analysis")
save(dra_dev, twdev['output_dir'], "discovery_run_analysis")