# Active Scans Report (BMC Discovery)

This notebook fetches discovery run activity from a BMC Discovery appliance using the REST API and loads the results into pandas for inspection and export.

In [None]:
# TODO: Fix report headers

## Requirements

We use `tideway` to talk to the BMC Discovery REST API, `pandas` for tabular data, and `PyYAML` to read configuration. If they are not installed, uncomment the `%pip install` line at the top of the cell below.

In [None]:
# %pip install -q tideway pandas pyyaml

import pandas as pd
import yaml
from pathlib import Path
from typing import Any, Dict, Optional
import xml.etree.ElementTree as ET
import tideway


In [None]:
# Name of the appliance entry (under `appliances:` in config.yaml) to select;
# None means no selection by name.
APPLIANCE_NAME: Optional[str] = None
# Zero-based position in the `appliances:` list, intended for selecting an
# appliance by position instead of by name.
APPLIANCE_INDEX: int = 0


## Select Appliance (optional)

If your `config.yaml` defines multiple appliances under the `appliances:` list, set `APPLIANCE_NAME` to one of their names (recommended) or set `APPLIANCE_INDEX` to pick by position. Leave both as-is to default to the first appliance.

In [None]:
from pathlib import Path

def load_config_params(start: Path, appliance_name: Optional[str] = None, appliance_index: int = 0) -> Dict[str, Any]:
    """Locate ``config.yaml`` and resolve the connection settings for one appliance.

    The directory holding ``config.yaml`` is searched for at ``start`` and then
    in each of its parents. Per-appliance values (from the ``appliances:`` list)
    take precedence over the top-level values of the same name.

    Args:
        start: Directory from which the upward search for ``config.yaml`` begins.
        appliance_name: ``name`` of the entry in the ``appliances:`` list to use.
            When given and the list is non-empty, a missing name is an error.
        appliance_index: Position in the ``appliances:`` list to use when no
            name is given. An out-of-range or non-integer index falls back to
            the first entry.

    Returns:
        A dict with the keys ``repo_root``, ``config_path``, ``cfg``,
        ``selected``, ``target``, ``token``, ``api_version``, ``verify_ssl``
        and ``output_dir``.

    Raises:
        FileNotFoundError: ``config.yaml`` (or a configured token file) cannot
            be opened.
        ValueError: The YAML root is not a mapping, the named appliance does
            not exist, or no target / token can be resolved.

    Side effects:
        Creates ``<repo_root>/output_<sanitized target>`` if it does not exist.
    """
    def _find_repo_root(start_path: Path) -> Path:
        """Return the nearest directory at or above *start_path* holding config.yaml."""
        for candidate in (start_path, *start_path.parents):
            if (candidate / "config.yaml").exists():
                return candidate
        # Nothing found: keep the original fallback; opening the file below
        # then fails with FileNotFoundError.
        return start_path.parent

    def _as_bool(value: Any) -> bool:
        """Interpret a config flag, treating strings such as "false" or "no" as False."""
        if isinstance(value, str):
            # bool("false") is True, which would silently enable verification.
            return value.strip().lower() not in {"", "0", "false", "no", "off", "n", "f"}
        return bool(value)

    repo_root = _find_repo_root(start)
    config_path = repo_root / "config.yaml"
    with open(config_path, "r", encoding="utf-8") as fh:
        cfg = yaml.safe_load(fh) or {}
    if not isinstance(cfg, dict):
        raise ValueError("config.yaml must contain a mapping at the top level")

    apps = cfg.get("appliances") or []
    selected = None
    if isinstance(apps, list) and apps:
        if appliance_name:
            selected = next(
                (a for a in apps if isinstance(a, dict) and a.get("name") == appliance_name),
                None,
            )
            if selected is None:
                raise ValueError(f"No appliance named '{appliance_name}' in config.yaml")
        else:
            try:
                selected = apps[int(appliance_index)]
            except (IndexError, TypeError, ValueError):
                # Deliberate best effort: a bad index selects the first appliance.
                selected = apps[0]

    entry = selected or {}

    # str() guards against YAML parsing an unquoted scalar as a non-string.
    target = str(entry.get("target") or cfg.get("target") or "").strip()
    if not target:
        raise ValueError('config.yaml missing "target"')

    token = str(entry.get("token") or cfg.get("token") or "").strip()
    token_file = entry.get("token_file") or cfg.get("token_file") or cfg.get("f_token")
    if not token and token_file:
        tf_path = Path(token_file)
        if not tf_path.is_absolute():
            # Relative token paths are resolved against the config directory.
            tf_path = repo_root / tf_path
        with open(tf_path, "r", encoding="utf-8") as tf:
            token = tf.read().strip()
    if not token:
        raise ValueError("API token not found in config.yaml (token or token_file)")

    api_version = str(entry.get("api_version") or cfg.get("api_version") or "v1.14")
    verify_ssl = _as_bool(entry.get("verify_ssl", cfg.get("verify_ssl", True)))

    # Make the target safe to embed in a directory name.
    sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
    output_dir = repo_root / f"output_{sanitized}"
    output_dir.mkdir(parents=True, exist_ok=True)

    return {
        "repo_root": repo_root,
        "config_path": config_path,
        "cfg": cfg,
        "selected": selected,
        "target": target,
        "token": token,
        "api_version": api_version,
        "verify_ssl": verify_ssl,
        "output_dir": output_dir,
    }


In [None]:
def init_appliance(appliance_name: Optional[str] = "prod") -> Dict[str, Any]:
    """Build a tideway client for one configured appliance and describe it.

    Settings are resolved with ``load_config_params`` starting from the current
    working directory. A summary of the connection is printed, and
    ``app.api_about`` is read as a reachability probe; a failure there is only
    reported as a warning.

    Args:
        appliance_name: Name of the appliance entry to load from config.yaml.

    Returns:
        A dict with the keys ``params``, ``target``, ``app``, ``search``,
        ``api_version``, ``output_dir`` and ``name``.
    """
    settings = load_config_params(Path.cwd(), appliance_name=appliance_name)
    host = settings["target"]
    version_number = settings["api_version"].lstrip('v')

    summary = (
        ('Base Host     :', host),
        ('API Version   :', version_number),
        ('Verify SSL    :', settings["verify_ssl"]),
        ('Output folder :', settings["output_dir"]),
    )
    for caption, value in summary:
        print(caption, value)

    client = tideway.appliance(
        host,
        settings["token"],
        api_version=version_number,
        ssl_verify=settings["verify_ssl"],
    )
    search_api = client.data()

    try:
        about_info = client.api_about
        print('Appliance reachable:', getattr(about_info, 'status_code', 'ok'))
    except Exception as err:
        print('Warning: failed to contact appliance /api/about:', err)

    # Prefer the configured name, then the requested name, then the target.
    configured_name = (settings["selected"] or {}).get("name")
    display_name = configured_name or (appliance_name or host)

    return {
        "params": settings,
        "target": host,
        "app": client,
        "search": search_api,
        "api_version": version_number,
        "output_dir": settings["output_dir"],
        "name": display_name,
    }


In [None]:
def load_query(title: str, xml_path: Optional[Path] = None) -> str:
    """Return the search text of the query named *title* from a queries XML file.

    Args:
        title: Value of the ``title`` attribute of the ``<query>`` element.
        xml_path: XML file to read. Defaults to
            ``../queries/dismal_queries.xml`` relative to the working directory.

    Returns:
        The text of the first matching ``<query>`` element's ``<search>`` child
        that has non-empty text.

    Raises:
        ValueError: No ``<query>`` with that title has non-empty search text.
    """
    source = Path('../queries/dismal_queries.xml') if xml_path is None else Path(xml_path)
    root = ET.parse(source).getroot()
    for query in root.findall('query'):
        if query.get('title') != title:
            continue
        search = query.find('search')
        # A matching title with empty search text is skipped, not returned.
        if search is not None and search.text:
            return search.text
    raise ValueError(f"Query '{title}' not found in {source.name}")

# Search text of the 'Discovery Run Analysis' query; read by fetch_active_runs.
qry_active_runs = load_query('Discovery Run Analysis')


In [None]:
# Appliance records (as returned by init_appliance) that initialised successfully.
instances: list[Dict[str, Any]] = []


def _attempt_init(name: Optional[str]):
    """Initialise the appliance called *name* and record it in ``instances``.

    Any exception raised during initialisation is reported and swallowed so
    that one unavailable appliance does not stop the others from loading.
    """
    label = name if name else 'default'
    try:
        print(f"Initialise {label.capitalize()}:")
        inst = init_appliance(name)
        instances.append(inst)
    except Exception as exc:
        print(f"Skipping {label}: {exc}")


# Honour an explicit APPLIANCE_NAME selection when one was set; otherwise try
# the conventional 'prod' and 'dev' entries. globals().get keeps this cell
# usable even if the selection cell has not been run.
_requested_name = globals().get('APPLIANCE_NAME')
for _candidate in ([_requested_name] if _requested_name else ['prod', 'dev']):
    _attempt_init(_candidate)

# Nothing matched by name: fall back to the default appliance.
if not instances:
    _attempt_init(None)

if not instances:
    raise RuntimeError('No appliances could be initialised from config.yaml')


In [None]:
def fetch_active_runs(instance: Dict[str, Any]) -> pd.DataFrame:
    """Run the active-runs query on one appliance and return a labelled frame.

    Args:
        instance: Appliance record. ``instance['search']`` must expose a
            ``search`` method; ``instance['target']`` identifies the appliance.

    Returns:
        A DataFrame whose first column, ``Discovery Instance``, holds the
        target, followed by the query columns with report headers applied.
        If the search raises or returns nothing, the frame has no rows and
        only that column.
    """
    header_renames = {
        'Scan Label': 'Label',
        'End Time': 'End Time',
        'Explicit Ranges': 'Explicit Ranges',
        'Outpost Name': 'Outpost',
        'Scan Level': 'Scan Level',
        'Scan Type': 'Scan Type',
        'Range': 'Range Summary',
        'Total Endpoints': 'Total Endpoints',
        'Active Endpoints': 'Active Endpoints',
        'Dropped': 'Dropped',
        'Scan Kinds': 'Scan Kinds',
    }

    client = instance['search']
    try:
        rows = client.search({'query': qry_active_runs}, format='object', limit=200)
    except Exception as err:
        # Best effort: report the failure and carry on with an empty result.
        print(f"TW search failed for {instance['target']}: {err}")
        rows = []

    if rows:
        frame = pd.DataFrame(rows)
    else:
        frame = pd.DataFrame()

    # Tag every row with the appliance it came from, as the leading column.
    frame.insert(0, 'Discovery Instance', instance['target'])
    return frame.rename(columns=header_renames)


def convert_numeric_columns(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with the count columns cast to nullable integers.

    The columns ``Total Endpoints``, ``Active Endpoints`` and ``Dropped`` are
    converted when present; values that cannot be parsed as numbers become
    missing. The input frame is left unmodified.
    """
    result = frame.copy()
    present = [
        name
        for name in ('Total Endpoints', 'Active Endpoints', 'Dropped')
        if name in result.columns
    ]
    for name in present:
        as_number = pd.to_numeric(result[name], errors='coerce')
        result[name] = as_number.astype('Int64')
    return result


## Fetch discovery runs

Run the `Discovery Run Analysis` query against each initialised appliance through the `tideway` search API, and load each appliance's results into its own pandas DataFrame.

In [None]:
# Query every initialised appliance and keep its frame alongside its record.
runs_by_instance: list[dict[str, Any]] = []
for inst in instances:
    df_runs = fetch_active_runs(inst)
    runs_by_instance.append(dict(instance=inst, data=df_runs))
    print(inst['target'])
    if df_runs.empty:
        print('No runs returned.')
        continue
    # Preview only the first rows to keep the notebook output short.
    display(df_runs.head(10))


## Inspect common fields

Show a few relevant columns such as labels, timing and counts when present.

In [None]:
# Cast the count columns per appliance and keep 'Discovery Instance' first.
processed_runs: list[dict[str, Any]] = []
for item in runs_by_instance:
    inst, df_runs = item['instance'], convert_numeric_columns(item['data'])
    other_cols = [c for c in df_runs.columns if c != 'Discovery Instance']
    if other_cols:
        df_runs = df_runs[['Discovery Instance', *other_cols]]
    processed_runs.append(dict(instance=inst, data=df_runs))

# Stack all appliances into one frame; fall back to an empty, labelled frame.
frames = [entry['data'] for entry in processed_runs]
combined_df = (
    pd.concat(frames, ignore_index=True)
    if frames
    else pd.DataFrame(columns=['Discovery Instance'])
)

display(combined_df.head(10))


## Save to CSV (optional)

Persist each appliance's dataset as `active_scans.csv` in that appliance's output directory (`output_<target>`).

The preceding cells format the output for the DisMAL CLI Active Scans report by:
- Inserting a 'Discovery Instance' column as the first column.
- Casting numeric fields (`Total Endpoints`, `Active Endpoints`, `Dropped`) to nullable integers when present.
- Keeping the remaining columns in the order returned by the query (they are not re-sorted).

In [None]:
# Write one CSV per appliance into that appliance's own output directory.
for item in processed_runs:
    inst, df_runs = item['instance'], item['data']
    output_csv = inst['output_dir'].joinpath('active_scans.csv')
    df_runs.to_csv(output_csv, index=False)
    print(f'Saved to {output_csv}')


---
### Notes
- If your appliance uses a self-signed certificate, set `verify_ssl: false` for it in `config.yaml`.
- If the appliance exposes a different API version, set `api_version` in `config.yaml` (the default is `v1.14`).
- You can further transform the dataset with `pandas.json_normalize` or additional joins if needed.