# Active Scans Report (BMC Discovery)

This notebook fetches discovery run activity from a BMC Discovery appliance using the REST API and loads the results into pandas for inspection and export.

## Requirements

We use `requests` for HTTP, `pandas` for tabular data, and `PyYAML` to read configuration. If they are not installed, uncomment the install cell below.

In [None]:
# %pip install -q requests pandas pyyaml

import pandas as pd
import requests
import yaml
from pathlib import Path
from typing import Any, Dict, Optional


In [None]:
# Name of the entry to pick from the `appliances:` list in config.yaml.
# When left as None (or empty), APPLIANCE_INDEX is used instead.
APPLIANCE_NAME: Optional[str] = None
# Zero-based position in the `appliances:` list, consulted only when
# APPLIANCE_NAME is not set; an unusable index falls back to the first entry.
APPLIANCE_INDEX: int = 0


## Select Appliance (optional)

If your `config.yaml` defines multiple appliances under the `appliances:` list, set `APPLIANCE_NAME` to one of their names (recommended) or set `APPLIANCE_INDEX` to pick by position. Leave both as-is to default to the first appliance.

In [None]:
from pathlib import Path

def load_config_params(start: Path, appliance_name: Optional[str] = None, appliance_index: int = 0) -> Dict[str, Any]:
    """Load connection settings for one appliance from ``config.yaml``.

    The file is looked up in ``start`` and each of its parents; when none of
    them contains it, ``start.parent / "config.yaml"`` is tried.

    Per-appliance values (from the ``appliances:`` list) take precedence over
    top-level values of the same key.  When the list is absent or empty, only
    the top-level keys are used and ``appliance_name`` is not checked.

    Args:
        start: Directory from which the search for ``config.yaml`` begins.
        appliance_name: ``name`` of the appliance entry to select.  When
            given and the list is non-empty, an entry with that name must
            exist.
        appliance_index: Position in the ``appliances:`` list, used only when
            ``appliance_name`` is not set.  An unusable index falls back to
            the first entry.

    Returns:
        A dict with the keys ``repo_root``, ``config_path``, ``cfg``,
        ``selected``, ``target``, ``token``, ``api_version``, ``verify_ssl``
        and ``output_dir``.

    Raises:
        FileNotFoundError: ``config.yaml`` (or the referenced token file)
            does not exist.
        ValueError: The config is malformed, the named appliance is unknown,
            or no target / token could be resolved.

    Side effects:
        Creates ``<repo_root>/output_<sanitized target>`` if it is missing.
    """
    def _find_repo_root(start_path: Path) -> Path:
        # Nearest directory at or above start_path that holds config.yaml.
        for p in (start_path, *start_path.parents):
            if (p / "config.yaml").exists():
                return p
        return start_path.parent

    repo_root = _find_repo_root(start)
    config_path = repo_root / "config.yaml"
    if not config_path.exists():
        raise FileNotFoundError(f"config.yaml not found at {config_path}")
    # Explicit encoding: the platform default is locale-dependent.
    with open(config_path, "r", encoding="utf-8") as fh:
        cfg = yaml.safe_load(fh) or {}
    if not isinstance(cfg, dict):
        raise ValueError("config.yaml must contain a mapping at the top level")

    apps = cfg.get("appliances") or []
    selected = None
    if isinstance(apps, list) and apps:
        if appliance_name:
            selected = next(
                (a for a in apps if isinstance(a, dict) and a.get("name") == appliance_name),
                None,
            )
            if selected is None:
                raise ValueError(f"No appliance named '{appliance_name}' in config.yaml")
        else:
            try:
                selected = apps[int(appliance_index)]
            except (IndexError, TypeError, ValueError):
                # Best-effort: an unusable index selects the first appliance.
                selected = apps[0]
    if selected is not None and not isinstance(selected, dict):
        raise ValueError("Selected appliance entry in config.yaml is not a mapping")
    entry = selected or {}

    # str() guards against YAML scalars parsed as non-strings (e.g. a numeric token).
    target = str(entry.get("target") or cfg.get("target") or "").strip()
    if not target:
        raise ValueError('config.yaml missing "target"')

    token = str(entry.get("token") or cfg.get("token") or "").strip()
    token_file = entry.get("token_file") or cfg.get("token_file") or cfg.get("f_token")
    if not token and token_file:
        tf_path = Path(token_file)
        if not tf_path.is_absolute():
            # Relative token paths are resolved against the config directory.
            tf_path = repo_root / tf_path
        with open(tf_path, "r", encoding="utf-8") as tf:
            token = tf.read().strip()
    if not token:
        raise ValueError("API token not found in config.yaml (token or token_file)")

    api_version = str(entry.get("api_version") or cfg.get("api_version") or "v1.14")
    verify_ssl = bool(entry.get("verify_ssl", cfg.get("verify_ssl", True)))

    # Replace '.', ':' and '/' so the target can be used as a directory name.
    sanitized = target.translate(str.maketrans("./:", "___"))
    output_dir = repo_root / f"output_{sanitized}"
    output_dir.mkdir(parents=True, exist_ok=True)

    return {
        "repo_root": repo_root,
        "config_path": config_path,
        "cfg": cfg,
        "selected": selected,
        "target": target,
        "token": token,
        "api_version": api_version,
        "verify_ssl": verify_ssl,
        "output_dir": output_dir,
    }


In [None]:
def init_appliance(appliance_name: Optional[str] = "prod") -> Dict[str, Any]:
    """Build a ready-to-use API session for one configured appliance.

    Settings are loaded via ``load_config_params`` starting from the current
    working directory.  A summary of the resolved settings is printed.

    Args:
        appliance_name: Name of the appliance entry to load; passed through
            to ``load_config_params``.

    Returns:
        A dict with the keys ``params``, ``target``, ``session``,
        ``api_base``, ``api_version``, ``output_dir`` and ``name``.
    """
    params = load_config_params(Path.cwd(), appliance_name=appliance_name)

    target = params["target"]
    # Bare host names get an https:// scheme; full URLs are used unchanged.
    base_url = f"https://{target}" if "://" not in target else target

    raw_version = params["api_version"]
    api_version = raw_version if raw_version.startswith('v') else f"v{raw_version}"

    session = requests.Session()
    token = params["token"]
    # Accept tokens that already carry the "Bearer " prefix.
    if token.lower().startswith('bearer '):
        session.headers["Authorization"] = token
    else:
        session.headers["Authorization"] = f"Bearer {token}"
    session.headers["Accept"] = "application/json"
    session.verify = params["verify_ssl"]

    api_base = f"{base_url.rstrip('/')}/api/{api_version}/"

    summary = (
        ('Base Host     :', target),
        ('API Version   :', api_version),
        ('Verify SSL    :', params["verify_ssl"]),
        ('Output folder :', params["output_dir"]),
    )
    for caption, value in summary:
        print(caption, value)

    chosen = params["selected"] or {}
    return {
        "params": params,
        "target": target,
        "session": session,
        "api_base": api_base,
        "api_version": api_version,
        "output_dir": params["output_dir"],
        # Prefer the configured name, then the requested name, then the target.
        "name": chosen.get("name") or appliance_name or target,
    }


In [None]:
# Locate config.yaml relative to this notebook (../config.yaml)
# Robustly locate the project root (directory containing config.yaml) without using Path.resolve()
def _find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` holding config.yaml.

    When no such directory exists, ``start.parent`` is returned (useful when
    running from notebooks/).
    """
    search_path = (start, *start.parents)
    hits = (folder for folder in search_path if (folder / 'config.yaml').exists())
    return next(hits, start.parent)

repo_root = _find_repo_root(Path.cwd())
config_path = repo_root / 'config.yaml'
if not config_path.exists():
    raise FileNotFoundError(f'config.yaml not found at {config_path}')

# Explicit encoding: the platform default is locale-dependent.
with open(config_path, 'r', encoding='utf-8') as fh:
    cfg = yaml.safe_load(fh) or {}

# Select appliance from list if present
selected = None
apps = cfg.get('appliances') or []
if isinstance(apps, list) and apps:
    if APPLIANCE_NAME:
        # Entries that are not mappings cannot carry a name; skip them.
        selected = next(
            (a for a in apps if isinstance(a, dict) and a.get('name') == APPLIANCE_NAME),
            None,
        )
        if selected is None:
            raise ValueError(f"No appliance named '{APPLIANCE_NAME}' in config.yaml")
    else:
        try:
            selected = apps[int(APPLIANCE_INDEX)]
        except (IndexError, TypeError, ValueError):
            # Best-effort: an unusable index selects the first appliance.
            selected = apps[0]

# Resolve target and base URL (per-appliance value wins over the top-level one)
target = ((selected or {}).get('target') or cfg.get('target') or '').strip()
if not target:
    raise ValueError('config.yaml missing "target"')
BASE_URL = target if ('://' in target) else f'https://{target}'

# Resolve token or token file
token = (((selected or {}).get('token') or cfg.get('token') or '').strip())
token_file = (selected or {}).get('token_file') or cfg.get('token_file') or cfg.get('f_token')
if not token and token_file:
    tf_path = Path(token_file)
    if not tf_path.is_absolute():
        # Relative token paths are resolved against the config directory.
        tf_path = repo_root / tf_path
    with open(tf_path, 'r', encoding='utf-8') as tf:
        token = tf.read().strip()
if not token:
    raise ValueError('API token not found in config.yaml (token or token_file)')

API_VERSION = str((selected or {}).get('api_version') or cfg.get('api_version') or 'v1.14')
VERIFY_SSL = bool((selected or {}).get('verify_ssl', cfg.get('verify_ssl', True)))

# Prepare output directory consistent with CLI naming
sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
output_dir = repo_root / f'output_{sanitized}'
output_dir.mkdir(parents=True, exist_ok=True)

print('Appliance     :', (selected or {}).get('name', '(single)'))
print('Base URL      :', BASE_URL)
print('API Version   :', API_VERSION)
print('Verify SSL    :', VERIFY_SSL)
print('Output folder :', output_dir)

In [None]:
# Appliance instances that were initialised successfully, one per distinct target.
instances: list[Dict[str, Any]] = []

def _attempt_init(name: Optional[str]) -> None:
    """Try to initialise the named appliance and register it in ``instances``.

    Failures are reported and skipped so one bad entry does not stop the
    notebook.  An appliance whose target is already registered is skipped as
    well: with a single-appliance config every name resolves to the same
    top-level target, which would otherwise be queried (and reported) twice.
    """
    label = name if name else 'default'
    try:
        print(f"Initialise {label.capitalize()}:")
        inst = init_appliance(name)
    except Exception as exc:
        print(f"Skipping {label}: {exc}")
        return
    if any(existing['target'] == inst['target'] for existing in instances):
        print(f"Skipping {label}: target {inst['target']} is already initialised")
        inst['session'].close()
        return
    instances.append(inst)

_attempt_init('prod')
_attempt_init('dev')

# Neither named appliance worked: fall back to the default selection.
if not instances:
    _attempt_init(None)

if not instances:
    raise RuntimeError('No appliances could be initialised from config.yaml')


In [None]:
def fetch_active_runs(instance: Dict[str, Any]) -> pd.DataFrame:
    """Fetch ``discovery/runs`` for one appliance and return it as a DataFrame.

    The result always has ``'Discovery Instance'`` (the appliance target) as
    its first column.  Request failures, non-200 responses and undecodable
    JSON are reported with ``print`` and yield a frame without rows.

    Args:
        instance: Dict providing ``api_base``, ``session`` and ``target``.

    Returns:
        The normalized run records, or an empty frame on any failure.
    """
    def _tagged(frame: pd.DataFrame) -> pd.DataFrame:
        # Put the appliance target in front of whatever columns exist.
        frame.insert(0, 'Discovery Instance', instance['target'])
        return frame

    url = instance['api_base'] + 'discovery/runs'
    try:
        resp = instance['session'].get(url, timeout=30)
    except Exception as exc:
        print(f"Request to {url} failed: {exc}")
        return _tagged(pd.DataFrame())

    if resp.status_code != 200:
        print(f"Error {resp.status_code} fetching {url}: {resp.text[:200]}")
        return _tagged(pd.DataFrame())

    try:
        payload = resp.json()
    except Exception as exc:
        print(f"Failed to decode JSON for {instance['target']}: {exc}")
        payload = []

    # Accept either a bare list of records or a dict wrapping them in 'results'.
    records = []
    if isinstance(payload, list):
        records = payload
    elif isinstance(payload, dict) and 'results' in payload:
        records = payload['results']

    frame = pd.DataFrame() if not records else pd.json_normalize(records)
    return _tagged(frame)


def convert_numeric_columns(frame: pd.DataFrame, numeric_columns: Optional[list[str]] = None) -> pd.DataFrame:
    """Return a copy of ``frame`` with count columns cast to nullable integers.

    Values that cannot be parsed as numbers become ``<NA>``.  Columns that
    are not present in ``frame`` are ignored, and ``frame`` itself is left
    untouched.

    Args:
        frame: Source data.
        numeric_columns: Names of the columns to convert.  Defaults to
            ``done``, ``pre_scanning``, ``scanning`` and ``total``.

    Returns:
        A new DataFrame with the selected columns as ``Int64``.
    """
    if numeric_columns is None:
        numeric_columns = ['done', 'pre_scanning', 'scanning', 'total']
    converted = frame.copy()
    for col in numeric_columns:
        if col in converted.columns:
            # 'Int64' (capital I) is the nullable dtype, so coerced failures stay <NA>.
            converted[col] = pd.to_numeric(converted[col], errors='coerce').astype('Int64')
    return converted


## Fetch discovery runs

Call the Discovery API endpoint that lists discovery runs. We normalize the JSON into a pandas DataFrame.

In [None]:
# One entry per appliance: the instance dict paired with its runs DataFrame.
runs_by_instance: list[dict[str, Any]] = []
for inst in instances:
    df_runs = fetch_active_runs(inst)
    runs_by_instance.append(dict(instance=inst, data=df_runs))
    print(inst['target'])
    # Preview the first rows, or say so when the appliance returned nothing.
    if df_runs.empty:
        print('No runs returned.')
    else:
        display(df_runs.head(10))


## Inspect common fields

Show a few relevant columns such as labels, timing and counts when present.

In [None]:
# Report, per appliance, the runs whose 'finished' flag is False.
for item in runs_by_instance:
    inst, df_runs = item['instance'], item['data']
    if df_runs.empty:
        print(f"No runs for {inst['target']}")
        continue
    if 'finished' not in df_runs.columns:
        print(f"{inst['target']} dataset has no 'finished' column.")
        continue
    # Element-wise comparison against False; other values are not matched.
    in_progress = df_runs[df_runs['finished'].eq(False)]
    print(f"In-progress runs for {inst['target']}: {len(in_progress)}")
    if not in_progress.empty:
        display(in_progress.head(10))


## Filter in-progress runs

Filter the DataFrame to show only runs that are not yet finished (when the `finished` field is present).

In [None]:
# Per-appliance frames in report layout: 'Discovery Instance' first, count
# columns as integers, remaining columns in alphabetical order.
processed_runs: list[dict[str, Any]] = []
for item in runs_by_instance:
    inst = item['instance']
    df_runs = convert_numeric_columns(item['data'])
    # Sort the remaining columns; without the sort this reorder was a no-op
    # because 'Discovery Instance' is already the first column.
    other_cols = sorted((c for c in df_runs.columns if c != 'Discovery Instance'), key=str)
    if other_cols:
        df_runs = df_runs[['Discovery Instance'] + other_cols]
    processed_runs.append({'instance': inst, 'data': df_runs})

# Combined view across all appliances (empty placeholder when there are none).
if processed_runs:
    combined_df = pd.concat([item['data'] for item in processed_runs], ignore_index=True)
else:
    combined_df = pd.DataFrame(columns=['Discovery Instance'])

display(combined_df.head(10))


## Save to CSV (optional)

Persist the full dataset to the project output directory (`output_<target>`).

This cell formats the output to match the DisMAL CLI report for Active Scans by:
- Inserting a 'Discovery Instance' column as the first column.
- Casting numeric fields (done, pre_scanning, scanning, total) to integers when present.
- Sorting remaining columns alphabetically to mirror json2csv header ordering.

In [None]:
# Write each appliance's processed runs to active_scans.csv in its own output folder.
for item in processed_runs:
    inst, df_runs = item['instance'], item['data']
    output_csv = inst['output_dir'] / 'active_scans.csv'
    df_runs.to_csv(output_csv, index=False)
    print(f'Saved to {output_csv}')


---
### Notes
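- If your appliance uses a self-signed certificate, set `verify_ssl: false` for it in `config.yaml`. The API sessions take this setting from the config, so assigning `VERIFY_SSL = False` in the notebook does not change them.
- If the appliance exposes a different API version, set `api_version` in `config.yaml` (the default is `v1.14`).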
- You can further transform the dataset with `pandas.json_normalize` or additional joins if needed.