# Discovery Run Analysis (BMC Discovery)

This notebook reproduces the DisMAL `discovery_run_analysis` report using the Tideway library only.
It summarizes discovery runs with ranges, schedules, outpost names, totals, active and dropped endpoints, and scan kinds.

## Requirements

Uncomment the `%pip install` line at the top of the next cell if the dependencies are not yet installed in your environment.

In [None]:
# %pip install -q tideway pandas pyyaml

import os, sys
from pathlib import Path
from typing import Any, List, Dict
import pandas as pd
import yaml


## Select Appliance (optional)

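If `config.yaml` lists multiple appliances, select one by name (`APPLIANCE_NAME`) or by position (`APPLIANCE_INDEX`). A name, when set, takes precedence over the index; with the defaults below the first appliance is used.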

In [None]:
# Appliance selection, read by the configuration cell further down.
# APPLIANCE_NAME, when truthy, must equal the `name` of an entry under
# `appliances` in config.yaml (a ValueError is raised otherwise) and takes
# precedence over APPLIANCE_INDEX.
APPLIANCE_NAME = None   # e.g., 'prod' or 'dev'
# Position within the `appliances` list; an unusable index falls back to the
# first entry.
APPLIANCE_INDEX = 0     # integer index if not using name selection


## Configuration (from config.yaml)

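Finds `config.yaml` by searching the current working directory and its parents, loads the target and API token, prepares the output folder, and connects to the appliance through the Tideway library.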

In [None]:
def _find_repo_root(start: Path) -> Path:
    """Return the nearest directory, at or above *start*, holding config.yaml.

    *start* itself is checked first, then each of its parents in order.
    When no candidate contains a ``config.yaml``, ``start.parent`` is
    returned as the fallback.
    """
    candidates = (start, *start.parents)
    hit = next(
        (folder for folder in candidates if (folder / 'config.yaml').exists()),
        None,
    )
    if hit is None:
        return start.parent
    return hit

# --- Locate and load config.yaml --------------------------------------------
# The search starts at the current working directory. If no config.yaml is
# found, the open() below fails with FileNotFoundError.
repo_root = _find_repo_root(Path.cwd())
config_path = repo_root / 'config.yaml'
# Read as UTF-8 explicitly instead of relying on the platform default encoding.
with open(config_path, 'r', encoding='utf-8') as fh:
    cfg = yaml.safe_load(fh) or {}

# --- Pick the appliance entry -------------------------------------------------
# `selected` stays None when config.yaml has no non-empty `appliances` list;
# every lookup below then falls through to the top-level keys of the file.
apps = cfg.get('appliances') or []
selected = None
if isinstance(apps, list) and apps:
    if APPLIANCE_NAME:
        selected = next((a for a in apps if a.get('name') == APPLIANCE_NAME), None)
        if selected is None:
            raise ValueError(f"No appliance named '{APPLIANCE_NAME}' in config.yaml")
    else:
        try:
            selected = apps[int(APPLIANCE_INDEX)]
        except (TypeError, ValueError, IndexError, OverflowError):
            # Keep the best-effort fallback, but say so: silently querying a
            # different appliance than the one requested is easy to miss.
            print(
                f'Warning: APPLIANCE_INDEX={APPLIANCE_INDEX!r} is not usable with '
                f'{len(apps)} appliance(s); falling back to the first one'
            )
            selected = apps[0]

# --- Target and API token -----------------------------------------------------
# Per-appliance values win over top-level values.
target = ((selected or {}).get('target') or cfg.get('target') or '').strip()
if not target:
    raise ValueError('config.yaml missing "target"')

token = (((selected or {}).get('token') or cfg.get('token') or '').strip())
token_file = (selected or {}).get('token_file') or cfg.get('token_file') or cfg.get('f_token')
if not token and token_file:
    # A relative token file path is resolved against the folder holding config.yaml.
    tf_path = Path(token_file)
    if not tf_path.is_absolute():
        tf_path = repo_root / tf_path
    with open(tf_path, 'r', encoding='utf-8') as tf:
        token = tf.read().strip()
if not token:
    raise ValueError('API token not found in config.yaml (token or token_file)')

# --- Connection settings ------------------------------------------------------
API_VERSION = str((selected or {}).get('api_version') or cfg.get('api_version') or 'v1.14')
# A quoted YAML value such as "false" arrives as a non-empty string, which
# bool() would treat as True; interpret the common textual spellings instead.
_verify_raw = (selected or {}).get('verify_ssl', cfg.get('verify_ssl', True))
if isinstance(_verify_raw, str):
    VERIFY_SSL = _verify_raw.strip().lower() not in ('', '0', 'false', 'no', 'off')
else:
    VERIFY_SSL = bool(_verify_raw)

# --- Output folder ------------------------------------------------------------
# '.', ':' and '/' in the target are replaced so it can be used in a folder name.
sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
output_dir = repo_root / f'output_{sanitized}'
output_dir.mkdir(parents=True, exist_ok=True)

print('Base Host     :', target)
print('API Version   :', API_VERSION)
print('Verify SSL    :', VERIFY_SSL)
print('Output folder :', output_dir)

# --- Connect through Tideway --------------------------------------------------
# Prefer local Tideway package if present
local_tideway = repo_root / 'Tideway'
if local_tideway.exists():
    sys.path.insert(0, str(local_tideway))
import importlib
tideway = importlib.import_module('tideway')
# Tideway is given the version without the leading 'v' (e.g. 'v1.14' -> '1.14').
API_VERSION_NUM = API_VERSION.lstrip('v')
app = tideway.appliance(target, token, api_version=API_VERSION_NUM, ssl_verify=VERIFY_SSL)
twsearch = app.data()
# Connectivity probe only: a failure is reported but does not stop the notebook.
# NOTE(review): `api_about` is read as an attribute and expected to expose
# `status_code` — confirm against the installed Tideway version.
try:
    about = app.api_about
    print('Appliance reachable:', about.status_code)
except Exception as e:
    print('Warning: failed to contact appliance /api/about:', e)


## Helpers

Normalize Data API results into lists of row dictionaries, and run object-format searches with a configurable row `limit`.

In [None]:
def list_table_to_json(table_like: List[List[Any]]):
    """Turn a header-first table into a list of ``{heading: value}`` dicts.

    The first inner list supplies the keys and every following entry is one
    row. Anything that is not a non-empty list whose first element is a list
    yields ``[]``. Rows that cannot be zipped with the header are skipped.
    """
    if (
        not table_like
        or not isinstance(table_like, list)
        or not isinstance(table_like[0], list)
    ):
        return []

    header, *body = table_like
    records = []
    for row in body:
        try:
            record = dict(zip(header, row))
        except Exception:
            # Best effort: a malformed row must not discard the whole table.
            continue
        records.append(record)
    return records

def _rows_from_decoded(data: Any):
    """Normalize an already-decoded value (list or dict) into row dicts."""
    if isinstance(data, list):
        first = data[0] if data else None
        if isinstance(first, list):
            # Header-first table: the first inner list holds the headings.
            return list_table_to_json(data)
        if isinstance(first, dict):
            # Already a list of row dicts; hand it back untouched.
            return data
        return []
    if isinstance(data, dict) and 'headings' in data and 'results' in data:
        table = [data['headings'], *(data.get('results') or [])]
        return list_table_to_json(table)
    return []


def to_rows(payload: Any):
    """Normalize a search result into a list of row dictionaries.

    Accepted shapes:
      * a list of lists (header row first) or a list of dicts;
      * an object with a ``json()`` method whose decoded value has one of the
        shapes listed here (a failing ``json()`` call yields ``[]``);
      * a dict carrying both ``'headings'`` and ``'results'``.

    Every other input yields ``[]``.
    """
    if isinstance(payload, list):
        return _rows_from_decoded(payload)
    if hasattr(payload, 'json'):
        try:
            decoded = payload.json()
        except Exception:
            return []
        return _rows_from_decoded(decoded)
    return _rows_from_decoded(payload)

def tw_search_all(search, query: str, limit: int = 500):
    """Run *query* through ``search.search`` and return normalized rows.

    The query is sent as ``{'query': query}`` with ``format='object'`` and the
    given ``limit``; the response is converted by ``to_rows``. A single call
    is made.
    NOTE(review): whether rows beyond ``limit`` are fetched depends on the
    search object — confirm if complete result sets are required.
    """
    request_body = {'query': query}
    response = search.search(request_body, format='object', limit=limit)
    rows = to_rows(response)
    return rows


## Query

TWQL used by the DisMAL report to summarize run ranges and outcomes.

In [None]:
# TWQL for the discovery-run report; the run cell passes this string unchanged
# as the query.
# - Starts from DiscoveryRun nodes and traverses to ScanRange,
#   DroppedEndpoints and DiscoveryAccess nodes.
# - 'Active Endpoints' adds the result_success, result_skipped, result_error,
#   result_no_access and result_no_response counters, each defaulting to 0.
# - The trailing `processwith show` re-emits the columns and adds endtime as
#   'End Time'.
# NOTE(review): the @4..@11 references look like zero-based positions into the
# first `show` list (@4 = 'Label' ... @11 = 'Scan Kinds') — confirm against the
# TWQL documentation before reordering or inserting columns.
qry_dra = '''
search DiscoveryRun as DiscoveryRun
  with (traverse :::ScanRange as ScanRange),
       (traverse :::DroppedEndpoints as DroppedEndpoints),
       (traverse :::DiscoveryAccess as DiscoveryAccess)
  show valid_ranges as 'Explicit Ranges', label as 'Scan Label',
       range_summary as 'Range Summary', outpost_name as 'Outpost Name',
       #ScanRange.label as 'Label', #ScanRange.scan_kind as 'Scan Kind',
       (#ScanRange.range_strings or #ScanRange.provider) as 'Range',
       recurrenceDescription(#ScanRange.schedule) as 'Schedule',
       total as 'Total Endpoints',
       (result_success or 0) + (result_skipped or 0) + (result_error or 0) +
       (result_no_access or 0) + (result_no_response or 0) as 'Active Endpoints',
       (result_dropped or 0) as 'Dropped',
       unique(#DiscoveryAccess.scan_kind) as 'Scan Kinds'
  processwith show valid_ranges, label, endtime as 'End Time',
       range_summary, outpost_name, @4, @5, @6, @7, total, @9, @10,
       @11 as 'Scan Kinds'
'''


## Run and Preview

Fetch rows, insert the Discovery Instance column, and preview.

In [None]:
# Execute the report query and load the normalized rows into a DataFrame.
rows = tw_search_all(twsearch, qry_dra)
print('Rows:', len(rows))
if rows:
    df = pd.DataFrame(rows)
else:
    df = pd.DataFrame()
# Tag every row with the appliance it came from; an empty frame is left as-is.
if not df.empty:
    df.insert(loc=0, column='Discovery Instance', value=target)
# Last expression of the cell, so the notebook renders the first ten rows.
df.head(10)


## Save to CSV

Writes `discovery_run_analysis.csv` to the output folder prepared in the configuration cell (`output_<sanitized target>` under the folder that holds `config.yaml`).

In [None]:
# Persist the report in the per-appliance output folder, without the index column.
csv_path = output_dir / 'discovery_run_analysis.csv'
OUTPUT_CSV = str(csv_path)
df.to_csv(OUTPUT_CSV, index=False)
print(f'Saved to {OUTPUT_CSV}')
