# Credential Success Report (BMC Discovery)

This notebook reproduces the DisMAL `credential_success` report using the REST API.
It reads connection details from `config.yaml`, executes Discovery queries,
and assembles a CSV matching the CLI’s headers and formatting.

> **NOTE:** This can take a while to run if the appliance holds a large number of DiscoveryAccess nodes.

## Requirements

We use `requests` for HTTP, `pandas` for tabular data, and `PyYAML` to read configuration.
Uncomment the following to install them in your environment.

In [None]:
# %pip install -q requests pandas pyyaml

import pandas as pd
import requests
import yaml
from pathlib import Path
from urllib.parse import urljoin
import ipaddress
import json, os
import math

## Select Appliance (optional)

If your `config.yaml` defines multiple appliances under the `appliances:` list,
set `APPLIANCE_NAME` to one of their names (e.g., 'prod' or 'dev'), or leave it as `None`
and set `APPLIANCE_INDEX` to the position of the entry in that list.
With the defaults (`None` and `0`) the first appliance is used.

In [None]:
# Name of the `appliances:` entry to use; when truthy it takes precedence over APPLIANCE_INDEX.
APPLIANCE_NAME = None   # e.g., 'prod' or 'dev'
# Position in the `appliances:` list; only consulted when APPLIANCE_NAME is not set.
APPLIANCE_INDEX = 0     # integer index if not using name selection

## Configuration (from config.yaml)

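Locates `config.yaml` by searching the current working directory and then each of its parents
(typically this resolves to `../config.yaml`) and reads the target, token/token_file,
API version, and SSL verification preference from it.
Saves the CSV to `output_<target>/credential_success.csv` next to that `config.yaml`.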

In [None]:
# Locate config.yaml relative to this notebook (../config.yaml)
def _find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above *start* that holds ``config.yaml``.

    The search begins with *start* itself and then climbs through its parents.
    When no directory on that path contains the file, ``start.parent`` is
    returned as a fallback.
    """
    candidates = (start, *start.parents)
    return next(
        (folder for folder in candidates if (folder / 'config.yaml').exists()),
        start.parent,
    )

# Load the YAML configuration found by walking up from the working directory.
repo_root = _find_repo_root(Path.cwd())
config_path = repo_root / 'config.yaml'
with open(config_path, 'r', encoding='utf-8') as fh:
    # An empty file makes safe_load return None, hence the `or {}`.
    cfg = yaml.safe_load(fh) or {}

# Appliance selection: by name when APPLIANCE_NAME is set, otherwise by index.
apps = cfg.get('appliances') or []
selected = None
if isinstance(apps, list) and apps:
    if APPLIANCE_NAME:
        selected = next((a for a in apps if a.get('name') == APPLIANCE_NAME), None)
        if selected is None:
            raise ValueError(f"No appliance named '{APPLIANCE_NAME}' in config.yaml")
    else:
        try:
            selected = apps[int(APPLIANCE_INDEX)]
        except (IndexError, TypeError, ValueError):
            # Best effort: an unusable index falls back to the first appliance.
            selected = apps[0]

# Every setting is read from the selected appliance first, then from the top level.
target = ((selected or {}).get('target') or cfg.get('target') or '').strip()
if not target:
    raise ValueError('config.yaml missing "target"')
BASE_URL = target if ('://' in target) else f'https://{target}'

token = (((selected or {}).get('token') or cfg.get('token') or '').strip())
token_file = (selected or {}).get('token_file') or cfg.get('token_file') or cfg.get('f_token')
if not token and token_file:
    tf_path = Path(token_file)
    if not tf_path.is_absolute():
        # Relative token paths are resolved against the directory holding config.yaml.
        tf_path = repo_root / tf_path
    with open(tf_path, 'r', encoding='utf-8') as tf:
        token = tf.read().strip()
if not token:
    raise ValueError('API token not found in config.yaml (token or token_file)')

API_VERSION = str((selected or {}).get('api_version') or cfg.get('api_version') or 'v1.14')
VERIFY_SSL = bool((selected or {}).get('verify_ssl', cfg.get('verify_ssl', True)))

# Make the target safe to use as part of a directory name.
sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
output_dir = repo_root / f'output_{sanitized}'
output_dir.mkdir(parents=True, exist_ok=True)

print('Appliance     :', (selected or {}).get('name', '(single)'))
print('Base URL      :', BASE_URL)
print('API Version   :', API_VERSION)
print('Verify SSL    :', VERIFY_SSL)
print('Output folder :', output_dir)
# Never echo the secret itself: notebook outputs are saved with the file and easily shared.
print('Token         :', '(loaded, value hidden)')

## Session and API helpers

We create an authenticated session and small helpers to call endpoints:
- `api_url(path)` builds full URLs
- `get_json(url)` fetches JSON via GET
- `post_search(query)` executes TWQL via the Data Search API
- `normalize_results(payload)` returns a list of dicts from varied formats

In [None]:
# Shared HTTP session: every request carries the bearer token and asks for JSON.
session = requests.Session()
session.verify = VERIFY_SSL
if token.lower().startswith('bearer '):
    # The configured token already includes the scheme prefix.
    auth_value = token
else:
    auth_value = f'Bearer {token}'
session.headers.update({'Authorization': auth_value, 'Accept': 'application/json'})

def api_url(path: str) -> str:
    """Build the full URL for *path* under ``<BASE_URL>/api/<API_VERSION>/``.

    Leading slashes on *path* are removed so that it is always joined
    relative to the versioned API root.
    """
    root = f"{BASE_URL.rstrip('/')}/api/{API_VERSION}/"
    relative = path.lstrip('/')
    return urljoin(root, relative)

def get_json(url: str, **kwargs):
    """GET *url* with the shared session and return the decoded JSON body.

    Extra keyword arguments are passed straight to ``session.get``. Any status
    other than 200, or a body that cannot be decoded, is reported with
    ``print`` and yields an empty dict.
    """
    response = session.get(url, **kwargs)
    if response.status_code == 200:
        try:
            return response.json()
        except Exception as exc:
            print('Failed to decode JSON:', exc)
            return {}
    print(f'Error {response.status_code} GET {url}: {response.text[:200]}')
    return {}

def normalize_results(raw):
    """Flatten the response shapes handled here into a list of rows.

    Handled shapes:
    - a dict whose ``results`` is a table object (``headers``/``columns`` plus
      ``rows``/``data``): one dict per row;
    - a dict whose ``results`` is a list, or a bare list: used as the row list;
    - a row list whose first element is itself a list: that first element is
      taken as the header row and the remainder become dicts.
    Anything else produces an empty list.
    """
    if isinstance(raw, list):
        candidate = raw
    elif isinstance(raw, dict):
        inner = raw.get('results')
        if isinstance(inner, dict):
            # Nested table object: column names and row values travel separately.
            names = inner.get('headers') or inner.get('columns')
            values = inner.get('rows') or inner.get('data')
            if isinstance(names, list) and isinstance(values, list):
                return [dict(zip(names, record)) for record in values]
        candidate = inner if isinstance(inner, list) else []
    else:
        candidate = []

    if candidate and isinstance(candidate[0], list):
        # List-of-lists table: the first entry is the header row.
        names, *body = candidate
        return [dict(zip(names, record)) for record in body]
    return candidate

def _is_result_block(item) -> bool:
    """Return True when *item* is a dict carrying a ``results`` list (a search result block)."""
    return isinstance(item, dict) and isinstance(item.get('results'), list)


def post_search(query: str, *, limit: int | None = None, page_size: int = 500):
    """Execute TWQL with simple pagination.

    Args:
        query: The TWQL search to run.
        limit: ``0`` fetches everything by paging in steps of *page_size*;
            ``None`` sends no limit (server default); any other value is sent
            as the limit of a single request.
        page_size: Rows requested per page when paging.

    Returns:
        The normalised response: a list of row dicts, or, when the server
        answers with result blocks (dicts holding ``headings``/``results``),
        a list whose first block holds the rows of every fetched page.
        Callers in this notebook only read the first block, so later pages are
        merged into it rather than appended as extra blocks.

    An HTTP status of 400 or above is reported with ``print`` and ends the
    call with whatever the error body normalises to.

    NOTE(review): limit/offset/format are sent in the JSON body, as before.
    Confirm against the Discovery REST API reference that the appliance honours
    them there and not only as URL query parameters.
    """
    url = api_url('data/search')
    fetch_all = (limit == 0)
    collected = []
    offset = 0
    last_page = None
    while True:
        payload = {'query': query, 'format': 'object'}
        if fetch_all:
            payload['limit'] = page_size
            if offset:
                payload['offset'] = offset
        elif limit is not None:
            payload['limit'] = limit
        r = session.post(url, json=payload)
        failed = r.status_code >= 400
        if failed:
            print(f'Error {r.status_code} POST {url}: {r.text[:200]}')
        try:
            data = r.json()
        except ValueError:
            # Body was not JSON; treat it as "no rows".
            data = []
        rows = normalize_results(data)
        if failed or not fetch_all:
            return rows

        if rows and _is_result_block(rows[0]):
            # Block-shaped response: the page size is the number of rows inside
            # the block, not the number of blocks (which is normally 1).
            block = rows[0]
            page = block['results']
            reported = block.get('offset')
            if offset and isinstance(reported, int) and reported != offset:
                print(f'Warning: server returned offset {reported} instead of {offset}; stopping pagination.')
                break
            if offset and reported is None and page == last_page:
                # No offset echo to check against and the page repeats: assume
                # the offset was ignored rather than loop forever.
                print('Warning: server returned the same page twice; stopping pagination.')
                break
            if not collected:
                # Copy so that later pages can be appended without aliasing `page`.
                collected = [dict(block, results=list(page))] + rows[1:]
            else:
                collected[0]['results'].extend(page)
            last_page = page
        else:
            page = rows
            collected.extend(rows)

        if not page or len(page) < page_size:
            break
        offset += page_size
    return collected

## Queries used by the report

These TWQL queries mirror DisMAL’s credential success workflow.

In [None]:
# The session/device queries alias the credential reference as 'uuid' and end with
# countUnique(1,0); presumably that collapses duplicate rows and adds a count column
# (session_get reads it as 'Count'/'count') -- confirm against the TWQL function reference.

# Successful SessionResult nodes per credential (or slave), session type and outpost.
qry_credential_success = '''
search SessionResult where success
show (credential or slave) as 'SessionResult.credential_or_slave',
     (credential or slave) as 'uuid',
     session_type as 'SessionResult.session_type',
     outpost as 'SessionResult.outpost'
processwith countUnique(1,0)
'''
# Same columns as above, for SessionResult nodes that did not succeed.
qry_credential_failure = '''
search SessionResult where not success
show (credential or slave) as 'SessionResult.credential_or_slave',
     (credential or slave) as 'uuid',
     session_type as 'SessionResult.session_type',
     outpost as 'SessionResult.outpost'
processwith countUnique(1,0)
'''
# Successful DeviceInfo nodes whose DiscoveryAccess has no related SessionResult
# (the nodecount(...) = 0 condition), by last credential and access method.
qry_deviceinfo_success = '''
search DeviceInfo where method_success
  and nodecount(traverse DiscoveryResult:DiscoveryAccessResult:DiscoveryAccess:DiscoveryAccess
                traverse DiscoveryAccess:Metadata:Detail:SessionResult) = 0
show (last_credential or last_slave) as 'DeviceInfo.last_credential',
     (last_credential or last_slave) as 'uuid',
     access_method as 'DeviceInfo.access_method'
process with countUnique(1,0)
'''
# The *_7d variants add a time_index filter of 7*24*3600*10000000 before currentTime();
# presumably seven days in 100 ns units -- TODO confirm the time unit.
qry_credential_success_7d = '''
search SessionResult where success and time_index > (currentTime() - 7*24*3600*10000000)
show (credential or slave) as 'SessionResult.credential_or_slave',
     (credential or slave) as 'uuid',
     session_type as 'SessionResult.session_type',
     outpost as 'SessionResult.outpost'
processwith countUnique(1,0)
'''
qry_credential_failure_7d = '''
search SessionResult where not success and time_index > (currentTime() - 7*24*3600*10000000)
show (credential or slave) as 'SessionResult.credential_or_slave',
     (credential or slave) as 'uuid',
     session_type as 'SessionResult.session_type',
     outpost as 'SessionResult.outpost'
processwith countUnique(1,0)
'''
qry_deviceinfo_success_7d = '''
search DeviceInfo where method_success
  and nodecount(traverse DiscoveryResult:DiscoveryAccessResult:DiscoveryAccess:DiscoveryAccess
                traverse DiscoveryAccess:Metadata:Detail:SessionResult) = 0
  and time_index > (currentTime() - 7*24*3600*10000000)
show (last_credential or last_slave) as 'DeviceInfo.last_credential',
     (last_credential or last_slave) as 'uuid',
     access_method as 'DeviceInfo.access_method'
process with countUnique(1,0)
'''
# Scheduled scan ranges; the 'Label' and 'Scan_Range' columns are the ones
# labels_covering_ranges() reads.
qry_scanrange = '''
search ScanRange where scan_type = 'Scheduled'
show range_id as 'ID', label as 'Label', (range_strings or provider) as 'Scan_Range',
     scan_level as 'Level', recurrenceDescription(schedule) as 'Date_Rules'
'''
# Exclude ranges from the '_System' partition, with the same 'Label'/'Scan_Range' column names.
qry_excludes = '''
search in '_System' ExcludeRange
show exrange_id as 'ID', name as 'Label', range_strings as 'Scan_Range',
     recurrenceDescription(schedule) as 'Date_Rules'
'''

## Helper functions (formatting and scan membership)

- `session_get(rows)` aggregates counts by credential UUID and stores an access method hint
- `parse_ranges(range_str)` parses comma-separated CIDRs into ipaddress networks
- `labels_covering_ranges(entries, cred_ranges)` returns scan labels that include any credential range

In [None]:
def search_to_df(results):
    """
    Turn a post_search() response into a DataFrame.

    When the first element is a result block (a dict with both "results" and
    "headings"), its rows are loaded with the headings as column names.
    Any other non-empty list is handed to DataFrame() unchanged; empty or
    non-list input gives an empty DataFrame.
    """
    if not (results and isinstance(results, list)):
        return pd.DataFrame()
    head = results[0]
    is_block = isinstance(head, dict) and "results" in head and "headings" in head
    if not is_block:
        # Fallback (rare)
        return pd.DataFrame(results)
    return pd.DataFrame(head["results"], columns=head["headings"])

def preview_search(results, n=10):
    """
    Show the leading n rows of a search response and hand back the full frame.

    The response is converted with search_to_df(); the returned DataFrame is
    the complete one, not just the displayed head.
    """
    frame = search_to_df(results)
    display(frame.head(n))
    return frame

def build_map_from_search(results, dedupe=True):
    """
    Produce the {uuid: [restype, count]} mapping for a post_search() response.

    The response is converted with search_to_df(), identical rows are dropped
    unless dedupe is false, and the frame is passed to session_get().
    An empty frame gives an empty dict.
    """
    frame = search_to_df(results)
    if frame.empty:
        return {}
    unique = frame.drop_duplicates() if dedupe else frame
    return session_get(unique)

def show_map_sample(mapping, k=5):
    """
    Print the size of the mapping, then a dict holding its first k items.
    """
    print(len(mapping))
    leading = list(mapping.items())[:k]
    print(dict(leading))

def session_get(results):
    """Aggregate search rows into ``{credential_uuid: [restype, count]}``.

    Args:
        results: A DataFrame or a list of row dicts. Rows are expected to carry
            the credential under 'SessionResult.credential_or_slave',
            'DeviceInfo.last_credential' or 'uuid', the session type / access
            method, and a 'Count' (or 'count') column.

    Returns:
        A dict keyed by the lower-cased last path segment of the credential
        reference. When a credential appears in several rows (for example one
        row per outpost or session type) the counts are summed and the first
        non-empty restype is kept, so no row silently replaces another.
        Rows that are not dicts or have no credential are skipped; counts that
        cannot be converted to int are treated as 0.
    """
    if isinstance(results, pd.DataFrame):
        # Cast to object first so missing cells really become None instead of
        # staying NaN in numeric columns.
        rows = results.astype(object).where(pd.notna(results), None).to_dict(orient='records')
    else:
        rows = results or []

    mapping = {}
    for r in rows:
        if not isinstance(r, dict):
            continue

        uuid = (
            r.get('SessionResult.credential_or_slave')
            or r.get('DeviceInfo.last_credential')
            or r.get('uuid')
        )
        # NaN is truthy, so check it explicitly to avoid a bogus 'nan' key.
        if not uuid or (isinstance(uuid, float) and pd.isna(uuid)):
            continue

        key = str(uuid).split('/')[-1].lower()

        restype = r.get('SessionResult.session_type') or r.get('DeviceInfo.access_method')
        # Count can be labelled differently; try common variants
        raw_count = r.get('Count', r.get('count', 0)) or 0
        try:
            count = int(raw_count)
        except (TypeError, ValueError, OverflowError):
            count = 0

        entry = mapping.get(key)
        if entry is None:
            mapping[key] = [restype, count]
        else:
            entry[0] = entry[0] or restype
            entry[1] += count

    return mapping

import pandas as pd
from ipaddress import ip_network

def parse_ranges(ranges):
    """Parse IP range text into a list of ``ip_network`` objects.

    Args:
        ranges: A string such as '10.0.0.0/8,192.168.0.0/16,::/0' (commas or
            semicolons separate entries), a list/tuple/set of such strings, or
            None/NaN.

    Returns:
        Networks in input order. Each entry may be a single address, a CIDR
        (host bits are tolerated), or an explicit 'first-last' address range,
        which is expanded to the networks that exactly cover it. The common
        typo '::0' is read as '::/0'. Entries that cannot be parsed
        (hostnames, wildcards, reversed ranges, ...) are skipped.
    """
    # Local import keeps this block self-contained.
    from ipaddress import ip_address, summarize_address_range

    def _missing(value):
        return value is None or (isinstance(value, float) and pd.isna(value))

    if _missing(ranges):
        return []

    if isinstance(ranges, str):
        sources = [ranges]
    elif isinstance(ranges, (list, tuple, set, frozenset)):
        sources = [str(r) for r in ranges if not _missing(r)]
    else:
        sources = [str(ranges)]

    parts = [
        piece.strip()
        for source in sources
        for piece in source.replace(';', ',').split(',')
        if piece.strip()
    ]

    nets = []
    for p in parts:
        # normalise common typos
        if p == '::0':
            p = '::/0'
        try:
            nets.append(ip_network(p, strict=False))
            continue
        except ValueError:
            pass

        # Not an address or CIDR: try an explicit "first-last" range.
        first, dash, last = p.partition('-')
        if not dash:
            continue
        try:
            nets.extend(summarize_address_range(ip_address(first.strip()), ip_address(last.strip())))
        except (ValueError, TypeError):
            # malformed, reversed or mixed-family range
            continue
    return nets

def to_rows(entries):
    """
    Flatten post_search(...) output into a list of row dicts.

    A DataFrame is converted to records; a list whose first element is a
    headings/results block, or such a block passed directly, is expanded via
    its headings. Any other list is returned as given, and everything else
    yields an empty list.
    """
    def _records(frame):
        # Same missing-value cleanup for every branch that goes through pandas.
        return frame.replace({pd.NA: None}).where(pd.notna(frame), None).to_dict(orient='records')

    if isinstance(entries, pd.DataFrame):
        return _records(entries)

    if isinstance(entries, list):
        candidate = entries[0] if entries else None
    else:
        candidate = entries

    if isinstance(candidate, dict) and 'headings' in candidate and 'results' in candidate:
        return _records(pd.DataFrame(candidate['results'], columns=candidate['headings']))

    # fallback: assume it's already list[dict]
    return entries if isinstance(entries, list) else []

def labels_covering_ranges(entries, cred_ranges):
    """
    Return the sorted, de-duplicated labels whose Scan_Range overlaps cred_ranges.

    entries: post_search(...) result or a DataFrame/list[dict] with columns: Label, Scan_Range
    cred_ranges: string '10.0.0.0/8,::/0' or list of CIDR strings

    Rows without a label or without any parseable range are ignored. An empty
    list is returned when cred_ranges contains no parseable network.
    """
    wanted = parse_ranges(cred_ranges)
    if not wanted:
        return []

    hits = set()
    for entry in to_rows(entries):
        name = entry.get('Label')
        candidates = parse_ranges(entry.get('Scan_Range'))  # may be str or list
        if not (candidates and name):
            continue
        # Networks are only compared within the same address family.
        if any(
            cred_net.version == scan_net.version and cred_net.overlaps(scan_net)
            for cred_net in wanted
            for scan_net in candidates
        ):
            hits.add(name)

    return sorted(hits)

## Fetch reference data

- Vault credentials (labels, usernames, ranges, enabled/usage)
- Scan ranges and Exclude ranges
- Outpost list and (credential -> outpost) mappings

In [None]:
# Fetch the credential list from the vault endpoint and preview it.
vault_url = api_url('/vault/credentials')
vault_creds = normalize_results(get_json(vault_url))
if isinstance(vault_creds, list) and vault_creds:
    df = pd.DataFrame(vault_creds)
    display(df.head(10))
else:
    # The report loop iterates over vault_creds, so nothing here means no rows later.
    print('Warning: vault credentials not returned; report may be incomplete.')

In [None]:
# Scan ranges and excludes
# preview_search() unpacks the headings/results block, so the preview shows the
# result rows rather than a single-row frame describing the response envelope.
scan_ranges = post_search(qry_scanrange, limit=0)
df = preview_search(scan_ranges)
exclude_ranges = post_search(qry_excludes, limit=0)
df = preview_search(exclude_ranges)

In [None]:
# Fetch the non-deleted outposts and index their URLs by identifier.
outposts = normalize_results(get_json(api_url('discovery/outposts?deleted=false')))
id_to_url = {}
for op in outposts:
    # The identifier may sit under any of these keys; the first truthy one wins.
    if op_id := (op.get('id') or op.get('outpost') or op.get('outpost_id') or op.get('uuid')):
        id_to_url[str(op_id)] = op.get('url')

df = pd.DataFrame(outposts)
display(df.head(10))

In [None]:
# Run this search without countUnique, as that aggregation can be intensive and
# cause the API to time out; duplicates are dropped client-side instead.
qry_outpost_credentials = '''
search SessionResult
show credential, credential as 'uuid', outpost
'''
cred_outpost_map = {}  # uuid -> {'id': outpost_id, 'url': url}
op_creds = post_search(qry_outpost_credentials, limit=0)

# search_to_df() copes with an empty or failed search (it returns an empty
# frame), where indexing op_creds[0] directly would raise IndexError.
df = search_to_df(op_creds)

# drop duplicates
df_unique = df.drop_duplicates()

display(df_unique.head(10))  # show first 10 unique rows

In [None]:
# Build credential -> outpost info from the unique (credential, outpost) rows.
for row in df_unique.itertuples(index=False):
    uuid = getattr(row, 'credential', None)
    if pd.isna(uuid) or not uuid:
        uuid = getattr(row, 'uuid', None)
    opid = getattr(row, 'outpost', None)
    # Missing cells may be None or NaN; NaN is truthy, so test both ways.
    if pd.isna(uuid) or not uuid or pd.isna(opid) or not opid:
        continue
    # Use the same key normalisation as session_get() and the report loop
    # (last path segment, lower-cased) so the later lookup actually matches.
    key = str(uuid).split('/')[-1].lower()
    cred_outpost_map[key] = {'id': str(opid), 'url': id_to_url.get(str(opid))}

df_map = pd.DataFrame.from_dict(cred_outpost_map, orient='index')
display(df_map.head(10))

## Execute success/failure queries

We gather counts for all time and for the last 7 days, from SessionResult and DeviceInfo.

In [None]:
# Success (all time): successful SessionResult rows, paged with limit=0.
credsux_results = post_search(qry_credential_success, limit=0)
preview_search(credsux_results)  # optional: displays the first 10 rows
# suxCreds maps credential uuid -> [session type, count]; read by the report loop.
suxCreds = build_map_from_search(credsux_results)
show_map_sample(suxCreds)

In [None]:
# DeviceInfo success (all time): DeviceInfo rows with no related SessionResult.
devinfosux = post_search(qry_deviceinfo_success, limit=0)
preview_search(devinfosux)       # optional: displays the first 10 rows
# suxDev maps credential uuid -> [access method, count]; read by the report loop.
suxDev = build_map_from_search(devinfosux)
show_map_sample(suxDev)

In [None]:
# Credential failure (all time): SessionResult rows that did not succeed.
credfail_results = post_search(qry_credential_failure, limit=0)
preview_search(credfail_results) # optional: displays the first 10 rows
# failCreds maps credential uuid -> [session type, count]; read by the report loop.
failCreds = build_map_from_search(credfail_results)
show_map_sample(failCreds)

In [None]:
# Success (last 7d): same as the all-time success query, with the 7-day time_index filter.
credsux7_results = post_search(qry_credential_success_7d, limit=0)
preview_search(credsux7_results) # optional: displays the first 10 rows
# suxCreds7 maps credential uuid -> [session type, count]; read by the report loop.
suxCreds7 = build_map_from_search(credsux7_results)
show_map_sample(suxCreds7)

In [None]:
# DeviceInfo success (last 7d): same as the all-time DeviceInfo query, with the 7-day time_index filter.
devinfosux7 = post_search(qry_deviceinfo_success_7d, limit=0)
preview_search(devinfosux7)      # optional: displays the first 10 rows
# suxDev7 maps credential uuid -> [access method, count]; read by the report loop.
suxDev7 = build_map_from_search(devinfosux7)
show_map_sample(suxDev7)

In [None]:
# Credential failure (last 7d): same as the all-time failure query, with the 7-day time_index filter.
credfail7_results = post_search(qry_credential_failure_7d, limit=0)
preview_search(credfail7_results) # optional: displays the first 10 rows
# failCreds7 maps credential uuid -> [session type, count]; read by the report loop.
failCreds7 = build_map_from_search(credfail7_results)
show_map_sample(failCreds7)

## Build the report rows

Loop through vault credentials, compute success/failure counts and percentages,
attach scheduling/exclusion coverage, and outpost info.

In [None]:
# The scan and exclude search results are the same for every credential, so
# flatten them to row dicts once here; labels_covering_ranges() accepts a
# list of dicts and would otherwise rebuild them on every loop iteration.
scan_rows = to_rows(scan_ranges)
exclude_rows = to_rows(exclude_ranges)

rows = []
for cred in vault_creds or []:
    if not isinstance(cred, dict):
        continue
    idx = cred.get('index')
    uuid = (cred.get('uuid') or '').strip()
    if not uuid:
        continue
    # Same key normalisation as session_get(): last path segment, lower-cased.
    uuid_key = uuid.split('/')[-1].lower()
    label = cred.get('label')
    enabled = bool(cred.get('enabled'))
    types = cred.get('types')
    usage = cred.get('usage')
    # best-effort username field
    username = cred.get('username') or cred.get('snmp.v3.securityname') or cred.get('aws.access_key_id') or cred.get('azure.application_id')
    ip_range = cred.get('ip_range')
    ip_exclusion = cred.get('ip_exclusion')
    status = 'Enabled' if enabled else 'Disabled'

    # Each lookup yields [restype, count]; credentials never seen get [None, 0].
    sessions = suxCreds.get(uuid_key, [None, 0])
    devinfos = suxDev.get(uuid_key, [None, 0])
    failure = failCreds.get(uuid_key, [None, 0])
    sessions7 = suxCreds7.get(uuid_key, [None, 0])
    devinfos7 = suxDev7.get(uuid_key, [None, 0])
    failure7 = failCreds7.get(uuid_key, [None, 0])

    # Active if present in any mapping or any count present
    active = (uuid_key in suxCreds or uuid_key in suxDev or uuid_key in failCreds or
              uuid_key in suxCreds7 or uuid_key in suxDev7 or uuid_key in failCreds7 or
              any(x[1] for x in [sessions, devinfos, failure, sessions7, devinfos7, failure7]))

    # Successes combine SessionResult and DeviceInfo counts; percentages are
    # fractions in the range 0..1, and 0.0 when nothing was attempted.
    success_all = int((sessions[1] or 0)) + int((devinfos[1] or 0))
    fails_all = int(failure[1] or 0)
    total = success_all + fails_all
    percent_all = (success_all / total) if total else 0.0
    success7 = int((sessions7[1] or 0)) + int((devinfos7[1] or 0))
    fails7 = int(failure7[1] or 0)
    total7 = success7 + fails7
    percent7 = (success7 / total7) if total7 else 0.0

    scheduled_scans = labels_covering_ranges(scan_rows, ip_range)
    excluded_scans = labels_covering_ranges(exclude_rows, ip_range)

    op_info = cred_outpost_map.get(uuid_key, {})
    outpost_id = op_info.get('id')
    outpost_url = op_info.get('url')
    # Prefer the session type actually observed; fall back to the vault's types.
    proto = sessions[0] or failure[0] or types

    if active:
        rows.append([
            label, idx, uuid, username, proto, success_all, fails_all, percent_all, percent7,
            status, usage, ip_range, ip_exclusion, scheduled_scans or None, excluded_scans or None,
            outpost_id, outpost_url
        ])
    else:
        rows.append([
            label, idx, uuid, username, types, 0, 0, 0.0, 0.0,
            f'Credential appears to not be in use ({status})', usage, ip_range, ip_exclusion,
            scheduled_scans or None, excluded_scans or None, outpost_id, outpost_url
        ])

# Row lists above follow headers[1:] in order; the first column is filled in
# afterwards with the same target value for every row.
headers = [
    'Discovery Instance', 'Credential', 'Index', 'UUID', 'Login ID', 'Protocol',
    'Successes', 'Failures', 'Success % All Time', 'Success % 7 Days', 'State',
    'Usage', 'Ranges', 'Excludes', 'Scheduled Scans', 'Exclusion Lists',
    'Outpost', 'Outpost URL'
]
df_out = pd.DataFrame(rows, columns=headers[1:])
df_out.insert(0, 'Discovery Instance', target)
df_out.head()

## Save to CSV

Writes the report to the standard output folder as used by the CLI.

In [None]:
# Write the report, without the DataFrame index, into the per-target output folder.
OUTPUT_CSV = str(output_dir.joinpath('credential_success.csv'))
df_out.to_csv(path_or_buf=OUTPUT_CSV, index=False)
print(f'Saved to {OUTPUT_CSV}')