# Discovery Schedules (with Credentials)

This notebook reproduces the DisMAL `schedules` report: it lists scheduled Scan Ranges and Exclude Ranges,
and, for each, counts how many vault credentials would apply to those IP ranges.
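It reads settings from the nearest `config.yaml` found in the current directory or one of its parents (typically `../config.yaml`) and writes `schedules.csv` to an `output_<target>` folder next to that file.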

## Requirements
We use `tideway` (Discovery SDK), plus `pandas` and `PyYAML`; `ipaddress` is part of the Python standard library and needs no installation.
Uncomment the `%pip install` if needed.

In [None]:
# %pip install -q tideway pandas pyyaml

import os, sys, subprocess
from pathlib import Path
from typing import Any, Dict, List, Tuple
import ipaddress
import pandas as pd
import yaml


## Select Appliance (optional)
If your `config.yaml` defines multiple `appliances:`, set `APPLIANCE_NAME` or `APPLIANCE_INDEX`.

In [None]:
# Appliance selection, consumed by the configuration cell when config.yaml has
# a non-empty `appliances:` list.
# - A non-empty APPLIANCE_NAME takes precedence: it must equal the `name` of one
#   of the listed appliances, otherwise a ValueError is raised.
# - When APPLIANCE_NAME is None/empty, APPLIANCE_INDEX picks the entry by
#   position; an invalid index falls back to the first appliance.
APPLIANCE_NAME = None   # e.g., 'prod' or 'dev'
APPLIANCE_INDEX = 0     # integer index if not using name selection


## Configuration (from config.yaml)
Locates the repo root, reads connection details, and initialises the Tideway SDK endpoints.

In [None]:
def _find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` holding ``config.yaml``.

    ``start`` itself is checked first, then each of its parents from the
    closest outwards. When none of them contains ``config.yaml`` the parent
    of ``start`` is returned as a fallback.
    """
    candidates = (start, *start.parents)
    hit = next((d for d in candidates if (d / 'config.yaml').exists()), None)
    return start.parent if hit is None else hit

# --- Load settings -----------------------------------------------------------
# The repo root is the nearest directory (cwd or a parent) containing config.yaml.
repo_root = _find_repo_root(Path.cwd())
# Use a context manager so the config file handle is closed deterministically
# instead of being left to the garbage collector.
with open(repo_root / 'config.yaml', 'r') as cfg_file:
    cfg = yaml.safe_load(cfg_file) or {}

# --- Pick the appliance entry (if config.yaml defines an `appliances:` list) --
apps = cfg.get('appliances') or []
selected = None
if isinstance(apps, list) and apps:
    if APPLIANCE_NAME:
        # Name-based selection is strict: an unknown name is an error.
        selected = next((a for a in apps if a.get('name') == APPLIANCE_NAME), None)
        if selected is None:
            raise ValueError(f"No appliance named '{APPLIANCE_NAME}' in config.yaml")
    else:
        # Index-based selection is lenient: a bad index falls back to the first entry.
        try:
            selected = apps[int(APPLIANCE_INDEX)]
        except Exception:
            selected = apps[0]

# --- Connection details: per-appliance values win over top-level values -------
target = ((selected or {}).get('target') or cfg.get('target') or '').strip()
if not target:
    raise ValueError('config.yaml missing "target"')
token = (((selected or {}).get('token') or cfg.get('token') or '').strip())
token_file = (selected or {}).get('token_file') or cfg.get('token_file') or cfg.get('f_token')
if not token and token_file:
    tf_path = Path(token_file)
    if not tf_path.is_absolute():
        # Relative token paths are resolved against the repo root, not the cwd.
        tf_path = repo_root / tf_path
    # read_text() opens and closes the file for us (no leaked handle).
    token = tf_path.read_text().strip()
if not token:
    raise ValueError('API token not found in config.yaml (token or token_file)')
API_VERSION = str((selected or {}).get('api_version') or cfg.get('api_version') or 'v1.14')
VERIFY_SSL = bool((selected or {}).get('verify_ssl', cfg.get('verify_ssl', True)))

# --- Output folder: output_<target> with '.', ':' and '/' replaced by '_' ------
sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
output_dir = repo_root / f'output_{sanitized}'
output_dir.mkdir(parents=True, exist_ok=True)

# Import tideway from pip; install if missing
try:
    import tideway  # type: ignore
except Exception:
    print('Installing tideway via pip...')
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'tideway'])
    import tideway  # retry

# The SDK is given the version without its leading 'v' (e.g. 'v1.14' -> '1.14').
API_VERSION_NUM = API_VERSION.lstrip('v')
app = tideway.appliance(target, token, api_version=API_VERSION_NUM, ssl_verify=VERIFY_SSL)
twsearch = app.data()
twcreds = app.credentials()

print('Base Host      :', target)
print('API Version    :', API_VERSION)
print('Verify SSL     :', VERIFY_SSL)
print('Output folder  :', output_dir)


## TWQL Queries
Mirrors `core/queries.py` for scheduled Scan Ranges and Exclude Ranges.

In [None]:
# TWQL for scan ranges, restricted to those whose scan_type is 'Scheduled'.
# The column aliases ('ID', 'Label', 'Scan_Range', 'Level', 'Date_Rules') are
# the keys the report-building code reads from each result row.
# 'Scan_Range' falls back to `provider` when `range_strings` is empty.
qry_scanrange = '''
search ScanRange where scan_type = 'Scheduled'
show
  range_id as 'ID',
  label as 'Label',
  (range_strings or provider) as 'Scan_Range',
  scan_level as 'Level',
  recurrenceDescription(schedule) as 'Date_Rules'
'''

# TWQL for exclude ranges (searched with `in '_System'`). It uses the same
# aliases as the scan-range query but has no 'Level' column, so the report
# leaves the scan level empty for these rows.
qry_excludes = '''
search in '_System' ExcludeRange
show
  exrange_id as 'ID',
  name as 'Label',
  range_strings as 'Scan_Range',
  recurrenceDescription(schedule) as 'Date_Rules'
'''


## Helpers
Normalise responses and evaluate range/credential overlaps.

In [None]:
def _tw_to_rows(payload: Any) -> List[Dict[str, Any]]:
    """Normalise a search response into a list of row dictionaries.

    ``payload`` may be an object exposing ``.json()`` or already-decoded data.
    Supported shapes:

    * a dict with ``headings`` and ``results`` -> one dict per result row;
    * a list whose first element is such a dict -> rows of that first element;
    * a list whose first element is any other dict -> returned unchanged.

    Anything else (including a ``.json()`` call that raises) yields ``[]``.
    """
    data = payload
    if hasattr(payload, 'json'):
        try:
            data = payload.json()
        except Exception:
            return []

    def _is_table(obj) -> bool:
        # A "table" carries column names plus positional result rows.
        return isinstance(obj, dict) and 'results' in obj and 'headings' in obj

    def _expand(table) -> List[Dict[str, Any]]:
        columns = table['headings']
        return [dict(zip(columns, values)) for values in (table.get('results') or [])]

    if isinstance(data, list):
        if not data:
            return []
        first = data[0]
        if _is_table(first):
            return _expand(first)
        return data if isinstance(first, dict) else []
    if _is_table(data):
        return _expand(data)
    return []

def _parse_tokens(value) -> Tuple[bool, List[Tuple[str, Any]]]:
    """Parse IP range text into ``(wildcard, items)``.

    ``value`` is a comma-separated string or a list of such strings (non-string
    list entries are ignored); any other type yields no tokens.

    ``wildcard`` is True when a match-everything token is present. ``items``
    holds one tuple per parsable token, in input order:

    * ``('range', (start_int, end_int, version))`` for ``a-b``
    * ``('network', (ip_network, version))`` for CIDR notation
    * ``('single', (ip_int, version))`` for a lone address

    Tokens that fit none of these forms are dropped silently.
    """
    if isinstance(value, str):
        chunks = [value]
    elif isinstance(value, list):
        chunks = [entry for entry in value if isinstance(entry, str)]
    else:
        chunks = []
    tokens = [piece.strip() for chunk in chunks for piece in chunk.split(',') if piece.strip()]

    wildcard = False
    items: List[Tuple[str, Any]] = []
    for tok in tokens:
        if tok in ('0.0.0.0/0', '::/0', '0.0.0.0/0,::/0'):
            wildcard = True
            continue

        entry = None
        # Try the most specific notation first; fall through on parse failure.
        if '-' in tok:
            low_text, high_text = tok.split('-', 1)
            try:
                low = ipaddress.ip_address(low_text.strip())
                high = ipaddress.ip_address(high_text.strip())
            except Exception:
                pass
            else:
                entry = ('range', (int(low), int(high), low.version))
        if entry is None and '/' in tok:
            try:
                network = ipaddress.ip_network(tok, strict=False)
            except Exception:
                pass
            else:
                entry = ('network', (network, network.version))
        if entry is None:
            try:
                address = ipaddress.ip_address(tok)
            except Exception:
                continue
            entry = ('single', (int(address), address.version))
        items.append(entry)
    return wildcard, items

def _match_endpoint(ep: str, wildcard: bool, items) -> bool:
    """Tell whether endpoint ``ep`` is covered by ``wildcard`` or any of ``items``.

    ``items`` uses the ``(kind, data)`` tuples produced by ``_parse_tokens``.
    A wildcard matches unconditionally (even an unparsable ``ep``); otherwise
    ``ep`` must be a valid IP address that falls inside an item of the same
    IP version. Unknown item kinds are ignored.
    """
    if wildcard:
        return True
    try:
        addr = ipaddress.ip_address(ep)
    except Exception:
        return False
    number, version = int(addr), addr.version

    def _covers(kind, data) -> bool:
        if kind == 'network':
            network, net_version = data
            return version == net_version and addr in network
        if kind == 'range':
            low, high, range_version = data
            return version == range_version and low <= number <= high
        if kind == 'single':
            value, single_version = data
            return version == single_version and number == value
        return False

    return any(_covers(kind, data) for kind, data in items)


## Build report
- Load vault credentials and derive their IP matchers.
- Query Exclude Ranges and Scan Ranges; for each, count matching credentials.

In [None]:
# Vault credentials -> (uuid, wildcard, items, label)
cred_endpoint = twcreds.get_vault_credentials
# Trigger the request similarly to core.api.get_json
# (the SDK attribute may be either a method to call or an already-fetched response).
try:
    resp = cred_endpoint() if callable(cred_endpoint) else cred_endpoint
except Exception:
    resp = cred_endpoint
try:
    vjson = resp.json() if hasattr(resp, 'json') else resp
except Exception:
    vjson = []
if not vjson:
    vjson = []
elif not isinstance(vjson, list):
    # An error payload (e.g. a dict describing a failed request) or an uncalled
    # endpoint is not a credential list; iterating it would crash on `.get`.
    print(f'Warning: unexpected vault credentials payload ({type(vjson).__name__}); '
          'continuing with no credentials')
    vjson = []
creds = []
for entry in vjson:
    if not isinstance(entry, dict):
        # Skip malformed entries rather than failing the whole report.
        continue
    uuid = entry.get('uuid')
    label = entry.get('label')
    # Both spellings of the range key are accepted.
    ip_range = entry.get('ip_range') or entry.get('iprange')
    wc, items = _parse_tokens(ip_range)
    creds.append((uuid, wc, items, label))

# Fetch ranges
ex_resp = twsearch.search({'query': qry_excludes}, format='object')
sc_resp = twsearch.search({'query': qry_scanrange}, format='object')
rows_exc = _tw_to_rows(ex_resp)
rows_sc = _tw_to_rows(sc_resp)

def _item_span(kind, data):
    """Return ``(low, high, ip_version)`` integer bounds for one parsed item, or None."""
    if kind == 'network':
        net, ver = data
        return int(net.network_address), int(net.broadcast_address), ver
    if kind == 'range':
        low, high, ver = data
        return low, high, ver
    if kind == 'single':
        value, ver = data
        return value, value, ver
    return None


def _count_matching(credentials, label_row) -> Tuple[int, int]:
    """Count the ranges in a result row and the credentials that apply to them.

    Args:
        credentials: iterable of ``(uuid, wildcard, items, label)`` tuples, where
            ``items`` are the ``(kind, data)`` tuples produced by ``_parse_tokens``.
        label_row: result row (dict) whose ``'Scan_Range'`` value is a list of
            range strings, a comma-separated string, or missing.

    Returns:
        ``(range_count, credential_count)``. ``range_count`` is the number of
        list entries, or of non-empty comma-separated tokens for a string, or 0.
        ``credential_count`` is the number of distinct credential uuids whose IP
        restriction intersects the row's ranges.

    Endpoints are not enumerated; a credential matches when any of its items
    overlaps any row item of the same IP version. Every item (network, range or
    single address) is reduced to integer bounds so that all kind combinations
    are compared the same way, including network/range credentials against
    single-address rows.
    """
    ranges = label_row.get('Scan_Range')
    if isinstance(ranges, list):
        range_count = len(ranges)
    elif isinstance(ranges, str):
        # Count tokens, not characters, when the ranges arrive as one string.
        range_count = len([tok for tok in ranges.split(',') if tok.strip()])
    else:
        range_count = 0

    # _parse_tokens accepts both the list and the string form directly.
    wc, items = _parse_tokens(ranges)
    label_spans = [span for span in (_item_span(k, d) for k, d in items) if span is not None]

    matched = set()
    for uuid, c_wc, c_items, _c_label in credentials:
        # To approximate DisMAL logic, a wildcard credential matches every row.
        if c_wc:
            matched.add(uuid)
            continue
        cred_spans = [span for span in (_item_span(k, d) for k, d in c_items) if span is not None]
        if not cred_spans:
            continue
        # A wildcard row covers every address, so any restricted credential
        # with at least one usable item intersects it.
        if wc or any(
            c_ver == l_ver and c_low <= l_high and l_low <= c_high
            for c_low, c_high, c_ver in cred_spans
            for l_low, l_high, l_ver in label_spans
        ):
            matched.add(uuid)
    return range_count, len(matched)

# Assemble report rows: exclude ranges first, then scan ranges.
# Each row is [label, type, range id, range count, scan level, schedule, credential count];
# exclude ranges carry no scan level.
rows = []
for type_label, source_rows, has_level in (
    ('Exclude Range', rows_exc, False),
    ('Scan Range', rows_sc, True),
):
    for r in source_rows:
        rc, cc = _count_matching(creds, r)
        level = r.get('Level') if has_level else None
        rows.append([r.get('Label'), type_label, r.get('ID'), rc, level, r.get('Date_Rules'), cc])

# Sort by Range ID
def _safe_int(x):
    """Convert ``x`` to ``int``; return 0 for ``None`` or anything unconvertible."""
    if x is None:
        return 0
    try:
        return int(x)
    except Exception:
        return 0
# The range ID is the third element of each row; non-numeric IDs sort as 0.
rows.sort(key=lambda row: _safe_int(row[2]))

headers = ['Discovery Instance', 'Name', 'Type', 'Range ID', 'Ranges', 'Scan Level', 'When', 'Credentials']
# Rows hold every column except the first, which is filled with the target host.
df = pd.DataFrame(rows, columns=headers[1:])
df.insert(0, headers[0], target)
if df.empty:
    print('No rows to display')
else:
    display(df.head(20))

OUT_CSV = str(output_dir / 'schedules.csv')
df.to_csv(OUT_CSV, index=False)
print(f'Saved to {OUT_CSV} (rows: {len(df)})')
