# IP Analysis (BMC Discovery)

This notebook reproduces the DisMAL `ip_analysis` report logic using the Discovery Data API.
It reads configuration from `../config.yaml`, evaluates the relevant TWQL queries,
derives overlapping schedules and gaps, then writes `ip_analysis.csv` to the standard output directory.

## Requirements

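We use `requests`, `pandas`, `PyYAML`, and `tideway` (the Discovery API client imported below). Uncomment the line below to install them if needed; note that it does not list `tideway`, which must be installed separately.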

In [None]:
# %pip install -q requests pandas pyyaml

import pandas as pd
import requests
import yaml
from pathlib import Path
from urllib.parse import urljoin
import ipaddress
import json, os
import math
import tideway
import re

## Configuration (from config.yaml)

Reads settings from `../config.yaml` including target, token/token_file,
API version, and SSL verification preference.
Saves the CSV to `../output_<target>/ip_analysis.csv`.

In [None]:
def load_config_params(
    start: Path,
    appliance_name: str = None,
    appliance_index: int = 0,
) -> dict:
    """Read ``config.yaml`` and resolve the settings for one appliance.

    ``config.yaml`` is looked for in ``start`` and then in each of its
    parents; when no directory contains it, ``start.parent`` is used as the
    root (so opening the file fails there if it does not exist).

    Args:
        start: Directory from which the upward search begins.
        appliance_name: Value of the ``name`` key of the wanted entry in the
            ``appliances`` list. When falsy, ``appliance_index`` is used.
        appliance_index: Position in the ``appliances`` list; any failure to
            use it falls back to the first entry.

    Returns:
        A dict with the keys ``repo_root``, ``config_path``, ``cfg``,
        ``selected``, ``target``, ``token``, ``api_version``, ``verify_ssl``
        and ``output_dir``.

    Raises:
        ValueError: If ``appliance_name`` matches no entry, if no ``target``
            is configured, or if no token can be obtained.

    Side effects:
        Creates ``output_<sanitized target>`` under the repo root.
    """
    # Closest directory (start itself first) that holds config.yaml.
    repo_root = start.parent
    for candidate in (start, *start.parents):
        if (candidate / 'config.yaml').exists():
            repo_root = candidate
            break
    config_path = repo_root / 'config.yaml'

    with open(config_path, 'r') as fh:
        cfg = yaml.safe_load(fh) or {}

    # Pick the appliance entry (if the config defines a non-empty list).
    selected = None
    apps = cfg.get('appliances') or []
    if isinstance(apps, list) and apps:
        if not appliance_name:
            try:
                selected = apps[int(appliance_index)]
            except Exception:
                selected = apps[0]
        else:
            for entry in apps:
                if entry.get('name') == appliance_name:
                    selected = entry
                    break
            else:
                raise ValueError(f"No appliance named '{appliance_name}' in config.yaml")

    # Per-appliance values win over top-level values.
    appliance = selected or {}

    target = (appliance.get('target') or cfg.get('target') or '').strip()
    if not target:
        raise ValueError('config.yaml missing "target"')

    token = (appliance.get('token') or cfg.get('token') or '').strip()
    if not token:
        token_file = appliance.get('token_file') or cfg.get('token_file') or cfg.get('f_token')
        if token_file:
            tf_path = Path(token_file)
            if not tf_path.is_absolute():
                # Relative token paths are resolved against the repo root.
                tf_path = repo_root / tf_path
            with open(tf_path, 'r') as tf:
                token = tf.read().strip()
    if not token:
        raise ValueError('API token not found in config.yaml (token or token_file)')

    api_version = str(appliance.get('api_version') or cfg.get('api_version') or 'v1.14')
    verify_ssl = bool(appliance.get('verify_ssl', cfg.get('verify_ssl', True)))

    # Make the target safe to embed in a directory name.
    sanitized = target
    for ch in ('.', ':', '/'):
        sanitized = sanitized.replace(ch, '_')
    output_dir = repo_root / f'output_{sanitized}'
    output_dir.mkdir(parents=True, exist_ok=True)

    return dict(
        repo_root=repo_root,
        config_path=config_path,
        cfg=cfg,
        selected=selected,
        target=target,
        token=token,
        api_version=api_version,
        verify_ssl=verify_ssl,
        output_dir=output_dir,
    )

In [None]:
def init_appliance(appliance_name: str = "prod"):
    """Load one appliance's settings and build a tideway client for it.

    Settings come from ``load_config_params`` searched from the current
    working directory. The resolved values are printed, a
    ``tideway.appliance`` is constructed, and ``app.api_about`` is read as a
    reachability probe; a failure there is only reported as a warning.

    Args:
        appliance_name: Name of the appliance entry to load.

    Returns:
        A dict with ``params`` (the full settings dict), ``target``, ``app``,
        ``api_version`` (with leading ``v`` characters removed) and
        ``output_dir``.
    """
    params = load_config_params(Path.cwd(), appliance_name=appliance_name)
    target, api_version = params["target"], params["api_version"]
    verify_ssl, output_dir = params["verify_ssl"], params["output_dir"]

    summary = (
        ('Base Host     :', target),
        ('API Version   :', api_version),
        ('Verify SSL    :', verify_ssl),
        ('Output folder :', output_dir),
    )
    for caption, value in summary:
        print(caption, value)

    # tideway is given the version without the 'v' prefix.
    api_number = api_version.lstrip('v')
    app = tideway.appliance(target, params["token"], api_version=api_number, ssl_verify=verify_ssl)

    # Best-effort probe: never abort initialisation because of it.
    try:
        about = app.api_about
        print('Appliance reachable:', about.status_code)
    except Exception as e:
        print('Warning: failed to contact appliance /api/about:', e)

    return dict(
        params=params,
        target=target,
        app=app,
        api_version=api_number,
        output_dir=output_dir,
    )

# Initialise Instances

In [None]:
# Build one client bundle per appliance entry named in config.yaml.
print("Initialise Prod:")
twprod = init_appliance(appliance_name="prod")

print("Initialise Dev:")
twdev = init_appliance(appliance_name="dev")

In [None]:
# bootstrapping variables
# Flatten the per-appliance bundles into module-level names used by later cells.

token = twprod["params"]["token"]
tokendev = twdev["params"]["token"]
VERIFY_SSL = twprod["params"]["verify_ssl"]
verify_ssl_dev = twdev["params"]["verify_ssl"]
target = twprod["target"]
target_dev = twdev["target"]
api_version = twprod["api_version"]
api_version_dev = twdev["api_version"]
output_dir = twprod["output_dir"]
output_dir_dev = twdev["output_dir"]

# Default to https when the configured target carries no scheme.
BASE_URL = target if ('://' in target) else f'https://{target}'
# init_appliance() returns the version with the leading 'v' stripped, but
# api_url() interpolates API_VERSION straight into '/api/<version>/', and the
# Discovery REST path is '/api/v<version>/'. Restore the prefix here so the
# raw-requests helpers build a valid URL.
API_VERSION = api_version if api_version.startswith('v') else f'v{api_version}'

## Session and helpers
`post_search` aggregates the 'object' table format and returns {'headings': [...], 'results': [...]}; it only pages through all results when called with `limit=0`.
`table_to_dicts` zips headings into dict rows.

In [None]:
from typing import List, Dict, Any, Tuple

# Shared HTTP session for the raw-requests helpers below (prod appliance).
session = requests.Session()
session.verify = VERIFY_SSL

# Accept a token that already carries the 'Bearer ' scheme; otherwise add it.
if token.lower().startswith('bearer '):
    auth_value = token
else:
    auth_value = f'Bearer {token}'
session.headers.update({'Authorization': auth_value, 'Accept': 'application/json'})

def api_url(path: str) -> str:
    """Return the absolute URL for ``path`` under ``BASE_URL``/api/``API_VERSION``/.

    Leading slashes on ``path`` are dropped so it is always resolved relative
    to the versioned API root rather than to the host root.
    """
    root = f"{BASE_URL.rstrip('/')}/api/{API_VERSION}/"
    relative = path.lstrip('/')
    return urljoin(root, relative)

def post_search(query: str, *, limit: int | None = None, page_size: int = 500) -> Dict[str, Any]:
    """POST ``query`` to ``data/search`` and return its table as a dict.

    Args:
        query: Search query sent in the request payload.
        limit: ``0`` fetches every page (``page_size`` rows per request,
            advancing ``offset`` until a short page is returned); ``None``
            sends no limit; any other value is sent as the limit of a single
            request.
        page_size: Rows requested per page when ``limit`` is ``0``.

    Returns:
        ``{'headings': [...], 'results': [...]}``. An empty table (both lists
        empty) is returned when a response has status >= 400 (after printing
        the error) or contains no table, including on a later page; rows
        gathered from earlier pages are not returned in that case.
    """
    def _find_table(body):
        # The table is either the response dict itself or the first matching
        # dict inside a response list.
        if isinstance(body, dict):
            return body if ('headings' in body and 'results' in body) else None
        if isinstance(body, list):
            for entry in body:
                if isinstance(entry, dict) and 'headings' in entry and 'results' in entry:
                    return entry
        return None

    endpoint = api_url('data/search')
    paginate = (limit == 0)
    collected: List[List[Any]] = []
    headings = None
    offset = 0

    while True:
        payload = {'query': query, 'format': 'object'}
        if paginate:
            payload['limit'] = page_size
            if offset:
                payload['offset'] = offset
        elif limit is not None:
            payload['limit'] = limit

        response = session.post(endpoint, json=payload)
        if response.status_code >= 400:
            print(f'Error {response.status_code} POST {endpoint}: {response.text[:200]}')
            return {'headings': [], 'results': []}

        # An undecodable body is treated like a response without a table.
        try:
            body = response.json()
        except Exception:
            body = []

        table = _find_table(body)
        if not table:
            return {'headings': [], 'results': []}

        # Headings are taken from the first page only.
        if headings is None:
            headings = table.get('headings', [])
        rows = table.get('results') or []
        collected.extend(rows)

        # Continue only while paginating and the page came back full.
        if not (paginate and len(rows) >= page_size):
            break
        offset += page_size

    return {'headings': (headings or []), 'results': collected}

def table_to_dicts(table: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Turn a ``{'headings': [...], 'results': [[...], ...]}`` table into row dicts.

    Each result row is paired positionally with the headings; pairing stops
    at the shorter of the two. Missing or ``None`` headings/results are
    treated as empty lists.
    """
    columns = table.get('headings') or []
    records = []
    for row in table.get('results') or []:
        records.append({name: value for name, value in zip(columns, row)})
    return records


## Build report data
- Collect scan ranges and excludes; prepare matchers.
- For each endpoint, identify schedule labels; record overlaps and missing schedules.
- Add 'seen but unscanned' IPs.

In [None]:
def _parse_scan_tokens(tokens: List[str]):
    """Parse scan-range tokens into matcher tuples.

    Recognised token forms (after stripping whitespace):

    * ``0.0.0.0/0`` or ``::/0`` - sets the wildcard flag, adds no item.
    * ``<ip>-<ip>`` - ``('range', (int(start), int(end), version))``; both
      ends must belong to the same IP version.
    * ``<ip>/<prefix>`` - ``('network', (ip_network, version))``, parsed with
      ``strict=False`` so host bits are allowed.
    * ``<ip>`` - ``('single', (int(ip), version))``.

    Empty/``None`` tokens and tokens matching none of the forms are skipped.

    Args:
        tokens: Iterable of token strings (``None`` entries are tolerated).

    Returns:
        ``(wildcard, items)`` where ``wildcard`` is a bool and ``items`` is
        the list of matcher tuples in input order.
    """
    items = []
    wildcard = False
    for raw in tokens:
        t = (raw or '').strip()
        if not t:
            continue
        if t in ('0.0.0.0/0', '::/0'):
            wildcard = True
            continue
        if '-' in t:
            try:
                start, end = (ipaddress.ip_address(part.strip()) for part in t.split('-', 1))
            except ValueError:
                pass
            else:
                # A range mixing IPv4 and IPv6 ends is meaningless: stored
                # under the start's version, its integer bounds would match
                # unrelated addresses. Treat it as an unparseable token.
                if start.version == end.version:
                    items.append(('range', (int(start), int(end), start.version)))
                    continue
        if '/' in t:
            try:
                net = ipaddress.ip_network(t, strict=False)
            except ValueError:
                pass
            else:
                items.append(('network', (net, net.version)))
                continue
        # Single IP
        try:
            ip = ipaddress.ip_address(t)
        except ValueError:
            # Unknown token; skip
            continue
        items.append(('single', (int(ip), ip.version)))
    return wildcard, items

def _endpoint_in_items(ep: str, wildcard: bool, items) -> bool:
    """Tell whether endpoint ``ep`` is covered by the parsed scan items.

    Args:
        ep: Endpoint as an IP address string.
        wildcard: When true, every endpoint counts as covered (even one that
            is not a valid IP address).
        items: ``(kind, data)`` tuples where ``kind`` is ``'network'``,
            ``'range'`` or ``'single'``; other kinds are ignored.

    Returns:
        ``True`` if ``wildcard`` is set or any item of the same IP version
        contains the address; ``False`` otherwise, including when ``ep``
        cannot be parsed as an IP address.
    """
    if wildcard:
        return True
    try:
        addr = ipaddress.ip_address(ep)
        as_int = int(addr)
        family = addr.version
    except Exception:
        return False

    def _covers(kind, data) -> bool:
        # Every comparison is gated on the IP version so that equal integer
        # values from different families never match.
        if kind == 'network':
            net, net_family = data
            return family == net_family and addr in net
        if kind == 'range':
            low, high, range_family = data
            return family == range_family and low <= as_int <= high
        if kind == 'single':
            value, single_family = data
            return family == single_family and as_int == value
        return False

    return any(_covers(kind, data) for kind, data in items)

In [None]:
def get_results(instance, qry, columns=['No Results']):
    """Run ``qry`` on an appliance and return the rows as a DataFrame.

    Args:
        instance: Dict holding the client under ``'app'`` and the appliance
            address under ``'target'``.
        qry: Search query passed to ``app.data().search`` (object format,
            limit 500).
        columns: Headers used for the frame when the search yields no rows.

    Returns:
        A DataFrame whose ``'Discovery Instance'`` column is set to
        ``instance['target']``; the column is inserted first when it is not
        already present.
    """
    rows = instance['app'].data().search({'query': qry}, format='object', limit=500)
    if rows:
        frame = pd.DataFrame(rows)
    else:
        frame = pd.DataFrame()
    if frame.empty:
        # Keep a usable header row even when nothing came back.
        frame = pd.DataFrame(columns=columns)

    host = instance['target']
    if 'Discovery Instance' in frame.columns:
        frame['Discovery Instance'] = host
    else:
        frame.insert(0, 'Discovery Instance', host)
    return frame

def run_and_display(instance, query, columns, head: int = 5):
    """Fetch ``query`` for ``instance``, show a preview, and return the full frame.

    Prints ``instance['target']`` and displays the first ``head`` rows of the
    DataFrame produced by ``get_results``; the complete DataFrame is
    returned.
    """
    frame = get_results(instance, query, columns)
    print(instance['target'])
    preview = frame.head(head)
    display(preview)
    return frame

# Fetch tables

# Scheduled scan ranges: id, label, ranges (or provider), level, recurrence.
qry_scanrange = '''
                search ScanRange where scan_type = 'Scheduled'
                show
                range_id as 'ID',
                label as 'Label',
                (range_strings or provider) as 'Scan_Range',
                scan_level as 'Level',
                recurrenceDescription(schedule) as 'Date_Rules'
'''

# Fallback headers used when a query returns no rows.
cols = [
    "ID",
    "Label",
    "Scan_Range",
    "Level",
    "Date_Rules",
]

s_prod = run_and_display(twprod, qry_scanrange, columns=cols)
s_dev = run_and_display(twdev, qry_scanrange, columns=cols)

# Exclude ranges, shaped with the same column aliases as the scan ranges.
qry_excludes = '''
                search in '_System' ExcludeRange
                show
                exrange_id as 'ID',
                name as 'Label',
                range_strings as 'Scan_Range',
                recurrenceDescription(schedule) as 'Date_Rules'
'''

e_prod = run_and_display(twprod, qry_excludes, columns=cols)
e_dev = run_and_display(twdev, qry_excludes, columns=cols)

# Stack scan ranges and excludes per appliance into one frame each.
s_prod = pd.concat([s_prod, e_prod], ignore_index=True)
s_dev = pd.concat([s_dev, e_dev], ignore_index=True)

In [None]:
# Endpoints and their schedule count
# One row per distinct DiscoveryAccess endpoint plus the number of scheduled
# runs it is linked to.
qry_ip_schedules = '''
                    search DiscoveryAccess
                    show endpoint,
                    nodecount(traverse Member:List:List:DiscoveryRun where scan_type = 'Scheduled') as 'schedules'
                    process with unique()
'''

# The fallback headers must match THIS query's output: with the scan-range
# headers an empty result would have no 'endpoint' column, and the later
# attach_schedules() merge on 'endpoint' would raise KeyError.
i_prod = run_and_display(twprod, qry_ip_schedules, ['endpoint', 'schedules'])
i_dev = run_and_display(twdev, qry_ip_schedules, ['endpoint', 'schedules'])

In [None]:
def attach_schedules(i, s, range_col):
    """Label each endpoint with the schedules whose ranges list it exactly.

    Args:
        i: DataFrame with an ``'endpoint'`` column.
        s: DataFrame with a ``'Label'`` column and a ``range_col`` column
            holding one range entry or a list of entries per row.
        range_col: Name of the column in ``s`` that holds the range entries.

    Returns:
        DataFrame with columns ``'endpoint'`` and ``'scan_schedules'``: the
        sorted, de-duplicated, comma-separated labels for each endpoint, or a
        fixed message when no label matched.

    Note:
        Matching is plain equality between the endpoint and a range entry,
        so CIDR blocks and ``a-b`` ranges do not match the addresses they
        contain.
    """
    # Expand schedules so each range entry gets its own row
    s_expanded = s.explode(range_col)

    # Join endpoints against the expanded ranges. The join key is range_col
    # (not a hard-coded column name) so the parameter is honoured for both
    # the explode and the merge.
    merged = i.merge(
        s_expanded,
        left_on='endpoint',
        right_on=range_col,
        how='left'
    )

    # Group so that each endpoint collects all matching labels
    grouped = (
        merged.groupby('endpoint')['Label']
        .apply(lambda labels: ', '.join(sorted(set(labels.dropna()))))
        .reset_index()
    )

    # Endpoints without any matching label end up as '' after the join
    grouped['Label'] = grouped['Label'].replace(
        '', "Endpoint has previous DiscoveryAccess, but not currently scheduled."
    )

    # Rename for clarity
    grouped = grouped.rename(columns={'Label': 'scan_schedules'})

    return grouped

# Each appliance is matched against its OWN endpoints and ranges; the dev
# report previously reused the prod frames and so duplicated the prod result.
prod = attach_schedules(i_prod, s_prod, 'Scan_Range')
dev = attach_schedules(i_dev, s_dev, 'Scan_Range')

display(prod.head())

In [None]:
# Seen but unscanned
# Remote IPs observed in discovered network connections, filtered by the
# connectionsToUnseen post-processing step.
qry_connections_unscanned = '''
                            search Host
                            traverse InferredElement:Inference:Associate:DiscoveryAccess
                            traverse DiscoveryAccess:DiscoveryAccessResult:DiscoveryResult:NetworkConnectionList
                            traverse List:List:Member:DiscoveredNetworkConnection
                            order by remote_ip_addr
                            show remote_ip_addr as 'endpoint'
                            processwith connectionsToUnseen
'''

# The fallback header must be this query's only column: the next cell selects
# u_prod[['endpoint']], which raises KeyError on an empty frame built from
# the scan-range headers.
u_prod = run_and_display(twprod, qry_connections_unscanned, ['endpoint'])
u_dev = run_and_display(twdev, qry_connections_unscanned, ['endpoint'])

In [None]:
# Tag every unscanned endpoint with a fixed schedule label (new frames, the
# query results themselves are left untouched).
eu_prod = u_prod[['endpoint']].assign(scan_schedules="Seen but unscanned")
eu_dev = u_dev[['endpoint']].assign(scan_schedules="Seen but unscanned")

# Add the tagged rows below the scheduled-endpoint rows of each report.
prod = pd.concat([prod, eu_prod], ignore_index=True)
dev = pd.concat([dev, eu_dev], ignore_index=True)

In [None]:
def save(df: pd.DataFrame, output_dir: Path, filename: str):
    """Write ``df`` to ``<output_dir>/<filename>.csv`` without the index.

    The destination path is printed once the file has been written.
    """
    destination = output_dir / f"{filename}.csv"
    output_csv = str(destination)
    df.to_csv(output_csv, index=False)
    print(f"Saved to {output_csv}")

# Write each report into its own appliance's output folder; saving both into
# the prod folder under the same filename overwrote the prod CSV with dev data.
save(prod, output_dir, "ip_analysis")
save(dev, output_dir_dev, "ip_analysis")
