# Device Identities (device_ids)

Fetch device identity rows from BMC Discovery using the Tideway SDK (bulk search), build the unique set of IP addresses and names seen for each originating endpoint, and save the result as a CSV file in the standard DisMAL output folder.

> **NOTE:** Due to limitations of the API, this may take a while to run.

In [None]:
# %pip install -q pandas pyyaml
import pandas as pd
import yaml
from pathlib import Path
import json, os
import sys
try:
    import tideway
except ImportError:
    print('The tideway SDK must be available in this environment.')
    raise


## Appliance selection (optional)

In [None]:
# Which entry of the `appliances` list in config.yaml to use.
# When APPLIANCE_NAME is set it is matched against each entry's `name` and
# takes precedence; a name that matches nothing raises an error.
APPLIANCE_NAME = None   # e.g., 'prod' or 'dev'
# Otherwise APPLIANCE_INDEX is used as a Python list index into `appliances`
# (0 is the first entry); if the lookup fails the first entry is used instead.
APPLIANCE_INDEX = 1     # numeric index if not using name

## Optional filters

In [None]:
# All filters are optional; leave them at None / [] to keep every row.
# DEVICE_NAME_FILTER is matched case-insensitively against the name columns.
DEVICE_NAME_FILTER = None  # e.g., 'host-name'
# INCLUDE_ENDPOINTS keeps only rows whose DiscoveryAccess endpoint is in the
# list; when it is non-empty, ENDPOINT_PREFIX is not applied.
INCLUDE_ENDPOINTS = []     # e.g., ['10.1.2.3']
# ENDPOINT_PREFIX keeps only rows whose endpoint text starts with the prefix.
ENDPOINT_PREFIX = None     # e.g., '10.1.'


## Load configuration and prepare Tideway appliance

In [None]:
def _find_repo_root(start: Path) -> Path:
    """Return the closest directory at or above *start* holding ``config.yaml``.

    The search begins at *start* itself and then moves up through its parents.
    When no directory in that chain contains ``config.yaml``, the parent of
    *start* is returned.
    """
    candidates = (start, *start.parents)
    found = next((d for d in candidates if (d / 'config.yaml').exists()), None)
    return start.parent if found is None else found

# Locate and parse config.yaml (searched for upwards from the working directory)
repo_root = _find_repo_root(Path.cwd())
cfg_path = repo_root / 'config.yaml'
if not cfg_path.is_file():
    # Fail with a clear message rather than a read error on a guessed path
    raise FileNotFoundError(f'config.yaml not found in {Path.cwd()} or any parent directory')
# Decode explicitly as UTF-8 so parsing does not depend on the platform default
cfg = yaml.safe_load(cfg_path.read_text(encoding='utf-8')) or {}
if not isinstance(cfg, dict):
    raise ValueError('config.yaml must contain a mapping at the top level')

# Pick the appliance entry: by name when APPLIANCE_NAME is set, else by index
apps = cfg.get('appliances') or []
selected = None
if isinstance(apps, list) and apps:
    if APPLIANCE_NAME:
        selected = next(
            (a for a in apps if isinstance(a, dict) and a.get('name') == APPLIANCE_NAME),
            None,
        )
        if selected is None:
            raise ValueError(f"No appliance named '{APPLIANCE_NAME}' in config.yaml")
    else:
        try:
            selected = apps[int(APPLIANCE_INDEX)]
        except (IndexError, TypeError, ValueError):
            # Keep the best-effort fallback, but say so instead of silently
            # connecting to a different appliance than the one requested.
            print(
                f'APPLIANCE_INDEX {APPLIANCE_INDEX!r} is not valid for '
                f'{len(apps)} configured appliance(s); using the first one.'
            )
            selected = apps[0]
    if not isinstance(selected, dict):
        raise ValueError('Each entry under "appliances" in config.yaml must be a mapping')

# Per-appliance settings win over the top-level ones
target = str((selected or {}).get('target') or cfg.get('target') or '').strip()
if not target:
    raise ValueError('config.yaml missing "target"')
token = str((selected or {}).get('token') or cfg.get('token') or '').strip()
token_file = (selected or {}).get('token_file') or cfg.get('token_file') or cfg.get('f_token')
if not token and token_file:
    # A relative token file path is resolved against the repository root
    tf = Path(token_file)
    if not tf.is_absolute():
        tf = repo_root / tf
    token = tf.read_text(encoding='utf-8').strip()
if not token:
    raise ValueError('API token not found in config.yaml (token or token_file)')

# Connect with the default API version first, then ask the appliance which
# versions it advertises and reconnect using the last one listed.
app = tideway.appliance(target, token)
about = app.about()
if not getattr(about, 'ok', True):
    print('About call failed:', getattr(about, 'status_code', 'unknown'))

try:
    apivers = about.json().get('api_versions', [])
except Exception:
    # Best effort: any problem reading the about payload keeps the default version
    apivers = []

if apivers:
    app = tideway.appliance(target, token, api_version=apivers[-1])

data_ep = app.data()
print('Connected to', target, 'API version', getattr(app, 'api_version', 'unknown'))


## Query with Tideway (bulk search) and normalize

In [None]:
# Search query: one row per DiscoveryAccess with the names and IP addresses
# reachable from it (inferred element, endpoint, discovered IPs, interfaces).
qry_device_ids = """
search DiscoveryAccess
show
#::InferredElement:.name as 'InferredElement.name',
#::InferredElement:.hostname as 'InferredElement.hostname',
#::InferredElement:.local_fqdn as 'InferredElement.local_fqdn',
#::InferredElement:.sysname as 'InferredElement.sysname',
endpoint as 'DiscoveryAccess.endpoint',
#DiscoveryAccess:Endpoint:Endpoint:Endpoint.endpoint as 'Endpoint.endpoint',
#DiscoveryAccess:DiscoveryAccessResult:DiscoveryResult:DiscoveredIPAddressList.#List:List:Member:DiscoveredIPAddress.ip_addr as 'DiscoveredIPAddress.ip_addr',
#::InferredElement:.__all_ip_addrs as 'InferredElement.__all_ip_addrs',
#::InferredElement:.#DeviceWithInterface:DeviceInterface:InterfaceOfDevice:NetworkInterface.ip_addr as 'NetworkInterface.ip_addr',
#::InferredElement:.#DeviceWithInterface:DeviceInterface:InterfaceOfDevice:NetworkInterface.fqdns as 'NetworkInterface.fqdns'
"""

payload = data_ep.search({'query': qry_device_ids}, format='object', limit=0)

# Report a failed call (same convention as the about check) so an error
# response is not mistaken for "no results".
if not getattr(payload, 'ok', True):
    print('Search call failed:', getattr(payload, 'status_code', 'unknown'))

# Light normalisation only when needed
if hasattr(payload, 'json'):                  # response object -> decoded JSON
    payload = payload.json()

if isinstance(payload, dict):                 # dict with "results"
    payload = payload.get('results', [])

# Header-row table -> list of records; only index into real sequences
if isinstance(payload, (list, tuple)) and payload and isinstance(payload[0], list):
    header, *records = payload
    payload = [dict(zip(header, record)) for record in records]

rows = pd.json_normalize(payload)               # flattens nested fields better than DataFrame
# Preview normalized results (5 rows)
display(rows.head(5))


## Filtering and identity build

In [None]:
# The cells below filter, select, and aggregate with plain pandas
# (no custom helpers).

# Work on a copy so the normalized query results in `rows` stay untouched
df = rows.copy()
# Initial working frame (5 rows)
display(df.head(5))

# Source columns that may carry device names
names_cols = [
    'InferredElement.name',
    'InferredElement.hostname',
    'InferredElement.local_fqdn',
    'InferredElement.sysname',
    'NetworkInterface.fqdns',
]
# Source columns that may carry IP addresses
ips_cols = [
    'DiscoveryAccess.endpoint',
    'Endpoint.endpoint',
    'DiscoveredIPAddress.ip_addr',
    'InferredElement.__all_ip_addrs',
    'NetworkInterface.ip_addr',
]


In [None]:
# Optional filter by device name substring (case-insensitive)
# Ensure expected columns exist to avoid KeyError when absent.
# dict.fromkeys de-duplicates while keeping a stable column order.
for col in dict.fromkeys(names_cols + ips_cols + ['DiscoveryAccess.endpoint']):
    if col not in df.columns:
        df[col] = pd.NA
# Apply optional name filter across known name columns
if DEVICE_NAME_FILTER:
    needle = str(DEVICE_NAME_FILTER).lower()
    name_mask = pd.Series(False, index=df.index)
    for col in names_cols:
        # Convert values (including list-like) to string then search
        col_str = df[col].astype(str).str.lower()
        # regex=False: the filter is a literal substring, so characters such
        # as '.' or '(' are matched as typed instead of as regex syntax.
        hits = col_str.str.contains(needle, regex=False)
        # Missing values stringify to text such as '<NA>' or 'None';
        # exclude them so the filter cannot match that placeholder text.
        name_mask |= hits & df[col].notna()
    # copy() gives later cells an independent frame to add columns to
    df = df[name_mask].copy()
# Show the frame after name filter (5 rows)
display(df.head(5))


In [None]:
# Restrict the endpoints in scope: an explicit include list wins; the prefix
# filter is only consulted when no include list is given.
ep_col = 'DiscoveryAccess.endpoint'
if INCLUDE_ENDPOINTS:
    listed = df[ep_col].isin(INCLUDE_ENDPOINTS)
    df = df[listed]
elif ENDPOINT_PREFIX:
    endpoint_text = df[ep_col].astype(str)
    df = df[endpoint_text.str.startswith(str(ENDPOINT_PREFIX))]
# Row count and preview after the endpoint filter (5 rows)
print(f'Rows after filters: {len(df)}')
display(df.head(5))


In [None]:
# Build per-row lists of IPs and Names across their source columns.
# Rows are walked explicitly instead of using DataFrame.apply(axis=1): on an
# empty frame (for example when the filters matched nothing) apply can hand
# back a DataFrame rather than a Series, which cannot be stored in one column.
list_columns = {}
for out_col, src_cols in (('ips_all', ips_cols), ('names_all', names_cols)):
    per_row = []
    for values in df[src_cols].itertuples(index=False, name=None):
        merged = []
        for value in values:
            if isinstance(value, (list, tuple)):
                merged.extend(value)    # multi-valued cell: keep every member
            elif not pd.isna(value):
                merged.append(value)    # scalar cell: keep unless missing
        per_row.append(merged)
    # dtype=object keeps each row's list as a single cell value
    list_columns[out_col] = pd.Series(per_row, index=df.index, dtype=object)
# assign() builds a new frame, so a filtered slice is never written to in place
df = df.assign(**list_columns)
# Preview the constructed list columns (5 rows)
display(df[['ips_all','names_all']].head(5))


In [None]:
# One row per (endpoint, value): explode the list columns, then collapse back
# to one sorted, de-duplicated list of strings per endpoint.
# explode is applied twice so that lists nested inside lists are flattened too.
df_ips = df[[ep_col, 'ips_all']]
df_ips = df_ips.explode('ips_all')
df_ips = df_ips.explode('ips_all')
# Preview exploded IP rows (5 rows)
display(df_ips.head(5))
df_ips = df_ips.dropna(subset=['ips_all'])
ips_agg = (
    df_ips.groupby(ep_col, dropna=True)['ips_all']
    .agg(lambda values: sorted(values.astype(str).unique()))
)

df_names = df[[ep_col, 'names_all']]
df_names = df_names.explode('names_all')
df_names = df_names.explode('names_all')
# Preview exploded Name rows (5 rows)
display(df_names.head(5))
df_names = df_names.dropna(subset=['names_all'])
names_agg = (
    df_names.groupby(ep_col, dropna=True)['names_all']
    .agg(lambda values: sorted(values.astype(str).unique()))
)

# Side-by-side per-endpoint lists, with the endpoint back as a regular column
agg_df = pd.concat([ips_agg, names_agg], axis=1)
agg_df = agg_df.reset_index()
# Preview aggregated result (5 rows)
display(agg_df.head(5))


In [None]:
# Final layout: readable column headings, with the Discovery instance
# (the configured target) as the first column.
column_labels = {'ips_all': 'List of IPs', 'names_all': 'List of Names'}
out_df = agg_df.rename(columns=column_labels)
out_df.insert(loc=0, column='Discovery Instance', value=target)
display(out_df.head(5))


In [None]:
# Export to <repo_root>/output_<target with dots replaced by underscores>/device_ids.csv
output_dir = repo_root / ('output_' + target.replace('.', '_'))
csv_path = output_dir / 'device_ids.csv'
output_dir.mkdir(parents=True, exist_ok=True)
out_df.to_csv(csv_path, index=False)
print('Saved to', csv_path)
