# App Model Explorer

Provide a hostname and explore what Discovery currently associates with it:
- `BusinessApplicationInstance`
- `SoftwareInstance`
- `CandidateSoftwareInstance`
- `DiscoveredProcess`
- `DiscoveredService`
- `Package`

Optionally provide a `FUZZY` string (app name, username, pid, etc.) to filter each nodekind.

This uses the Tideway SDK and reads connection details from `config.yaml` at the repo root.


## Requirements

This notebook needs `tideway` (installed from pip), `pandas`, `PyYAML` and `itables`.
If any of them is missing, install it in your environment first, for example with `%pip install tideway pandas PyYAML itables`.


In [None]:
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional

import pandas as pd
import yaml

# Lift the pandas row-display cap so displayed DataFrames are not truncated.
pd.set_option("display.max_rows", None)


## Parameters

- `HOSTNAME`: exact hostname (recommended) or a partial string.
- `HOST_MATCH_MODE`: how to match `HOSTNAME` against `Host.hostname`/`Host.name`.
- `HOST_SELECTION_INDEX`: if multiple hosts match, pick which row to use.
- `FUZZY`: optional string to filter results (e.g. `tomcat`, `svc_user`, `1234`).
- `LIMIT`: API limit per query (`0` means no limit).


In [None]:
# Hostname (or partial hostname / regex, depending on HOST_MATCH_MODE) to look up.
# Must be non-blank before the host lookup cell runs.
HOSTNAME = ""  # e.g. 'myhost01'

# How to match HOSTNAME against Host.hostname / Host.name
# - 'exact': hostname = HOSTNAME OR name = HOSTNAME
# - 'prefix': case-insensitive regex match at start of string
# - 'substring': contains match
# - 'regex': case-insensitive regex match (HOSTNAME is the regex)
HOST_MATCH_MODE = 'prefix'

# If multiple hosts match, select one by row index (0-based).
# Set to None to make the host lookup raise an error instead of picking a row
# when more than one host matches.
HOST_SELECTION_INDEX: Optional[int] = 0

# Optional filter text. When None or blank, the fuzzy follow-up cells are skipped.
FUZZY: Optional[str] = None  # e.g. 'tomcat', 'svc_user', '1234'
# Limit passed to every query (0 means no limit).
LIMIT = 0

# If True, only return RUNNING services
RUNNING_SERVICES_ONLY = False

# Optional: write results to CSV under output_<target>/deepdive/
EXPORT_CSV = False


## Configuration (from config.yaml)

Reads settings from `config.yaml` including target, token/token_file, API version, and SSL verification.

If your `config.yaml` defines multiple appliances under the `appliances:` list,
set `APPLIANCE_NAME` to one of their names (e.g., 'prod' or 'dev') or use the index.


In [None]:
# Which entry of the `appliances:` list in config.yaml to use.
# A non-empty APPLIANCE_NAME is matched against each entry's `name` and takes
# precedence; otherwise APPLIANCE_INDEX selects the entry by position.
APPLIANCE_NAME = None  # e.g. 'prod' or 'dev'
APPLIANCE_INDEX = 0    # integer index if not using name selection

def _find_repo_root(start: Path) -> Path:
    """Return the closest directory at or above *start* that contains ``config.yaml``.

    *start* itself is checked first, then each ancestor in turn. When no
    directory on that chain holds a ``config.yaml``, ``start.parent`` is
    returned as a fallback.
    """
    candidates = (start, *start.parents)
    hit = next((d for d in candidates if (d / 'config.yaml').exists()), None)
    return start.parent if hit is None else hit

# Locate config.yaml starting from the current working directory and parse it.
# An empty YAML document parses to None, hence the `or {}` fallback.
repo_root = _find_repo_root(Path.cwd())
config_path = repo_root / 'config.yaml'
cfg: Dict[str, Any] = yaml.safe_load(config_path.read_text()) or {}

# Appliance selection
# When config.yaml has a non-empty `appliances:` list, pick one entry by name
# (APPLIANCE_NAME) or by position (APPLIANCE_INDEX). Without such a list,
# `selected` stays None and the top-level keys of config.yaml are used.
apps = cfg.get('appliances') or []
selected = None
if isinstance(apps, list) and apps:
    if APPLIANCE_NAME:
        selected = next((a for a in apps if a.get('name') == APPLIANCE_NAME), None)
        if selected is None:
            raise ValueError(f"No appliance named '{APPLIANCE_NAME}' in config.yaml")
    else:
        try:
            selected = apps[int(APPLIANCE_INDEX)]
        except (IndexError, TypeError, ValueError, OverflowError) as exc:
            # Keep the best-effort fallback to the first appliance, but make it
            # visible: silently querying a different appliance than intended
            # is easy to miss.
            print(
                f'Warning: APPLIANCE_INDEX={APPLIANCE_INDEX!r} is not usable ({exc}); '
                'falling back to the first appliance in config.yaml.'
            )
            selected = apps[0]

# The selected appliance's target wins over a top-level `target` key.
target = ((selected or {}).get('target') or cfg.get('target') or '').strip()
if not target:
    raise ValueError('config.yaml missing "target"')

# Token handling: inline token or token file
# An inline `token` (appliance entry first, then top level) wins. Only when it
# is missing is the token read from `token_file` (or the top-level `f_token`
# key); a relative path is resolved against repo_root.
token = (((selected or {}).get('token') or cfg.get('token') or '').strip())
token_file = (selected or {}).get('token_file') or cfg.get('token_file') or cfg.get('f_token')
if not token and token_file:
    tf_path = Path(token_file)
    if not tf_path.is_absolute():
        tf_path = repo_root / tf_path
    token = tf_path.read_text().strip()
if not token:
    raise ValueError('API token not found in config.yaml (token or token_file)')

# Version and SSL
# The appliance entry's api_version wins over the top-level one; 'v1.14' is the default.
API_VERSION = str((selected or {}).get('api_version') or cfg.get('api_version') or 'v1.14')


def _coerce_bool(value: Any, default: bool = True) -> bool:
    """Interpret a config value as a boolean.

    ``None`` and blank strings count as "not set" and yield *default*.
    Strings are compared case-insensitively against common false spellings,
    so a quoted ``'false'`` in YAML is not mistaken for True. Any other value
    falls back to ordinary truthiness.
    """
    if value is None:
        return default
    if isinstance(value, str):
        text = value.strip().lower()
        if not text:
            return default
        return text not in ('0', 'false', 'no', 'off')
    return bool(value)


# An explicit `verify_ssl: null` on the appliance entry must not disable
# certificate verification; treat it as unset and consult the top-level key.
_verify_raw = (selected or {}).get('verify_ssl')
if _verify_raw is None:
    _verify_raw = cfg.get('verify_ssl')
VERIFY_SSL = _coerce_bool(_verify_raw, default=True)

# Optional output directory
# The folder name is derived from the target with '.', ':' and '/' replaced by
# '_'. The directory is created here unconditionally, whether or not
# EXPORT_CSV is enabled.
sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
output_dir = repo_root / f'output_{sanitized}' / 'deepdive'
output_dir.mkdir(parents=True, exist_ok=True)

# Summary of the effective settings; only whether a token is set is printed,
# never the token itself.
print('Base Host      :', target)
print('API Version    :', API_VERSION)
print('Verify SSL     :', VERIFY_SSL)
print('Output folder  :', output_dir)
print('Token set      :', bool(token))

# Import tideway from pip; install if needed (respects PIP_INDEX_URL/PIP_EXTRA_INDEX_URL)
# Only a failed import triggers the installation; any other error raised while
# importing the package is allowed to surface unchanged.
try:
    import tideway  # type: ignore
except ImportError:
    import importlib
    import subprocess
    print('Installing tideway via pip...')
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'tideway'])
    # The package appeared after the interpreter started, so drop the import
    # system's cached directory listings before trying again.
    importlib.invalidate_caches()
    import tideway  # type: ignore

# The appliance object is given the version with leading 'v' characters
# stripped (e.g. 'v1.14' -> '1.14').
API_VERSION_NUM = API_VERSION.lstrip('v')
app = tideway.appliance(target, token, api_version=API_VERSION_NUM, ssl_verify=VERIFY_SSL)
# Handle on which run_twql() issues every search.
twsearch = app.data()

# Quick probe (optional)
# A failure here is only printed as a warning; execution continues.
try:
    about = app.api_about
    print('Appliance reachable:', about.status_code)
except Exception as e:
    print('Warning: failed to contact appliance /api/about:', e)


## Helpers

These helpers normalize Tideway Data API responses into `List[Dict]` and `pandas.DataFrame`.


In [None]:
def list_table_to_json(table_like: List[List[Any]]) -> List[Dict[str, Any]]:
    """Convert a header-plus-rows table into a list of dicts.

    The first inner list is taken as the column headings; every following
    entry becomes one dict pairing headings with that row's values (pairing
    stops at the shorter of the two).

    Returns an empty list when *table_like* is empty, is not a list, or its
    first element is not a list. Rows that cannot be paired with the headings
    are skipped.
    """
    if not table_like:
        return []
    if not (isinstance(table_like, list) and isinstance(table_like[0], list)):
        return []

    header_row, *data_rows = table_like
    records: List[Dict[str, Any]] = []
    for cells in data_rows:
        try:
            record = dict(zip(header_row, cells))
        except Exception:
            # Malformed row: leave it out rather than fail the whole table.
            continue
        records.append(record)
    return records

def to_rows(payload: Any) -> List[Dict[str, Any]]:
    """Normalize a search result into a list of row dicts.

    Accepted shapes, for *payload* itself or for the value returned by its
    ``json()`` method when it has one:

    - a list whose first element is a list: a headings-plus-rows table,
      converted with ``list_table_to_json``;
    - a list whose first element is a dict: returned unchanged;
    - a dict with both ``'headings'`` and ``'results'`` keys: converted as a
      table built from those two entries.

    Anything else, including a ``json()`` call that raises, yields ``[]``.
    """

    def _normalize(data: Any) -> List[Dict[str, Any]]:
        # Shared handling for a decoded list or dict.
        if isinstance(data, list):
            if not data:
                return []
            first = data[0]
            if isinstance(first, list):
                return list_table_to_json(data)
            if isinstance(first, dict):
                return data
            return []
        if isinstance(data, dict) and 'results' in data and 'headings' in data:
            table_like = [data['headings']] + list(data.get('results') or [])
            return list_table_to_json(table_like)
        return []

    if isinstance(payload, list):
        return _normalize(payload)

    if hasattr(payload, 'json'):
        try:
            decoded = payload.json()
        except Exception:
            return []
        return _normalize(decoded)

    return _normalize(payload)

def twql_quote(value: str) -> str:
    """Escape *value* for embedding inside a single-quoted TWQL string literal.

    Every backslash is doubled and every single quote is prefixed with a
    backslash. The surrounding quotes are not added.
    """
    escapes = {ord('\\'): '\\\\', ord("'"): "\\'"}
    return value.translate(escapes)

def run_twql(query: str, *, limit: int = 0) -> pd.DataFrame:
    """Run a TWQL *query* through ``twsearch`` and return the rows as a DataFrame.

    The search is requested with ``format='object'`` and the given *limit*;
    the response is normalized by ``to_rows``. A response that yields no rows
    gives an empty DataFrame.
    """
    response = twsearch.search({'query': query}, format='object', limit=limit)
    records = to_rows(response)
    return pd.DataFrame(records)

def export_df(df: pd.DataFrame, filename: str) -> None:
    """Write *df* to ``output_dir / filename`` as CSV when EXPORT_CSV is enabled.

    The index is not written. Does nothing when EXPORT_CSV is falsy.
    """
    if EXPORT_CSV:
        destination = output_dir / filename
        df.to_csv(destination, index=False)
        print('Saved:', destination)

# itables is required for friendly table output.
# Install once in this notebook if needed:
# %pip install itables
# Only the import itself is guarded: a failure while configuring itables is a
# different problem and must not be reported as "itables is not installed".
try:
    from itables import init_notebook_mode, show as itables_show, options as itables_options  # type: ignore
except ImportError as e:
    raise ImportError("itables is required for table output. Install with: %pip install itables") from e

init_notebook_mode(all_interactive=True)
# Show ALL rows by default (DataTables uses -1 for 'All')
itables_options.pageLength = -1
itables_options.lengthMenu = [10, 25, 50, 100, 250, 500, 1000, -1]


def show_table(df: pd.DataFrame, label: str, **_ignored: object) -> None:
    """Print ``"<label>: <n> rows"`` and render *df* with itables when it has data.

    A ``None`` or empty frame is reported as 0 rows and nothing is rendered.
    Extra keyword arguments are accepted and ignored.
    """
    row_count = 0 if df is None or df.empty else len(df)
    print(f"{label}: {row_count} rows")
    if row_count:
        itables_show(df)


## Host lookup

Hosts are matched against `HOSTNAME` according to `HOST_MATCH_MODE`. A search can return several candidates; `HOST_SELECTION_INDEX` then picks which row is used.


In [None]:
# Host lookup: find candidate Host nodes for HOSTNAME, pick one, and build the
# `#id` filter (`host_where`) that the later cells use.
raw_hostname = HOSTNAME.strip()
if not raw_hostname:
    raise ValueError('Set HOSTNAME (top of notebook) before running.')

hn = twql_quote(raw_hostname)

# Build the candidate match clause: one predicate, applied to both the
# `hostname` and the `name` attribute.
# NOTE(review): in 'prefix' mode HOSTNAME is placed into the regex without
# regex-escaping, so characters such as '.' act as regex metacharacters —
# confirm this is intended.
if HOST_MATCH_MODE == 'exact':
    _host_predicate = f"= '{hn}'"
elif HOST_MATCH_MODE == 'prefix':
    _host_predicate = f"matches regex '(?i)\\A{hn}'"
elif HOST_MATCH_MODE == 'substring':
    _host_predicate = f"has substring '{hn}'"
elif HOST_MATCH_MODE == 'regex':
    _host_predicate = f"matches regex '(?i){hn}'"
else:
    raise ValueError("HOST_MATCH_MODE must be one of: 'exact', 'prefix', 'substring', 'regex'")
host_where_candidates = f"(hostname {_host_predicate} or name {_host_predicate})"

qry_hosts = f"""
search Host
where {host_where_candidates}
show #id as 'Host.#id', name, hostname, os_type, os, last_update_success
process with unique()
""".strip()

hosts_df = run_twql(qry_hosts, limit=LIMIT)
print(f"Host candidates: {len(hosts_df)}")
if hosts_df.empty:
    print('No matching Host records found.')
    raise ValueError('No matching Host records found; adjust HOSTNAME or HOST_MATCH_MODE.')
display(hosts_df.head(50))

# Select one host if multiple candidates returned.
if HOST_SELECTION_INDEX is None and len(hosts_df) > 1:
    raise ValueError(
        'Multiple Host records matched; set HOST_SELECTION_INDEX to pick one (0-based), '
        'or narrow HOSTNAME / HOST_MATCH_MODE.'
    )

# A None index (only possible here with a single candidate) means row 0.
selected_index = int(HOST_SELECTION_INDEX or 0)
if not 0 <= selected_index < len(hosts_df):
    raise ValueError(f'HOST_SELECTION_INDEX out of range (0..{len(hosts_df)-1}): {selected_index}')

selected_host = hosts_df.iloc[selected_index].to_dict()
selected_host_id = str(selected_host.get('Host.#id') or '').strip()
if not selected_host_id:
    raise ValueError('Host selection did not include Host.#id; cannot build a unique filter.')

# The id goes into a double-quoted literal, so escape backslashes and double quotes.
host_id_escaped = selected_host_id.translate({ord('\\'): '\\\\', ord('"'): '\\"'})
host_where = f'#id = "{host_id_escaped}"'

for _caption, _value in (
    ('Selected Host index :', selected_index),
    ('Selected Host #id   :', selected_host_id),
    ('Selected Host.name  :', selected_host.get('name')),
    ('Selected Host.hostname:', selected_host.get('hostname')),
):
    print(_caption, _value)


## Queries

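Runs the exploration queries in order (BusinessApplicationInstance, SoftwareInstance, CandidateSoftwareInstance, DiscoveredProcess, DiscoveredService, Package). Each result is returned as a DataFrame.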


## Nodekind exploration

Each nodekind runs in its own cell. If `FUZZY` is set, a second cell runs the fuzzy-filtered follow-up for that nodekind.


In [None]:
# Prepare the optional FUZZY filter fragments used by the follow-up cells.
fuzzy = (FUZZY or '').strip()
fuzzy_q = twql_quote(fuzzy) if fuzzy else None
# str.isdigit() also accepts characters such as '²' that int() rejects, which
# would raise ValueError here; isdecimal() only accepts what int() can parse.
fuzzy_pid = int(fuzzy) if fuzzy.isdecimal() else None

# Filter fragments (leading space included). Empty string means no filtering.
sw_filter = ''
csi_filter = ''
proc_filter = ''
svc_filter = ''
pkg_filter = ''

if fuzzy_q:
    sw_filter = (
        f" where (name has substring '{fuzzy_q}'"
        f" or type has substring '{fuzzy_q}'"
        f" or product_version has substring '{fuzzy_q}')"
    )
    pkg_filter = (
        f" where (name has substring '{fuzzy_q}'"
        f" or version has substring '{fuzzy_q}')"
    )
    csi_filter = (
        f" where (name has substring '{fuzzy_q}'"
        f" or type has substring '{fuzzy_q}'"
        f" or product_version has substring '{fuzzy_q}')"
    )
    # A purely numeric FUZZY value is additionally compared against the pid.
    proc_filter = (
        f" where (cmd has substring '{fuzzy_q}'"
        f" or args has substring '{fuzzy_q}'"
        f" or username has substring '{fuzzy_q}'"
        + (f" or pid = {fuzzy_pid}" if fuzzy_pid is not None else '')
        + ')'
    )
    svc_filter = (
        f" where (name has substring '{fuzzy_q}'"
        f" or display_name has substring '{fuzzy_q}'"
        f" or state has substring '{fuzzy_q}'"
        f" or cmd has substring '{fuzzy_q}')"
    )

# Combine DiscoveredService filters so we don't generate two 'where' clauses.
svc_where_running = " where state = 'RUNNING'" if RUNNING_SERVICES_ONLY else ''
if svc_filter and svc_where_running:
    # Both apply: turn the fuzzy filter's leading ' where ' into ' and '.
    svc_where_fuzzy = svc_where_running + svc_filter.replace(' where ', ' and ', 1)
else:
    # At most one of the two is non-empty.
    svc_where_fuzzy = svc_filter or svc_where_running


### BusinessApplicationInstances

Check whether this Host is related to any BusinessApplicationInstance nodes (useful context before reviewing SoftwareInstances/CSIs).


In [None]:
# BusinessApplicationInstance nodes related to the selected host.
qry_bai = f"""
search Host
where {host_where}
traverse :::BusinessApplicationInstance
show taxonomy 'summary'
process with unique()
""".strip()

bai_df = run_twql(qry_bai, limit=LIMIT)
# Tag every row with the hostname that was searched for.
bai_df.insert(0, 'Host', HOSTNAME)
show_table(bai_df, 'BusinessApplicationInstances (all)', text_filename='business_application_instances_full.txt')
# Export like the other nodekind cells do; a no-op unless EXPORT_CSV is True.
export_df(bai_df, 'business_application_instances.csv')


### SoftwareInstances


In [None]:
# All SoftwareInstance nodes related to the selected host.
qry_sw = f"""
search Host
where {host_where}
traverse :::SoftwareInstance
show taxonomy 'summary'
process with unique()
""".strip()

sw_df = run_twql(qry_sw, limit=LIMIT)
# Tag every row with the hostname that was searched for.
sw_df.insert(loc=0, column='Host', value=HOSTNAME)
print('SoftwareInstances (all) rows:', len(sw_df))
# Preview at most the first 50 rows.
if sw_df.empty:
    print('No SoftwareInstances returned.')
else:
    display(sw_df.head(50))
export_df(sw_df, 'software_instances.csv')


In [None]:
# Fuzzy follow-up: the same traversal, narrowed by sw_filter.
if fuzzy_q:
    qry_sw_fuzzy = f"""
    search Host
    where {host_where}
    traverse :::SoftwareInstance{sw_filter}
    show taxonomy 'summary'
    process with unique()
    """.strip()

    sw_fuzzy_df = run_twql(qry_sw_fuzzy, limit=LIMIT)
    # Record the searched hostname and the filter text alongside the results.
    sw_fuzzy_df.insert(loc=0, column='Host', value=HOSTNAME)
    sw_fuzzy_df.insert(loc=1, column='FUZZY', value=fuzzy)
    print('SoftwareInstances (fuzzy) rows:', len(sw_fuzzy_df))
    if sw_fuzzy_df.empty:
        print('No fuzzy-matched SoftwareInstances returned.')
    else:
        display(sw_fuzzy_df.head(50))
    export_df(sw_fuzzy_df, 'software_instances_fuzzy.csv')
else:
    print('FUZZY not set; skipping SoftwareInstances fuzzy follow-up.')


### CandidateSoftwareInstances


In [None]:
# All CandidateSoftwareInstance nodes related to the selected host.
qry_csi = f"""
search Host
where {host_where}
traverse :::CandidateSoftwareInstance
show taxonomy 'summary'
process with unique()
""".strip()

csi_df = run_twql(qry_csi, limit=LIMIT)
# Tag every row with the hostname that was searched for.
csi_df.insert(loc=0, column='Host', value=HOSTNAME)
print('CandidateSoftwareInstances (all) rows:', len(csi_df))
# Preview at most the first 50 rows.
if csi_df.empty:
    print('No CandidateSoftwareInstances returned.')
else:
    display(csi_df.head(50))
export_df(csi_df, 'candidate_software_instances.csv')


In [None]:
# Fuzzy follow-up: the same traversal, narrowed by csi_filter.
if fuzzy_q:
    qry_csi_fuzzy = f"""
    search Host
    where {host_where}
    traverse :::CandidateSoftwareInstance{csi_filter}
    show taxonomy 'summary'
    process with unique()
    """.strip()

    csi_fuzzy_df = run_twql(qry_csi_fuzzy, limit=LIMIT)
    # Record the searched hostname and the filter text alongside the results.
    csi_fuzzy_df.insert(loc=0, column='Host', value=HOSTNAME)
    csi_fuzzy_df.insert(loc=1, column='FUZZY', value=fuzzy)
    print('CandidateSoftwareInstances (fuzzy) rows:', len(csi_fuzzy_df))
    if csi_fuzzy_df.empty:
        print('No fuzzy-matched CandidateSoftwareInstances returned.')
    else:
        display(csi_fuzzy_df.head(50))
    export_df(csi_fuzzy_df, 'candidate_software_instances_fuzzy.csv')
else:
    print('FUZZY not set; skipping CandidateSoftwareInstances fuzzy follow-up.')


### DiscoveredProcesses


In [None]:
# All DiscoveredProcess nodes reached from the host through
# DiscoveryAccess -> ProcessList.
# NOTE(review): `where _last_marker` presumably restricts the traversal to the
# most recent DiscoveryAccess — confirm against the data model.
qry_proc = f"""
search Host
where {host_where}
traverse :::DiscoveryAccess where _last_marker
traverse :::ProcessList
traverse :::DiscoveredProcess
show taxonomy 'summary'
process with unique()
""".strip()

proc_df = run_twql(qry_proc, limit=LIMIT)
# Tag every row with the hostname that was searched for.
proc_df.insert(loc=0, column='Host', value=HOSTNAME)
show_table(
    proc_df,
    'DiscoveredProcesses (all)',
    text_filename='discovered_processes_full.txt',
)
export_df(proc_df, 'discovered_processes.csv')


In [None]:
# Fuzzy follow-up: the same traversal, narrowed by proc_filter.
if fuzzy_q:
    qry_proc_fuzzy = f"""
    search Host
    where {host_where}
    traverse :::DiscoveryAccess where _last_marker
    traverse :::ProcessList
    traverse :::DiscoveredProcess{proc_filter}
    show taxonomy 'summary'
    process with unique()
    """.strip()

    proc_fuzzy_df = run_twql(qry_proc_fuzzy, limit=LIMIT)
    # Record the searched hostname and the filter text alongside the results.
    proc_fuzzy_df.insert(loc=0, column='Host', value=HOSTNAME)
    proc_fuzzy_df.insert(loc=1, column='FUZZY', value=fuzzy)
    show_table(
        proc_fuzzy_df,
        'DiscoveredProcesses (fuzzy)',
        text_filename='discovered_processes_fuzzy_full.txt',
    )
    export_df(proc_fuzzy_df, 'discovered_processes_fuzzy.csv')
else:
    print('FUZZY not set; skipping DiscoveredProcesses fuzzy follow-up.')


### DiscoveredServices


In [None]:
# All DiscoveredService nodes reached from the host through
# DiscoveryAccess -> ServiceList, optionally restricted to RUNNING services.
if RUNNING_SERVICES_ONLY:
    svc_where_all = " where state = 'RUNNING'"
else:
    svc_where_all = ''
qry_svc = f"""
search Host
where {host_where}
traverse :::DiscoveryAccess where _last_marker
traverse :::ServiceList
traverse :::DiscoveredService{svc_where_all}
show taxonomy 'summary'
process with unique()
""".strip()

svc_df = run_twql(qry_svc, limit=LIMIT)
# Tag every row with the hostname that was searched for.
svc_df.insert(loc=0, column='Host', value=HOSTNAME)
show_table(
    svc_df,
    'DiscoveredServices (all)',
    text_filename='discovered_services_full.txt',
)
export_df(svc_df, 'discovered_services.csv')


In [None]:
# Fuzzy follow-up: the same traversal, narrowed by svc_where_fuzzy (the
# combined RUNNING-state and fuzzy filter).
if fuzzy_q:
    qry_svc_fuzzy = f"""
    search Host
    where {host_where}
    traverse :::DiscoveryAccess where _last_marker
    traverse :::ServiceList
    traverse :::DiscoveredService{svc_where_fuzzy}
    show taxonomy 'summary'
    process with unique()
    """.strip()

    svc_fuzzy_df = run_twql(qry_svc_fuzzy, limit=LIMIT)
    # Record the searched hostname and the filter text alongside the results.
    svc_fuzzy_df.insert(loc=0, column='Host', value=HOSTNAME)
    svc_fuzzy_df.insert(loc=1, column='FUZZY', value=fuzzy)
    show_table(
        svc_fuzzy_df,
        'DiscoveredServices (fuzzy)',
        text_filename='discovered_services_fuzzy_full.txt',
    )
    export_df(svc_fuzzy_df, 'discovered_services_fuzzy.csv')
else:
    print('FUZZY not set; skipping DiscoveredServices fuzzy follow-up.')


### Packages


In [None]:
# All Package nodes related to the selected host.
qry_pkg = f"""
search Host
where {host_where}
traverse :::Package
show taxonomy 'summary'
process with unique()
""".strip()

pkg_df = run_twql(qry_pkg, limit=LIMIT)
# Tag every row with the hostname that was searched for.
pkg_df.insert(loc=0, column='Host', value=HOSTNAME)
show_table(
    pkg_df,
    'Packages (all)',
    text_filename='packages_full.txt',
)
export_df(pkg_df, 'packages.csv')


In [None]:
# Fuzzy follow-up: the same traversal, narrowed by pkg_filter.
if fuzzy_q:
    qry_pkg_fuzzy = f"""
    search Host
    where {host_where}
    traverse :::Package{pkg_filter}
    show taxonomy 'summary'
    process with unique()
    """.strip()

    pkg_fuzzy_df = run_twql(qry_pkg_fuzzy, limit=LIMIT)
    # Record the searched hostname and the filter text alongside the results.
    pkg_fuzzy_df.insert(loc=0, column='Host', value=HOSTNAME)
    pkg_fuzzy_df.insert(loc=1, column='FUZZY', value=fuzzy)
    show_table(
        pkg_fuzzy_df,
        'Packages (fuzzy)',
        text_filename='packages_fuzzy_full.txt',
    )
    export_df(pkg_fuzzy_df, 'packages_fuzzy.csv')
else:
    print('FUZZY not set; skipping Packages fuzzy follow-up.')
