# Host Utilisation (BMC Discovery)

This notebook reproduces the DisMAL `host_utilisation` report, listing non-hypervisor hosts with basic utilisation indicators.
It reads connection details from `config.yaml`, queries the appliance, and writes `host_utilisation.csv` under `output_<target>`.

## Requirements

This notebook uses the `tideway` package installed with pip (from the package index rather than a local copy), plus `pandas` and `PyYAML`.
Uncomment the following to install in your environment if needed.

In [None]:
# %pip install -q tideway pandas pyyaml

import os, sys
from pathlib import Path
import pandas as pd
import yaml
from typing import Any, Dict, List


## Select Appliance (optional)

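If your `config.yaml` defines multiple appliances under the `appliances:` list,
set `APPLIANCE_NAME` to one of their names (e.g., 'prod' or 'dev') or set `APPLIANCE_INDEX` to the zero-based position of the appliance in that list.
A name takes precedence over the index. The first appliance is used if neither is set or if the index is invalid.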

In [None]:
# Appliance to query, matched against the `name` of each entry under `appliances:`.
# When this is set it takes precedence and APPLIANCE_INDEX is not consulted.
APPLIANCE_NAME = None   # e.g., 'prod' or 'dev'
# Zero-based position in the `appliances:` list, used when APPLIANCE_NAME is empty.
# The configuration cell falls back to the first appliance if this index cannot be used.
# (Alternative value kept from the original notebook: APPLIANCE_INDEX = 0)
APPLIANCE_INDEX = 1

## Configuration (from config.yaml)

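Reads settings from `config.yaml`, which is located by searching the current working directory and then its parent directories (normally this resolves to `../config.yaml`). The settings include target, token/token_file,
API version, and SSL verification preference.
Saves the CSV to `output_<target>/host_utilisation.csv` in the same directory as `config.yaml`, where `<target>` is the target with `.`, `:` and `/` replaced by `_`.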

In [None]:
def _find_repo_root(start: Path) -> Path:
    """Return the closest directory, at or above *start*, that holds config.yaml.

    The search begins with *start* itself and then walks outwards through its
    ancestors. When none of them contains a ``config.yaml`` the parent of
    *start* is returned instead.
    """
    candidates = (start, *start.parents)
    located = next(
        (folder for folder in candidates if (folder / 'config.yaml').exists()),
        None,
    )
    return start.parent if located is None else located

# Locate config.yaml (working directory or one of its ancestors) and parse it.
repo_root = _find_repo_root(Path.cwd())
config_path = repo_root / 'config.yaml'
# Explicit UTF-8 so parsing does not depend on the platform's locale encoding.
with open(config_path, 'r', encoding='utf-8') as fh:
    cfg = yaml.safe_load(fh) or {}

# Appliance selection: a name wins over an index; with neither, the first entry is used.
apps = cfg.get('appliances') or []
selected = None
if isinstance(apps, list) and apps:
    if APPLIANCE_NAME:
        selected = next((a for a in apps if a.get('name') == APPLIANCE_NAME), None)
        if selected is None:
            raise ValueError(f"No appliance named '{APPLIANCE_NAME}' in config.yaml")
    else:
        try:
            selected = apps[int(APPLIANCE_INDEX)]
        except NameError:
            # APPLIANCE_INDEX was never defined: default to the first appliance.
            selected = apps[0]
        except (TypeError, ValueError, IndexError, OverflowError):
            # Keep the best-effort fallback, but say so: otherwise the report is
            # silently produced for a different appliance than the one requested.
            print(
                f'Warning: APPLIANCE_INDEX={APPLIANCE_INDEX!r} is not usable with '
                f'{len(apps)} appliance(s); using the first appliance.'
            )
            selected = apps[0]

# Per-appliance values take priority over the top-level ones.
target = ((selected or {}).get('target') or cfg.get('target') or '').strip()
if not target:
    raise ValueError('config.yaml missing "target"')

# Token handling: inline token or token file
token = (((selected or {}).get('token') or cfg.get('token') or '').strip())
token_file = (selected or {}).get('token_file') or cfg.get('token_file') or cfg.get('f_token')
if not token and token_file:
    tf_path = Path(token_file)
    # Relative token paths are resolved against the directory holding config.yaml.
    if not tf_path.is_absolute():
        tf_path = repo_root / tf_path
    with open(tf_path, 'r', encoding='utf-8') as tf:
        token = tf.read().strip()
if not token:
    raise ValueError('API token not found in config.yaml (token or token_file)')

# Version and SSL
API_VERSION = str((selected or {}).get('api_version') or cfg.get('api_version') or 'v1.14')
# A key that is present but empty (YAML null) counts as "not set", so it cannot
# silently switch certificate verification off; verification defaults to on.
verify_setting = (selected or {}).get('verify_ssl')
if verify_setting is None:
    verify_setting = cfg.get('verify_ssl')
VERIFY_SSL = True if verify_setting is None else bool(verify_setting)

# Output path: '.', ':' and '/' in the target are replaced to get a usable folder name.
sanitized = target.replace('.', '_').replace(':', '_').replace('/', '_')
output_dir = repo_root / f'output_{sanitized}'
output_dir.mkdir(parents=True, exist_ok=True)

# Only whether a token exists is printed, never the token itself.
print('Base Host      :', target)
print('API Version    :', API_VERSION)
print('Verify SSL     :', VERIFY_SSL)
print('Output folder  :', output_dir)
print('Token set      :', bool(token))

# Import tideway; if the package is missing, install it with pip into the
# environment of the running interpreter (pip honours PIP_INDEX_URL/PIP_EXTRA_INDEX_URL).
try:
    import tideway  # type: ignore
except ImportError:
    # Only a missing package justifies a pip attempt; any other import-time
    # failure would recur after installing, so it is left to propagate.
    import importlib
    import subprocess
    print('Installing tideway via pip...')
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'tideway'])
    # The import system caches directory listings; refresh them so the package
    # installed a moment ago is visible to the retry below.
    importlib.invalidate_caches()
    import tideway  # retry

# The client is handed the version without its leading 'v' (e.g. 'v1.14' -> '1.14').
API_VERSION_NUM = API_VERSION.lstrip('v')
app = tideway.appliance(target, token, api_version=API_VERSION_NUM, ssl_verify=VERIFY_SSL)
twsearch = app.data()

# Quick probe (optional): a failure is reported but does not stop the notebook.
try:
    about = app.api_about
    print('Appliance reachable:', about.status_code)
except Exception as e:
    print('Warning: failed to contact appliance /api/about:', e)


## Query

The TWQL mirrors `core/queries.py:host_utilisation`.

In [None]:
# TWQL for the host utilisation report. It searches Host nodes whose type is
# not 'Hypervisor' and shows identity/OS fields plus four nodecount() columns:
# software instances, candidate software instances, running processes and
# running services. The 'Host.*' aliases become the column names used by the
# later cells.
# NOTE(review): the line beginning '#InferredElement' is inside the string
# literal, so Python sends it to the appliance as part of the query rather than
# treating it as a comment -- confirm that it is meant to be active.
qry_host_util = '''
                        search Host where type <> 'Hypervisor'
                        show
                        hostname as 'Host.hostname',
                        hash(hostname) as 'Host.hostname_hash',
                        os as 'Host.os',
                        os_type as 'Host.os_type',
                        virtual as 'Host.virtual',
                        cloud as 'Host.cloud',
                        #InferredElement:Inference:Associate:DiscoveryAccess.endpoint as 'DiscoveryAccess.endpoint',
                        nodecount(traverse :::SoftwareInstance) as 'Host.running_software_instances',
                        nodecount(traverse :::CandidateSoftwareInstance) as 'Host.candidate_software_instances',
                        nodecount(traverse :::DiscoveryAccess where _last_marker traverse :::ProcessList traverse :::DiscoveredProcess) as 'Host.running_processes',
                        nodecount(traverse :::DiscoveryAccess where _last_marker traverse :::ServiceList traverse :::DiscoveredService where state = 'RUNNING') as 'Host.running_services'
                      '''


## Helpers (normalization)

In [None]:
def list_table_to_json(table_like: List[List[Any]]) -> List[Dict[str, Any]]:
    """Convert a header-first table into a list of row dictionaries.

    Args:
        table_like: A list whose first item is the list of column headings and
            whose remaining items are the data rows.

    Returns:
        One dict per usable row, mapping each heading to the value in the same
        position. An empty list is returned when the input is not a non-empty
        list or when its first item is not a list. Because the pairing is done
        with ``zip``, a row shorter or longer than the headings is cut to the
        shorter of the two lengths.
    """
    # Type is checked before truthiness so objects with an ambiguous truth
    # value are rejected instead of raising.
    if not isinstance(table_like, list) or not table_like:
        return []
    headers, *rows = table_like
    if not isinstance(headers, list):
        return []
    out: List[Dict[str, Any]] = []
    for row in rows:
        try:
            out.append(dict(zip(headers, row)))
        except TypeError:
            # A non-iterable row (e.g. None) or unhashable headings cannot form
            # a record; skip the row and keep the rest of the table.
            continue
    return out

def to_rows(payload: Any) -> List[Dict[str, Any]]:
    """Normalise a search result into a list of row dictionaries.

    Accepted shapes:
      * a list of lists (first item = headings) -> converted with
        ``list_table_to_json``;
      * a list of dicts -> returned unchanged;
      * a dict carrying both 'headings' and 'results' -> converted with
        ``list_table_to_json``;
      * any non-list object exposing ``json()`` -> its decoded value is
        handled by the same rules.

    Anything else, including a ``json()`` call that raises, yields ``[]``.
    """

    def _from_list(items: List[Any]) -> List[Dict[str, Any]]:
        # Only the first element decides how the whole list is interpreted.
        if items:
            head = items[0]
            if isinstance(head, list):
                return list_table_to_json(items)
            if isinstance(head, dict):
                return items
        return []

    def _from_mapping(obj: Any) -> List[Dict[str, Any]]:
        # Both keys are required; a missing/empty 'results' value means no rows.
        if isinstance(obj, dict) and 'results' in obj and 'headings' in obj:
            return list_table_to_json([obj['headings'], *(obj.get('results') or [])])
        return []

    if isinstance(payload, list):
        return _from_list(payload)
    if not hasattr(payload, 'json'):
        return _from_mapping(payload)

    try:
        decoded = payload.json()
    except Exception:
        return []
    if isinstance(decoded, list):
        return _from_list(decoded)
    return _from_mapping(decoded)


## Run search and normalize

Executes the query via the Tideway Data API, converts the result to a DataFrame, and coerces the four count columns to integers (values that cannot be parsed as numbers become 0).

In [None]:
# Run the TWQL through the Data API and normalise the reply into row dicts.
search_response = twsearch.search({'query': qry_host_util}, format='object', limit=0)
rows = to_rows(search_response)
df = pd.DataFrame(rows)
print(f'Total hosts: {len(df)}')

# Preview the first 20 rows, or say so when nothing came back.
if df.empty:
    print('No records returned.')
else:
    display(df.head(20))

# Count columns that are stored with the pandas Int64 dtype.
num_cols = [
    'Host.running_software_instances',
    'Host.candidate_software_instances',
    'Host.running_processes',
    'Host.running_services',
]
# Columns missing from the result are left alone rather than created.
for col in [name for name in num_cols if name in df.columns]:
    as_number = pd.to_numeric(df[col], errors='coerce')  # unparseable -> NaN
    df[col] = as_number.fillna(0).astype('Int64')         # NaN -> 0


## Prepare output and save CSV

Insert 'Discovery Instance' as the first column, order the remaining columns alphabetically, and save to `output_<target>/host_utilisation.csv`.

In [None]:
# Work on a copy so the frame built by the search cell is left untouched.
df_out = df.copy()
df_out.insert(0, 'Discovery Instance', target)

# Column order: 'Discovery Instance' first, every other column alphabetical.
other_cols = sorted(c for c in df_out.columns if c != 'Discovery Instance')
df_out = df_out[['Discovery Instance', *other_cols]]

if df_out.empty:
    print('No records to save.')
else:
    display(df_out.head(20))

# The CSV is written whether or not any rows were returned.
OUTPUT_CSV = str(output_dir.joinpath('host_utilisation.csv'))
df_out.to_csv(OUTPUT_CSV, index=False)
print(f'Saved to {OUTPUT_CSV}')


---
### Notes
- Numeric columns are coerced to integers to match CLI behavior.
- The search above uses `limit=0` (all results); pass a positive `limit` instead if you want to cap the number of rows returned for a large dataset.
- You can extend the query with additional fields if desired.