# Discovery Analysis (CSV Exports)

This notebook rebuilds the DisMAL `discovery_analysis` report from raw Discovery CSV exports.
It reads appliance definitions from `config.yaml`, iterates over the available export folders,
and writes the consolidated dataset for each instance without calling the Discovery API.


## Requirements

We rely on `pandas`, `numpy`, and `PyYAML` for data wrangling. If any of them is missing, uncomment the `%pip install` line at the top of the cell below before running it.


In [None]:
# %pip install -q pandas numpy pyyaml

import math
import datetime as dt
from ast import literal_eval
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional

import numpy as np
import pandas as pd
import yaml
from pandas.errors import EmptyDataError


## Configuration

Adjust these values to point at your raw CSV exports and to control which instances are processed.


In [None]:
# Root folder that contains one sub-directory of raw CSV exports per instance.
# Sub-directory names are matched against the appliance names from config.yaml;
# a relative path is resolved against the notebook's working directory.
RAW_EXPORT_ROOT = Path("../raw_exports")

# Optional filters (set INCLUDE_INSTANCES to something like ["prod"] to limit processing).
# Leaving INCLUDE_INSTANCES as None (or empty) processes every appliance that is
# not listed in EXCLUDE_INSTANCES. Names are compared after stripping whitespace.
INCLUDE_INSTANCES: Optional[Iterable[str]] = None
EXCLUDE_INSTANCES: Iterable[str] = ()

# Optional credential UUID filter (accepts full path or just the trailing UUID).
# When set, only rows whose last credential matches are kept; None disables it.
FILTER_CREDENTIAL_UUID = None  # e.g., "7636fe3b4bd69466ab487f0000010700"

# Optional override for where outputs are written (per appliance sub-folder is created automatically).
# With None, the "output_<target>" folders are created under the repository root.
OUTPUT_BASE_DIR = None  # e.g., Path("../../csv_outputs")
# File name of the CSV written inside each per-appliance output folder.
OUTPUT_FILENAME = "discovery_analysis.csv"


In [None]:
def find_repo_root(start: Path) -> Path:
    """Walk upwards from *start* and return the first directory that looks like the repo root.

    A directory qualifies when it holds a ``config.yaml`` file or a ``.git``
    directory. If no ancestor qualifies, *start* itself is returned.
    """
    for directory in (start, *start.parents):
        has_config = (directory / "config.yaml").exists()
        if has_config or (directory / ".git").is_dir():
            return directory
    return start

# --- Locate and load config.yaml -------------------------------------------
NOTEBOOK_DIR = Path.cwd()
REPO_ROOT = find_repo_root(NOTEBOOK_DIR)
CONFIG_PATH = REPO_ROOT / "config.yaml"

if not CONFIG_PATH.exists():
    raise FileNotFoundError(f"config.yaml not found at {CONFIG_PATH}")

with CONFIG_PATH.open("r", encoding="utf-8") as fh:
    cfg = yaml.safe_load(fh) or {}

# --- Normalise the appliance list ------------------------------------------
# "appliances" may be a list of mappings or a single mapping.
appliance_entries = cfg.get("appliances") or []
if isinstance(appliance_entries, dict):
    appliance_entries = [appliance_entries]

# Without an "appliances" section, fall back to the top-level name/target keys.
if not appliance_entries:
    fallback_target = cfg.get("target")
    fallback_name = cfg.get("name") or (fallback_target or "default")
    appliance_entries = [{"name": fallback_name, "target": fallback_target}]

available_appliances: List[Dict[str, Any]] = []
for entry in appliance_entries:
    # A malformed entry (e.g. a bare string) has no .get(); skip it with a
    # message instead of failing with an AttributeError.
    if not isinstance(entry, dict):
        print(f"Skipping malformed appliance entry (expected a mapping): {entry!r}")
        continue
    name = str(entry.get("name") or "").strip()
    target = str(entry.get("target") or "").strip()
    if not name:
        continue
    available_appliances.append({
        "name": name,
        "target": target or name,
    })

if not available_appliances:
    raise ValueError("No appliances with a name found in config.yaml")

# --- Resolve the export root and apply include/exclude filters -------------
exports_root = RAW_EXPORT_ROOT if RAW_EXPORT_ROOT.is_absolute() else (NOTEBOOK_DIR / RAW_EXPORT_ROOT).resolve()
if not exports_root.exists():
    raise FileNotFoundError(f"Raw export root not found: {exports_root}")

include_set = {str(v).strip() for v in (INCLUDE_INSTANCES or []) if str(v).strip()}
exclude_set = {str(v).strip() for v in (EXCLUDE_INSTANCES or []) if str(v).strip()}

# Export folders are matched to appliances by exact directory name.
available_dirs = {path.name: path for path in exports_root.iterdir() if path.is_dir()}

selected_appliances: List[Dict[str, Any]] = []
skipped_missing: List[str] = []
skipped_filtered: List[str] = []

for appliance in available_appliances:
    name = appliance["name"]
    if include_set and name not in include_set:
        skipped_filtered.append(name)
        continue
    if name in exclude_set:
        skipped_filtered.append(name)
        continue
    export_dir = available_dirs.get(name)
    if not export_dir:
        skipped_missing.append(name)
        continue
    selected_appliances.append({
        "name": name,
        "target": appliance.get("target") or name,
        "export_dir": export_dir,
    })

# --- Report what was found and what will be processed ----------------------
print(f"Repo root         : {REPO_ROOT}")
print(f"Config path       : {CONFIG_PATH}")
print(f"Exports root      : {exports_root}")
print(f"Config appliances : {[a['name'] for a in available_appliances]}")
print(f"Export directories: {sorted(available_dirs)}")
print(f"Selected          : {[a['name'] for a in selected_appliances]}")
if skipped_missing:
    print(f"Missing export dirs: {skipped_missing}")
if skipped_filtered:
    print(f"Skipped by filter  : {skipped_filtered}")

if not selected_appliances:
    raise RuntimeError("No appliances selected for processing - check raw exports and filters.")


In [None]:
# Bookkeeping columns that drop_metadata() removes from every export before use.
METADATA_COLUMNS = ["Appliance Target", "Appliance Name", "Query Title"]
# Identity-export columns whose values are parsed as lists and pooled into
# "list_of_ips" by build_identity_table().
IDENTITY_IP_COLUMNS = [
    "DiscoveryAccess.endpoint",
    "Endpoint.endpoint",
    "DiscoveredIPAddress.ip_addr",
    "InferredElement.__all_ip_addrs",
    "NetworkInterface.ip_addr",
]
# Identity-export columns pooled into "list_of_names" by build_identity_table().
IDENTITY_NAME_COLUMNS = [
    "InferredElement.name",
    "InferredElement.hostname",
    "InferredElement.local_fqdn",
    "InferredElement.sysname",
    "NetworkInterface.fqdns",
]
# Column order of the output CSV. process_instance() builds each row
# positionally, so this list and that row layout must stay in sync.
OUTPUT_COLUMNS = [
    "endpoint",
    "device_name",
    "list_of_device_names",
    "list_of_endpoints",
    "node_kind",
    "os_type",
    "os_version",
    "os_class",
    "discovery_run",
    "discovery_run_start",
    "discovery_run_end",
    "scan_start",
    "scan_end",
    "scan_end_raw",
    "when_was_that",
    "consistency",
    "current_access",
    "access_method",
    "inferred_node_updated",
    "reason_not_updated",
    "end_state",
    "previous_end_state",
    "end_state_change",
    "session_results_logged",
    "last_credential",
    "credential_name",
    "credential_login",
    "timestamp",
    "da_id",
    "prev_da_id",
    "next_node_id",
]

# Strings that is_missing() treats as "no value" (compared stripped and lower-cased).
INVALID_STRINGS = {"", "none", "nan", "null"}

def load_csv(path: Path) -> pd.DataFrame:
    """Read *path* into a DataFrame, tolerating absent or empty files.

    A missing file or a file with no parsable data yields an empty DataFrame
    and a printed notice instead of an exception.
    """
    if path.exists():
        try:
            frame = pd.read_csv(path, low_memory=False)
        except EmptyDataError:
            print(f"Empty CSV: {path}")
            frame = pd.DataFrame()
        return frame
    print(f"Missing CSV: {path}")
    return pd.DataFrame()

def drop_metadata(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* without whichever METADATA_COLUMNS it happens to contain."""
    present = [column for column in METADATA_COLUMNS if column in df.columns]
    return df.drop(columns=present, errors="ignore")

def ensure_columns(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
    """Add every column from *columns* that *df* lacks, filled with None.

    The frame is modified in place and also returned for convenience.
    """
    absent = [name for name in columns if name not in df.columns]
    for name in absent:
        df[name] = None
    return df

def is_missing(value) -> bool:
    """Tell whether *value* should be treated as "no data".

    Missing means None, a float NaN, or a string that (stripped and
    lower-cased) is one of INVALID_STRINGS. Anything else counts as present.
    """
    if value is None:
        return True
    if isinstance(value, float):
        return math.isnan(value)
    if isinstance(value, str):
        return value.strip().lower() in INVALID_STRINGS
    return False

def to_clean_str(value) -> Optional[str]:
    """Return *value* as a whitespace-stripped string, or None when it is missing."""
    return None if is_missing(value) else str(value).strip()

def parse_listish(value):
    """Coerce a CSV cell into a list of clean, non-empty strings.

    * A real list is cleaned item by item.
    * A missing value becomes an empty list.
    * Text shaped like a Python list literal (``"['a', 'b']"``) is parsed with
      ``ast.literal_eval`` and cleaned.
    * Anything else, including list-like text that cannot be parsed, is
      returned as a single-item list holding the stripped text.

    The result never contains None or blank entries.
    """

    def clean_items(items) -> List[str]:
        # Clean each item once and keep only those that are still non-empty.
        cleaned = (to_clean_str(item) for item in items)
        return [text for text in cleaned if text]

    if isinstance(value, list):
        return clean_items(value)
    if is_missing(value):
        return []

    text = str(value).strip()
    if text.startswith("[") and text.endswith("]"):
        try:
            parsed = literal_eval(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # literal_eval documents all of these for malformed input (e.g. an
            # unhashable dict key raises TypeError); fall back to the raw text.
            parsed = None
        if isinstance(parsed, list):
            return clean_items(parsed)
    return clean_items([text])

def combine_values(row: pd.Series, columns: List[str]) -> List[str]:
    """Pool the values of *columns* in *row* into one sorted list of unique strings.

    List cells contribute each of their items; scalar cells contribute their
    stripped string form unless missing. Missing and blank entries are dropped.
    """
    gathered: List[Any] = []
    for column in columns:
        cell = row.get(column, [])
        if isinstance(cell, list):
            gathered.extend(cell)
        elif not is_missing(cell):
            gathered.append(str(cell).strip())
    cleaned = (to_clean_str(item) for item in gathered)
    return sorted({text for text in cleaned if text})

def merge_lists(series: pd.Series) -> List[str]:
    """Flatten a series of lists into one sorted list of unique clean strings.

    Empty or falsy entries in *series* are skipped.
    """
    cleaned = (
        to_clean_str(item)
        for values in series
        if values
        for item in values
    )
    return sorted({text for text in cleaned if text})

def collect_unique(series: pd.Series) -> List[str]:
    """Return the sorted unique clean strings found in *series*, skipping missing values."""
    cleaned = map(to_clean_str, series)
    return sorted({text for text in cleaned if text})

def clean_uuid(value) -> Optional[str]:
    """Reduce a credential reference to its lower-cased last path segment.

    Returns None when *value* is missing.
    """
    text = to_clean_str(value)
    return text.rsplit("/", 1)[-1].lower() if text is not None else None

def to_bool(value) -> bool:
    """Interpret *value* as a boolean.

    Real booleans pass through unchanged. Otherwise only the strings "true",
    "1" and "yes" (case-insensitive, after cleaning) are True; everything
    else, including missing values, is False.
    """
    if isinstance(value, bool):
        return value
    text = to_clean_str(value)
    return text is not None and text.lower() in {"true", "1", "yes"}

def format_timestamp(value) -> Optional[str]:
    """Render a timestamp for output.

    datetime values are returned in ISO 8601 form, with naive ones labelled as
    UTC first. Any other value is passed through to_clean_str().
    """
    if not isinstance(value, dt.datetime):
        return to_clean_str(value)
    aware = value if value.tzinfo is not None else value.replace(tzinfo=dt.timezone.utc)
    return aware.isoformat()

def calc_when(ts: Optional[dt.datetime]) -> Optional[str]:
    """Describe how long ago *ts* was, as a coarse age-band label.

    Returns None when *ts* is None. A naive *ts* is taken to be UTC. The age
    is measured in minutes against the current UTC time and mapped to the
    first band whose upper limit it falls below.
    """
    if ts is None:
        return None
    reference = ts if ts.tzinfo is not None else ts.replace(tzinfo=dt.timezone.utc)
    age_minutes = (dt.datetime.now(dt.timezone.utc) - reference).total_seconds() / 60.0
    # (upper limit in minutes, label), in ascending order
    age_bands = (
        (60, "Less than 60 minutes ago"),
        (1440, "Less than 24 hours ago"),
        (10080, "Less than 7 days ago"),
        (43830, "Less than 1 month ago"),
        (131487, "Less than 3 months ago"),
        (262974, "Less than 6 months ago"),
        (525949, "Less than 12 months ago"),
    )
    return next(
        (label for limit, label in age_bands if age_minutes < limit),
        "Over a year ago",
    )

def build_identity_table(id_df: pd.DataFrame) -> pd.DataFrame:
    """Collapse the identities export into one row per primary endpoint.

    Each row's primary endpoint is the first clean value found in
    "DiscoveryAccess.endpoint", then "Endpoint.endpoint"; rows without one are
    discarded. The result has the columns "Identities.endpoint",
    "list_of_ips" and "list_of_names", where the two list columns hold the
    sorted unique values pooled from IDENTITY_IP_COLUMNS and
    IDENTITY_NAME_COLUMNS across all rows sharing that endpoint.

    A None or empty input yields an empty frame with those three columns.
    """
    if id_df is None or id_df.empty:
        return pd.DataFrame(columns=["Identities.endpoint", "list_of_ips", "list_of_names"])

    frame = id_df.copy()
    row_count = len(frame)

    # Every identity column ends up holding a list per row; absent columns
    # are created with empty lists.
    for column in IDENTITY_IP_COLUMNS + IDENTITY_NAME_COLUMNS:
        if column not in frame.columns:
            frame[column] = [[] for _ in range(row_count)]
            continue
        frame[column] = frame[column].apply(parse_listish)

    def first_endpoint(access_values, endpoint_values) -> Optional[str]:
        candidates = (access_values or []) + (endpoint_values or [])
        return next((text for text in map(to_clean_str, candidates) if text), None)

    if "Endpoint.endpoint" in frame.columns:
        endpoint_cells = frame["Endpoint.endpoint"]
    else:
        endpoint_cells = [[] for _ in range(row_count)]
    frame["Identities.endpoint"] = [
        first_endpoint(access_values, endpoint_values)
        for access_values, endpoint_values in zip(frame["DiscoveryAccess.endpoint"], endpoint_cells)
    ]
    frame = frame[frame["Identities.endpoint"].notna()].copy()

    frame["ips_tmp"] = frame.apply(lambda row: combine_values(row, IDENTITY_IP_COLUMNS), axis=1)
    frame["names_tmp"] = frame.apply(lambda row: combine_values(row, IDENTITY_NAME_COLUMNS), axis=1)

    per_endpoint = frame.groupby("Identities.endpoint", dropna=False)
    pooled = per_endpoint.agg({"ips_tmp": merge_lists, "names_tmp": merge_lists}).reset_index()
    return pooled.rename(columns={"ips_tmp": "list_of_ips", "names_tmp": "list_of_names"})


In [None]:
def build_output_dir(target: str) -> Path:
    """Return the output folder for *target* (the folder is not created here).

    The folder is named "output_<target>" with ".", ":" and "/" replaced by
    "_"; an empty target becomes "unknown". It sits under OUTPUT_BASE_DIR
    (user-expanded and resolved) when that is set, otherwise under REPO_ROOT.
    """
    separators = str.maketrans({".": "_", ":": "_", "/": "_"})
    folder_name = "output_" + (target or "unknown").translate(separators)
    if OUTPUT_BASE_DIR is not None:
        return Path(OUTPUT_BASE_DIR).expanduser().resolve() / folder_name
    return REPO_ROOT / folder_name


def empty_output(label: str) -> pd.DataFrame:
    """Return a zero-row frame with "Discovery Instance" followed by OUTPUT_COLUMNS."""
    frame = pd.DataFrame(columns=OUTPUT_COLUMNS)
    frame.insert(0, "Discovery Instance", label)
    return frame.head(0)


# Sort key used for records that carry no parsable timestamp.
_MIN_TIMESTAMP = dt.datetime.min.replace(tzinfo=dt.timezone.utc)


def _first_present(*candidates):
    """Return the first candidate that is not missing (None, NaN or blank), else None."""
    for candidate in candidates:
        if not is_missing(candidate):
            return candidate
    return None


def _parse_scan_timestamp(scan_end_raw, friendly_end) -> Optional[dt.datetime]:
    """Parse an end time, preferring the raw ISO value over the friendly text.

    The friendly form is read from its first two space-separated tokens as
    "%Y-%m-%d %H:%M:%S". Naive results are labelled as UTC. Returns None when
    neither value can be parsed.
    """
    ts = None
    raw = to_clean_str(scan_end_raw)
    if raw:
        try:
            ts = dt.datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError:
            ts = None
    if ts is None:
        friendly = to_clean_str(friendly_end)
        if friendly:
            try:
                ts = dt.datetime.strptime(" ".join(friendly.split(" ")[:2]), "%Y-%m-%d %H:%M:%S")
            except ValueError:
                ts = None
    if ts is not None and ts.tzinfo is None:
        ts = ts.replace(tzinfo=dt.timezone.utc)
    return ts


def _compute_consistency(states: List[Optional[str]]) -> Optional[str]:
    """Summarise how uniform an endpoint's end states are.

    Returns "Always X" when every state equals the most common one,
    "Usually X" when at most two differ, "Most Often X" otherwise, and None
    when there are no usable states.
    """
    cleaned = [state for state in map(to_clean_str, states) if state]
    if not cleaned:
        return None
    total = len(cleaned)
    counts: Dict[str, int] = {}
    for state in cleaned:
        counts[state] = counts.get(state, 0) + 1
    top = max(counts, key=counts.get)
    if counts[top] == total:
        return f"Always {top}"
    if counts[top] >= total - 2:
        return f"Usually {top}"
    return f"Most Often {top}"


def _timestamp_key(record: Dict[str, Any]) -> dt.datetime:
    """Sort key: the record's timestamp, or the minimum datetime when it has none."""
    return record.get("timestamp") or _MIN_TIMESTAMP


def _select_endpoint_record(records: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Pick the record that represents an endpoint in the output.

    The newest record that carries a hostname or credential name wins; if
    none does, the newest record overall is used. Fields missing on the
    chosen record are then filled from the newest record.
    """
    latest = max(records, key=_timestamp_key)
    # is_missing() is used rather than truthiness because NaN (how pandas
    # represents an empty CSV cell) is truthy.
    named = [
        rec for rec in records
        if not is_missing(rec.get("hostname")) or not is_missing(rec.get("credential_name"))
    ]
    chosen = max(named, key=_timestamp_key) if named else latest
    for key, value in latest.items():
        if is_missing(chosen.get(key)) and not is_missing(value):
            chosen[key] = value
    return chosen


def _record_to_row(rec: Dict[str, Any]) -> List[Any]:
    """Lay out one endpoint record positionally, in OUTPUT_COLUMNS order."""
    end_state = rec.get("end_state")
    previous_end_state = rec.get("previous_end_state")
    if is_missing(end_state):
        end_state_change = None
    else:
        # A missing previous state is rendered as "None", never as "nan".
        shown_previous = None if is_missing(previous_end_state) else previous_end_state
        end_state_change = f"{shown_previous} -> {end_state}"
    return [
        rec.get("endpoint"),
        rec.get("hostname"),
        rec.get("list_of_names"),
        rec.get("list_of_endpoints"),
        rec.get("node_kind"),
        rec.get("os_type"),
        rec.get("os_version"),
        rec.get("os_class"),
        rec.get("disco_run"),
        rec.get("run_start"),
        rec.get("run_end"),
        rec.get("scan_start"),
        rec.get("scan_end"),
        rec.get("scan_end_raw"),
        rec.get("when_was_that"),
        rec.get("consistency"),
        rec.get("current_access"),
        rec.get("access_method"),
        rec.get("inferred_node_updated"),
        rec.get("reason_not_updated"),
        end_state,
        previous_end_state,
        end_state_change,
        rec.get("session_results_logged"),
        rec.get("last_credential"),
        rec.get("credential_name"),
        rec.get("credential_login"),
        format_timestamp(rec.get("timestamp")),
        rec.get("da_id"),
        rec.get("prev_da_id"),
        rec.get("next_node_id"),
    ]


def process_instance(instance: Dict[str, Any]) -> Dict[str, Any]:
    """Rebuild the discovery analysis dataset for one instance and write it to CSV.

    Args:
        instance: Mapping with "name", an optional "target" (defaults to the
            name) and "export_dir", the folder holding that instance's CSVs.

    Returns:
        A summary dict with "instance", "target", "output_path", "rows" and
        "status" ("ok", or "no-data" when the merged exports have no rows).

    Side effects:
        Creates the output folder, writes OUTPUT_FILENAME into it, prints
        progress, and calls display() on the first rows (display is expected
        to be supplied by the notebook environment).
    """
    name = instance["name"]
    label = instance.get("target") or name
    export_dir: Path = instance["export_dir"]
    print(f"=== Processing {name} ({label}) ===")

    output_dir = build_output_dir(label)
    output_dir.mkdir(parents=True, exist_ok=True)
    output_csv = output_dir / OUTPUT_FILENAME

    def load_export(filename: str) -> pd.DataFrame:
        return drop_metadata(load_csv(export_dir / filename))

    key_df = load_export("discovery_analysis_key_map.csv")
    access_df = load_export("discovery_analysis_access_summary.csv")
    device_df = load_export("discovery_analysis_deviceinfo.csv")
    run_df = load_export("discovery_analysis_discovery_runs.csv")
    session_df = load_export("discovery_analysis_session_results.csv")
    dropped_df = load_export("discovery_analysis_dropped_endpoints.csv")
    identities_raw = load_export("devices_report_identities.csv")

    print(f"Key map rows         : {len(key_df)}")
    print(f"Access summary rows  : {len(access_df)}")
    print(f"Device info rows     : {len(device_df)}")
    print(f"Discovery run rows   : {len(run_df)}")
    print(f"Session result rows  : {len(session_df)}")
    print(f"Dropped endpoint rows: {len(dropped_df)}")

    # Guarantee the columns used below exist even when an export lacks them.
    key_df = ensure_columns(key_df, [
        "DiscoveryAccess.id",
        "DiscoveryAccess.previous_id",
        "DiscoveryAccess.next_id",
        "DeviceInfo.id",
        "DiscoveryRun.id",
        "SessionResult.id",
    ])
    access_df = ensure_columns(access_df, [
        "DiscoveryAccess.id",
        "DiscoveryAccess.endpoint",
        "DiscoveryAccess.scan_starttime",
        "DiscoveryAccess.scan_endtime",
        "DiscoveryAccess.scan_endtime_raw",
        "DiscoveryAccess.when_last_scan",
        "DiscoveryAccess.current_access",
        "DiscoveryAccess.node_kind",
        "DiscoveryAccess.host_node_updated",
        "DiscoveryAccess.reason_not_updated",
        "DiscoveryAccess.end_state",
    ])
    device_df = ensure_columns(device_df, [
        "DeviceInfo.id",
        "DeviceInfo.hostname",
        "DeviceInfo.os_type",
        "DeviceInfo.os_version",
        "DeviceInfo.os_class",
        "DeviceInfo.kind",
        "DeviceInfo.inferred_kind",
        "DeviceInfo.last_access_method",
        "DeviceInfo.probed_os",
        "DeviceInfo.last_credential",
        "DeviceInfo.last_slave",
    ])
    run_df = ensure_columns(run_df, [
        "DiscoveryRun.id",
        "DiscoveryRun.label",
        "DiscoveryRun.starttime",
        "DiscoveryRun.endtime",
    ])
    session_df = ensure_columns(session_df, [
        "SessionResult.id",
        "SessionResult.provider",
        "SessionResult.session_type",
        "SessionResult.success",
    ])

    # Index the identity table by every IP it lists, for lookup by endpoint.
    identities_df = build_identity_table(identities_raw)
    id_map = {
        ip: rec
        for rec in identities_df.to_dict("records")
        for ip in rec.get("list_of_ips", []) or []
        if ip
    }
    print(f"Identity endpoints   : {len(identities_df)}")

    # Left-join each detail export onto the key map; empty exports are skipped.
    merged = key_df.copy()
    for frame, join_key in (
        (access_df, "DiscoveryAccess.id"),
        (device_df, "DeviceInfo.id"),
        (run_df, "DiscoveryRun.id"),
        (session_df, "SessionResult.id"),
    ):
        if not frame.empty and join_key in frame.columns:
            merged = merged.merge(frame, on=join_key, how="left")

    if merged.empty or "DiscoveryAccess.id" not in merged.columns:
        print("No merged rows available; writing empty output")
        empty_output(label).to_csv(output_csv, index=False)
        return {
            "instance": name,
            "target": label,
            "output_path": output_csv,
            "rows": 0,
            "status": "no-data",
        }

    # Normalised once, after the merge, so unmatched rows (NaN) become False.
    if "DiscoveryAccess.host_node_updated" in merged.columns:
        merged["DiscoveryAccess.host_node_updated"] = merged["DiscoveryAccess.host_node_updated"].apply(to_bool)
    else:
        merged["DiscoveryAccess.host_node_updated"] = False

    if "DeviceInfo.last_credential" in merged.columns:
        merged["last_cred_short"] = merged["DeviceInfo.last_credential"].apply(clean_uuid)
    else:
        merged["last_cred_short"] = None

    if FILTER_CREDENTIAL_UUID:
        wanted = clean_uuid(FILTER_CREDENTIAL_UUID)
        merged = merged[merged["last_cred_short"] == wanted].copy()
        print(f"Filtered merged rows to credential {wanted}: {len(merged)}")

    # True when any row of the same access has a session result provider.
    if "SessionResult.provider" in merged.columns:
        merged["DiscoveryAccess.session_results_logged"] = (
            merged.groupby("DiscoveryAccess.id")["SessionResult.provider"].transform(lambda s: s.notna().any()).fillna(False)
        )
    else:
        merged["DiscoveryAccess.session_results_logged"] = False

    # Look up the end state of the access referenced by previous_id.
    prev_ids = merged.get("DiscoveryAccess.previous_id")
    if access_df.empty or prev_ids is None:
        merged["DiscoveryAccess.previous_end_state"] = None
    else:
        end_state_by_id = access_df.set_index("DiscoveryAccess.id")["DiscoveryAccess.end_state"].to_dict()
        merged["DiscoveryAccess.previous_end_state"] = prev_ids.map(end_state_by_id)

    all_false = pd.Series([False] * len(merged), index=merged.index)
    lam = merged.get("DeviceInfo.last_access_method")
    if lam is None:
        lam = pd.Series([None] * len(merged), index=merged.index)
    slave = merged["DeviceInfo.last_slave"].apply(to_bool) if "DeviceInfo.last_slave" in merged.columns else all_false
    probed = merged["DeviceInfo.probed_os"].apply(to_bool) if "DeviceInfo.probed_os" in merged.columns else all_false

    # current_access is the last access method, except that a probed OS is
    # reported as "Probe" unless the method is windows/rcmd with last_slave set.
    lam = lam.astype("object")
    via_slave = lam.isin(["windows", "rcmd"]) & slave
    merged["DiscoveryAccess.access_method"] = lam
    merged["DiscoveryAccess.current_access"] = np.where(via_slave, lam, np.where(probed, "Probe", lam))

    print(f"Merged rows          : {len(merged)}")

    dropped_records = dropped_df.to_dict("records") if not dropped_df.empty else []
    # No credential details are loaded from the exports, so credential_name
    # and credential_login are always empty in the output.
    cred_map: Dict[str, Dict[str, Optional[str]]] = {}

    # Group scan rows and dropped-endpoint rows by endpoint.
    by_endpoint: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
    for _, row in merged.iterrows():
        ep = to_clean_str(row.get("DiscoveryAccess.endpoint"))
        if not ep:
            continue
        by_endpoint.setdefault(ep, {"discos": [], "dropped": []})["discos"].append(row.to_dict())
    for rec in dropped_records:
        ep = to_clean_str(rec.get("Endpoint"))
        if not ep:
            continue
        by_endpoint.setdefault(ep, {"discos": [], "dropped": []})["dropped"].append(rec)

    endpoint_rows: List[Dict[str, Any]] = []
    for ep, recs in by_endpoint.items():
        ident = id_map.get(ep, {})
        names = ident.get("list_of_names") or []
        eps = ident.get("list_of_ips") or []
        fallback_name = names[0] if names else None

        states = [r.get("DiscoveryAccess.end_state") for r in recs["discos"]] + [r.get("End_State") for r in recs["dropped"]]
        consistency = _compute_consistency(states)

        endpoint_records: List[Dict[str, Any]] = []

        for d in recs["discos"]:
            scan_end_raw = d.get("DiscoveryAccess.scan_endtime_raw")
            ts = _parse_scan_timestamp(scan_end_raw, d.get("DiscoveryAccess.scan_endtime"))
            last_cred_uuid = d.get("last_cred_short")
            cred_info = cred_map.get(last_cred_uuid or "", {})
            endpoint_records.append({
                "endpoint": ep,
                # _first_present skips NaN cells, which a plain `or` chain would keep.
                "hostname": _first_present(d.get("DeviceInfo.hostname"), fallback_name),
                "list_of_names": names,
                "list_of_endpoints": eps,
                "node_kind": _first_present(
                    d.get("DiscoveryAccess.node_kind"),
                    d.get("DeviceInfo.kind"),
                    d.get("DeviceInfo.inferred_kind"),
                ),
                "os_type": d.get("DeviceInfo.os_type"),
                "os_version": d.get("DeviceInfo.os_version"),
                "os_class": d.get("DeviceInfo.os_class"),
                "disco_run": d.get("DiscoveryRun.label"),
                "run_start": d.get("DiscoveryRun.starttime"),
                "run_end": d.get("DiscoveryRun.endtime"),
                "scan_start": d.get("DiscoveryAccess.scan_starttime"),
                "scan_end": d.get("DiscoveryAccess.scan_endtime"),
                "scan_end_raw": scan_end_raw,
                "when_was_that": calc_when(ts) if ts else None,
                "consistency": consistency,
                "current_access": d.get("DiscoveryAccess.current_access"),
                "access_method": d.get("DiscoveryAccess.access_method"),
                "inferred_node_updated": d.get("DiscoveryAccess.host_node_updated"),
                "reason_not_updated": d.get("DiscoveryAccess.reason_not_updated"),
                "end_state": d.get("DiscoveryAccess.end_state"),
                "previous_end_state": d.get("DiscoveryAccess.previous_end_state"),
                "session_results_logged": d.get("DiscoveryAccess.session_results_logged"),
                "last_credential": last_cred_uuid,
                "credential_name": cred_info.get("label"),
                "credential_login": cred_info.get("username"),
                "timestamp": ts,
                "da_id": d.get("DiscoveryAccess.id"),
                "prev_da_id": d.get("DiscoveryAccess.previous_id"),
                "next_node_id": d.get("DiscoveryAccess.next_id"),
            })

        for d in recs["dropped"]:
            ts = _parse_scan_timestamp(d.get("End_Raw"), d.get("End"))
            endpoint_records.append({
                "endpoint": ep,
                "hostname": fallback_name,
                "list_of_names": names,
                "list_of_endpoints": eps,
                "disco_run": d.get("Run"),
                "run_start": d.get("Start"),
                "run_end": d.get("End"),
                "when_was_that": calc_when(ts) if ts else None,
                "consistency": consistency,
                "reason_not_updated": d.get("Reason_Not_Updated"),
                "end_state": d.get("End_State"),
                "timestamp": ts,
                "scan_end_raw": d.get("End_Raw"),
            })

        if not endpoint_records:
            continue
        endpoint_rows.append(_select_endpoint_record(endpoint_records))

    df_out = pd.DataFrame([_record_to_row(rec) for rec in endpoint_rows], columns=OUTPUT_COLUMNS)
    df_out.insert(0, "Discovery Instance", label)
    df_out.to_csv(output_csv, index=False)

    print(f"Output rows: {len(df_out)} | Saved to {output_csv}")
    display(df_out.head(5))

    return {
        "instance": name,
        "target": label,
        "output_path": output_csv,
        "rows": int(len(df_out)),
        "status": "ok",
    }


In [None]:
# Process every selected appliance, collecting one summary record per instance.
results: List[Dict[str, Any]] = []
for appliance in selected_appliances:
    results.append(process_instance(appliance))

summary_df = pd.DataFrame(results)
if "output_path" in summary_df.columns:
    # Output paths are shown as plain strings in the summary table.
    summary_df["output_path"] = [
        None if path is None else str(path)
        for path in summary_df["output_path"]
    ]
display(summary_df)
