# Outpost Credentials (CSV Exports)

This notebook rebuilds the DisMAL `outpost_creds` report from raw Discovery CSV exports.
It reads appliance entries from `config.yaml`, processes the credential/outpost exports,
and writes the per-instance CSV without calling the Discovery API.


## Requirements

This notebook depends on `pandas` and `PyYAML`. If they are not installed, uncomment the `%pip install` line in the next cell before running it.


In [None]:
# %pip install -q pandas pyyaml

from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional

import pandas as pd
import yaml


## Configuration

Adjust these values to control which instances are processed and where outputs are written.


In [None]:
# Root folder containing raw_exports/<instance> subdirectories.
# A relative path is resolved against the notebook's working directory.
RAW_EXPORT_ROOT = Path("../../raw_exports")

# Optional filters (set INCLUDE_INSTANCES to something like ["prod"] to limit processing).
# Values are compared with the appliance `name` taken from config.yaml;
# None or an empty INCLUDE_INSTANCES disables the include filter.
INCLUDE_INSTANCES: Optional[Iterable[str]] = None
EXCLUDE_INSTANCES: Iterable[str] = ()

# Optional override for outputs (per appliance sub-folder is created automatically).
# When left as None, the output_<target> folders are created under the repository root.
OUTPUT_BASE_DIR = None  # e.g., Path("../../csv_outputs")
# File name of the report written inside each appliance's output folder.
OUTPUT_FILENAME = "outpost_creds.csv"


In [None]:
def find_repo_root(start: Path) -> Path:
    """Locate the repository root at or above ``start``.

    The search walks from ``start`` up through each of its parents and returns
    the first directory that holds a ``config.yaml`` entry or a ``.git``
    directory. When no directory qualifies, ``start`` itself is returned.
    """

    def looks_like_root(folder: Path) -> bool:
        return (folder / "config.yaml").exists() or (folder / ".git").is_dir()

    lineage = (start, *start.parents)
    return next((folder for folder in lineage if looks_like_root(folder)), start)

# Locate the repository root from the notebook's working directory and load
# the config.yaml found there.
NOTEBOOK_DIR = Path.cwd()
REPO_ROOT = find_repo_root(NOTEBOOK_DIR)
CONFIG_PATH = REPO_ROOT / "config.yaml"

if not CONFIG_PATH.exists():
    raise FileNotFoundError(f"config.yaml not found at {CONFIG_PATH}")

with CONFIG_PATH.open("r", encoding="utf-8") as fh:
    # An empty file parses to None; treat that as an empty configuration.
    cfg = yaml.safe_load(fh) or {}

# A YAML document whose root is a list or scalar would otherwise surface later
# as an opaque AttributeError on `cfg.get`; fail here with a clear message.
if not isinstance(cfg, dict):
    raise ValueError(
        f"config.yaml must contain a mapping at the top level, got {type(cfg).__name__}"
    )

# Read the appliance definitions from the config. `appliances` may be a list
# of mappings or a single mapping; anything else is rejected up front.
appliance_entries = cfg.get("appliances") or []
if isinstance(appliance_entries, dict):
    appliance_entries = [appliance_entries]
elif not isinstance(appliance_entries, list):
    raise ValueError(
        "'appliances' in config.yaml must be a list of mappings or a single mapping, "
        f"got {type(appliance_entries).__name__}"
    )

if not appliance_entries:
    # No `appliances` section: build one entry from the top-level
    # `name` / `target` keys, defaulting the name to the target or "default".
    fallback_target = cfg.get("target")
    fallback_name = cfg.get("name") or (fallback_target or "default")
    appliance_entries = [{"name": fallback_name, "target": fallback_target}]

# Normalise each entry to stripped strings; entries without a name are dropped
# and a missing target defaults to the name.
available_appliances: List[Dict[str, Any]] = []
for entry in appliance_entries:
    if not isinstance(entry, dict):
        # e.g. a bare string in the YAML list; it has no name/target keys to read.
        print(f"Skipping appliance entry that is not a mapping: {entry!r}")
        continue
    name = str(entry.get("name") or "").strip()
    target = str(entry.get("target") or "").strip()
    if not name:
        continue
    available_appliances.append({"name": name, "target": target or name})

if not available_appliances:
    raise ValueError("No appliances with a name found in config.yaml")

# Resolve the raw-export root (relative paths are anchored at the notebook
# directory) and make sure it is present before scanning it.
exports_root = RAW_EXPORT_ROOT if RAW_EXPORT_ROOT.is_absolute() else (NOTEBOOK_DIR / RAW_EXPORT_ROOT).resolve()
if not exports_root.exists():
    raise FileNotFoundError(f"Raw export root not found: {exports_root}")


def _filter_names(values: Optional[Iterable[str]]) -> set:
    """Return the stripped, non-blank names in ``values`` as a set.

    A bare string is treated as a single name; iterating it would otherwise
    yield individual characters and silently filter out every appliance.
    """
    if isinstance(values, str):
        values = [values]
    stripped = (str(value).strip() for value in (values or []))
    return {text for text in stripped if text}


include_set = _filter_names(INCLUDE_INSTANCES)
exclude_set = _filter_names(EXCLUDE_INSTANCES)

# Map each sub-directory name under the export root to its path.
available_dirs = {path.name: path for path in exports_root.iterdir() if path.is_dir()}

selected_appliances: List[Dict[str, Any]] = []
skipped_missing: List[str] = []
skipped_filtered: List[str] = []

# Keep an appliance only if it passes the include/exclude filters and has an
# export directory named exactly like the appliance.
for appliance in available_appliances:
    name = appliance["name"]
    if (include_set and name not in include_set) or name in exclude_set:
        skipped_filtered.append(name)
        continue
    export_dir = available_dirs.get(name)
    if export_dir is None:
        skipped_missing.append(name)
        continue
    selected_appliances.append({
        "name": name,
        "target": appliance.get("target") or name,
        "export_dir": export_dir,
    })

# Summarise what was discovered and which appliances will be processed.
print(f"Repo root         : {REPO_ROOT}")
print(f"Config path       : {CONFIG_PATH}")
print(f"Exports root      : {exports_root}")
print(f"Config appliances : {[a['name'] for a in available_appliances]}")
print(f"Export directories: {sorted(available_dirs)}")
print(f"Selected          : {[a['name'] for a in selected_appliances]}")
if skipped_missing:
    print(f"Missing export dirs: {skipped_missing}")
if skipped_filtered:
    print(f"Skipped by filter  : {skipped_filtered}")

# Stop early when nothing is left to process. The message previously carried a
# mis-decoded dash ("â€“"); a plain ASCII hyphen keeps it encoding-proof.
if not selected_appliances:
    raise RuntimeError("No appliances selected for processing - check raw exports and filters.")


In [None]:
# Columns stripped from a loaded export by drop_metadata() when present.
METADATA_COLUMNS = ["Appliance Target", "Appliance Name", "Query Title"]

# Export file names looked up inside each appliance's export directory.
OUTPOSTS_FILENAME = "outposts_with_ips.csv"
CREDENTIALS_FILENAME = "session_outpost_credentials.csv"
# Read instead when the primary credentials export is missing or empty.
FALLBACK_CREDENTIALS_FILENAME = "outpost_credentials.csv"

from pandas.errors import EmptyDataError


def load_csv(path: Path) -> pd.DataFrame:
    """Read ``path`` into a DataFrame, tolerating absent or empty files.

    A missing file or a file with no parseable content is reported with a
    printed message and yields an empty DataFrame instead of raising.
    """
    frame = pd.DataFrame()
    if path.exists():
        try:
            frame = pd.read_csv(path, low_memory=False)
        except EmptyDataError:
            print(f"Empty CSV: {path}")
    else:
        print(f"Missing CSV: {path}")
    return frame


def drop_metadata(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without whichever ``METADATA_COLUMNS`` it contains."""
    present = [label for label in METADATA_COLUMNS if label in df.columns]
    return df.drop(columns=present, errors="ignore")


def coerce_unique(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Normalise ``df[col]`` to whitespace-stripped strings, in place.

    Missing values (``None``/``NaN``) stay missing instead of being turned into
    the literal strings ``"None"``/``"nan"``, which would otherwise look like
    real identifiers downstream.

    Args:
        df: Frame to update; it is modified in place.
        col: Column to normalise. Nothing happens when it is absent.

    Returns:
        The same ``df`` object, to allow chaining.
    """
    if col not in df.columns:
        return df
    series = df[col]
    # Capture the missing cells before the cast: astype(str) would stringify them.
    missing = series.isna()
    cleaned = series.astype(str).str.strip()
    df[col] = cleaned.mask(missing)
    return df


In [None]:
def build_output_dir(target: str) -> Path:
    """Return the output folder path for ``target``.

    The folder is named ``output_<target>`` with every ``.``, ``:`` and ``/``
    in the target replaced by ``_`` (an empty target becomes ``unknown``). It
    sits under ``OUTPUT_BASE_DIR`` (user-expanded and resolved) when that is
    set, otherwise under ``REPO_ROOT``. The directory itself is not created here.
    """
    to_underscore = str.maketrans({".": "_", ":": "_", "/": "_"})
    folder_name = "output_" + (target or "unknown").translate(to_underscore)
    if OUTPUT_BASE_DIR is not None:
        return Path(OUTPUT_BASE_DIR).expanduser().resolve() / folder_name
    return REPO_ROOT / folder_name


def load_outposts(export_dir: Path) -> pd.DataFrame:
    """Load the outposts export from ``export_dir`` with normalised column names.

    Metadata columns are dropped and the ``Outpost.*`` headers are renamed to
    ``outpost_id`` / ``outpost_uuid`` / ``outpost_name`` / ``outpost_url``.
    When no id column is present, the uuid column is copied into ``outpost_id``.
    """
    csv_path = export_dir / OUTPOSTS_FILENAME
    frame = drop_metadata(load_csv(csv_path))
    if frame.empty and csv_path.exists():
        print("Outposts export present but empty.")

    header_map = {
        "Outpost.id": "outpost_id",
        "Outpost.uuid": "outpost_uuid",
        "Outpost.name": "outpost_name",
        "Outpost.url": "outpost_url",
    }
    frame = frame.rename(columns=header_map)

    has_id = "outpost_id" in frame.columns
    has_uuid = "outpost_uuid" in frame.columns
    if has_uuid and not has_id:
        frame["outpost_id"] = frame["outpost_uuid"]
    return frame


def load_credential_map(export_dir: Path) -> pd.DataFrame:
    """Load the credential-to-outpost associations for one export directory.

    ``CREDENTIALS_FILENAME`` is read first; when it is missing or has no rows,
    ``FALLBACK_CREDENTIALS_FILENAME`` is read instead. The credential and
    outpost columns are located through a list of known header aliases and
    returned under the canonical names ``credential_uuid`` and ``outpost_id``.

    Args:
        export_dir: Directory holding the raw CSV exports of one appliance.

    Returns:
        A DataFrame containing whichever of ``credential_uuid`` /
        ``outpost_id`` could be located, as stripped strings, with rows that
        have a missing or blank value removed and duplicates dropped. When
        neither column is found, an empty frame with both columns is returned.
    """
    primary = drop_metadata(load_csv(export_dir / CREDENTIALS_FILENAME))
    if primary.empty:
        print("Primary session_outpost_credentials.csv empty or missing; falling back to outpost_credentials.csv")
        primary = drop_metadata(load_csv(export_dir / FALLBACK_CREDENTIALS_FILENAME))

    # Accepted headers per canonical column, in priority order. Exactly one
    # source column is picked for each, so two aliases present in the same file
    # can no longer collapse into duplicate column names.
    header_aliases = {
        "credential_uuid": ("credential_uuid", "Credential", "credential", "uuid"),
        "outpost_id": ("outpost_id", "Outpost Id", "Outpost", "outpost"),
    }
    located = {}
    for canonical, aliases in header_aliases.items():
        source = next((alias for alias in aliases if alias in primary.columns), None)
        if source is not None:
            located[canonical] = primary[source]
    if not located:
        return pd.DataFrame(columns=["credential_uuid", "outpost_id"])

    # Drop missing values first so the string cast below cannot stringify them.
    mapping = pd.DataFrame(located).dropna(how="any")
    for column in list(mapping.columns):
        mapping = coerce_unique(mapping, column)

    # Filter blanks and de-duplicate only after stripping, so that " x " and
    # "x" count as the same value and whitespace-only cells count as empty.
    blank = mapping.isin(["", "None"]).any(axis=1)
    return mapping[~blank].drop_duplicates()


def process_instance(instance: Dict[str, Any]) -> Dict[str, Any]:
    """Build and write the outpost-credentials report for one appliance.

    Loads the outposts and credential-association exports from the instance's
    export directory, left-joins outpost details onto each association, and
    writes the result as ``OUTPUT_FILENAME`` in the instance's output folder.

    Args:
        instance: Mapping with ``name``, ``export_dir`` and optionally
            ``target`` (defaults to ``name``).

    Returns:
        A summary dict with the keys ``instance``, ``target``, ``output_path``,
        ``rows`` and ``status``.
    """
    name = instance["name"]
    target = instance.get("target") or name
    export_dir: Path = instance["export_dir"]
    print(f"=== Processing {name} ({target}) ===")

    output_dir = build_output_dir(target)
    output_dir.mkdir(parents=True, exist_ok=True)

    outposts = load_outposts(export_dir)
    creds_map = load_credential_map(export_dir)

    if outposts.empty:
        # Rows are still written when credential associations exist; only the
        # outpost URL/name columns end up empty.
        print("No outposts found; outpost details will be blank")
    if creds_map.empty:
        print("No credential associations found.")

    # The join needs the key on both sides; the credential map may lack it when
    # its export has no recognisable outpost column.
    can_join = (
        not outposts.empty
        and "outpost_id" in outposts.columns
        and "outpost_id" in creds_map.columns
    )
    if creds_map.empty:
        merged = pd.DataFrame(columns=["outpost_id", "credential_uuid"])
    elif can_join:
        # The credential map carries its ids as stripped strings, so give the
        # outposts key the same form; otherwise numeric ids fail the merge on a
        # dtype mismatch and padded ids silently never match.
        lookup = outposts.copy()
        lookup["outpost_id"] = lookup["outpost_id"].astype(str).str.strip()
        merged = creds_map.merge(lookup, how="left", on="outpost_id")
    else:
        merged = creds_map.copy()

    merged = merged.rename(columns={
        "outpost_url": "Outpost URL",
        "outpost_id": "Outpost Id",
        "outpost_name": "Outpost Name",
        "credential_uuid": "Credential UUID",
    })
    output_cols = [
        "Outpost URL",
        "Outpost Id",
        "Outpost Name",
        "Credential UUID",
    ]
    # Guarantee a stable report layout even when an input lacked a column.
    for col in output_cols:
        if col not in merged.columns:
            merged[col] = None

    df_out = merged[output_cols].drop_duplicates().reset_index(drop=True)
    df_out.insert(0, "Discovery Instance", target)

    output_csv = output_dir / OUTPUT_FILENAME
    df_out.to_csv(output_csv, index=False)

    print(f"Output rows: {len(df_out)} | Saved to {output_csv}")
    display(df_out.head(10))

    return {
        "instance": name,
        "target": target,
        "output_path": output_csv,
        "rows": int(len(df_out)),
        "status": "ok",
    }


In [None]:
# Process every selected appliance, collecting one summary record per instance.
# A plain loop is used so records gathered so far remain in `results` if a
# later instance raises.
results: List[Dict[str, Any]] = []
for appliance in selected_appliances:
    results.append(process_instance(appliance))

summary_df = pd.DataFrame(results)
if "output_path" in summary_df.columns:
    # Show output locations as plain strings; None entries are left untouched.
    summary_df["output_path"] = [
        None if location is None else str(location)
        for location in summary_df["output_path"]
    ]
display(summary_df)
