In [None]:
# Optional: install deps if needed
# %pip install requests python-dotenv ipywidgets pandas --quiet

from __future__ import annotations
from datetime import datetime, timedelta, timezone, date
from typing import Any, Dict, Iterable, List, Optional, Tuple
import os
import time
import json

import pandas as pd
import requests

# Try to load API key from .env if available.
# python-dotenv is optional: any failure in this block (package not installed,
# or an error raised by load_dotenv itself) is ignored and the process
# environment is used as-is.
try:
    from dotenv import load_dotenv  # type: ignore
    load_dotenv()
except Exception:
    pass

# The key may be supplied under either variable name; ARKHAM_API_KEY takes
# precedence when both are set. Execution stops here when neither is present.
ARKHAM_API_KEY = os.getenv("ARKHAM_API_KEY") or os.getenv("ARKHAM_INTEL_API_KEY")
if not ARKHAM_API_KEY:
    raise RuntimeError("Set ARKHAM_API_KEY in your .env or environment.")

# Default base URL (adjust if the docs specify a different one)
ARKHAM_API_BASE_URL = os.getenv("ARKHAM_API_BASE_URL", "https://intel.arkm.com/api")

# Reasonable default timeout and backoff.
# All three are overridable through environment variables; the values are
# parsed eagerly, so a malformed override raises ValueError right here.
# HTTP_BACKOFF_SECS is a base delay that _http_request scales by the attempt
# number.
HTTP_TIMEOUT_SECS = float(os.getenv("ARKHAM_HTTP_TIMEOUT", "30"))
HTTP_BACKOFF_SECS = float(os.getenv("ARKHAM_HTTP_BACKOFF", "0.75"))
MAX_RETRIES = int(os.getenv("ARKHAM_HTTP_MAX_RETRIES", "3"))

# One shared session object used for every request issued by _http_request.
SESSION = requests.Session()


def build_headers(api_key: str) -> Dict[str, str]:
    """Return the HTTP headers sent with each Arkham API request.

    The key is presented twice, once as an ``Authorization: Bearer`` token and
    once as ``X-API-Key``, so that whichever scheme the server accepts works.
    """
    bearer_token = f"Bearer {api_key}"
    header_pairs = [
        ("Authorization", bearer_token),
        ("X-API-Key", api_key),
        ("Accept", "application/json"),
        ("User-Agent", "op-analytics/arkham-entity-transfers-notebook"),
    ]
    return dict(header_pairs)


# Headers are built once from the configured key and passed to every call
# made through _http_request.
HEADERS = build_headers(ARKHAM_API_KEY)


def _http_request(method: str, path: str, *, params: Optional[Dict[str, Any]] = None, json_body: Optional[Dict[str, Any]] = None) -> requests.Response:
    """Send one HTTP request through the shared session, retrying transient failures.

    Args:
        method: HTTP verb, e.g. ``"GET"``.
        path: Absolute ``http://``/``https://`` URL, or a path that is joined
            onto ``ARKHAM_API_BASE_URL``.
        params: Optional query-string parameters.
        json_body: Optional JSON request body.

    Returns:
        The first response that is not a 429/5xx. If every attempt ends in a
        429/5xx, the last such response is returned so the caller can inspect
        its status and body instead of receiving an uninformative error.

    Raises:
        requests.RequestException: When the final attempt failed at the
            transport level (connection error, timeout, ...).
    """
    # Only a real scheme marks an absolute URL; a relative path that merely
    # starts with the letters "http" must still be joined onto the base URL.
    if path.startswith(("http://", "https://")):
        url = path
    else:
        url = f"{ARKHAM_API_BASE_URL.rstrip('/')}/{path.lstrip('/')}"

    # Always make at least one attempt, even if MAX_RETRIES is configured <= 0.
    attempts = max(1, MAX_RETRIES)
    last_exc: Optional[Exception] = None
    last_resp: Optional[requests.Response] = None

    for attempt in range(1, attempts + 1):
        try:
            resp = SESSION.request(method=method, url=url, headers=HEADERS, params=params, json=json_body, timeout=HTTP_TIMEOUT_SECS)
        except requests.RequestException as exc:  # network hiccup
            last_exc, last_resp = exc, None
        else:
            retryable = resp.status_code == 429 or (500 <= resp.status_code < 600)
            if not retryable:
                return resp
            # Track only the outcome of the most recent attempt so a stale
            # exception from an earlier try is never raised later.
            last_resp, last_exc = resp, None
        # Linear backoff between attempts; no point sleeping after the last one.
        if attempt < attempts:
            time.sleep(HTTP_BACKOFF_SECS * attempt)

    if last_resp is not None:
        return last_resp
    if last_exc is not None:
        raise last_exc
    raise RuntimeError("HTTP request failed without exception.")


def _try_paths_get(paths: Iterable[str], params: Optional[Dict[str, Any]] = None) -> Tuple[str, requests.Response]:
    """GET each candidate path in order until one succeeds.

    A response counts as success when ``Response.ok`` is true (status < 400).
    A path whose request raises (transport error or exhausted retries) is
    recorded as a failure and the next candidate is tried, so one bad endpoint
    does not prevent the remaining fallbacks from being attempted.

    Args:
        paths: Candidate endpoint paths, tried in iteration order.
        params: Optional query parameters sent with every candidate.

    Returns:
        ``(path, response)`` for the first successful candidate.

    Raises:
        RuntimeError: When no candidate succeeds; the message lists each path
            with its status code (plus the first 200 characters of the body)
            or the exception it raised.
    """
    errors: List[str] = []
    for p in paths:
        try:
            r = _http_request("GET", p, params=params)
        except (requests.RequestException, RuntimeError) as exc:
            errors.append(f"{p} -> {type(exc).__name__}: {exc}")
            continue
        if r.ok:
            return p, r
        errors.append(f"{p} -> {r.status_code} {r.text[:200]}")
    raise RuntimeError("No endpoint succeeded. Tried: \n" + "\n".join(errors))


# --- Arkham API wrappers -----------------------------------------------------

# Candidate endpoint paths, relative to ARKHAM_API_BASE_URL. Each list is
# tried in order until one path answers successfully.
# NOTE(review): these look like guesses at common REST layouts rather than
# paths taken from the Arkham API reference — confirm against the docs.

# Entity search candidates (queried with q=... and limit=...).
ENTITY_SEARCH_PATHS = [
    "/entities/search",           # common pattern
    "/search/entities",           # alternate
    "/entities",                  # some APIs use a single list endpoint with q=...
]

# Entity detail candidates; "{entity_id}" is filled in with str.format.
ENTITY_DETAILS_PATHS = [
    "/entities/{entity_id}",
]

# Transfer/transaction listing candidates; "{entity_id}" is filled in with
# str.format. The last entry carries no placeholder.
TRANSFERS_PATHS_TEMPLATES = [
    "/entities/{entity_id}/transfers",
    "/entities/{entity_id}/transactions",
    "/transactions",  # fallback with entityId as a query parameter
]


def search_entities(query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Search for entities matching *query* and return the raw result list.

    The first search endpoint that answers successfully is used. The decoded
    payload may be a bare list, or a dict wrapping the list under ``"results"``
    or ``"data"`` (checked in that order). Any other shape yields ``[]``.
    """
    _, response = _try_paths_get(ENTITY_SEARCH_PATHS, params={"q": query, "limit": limit})
    payload = response.json()

    if isinstance(payload, list):
        return payload
    if isinstance(payload, dict):
        # Unwrap the first known envelope key that actually holds a list.
        for wrapper_key in ("results", "data"):
            wrapped = payload.get(wrapper_key)
            if isinstance(wrapped, list):
                return wrapped
    return []


def get_entity_details(entity_id: str) -> Dict[str, Any]:
    """Fetch the detail record for *entity_id*.

    When the decoded payload is a dict with a ``"data"`` key, that inner value
    is returned; a dict without it is returned whole. A non-dict payload is
    passed back unchanged.
    """
    candidate_paths: List[str] = []
    for template in ENTITY_DETAILS_PATHS:
        candidate_paths.append(template.format(entity_id=entity_id))

    response = _try_paths_get(candidate_paths)[1]
    payload = response.json()
    if not isinstance(payload, dict):
        return payload
    return payload.get("data", payload)


def _format_iso8601(dt: datetime | date) -> str:
    if isinstance(dt, date) and not isinstance(dt, datetime):
        dt = datetime(dt.year, dt.month, dt.day, tzinfo=timezone.utc)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.isoformat()


def _extract_transfer_page(data: Any) -> Tuple[List[Dict[str, Any]], Optional[str]]:
    """Split one decoded response into ``(items, next_cursor)``.

    Items are taken from a bare list, or from the first of ``"results"``,
    ``"data"``, ``"items"`` that holds a list. The cursor is the first truthy
    value among ``"nextCursor"``, ``"next_page_token"``, ``"next"``,
    ``"cursor"``; it is only honoured when it is a plain string or integer,
    since anything else cannot be sent back as a query parameter.
    """
    if isinstance(data, list):
        return data, None
    if not isinstance(data, dict):
        return [], None

    items: List[Dict[str, Any]] = []
    for key in ("results", "data", "items"):
        if isinstance(data.get(key), list):
            items = data[key]
            break

    cursor: Any = None
    for key in ("nextCursor", "next_page_token", "next", "cursor"):
        if data.get(key):
            cursor = data[key]
            break
    if isinstance(cursor, bool) or not isinstance(cursor, (str, int)):
        return items, None
    return items, str(cursor)


def fetch_entity_transfers(
    entity_id: str,
    *,
    start_time: Optional[datetime | date] = None,
    end_time: Optional[datetime | date] = None,
    chains: Optional[List[str]] = None,
    min_usd: Optional[float] = None,
    page_size: int = 200,
    max_pages: int = 20,
) -> List[Dict[str, Any]]:
    """Fetch transfers/transactions for an entity, following cursors if present.

    Entity-scoped endpoints (templates containing ``{entity_id}``) are tried
    first. Generic listing endpoints are only used as a fallback and always
    receive ``entityId`` as a query parameter, so they can never return
    unfiltered data. Whichever endpoint answers the first page is reused for
    all later pages, because a cursor is only meaningful to the endpoint that
    issued it.

    Args:
        entity_id: Identifier of the entity whose transfers are requested.
        start_time: Optional lower time bound (sent as ``startTime``/``fromTime``).
        end_time: Optional upper time bound (sent as ``endTime``/``toTime``).
        chains: Optional chain names, sent comma-joined as ``chains``/``chain``.
        min_usd: Optional minimum USD value (sent as ``minUsdValue``/``minUsd``).
        page_size: Value of the ``limit`` parameter for each page.
        max_pages: Upper bound on the number of pages requested.

    Returns:
        All items collected across pages, in the order received.

    Raises:
        RuntimeError: When neither the scoped nor the generic endpoints succeed.
    """
    # Several alias names are sent for each filter because the exact parameter
    # names the API expects are not known here.
    params: Dict[str, Any] = {"limit": page_size}
    if start_time is not None:
        params["startTime"] = _format_iso8601(start_time)
        params.setdefault("fromTime", params["startTime"])  # alias
    if end_time is not None:
        params["endTime"] = _format_iso8601(end_time)
        params.setdefault("toTime", params["endTime"])  # alias
    if chains:
        params["chains"] = ",".join(chains)
        params.setdefault("chain", params["chains"])  # alias
    if min_usd is not None:
        params["minUsdValue"] = float(min_usd)
        params.setdefault("minUsd", float(min_usd))

    scoped_paths = [
        t.format(entity_id=entity_id) for t in TRANSFERS_PATHS_TEMPLATES if "{entity_id}" in t
    ]
    # dict.fromkeys de-duplicates while keeping order.
    generic_paths = list(dict.fromkeys(
        [t for t in TRANSFERS_PATHS_TEMPLATES if "{entity_id}" not in t]
        + ["/transactions", "/transfers"]
    ))

    all_items: List[Dict[str, Any]] = []
    seen_cursors: set = set()
    next_cursor: Optional[str] = None
    active_path: Optional[str] = None

    for _ in range(max_pages):
        page_params = dict(params)
        if next_cursor:
            page_params["cursor"] = next_cursor
            page_params["pageCursor"] = next_cursor  # alias

        if active_path is not None:
            _, resp = _try_paths_get([active_path], params=page_params)
        else:
            try:
                active_path, resp = _try_paths_get(scoped_paths, params=page_params)
            except (RuntimeError, requests.RequestException):
                # Last resort: generic endpoints, filtered by entityId. Store
                # the filter in the base params so later pages keep it.
                params["entityId"] = entity_id
                page_params["entityId"] = entity_id
                active_path, resp = _try_paths_get(generic_paths, params=page_params)

        items, next_cursor = _extract_transfer_page(resp.json())
        if not items:
            break
        all_items.extend(items)
        # Stop on a missing cursor, or on one already used: an API that echoes
        # the request cursor would otherwise return the same page repeatedly.
        if not next_cursor or next_cursor in seen_cursors:
            break
        seen_cursors.add(next_cursor)
    return all_items


# --- Normalization helpers ---------------------------------------------------

def _get_nested(d: Dict[str, Any], keys: List[str], default: Any = None) -> Any:
    cur: Any = d
    for k in keys:
        if not isinstance(cur, dict) or k not in cur:
            return default
        cur = cur[k]
    return cur


def _labels_str(labels: Any) -> str:
    if isinstance(labels, list):
        parts: List[str] = []
        for x in labels:
            if isinstance(x, str):
                parts.append(x)
            elif isinstance(x, dict):
                name = x.get("name") or x.get("label") or x.get("tag")
                if name:
                    parts.append(str(name))
        return ", ".join(sorted(set(parts)))
    return str(labels) if labels is not None else ""


# Output columns, in order. Also used to give an empty result a stable schema.
_TRANSFER_COLUMNS = (
    "timestamp",
    "chain",
    "direction",
    "token",
    "token_address",
    "amount",
    "usd_value",
    "tx_hash",
    "from_address",
    "from_entity",
    "from_labels",
    "to_address",
    "to_entity",
    "to_labels",
    "tx_labels",
    "raw",
)


def _first_present(item: Dict[str, Any], *keys: str) -> Any:
    """Return the first value under *keys* that is not missing, None or ""."""
    # Unlike an `or` chain this keeps legitimate falsy values such as 0.
    for key in keys:
        value = item.get(key)
        if value is not None and value != "":
            return value
    return None


def _address_of(item: Dict[str, Any], *keys: str) -> Any:
    """Return the first truthy address under *keys*, unwrapping dict values."""
    # A party may be given as a plain string or as an object carrying an
    # "address" field; in the latter case only the address is wanted.
    for key in keys:
        value = item.get(key)
        if isinstance(value, dict):
            value = value.get("address")
        if value:
            return value
    return ""


def _infer_direction(item: Dict[str, Any], usd: Any) -> Any:
    """Return the explicit direction/flow, else a guess from the USD sign."""
    explicit = item.get("direction") or item.get("flow")
    if explicit:
        return explicit
    # Heuristic fallback: a strictly positive USD value counts as inflow and
    # everything else (zero, negative, missing, non-numeric) as outflow.
    try:
        positive = float(usd) > 0
    except (TypeError, ValueError):
        positive = False
    return "inflow" if positive else "outflow"


def _to_utc_timestamp(value: Any) -> Any:
    """Convert one raw timestamp into a UTC ``pd.Timestamp`` (``NaT`` on failure)."""
    if value is None or isinstance(value, bool):
        return pd.NaT
    try:
        if isinstance(value, (int, float)):
            # Numeric values are treated as Unix epochs. The unit is guessed
            # from the magnitude, since pandas would otherwise read them as
            # nanoseconds and map present-day epochs to 1970.
            magnitude = abs(value)
            if magnitude < 1e11:
                unit = "s"
            elif magnitude < 1e14:
                unit = "ms"
            elif magnitude < 1e17:
                unit = "us"
            else:
                unit = "ns"
            return pd.to_datetime(value, unit=unit, utc=True, errors="coerce")
        if isinstance(value, (str, datetime, date)):
            return pd.to_datetime(value, utc=True, errors="coerce")
    except (ValueError, TypeError, OverflowError):
        return pd.NaT
    return pd.NaT


def normalize_transfers(raw_items: List[Dict[str, Any]]) -> pd.DataFrame:
    """Flatten raw transfer records into a typed DataFrame, newest first.

    Each field is looked up under several alternative key names. Entries that
    are not dicts are skipped. ``usd_value`` and ``amount`` are coerced to
    numbers and ``timestamp`` to UTC datetimes, with unparseable values
    becoming NaN/NaT. The untouched source record is kept in the ``raw``
    column.

    Args:
        raw_items: Decoded transfer records as returned by the API.

    Returns:
        A DataFrame with the columns in ``_TRANSFER_COLUMNS``, sorted by
        timestamp descending. An empty input yields an empty frame that still
        carries those columns.
    """
    rows: List[Dict[str, Any]] = []
    for it in raw_items:
        if not isinstance(it, dict):
            continue
        # Times
        ts = _first_present(it, "timestamp", "time", "blockTime")
        # Basic transfer fields
        usd = _first_present(it, "usdValue", "valueUsd", "usd")
        amount = _first_present(it, "amount", "quantity", "value")
        token = (
            it.get("tokenSymbol")
            or it.get("assetSymbol")
            or _get_nested(it, ["token", "symbol"])
            or _get_nested(it, ["asset", "symbol"])
            or ""
        )
        token_addr = (
            it.get("tokenAddress")
            or _get_nested(it, ["token", "address"])
            or _get_nested(it, ["asset", "address"])
            or ""
        )
        tx_hash = it.get("txHash") or it.get("hash") or it.get("transactionHash")
        chain = it.get("chain") or it.get("chainName") or it.get("network")
        direction = _infer_direction(it, usd)
        # Parties and labels
        from_addr = _address_of(it, "fromAddress", "from")
        to_addr = _address_of(it, "toAddress", "to")

        from_entity = (
            _get_nested(it, ["fromEntity", "name"])
            or _get_nested(it, ["from", "entity", "name"])
            or _get_nested(it, ["fromEntity", "label"])
            or ""
        )
        to_entity = (
            _get_nested(it, ["toEntity", "name"])
            or _get_nested(it, ["to", "entity", "name"])
            or _get_nested(it, ["toEntity", "label"])
            or ""
        )
        from_labels = _labels_str(_get_nested(it, ["from", "labels"]) or it.get("fromLabels"))
        to_labels = _labels_str(_get_nested(it, ["to", "labels"]) or it.get("toLabels"))
        tx_labels = _labels_str(it.get("labels") or it.get("tags"))

        rows.append({
            "timestamp": _to_utc_timestamp(ts),
            "chain": chain,
            "direction": direction,
            "token": token,
            "token_address": token_addr,
            "amount": amount,
            "usd_value": usd,
            "tx_hash": tx_hash,
            "from_address": from_addr,
            "from_entity": from_entity,
            "from_labels": from_labels,
            "to_address": to_addr,
            "to_entity": to_entity,
            "to_labels": to_labels,
            "tx_labels": tx_labels,
            "raw": it,
        })

    df = pd.DataFrame(rows, columns=list(_TRANSFER_COLUMNS))
    if df.empty:
        return df

    # Best-effort typing
    for col in ("usd_value", "amount"):
        df[col] = pd.to_numeric(df[col], errors="coerce")
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce", utc=True)
    # Sort by time desc
    return df.sort_values("timestamp", ascending=False).reset_index(drop=True)



In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# UI controls
# One input widget per query parameter. The default date range is the last
# 90 days, computed in UTC when this cell runs.
entity_name_w = widgets.Text(value="Revolut", description="Entity", placeholder="e.g., Revolut")
start_w = widgets.DatePicker(description="Start", value=(datetime.now(timezone.utc) - timedelta(days=90)).date())
end_w = widgets.DatePicker(description="End", value=datetime.now(timezone.utc).date())
# Chain names offered in the multi-select.
# NOTE(review): presumably these match the identifiers the API expects — confirm.
chains_options = (
    "ethereum",
    "optimism",
    "arbitrum",
    "base",
    "polygon",
    "bsc",
    "avalanche",
    "tron",
    "bitcoin",
)
chains_w = widgets.SelectMultiple(options=chains_options, value=("optimism",), description="Chains")
min_usd_w = widgets.FloatText(value=1.0, description="Min USD")
limit_w = widgets.IntSlider(value=200, min=50, max=1000, step=50, description="Page size")
max_pages_w = widgets.IntSlider(value=10, min=1, max=50, step=1, description="Max pages")
run_button = widgets.Button(description="Run", button_style="primary")

# Layout: one row per group of related controls, with the Run button last.
ui = widgets.VBox([
    widgets.HBox([entity_name_w]),
    widgets.HBox([start_w, end_w, min_usd_w]),
    widgets.HBox([chains_w]),
    widgets.HBox([limit_w, max_pages_w]),
    run_button,
])

display(ui)

# Output area that receives everything the Run handler prints or displays.
out = widgets.Output()
display(out)


# Results of the most recent run, kept at module level so that later cells
# (e.g. the CSV export) can read them.
LATEST_DF: Optional[pd.DataFrame] = None
LATEST_ENTITY: Optional[Dict[str, Any]] = None


def _format_entity_match(e: Dict[str, Any]) -> str:
    name = e.get("name") or e.get("label") or e.get("title") or e.get("slug") or e.get("id")
    eid = e.get("id") or e.get("entityId") or e.get("entity_id")
    typ = e.get("type") or e.get("entityType")
    return f"{name} (id={eid}, type={typ})"


@out.capture(clear_output=True)
def on_run_click(_):
    """Run-button handler: search, fetch, normalise and summarise transfers.

    Reads the current widget values, resolves the entity name to the top
    search hit, fetches that entity's transfers and shows a per-chain summary
    plus the first 25 rows. The results are stored in ``LATEST_DF`` and
    ``LATEST_ENTITY`` for later cells. All output goes to the ``out`` widget.

    Args:
        _: The clicked button (unused).
    """
    global LATEST_DF, LATEST_ENTITY

    # Validate inputs before spending any API calls.
    query = (entity_name_w.value or "").strip()
    if not query:
        print("Enter an entity name to search for.")
        return
    start = start_w.value
    end = end_w.value
    if start is not None and end is not None and start > end:
        print("Start date must not be after end date.")
        return

    print("Searching entities…")
    candidates = search_entities(query, limit=10)
    if not candidates:
        print("No candidates found.")
        return
    # Pick top candidate
    top = candidates[0]
    eid = top.get("id") or top.get("entityId") or top.get("entity_id")
    if not eid:
        print("Top candidate missing entity id:", json.dumps(top, indent=2)[:800])
        return
    LATEST_ENTITY = top
    print("Using:", _format_entity_match(top))

    # The date picker yields a plain date, which would be sent as midnight at
    # the START of that day and so exclude the chosen end day. Extend it to
    # the last second of the day to make the range inclusive.
    if isinstance(end, date) and not isinstance(end, datetime):
        end = datetime(end.year, end.month, end.day, 23, 59, 59, tzinfo=timezone.utc)

    chains = list(chains_w.value) if chains_w.value else None
    items = fetch_entity_transfers(
        str(eid),
        start_time=start,
        end_time=end,
        chains=chains,
        # A minimum of 0 (or an empty field) means "no filter".
        min_usd=min_usd_w.value or None,
        page_size=int(limit_w.value),
        max_pages=int(max_pages_w.value),
    )
    print(f"Fetched {len(items)} records.")

    df = normalize_transfers(items)
    LATEST_DF = df
    if df.empty:
        print("No rows after normalization.")
        return

    # Brief summary. dropna=False keeps rows whose chain is missing in the
    # table instead of silently leaving them out of the totals.
    by_chain = (
        df.groupby("chain", dropna=False)
        .agg(count=("tx_hash", "count"), usd=("usd_value", "sum"))
        .sort_values("usd", ascending=False)
    )
    display(by_chain)
    display(df.head(25))


# Register the handler so that clicking Run triggers a query.
run_button.on_click(on_run_click)



In [None]:
# Export helpers
import pathlib

def export_latest_csv(prefix: str = "arkham_transfers", outdir: str = "notebooks/adhoc/csv_outputs") -> Optional[str]:
    """Write the most recent query result (``LATEST_DF``) to a timestamped CSV.

    The file is named ``{prefix}-{entity}-{UTC timestamp}.csv``. The entity
    part is the latest entity's name reduced to lowercase letters, digits,
    ``-`` and ``_`` so it cannot introduce path separators or other characters
    that are unsafe in a file name.

    Args:
        prefix: Leading part of the file name.
        outdir: Directory to write into; created if it does not exist.

    Returns:
        The path of the written file as a string, or ``None`` when there is
        nothing to export.
    """
    if LATEST_DF is None or LATEST_DF.empty:
        print("Nothing to export. Run a query first.")
        return None

    ent_name = (LATEST_ENTITY.get("name") if isinstance(LATEST_ENTITY, dict) else None) or "entity"
    slug = "".join(
        ch if (ch.isalnum() or ch in "-_") else "_" for ch in str(ent_name).lower()
    )
    safe_ent = slug.strip("_")[:64] or "entity"

    ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    target_dir = pathlib.Path(outdir)
    target_dir.mkdir(parents=True, exist_ok=True)
    outpath = target_dir / f"{prefix}-{safe_ent}-{ts}.csv"

    export_df = LATEST_DF
    if "raw" in export_df.columns:
        # Serialise the raw records as JSON; the default CSV writer would emit
        # the Python repr of each dict, which cannot be parsed back reliably.
        # assign() returns a new frame, so LATEST_DF itself is left untouched.
        export_df = export_df.assign(
            raw=export_df["raw"].map(
                lambda v: json.dumps(v, ensure_ascii=False) if isinstance(v, (dict, list)) else v
            )
        )
    export_df.to_csv(outpath, index=False)
    print(f"Saved: {outpath}")
    return str(outpath)

# Convenience button
# Clicking it exports the latest result using export_latest_csv's defaults.
export_button = widgets.Button(description="Export CSV", button_style="")

def on_export_click(_):
    # The click argument is unused and the returned path is discarded;
    # export_latest_csv prints its own status message.
    export_latest_csv()

export_button.on_click(on_export_click)
display(export_button)

