# Daily Pair Dual Replay (Single-path Model vs Real Trajectory)

This notebook runs all txs for one day on the rUSD/XRP pair in both directions.
- Real track: from metadata (`AffectedNodes`), keeping multipath visible.
- Model track: single-path replay via `xrpl_router` src modules.


In [1]:
from __future__ import annotations

import json
import urllib.request
from dataclasses import dataclass
from decimal import Decimal, ROUND_FLOOR
from pathlib import Path
from typing import Any

import pandas as pd
from pyspark.sql import SparkSession, functions as F

from xrpl_router.amm import AMM
from xrpl_router.book_step import RouterQuoteView
from xrpl_router.clob import from_levels as clob_from_levels
from xrpl_router.core import IOUAmount, XRPAmount
from xrpl_router.core.datatypes import Segment
from xrpl_router.core.fmt import amount_to_decimal


In [2]:
# Config
REPO_ROOT = Path.cwd().resolve().parent if Path.cwd().name == 'notebooks' else Path.cwd().resolve()
PROFILE = str(REPO_ROOT / 'data' / 'config.share')
SHARE = 'ripple-ubri-share'
SCHEMA = 'ripplex'
TARGET_DATE = '2025-12-15'
RPC_URL = 'https://s1.ripple.com:51234/'
RUSD_HEX = '524C555344000000000000000000000000000000'
XRP = 'XRP'
MAX_TX = 10
SHOW_DETAIL_FOR_TOP_N = 3
EXPORT_RESULTS = True
INCLUDE_CLOB_ONLY_TX = False

def ds_url(table: str) -> str:
    return f"{PROFILE}#{SHARE}.{SCHEMA}.{table}"
DELTA_SHARING_SPARK_PACKAGE = 'io.delta:delta-sharing-spark_2.12:3.1.0'
DETAIL_TX_HASH = ''
TOP_N_DETAIL = 5


In [3]:
def _to_dec(x: Any) -> Decimal:
    try:
        return Decimal(str(x))
    except Exception:
        return Decimal('0')

def _is_pair(a: str, b: str) -> bool:
    return (a == XRP and b == RUSD_HEX) or (a == RUSD_HEX and b == XRP)

def _amt_floor(cur: str, v: Decimal):
    if cur == XRP:
        drops = int((v / Decimal('0.000001')).to_integral_value(rounding=ROUND_FLOOR))
        return XRPAmount(value=drops)
    units = int((v / Decimal('0.000000000000001')).to_integral_value(rounding=ROUND_FLOOR))
    return IOUAmount.from_components(units, -15)

def _parse_amount(v: Any) -> tuple[str | None, Decimal]:
    if v is None:
        return None, Decimal('0')
    if isinstance(v, str):
        return XRP, Decimal(v) / Decimal(1_000_000)
    if isinstance(v, dict):
        c = str(v.get('currency')) if v.get('currency') is not None else None
        val = _to_dec(v.get('value'))
        return c, val
    return None, Decimal('0')

def _fetch_tx_metadata(tx_hash: str) -> dict[str, Any]:
    payload = {'method': 'tx', 'params': [{'transaction': tx_hash, 'binary': False}]}
    req = urllib.request.Request(
        RPC_URL,
        data=json.dumps(payload).encode('utf-8'),
        headers={'Content-Type': 'application/json'},
        method='POST',
    )
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read().decode('utf-8'))

def _extract_real_steps(tx_result: dict[str, Any]) -> tuple[pd.DataFrame, set[str]]:
    affected = (tx_result.get('meta') or {}).get('AffectedNodes') or []
    rows: list[dict[str, Any]] = []
    seen_ccy: set[str] = set()

    for idx, n in enumerate(affected):
        nk = next((k for k in ('CreatedNode', 'ModifiedNode', 'DeletedNode') if k in n), None)
        if nk is None:
            continue
        body = n[nk]
        entry = body.get('LedgerEntryType')
        prev = body.get('PreviousFields', {}) or {}
        final = body.get('FinalFields', {}) or body.get('NewFields', {}) or {}

        kind = None
        source = None
        if entry == 'AMM':
            kind = 'AMM'
            source = str(final.get('Account') or final.get('AuctionSlot', {}).get('Account') or 'amm')
        elif entry == 'Offer':
            kind = 'CLOB'
            source = str(body.get('LedgerIndex') or final.get('Sequence') or 'offer')

        if kind is None:
            continue

        # Currency scan for multipath-like tagging
        for fld in ['TakerGets', 'TakerPays', 'taker_gets_funded', 'taker_pays_funded']:
            for obj in [prev.get(fld), final.get(fld)]:
                c, _ = _parse_amount(obj)
                if c is not None:
                    seen_ccy.add(c)

        rows.append({
            'step_idx': idx,
            'segment_kind': kind,
            'node_type': nk,
            'source': source,
            'ledger_entry_type': entry,
        })

    raw_df = pd.DataFrame(rows)
    if raw_df.empty:
        return raw_df, seen_ccy

    # Collapse only adjacent same kind+source for readability
    keep = [True]
    for i in range(1, len(raw_df)):
        prev_row = raw_df.iloc[i - 1]
        cur_row = raw_df.iloc[i]
        keep.append(not (prev_row['segment_kind'] == cur_row['segment_kind'] and prev_row['source'] == cur_row['source']))
    seg_df = raw_df[keep].reset_index(drop=True)
    seg_df.insert(0, 'segment_no', range(1, len(seg_df) + 1))
    return seg_df, seen_ccy

def _direction_from_swap(sw_row: pd.Series) -> str:
    return f"{sw_row.get('asset_in_currency')}->{sw_row.get('asset_out_currency')}"


In [4]:
# Build Spark with Delta Sharing connector.
# If an existing Spark session lacks the connector, restart kernel and run again.
spark = (
    SparkSession.builder
    .appName('daily_pair_dual_replay')
    .config('spark.jars.packages', DELTA_SHARING_SPARK_PACKAGE)
    .config('spark.sql.extensions', 'io.delta.sql.DeltaSparkSessionExtension')
    .config('spark.sql.catalog.spark_catalog', 'org.apache.spark.sql.delta.catalog.DeltaCatalog')
    .config('spark.sql.shuffle.partitions', '8')
    .getOrCreate()
)

swaps_df = (
    spark.read.format('deltaSharing').load(ds_url('fact_amm_swaps'))
    .where(F.col('close_time_date') == F.lit(TARGET_DATE))
    .toPandas()
)
offers_df = (
    spark.read.format('deltaSharing').load(ds_url('offers_fact_tx'))
    .where(F.col('close_time_date') == F.lit(TARGET_DATE))
    .toPandas()
)
fees_df = (
    spark.read.format('deltaSharing').load(ds_url('fact_amm_fees'))
    .where(F.col('close_time_date') == F.lit(TARGET_DATE))
    .toPandas()
)

pair_swaps = swaps_df[swaps_df.apply(lambda r: _is_pair(str(r.get('asset_in_currency')), str(r.get('asset_out_currency'))), axis=1)].copy()
pair_clob = offers_df[offers_df.apply(lambda r: _is_pair(str(r.get('base_currency')), str(r.get('counter_currency'))), axis=1)].copy()

swap_hashes = set(pair_swaps.get('transaction_hash', pd.Series([], dtype=str)).dropna().astype(str).tolist())
clob_hashes = set(pair_clob.get('tx_hash', pd.Series([], dtype=str)).dropna().astype(str).tolist())

if INCLUDE_CLOB_ONLY_TX:
    tx_universe = sorted(swap_hashes | clob_hashes)
else:
    tx_universe = sorted(swap_hashes)

if MAX_TX and len(tx_universe) > MAX_TX:
    tx_universe = tx_universe[:MAX_TX]

print('TARGET_DATE:', TARGET_DATE)
print('pair swaps rows:', len(pair_swaps))
print('pair clob rows:', len(pair_clob))
print('swap_hashes:', len(swap_hashes), '| clob_hashes:', len(clob_hashes))
print('INCLUDE_CLOB_ONLY_TX:', INCLUDE_CLOB_ONLY_TX)
print('tx_universe size:', len(tx_universe))


26/02/17 13:00:56 WARN Utils: Your hostname, MacBook-Pro-5.local resolves to a loopback address: 127.0.0.1; using 192.168.31.179 instead (on interface en0)
26/02/17 13:00:56 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


:: loading settings :: url = jar:file:/opt/homebrew/anaconda3/envs/xrpl-amm-clob/lib/python3.11/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /Users/guohanze/.ivy2/cache
The jars for the packages stored in: /Users/guohanze/.ivy2/jars
io.delta#delta-sharing-spark_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-a60d55d7-545d-4175-a88a-e9aa656b0777;1.0
	confs: [default]
	found io.delta#delta-sharing-spark_2.12;3.1.0 in central
	found io.delta#delta-spark_2.12;3.1.0 in central
	found io.delta#delta-storage;3.1.0 in central
	found org.antlr#antlr4-runtime;4.9.3 in central
	found io.delta#delta-sharing-client_2.12;1.0.4 in central
	found org.apache.httpcomponents#httpclient;4.5.13 in central
	found org.apache.httpcomponents#httpcore;4.4.13 in central
	found commons-logging#commons-logging;1.2 in central
	found commons-codec#commons-codec;1.11 in central
:: resolution report :: resolve 945ms :: artifacts dl 38ms
	:: modules in use:
	commons-codec#commons-codec;1.11 from central in [default]
	commons-logging#commons-logging;1.2 from central in [default]
	io.delta

TARGET_DATE: 2025-12-15
pair swaps rows: 295
pair clob rows: 11434
swap_hashes: 295 | clob_hashes: 8462
INCLUDE_CLOB_ONLY_TX: False
tx_universe size: 10


In [5]:
@dataclass
class TxReplayResult:
    tx_hash: str
    direction: str
    is_multipath_like: bool
    real_presence: str
    model_presence: str
    real_pair_in: Decimal
    real_pair_out: Decimal
    model_in: Decimal
    model_out: Decimal
    in_gap_pct: Decimal
    out_gap_pct: Decimal
    real_steps: pd.DataFrame
    model_steps: pd.DataFrame


def _presence_from_kinds(seq: list[str]) -> str:
    k = [str(x).upper() for x in seq if str(x).upper() in {'AMM', 'CLOB'}]
    return '+'.join(k) if k else 'NONE'


def _presence_from_model_slices(model_steps: pd.DataFrame) -> str:
    if model_steps.empty or 'segment_kind' not in model_steps.columns:
        return 'NONE'
    seq = model_steps['segment_kind'].astype(str).str.upper().tolist()
    return _presence_from_kinds(seq)


def _infer_direction(tx_result: dict[str, Any], sw_tx: pd.DataFrame, clob_tx: pd.DataFrame) -> tuple[str | None, str | None]:
    # Prefer user-request direction from metadata.
    pays_cur, _ = _parse_amount(tx_result.get('TakerPays'))
    gets_cur, _ = _parse_amount(tx_result.get('TakerGets'))
    if pays_cur is not None and gets_cur is not None and _is_pair(pays_cur, gets_cur):
        return str(pays_cur), str(gets_cur)

    # Fallback to AMM side if available.
    if not sw_tx.empty:
        r = sw_tx.sort_values(['ledger_index', 'transaction_index']).iloc[0]
        return str(r.get('asset_in_currency')), str(r.get('asset_out_currency'))

    # Final fallback from CLOB row currencies (best-effort).
    if not clob_tx.empty:
        r = clob_tx.iloc[0]
        bc = str(r.get('base_currency'))
        cc = str(r.get('counter_currency'))
        if _is_pair(bc, cc):
            # Use metadata request preference if one side appears in TakerPays/TakerGets.
            if pays_cur in {bc, cc}:
                return str(pays_cur), (cc if pays_cur == bc else bc)
            return cc, bc

    return None, None


def _build_model_segments_from_tx_clob(tx_clob: pd.DataFrame, in_cur: str, out_cur: str) -> list[Segment]:
    levels: list[tuple[Decimal, Decimal]] = []
    ids: list[str] = []

    for _, r in tx_clob.iterrows():
        bc = str(r.get('base_currency'))
        cc = str(r.get('counter_currency'))
        ba = _to_dec(r.get('base_amount'))
        ca = _to_dec(r.get('counter_amount'))

        if bc == out_cur and cc == in_cur:
            out_dec, in_dec = ba, ca
        elif cc == out_cur and bc == in_cur:
            out_dec, in_dec = ca, ba
        else:
            continue

        if out_dec <= 0 or in_dec <= 0:
            continue

        levels.append((out_dec / in_dec, out_dec))
        ids.append(str(r.get('ledger_index')) + ':' + str(r.get('transaction_index')))

    if not levels:
        return []

    segs_base = clob_from_levels(levels, in_is_xrp=(in_cur == XRP), out_is_xrp=(out_cur == XRP))
    out: list[Segment] = []
    for i, s in enumerate(segs_base):
        sid = ids[i] if i < len(ids) else f'clob_leg_{i+1}'
        out.append(
            Segment(
                src='CLOB',
                quality=s.quality,
                out_max=s.out_max,
                in_at_out_max=s.in_at_out_max,
                raw_quality=s.raw_quality or s.quality,
                source_id=sid,
            )
        )
    return out


def _build_amm_from_swaps(sw_tx: pd.DataFrame, in_cur: str, out_cur: str, fee_tx: pd.DataFrame) -> AMM | None:
    if sw_tx.empty:
        return None

    r = sw_tx.sort_values(['ledger_index', 'transaction_index']).iloc[0]

    x_cur = str(r.get('amm_asset_currency'))
    y_cur = str(r.get('amm_asset2_currency'))
    x_pre = _to_dec(r.get('amm_asset_balance_before'))
    y_pre = _to_dec(r.get('amm_asset2_balance_before'))

    if in_cur == x_cur and out_cur == y_cur:
        x_res, y_res = x_pre, y_pre
    elif in_cur == y_cur and out_cur == x_cur:
        x_res, y_res = y_pre, x_pre
    else:
        return None

    fee = Decimal('0')
    if not fee_tx.empty and 'trading_fee' in fee_tx.columns and pd.notna(fee_tx.iloc[0].get('trading_fee')):
        fee = _to_dec(fee_tx.iloc[0].get('trading_fee'))

    return AMM(x_res, y_res, fee, x_is_xrp=(in_cur == XRP), y_is_xrp=(out_cur == XRP))


def _pair_totals(sw_tx: pd.DataFrame, clob_tx: pd.DataFrame, in_cur: str, out_cur: str) -> tuple[Decimal, Decimal]:
    real_amm_in = Decimal('0')
    real_amm_out = Decimal('0')
    if not sw_tx.empty:
        r = sw_tx.sort_values(['ledger_index', 'transaction_index']).iloc[0]
        real_amm_in = _to_dec(r.get('asset_in_value'))
        real_amm_out = _to_dec(r.get('asset_out_value'))

    real_clob_in = Decimal('0')
    real_clob_out = Decimal('0')
    for _, r in clob_tx.iterrows():
        bc = str(r.get('base_currency'))
        cc = str(r.get('counter_currency'))
        ba = _to_dec(r.get('base_amount'))
        ca = _to_dec(r.get('counter_amount'))
        if bc == out_cur and cc == in_cur:
            real_clob_out += ba
            real_clob_in += ca
        elif cc == out_cur and bc == in_cur:
            real_clob_out += ca
            real_clob_in += ba

    return real_amm_in + real_clob_in, real_amm_out + real_clob_out


def _replay_one_tx(tx_hash: str) -> TxReplayResult | None:
    sw_tx = pair_swaps[pair_swaps['transaction_hash'].astype(str) == tx_hash].copy()
    clob_tx = pair_clob[pair_clob['tx_hash'].astype(str) == tx_hash].copy()

    # fee table may use transaction_hash or tx_hash depending on source shape
    if 'transaction_hash' in fees_df.columns:
        fee_tx = fees_df[fees_df['transaction_hash'].astype(str) == tx_hash].copy()
    elif 'tx_hash' in fees_df.columns:
        fee_tx = fees_df[fees_df['tx_hash'].astype(str) == tx_hash].copy()
    else:
        fee_tx = pd.DataFrame()

    tx_meta = _fetch_tx_metadata(tx_hash)
    tx_result = tx_meta.get('result') or {}
    real_steps, ccy_seen = _extract_real_steps(tx_result)

    in_cur, out_cur = _infer_direction(tx_result, sw_tx, clob_tx)
    if in_cur is None or out_cur is None:
        return None
    if not _is_pair(in_cur, out_cur):
        return None

    direction = f'{in_cur}->{out_cur}'
    real_pair_in, real_pair_out = _pair_totals(sw_tx, clob_tx, in_cur, out_cur)
    if real_pair_out <= 0:
        return None

    segs = _build_model_segments_from_tx_clob(clob_tx, in_cur, out_cur)
    amm = _build_amm_from_swaps(sw_tx, in_cur, out_cur, fee_tx)

    # Unified replay: same call, engine naturally uses whichever sources are available.
    q = RouterQuoteView(lambda: segs, amm=amm).preview_out(_amt_floor(out_cur, real_pair_out))

    model_out = _to_dec(amount_to_decimal(q['summary']['total_out']))
    model_in = _to_dec(amount_to_decimal(q['summary']['total_in']))

    model_steps = pd.DataFrame(q.get('slices', []) or [])
    if not model_steps.empty:
        model_steps.insert(0, 'segment_no', range(1, len(model_steps) + 1))
        model_steps['segment_kind'] = model_steps['src'].astype(str).str.upper()

    pair_set = {XRP, RUSD_HEX}
    is_multipath_like = any(c not in pair_set for c in ccy_seen if c is not None)

    real_presence = _presence_from_kinds(real_steps.get('segment_kind', pd.Series([], dtype=str)).tolist()) if not real_steps.empty else 'NONE'
    model_presence = _presence_from_model_slices(model_steps)

    in_gap_pct = Decimal('0') if real_pair_in == 0 else (model_in - real_pair_in) / real_pair_in * Decimal('100')
    out_gap_pct = Decimal('0') if real_pair_out == 0 else (model_out - real_pair_out) / real_pair_out * Decimal('100')

    return TxReplayResult(
        tx_hash=tx_hash,
        direction=direction,
        is_multipath_like=is_multipath_like,
        real_presence=real_presence,
        model_presence=model_presence,
        real_pair_in=real_pair_in,
        real_pair_out=real_pair_out,
        model_in=model_in,
        model_out=model_out,
        in_gap_pct=in_gap_pct,
        out_gap_pct=out_gap_pct,
        real_steps=real_steps,
        model_steps=model_steps,
    )


In [6]:
results: list[TxReplayResult] = []
errors: list[tuple[str, str]] = []
skipped: list[tuple[str, str]] = []

for txh in tx_universe:
    try:
        r = _replay_one_tx(txh)
        if r is not None:
            results.append(r)
        else:
            skipped.append((txh, 'replay_returned_none'))
    except Exception as e:
        errors.append((txh, str(e)))

print('processed tx (full pair sample):', len(tx_universe))
print('successful replays:', len(results))
print('skipped:', len(skipped))
print('errors:', len(errors))

if skipped:
    print('[Skipped sample]')
    display(pd.DataFrame(skipped, columns=['tx_hash', 'reason']).head(20))

if errors:
    print('[Error sample]')
    display(pd.DataFrame(errors, columns=['tx_hash', 'error']).head(20))


processed tx (full pair sample): 10
successful replays: 10
skipped: 0
errors: 0


In [7]:
summary_df = pd.DataFrame([
    {
        'tx_hash': r.tx_hash,
        'direction': r.direction,
        'is_multipath_like': r.is_multipath_like,
        'real_presence': r.real_presence,
        'model_presence': r.model_presence,
        'presence_match': (r.real_presence == r.model_presence),
        'real_pair_in': float(r.real_pair_in),
        'real_pair_out': float(r.real_pair_out),
        'model_in': float(r.model_in),
        'model_out': float(r.model_out),
        'in_gap_pct': float(r.in_gap_pct),
        'out_gap_pct': float(r.out_gap_pct),
    }
    for r in results
])

if summary_df.empty:
    print('No replay result rows.')
else:
    print('[Per-tx summary]')
    display(summary_df.sort_values('in_gap_pct', ascending=False).reset_index(drop=True))

    print('[Aggregate by direction and multipath tag]')
    agg = (
        summary_df
        .groupby(['direction', 'is_multipath_like'], dropna=False)
        .agg(
            tx_count=('tx_hash', 'count'),
            in_gap_pct_mean=('in_gap_pct', 'mean'),
            in_gap_pct_p50=('in_gap_pct', 'median'),
            in_gap_pct_p90=('in_gap_pct', lambda s: s.quantile(0.9)),
            out_gap_pct_mean=('out_gap_pct', 'mean'),
            presence_match_rate=('presence_match', 'mean'),
        )
        .reset_index()
    )
    display(agg)


[Per-tx summary]


Unnamed: 0,tx_hash,direction,is_multipath_like,real_presence,model_presence,presence_match,real_pair_in,real_pair_out,model_in,model_out,in_gap_pct,out_gap_pct
0,0028A4B87BEC41BEBADCA2D77642D1BBD2F854241FAE13...,524C555344000000000000000000000000000000->XRP,False,NONE,AMM,False,3.274698,6.523171,13.132861,6.52317,301.0404,-1.53292e-05
1,006D6DF55FB4277BE2509B32FAD00A8C269651966C93A7...,524C555344000000000000000000000000000000->XRP,False,CLOB+CLOB+CLOB+CLOB,CLOB+CLOB+CLOB+AMM,False,1830.770223,1093.239515,2063.731021,1093.239515,12.72474,-3.624152e-08
2,00DB708972DF27CFFCF9853631B7873B67925A37C962CB...,524C555344000000000000000000000000000000->XRP,False,CLOB+CLOB+CLOB,CLOB+CLOB+CLOB+AMM,False,9662.207232,5584.063152,10750.972587,5584.063152,11.26829,-1.898988e-09
3,021E168159F99A539287021F0D55EFF8C7F47A68FBF360...,XRP->524C555344000000000000000000000000000000,False,CLOB+CLOB+CLOB+CLOB+CLOB+CLOB,CLOB+CLOB+CLOB+CLOB+CLOB+AMM+CLOB+AMM,False,2163.398704,4105.998663,2163.398705,4105.998663,4.622356e-08,-2.679007e-15
4,01BCBE47B02680362F869508D1DA42BB6D4EDFD08057FE...,524C555344000000000000000000000000000000->XRP,False,NONE,AMM,False,73.415016,0.085066,73.414312,0.085066,-0.0009580311,0.0
5,03043B52B513A577F2502F0B05D694B5BCCDAB491991AF...,524C555344000000000000000000000000000000->XRP,False,NONE,AMM,False,238.197473,0.027244,238.191325,0.027244,-0.002580976,0.0
6,02773A5DF18B8A9307ADD79432BD9A88BD75693F242608...,524C555344000000000000000000000000000000->XRP,False,NONE,AMM,False,0.000759,0.000399,0.000758,0.000399,-0.1807142,0.0
7,0154BB2EA95E921CEEA1FFB64EEC4473369DA99139167B...,XRP->524C555344000000000000000000000000000000,False,CLOB+CLOB+CLOB+CLOB+CLOB+CLOB+CLOB+CLOB+CLOB+C...,CLOB+CLOB+CLOB+CLOB+CLOB+CLOB+CLOB+CLOB+CLOB+C...,False,21317.159927,39784.939465,20931.83207,39784.939465,-1.807595,-4.016595e-15
8,04EF434187A12C2796BE223E3BDF469C8951E8CC5CADA7...,XRP->524C555344000000000000000000000000000000,False,CLOB,CLOB+AMM,False,276.6489,221.925177,117.041819,221.925177,-57.69301,0.0
9,0229F6D3DA60CD13F0F1C1F92A20CC1202DF7052454DA1...,XRP->524C555344000000000000000000000000000000,False,CLOB,CLOB+AMM,False,113.657722,64.370759,34.061692,64.370759,-70.03134,0.0


[Aggregate by direction and multipath tag]


Unnamed: 0,direction,is_multipath_like,tx_count,in_gap_pct_mean,in_gap_pct_p50,in_gap_pct_p90,out_gap_pct_mean,presence_match_rate
0,524C555344000000000000000000000000000000->XRP,False,6,54.141524,5.633665,156.882556,-2.561223e-06,0.0
1,XRP->524C555344000000000000000000000000000000,False,4,-32.382987,-29.750303,-0.542278,-1.673901e-15,0.0


In [8]:
if not summary_df.empty:
    if DETAIL_TX_HASH.strip():
        detail_hashes = [DETAIL_TX_HASH.strip()]
    else:
        detail_hashes = summary_df.sort_values('in_gap_pct', ascending=False).head(TOP_N_DETAIL)['tx_hash'].astype(str).tolist()

    for txh in detail_hashes:
        hit = [x for x in results if x.tx_hash == txh]
        if not hit:
            print(f'No replay result found for DETAIL tx_hash={txh}')
            continue

        r = hit[0]
        print(f"\n===== tx_hash={txh} | direction={r.direction} | multipath_like={r.is_multipath_like} =====")
        print(f"presence: real={r.real_presence} | model={r.model_presence}")
        print(f"real_pair_in={r.real_pair_in} | real_pair_out={r.real_pair_out}")
        print(f"model_in={r.model_in} | model_out={r.model_out} | in_gap_pct={r.in_gap_pct}% | out_gap_pct={r.out_gap_pct}%")

        print('[Real trajectory from metadata]')
        display(r.real_steps)

        print('[Model trajectory from unified replay]')
        display(r.model_steps)



===== tx_hash=0028A4B87BEC41BEBADCA2D77642D1BBD2F854241FAE13F5ABD09C98CC88973A | direction=524C555344000000000000000000000000000000->XRP | multipath_like=False =====
presence: real=NONE | model=AMM
real_pair_in=3.274698 | real_pair_out=6.523170999949798
model_in=13.13286094300072 | model_out=6.523170 | in_gap_pct=301.0403690050416862867965229% | out_gap_pct=-0.00001532919799293465653401410229%
[Real trajectory from metadata]


[Model trajectory from unified replay]


Unnamed: 0,segment_no,src,source_id,out_take,in_take,avg_quality,pre_spq,post_spq,dx_eff,fee_paid,slippage_price_premium,segment_kind
0,1,AMM,,XRPAmount(value=6523170),"IOUAmount(mantissa=1313286094300072, exponent=...",Quality(rate=IOUAmount(mantissa=49670594100000...,Quality(rate=IOUAmount(mantissa=49671049572762...,Quality(rate=IOUAmount(mantissa=49670135404266...,"IOUAmount(mantissa=1306338810861224, exponent=...","IOUAmount(mantissa=6947283438847400, exponent=...",9e-06,AMM



===== tx_hash=006D6DF55FB4277BE2509B32FAD00A8C269651966C93A7261EDC39501B547EBB | direction=524C555344000000000000000000000000000000->XRP | multipath_like=False =====
presence: real=CLOB+CLOB+CLOB+CLOB | model=CLOB+CLOB+CLOB+AMM
real_pair_in=1830.77022260400009 | real_pair_out=1093.23951539620662
model_in=2063.731020535841 | model_out=1093.239515 | in_gap_pct=12.72474257312797025633721193% | out_gap_pct=-3.624152021768154782704751547E-8%
[Real trajectory from metadata]


Unnamed: 0,segment_no,step_idx,segment_kind,node_type,source,ledger_entry_type
0,1,1,CLOB,ModifiedNode,03BD9F912B54AEE45A2AFE8E9CABAF32CA76D903B87961...,Offer
1,2,6,CLOB,DeletedNode,582D696DACA03983BC5C58E67724C3EE4643ED3E5A088A...,Offer
2,3,7,CLOB,DeletedNode,776F5C692DD73837E9D85539BB23B3076A6EB1CDA214F6...,Offer
3,4,9,CLOB,DeletedNode,88BED402A646C086F297B9A5F37566CD4C2B163A6265AB...,Offer


[Model trajectory from unified replay]


Unnamed: 0,segment_no,src,source_id,out_take,in_take,avg_quality,pre_spq,post_spq,dx_eff,fee_paid,slippage_price_premium,segment_kind
0,1,CLOB,None:None,XRPAmount(value=410132589),"IOUAmount(mantissa=7726897961394579, exponent=...",Quality(rate=IOUAmount(mantissa=53078556410000...,,,,,,CLOB
1,2,CLOB,None:None,XRPAmount(value=13594735),"IOUAmount(mantissa=2561585246454219, exponent=...",Quality(rate=IOUAmount(mantissa=53071570200000...,,,,,,CLOB
2,3,CLOB,None:None,XRPAmount(value=500000000),"IOUAmount(mantissa=9425000000000000, exponent=...",Quality(rate=IOUAmount(mantissa=53050397920000...,,,,,,CLOB
3,4,AMM,,XRPAmount(value=169512191),"IOUAmount(mantissa=3229253719318418, exponent=...",Quality(rate=IOUAmount(mantissa=52492682790000...,Quality(rate=IOUAmount(mantissa=52505718057022...,Quality(rate=IOUAmount(mantissa=52479581385576...,"IOUAmount(mantissa=3212170967143223, exponent=...","IOUAmount(mantissa=1708275217519444, exponent=...",0.000248,AMM



===== tx_hash=00DB708972DF27CFFCF9853631B7873B67925A37C962CB880B010A2D2A98CA11 | direction=524C555344000000000000000000000000000000->XRP | multipath_like=False =====
presence: real=CLOB+CLOB+CLOB | model=CLOB+CLOB+CLOB+AMM
real_pair_in=9662.2072319465055 | real_pair_out=5584.0631521060407
model_in=10750.97258663029 | model_out=5584.063152 | in_gap_pct=11.26828817212655302336862815% | out_gap_pct=-1.898988193928403831673588549E-9%
[Real trajectory from metadata]


Unnamed: 0,segment_no,step_idx,segment_kind,node_type,source,ledger_entry_type
0,1,0,CLOB,ModifiedNode,0374DE2B16E1BD0EEDE70418479387228E217C80D90EA0...,Offer
1,2,6,CLOB,DeletedNode,925BFDD4911AB763508FB2ED1BEB4A921BE8782DFFB384...,Offer
2,3,14,CLOB,DeletedNode,F670D6DA61A12F3D9A00FCDA05366FAF29A5F067D84AE7...,Offer


[Model trajectory from unified replay]


Unnamed: 0,segment_no,src,source_id,out_take,in_take,avg_quality,pre_spq,post_spq,dx_eff,fee_paid,slippage_price_premium,segment_kind
0,1,CLOB,None:None,XRPAmount(value=4785943839),"IOUAmount(mantissa=9198584058557999, exponent=...",Quality(rate=IOUAmount(mantissa=52029136321000...,,,,,,CLOB
1,2,CLOB,None:None,XRPAmount(value=29426763),"IOUAmount(mantissa=5656587238850650, exponent=...",Quality(rate=IOUAmount(mantissa=52022115100000...,,,,,,CLOB
2,3,CLOB,None:None,XRPAmount(value=5138537),"IOUAmount(mantissa=9883000000000001, exponent=...",Quality(rate=IOUAmount(mantissa=51993701000000...,,,,,,CLOB
3,4,AMM,,XRPAmount(value=763554013),"IOUAmount(mantissa=1485939655683792, exponent=...",Quality(rate=IOUAmount(mantissa=51385263873000...,Quality(rate=IOUAmount(mantissa=51439542506632...,Quality(rate=IOUAmount(mantissa=51330754454294...,"IOUAmount(mantissa=1478079034905224, exponent=...","IOUAmount(mantissa=7860620778567260, exponent=...",0.001056,AMM



===== tx_hash=021E168159F99A539287021F0D55EFF8C7F47A68FBF3605C375C7DFA2C5525C5 | direction=XRP->524C555344000000000000000000000000000000 | multipath_like=False =====
presence: real=CLOB+CLOB+CLOB+CLOB+CLOB+CLOB | model=CLOB+CLOB+CLOB+CLOB+CLOB+AMM+CLOB+AMM
real_pair_in=2163.398704 | real_pair_out=4105.99866317584711
model_in=2163.398705 | model_out=4105.998663175847 | in_gap_pct=4.622356471560500666732395343E-8% | out_gap_pct=-2.679007204423170150806502177E-15%
[Real trajectory from metadata]


Unnamed: 0,segment_no,step_idx,segment_kind,node_type,source,ledger_entry_type
0,1,3,CLOB,DeletedNode,1BBD99611D3C6C0D8B185AD035D7E170B1AD92D007722D...,Offer
1,2,4,CLOB,DeletedNode,1C202CFCEDF7F2B1AC7FC82AD5ACF50A19E5CE214CA1AA...,Offer
2,3,6,CLOB,DeletedNode,37F3773C4681A74B2E9E4B5616B50F013BEFF510A8E660...,Offer
3,4,9,CLOB,DeletedNode,58FDAB3F1573A08B1AE09F75EC13A62945031FAEFB0007...,Offer
4,5,19,CLOB,DeletedNode,B2DBA2CE18823F77DF8A95DB975303EA0BDBB83ED4FBCB...,Offer
5,6,22,CLOB,DeletedNode,CBA8A1D8218328519409529FCF8FF9C77B9BB49E8D6868...,Offer


[Model trajectory from unified replay]


Unnamed: 0,segment_no,src,source_id,out_take,in_take,avg_quality,pre_spq,post_spq,dx_eff,fee_paid,slippage_price_premium,segment_kind
0,1,CLOB,None:None,"IOUAmount(mantissa=3796000000000000, exponent=...",XRPAmount(value=2000000000),Quality(rate=IOUAmount(mantissa=18980000000000...,,,,,,CLOB
1,2,CLOB,None:None,"IOUAmount(mantissa=1679870452000000, exponent=...",XRPAmount(value=8850740),Quality(rate=IOUAmount(mantissa=18980000000000...,,,,,,CLOB
2,3,CLOB,None:None,"IOUAmount(mantissa=4403702800000000, exponent=...",XRPAmount(value=23203825),Quality(rate=IOUAmount(mantissa=18978348612782...,,,,,,CLOB
3,4,CLOB,None:None,"IOUAmount(mantissa=7187065338466000, exponent=...",XRPAmount(value=3787220),Quality(rate=IOUAmount(mantissa=18977153000000...,,,,,,CLOB
4,5,CLOB,None:None,"IOUAmount(mantissa=2197436723830284, exponent=...",XRPAmount(value=11580483),Quality(rate=IOUAmount(mantissa=18975346052753...,,,,,,CLOB
5,6,AMM,,"IOUAmount(mantissa=2129791361820674, exponent=...",XRPAmount(value=112274480),Quality(rate=IOUAmount(mantissa=18969505463936...,Quality(rate=IOUAmount(mantissa=18972614372200...,Quality(rate=IOUAmount(mantissa=18966380538200...,XRPAmount(value=111680548),XRPAmount(value=593932),0.000164,AMM
6,7,CLOB,None:None,"IOUAmount(mantissa=6625500000000000, exponent=...",XRPAmount(value=3492711),Quality(rate=IOUAmount(mantissa=18969505349855...,,,,,,CLOB
7,8,AMM,,"IOUAmount(mantissa=3968618970120000, exponent=...",XRPAmount(value=209246),Quality(rate=IOUAmount(mantissa=18966283561549...,Quality(rate=IOUAmount(mantissa=18966380538200...,Quality(rate=IOUAmount(mantissa=18966368923100...,XRPAmount(value=208139),XRPAmount(value=1107),5e-06,AMM



===== tx_hash=01BCBE47B02680362F869508D1DA42BB6D4EDFD08057FE3D96A63023DDE38B37 | direction=524C555344000000000000000000000000000000->XRP | multipath_like=False =====
presence: real=NONE | model=AMM
real_pair_in=73.41501563199563 | real_pair_out=0.085066
model_in=73.41431229329971 | model_out=0.085066 | in_gap_pct=-0.0009580311191999146123125476259% | out_gap_pct=0%
[Real trajectory from metadata]


[Model trajectory from unified replay]


Unnamed: 0,segment_no,src,source_id,out_take,in_take,avg_quality,pre_spq,post_spq,dx_eff,fee_paid,slippage_price_premium,segment_kind
0,1,AMM,,XRPAmount(value=85066),"IOUAmount(mantissa=7341431229329971, exponent=...",Quality(rate=IOUAmount(mantissa=11587160000000...,Quality(rate=IOUAmount(mantissa=11594308446637...,Quality(rate=IOUAmount(mantissa=11579856041355...,"IOUAmount(mantissa=7272788847335735, exponent=...","IOUAmount(mantissa=6864238199423530, exponent=...",0.000617,AMM


In [9]:
if EXPORT_RESULTS and not summary_df.empty:
    out_dir = REPO_ROOT / 'artifacts' / 'notebook_exports' / f'daily_dual_replay_{TARGET_DATE}'
    out_dir.mkdir(parents=True, exist_ok=True)

    summary_df.to_csv(out_dir / 'summary.csv', index=False)

    for r in results:
        p = out_dir / r.tx_hash
        p.mkdir(parents=True, exist_ok=True)
        r.real_steps.to_csv(p / 'real_metadata_steps.csv', index=False)
        r.model_steps.to_csv(p / 'model_unified_replay_steps.csv', index=False)

    print('export_saved_to:', out_dir)


export_saved_to: /Users/guohanze/Documents/Codebase/xrpl-amm-clob/artifacts/notebook_exports/daily_dual_replay_2025-12-15
