# CM-OPT Benchmark Dashboard

Run and compare `example_constraint_manifold_arm`, `example_constraint_manifold_connected_poses`, `example_constraint_manifold_cable_robot`, and `example_constraint_manifold_quadruped_mp` from Python.

This notebook:
- runs each benchmark with `subprocess`
- captures logs per benchmark
- reads the per-benchmark CSV files emitted by the C++ benchmark helpers
- prints tables via `tabulate`
- renders Plotly comparisons

In [19]:
from __future__ import annotations

import csv
import datetime as dt
import os
import re
import subprocess
import time
from pathlib import Path

from tabulate import tabulate
import pandas as pd
import plotly.graph_objects as go

# Filesystem layout. REPO_ROOT is machine-specific: update if needed.
REPO_ROOT = Path('/Users/dellaert/git/GTDynamics')
BUILD_DIR = REPO_ROOT.joinpath('build')
DATA_DIR = REPO_ROOT.joinpath('data')
LOG_DIR = BUILD_DIR.joinpath('benchmark_logs_cmopt_notebook')
STATUS_PATH = LOG_DIR.joinpath('run_status.csv')

# Methods a benchmark is expected to report when its target does not
# provide its own 'expected_methods' list.
DEFAULT_EXPECTED_METHODS = [
    'Soft Constraint',
    'Penalty Method',
    'Augmented Lagrangian',
    'Constraint Manifold (F)',
    'Constraint Manifold (I)',
]

# Directory, relative to BUILD_DIR, that holds every example executable.
_EXAMPLE_DIR = 'examples/example_constraint_manifold'


def _example_target(build_target, benchmark_id, **overrides):
    """Return a TARGETS entry whose executable is named after its build target."""
    return {
        'build_target': build_target,
        'executable': f'{_EXAMPLE_DIR}/{build_target}',
        'benchmark_id': benchmark_id,
        **overrides,
    }


# One entry per run. Targets sharing a 'benchmark_id' (arm / arm_cm_f_only)
# are given the same benchmark CSV path.
TARGETS = {
    'arm': _example_target(
        'example_constraint_manifold_arm',
        'arm',
        args=['--skip-cm-f'],
        expected_methods=[
            'Soft Constraint',
            'Penalty Method',
            'Augmented Lagrangian',
            'Constraint Manifold (I)',
        ],
    ),
    'arm_cm_f_only': _example_target(
        'example_constraint_manifold_arm',
        'arm',
        run_timeout_s=180,
        args=['--cm-f-only'],
        expected_methods=[
            'Constraint Manifold (F)',
        ],
    ),
    'connected_poses': _example_target(
        'example_constraint_manifold_connected_poses',
        'connected_poses',
    ),
    'cable_robot': _example_target(
        'example_constraint_manifold_cable_robot',
        'cable_robot',
    ),
    'quadruped': _example_target(
        'example_constraint_manifold_quadruped_mp',
        'quadruped',
        args=['--num-steps', '10', '--methods', 'all'],
    ),
}

JOBS = 2  # parallel jobs handed to make (-j)
RUN_TIMEOUT_S = 240  # per-run timeout unless a target sets 'run_timeout_s'
PRINT_SUBPROCESS_OUTPUT = True  # echo selected log lines while running
BENCHMARK_VERBOSE = False  # adds --verbose-benchmark to each run
RETRACTOR_VERBOSE = False  # adds --verbose-retractor to each run

for _directory in (LOG_DIR, DATA_DIR):
    _directory.mkdir(parents=True, exist_ok=True)

for _label, _location in (
    ('REPO_ROOT', REPO_ROOT),
    ('BUILD_DIR', BUILD_DIR),
    ('DATA_DIR', DATA_DIR),
    ('LOG_DIR', LOG_DIR),
):
    print(f'{_label}: {_location}')


REPO_ROOT: /Users/dellaert/git/GTDynamics
BUILD_DIR: /Users/dellaert/git/GTDynamics/build
DATA_DIR: /Users/dellaert/git/GTDynamics/data
LOG_DIR: /Users/dellaert/git/GTDynamics/build/benchmark_logs_cmopt_notebook


In [20]:
def normalize_method(method: str) -> str:
    """Return the plain method label, unwrapping a LaTeX ``\\textbf{...}`` if present.

    Surrounding whitespace is removed first; labels that are not entirely a
    ``\\textbf{...}`` wrapper are returned stripped but otherwise unchanged.
    """
    label = method.strip()
    wrapped = re.fullmatch(r"\\textbf\{(.+)\}", label)
    if wrapped is None:
        return label
    return wrapped.group(1)


def read_results_csv(path: Path):
    """Load the benchmark result rows stored in ``path``.

    Returns an empty list when the file does not exist. Each returned dict
    has the keys ``benchmark``, ``method_raw``, ``method`` (normalized
    label), ``f_dim``, ``v_dim``, ``time_s``, ``iters``, ``constraint_l2``
    and ``cost``, with the numeric columns converted to ``int``/``float``.

    A row that is incomplete or whose numeric fields do not parse (for
    example a last line that was cut short) is reported on stdout and
    skipped, so one damaged row does not discard every other result.
    A column missing from the header still raises ``KeyError``.
    """
    if not path.exists():
        return []

    rows = []
    with path.open('r', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            try:
                parsed = {
                    'benchmark': row['benchmark'],
                    'method_raw': row['method'],
                    'method': normalize_method(row['method']),
                    'f_dim': int(row['f_dim']),
                    'v_dim': int(row['v_dim']),
                    'time_s': float(row['time_s']),
                    'iters': int(row['iters']),
                    'constraint_l2': float(row['constraint_l2']),
                    'cost': float(row['cost']),
                }
            except (AttributeError, TypeError, ValueError) as exc:
                # DictReader fills the missing trailing fields of a short row
                # with None, which fails in the conversions above.
                print(f'Skipping malformed row at {path}:{reader.line_num}: {exc}')
                continue
            rows.append(parsed)
    return rows


def benchmark_csv_path(benchmark_id: str) -> Path:
    """Return the path under DATA_DIR of the results CSV for ``benchmark_id``."""
    csv_name = f'{benchmark_id}_benchmark.csv'
    return DATA_DIR.joinpath(csv_name)


def read_all_results_csv(targets: dict):
    """Collect the result rows of every benchmark referenced by ``targets``.

    ``targets`` maps a target name to its configuration dict. Several
    targets may share one ``benchmark_id``; the corresponding CSV is read
    only once. Returns the concatenated rows in target order.
    """
    rows = []
    seen = set()
    for benchmark, target_cfg in targets.items():
        # Fall back to the target name, as run_one_target does, so a target
        # without an explicit 'benchmark_id' is read from the CSV path it
        # was run with.
        benchmark_id = target_cfg.get('benchmark_id', benchmark)
        if benchmark_id in seen:
            continue
        seen.add(benchmark_id)
        rows.extend(read_results_csv(benchmark_csv_path(benchmark_id)))
    return rows


def write_status_csv(status_rows):
    """Overwrite STATUS_PATH with a header plus one CSV line per status row."""
    columns = [
        'benchmark',
        'benchmark_id',
        'target',
        'return_code',
        'elapsed_s',
        'timestamp_utc',
        'log_path',
        'benchmark_csv',
    ]
    with STATUS_PATH.open('w', newline='') as status_file:
        status_writer = csv.DictWriter(status_file, fieldnames=columns)
        status_writer.writeheader()
        for status in status_rows:
            status_writer.writerow(status)


# Prefixes of the benchmark output lines that are echoed to the notebook.
_RUN_ECHO_PREFIXES = (
    '[BENCH]',
    '& ',
    'soft constraints:',
    'penalty method:',
    'augmented lagrangian:',
    'constraint manifold basis variables',
)


def _status_row(benchmark, benchmark_id, build_target, executable,
                return_code, elapsed, log_path, benchmark_csv):
    """Assemble the status record that run_one_target returns for one target."""
    # dt.timezone.utc is equivalent to dt.UTC but also exists before Python 3.11.
    now_utc = dt.datetime.now(dt.timezone.utc)
    return {
        'benchmark': benchmark,
        'benchmark_id': benchmark_id,
        'target': f"{build_target} -> {executable}",
        'return_code': return_code,
        'elapsed_s': elapsed,
        'timestamp_utc': now_utc.isoformat(timespec='seconds').replace('+00:00', 'Z'),
        'log_path': str(log_path),
        'benchmark_csv': str(benchmark_csv),
    }


def _echo_log_lines(log_path, start, benchmark, keep):
    """Print, prefixed with the benchmark name, the log lines from ``start`` that ``keep`` accepts.

    ``start`` is a position previously obtained from ``tell()`` on the log.
    """
    # errors='replace' mirrors tail_log: a subprocess that emits bytes which
    # are not valid text must not abort the run with UnicodeDecodeError.
    with log_path.open('r', errors='replace') as rf:
        rf.seek(start)
        for line in rf:
            if keep(line):
                print(f'[{benchmark}] {line}', end='')


def run_one_target(benchmark: str, target_cfg: dict, jobs: int = 2):
    """Build one benchmark target with ``make`` and run its executable.

    Args:
        benchmark: Name of the run; also names the log file
            ``LOG_DIR/<benchmark>.log``.
        target_cfg: Target configuration. ``build_target`` and
            ``executable`` (relative to BUILD_DIR) are required;
            ``benchmark_id`` (defaults to ``benchmark``), ``run_timeout_s``
            (defaults to RUN_TIMEOUT_S) and ``args`` are optional.
        jobs: Value passed to ``make -j``.

    Returns:
        A status dict with the keys ``benchmark``, ``benchmark_id``,
        ``target``, ``return_code``, ``elapsed_s``, ``timestamp_utc``,
        ``log_path`` and ``benchmark_csv``. If the build fails the
        executable is not run and ``return_code`` is make's return code;
        if the run exceeds its timeout ``return_code`` is 124.

    Build and run output (stdout and stderr) goes to the log file; when
    PRINT_SUBPROCESS_OUTPUT is set, selected lines are echoed as well.
    """
    benchmark_id = target_cfg.get('benchmark_id', benchmark)
    benchmark_csv = benchmark_csv_path(benchmark_id)
    run_timeout_s = target_cfg.get('run_timeout_s', RUN_TIMEOUT_S)
    build_target = target_cfg['build_target']
    executable = target_cfg['executable']
    extra_args = target_cfg.get('args', [])

    log_path = LOG_DIR / f'{benchmark}.log'
    env = os.environ.copy()

    build_cmd = ['make', f'-j{jobs}', build_target]
    run_cmd = [
        str(BUILD_DIR / executable),
        '--benchmark-id', benchmark_id,
        '--benchmark-csv', str(benchmark_csv),
    ]
    if BENCHMARK_VERBOSE:
        run_cmd.append('--verbose-benchmark')
    if RETRACTOR_VERBOSE:
        run_cmd.append('--verbose-retractor')
    run_cmd.extend(extra_args)
    build_cmd_str = ' '.join(build_cmd)
    run_cmd_str = ' '.join(run_cmd)
    print(f'Building: {build_cmd_str} (benchmark={benchmark})')

    start = time.perf_counter()
    timed_out = False
    with log_path.open('w') as log_file:
        build_log_pos = log_file.tell()
        build_result = subprocess.run(
            build_cmd,
            cwd=BUILD_DIR,
            env=env,
            text=True,
            stdout=log_file,
            stderr=subprocess.STDOUT,
        )
        if PRINT_SUBPROCESS_OUTPUT:
            log_file.flush()
            _echo_log_lines(
                log_path,
                build_log_pos,
                benchmark,
                lambda line: 'Built target' in line or 'error' in line.lower(),
            )

        if build_result.returncode != 0:
            elapsed = time.perf_counter() - start
            print(f'  -> build failed, return_code={build_result.returncode}, elapsed={elapsed:.2f}s, log={log_path}')
            return _status_row(
                benchmark, benchmark_id, build_target, executable,
                build_result.returncode, elapsed, log_path, benchmark_csv,
            )

        print(f'Running: {run_cmd_str} (benchmark={benchmark})')
        run_log_pos = log_file.tell()
        try:
            run_result = subprocess.run(
                run_cmd,
                cwd=BUILD_DIR,
                env=env,
                text=True,
                stdout=log_file,
                stderr=subprocess.STDOUT,
                timeout=run_timeout_s,
            )
        except subprocess.TimeoutExpired:
            timed_out = True
            log_file.write('\n[TIMEOUT]\n')
            if PRINT_SUBPROCESS_OUTPUT:
                print(f'[TIMEOUT] process exceeded timeout={run_timeout_s}s')
            return_code = 124
        else:
            if PRINT_SUBPROCESS_OUTPUT:
                log_file.flush()
                _echo_log_lines(
                    log_path,
                    run_log_pos,
                    benchmark,
                    lambda line: line.strip().startswith(_RUN_ECHO_PREFIXES),
                )
            return_code = run_result.returncode

    elapsed = time.perf_counter() - start
    timeout_suffix = ' (timeout)' if timed_out else ''
    print(f'  -> return_code={return_code}, elapsed={elapsed:.2f}s{timeout_suffix}, log={log_path}')

    return _status_row(
        benchmark, benchmark_id, build_target, executable,
        return_code, elapsed, log_path, benchmark_csv,
    )


def run_all_targets(clear_previous: bool = True, jobs: int = 2):
    """Run every entry of TARGETS in order and record the outcomes.

    Args:
        clear_previous: When true, delete the ``*.log`` files in LOG_DIR and
            the benchmark CSV of every target before running.
        jobs: Value passed to ``make -j`` for each build.

    Returns:
        The list of status dicts returned by run_one_target, which is also
        written to STATUS_PATH.
    """
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    if clear_previous:
        for p in LOG_DIR.glob('*.log'):
            p.unlink()
        # Resolve ids with the same fallback to the target name that
        # run_one_target uses, so the CSV that will be written is the one
        # that gets cleared. The set removes ids shared by several targets.
        benchmark_ids = {
            target_cfg.get('benchmark_id', benchmark)
            for benchmark, target_cfg in TARGETS.items()
        }
        for benchmark_id in benchmark_ids:
            p = benchmark_csv_path(benchmark_id)
            if p.exists():
                p.unlink()

    status_rows = [
        run_one_target(benchmark, target_cfg, jobs=jobs)
        for benchmark, target_cfg in TARGETS.items()
    ]

    write_status_csv(status_rows)
    return status_rows


In [21]:
# Run all benchmarks, then show one status row per target.
run_status = run_all_targets(clear_previous=True, jobs=JOBS)

print()
print(tabulate(
    run_status,
    headers='keys',
    tablefmt='github',
    floatfmt='.3f',
))

print(f'\nstatus CSV: {STATUS_PATH}')
# Resolve ids with the same fallback to the target name that run_one_target
# uses; without it a target lacking 'benchmark_id' would contribute None,
# which cannot be sorted together with strings.
benchmark_ids = {cfg.get('benchmark_id', name) for name, cfg in TARGETS.items()}
for benchmark_id in sorted(benchmark_ids):
    print(f'benchmark CSV ({benchmark_id}): {benchmark_csv_path(benchmark_id)}')


Building: make -j2 example_constraint_manifold_arm (benchmark=arm)
[arm] [ 95%] Built target gtdynamics
[arm] [100%] Built target example_constraint_manifold_arm
Running: /Users/dellaert/git/GTDynamics/build/examples/example_constraint_manifold/example_constraint_manifold_arm --benchmark-id arm --benchmark-csv /Users/dellaert/git/GTDynamics/data/arm_benchmark.csv --skip-cm-f (benchmark=arm)
[arm] [BENCH] Skipping CM(F); running CM(I) with other methods.
[arm] soft constraints:
[arm] [BENCH] Soft Constraint: f_dim=688, v_dim=605, time_s=0.279, iters=205, constraint_l2=8.571e-03, cost=0.430867
[arm] penalty method:
[arm] [BENCH] Penalty Method: f_dim=688, v_dim=605, time_s=1.92799, iters=0, constraint_l2=2.670e-05, cost=0.234963
[arm] augmented lagrangian:
[arm] [BENCH] Augmented Lagrangian: f_dim=688, v_dim=605, time_s=1.426, iters=0, constraint_l2=1.063e-04, cost=0.233363
[arm] constraint manifold basis variables (infeasible):
[arm] [BENCH] \textbf{Constraint Manifold (I)}: f_dim=146, 

In [22]:
# Load every benchmark CSV, print the results table, then report which
# expected methods are missing for each benchmark id.
rows = read_all_results_csv(TARGETS)

if rows:
    rows_sorted = sorted(rows, key=lambda r: (r['benchmark'], r['method']))
    print(tabulate(
        rows_sorted,
        headers='keys',
        tablefmt='github',
        floatfmt='.6g',
    ))

    # Methods actually reported, grouped by benchmark.
    present = {}
    for result in rows_sorted:
        present.setdefault(result['benchmark'], set()).add(result['method'])

    # Union of the expected methods of all targets sharing a benchmark id.
    expected_by_benchmark = {}
    for target_name, cfg in TARGETS.items():
        wanted = expected_by_benchmark.setdefault(cfg.get('benchmark_id', target_name), set())
        wanted.update(cfg.get('expected_methods', DEFAULT_EXPECTED_METHODS))

    summary = []
    for benchmark_id in sorted(expected_by_benchmark):
        found = present.get(benchmark_id, set())
        absent = sorted(expected_by_benchmark[benchmark_id] - found)
        summary.append({
            'benchmark': benchmark_id,
            'methods_found': ', '.join(sorted(found)) if found else '(none)',
            'missing_expected_methods': ', '.join(absent) if absent else '(none)',
        })

    print('\nMethod coverage summary:')
    print(tabulate(summary, headers='keys', tablefmt='github'))
else:
    print('No benchmark rows found in benchmark CSV files.')


| benchmark       | method_raw                       | method                  |   f_dim |   v_dim |   time_s |   iters |   constraint_l2 |        cost |
|-----------------|----------------------------------|-------------------------|---------|---------|----------|---------|-----------------|-------------|
| arm             | Augmented Lagrangian             | Augmented Lagrangian    |     688 |     605 | 1.426    |       0 |     0.00010625  |    0.233363 |
| arm             | \textbf{Constraint Manifold (F)} | Constraint Manifold (F) |     146 |      70 | 0.004    |       0 |     0.00318196  |  594.715    |
| arm             | \textbf{Constraint Manifold (I)} | Constraint Manifold (I) |     146 |      70 | 0.006    |       0 |     2.84091e-10 |  594.715    |
| arm             | Penalty Method                   | Penalty Method          |     688 |     605 | 1.92799  |       0 |     2.66998e-05 |    0.234963 |
| arm             | Soft Constraint                  | Soft Constraint      

In [23]:
def make_grouped_bar(rows, metric: str, title: str, y_title: str, log_y: bool = False):
    """Build a grouped bar chart of ``metric`` with one bar group per benchmark.

    Args:
        rows: Result dicts with at least the keys ``benchmark``, ``method``
            and ``metric``. If several rows share a (benchmark, method)
            pair, the last one is plotted.
        metric: Key of the value to plot.
        title: Figure title.
        y_title: Y-axis title.
        log_y: Use a logarithmic y axis when true.

    Returns:
        A ``plotly.graph_objects.Figure`` with one trace per method that has
        at least one value. Benchmarks appear in TARGETS order, followed by
        any benchmark found only in ``rows``.
    """
    methods_order = [
        'Soft Constraint',
        'Penalty Method',
        'Augmented Lagrangian',
        'Constraint Manifold (F)',
        'Constraint Manifold (I)',
    ]
    # Methods outside the preferred order used to be dropped from the figure
    # without notice; append them in first-seen order instead.
    for row in rows:
        if row['method'] not in methods_order:
            methods_order.append(row['method'])

    # dict.fromkeys removes duplicates while keeping first-seen order.
    configured = [cfg.get('benchmark_id', name) for name, cfg in TARGETS.items()]
    benchmarks = list(dict.fromkeys(configured + [row['benchmark'] for row in rows]))

    value_map = {(r['benchmark'], r['method']): r[metric] for r in rows}

    fig = go.Figure()
    for method in methods_order:
        # None leaves a gap for benchmarks that did not report this method.
        y = [value_map.get((b, method)) for b in benchmarks]
        if any(v is not None for v in y):
            fig.add_trace(go.Bar(name=method, x=benchmarks, y=y))

    fig.update_layout(
        title=title,
        barmode='group',
        xaxis_title='Benchmark',
        yaxis_title=y_title,
        template='plotly_white',
    )
    if log_y:
        fig.update_yaxes(type='log')
    return fig


# Render the three comparison charts (all on a log y axis) when results exist.
if rows:
    fig_time = make_grouped_bar(
        rows, 'time_s', 'Runtime Comparison', 'time (s)', log_y=True,
    )
    fig_cost = make_grouped_bar(
        rows, 'cost', 'Cost Comparison', 'cost', log_y=True,
    )
    fig_constraint = make_grouped_bar(
        rows, 'constraint_l2', 'Constraint Violation Comparison', 'constraint L2', log_y=True,
    )

    for _figure in (fig_time, fig_cost, fig_constraint):
        _figure.show()
else:
    print('No rows to plot.')

In [24]:
def tail_log(benchmark: str, n: int = 80):
    """Print the last ``n`` lines of ``LOG_DIR/<benchmark>.log``.

    Prints a notice and returns if the log does not exist. Undecodable
    bytes are replaced rather than raising. A non-positive ``n`` prints
    only the header line.
    """
    p = LOG_DIR / f'{benchmark}.log'
    if not p.exists():
        print(f'No log found: {p}')
        return
    lines = p.read_text(errors='replace').splitlines()
    print(f'--- tail({n}) {p} ---')
    # lines[-0:] is the whole list and a negative n would slice from the
    # front, so only a positive n may be used as a tail length.
    tail = lines[-n:] if n > 0 else []
    for line in tail:
        print(line)

# Example:
# tail_log('arm', n=120)