# Router Benchmark on PYNQ-Z2

This notebook demonstrates how to run the router benchmark and read cycle counts from the FPGA.

**LED Mapping:**
- LED0 = Base12 (condition 2)
- LED1 = Router (condition 3) — should light with default latencies
- LED2 = Base10 (condition 1)
- LED3 = Base2 (condition 0)

**Prerequisites:**
- Copy `router_bench.bit` and `router_bench.hwh` to the same directory as this notebook
- Or update the `overlay_path` below to point to the correct location

In [None]:
from pynq import Overlay, MMIO
import time

# Load the overlay.
# `overlay_path` is a bare filename; the prerequisites at the top of this notebook ask for
# `router_bench.bit` and `router_bench.hwh` to sit in the notebook's directory, or for this
# path to be edited to wherever they live.
overlay_path = "router_bench.bit"
# NOTE(review): constructing Overlay presumably programs the FPGA with the bitstream as a
# side effect — confirm against the PYNQ docs before re-running this cell mid-benchmark.
ol = Overlay(overlay_path)

# Only reached when Overlay() returned without raising.
print("Overlay loaded successfully!")

In [None]:
# Register map of the benchmark module (byte offsets relative to BASE_ADDR)
CTRL     = 0x00  # Control: bit0=start, bit1=soft_clear
STAT     = 0x04  # Status: bit0=running, bit1=done, bits[3:2]=winner_code
T0       = 0x08  # Cycle count for condition 0 (Base2)
T1       = 0x0C  # Cycle count for condition 1 (Base10)
T2       = 0x10  # Cycle count for condition 2 (Base12)
T3       = 0x14  # Cycle count for condition 3 (Router)
ONEHOT   = 0x18  # Internal one-hot winner (cond0..3)
TTOTAL   = 0x1C  # Sum of condition cycle totals
TRUNTIME = 0x20  # Cycles from start→done (includes control overhead)
INFO     = 0x24  # [15:0] = operations executed per condition

# Map the benchmark module's register window.
# 0x1000 is handed to MMIO as its second argument; the highest offset used here is 0x24.
# NOTE(review): BASE_ADDR is hard-coded — confirm it matches the address assigned in the design.
BASE_ADDR = 0x43C00000
mmio = MMIO(BASE_ADDR, 0x1000)

print("MMIO initialized at address 0x{:08X}".format(BASE_ADDR))


In [None]:
# AXI clock frequency used for every cycles→time conversion below;
# update if different on your platform
CLK_FREQ_HZ = 100_000_000
CLK_PERIOD_NS = 1e9 / CLK_FREQ_HZ  # length of one clock cycle, in nanoseconds


def cycles_to_seconds(cycles):
    """Convert a cycle count to seconds by dividing by CLK_FREQ_HZ."""
    seconds = cycles / CLK_FREQ_HZ
    return seconds


def cycles_to_microseconds(cycles):
    """Convert a cycle count to microseconds (seconds at CLK_FREQ_HZ, times 1e6)."""
    elapsed_s = cycles_to_seconds(cycles)
    return elapsed_s * 1e6

def start_benchmark():
    """Kick off a benchmark run.

    Writes the value 0x1 to CTRL: bit0 (start) set, every other bit —
    including bit1 (soft_clear) — written as zero.
    """
    start_bit = 0x1
    mmio.write(CTRL, start_bit)

def read_status():
    """Sample the STAT register once and unpack its fields.

    Returns:
        A (running, done, winner_code) tuple: `running` reflects bit0,
        `done` reflects bit1, and `winner_code` is the 2-bit integer held
        in bits[3:2].
    """
    status_word = mmio.read(STAT)
    return (
        (status_word & 0b01) != 0,    # running
        (status_word & 0b10) != 0,    # done
        (status_word >> 2) & 0b11,    # winner_code
    )

def wait_done(timeout_s=2.0):
    """Poll the status register until the done bit is set.

    Args:
        timeout_s: Maximum time to keep polling, in seconds.

    Returns:
        The winner code reported by read_status() in the same poll that
        observed done set.

    Raises:
        TimeoutError: if done is still clear once timeout_s seconds have elapsed.
    """
    # NOTE(review): if the hardware leaves `done` set from a previous run until it is
    # explicitly cleared, this returns immediately with stale data — confirm that
    # writing the start bit clears `done`.

    # monotonic() is immune to wall-clock adjustments (NTP sync, manual date change),
    # which could otherwise trigger a false timeout or stretch the wait.
    deadline = time.monotonic() + timeout_s
    while True:
        _running, done, winner_code = read_status()
        if done:
            return winner_code
        if time.monotonic() > deadline:
            raise TimeoutError("Benchmark did not finish")
        time.sleep(0.001)  # 1 ms pause between polls

def read_cycles():
    """Return the per-condition cycle counters as a 4-tuple, read in the order T0, T1, T2, T3."""
    counter_offsets = (T0, T1, T2, T3)
    return tuple(mmio.read(offset) for offset in counter_offsets)

def read_metrics():
    """Collect the per-condition counters together with the aggregate registers.

    Returns:
        (cycles, total_cycles, runtime_cycles, ops_per_condition) where
        `cycles` is the tuple from read_cycles(), the next two are the raw
        TTOTAL and TRUNTIME register values, and `ops_per_condition` is the
        low 16 bits of INFO.
    """
    per_condition = read_cycles()
    summed_cycles = mmio.read(TTOTAL)
    elapsed_cycles = mmio.read(TRUNTIME)
    # Only bits [15:0] of INFO carry the operation count.
    ops_count = mmio.read(INFO) & 0xFFFF
    return per_condition, summed_cycles, elapsed_cycles, ops_count

# Visible confirmation that this cell ran and the helpers above now exist in the kernel.
print("Helper functions defined")


In [None]:
# Display name for each condition index, tagged with the LED assigned to it
cond_labels = {
    0: "Base2   (LED3)",
    1: "Base10  (LED2)",
    2: "Base12  (LED0)",
    3: "Router  (LED1)",
}

# One benchmark run: start, wait for completion, then read every result register.
print("Running benchmark...")
start_benchmark()
winner = wait_done()
cycles, total_cycles, runtime_cycles, ops_per_condition = read_metrics()

banner = "=" * 50

print("\n" + banner)
print("Benchmark Results")
print(banner)
print("\nCycle totals per condition:")
for idx, cyc in enumerate(cycles):
    label = cond_labels[idx]
    time_us = cycles_to_microseconds(cyc)
    # An operation count of zero would divide by zero; report NaN instead.
    if ops_per_condition:
        cycles_per_op = cyc / ops_per_condition
    else:
        cycles_per_op = float('nan')
    print(f"  {label:>14}: {cyc:6d} cycles ({time_us:8.3f} µs, {cycles_per_op:5.2f} cycles/op)")

# Every condition executes ops_per_condition operations, so one run performs that many per condition.
total_ops = ops_per_condition * len(cycles)
total_time_us = cycles_to_microseconds(total_cycles)
runtime_time_us = cycles_to_microseconds(runtime_cycles)

print("\nAggregate metrics:")
print(f"  Total benchmark cycles (sum of conditions): {total_cycles:6d} cycles ({total_time_us:8.3f} µs)")
print(f"  Runtime cycles (start→done):               {runtime_cycles:6d} cycles ({runtime_time_us:8.3f} µs)")
print(f"  Operations executed: {ops_per_condition} per condition ({total_ops} total per run)")
if runtime_cycles:
    # Throughput is measured against start→done time, which includes control overhead.
    throughput_ops_per_s = total_ops / cycles_to_seconds(runtime_cycles)
    print(f"  Effective throughput: {throughput_ops_per_s/1e3:8.2f} kOps/s")

print(f"\nWinner: cond{winner} → {cond_labels[winner]}")
print(banner)


In [None]:
# Optional: Run multiple times and average
import numpy as np

num_runs = 10
if num_runs < 1:
    # Without at least one sample the averages are undefined and the per-condition
    # report below would fail on an empty array; fail early with a clear message.
    raise ValueError("num_runs must be at least 1")

cycle_samples = []    # one (T0, T1, T2, T3) tuple per run
total_runs = []       # TTOTAL per run
runtime_runs = []     # TRUNTIME per run
winner_history = []   # winner code per run
ops_per_condition = None

print(f"Running benchmark {num_runs} times...")
for i in range(num_runs):
    start_benchmark()
    winner = wait_done()
    cycles, total_cycles, runtime_cycles, ops_val = read_metrics()
    cycle_samples.append(cycles)
    total_runs.append(total_cycles)
    runtime_runs.append(runtime_cycles)
    winner_history.append(winner)
    # The operation count is expected to stay constant across runs; warn if it
    # does not and carry the most recent value forward.
    if ops_per_condition is None:
        ops_per_condition = ops_val
    elif ops_per_condition != ops_val:
        print(f"Warning: ops_per_condition changed from {ops_per_condition} to {ops_val}")
        ops_per_condition = ops_val
    print(
        f"Run {i+1}: Base2={cycles[0]}, Base10={cycles[1]}, Base12={cycles[2]}, Router={cycles[3]}, "
        f"Total={total_cycles}, Runtime={runtime_cycles} → Winner: cond{winner}"
    )

# Rows are runs, columns are conditions; statistics are taken down each column.
# np.std is called with its defaults here.
cycle_runs = np.array(cycle_samples, dtype=np.float64)
avg = np.mean(cycle_runs, axis=0)
std = np.std(cycle_runs, axis=0)
avg_total = float(np.mean(total_runs))
std_total = float(np.std(total_runs))
avg_runtime = float(np.mean(runtime_runs))
std_runtime = float(np.std(runtime_runs))

# Cycle→time conversions go through the notebook's shared helpers so the clock
# frequency is applied in exactly one place.
avg_time_us = cycles_to_microseconds(avg)
std_time_us = cycles_to_microseconds(std)
avg_total_time_us = cycles_to_microseconds(avg_total)
std_total_time_us = cycles_to_microseconds(std_total)
avg_runtime_time_us = cycles_to_microseconds(avg_runtime)
std_runtime_time_us = cycles_to_microseconds(std_runtime)

# A zero operation count makes the per-operation figures undefined; report NaN.
avg_cycles_per_op = avg / ops_per_condition if ops_per_condition else np.full_like(avg, np.nan)
std_cycles_per_op = std / ops_per_condition if ops_per_condition else np.full_like(std, np.nan)
total_ops = ops_per_condition * cycle_runs.shape[1] if ops_per_condition else np.nan
avg_throughput_ops = (
    total_ops / cycles_to_seconds(avg_runtime)
    if (ops_per_condition and avg_runtime)
    else float('nan')
)

print("\n" + "="*50)
print(f"Average over {num_runs} runs:")
print("="*50)
for idx in range(cycle_runs.shape[1]):
    label = cond_labels[idx]
    print(
        f"  {label:>14}: {avg[idx]:6.1f} ± {std[idx]:4.1f} cycles "
        f"({avg_time_us[idx]:7.3f} ± {std_time_us[idx]:5.3f} µs, {avg_cycles_per_op[idx]:5.2f} ± {std_cycles_per_op[idx]:5.2f} cycles/op)"
    )
print("="*50)
print(f"  Total cycles: {avg_total:6.1f} ± {std_total:4.1f} cycles ({avg_total_time_us:7.3f} ± {std_total_time_us:5.3f} µs)")
print(f"  Runtime cycles (start→done): {avg_runtime:6.1f} ± {std_runtime:4.1f} cycles ({avg_runtime_time_us:7.3f} ± {std_runtime_time_us:5.3f} µs)")
print(f"  Operations per condition: {ops_per_condition} → {total_ops} total operations per run")
if not np.isnan(avg_throughput_ops):
    print(f"  Average throughput: {avg_throughput_ops/1e3:8.2f} kOps/s")

# Tally how often each condition was reported as winner; minlength keeps
# conditions that never won in the table with a count of zero.
winner_counts = np.bincount(winner_history, minlength=len(cond_labels))
print("\nWinner distribution:")
for idx in range(len(cond_labels)):
    print(f"  cond{idx} ({cond_labels[idx]}): {winner_counts[idx]} wins")


In [None]:
# Optional: Visualize results
import matplotlib.pyplot as plt

# Tick labels and bar colours, listed in condition order (cond0..cond3)
conditions = ['Base2\n(cond0)', 'Base10\n(cond1)', 'Base12\n(cond2)', 'Router\n(cond3)']
colors = ['#ff6b6b', '#4ecdc4', '#45b7d1', '#96ceb4']

# Bar chart of the mean cycle count per condition, with the spread across runs as error bars.
# Relies on avg, std, avg_cycles_per_op, avg_time_us and num_runs from the multi-run cell.
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(conditions, avg, yerr=std, color=colors, alpha=0.7, capsize=5)
ax.set_ylabel('Cycles per condition', fontsize=12)
ax.set_title('Router Benchmark Results (Average over {} runs)'.format(num_runs), fontsize=14)
ax.grid(axis='y', alpha=0.3)

# Annotate each bar with its per-operation cost and its mean time.
for bar, cyc_per_op, time_us in zip(bars, avg_cycles_per_op, avg_time_us):
    label_x = bar.get_x() + bar.get_width() / 2
    label_y = bar.get_height()
    ax.text(
        label_x,
        label_y,
        f"{cyc_per_op:5.2f} cyc/op\n{time_us:6.3f} µs",
        ha='center', va='bottom', fontsize=9
    )

# Outline the condition with the lowest mean cycle count
min_idx = int(np.argmin(avg))
fastest_bar = bars[min_idx]
fastest_bar.set_edgecolor('red')
fastest_bar.set_linewidth(3)

fig.tight_layout()
plt.show()

# chr(10) is the newline inside the tick label; flatten it for single-line output.
print(
    f"Winner: {conditions[min_idx].replace(chr(10), ' ')} with {avg[min_idx]:.1f} cycles "
    f"({avg_time_us[min_idx]:.3f} µs)"
)
