Analysis of MEV pipeline timing and its effect on block propagation on Ethereum mainnet.

In [None]:
import pandas as pd
import polars as pl
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from scipy import stats

from loaders import load_parquet, display_sql
from plotly_theme import horizontal_colorbar

# Notebook parameter: the date handed unchanged to display_sql() and load_parquet() below.
# NOTE(review): with None the loaders presumably resolve the date themselves — confirm in loaders.py.
target_date = None  # Set via papermill, or auto-detect from manifest

In [None]:
# Passes the dataset name and target date to the project helper display_sql.
# NOTE(review): presumably renders the SQL query behind this dataset — confirm in loaders.py.
display_sql("block_production_timeline", target_date)

In [None]:
# Load the block production timeline and convert the pandas frame to polars.
df = pl.from_pandas(load_parquet("block_production_timeline", target_date))

# A row counts as MEV when it carries a winning bid value; every other row is
# labelled as locally built.
df = df.with_columns(has_mev=pl.col("winning_bid_value").is_not_null())
df = df.with_columns(
    block_type=pl.when(pl.col("has_mev"))
    .then(pl.lit("MEV"))
    .otherwise(pl.lit("Local"))
)

# has_mev is never null (it comes from is_not_null), so MEV and local rows
# partition the frame and the local count is simply the remainder.
total_slots = df.height
mev_count = df.filter(pl.col("has_mev")).height
local_count = total_slots - mev_count

print(f"Total slots: {total_slots:,}")
print(f"MEV blocks: {mev_count:,} ({mev_count/total_slots*100:.1f}%)")
print(f"Local blocks: {local_count:,} ({local_count/total_slots*100:.1f}%)")

## Bid trace coverage

MEV block data comes from two sources with different coverage:

- **Payload delivered** (`mev_relay_proposer_payload_delivered`): Records when a relay delivers a block to a proposer. Has value, builder, and relay info for all MEV blocks.
- **Bid trace** (`mev_relay_bid_trace`): Records individual bids during the auction. Has bid timing, but may not include the winning bid if it arrived late or if there is a gap in the data.

The chart below shows what proportion of each relay's blocks have bid timing data available.

In [None]:
# Bid trace coverage: how many MEV blocks per relay have a winning-bid timestamp.
# Each block is attributed to the first entry of winning_relays ("Unknown" when that is null).
df_trace = df.filter(pl.col("has_mev")).with_columns(
    relay=pl.col("winning_relays").list.get(0).fill_null("Unknown"),
    has_bid_timing=pl.col("winning_bid_ms").is_not_null(),
)

# One row per relay, sorted ascending by block count.
relay_coverage = (
    df_trace
    .group_by("relay")
    .agg(
        total=pl.col("slot").count(),
        with_timing=pl.col("has_bid_timing").sum(),
    )
    .with_columns(
        without_timing=pl.col("total") - pl.col("with_timing"),
        pct_with_timing=(pl.col("with_timing") / pl.col("total") * 100).round(1),
    )
    .sort("total")
)

# Overall totals across all relays, read from a single one-row aggregate.
total_mev, total_with_timing = relay_coverage.select(
    pl.col("total").sum(),
    pl.col("with_timing").sum(),
).row(0)
print(f"MEV blocks: {total_mev:,}")
print(f"With bid timing: {total_with_timing:,} ({total_with_timing/total_mev*100:.1f}%)")
print(f"Without bid timing: {total_mev - total_with_timing:,} ({(total_mev - total_with_timing)/total_mev*100:.1f}%)")

In [None]:
# Stacked horizontal bars: blocks with bid timing (green) next to blocks without (red), per relay.
relay_coverage_pd = relay_coverage.to_pandas()

# Percentage label shown inside the green segment; left blank when a relay has no timed blocks.
timing_labels = [
    f"{pct:.0f}%" if timed > 0 else ""
    for pct, timed in zip(relay_coverage_pd["pct_with_timing"], relay_coverage_pd["with_timing"])
]

fig = go.Figure(
    data=[
        go.Bar(
            name="With bid timing",
            orientation="h",
            y=relay_coverage_pd["relay"],
            x=relay_coverage_pd["with_timing"],
            marker_color="#2ecc71",
            text=timing_labels,
            textposition="inside",
            hovertemplate="<b>%{y}</b><br>With timing: %{x:,}<extra></extra>",
        ),
        go.Bar(
            name="Without bid timing",
            orientation="h",
            y=relay_coverage_pd["relay"],
            x=relay_coverage_pd["without_timing"],
            marker_color="#e74c3c",
            hovertemplate="<b>%{y}</b><br>Without timing: %{x:,}<extra></extra>",
        ),
    ]
)

fig.update_layout(
    barmode="stack",
    height=350,
    margin={"l": 150, "r": 30, "t": 30, "b": 60},
    xaxis={"title": "Number of blocks"},
    yaxis={"title": ""},
    legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
)
fig.show(config={"responsive": True})

## MEV pipeline and blob count effects

How do blob count and MEV pipeline characteristics interact to affect block propagation? All scatter plots use blob count as the color dimension.

In [None]:
# Prepare data for MEV analysis.
# Missed slots (block never produced) show up as null or implausible timestamps, so keep
# only rows whose block was first seen at a non-negative offset below 60 seconds.
df_mev = df.filter(
    pl.col("block_first_seen_ms").is_not_null() &
    (pl.col("block_first_seen_ms") >= 0) &
    (pl.col("block_first_seen_ms") < 60000)
)

# Derived timing columns, each the difference of two existing *_ms columns.
df_mev = df_mev.with_columns(
    (pl.col("last_bid_ms") - pl.col("first_bid_ms")).alias("bidding_duration_ms"),
    (pl.col("first_column_first_seen_ms") - pl.col("block_first_seen_ms")).alias("block_to_column_ms"),
    (pl.col("block_first_seen_ms") - pl.col("winning_bid_ms")).alias("bid_to_block_ms"),
)

# Dynamic blob count bins based on actual data.
# max() is None when there is no valid block (or no blob count at all); fall back to 0 so
# the binning below and the colour ranges used by later cells stay well-defined.
max_blobs = df_mev.select(pl.col("blob_count").max()).item()
max_blobs = 0 if max_blobs is None else int(max_blobs)
bin_size = 3
# Bin edges [-1, 0, 3, 6, ...] describe the groups 0, 1-3, 4-6, ...
# The range already ends on the first multiple of bin_size >= max_blobs, so no extra edge
# is ever needed. max(max_blobs, 1) keeps at least one non-zero bin, which guarantees two
# or more labels (required by the colour sampling below).
bins = [-1, 0] + list(range(bin_size, max(max_blobs, 1) + bin_size, bin_size))
labels = ["0"] + [f"{lo + 1}-{hi}" for lo, hi in zip(bins[1:-1], bins[2:])]

# cut() requires exactly len(breaks) + 1 labels, so only the interior edges are passed as
# breaks: (-inf, 0] -> "0", (0, 3] -> "1-3", ..., (last interior edge, +inf) -> last label.
df_mev = df_mev.with_columns(
    pl.col("blob_count").cut(bins[1:-1], labels=labels).alias("blob_bin")
)
BLOB_BIN_ORDER = labels  # Store for use in charts

# Generate Plasma-based discrete colors, truncated to avoid light yellow (poor contrast).
# Sample evenly from 0.0 to PLASMA_MAX of the Plasma scale; len(labels) >= 2 here, so the
# division below cannot hit zero.
PLASMA_MAX = 0.70
sample_points = [i / (len(labels) - 1) * PLASMA_MAX for i in range(len(labels))]
BLOB_COLORS = dict(zip(labels, px.colors.sample_colorscale("Plasma", sample_points)))

# Create truncated Plasma colorscale for continuous use (avoids light yellow)
PLASMA_TRUNCATED = px.colors.sample_colorscale("Plasma", [i/10 * PLASMA_MAX for i in range(11)])

# MEV-only subset for MEV timing plots
df_mev_only = df_mev.filter(pl.col("has_mev"))


def _pct(part, whole):
    """Return part/whole as a percentage, or 0.0 when whole is zero."""
    return part / whole * 100 if whole else 0.0


total_df = len(df)
valid_blocks = len(df_mev)
mev_blocks = df_mev_only.height
local_blocks = df_mev.filter(~pl.col("has_mev")).height

print(f"Total slots in data: {total_df:,}")
print(f"Slots with valid blocks: {valid_blocks:,} ({_pct(valid_blocks, total_df):.1f}%)")
print(f"MEV blocks: {mev_blocks:,} ({_pct(mev_blocks, valid_blocks):.1f}%)")
print(f"Local blocks: {local_blocks:,} ({_pct(local_blocks, valid_blocks):.1f}%)")
print(f"Max blob count: {max_blobs}, bins: {labels}")

### Winning bid timing vs block arrival

Does late bidding combined with high blob count delay block propagation?

In [None]:
if df_mev_only.is_empty():
    print("No MEV data available.")
else:
    # Keep rows that have both timings; the first winning relay is used for the hover label.
    df_plot = (
        df_mev_only
        .filter(pl.col("winning_bid_ms").is_not_null())
        .filter(pl.col("block_first_seen_ms").is_not_null())
        .with_columns(
            relay=pl.col("winning_relays").list.get(0),
            blob_count_f=pl.col("blob_count").cast(pl.Float64),
        )
        .to_pandas()
    )

    # Float copy of blob_count drives the continuous colour axis; the integer column is
    # the one shown on hover.
    fig = px.scatter(
        df_plot,
        x="winning_bid_ms",
        y="block_first_seen_ms",
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.6,
        hover_data={"slot": True, "relay": True, "blob_count": True, "blob_count_f": False},
    )
    fig.update_layout(
        height=450,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Winning bid timing (ms from slot start)"},
        yaxis={"title": "Block first seen (ms from slot start)"},
        coloraxis_colorbar={"title": "Blobs"},
    )
    fig.show(config={"responsive": True})

### Bid to block delay

How long does it take from when the winning bid is submitted until the block is first seen on the network? This measures the latency through the MEV pipeline: bid submission → relay processing → proposer signing → network propagation.

In [None]:
# Prepare bid to block delay data.
# Only delays strictly between 0 and 5000 ms are kept; rows without a delay are dropped.
df_bid_delay = df_mev_only.filter(
    pl.col("bid_to_block_ms").is_not_null() &
    (pl.col("bid_to_block_ms") > 0) &
    (pl.col("bid_to_block_ms") < 5000)
).with_columns(
    pl.col("winning_relays").list.get(0).alias("relay"),
    pl.col("blob_count").cast(pl.Float64).alias("blob_count_f")
)

# Summary stats.
# median()/quantile() return None on an empty frame, which the ":.0f" format spec cannot
# render, so report the empty case explicitly instead of raising a TypeError.
median_delay = df_bid_delay.select(pl.col("bid_to_block_ms").median()).item()
p95_delay = df_bid_delay.select(pl.col("bid_to_block_ms").quantile(0.95)).item()
if median_delay is None or p95_delay is None:
    print(f"Bid to block delay (MEV blocks): no usable bid timing data, n={len(df_bid_delay):,}")
else:
    print(f"Bid to block delay (MEV blocks): median {median_delay:.0f}ms, P95 {p95_delay:.0f}ms, n={len(df_bid_delay):,}")

In [None]:
if df_bid_delay.is_empty():
    print("No bid timing data available.")
else:
    df_bid_delay_pd = df_bid_delay.to_pandas()

    # One point per block: bid-to-block delay against blob count, coloured by blob count.
    fig = px.scatter(
        df_bid_delay_pd,
        x="blob_count",
        y="bid_to_block_ms",
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.5,
        hover_data={"slot": True, "relay": True, "blob_count": True, "blob_count_f": False},
    )

    # Median delay for each blob count, overlaid as a white line with outlined markers.
    median_by_blob = (
        df_bid_delay
        .group_by("blob_count")
        .agg(pl.col("bid_to_block_ms").median())
        .sort("blob_count")
        .to_pandas()
    )
    fig.add_trace(
        go.Scatter(
            name="Median",
            mode="lines+markers",
            x=median_by_blob["blob_count"],
            y=median_by_blob["bid_to_block_ms"],
            line={"color": "white", "width": 2},
            marker={"size": 8, "color": "white", "line": {"width": 1, "color": "black"}},
            hovertemplate="Blobs: %{x}<br>Median: %{y:.0f}ms<extra></extra>",
        )
    )

    fig.update_layout(
        height=450,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Blob count", "dtick": 3},
        yaxis={"title": "Bid to block delay (ms)"},
        coloraxis_colorbar={"title": "Blobs"},
        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
    )
    fig.show(config={"responsive": True})

### Winning bid value vs block arrival

Do high-value blocks with more blobs behave differently?

In [None]:
if not df_mev_only.is_empty():
    # Express the winning bid in ETH (value / 1e18) and keep only positive bids on rows
    # that also have a block arrival time.
    df_plot = (
        df_mev_only
        .with_columns(
            winning_bid_eth=pl.col("winning_bid_value").cast(pl.Float64) / 1e18,
            relay=pl.col("winning_relays").list.get(0),
            blob_count_f=pl.col("blob_count").cast(pl.Float64),
        )
        .filter(pl.col("winning_bid_eth") > 0)
        .filter(pl.col("block_first_seen_ms").is_not_null())
        .to_pandas()
    )

    # Log-scaled x axis, coloured by blob count.
    fig = px.scatter(
        df_plot,
        x="winning_bid_eth",
        y="block_first_seen_ms",
        log_x=True,
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.6,
        hover_data={"slot": True, "relay": True, "blob_count": True, "blob_count_f": False},
    )
    fig.update_layout(
        height=450,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Winning bid value (ETH, log scale)"},
        yaxis={"title": "Block first seen (ms from slot start)"},
        coloraxis_colorbar={"title": "Blobs"},
    )
    fig.show(config={"responsive": True})

### Bidding window duration vs block arrival

Does competitive bidding (longer bidding window) affect propagation differently by blob count?

In [None]:
# MEV blocks that have both a bidding window duration and a block arrival time.
df_bidding = (
    df_mev_only
    .filter(pl.col("bidding_duration_ms").is_not_null())
    .filter(pl.col("block_first_seen_ms").is_not_null())
)

if not df_bidding.is_empty():
    # Add the hover relay (first winning relay) and a float blob count for the colour axis.
    df_plot = df_bidding.with_columns(
        relay=pl.col("winning_relays").list.get(0),
        blob_count_f=pl.col("blob_count").cast(pl.Float64),
    ).to_pandas()

    fig = px.scatter(
        df_plot,
        x="bidding_duration_ms",
        y="block_first_seen_ms",
        color="blob_count_f",
        color_continuous_scale=PLASMA_TRUNCATED,
        range_color=[0, max_blobs],
        opacity=0.6,
        hover_data={"slot": True, "relay": True, "blob_count": True, "blob_count_f": False},
    )
    fig.update_layout(
        height=450,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Bidding window duration (ms)"},
        yaxis={"title": "Block first seen (ms from slot start)"},
        coloraxis_colorbar={"title": "Blobs"},
    )
    fig.show(config={"responsive": True})

### Block arrival by relay and blob count

Do some relays handle blob-heavy blocks better than others?

Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.

In [None]:
# Attribute each MEV block to its first winning relay and drop blocks without one.
df_relay = (
    df_mev_only
    .with_columns(relay=pl.col("winning_relays").list.get(0))
    .drop_nulls("relay")
)

if not df_relay.is_empty():
    # The eight relays with the most blocks.
    top_relays = (
        df_relay
        .group_by("relay")
        .agg(pl.count().alias("count"))
        .sort("count", descending=True)
        .head(8)
        .get_column("relay")
        .to_list()
    )

    df_relay_top = df_relay.filter(pl.col("relay").is_in(top_relays)).to_pandas()

    # One box per relay and blob-count bin, using the shared bin order and colours.
    fig = px.box(
        df_relay_top,
        x="relay",
        y="block_first_seen_ms",
        color="blob_bin",
        color_discrete_map=BLOB_COLORS,
        category_orders={"blob_bin": BLOB_BIN_ORDER},
    )
    fig.update_layout(
        height=500,
        margin={"l": 60, "r": 30, "t": 30, "b": 100},
        xaxis={"title": "Relay", "tickangle": 45},
        yaxis={"title": "Block first seen (ms from slot start)"},
        legend_title="Blob count",
    )
    fig.show(config={"responsive": True})

## MEV vs local block comparison

Do MEV blocks propagate differently than locally-built blocks? The following charts compare timing distributions by blob count.

Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.

In [None]:
# Prepare data for MEV vs Local comparison
# Create ordered list from 0 to max_blobs (reversed for bottom-to-top display)
all_blob_counts = list(range(int(max_blobs) + 1))
blob_count_order = [str(b) for b in all_blob_counts]
# Reverse for category_orders (Plotly categorical y-axis goes top-to-bottom by default)
blob_count_order_reversed = blob_count_order[::-1]

# Convert blob_count to string for proper categorical ordering and ensure numeric x-axis
df_compare = df_mev.with_columns(
    pl.col("blob_count").cast(pl.Utf8).alias("blob_count_str"),
    pl.col("block_first_seen_ms").cast(pl.Float64)
)

# Summary stats.
# median() is None when a block type has no rows; the ":.0f" format spec cannot render
# None, so show "n/a" for a missing group instead of raising a TypeError.
mev_median = df_compare.filter(pl.col("block_type") == "MEV").select(pl.col("block_first_seen_ms").median()).item()
local_median = df_compare.filter(pl.col("block_type") == "Local").select(pl.col("block_first_seen_ms").median()).item()
mev_median_text = f"{mev_median:.0f}ms" if mev_median is not None else "n/a"
local_median_text = f"{local_median:.0f}ms" if local_median is not None else "n/a"
print(f"Block first seen median - MEV: {mev_median_text}, Local: {local_median_text}")

In [None]:
if not df_compare.is_empty():
    df_compare_pd = df_compare.to_pandas()

    # Horizontal boxes: one row per blob count, MEV and Local drawn side by side.
    fig = px.box(
        df_compare_pd,
        x="block_first_seen_ms",
        y="blob_count_str",
        color="block_type",
        orientation="h",
        color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
        category_orders={
            "blob_count_str": blob_count_order_reversed,
            "block_type": ["MEV", "Local"],
        },
    )
    fig.update_layout(
        height=800,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Block first seen (ms from slot start)"},
        # Explicit tick arrays list every blob count from the category order on the axis.
        yaxis={
            "title": "Blob count",
            "tickmode": "array",
            "tickvals": blob_count_order_reversed,
            "ticktext": blob_count_order_reversed,
        },
        legend_title="Block type",
        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
    )
    fig.show(config={"responsive": True})

In [None]:
# Prepare first column first seen data.
# Only blocks with at least one blob and a recorded first-column timestamp are kept.
df_col_first = df_mev.filter(
    pl.col("first_column_first_seen_ms").is_not_null() & (pl.col("blob_count") > 0)
).with_columns(
    pl.col("blob_count").cast(pl.Utf8).alias("blob_count_str"),
    pl.col("first_column_first_seen_ms").cast(pl.Float64)
)

# Create ordered list from 1 to max_blobs (reversed for bottom-to-top display)
col_blob_counts = list(range(1, int(max_blobs) + 1))
col_blob_count_order = [str(b) for b in col_blob_counts]
col_blob_count_order_reversed = col_blob_count_order[::-1]

# Summary stats.
# median() is None when a block type has no rows; the ":.0f" format spec cannot render
# None, so show "n/a" for a missing group instead of raising a TypeError.
mev_median = df_col_first.filter(pl.col("block_type") == "MEV").select(pl.col("first_column_first_seen_ms").median()).item()
local_median = df_col_first.filter(pl.col("block_type") == "Local").select(pl.col("first_column_first_seen_ms").median()).item()
mev_median_text = f"{mev_median:.0f}ms" if mev_median is not None else "n/a"
local_median_text = f"{local_median:.0f}ms" if local_median is not None else "n/a"
print(f"First column seen median - MEV: {mev_median_text}, Local: {local_median_text}")

In [None]:
if df_col_first.is_empty():
    print("No column timing data available.")
else:
    df_col_first_pd = df_col_first.to_pandas()

    # Horizontal boxes of first-column arrival: one row per blob count, MEV vs Local.
    fig = px.box(
        df_col_first_pd,
        x="first_column_first_seen_ms",
        y="blob_count_str",
        color="block_type",
        orientation="h",
        color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
        category_orders={
            "blob_count_str": col_blob_count_order_reversed,
            "block_type": ["MEV", "Local"],
        },
    )
    fig.update_layout(
        height=750,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "First column first seen (ms from slot start)"},
        yaxis={
            "title": "Blob count",
            "tickmode": "array",
            "tickvals": col_blob_count_order_reversed,
            "ticktext": col_blob_count_order_reversed,
        },
        legend_title="Block type",
        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
    )
    fig.show(config={"responsive": True})

In [None]:
# Prepare last column first seen data.
# Only blocks with at least one blob and a recorded last-column timestamp are kept.
df_col_last = df_mev.filter(
    pl.col("last_column_first_seen_ms").is_not_null() & (pl.col("blob_count") > 0)
).with_columns(
    pl.col("blob_count").cast(pl.Utf8).alias("blob_count_str"),
    pl.col("last_column_first_seen_ms").cast(pl.Float64)
)

# Summary stats.
# median() is None when a block type has no rows; the ":.0f" format spec cannot render
# None, so show "n/a" for a missing group instead of raising a TypeError.
mev_median = df_col_last.filter(pl.col("block_type") == "MEV").select(pl.col("last_column_first_seen_ms").median()).item()
local_median = df_col_last.filter(pl.col("block_type") == "Local").select(pl.col("last_column_first_seen_ms").median()).item()
mev_median_text = f"{mev_median:.0f}ms" if mev_median is not None else "n/a"
local_median_text = f"{local_median:.0f}ms" if local_median is not None else "n/a"
print(f"Last column seen median - MEV: {mev_median_text}, Local: {local_median_text}")

In [None]:
if df_col_last.is_empty():
    print("No column timing data available.")
else:
    df_col_last_pd = df_col_last.to_pandas()

    # Horizontal boxes of last-column arrival: one row per blob count, MEV vs Local.
    fig = px.box(
        df_col_last_pd,
        x="last_column_first_seen_ms",
        y="blob_count_str",
        color="block_type",
        orientation="h",
        color_discrete_map={"MEV": "#9b59b6", "Local": "#3498db"},
        category_orders={
            "blob_count_str": col_blob_count_order_reversed,
            "block_type": ["MEV", "Local"],
        },
    )
    fig.update_layout(
        height=750,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Last column first seen (ms from slot start)"},
        yaxis={
            "title": "Blob count",
            "tickmode": "array",
            "tickvals": col_blob_count_order_reversed,
            "ticktext": col_blob_count_order_reversed,
        },
        legend_title="Block type",
        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "right", "x": 1},
    )
    fig.show(config={"responsive": True})

### Block propagation by builder

Do different builders produce blocks that propagate at different speeds? Each panel shows one builder's blocks.

In [None]:
# Builder data for the heatmap: MEV blocks with a known builder and a block arrival time.
# Builder keys longer than 14 characters are shortened to "<first 10>...<last 4>".
df_builder = (
    df_mev_only
    .filter(pl.col("winning_builder").is_not_null())
    .filter(pl.col("block_first_seen_ms").is_not_null())
    .with_columns(
        builder_short=pl.when(pl.col("winning_builder").str.len_chars() > 14)
        .then(
            pl.col("winning_builder").str.slice(0, 10)
            + pl.lit("...")
            + pl.col("winning_builder").str.slice(-4)
        )
        .otherwise(pl.col("winning_builder"))
    )
)

# Block count per (shortened) builder, largest first.
builder_counts = (
    df_builder
    .group_by("builder_short")
    .agg(pl.count().alias("count"))
    .sort("count", descending=True)
)

# The nine busiest builders; their order is reused for the facet layout.
top_builders = builder_counts.head(9).get_column("builder_short").to_list()
builder_order = top_builders  # Already sorted by count descending
df_builder_top = df_builder.filter(pl.col("builder_short").is_in(top_builders))

# Report block count and median arrival time for each top builder.
print("Top builders by block count:")
builder_counts_dict = dict(builder_counts.select("builder_short", "count").iter_rows())
for builder in builder_order:
    count = builder_counts_dict[builder]
    median_ms = (
        df_builder_top
        .filter(pl.col("builder_short") == builder)
        .get_column("block_first_seen_ms")
        .median()
    )
    print(f"  {builder}: {count:,} blocks, median {median_ms:.0f}ms")

In [None]:
# Faceted density heatmap: one panel per top builder, block arrival time (x) against
# blob count (y). Nothing is drawn when there are no builder rows.
if len(df_builder_top) > 0:
    df_builder_top_pd = df_builder_top.to_pandas()
    
    # Facet grid: 3 columns, with enough rows to hold every top builder (ceiling division).
    n_builders = len(top_builders)
    n_cols = 3
    n_rows = (n_builders + n_cols - 1) // n_cols
    
    # Create 100ms bins for x-axis (block timing)
    # The x range is capped at the 99th percentile of block arrival time, and the bin
    # count requested from plotly is one per started 100 ms up to that cap.
    x_max = df_builder_top.select(pl.col("block_first_seen_ms").quantile(0.99)).item()
    x_bins = int(x_max // 100) + 1
    
    # nbinsy / range_y request one row per integer blob count from 0 to max_blobs.
    # NOTE(review): histnorm="percent" is presumably normalised per panel (each facet is
    # its own trace) — confirm against the plotly histogram2d documentation.
    fig = px.density_heatmap(
        df_builder_top_pd,
        x="block_first_seen_ms",
        y="blob_count",
        facet_col="builder_short",
        facet_col_wrap=n_cols,
        facet_row_spacing=0.10,
        facet_col_spacing=0.05,
        category_orders={"builder_short": builder_order},
        nbinsx=x_bins,
        nbinsy=int(max_blobs) + 1,
        range_x=[0, x_max],
        range_y=[-0.5, int(max_blobs) + 0.5],
        color_continuous_scale=PLASMA_TRUNCATED,
        histnorm="percent",
    )
    # Figure height scales with the number of facet rows.
    fig.update_layout(
        margin=dict(l=70, r=30, t=40, b=50),
        height=280 * n_rows,
        coloraxis_colorbar=dict(title="% of<br>blocks"),
    )
    # Clean up facet titles - add block count
    # Each annotation text is stripped of its "builder_short=" prefix and extended with the
    # block count from builder_counts_dict (0 when the builder is not found there).
    for ann in fig.layout.annotations:
        builder = ann.text.replace("builder_short=", "")
        count = builder_counts_dict.get(builder, 0)
        ann.update(text=f"{builder}<br>({count:,} blocks)", font_size=9, yshift=8)
    
    # Add axis titles and ensure ticks are visible on all panels
    # Ranges are re-applied per axis so every panel shows the same window.
    fig.for_each_xaxis(lambda x: x.update(
        title="Block seen (ms)", 
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[0, x_max],
    ))
    fig.for_each_yaxis(lambda y: y.update(
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[-0.5, int(max_blobs) + 0.5],
    ))
    # Only the first column keeps a y-axis title; the other columns get an empty one.
    fig.update_yaxes(title="Blob count", col=1)
    for i in range(2, n_cols + 1):
        fig.update_yaxes(title="", col=i)
    
    fig.show(config={"responsive": True})

### Block propagation by relay

Same analysis but grouped by winning relay instead of builder.

In [None]:
# Relay data for the heatmap: MEV blocks with a block arrival time, attributed to their
# first winning relay; blocks without a relay are dropped.
df_relay_heat = (
    df_mev_only
    .filter(pl.col("block_first_seen_ms").is_not_null())
    .with_columns(relay=pl.col("winning_relays").list.get(0))
    .drop_nulls("relay")
)

# Block count per relay, largest first.
relay_counts = (
    df_relay_heat
    .group_by("relay")
    .agg(pl.count().alias("count"))
    .sort("count", descending=True)
)

# The nine busiest relays; their order is reused for the facet layout.
top_relays = relay_counts.head(9).get_column("relay").to_list()
relay_order = top_relays  # Already sorted by count descending
df_relay_top = df_relay_heat.filter(pl.col("relay").is_in(top_relays))

# Report block count and median arrival time for each top relay.
print("Top relays by block count:")
relay_counts_dict = dict(relay_counts.select("relay", "count").iter_rows())
for relay in relay_order:
    count = relay_counts_dict[relay]
    median_ms = (
        df_relay_top
        .filter(pl.col("relay") == relay)
        .get_column("block_first_seen_ms")
        .median()
    )
    print(f"  {relay}: {count:,} blocks, median {median_ms:.0f}ms")

In [None]:
# Faceted density heatmap: one panel per top relay, block arrival time (x) against
# blob count (y). Nothing is drawn when there are no relay rows.
if len(df_relay_top) > 0:
    df_relay_top_pd = df_relay_top.to_pandas()
    
    # Facet grid: 3 columns, with enough rows to hold every top relay (ceiling division).
    n_relays = len(top_relays)
    n_cols = 3
    n_rows = (n_relays + n_cols - 1) // n_cols
    
    # Create 100ms bins for x-axis (block timing)
    # The x range is capped at the 99th percentile of block arrival time, and the bin
    # count requested from plotly is one per started 100 ms up to that cap.
    x_max = df_relay_top.select(pl.col("block_first_seen_ms").quantile(0.99)).item()
    x_bins = int(x_max // 100) + 1
    
    # nbinsy / range_y request one row per integer blob count from 0 to max_blobs.
    # NOTE(review): histnorm="percent" is presumably normalised per panel (each facet is
    # its own trace) — confirm against the plotly histogram2d documentation.
    fig = px.density_heatmap(
        df_relay_top_pd,
        x="block_first_seen_ms",
        y="blob_count",
        facet_col="relay",
        facet_col_wrap=n_cols,
        facet_row_spacing=0.10,
        facet_col_spacing=0.05,
        category_orders={"relay": relay_order},
        nbinsx=x_bins,
        nbinsy=int(max_blobs) + 1,
        range_x=[0, x_max],
        range_y=[-0.5, int(max_blobs) + 0.5],
        color_continuous_scale=PLASMA_TRUNCATED,
        histnorm="percent",
    )
    # Figure height scales with the number of facet rows.
    fig.update_layout(
        margin=dict(l=70, r=30, t=40, b=50),
        height=280 * n_rows,
        coloraxis_colorbar=dict(title="% of<br>blocks"),
    )
    # Clean up facet titles - add block count
    # Each annotation text is stripped of its "relay=" prefix and extended with the block
    # count from relay_counts_dict (0 when the relay is not found there).
    for ann in fig.layout.annotations:
        relay = ann.text.replace("relay=", "")
        count = relay_counts_dict.get(relay, 0)
        ann.update(text=f"{relay}<br>({count:,} blocks)", font_size=9, yshift=8)
    
    # Add axis titles and ensure ticks are visible on all panels
    # Ranges are re-applied per axis so every panel shows the same window.
    fig.for_each_xaxis(lambda x: x.update(
        title="Block seen (ms)", 
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[0, x_max],
    ))
    fig.for_each_yaxis(lambda y: y.update(
        tickfont_size=9, 
        title_font_size=10,
        showticklabels=True,
        range=[-0.5, int(max_blobs) + 0.5],
    ))
    # Only the first column keeps a y-axis title; the other columns get an empty one.
    fig.update_yaxes(title="Blob count", col=1)
    for i in range(2, n_cols + 1):
        fig.update_yaxes(title="", col=i)
    
    fig.show(config={"responsive": True})

## Bid timing density

Where do most blocks land in the bid timing vs propagation delay space? The contour shows density; white circles highlight outliers (above the 95th percentile on either axis).

In [None]:
# Density contour with outlier markers
# Input: MEV blocks that have a winning bid time and a bid-to-block delay strictly
# between 0 and 5000 ms.
df_timing = df_mev_only.filter(
    pl.col("winning_bid_ms").is_not_null() &
    pl.col("bid_to_block_ms").is_not_null() &
    (pl.col("bid_to_block_ms") > 0) &
    (pl.col("bid_to_block_ms") < 5000)
)

if len(df_timing) > 0:
    df_timing_pd = df_timing.to_pandas()
    
    fig = go.Figure()
    
    # Density contour base
    # The contour is built with plotly express, then each of its traces is restyled
    # (filled Plasma colouring, visible colorbar, thin translucent lines) and copied into
    # the empty figure so the outlier scatter can be layered on top of it.
    contour = px.density_contour(df_timing_pd, x="winning_bid_ms", y="bid_to_block_ms")
    for trace in contour.data:
        trace.update(
            contours_coloring="fill", 
            colorscale="Plasma", 
            showscale=True,
            colorbar=dict(title="Density"),
            line=dict(width=0.5, color="rgba(255,255,255,0.3)"),
        )
        fig.add_trace(trace)
    
    # Outliers (P95+ on either axis)
    # A block is an outlier when its bid time OR its bid-to-block delay lies above the
    # 95th percentile of that column.
    q95_x = df_timing.select(pl.col("winning_bid_ms").quantile(0.95)).item()
    q95_y = df_timing.select(pl.col("bid_to_block_ms").quantile(0.95)).item()
    outliers = df_timing.filter(
        (pl.col("winning_bid_ms") > q95_x) | (pl.col("bid_to_block_ms") > q95_y)
    ).to_pandas()
    
    # Outlier markers; the slot number travels in customdata for the hover text, and the
    # legend entry carries the outlier count.
    fig.add_trace(go.Scatter(
        x=outliers["winning_bid_ms"],
        y=outliers["bid_to_block_ms"],
        mode="markers",
        marker=dict(
            size=6,
            color="rgba(255,255,255,0.8)",
            line=dict(width=1.5, color="rgba(0,0,0,0.4)"),
        ),
        name=f"Outliers ({len(outliers)})",
        customdata=outliers["slot"],
        hovertemplate="<b>Slot %{customdata}</b><br>Bid: %{x:.0f}ms<br>Propagation: %{y:.0f}ms<extra></extra>",
    ))
    
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Winning bid timing (ms from slot start)"),
        yaxis=dict(title="Bid to block delay (ms)"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        height=450,
    )
    fig.show(config={"responsive": True})

## Bid timing by blob count

Does the bid timing → propagation delay relationship change with blob count? Each panel shows density for a different blob count range.

In [None]:
# Faceted heatmap by blob count bin (same bins as the other charts).
# Input: MEV blocks with a winning bid time and a bid-to-block delay strictly between
# 0 and 5000 ms.
df_timing = (
    df_mev_only
    .filter(pl.col("winning_bid_ms").is_not_null())
    .filter(pl.col("bid_to_block_ms").is_not_null())
    .filter((pl.col("bid_to_block_ms") > 0) & (pl.col("bid_to_block_ms") < 5000))
)

if not df_timing.is_empty():
    df_timing_pd = df_timing.to_pandas()

    # Three panels per row; ceiling division gives the number of rows.
    n_rows = -(-len(BLOB_BIN_ORDER) // 3)

    # Both axes are capped at the 99th percentile of their column.
    x_max, y_max = df_timing.select(
        pl.col("winning_bid_ms").quantile(0.99),
        pl.col("bid_to_block_ms").quantile(0.99),
    ).row(0)

    fig = px.density_heatmap(
        df_timing_pd,
        x="winning_bid_ms",
        y="bid_to_block_ms",
        facet_col="blob_bin",
        facet_col_wrap=3,
        facet_row_spacing=0.12,
        facet_col_spacing=0.06,
        category_orders={"blob_bin": BLOB_BIN_ORDER},
        nbinsx=25,
        nbinsy=25,
        range_x=[0, x_max],
        range_y=[0, y_max],
        color_continuous_scale=PLASMA_TRUNCATED,
    )
    fig.update_layout(
        height=320 * n_rows,
        margin={"l": 80, "r": 30, "t": 40, "b": 50},
        coloraxis_colorbar={"title": "Count"},
    )

    # Facet titles: strip the "blob_bin=" prefix and append " blobs".
    fig.for_each_annotation(
        lambda note: note.update(
            text=note.text.replace("blob_bin=", "") + " blobs",
            font_size=11,
            yshift=5,
        )
    )
    # Every panel gets an x-axis title, smaller fonts, visible ticks and the shared range.
    fig.for_each_xaxis(
        lambda axis: axis.update(
            title="Bid timing (ms)",
            tickfont_size=9,
            title_font_size=10,
            showticklabels=True,
            range=[0, x_max],
        )
    )
    # Same treatment for the y axes; the title is set separately below.
    fig.for_each_yaxis(
        lambda axis: axis.update(
            tickfont_size=9,
            title_font_size=10,
            showticklabels=True,
            range=[0, y_max],
        )
    )
    # Only the leftmost column keeps a y-axis title; columns 2 and 3 get an empty one.
    fig.update_yaxes(title="Propagation (ms)", col=1)
    for i in (2, 3):
        fig.update_yaxes(title="", col=i)
    fig.show(config={"responsive": True})