Analysis of block-to-column propagation timing on Ethereum mainnet.

In [None]:
import polars as pl
import pandas as pd  # Required for plotly
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

from loaders import load_parquet, display_sql

target_date = None  # Set via papermill, or auto-detect from manifest

In [None]:
# Show the query behind the "block_production_timeline" dataset for the target date.
# NOTE(review): display_sql comes from the local `loaders` module; presumably it
# renders the SQL used to build the parquet file — confirm against loaders.
display_sql("block_production_timeline", target_date)

In [None]:
# Load the per-slot timeline (the loader's result is converted from pandas to polars).
df = pl.from_pandas(load_parquet("block_production_timeline", target_date))

# Flag MEV vs local blocks: a slot counts as MEV when a winning bid value is present.
df = df.with_columns(
    pl.col("winning_bid_value").is_not_null().alias("has_mev"),
)
# block_type depends on has_mev, so it must be added in a separate with_columns call.
df = df.with_columns(
    pl.when(pl.col("has_mev")).then(pl.lit("MEV")).otherwise(pl.lit("Local")).alias("block_type"),
)

# Filter to slots with blobs
df = df.with_columns(
    (pl.col("blob_count") > 0).alias("has_blobs"),
)
df_blobs = df.filter(pl.col("has_blobs"))

# Calculate block to first column delay (only for slots where both timestamps exist)
df_blobs = df_blobs.drop_nulls(subset=["block_first_seen_ms", "first_column_first_seen_ms"])
df_blobs = df_blobs.with_columns(
    (pl.col("first_column_first_seen_ms") - pl.col("block_first_seen_ms")).alias("block_to_column_ms"),
)


def _pct(part, whole):
    """Return part/whole as a percentage, or 0.0 when whole is zero."""
    return part / whole * 100 if whole else 0.0


total_slots = len(df)
# Counted after the null drop above, so this is blob slots that have timing data.
blob_slots = len(df_blobs)
mev_count = df_blobs.filter(pl.col("has_mev")).height
local_count = df_blobs.filter(~pl.col("has_mev")).height

# Percentages go through _pct so an empty dataset (or a day without blob
# timing rows) prints 0.0% instead of raising ZeroDivisionError.
print(f"Total slots: {total_slots:,}")
print(f"Slots with blobs: {blob_slots:,} ({_pct(blob_slots, total_slots):.1f}%)")
print(f"  MEV: {mev_count:,} ({_pct(mev_count, blob_slots):.1f}%)")
print(f"  Local: {local_count:,} ({_pct(local_count, blob_slots):.1f}%)")

## Block to column delay

Time from block first seen to first column first seen. Shows how quickly columns start propagating after the block arrives.

**Note on negative values:** A negative delay means a column was observed before the block. This can happen due to how data propagates through the network: columns may reach certain parts of the network before the block does.

In [None]:
# Overlaid histograms of the block-to-column delay, one trace per block type.
if df_blobs.height == 0:
    print("No block-to-column timing data available.")
else:
    block_type_colors = {"MEV": "#AB63FA", "Local": "#19D3F3"}
    fig = px.histogram(
        df_blobs.to_pandas(),
        x="block_to_column_ms",
        color="block_type",
        color_discrete_map=block_type_colors,
        category_orders={"block_type": ["MEV", "Local"]},
        barmode="overlay",
        opacity=0.7,
        nbins=60,
    )
    fig.update_layout(
        height=400,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Block to first column (ms)"},
        yaxis={"title": "Slots"},
        legend_title="Block type",
    )
    fig.show(config={"responsive": True})

In [None]:
# Summary statistics for the block-to-column delay (skipped when there is no data).
if df_blobs.height > 0:
    block_to_column = df_blobs["block_to_column_ms"]
    median = block_to_column.median()
    p90, p95, p99 = (block_to_column.quantile(q) for q in (0.9, 0.95, 0.99))
    max_val = block_to_column.max()

    # One print call; the joined lines match the original line-by-line output.
    print(
        "\n".join(
            [
                "Block to first column (ms):",
                f"  Median: {median:.0f}",
                f"  P90:    {p90:.0f}",
                f"  P95:    {p95:.0f}",
                f"  P99:    {p99:.0f}",
                f"  Max:    {max_val:.0f}",
            ]
        )
    )

## Block to column delay over time

How the block-to-column delay varies throughout the day.

In [None]:
# Scatter of block-to-column delay per slot over time, coloured by blob count.
if df_blobs.height > 0:
    # Float copy of blob_count is what the colour axis is keyed on.
    blob_count_as_float = pl.col("blob_count").cast(pl.Float64).alias("blob_count_f")
    df_plot = df_blobs.with_columns(blob_count_as_float)
    max_blobs = df_plot["blob_count"].max()

    # Hover shows slot, blob count and the rounded delay; the x value and the
    # float helper column are hidden.
    hover_fields = {
        "slot": True,
        "blob_count": True,
        "block_to_column_ms": ":.0f",
        "slot_start_date_time": False,
        "blob_count_f": False,
    }

    fig = px.scatter(
        df_plot.to_pandas(),
        x="slot_start_date_time",
        y="block_to_column_ms",
        color="blob_count_f",
        range_color=[0, max_blobs],
        color_continuous_scale="Plasma",
        opacity=0.5,
        hover_data=hover_fields,
    )
    fig.update_layout(
        height=400,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Time (UTC)", "tickformat": "%H:%M"},
        yaxis={"title": "Block to first column (ms)"},
        coloraxis_colorbar={"title": "Blobs"},
    )
    fig.show(config={"responsive": True})

## Column spread by blob count (MEV vs local)

Does MEV vs local block production affect how columns spread at each blob count?

Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.

In [None]:
# Box plot of column spread per blob count, split by block type.
# Only blob slots with a non-null column_spread_ms are plotted.
blob_slot_rows = df.filter(pl.col("blob_count") > 0)
df_col_spread = blob_slot_rows.drop_nulls(subset=["column_spread_ms"])
if df_col_spread.height > 0:
    fig = px.box(
        df_col_spread.to_pandas(),
        x="blob_count",
        y="column_spread_ms",
        color="block_type",
        category_orders={"block_type": ["MEV", "Local"]},
    )
    fig.update_layout(
        height=450,
        margin={"l": 60, "r": 30, "t": 30, "b": 60},
        xaxis={"title": "Blob count", "dtick": 1},
        yaxis={"title": "Column spread (ms)"},
        legend_title="Block type",
    )
    fig.show(config={"responsive": True})

## Block-to-column delay by blob count

How much additional delay does each blob add before column propagation begins after block arrival?

Box: 25th-75th percentile. Line: median. Whiskers: min/max excluding outliers.

In [None]:
# Box plot of block-to-column delay per blob count, split by block type.
# df_blobs (built in the data-loading cell) is already the blob-slot subset of
# df with both timestamps present and block_to_column_ms computed, so reuse it
# rather than repeating the same filter / drop_nulls / subtraction here.
# The df_delay name stays bound in case later cells refer to it.
df_delay = df_blobs
if len(df_delay) > 0:
    fig = px.box(
        df_delay.to_pandas(),
        x="blob_count",
        y="block_to_column_ms",
        color="block_type",
        category_orders={"block_type": ["MEV", "Local"]},
    )
    fig.update_layout(
        margin=dict(l=60, r=30, t=30, b=60),
        xaxis=dict(title="Blob count", dtick=1),
        yaxis=dict(title="Block to first column (ms)"),
        legend_title="Block type",
        height=450,
    )
    fig.show(config={"responsive": True})