In [None]:
%pip install jupysql clickhouse_sqlalchemy matplotlib python-dotenv pandas seaborn imageio plotly polars-u64-idx > /dev/null 2>&1

In [2]:
import dotenv
import os
from datetime import datetime

import pandas as pd
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import imageio
import plotly.io as pio
import plotly.express as px

# Global matplotlib setting: every figure created after this line renders at 300 DPI.
plt.rcParams['figure.dpi'] = 300  # Set high DPI for better quality figures
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where the XATU_CLICKHOUSE_* variables read via
# os.getenv further down come from - confirm against the deployment setup.
dotenv.load_dotenv()

# Use a nicer plotly theme
pio.templates.default = "seaborn"  # Clean, professional theme


In [None]:

class Config:
    """Plain container for the analysis settings.

    Attributes:
        time_ranges: The value passed as `time_ranges`, stored unchanged.
        network: The value passed as `network`, stored unchanged.
    """

    def __init__(self, time_ranges, network):
        # Both arguments are stored by reference; nothing is copied or validated.
        self.time_ranges, self.network = time_ranges, network

# Active analysis configuration. The commented-out tuples are alternative
# windows kept from earlier runs; uncomment a group (and comment out the
# active one) to switch.
config = Config(
    time_ranges=[
        # ("2025-03-28T12:10:00Z", "2025-03-28T13:10:00Z"),
        # ("2025-04-01T05:10:00Z", "2025-04-01T06:10:00Z")

        # Before/After Fork, with Nimbus running latest
        # ("2025-03-23T05:10:00Z", "2025-03-23T05:20:00Z"),
        # ("2025-04-01T05:10:00Z", "2025-04-01T05:20:00Z")

        # Before/After Fork, with Nimbus running old version
        # ("2025-03-23T05:10:00Z", "2025-03-23T06:10:00Z"),
        # ("2025-04-01T05:10:00Z", "2025-04-01T06:10:00Z")

        # Before/After Fork, with Nimbus running old version
        # ("2025-03-23T05:10:00Z", "2025-03-23T08:10:00Z"),
        # ("2025-03-24T05:10:00Z", "2025-03-24T08:10:00Z"),
        # ("2025-03-25T05:10:00Z", "2025-03-25T08:10:00Z"),
        # ("2025-03-26T05:10:00Z", "2025-03-26T08:10:00Z"),
        # ("2025-03-27T05:10:00Z", "2025-03-27T08:10:00Z"),
        # ("2025-03-28T05:10:00Z", "2025-03-28T08:10:00Z"),
        # ("2025-03-29T05:10:00Z", "2025-03-29T08:10:00Z"),
        # ("2025-03-30T05:10:00Z", "2025-03-30T08:10:00Z"),
        # ("2025-03-31T05:10:00Z", "2025-03-31T08:10:00Z"),
        # ("2025-04-01T05:10:00Z", "2025-04-01T08:10:00Z"),
        # ("2025-04-02T05:10:00Z", "2025-04-02T08:10:00Z"),

        # Active selection: one 20-minute window per day, 05:10-05:30 UTC.
        # ("2025-03-17T05:10:00Z", "2025-03-17T05:30:00Z"),
        ("2025-03-18T05:10:00Z", "2025-03-18T05:30:00Z"),
        ("2025-03-19T05:10:00Z", "2025-03-19T05:30:00Z"),
        ("2025-03-20T05:10:00Z", "2025-03-20T05:30:00Z"),
        ("2025-03-21T05:10:00Z", "2025-03-21T05:30:00Z"),
        ("2025-03-22T05:10:00Z", "2025-03-22T05:30:00Z"),
        ("2025-03-23T05:10:00Z", "2025-03-23T05:30:00Z"),
        ("2025-03-24T05:10:00Z", "2025-03-24T05:30:00Z"),
        ("2025-03-25T05:10:00Z", "2025-03-25T05:30:00Z"),
        ("2025-03-26T05:10:00Z", "2025-03-26T05:30:00Z"),
        ("2025-03-27T05:10:00Z", "2025-03-27T05:30:00Z"),
        ("2025-03-28T05:10:00Z", "2025-03-28T05:30:00Z"),
        ("2025-03-29T05:10:00Z", "2025-03-29T05:30:00Z"),
        ("2025-03-30T05:10:00Z", "2025-03-30T05:30:00Z"),
        ("2025-03-31T05:10:00Z", "2025-03-31T05:30:00Z"),
        ("2025-04-01T05:10:00Z", "2025-04-01T05:30:00Z"),
        ("2025-04-02T05:10:00Z", "2025-04-02T05:30:00Z"),
    ],
    network="hoodi",
)

# Hoodi Electra Fork
# Single source of truth for the fork timestamp. The annotation below and the
# value used in an earlier revision of this cell both place the fork at
# 2025-03-26T14:37:12Z; the literal previously assigned to `event_date`
# (2025-03-27T00:37:12Z) was ten hours later and disagreed with them.
EVENT_TIMESTAMP = "2025-03-26T14:37:12Z"
event_date = pd.to_datetime(EVENT_TIMESTAMP, utc=True)

# config = Config(
#     [
#         # ("2025-03-25T12:10:00Z", "2025-03-25T12:30:00Z"),
#         # ("2025-03-27T12:10:00Z", "2025-03-27T12:30:00Z")
#         ("2025-03-2T12:10:00Z", "2025-03-25T12:30:00Z"),
#         ("2025-03-27T12:10:00Z", "2025-03-27T12:30:00Z")
#     ],
#     "hoodi"
# )

# Timezone-naive copy of the event timestamp (same wall-clock value, tzinfo
# dropped) for comparisons against timezone-naive datetime columns.
event_date_naive = event_date.tz_localize(None) if event_date.tzinfo is not None else event_date

# Timestamp (ISO 8601, UTC) -> label shown on plots.
annotations = {
    EVENT_TIMESTAMP: "Hoodi Electra Fork",
}

# Convert annotation timestamps to datetime objects for later use.
# `fromisoformat` does not accept a trailing "Z" on every supported Python
# version, so it is rewritten as an explicit +00:00 offset first.
annotation_datetimes = {
    datetime.fromisoformat(ts.replace("Z", "+00:00")): msg
    for ts, msg in annotations.items()
}

# Echo the active settings so the rendered notebook records what was analysed
for label, value in (
    ("config network:", config.network),
    ("config time ranges:", config.time_ranges),
    ("event date:", event_date),
):
    print(label, value)


In [4]:
# Connect to ClickHouse
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text

# Credentials and host are read from the process environment.
username = os.getenv('XATU_CLICKHOUSE_USERNAME')
password = os.getenv('XATU_CLICKHOUSE_PASSWORD')
host = os.getenv('XATU_CLICKHOUSE_HOST')

# Fail early with a readable message instead of building a URL that contains
# the literal text "None" and only failing once a query is sent.
_missing_env = [
    name
    for name, value in (
        ('XATU_CLICKHOUSE_USERNAME', username),
        ('XATU_CLICKHOUSE_PASSWORD', password),
        ('XATU_CLICKHOUSE_HOST', host),
    )
    if not value
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

# User name and password are percent-encoded so that characters such as
# '@', ':' or '/' in a credential cannot corrupt the URL structure.
db_url = (
    f"clickhouse+http://{quote_plus(username)}:{quote_plus(password)}"
    f"@{host}:443/default?protocol=https"
)

engine = create_engine(db_url)

connection = engine.connect()

In [None]:
# Load known validators from the YAML file
import yaml
import os
from pathlib import Path

def load_validators(network):
    """Load the validator-index -> entity mapping for `network`.

    Reads `../../assets/ethereum/{network}/validators.yaml` (relative to the
    current working directory). Each top-level key of the YAML mapping is one of:

    * a string range `"start-end"` (inclusive on both ends),
    * an integer validator index,
    * a string holding a single integer index (e.g. a quoted `"9"`).

    Keys of any other form are ignored.

    Args:
        network: Network name used to build the file path.

    Returns:
        dict mapping each validator index (int) to the value stored under its
        key. Empty when the file does not exist or the YAML document is empty.

    Raises:
        ValueError: If a range key does not consist of exactly two integers.
    """
    validators_path = Path(f"../../assets/ethereum/{network}/validators.yaml")

    if not validators_path.exists():
        print(f"Validators file not found at {validators_path}")
        return {}

    with open(validators_path, 'r', encoding='utf-8') as file:
        # safe_load returns None for an empty document; treat that as "no entries"
        # rather than failing on `.items()` below.
        validators_data = yaml.safe_load(file) or {}

    # Expand every key into individual validator indices.
    validators_map = {}
    for key, client in validators_data.items():
        if isinstance(key, str):
            if '-' in key:
                start, end = map(int, key.split('-'))
                validators_map.update(dict.fromkeys(range(start, end + 1), client))
            elif key.strip().isdigit():
                # Single index that YAML delivered as a string (quoted key).
                validators_map[int(key)] = client
        elif isinstance(key, int):
            # Handle single validator case
            validators_map[key] = client

    return validators_map

def get_validator_entity(validator_index, validators_map):
    """Return the entity mapped to `validator_index`, or "unknown" if it has no entry."""
    return validators_map.get(validator_index, "unknown")

def get_validator_entities(validator_indices, validators_map):
    """Map every index in `validator_indices` to its entity.

    Returns a dict keyed by validator index; the value for each index is
    whatever `get_validator_entity` returns for it.
    """
    entities = {}
    for validator_index in validator_indices:
        entities[validator_index] = get_validator_entity(validator_index, validators_map)
    return entities

def is_validator_from_entity(validator_index, entity, validators_map):
    """Return True when the entity looked up for `validator_index` equals `entity`."""
    resolved_entity = get_validator_entity(validator_index, validators_map)
    return resolved_entity == entity

def get_validator_entity_sizes(validators_map):
    """Count how many validators belong to each entity.

    Args:
        validators_map: dict mapping validator index -> entity, as returned by
            `load_validators`.

    Returns:
        dict mapping each entity to the number of validator indices assigned
        to it. Empty when `validators_map` is empty.
    """
    # Imported locally so this helper does not depend on a file-level import.
    from collections import Counter

    # The map is keyed by validator index, so the sizes come from tallying its
    # values. (Indexing the map by its own keys and taking len() of the result
    # would measure the length of the entity *name*, not the entity size.)
    return dict(Counter(validators_map.values()))

# Load validators for the configured network
# `validators` maps validator index -> entity; it is an empty dict when the
# validators file for this network is not found (see load_validators).
validators = load_validators(config.network)
print(f"Loaded {len(validators)} validators for {config.network}")


In [None]:
# We'll be loading all elaborated attestations from the `canonical_beacon_elaborated_attestation` table
# canonical_beacon_elaborated_attestation
# #
# Contains elaborated attestations from beacon blocks.

# Availability
# #
# Data is partitioned daily on slot_start_date_time for the following networks:

# mainnet: 2020-12-01 to 2025-03-10
# holesky: 2023-09-23 to 2025-03-11
# sepolia: 2022-06-22 to 2025-03-10
# Examples
# #
# Parquet file
# Your Clickhouse
# EthPandaOps Clickhouse
# Columns
# #
# Name	Type	Description
# updated_date_time	DateTime	When this row was last updated
# block_slot	UInt32	The slot number of the block containing the attestation
# block_slot_start_date_time	DateTime	The wall clock time when the block slot started
# block_epoch	UInt32	The epoch number of the block containing the attestation
# block_epoch_start_date_time	DateTime	The wall clock time when the block epoch started
# position_in_block	UInt32	The position of the attestation in the block
# block_root	FixedString(66)	The root of the block containing the attestation
# validators	Array(UInt32)	Array of validator indices participating in the attestation
# committee_index	LowCardinality(String)	The index of the committee making the attestation
# beacon_block_root	FixedString(66)	The root of the beacon block being attested to
# slot	UInt32	The slot number being attested to
# slot_start_date_time	DateTime	**
# epoch	UInt32	**
# epoch_start_date_time	DateTime	**
# source_epoch	UInt32	The source epoch referenced in the attestation
# source_epoch_start_date_time	DateTime	The wall clock time when the source epoch started
# source_root	FixedString(66)	The root of the source checkpoint in the attestation
# target_epoch	UInt32	The target epoch referenced in the attestation
# target_epoch_start_date_time	DateTime	The wall clock time when the target epoch started
# target_root	FixedString(66)	The root of the target checkpoint in the attestation
# meta_client_name	LowCardinality(String)	Name of the client that generated the event
# meta_client_id	String	Unique Session ID of the client that generated the event. This changes every time the client is restarted.
# meta_client_version	LowCardinality(String)	Version of the client that generated the event
# meta_client_implementation	LowCardinality(String)	Implementation of the client that generated the event
# meta_client_os	LowCardinality(String)	Operating system of the client that generated the event
# meta_client_ip	Nullable(IPv6)	IP address of the client that generated the event
# meta_client_geo_city	LowCardinality(String)	City of the client that generated the event
# meta_client_geo_country	LowCardinality(String)	Country of the client that generated the event
# meta_client_geo_country_code	LowCardinality(String)	Country code of the client that generated the event
# meta_client_geo_continent_code	LowCardinality(String)	Continent code of the client that generated the event
# meta_client_geo_longitude	Nullable(Float64)	Longitude of the client that generated the event
# meta_client_geo_latitude	Nullable(Float64)	Latitude of the client that generated the event
# meta_client_geo_autonomous_system_number	Nullable(UInt32)	Autonomous system number of the client that generated the event
# meta_client_geo_autonomous_system_organization	Nullable(String)	Autonomous system organization of the client that generated the event
# meta_network_id	Int32	Ethereum network ID
# meta_network_name	LowCardinality(String)	Ethereum network name
# meta_consensus_version	LowCardinality(String)	Ethereum consensus client version that generated the event
# meta_consensus_version_major	LowCardinality(String)	Ethereum consensus client major version that generated the event
# meta_consensus_version_minor	LowCardinality(String)	Ethereum consensus client minor version that generated the event
# meta_consensus_version_patch	LowCardinality(String)	Ethereum consensus client patch version that generated the event
# meta_consensus_implementation	LowCardinality(String)	Ethereum consensus client implementation that generated the event
# meta_labels	Map(String, String)	Labels associated with the event

import polars as pl
import concurrent.futures
from sqlalchemy import text # Make sure text is imported if not already

# Column layout of the attestation query result. Defined at module level so the
# per-range fetch and the empty-result fallback below build identical frames.
ATTESTATION_SCHEMA = {
    'beacon_block_root': pl.Utf8,
    'block_slot': pl.UInt32,
    'block_slot_start_date_time': pl.Datetime,
    'committee_index': pl.Utf8, # Or pl.Categorical if preferred
    'slot': pl.UInt32,
    'slot_start_date_time': pl.Datetime,
    'epoch': pl.UInt32,
    'position_in_block': pl.UInt32,
    'validator': pl.UInt32
}


def fetch_attestations_for_time_range(time_range, network):
    """Fetch one row per (validator, epoch) first-inclusion attestation.

    The query expands each attestation's `validators` array into one row per
    validator and keeps, for every (validator, epoch) pair, only the rows from
    the lowest `block_slot` in which that validator appears.

    Args:
        time_range: `(start_date, end_date)` pair of ISO-8601 strings. A
            trailing "Z" is stripped before the values are bound; the query
            interprets them as UTC and filters on `block_epoch_start_date_time`.
        network: Value matched against `meta_network_name`.

    Returns:
        polars.DataFrame with the columns of `ATTESTATION_SCHEMA`, ordered by
        block_slot, position_in_block, validator. Empty (but correctly typed)
        when the query returns no rows.
    """
    start_date, end_date = time_range
    print(f"Loading attestations for time range: {start_date} to {end_date} for network: {network}")

    attestations_query = text("""
        SELECT
            beacon_block_root,
            block_slot,
            block_slot_start_date_time,
            committee_index,
            slot,
            slot_start_date_time,
            epoch,
            position_in_block,
            validator
        FROM (
            SELECT
                beacon_block_root,
                block_slot,
                block_slot_start_date_time,
                committee_index,
                slot,
                slot_start_date_time,
                epoch,
                position_in_block,
                validator,
                min(block_slot) OVER (PARTITION BY validator, epoch) as first_block_slot
            FROM (
                SELECT
                    beacon_block_root,
                    block_slot,
                    block_slot_start_date_time,
                    committee_index,
                    slot,
                    slot_start_date_time,
                    epoch,
                    position_in_block,
                    validator
                FROM canonical_beacon_elaborated_attestation
                ARRAY JOIN validators AS validator
                WHERE
                    block_epoch_start_date_time BETWEEN toDateTime(:start_date, 'UTC') AND toDateTime(:end_date, 'UTC')
                    AND meta_network_name = :network
            )
        )
        WHERE block_slot = first_block_slot
        ORDER BY block_slot ASC, position_in_block ASC, validator ASC
    """)

    query_params = {
        "start_date": start_date.replace('Z', ''),
        "end_date": end_date.replace('Z', ''),
        "network": network
    }

    # This function runs on several pool threads at once. A SQLAlchemy
    # Connection must not be shared between threads, so each call checks out
    # its own connection from the (thread-safe) engine behind the global one.
    with connection.engine.connect() as thread_connection:
        rows = thread_connection.execute(attestations_query, query_params).fetchall()

    # orient="row" states explicitly that each element is one record, instead of
    # leaving the orientation to inference. An empty result still yields a frame
    # with the right columns and dtypes.
    df = pl.DataFrame(
        [tuple(row) for row in rows],
        schema=ATTESTATION_SCHEMA,
        orient="row",
    )

    print(f"Loaded {len(df)} attestations from time window {start_date} to {end_date}")
    return df

# Load all elaborated attestations for the time period in parallel
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(fetch_attestations_for_time_range, time_range, config.network)
               for time_range in config.time_ranges]

    # Collect in submission order (not completion order) so the concatenated
    # frame has the same row order on every run.
    attestation_dfs = [future.result() for future in futures]

# Combine all dataframes; with no time ranges configured, fall back to an empty
# frame that still has the attestation columns.
all_attestations = (
    pl.concat(attestation_dfs)
    if attestation_dfs
    else pl.DataFrame(schema=ATTESTATION_SCHEMA)
)


# Add validator entity information only if there are attestations
if not all_attestations.is_empty():
    validators_dict = dict(validators)
    all_attestations = all_attestations.with_columns(
        pl.col('validator').map_elements(lambda x: get_validator_entity(x, validators_dict), return_dtype=pl.Utf8).alias('entity')
    )
    print(f"Loaded {len(all_attestations)} attestations for {all_attestations.select(pl.col('validator').n_unique())[0, 0]} unique validators")
else:
    # Ensure the 'entity' column exists even if empty
    all_attestations = all_attestations.with_columns(pl.lit(None, dtype=pl.Utf8).alias('entity'))
    print("Loaded 0 attestations.")


In [7]:
# Catalogue of reported metrics: category name -> category-level description
# plus a mapping of metric name -> metric description.
metrics = {
    "Attestation Performance Per Slot": dict(
        description=(),
        metrics={
            "correct_head_percentage": "The percentage of attestations that have the correct head.",
        },
    ),
    "Attestation Performance Per Attestation": dict(
        description=(),
        metrics={
            "head_correct": "Whether the attestation has the correct head.",
        },
    ),
}

def get_full_metric_description(metric_name):
    """
    Combines the top-level description with the specific metric description.

    The category description may be a string or a tuple/list of string
    fragments (joined with single spaces). When it is empty, only the metric's
    own description is returned, so an empty placeholder never leaks into the
    text (previously an empty tuple was rendered as a literal "() " prefix).

    Args:
        metric_name: The name of the metric to get the description for

    Returns:
        A combined description string or None if metric not found
    """
    for data in metrics.values():
        if metric_name not in data["metrics"]:
            continue

        category_description = data["description"]
        if isinstance(category_description, (tuple, list)):
            category_description = " ".join(str(part) for part in category_description)
        category_description = str(category_description or "").strip()

        # Drop empty parts so no stray leading space is produced either.
        parts = [category_description, data["metrics"][metric_name]]
        return " ".join(part for part in parts if part)
    return None

In [8]:
import polars as pl
from sqlalchemy import text
import pandas as pd # Keep pandas for date manipulation if needed

# Inputs expected from earlier cells:
#   all_attestations  - Polars DataFrame of attestations
#   validators        - dict {validator_index: entity_name}
#   connection        - SQLAlchemy connection
#   config            - object with `time_ranges` and `network`
#   event_date        - e.g. event_date = "2025-03-28T00:00:00Z"
#   event_date_naive  - datetime without tzinfo

# No conversion needed: the attestations are already held in a Polars DataFrame
all_attestations_pl = all_attestations
print(f"Using existing Polars DataFrame with {len(all_attestations_pl)} attestations.")

# Tabular form of the validators map, used as the right-hand side of the joins below
print("Creating validators Polars DataFrame...")
try:
    validators_df = pl.DataFrame(
        {
            'validator': [int(index) for index in validators],
            'entity': list(validators.values()),
        }
    ).with_columns(
        # Same dtype as the attestation `validator` column
        pl.col('validator').cast(pl.UInt32)
    )
    print(f"Validators DataFrame created with {len(validators_df)} entries.")
except Exception as e:
    print(f"Error creating validators DataFrame: {e}. Ensure validator keys are numeric.")
    # Fall back to an empty, correctly typed frame so the joins still run
    validators_df = pl.DataFrame(
        {
            'validator': pl.Series([], dtype=pl.UInt32),
            'entity': pl.Series([], dtype=pl.Utf8),
        }
    )


# Fetch beacon blocks using Polars
def fetch_beacon_blocks_pl(time_ranges, network, connection):
    """Fetch canonical beacon blocks around each time range as one Polars frame.

    Every range is widened by one hour on both sides before querying. Because
    of that padding, ranges that lie within two hours of each other return the
    same blocks more than once; the result is therefore de-duplicated on `slot`
    so that later joins on the slot number cannot multiply attestation rows.

    A failure while fetching one range is reported with `print` and that range
    is skipped; the remaining ranges are still returned.

    Args:
        time_ranges: Iterable of `(start_date, end_date)` ISO-8601 strings.
        network: Value matched against `meta_network_name`.
        connection: SQLAlchemy connection; its `.engine` is handed to pandas.

    Returns:
        polars.DataFrame with columns `slot`, `block_root`, `proposer_index`
        and `slot_start_date_time` (time zone set to UTC), at most one row per
        slot. An empty frame with those columns when nothing was fetched.
    """
    all_beacon_blocks_pl = []
    for start_date, end_date in time_ranges:
        # Using pandas for date padding for simplicity. `utc=True` converts any
        # offset in the input to UTC before formatting, since the query
        # interprets the bound values as UTC.
        padded_start_date = (pd.to_datetime(start_date, utc=True) - pd.Timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M:%S')
        padded_end_date = (pd.to_datetime(end_date, utc=True) + pd.Timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M:%S')

        beacon_blocks_query = text("""
            SELECT
                slot,
                block_root,
                proposer_index,
                slot_start_date_time -- Need this for period calculation later
            FROM canonical_beacon_block FINAL
            WHERE
                slot_start_date_time BETWEEN toDateTime(:start_date, 'UTC') AND toDateTime(:end_date, 'UTC')
                AND meta_network_name = :network
            ORDER BY slot ASC
        """)

        try:
            # Using pandas read_sql as a bridge
            # TODO: Explore direct Polars read_sql if connector supports it well
            beacon_blocks_pd = pd.read_sql(
                beacon_blocks_query,
                connection.engine,
                params={"start_date": padded_start_date, "end_date": padded_end_date, "network": network}
            )
            # Convert timestamp to Polars Datetime with UTC timezone explicitly
            beacon_blocks = pl.from_pandas(beacon_blocks_pd).with_columns(
                pl.col("slot_start_date_time").dt.replace_time_zone("UTC")
            )
            all_beacon_blocks_pl.append(beacon_blocks)
            print(f"Fetched {len(beacon_blocks)} beacon blocks for range {start_date} to {end_date}")
        except Exception as e:
            # Deliberately best-effort: report and continue with the other ranges.
            print(f"Error fetching beacon blocks for range {start_date} to {end_date}: {e}")

    if not all_beacon_blocks_pl:
        # Return an empty DataFrame with the correct schema if no data was fetched
        return pl.DataFrame({
            'slot': pl.Series([], dtype=pl.UInt64),
            'block_root': pl.Series([], dtype=pl.Utf8),
            'proposer_index': pl.Series([], dtype=pl.UInt64),
            'slot_start_date_time': pl.Series([], dtype=pl.Datetime(time_unit="us", time_zone="UTC")) # Match type
        })

    # Overlapping padded windows yield repeated slots; keep the first occurrence
    # of each and preserve the fetched order.
    return pl.concat(all_beacon_blocks_pl).unique(
        subset=["slot"], keep="first", maintain_order=True
    )

print("Fetching beacon blocks...")
beacon_blocks_pl = fetch_beacon_blocks_pl(config.time_ranges, config.network, connection)

# Pin the dtypes of the columns used as join keys. The frame comes in through
# pandas, so the integer dtypes are whatever pandas inferred; the attestation
# side casts `slot`/`block_slot` to UInt64 and `validator` to UInt32 before
# joining, and the keys on this side must match those exactly.
beacon_blocks_pl = beacon_blocks_pl.with_columns(
    pl.col("slot").cast(pl.UInt64), # Match attestation slot / block_slot type
    pl.col("proposer_index").cast(pl.UInt32) # Match validator index type
)

# Add proposer entity by joining with validators_df
# Rename columns to distinguish proposer info
beacon_blocks_pl = beacon_blocks_pl.join(
    validators_df.rename({"validator": "proposer_index", "entity": "proposer_entity"}),
    on="proposer_index",
    how="left"
)
# Proposers without an entry in the validators map are labelled 'unknown'
beacon_blocks_pl = beacon_blocks_pl.with_columns(
    pl.col("proposer_entity").fill_null("unknown")
)
print(f"Beacon blocks DataFrame created with {len(beacon_blocks_pl)} entries.")
print(beacon_blocks_pl.head())


# Prepare canonical block roots DataFrame for joining
canonical_blocks_df = beacon_blocks_pl.select(
    pl.col("slot").alias("canonical_slot"), # Alias to avoid name collision if joining on 'slot' later
    pl.col("block_root").alias("canonical_block_root")
)
print(f"Canonical block roots DataFrame created with {len(canonical_blocks_df)} entries.")


# Enrich attestations DataFrame (using Polars throughout)
print("Enriching attestations DataFrame...")

# Ensure correct types in all_attestations_pl before joining
# Assuming 'block_slot_start_date_time' exists from the initial load or previous steps
# If 'block_slot_start_date_time' is not already datetime, cast it here. Example:
# all_attestations_pl = all_attestations_pl.with_columns(
#     pl.col('block_slot_start_date_time').str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S%.f").dt.replace_time_zone("UTC")
# )

# Ensure types for join keys match
all_attestations_pl = all_attestations_pl.with_columns(
    pl.col('validator').cast(pl.UInt32),
    pl.col('slot').cast(pl.UInt64), # Match canonical_slot type
    pl.col('block_slot').cast(pl.UInt64) # Match beacon_blocks_pl slot type
)

# 1. Add attester entity
enriched_attestations = all_attestations_pl.join(
    validators_df.rename({"entity": "attester_entity"}),
    on="validator",
    how="left"
)

# Set unknown entities to 'unknown'
enriched_attestations = enriched_attestations.with_columns(
    pl.col("attester_entity").fill_null("unknown")
)

# 2. Add canonical block root for the attested slot
# Left join: attestations whose attested slot has no row in canonical_blocks_df
# get a null canonical_block_root.
enriched_attestations = enriched_attestations.join(
    canonical_blocks_df,
    left_on="slot",
    right_on="canonical_slot",
    how="left"
)

# 3. Add proposer info for the block the attestation was included in
# Also add block_slot_start_date_time from beacon_blocks_pl
enriched_attestations = enriched_attestations.join(
    beacon_blocks_pl.select(
        "slot", "proposer_index", "proposer_entity", "slot_start_date_time"
    ).rename({
        "slot": "block_slot", # Join on the block slot containing the attestation
        "proposer_index": "block_proposer_index",
        "proposer_entity": "block_proposer_entity",
        "slot_start_date_time": "block_slot_start_date_time_canonical" # Get canonical time for the block
    }),
    on="block_slot",
    how="left"
)

# Set unknown block proposer entities to 'unknown'
enriched_attestations = enriched_attestations.with_columns(
    pl.col("block_proposer_entity").fill_null("unknown")
)

# 4. Calculate head_correct, head_timely, and inclusion_distance
# head_correct (and therefore head_timely) is null where canonical_block_root
# is null, because the comparison has nothing to compare against.
enriched_attestations = enriched_attestations.with_columns(
    (pl.col('beacon_block_root') == pl.col('canonical_block_root')).alias('head_correct'),
    (pl.col('block_slot') - pl.col('slot')).alias('inclusion_distance')
).with_columns(
    (pl.col('head_correct') & (pl.col('inclusion_distance') == 1)).alias('head_timely')
)

# 5. Add period column (before/after event) using the canonical block time
# The canonical time is null when the including block is missing from
# beacon_blocks_pl (e.g. a beacon block fetch failed). A null comparison would
# fall through to "after", so fall back to the block time that was loaded with
# the attestation itself. Both sides are brought to a timezone-naive
# microsecond datetime so they can be coalesced and compared with
# event_date_naive.
block_time_naive = pl.coalesce(
    pl.col('block_slot_start_date_time_canonical').dt.replace_time_zone(None).dt.cast_time_unit("us"),
    pl.col('block_slot_start_date_time').dt.cast_time_unit("us"),
)
enriched_attestations = enriched_attestations.with_columns(
    pl.when(block_time_naive < event_date_naive)
    .then(pl.lit("before"))
    .otherwise(pl.lit("after"))
    .alias("period")
)

# Finally sort them by the block time loaded with the attestation
# (block_slot_start_date_time); unlike the joined canonical time it is never
# introduced by a left join, so it cannot be null for that reason.
enriched_attestations = enriched_attestations.sort("block_slot_start_date_time")



print("Attestations DataFrame enriched.")
print(enriched_attestations.head())


In [9]:

# --- Calculate Aggregated Metrics ---

# # 1. Slot Metrics (Performance per slot)
# print("Calculating slot metrics...")
# slot_metrics_df = enriched_attestations.group_by("block_slot").agg(
#     pl.len().alias("total_attestations"),
#     pl.sum("head_correct").alias("correct_head_count"),
#     pl.first("block_slot_start_date_time"),
#     # Keep proposer info associated with the block_slot
#     pl.first("block_proposer_index").alias("proposer_index"),
#     pl.first("block_proposer_entity").alias("proposer_entity")
# ).with_columns(
#     (pl.col("correct_head_count") / pl.col("total_attestations") * 100)
#     .fill_null(0.0) # Handle division by zero if total_attestations is 0
#     .alias("correct_head_percentage")
# ).sort("block_slot")

# print("Slot metrics calculated.")
# print(slot_metrics_df.head())


# # 2. Attester Entity Performance (Performance per attesting entity)
# print("Calculating attester entity performance...")
# attester_entity_performance_df = enriched_attestations.group_by("attester_entity").agg(
#     pl.len().alias("total_attestations"),
#     pl.sum("head_correct").alias("correct_head_count"),
#     pl.mean("head_correct").alias("avg_correct_head_percentage_calc") * 100, # Calculate mean directly
#     pl.sum("head_timely").alias("timely_head_count"),
#     pl.mean("head_timely").alias("avg_timely_head_percentage_calc") * 100, # Calculate mean directly
#     pl.min("block_slot_start_date_time").alias("first_attestation_time"), # Use min/max for time range
#     pl.max("block_slot_start_date_time").alias("last_attestation_time")
# ).rename({ # Rename calculated columns for clarity
#     "avg_correct_head_percentage_calc": "avg_correct_head_percentage",
#     "avg_timely_head_percentage_calc": "avg_timely_head_percentage"
# }).sort("avg_correct_head_percentage", descending=True)


# print("Attester entity performance calculated.")
# print(attester_entity_performance_df.head())


# # 3. Proposer Entity Performance (Accuracy of attestations in blocks proposed by entity)
# print("Calculating proposer entity performance metrics...")

# # Handle event_date properly to avoid timezone issues
# try:
#     # Convert to naive datetime if it's timezone-aware
#     if isinstance(event_date, pd.Timestamp) and event_date.tzinfo is not None:
#         event_date_naive = event_date.tz_localize(None)
#     else:
#         event_date_naive = pd.to_datetime(event_date)
    
#     # Create period column based on comparison with naive datetime
#     slot_metrics_with_period = slot_metrics_df.with_columns(
#         pl.when(pl.col("block_slot_start_date_time") < pl.lit(event_date_naive))
#         .then(pl.lit("Before"))
#         .otherwise(pl.lit("After"))
#         .alias("period")
#     )
#     print(f"Period column created using event date: {event_date_naive}")
# except Exception as e:
#     # If event_date processing fails, create period column with a default value
#     slot_metrics_with_period = slot_metrics_df.with_columns(pl.lit("After").alias("period"))
#     print(f"Using default 'After' period due to event_date processing issue: {e}")


# # Group by proposer entity and period
# proposer_entity_performance = slot_metrics_with_period.group_by(["proposer_entity", "period"]).agg(
#     pl.len().alias("total_blocks_proposed"), # Count slots = count blocks proposed
#     pl.mean("correct_head_percentage").alias("avg_head_vote_accuracy"),
#     pl.min("correct_head_percentage").alias("min_head_vote_accuracy"),
#     pl.max("correct_head_percentage").alias("max_head_vote_accuracy"),
#     pl.std("correct_head_percentage").alias("std_head_vote_accuracy")
# )

# # Add timely head metrics (aggregated by attester entity) - replicating original logic's merge
# # Note: This merges average *attester* timeliness onto the *proposer* performance summary.
# # Consider if a different aggregation (e.g., timeliness of attestations *within* proposed blocks) is desired.
# timely_entity_performance_agg = attester_entity_performance_df.select(
#     pl.col("attester_entity").alias("proposer_entity"), # Rename to match join key
#     "timely_head_count",
#     "avg_timely_head_percentage"
# )

# proposer_entity_performance = proposer_entity_performance.join(
#     timely_entity_performance_agg,
#     on="proposer_entity",
#     how="left"
# )


# # Sort by period and average head vote accuracy
# proposer_entity_performance = proposer_entity_performance.sort(
#     ["period", "avg_head_vote_accuracy"], descending=[False, True]
# )

# print("Proposer entity performance calculated.")
# print("Head vote accuracy and timely head metrics for blocks proposed by each entity (before and after event):")
# print(proposer_entity_performance)

# # Display the main dataframes (optional, depends on notebook context)
# # print("\nEnriched Attestations Head:")
# # print(enriched_attestations.head())
# # print("\nSlot Metrics Head:")
# # print(slot_metrics_df.head())
# # print("\nAttester Entity Performance Head:")
# # print(attester_entity_performance_df.head())
# # print("\nProposer Entity Performance:")
# # print(proposer_entity_performance)


In [10]:
# Create interactive visualization for head timely percentage by attester entity, split by period
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Build a two-panel (before / after) scatter of head-timely percentage per
# attester entity, with a dashed line marking the per-period network average.

# Calculate head_timely percentage per attester_entity and period
attester_performance = enriched_attestations.group_by(["attester_entity", "period"]).agg(
    pl.sum("head_timely").alias("timely_head_count"),
    pl.count("head_timely").alias("total_attestations")
).with_columns(
    (pl.col("timely_head_count") / pl.col("total_attestations") * 100).alias("head_timely_percentage")
)

# Overall average per period: the unweighted mean of the per-entity percentages
overall_avg = attester_performance.group_by("period").agg(
    pl.mean("head_timely_percentage").alias("overall_average")
)

# Convert to pandas for plotting
attester_data = attester_performance.to_pandas()
overall_avg_data = overall_avg.to_pandas()

# Resolve the network averages once. A period with no rows yields NaN instead
# of raising IndexError, matching how per-entity values tolerate a missing period.
period_averages = dict(zip(overall_avg_data["period"], overall_avg_data["overall_average"]))
before_avg = period_averages.get("before", np.nan)
after_avg = period_averages.get("after", np.nan)
network_legend = f"Network Average (Before: {before_avg:.2f}%, After: {after_avg:.2f}%)"

# All entities ordered by total attestation count, largest first.
# NOTE(review): an earlier comment said "top 25", but no limit is applied here —
# confirm whether a cap is wanted before adding one.
top_entities = attester_performance.group_by("attester_entity").agg(
    pl.sum("total_attestations").alias("attestation_count")
).sort("attestation_count", descending=True)["attester_entity"].to_list()

# Filter data for the selected entities (currently a no-op because nothing is capped)
top_entities_data = attester_data[attester_data["attester_entity"].isin(top_entities)]

# Create a summary table with before/after values
summary_table = top_entities_data.pivot_table(
    index='attester_entity',
    columns='period',
    values='head_timely_percentage',
    aggfunc='mean'
).reset_index()

# Add network average to the summary table; this also guarantees that both the
# 'before' and 'after' columns exist from here on.
network_avg_row = pd.DataFrame({
    'attester_entity': ['network average'],
    'before': [before_avg],
    'after': [after_avg]
})
summary_table = pd.concat([summary_table, network_avg_row])

# entity -> {"before": pct, "after": pct}; the first row wins if a name repeats.
legend_values = (
    summary_table.drop_duplicates(subset="attester_entity")
    .set_index("attester_entity")[["before", "after"]]
    .to_dict("index")
)

# Create interactive subplot figure
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=("Before", "After"),
    shared_yaxes=True,
    horizontal_spacing=0.05 # Add a bit more space between plots
)

# Colors for entities (cycled when there are more entities than colors)
colors = px.colors.qualitative.Plotly

# For each period (before/after)
for i, period in enumerate(["before", "after"]):
    col = i + 1  # Plotly is 1-indexed

    # Filter data for this period
    period_data = top_entities_data[top_entities_data["period"] == period]

    # Add scatter plot for each entity
    for j, entity in enumerate(top_entities):
        entity_data = period_data[period_data["attester_entity"] == entity]
        if entity_data.empty:
            continue

        # Before/after values for the legend; NaN when the entity has no
        # usable value for a period.
        entity_summary = legend_values.get(entity, {})
        before_val = entity_summary.get("before", np.nan)
        after_val = entity_summary.get("after", np.nan)

        # Format legend name with values
        legend_name = f"{entity} (Before: {before_val:.2f}%, After: {after_val:.2f}%)"

        fig.add_trace(
            go.Scatter(
                x=[entity] * len(entity_data),
                y=entity_data["head_timely_percentage"],
                mode='markers',
                name=legend_name,
                marker=dict(
                    size=10, # Slightly smaller markers
                    color=colors[j % len(colors)],
                    opacity=0.7
                ),
                hovertemplate=
                '<b>%{text}</b><br>' +
                'Head Timely: %{y:.2f}%<br>' +
                '<extra></extra>',
                text=[entity] * len(entity_data),
                showlegend=(col == 2)  # Only show legend for the second plot
            ),
            row=1, col=col
        )

    # Add a line for the overall average of this period
    overall_avg_value = period_averages.get(period, np.nan)

    fig.add_trace(
        go.Scatter(
            x=list(top_entities),
            y=[overall_avg_value] * len(top_entities),
            mode='lines',
            line=dict(color='red', width=2, dash='dash'),
            name=network_legend,
            showlegend=(col == 2)  # Only show legend for the second plot
        ),
        row=1, col=col
    )

# Update layout for main figure, show the title in the middle
fig.update_layout(
    # autosize=True, # Disable autosize to set manual dimensions
    width=1400, # Increase width
    height=700, # Increase height
    hovermode="closest",
    legend=dict(
        yanchor="top",
        y=-0.5, # Move legend further down
        xanchor="center",
        x=0.5,
        orientation="h"
    ),
    title=dict(
        text="Hoodi Head Timely Percentage by Attester Entity<br><sup>Shows the percentage of correct head attestations that were immediately included in the next block<br>Grouped by attester entity, split by period (before/after event Electra fork)</sup>",
        x=0.5,  # Center the title
        xanchor="center",
        y=0.95,  # Move title down a bit to create more padding
        pad=dict(t=20)  # Add padding between title and plot
    ),
    margin=dict(l=50, r=50, t=120, b=150) # Increased top margin for more space between title and plots
)

# Update axes - always show y ticks
fig.update_yaxes(
    title_text="Head Timely Percentage (%)",
    range=[0, 100],
    gridcolor='lightgray',
    showticklabels=True,
    tickmode='linear',
    tick0=0,
    dtick=5
)
# Rotate x-axis labels to prevent overlap
fig.update_xaxes(
    showticklabels=True,
    tickangle=90 # Rotate labels
)

# Add logos above the plot area (paper coordinates): the ethpandaops logo has
# its right edge at x=0.05 (left side), the xatu logo has its right edge at x=1.0.
# NOTE(review): `source` is a relative file path — confirm it resolves in the
# environment where the figure is rendered.
ethpandaops_path = "../../assets/content/ethpandaops.png"
fig.add_layout_image(
    dict(
        source=ethpandaops_path,
        xref="paper", yref="paper",
        x=0.05, y=1.1,
        sizex=0.35, sizey=0.35,
        xanchor="right", yanchor="bottom",
        opacity=1,
        layer="above"
    )
)

xatu_path = "../../assets/content/xatu.png"
fig.add_layout_image(
    dict(
        source=xatu_path,
        xref="paper", yref="paper",
        x=1.0, y=1.1,
        sizex=0.35, sizey=0.35,
        xanchor="right", yanchor="bottom",
        opacity=1,
        layer="above"
    )
)

# Display the interactive plot
fig.show()

In [11]:
# Create interactive visualization for head timely percentage by proposer entity, split by period
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Build a two-panel (before / after) scatter of head-timely percentage per
# block proposer entity, with a dashed line marking the per-period network average.

# Calculate head_timely percentage per proposer_entity and period
proposer_performance = enriched_attestations.group_by(["block_proposer_entity", "period"]).agg(
    pl.sum("head_timely").alias("timely_head_count"),
    pl.count("head_timely").alias("total_attestations")
).with_columns(
    (pl.col("timely_head_count") / pl.col("total_attestations") * 100).alias("head_timely_percentage")
)

# Overall average per period: the unweighted mean of the per-entity percentages
overall_avg = proposer_performance.group_by("period").agg(
    pl.mean("head_timely_percentage").alias("overall_average")
)

# Convert to pandas for plotting
proposer_data = proposer_performance.to_pandas()
overall_avg_data = overall_avg.to_pandas()

# Resolve the network averages once. A period with no rows yields NaN instead
# of raising IndexError, matching how per-entity values tolerate a missing period.
period_averages = dict(zip(overall_avg_data["period"], overall_avg_data["overall_average"]))
before_avg = period_averages.get("before", np.nan)
after_avg = period_averages.get("after", np.nan)
network_legend = f"Network Average (Before: {before_avg:.2f}%, After: {after_avg:.2f}%)"

# All entities ordered by total attestation count, largest first.
# NOTE(review): an earlier comment said "top 25", but no limit is applied here —
# confirm whether a cap is wanted before adding one.
top_entities = proposer_performance.group_by("block_proposer_entity").agg(
    pl.sum("total_attestations").alias("attestation_count")
).sort("attestation_count", descending=True)["block_proposer_entity"].to_list()

# Filter data for the selected entities (currently a no-op because nothing is capped)
top_entities_data = proposer_data[proposer_data["block_proposer_entity"].isin(top_entities)]

# Create a summary table with before/after values
summary_table = top_entities_data.pivot_table(
    index='block_proposer_entity',
    columns='period',
    values='head_timely_percentage',
    aggfunc='mean'
).reset_index()

# Add network average to the summary table; this also guarantees that both the
# 'before' and 'after' columns exist from here on.
network_avg_row = pd.DataFrame({
    'block_proposer_entity': ['network average'],
    'before': [before_avg],
    'after': [after_avg]
})
summary_table = pd.concat([summary_table, network_avg_row])

# entity -> {"before": pct, "after": pct}; the first row wins if a name repeats.
legend_values = (
    summary_table.drop_duplicates(subset="block_proposer_entity")
    .set_index("block_proposer_entity")[["before", "after"]]
    .to_dict("index")
)

# Create interactive subplot figure
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=("Before", "After"),
    shared_yaxes=True,
    horizontal_spacing=0.05 # Add a bit more space between plots
)

# Colors for entities (cycled when there are more entities than colors)
colors = px.colors.qualitative.Plotly

# For each period (before/after)
for i, period in enumerate(["before", "after"]):
    col = i + 1  # Plotly is 1-indexed

    # Filter data for this period
    period_data = top_entities_data[top_entities_data["period"] == period]

    # Add scatter plot for each entity
    for j, entity in enumerate(top_entities):
        entity_data = period_data[period_data["block_proposer_entity"] == entity]
        if entity_data.empty:
            continue

        # Before/after values for the legend; NaN when the entity has no
        # usable value for a period.
        entity_summary = legend_values.get(entity, {})
        before_val = entity_summary.get("before", np.nan)
        after_val = entity_summary.get("after", np.nan)

        # Format legend name with values
        legend_name = f"{entity} (Before: {before_val:.2f}%, After: {after_val:.2f}%)"

        fig.add_trace(
            go.Scatter(
                x=[entity] * len(entity_data),
                y=entity_data["head_timely_percentage"],
                mode='markers',
                name=legend_name,
                marker=dict(
                    size=10, # Slightly smaller markers
                    color=colors[j % len(colors)],
                    opacity=0.7
                ),
                hovertemplate=
                '<b>%{text}</b><br>' +
                'Head Timely: %{y:.2f}%<br>' +
                '<extra></extra>',
                text=[entity] * len(entity_data),
                showlegend=(col == 2)  # Only show legend for the second plot
            ),
            row=1, col=col
        )

    # Add a line for the overall average of this period
    overall_avg_value = period_averages.get(period, np.nan)

    fig.add_trace(
        go.Scatter(
            x=list(top_entities),
            y=[overall_avg_value] * len(top_entities),
            mode='lines',
            line=dict(color='red', width=2, dash='dash'),
            name=network_legend,
            showlegend=(col == 2)  # Only show legend for the second plot
        ),
        row=1, col=col
    )

# Update layout for main figure, show the title in the middle
fig.update_layout(
    # autosize=True, # Disable autosize to set manual dimensions
    width=1400, # Increase width
    height=700, # Increase height
    hovermode="closest",
    legend=dict(
        yanchor="top",
        y=-0.5, # Move legend further down
        xanchor="center",
        x=0.5,
        orientation="h"
    ),
    title=dict(
        text="Hoodi Head Timely Percentage by Block Proposer Entity<br><sup>Shows the percentage of correct head attestations that were immediately included in the block by the proposer<br>Grouped by proposer entity, split by period (before/after event Electra fork)</sup>",
        x=0.5,  # Center the title
        xanchor="center",
        y=0.95,  # Move title down a bit to create more padding
        pad=dict(t=20)  # Add padding between title and plot
    ),
    margin=dict(l=50, r=50, t=120, b=150) # Increased top margin for more space between title and plots
)

# Update axes - always show y ticks
fig.update_yaxes(
    title_text="Head Timely Percentage (%)",
    range=[0, 100],
    gridcolor='lightgray',
    showticklabels=True,
    tickmode='linear',
    tick0=0,
    dtick=5
)
# Rotate x-axis labels to prevent overlap
fig.update_xaxes(
    showticklabels=True,
    tickangle=90 # Rotate labels
)

# Add logos above the plot area (paper coordinates): the ethpandaops logo has
# its right edge at x=0.05 (left side), the xatu logo has its right edge at x=1.0.
# NOTE(review): `source` is a relative file path — confirm it resolves in the
# environment where the figure is rendered.
ethpandaops_path = "../../assets/content/ethpandaops.png"
fig.add_layout_image(
    dict(
        source=ethpandaops_path,
        xref="paper", yref="paper",
        x=0.05, y=1.1,
        sizex=0.35, sizey=0.35,
        xanchor="right", yanchor="bottom",
        opacity=1,
        layer="above"
    )
)

xatu_path = "../../assets/content/xatu.png"
fig.add_layout_image(
    dict(
        source=xatu_path,
        xref="paper", yref="paper",
        x=1.0, y=1.1,
        sizex=0.25, sizey=0.25,
        xanchor="right", yanchor="bottom",
        opacity=1,
        layer="above"
    )
)


# Display the interactive plot
fig.show()

In [12]:
# Create a 5x3 grid of subplots showing the daily head timely percentage of the
# 15 largest attester entities (by validator count), each against the daily
# network average and a marker for the fork date.

# Extract date from datetime
enriched_attestations = enriched_attestations.with_columns(
    pl.col("block_slot_start_date_time").dt.date().alias("date")
)

# Calculate head_timely percentage per attester_entity and date
daily_performance = enriched_attestations.group_by(["attester_entity", "date"]).agg(
    pl.sum("head_timely").alias("timely_head_count"),
    pl.len().alias("total_attestations")
).with_columns(
    (pl.col("timely_head_count") / pl.col("total_attestations") * 100).alias("head_timely_percentage")
)

# Overall average per date: the unweighted mean of the per-entity percentages
daily_overall_avg = daily_performance.group_by("date").agg(
    pl.mean("head_timely_percentage").alias("overall_average")
)

# Get entity sizes (number of validators per entity) from the validators map
entity_counts = {}
for entity in validators.values():
    entity_counts[entity] = entity_counts.get(entity, 0) + 1

# Sort entities by size and get top 15.
# Note: `top_entities` holds (entity, validator_count) tuples here, not bare names.
top_entities = sorted(entity_counts.items(), key=lambda x: x[1], reverse=True)[:15]
top_15_entities = [entity for entity, count in top_entities]

# Convert to pandas for plotting
daily_data = daily_performance.to_pandas()
daily_avg_data = daily_overall_avg.to_pandas()

# Sort data by date to ensure lines are drawn correctly
daily_avg_data = daily_avg_data.sort_values("date")

# Create a subplot figure with one subplot per entity
fig_daily = make_subplots(
    rows=5,
    cols=3,
    subplot_titles=list(top_15_entities),
    vertical_spacing=0.1,
    horizontal_spacing=0.05
)

# The fork marker is identical in every subplot, so convert it once.
# add_vline is given the position as milliseconds since the epoch.
# NOTE(review): if event_date is a naive datetime, .timestamp() interprets it in
# the local timezone — confirm event_date is timezone-aware.
event_date_timestamp = event_date.timestamp() * 1000

# Add a line for each top entity in its own subplot
for i, entity in enumerate(top_15_entities):
    row = (i // 3) + 1
    col = (i % 3) + 1

    entity_data = daily_data[daily_data["attester_entity"] == entity]
    if entity_data.empty:
        # Nothing to draw: the subplot keeps its title but stays blank.
        continue

    # Sort by date to ensure proper line drawing
    entity_data = entity_data.sort_values("date")

    # Add entity line
    fig_daily.add_trace(
        go.Scatter(
            x=entity_data["date"],
            y=entity_data["head_timely_percentage"],
            mode='lines',
            name=entity,
            line=dict(color=colors[i % len(colors)]),
            showlegend=False,
            hovertemplate=
            '<b>%{text}</b><br>' +
            'Date: %{x}<br>' +
            'Head Timely: %{y:.2f}%<br>' +
            '<extra></extra>',
            text=[entity] * len(entity_data)
        ),
        row=row, col=col
    )

    # Add network average line to the same subplot
    fig_daily.add_trace(
        go.Scatter(
            x=daily_avg_data["date"],
            y=daily_avg_data["overall_average"],
            mode='lines',
            line=dict(color='red', width=2, dash='dash'),
            name='Network Average',
            showlegend=(i == 0),  # Only show in legend once
            hovertemplate=
            '<b>Network Average</b><br>' +
            'Date: %{x}<br>' +
            'Head Timely: %{y:.2f}%<br>' +
            '<extra></extra>',
        ),
        row=row, col=col
    )

    # Add vertical line for Electra fork event using timestamp
    fig_daily.add_vline(
        x=event_date_timestamp,
        line=dict(color="green", width=2, dash="dash"),
        annotation_text="Electra Fork",
        annotation_position="top right",
        row=row, col=col
    )

    # Set y-axis range for each subplot
    fig_daily.update_yaxes(range=[0, 100], row=row, col=col)

# Update layout
fig_daily.update_layout(
    width=1600,
    height=1200,
    title=dict(
        text="Daily Head Timely Percentage by Top Attester Entities vs Network Average<br><sup>Shows the percentage of correct head attestations that were immediately included in the next block over time</sup>",
        x=0.5,
        xanchor="center",
        y=0.98
    ),
    hovermode="closest",
    legend=dict(
        yanchor="top",
        y=-0.05,
        xanchor="center",
        x=0.5,
        orientation="h"
    ),
    margin=dict(l=50, r=50, t=120, b=100)
)

# Update all x-axes
fig_daily.update_xaxes(
    tickangle=45,
    gridcolor='lightgray'
)

# Update all y-axes
fig_daily.update_yaxes(
    title_text="Head Timely %",
    gridcolor='lightgray',
    tickmode='linear',
    tick0=0,
    dtick=20
)

# Add logos (ethpandaops_path and xatu_path are defined by an earlier cell)
fig_daily.add_layout_image(
    dict(
        source=ethpandaops_path,
        xref="paper", yref="paper",
        x=0.05, y=1.05,
        sizex=0.06, sizey=0.06,
        xanchor="right", yanchor="bottom",
        opacity=1,
        layer="above"
    )
)

fig_daily.add_layout_image(
    dict(
        source=xatu_path,
        xref="paper", yref="paper",
        x=1.0, y=1.05,
        sizex=0.06, sizey=0.06,
        xanchor="right", yanchor="bottom",
        opacity=1,
        layer="above"
    )
)

# Display the interactive plot
fig_daily.show()


In [None]:
# Create a 5x3 grid of subplots showing the daily head timely percentage of the
# 15 block proposer entities with the most attestation rows, each against the
# daily network average and a marker for the fork date.

# Extract date from datetime
enriched_attestations = enriched_attestations.with_columns(
    pl.col("block_slot_start_date_time").dt.date().alias("date")
)

# Calculate head_timely percentage per block_proposer_entity and date
daily_performance = enriched_attestations.group_by(["block_proposer_entity", "date"]).agg(
    pl.sum("head_timely").alias("timely_head_count"),
    pl.len().alias("total_attestations")
).with_columns(
    (pl.col("timely_head_count") / pl.col("total_attestations") * 100).alias("head_timely_percentage")
)

# Overall average per date: the unweighted mean of the per-entity percentages
daily_overall_avg = daily_performance.group_by("date").agg(
    pl.mean("head_timely_percentage").alias("overall_average")
)

# Get the top 15 entities by total attestation count
entity_attestation_counts = enriched_attestations.group_by("block_proposer_entity").agg(
    pl.len().alias("total_attestations")
).sort("total_attestations", descending=True).limit(15)

top_15_entities = entity_attestation_counts["block_proposer_entity"].to_list()

# Convert to pandas for plotting
daily_data = daily_performance.to_pandas()
daily_avg_data = daily_overall_avg.to_pandas()

# Sort data by date to ensure lines are drawn correctly
daily_avg_data = daily_avg_data.sort_values("date")

# Create a subplot figure with one subplot per entity
fig_daily = make_subplots(
    rows=5,
    cols=3,
    subplot_titles=list(top_15_entities),
    vertical_spacing=0.1,
    horizontal_spacing=0.05
)

# The fork marker is identical in every subplot, so convert it once.
# add_vline is given the position as milliseconds since the epoch.
# NOTE(review): if event_date is a naive datetime, .timestamp() interprets it in
# the local timezone — confirm event_date is timezone-aware.
event_date_timestamp = event_date.timestamp() * 1000

# Add a line for each top entity in its own subplot
for i, entity in enumerate(top_15_entities):
    row = (i // 3) + 1
    col = (i % 3) + 1

    entity_data = daily_data[daily_data["block_proposer_entity"] == entity]
    if entity_data.empty:
        # Nothing to draw: the subplot keeps its title but stays blank.
        continue

    # Sort by date to ensure proper line drawing
    entity_data = entity_data.sort_values("date")

    # Add entity line
    fig_daily.add_trace(
        go.Scatter(
            x=entity_data["date"],
            y=entity_data["head_timely_percentage"],
            mode='lines',
            name=entity,
            line=dict(color=colors[i % len(colors)]),
            showlegend=False,
            hovertemplate=
            '<b>%{text}</b><br>' +
            'Date: %{x}<br>' +
            'Head Timely: %{y:.2f}%<br>' +
            '<extra></extra>',
            text=[entity] * len(entity_data)
        ),
        row=row, col=col
    )

    # Add network average line to the same subplot
    fig_daily.add_trace(
        go.Scatter(
            x=daily_avg_data["date"],
            y=daily_avg_data["overall_average"],
            mode='lines',
            line=dict(color='red', width=2, dash='dash'),
            name='Network Average',
            showlegend=(i == 0),  # Only show in legend once
            hovertemplate=
            '<b>Network Average</b><br>' +
            'Date: %{x}<br>' +
            'Head Timely: %{y:.2f}%<br>' +
            '<extra></extra>',
        ),
        row=row, col=col
    )

    # Add vertical line for Electra fork event using timestamp
    fig_daily.add_vline(
        x=event_date_timestamp,
        line=dict(color="green", width=2, dash="dash"),
        annotation_text="Electra Fork",
        annotation_position="top right",
        row=row, col=col
    )

    # Set y-axis range for each subplot
    fig_daily.update_yaxes(range=[0, 100], row=row, col=col)

# Update layout
fig_daily.update_layout(
    width=1600,
    height=1200,
    title=dict(
        text="Daily Head Timely Percentage by Top Block Proposers Entities vs Network Average<br><sup>Shows the percentage of correct head attestations that the block proposer immediately included in the next block over time</sup>",
        x=0.5,
        xanchor="center",
        y=0.98
    ),
    hovermode="closest",
    legend=dict(
        yanchor="top",
        y=-0.05,
        xanchor="center",
        x=0.5,
        orientation="h"
    ),
    margin=dict(l=50, r=50, t=120, b=100)
)

# Update all x-axes
fig_daily.update_xaxes(
    tickangle=45,
    gridcolor='lightgray'
)

# Update all y-axes
fig_daily.update_yaxes(
    title_text="Head Timely %",
    gridcolor='lightgray',
    tickmode='linear',
    tick0=0,
    dtick=20
)

# Add logos (ethpandaops_path and xatu_path are defined by an earlier cell)
fig_daily.add_layout_image(
    dict(
        source=ethpandaops_path,
        xref="paper", yref="paper",
        x=0.05, y=1.05,
        sizex=0.06, sizey=0.06,
        xanchor="right", yanchor="bottom",
        opacity=1,
        layer="above"
    )
)

fig_daily.add_layout_image(
    dict(
        source=xatu_path,
        xref="paper", yref="paper",
        x=1.0, y=1.05,
        sizex=0.06, sizey=0.06,
        xanchor="right", yanchor="bottom",
        opacity=1,
        layer="above"
    )
)

# Display the interactive plot
fig_daily.show()
