# Xatu Public Contributors Analysis

This notebook fetches and analyzes public contributor data from ClickHouse and generates JSON files for the frontend.

In [1]:
import os
from datetime import datetime, timedelta
import pandas as pd
from pathlib import Path
from lib import Lab

# Initialize lab
# Build the shared Lab helper for this notebook from its name and the config
# file one directory up, then run its setup steps before any accessor below
# is used.
lab = Lab('xatu-public-contributors', '../config.yaml')
lab.setup()
lab.setup_pandaops_clickhouse()
# Logger exposed by the lab; used for all progress messages in this notebook.
log = lab.log

# Get notebook specific config
notebook_config = lab.get_notebook_config()

# Data writer handle provided by the lab.
# NOTE(review): not referenced by the cells visible here — confirm it is still needed.
writer = lab.get_data_writer()

# Client used by later cells to execute queries against the pandaops ClickHouse.
pandaops_clickhouse_client = lab.get_pandaops_clickhouse_client()

# Emit a marker so the cell output shows that setup completed.
log.info("Good to go!")

2025-01-09 12:16:35,391 - xatu-public-contributors - INFO - Good to go!


In [2]:
# Convert the notebook config into its typed XatuPublicContributors form
# (time windows, data directory and networks) and display it as the cell output.
xatu_public_contributors_config = lab.get_notebook_config().as_xatu_public_contributors()
xatu_public_contributors_config


XatuPublicContributors(time_windows=[TimeWindow(file='last_30_days', step='1d', label='Last 30d', range='-30d'), TimeWindow(file='last_1_day', step='1h', label='Last 1d', range='-1d'), TimeWindow(file='last_90_days', step='3d', label='Last 90d', range='-90d')], data_dir='../data/xatu-public-contributors', networks=['mainnet', 'sepolia', 'holesky'])

In [16]:
from sqlalchemy import text
from datetime import datetime, timezone

# Count distinct contributor clients (meta_client_name) per time bucket,
# country and network, excluding ethpandaops-operated and unnamed clients.
#
# The bucket start is returned as a Unix timestamp rather than a calendar
# date: truncating to a date collapses every sub-day bucket (e.g. the 1h step
# window) onto the same day, and converting a date back to epoch seconds in
# Python depends on the local timezone of the machine running the notebook.
#
# NOTE(review): :networks is bound to a Python list; whether that expands to a
# valid IN list depends on the driver behind the client — confirm, or switch to
# an expanding bind parameter.
query = text("""
    WITH time_slots AS (
        SELECT
            toStartOfInterval(slot_start_date_time, INTERVAL :step_seconds second) as time_slot,
            meta_client_geo_country as country,
            meta_network_name,
            count(distinct meta_client_name) AS total
        FROM beacon_api_eth_v1_events_block FINAL
        WHERE
            slot_start_date_time BETWEEN toDateTime(:start_date) AND toDateTime(:end_date)
            AND meta_client_name NOT LIKE 'ethpandaops%'
            AND meta_network_name IN (:networks)
            AND meta_client_name != ''
            AND meta_client_name IS NOT NULL
        GROUP BY time_slot, country, meta_network_name
    )
    SELECT
        toUnixTimestamp(time_slot) as time,
        country,
        meta_network_name,
        total
    FROM time_slots
""")


def group_countries_by_time(rows):
    """Merge query rows that share a time bucket into one entry per bucket.

    Args:
        rows: iterable of sequences shaped like the query result, i.e.
            (unix_timestamp, country, network, total). The network column is
            ignored; callers pass rows for a single network.

    Returns:
        A list of {"time": int, "countries": [{"name", "value"}, ...]} dicts,
        sorted by ascending time. The query has no ORDER BY, so sorting here
        keeps the written files stable between runs.
    """
    buckets = {}
    for row in rows:
        timestamp = int(row[0])
        bucket = buckets.setdefault(timestamp, {"time": timestamp, "countries": []})
        bucket["countries"].append({"name": row[1], "value": row[3]})
    return [buckets[timestamp] for timestamp in sorted(buckets)]


# Raw result rows per time window file name, kept for inspection in later cells.
countries_by_window = {}

for window in xatu_public_contributors_config.time_windows:
    start_date, end_date = window.get_time_range(datetime.now(timezone.utc))
    step_seconds = window.get_step_seconds()

    # Format dates without microseconds for Clickhouse
    start_str = start_date.strftime('%Y-%m-%d %H:%M:%S')
    end_str = end_date.strftime('%Y-%m-%d %H:%M:%S')

    total_timesteps = (end_date - start_date).total_seconds() / step_seconds

    log.info(f"Fetching data for {window.file}, total timesteps: {total_timesteps}")

    result = pandaops_clickhouse_client.execute(
        query,
        {
            "start_date": start_str,
            "end_date": end_str,
            "networks": xatu_public_contributors_config.networks,
            "step_seconds": step_seconds
        }
    )
    countries = result.fetchall()

    if len(countries) == 0:
        log.warning(f"No countries found for time window {window.file}")
        continue

    countries_by_window[window.file] = countries
    log.info(f"Found {len(countries)} countries for time window {window.file}")

    # Partition the rows by network in a single pass instead of rescanning the
    # full result once per configured network.
    rows_by_network = {}
    for row in countries:
        rows_by_network.setdefault(row[2], []).append(row)

    # Write one file per time window and network; networks with no rows are skipped.
    for network in xatu_public_contributors_config.networks:
        network_rows = rows_by_network.get(network)
        if not network_rows:
            continue

        lab.write_json(
            f"countries/{network}/{window.file}.json",
            group_countries_by_time(network_rows),
        )


2025-01-09 12:39:47,849 - xatu-public-contributors - INFO - Fetching data for last_30_days, total timesteps: 30.0


TypeError: Connection.execute() got an unexpected keyword argument 'columnar'