In [1]:
# ----------------------------
# Daily ETH gas fees from Dune
# Saved to: ../data/raw/ethereum/gas_fees/eth_gas_fees_daily.parquet (relative to the working directory)
# ----------------------------
from pathlib import Path
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import logging

# (Optional) quiet Dune client logs
# logging.getLogger("dune_client").setLevel(logging.WARNING)

# Dune client
from dune_client.client import DuneClient
from dune_client.query import QueryBase
from dune_client.types import QueryParameter

# ----------------------------
# Config
# ----------------------------
# All paths are relative to the current working directory.
DATA_DIR   = Path("../data")
RAW_DIR    = DATA_DIR / "raw" / "ethereum" / "gas_fees"
RAW_DIR.mkdir(parents=True, exist_ok=True)  # create the output folder on first run

OUT_PATH   = RAW_DIR / "eth_gas_fees_daily.parquet"  # single canonical file (overwrites on rerun)
QUERY_ID   = 5697155                                 # Dune query
START_DATE = "2021-02-01 00:00:00"
# NOTE(review): the recorded run for this window ended on 2023-02-28, so the
# query appears to treat END_DATE as exclusive -- confirm in the query SQL.
END_DATE   = "2023-03-01 00:00:00"

# Load API key
load_dotenv("../config/.env")
# Fail fast with a clear message instead of handing None to the client and
# getting an opaque error only once the query is submitted.
if not os.getenv("DUNE_API_KEY"):
    raise RuntimeError(
        "DUNE_API_KEY is not set; define it in ../config/.env or in the environment."
    )
dune = DuneClient(os.getenv("DUNE_API_KEY"))

# ----------------------------
# Run query
# ----------------------------
# The saved Dune query takes the time window as two date-typed parameters.
date_params = [
    QueryParameter.date_type(name="start_date", value=START_DATE),
    QueryParameter.date_type(name="end_date", value=END_DATE),
]
q = QueryBase(query_id=QUERY_ID, name="Daily ETH gas fees", params=date_params)
print("Results available at:", q.url())

# Run the query through the Dune client and collect the result as a DataFrame.
fees = dune.run_query_dataframe(q)

# ----------------------------
# Clean types & columns
# ----------------------------
# Dune can return '<nil>' strings; turn them into NaN before any casting.
fees = fees.replace({"<nil>": np.nan})

# "day" becomes a UTC timestamp (unparseable values -> NaT); "date" is the
# matching plain calendar date, kept for convenience.
day_utc = pd.to_datetime(fees["day"], utc=True, errors="coerce")
fees["day"] = day_utc
fees["date"] = day_utc.dt.date

float_cols = [
    "median_base_fee_gwei",
    "median_priority_fee_gwei",
    "median_effective_gas_price_gwei",
    "median_total_fee_gwei",
]
int_cols = ["n_blocks_with_basefee", "n_blocks", "n_txs"]

# Columns the query did not return are skipped rather than treated as errors.
present = set(fees.columns)

for col in float_cols:
    if col not in present:
        continue
    fees[col] = pd.to_numeric(fees[col], errors="coerce")

for col in int_cols:
    if col not in present:
        continue
    # Nullable Int64 keeps missing counts as <NA> instead of forcing floats.
    as_number = pd.to_numeric(fees[col], errors="coerce")
    fees[col] = as_number.astype("Int64")

# Order rows chronologically with a fresh 0..n-1 index, then overwrite the output file
fees = fees.sort_values(by="day", ignore_index=True)
fees.to_parquet(OUT_PATH, index=False)

# ----------------------------
# Summary
# ----------------------------
print("[ETH Gas Fees] Saved ->", OUT_PATH)
print(f"rows={len(fees):,}  window={fees['date'].min()} → {fees['date'].max()}")

# The type-casting step tolerates columns the query did not return, so the
# report must too: selecting an absent column would raise KeyError after the
# file has already been written.
expected_cols = float_cols + int_cols
reported_cols = [name for name in expected_cols if name in fees.columns]
absent_cols = [name for name in expected_cols if name not in fees.columns]
if absent_cols:
    print("[missing columns]", absent_cols)

# Percentage of null values per metric column; only columns with any nulls are shown.
# NOTE(review): nulls in the base/priority/total fee columns are presumably the
# days before base fees existed on-chain -- verify against the Dune query.
nulls = fees[reported_cols].isna().mean().mul(100).round(2)
print("[null % by column]")
print(nulls[nulls > 0].to_dict())

2025-09-11 23:48:06,681 INFO dune_client.api.base executing 5697155 on medium cluster


Results available at: https://dune.com/queries/5697155?start_date=2021-02-01+00%3A00%3A00&end_date=2023-03-01+00%3A00%3A00


2025-09-11 23:48:06,933 INFO dune_client.api.base waiting for query execution 01K4XC01S88S2R8W1TR833199H to complete: ExecutionState.PENDING (queue position: 84)
2025-09-11 23:48:07,977 INFO dune_client.api.base waiting for query execution 01K4XC01S88S2R8W1TR833199H to complete: ExecutionState.PENDING (queue position: 84)
2025-09-11 23:48:09,036 INFO dune_client.api.base waiting for query execution 01K4XC01S88S2R8W1TR833199H to complete: ExecutionState.EXECUTING
2025-09-11 23:48:10,071 INFO dune_client.api.base waiting for query execution 01K4XC01S88S2R8W1TR833199H to complete: ExecutionState.EXECUTING
2025-09-11 23:48:11,127 INFO dune_client.api.base waiting for query execution 01K4XC01S88S2R8W1TR833199H to complete: ExecutionState.EXECUTING
2025-09-11 23:48:12,163 INFO dune_client.api.base waiting for query execution 01K4XC01S88S2R8W1TR833199H to complete: ExecutionState.EXECUTING
2025-09-11 23:48:13,210 INFO dune_client.api.base waiting for query execution 01K4XC01S88S2R8W1TR833199H

[ETH Gas Fees] Saved -> ..\data\raw\ethereum\gas_fees\eth_gas_fees_daily.parquet
rows=758  window=2021-02-01 → 2023-02-28
[null % by column]
{'median_base_fee_gwei': 24.41, 'median_priority_fee_gwei': 24.41, 'median_total_fee_gwei': 24.41}
