In [1]:
# Standard library imports
import requests

# Third-party imports
import polars as pl

# Local imports
from op_analytics.datapipeline.chains.load import load_chain_metadata
from op_analytics.datasources.coingecko.price_data import CoinGeckoDataSource
from op_analytics.datasources.coingecko.dataaccess import CoinGecko
from op_analytics.coreutils.request import new_session

In [2]:
# 1. Load chain metadata to get token IDs
from op_analytics.datapipeline.chains.load import load_chain_metadata

# Load chain metadata
chain_metadata = load_chain_metadata()

# Get unique non-null CoinGecko API keys
token_ids = (
    chain_metadata.filter(pl.col("cgt_coingecko_api").is_not_null())
    .select("cgt_coingecko_api")
    .unique()
    .to_series()
    .to_list()
)

print(f"Found {len(token_ids)} unique tokens with CoinGecko API keys")

[2m2025-06-17 13:33:08[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m [36mprocess[0m=[35m94000[0m
[2m2025-06-17 13:33:08[0m [[32m[1mdebug    [0m] [1mloaded vault: 28 items        [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m79[0m [36mprocess[0m=[35m94000[0m
[2m2025-06-17 13:33:08[0m [[32m[1mdebug    [0m] [1mFound vault variable GOOGLE_SERVICE_ACCOUNT (has JSON key)[0m [36mfilename[0m=[35mgcpauth.py[0m [36mlineno[0m=[35m18[0m [36mprocess[0m=[35m94000[0m
[2m2025-06-17 13:33:10[0m [[32m[1minfo     [0m] [1mLoaded OP chains metadata from /Users/michaelsilberling/Documents/GitHub/op-analytics/op_chains_tracking/inputs/chain_metadata_raw.csv[0m [36mfilename[0m=[35mload.py[0m [36mlineno[0m=[35m87[0m [36mprocess[0m=[35m94000[0m
[2m2025-06-17 13:33:10[0m [[32m[1minfo     [0m] [1m[REPO vs. GSHEETS] ERROR: Chain Metadata is different[0m 

In [3]:
# 2. Initialize the data source
from op_analytics.datasources.coingecko.price_data import CoinGeckoDataSource
from op_analytics.coreutils.request import new_session

# Create a session with debug logging
session = requests.Session()
data_source = CoinGeckoDataSource(session=session)

[2m2025-06-17 13:33:10[0m [[32m[1minfo     [0m] [1mUsing CoinGecko Free API (rate limited)[0m [36mfilename[0m=[35mprice_data.py[0m [36mlineno[0m=[35m90[0m [36mprocess[0m=[35m94000[0m


In [4]:
# 3. Fetch price data for all tokens
# Fetch 7 days of data for all tokens
all_prices = []
for token_id in token_ids:
    try:
        prices = data_source.get_token_prices(token_ids=[token_id], days=365)
        all_prices.append(prices)
        print(f"Successfully fetched data for {token_id}")
    except Exception as e:
        print(f"Error fetching data for {token_id}: {e}")

# Combine all price data
prices_df = pl.concat(all_prices)
print(f"\nSuccessfully fetched price data for {len(prices_df)} token-days")
print("\nSample of fetched data:")
print(prices_df.head())

[2m2025-06-17 13:33:10[0m [[32m[1minfo     [0m] [1mReceived data for tokens      [0m [36mcount[0m=[35m1[0m [36mfilename[0m=[35mprice_data.py[0m [36mlineno[0m=[35m163[0m [36mprocess[0m=[35m94000[0m
Successfully fetched data for settled-ethxy-token
[2m2025-06-17 13:33:22[0m [[32m[1minfo     [0m] [1mReceived data for tokens      [0m [36mcount[0m=[35m1[0m [36mfilename[0m=[35mprice_data.py[0m [36mlineno[0m=[35m163[0m [36mprocess[0m=[35m94000[0m
Successfully fetched data for automata
[2m2025-06-17 13:33:34[0m [[32m[1minfo     [0m] [1mReceived data for tokens      [0m [36mcount[0m=[35m1[0m [36mfilename[0m=[35mprice_data.py[0m [36mlineno[0m=[35m163[0m [36mprocess[0m=[35m94000[0m
Successfully fetched data for frax-ether
Error fetching data for mantle: Rate limit exceeded: 429 Client Error: Too Many Requests for url: https://api.coingecko.com/api/v3/coins/mantle/market_chart?vs_currency=usd&days=90&interval=daily
[2m2025-06-17

In [5]:
# 4. Write to BigQuery
from op_analytics.datasources.coingecko.dataaccess import CoinGecko

# Write the data to BigQuery
CoinGecko.DAILY_PRICES.write(prices_df)
print("Successfully wrote data to BigQuery")

[2m2025-06-17 13:36:58[0m [[32m[1minfo     [0m] [1mdone writing 10rows 2.6KB     [0m [36mfilename[0m=[35mgcs_parquet.py[0m [36mlineno[0m=[35m57[0m [36mmaxrss[0m=[35m203489280[0m [36mpath[0m=[35m'/Users/michaelsilberling/Documents/GitHub/op-analytics/ozone/warehouse/coingecko/fact_coingecko_daily_prices_v1/dt=2025-03-19/out.parquet'[0m [36mprocess[0m=[35m94000[0m [36mroot[0m=[35mcoingecko/fact_coingecko_daily_prices_v1[0m [36mrows[0m=[35m10[0m [36msize[0m=[35m2587[0m
[2m2025-06-17 13:36:58[0m [[32m[1minfo     [0m] [1mmemory usage                  [0m [36mfilename[0m=[35mgcs_parquet.py[0m [36mlineno[0m=[35m65[0m [36mmax_rss[0m=[35m203.49[0m [36mprocess[0m=[35m94000[0m [36mroot[0m=[35mcoingecko/fact_coingecko_daily_prices_v1[0m
[2m2025-06-17 13:36:59[0m [[32m[1mdebug    [0m] [1mdone writing coingecko/fact_coingecko_daily_prices_v1 to LOCAL[0m [36mfilename[0m=[35mwritemanager.py[0m [36mlineno[0m=[35m131[0m [3

In [6]:
# 5. Read back from production to verify
df = CoinGecko.DAILY_PRICES.read_polars(min_date=prices_df["dt"].min())
print("\nData from production:")
print(df.head())

[2m2025-06-17 13:36:59[0m [[32m[1minfo     [0m] [1minitialized duckdb at /var/folders/by/kltjc8yd0yz_7_wrtrzhrm9m0000gn/T/4by8zxu8/op-analytics.duck.db[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m118[0m [36mprocess[0m=[35m94000[0m
[2m2025-06-17 13:36:59[0m [[32m[1minfo     [0m] [1mcreate duckddb gcs secret     [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m66[0m [36mprocess[0m=[35m94000[0m
[2m2025-06-17 13:37:00[0m [[32m[1minfo     [0m] [1mReading data from 'coingecko/fact_coingecko_daily_prices_v1' with filters min_date=2025-03-19, max_date=None, date_range_spec=None[0m [36mfilename[0m=[35mdailydata.py[0m [36mlineno[0m=[35m94[0m [36mprocess[0m=[35m94000[0m
[2m2025-06-17 13:37:00[0m [[32m[1minfo     [0m] [1mquerying markers for 'coingecko/fact_coingecko_daily_prices_v1' DateFilter(min_date=datetime.date(2025, 3, 19), max_date=None, datevals=None)[0m [36mfilename[0m=[35mdailydataread.py[0m [36mlineno

In [8]:
# 6. Optional: Analyze the data
# Get the date range of the data
print(f"Overall date range: {prices_df['dt'].min()} to {prices_df['dt'].max()}")

# Count unique tokens
print(f"Number of unique tokens: {prices_df['token_id'].n_unique()}")

# Get date range and average price per token
token_stats = prices_df.group_by('token_id').agg([
    pl.col('price_usd').mean().alias('avg_price'),
    pl.col('dt').min().alias('min_date'),
    pl.col('dt').max().alias('max_date'),
    pl.col('dt').count().alias('days_of_data')
]).sort('token_id')

print("\nStats per token:")
print(token_stats)

Overall date range: 2025-03-19 to 2025-06-17
Number of unique tokens: 10

Stats per token:
shape: (10, 5)
┌─────────────────────┬─────────────┬────────────┬────────────┬──────────────┐
│ token_id            ┆ avg_price   ┆ min_date   ┆ max_date   ┆ days_of_data │
│ ---                 ┆ ---         ┆ ---        ┆ ---        ┆ ---          │
│ str                 ┆ f64         ┆ str        ┆ str        ┆ u32          │
╞═════════════════════╪═════════════╪════════════╪════════════╪══════════════╡
│ automata            ┆ 0.054153    ┆ 2025-03-19 ┆ 2025-06-17 ┆ 91           │
│ binancecoin         ┆ 626.539114  ┆ 2025-03-19 ┆ 2025-06-17 ┆ 91           │
│ binary-holdings     ┆ 0.021033    ┆ 2025-03-19 ┆ 2025-06-17 ┆ 91           │
│ celo                ┆ 0.342097    ┆ 2025-03-19 ┆ 2025-06-17 ┆ 91           │
│ frax-ether          ┆ 2123.987747 ┆ 2025-03-19 ┆ 2025-06-17 ┆ 91           │
│ gameswift           ┆ 0.013044    ┆ 2025-03-19 ┆ 2025-06-17 ┆ 91           │
│ hashkey-ecopoints   ┆ 0