In [1]:
# Standard library imports
import os

# Third-party imports
import polars as pl
import requests

# Local imports
from op_analytics.datapipeline.chains.load import load_chain_metadata
from op_analytics.datasources.coingecko.price_data import CoinGeckoDataSource
from op_analytics.datasources.coingecko.dataaccess import CoinGecko
from op_analytics.coreutils.request import new_session

# Configuration: set the ALLOW_WRITE flag for this kernel process. It is set
# after the op_analytics imports, exactly as in the original cell order.
# NOTE(review): presumably op_analytics checks this variable before writing
# data -- confirm against the library before running the write cells.
os.environ["ALLOW_WRITE"] = "true"

In [None]:
# 1. Load chain metadata to get token IDs
# `load_chain_metadata` is imported in the first cell of the notebook.
# The result is filtered on its "cgt_coingecko_api" column in the next cell.
chain_metadata = load_chain_metadata()


In [None]:
# Collect the distinct, non-null values of the "cgt_coingecko_api" column.
# These values are passed as `token_ids` to the price fetcher further down.
# `unique()` does not guarantee an order, so the ids are sorted to keep the
# fetch order (and the log output) identical from run to run.
token_ids = (
    chain_metadata.get_column("cgt_coingecko_api")
    .drop_nulls()
    .unique()
    .sort()
    .to_list()
)

print(f"Found {len(token_ids)} unique tokens with CoinGecko API keys")

In [None]:
# 2. Initialize the data source
# `CoinGeckoDataSource` is imported in the first cell of the notebook.
# The session is a plain `requests.Session()`; this cell configures no logging
# or retry behaviour on it.
# NOTE(review): `new_session` is imported in the first cell but never used --
# confirm whether it was meant to replace `requests.Session()` here.
session = requests.Session()
data_source = CoinGeckoDataSource(session=session)

In [None]:
# 3. Fetch price data for all tokens
# Tokens are requested one at a time so that a single failing token does not
# abort the whole run.
PRICE_HISTORY_DAYS = 365  # value passed as `days` to get_token_prices

all_prices = []
failed_token_ids = []
for token_id in token_ids:
    try:
        prices = data_source.get_token_prices(token_ids=[token_id], days=PRICE_HISTORY_DAYS)
    except Exception as e:
        # Best-effort: report the failure, remember the token, and move on.
        failed_token_ids.append(token_id)
        print(f"Error fetching data for {token_id}: {e}")
    else:
        all_prices.append(prices)
        print(f"Successfully fetched data for {token_id}")

if failed_token_ids:
    print(f"\nFailed to fetch {len(failed_token_ids)} of {len(token_ids)} tokens: {failed_token_ids}")

# pl.concat cannot combine an empty list; fail with an explicit message instead.
if not all_prices:
    raise RuntimeError("No price data was fetched for any token; see the errors printed above.")

# Combine all price data
prices_df = pl.concat(all_prices)
print(f"\nSuccessfully fetched price data for {len(prices_df)} token-days")
print("\nSample of fetched data:")
print(prices_df.head())

In [None]:
# 4. Write to BigQuery
# `CoinGecko` is imported in the first cell of the notebook.
# Write the frame built by the fetch cell (`prices_df`). The name `result` is
# only assigned in the final "Overall run" cell, so referencing it here raises
# a NameError when the notebook is run top to bottom on a fresh kernel.
CoinGecko.DAILY_PRICES.write(prices_df)
print("Successfully wrote data to BigQuery")

In [None]:
# 5. Read back from production to verify
# Start the read at the earliest `dt` value present in the fetched prices.
earliest_dt = prices_df["dt"].min()
df = CoinGecko.DAILY_PRICES.read_polars(min_date=earliest_dt)

print("\nData from production:")
print(df.head())

In [None]:
# 6. Optional: Analyze the data
# Overall span covered by the `dt` column.
first_dt = prices_df['dt'].min()
last_dt = prices_df['dt'].max()
print(f"Overall date range: {first_dt} to {last_dt}")

# How many distinct tokens are present.
unique_token_count = prices_df['token_id'].n_unique()
print(f"Number of unique tokens: {unique_token_count}")

# Per-token summary: mean price, first/last date, and number of `dt` values.
token_stats = (
    prices_df
    .group_by('token_id')
    .agg(
        avg_price=pl.col('price_usd').mean(),
        min_date=pl.col('dt').min(),
        max_date=pl.col('dt').max(),
        days_of_data=pl.col('dt').count(),
    )
    .sort('token_id')
)

print("\nStats per token:")
print(token_stats)

In [None]:
# Overall run: call the packaged pull entry point instead of the manual steps above.
from op_analytics.coreutils.partitioned.dailydatawrite import write_to_prod
from op_analytics.datasources.coingecko.execute import execute_pull

# Config file listing extra token ids. The path is relative, so it resolves
# against the kernel's current working directory.
extra_token_ids_file = "../../../src/op_analytics/datasources/coingecko/config/extra_token_ids.txt"

# NOTE(review): `write_to_prod()` presumably directs writes made inside the
# block to production storage -- confirm before running.
with write_to_prod():
    result = execute_pull(
        days=365,
        extra_token_ids_file=extra_token_ids_file,
    )

In [None]:
# Print a 5-item sample of `result` (the value returned by execute_pull in the
# "Overall run" cell). No seed is passed here.
# NOTE(review): presumably the rows shown vary between runs -- confirm.
print(result.sample(5))