In [1]:
# Standard library imports
import json
import os

# Third-party imports
import polars as pl
import requests

# Local imports
from op_analytics.datapipeline.chains.load import load_chain_metadata
from op_analytics.datasources.coingecko.price_data import CoinGeckoDataSource
from op_analytics.datasources.coingecko.dataaccess import CoinGecko
from op_analytics.coreutils.request import new_session
from op_analytics.coreutils.time import now_dt

In [None]:
# Load the chain metadata here so this cell works on a fresh kernel: the
# frame was previously only assigned in a cell further down the notebook,
# which made this cell fail with NameError under Restart & Run All.
chain_metadata = load_chain_metadata()

# Get unique non-null CoinGecko API keys
token_ids = (
    chain_metadata.filter(pl.col("cgt_coingecko_api").is_not_null())
    .select("cgt_coingecko_api")
    .unique()
    .to_series()
    .to_list()
)

print(f"Found {len(token_ids)} unique tokens with CoinGecko API keys")

In [None]:
# Test metadata pull with contract address exploration


def parse_contract_addresses(raw):
    """Normalise one `contract_addresses` cell value to a dict.

    Args:
        raw: A cell value from the `contract_addresses` column. The code below
            handles a ready-made dict or a JSON-encoded string.

    Returns:
        The `{platform: address}` dict, or None when `raw` is neither a dict
        nor a string that decodes to a JSON object.
    """
    if isinstance(raw, dict):
        return raw
    if isinstance(raw, str):
        try:
            decoded = json.loads(raw)
        except json.JSONDecodeError:
            return None
        # Valid JSON that is not an object (e.g. a list) has no platform keys.
        return decoded if isinstance(decoded, dict) else None
    return None


print(f"Fetching metadata for {len(token_ids)} tokens...")

# Initialize data source if not already done, so re-running the cell reuses
# the existing session instead of opening a new one.
if 'data_source' not in locals():
    session = new_session()
    data_source = CoinGeckoDataSource(session=session)

# Fetch metadata for all tokens and add the dt partition column
metadata_df = data_source.get_token_metadata(token_ids).with_columns(dt=pl.lit(now_dt()))

print(f"Successfully fetched metadata for {len(metadata_df)} tokens")

# Explore contract addresses structure
if 'contract_addresses' in metadata_df.columns:
    print("\n=== CONTRACT ADDRESSES EXPLORATION ===")

    # Get a sample token with contract addresses
    sample_row = metadata_df.filter(pl.col('contract_addresses').is_not_null()).head(1)
    if len(sample_row) > 0:
        token_id = sample_row['token_id'][0]
        contract_data = sample_row['contract_addresses'][0]

        print(f"\nSample token: {token_id}")
        print(f"Contract addresses data type: {type(contract_data)}")

        contract_dict = parse_contract_addresses(contract_data)
        if contract_dict is not None:
            # The heading records whether the value had to be decoded from JSON.
            if isinstance(contract_data, str):
                print("Parsed contract addresses:")
            else:
                print("Contract addresses:")
            for platform, address in contract_dict.items():
                print(f"  {platform}: {address}")
        elif isinstance(contract_data, str):
            # Undecodable string: show it verbatim rather than hiding it.
            print(f"Raw contract data: {contract_data}")

    # Show all unique platforms/chains across all tokens
    print("\n=== ALL PLATFORMS/CHAINS FOUND ===")
    all_platforms = set()

    for contract_data in metadata_df['contract_addresses'].drop_nulls():
        contract_dict = parse_contract_addresses(contract_data)
        if contract_dict is not None:
            all_platforms.update(contract_dict.keys())

    print(f"Found {len(all_platforms)} unique platforms/chains:")
    for platform in sorted(all_platforms):
        print(f"  - {platform}")

print("\nSample metadata:")
print(metadata_df.head())

In [None]:
# Display metadata for the Celo token
print("\n=== CELO TOKEN METADATA ===")
celo_metadata = metadata_df.filter(pl.col("token_id") == "celo")
print(celo_metadata)

# Indexing `[0, 0]` raises when the filter matched no rows, and `select`
# raises when the column is absent, so check both before reading the value.
if len(celo_metadata) > 0 and "contract_addresses" in celo_metadata.columns:
    celo_contracts = celo_metadata.select("contract_addresses")[0, 0]
else:
    celo_contracts = None
    print("\nNo contract_addresses value available for token_id 'celo'")

print("\n=== CELO CONTRACT ADDRESSES ===")
if isinstance(celo_contracts, str):
    # String values are expected to be JSON; fall back to the raw text when
    # they do not decode to an object.
    try:
        contract_dict = json.loads(celo_contracts)
    except json.JSONDecodeError:
        contract_dict = None
    if isinstance(contract_dict, dict):
        for platform, address in contract_dict.items():
            print(f"  {platform}: {address}")
    else:
        print(f"  Raw data: {celo_contracts}")
elif isinstance(celo_contracts, dict):
    for platform, address in celo_contracts.items():
        print(f"  {platform}: {address}")

# Always show the unprocessed value as well
print("\nRaw contract_addresses data:")
print(celo_contracts)


In [2]:
# Overall run: pipeline entry points plus the context manager used by the
# run cell below.
from op_analytics.datasources.coingecko.execute import (
    execute_pull,
    execute_metadata_pull,
)
from op_analytics.coreutils.partitioned.dailydatawrite import write_to_prod

# File with extra token IDs, passed to the pull functions as
# `extra_token_ids_file`. The path is relative, so it resolves against the
# notebook's working directory.
extra_token_ids_file = "../../../src/op_analytics/datasources/coingecko/config/extra_token_ids.txt"


In [3]:
# NOTE(review): judging by its name, `write_to_prod` routes writes inside this
# block to production storage -- confirm before re-running.
with write_to_prod():
    # Full pipeline run including the extra tokens file; metadata is not
    # fetched and partitions that already exist are not skipped.
    result = execute_pull(
        days=365,
        extra_token_ids_file=extra_token_ids_file,
        include_top_tokens=0,
        fetch_metadata=False,
        skip_existing_partitions=False,
    )

    # Alternative invocations, kept for reference:
    #   result = execute_pull(days=365, fetch_metadata=True)
    #   result = execute_pull(days=365, skip_existing_partitions=True, fetch_metadata=False, token_id='ethereum')
    # Metadata only:
    #   result = execute_metadata_pull(extra_token_ids_file=extra_token_ids_file, include_top_tokens=25)
    #   result = execute_metadata_pull()

[2m2025-07-09 11:19:58[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m [36mprocess[0m=[35m19578[0m
[2m2025-07-09 11:19:58[0m [[32m[1mdebug    [0m] [1mloaded vault: 28 items        [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m79[0m [36mprocess[0m=[35m19578[0m
[2m2025-07-09 11:19:58[0m [[32m[1minfo     [0m] [1mUsing CoinGecko Free API (rate limited)[0m [36mfilename[0m=[35mprice_data.py[0m [36mlineno[0m=[35m91[0m [36mprocess[0m=[35m19578[0m
[2m2025-07-09 11:19:58[0m [[32m[1mdebug    [0m] [1mFound vault variable GOOGLE_SERVICE_ACCOUNT (has JSON key)[0m [36mfilename[0m=[35mgcpauth.py[0m [36mlineno[0m=[35m18[0m [36mprocess[0m=[35m19578[0m
[2m2025-07-09 11:20:00[0m [[32m[1minfo     [0m] [1mLoaded OP chains metadata from /Users/michaelsilberling/Documents/GitHub/op-analytics/op_chains_tracking/inputs/chain_metadata_raw.csv[0m [36mfil

In [None]:
# Same pull, but with the top 25 tokens and metadata included. This call is
# made outside the `write_to_prod()` context.
result = execute_pull(
    days=365,
    extra_token_ids_file=extra_token_ids_file,
    include_top_tokens=25,
    fetch_metadata=True,
)

In [None]:
# Show the value returned by whichever `execute_pull` cell ran most recently.
print(result)

In [None]:
# 1. Derive the CoinGecko token IDs from the chain metadata
from op_analytics.datapipeline.chains.load import load_chain_metadata

chain_metadata = load_chain_metadata()

# Keep only rows that have a value in `cgt_coingecko_api`, then reduce that
# column to its distinct values as a plain Python list.
has_coingecko_id = pl.col("cgt_coingecko_api").is_not_null()
coingecko_ids = chain_metadata.filter(has_coingecko_id).select("cgt_coingecko_api").unique()
token_ids = coingecko_ids.to_series().to_list()

print(f"Found {len(token_ids)} unique tokens with CoinGecko API keys")