In [96]:
import pandas as pd
import numpy as np
from datetime import date
import requests
import pickle
import re
import time
import json
import json.decoder
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
from typing import List

In [97]:
def makeCMCApiCall(url: str, params: dict, retries: int=3) -> Optional[Dict[str, Any]]:
    """ Make a GET request to the CoinGecko API, retrying when an attempt fails.

    An attempt counts as failed when the request times out, the response has a
    non-OK status code, or the response body cannot be decoded as JSON.

    Args:
        url (str): The API endpoint URL to call.
        params (dict): A dictionary of query parameters to include in the API call.
        retries (int): The maximum number of attempts to make. Default is 3.

    Returns:
        The decoded JSON body of the first successful response, or None if every
        attempt failed.
    """
    for _ in range(retries):
        try:
            response = requests.get(url, params=params, timeout=3)
        except requests.exceptions.Timeout:
            # Timeout error, retry after a short delay
            print('The API call timed out, retrying...')
            time.sleep(1)
            continue

        if response.ok:
            try:
                return response.json()
            except ValueError as e:
                # ValueError is the common base of json.JSONDecodeError and of the
                # decode errors raised by other requests versions / JSON backends,
                # so a malformed body always falls through to the next attempt.
                print(f'Error decoding JSON response: {str(e)}')
        else:
            # There was an error, retry after a short delay
            print(f'The API call failed with status code {response.status_code}, retrying...')
            time.sleep(0.5)

    # report the number of attempts actually made rather than a fixed count
    print(f'The api call failed after {retries} attempts.')
    return None

In [98]:
def getDateList(start_date_str: str, end_date_str: str) -> List[str]:
    """
    Enumerate every calendar day from the start date through the end date.

    Args:
        start_date_str (str): first day of the range, formatted as 'dd-mm-yyyy'
        end_date_str (str): last day of the range, formatted as 'dd-mm-yyyy'

    Returns:
        list: one 'dd-mm-yyyy' string per day, in chronological order, with both
              endpoints included (empty when the end date precedes the start date)
    """
    day_format = '%d-%m-%Y'

    # parse both endpoints with the shared format
    first_day = datetime.strptime(start_date_str, day_format)
    last_day = datetime.strptime(end_date_str, day_format)

    # the +1 makes the end date part of the range
    n_days = (last_day - first_day).days + 1

    # step forward one day at a time and render each day back to a string
    return [
        (first_day + timedelta(days=offset)).strftime(day_format)
        for offset in range(n_days)
    ]

In [99]:
def formCoingeckoAssetUniverse(cmc_assets_fp: str, base_params: dict, base_url: str) -> pd.DataFrame:
    """ form universe of coingecko assets to obtain data for, mapped to cmc slugs.

    Each cmc asset is matched against the coingecko '/coins/list' response. For every
    coingecko entry the first of these tests that hits decides which comparison is
    applied: exact lowercased name, exact lowercased id, letters-only name,
    letters-only id. Only rows that are still unmatched get filled in, so the first
    coingecko entry to match a cmc asset wins. A fixed set of manual overrides is
    applied at the end.

    Args:
        cmc_assets_fp (str): filepath for the pickled cmc asset universe; the pickle
                             is read as a dict whose values are lists of cmc asset names.
        base_params (dict): dictionary containing the basic parameters for the coingecko api call.
        base_url (str): the base url for pinging the coingecko api.

    Returns:
        cmc_assets_df (pd.DataFrame): crosswalk with columns 'asset_cmc' and 'asset_gecko';
                                      'asset_gecko' is None where no match was found.

    Raises:
        RuntimeError: if the coingecko coin list could not be retrieved.
    """
    # little helper function
    def removeNonLetters(text):
        return re.sub(r'[^a-zA-Z]', '', text)

    # import cmc token universe
    # NOTE(review): pickle.load can execute arbitrary code contained in the file;
    # only point cmc_assets_fp at a pickle from a trusted source.
    with open(cmc_assets_fp, 'rb') as f:
        cmc_asset_universe_dict = pickle.load(f)

    # form unique asset df
    cmc_assets = []
    for asset_list in cmc_asset_universe_dict.values():
        cmc_assets.extend(asset_list)
    cmc_assets = list(np.unique(np.array(cmc_assets)))
    cmc_assets_df = pd.DataFrame(data={'asset_cmc': cmc_assets})

    # obtain coingecko asset ids
    endpoint = '/coins/list'
    url = f"{base_url}{endpoint}"
    params = base_params.copy()
    params['include_platform'] = 'false'
    id_symbol_dict_list = makeCMCApiCall(url, params)
    if id_symbol_dict_list is None:
        # the api helper returns None once all of its attempts have failed
        raise RuntimeError(f"Could not retrieve the coingecko asset list from {url}")

    # create editted names of cmc asset
    cmc_assets_df['asset_cmc_lower'] = cmc_assets_df.asset_cmc.str.lower()
    cmc_assets_df['asset_cmc_lower_nosymbol'] = cmc_assets_df.asset_cmc_lower.apply(removeNonLetters)

    # sets give constant-time membership tests inside the loop over the full coin list
    cmc_assets_lower = set(cmc_assets_df.asset_cmc_lower.values)
    cmc_assets_lower_nosymbol = set(cmc_assets_df.asset_cmc_lower_nosymbol.values)

    # the letters-only names must stay unique or the fuzzy matches below are ambiguous
    assert cmc_assets_df.shape[0] == len(cmc_assets_lower_nosymbol), \
        "cmc asset names are not unique after removing non-letter characters"

    # match the symbols on various logic
    cmc_assets_df['asset_gecko'] = None
    for id_symbol_dict in id_symbol_dict_list:
        gecko_name = id_symbol_dict['name'].lower()
        gecko_id   = id_symbol_dict['id'].lower()
        gecko_name_nosymbol = removeNonLetters(gecko_name)
        gecko_id_nosymbol   = removeNonLetters(gecko_id)

        # pick the first comparison that hits for this coingecko entry
        if gecko_name in cmc_assets_lower:
            match = (cmc_assets_df.asset_cmc_lower==gecko_name)
        elif gecko_id in cmc_assets_lower:
            match = (cmc_assets_df.asset_cmc_lower==gecko_id)
        elif gecko_name_nosymbol in cmc_assets_lower_nosymbol:
            match = (cmc_assets_df.asset_cmc_lower_nosymbol==gecko_name_nosymbol)
        elif gecko_id_nosymbol in cmc_assets_lower_nosymbol:
            match = (cmc_assets_df.asset_cmc_lower_nosymbol==gecko_id_nosymbol)
        else:
            continue

        # never overwrite a row that an earlier coingecko entry already claimed
        cmc_assets_df.loc[cmc_assets_df.asset_gecko.isnull() & match, 'asset_gecko'] = gecko_id

    # manually fix non matches
    manual_fixes = {
        'aave-old': 'aave',
        'alpha-finance-lab': 'alpha-finance',
        'crypto-com': 'crypto-com-chain',
        'ethereum-pow': 'ethereum-pow-iou',
        'sushiswap': 'sushi',
        'abbc-coin': 'abcc-token',
        'cream-finance': 'cream',
        'haven-protocol': 'haven',
        'fetch': 'fetch-ai',
        'kucoin-token': 'kucoin-shares',
        'orchid': 'orchid-protocol',
        'yearn-finance-ii': 'yearn-finance',
    }
    for cmc_name, gecko_fix in manual_fixes.items():
        cmc_assets_df.loc[cmc_assets_df.asset_cmc==cmc_name, 'asset_gecko'] = gecko_fix

    # return the crosswalk
    return cmc_assets_df[['asset_cmc', 'asset_gecko']]

In [100]:
def pullPriceMcapVolume(base_url: str, base_params: dict, gecko_id_universe: list) -> pd.DataFrame:
    """ Pull price, market cap, and volume data for a given universe of CoinGecko IDs.

    For each id the '/coins/{id}/market_chart' endpoint is queried with
    vs_currency='usd' and days='max'. Timestamps are rounded up to the next day
    boundary and the last observation per day is kept for each series. Ids whose
    call fails, or whose response lacks one of the three series, are skipped
    with a printed message instead of aborting the whole pull.

    Args:
        base_url (str): The base URL for the Coingecko API.
        base_params (dict): A dictionary containing the basic parameters for the Coingecko API call.
                            It is copied, never modified.
        gecko_id_universe (list): A list of unique gecko ids to pull.

    Returns:
        panel_df (pd.DataFrame): panel data with columns 'date', 'asset_gecko', 'usd_per_token_cg', 
                                 'usd_mcap_cg', and 'usd_volume_24h_cg'. Empty (but with these
                                 columns) when no id returned any data.
    """
    output_cols = ['date', 'asset_gecko', 'usd_per_token_cg', 'usd_mcap_cg', 'usd_volume_24h_cg']

    # (key in the api response, column name in the panel)
    series_specs = (
        ('prices', 'usd_per_token_cg'),
        ('market_caps', 'usd_mcap_cg'),
        ('total_volumes', 'usd_volume_24h_cg'),
    )

    # set up params
    params = base_params.copy()
    params.update({
        'vs_currency': 'usd',
        'days': 'max'
    })

    # collect one frame per asset and concatenate once at the end; growing a
    # dataframe with concat inside the loop re-copies all earlier rows every time
    asset_dfs = []
    n_ids = len(gecko_id_universe)

    # loop over assets to pull
    for i, gecko_id in enumerate(gecko_id_universe):
        # monitor progress
        print(f"Processing id #{i+1} ({(i+1)/n_ids*100:.2f}%): {gecko_id}")

        # set up endpoint
        endpoint = f"/coins/{gecko_id}/market_chart"
        url = f"{base_url}{endpoint}"

        # update params with this id
        params['id'] = gecko_id

        # make the call
        response_json = makeCMCApiCall(url, params)

        if response_json is None or any(key not in response_json for key, _ in series_specs):
            # the api helper returns None once its attempts are exhausted
            print(f"No market chart data returned for {gecko_id}, skipping.")
        else:
            asset_df = None
            for key, col in series_specs:
                # extract the series and reduce it to one row per day
                series_df = pd.DataFrame(response_json[key], columns=['date', col]).dropna()
                series_df['date'] = pd.to_datetime(series_df.date, unit='ms').dt.ceil('D').dt.date
                series_df = series_df.groupby('date').last().reset_index()

                # outer merges keep days that are present in only some of the series
                if asset_df is None:
                    asset_df = series_df
                else:
                    asset_df = asset_df.merge(series_df,
                                              on='date',
                                              how='outer',
                                              validate='one_to_one')
            asset_df['asset_gecko'] = gecko_id
            asset_dfs.append(asset_df[output_cols])

        # space out the calls
        time.sleep(0.2)

    # nothing was pulled: hand back an empty panel with the expected columns
    if not asset_dfs:
        return pd.DataFrame(columns=output_cols)

    # clean up the results
    panel_df = pd.concat(asset_dfs)
    panel_df = panel_df[panel_df.asset_gecko.isin(gecko_id_universe)]

    return panel_df

In [10]:
if __name__ == "__main__":
    # Script entry point: build the cmc -> coingecko crosswalk, pull the
    # price / market cap / volume panel for the matched assets, and pickle both.

    # set args
    api_fp = '../../admin/coingecko.txt'
    # NOTE(review): start_date and end_date are not referenced again in the
    # visible code; kept in case another cell relies on them.
    start_date = date(2015, 1, 1)
    end_date   = date(2023, 2, 1)
    base_url = "https://pro-api.coingecko.com/api/v3"
    cmc_assets_fp = "../data/raw/cmc_asset_universe.pkl"
    cw_fp = "../data/raw/coingecko_cmc_cw.pkl"
    panel_fp = "../data/raw/coingecko_price_volume_mcap_panel.pkl"

    # import api key (first line of the key file) and set base parameters
    with open(api_fp) as f:
        API_KEY = f.readline().strip()
    if not API_KEY:
        raise ValueError(f"No API key found on the first line of {api_fp}")
    base_params = {'x_cg_pro_api_key': API_KEY}

    # Test it is working; the timeout keeps a dead connection from hanging the
    # run and raise_for_status reports a rejected key as an HTTP error
    url = f"{base_url}/ping"
    r = requests.get(url, params=base_params, timeout=10)
    r.raise_for_status()
    print(r.json()['gecko_says'])

    # obtain coingecko assets
    cmc_assets_df = formCoingeckoAssetUniverse(cmc_assets_fp, base_params, base_url)
    cmc_assets_df.to_pickle(cw_fp)
    gecko_id_universe = list(np.unique(cmc_assets_df[~cmc_assets_df.asset_gecko.isnull()].asset_gecko.values))

    # pull price mcap and volume data
    panel_df = pullPriceMcapVolume(base_url, base_params, gecko_id_universe)

    # update the universe to just the assets that have price, volume, or mcap data
    gecko_id_universe = list(np.unique(panel_df.asset_gecko.values))

    # save the data
    panel_df.to_pickle(panel_fp)
    

(V3) To the Moon!
