In [111]:
import pandas as pd
import numpy as np
import pickle
import time
from typing import Dict, List
from datetime import datetime
from helper_functions import Helper

In [112]:
def formCoingeckoAssetUniverse(base_url: str, base_params: Dict[str, str], asset_universe: List[str]) -> pd.DataFrame:
    """ Form universe of coingecko assets mapped to coinmetrics asset ids.

    Calls the `/coins/list` endpoint, keeps the coins whose `symbol` appears in
    `asset_universe`, removes a hand-curated list of CoinGecko ids that would
    otherwise share a symbol with the intended coin, and verifies that what is
    left maps every requested asset exactly once.

    Args:
        base_url (str): Base URL for the API.
        base_params (Dict[str, str]): Base parameters for the API (copied, never mutated).
        asset_universe (List[str]): list of strings of coinmetrics asset IDs.

    Returns:
        df (pd.DataFrame): crosswalk between asset_cg IDs and asset_cm IDs,
            sorted by asset_cm with a fresh integer index.

    Raises:
        AssertionError: if any asset in `asset_universe` has no CoinGecko id or
            is mapped to more than one CoinGecko id.
    """
    # set ids that we will drop to make one to one mapping
    ids_to_drop = ['acala', 'aavegotchi-alpha', 'alphacoin', 'polyalpha-finance', 'apemove',
        'flux', 'zelcash', 'sol-wormhole', 'wrapped-solana', 'playground-waves-floor-index',
        'velocimeter-flow', 'unicorn-token', 'uniswap-wormhole', 'superrarebears-rare', 'unique-one',
        'twitfi', 'truebit-protocol', 'tron-bsc', 't', 'thorchain-erc20', 'san-diego-coin',
        'the-sandbox-wormhole', 'retawars-goldrose-token', 'green-ride-token', 'synergy-diamonds',
        'supernova', 'superciety', 'aztec-nodes-sun', 'stox', 'gmt-token', 'atlas-fc-fan-token',
        'atlantis', 'smartlands', 'shibchain', 'shiba-inu-wormhole', 'rose', 'rose-finance',
        'heco-peg-xrp', 'binance-peg-xrp', 'rad',
        'quick', 'qi-dao', 'qiswap', 'galatasaray-fan-token', 'poly-maximus', 'binance-peg-polkadot',
        'playchip', 'planet', 'plair', 'orcadao', 'binance-peg-ontology', 'binance-coin-wormhole',
        'heco-peg-bnb', 'oec-binance-coin', 'meta-dance', 'mechaverse', 'mask-vault-nftx',
        'laro', 'binance-peg-litecoin', 'audius-wormhole', 'listenify', 'lido-dao-wormhole',
        'avalanche-wormhole', 'binance-peg-avalanche', 'binance-peg-bitcoin-cash', 'binance-peg-cardano',
        'compound-coin', 'bondly-defi', 'avatly', 'constitutiondao-wormhole', 'covicoin',
        'creamlands', 'icecream-finance', 'decentraland-wormhole', 'genesis-mana', 'binance-peg-dogecoin',
        'dydx-wormhole', 'binance-peg-eos', 'ethereum-wormhole', 'binance-peg-filecoin', 'ftx-wormhole',
        'gas-dao', 'game', 'ecowatt', 'cybertronchain', 'hymnode', 'binance-peg-iotex']

    # obtain coingecko asset ids
    endpoint = '/coins/list'
    url = f"{base_url}{endpoint}"
    params = base_params.copy()
    params['include_platform'] = 'false'
    response_json = Helper.makeApiCall(url, headers={}, params=params)
    df = pd.DataFrame(response_json).drop(columns='name')

    # subset down to matched assets
    df = df[df.symbol.isin(asset_universe)]

    # remove duplicated assets
    df = df[~df.id.isin(ids_to_drop)]

    # manually add the one asset the symbol match does not pick up; only do so
    # when it was actually requested, otherwise the coverage check below could
    # never pass for a universe that does not contain it
    if 'nano' in set(asset_universe):
        df = pd.concat([df, pd.DataFrame(data={'id': ['nano'], 'symbol': ['nano']})])

    # rename
    df = df.rename(columns={'id': 'asset_cg', 'symbol': 'asset_cm'})

    # confirm full one to one mapping: every requested asset is present, and
    # no asset maps to several coingecko ids (comparing np.unique() outputs
    # would collapse such duplicates and let them through unnoticed)
    missing = sorted(set(asset_universe) - set(df.asset_cm))
    duplicated = sorted(df.asset_cm[df.asset_cm.duplicated()].unique())
    if missing or duplicated:
        raise AssertionError(
            "coingecko crosswalk is not one to one: "
            f"assets without a coingecko id: {missing}; "
            f"assets with several coingecko ids: {duplicated}")

    # return
    return df.sort_values(by='asset_cm', ignore_index=True)

In [113]:
def _extractGeckoCovariates(response_json: dict, gecko_id: str, current_date: str) -> dict:
    """ Flatten one `/coins/{id}/history` response into a single panel row.

    Any section or field that is absent (or null) in the response is recorded
    as None so that one sparse day cannot abort the whole pull.

    Args:
        response_json (dict): parsed JSON body of the history call.
        gecko_id (str): id that was requested; used if the body carries no 'id'.
        current_date (str): date of the observation in format 'YYYY-MM-DD'.

    Returns:
        row (dict): one value per panel column.
    """
    market = response_json.get('market_data') or {}
    community = response_json.get('community_data') or {}
    developer = response_json.get('developer_data') or {}
    code_changes = developer.get('code_additions_deletions_4_weeks') or {}
    interest = response_json.get('public_interest_stats') or {}

    row = {'date': np.datetime64(datetime.strptime(current_date, '%Y-%m-%d'), 'D'),
           'asset_cg': response_json.get('id', gecko_id),
           'usd_per_token_cg': (market.get('current_price') or {}).get('usd'),
           'usd_volume_cg': (market.get('total_volume') or {}).get('usd'),
           'usd_mcap_cg': (market.get('market_cap') or {}).get('usd')}

    for field in ('twitter_followers', 'reddit_average_posts_48h', 'reddit_average_comments_48h',
                  'reddit_subscribers', 'reddit_accounts_active_48h'):
        row[field] = community.get(field)

    for field in ('forks', 'stars', 'subscribers', 'total_issues', 'closed_issues',
                  'pull_requests_merged', 'pull_request_contributors', 'commit_count_4_weeks'):
        row[field] = developer.get(field)
    row['code_additions_4_weeks'] = code_changes.get('additions')
    row['code_deletions_4_weeks'] = code_changes.get('deletions')

    row['alexa_rank'] = interest.get('alexa_rank')

    return row


def pullAssetCovariates(base_url: str, base_params: Dict[str, str], gecko_id_universe: List[str], start_date: str, end_date: str) -> pd.DataFrame:
    """ Pull various asset covariates for a given universe of CoinGecko IDs.

    Makes one `/coins/{id}/history` call per (asset, day) pair and keeps only
    the days for which the response contains a 'market_data' section.

    Args:
        base_url (str): The base URL for the Coingecko API.
        base_params (dict): A dictionary containing the basic parameters for the Coingecko API call.
        gecko_id_universe (list): A list of unique gecko ids to pull.
        start_date (str): string time for the start of the study window in format 'YYYY-MM-DD'.
        end_date (str): string time for the end of the study window in format 'YYYY-MM-DD'.

    Returns:
        asset_covars_df (pd.DataFrame): panel data with asset covariates, one row
            per ('date', 'asset_cg') pair.
    """
    # set up object to store all; the asset column is named 'asset_cg' because
    # that is the key the rows are written to and de-duplicated on below
    panel_columns = ['date',
                     'asset_cg',
                     'usd_per_token_cg',
                     'usd_volume_cg',
                     'usd_mcap_cg',
                     'twitter_followers',
                     'reddit_average_posts_48h',
                     'reddit_average_comments_48h',
                     'reddit_subscribers',
                     'reddit_accounts_active_48h',
                     'forks',
                     'stars',
                     'subscribers',
                     'total_issues',
                     'closed_issues',
                     'pull_requests_merged',
                     'pull_request_contributors',
                     'code_additions_4_weeks',
                     'code_deletions_4_weeks',
                     'commit_count_4_weeks',
                     'alexa_rank']
    gecko_covars_dict = {column: [] for column in panel_columns}

    # the date grid does not depend on the asset, so build it once; list() keeps
    # it reusable across assets even if the helper hands back a one-shot iterable
    all_dates = list(Helper.generateDailyDateList(start_date, end_date))

    # loop over assets to pull
    for i, gecko_id in enumerate(gecko_id_universe):
        # monitor progress
        print(f"Processing id #{i+1} ({(i+1)/len(gecko_id_universe)*100:.2f}%): {gecko_id}")

        # set up endpoint
        endpoint = f"/coins/{gecko_id}/history"
        url = f"{base_url}{endpoint}"

        # set up params
        params = base_params.copy()
        params['id'] = gecko_id

        # Loop over all dates to pull
        for current_date in all_dates:
            # update params: the endpoint takes 'DD-MM-YYYY', the grid is 'YYYY-MM-DD'
            params['date'] = current_date[8:]+current_date[4:8]+current_date[:4]

            # make the call
            response_json = Helper.makeApiCall(url, headers={}, params=params)

            # keep the day only if market data came back
            # NOTE(review): the isinstance guard assumes a failed call may hand
            # back something other than a dict - confirm against Helper.makeApiCall
            if isinstance(response_json, dict) and 'market_data' in response_json:
                row = _extractGeckoCovariates(response_json, gecko_id, current_date)
                for column in panel_columns:
                    gecko_covars_dict[column].append(row[column])

            # space out the calls, including the ones that returned nothing
            time.sleep(0.1)

    # convert to df to return
    panel_df = pd.DataFrame(gecko_covars_dict)

    # clean up the data
    panel_df = panel_df.drop_duplicates(subset=['date', 'asset_cg'])

    return panel_df


In [114]:
if __name__ == "__main__":
    # Set args
    CW_IN_FP = '../data/derived/cm_to_coinapi_cw.pkl'
    ASSET_IN_FP = '../data/clean/asset_universe_dict.pickle'
    API_FP = '../../admin/coingecko.txt'
    START_DATE = '2016-07-01'
    END_DATE = '2023-01-02'
    BASE_URL = "https://pro-api.coingecko.com/api/v3"
    PANEL_OUT_FP = "../data/raw/coingecko_panel.pkl"
    CW_OUT_FP = '../data/raw/coingecko_coinmetrics_cw.pkl'

    # Import asset universe and cw
    # NOTE(review): cw_df is loaded but not used anywhere in this cell - confirm
    # whether a later cell needs it before removing the read
    cw_df = pd.read_pickle(CW_IN_FP)
    # pickle executes arbitrary code on load: only point these paths at files
    # produced by this project's own pipeline
    with open(ASSET_IN_FP, "rb") as f:
        asset_universe_dict = pickle.load(f)
    asset_universe = Helper.findUniqueAssets(asset_universe_dict)

    # import api key (first line of the key file) and set base parameters
    with open(API_FP) as f:
        API_KEY = f.readline().strip()
    if not API_KEY:
        raise ValueError(f"no API key found on the first line of {API_FP}")
    BASE_PARAMS = {'x_cg_pro_api_key': API_KEY}

    # Test it is working; goes through the same helper as every other call in
    # this notebook because `requests` is never imported here, so the direct
    # requests.get() call raised NameError on a fresh kernel
    ping_json = Helper.makeApiCall(f"{BASE_URL}/ping", headers={}, params=BASE_PARAMS)
    print(ping_json['gecko_says'])

    # Form crosswalk
    cg_df = formCoingeckoAssetUniverse(BASE_URL, BASE_PARAMS, asset_universe)
    cg_df.to_pickle(CW_OUT_FP)

    # Pull covariates to build the panel
    gecko_id_universe = list(np.unique(cg_df.asset_cg.values))
    panel_df = pullAssetCovariates(BASE_URL, BASE_PARAMS, gecko_id_universe, START_DATE, END_DATE)
    panel_df.to_pickle(PANEL_OUT_FP)


(V3) To the Moon!
Processing id #1 (0.36%): 0x


The API call failed with error: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
Retrying after 0.92 seconds.
