In [534]:
import pandas as pd
import numpy as np
from coinmetrics.api_client import CoinMetricsClient
import time
import pickle
import requests
from datetime import datetime
import time
from typing import Dict, List
import logging

In [535]:
def makeCmApiCall(base_url: str, endpoint: str, params: Dict[str, str], num_retries: int = 3) -> requests.Response:
    """
    Makes a GET request to the given endpoint with the given parameters,
    retrying when the request times out or otherwise fails.

    Args:
    - base_url (str): string representing the base URL for the API; a trailing slash is tolerated.
    - endpoint (str): string representing the endpoint to call, relative to base_url.
    - params (Dict): dictionary containing the query parameters for the API call.
    - num_retries (int): maximum number of attempts before giving up.

    Returns:
    - response (requests.Response): the response object from a successful API call.
      Special case: if the server answers 403 and params contains a 'metrics' key,
      the value of params['metrics'] is returned instead of a response, so the
      caller can record that metric as unavailable.

    Raises:
    - Exception: if every attempt failed; the last request error is chained as the cause.
    """
    # Join on exactly one slash so a base_url ending in '/' does not yield '//' in the path.
    url = f"{base_url.rstrip('/')}/{endpoint.lstrip('/')}"
    last_error = None

    for attempt in range(1, num_retries + 1):
        try:
            response = requests.get(url, params=params, timeout=10)
            # Only short-circuit on 403 when there is a metric name to hand back;
            # otherwise let raise_for_status() report the 403 like any other HTTP error.
            if response.status_code == 403 and 'metrics' in params:
                return params['metrics']
            response.raise_for_status()
            return response
        except requests.exceptions.Timeout as e:
            logging.warning(f"Timeout error occurred: {e}")
            last_error = e
        except requests.exceptions.RequestException as e:
            logging.warning(f"Request error occurred: {e}")
            last_error = e

        # Back off before the next attempt (1s after the first failure, 20s after
        # later ones); do not sleep when no attempts remain.
        if attempt < num_retries:
            time.sleep(1 if attempt == 1 else 20)

    raise Exception(f"Failed to make API call after {num_retries} retries") from last_error

In [536]:
def formCoinmetricsAssetUniverse(client: CoinMetricsClient, cmc_assets_fp: str) -> pd.DataFrame:
    """ map cmc universe to coinmetrics universe.
    (1) pull all cm assets and open my universe of cmc assets.
    (2) adjust cm asset names so they match to my cmc assets.
    (3) merge asset ids together on both the cm asset id and the full name.
    (4) clean the merged data.
    (5) add assets from cm that should be in the universe but aren't in cmc.
    (6) remove stablecoins and derivatives.

    Args:
        client (CoinMetricsClient): cm client object for pinging api.
        cmc_assets_fp (str): filepath to cmc asset universe pickle; the pickle is
            read as a dict whose values are iterables of cmc asset names.

    Returns:
        merged_df (pd.DataFrame): crosswalk with columns 'asset_cmc' and 'asset_cm';
            'asset_cmc' is NaN for the cm assets that are added manually.

    Raises:
        AssertionError: if the cmc names, cm full names, or cm asset ids are not
            unique, or if the combined crosswalk repeats a cm asset id.
        pandas.errors.MergeError: if either merge is not one-to-one.
    """
    # import cmc token universe
    # NOTE(security): pickle.load can execute arbitrary code; only point this at a trusted file.
    with open(cmc_assets_fp, 'rb') as f:
        cmc_asset_universe_dict = pickle.load(f)

    # form unique cmc asset df
    cmc_assets = []
    for assets in cmc_asset_universe_dict.values():
        cmc_assets.extend(assets)
    cmc_assets = list(np.unique(np.array(cmc_assets)))
    cmc_assets_df = pd.DataFrame(data={'asset_cmc': cmc_assets})

    # pull all cm assets
    cm_assets_df = pd.DataFrame(client.catalog_full_assets())

    # Check that the asset identifiers are unique in both dataframes
    assert (cmc_assets_df["asset_cmc"].is_unique
            and cm_assets_df["full_name"].is_unique
            and cm_assets_df['asset'].is_unique)

    # remove duplicated cm asset; they have a data error
    # (.copy() so the .loc assignments below write to an independent frame, not a view of a slice)
    cm_assets_df = cm_assets_df[~cm_assets_df.asset.isin(['seed', 'tree', 'aurora'])].copy()

    # change cm full names before merge so they match cmc for known nonmatches
    # (cm asset id -> cmc name)
    full_name_overrides = {
        'aave': 'aave-old',
        'alpha': 'alpha-finance-lab',
        'mco': 'crypto-com',
        'fet': 'fetch',
        'clv': 'clover',
        'gno': 'gnosis-gno',
        'glm': 'golem-network-tokens',
        'hive': 'hive-blockchain',
        'rook': 'keeperdao',
        'yffii': 'yearn-finance-ii',
        'btt': 'bittorrent',
        'idex': 'aurora',
        'egld': 'multiversx-egld',
        'cfx': 'confluxnetwork',
        'xch': 'chia-network',
        'syn': 'synapse2',
    }
    for cm_asset, cmc_name in full_name_overrides.items():
        cm_assets_df.loc[cm_assets_df.asset == cm_asset, 'full_name'] = cmc_name

    # clean the asset names to just low case letters and numbers and merge
    # regex=True is required: pandas >= 2.0 treats the pattern as a literal string by default,
    # which would leave the names uncleaned.
    non_alnum = r"[^a-zA-Z0-9]"
    cmc_assets_df["asset_clean"] = cmc_assets_df["asset_cmc"].str.lower().str.replace(non_alnum, "", regex=True)
    cm_assets_df["asset_clean"] = cm_assets_df["full_name"].str.lower().str.replace(non_alnum, "", regex=True)
    merged_df = pd.merge(cmc_assets_df, cm_assets_df,
                         on="asset_clean", how='inner',
                         validate='one_to_one')

    # repeat but use the unique asset abbreviation id from cm
    cm_assets_df["asset_clean"] = cm_assets_df["asset"].str.lower().str.replace(non_alnum, "", regex=True)
    merged_df2 = pd.merge(cmc_assets_df, cm_assets_df,
                          on="asset_clean", how='inner',
                          validate='one_to_one')

    # remove duplicated assets from the two merged dataframes and put them together
    merged_df2 = merged_df2[~merged_df2.asset.isin(list(merged_df.asset.values))]
    merged_df = pd.concat((merged_df, merged_df2))
    assert merged_df.asset.is_unique

    # clean up the merged data
    merged_df = merged_df[['asset_cmc', 'asset']]
    merged_df = merged_df.rename(columns={'asset': 'asset_cm'})
    merged_df = merged_df.reset_index(drop=True)

    # manually add to my universe of cm assets these assets to consider
    assets_to_add = ['ape', 'apt', 'arpa', 'badger', 'bal', 'cake', 'cel', 'comp', 'cvx',
        'dot', 'etc', 'fil', 'flr', 'flux', 'ftt', 'fun', 'gmx', 'grin', 'hnt',
        'inv', 'knc', 'krl', 'luna', 'luna2', 'mir', 'multi', 'nft', 'nu', 'ocean', 'ohm',
        'op', 'poly', 'qi', 'rndr', 'rpl', 'skl', 'snt', 'theta', 'tru', 'xdc', 'zrx']
    merged_df = pd.concat((merged_df, pd.DataFrame(data={'asset_cmc': np.repeat(np.nan, len(assets_to_add)),
                                                        'asset_cm': assets_to_add})))

    # manually remove stables and derivatives
    merged_df = merged_df[~merged_df.asset_cm.isin(['steth', 'wbtc', 'tusd', 'gusd', 'usdd', 'btcb'])]

    # manually add in both aave old and new
    merged_df = pd.concat((merged_df, pd.DataFrame(data={'asset_cmc': ['aave'],
                                                        'asset_cm': ['aave']}))).reset_index(drop=True)

    return merged_df


In [537]:
def pullAssetMetrics(client: CoinMetricsClient, base_url: str, cm_asset_universe: list, base_params: dict, num_retries=3):
    """
    Pulls asset metrics for assets in the given asset universe using the given base URL and parameters.

    For every asset, each metric listed in the catalog is requested at the '1d'
    frequency over that metric's catalogued min_time..max_time window, and the
    per-metric frames are outer-merged on ['asset', 'time'].

    Args:
    - client (CoinMetricsClient): client object for interacting with the CM API.
    - base_url: string representing the base URL for the API.
    - cm_asset_universe: list of strings representing the assets to pull metrics for.
    - base_params: dictionary containing the base parameters for the API call (not modified).
    - num_retries: integer representing the number of times to retry the API call in case of an error.

    Returns:
    - results_df: Pandas DataFrame containing the asset metrics.

    Raises:
    - ValueError: if a metric has no '1d' frequency option.
    - AssertionError: if catalog assets or metric names are not unique, or if
      fewer rows than expected come back for a metric.
    """
    # Pull all the metrics for all assets
    asset_metrics_df = pd.DataFrame(client.catalog_full_assets())

    # Cut down to CM universe
    asset_metrics_df = asset_metrics_df[asset_metrics_df.asset.isin(cm_asset_universe)]
    assert asset_metrics_df.asset.is_unique
    asset_universe = list(asset_metrics_df.asset.values)

    # Define endpoint and parameters
    endpoint = 'timeseries/asset-metrics'
    params = base_params.copy()
    params['page_size'] = 1000

    # Collect per-asset frames and concatenate once at the end (avoids re-copying
    # the growing result on every asset). The empty seed frame keeps the
    # 'asset'/'time' columns present even if no asset yields data.
    asset_frames = [pd.DataFrame(data={'asset': [], 'time': []})]

    # Metrics the API key has been refused access to; skipped for later assets
    unavailable_metrics = set()

    # For every asset, pull all metrics available for this asset
    for i, asset in enumerate(asset_universe):
        # Monitor progress
        print(f"Processing the {i+1}th asset ({(i+1)/len(asset_universe)*100:.2f}%): {asset}")

        # Initialize object for this asset results
        asset_results_df = pd.DataFrame(data={'asset': [], 'time': []})

        # update parameters for this asset
        params['assets'] = asset

        # determine all metrics for this asset (skip if there aren't metrics for this asset)
        metrics = asset_metrics_df[asset_metrics_df.asset==asset].metrics.values[0]
        if not isinstance(metrics, list):
            continue
        metrics_df = pd.DataFrame(metrics)

        # pull data for each metric
        assert metrics_df.metric.is_unique
        for metric in list(metrics_df.metric.values):
            # Print out the metric we are doing
            print(f"Working on metric: {metric}.")

            # Skip the metric if we know we don't have access
            if metric in unavailable_metrics:
                continue

            # form dataframe of the different frequency options for this metric
            metric_options_df = pd.DataFrame(metrics_df[metrics_df.metric==metric].frequencies.values[0])

            # Set frequency to 1d but report if it does not have a one day option and stop execution
            if '1d' in list(metric_options_df.frequency.values):
                frequency = '1d'
            else:
                print(metric_options_df)
                raise ValueError(f"The metric {metric} does not have a 1d frequency option.")

            # Set the start and end time for this frequency
            frequency_row = metric_options_df[metric_options_df.frequency==frequency]
            start_time = frequency_row.min_time.values[0]
            end_time = frequency_row.max_time.values[0]

            # Update params for this metric and frequency
            params['metrics'] = metric
            params['frequency'] = frequency
            params['start_time'] = start_time
            params['end_time'] = end_time

            # Make the API request (a str return value signals a 403 for this metric)
            response = makeCmApiCall(base_url, endpoint, params, num_retries)
            if isinstance(response, str):
                unavailable_metrics.add(response)
                print(f'The metric {metric} is not available for my API key.')
                continue
            # NOTE(review): only the 'data' field of this single response is read. If the
            # API paginates windows longer than page_size rows, the row-count check below
            # fails rather than silently truncating -- confirm against the API pagination docs.
            data = response.json()['data']
            asset_df = pd.DataFrame(data)

            # Confirm we obtained the expected number of obs
            # (the last 7 characters of the timestamp are dropped and an explicit UTC offset appended
            # so that datetime.fromisoformat can parse it)
            start_datetime = datetime.fromisoformat(start_time[:-7] + '+00:00')
            end_datetime = datetime.fromisoformat(end_time[:-7]+ '+00:00')
            num_days = (end_datetime - start_datetime).days
            assert asset_df.shape[0] >= (num_days-1), f"Did not obtain expected number of days for {asset} {metric}"

            # Add data to master dataframe; an empty payload has no 'asset'/'time'
            # columns to merge on, so there is nothing to add in that case
            if not asset_df.empty:
                asset_results_df = asset_results_df.merge(asset_df,
                                                          on=['asset', 'time'],
                                                          how='outer',
                                                          validate='one_to_one')

            # Space out the calls
            time.sleep(5)

        # Add this asset's results to the overall collection
        asset_frames.append(asset_results_df)

        # Space out the calls across assets
        time.sleep(20)

    results_df = pd.concat(asset_frames)
    return results_df


In [None]:
if __name__ == "__main__":
    # set parameters
    cmc_assets_fp = "../data/raw/cmc_asset_universe.pkl"
    cm_api_fp = '../../admin/coinmetrics.txt'
    cw_fp = "../data/raw/coinmetrics_cmc_cw.pkl"
    # NOTE(review): panel_fp is defined but the pulled panel is never written to it -- confirm intent.
    panel_fp = "../data/raw/coinmetrics_panel.pkl"
    # no trailing slash: makeCmApiCall joins base_url and endpoint with '/'
    base_url = 'https://api.coinmetrics.io/v4'

    # read the API key (first line of the key file) before anything that needs it
    with open(cm_api_fp) as f:
        API_KEY = f.readline().strip()
    if not API_KEY:
        raise ValueError(f"No API key found on the first line of {cm_api_fp}")

    # base_params depends on API_KEY, so it must be built after the key is read;
    # building it earlier raises NameError on a fresh kernel
    base_params = {'api_key': API_KEY}

    # initialize client class
    client = CoinMetricsClient(API_KEY)

    # obtain coinmetrics assets
    cw_df = formCoinmetricsAssetUniverse(client, cmc_assets_fp)
    cw_df.to_pickle(cw_fp)
    cm_asset_universe = list(np.unique(cw_df[~cw_df.asset_cm.isnull()].asset_cm.values))

    # pull all metrics for assets in universe
    asset_metrics_df = pullAssetMetrics(client, base_url, cm_asset_universe, base_params)

In [267]:
# TODO
# - Make sure I pulled the various volume measures and the supply / market-cap measures.
# - For assets with neither a reference rate nor priceUSD, see if I can get some other price series.
# - For all assets in my universe, find out when each first has an exchange price at a legitimate exchange, and add this data somewhere.


In [None]:
# TODO: pull the creation date from the asset profile
# TODO: pull their taxonomy
# TODO: get a list of legitimate exchanges from them; start from the list I already have and maybe add some of theirs
# TODO: pull exchange metrics for those exchanges (maybe just volume?)
# TODO: pull open interest in futures, maybe as a macro variable, but see if I can get it for a bunch of assets
# TODO: pull the institutions and associated metrics to see if anything is of interest
# TODO: pull bid and ask prices for markets of the target coins?
# TODO: pull DeFi balance sheets
# TODO: see if I am missing anything else to pull