In [12]:
# Import packages
import pandas as pd
import numpy as np
import requests
import time
from datetime import date
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json
from dateutil.relativedelta import relativedelta

In [13]:
def initiateAPI(base_url: str, headers: dict) -> Session:
    """Open a session against the cmc pro api and print the key-info response.

    Args:
        base_url (str): root url of the cmc pro api.
        headers (dict): headers attached to every request made with the session.

    Returns:
        session (requests.Session): session carrying `headers`, for later calls.
    """
    cmc_session = Session()
    cmc_session.headers.update(headers)

    # Querying the key-info endpoint is the check; its JSON body is printed as-is.
    key_info_response = cmc_session.get(base_url + '/v1/key/info')
    print(key_info_response.json())

    return cmc_session


In [14]:
if __name__ == "__main__":
    # path of the text file whose first line holds the api key
    api_fp = '../../admin/cmc.txt'

    # read the key: first line of the file with surrounding whitespace removed
    with open(api_fp) as f:
        key_file_lines = f.readlines()
    API_KEY = key_file_lines[0].strip()

    # root url and headers shared by the api calls
    base_url = "https://pro-api.coinmarketcap.com"
    headers = {
        'Accepts': 'application/json',
        'X-CMC_PRO_API_KEY': API_KEY,
    }

    # build the session; initiateAPI prints the key-info response as a check
    session = initiateAPI(base_url, headers)

{'status': {'timestamp': '2023-02-17T17:41:49.897Z', 'error_code': 0, 'error_message': None, 'elapsed': 4, 'credit_count': 0, 'notice': None}, 'data': {'plan': {'credit_limit_daily': 16666, 'credit_limit_daily_reset': 'In 21 hours, 54 minutes', 'credit_limit_daily_reset_timestamp': '2023-02-18T15:36:12.000Z', 'credit_limit_monthly': 500000, 'credit_limit_monthly_reset': 'In 26 days, 21 hours, 54 minutes', 'credit_limit_monthly_reset_timestamp': '2023-03-16T15:36:12.000Z', 'rate_limit_minute': 60}, 'usage': {'current_minute': {'requests_made': 0, 'requests_left': 60}, 'current_day': {'credits_used': 0, 'credits_left': 16666}, 'current_month': {'credits_used': 0, 'credits_left': 500000}}}}


## (1) Determine universe of tokens

In [19]:
# Specify the dates to obtain: 'YYYY-MM-DD' strings, one historical listings call per entry
dates = ['2015-10-01']


In [20]:
# Obtain the top tokens (up to 5000 per call) for each date in `dates`
token_cmc_ids = []

# The endpoint is the same for every date, so build the url once
endpoint = '/v1/cryptocurrency/listings/historical'
final_url = base_url+endpoint

# The loop variable is not called `date` so it does not shadow `datetime.date`
for snapshot_date in dates:
    # Set up the call
    parameters = {'date': snapshot_date,
                  'limit': 5000,
                  'convert': 'USD',
                  'aux': 'cmc_rank'}

    # Make the call
    try:
        response = session.get(final_url, params=parameters)
    except (ConnectionError, Timeout, TooManyRedirects) as e:
        # Skip this date: falling through would re-use `data` left over from a
        # previous iteration (duplicating its ids) or fail with a NameError.
        print(e)
        print('\n')
        continue

    # Parse once and stop on any error reported by the api
    data = json.loads(response.text)
    error_message = data['status']['error_message']
    if error_message is not None:
        raise RuntimeError('CMC returned an error for {}: {}'.format(snapshot_date, error_message))

    # Collect the CMC id of every token listed on that date
    token_cmc_ids.extend(token['id'] for token in data['data'])


In [21]:
# Number of ids collected over all dates (repeats across dates are still included here)
len(token_cmc_ids)

599

In [494]:
# Collapse the collected ids to a sorted array of distinct values
unique_token_cmc_ids = np.unique(token_cmc_ids)

In [495]:
# Obtain the CMC mapping of IDs to tokens
full_data = []
starts = [1, 5001, 10001]  # offset of the first row of each 5000-row page

# The endpoint is the same for every page, so build the url once
endpoint = '/v1/cryptocurrency/map'
final_url = base_url+endpoint

for start in starts:
    # Set up the call
    parameters = {'listing_status': 'active,inactive,untracked',
                  'limit': 5000,
                  'start': start,
                  'aux': 'platform,first_historical_data,last_historical_data'}

    # Make the call
    try:
        response = session.get(final_url, params=parameters)
    except (ConnectionError, Timeout, TooManyRedirects) as e:
        # Skip this page: falling through would extend `full_data` with `data`
        # left over from a previous page (or from an earlier cell).
        print(e)
        print('page starting at {} was not retrieved'.format(start))
        continue

    # Parse once and stop on any error reported by the api
    r_json = json.loads(response.text)
    error_message = r_json['status']['error_message']
    if error_message is not None:
        raise RuntimeError('CMC returned an error for start={}: {}'.format(start, error_message))
    data = r_json['data']

    # Append the results
    full_data.extend(data)

In [496]:
# Flatten each raw map record into the columns kept for the token crosswalk
clean_full_data = []
for token_dict in full_data:
    # The two history dates are kept only when the record carries both keys;
    # if either is missing, both columns are set to None.
    has_history = ('first_historical_data' in token_dict
                   and 'last_historical_data' in token_dict)

    clean_full_data.append({
        'cmc_id': token_dict['id'],
        'cmc_symbol': token_dict['symbol'],
        'name': token_dict['name'],
        'cmc_slug': token_dict['slug'],
        'cmc_first_date': token_dict['first_historical_data'] if has_history else None,
        'cmc_last_date': token_dict['last_historical_data'] if has_history else None,
        # tokens without a platform entry get no platform slug
        'platform_cmc_slug': (token_dict['platform']['slug']
                              if token_dict['platform'] is not None else None),
    })

cmc_tokens_df = pd.DataFrame(clean_full_data)

In [497]:
# Keep only the mapped tokens whose id is in the target universe.
# validate='one_to_one' makes pandas raise if cmc_id repeats on either side.
target_tokens_df = pd.DataFrame({'cmc_id': unique_token_cmc_ids})
final_df = pd.merge(cmc_tokens_df,
                    target_tokens_df,
                    how='inner',
                    on='cmc_id',
                    validate='one_to_one')

In [498]:
# Remove tokens that CMC does not have data for (ids listed by hand)
cmc_ids_to_remove = [76]
final_df = final_df[~final_df.cmc_id.isin(cmc_ids_to_remove)]

In [499]:
# Display the token universe left after the merge and the manual removals
final_df

Unnamed: 0,cmc_id,cmc_symbol,name,cmc_slug,cmc_first_date,cmc_last_date,platform_cmc_slug
0,1,BTC,Bitcoin,bitcoin,2013-04-28T18:47:21.000Z,2022-02-11T01:49:00.000Z,
1,2,LTC,Litecoin,litecoin,2013-04-28T18:47:22.000Z,2022-02-11T01:49:00.000Z,
2,3,NMC,Namecoin,namecoin,2013-04-28T18:47:22.000Z,2022-02-11T01:49:00.000Z,
3,4,TRC,Terracoin,terracoin,2013-04-28T18:47:22.000Z,2022-02-11T01:49:00.000Z,
4,5,PPC,Peercoin,peercoin,2013-04-28T18:47:23.000Z,2022-02-11T01:49:00.000Z,
...,...,...,...,...,...,...,...
8940,12243,RDOGE,Doge Racing,doge-racing,2021-10-01T10:15:17.000Z,2021-10-17T18:20:09.000Z,bnb
8941,12244,BARREL,BARREL,barrel,2021-10-01T13:39:21.000Z,2021-10-15T18:19:09.000Z,bnb
8942,12249,HEP,Health Potion,health-potion,2021-10-01T14:00:15.000Z,2022-02-11T01:45:00.000Z,bnb
8943,12251,PARA,Parabolic,parabolic,2021-10-01T15:36:29.000Z,2022-02-11T01:46:00.000Z,bnb


## (2) Pull price, market cap, volume data

In [512]:
# Obtain price, volume, and mcap data
# NOTE: THIS IS A HUGE API CALL
token_dfs = []

# The endpoint is the same for every token, so build the url once
endpoint = '/v1/cryptocurrency/quotes/historical'
final_url = base_url+endpoint

# Repeat for every token; ids and slugs are walked together so the slug does
# not have to be looked up by filtering the whole frame on every iteration
for token_id, token_slug in zip(final_df.cmc_id.values, final_df.cmc_slug.values):
    token_id = str(token_id)
    print(token_slug)

    # Set up the call
    parameters = {'id': token_id,
                  'time_start': '2015-01-01',
                  'time_end': '2022-01-07',
                  'count': 1,
                  'interval': '1d',
                  'convert': 'USD'}

    # Make the call. `data` is reset for every token so that a failed or skipped
    # call can never re-use the payload of the previous token.
    data = None
    try:
        response = session.get(final_url, params=parameters)
        r_json = json.loads(response.text)
        error_message = r_json['status']['error_message']
        if error_message is None:
            data = r_json['data']
        elif error_message[:29] != 'Search query is out of range.':
            raise RuntimeError('json has error: ' + str(error_message))
        # otherwise the data is not in the range of interest: `data` stays None
        # and the token is skipped below

    except (ConnectionError, Timeout, TooManyRedirects) as e:
        print(e)
        print('\n')

    # Add the cleaned up data as a data frame if it is not fiat
    if data is not None:
        if data['is_fiat'] == 0:
            token_quote_dict_list = []
            for quote in data['quotes']:
                usd_quote = quote['quote']['USD']
                token_quote_dict_list.append({
                    'date':           usd_quote['timestamp'][:10],  # keep YYYY-MM-DD only
                    'usd_per_token':  usd_quote['price'],
                    'usd_volume_24h': usd_quote['volume_24h'],
                    'usd_mcap':       usd_quote['market_cap'],
                })

            token_df = pd.DataFrame(token_quote_dict_list)
            token_df['cmc_id'] = data['id']
            token_dfs.append(token_df)
        else:
            print(data['name'] + ' is fiat')

    # Pause after every request, including skipped tokens, to respect the rate limit
    time.sleep(1)
    print('\n')

bitcoinregular


skymap


dapp-token


cpuchain


ddkoin


gowithmi


safex-cash


zer-dex


binance-gbp-stable-coin


volume-network


coinchase


ultra


multicoin


syntropy


populous-xbrl-token


dynamite


lnx-protocol


ftx-token


pledge-coin


sharetoken


swace


chainx


opennity


oasis-city


xenoverse


wink


birdchain


mb8-coin


equilibria


linktoken


uptrennd


agrocoin


eminer


bosagora


general-attention-currency


1million-token


mcashchain


crex-token


gomoney


ferrum-network


yobit-token


lighthouse-token


helpico


membrana


3dcoin


asgard


everitoken


bitcloud-pro


custody-token


planet


citex-token


ccuniverse


enecuum


blacknet


blockium


simone


findora


zeuxcoin


deepcloud-ai


stellar-classic


cryptobonusmiles


hintchain


cryptoads-marketplace


klaytn


bitball-treasure


spice


aitheon


sucrecoin


ultralpha


bitcanna


ritocoin


monarch


newyork-exchange


gatetoken


taklimakan-network


sessia


cocos-bcx


defi


d



bigbang-core


mega-lottery-services-global


ptokens-btc


epic-cash


bizzcoin


entherfound


cartesi


lbk


usdj


helix


metaprediction


beer-money


wibx


kardiachain


trexcoin


two-prime-ff1-token


3x-short-bitcoin-sv-token


3x-long-bitcoin-sv-token


3x-short-litecoin-token


3x-long-litecoin-token


3x-short-tezos-token


3x-long-tezos-token


alchemy


3x-short-bitcoin-cash-token


3x-long-bitcoin-cash-token


isiklar-coin


amaten


ghost


crdt


ixinium


ghostprism


ecochain


ecosc


uca-coin


bali-coin


basic


counos-x


binance-krw


jack-token


earnbet


just


sudan-gold-coin


lux-bio-cell


etoro-pound-sterling


etoro-new-zealand-dollar


etoro-euro


algory-project


aax-token


affil-coin


blockidcoin


crypto-holding-frank-token


the-transfer-token


loa-protocol


torex


martkist


ezystayz


senso


aragon-court


tnc-coin


becaz


xenon-xen


asyagro


rebit


skinchain


inverse-bitcoin-volatility-token


bankcoin-reserve


atromg8


buzz



ecomi


eska


empow


ethart


fanbi-token


fanzy


fashion-coin


fisco


force-for-fast


gbrick


funkeypay


globaltrustfund-token


hamebi-token


prepayway


heartnumber


super-running-coin


hemelios


loanburst


peerex


hiblocks


honestcoin


idcm-token


infinite-ricks


inmaxcoin


fxpay


galaxy-pool-coin


intelligence-quickly-chain


jur


just-network


kaleido


kevacoin


khipu-token


itam-games


lovehearts


portal


metacoin


threefold


tronbetdice


cbdao


rmpl


aquila-protocol


halving-coin


kulupu


troneuroperewardcoin


strong


aludra-network


hopr


machix


ndau


universalenergychain


mangochain


ludos


ripple-alpha


nantrade


thc


roonex


mandi-token


smartxone


near-protocol


mantra-dao


rio-defi


curve-dao-token


yamv1


showcase


relevant


happy-birthday-coin


barter


tw-token


web-coin-pay


degenerate-platform


frens-community


waifu-token


gamercoin


money-party


xrpalike-gene


orbyt-token


vig


zensports


ti



florin


piedao-defi-small-cap


polyient-games-governance-token


yeafinance


zero-utility-token


yfos-finance


neutrino-system-base-token


youcash


stream-protocol


xcredit


darkbuild


dexe


lync-network


owl-token-stealthswap


yfpro-finance


easyfi


defi-insurance-protocol


conflux-network


fudfinance


index-cooperative


piedao-defi-large-cap


vera-cruz-coin


wrapped-origin-axie


yfscience


almace-shards


hauteclere-shards


kauri-crypto


qoober


portion


centaur


crypxie


fryworld


jiviz


reflex


thirm-protocol


yearn-finance-infrastructure-labs


yfedfinance


bfis-finance


fridn


pop-network-token


usdl


topcoinfx


stabilize


snowswap


power-index-pool-token


mover


bittiger


sup


doki-doki-finance


dogeswap


seal-finance


limitswap


dracula-token


cofix


acoconut


bfine


dandy-dego


decraft-finance


spaceswap


non-fungible-yearn


shake


talent-token


atari-token


moon


tavittcoin


coreto


global-gaming


yfi-managemen



node-runners


spartan


trinity-protocol


buy-sell


renfil


predictz


rizen-coin


lido-dao


mirrored-apple


spiderdao


mirrored-google


mirrored-tesla


mirrored-netflix


mirrored-alibaba


natural-farm-union-protocol


davincij15-token


mediconnectuk


piranhas


fnk-wallet


mirrored-amazon


mirrored-microsoft


mirrored-twitter


n3rd-finance


defiato


hland-token


tetris


scifi-finance


mirrored-ishares-gold-trust


mirrored-invesco-qqq-trust


mirrored-ishares-silver-trust


mirrored-united-states-oil-fund


mirrored-proshares-vix-short-term-futures-etf


oxygen


nftlootbox


governance-zil


absorber-protocol


cryptonits


biopassport-token


grom


yvs-finance


terra-virtua-kolect


wallstreet-finance


reflector-finance


r34p


refract


mahadao


adappter-token


apy-vision


cybertronchain


first-interchangeable-token


everyonescrypto


torn


filup


fildown


sushiup


xlmdown


xlmup


union-protocol-governance-token


anrkey-x


binance-vnd


gol



cad-coin


dextf-protocol


shrimp-capital


slime-finance


minty-art


blank-wallet


konomi-network


ruler-protocol


degen-index


nft-index


siren


ares-protocol


fusible


playcent


alpaca-finance


big-data-protocol


etha-lend


balpha


pando


keytango


space-cow-boy


taraxa


convergence


oddz


illuvium


inverse-finance


wealthlocks


bonkey-dapp


bogged-finance


yearnyfi-network


idavoll-network


utile-network


unique-photo


belt


cryptex


swop


basketcoin


panda-dao


hyruleswap


gspi-governance


pastel


deadsoul


shd-cash


sovi-finance


ainori


toshimon


a2a-50x-com


project-inverse


cheesecakeswap-token


niubiswap


prime-whiterock-company


landbox


srnartgallery


nerve-finance


coralfarm


safemoon


dfuture


zcore-finance


esportspro


decentralized-nations


revelation-finance


veraswap


awsb


myneighboralice


unifund


roxe-cash


meetple


astronaut


gyen


zusd


octree


flourmix


value-set-dollar


biscuit-farm-financ



wavax


mooni-defi


edgecoin


celo-euro


spore


unicly-genesis-collection


unicly-aavegotchi-astronauts-collection


unicly-cryptopunks-collection


unicly-doki-doki-collection


unicly-chris-mccann-collection


unicly-mystic-axies-collection


unicly-hashmasks-collection


wault-finance-new


hope-token


pendle


lympo-market-token


e1337


openbisea


bitcoin-trc20


zookeeper


dopple-finance


ara-blocks


smaugs-nft


etherland


reflexer-ungovernance-token


evolution


enreachdao


pippi-finance


cryptotycoon


naos-finance


lever-token


goztepe-sk-fantoken


universidad-de-chile-fan-token


legia-warsaw-fan-token


fortuna-sittard-fan-token


dfyn-network


cubiex-power


laikacoin


memepad


labracoin


pop


bonfire


media-network


moonmoon


locgame


nft-alley


faraland


polymoon


impermax


greentrust


life-token


crossfi


epik-protocol


anti-lockdown


unitedcrowd


polkarare


nftb


odin-protocol


aurussilver


croxswap


mercurial-finance


peri-



kombai-inu


tegridy


pamp-cc


gravity-finance


safe-energy


bullperks


yuang-coin


dirty-finance


tastenft


kai-inu


safeswap-governance-token


puglife


babyswap


hamster


sheesha-finance-bep20


bscarmy


stellar-invictus-gaming


homecoin


olecoin


at-finance


cribnb-decentralized-renting-and-sharing


human


sarcophagus


black-eye-galaxy


holder-swap


peach-finance


chumhum


feyorra


memex-exchange


apwine-finance


cake-monster


april


cryptex-finance


safepanda


newinu


dacxi


solfarm


ethereumpay


polychart


dappstore


dare-token


karencoin


zild-finance


cold-koala


dao1


ally-direct-token


star-foxx


supremex


alpha-impact


fluity-usd


creator-platform


the-everlasting-parachain


leopard


orion


bogecoin


blackpool


kommunitas


integral


nftyplay


baby-doge-coin


formation-fi


opulous


moonfarm-finance


hodooi


dina


green-shiba-inu-new


fat-doge


eject


2crazynft


u-coin


torum


hollaex-token


volatility-prot



intersola


cardwallet


relay-token


goldex-token


sherpa


epochtoken


ydragon


mobius-finance


forest-knight


soccerhub


sphynx-network


kampay


vimworld


eloin


storage-area-network-anywhere


banketh


ysl-sysl


nobility-new


os


block-commerce-protocol


immutable


delta-theta


mate


civilization


radio-caca


nuketoken


identity


adapad


nftlaunch


sos-foundation


moonie-nft


wagyuswap


bzzone


art-blocks-curated-full-set


niftynft


dragon-slayer


alinx


polkaparty


grimtoken


robodoge-coin


cardswap


paribus


feisty-doge-nft


duel-network


robofi


metaverse-miner


cake-bank


1doge


bigfoot


dogecoin-2


my-pet-social


buni-universal-reward


somee-social


nftpad


cropperfinance


hibiki-finance


moon-rabbit


yucreat


green-climate-world


joe


kaiken-shiba


honeyfarm-bear


elements-game


laser-eyes


flourish-coin


bored-museum


pocket-doge


big-digital-shares


tarot


pinkslip-finance


binemon


ceres


qubit


yield-y



lucky-lion


torii-finance


bingus-network


owldao


zeropay-finance


mars4


crypto-cars-world


grape-network


doge-racing


barrel


health-potion


parabolic


bombcrypto




In [516]:
# Stack the per-token frames into one long frame (each row is one quote of one token)
df = pd.concat(token_dfs)

## (3) Pull other data

In [531]:
# OBTAIN CMC COVARIATES AT DAILY LEVEL FOR ALL TOKENS
# NOTE: THIS TAKES 40K CREDITS AND ABOUT 60 MINUTES!

# Form list of strings of all dates in study period
dates = list(pd.date_range('2015-01-01', '2022-01-07', freq='D').strftime('%Y-%m-%d'))

# Covariates copied straight from each listing record (same key in and out)
covar_fields = ['num_market_pairs', 'max_supply', 'circulating_supply',
                'total_supply', 'cmc_rank', 'tags']

# Initialize dictionary for the data: one list per output column
cmc_covars_dict = {column: [] for column in ['date', 'cmc_id'] + covar_fields}

# The endpoint is the same for every date, so build the url once
endpoint = '/v1/cryptocurrency/listings/historical'
final_url = base_url+endpoint

# The loop variable is not called `date` so it does not shadow `datetime.date`
for listing_date in dates:
    # Update where we are
    print(listing_date)
    print('\n')

    # Set up the call
    parameters = {'date': listing_date,
                  'limit': 5000,
                  'convert': 'USD',
                  'aux': 'tags,circulating_supply,total_supply,max_supply,cmc_rank,num_market_pairs'}

    # Make the call, allowing up to three attempts for out-of-range responses
    # and connection problems; any other api error stops the pull immediately
    nb_tries = 3
    while True:
        nb_tries -= 1
        try:
            response = session.get(final_url, params=parameters)
            r_json = json.loads(response.text)
            error_message = r_json['status']['error_message']
            if error_message is None:
                break
            if error_message[:29] != 'Search query is out of range.':
                raise RuntimeError('json has error: ' + str(error_message))

            print('error due to out of range')
            time.sleep(1)
            if nb_tries <= 0:
                raise RuntimeError('out of range error occured several times')

        except (ConnectionError, Timeout, TooManyRedirects) as err:
            if nb_tries <= 0:
                raise err
            else:
                print('error due to connection, timeout, or redirect')
                time.sleep(1)

    # Add the data for that day to the dictionary
    for token in r_json['data']:
        cmc_covars_dict['date'].append(listing_date)
        cmc_covars_dict['cmc_id'].append(token['id'])
        for field in covar_fields:
            cmc_covars_dict[field].append(token[field])

    # Delay next call to not break limits
    time.sleep(1)
    

2020-09-22


2020-09-23


2020-09-24


2020-09-25


2020-09-26


2020-09-27


2020-09-28


2020-09-29


2020-09-30


2020-10-01


2020-10-02


2020-10-03


2020-10-04


2020-10-05


2020-10-06


2020-10-07


2020-10-08


2020-10-09


2020-10-10


2020-10-11


2020-10-12


2020-10-13


2020-10-14


2020-10-15


2020-10-16


2020-10-17


2020-10-18


2020-10-19


2020-10-20


2020-10-21


2020-10-22


2020-10-23


2020-10-24


2020-10-25


2020-10-26


2020-10-27


2020-10-28


2020-10-29


2020-10-30


2020-10-31


2020-11-01


2020-11-02


2020-11-03


2020-11-04


2020-11-05


2020-11-06


2020-11-07


2020-11-08


2020-11-09


2020-11-10


2020-11-11


2020-11-12


2020-11-13


2020-11-14


2020-11-15


2020-11-16


2020-11-17


2020-11-18


2020-11-19


2020-11-20


2020-11-21


2020-11-22


2020-11-23


2020-11-24


2020-11-25


2020-11-26


2020-11-27


2020-11-28


2020-11-29


2020-11-30


2020-12-01


2020-12-02


2020-12-03


2020-12-04


2020-12-05


2020-12-06


2020-12-07



In [532]:
# Convert data into a df: each key of the dictionary becomes a column
cmc_covars_df = pd.DataFrame(cmc_covars_dict)

In [533]:
# Restrict both tables to the token ids that appear in both pulls
unique_ids_1 = final_df.cmc_id.values
unique_ids_2 = np.unique(cmc_covars_df.cmc_id.values)
unique_ids_common = list(set(unique_ids_2).intersection(set(unique_ids_1)))

# Row masks are built first, then applied, so each table is filtered once
covars_in_common = cmc_covars_df.cmc_id.isin(unique_ids_common)
universe_in_common = final_df.cmc_id.isin(unique_ids_common)
cmc_covars_df = cmc_covars_df.loc[covars_in_common]
final_df = final_df.loc[universe_in_common]

In [534]:
# OBTAIN GLOBAL COINMARKETCAP DATA

# Set up the call
endpoint = '/v1/global-metrics/quotes/historical'
final_url = base_url+endpoint
parameters = {'time_start': '2014-12-30',
              'time_end': '2022-01-07',
              'count': 10,
              'interval': '1d',
              'convert': 'USD',
              'aux': 'btc_dominance,active_cryptocurrencies,active_exchanges,active_market_pairs,total_volume_24h,total_volume_24h_reported,altcoin_market_cap,altcoin_volume_24h,altcoin_volume_24h_reported'}

response = session.get(final_url, params=parameters)
r_json = json.loads(response.text)

# Stop here on an api error instead of failing later while indexing the payload
if r_json['status']['error_message'] is not None:
    raise RuntimeError('json has error: ' + str(r_json['status']['error_message']))

# Fields read from the USD quote of each entry vs. from the entry itself
usd_fields = ['total_market_cap', 'total_volume_24h', 'total_volume_24h_reported',
              'altcoin_market_cap', 'altcoin_volume_24h', 'altcoin_volume_24h_reported']
entry_fields = ['btc_dominance', 'active_cryptocurrencies',
                'active_exchanges', 'active_market_pairs']

# Initialize dictionary for the data: one list per output column
cmc_macro_dict = {column: [] for column in ['date'] + usd_fields + entry_fields}

# Convert JSON into dictionary
for entry in r_json['data']['quotes']:
    cmc_macro_dict['date'].append(entry['timestamp'][:10])  # keep YYYY-MM-DD only
    for field in usd_fields:
        cmc_macro_dict[field].append(entry['quote']['USD'][field])
    for field in entry_fields:
        cmc_macro_dict[field].append(entry[field])

# Clean up the dataframe to have all study period dates and interpolate missing dates
# NOTE(review): the [1:-6] slice drops the first row and the last six rows of the
# response - presumably to trim it to the study period; confirm against the payload
macro_df = pd.DataFrame(cmc_macro_dict)[1:-6]
macro_df = macro_df.drop_duplicates(subset=['date'], keep='last') # One duplicated row to drop
dates = list(pd.date_range('2015-01-01', '2021-12-31', freq='D').strftime('%Y-%m-%d'))
dates_df = pd.DataFrame(data = {'date': dates})
# The outer merge adds a row (all values missing) for every study date absent from the response
macro_df = macro_df.merge(dates_df,
                          on='date',
                          how='outer',
                          validate='one_to_one')
macro_df['date'] = macro_df['date'].astype('datetime64[ns]')
macro_df = macro_df.sort_values(by='date')
macro_df = macro_df.interpolate()

In [535]:
# DETERMINE RELEVANT EXCHANGES TO PULL HISTORICAL DATA ON

# Set up the call
endpoint = '/v1/exchange/map'
final_url = base_url+endpoint
parameters = dict(listing_status='active',
                  limit=500,
                  aux='first_historical_data')

# Make the call
response = session.get(final_url, params=parameters)
r_json = json.loads(response.text)

# Clean it up: one single-row frame per exchange record, stacked under a fresh 0..n-1 index
exchange_frames = []
for exchange in r_json['data']:
    exchange_frames.append(pd.DataFrame(exchange, index=[0]))
exchange_df = pd.concat(exchange_frames, ignore_index=True)

# Keep only the id and slug columns, renamed with an exchange_ prefix
exchange_df = exchange_df[['id', 'slug']].rename(columns={'id': 'exchange_id',
                                                          'slug': 'exchange_slug'})

In [536]:
# OBTAIN METADATA

# Set up the call: all exchange ids are sent as one comma-separated string
exchange_ids = ','.join(map(str, exchange_df.exchange_id.values))
endpoint = '/v1/exchange/info'
final_url = base_url+endpoint
parameters = dict(id=exchange_ids,
                  aux='date_launched')

# Make the call
response = session.get(final_url, params=parameters)
r_json = json.loads(response.text)

# Add date launched to the data frame; each key of the response is converted
# with int() so it can be compared against the exchange_id column
for key, exchange_info in r_json['data'].items():
    is_this_exchange = exchange_df.exchange_id == int(key)
    exchange_df.loc[is_this_exchange, 'date_launched'] = exchange_info['date_launched']

In [537]:
# Dropping exchanges that do not have historical data (slugs listed by hand)
exchange_names_to_drop = [
    'feg-exchange',
    'uniswap-v3-arbitrum',
    'huckleberry',
    'photonswap-finance',
    'maiar-exchange',
    'katana',
    'kine-protocol-polygon',
    'bit2me',
    'balancer-v2-polygon',
    'balancer-v2-arbitrum',
    'uniswap-v3-polygon',
    'tinyman',
    'algebra',
    'kine-protocol-bsc',
    'btcex-exchange',
]
has_no_history = exchange_df.exchange_slug.isin(exchange_names_to_drop)
exchange_df = exchange_df.loc[~has_no_history]

In [538]:
# OBTAIN EXCHANGE HISTORICAL DATA

# One list per output column, filled row by row below
ex_hist_data_dict = {column: [] for column in ('exchange_id',
                                               'date',
                                               'exchange_volume_24h',
                                               'num_market_pairs')}

# Loop over all exchanges
for exchange_id in exchange_df.exchange_id.values:
    # Progress: print the slug of the first row carrying this id
    current_slug = exchange_df.loc[exchange_df.exchange_id == exchange_id, 'exchange_slug'].iloc[0]
    print(current_slug)

    # Set up the call
    endpoint = '/v1/exchange/quotes/historical'
    final_url = base_url+endpoint
    parameters = dict([('id', exchange_id),
                       ('time_start', '2015-01-01'),
                       ('time_end', '2021-12-31'),
                       ('interval', '1d'),
                       ('count', 10000),
                       ('convert', 'USD')])

    # Make the call
    response = session.get(final_url, params=parameters)
    r_json = json.loads(response.text)

    # Add the data to the dictionary (the date keeps the full timestamp string)
    for ex_data in r_json['data']['quotes']:
        usd_quote = ex_data['quote']['USD']
        ex_hist_data_dict['exchange_id'].append(exchange_id)
        ex_hist_data_dict['date'].append(usd_quote['timestamp'])
        ex_hist_data_dict['exchange_volume_24h'].append(usd_quote['volume_24h'])
        ex_hist_data_dict['num_market_pairs'].append(ex_data['num_market_pairs'])

    # Pause between exchanges
    time.sleep(1)

poloniex
bittrex
kraken
bleutrade
bittylicious
cex-io
bitfinex
hitbtc
exmo
okcoin
indodax
bitstamp
itbit
zaif
therocktrading
coinmate
zonda
coinbase-exchange
bitex-la
bitonic
yobit
huobi-global
litebit
coincheck
liquid
southxchange
bitso
btcbox
coincorner
bitflyer
isx
gemini
dex-trade
exrates
bitmex
independent-reserve
luno
coinone
bisq
korbit
bithumb
lykke-exchange
kuna
mercatox
p2pb2b
tidex
heat-wallet
freiexchange
btc-markets
paribu
btc-alpha
coingi
ripplefox
gatehub
coss
btcturk-pro
stex
waves-exchange
koinim
stellar-decentralized-exchange
buda
btc-trade-ua
localtrade
bitbank
mercado-bitcoin
altcoin-trader
bancor-network
binance
bits-blockchain
tidebit
cryptomarket
okx
gate-io
idex
kucoin
bitcointrade
topbtc
aex
coinfalcon
coinut
satang-pro
zb-com
bigone
lbank
gopax
bibox
coinbene
coinex
upbit
tradeogre
c-patex
crxzone
fatbtc
paymium
ddex
rudex
zebpay
bitbns
unocoin
latoken
crex24
bithesap
cryptonex
cointiger
b2bx
dragonex
hotbit
switcheo
bitforex
kyber-network
coindeal
bitmart
dig

In [539]:
# Convert to dataframe: each key of the dictionary becomes a column
ex_historical_df = pd.DataFrame(ex_hist_data_dict)

## (4) Save all the data

In [540]:
# Save cmc token id crosswalk (final_df) as csv, without the index column
final_df.to_csv('../3-data/raw/cmc_token_universe.csv', index=False)

In [541]:
# Save cmc price volume mcap panel (df) as csv, without the index column
df.to_csv('../3-data/raw/cmc_price_vol_mcap_panel.csv', index=False)

In [542]:
# Save cmc token covars panel (cmc_covars_df) as csv, without the index column
cmc_covars_df.to_csv('../3-data/raw/cmc_token_covars_panel.csv', index=False)

In [543]:
# Save cmc macro timeseries data (macro_df) as csv, without the index column
macro_df.to_csv('../3-data/raw/cmc_macro_timeseries.csv', index=False)

In [544]:
# Save cmc exchange covariates (exchange_df) as csv, without the index column
exchange_df.to_csv('../3-data/raw/cmc_exchange_covar.csv', index=False)

In [545]:
# Save cmc exchange panel data (ex_historical_df) as csv, without the index column
ex_historical_df.to_csv('../3-data/raw/cmc_exchange_panel.csv', index=False)

In [None]:
# MOVE THESE NOTES TO CLEANING

# manually look through it to confirm they are legit tokens
# or maybe give this task to jacob
# or maybe schedule a time to do this with jacob so we 2x the speed

# Let's check whether the 0.01% mcap rule is reasonable for the entire time period.
# Each row below reads: date - approximate total market cap - 0.01% of that total.

# Jan 1 2015 - $5B - $500k
# Jan 1 2016 - $7B - $700k
# Jan 1 2017 - $18B - $1.8M
# Jan 1 2018 - $600B - $60M
# Apr 1 2018 - $300B - $30M
# Jul 1 2018 - $250B - $25M
# Jan 1 2019 - $125B - $12M
# Apr 1 2019 - $145B - $14M
# Jul 1 2019 - $330B - $33M
# Oct 1 2019 - $220B - $22M
# Jan 1 2020 - $200B - $20M
# Apr 1 2020 - $175B - $17M
# Jul 1 2020 - $260B - $26M
# Oct 1 2020 - $340B - $34M
# Jan 1 2021 - $770B - $77M
# Apr 1 2021 - $1.9T - $190M
# Jul 1 2021 - $1.4T - $140M
# Oct 1 2021 - $2T - $200M

## (5) Pull only BTC and ETH data

In [6]:
# set parameters
btc_eth_cmc_ids = [1, 1027]  # CMC ids of the two tokens pulled in this section
start_date      = '2022-08-10'
#start_date      = '2022-07-01'
from datetime import date

# the end of the window is tomorrow's date, formatted like start_date
tomorrow = date.today() + pd.Timedelta(days=1)
end_date = tomorrow.strftime('%Y-%m-%d')

In [7]:
# Obtain price, volume, and mcap data
token_dfs = []

# The endpoint is the same for both tokens, so build the url once
endpoint = '/v1/cryptocurrency/quotes/historical'
final_url = base_url+endpoint

# Repeat for BTC and ETH
for token_id in btc_eth_cmc_ids:
    # Set up the call
    token_id = str(token_id)
    parameters = {'id': token_id,
                  'time_start': start_date,
                  'time_end': end_date,
                  'count': 1,
                  'interval': '1d',
                  'convert': 'USD'}

    # Make the call. `data` is reset for every token so that a failed or skipped
    # call can never re-use the payload of the previous token.
    data = None
    try:
        response = session.get(final_url, params=parameters)
        r_json = json.loads(response.text)
        error_message = r_json['status']['error_message']
        if error_message is None:
            data = r_json['data']
        elif error_message[:29] != 'Search query is out of range.':
            raise RuntimeError('json has error: ' + str(error_message))
        # otherwise the data is not in the range of interest: `data` stays None
        # and the token is skipped below

    except (ConnectionError, Timeout, TooManyRedirects) as e:
        print(e)
        print('\n')

    # Add the cleaned up data as a data frame if it is not fiat
    if data is not None:
        if data['is_fiat'] == 0:
            token_quote_dict_list = []
            for quote in data['quotes']:
                usd_quote = quote['quote']['USD']
                token_quote_dict_list.append({
                    'date':           usd_quote['timestamp'][:10],  # keep YYYY-MM-DD only
                    'usd_per_token':  usd_quote['price'],
                    'usd_volume_24h': usd_quote['volume_24h'],
                    'usd_mcap':       usd_quote['market_cap'],
                })

            token_df = pd.DataFrame(token_quote_dict_list)
            token_df['cmc_id'] = data['id']
            token_dfs.append(token_df)
        else:
            print(data['name'] + ' is fiat')

    # Pause after every request, including skipped tokens, to respect the rate limit
    time.sleep(1)

# Stack both tokens, order the rows by date then token, and fix the column order
df = pd.concat(token_dfs)
df = df.sort_values(by=['date', 'cmc_id'])
df = df[['date', 'cmc_id', 'usd_per_token', 'usd_volume_24h', 'usd_mcap']]

In [8]:
# OBTAIN GLOBAL COINMARKETCAP DATA

# Set up the call
endpoint = '/v1/global-metrics/quotes/historical'
final_url = base_url+endpoint
parameters = {'time_start': start_date,
              'time_end': end_date,
              'count': 5,
              'interval': '1d',
              'convert': 'USD',
              'aux': 'btc_dominance,total_volume_24h,altcoin_market_cap'}

response = session.get(final_url, params=parameters)
r_json = json.loads(response.text)

# Stop here on an api error instead of failing later while indexing the payload
if r_json['status']['error_message'] is not None:
    raise RuntimeError('json has error: ' + str(r_json['status']['error_message']))

# Initialize dictionary for the data
cmc_macro_dict = {'date': [],
                  'total_volume_24h': [],
                  'altcoin_market_cap': [],
                  'btc_dominance': []}

# Convert JSON into dictionary
for entry in r_json['data']['quotes']:
    usd_quote = entry['quote']['USD']
    cmc_macro_dict['date'].append(entry['timestamp'][:10])  # keep YYYY-MM-DD only
    cmc_macro_dict['total_volume_24h'].append(usd_quote['total_volume_24h'])
    cmc_macro_dict['altcoin_market_cap'].append(usd_quote['altcoin_market_cap'])
    cmc_macro_dict['btc_dominance'].append(entry['btc_dominance'])

# Cut out duplicated dates in macro data (the first row of each date is kept)
macro_df = pd.DataFrame(cmc_macro_dict)
macro_df = macro_df.drop_duplicates(subset=['date'])

# Clean up the dataframe to have all study period dates
dates = list(pd.date_range(start_date, end_date, freq='D').strftime('%Y-%m-%d'))
dates_df = pd.DataFrame(data = {'date': dates})
macro_df = macro_df.merge(dates_df,
                          on='date',
                          how='outer',
                          validate='one_to_one')
macro_df['date'] = macro_df['date'].astype('datetime64[ns]')
macro_df = macro_df.sort_values(by='date')
# Timestamps are midnight of the following day, so each row is relabelled with
# the previous calendar day (the shift is one full day, not one second)
macro_df['date'] = macro_df.date - pd.Timedelta(1, unit="d")
macro_df['date'] = macro_df['date'].astype(str)
# Dates added by the outer merge that have no data are dropped here, not interpolated
macro_df = macro_df.dropna()

In [9]:
# OBTAIN CMC COVARIATES AT DAILY LEVEL FOR BTC AND ETH
#
# For every day in the study period, pulls the historical listings snapshot
# and records num_market_pairs and circulating_supply per token; the result is
# filtered down to the ids in btc_eth_cmc_ids.

# Adjust end date to be today
# (re-imported here so the cell still works if `date` was rebound to something
# else by an earlier run of this notebook)
from datetime import date
end_date = date.today().strftime('%Y-%m-%d')
#end_date = '2022-08-13' #TODO ADJUST HERE IF I AM RUNNING BEFORE 5 PM PT LOCAL TIME

# Form list of strings of all dates in study period
dates = list(pd.date_range(start_date, end_date, freq='D').strftime('%Y-%m-%d'))

# Initialize dictionary for the data
cmc_covars_dict = {'date': [],
                   'cmc_id': [],
                   'num_market_pairs': [],
                   'circulating_supply': []}

# The URL does not change between days, so build it once
endpoint = '/v1/cryptocurrency/listings/historical'
final_url = base_url+endpoint
out_of_range_prefix = 'Search query is out of range.'

# The loop variable is deliberately NOT called `date`: that would rebind the
# imported datetime.date class and break date.today() in later cells.
for date_str in dates:
    # Update where we are
    print(date_str)
    print('\n')

    # Set up the call
    parameters = {'date': date_str,
                  'limit': 5,
                  'convert': 'USD',
                  'aux': 'num_market_pairs,circulating_supply'}

    # Make the call, allowing up to three attempts per day
    nb_tries = 3
    while True:
        nb_tries -= 1
        try:
            response = session.get(final_url, params=parameters)
            r_json = json.loads(response.text)
        except (ConnectionError, Timeout, TooManyRedirects):
            if nb_tries <= 0:
                raise
            print('error due to connection, timeout, or redirect')
            time.sleep(1)
            continue

        error_message = r_json['status']['error_message']
        if error_message is None:
            break

        # Only the "out of range" error is retried; anything else is fatal.
        # Explicit raises are used instead of assert so the checks survive
        # `python -O` and report the API's own message.
        if not error_message.startswith(out_of_range_prefix):
            raise RuntimeError(f'json has error: {error_message}')
        print('error due to out of range')
        if nb_tries <= 0:
            raise RuntimeError('out of range error occured several times')
        time.sleep(1)

    # Add the data for that day to the dictionary
    for token in r_json['data']:
        cmc_covars_dict['date'].append(date_str)
        cmc_covars_dict['cmc_id'].append(token['id'])
        cmc_covars_dict['num_market_pairs'].append(token['num_market_pairs'])
        cmc_covars_dict['circulating_supply'].append(token['circulating_supply'])

    # Delay next call to not break limits
    time.sleep(1)

# Convert data into a df and keep only the BTC / ETH rows
cmc_covars_df = pd.DataFrame(cmc_covars_dict)
cmc_covars_df = cmc_covars_df[cmc_covars_df.cmc_id.isin(btc_eth_cmc_ids)]


2022-08-10


2022-08-11


2022-08-12


2022-08-13




In [10]:
# READ IN THE OLD DATA
# Load the three previously saved snapshots so the freshly pulled rows can be
# appended to them.

# price / volume / market cap panel
old_df = pd.read_csv(
    filepath_or_buffer='../3-data/raw/cmc_price_vol_mcap_btceth-20151201_20220812.csv'
)

# token-level covariates
old_covars_df = pd.read_csv(
    filepath_or_buffer='../3-data/raw/cmc_token_covars_btceth-20151201_20220812.csv'
)

# global macro time series
old_macro_df = pd.read_csv(
    filepath_or_buffer='../3-data/raw/cmc_macro_timeseries_btceth-20151201_20220812.csv'
)

In [11]:
# COMBINE DATA
# For each dataset: keep only the newly pulled rows dated strictly after the
# latest date already present in the old file, stack them under the old rows,
# renumber the index, and sort.

# price / volume / market cap panel
last_date_old_main_data = np.max(old_df['date'].values)
df = df.loc[df['date'] > last_date_old_main_data]
df = (
    pd.concat([old_df, df])
    .reset_index(drop=True)
    .sort_values(by=['date', 'cmc_id'])
)

# global macro time series
last_date_old_macro_data = np.max(old_macro_df['date'].values)
macro_df = macro_df.loc[macro_df['date'] > last_date_old_macro_data]
macro_df = (
    pd.concat([old_macro_df, macro_df])
    .reset_index(drop=True)
    .sort_values(by=['date'])
)

# token-level covariates
last_date_old_covars_data = np.max(old_covars_df['date'].values)
cmc_covars_df = cmc_covars_df.loc[cmc_covars_df['date'] > last_date_old_covars_data]
cmc_covars_df = (
    pd.concat([old_covars_df, cmc_covars_df])
    .reset_index(drop=True)
    .sort_values(by=['date', 'cmc_id'])
)

In [12]:
## SAVE DATA
# Write the combined frames to their undated "latest" CSV files, without the
# pandas index column.

df.to_csv(
    path_or_buf='../3-data/raw/cmc_price_vol_mcap_btceth.csv',
    index=False,
)
cmc_covars_df.to_csv(
    path_or_buf='../3-data/raw/cmc_token_covars_btceth.csv',
    index=False,
)
macro_df.to_csv(
    path_or_buf='../3-data/raw/cmc_macro_timeseries_btceth.csv',
    index=False,
)

In [None]:
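# SAVE THE DATA UNDER DATE-STAMPED FILENAMES IF IT IS A NEW MONTH (uncomment to run; presumably the date range in the filenames should be updated first)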
# df.to_csv('../3-data/raw/cmc_price_vol_mcap_btceth-20151201_20220812.csv', index=False)
# cmc_covars_df.to_csv('../3-data/raw/cmc_token_covars_btceth-20151201_20220812.csv', index=False)
# macro_df.to_csv('../3-data/raw/cmc_macro_timeseries_btceth-20151201_20220812.csv', index=False)