In [1]:
import pandas as pd
import numpy as np
import time
from datetime import date
from requests import Session
import json
from dateutil.relativedelta import relativedelta
import json.decoder
from typing import Dict, Any, Optional
import pickle

In [None]:
# TODO REPLACE API CALL WITH COINAPI FUNC
# TODO MERGE ASSET UNIVERSE TO CMC NAMES AND MAKE SURE I MATCH 100%
# TODO PULL ASSET METADATA TO FORM CW; does it include rank or have to pull separately?
# - figure out whether to use the initial metadata pull I did or the day-by-day one; can I do hourly?
# TODO PULL MCAP AND VOLUME AT 1H FREQ
# TODO PULL MACRO DATA AT ONE HOUR FREQ
# TODO PULL EXCHANGE DATA AT ONE HOUR FREQ


In [2]:
def initiateAPI(base_url: str, API_KEY: str) -> Session:
    """ open an authenticated cmc session and echo the key-info response.

    Args:
        base_url (str): the url for the pro api at cmc.
        API_KEY (str): the cmc pro api key, sent in the 'X-CMC_PRO_API_KEY' header.

    Returns:
        session (requests.Session): session carrying the auth headers, for reuse on later calls.
    """
    # the key-info endpoint reports plan limits and current usage for the key
    key_info_url = base_url + '/v1/key/info'

    # attach the auth headers once so every later request on this session carries them
    cmc_session = Session()
    cmc_session.headers.update({'Accepts': 'application/json',
                                'X-CMC_PRO_API_KEY': API_KEY})

    # ping the endpoint and print whatever json comes back
    key_info_response = cmc_session.get(key_info_url)
    print(key_info_response.json())

    return cmc_session


In [3]:
def makeCMCApiCall(session: Session, url: str, params: dict, retries: int=3) -> Optional[Dict[str, Any]]:
    """ makes an API call to CoinMarketCap using the provided requests.Session object.
    
    Args:
        session (requests.Session): A requests.Session object that will be used to make the API call.
        url (str): The API endpoint URL to call.
        params (dict): A dictionary of parameters to include in the API call.
        retries (int): The maximum number of attempts to make. Default is 3.
        
    Returns:
        data: the value under the 'data' key of the JSON response, or None if every
              attempt failed.

    Raises:
        KeyError: if a successful response decodes to JSON that has no 'data' key.
    """
    for _ in range(retries):
        response = session.get(url, params=params)
        if response.ok:
            try:
                return response.json()['data']
            except json.decoder.JSONDecodeError as e:
                # body was not valid JSON; fall through to the next attempt
                print(f'Error decoding JSON response: {str(e)}')
        else:
            # There was an error, retry after a short delay
            print(f'The API call failed with status code {response.status_code}, retrying...')
            time.sleep(0.5)
    
    # report the number of attempts actually configured, not a hard-coded 3
    print(f'The api call failed after {retries} attempts.')
    return None

In [4]:
def obtainTopCMCAssets(base_url: str, session: Session, start_date: date, end_date: date) -> list:
    """ obtain the top cmc assets for each month of the study period.

    One historical-listings call is made for start_date and for every one-month
    step after it that is not later than end_date. Dates whose call fails (the api
    helper returned None) are skipped with a printed warning instead of crashing.

    Args:
        base_url (str): The url for the pro api at cmc. 
        session (Session): A requests.Session object that will be used to make the API call.
        start_date (date): A datetime.date object representing the start date for the study period.
        end_date (date): A datetime.date object representing the end date for the study period.
    
    Returns:
        unique_asset_cmc_ids (list): sorted unique cmc asset ids (numpy scalars from np.unique).
    """
    # specify the dates to obtain
    dates = [start_date]
    current_date = start_date+relativedelta(months=1)
    while current_date <= end_date:
        dates.append(current_date)
        current_date += relativedelta(months=1)

    # set up target url
    endpoint = '/v1/cryptocurrency/listings/historical'
    url = f"{base_url}{endpoint}"

    # obtain the top assets by cmc ranking for each month in the study period
    asset_cmc_ids = []
    # named snapshot_date so the imported `date` class is not shadowed
    for snapshot_date in dates:
        # request fewer assets for earlier years, more for later ones
        if snapshot_date.year <= 2016:
            limit = 50
        elif snapshot_date.year <= 2019:
            limit = 300
        else:
            limit = 500
        params = {'date': snapshot_date,
                  'limit': limit,
                  'convert': 'USD',
                  'aux': 'cmc_rank'}

        # make the call
        data = makeCMCApiCall(session, url, params)

        # extract the asset ids; a None result means every retry failed
        if data is None:
            print(f'No listings returned for {snapshot_date}; skipping this date.')
        else:
            asset_cmc_ids.extend(asset['id'] for asset in data)

        # space out calls
        time.sleep(1)
        print(snapshot_date)

    # drop redundant assets
    unique_asset_cmc_ids = list(np.unique(np.array(asset_cmc_ids)))

    return unique_asset_cmc_ids


In [5]:
def formDataframeOfTopCMCAssets(base_url: str, session: Session, cmc_ids: list,
                                page_size: int = 5000, max_pages: int = 4) -> pd.DataFrame:
    """ pull all cmc meta data for assets and merge onto universe of top assets in cmc_ids.

    The id map is read page by page (start = 1, 1 + page_size, ...). Paging stops
    early once a page comes back shorter than page_size, and never exceeds max_pages.

    Args:
        base_url (str): the url for the pro api at cmc. 
        session (Session): A requests.Session object that will be used to make the API call.
        cmc_ids (list): top assets by cmc ranking.
        page_size (int): number of map entries requested per call. Default is 5000.
        max_pages (int): maximum number of map pages to request. Default is 4.

    Returns:
        cw_df (pd.DataFrame): dataframe of asset meta data for top assets by cmc ranking,
                              sorted by 'cmc_id'.

    Raises:
        RuntimeError: if a map page could not be retrieved (the api helper returned None).
    """

    # set up target url for obtaining mapping from id to asset info
    endpoint = '/v1/cryptocurrency/map'
    url = f"{base_url}{endpoint}"

    # obtain the CMC mapping of IDs to asset info
    full_data = []
    for page in range(max_pages):
        # set up params for call
        start = 1 + page * page_size
        params = {'listing_status': 'active,inactive,untracked',
                  'limit': page_size,
                  'start': start,
                  'aux': 'platform,first_historical_data,last_historical_data'}

        # make the call
        data = makeCMCApiCall(session, url, params)
        if data is None:
            # an incomplete map would silently drop assets in the merge below
            raise RuntimeError(f'Could not retrieve the cmc id map page starting at {start}.')

        # Append the results
        full_data.extend(data)

        # space out calls
        time.sleep(1)

        # a short page means the map is exhausted
        if len(data) < page_size:
            break
    else:
        # every requested page was full, so more entries may exist beyond max_pages
        print(f'Warning: all {max_pages} map pages were full; the cmc id map may be truncated.')

    # clean up asset info dictionaries
    clean_full_data = []
    for asset_dict in full_data:
        # both date fields are kept only when both are present
        has_dates = ('first_historical_data' in asset_dict
                     and 'last_historical_data' in asset_dict)
        platform = asset_dict.get('platform')
        clean_full_data.append({
            'cmc_id': asset_dict['id'],
            'cmc_symbol': asset_dict['symbol'],
            'name': asset_dict['name'],
            'cmc_slug': asset_dict['slug'],
            'cmc_first_date': asset_dict['first_historical_data'] if has_dates else None,
            'cmc_last_date': asset_dict['last_historical_data'] if has_dates else None,
            'platform_cmc_slug': platform['slug'] if platform is not None else None,
        })

    # explicit columns keep the merge key present even when the map is empty
    cmc_assets_df = pd.DataFrame(clean_full_data,
                                 columns=['cmc_id', 'cmc_symbol', 'name', 'cmc_slug',
                                          'cmc_first_date', 'cmc_last_date',
                                          'platform_cmc_slug'])

    # Merge down to just the assets of interest
    target_assets_df = pd.DataFrame(data = {'cmc_id': cmc_ids})
    cw_df = cmc_assets_df.merge(target_assets_df,
                                on='cmc_id',
                                how='inner',
                                validate='one_to_one')

    # reset index and sort
    cw_df = cw_df.sort_values(by='cmc_id', ignore_index=True)

    return cw_df

In [6]:
def pullPriceMcapVolume(base_url: str, session: Session, 
        cw_df: pd.DataFrame, start_date: date, end_date: date) -> pd.DataFrame:
    """ pulls historical price, volume, and mcap data for asset ids in cw_df.

    Assets whose call fails (the api helper returned None) contribute no rows, and
    assets flagged as fiat are reported and skipped.
    
    Args:
        base_url (str): The base URL for the CoinMarketCap API.
        session (requests.Session): A requests.Session object to be used to make the API calls.
        cw_df (pd.DataFrame): A pandas DataFrame that contains information about the assets to 
                                 retrieve data for. Must include columns 'cmc_id' and 'cmc_slug'.
        start_date (date): A datetime.date object representing the start date for the study period.
        end_date (date): A datetime.date object representing the end date for the study period.
        
    Returns:
        df (pd.DataFrame): price, volume, and mcap for target assets within specified date range. 
                           The DataFrame has columns 'date', 'usd_per_asset', 'usd_volume_24h',
                           'usd_mcap', and 'cmc_id'. It is empty (with those columns) when no
                           asset produced data.
    """
    output_cols = ['date', 'usd_per_asset', 'usd_volume_24h', 'usd_mcap', 'cmc_id']

    # initialize list to build
    asset_dfs = []

    # set up target url
    endpoint = '/v1/cryptocurrency/quotes/historical'
    url = f"{base_url}{endpoint}"

    # loop over assets
    asset_ids = list(cw_df.cmc_id.values)
    asset_names = list(cw_df.cmc_slug.values)
    for i, (asset_id, asset_name) in enumerate(zip(asset_ids, asset_names)):
        # monitor progress
        print(f"Processing the {i+1}th asset ({(i+1)/len(asset_ids)*100:.2f}%): {asset_name}")

        # build parameters
        params = {'id': str(asset_id),
                  'time_start': start_date.strftime('%Y-%m-%d'),
                  'time_end': end_date.strftime('%Y-%m-%d'),
                  'count': 1,
                  'interval': '1d',
                  'convert': 'USD'} 
        
        # make the api call
        data = makeCMCApiCall(session, url, params, retries=3)

        # clean the data
        if data is not None:
            if data['is_fiat'] == 0:
                # keep only the date part (first 10 chars) of each quote timestamp
                asset_quote_dict_list = [
                    {'date': quote['quote']['USD']['timestamp'][:10],
                     'usd_per_asset': quote['quote']['USD']['price'],
                     'usd_volume_24h': quote['quote']['USD']['volume_24h'],
                     'usd_mcap': quote['quote']['USD']['market_cap']}
                    for quote in data['quotes']
                ]

                # use a fresh name so the cw_df argument is not overwritten
                asset_df = pd.DataFrame(asset_quote_dict_list)
                asset_df['cmc_id'] = data['id']
                asset_dfs.append(asset_df)
            else:
                print(f"{data['name']} is fiat")        

        # space out calls
        time.sleep(0.2)

    # pd.concat raises on an empty list, so return an empty frame explicitly
    if not asset_dfs:
        return pd.DataFrame(columns=output_cols)

    # build final dataframe
    df = pd.concat(asset_dfs)

    return df

In [7]:
def initialCleanAssetMetadata(cw_df: pd.DataFrame, column_map: dict, dropna: bool = True) -> pd.DataFrame:
    """ Clean asset metadata to return cleaned dataframe. 

    The input frame is not modified; a new frame is returned.

    Args:
        cw_df (pd.DataFrame): DataFrame containing asset metadata to be cleaned.
        column_map (dict): a mapping of the current column names to the desired column names.
                           After renaming, the frame must contain 'cmc_id', 'slug_cmc',
                           'symbol_cmc', 'first_date_cmc', and 'last_date_cmc'.
        dropna (bool): whether to drop any rows with missing values in key columns.
    
    Returns:
        cw_df (pd.DataFrame): cleaned DataFrame with the five columns above, sorted by 'cmc_id'.

    Raises:
        ValueError: if a column named in column_map is missing, or if 'cmc_id' or
                    'slug_cmc' values are not unique.
    """
    # confirm has required columns
    for old_name in column_map:
        if old_name not in cw_df.columns:
            raise ValueError(f"Input DataFrame must contain '{old_name}' column.")

    # apply column map renaming and subset to useful columns;
    # copy so the column assignments below do not write into a slice of the input
    keep_cols = ['cmc_id', 'slug_cmc', 'symbol_cmc', 'first_date_cmc', 'last_date_cmc']
    cw_df = cw_df.rename(columns=column_map)[keep_cols].copy()

    # convert date columns to date type
    for date_col in ('first_date_cmc', 'last_date_cmc'):
        cw_df[date_col] = pd.to_datetime(cw_df[date_col], format='%Y-%m-%d', utc=False)

    # drop rows with missing values in key columns
    if dropna:
        cw_df = cw_df.dropna(subset=['cmc_id', 'slug_cmc'])
    
    # assert that each row has a unique `cmc_id` and `slug_cmc` value
    if not cw_df['cmc_id'].is_unique:
        raise ValueError("Input DataFrame has non-unique 'cmc_id' values.")
    if not cw_df['slug_cmc'].is_unique:
        raise ValueError("Input DataFrame has non-unique 'slug_cmc' values.")
    
    # sort values and reset index
    cw_df = cw_df.sort_values(by='cmc_id', ignore_index=True)

    return cw_df
    


In [8]:
def initialCleanPanel(panel_df: pd.DataFrame, start_year: int=2015, end_date: str='2023-02-02') -> pd.DataFrame:
    """ clean panel of cmc prices, volume, and mcap data.

    The input frame is not modified; a new frame is returned.
     
    Args:
        panel_df (pandas.DataFrame): panel data to clean. Must contain 'date', 'cmc_id',
            'usd_mcap', 'usd_volume_24h', and a price column named 'usd_per_token'
            (a column named 'usd_per_asset' is accepted as the price column when
            'usd_per_token' is absent).
        start_year (int): the minimum year to include in the DataFrame (default: 2015).
        end_date (str): the maximum date (inclusive) to include in the DataFrame (default: '2023-02-02').

    Returns:
        (pd.DataFrame): The cleaned DataFrame, sorted by 'date' then 'cmc_id'.

    Raises:
        ValueError: if any expected column is missing.
    """
    # accept the price column name that pullPriceMcapVolume produces
    if 'usd_per_token' not in panel_df.columns and 'usd_per_asset' in panel_df.columns:
        panel_df = panel_df.rename(columns={'usd_per_asset': 'usd_per_token'})

    # confirm has the right columns
    expected_cols = ['date', 'cmc_id', 'usd_per_token', 'usd_mcap', 'usd_volume_24h']
    if not all(col in panel_df.columns for col in expected_cols):
        raise ValueError(f"Missing expected columns: {expected_cols}")
    
    # rename columns to standard convention (with data source name in it)
    panel_df = panel_df.rename(columns = {'usd_per_token': 'usd_per_token_cmc',
                                          'usd_mcap': 'usd_mcap_cmc',
                                          'usd_volume_24h': 'usd_volume_24h_cmc'})

    # set column order; copy so later assignments never touch the caller's frame
    data_cols = ['usd_per_token_cmc', 'usd_mcap_cmc', 'usd_volume_24h_cmc']
    panel_df = panel_df[['date', 'cmc_id'] + data_cols].copy()

    # convert columns to correct data type
    panel_df['date'] = pd.to_datetime(panel_df['date'], format='%Y-%m-%d', utc=False)

    # drop rows outside the requested window (honours start_year / end_date)
    in_window = ((panel_df['date'].dt.year >= start_year)
                 & (panel_df['date'] <= pd.Timestamp(end_date)))
    panel_df = panel_df[in_window]
    panel_df = panel_df.dropna(how='any', subset=['date', 'cmc_id'])
    panel_df = panel_df.dropna(how='all', subset=data_cols).copy()

    # set negative values to missing and too large values (> 2e12) to missing
    for col in data_cols:
        out_of_range = (panel_df[col] < 0) | (panel_df[col] > 2e12)
        panel_df[col] = panel_df[col].mask(out_of_range)

    # drop duplicated rows across id columns
    panel_df = panel_df.drop_duplicates(subset=['date', 'cmc_id'])

    # sort values and reset index
    panel_df = panel_df.sort_values(by=['date', 'cmc_id'], 
                                    ignore_index=True)

    return panel_df

In [14]:
def pullCMCMacro(base_url: str, session: Session, start_date: date, end_date: date) -> pd.DataFrame:
    """ pull the daily cmc global-metrics time series for the study period.

    Args:
        base_url (str): The base URL for the CoinMarketCap API.
        session (requests.Session): A requests.Session object to be used to make the API calls.
        start_date (date): A datetime.date object representing the start date for the study period.
        end_date (date): A datetime.date object representing the end date for the study period.

    Returns:
        macro_df (pd.DataFrame): time series data of cmc macro covariates, one row per
                                 returned quote, with 'date' rounded up to the next midnight.

    Raises:
        RuntimeError: if the api call failed (the api helper returned None).
    """
        
    # set up the call
    endpoint = '/v1/global-metrics/quotes/historical'
    url      = f"{base_url}{endpoint}"
    # NOTE(review): 'count' is 10 here but 1 / 10000 in the other historical pulls in
    # this notebook -- confirm how the API treats it when both time bounds are given.
    params = {'time_start': start_date.strftime('%Y-%m-%d'),
              'time_end': end_date.strftime('%Y-%m-%d'),
              'count': 10,
              'interval': '1d',
              'convert': 'USD',
              'aux': 'btc_dominance,active_cryptocurrencies,active_exchanges,active_market_pairs,total_volume_24h,altcoin_market_cap,altcoin_volume_24h'}

    # make the call
    data = makeCMCApiCall(session, url, params, retries=3)
    if data is None:
        raise RuntimeError('The cmc global-metrics call failed; no macro data was returned.')

    # convert JSON into one record per quote
    macro_cols = ['date', 'total_market_cap', 'total_volume_24h', 'altcoin_market_cap',
                  'altcoin_volume_24h', 'btc_dominance', 'active_cryptocurrencies',
                  'active_exchanges', 'active_market_pairs']
    records = []
    for days_data in data['quotes']:
        usd_quote = days_data['quote']['USD']
        records.append({'date': days_data['timestamp'],
                        'total_market_cap': usd_quote['total_market_cap'],
                        'total_volume_24h': usd_quote['total_volume_24h'],
                        'altcoin_market_cap': usd_quote['altcoin_market_cap'],
                        'altcoin_volume_24h': usd_quote['altcoin_volume_24h'],
                        'btc_dominance': days_data['btc_dominance'],
                        'active_cryptocurrencies': days_data['active_cryptocurrencies'],
                        'active_exchanges': days_data['active_exchanges'],
                        'active_market_pairs': days_data['active_market_pairs']})

    # round each timestamp up to midnight, then round-trip through a date string
    # to end with plain datetime values at day resolution
    macro_df = pd.DataFrame(records, columns=macro_cols)
    macro_df['date'] = pd.to_datetime(macro_df.date).dt.ceil('D')
    macro_df['date'] = macro_df.date.dt.strftime('%Y-%m-%d')
    macro_df['date'] = pd.to_datetime(macro_df.date, format='%Y-%m-%d', utc=False)

    return macro_df

In [15]:
def pullCMCExchangeHistoricalData(base_url: str, session: Session, start_date: date, end_date: date) -> pd.DataFrame:
    """ pull daily historical volume and market-pair counts for a fixed list of exchanges.

    Three endpoints are used in turn: the exchange map (slug -> id), exchange info
    (launch date), and exchange historical quotes (one call per exchange).

    Args:
        base_url (str): The base URL for the CoinMarketCap API.
        session (requests.Session): A requests.Session object to be used to make the API calls.
        start_date (date): A datetime.date object representing the start date for the study period.
        end_date (date): A datetime.date object representing the end date for the study period.

    Returns:
        ex_df (pd.DataFrame): panel data frame of exchange covariates with columns 'date',
                              'ex_slug_cmc', 'ex_date_launched_cmc', 'ex_volume_24h_cmc',
                              and 'ex_num_market_pairs_cmc'.

    Raises:
        RuntimeError: if the exchange map or exchange info call failed.
        AssertionError: if any exchange has data dated before its launch date.
    """
    # obtain exchange mapping ids
    endpoint = '/v1/exchange/map'
    url      = f"{base_url}{endpoint}"
    params   = {'listing_status': 'active',
                'limit': 250,
                'sort': "volume_24h"}
    data = makeCMCApiCall(session, url, params, retries=3)
    if data is None:
        raise RuntimeError('The cmc exchange map call failed.')

    # subset down to exchanges of interest
    exchanges_dict = {'exchange_slug': [],
                    'exchange_id': []}
    exchanges_to_keep = ['poloniex', 'kraken', 'bitfinex', 'okcoin', 'coinbase-exchange', 'gemini', 'kucoin', 'ftx', 'ftx-us', 'binance-us', 'huobi', 'bitmex',
                        'uniswap-v3', 'dydx', 'pancakeswap-v2', 'uniswap-v2', 'sushiswap', 'curve-finance', 'balancer-v2', 'bancor-network']
    for ex_data in data:
        if ex_data['slug'] in exchanges_to_keep:
            exchanges_dict['exchange_slug'].append(ex_data['slug'])
            exchanges_dict['exchange_id'].append(ex_data['id'])
    exchanges_df = pd.DataFrame(exchanges_dict)

    # obtain metadata for exchanges
    exchange_ids = ','.join(str(ex_id) for ex_id in exchanges_df.exchange_id.values)
    endpoint = '/v1/exchange/info'
    url      = f"{base_url}{endpoint}"
    params   = {'id': exchange_ids,
                'aux': 'date_launched'}
    data = makeCMCApiCall(session, url, params, retries=3)
    if data is None:
        raise RuntimeError('The cmc exchange info call failed.')

    # extract the metadata of interest
    exchange_metadata_dict = {'exchange_id': [],
                            'exchange_date_launched': []}
    for ex_info in data.values():
        exchange_metadata_dict['exchange_id'].append(ex_info['id'])
        exchange_metadata_dict['exchange_date_launched'].append(ex_info['date_launched'])
    exchange_metadata_df = pd.DataFrame(exchange_metadata_dict)
    exchanges_df = exchanges_df.merge(exchange_metadata_df,
                                    on='exchange_id',
                                    how='inner',
                                    validate='one_to_one')

    # manually fix some missing data
    exchanges_df.loc[exchanges_df.exchange_slug=='pancakeswap-v2', 'exchange_date_launched'] = '2021-04-23T00:00:00.000Z'
    exchanges_df.loc[exchanges_df.exchange_slug=='balancer-v2', 'exchange_date_launched'] = '2021-03-31T00:00:00.000Z'
            
    # obtain the exchange historical data
    endpoint = '/v1/exchange/quotes/historical'
    url = f"{base_url}{endpoint}"
    params = {'time_start': start_date.strftime('%Y-%m-%d'),
            'time_end': end_date.strftime('%Y-%m-%d'),
            'interval': '1d',
            'count': 10000,
            'convert': 'USD'}

    # intialize dict for the data
    ex_hist_data_dict = {'exchange_id': [],
                        'date': [],
                        'exchange_volume_24h': [],
                        'num_market_pairs': []}
    exchange_ids = list(exchanges_df.exchange_id.values)

    # extract the exchange historical information
    for exchange_id in exchange_ids: 
        # update id to pull
        params['id'] = exchange_id

        # make the call
        data = makeCMCApiCall(session, url, params, retries=3)

        if data is None:
            # the inner merge below then simply has no rows for this exchange
            print(f'No historical data returned for exchange id {exchange_id}; skipping.')
        else:
            # extract data to dict
            for ex_data in data['quotes']:
                ex_hist_data_dict['exchange_id'].append(exchange_id)
                ex_hist_data_dict['date'].append(ex_data['quote']['USD']['timestamp'])
                ex_hist_data_dict['exchange_volume_24h'].append(ex_data['quote']['USD']['volume_24h'])
                ex_hist_data_dict['num_market_pairs'].append(ex_data['num_market_pairs'])
            
        # Sleep
        time.sleep(1)

    # Convert to dataframe
    ex_historical_df = pd.DataFrame(ex_hist_data_dict)

    # format dates and round to midnight in the future
    exchanges_df['exchange_date_launched'] =  pd.to_datetime(exchanges_df['exchange_date_launched'], format='%Y-%m-%d', utc=False).dt.ceil('D')
    ex_historical_df['date'] = pd.to_datetime(ex_historical_df['date'], format='%Y-%m-%d', utc=False).dt.ceil('D')

    # combine and clean data
    ex_df = exchanges_df.merge(ex_historical_df,
                                on='exchange_id',
                                how='inner',
                                validate='one_to_many')
    ex_df = ex_df[['date', 'exchange_slug', 'exchange_date_launched', 'exchange_volume_24h', 'num_market_pairs']]
    # sanity check: no observation may pre-date the exchange's launch
    assert(0==ex_df[ex_df.exchange_date_launched>ex_df.date].shape[0]),('some exchanges have data before they launched!')
    ex_df = ex_df.rename(columns={'exchange_slug': 'ex_slug_cmc',
                                'exchange_date_launched': 'ex_date_launched_cmc',
                                'exchange_volume_24h': "ex_volume_24h_cmc",
                                'num_market_pairs': 'ex_num_market_pairs_cmc'})

    return ex_df

In [16]:
def pullCMCAssetMetadata(base_url: str, session: Session, start_date: date, end_date: date,
                         asset_universe: Optional[list] = None) -> pd.DataFrame:
    """ pull daily supply, rank, market-pair, and tag covariates for the asset universe.

    One historical-listings call is made per calendar day in [start_date, end_date].
    Days whose call fails (the api helper returned None) are skipped with a printed warning.

    Args:
        base_url (str): The base URL for the CoinMarketCap API.
        session (requests.Session): A requests.Session object to be used to make the API calls.
        start_date (date): A datetime.date object representing the start date for the study period.
        end_date (date): A datetime.date object representing the end date for the study period.
        asset_universe (list): cmc slugs of the assets to keep. When None (the default) the
                               notebook-level variable `asset_universe_list` is used.

    Returns:
        asset_covars_df (pd.DataFrame): panel data frame of additional asset covariates. The
                                        'date' column is the snapshot date plus one day.
    """
    # resolve the universe once; a set gives constant-time membership checks in the loop
    if asset_universe is None:
        asset_universe = asset_universe_list
    universe_slugs = set(asset_universe)

    # set up the call
    dates    = list(pd.date_range(start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'), freq='D').strftime('%Y-%m-%d'))
    endpoint = '/v1/cryptocurrency/listings/historical'
    url      = f"{base_url}{endpoint}"
    params   = {'limit': 600,
                'convert': 'USD',
                'sort': 'cmc_rank',
                'sort_dir': 'asc',
                'aux': 'tags,circulating_supply,total_supply,max_supply,cmc_rank,num_market_pairs'}

    # initialize dictionary for the data
    covars_dict = {'date': [],
                'cmc_id': [],
                'num_market_pairs': [],
                'max_supply': [],
                'circulating_supply': [],
                'total_supply': [],
                'cmc_rank': [],
                'tags': []}
    # every field except 'date' is copied straight from the asset record
    asset_fields = ['num_market_pairs', 'max_supply', 'circulating_supply',
                    'total_supply', 'cmc_rank', 'tags']

    for i, current_date in enumerate(dates):
        # update current date to pull
        params['date'] = current_date

        # monitor progress
        print(f"Processing {current_date} ({(i+1)/len(dates)*100:.2f}% done).")

        # make the call
        data = makeCMCApiCall(session, url, params, retries=3)

        if data is None:
            print(f"No listings returned for {current_date}; skipping this date.")
        else:
            # add data to dict if in universe
            for asset in data:
                if asset['slug'] in universe_slugs:
                    covars_dict['date'].append(current_date)
                    covars_dict['cmc_id'].append(asset['id'])
                    for field in asset_fields:
                        covars_dict[field].append(asset[field])

        # space out the calls
        time.sleep(0.5)

    # convert to df
    asset_covars_df = pd.DataFrame(covars_dict)

    # clean the columns: parse the date, then shift it forward by one day
    asset_covars_df['date'] = pd.to_datetime(asset_covars_df['date'], format='%Y-%m-%d', utc=False).dt.ceil('D')
    asset_covars_df['date'] = asset_covars_df.date + pd.Timedelta(days=1)

    return asset_covars_df
    

In [17]:
def formFinalPanel(panel_df: pd.DataFrame, asset_covars_df: pd.DataFrame,
                   crosswalk_df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
    """ add the asset covars to the main panel of price, volume, and mcap.
    
    Args:
        panel_df (pd.DataFrame): panel with asset price, trading volume, and mcap. Must
                                 contain 'date' and 'slug_cmc'.
        asset_covars_df (pd.DataFrame): panel with asset metadata. Must contain 'date'
                                        and 'cmc_id'.
        crosswalk_df (pd.DataFrame): crosswalk with 'cmc_id' and 'slug_cmc' columns. When
                                     None (the default) the notebook-level variable `cw_df`
                                     is used.
    
    Returns:
        panel_df (pd.DataFrame): panel with all asset covariates (outer merge on
                                 'date' and 'slug_cmc').

    Raises:
        AssertionError: if any covariate row has no matching slug in the crosswalk.
    """
    # fall back to the notebook global only when no crosswalk was passed in
    if crosswalk_df is None:
        crosswalk_df = cw_df

    # attach the slug to every covariate row
    asset_covars_df = asset_covars_df.merge(crosswalk_df[['cmc_id', 'slug_cmc']],
                                        on='cmc_id',
                                        how='left',
                                        validate='many_to_one')
    # NOTE(review): cmc id 3958 is excluded here; the reason is not recorded -- confirm.
    asset_covars_df = asset_covars_df[asset_covars_df.cmc_id != 3958]
    assert(0==asset_covars_df.slug_cmc.isnull().sum())
    asset_covars_df = asset_covars_df.drop('cmc_id', axis=1)
    panel_df = panel_df.merge(asset_covars_df,
                            on=['date', 'slug_cmc'],
                            validate='one_to_one',
                            how='outer')
    return panel_df

In [18]:
if __name__ == "__main__":
    # api location and study window
    base_url   = "https://pro-api.coinmarketcap.com"
    start_date = date(2015, 1, 1)
    end_date   = date(2023, 2, 1)

    # file holding the api key, plus output locations for the pulled data
    api_fp   = '../../admin/cmc.txt'
    asset_fp = "../data/raw/cmc_asset_universe.pkl"
    cw_fp    = "../data/raw/cmc_cw.pkl"
    panel_fp = "../data/raw/cmc_price_volume_mcap_panel.pkl"
    ex_fp    = "../data/raw/cmc_exchange_panel.pkl"
    macro_fp = "../data/raw/cmc_macro.pkl"

    # crosswalk column renames: name produced by the pull -> standard name
    cw_new_old_col_mapping = dict(cmc_symbol='symbol_cmc',
                                  cmc_slug='slug_cmc',
                                  cmc_first_date='first_date_cmc',
                                  cmc_last_date='last_date_cmc')

    # the key is the first line of the key file, stripped of surrounding whitespace
    with open(api_fp) as f:
        API_KEY = f.readlines()[0].strip()

    # confirm api is working and keep the authenticated session for later calls
    session = initiateAPI(base_url, API_KEY)


{'status': {'timestamp': '2023-03-14T04:00:11.377Z', 'error_code': 0, 'error_message': None, 'elapsed': 137, 'credit_count': 0, 'notice': None}, 'data': {'plan': {'credit_limit_daily': 16666, 'credit_limit_daily_reset': 'In 1 hours, 42 minutes', 'credit_limit_daily_reset_timestamp': '2023-03-14T05:43:08.000Z', 'credit_limit_monthly': 1200000, 'credit_limit_monthly_reset': 'In 25 days, 1 hours, 42 minutes', 'credit_limit_monthly_reset_timestamp': '2023-04-08T05:43:08.000Z', 'rate_limit_minute': 60}, 'usage': {'current_minute': {'requests_made': 0, 'requests_left': 60}, 'current_day': {'credits_used': 0, 'credits_left': 16666}, 'current_month': {'credits_used': 0, 'credits_left': 1200000}}}}


In [19]:
# obtain potential asset ids to include in study
# (one historical-listings call per monthly date between start_date and end_date;
#  base_url, session, start_date, and end_date come from the configuration cell above)
cmc_ids  = obtainTopCMCAssets(base_url, session, start_date, end_date)

2015-01-01
2015-02-01
2015-03-01
2015-04-01
2015-05-01
2015-06-01
2015-07-01
2015-08-01
2015-09-01
2015-10-01
2015-11-01
2015-12-01
2016-01-01
2016-02-01
2016-03-01
2016-04-01
2016-05-01
2016-06-01
2016-07-01
2016-08-01
2016-09-01
2016-10-01
2016-11-01
2016-12-01
2017-01-01
2017-02-01
2017-03-01
2017-04-01
2017-05-01
2017-06-01
2017-07-01
2017-08-01
2017-09-01
2017-10-01
2017-11-01
2017-12-01
2018-01-01
2018-02-01
2018-03-01
2018-04-01
2018-05-01
2018-06-01
2018-07-01
2018-08-01
2018-09-01
The API call failed with status code 400, retrying...
2018-10-01
2018-11-01
2018-12-01
2019-01-01
2019-02-01
2019-03-01
2019-04-01
2019-05-01
2019-06-01
2019-07-01
2019-08-01
2019-09-01
2019-10-01
2019-11-01
2019-12-01
2020-01-01
2020-02-01
2020-03-01
2020-04-01
2020-05-01
2020-06-01
2020-07-01
2020-08-01
2020-09-01
2020-10-01
2020-11-01
2020-12-01
2021-01-01
2021-02-01
2021-03-01
2021-04-01
2021-05-01
2021-06-01
2021-07-01
2021-08-01
2021-09-01
2021-10-01
2021-11-01
2021-12-01
2022-01-01
2022-02-01


In [20]:
# build the crosswalk (cw) of cmc asset metadata for the candidate asset ids
cw_df = formDataframeOfTopCMCAssets(base_url, session, cmc_ids)

# obtain price, volume, and mcap data for target assets
# (one api call per asset in cw_df; progress is printed per asset)
panel_df = pullPriceMcapVolume(base_url, session, cw_df, start_date, end_date)

Processing the 1th asset (0.05%): bitcoin
Processing the 2th asset (0.10%): litecoin
Processing the 3th asset (0.15%): namecoin
Processing the 4th asset (0.20%): terracoin
Processing the 5th asset (0.25%): peercoin
Processing the 6th asset (0.30%): novacoin
Processing the 7th asset (0.35%): feathercoin
Processing the 8th asset (0.40%): freicoin
Processing the 9th asset (0.45%): ixcoin
Processing the 10th asset (0.50%): bitbar
Processing the 11th asset (0.54%): worldcoin
Processing the 12th asset (0.59%): yacoin
Processing the 13th asset (0.64%): digitalcoin
Processing the 14th asset (0.69%): goldcoin
Processing the 15th asset (0.74%): bottlecaps
Processing the 16th asset (0.79%): fastcoin
Processing the 17th asset (0.84%): megacoin
Processing the 18th asset (0.89%): infinitecoin
Processing the 19th asset (0.94%): primecoin
Processing the 20th asset (0.99%): anoncoin
Processing the 21th asset (1.04%): casinocoin
Processing the 22th asset (1.09%): bullion
Processing the 23th asset (1.14%

In [None]:


    

    # NOTE(review): this cell is indented as though it were still inside the
    # `if __name__ == "__main__":` block of an earlier cell, and it has not been executed.

    # clean the data
    cw_df = initialCleanAssetMetadata(cw_df, cw_new_old_col_mapping)
    panel_df = initialCleanPanel(panel_df)

    # cut down to initial inclusion criteria so i pull just these across other providers
    # NOTE(review): prepPanelForInitialInclusiveCriteria, buildInitialAssetUniverse, and
    # determineUniqueAssets are not defined in the cells shown here -- confirm where they live.
    panel_df = prepPanelForInitialInclusiveCriteria(panel_df, cw_df)
    asset_universe_dict = buildInitialAssetUniverse(panel_df, start_date, end_date)
    asset_universe_list = determineUniqueAssets(asset_universe_dict)

    # pull remaining cmc data
    # (pullCMCAssetMetadata reads the asset_universe_list variable assigned just above)
    asset_covars_df = pullCMCAssetMetadata(base_url, session, start_date, end_date)
    macro_df = pullCMCMacro(base_url, session, start_date, end_date)
    ex_df    = pullCMCExchangeHistoricalData(base_url, session, start_date, end_date)

    # form the final panel
    panel_df = formFinalPanel(panel_df, asset_covars_df)

    # save the data
    cw_df.to_pickle(cw_fp)
    panel_df.to_pickle(panel_fp)
    ex_df.to_pickle(ex_fp)
    macro_df.to_pickle(macro_fp)
    # the asset universe is a plain dict, so it is pickled directly rather than via pandas
    with open(asset_fp, 'wb') as f:
        pickle.dump(asset_universe_dict, f)

In [None]:
# TODO
# -get rid of the session BS
# -convert api function to coinapi one
# -confirm i get cmc ranking from asset metadata and if so then get rid of the pulling top cmc assets function and call
# -confirm i get the mcap and volume from the asset metadata call and then get rid of the price mcap and volume pull
# -pull the asset metadata to form cmc panel
# -pull cmc macro data
# -pull cmc exchange data

In [291]:
# TODO:
# get rid of the universe and all this price and volume business as i will just use from coinapi
# maybe just keep price to cross check for errors? but idc about volume per asset date.

# TODO:
# -repull and make sure i am getting data for all the assets on the mcap, price, and volume

# TODO for script with final universe:
# -adjust the asset meta data pull to also extract the price, mcap, and volume
# --in that, adjust the date to not add a date but just do the round; confirm this is OK
# --in that, take the average value between the two or just the one if one is missing
# ---look at counts of both, spread in diff, when i have one but not the other, and when neither, etc.
