In [1]:
import datetime as dt
import multiprocessing
import pickle
import time
from collections import Counter
from typing import Dict, List, Any, Callable

import numpy as np
import pandas as pd
import retrying
import san

from helper_functions import Helper

In [2]:
@retrying.retry(
    wait_exponential_multiplier=1000,
    wait_exponential_max=20000,
    stop_max_attempt_number=3
)
def callSanFunction(san_function: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
    """Calls a function in the 'san' library with retrying.

    The decorator re-invokes this function when it raises, for at most three
    attempts, waiting with exponential backoff (capped at 20 seconds) between
    attempts. Once the attempts are exhausted the last exception propagates
    to the caller.

    Args:
        san_function: The function to call from the 'san' library.
        *args: Positional arguments to pass to the function.
        **kwargs: Keyword arguments to pass to the function.

    Returns:
        The return value of the 'san_function' call.

    Raises:
        Exception: whatever 'san_function' raised on the final attempt.
    """
    try:
        return san_function(*args, **kwargs)
    except Exception:
        # Only exception classes may appear in an `except` clause. The previous
        # `except retrying.Retrying` handler named a non-exception class, which
        # made Python raise TypeError while matching and hid the real error.
        # Report the failure and re-raise so the decorator can decide to retry.
        print("An error occurred while calling the 'san_function'.")
        raise

In [3]:
def formSantimentAssetUniverse(asset_universe: List[str]) -> pd.DataFrame:
    """Form the crosswalk between Santiment slugs and Coinmetrics asset IDs.

    Pulls the full Santiment project list, matches projects to the study
    universe on lower-cased ticker, removes known duplicate slugs, and adds
    the 'nano' slug by hand under the Coinmetrics ID 'nano'.

    Args:
        asset_universe (List[str]): Coinmetrics asset IDs. The list is not modified.

    Returns:
        df (pd.DataFrame): crosswalk with columns asset_san, asset_cm and
            category_san, sorted by asset_cm.

    Raises:
        AssertionError: if the API result is not a DataFrame, or if the set of
            mapped assets differs from the universe (ignoring 'orca', which is
            known to be missing from Santiment).
    """
    # Slugs to drop because they duplicate another slug with the same ticker
    slugs_to_drop = ['o-balancer', 'p-balancer', 'farmatrust', 'planet', 'plair', 'invacio', 'nftx-hashmasks-index',
                     'truebit', 'game', 'bonded-finance', 'o-aave', 'bnb-aave', 'p-aave', 'p-chainlink',
                     'arb-chainlink', 'bnb-chainlink', 'o-chainlink', 'mir-coin', 'p-matic-network', 'bnb-ankr',
                     'bnb-cardano', 'o-perpetual-protocol', 'bnb-mines-of-dalarnia', 'p-quickswap',
                     'bnb-synthetix-network-token', 'o-synthetix-network-token', 'arb-curve', 'p-uniswap',
                     'bnb-uniswap', 'arb-stargate-finance', 'arb-sushi', 'bnb-sushi', 'bnb-green-metaverse-token',
                     'bnb-swipe', 'bnb-1inch', 'bnb-chromia', 'bnb-myneighboralice', 'bnb-alpha-finance-lab',
                     'bnb-avalanche', 'bnb-axie-infinity']

    # Pull every project known to Santiment
    result = callSanFunction(san.get, "projects/all")
    assert isinstance(result, pd.DataFrame)
    df = result.copy()

    # Manually add the asset that the ticker match misses; copy so the
    # assignment below does not write into a slice of `df`
    manual_df = df.loc[df.slug.isin(['nano']), ['marketSegment', 'slug']].copy()
    manual_df['symbol_lower'] = 'nano'

    # Subset down to matched assets
    df['symbol_lower'] = df.ticker.str.lower()
    df = df[df.symbol_lower.isin(asset_universe)]

    # Remove duplicated assets
    df = df[~df.slug.isin(slugs_to_drop)]

    # Subset to relevant columns and append the manually added asset(s)
    df = df[['marketSegment', 'slug', 'symbol_lower']]
    df = pd.concat([manual_df, df])

    # Rename to the crosswalk column names
    df = df.rename(columns={'marketSegment': 'category_san', 'slug': 'asset_san', 'symbol_lower': 'asset_cm'})

    # Confirm every asset in the universe is mapped, knowing we are missing orca.
    # Work on a set so the caller's list is left untouched and a universe
    # without 'orca' does not raise.
    expected_assets = set(asset_universe) - {'orca'}
    mapped_assets = set(df.asset_cm.values)
    assert mapped_assets == expected_assets, (
        f"crosswalk mismatch; unmapped: {sorted(expected_assets - mapped_assets)}, "
        f"unexpected: {sorted(mapped_assets - expected_assets)}"
    )

    return df[['asset_san', 'asset_cm', 'category_san']].sort_values(by='asset_cm', ignore_index=True)

In [32]:
def formAssetMetricsDicts(san_slug_universe: List[str]) -> tuple:
    """Build per-asset metric dictionaries: everything available, and the subset we pull.

    Args:
        san_slug_universe (List[str]): Santiment slugs in the study universe.

    Returns:
        (tuple): (metrics to pull per slug, all available metrics per slug).
            Both dictionaries are keyed by slug. If a slug's lookup does not
            return a list, the scan stops there and later slugs are absent
            from both dictionaries.
    """
    # asset-level metrics we are interested in
    asset_metrics_to_include = [
        'active_addresses_1h', 'active_deposits', 'active_deposits_per_exchange',
        'active_holders_distribution_combined_balance_over_1', 'active_holders_distribution_combined_balance_over_10',
        'active_holders_distribution_combined_balance_over_100', 'active_holders_distribution_combined_balance_over_100k',
        'active_holders_distribution_combined_balance_over_10k', 'active_holders_distribution_combined_balance_over_1M',
        'active_holders_distribution_combined_balance_over_1k', 'active_holders_distribution_combined_balance_total',
        'active_holders_distribution_over_1', 'active_holders_distribution_over_10', 'active_holders_distribution_over_100',
        'active_holders_distribution_over_100k', 'active_holders_distribution_over_10k', 'active_holders_distribution_over_1M',
        'active_holders_distribution_over_1k', 'active_holders_distribution_total', 'active_withdrawals',
        'active_withdrawals_per_exchange', 'age_consumed', 'age_destroyed', 'age_distribution', 'all_known_balance',
        'amount_in_exchange_top_holders', 'amount_in_non_exchange_top_holders', 'amount_in_top_holders',
        'cex_balance', 'cexes_to_defi_flow', 'cexes_to_dex_flow', 'cexes_to_dex_traders_flow', 'cexes_to_traders_flow',
        'cexes_to_whale_flow', 'circulation', 'circulation_1d', 'circulation_2y', 'circulation_30d', 'circulation_365d',
        'circulation_3y', 'circulation_5y', 'circulation_7d', 'circulation_90d', 'community_messages_count_total',
        'daily_active_addresses', 'defi_balance', 'defi_cex_balance', 'defi_dex_balance', 'defi_exchange_balance',
        'defi_to_cexes_flow', 'defi_to_dex_traders_flow', 'defi_to_dexes_flow', 'defi_to_exchanges_flow', 'defi_to_traders_flow',
        'defi_to_whale_flow', 'deposit_balance', 'deposit_transactions', 'deposit_transactions_per_exchange', 'dev_activity',
        'dev_activity_contributors_count', 'dex_balance', 'dex_cex_balance', 'dex_to_cexes_flow', 'dex_trader_balance',
        'dex_traders_cex_balance', 'dex_traders_defi_balance', 'dex_traders_dex_balance',
        'dex_traders_exchange_balance', 'dex_traders_to_cexes_flow', 'dex_traders_to_defi_flow', 'dex_traders_to_dexes_flow',
        'dex_traders_to_exchanges_flow', 'dex_traders_to_whale_flow', 'dex_traders_whale_balance', 'dexes_to_defi_flow',
        'dexes_to_dex_traders_flow', 'dexes_to_traders_flow', 'dexes_to_whale_flow', 'dormant_circulation_180d',
        'dormant_circulation_365d', 'dormant_circulation_90d', 'exchange_balance', 'exchange_inflow', 'exchange_inflow_usd',
        'exchange_outflow', 'exchange_outflow_usd', 'exchanges_to_defi_flow', 'exchanges_to_dex_traders_flow',
        'exchanges_to_genesis_flow', 'exchanges_to_miners_flow', 'exchanges_to_traders_flow', 'exchanges_to_whales_flow',
        'github_activity', 'github_activity_contributors_count',
        'holders_distribution_combined_balance_over_1', 'holders_distribution_combined_balance_over_10',
        'holders_distribution_combined_balance_over_100', 'holders_distribution_combined_balance_over_100k',
        'holders_distribution_combined_balance_over_10k', 'holders_distribution_combined_balance_over_1M',
        'holders_distribution_combined_balance_over_1k', 'holders_distribution_combined_balance_total',
        'holders_distribution_over_1', 'holders_distribution_over_10', 'holders_distribution_over_100',
        'holders_distribution_over_100k', 'holders_distribution_over_10k', 'holders_distribution_over_1M',
        'holders_distribution_over_1k', 'holders_distribution_total', 'holders_labeled_distribution_total',
        'holders_labeled_negative_distribution_total', 'labeled_to_labeled_flow', 'labeled_to_unlabeled_flow',
        'labelled_exchange_balance_sum', 'labelled_historical_balance', 'labelled_historical_balance_changes',
        'marketcap_usd', 'mean_age', 'mean_dollar_invested_age', 'mean_realized_price_usd',
        'miners_balance', 'miners_exchange_balance', 'miners_to_exchanges_flow', 'mvrv_long_short_diff_usd', 'mvrv_usd',
        'network_growth', 'nft_social_volume', 'nvt', 'nvt_transaction_volume', 'payments_count', 'percent_of_total_supply_in_profit',
        'percent_of_total_supply_on_exchanges', 'price_usd', 'realized_value_usd', 'sentiment_balance_reddit', 'sentiment_balance_total',
        'sentiment_balance_twitter', 'sentiment_balance_twitter_crypto', 'sentiment_negative_reddit', 'sentiment_negative_total',
        'sentiment_negative_twitter', 'sentiment_negative_twitter_crypto', 'sentiment_positive_reddit', 'sentiment_positive_total',
        'sentiment_positive_twitter', 'sentiment_positive_twitter_crypto', 'sentiment_volume_consumed_reddit',
        'sentiment_volume_consumed_total', 'sentiment_volume_consumed_twitter', 'sentiment_volume_consumed_twitter_crypto',
        'social_dominance_reddit', 'social_dominance_total', 'social_dominance_twitter', 'social_dominance_twitter_crypto',
        'social_volume_reddit', 'social_volume_total', 'social_volume_twitter', 'social_volume_twitter_crypto', 'stock_to_flow',
        'supply_on_exchanges', 'supply_outside_exchanges', 'total_supply', 'total_supply_in_profit', 'trader_balance',
        'traders_cex_balance', 'traders_defi_balance', 'traders_dex_balance', 'traders_exchange_balance', 'traders_to_cexes_flow',
        'traders_to_defi_flow', 'traders_to_dexes_flow', 'traders_to_exchanges_flow', 'traders_to_whale_flow',
        'traders_whale_balance', 'transaction_volume', 'transactions_count', 'twitter_followers', 'unique_social_volume_total_1h',
        'volume_usd', 'whale_balance', 'whale_cex_balance', 'whale_defi_balance', 'whale_dex_balance', 'whale_to_cexes_flow',
        'whale_to_defi_flow', 'whale_to_dex_traders_flow', 'whale_to_dexes_flow', 'whale_to_traders_flow', 'whales_exchange_balance',
        'whales_to_exchanges_flow', 'withdrawal_balance', 'withdrawal_transactions',
    ]

    # endpoint that lists the metrics available for one slug
    fetch_available_metrics = san.available_metrics_for_slug

    # master mapping: slug -> every metric Santiment reports for it
    assets_metrics_dict = {}
    for position, san_slug in enumerate(san_slug_universe, start=1):
        # report progress
        progress = position / len(san_slug_universe) * 100
        print(f"Processing asset number #{position} ({progress:.2f}%): {san_slug}")

        # ask Santiment which metrics exist for this slug
        result = callSanFunction(fetch_available_metrics, san_slug)
        if type(result) != list:
            # anything other than a list ends the scan early
            print(f"Did not obtain data for asset {san_slug}")
            break
        assets_metrics_dict[san_slug] = result
        time.sleep(0.1)

    # keep, per slug, only the available metrics that are of interest
    assets_metrics_to_pull_dict = {
        slug: [metric for metric in available if metric in asset_metrics_to_include]
        for slug, available in assets_metrics_dict.items()
    }

    # what to pull first, then the master list
    return assets_metrics_to_pull_dict, assets_metrics_dict

In [4]:
if __name__ == "__main__":
    # Input locations
    CW_IN_FP = '../data/derived/cm_to_coinapi_cw.pkl'
    ASSET_IN_FP = '../data/clean/asset_universe_dict.pickle'
    PANEL_DAILY_IN_FP = '../data/derived/basic_panel.pkl'
    API_FP = '../../admin/santiment.txt'

    # Study window
    STUDY_START = '2016-07-01'
    STUDY_END = '2023-01-02'

    # Output locations
    PANEL_OUT_FP = "../data/raw/san_panel.pkl"
    CW_OUT_FP = '../data/raw/san_coinmetrics_cw.pkl'
    MACRO_OUT_FP = '../data/raw/san_macro.pkl'

    # Load the existing crosswalk and the asset universe.
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    cw_df = pd.read_pickle(CW_IN_FP)
    with open(ASSET_IN_FP, "rb") as asset_file:
        asset_universe_dict = pickle.load(asset_file)
    asset_universe = Helper.findUniqueAssets(asset_universe_dict)

    # Read the API key from the first line of the key file and register it
    with open(API_FP) as key_file:
        API_KEY = key_file.readlines()[0].strip()
    san.ApiConfig.api_key = API_KEY

    # Show the remaining API call allowance
    print(san.api_calls_remaining())

    # Build and save the Santiment <-> Coinmetrics crosswalk
    san_df = formSantimentAssetUniverse(asset_universe)
    san_df.to_pickle(CW_OUT_FP)
    san_slug_universe = list(san_df.asset_san.values)

    # Determine, per slug, the metrics to pull and the metrics available
    assets_metrics_dict_to_pull, assets_metrics_dict = formAssetMetricsDicts(san_slug_universe)
        


{'month_remaining': '599610', 'hour_remaining': '29948', 'minute_remaining': '548'}


In [37]:
# Display the per-slug metrics selected for pulling (shown as this cell's output).
assets_metrics_dict_to_pull

{'1inch': ['active_addresses_1h',
  'active_deposits',
  'active_deposits_per_exchange',
  'active_holders_distribution_combined_balance_over_1',
  'active_holders_distribution_combined_balance_over_10',
  'active_holders_distribution_combined_balance_over_100',
  'active_holders_distribution_combined_balance_over_100k',
  'active_holders_distribution_combined_balance_over_10k',
  'active_holders_distribution_combined_balance_over_1M',
  'active_holders_distribution_combined_balance_over_1k',
  'active_holders_distribution_combined_balance_total',
  'active_holders_distribution_over_1',
  'active_holders_distribution_over_10',
  'active_holders_distribution_over_100',
  'active_holders_distribution_over_100k',
  'active_holders_distribution_over_10k',
  'active_holders_distribution_over_1M',
  'active_holders_distribution_over_1k',
  'active_holders_distribution_total',
  'active_withdrawals',
  'active_withdrawals_per_exchange',
  'age_consumed',
  'age_destroyed',
  'age_distribution

In [None]:
# TODO: loop over all metrics of interest and confirm each one returns isAccessible == True.
# For now this only fetches the metadata fields listed below for a single metric, 'age_consumed'.

metric_metadata = san.metadata(
    'age_consumed',
    arr=["availableSlugs", "defaultAggregation", "humanReadableName", "isAccessible", "isRestricted", "restrictedFrom", "restrictedTo"]
)


In [60]:
# loop over assets

In [61]:
# obtain metrics for this asset

In [62]:
# obtain start and end date for this asset from my asset universe panel

In [64]:
# pull at hourly level


In [69]:

# Try a single one-hour-interval call covering all the years of the study window
df = san.get(
    'age_consumed',
    slug="bitcoin",
    from_date="2016-07-01",
    to_date="2023-01-02",
    interval="1h"
)

# TODO: try the one-day call if the hourly call comes back as None

# TODO: continue on to the next metric if it comes back as None again

In [71]:
# clean up the data, including adding asset column

# append it to the master DataFrame

In [None]:
# scope other scripts for other things to do in here!

In [None]:



# TODO: figure out all possible macro metrics and cut down to the ones of interest.
# The call below only displays the result of san.available_metrics(); nothing is stored.

san.available_metrics()


In [None]:
# TODO: pull all the macro metrics to form the macro timeseries.
# Placeholder call: a single daily metric for one slug over a five-day window.

san.get(
    "daily_active_addresses",
    slug="santiment",
    from_date="2018-06-01",
    to_date="2018-06-05",
    interval="1d"
)


In [None]:

# TODO save the panel
# TODO save the macro data

In [22]:
# Candidate macro-level metric names, kept in their original order.
temp_macro_metrics = [
    # Aave v2
    'aave_v2_action_deposits_usd',
    'aave_v2_action_liquidations_usd',
    'aave_v2_action_new_debt_usd',
    'aave_v2_action_repayments_usd',
    'aave_v2_total_borrowed_usd',
    'aave_v2_total_deposits_usd',
    'aave_v2_total_liquidations_usd',
    'aave_v2_total_new_debt_usd',
    'aave_v2_total_repayments_usd',
    'aave_v2_total_supplied_usd',
    'aave_v2_stable_borrow_apy',
    'aave_v2_supply_apy',
    'aave_v2_variable_borrow_apy',
    # Compound (actions and balances)
    'compound_action_deposits_usd',
    'compound_action_liquidations_usd',
    'compound_action_new_debt_usd',
    'compound_action_repayments_usd',
    'compound_total_borrowed_usd',
    'compound_total_supplied_usd',
    # Dai / MCD, miners, DEX
    'dai_created',
    'dai_repaid',
    'mcd_liquidation',
    'mcd_collat_ratio',
    'mcd_locked_token',
    'miners_total_supply',
    'total_trade_volume_by_dex',
    # Compound (totals)
    'compound_total_deposits_usd',
    'compound_total_liquidations_usd',
    'compound_total_new_debt_usd',
    'compound_total_repayments_usd',
    # Fees and eth2
    'average_fees_usd',
    'eth2_roi',
    'eth2_stakers_count',
    'fees_usd',
    'median_fees_usd',
    # NFT
    'nft_retail_trade_volume_usd',
    'nft_retail_trades_count',
    'nft_trade_volume_usd',
    'nft_trades_count',
    'nft_whale_trade_volume_usd',
    'nft_whale_trades_count',
    # Uniswap claims
    'uniswap_claims_amount',
    'uniswap_lp_claims_amount',
    'uniswap_total_claims_amount',
    'uniswap_total_lp_claims_amount',
    'uniswap_total_user_claims_amount',
    'uniswap_user_claims_amount',
    # Miscellaneous
    'dex_volume_in_usd_5m',
    'total_assets_issued',
    # USDT derivatives
    'usdt_binance_funding_rate',
    'usdt_binance_open_interest',
    'usdt_binance_open_value',
    'usdt_bnb_funding_rates',
    'usdt_bnb_open_interest',
    'usdt_bnb_open_value',
]

In [126]:
# Macro metrics whose slugs are fixed by hand: metric name -> slugs to pull.
# Every value is its own list object so later edits to one entry cannot affect another.
macro_metric_slugs_dict = {
    'mcd_collat_ratio': [
        "wrapped-bitcoin", "gemini-dollar",
        "usd-coin", "paxos-standard",
        "decentraland", "trueusd",
        "yearn-finance", "chainlink",
        "weth", "balancer",
    ],
    'defi_total_value_locked_usd': ['ethereum'],
    'nft_trade_volume_usd': ['ethereum'],
    'nft_trades_count': ['ethereum'],
    'nft_retail_trade_volume_usd': ['ethereum'],
    'nft_whale_trade_volume_usd': ['ethereum'],
    'nft_whale_trades_count': ['ethereum'],
    'percent_of_whale_stablecoin_total_supply': ['ethereum'],
    'average_fees_usd': ['ethereum'],
    'fees_usd': ['ethereum'],
    'eth2_roi': ['ethereum'],
    'median_fees_usd': ['ethereum'],
    'miners_to_exchanges_flow': ['ethereum', 'bitcoin'],
    'miners_exchange_balance': ['ethereum', 'bitcoin'],
}

# Macro metrics whose available slugs still have to be looked up
macro_metrics = [
    'defi_to_dexes_flow',
    'defi_dex_balance',
    'cexes_to_dex_flow',
    'dexes_to_defi_flow',
    'defi_to_cexes_flow',
    'defi_to_exchanges_flow',
    'exchanges_to_defi_flow',
    'whale_to_defi_flow',
    'dex_traders_to_defi_flow',
    'whale_defi_balance',
    'mvrv_usd_intraday',
]

def obtainSlugsForMetric(metric):
    """Look up the slugs available for `metric` and publish them for the parent.

    The list is stored under the key 'san_slugs' in the module-level
    `san_slugs_for_metric_dict`, then the function pauses for 1.21 seconds.
    """
    metadata = san.metadata(metric, arr=['availableSlugs'])
    san_slugs_for_metric_dict['san_slugs'] = metadata['availableSlugs']
    time.sleep(1.21)

def obtainStartTimeforMetric(metric, san_slug):
    """Look up since when `metric` is available for `san_slug` and publish it.

    The value is stored under `san_slug` in the module-level
    `time_start_dict`, then the function pauses for 1.21 seconds.
    """
    available_since = san.available_metric_for_slug_since(metric=metric, slug=san_slug)
    time_start_dict[san_slug] = available_since
    time.sleep(1.21)

# Drop metrics that are not available early enough

# Slugs tried, in this order, as the probe asset when looking up a metric's start date.
PROBE_SLUG_PREFERENCE = ('bitcoin', 'ethereum', 'cardano', 'tether')
# One entry per attempt: (seconds to wait, message printed if the attempt times out).
API_CALL_ATTEMPTS = ((2, "api call is still running... let's kill it..."),
                     (3, 'api call failed twice\n'))
# Metrics whose history starts after this year are dropped.
LATEST_START_YEAR = 2017


def _runApiCallWithTimeouts(target, args, attempts=API_CALL_ATTEMPTS):
    """Run `target(*args)` in a child process, once per entry in `attempts`.

    Each attempt waits at most its timeout for the child to exit; a child
    that is still alive is terminated and the next attempt starts.

    Returns:
        bool: True if some attempt's child exited within its timeout, False
            if every attempt timed out. A child that exited because it raised
            also counts as exited, so callers must check that a result was
            actually written.
    """
    for timeout_s, timeout_message in attempts:
        proc = multiprocessing.Process(target=target, args=args)
        proc.start()
        proc.join(timeout_s)
        if not proc.is_alive():
            return True
        print(timeout_message)
        proc.terminate()
        proc.join()  # reap the terminated child
    return False


failed_pulls_list_macro            = []
failed_pulls_time_start_list_macro = []
# NOTE(review): the worker functions write to module-level manager dicts, which
# the child only sees when processes are forked -- confirm the start method.
manager = multiprocessing.Manager()
num_metrics = len(macro_metrics)
for i, metric in enumerate(macro_metrics):
    print(metric)
    print('Completed ' + str(np.round(i/num_metrics * 100, 2)) + '% of the metrics.')

    # Obtain the slugs that are available for the metric
    san_slugs_for_metric_dict = manager.dict()
    finished = _runApiCallWithTimeouts(obtainSlugsForMetric, (metric, ))
    if not finished or 'san_slugs' not in san_slugs_for_metric_dict:
        if finished:
            # the child exited without writing a result (e.g. the API call raised)
            print('api call returned no slugs\n')
        # record on the list that is printed below (the old name was undefined)
        failed_pulls_list_macro.append(metric)
        continue
    san_slugs_for_metric = san_slugs_for_metric_dict['san_slugs']

    # Pick the first preferred slug that actually has this metric. The old
    # 'tether' branch selected 'decentraland', a slug never checked for availability.
    san_slug = next((slug for slug in PROBE_SLUG_PREFERENCE if slug in san_slugs_for_metric), None)
    if san_slug is None:
        print(san_slugs_for_metric)
        raise AssertionError('No preferred probe slug is available for ' + metric)

    # Obtain the time the metric starts
    time_start_dict = manager.dict()
    finished = _runApiCallWithTimeouts(obtainStartTimeforMetric, (metric, san_slug))
    if not finished or san_slug not in time_start_dict:
        if finished:
            print('api call returned no start time\n')
        failed_pulls_time_start_list_macro.append(metric)
        continue
    time_start = dt.datetime.strptime(time_start_dict[san_slug], '%Y-%m-%dT%H:%M:%SZ')

    # Keep the metric only if its history starts early enough
    if time_start.year > LATEST_START_YEAR:
        print('We are losing ' + metric + ' given it starts in ' + str(time_start.year) + '.')
    else:
        macro_metric_slugs_dict[metric] = san_slugs_for_metric

    time.sleep(2)
    print('\n')

print(failed_pulls_list_macro)
print(failed_pulls_time_start_list_macro)


defi_to_dexes_flow
Completed 0.0% of the metrics.
api call is still running... let's kill it...
api call is still running... let's kill it...
We are losing defi_to_dexes_flow given it starts in 2018.


defi_dex_balance
Completed 9.09% of the metrics.
api call is still running... let's kill it...
api call is still running... let's kill it...
We are losing defi_dex_balance given it starts in 2018.


cexes_to_dex_flow
Completed 18.18% of the metrics.


dexes_to_defi_flow
Completed 27.27% of the metrics.
api call is still running... let's kill it...
api call is still running... let's kill it...
We are losing dexes_to_defi_flow given it starts in 2018.


defi_to_cexes_flow
Completed 36.36% of the metrics.
api call is still running... let's kill it...
We are losing defi_to_cexes_flow given it starts in 2018.


defi_to_exchanges_flow
Completed 45.45% of the metrics.
api call is still running... let's kill it...
api call is still running... let's kill it...
We are losing defi_to_exchanges_flow

In [128]:
# Obtain the macro timeseries data
def obtainTimeseriesData(metric, san_slug):
    """Pull the daily series of `metric` for `san_slug` and publish it for the parent.

    Requests 2015-01-01 through 2022-01-07 at a one-day interval, pauses
    briefly, and, when the result is non-empty, writes the dates, the slug
    column and the values into the module-level `metric_dict` under the keys
    'date', 'san_slug' and `metric`. An empty result leaves `metric_dict`
    untouched.
    """
    query = '/'.join((metric, san_slug))
    temp_df = san.get(query,
                      from_date="2015-01-01",
                      to_date="2022-01-07",
                      interval="1d")
    time.sleep(.121)
    if temp_df.empty:
        return

    temp_df['san_slug'] = san_slug
    metric_dict['date'] = temp_df.index.values
    metric_dict['san_slug'] = temp_df['san_slug'].values
    metric_dict[metric] = temp_df['value'].values

# Initialize dataframe to store all results
macro_df = pd.DataFrame(data={'date': [],
                              'san_slug': []})

# Initialize dictionary to keep track of the data we missed
failed_pulls_dict = {}

# One entry per attempt: (seconds to wait, message printed if the attempt times out)
TIMESERIES_CALL_ATTEMPTS = ((3, "api call is still running... let's kill it..."),
                            (4, 'api call failed twice'),
                            (5, 'api call failed thrice\n'))

# One manager serves every attempt; a fresh shared dict is created per attempt.
# NOTE(review): obtainTimeseriesData writes to the module-level `metric_dict`,
# which the child only sees when processes are forked -- confirm the start method.
manager = multiprocessing.Manager()

# Loop over all the metrics to pull the data
num_metrics = len(macro_metric_slugs_dict)
for i, (metric, available_slugs) in enumerate(list(macro_metric_slugs_dict.items())):
    print(metric)
    print('Completed ' + str(np.round(i/num_metrics * 100, 2)) + '% of the metrics.\n')
    failed_pulls_dict[metric] = []

    # Work on a copy so dropping a slug does not alter macro_metric_slugs_dict
    san_slugs = list(available_slugs)

    # Drop wrapped-bitcoin
    if ('bitcoin' in san_slugs) and ('wrapped-bitcoin' in san_slugs):
        san_slugs.remove('wrapped-bitcoin')

    # Loop over all the slugs to pull the data
    slug_frames = []
    num_slugs = len(san_slugs)
    for j, san_slug in enumerate(san_slugs):
        print('Completed ' + str(np.round(j/num_slugs * 100, 2)) + '% of the slugs for this metric.')

        # Run the call in a child process, allowing a longer wait on each retry
        finished = False
        for timeout_s, timeout_message in TIMESERIES_CALL_ATTEMPTS:
            metric_dict = manager.dict()
            p = multiprocessing.Process(target=obtainTimeseriesData, args=(metric, san_slug))
            p.start()
            # join returns as soon as the child exits, or after the timeout
            p.join(timeout_s)
            if not p.is_alive():
                finished = True
                break
            print(timeout_message)
            p.terminate()
            p.join()  # reap the terminated child
        if not finished:
            failed_pulls_dict[metric].append(san_slug)
            continue

        # Keep results for this slug and metric only if the pull was successful
        if len(metric_dict.keys()) >= 1:
            slug_frames.append(pd.DataFrame(data={'date': metric_dict['date'],
                                                  'san_slug': metric_dict['san_slug'],
                                                  metric: metric_dict[metric]}))
        else:
            failed_pulls_dict[metric].append(san_slug)
            print('For '+san_slug+' pull of '+metric+', we received no data so it is skipped.\n')

    if slug_frames:
        # Concatenate once per metric; DataFrame.append no longer exists in pandas 2
        metric_df = pd.concat(slug_frames)
        assert(0==metric_df[metric_df.duplicated(subset=['date', 'san_slug'])].shape[0]),('duped rows')

        # Merge this metric's dataframe onto the master df
        macro_df = macro_df.merge(metric_df,
                                  on=['date', 'san_slug'],
                                  how='outer',
                                  validate='one_to_one')
    else:
        # Nothing to merge: an empty frame has no 'date'/'san_slug' keys
        print('No data was pulled for ' + metric + ', so it is not merged.')

    # Space out prints
    print('\n\n\n')

mcd_collat_ratio
Completed 0.0% of the metrics.

Completed 0.0% of the slugs for this metric.
Completed 10.0% of the slugs for this metric.
Completed 20.0% of the slugs for this metric.
Completed 30.0% of the slugs for this metric.
Completed 40.0% of the slugs for this metric.
Completed 50.0% of the slugs for this metric.
Completed 60.0% of the slugs for this metric.
Completed 70.0% of the slugs for this metric.
Completed 80.0% of the slugs for this metric.
Completed 90.0% of the slugs for this metric.




scd_collat_ratio
Completed 4.76% of the metrics.

Completed 0.0% of the slugs for this metric.




defi_total_value_locked_usd
Completed 9.52% of the metrics.

Completed 0.0% of the slugs for this metric.




nft_trade_volume_usd
Completed 14.29% of the metrics.

Completed 0.0% of the slugs for this metric.




nft_trades_count
Completed 19.05% of the metrics.

Completed 0.0% of the slugs for this metric.




nft_retail_trade_volume_usd
Completed 23.81% of the metrics.

Completed 0.0

Completed 94.44% of the slugs for this metric.
Completed 95.37% of the slugs for this metric.
api call is still running... let's kill it...
Completed 96.3% of the slugs for this metric.
Completed 97.22% of the slugs for this metric.
Completed 98.15% of the slugs for this metric.
Completed 99.07% of the slugs for this metric.




dex_traders_to_defi_flow
Completed 85.71% of the metrics.

Completed 0.0% of the slugs for this metric.
Completed 0.83% of the slugs for this metric.
Completed 1.67% of the slugs for this metric.
Completed 2.5% of the slugs for this metric.
Completed 3.33% of the slugs for this metric.
Completed 4.17% of the slugs for this metric.
Completed 5.0% of the slugs for this metric.
Completed 5.83% of the slugs for this metric.
Completed 6.67% of the slugs for this metric.
Completed 7.5% of the slugs for this metric.
Completed 8.33% of the slugs for this metric.
Completed 9.17% of the slugs for this metric.
Completed 10.0% of the slugs for this metric.
Completed 10.83%

Completed 30.66% of the slugs for this metric.
Completed 31.39% of the slugs for this metric.
Completed 32.12% of the slugs for this metric.
Completed 32.85% of the slugs for this metric.
Completed 33.58% of the slugs for this metric.
Completed 34.31% of the slugs for this metric.
Completed 35.04% of the slugs for this metric.
Completed 35.77% of the slugs for this metric.
Completed 36.5% of the slugs for this metric.
Completed 37.23% of the slugs for this metric.
Completed 37.96% of the slugs for this metric.
Completed 38.69% of the slugs for this metric.
Completed 39.42% of the slugs for this metric.
Completed 40.15% of the slugs for this metric.
Completed 40.88% of the slugs for this metric.
Completed 41.61% of the slugs for this metric.
Completed 42.34% of the slugs for this metric.
Completed 43.07% of the slugs for this metric.
Completed 43.8% of the slugs for this metric.
Completed 44.53% of the slugs for this metric.
Completed 45.26% of the slugs for this metric.
api call is sti

In [133]:
# CLEAN THE MACRO DATA
# Collapses the per-slug macro_df into one row per date (the rows tagged
# san_slug == 'macro') holding market-cap-weighted averages, cross-token sums
# and single-token series.
# NOTE(review): `ts_df` is not created in any cell visible here -- it must be
# loaded or built before this cell runs. This cell also drops columns from
# `ts_df`, so it cannot be re-run without rebuilding `ts_df` first.

# Move over some data from the timeseries df to the macro df
moved_columns = ['stock_to_flow', 'traders_to_defi_flow', 'traders_defi_balance']
temp_df = ts_df[['date', 'san_slug'] + moved_columns]
ts_df = ts_df.drop(moved_columns, axis=1)
macro_df = macro_df.merge(temp_df,
                          on=['date', 'san_slug'],
                          how='outer',
                          validate='one_to_one')

# Form mcap weighted average variables
mcap_avg_columns = ['stock_to_flow', 'mvrv_usd_intraday', 'mcd_collat_ratio']
temp_df = macro_df[['date', 'san_slug'] + mcap_avg_columns]
temp_df = temp_df.dropna(how='all', subset=mcap_avg_columns)
stf_tokens  = list(np.unique(temp_df[~temp_df.stock_to_flow.isnull()].san_slug.values))
mvrv_tokens = list(np.unique(temp_df[~temp_df.mvrv_usd_intraday.isnull()].san_slug.values))
mcd_tokens  = list(np.unique(temp_df[~temp_df.mcd_collat_ratio.isnull()].san_slug.values))
# The undefined `scd_tokens` was removed from this list. It could not change the
# result: the inner merge below only keeps slugs that appear in temp_df.
mcap_needed_tokens = list(np.unique(np.array(stf_tokens + mvrv_tokens + mcd_tokens)))
temp_mcap_df = ts_df[ts_df.san_slug.isin(mcap_needed_tokens)][['date', 'san_slug', "marketcap_usd"]]
temp_df = temp_df.merge(temp_mcap_df,
                        on=['date', 'san_slug'],
                        how='inner',
                        validate='one_to_one')
for col in mcap_avg_columns:
    # Copy so the new column is not written into a slice of temp_df
    mcap_avg_temp_df = temp_df[['date', 'san_slug', col, 'marketcap_usd']].dropna().copy()
    # Weight each token by its share of that date's total market cap
    total_mcap = mcap_avg_temp_df.groupby('date')['marketcap_usd'].transform('sum')
    mcap_avg_temp_df['temp'] = mcap_avg_temp_df.marketcap_usd / total_mcap * mcap_avg_temp_df[col]
    mcap_avg_temp_df = mcap_avg_temp_df.groupby('date')[['temp']].sum()
    mcap_avg_temp_df['san_slug'] = 'macro'
    mcap_avg_temp_df = mcap_avg_temp_df.reset_index()
    mcap_avg_temp_df = mcap_avg_temp_df.rename(columns = {'temp': ('santiment_token_mcap_avg_'+col)})
    macro_df = macro_df.merge(mcap_avg_temp_df,
                              on=['date', 'san_slug'],
                              how='outer',
                              validate='one_to_one')

# Form columns of dollar sum across tokens
sum_columns = ['cexes_to_dex_flow', 'exchanges_to_defi_flow', 'whale_to_defi_flow', 'dex_traders_to_defi_flow',
               'whale_defi_balance', 'traders_to_defi_flow', 'traders_defi_balance']
for col in sum_columns:
    temp_df = macro_df[['date', col]]
    temp_df = temp_df.groupby('date')[[col]].sum()
    temp_df = temp_df.rename(columns={col: 'santiment_token_sum_'+col})
    temp_df['san_slug'] = 'macro'
    temp_df = temp_df.reset_index()
    macro_df = macro_df.merge(temp_df,
                              on=['date', 'san_slug'],
                              how='outer',
                              validate='one_to_one')

# Form macro variables from column with just bitcoin and/or ethereum
columns = ['defi_total_value_locked_usd', 'nft_trade_volume_usd',
           'nft_trades_count', 'nft_retail_trade_volume_usd',
           'nft_whale_trade_volume_usd', 'nft_whale_trades_count',
           'percent_of_whale_stablecoin_total_supply',
           'average_fees_usd', 'fees_usd', 'eth2_roi', 'median_fees_usd',
           'miners_to_exchanges_flow', 'miners_exchange_balance',
           'traders_to_defi_flow', 'traders_defi_balance',
           'mvrv_usd_intraday', 'stock_to_flow']
# Columns that exist for many tokens but are kept for ethereum only
ethereum_only_columns = ['traders_defi_balance', 'traders_to_defi_flow',
                         'mvrv_usd_intraday', 'stock_to_flow']
for col in columns:
    temp_df = macro_df[['date', 'san_slug', col]]
    temp_df = temp_df.dropna()
    btc_eth_token = list(np.unique(temp_df.san_slug.values))
    if col in ethereum_only_columns:
        btc_eth_token = ['ethereum']
    for token in btc_eth_token:
        # assign returns a new frame, so the filtered slice is never written to
        temp_token_df = temp_df[temp_df.san_slug == token].assign(san_slug='macro')
        temp_token_df = temp_token_df.rename(columns = {col: 'santiment_'+token+'_'+col})
        macro_df = macro_df.merge(temp_token_df,
                                  on=['date', 'san_slug'],
                                  how='outer',
                                  validate='one_to_one')

# Drop sum_columns and columns from macro_df; the two lists overlap, so
# de-duplicate the labels (order preserved) before dropping
macro_df = macro_df.drop(list(dict.fromkeys(sum_columns + columns)), axis=1)
macro_df = macro_df.drop(['mcd_collat_ratio'], axis=1)

# Keep just the new rows
macro_df = macro_df[macro_df.san_slug == 'macro']
macro_df = macro_df.drop('san_slug', axis=1)

# Clean it up: sort first, then renumber, so the index follows date order
macro_df = macro_df.sort_values(by='date', ignore_index=True)

In [246]:
# SAVE DATA
# NOTE(review): these paths point at '../3-data/raw/', while the *_OUT_FP
# constants set in the driver cell point at '../data/raw/' -- confirm which
# location is intended.
santiment_panel_fp = '../3-data/raw/santiment_panel.pkl'
santiment_macro_fp = '../3-data/raw/santiment_macro.pkl'
santiment_cw_fp = '../3-data/raw/santiment_cw.pkl'

# Write the asset panel, the macro series and the crosswalk, in that order
ts_df.to_pickle(santiment_panel_fp)
macro_df.to_pickle(santiment_macro_fp)
cw_df.to_pickle(santiment_cw_fp)