In [1]:
from typing import Dict, List
import statsmodels.api as sm
import pandas as pd
import numpy as np
import pickle


In [2]:
def subsetByMonthToAssetUniverse(panel_df: pd.DataFrame, asset_universe_dict: Dict[str, List[str]]) -> pd.DataFrame:
    """
    Subset the panel data to the assets in each month (key) of asset_universe_dict.

    For every key, a row is kept when its 'date' falls in the same calendar
    year and month as the key and its 'asset' is in that key's list. The
    monthly pieces are stacked in the dictionary's iteration order and the
    result gets a fresh 0..n-1 index.

    Args:
        panel_df: A Pandas DataFrame containing panel data at the asset-hour level
                    with ID columns 'date' (datetime dtype) and 'asset'.
        asset_universe_dict: A dictionary with keys as dates in the format YYYY-MM-DD
                                and values as lists of asset strings.

    Returns: A Pandas DataFrame without the rows not included in the study.
             If asset_universe_dict is empty, an empty frame with panel_df's
             columns is returned.
    """
    # The year/month of each row do not depend on the key, so compute them once.
    row_years = panel_df['date'].dt.year
    row_months = panel_df['date'].dt.month

    # Collect the monthly pieces and concatenate once at the end;
    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
    monthly_subsets = []
    for date_key, asset_list in asset_universe_dict.items():
        key_ts = pd.to_datetime(date_key)
        in_month = (row_years == key_ts.year) & (row_months == key_ts.month)
        monthly_subsets.append(panel_df[in_month & panel_df['asset'].isin(asset_list)])

    # pd.concat raises on an empty list, so handle the empty universe explicitly.
    if not monthly_subsets:
        return panel_df.iloc[0:0].reset_index(drop=True)

    return pd.concat(monthly_subsets, ignore_index=True)

def subsetToWeeklyFreq(df: pd.DataFrame) -> pd.DataFrame:
    """ Keep only the rows of df whose datetime column "date"
        falls on a Sunday at exactly 00:00:00. """
    midnight = pd.Timestamp('00:00:00').time()
    timestamps = df['date']

    on_sunday = timestamps.dt.day_name() == 'Sunday'
    at_midnight = timestamps.dt.time == midnight

    return df[on_sunday & at_midnight]
            
def setMissingIfIncomplete(panel_df: pd.DataFrame, return_col: str, hours_to_check: int) -> pd.DataFrame:
    """
    Set the specified return column to missing (np.nan) if the DataFrame is missing any of the previous hours
    specified by hours_to_check for each asset.

    A row passes the check when the row hours_to_check positions earlier for
    the same asset is timestamped exactly hours_to_check hours before it.
    The comparison is positional, so rows are expected to be ordered by
    'date' within each asset. The first hours_to_check rows of every asset
    have no such earlier row and are therefore always set to missing.

    Args:
        panel_df (pd.DataFrame): The input DataFrame with columns 'asset', 'date' and return_col.
                                 It is modified in place.
        return_col (str): The name of the column to set to missing (np.nan) if any of the previous hours are missing
        hours_to_check (int): The number of previous hours to check for.

    Returns: The DataFrame with the return column set to missing if any of the previous hours are missing
    """
    # Timestamp hours_to_check rows back within the same asset (NaT where there is none).
    lagged_date = panel_df.groupby('asset')['date'].shift(hours_to_check)

    # Elapsed time over those rows. Computed as a standalone Series so that no
    # temporary column is written to (or dropped from) the caller's frame.
    elapsed = panel_df['date'] - lagged_date

    # NaT compares unequal to any Timedelta, so rows without enough history are flagged too.
    incomplete = elapsed != pd.Timedelta(hours=hours_to_check)

    panel_df.loc[incomplete, return_col] = np.nan

    return panel_df

def formNewColumnByAsset(panel_df: pd.DataFrame, target_col: str, new_col: str, range_hours: int, func) -> pd.DataFrame:
    """ Adds a new column to a Pandas DataFrame containing panel data at the asset-hour level.
    The new column is calculated by applying a function to a range of previous values for each asset.
    Any values that do not have the previous range_hours are reset to missing (np.nan).

    Args:
        panel_df: Pandas DataFrame containing the panel data. It is modified in place.
        target_col: Name of the column to apply the given function to.
        new_col: Name of the new column to add.
        range_hours: Number of rows in the rolling window (the current row and the
                     range_hours - 1 rows before it, within the same asset).
        func: Function to apply to the range of values for each asset. It receives
              each window as a pandas Series and must return a scalar.

    Returns: The modified panel_df with the new column added.
    """
    # transform() hands back a Series carrying panel_df's own index, so every
    # rolling value lands on the row it was computed for. The previous
    # groupby.apply(...).values assignment was positional in group-sorted order
    # and misaligned the column whenever panel_df was not already sorted by asset.
    rolled = panel_df.groupby('asset')[target_col].transform(
        lambda asset_values: asset_values.rolling(range_hours).apply(func)
    )

    # Add the new column to the DataFrame
    panel_df[new_col] = rolled

    # Reset values whose look-back window is not fully present
    panel_df = setMissingIfIncomplete(panel_df, new_col, range_hours)

    return panel_df


In [3]:
if __name__ == "__main__":
    # File locations: two inputs, two (not yet written) outputs
    ASSET_IN_FP         = '../data/clean/asset_universe_dict.pickle'
    PANEL_IN_FP         = '../data/clean/panel.pkl'
    WEEKLY_PANEL_OUT_FP = '../data/clean/weekly_panel.pkl'
    HOURLY_PANEL_OUT_FP = '../data/clean/hourly_panel.pkl'

    # Load the hourly panel and the month -> asset list universe.
    # NOTE: unpickling executes arbitrary code; only load files you produced yourself.
    panel_df = pd.read_pickle(PANEL_IN_FP)
    with open(ASSET_IN_FP, "rb") as asset_file:
        asset_universe_dict = pickle.load(asset_file)

    # Building and saving the weekly panel is still disabled:
    # weekly_df = formWeeklyPanel(panel_df)
    # weekly_df.to_pickle(WEEKLY_PANEL_OUT_FP)

In [None]:
# TODO FEAT ENG FUNCTIONS I WANT:

# REPORT CORR BETWEEN TWO COLUMNS
# -OVERALL
# -BY YEAR
# -# SIGN CHANGES ACROSS YEARS

# REPORT MI BETWEEN TWO COLUMNS
# -OVERALL
# -BY YEAR
# -# SIGN CHANGES ACROSS YEARS

# REPORT AVG DIFF BTWN AVG RET OF TOP AND BOTTOM TERTILE AS SORTED BY COL

# REPORT CORR WITH OTHER RHS COLS WHERE I JUST REPORT THE CORR IF ABOVE 0.8 IN ABS

# Lagged correlation: pair each column's value in row i with price_usd in row i + 1.
# NOTE(review): `df` is not defined anywhere in this notebook's visible cells — confirm its source.
series2 = df.price_usd[1:].values
for col in df.columns.values[1:-2]:
    series1 = df[col][:-1].values

    # True where neither series is NaN at that position
    non_missing_indices = ~np.isnan(series1) & ~np.isnan(series2)

    # 2x2 correlation matrix over the jointly observed positions
    corr_matrix = np.corrcoef(series1[non_missing_indices], series2[non_missing_indices])

    # The off-diagonal entry is the correlation between the two series
    corr_coef = corr_matrix[0, 1]

    print(col)
    print(f"Correlation coefficient: {corr_coef}")



In [None]:
def formStaticCols(panel_df: pd.DataFrame) -> pd.DataFrame:
    """ Select 'date', 'asset' and the static characteristic columns from the
    panel and keep only the Sunday-midnight rows.

    Args:
        panel_df: Panel DataFrame holding 'date', 'asset' and every column listed below.

    Returns: DataFrame with the ID columns and static columns at weekly frequency.
    """
    industry_cols = [
        'char_industry_asset_mgmt',
        'char_industry_cex',
        'char_industry_cloud_compute',
        'char_industry_currency',
        'char_industry_data_mgmt',
        'char_industry_dex',
        'char_industry_gaming',
        'char_industry_infra',
        'char_industry_interop',
        'char_industry_lending',
        'char_industry_media',
        'char_industry_other_defi',
        'char_industry_smart_contract',
    ]
    usage_cols = [
        'char_asset_usage_access',
        'char_asset_usage_discount',
        'char_asset_usage_dividends',
        'char_asset_usage_payments',
        'char_asset_usage_vote',
        'char_asset_usage_work',
    ]
    other_cols = ['char_pow', 'char_pos', 'char_ico_price', 'char_ico']

    # ID columns first, then the characteristics in their original order
    keep_cols = ['date', 'asset'] + industry_cols + usage_cols + other_cols

    # Column subset, then Sunday-midnight rows only
    return subsetToWeeklyFreq(panel_df[keep_cols])


In [None]:
def formDescStatCols(panel_df: pd.DataFrame) -> pd.DataFrame:
    """ Select 'date', 'asset' and the descriptive-statistic columns from the
    panel and keep only the Sunday-midnight rows.

    Args:
        panel_df: Panel DataFrame holding 'date', 'asset' and the six columns listed below.

    Returns: DataFrame with the ID columns and descriptive columns at weekly frequency.
    """
    id_cols = ['date', 'asset']
    desc_cols = [
        'char_price_global_t',
        'char_volume_24h_global_t',
        'char_ico_days_since_t',
        'char_vc_t',
        'char_rank_cmc_t',
        'char_num_pairs_t',
    ]

    # Column subset, then Sunday-midnight rows only
    return subsetToWeeklyFreq(panel_df[id_cols + desc_cols])

In [None]:
def formMomentumCols(panel_df: pd.DataFrame) -> pd.DataFrame:
    """ Form trailing simple returns of 'char_price_t' for each asset and keep
    the Sunday-midnight rows.

    Each column is price(t - skip) / price(t - skip - span) - 1, with both
    prices taken from the same asset. A value is set to missing unless the
    row (skip + span) positions earlier is timestamped exactly that many
    hours earlier (see setMissingIfIncomplete), so rows are expected to be
    ordered by 'date' within each asset.

    Fixes relative to the previous version:
      * All columns are built on one frame. Previously every intermediate
        result was the same mutated object, which was then merged with itself.
      * Returns span the full horizon. A rolling window of h rows only covers
        h - 1 hours, which made 'char_r_tm1h' identically zero.
      * The skipped-period columns look backwards. They were shifted by a
        negative amount, which pulled in future prices.
      * 'char_r_tm90_tm30' is computed from prices. It was read from a column
        that did not exist yet.

    Args:
        panel_df: Hourly panel with columns 'date', 'asset' and 'char_price_t'.

    Returns: DataFrame with 'date', 'asset' and the 'char_r_*' columns at weekly frequency.
    """
    # (new column, span of the return in hours, hours skipped before t)
    return_specs = [
        ('char_r_tm1h',        1,    0),
        ('char_r_tm1',         24,   0),
        ('char_r_tm7',         168,  0),
        ('char_r_tm14',        336,  0),
        ('char_r_tm30',        720,  0),
        ('char_r_tm60',        1440, 0),
        ('char_r_tm90',        2160, 0),
        # Returns over an earlier period: from t-14d to t-7d, t-30d to t-14d, t-90d to t-30d
        ('char_r_tm14_tm7',    168,  168),
        ('char_r_tm30_tm14',   384,  336),
        ('char_r_tm90_tm30',   1440, 720),
    ]

    # Work on a copy so the caller's panel is left untouched
    mom_df = panel_df[['date', 'asset', 'char_price_t']].copy()

    for new_col, span_hours, skip_hours in return_specs:
        lookback_hours = skip_hours + span_hours
        prices = mom_df.groupby('asset')['char_price_t']

        # Positive shifts read earlier rows only, so no future price is used.
        end_price = prices.shift(skip_hours)
        start_price = prices.shift(lookback_hours)
        mom_df[new_col] = end_price / start_price - 1

        # Blank the value when the look-back rows do not cover exactly lookback_hours.
        mom_df = setMissingIfIncomplete(mom_df, new_col, lookback_hours)

    mom_df = mom_df.drop(columns=['char_price_t'])

    # subset to weekly freq for Sunday midnight
    mom_df = subsetToWeeklyFreq(mom_df)

    return mom_df
        

In [None]:
def formCmktCol(panel_df: pd.DataFrame, mom_df: pd.DataFrame, asset_universe_dict: Dict[str, List[str]]) -> pd.DataFrame:
    """ Form the market-cap-weighted average of 'char_r_tm7' across the asset
    universe for each Sunday-midnight date.

    Rows with a missing return or a missing market cap are left out of both
    the numerator and the denominator. Previously such a row still added its
    market cap to the denominator, which pulled the average towards zero.

    Args:
        panel_df: Panel with columns 'date', 'asset' and 'char_mcap_t'.
        mom_df: DataFrame with columns 'date', 'asset' and 'char_r_tm7'.
        asset_universe_dict: Month (YYYY-MM-DD) -> list of assets in the universe.

    Returns: DataFrame with one row per date and columns 'date' and
             'macro_cmkt_t'. (The previous version returned an unnamed Series
             indexed by date, which cannot be merged on 'date'.)
    """
    # Market caps for the assets that are in the universe in each month
    universe_mcap_df = subsetByMonthToAssetUniverse(panel_df[['date', 'asset', 'char_mcap_t']],
                                                    asset_universe_dict)

    # Attach the seven-day return to every in-universe asset-date
    weekly_df = mom_df[['date', 'asset', 'char_r_tm7']].merge(
        universe_mcap_df, on=['date', 'asset'], how='inner', validate='one_to_one')

    # subset to relevant dates
    weekly_df = subsetToWeeklyFreq(weekly_df)

    # Only rows with both a return and a weight can enter the weighted average
    weekly_df = weekly_df.dropna(subset=['char_r_tm7', 'char_mcap_t'])
    weekly_df = weekly_df.assign(temp_weighted_r=weekly_df['char_r_tm7'] * weekly_df['char_mcap_t'])

    # Weighted mean per date = sum(r * mcap) / sum(mcap)
    date_sums = weekly_df.groupby('date')[['temp_weighted_r', 'char_mcap_t']].sum()
    cmkt = date_sums['temp_weighted_r'] / date_sums['char_mcap_t']

    # Name the series and move 'date' back into a column
    cmkt_df = cmkt.rename('macro_cmkt_t').reset_index()

    return cmkt_df

In [None]:


def formCumRetCols(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds two new columns to the input DataFrame containing panel data at the asset-hour level.
    The new columns are 'char_r_ath_t' and 'char_r_atl_t', representing the cumulative return since
    each asset's historical all-time high price and all-time low price, respectively.

    The running high/low is taken over the rows of each asset in the order
    they appear in df, up to and including the current row.

    Args:
        df (pd.DataFrame): The input DataFrame containing panel data at the asset-hour level,
                           with columns 'asset' and 'char_price_t'. It is modified in place.

    Returns:
        pd.DataFrame: The modified DataFrame with the new columns added.
    """
    prices_by_asset = df.groupby('asset')['char_price_t']

    # The running extremes are used directly instead of being stored in
    # temporary 'cummax_price'/'cummin_price' columns. Those were dropped at the
    # end, which also deleted any caller column that happened to share the name.
    df['char_r_ath_t'] = df['char_price_t'] / prices_by_asset.cummax() - 1
    df['char_r_atl_t'] = df['char_price_t'] / prices_by_asset.cummin() - 1

    return df

def formFinancialCols(panel_df: pd.DataFrame, 
                        mom_df: pd.DataFrame, cmkt_df: pd.DataFrame) -> pd.DataFrame:
    """ Form the financial characteristics from the panel, the hourly return
    and the market return.

    Fixes relative to the previous version: the docstring was a malformed
    string literal that stopped the cell from parsing, and the function fell
    off the end without returning the frame it built.

    Args:
        panel_df: Panel with columns 'date', 'asset', 'char_price_t' and 'char_mcap_t'.
        mom_df: DataFrame with columns 'date', 'asset' and 'char_r_tm1h'.
        cmkt_df: Market return keyed by 'date' (a DataFrame with a 'date' column,
                 or a Series indexed by date).

    Returns: DataFrame with the ID columns, the merged inputs and the new
             'char_*' columns.
    """
    # subset to needed columns
    fin_df = panel_df[['date', 'asset', 'char_price_t', 'char_mcap_t']].copy()

    # A Series cannot be merged on 'date' unless it is named and 'date' is a column.
    if isinstance(cmkt_df, pd.Series):
        cmkt_df = cmkt_df.rename('macro_cmkt_t').rename_axis('date').reset_index()

    # merge on new data
    fin_df = fin_df.merge(cmkt_df, on='date', how='left', validate='many_to_one') 
    # TODO THINK THRU HOW THIS CUTS OUT DATES
    # -DO I MB WANT TO FORM MACRO_CMKT_T FOR THE WHOLE 
    # NOTE(review): formMomentumCols returns Sunday-midnight rows only, so after this
    # left merge 'char_r_tm1h' is NaN on every other hour and the rolling statistics
    # below see mostly-missing windows. Confirm an hourly return series is passed here.
    fin_df = fin_df.merge(mom_df[['date', 'asset', 'char_r_tm1h']], on=['date', 'asset'], how='left', validate='one_to_one')

    # form characteristics
    fin_df = fin_df.rename(columns={'char_mcap_t': 'char_size_t'})

    # (new column, window in hours, statistic of the hourly returns in the window)
    max_and_vol_specs = [
        ('char_r_max_tm1',  24,   np.max),
        ('char_r_max_tm7',  168,  np.max),
        ('char_r_max_tm30', 720,  np.max),
        ('char_vol_tm1',    24,   np.std),
        ('char_vol_tm7',    168,  np.std),
        ('char_vol_tm30',   720,  np.std),
        ('char_vol_tm90',   2160, np.std),
    ]
    tail_risk_specs = [
        ('char_var5_tm7',        168,  lambda x: x.quantile(0.05)),
        ('char_var5_tm90',       2160, lambda x: x.quantile(0.05)),
        ('char_shortfall5_tm7',  168,  lambda x: x[x < x.quantile(0.05)].mean()),
        ('char_shortfall5_tm90', 2160, lambda x: x[x < x.quantile(0.05)].mean()),
    ]

    for new_col, range_hours, func in max_and_vol_specs:
        fin_df = formNewColumnByAsset(fin_df, target_col='char_r_tm1h', new_col=new_col,
                                      range_hours=range_hours, func=func)

    # Days elapsed since the asset's first row in the panel
    fin_df['char_tradable_t'] = (fin_df['date'] - fin_df.groupby('asset')['date'].transform('min')).dt.total_seconds() / 3600 / 24

    for new_col, range_hours, func in tail_risk_specs:
        fin_df = formNewColumnByAsset(fin_df, target_col='char_r_tm1h', new_col=new_col,
                                      range_hours=range_hours, func=func)

    fin_df = formCumRetCols(fin_df)

    # TODO drop the cols from cmkt and 'char_r_tm1h'

    return fin_df

In [None]:
# FINANCIAL

# TODO EDIT TO PASS WINDOW SIZE TO fitAndPredict
# TODO EDIT TO FORM STUFF WITHIN FIT AND PREDICT
# TODO EDIT TO DROP APPROPRIATE COLS

# Default rolling window size (in rows, i.e. hours) used by fitAndPredict
window_size = 168

def fitAndPredict(group, window=None):
    """ For one asset, regress 'char_r_tm1h' on a constant and 'macro_cmkt_t'
    over a trailing window ending at each row, and store the fitted value for
    that row in 'predicted_hourly_returns'.

    Args:
        group: DataFrame for a single asset with columns 'macro_cmkt_t' and 'char_r_tm1h'.
        window: Number of rows in each regression window. When None, the
                module-level window_size is used, so existing calls behave as before.

    Returns: A copy of group with a fresh 0..n-1 index and the new column
             'predicted_hourly_returns'. Rows with fewer than `window` rows of
             history, or whose window has no row with both variables present,
             get NaN.
    """
    if window is None:
        window = window_size

    # Reset the index for each group
    group = group.reset_index(drop=True)

    # Build LHS and RHS
    X = group['macro_cmkt_t']
    y = group['char_r_tm1h']
    # Rows that survive missing='drop' in the regression
    complete = X.notnull() & y.notnull()
    X = sm.add_constant(X)

    # Fit the model on the window ending at `index` and predict that last row
    def rolling_predict(index):
        if index < window - 1:
            return np.nan

        start = index - window + 1
        end = index + 1

        # Fitting OLS on a window left empty by missing='drop' can raise,
        # so return NaN for that row instead of aborting the whole asset.
        if not complete.iloc[start:end].any():
            return np.nan

        model = sm.OLS(y.iloc[start:end], X.iloc[start:end], missing='drop').fit()
        return model.predict(X.iloc[index, :].values.reshape(1, -1))[0]

    group['predicted_hourly_returns'] = [rolling_predict(i) for i in range(len(group))]
    return group

# Fit per asset and stack the results with a plain 0..n-1 index. Using
# groupby(...).apply here left 'asset' as both an index level and a column,
# which made the groupby('asset') inside formNewColumnByAsset ambiguous.
panel_df = pd.concat(
    [fitAndPredict(asset_df) for _, asset_df in panel_df.groupby('asset')],
    ignore_index=True,
)

# Calculate the residuals. fitAndPredict regresses 'char_r_tm1h', so the
# residual is taken against that column ('hourly_returns' does not exist).
panel_df['residuals'] = panel_df['char_r_tm1h'] - panel_df['predicted_hourly_returns']

# Calculate the rolling standard deviation of the residuals over the past 168 hours
panel_df = formNewColumnByAsset(
    panel_df,
    'residuals',
    'char_ivol_tm7',
    168,
    lambda x: x.std()
)

# Remove the intermediate regression columns
panel_df = panel_df.drop(['residuals', 'predicted_hourly_returns'], axis=1)

# char_ivol_tm7 = standard deviation of residuals from regressing hourly returns on cmkt over past seven days
# -DROP THIS IF NOT BETTER THAN tm30
# char_ivol_tm30 = standard deviation of residuals from regressing hourly returns on cmkt over past thirty days
# char_ivol_tm90 = standard deviation of residuals from regressing hourly returns on cmkt over past ninety days
# -DROP THIS IF NOT BETTER THAN tm30

# CAN ADJUST THE ABOVE TO GET THIS
# char_alpha_tm30  = intercept from regressing asset excess hourly returns on market excess return over past thirty days
# char_beta_tm30  = slope from regressing asset excess returns on market excess return over past thirty days
# char_beta_downside_tm30 = slope from regressing negative asset excess returns on negative market excess return over past thirty days

# char_coskew_tm30 = coef on excess market return squared term in bivariate regression of asset excess returns on this and market return over 30 day trailing period
# char_iskew_tm30 = same regression, take the skewness of the residuals


In [None]:
# MICROSTRUCTURE


def formMicrostructureCols(panel_df: pd.DataFrame, mom_df: pd.DataFrame) -> pd.DataFrame:
    """ Form the microstructure characteristics from volume, trade counts,
    price, circulating supply and the hourly return.

    This body used to run at module level against a mom_df that is only
    created in a later cell, while the weekly-panel cell calls
    formMicrostructureCols(panel_df, mom_df). It is now the function that
    call expects.

    Args:
        panel_df: Panel with columns 'date', 'asset', 'char_price_t', 'char_volume_t',
                  'char_trades_t', 'char_bidask_t', 'char_bid_depth_t',
                  'char_ask_depth_t' and 'char_supply_circ_t'.
        mom_df: DataFrame with columns 'date', 'asset' and 'char_r_tm1h'.

    Returns: DataFrame with the ID columns, the untouched volume, trades, bid/ask
             and depth columns, and the new 'char_*' columns. The frame is not
             subset to weekly frequency here.
    """
    # Subset to needed columns
    mic_df = panel_df[['date', 'asset', 'char_price_t', 'char_volume_t', 'char_trades_t',
        'char_bidask_t', 'char_bid_depth_t', 'char_ask_depth_t', 'char_supply_circ_t']].copy()
    mic_df = mic_df.merge(mom_df[['date', 'asset', 'char_r_tm1h']], on=['date', 'asset'], how='left', validate='one_to_one')

    # add temporary columns
    mic_df['temp_volume_price_t'] = mic_df.char_volume_t * mic_df.char_price_t

    # (source column, new column, window in hours, statistic over the window)
    rolling_specs = [
        ('char_r_tm1h',         'temp_r_tm1h_abs_avg_tm7',   168, lambda x: np.mean(np.abs(x))),
        ('char_volume_t',       'char_volume_sum_tm1',       24,  np.sum),
        ('char_volume_t',       'char_volume_sum_tm7',       168, np.sum),
        ('char_volume_t',       'char_volume_std_tm7',       168, np.std),
        ('char_trades_t',       'char_trades_sum_tm7',       168, np.sum),
        ('char_trades_t',       'char_trades_std_tm7',       168, np.std),
        ('temp_volume_price_t', 'char_volume_price_avg_tm7', 168, np.mean),
        ('temp_volume_price_t', 'char_volume_price_std_tm7', 168, np.std),
    ]
    for target_col, new_col, range_hours, func in rolling_specs:
        mic_df = formNewColumnByAsset(mic_df, target_col=target_col, new_col=new_col,
                                      range_hours=range_hours, func=func)

    # Weekly volume relative to circulating supply
    mic_df['char_turnover_t'] = mic_df.char_volume_sum_tm7 / mic_df.char_supply_circ_t
    # Average absolute hourly return per unit of average hourly volume over the week
    mic_df['char_illiq_tm7'] = mic_df.temp_r_tm1h_abs_avg_tm7 / (mic_df.char_volume_sum_tm7/168)

    # drop unneeded columns
    mic_df = mic_df.drop(['char_price_t', 'temp_volume_price_t', 
        'char_supply_circ_t', 'char_r_tm1h', 'temp_r_tm1h_abs_avg_tm7'], axis=1)

    return mic_df

# char_turnover_std_tm30 = std of residuals from regressing hourly turnover on a constant over last thirty days


# FORM THESE IF USEFUL:
# char_bidask_avg_tm7 = avg bid ask spread over last week
# char_bidask_std_tm7 = std of bid ask spread over last week
# char_bid_depth_avg_tm7 = avg bid depth over last week
# char_bid_depth_std_tm7 = std bid depth over last week
# char_ask_depth_avg_tm7 = avg ask depth over last week
# char_ask_depth_std_tm7 = std ask depth over last week


In [None]:
# ONCHAIN

def formOnchainCols(panel_df: pd.DataFrame) -> pd.DataFrame:
    """ Form the on-chain characteristics: rename the raw columns to the
    panel's naming convention and add seven-day rolling aggregates.

    Fixes relative to the previous version:
      * The cell did not parse, because indented bare string literals sat at
        top level. They are kept here as comments.
      * The rolling aggregates and the final drop were applied to mic_df,
        which has none of these columns. They now operate on oc_df.
      * The weekly-panel cell calls formOnchainCols(panel_df), which was
        never defined; this is that function.

    Args:
        panel_df: Panel with 'date', 'asset' and the raw on-chain columns selected below.

    Returns: DataFrame with the ID columns, the renamed on-chain columns and
             the new '*_tm7' columns. The frame is not subset to weekly
             frequency here.
    """
    # Subset to the needed columns
    oc_df = panel_df[['date', 'asset', 'char_network_growth_t',
        'char_holders_distribution_total_t', 'char_active_addr_t', 
        'char_tx_volume_t',
        'char_circulation_7d_t', 'char_circulation_30d_t', 
        'char_circulation_90d_t', 'char_circulation_365d_t', 
        'char_circulation_3y_t',  'char_dormant_circulation_365d_t', 
        'char_supply_circ_t', 'char_supply_max_t', 
        'char_age_mean_dollar_t', 'char_age_destroyed_t']].copy()

    # Rename columns
    oc_df = oc_df.rename(columns={'char_holders_distribution_total_t': 'char_addr_total_t',
                                'char_active_addr_t': 'char_addr_active_t',
                                'char_circulation_7d_t': 'char_circulation_tm7',
                                'char_circulation_30d_t': 'char_circulation_tm30',
                                'char_circulation_90d_t': 'char_circulation_tm90',
                                'char_circulation_365d_t': 'char_circulation_tm365',
                                'char_circulation_3y_t': 'char_circulation_tm3y',
                                'char_dormant_circulation_365d_t': 'char_circulation_dormant_tm365'})

    # (source column, new column, statistic over the trailing 168 hours)
    rolling_specs = [
        ('char_network_growth_t', 'char_addr_new_tm7',      np.sum),
        ('char_addr_active_t',    'char_addr_active_tm7',   np.sum),
        ('char_tx_volume_t',      'char_tx_volume_tm7',     np.sum),
        ('char_age_destroyed_t',  'char_age_destroyed_tm7', np.mean),
    ]
    for target_col, new_col, func in rolling_specs:
        oc_df = formNewColumnByAsset(oc_df, target_col=target_col, new_col=new_col,
                                     range_hours=168, func=func)

    # "char_addr_new_log_delta_tm14_tm7" = change from previous week to current week in LOG of char_add_new_tm24

    # TODO drop the 90 and 180 one if they dont seem to be good and correlated with other stuff
    # 'char_dormant_circulation_180d_t',
    # 'char_dormant_circulation_90d_t'

    # TODO DROP IF NO BETTER THAN char_tx_volume_t
    # 'char_tx_deposit_t',
    # 'char_tx_withdraw_t',

    # TODO DROP UNLESS WAY USEFUL
    # 'char_circulation_1d_t',

    # TODO LEAVE AS IS BUT MAKE SURE NOT TOO CORRELATED WITH ANYTHING ELSE OTHERWISE CONSIDER DROPPING
    # 'char_supply_circ_t',
    # 'char_supply_max_t',

    # 'char_age_mean_dollar_t'  LEAVE AS IS IF USEFUL. OTHERWISE DROP.

    # Drop columns we do not need
    oc_df = oc_df.drop(columns=['char_network_growth_t', 'char_addr_active_t', 'char_age_destroyed_t'])

    return oc_df




In [4]:
# TODO FUNCTION FOR FORMING WEEKLY PANEL

# def formWeeklyPanel(panel_df: pd.DataFrame) -> pd.DataFrame:
""""""

# Drop rows missing the LHS variable
# Keep rows with an observed LHS value, renumber them, and rename the
# weekly LHS to match the weekly panel's naming convention
panel_df = (
    panel_df.loc[panel_df['r_ex_tp168'].notnull()]
    .reset_index(drop=True)
    .rename(columns={'r_ex_tp168': 'r_ex_tp7'})
)

# Form all RHS characteristics, one frame per characteristic family
static_df = formStaticCols(panel_df)
desc_stat_df = formDescStatCols(panel_df)
# TODO CONFIRM THIS GIVES ACTUALLY SUNDAY MIDNIGHT AND NOT MONDAY MIDNIGHT
mom_df = formMomentumCols(panel_df)
# The market return is built from the momentum frame, and both feed the financial family
cmkt_df = formCmktCol(panel_df, mom_df, asset_universe_dict)
fin_df = formFinancialCols(panel_df, mom_df, cmkt_df)
mic_df = formMicrostructureCols(panel_df, mom_df)
oc_df = formOnchainCols(panel_df)
# -FUNCTION TO FORM ONCHAIN CHARS
# -FUNCTION TO FORM BALANCES CHARS
# -FUNCTION TO FORM DEV CHARS
# -FUNCTION TO FORM SOCIAL CHARS
# -FUNCTION TO FORM VALUATION CHARS


    # drop hourly ret
    

# Form RHS macro covariates

# -FUNCTION TO FINISH PROCESSING ALL MACRO COLUMNS TO GIVE RAW STUFF FOR DESC STAT BY JUST TAKING AVG/SUMS
# -split out the macro variables to merge back on so i work at timeseries level and not panel?
# -form snp momentum
# -form snp volatility
# -form squared market return
# -Summarize macro columns as there are just too damn many; use Goyal 8 as guide

# TODO scope distribution of each column to confirm they look OK
# -OVERALL, OVER TIME, BY ASSET, AND OVER TIME ASSET

# Merge and clean the final weekly panel
# def finalClean()
# weekly_df = .merge()
# -merge panel and macro back together
# -cut down to study period
# -ensure all dates are present
# -report out assets that don't have consecutive days to eyeball when they enter and leave
# -ensure no missing
# -ensure all cols have appropriate range
# -ensure rows and cols sorted

# return weekly_df







In [2]:
# TODO EDIT THE WEEKLY TEMPLATE BELOW TO DO HOURLY; EDIT IN LIGHT OF WHAT I ACTUALLY DID FOR WEEKLY


In [3]:
# TODO FUNCTION TO FORM HOURLY PANEL FOLLOWING TEMPLATE OF WEEKLY

# -FUNCTION TO KEEP STATIC CHARS
# -FUNCTION TO KEEP DESC STAT COLS
# -FUNCTION TO FORM ALL MOMENTUMS
# -FUNC TO FORM CMKT RETURN
# -FUNCTION TO FORM FINANCIAL CHARS
# -FUNCTION TO FORM MICROSTRUCTURE CHARS
# -FUNCTION TO FORM ONCHAIN CHARS
# -FUNCTION TO FORM BALANCES CHARS
# -FUNCTION TO FORM DEV CHARS
# -FUNCTION TO FORM SOCIAL CHARS
# -FUNCTION TO FORM VALUATION CHARS
# -FUNCTION TO FORM ALL GU 2019 CHARS
# -FUNCTION TO FINISH PROCESSING ALL MACRO COLUMNS TO GIVE RAW STUFF FOR DESC STAT BY JUST TAKING AVG/SUMS

# scope distribution of each column to confirm they look OK
# -OVERALL, OVER TIME, BY ASSET, AND OVER TIME ASSET

# NORMALIZE COLS
# do transformations of characteirstics to cross sectional -1 to 1
# for macro, do transformation to make it stationary and take whatever form of it gives stationary
# -follow Pelger on this for the options and then come up with metric for most stationary and above some threshold
# -do it programatically
# to normalize macro, shift to -1 to 1 from end of validation and back; then for all test do it recursively for each new ob
# -make sure this doesn't break correlation that much


# def finalClean()
# -merge panel and macro back together
# -cut down to study period
# -ensure all dates are present
# -report out assets that don't have consecutive days to eyeball when they enter and leave
# -ensure no missing
# -ensure all cols have appropriate range
# -ensure rows and cols sorted

# ensure all columns are -1 to 1?

# save train_val_df and test_df separately

In [None]:

# REMINDER: LEAVE TEMPLATED CODE AS IS

In [None]:
# TODO LEAVE ALL OF THESE AS IS; I CAN JUST TAKE THE SUNDAY MIDNIGHT VALUE

# Reference list only: this bare list literal is evaluated and discarded.
# It holds the same column names that formStaticCols selects.
['char_industry_asset_mgmt',
'char_industry_cex',
'char_industry_cloud_compute',
'char_industry_currency',
'char_industry_data_mgmt',
'char_industry_dex',
'char_industry_gaming',
'char_industry_infra',
'char_industry_interop',
'char_industry_lending',
'char_industry_media',
'char_industry_other_defi',
'char_industry_smart_contract',
'char_pow',
'char_pos',
'char_asset_usage_access',
'char_asset_usage_discount',
'char_asset_usage_dividends',
'char_asset_usage_payments',
'char_asset_usage_vote',
'char_asset_usage_work',
'char_ico_price',
'char_ico']

# Reference list only: the same column names that formDescStatCols selects.
# As the last expression in the cell, this list is what the cell displays.
['char_ico_days_since_t',
'char_rank_cmc_t',
'char_vc_t',
'char_num_pairs_t',
'char_price_global_t',
'char_volume_24h_global_t']


In [77]:
# MOMENTUMS

# char_r_tm1
# char_r_tm7
# char_r_tm14
# char_r_tm30
# char_r_tm60
# char_r_tm90
# char_r_tm14_tm7
# char_r_tm30_tm14
# char_r_tm90_tm30

# Bare literals kept as notes; they are evaluated and have no effect on any variable.
# 'char_price_t' is the column formMomentumCols reads.
'char_price_t'

# NOTE(review): presumably the longest momentum horizon in days — confirm.
90

In [None]:
# FINANCIAL

# create hourly return and create line to drop at end
# 'char_r_tm1h'

# char_r_max_tm1 = max hourly return over past day
# char_r_max_tm7 = max hourly return over past week
# char_r_max_tm30 = max hourly return over past thirty days

# char_price_t = price
# char_price_log_max_tm1 = log of maximum price in past day 
# char_price_log_max_tm7 = log of maximum price in past seven days

# char_size_t = market cap

# char_vol_tm1 = standard deviation of hourly returns in past day
# char_vol_tm7 = standard deviation of hourly returns in past seven days
# char_vol_tm30 = standard deviation of hourly returns in past thirty days
# char_vol_tm90 = standard deviation of hourly returns in past ninety days

# r_ath_t = Cumulative return since ath 
# r_atl_t = Cumulative return since atl 

# char_var5_tm7 = 5th percentile of past seven days of hourly returns
# char_var5_tm90 = 5th percentile of past 90 days of hourly returns
# char_shortfall5_tm7 = avg hourly ret below 5th percentile over past seven days
# char_shortfall5_tm90 = avg hourly ret below 5th percentile over past ninty days

# char_ivol_tm7 = standard deviation of residuals from regressing hourly returns on cmkt over past seven days
# -DROP THIS IF NOT BETTER THAN tm30
# char_ivol_tm30 = standard deviation of residuals from regressing hourly returns on cmkt over past thirty days
# char_ivol_tm90 = standard deviation of residuals from regressing hourly returns on cmkt over past ninety days
# -DROP THIS IF NOT BETTER THAN tm30
# char_alpha_tm30  = intercept from regressing asset excess hourly returns on market excess return over past thirty days
# char_beta_tm30  = slope from regressing asset excess returns on market excess return over past thirty days
# char_beta_downside_tm30 = slope from regressing negative asset excess returns on negative market excess return over past thirty days

# char_coskew_tm30 = coef on excess market return squared term in bivariate regression of asset excess returns on this and market return over 30 day trailing period
# char_iskew_tm30 = same regression, take the skewness of the residuals

# char_tradable_t = number of days since first tradable

# Reference list only (evaluated and discarded): columns named in the FINANCIAL notes above.
['char_price_t',
'char_mcap_t',
'char_r_tm1h']


In [None]:
# MICROSTRUCTURE

# char_volume_t = dollar volume over past week
# char_volume_log_avg_tm24 = average log dollar volume over past day
# char_volume_log_avg_tm24 = std of log dollar volume over past day
# char_trades_t = number of trades over past week
# char_volume_price_log_avg_tm7 = average of log of hourly dollar volume times price over past seven days
# char_volume_price_log_std_tm7 = std of log of price times volume over past seven day at hourly freq

# char_turnover_t = trading volume over past week divided by circulating supply

# char_turnover_std_tm30 = std of residuals from regressing hourly turnover on a constant over last thirty days

# char_illiq_tm7 = avg abs value of hourly returns divided by avg hourly trading volume over past week

# Reference list only (evaluated and discarded): raw microstructure columns
# that the MICROSTRUCTURE code selects from the panel.
['char_volume_t',
'char_trades_t',
'char_bidask_t',
'char_bid_depth_t',
'char_ask_depth_t']

# THESE AS IS
# char_bidask_t = bid ask spread
# char_bid_depth_t = depth of first bid
# char_ask_depth_t = depth of first ask

# FORM THESE IF USEFUL:
# char_bidask_avg_tm7 = avg bid ask spread over last week
# char_bidask_std_tm7 = std of bid ask spread over last week
# char_bid_depth_avg_tm7 = avg bid depth over last week
# char_bid_depth_std_tm7 = std bid depth over last week
# char_ask_depth_avg_tm7 = avg ask depth over last week
# char_ask_depth_std_tm7 = std ask depth over last week


In [None]:
# ONCHAIN

# "char_addr_new_tm7" = char_network_growth_t = number of new addresses that transfer token in last week
# "char_addr_new_log_delta_tm14_tm7" = change from previous week to current week in LOG of char_add_new_tm24
# char_addr_total_t =  'char_holders_distribution_total_t',
# "char_addr_active_t" = 'char_active_addr_t' = number of unique addresses active over last hour
# "char_addr_active_tm7" = sum of char_addr_active_t over last 7 days
# "char_tx_volume_t" = sum of transaction volume in usd  'char_tx_volume_t' over past seven days

# char_circulation_tm7 = number of unique tokens transfered over last 7 days 'char_circulation_7d_t',
# char_circulation_tm30 = number of unique tokens transfered over last 30 days 'char_circulation_30d_t',
# char_circulation_tm90 = number of unique tokens transfered over last 90 days 'char_circulation_90d_t',
# char_circulation_tm365 = number of unique tokens transfered over last one year 'char_circulation_365d_t',
# char_circulation_tm3y = number of unique tokens transfered over last three years 'char_circulation_3y_t'

# TODO drop the 2 year one

# char_circulation_dormant_tm365 = number of tokens transferred in last day that haven't moved for over 365 days
# (The string literals below are notes only. Their stray leading spaces were
# removed because an indented statement at top level is an IndentationError.)
'char_dormant_circulation_365d_t',

# TODO drop the 90 and 180 one if they dont seem to be good and correlated with other stuff
'char_dormant_circulation_180d_t',
'char_dormant_circulation_90d_t'

# TODO DROP IF NO BETTER THAN char_tx_volume_t
'char_tx_deposit_t',
'char_tx_withdraw_t',

# TODO DROP UNLESS WAY USEFUL
'char_circulation_1d_t',

# TODO LEAVE AS IS BUT MAKE SURE NOT TOO CORRELATED WITH ANYTHING ELSE OTHERWISE CONSIDER DROPPING

'char_supply_circ_t',
'char_supply_max_t',

'char_age_mean_dollar_t' # LEAVE AS IS IF USEFUL. OTHERWISE DROP.

'char_age_destroyed_t' # TAKE MEAN OVER LAST WEEK to form 'char_age_destroyed_tm7'





In [None]:
# BALANCES

# TODO form exchange inflow and outflow in usd
# confirm useful predictor
# confirm not correlated to a super high degree with other stuff that i def keep
# form optimal avg over past hour, day, or week for both

 'char_exchange_inflow_usd_t',
 'char_exchange_outflow_usd_t',

# DROP THESE AFTER CONFIRMING THEY DON'T OFFER MUCH BEYOND USD VERSIONS

 'char_exchange_inflow_t',
 'char_exchange_outflow_t',


# FORM ALL THE BELOW BY DIVIDING BY CIRC SUPPLY 
# TODO KEEP IF AT ALL USEFUL AND NOT TOTALLY CORR WITH SOMETHING I DEF KEEP

# 'char_traders_balance_t',
# 'char_whale_balance_t',
# 'char_exchange_balance_t',
# 'char_dex_balance_t',
# 'char_cex_balance_t',
# 'char_amount_in_top_holders_t',
# 'char_defi_balance_t',

# TODO DROP ALL OF THE BELOW IF THEY DON'T ADD MUCH BEYOND WHAT I FORM ABOVE

['char_supply_on_exchanges_t',
'char_supply_outside_exchanges_t',

'char_defi_cex_balance_t',
'char_defi_dex_balance_t',
'char_defi_exchange_balance_t',
'char_dex_cex_balance_t',

'char_traders_cex_balance_t',
'char_traders_defi_balance_t',
'char_traders_dex_balance_t',
'char_traders_exchange_balance_t',

'char_traders_whale_balance_t',


'char_whale_cex_balance_t',
'char_whale_defi_balance_t',
'char_whale_dex_balance_t',

'char_percent_of_total_supply_on_exchanges_t',
]

# TODO CREATE SOME AGG CHANGE IN HOURLY CHANGE OVER PAST WEEK
# -vector diff from previous hour values and then avg that?
# -play to get tasty that is CONFIRM ALSO MY METRIC IS USEFUL ON ITS OWN AND NOT TOTALLY CORR WITH SOMETHING ELSE 
'char_holders_distribution_over_100_t',
 'char_holders_distribution_over_100k_t',
 'char_holders_distribution_over_10_t',
 'char_holders_distribution_over_10k_t',
 'char_holders_distribution_over_1M_t',
 'char_holders_distribution_over_1_t',
 'char_holders_distribution_over_1k_t',

# TODO CONFIRM MORE USEFUL THAN ANY OF THESE ON THEIR OWN OR THEIR FIRST DIFFS OR MEAN OF FIRST DIFFS


# TODO REPEAT ALL OF THE ABOVE FOR THE BELOW COLUMNS TO GET A FEEL OF FLOW
['char_cexes_to_defi_flow_t',
 'char_cexes_to_dex_flow_t',
 'char_cexes_to_dex_traders_flow_t',
 'char_cexes_to_traders_flow_t',
 'char_cexes_to_whale_flow_t',
 'char_defi_to_cexes_flow_t',
 'char_defi_to_dex_traders_flow_t',
 'char_defi_to_dexes_flow_t',
 'char_defi_to_exchanges_flow_t',
 'char_defi_to_traders_flow_t',
 'char_defi_to_whale_flow_t',
 'char_dex_to_cexes_flow_t',
 'char_dex_traders_to_cexes_flow_t',
 'char_dex_traders_to_defi_flow_t',
 'char_dex_traders_to_dexes_flow_t',
 'char_dex_traders_to_exchanges_flow_t',
 'char_dex_traders_to_whale_flow_t',
 'char_dexes_to_defi_flow_t',
 'char_dexes_to_dex_traders_flow_t',
 'char_dexes_to_traders_flow_t',
 'char_dexes_to_whale_flow_t',
 'char_exchanges_to_defi_flow_t',
 'char_exchanges_to_dex_traders_flow_t',
 'char_exchanges_to_genesis_flow_t',
 'char_exchanges_to_traders_flow_t',
 'char_exchanges_to_whales_flow_t',
 'char_traders_to_cexes_flow_t',
 'char_traders_to_defi_flow_t',
 'char_traders_to_dexes_flow_t',
 'char_traders_to_exchanges_flow_t',
 'char_traders_to_whale_flow_t',
 'char_whale_to_cexes_flow_t',
 'char_whale_to_defi_flow_t',
 'char_whale_to_dex_traders_flow_t',
 'char_whale_to_dexes_flow_t',
 'char_whale_to_traders_flow_t',
 'char_whales_to_exchanges_flow_t',]


In [None]:
# DEV

# AS IS OR MAYBE SOME AVG OVER PAST WEEK
['char_dev_activity_t']




# SOCIAL

# CONFIRM BALANCE DOESN'T OFFER MUCH BEYOND THE NEG AND POS ONES
# THEN JUST KEEP ALL
['char_sentiment_balance_reddit_t',
'char_sentiment_balance_twitter_t',
'char_sentiment_negative_reddit_t',
'char_sentiment_negative_twitter_t',
'char_sentiment_positive_reddit_t',
'char_sentiment_positive_twitter_t',

'char_sentiment_volume_consumed_total_t', # TOTAL OVER LAST WEEK

'char_social_dominance_total_t',

'char_social_volume_reddit_t', # TOTAL OVER LAST WEEK
'char_social_volume_twitter_t', # TOTAL OVER LAST WEEK

'char_unique_social_volume_total_1h_t'] # TOTAL OVER LAST WEEK IF BETTER THAN RAW TOTALS


# VALUATION

# TODO JUST USE AS IS IF USEFUL AND NOT TOTALLY CORRELATED WITH OTHER STUFF

'char_stock_to_flow_t',
'char_percent_of_total_supply_in_profit_t'

# TODO FIGURE OUT WHICH ONE TO USE OR BOTH
'char_mvrv_long_short_diff_usd_t',
'char_mvrv_usd_t',

# TODO FIGURE OUT WHICH ONE TO USE
'char_mean_realized_price_usd_t',
'char_realized_value_usd_t',

# TODO FIGURE OUT WHICH ONE TO USE
'char_nvt_t',
'char_nvt_transaction_volume_t',



In [None]:
# TODO TASKS FOR LATER
# -RUN UNI FACTOR IMPORTANCE FOR BOTH WEEKLY AND HOURLY
# --WEEKLY IS WHAT IS IN PAPER
# --HOURLY INFORMS HOW TO BUILD AUTOENCODER / DL MODEL
# -WE WILL FIT AUTOENCODER AND MY STUFF WITH THE RAW CHARACTERISTICS AND MACRO TO START TO SHOW MY TRANSFORMER CAN LEARN TEMPORAL
# --BUT THEN GIVE THEM THE BEST STUFF FROM PRIOR TO SHOW AUTO STILL IS CRUSHED
# -when he imports for uni factor: cut down to asset universe, drop macro, and drop global_price
# -when for low-dim: drop global_price, normalize characteristics, just drop macro, or drop all for just returns


In [None]:
# FOR HOURLY, 
# -maybe just take stuff mostly as is
# -but maybe try to form most stationary version for all?
# -like keep the ones that i know i just want that version of
# -then write a func to loop over the rest to form all the pelger transformations to then take the most stationary one
# --as long as it is above some level of stationary?

 'macro_aaa_t',
 'macro_aave_med_borrow_apy_t',
 'macro_aave_med_supply_apy_t',
 'macro_aave_med_variable_borrow_apy_t',
 'macro_acogno_t',
 'macro_active_cryptos_t',
 'macro_active_exchanges_t',
 'macro_active_market_pairs_t',
 'macro_amdmnox_t',
 'macro_andenox_t',
 'macro_avg_fee_mean_usd_t',
 'macro_avg_fee_med_usd_t',
 'macro_avg_fee_rev_pct_t',
 'macro_avg_ndf_t',
 'macro_avg_nvt_adj_ff_t',
 'macro_avg_rvt_adj_t',
 'macro_avg_ser_t',
 'macro_avg_sopr_t',
 'macro_avg_sply_act_pct_1yr_t',
 'macro_avg_vel_act_1yr_t',
 'macro_avg_vel_cur_1yr_t',
 'macro_awhman_t',
 'macro_awotman_t',
 'macro_baa_t',
 'macro_bogmbase_t',
 'macro_btc_adr_act_cnt_t',
 'macro_btc_adr_bal_cnt_t',
 'macro_btc_cap_act_1yr_usd_t',
 'macro_btc_cap_mrkt_ffusd_t',
 'macro_btc_cap_real_usd_t',
 'macro_btc_diff_mean_t',
 'macro_btc_fee_med_usd_t',
 'macro_btc_fee_tot_usd_t',
 'macro_btc_flow_in_ex_usd_t',
 'macro_btc_flow_miner_net_0hop_all_usd_t',
 'macro_btc_flow_out_ex_usd_t',
 'macro_btc_hash_rate_t',
 'macro_btc_iss_tot_usd_t',
 'macro_btc_mcrc_t',
 'macro_btc_mctc_t',
 'macro_btc_momr_t',
 'macro_btc_mri_0hop_all30d_t',
 'macro_btc_mvrv_t',
 'macro_btc_ndf_t',
 'macro_btc_nvt_adj_ff_t',
 'macro_btc_puell_mul_rev_t',
 'macro_btc_rev_hash_usd_t',
 'macro_btc_rev_usd_t',
 'macro_btc_rvt_adj_t',
 'macro_btc_ser_t',
 'macro_btc_sopr_t',
 'macro_btc_sply_act_10yr_t',
 'macro_btc_sply_act_180d_t',
 'macro_btc_sply_act_1d_t',
 'macro_btc_sply_act_1yr_t',
 'macro_btc_sply_act_30d_t',
 'macro_btc_sply_act_5yr_t',
 'macro_btc_sply_act_7d_t',
 'macro_btc_sply_act_ever_t',
 'macro_btc_sply_act_pct_1yr_t',
 'macro_btc_sply_adr_bal_usd_100_t',
 'macro_btc_sply_adr_bal_usd_10k_t',
 'macro_btc_sply_adr_bal_usd_1_t',
 'macro_btc_sply_adr_bal_usd_1m_t',
 'macro_btc_sply_adr_top_100_t',
 'macro_btc_sply_adr_top_1pct_t',
 'macro_btc_sply_cur_t',
 'macro_btc_sply_ex_usd_t',
 'macro_btc_sply_exp_fut_10yr_t',
 'macro_btc_sply_ff_t',
 'macro_btc_sply_miner_0hop_all_usd_t',
 'macro_btc_sply_rvv_180d_t',
 'macro_btc_sply_rvv_1yr_t',
 'macro_btc_sply_rvv_30d_t',
 'macro_btc_sply_rvv_5yr_t',
 'macro_btc_sply_rvv_7d_t',
 'macro_btc_sply_utxo_loss_t',
 'macro_btc_sply_utxo_prof_t',
 'macro_btc_tx_tfr_cnt_t',
 'macro_btc_tx_tfr_val_adj_usd_t',
 'macro_btc_tx_tfr_val_day_dst_t',
 'macro_btc_tx_tfr_val_med_usd_t',
 'macro_btc_tx_tfr_val_usd_t',
 'macro_btc_utxo_age_med_t',
 'macro_btc_utxo_loss_unreal_usd_t',
 'macro_btc_utxo_prof_unreal_usd_t',
 'macro_btc_vel_act_1yr_t',
 'macro_btc_vel_cur_1yr_t',
 'macro_businvx_t',
 'macro_busloans_t',
 'macro_ce16ov_t',
 'macro_ces06_t',
 'macro_ces20_t',
 'macro_ces30_t',
 'macro_claimsx_t',
 'macro_clf16ov_t',
 'macro_cmrmtsplx_t',
 'macro_compapffx_t',
 'macro_conspi_t',
 'macro_cp3mx_t',
 'macro_cpiaucsl_t',
 'macro_cusr0000sac_t',
 'macro_cusr0000sad_t',
 'macro_cusr0000sas_t',
 'macro_ddurrg3m086sbea_t',
 'macro_dex_volume_t',
 'macro_dgs1mo_t',
 'macro_dndgrg3m086sbea_t',
 'macro_dpcera3m086sbea_t',
 'macro_dserrg3m086sbea_t',
 'macro_dtcthfnm_t',
 'macro_emv_inflation_t',
 'macro_emv_t',
 'macro_eth_adr_act_cnt_t',
 'macro_eth_adr_act_cont_cnt_t',
 'macro_eth_adr_act_rec_cnt_t',
 'macro_eth_adr_act_sent_cnt_t',
 'macro_eth_adr_bal_cnt_t',
 'macro_eth_cap_act_1yr_usd_t',
 'macro_eth_cap_mrkt_ffusd_t',
 'macro_eth_cap_real_usd_t',
 'macro_eth_cont_erc_20_cnt_t',
 'macro_eth_fee_med_t',
 'macro_eth_flow_in_ex_usd_t',
 'macro_eth_flow_out_ex_usd_t',
 'macro_eth_gas_used_tx_t',
 'macro_eth_iss_tot_usd_t',
 'macro_eth_mvrv_t',
 'macro_eth_ndf_t',
 'macro_eth_nvt_adj_ff_t',
 'macro_eth_puell_mul_rev_t',
 'macro_eth_rev_hash_usd_t',
 'macro_eth_rev_usd_t',
 'macro_eth_roi_t',
 'macro_eth_rvt_adj_t',
 'macro_eth_ser_t',
 'macro_eth_sply_act_10yr_t',
 'macro_eth_sply_act_180d_t',
 'macro_eth_sply_act_1d_t',
 'macro_eth_sply_act_1yr_t',
 'macro_eth_sply_act_30d_t',
 'macro_eth_sply_act_5yr_t',
 'macro_eth_sply_act_7d_t',
 'macro_eth_sply_act_ever_t',
 'macro_eth_sply_act_pct_1yr_t',
 'macro_eth_sply_adr_bal_usd_100_t',
 'macro_eth_sply_adr_bal_usd_10k_t',
 'macro_eth_sply_adr_bal_usd_1_t',
 'macro_eth_sply_adr_bal_usd_1m_t',
 'macro_eth_sply_adr_top_100_t',
 'macro_eth_sply_adr_top_1pct_t',
 'macro_eth_sply_burnt_usd_t',
 'macro_eth_sply_cur_t',
 'macro_eth_sply_ex_usd_t',
 'macro_eth_sply_exp_fut_10yr_t',
 'macro_eth_sply_ff_t',
 'macro_eth_stakers_count_t',
 'macro_eth_total_fee_t',
 'macro_eth_tx_tfr_cnt_t',
 'macro_eth_tx_tfr_val_adj_usd_t',
 'macro_eth_tx_tfr_val_med_usd_t',
 'macro_eth_tx_tfr_val_usd_t',
 'macro_eth_vel_act_1yr_t',
 'macro_eth_vel_cur_1yr_t',
 'macro_ex_num_pairs_cex_t',
 'macro_ex_num_pairs_dex_t',
 'macro_ex_open_interest_future_usd_t',
 'macro_ex_usd_volume_24h_cex_t',
 'macro_ex_volume_future_usd_t',
 'macro_ex_volume_t',
 'macro_excausx_t',
 'macro_exjpusx_t',
 'macro_expinf10yr_t',
 'macro_expinf1yr_t',
 'macro_expinf20yr_t',
 'macro_expinf2yr_t',
 'macro_expinf30yr_t',
 'macro_expinf3yr_t',
 'macro_expinf5yr_t',
 'macro_exszusx_t',
 'macro_exusukx_t',
 'macro_fedfunds_t',
 'macro_funding_rate_med_usdt_binance_t',
 'macro_gepu_t',
 'macro_gs10_t',
 'macro_gs1_t',
 'macro_gs5_t',
 'macro_houst_t',
 'macro_hwiuratio_t',
 'macro_ico_count_t',
 'macro_indpro_t',
 'macro_invest_t',
 'macro_m1sl_t',
 'macro_m2real_t',
 'macro_m2sl_t',
 'macro_manemp_t',
 'macro_mcd_avg_liq_t',
 'macro_mcd_med_collat_ratio_t',
 'macro_mvrv_med_t',
 'macro_nonrevsl_t',
 'macro_oilpricex_t',
 'macro_payems_t',
 'macro_pcepi_t',
 'macro_permit_t',
 'macro_realln_t',
 'macro_rpi_t',
 'macro_snp500_t',
 'macro_snp_div_yield_t',
 'macro_snp_indust_t',
 'macro_snp_pe_t',
 'macro_stablecoin_dev_t',
 'macro_t10yie_t',
 'macro_t20yiem_t',
 'macro_t30yiem_t',
 'macro_t5yie_t',
 'macro_tb3ms_t',
 'macro_tb6ms_t',
 'macro_teu_sca_t',
 'macro_tmu_sca_t',
 'macro_total_aave_borrowed_t',
 'macro_total_aave_deposits_t',
 'macro_total_aave_liq_t',
 'macro_total_aave_new_debt_t',
 'macro_total_aave_supply_t',
 'macro_total_adr_act_cnt_t',
 'macro_total_adr_bal_cnt_t',
 'macro_total_adr_bal_usd_100_cnt_t',
 'macro_total_adr_bal_usd_100k_cnt_t',
 'macro_total_adr_bal_usd_10k_cnt_t',
 'macro_total_adr_bal_usd_1m_cnt_t',
 'macro_total_cap_act_1yr_usd_t',
 'macro_total_cap_fut_exp_10yr_usd_t',
 'macro_total_cap_mrkt_cur_usd_t',
 'macro_total_cap_mrkt_ffusd_t',
 'macro_total_cap_mvrv_cur_t',
 'macro_total_cap_real_usd_t',
 'macro_total_compound_borrowed_t',
 'macro_total_compound_deposits_t',
 'macro_total_compound_liq_t',
 'macro_total_compound_new_debt_t',
 'macro_total_compound_supply_t',
 'macro_total_dai_created_t',
 'macro_total_dai_repaid_t',
 'macro_total_fee_tot_usd_t',
 'macro_total_iss_tot_usd_t',
 'macro_total_maker_borrowed_t',
 'macro_total_maker_deposits_t',
 'macro_total_maker_supply_t',
 'macro_total_nft_retail_trades_t',
 'macro_total_nft_retail_volume_t',
 'macro_total_nft_trades_t',
 'macro_total_nft_volume_t',
 'macro_total_nft_whale_trades_t',
 'macro_total_nft_whale_volume_t',
 'macro_total_open_interest_usdt_binance_t',
 'macro_total_open_value_usdt_binance_t',
 'macro_total_rev_usd_t',
 'macro_total_sply_act_10yr_t',
 'macro_total_sply_act_180d_t',
 'macro_total_sply_act_1d_t',
 'macro_total_sply_act_1yr_t',
 'macro_total_sply_act_30d_t',
 'macro_total_sply_act_5yr_t',
 'macro_total_sply_act_7d_t',
 'macro_total_sply_act_ever_t',
 'macro_total_sply_adr_bal_usd_100_t',
 'macro_total_sply_adr_bal_usd_10k_t',
 'macro_total_sply_adr_bal_usd_1_t',
 'macro_total_sply_adr_bal_usd_1m_t',
 'macro_total_sply_adr_top_100_t',
 'macro_total_sply_adr_top_1pct_t',
 'macro_total_sply_ff_t',
 'macro_total_tx_tfr_cnt_t',
 'macro_total_tx_tfr_val_adj_usd_t',
 'macro_total_tx_tfr_val_usd_t',
 'macro_total_uni_claims_t',
 'macro_total_usd_mcap_t',
 'macro_totresns_t',
 'macro_twexafegsmthx_t',
 'macro_uempmean_t',
 'macro_umcsentx_t',
 'macro_unrate_t',
 'macro_us_ex_open_interest_future_usd_t',
 'macro_us_ex_volume_future_usd_t',
 'macro_us_ex_volume_spot_usd_t',
 'macro_us_mpu_t',
 'macro_uscons_t',
 'macro_usfire_t',
 'macro_usgood_t',
 'macro_vixclsx_t'