In [1]:
# The QuantTools module lives outside this project tree, so its directory must be added to sys.path before it can be imported.
import sys
sys.path.insert(0, '/home/adam/Dropbox/2-creations/2-crafts/7-buidl/0-utils/quant_tools/code')

# Import.
import matplotlib.pyplot as plt
from typing import List
from tools import QuantTools
import pandas as pd
import numpy as np
import gc


In [2]:
def reportBenchmarkStatistics(
    df: pd.DataFrame, lhs_col: str, cmkt_col: str, periods_in_year: int
    ) -> pd.DataFrame:
    """Print summary statistics for the btc, eth and cmkt benchmarks and return their series.

    Args:
        df: panel containing at least the columns `date`, `asset`, `lhs_col`
            and `cmkt_col`.
        lhs_col: column whose values are used as the btc / eth benchmark returns.
        cmkt_col: column whose values are used as the cmkt benchmark returns.
        periods_in_year: forwarded to the QuantTools statistics helpers.

    Returns:
        DataFrame with columns `date`, `BTC`, `ETH` and `CMKT`, one row per
        distinct (date, cmkt_col) pair in `df`. BTC / ETH values are matched to
        each row by date; dates on which an asset has no observation are NaN.
    """
    # Create benchmark returns
    btc_df = df[df.asset=='btc'].drop_duplicates().copy()
    eth_df = df[df.asset=='eth'].drop_duplicates().copy()
    btc_returns = btc_df[lhs_col].values
    eth_returns = eth_df[lhs_col].values
    cmkt_df = df[['date', cmkt_col]].drop_duplicates().copy()
    cmkt_returns = cmkt_df[cmkt_col].values
    
    # Report the period we are studying
    print('Benchmark results are for this period:')
    print(f'Start: {np.min(df.date.values)}')
    print(f'End: {np.max(df.date.values)}')
    print('\n')

    # Output statistics on returns
    port_name = ['btc', 'eth', 'cmkt']
    returns_list = [btc_returns, eth_returns, cmkt_returns]
    for portfolio, returns in zip(port_name, returns_list):
        print(f'For the portfolio {portfolio}:')
        print(f'Geom mean annual return: {np.round(QuantTools.calcGeomAvg(returns, annualized=True, periods_in_year=periods_in_year), 4)}')
        print(f'Sharpe: {np.round(QuantTools.calcSharpe(returns, periods_in_year=periods_in_year), 2)}')
        print(f'TS mean annual return: {np.round(QuantTools.calcTSAvgReturn(returns, annualized=True, periods_in_year=periods_in_year), 4)}')
        print('\n')
    
    # Form benchmark return dataframe to return.
    # The asset series are aligned to the cmkt date grid by date rather than by
    # position: positional assignment raises when an asset is missing a timestamp
    # and silently misaligns returns when the row order differs.
    b_df = pd.DataFrame(data={'date': cmkt_df.date.values})
    for label, asset_df in (('BTC', btc_df), ('ETH', eth_df)):
        unique_df = asset_df.drop_duplicates(subset='date')
        # Index built from `.values` so it is represented exactly like b_df['date'].
        by_date = pd.Series(unique_df[lhs_col].values, index=unique_df['date'].values)
        b_df[label] = b_df['date'].map(by_date)
    b_df['CMKT'] = cmkt_returns
    return b_df
    

In [3]:
def outputReturnsByQuantileFig(df: pd.DataFrame, lhs_col: str, out_fp: str,
    restrict_shortable_uni: bool=False, shortable_asset_uni: List[str]=[], 
    restrict_tradable_volume: bool=False, prct_volume_threshold: float=0.05, total_trade_volume_per_hour: float=1e6) -> None:
    """Plot the mean of `lhs_col` per quantile bucket and save the figure to `out_fp`.

    Args:
        df: panel passed unchanged to QuantTools.formPortfolioWeightsByQuantile.
        lhs_col: column averaged within each `qntl` group of the returned positions.
        out_fp: file path the figure is written to.
        restrict_shortable_uni, shortable_asset_uni, restrict_tradable_volume,
        prct_volume_threshold, total_trade_volume_per_hour: forwarded as-is to
            QuantTools.formPortfolioWeightsByQuantile.

    Note:
        `shortable_asset_uni` keeps its shared list default for interface
        compatibility; this function never mutates it.
    """
    # Form quantile returns.
    # NOTE(review): the literals 20, True and 'yhats' are presumably the number of
    # quantiles, a weighting flag and the prediction column - confirm against QuantTools.
    pos_df = QuantTools.formPortfolioWeightsByQuantile(
        df, 20, True, 'yhats',
        restrict_shortable_uni, shortable_asset_uni, 
        restrict_tradable_volume, prct_volume_threshold, total_trade_volume_per_hour
    )
    qntl_rtrns_df = pos_df.groupby('qntl')[[lhs_col]].mean().reset_index()

    # Create and format plot on a dedicated figure so that no pre-existing
    # "current" pyplot figure is drawn on or closed by this function.
    fig, ax = plt.subplots()
    try:
        ax.plot(qntl_rtrns_df['qntl'], qntl_rtrns_df[lhs_col], color='#482677FF')
        ax.set_frame_on(False)
        ax.grid(axis='y', linestyle='--')
        ax.xaxis.set_visible(False)
        ax.yaxis.set_tick_params(color='grey')
        ax.yaxis.label.set_color('grey')

        # Save plot
        fig.savefig(out_fp)
    finally:
        # Release the figure even when saving fails.
        plt.close(fig)


In [4]:
if __name__ == "__main__":
    # Entry point: load the test panel and the autoencoder predictions, merge them
    # into a single panel, print benchmark statistics, export portfolio statistics
    # to Excel and save the return-by-quantile figure.

    # Set args.
    IN_FP_TEST_PNL   = '../data/clean/panel_test.pkl'
    IN_FP_AUTO       = '../data/clean/test_yhats_autoencoder.pkl'
    IN_FP_TRANSFORMER = '../data/clean/test_yhats_transformer_20220703_20230101.pkl'  # NOTE(review): not used below.
    OUT_FP           = '../output/high_dim_fm/high_dim_fms.xlsx'
    OUT_FP_QNTL      = '../output/high_dim_fm/ts_avg_return_by_quantile.png'
    LHS_COL          = 'r_ex_tp1'
    CMKT_COL         = 'macro_cmkt_tp1'
    PERIODS_IN_YEAR  = int(365.25*24)
    NUM_QNTLS_PRTLS  = 5
    TC_PER_HOUR      = 0.00135 # NOTE: margin carry - to/(2/# quantiles) * 2 * (spread/2 + fee + margin_open/2)
    RESTRICT_SHORTABLE_UNI = True
    SHORTABLE_UNI = ['aave', 'algo', 'rep', 'btc', 'bch', 'ada', 'link', 'comp', 'atom', 'dash', 'doge', 'dot', 
        'eos', 'eth', 'etc', 'fil', 'flow', 'kava', 'keep', 'ksm', 'ltc', 'omg', 'matic',  
        'sc', 'sol', 'trx', 'uni', 'xlm', 'xmr', 'xrp', 'xtz', 'zec']
    RESTRICT_TRADABLE_VOLUME = True
    PRCT_VOLUME_THRESHOLD = 0.02
    TOTAL_TRADE_VOLUME_PER_HOUR = 1e6

    # Import data.
    # NOTE(review): read_pickle executes arbitrary code on load; only use trusted files.
    test_df = pd.read_pickle(IN_FP_TEST_PNL)
    a_df    = pd.read_pickle(IN_FP_AUTO)

    # Form single panel.
    df = a_df.copy()
    mcap_df = test_df[['date', 'asset', 'char_size_t']].copy()
    mcap_df = mcap_df.rename(columns={'char_size_t': 'mcap'})
    micro_df = test_df[['date', 'asset', 'char_volume_t', 'char_spread_bps_t']].copy()
    # Move each observation back one hour so it lines up as the "tp1" value of the prior row.
    micro_df['date'] -= pd.Timedelta(hours=1)
    micro_df = micro_df.rename(columns={'char_volume_t': 'volume_tp1'})
    micro_df = micro_df.rename(columns={'char_spread_bps_t': 'spread_tp1'})
    cmkt_df = test_df[['date', 'macro_cmkt_tm1h']].drop_duplicates().copy()
    # shift(-1) is positional, so the rows must be in date order for it to pick
    # up the next period's value; sort explicitly instead of relying on input order.
    cmkt_df = cmkt_df.sort_values('date')
    cmkt_df[CMKT_COL] = cmkt_df.macro_cmkt_tm1h.shift(-1)
    cmkt_df = cmkt_df.drop('macro_cmkt_tm1h', axis=1)
    df = df.merge(mcap_df, on=['date', 'asset'], how='inner', validate='one_to_one')
    df = df.merge(micro_df, on=['date', 'asset'], how='inner', validate='one_to_one')
    df = df.merge(cmkt_df, on=['date'], how='inner', validate='many_to_one')

    # Clear memory
    del test_df, a_df
    del mcap_df, cmkt_df, micro_df
    gc.collect()

    # Drop to rel data
    # NOTE(review): the recorded output starts at 2022-07-01, so this 2021 cutoff
    # does not bind; confirm whether a 2022 date was intended.
    df = df[df.date >= '2021-07-03']

    # Report results
    b_df = reportBenchmarkStatistics(df, LHS_COL, CMKT_COL, PERIODS_IN_YEAR)

    # Generate portfolio statistics, one Excel sheet per weighting variant.
    # NOTE(review): the boolean is presumably the mcap-weighting switch, judging
    # by the sheet names - confirm against QuantTools.calcPortfolioStatistics.
    yhat_col = 'yhats'
    for mcap_flag, sheet_name in ((False, 'raw_oos_port_stats_equal'), (True, 'raw_oos_port_stats_mcap')):
        results_df = QuantTools.calcPortfolioStatistics(
            df, LHS_COL, yhat_col, CMKT_COL, 'autoencoder', 
            NUM_QNTLS_PRTLS, PERIODS_IN_YEAR, mcap_flag, TC_PER_HOUR,
            RESTRICT_SHORTABLE_UNI, SHORTABLE_UNI,
            RESTRICT_TRADABLE_VOLUME, PRCT_VOLUME_THRESHOLD, TOTAL_TRADE_VOLUME_PER_HOUR
        )
        # Append mode requires the workbook to exist already; on a first run fall
        # back to creating it (if_sheet_exists is only accepted in append mode).
        try:
            writer = pd.ExcelWriter(OUT_FP, engine='openpyxl', mode='a', if_sheet_exists='replace')
        except FileNotFoundError:
            writer = pd.ExcelWriter(OUT_FP, engine='openpyxl', mode='w')
        with writer:
            results_df.to_excel(writer, sheet_name=sheet_name)

    # Generate ts avg of returns by quantile
    outputReturnsByQuantileFig(df, LHS_COL, OUT_FP_QNTL,
        RESTRICT_SHORTABLE_UNI, SHORTABLE_UNI,
        RESTRICT_TRADABLE_VOLUME, PRCT_VOLUME_THRESHOLD, TOTAL_TRADE_VOLUME_PER_HOUR)


Benchmark results are for this period:
Start: 2022-07-01T00:00:00.000000000
End: 2022-12-31T22:00:00.000000000


For the portfolio btc:
Geom mean annual return: -0.3416
Sharpe: -0.89
TS mean annual return: -0.3429


For the portfolio eth:
Geom mean annual return: 0.1836
Sharpe: 0.58
TS mean annual return: 0.3251


For the portfolio cmkt:
Geom mean annual return: -0.2247
Sharpe: -0.37
TS mean annual return: -0.1605


Of a total 4415 datetimes, 0.0% have no shortable assets.
Of a total 4415 datetimes, 12.0% have insufficient volume to trade, or not shortable.
Of a total 4415 datetimes, 28.0% have insufficient volume to trade.
The median spread across the asset-datetimes traded was 8.0 bps.
The 75th quantile spread across the asset-datetimes traded was 14.0 bps.
The 90th quantile spread across the asset-datetimes traded was 27.0 bps.
The 95th quantile spread across the asset-datetimes traded was 43.0 bps.
Of a total 4415 datetimes, 0.0% have no shortable assets.
Of a total 4415 datetimes,