In [1]:
# QuantTools lives under awkward directory names, so its code directory has to be added to sys.path before it can be imported.
import sys
sys.path.insert(0, '/home/adam/Dropbox/2-creations/2-crafts/7-buidl/0-utils/quant_tools/code')

# Import.
import matplotlib.pyplot as plt
from tools import QuantTools
import pandas as pd
import numpy as np
import gc


In [2]:
def _alignReturnsToDates(
    asset_df: pd.DataFrame, lhs_col: str, dates: np.ndarray, label: str
    ) -> np.ndarray:
    """Return asset_df[lhs_col] re-ordered to follow `dates`; NaN where a date is missing."""
    by_date = pd.Series(asset_df[lhs_col].values, index=asset_df['date'].values)
    # A duplicated date would make the date -> return lookup ambiguous.
    if by_date.index.has_duplicates:
        raise ValueError(f'Benchmark {label} has more than one row for at least one date.')
    return by_date.reindex(dates).values


def reportBenchmarkStatistics(
    df: pd.DataFrame, lhs_col: str, cmkt_col: str, periods_in_year: int
    ) -> pd.DataFrame:
    """Print summary statistics for the btc, eth and market benchmarks and
    return their returns in a single date-aligned frame.

    Args:
        df: panel with at least the columns `date`, `asset`, `lhs_col` and
            `cmkt_col`.
        lhs_col: name of the per-asset return column.
        cmkt_col: name of the market return column.
        periods_in_year: passed straight through to the QuantTools helpers.

    Returns:
        DataFrame with columns `date`, `BTC`, `ETH` and `CMKT`. There is one
        row per distinct (`date`, `cmkt_col`) pair, in order of first
        appearance in `df`. `BTC` / `ETH` hold the asset's return for that
        row's date, or NaN when the asset has no row on that date.

    Raises:
        ValueError: if, after dropping fully identical rows, btc or eth still
            has more than one row for some date.
    """
    # Create benchmark returns
    btc_df = df[df.asset == 'btc'].drop_duplicates()
    eth_df = df[df.asset == 'eth'].drop_duplicates()
    cmkt_df = df[['date', cmkt_col]].drop_duplicates()
    btc_returns = btc_df[lhs_col].values
    eth_returns = eth_df[lhs_col].values
    cmkt_returns = cmkt_df[cmkt_col].values

    # Report the period we are studying
    print('Benchmark results are for this period:')
    print(f'Start: {np.min(df.date.values)}')
    print(f'End: {np.max(df.date.values)}')
    print('\n')

    # Output statistics on returns
    port_name = ['btc', 'eth', 'cmkt']
    returns_list = [btc_returns, eth_returns, cmkt_returns]
    for portfolio, returns in zip(port_name, returns_list):
        print(f'For the portfolio {portfolio}:')
        print(f'Geom mean annual return: {np.round(QuantTools.calcGeomAvg(returns, annualized=True, periods_in_year=periods_in_year), 4)}')
        print(f'Sharpe: {np.round(QuantTools.calcSharpe(returns, periods_in_year=periods_in_year), 2)}')
        print(f'TS mean annual return: {np.round(QuantTools.calcTSAvgReturn(returns, annualized=True, periods_in_year=periods_in_year), 4)}')
        print('\n')

    # Form benchmark return dataframe to return.
    # The asset columns are matched to the market rows by date rather than by
    # position, so a different row order in df cannot pair a return with the
    # wrong date, and a missing date yields NaN instead of a length error.
    b_df = pd.DataFrame(data={'date': cmkt_df.date.values})
    benchmark_dates = b_df['date'].values
    b_df['BTC'] = _alignReturnsToDates(btc_df, lhs_col, benchmark_dates, 'btc')
    b_df['ETH'] = _alignReturnsToDates(eth_df, lhs_col, benchmark_dates, 'eth')
    b_df['CMKT'] = cmkt_returns
    return b_df


In [3]:
def outputReturnsByQuantileFig(
    df: pd.DataFrame, lhs_col: str, out_fp: str,
    num_qntls: int = 100, yhat_col: str = 'yhats_transformer'
    ) -> None:
    """Plot the mean of `lhs_col` within each prediction quantile and save it.

    Args:
        df: panel handed to QuantTools.formPortfolioWeightsByQuantile.
        lhs_col: return column that is averaged within each quantile.
        out_fp: path the figure is written to.
        num_qntls: number of quantiles requested from QuantTools
            (default 100, the value previously hard-coded).
        yhat_col: prediction column passed to QuantTools
            (default 'yhats_transformer', the value previously hard-coded).
    """
    # Form quantile returns.
    # NOTE(review): the positional False is passed through unchanged;
    # presumably it switches off mcap weighting — confirm against QuantTools.
    pos_df = QuantTools.formPortfolioWeightsByQuantile(
        df, num_qntls, False, yhat_col
    )
    qntl_rtrns_df = pos_df.groupby('qntl')[[lhs_col]].mean().reset_index()

    # Draw on a figure owned by this call, so a figure left open elsewhere in
    # the kernel is neither drawn on nor closed.
    fig, ax = plt.subplots()
    try:
        # Create and format plot
        ax.plot(qntl_rtrns_df['qntl'], qntl_rtrns_df[lhs_col], color='#482677FF')
        ax.set_frame_on(False)
        ax.grid(axis='y', linestyle='--')
        ax.xaxis.set_visible(False)
        ax.yaxis.set_tick_params(color='grey')
        ax.yaxis.label.set_color('grey')

        # Save plot
        fig.savefig(out_fp)
    finally:
        # Release the figure even when saving fails.
        plt.close(fig)


In [4]:
if __name__ == "__main__":
    # Local import: only this cell needs it.
    import os

    # Set args.
    # NOTE(review): IN_FP_TEST_PNL points at panel_train.pkl although the
    # variable is named "test" — confirm this is the intended panel.
    IN_FP_TEST_PNL   = '../data/clean/panel_train.pkl'
    IN_FP_AUTO       = '../data/clean/test_yhats_autoencoder.pkl'
    IN_FP_TRNSFRMR   = '../data/clean/test_yhats_transformer_20210502_20210701.pkl'
    OUT_FP           = '../output/high_dim_fm/high_dim_fms.xlsx'
    OUT_FP_TRNS_QNTL = '../output/high_dim_fm/ts_avg_return_by_quantile_transformer.png'
    LHS_COL          = 'r_ex_tp1'
    CMKT_COL         = 'macro_cmkt_tp1'
    PERIODS_IN_YEAR  = int(365.25*24)
    NUM_QNTLS_PRTLS  = 5
    TC_PER_HOUR      = 7e-4

    # Import data.
    test_df = pd.read_pickle(IN_FP_TEST_PNL)
    a_df    = pd.read_pickle(IN_FP_AUTO)  # not used yet; see the merge TODO below
    t_df    = pd.read_pickle(IN_FP_TRNSFRMR)

    # Form single panel.
    t_df = t_df.rename(columns={'yhats': 'yhats_transformer'})
    df = t_df.copy() # TODO replace with merge of a_df and t_df
    mcap_df = test_df[['date', 'asset', 'char_size_t']].copy()
    mcap_df = mcap_df.rename(columns={'char_size_t': 'mcap'})
    # Sort by date before shifting so that each row really receives the value
    # of the next date, whatever row order the pickled panel happens to have.
    cmkt_df = (
        test_df[['date', 'macro_cmkt_tm1h']]
        .drop_duplicates()
        .sort_values('date')
        .copy()
    )
    cmkt_df[CMKT_COL] = cmkt_df.macro_cmkt_tm1h.shift(-1)
    cmkt_df = cmkt_df.drop('macro_cmkt_tm1h', axis=1)
    df = df.merge(mcap_df, on=['date', 'asset'], how='inner', validate='one_to_one')
    df = df.merge(cmkt_df, on=['date'], how='inner', validate='many_to_one')

    # Clear memory
    del test_df, a_df, t_df
    del mcap_df, cmkt_df
    gc.collect()

    # Report results
    b_df = reportBenchmarkStatistics(df, LHS_COL, CMKT_COL, PERIODS_IN_YEAR)

    # Make sure the output folders exist before anything is written.
    for out_path in (OUT_FP, OUT_FP_TRNS_QNTL):
        os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)

    # Generate portfolio statistics, one sheet per weighting flag.
    # NOTE(review): the boolean is presumably the mcap-weighting switch, going
    # by the sheet names — confirm against QuantTools.calcPortfolioStatistics.
    for mcap_weighted, sheet_name in (
        (False, 'raw_oos_port_stats_equal'),
        (True, 'raw_oos_port_stats_mcap'),
    ):
        results_df = QuantTools.calcPortfolioStatistics(
            df, LHS_COL, 'yhats_transformer', CMKT_COL, 'transformer',
            NUM_QNTLS_PRTLS, PERIODS_IN_YEAR, mcap_weighted, TC_PER_HOUR
        )
        # Append mode needs an existing workbook, and if_sheet_exists is only
        # accepted in append mode, so create the workbook on the first write.
        if os.path.exists(OUT_FP):
            writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
        else:
            writer_kwargs = {'mode': 'w'}
        with pd.ExcelWriter(OUT_FP, engine='openpyxl', **writer_kwargs) as writer:
            results_df.to_excel(writer, sheet_name=sheet_name)

    # Generate ts avg of returns by quantile
    outputReturnsByQuantileFig(df, LHS_COL, OUT_FP_TRNS_QNTL)


Benchmark results are for this period:
Start: 2021-05-02T00:00:00.000000000
End: 2021-06-26T23:00:00.000000000


For the portfolio btc:
Geom mean annual return: -0.9771
Sharpe: -1.2
TS mean annual return: -1.0


For the portfolio eth:
Geom mean annual return: -0.9543
Sharpe: -0.86
TS mean annual return: -1.0


For the portfolio cmkt:
Geom mean annual return: -0.9654
Sharpe: -0.94
TS mean annual return: -1.0




In [5]:
# TODO: update the quantile plot to optionally use mcap weighting; right now it is equal weighted.

In [6]:
# TODO: figure out the TC to take out of the autoencoder; it is already in the autoencoder notebook, so just confirm it is the same.
# TODO: figure out the TC to take out of the market,
#       i.e. calculate turnover etc. following the logic in the transformer / autoencoder notebooks.

In [7]:
# TODO: add the autoencoder results to the portfolio statistics once some transformer results extend into 2H 2021.
#   - write a function that generates both sets of results, appends them, and outputs them

In [8]:
# TODO: build the cumulative return figure showing btc, eth, cmkt, autoencoder and transformer.
#   - calculate the strategy returns to merge with b_df, taking TCs out of both
#   - form all cumulative returns
#   - plot
#   - clean up the plot so it is presentation quality
