In [None]:
# TODO UPDATE THIS OLD MESSY CODE WITH NEW PANEL

In [20]:
# Import packages
import pandas as pd
import numpy as np
import pickle
from dateutil.relativedelta import relativedelta
from datetime import datetime
import matplotlib.pyplot as plt

In [21]:
def subsetToAssetUniverse(df, asset_universe_dict, train_or_test):
    """Drop rows whose asset is outside the asset universe of its window.

    Each key of ``asset_universe_dict`` is a 'YYYY-MM-DD' string that opens
    a three-month window; the value is the collection of assets kept inside
    that window. Rows dated outside every processed window are left alone.

    Args:
        df: frame with a ``date`` column (comparable to ``datetime``) and an
            ``asset`` column.
        asset_universe_dict: mapping from window start date string to the
            included assets, iterated in insertion order.
        train_or_test: 'train' processes every entry except the last four,
            'test' processes only the last four.

    Returns:
        The filtered frame; the input frame itself is not modified.

    Raises:
        AssertionError: if ``train_or_test`` is neither 'train' nor 'test'.
    """
    # the last four dictionary entries belong to the test split
    n_windows = len(asset_universe_dict)
    if train_or_test == 'train':
        window_positions = range(0, n_windows-4)
    elif train_or_test == 'test':
        window_positions = range(n_windows-4, n_windows)
    else:
        assert(False),('get wit zee program')

    window_keys = list(asset_universe_dict.keys())
    for position in window_positions:
        window_key = window_keys[position]
        included   = asset_universe_dict[window_key]

        # half-open window [start, start + 3 months)
        window_start = datetime.strptime(window_key, '%Y-%m-%d')
        window_end   = window_start + relativedelta(months=3)

        # keep a row when it lies outside the window or its asset is included
        in_window = (df.date>=window_start) & (df.date<window_end)
        df = df[~in_window | df.asset.isin(included)]

    return df

In [22]:
def subsetColumnsToAssetCovariates(df):
    """Index the panel by week and keep only the modelling columns.

    Drops ``date``, moves ``week_idx`` into the index, and returns the
    columns ``asset``, ``r_tplus7``, ``usd_mcap`` followed by every column
    whose name begins with 'covar_' (in their existing order).
    """
    by_week = df.drop('date', axis=1).set_index('week_idx')

    kept = ['asset', 'r_tplus7', 'usd_mcap']
    for name in by_week.columns.values:
        if name[:6]=='covar_':
            kept.append(name)

    return by_week[kept]

In [23]:
def labelUnivariateFactorSortedPortfolios(df, covar, calc_corr=True, 
                                          sort_ascending=None):
    """Label every row with its within-week tertile on one covariate.

    Rows are sorted by week and then by ``covar``; a row's tertile is set by
    its position within the week divided by the number of rows that week
    (< 1/3 -> 1, < 2/3 -> 2, otherwise 3). The label is written to the
    column 'tertile_' + covar[6:].

    Args:
        df: panel with ``week_idx`` (column or index level), ``r_tplus7``
            and ``covar``.
        covar: name of the covariate column.
        calc_corr: when True, the sort direction is ascending if the
            correlation between ``r_tplus7`` and ``covar`` over ``df`` is
            positive and descending otherwise.
        sort_ascending: sort direction used when ``calc_corr`` is not True;
            must then equal True or False.

    Returns:
        A new, sorted frame with the tertile column added.
    """
    if calc_corr==True:
        # sort so that tertile 3 holds the end of the covariate that is
        # associated with higher r_tplus7 in this sample
        rho = np.corrcoef(df.r_tplus7.values, df[covar].values)[0,1]
        sort_ascending = bool(rho>0)
    else:
        assert((sort_ascending==True) | (sort_ascending==False))

    ranked = df.sort_values(by=['week_idx', covar],
                            ascending=[True, sort_ascending])

    # fractional position of each row inside its week: 0, 1/n, 2/n, ...
    weekly    = ranked.groupby(['week_idx'])
    position  = weekly.cumcount().values
    week_size = weekly[covar].transform('size').values
    rank_frac = position/week_size

    # first matching condition wins, so the middle test only needs < 2/3
    ranked['tertile_'+covar[6:]] = np.select(
        [rank_frac < 1/3, rank_frac < 2/3, rank_frac >= 2/3],
        [1.0, 2.0, 3.0],
        default=np.nan)

    return ranked

In [24]:
def labelMultivarFactorSortedPortfolio(df, covars):
    """Label each row with a combined within-week tertile over several factors.

    The per-factor tertile labels ('tertile_' + covar[6:] for each entry of
    ``covars``) are summed per row. Rows are then sorted by week, by that
    sum, and by a seeded random tie-breaker, and split into three roughly
    equal groups per week, stored in the ``tertile`` column.

    Unlike the earlier version this function does not add helper columns to
    the caller's frame and does not reseed NumPy's global random generator;
    the tie-breaker draws are the same values that ``np.random.seed(42)``
    followed by ``np.random.uniform`` produced.

    Args:
        df: panel with ``week_idx`` (column or index level) and the
            per-factor tertile columns already present.
        covars: covariate names whose tertile columns are combined.

    Returns:
        A new, sorted frame with the ``tertile`` column added or replaced.
    """
    # form the tertile col names from the covar names
    tertile_cols = ['tertile_'+covar[6:] for covar in covars]

    # equally weighted sum of the univariate tertile labels
    tertile_sum = 0
    for tertile_col in tertile_cols:
        tertile_sum = tertile_sum + df[tertile_col]

    # private generator: same stream as seeding the global one with 42,
    # without disturbing random state used elsewhere in the notebook
    rng = np.random.RandomState(42)

    # assign returns a new frame, so the input is left untouched
    ranked = df.assign(tertile_sum=tertile_sum,
                       rand=rng.uniform(size=df.shape[0]))
    ranked = ranked.sort_values(by=['week_idx', 'tertile_sum', 'rand'])

    # fractional position of each row inside its week: 0, 1/n, 2/n, ...
    weekly    = ranked.groupby(['week_idx'])
    position  = weekly.cumcount().values
    week_size = weekly['rand'].transform('size').values
    rank_frac = position/week_size

    # form tertiles based on tertile sum with ~equal number of assets
    ranked['tertile'] = np.select(
        [rank_frac < 1/3, rank_frac < 2/3, rank_frac >= 2/3],
        [1.0, 2.0, 3.0],
        default=np.nan)

    return ranked.drop(['tertile_sum', 'rand'], axis=1)

In [25]:
def formWeekly3m1PortfolioReturnDF(df):
    """Build the weekly return of the tertile-3-minus-tertile-1 portfolio.

    Within every (week, tertile) group assets are weighted by ``usd_mcap``;
    the weekly result is the weighted ``r_tplus7`` of tertile 3 minus that
    of tertile 1.

    Args:
        df: panel with ``week_idx`` (index level or column), ``tertile``,
            ``usd_mcap`` and ``r_tplus7``. The frame is not modified.

    Returns:
        DataFrame with columns ``week_idx`` and ``r``, one row per week,
        ordered by week.

    Raises:
        AssertionError: if the number of (week, tertile) groups whose
            weights sum to roughly 1 is not three per week.
    """
    group_keys = ['week_idx', 'tertile']

    # assign weekly within-tertile mcap weighted portfolio weights on a
    # new frame so the caller's data is left untouched
    mcap_sum  = df.groupby(group_keys).usd_mcap.transform('sum')
    mcap_wght = df.usd_mcap / mcap_sum
    work      = df.assign(mcap_wght=mcap_wght, r=mcap_wght * df.r_tplus7)

    # count weeks from the same key the grouping uses, whether it is stored
    # as the index or as a regular column
    if 'week_idx' in df.columns:
        week_labels = df['week_idx'].values
    else:
        week_labels = df.index.get_level_values('week_idx').values
    num_weeks = len(np.unique(week_labels))

    # confirm portfolio weights roughly sum to 1 each week-tertile
    weight_sums = work.groupby(group_keys).mcap_wght.sum()
    num_valid   = np.sum(np.isclose(weight_sums, 1, rtol=1e-2, atol=1e-2))
    assert num_weeks*3 == num_valid, \
        'expected three fully weighted tertiles in every week'

    # calculate weekly portfolio return, matching tertile 3 and tertile 1
    # on week instead of relying on row position
    r_df   = work.groupby(group_keys)[['r']].sum().reset_index()
    top    = r_df[r_df.tertile == 3].set_index('week_idx').r
    bottom = r_df[r_df.tertile == 1].set_index('week_idx').r
    spread = top - bottom

    return pd.DataFrame(data={'week_idx': spread.index.values,
                              'r': spread.values})

In [26]:
def calcPortfolioReturn(df):
    """Annualize the compounded weekly returns in ``df.r``.

    Args:
        df: frame with one weekly simple return per row in column ``r``.

    Returns:
        The geometric mean weekly return compounded over 52.18 weeks.
        Returns -1 if any weekly return is -1 or lower (total loss), and
        NaN when ``df`` has no rows.
    """
    weekly_r = df.r.values
    num_wks  = df.shape[0]

    # nothing to compound; avoids dividing by zero in the 1/num_wks exponent
    if num_wks == 0:
        return np.nan

    # a weekly return of -100% or worse wipes out the portfolio
    if np.sum(weekly_r <= -1)>=1:
        return -1

    # np.prod replaces np.product, which NumPy 2.0 removed
    tot_ret  = np.prod(weekly_r+1)-1
    wkly_ret = (tot_ret+1)**(1/num_wks)-1
    annl_ret = (wkly_ret+1)**(52.18)-1
    return annl_ret

In [27]:
def calcFactorPerformance(df, covariates, factors=None):
    """Rank candidate covariates by annualized 3-minus-1 portfolio return.

    For every candidate, the panel is labelled with the combined tertile of
    ``factors`` plus that candidate, the weekly tertile-3-minus-tertile-1
    return series is formed, and its annualized return is recorded.

    Args:
        df: panel that already carries the univariate tertile columns.
        covariates: candidate covariate names to evaluate.
        factors: covariates already selected; each candidate is evaluated
            together with them. Defaults to no prior factors.

    Returns:
        DataFrame with columns ``covariate`` and ``annual_return``, sorted
        by ``annual_return`` descending; the index holds each candidate's
        position in ``covariates``. An empty ``covariates`` gives an empty
        frame with those two columns.
    """
    base_factors = [] if factors is None else list(factors)

    # collect plain records and build the frame once instead of
    # concatenating inside the loop
    records = []
    for covar in covariates:
        # NOTE: each iteration relabels the frame returned by the previous
        # one, so row order (and with it the random tie-breaks) carries over
        # between candidates; kept as in the original implementation.
        df       = labelMultivarFactorSortedPortfolio(df, covars=base_factors+[covar])
        r_df     = formWeekly3m1PortfolioReturnDF(df)
        annl_ret = calcPortfolioReturn(r_df)
        records.append({'covariate': covar, 'annual_return': annl_ret})

    # explicit columns keep the sort valid when there are no candidates
    results_df = pd.DataFrame(records, columns=['covariate', 'annual_return'])
    results_df = results_df.sort_values('annual_return', ascending=False)
    return results_df

In [28]:
def labelPortfolioWeightsAndReturns(df):
    """Attach long-only portfolio weights and the weekly portfolio return.

    Assets are market-cap weighted inside each (week, tertile) group, then
    the groups are scaled so tertile 1 gets 0, tertile 2 gets 1/6 and
    tertile 3 gets 5/6 of the portfolio.

    Args:
        df: frame with columns ``week_idx``, ``tertile``, ``usd_mcap`` and
            ``r_tplus7``. The frame is copied, not modified.

    Returns:
        A copy of ``df`` with the added columns ``mcap_sum``, ``mcap_wght``,
        ``prtfl_wght`` and ``r``; ``r`` is the portfolio return of the
        row's week, repeated on every row of that week.

    Raises:
        AssertionError: if the weights of some week do not sum to roughly 1.
    """
    # callers pass column slices of larger frames; copying keeps the new
    # columns off the caller's data and avoids chained-assignment warnings
    df = df.copy()

    # assign weekly within-tertile mcap weighted portfolio weights 
    df['mcap_sum']   = df.groupby(['week_idx', 'tertile']).usd_mcap.transform('sum')
    df['mcap_wght']  = df.usd_mcap / df.mcap_sum
    
    # assign prtfl weight that is 0 in bottom, 1/6 in middle, and 5/6 in top
    df.loc[df.tertile == 1, 'prtfl_wght'] = df.mcap_wght*0
    df.loc[df.tertile == 2, 'prtfl_wght'] = df.mcap_wght*1/6
    df.loc[df.tertile == 3, 'prtfl_wght'] = df.mcap_wght*5/6
    
    # confirm portfolio weights roughly sum to 1 for each week
    num_weeks = len(np.unique(df.week_idx))
    num_valid = np.sum(np.isclose(df.groupby(['week_idx']).prtfl_wght.sum(), 1,
                                  rtol=1e-2, atol=1e-2))
    assert num_weeks == num_valid, \
        'portfolio weights do not sum to ~1 in every week'
    
    # add weekly returns to the data frame; the string 'sum' is used because
    # passing the builtin sum to transform is deprecated in recent pandas
    df['r'] = df.prtfl_wght*df.r_tplus7
    df['r'] = df.groupby(['week_idx']).r.transform('sum')

    return df

In [29]:
def calcAnnualTransactionCosts(df):
    """Estimate transaction costs implied by week-to-week weight changes.

    Weekly turnover is the sum over assets of the absolute change in
    ``prtfl_wght`` versus the previous week; an asset missing in either
    week counts as weight 0. The first week in ``df`` is charged a
    turnover of 1 for buying the initial portfolio, and one extra week
    after the last is charged a turnover of 1 for liquidating it. Cost is
    turnover times 0.002.

    Args:
        df: frame with columns ``week_idx``, ``asset`` and ``prtfl_wght``,
            one row per (week, asset).

    Returns:
        The negative of the summed transaction costs.
        NOTE(review): despite the function name, the value is the total
        over every week present in ``df``; it is not divided by the number
        of years covered - confirm this is intended for multi-year input.

    Raises:
        AssertionError: if a weekly turnover falls outside [0, ~2].
        pandas.errors.MergeError: if (week_idx, asset) pairs are not unique.
    """
    # derive the boundary weeks from the data instead of hard-coding them,
    # so the same logic serves both the validation and the test period
    first_week = np.min(df.week_idx)
    last_week  = np.max(df.week_idx)

    # merge on the previous week's holdings to the new holdings
    prev_df = df.loc[df.week_idx<last_week, ['week_idx', 'asset', 'prtfl_wght']].copy()
    prev_df['week_idx'] = prev_df.week_idx+1
    prev_df = prev_df.rename(columns={'prtfl_wght': 'prtfl_wght_tm7'})
    merged  = df.merge(prev_df,
                       on=['week_idx', 'asset'],
                       how='outer',
                       validate='one_to_one')

    # an asset absent from one side of the outer merge has a NaN weight;
    # treat it as 0 so that entering and exiting positions count as turnover
    # instead of being dropped by the NaN-skipping sum below
    merged['asset_to'] = np.abs(merged.prtfl_wght.fillna(0)
                                - merged.prtfl_wght_tm7.fillna(0))
    to_df = merged.groupby('week_idx')[['asset_to']].sum().reset_index()

    # weights are only checked to sum to ~1 (within 2e-2), so allow the same
    # slack on the theoretical maximum turnover of 2
    max_turnover = 2*(1 + 2e-2)
    assert (np.min(to_df.asset_to)>=0) & (np.max(to_df.asset_to)<=max_turnover), \
        'weekly turnover outside the expected range'

    # correct the first and last week valid for buying the initial port and liquidating
    to_df.loc[to_df.week_idx==first_week, 'asset_to'] = 1
    to_df = pd.concat((to_df, pd.DataFrame(data={'week_idx': [last_week+1],
                                                 'asset_to': [1.0]})))
    to_df = to_df.reset_index(drop=True)

    # add transaction costs assuming maker and taker fee of 20 bps each
    to_df['tc'] = to_df.asset_to*0.002

    # return the summed transaction cost as a negative return
    return -np.sum(to_df.tc)

In [30]:
def calcPortfolioSharpe(df):
    """Return the annualized Sharpe ratio of the weekly returns in ``df.r``.

    The weekly ratio is the mean return divided by the population standard
    deviation (NumPy's default, ddof=0), with no risk-free rate subtracted;
    it is scaled by the square root of 52.18 weeks per year.
    """
    weekly_returns = df.r.values
    mean_over_vol  = np.mean(weekly_returns)/np.std(weekly_returns)
    return mean_over_vol*np.sqrt(52.18)

In [31]:
def max_draw_down(df):
    """Return the largest peak-to-trough decline of the compounded returns.

    Args:
        df: frame with one simple return per row in column ``r``.

    Returns:
        The minimum over time of (cumulative value / running peak) - 1,
        i.e. 0 when the series never falls below a previous peak and a
        negative number otherwise. NaN when ``df`` has no rows.
    """
    cumulative_ret = (df.r+1).cumprod()

    # cummax is the running peak; the earlier full-length rolling window
    # gave the same values but raised ValueError on an empty series
    running_peak = cumulative_ret.cummax()

    drawdown = cumulative_ret/running_peak - 1
    return drawdown.min()

In [32]:
def max_1_month_loss(df):
    """Return the worst compounded return over any four consecutive rows.

    ``df['r']`` holds simple returns; each rolling window of four rows is
    compounded and the smallest result, minus 1, is returned. With fewer
    than four rows every window is incomplete and the result is NaN.
    """
    gross_returns    = df['r']+1
    four_week_growth = gross_returns.rolling(4).apply(np.prod)
    return four_week_growth.min()-1

In [None]:
# read in data
panel_train_fp = '../3-data/clean/panel_train.pkl'
asset_uni_fp   = '../3-data/clean/asset_universe_dates_and_lists.pkl'
mcap_fp = '../3-data/clean/raw_panel.pkl'
test_fp = '../3-data/clean/panel_oos.pkl'
mcap_df = pd.read_pickle(mcap_fp)
mcap_df = mcap_df[['date', 'asset', 'usd_mcap']]
train_df = pd.read_pickle(panel_train_fp)
nrows    = train_df.shape[0]
# attach market caps; the inner join must not drop any training row
train_df = mcap_df.merge(train_df,
                         on=['date', 'asset'],
                         how='inner',
                         validate='one_to_one')
assert(nrows==train_df.shape[0])
# NOTE: unpickling executes arbitrary code; only load files produced by
# this project's own pipeline
with open(asset_uni_fp, 'rb') as handle:
    asset_universe_dict = pickle.load(handle)
test_df  = pd.read_pickle(test_fp)

# clean up test data (the test period starts at week_idx 262)
test_df = test_df[test_df.week_idx >= 262]
test_df = mcap_df.merge(test_df,
                        on=['date', 'asset'],
                        how='inner',
                        validate='one_to_one')
    
# subset data to relevant rows and cols
train_df = subsetToAssetUniverse(train_df, asset_universe_dict, train_or_test='train')
train_df = subsetColumnsToAssetCovariates(train_df)
test_df  = subsetToAssetUniverse(test_df, asset_universe_dict, train_or_test='test')

# form weekly univariate tertile rankings to use throughout
covariates = list(train_df.columns.values)
covariates.remove('asset')
covariates.remove('r_tplus7')
covariates.remove('usd_mcap')
for covar in covariates:
    train_df = labelUnivariateFactorSortedPortfolios(train_df, covar, calc_corr=True)
    
# rank covars on 3 minus 1 sorted portfolio return to find best one factor model
univar_results_df = calcFactorPerformance(train_df, covariates, factors=[])
factor1 = univar_results_df.covariate.values[0]

# rank covars on 3 minus 1 sorted portfolio return from two factor model
covariates.remove(factor1)
bivar_results_df = calcFactorPerformance(train_df, covariates, factors=[factor1])
factor2 = bivar_results_df.covariate.values[0]

# rank covars on 3 minus 1 sorted portfolio return from three factor model
covariates.remove(factor2)
trivar_results_df = calcFactorPerformance(train_df, covariates, factors=[factor1, factor2])
factor3 = trivar_results_df.covariate.values[0]

# generate validation results
factors = []
for factor in [factor1, factor2, factor3]:
    factors  += [factor]
    print('validation period (2018-2020) results for factor model containing')
    print(factors)
    train_df = labelMultivarFactorSortedPortfolio(train_df, covars=factors)
    val_df   = train_df[train_df.index>=106].reset_index() # 2018 and onward for val period
    val_df   = val_df[['week_idx', 'asset', 'r_tplus7', 'usd_mcap', 'tertile']]
    val_df   = labelPortfolioWeightsAndReturns(val_df)
    annl_tc  = calcAnnualTransactionCosts(val_df)
    print('annual transaction costs in simple return terms: ' + str(np.round(annl_tc, 4)))
    # the weekly return is repeated on every asset row; keep one row per week
    r_df     = val_df[['week_idx', 'r']].drop_duplicates()
    annl_ret = calcPortfolioReturn(r_df)
    annl_sharpe = calcPortfolioSharpe(r_df)
    print('annual simple return before trans costs: ' + str(np.round(annl_ret, 4)))
    print('annual sharpe: '+str(np.round(annl_sharpe, 2)))
    print('\n')
    
# generate test period results
test_df = test_df[['week_idx', 'date', 'asset', 'r_tplus7', 'usd_mcap', factor1, factor2, factor3]]
for factor in [factor1, factor2, factor3]:
    # take each factor's sort direction from the training sample; letting
    # the labelling function compute the correlation on test_df would use
    # test-period returns to decide how to rank test-period assets
    train_corr = np.corrcoef(train_df.r_tplus7.values, train_df[factor].values)[0,1]
    test_df = labelUnivariateFactorSortedPortfolios(test_df, factor, calc_corr=False,
                                                    sort_ascending=bool(train_corr>0))
    
factors = []
num_factors = 0
for factor in [factor1, factor2, factor3]:
    factors  += [factor]
    num_factors += 1
    print('test period (2021) results for factor model containing')
    print(factors)
    oos_df = labelMultivarFactorSortedPortfolio(test_df, covars=factors)
    oos_df = oos_df[['week_idx', 'asset', 'r_tplus7', 'usd_mcap', 'tertile']]
    oos_df = labelPortfolioWeightsAndReturns(oos_df)
    annl_tc  = calcAnnualTransactionCosts(oos_df)
    print('annual transaction costs in simple return terms: ' + str(np.round(annl_tc, 4)))
    # the weekly return is repeated on every asset row; keep one row per week
    r_df     = oos_df[['week_idx', 'r']].drop_duplicates()
    annl_ret = calcPortfolioReturn(r_df)
    annl_sharpe = calcPortfolioSharpe(r_df)
    print('annual simple return before trans costs: ' + str(np.round(annl_ret, 4)))
    print('annual sharpe: '+str(np.round(annl_sharpe, 2)))
    max_dd = max_draw_down(r_df)
    print('max drawdown : '+str(np.round(max_dd, 2)))
    max_1mo_loss = max_1_month_loss(r_df)
    print('max one month loss : '+str(np.round(max_1mo_loss, 2)))
    print('\n')

# output equal weight benchmark
print('test period (2021) results for equal-weighted portfolio')
r_df = test_df.copy()
# every asset present in a week gets weight 1 / (number of assets that week)
r_df['counts'] = 1
r_df['assets_per_week'] = r_df.groupby('week_idx').counts.transform('sum')
r_df['prtfl_wght'] = 1/r_df.assets_per_week
r_df = r_df.drop(['counts', 'assets_per_week'], axis=1)
annl_tc  = calcAnnualTransactionCosts(r_df)
print('annual transaction costs in simple return terms: ' + str(np.round(annl_tc, 4)))
# the equal-weighted weekly return is the plain cross-sectional mean
r_df = r_df.groupby('week_idx')[['r_tplus7']].mean()
r_df = r_df.rename(columns={'r_tplus7': 'r'})
annl_ret = calcPortfolioReturn(r_df)
annl_sharpe = calcPortfolioSharpe(r_df)
print('annual simple return before trans costs: ' + str(np.round(annl_ret, 4)))
print('annual sharpe: '+str(np.round(annl_sharpe, 2)))
max_dd = max_draw_down(r_df)
print('max drawdown : '+str(np.round(max_dd, 2)))
max_1mo_loss = max_1_month_loss(r_df)
print('max one month loss : '+str(np.round(max_1mo_loss, 2)))
print('\n')

# output mcap-weighted benchmark
print('test period (2021) results for mcap-weighted portfolio')
r_df = test_df.copy()
r_df['mcap_sum']  = r_df.groupby(['week_idx']).usd_mcap.transform('sum')
r_df['prtfl_wght'] = r_df.usd_mcap / r_df.mcap_sum
# confirm portfolio weights roughly sum to 1 for each week
assert(len(np.unique(r_df.week_idx)) == 
       np.sum(np.isclose(r_df.groupby(['week_idx']).prtfl_wght.sum(), 1,
                         rtol=1e-2, atol=1e-2)))
r_df = r_df.drop(['mcap_sum'], axis=1)
annl_tc  = calcAnnualTransactionCosts(r_df)
print('annual transaction costs in simple return terms: ' + str(np.round(annl_tc, 4)))
# weekly return is the weight-times-return sum across assets
r_df['r'] = r_df.prtfl_wght * r_df.r_tplus7
r_df = r_df.groupby('week_idx')[['r']].sum()
annl_ret = calcPortfolioReturn(r_df)
annl_sharpe = calcPortfolioSharpe(r_df)
print('annual simple return before trans costs: ' + str(np.round(annl_ret, 4)))
print('annual sharpe: '+str(np.round(annl_sharpe, 2)))
max_dd = max_draw_down(r_df)
print('max drawdown : '+str(np.round(max_dd, 2)))
max_1mo_loss = max_1_month_loss(r_df)
print('max one month loss : '+str(np.round(max_1mo_loss, 2)))
print('\n')

In [None]:
# TODO SCOPE OLD SCRIPT FOR ACTUAL NUMBERS