In [None]:
# TODO UPDATE THIS OLD MESSY CODE WITH NEW PANEL

In [1]:
# Imports
import pandas as pd
import numpy as np
import itertools
import time
import tensorflow as tf
import pickle
import sklearn as sk
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras import regularizers
from keras.callbacks import EarlyStopping
from tensorflow.keras import initializers
from keras.models import Model
from dateutil.relativedelta import relativedelta
from datetime import datetime

In [2]:
# TEMP TO RUN ON CPU
# Passing an empty device list for 'GPU' hides every GPU from TensorFlow in this kernel.
# NOTE(review): presumably a stopgap; confirm before removing once GPU runs are wanted.
tf.config.set_visible_devices([], 'GPU')

In [3]:
def dropRowsAndColsForCA(df):
    """Trim the panel to the columns and weeks used by the conditional autoencoder (CA).

    Args:
        df: panel DataFrame whose index (or an index level) is named 'date' and
            which contains every column listed in `keep_cols` below.

    Returns:
        A new DataFrame with only the `keep_cols` columns, restricted to the
        dates that have at least four times as many rows as retained columns.
        The caller's frame is left untouched.
    """
    # Identifier, target and covariate columns of interest
    keep_cols = ['asset', 'week_idx', 'r_tplus7', 'covar_mcap_t',
                 'covar_age_t',
                 'covar_alexa_rank_t',
                 'covar_circulating_supply_t',
                 'covar_dev_activ_t',
                 'covar_dev_activity_contributors_count_t',
                 'covar_github_activity_contributors_count_t',
                 'covar_kurt_r_daily_tm7',
                 'covar_num_market_pairs_t',
                 'covar_p_volume_log_t',
                 'covar_r_daily_t',
                 'covar_r_t',
                 'covar_r_tm28',
                 'covar_rank_cmc_t',
                 'covar_sentiment_balance_bitcointalk_t',
                 'covar_sentiment_negative_bitcointalk_avg_daily_tm7',
                 'covar_sentiment_volume_consumed_bitcointalk_avg_daily_tm7',
                 'covar_skew_r_daily_tm7',
                 'covar_social_dominance_total_t',
                 'covar_social_volume_total_t',
                 'covar_total_supply_t',
                 'covar_twitter_followers_t',
                 'covar_vol_r_daily_tm7']

    # Explicit copy: the helper column below must never be written into a
    # view of the caller's frame (avoids SettingWithCopyWarning).
    panel = df[keep_cols].copy()

    # Number of rows (assets) observed on each date, broadcast back to every row.
    coins_per_week = (panel.assign(counts=1)
                           .groupby(['date'])['counts']
                           .transform('sum'))

    # Keep only dates with at least 4x as many assets as retained columns.
    # The multiplier counts all retained columns, i.e. it includes
    # 'asset', 'week_idx' and 'r_tplus7' as well as the covariates.
    min_coins_per_week = 4 * len(keep_cols)

    return panel[coins_per_week >= min_coins_per_week]

In [4]:
def formPortfolioReturnCovariates(df):
    """Append per-week characteristic-managed portfolio returns as 'x_*' columns.

    For every distinct index value (week) the rows' 'r_tplus7' values are
    regressed, without intercept, on all columns other than 'asset' and
    'r_tplus7'. The resulting coefficient vector (Z'Z)^{-1} Z'r is written to
    every row of that week under the names 'x_<column>'.

    Args:
        df: panel DataFrame indexed by 'date' with 'asset', 'r_tplus7' and
            numeric covariate columns. Note that any other column present
            (e.g. 'week_idx') is treated as a covariate as well.

    Returns:
        A date-sorted copy of `df` with one extra 'x_*' column per covariate.

    Raises:
        numpy.linalg.LinAlgError: if Z'Z is singular for some week.
    """
    # Obtain the weeks of the dataframe (sort_values returns a new frame)
    df = df.sort_values(by = 'date')
    weeks = np.unique(df.index)

    # Form new covariate names
    covariates = [col for col in df.columns.values
                  if col not in ('asset', 'r_tplus7')]
    new_covariates = ['x_' + cov for cov in covariates]

    # Create the target columns up front so the per-week writes below always
    # hit existing float columns instead of relying on implicit enlargement.
    df = df.reindex(columns=list(df.columns) + new_covariates)

    for current_week in weeks:
        # Select the week once and reuse the mask for reading and writing
        in_week = df.index == current_week
        week_rows = df.loc[in_week]
        returns = week_rows['r_tplus7'].values
        z_t_1   = week_rows[covariates].values

        # Solve the normal equations (Z'Z) x = Z'r directly; this is better
        # conditioned than forming the explicit inverse and still raises
        # LinAlgError when Z'Z is singular.
        gram = np.matmul(np.transpose(z_t_1), z_t_1)
        x_s  = np.linalg.solve(gram, np.matmul(np.transpose(z_t_1), returns))

        # Broadcast this week's coefficient vector to every row of the week
        df.loc[in_week, new_covariates] = x_s

    return df


In [5]:
def subsetToAssetUniversePerWeek(temp_df, asset_universe_dict, oos_week):
    """Keep only the rows whose asset is in the universe of `oos_week`'s quarter.

    Args:
        temp_df: DataFrame with an 'asset' column.
        asset_universe_dict: mapping from quarter-start keys formatted as
            'YYYY-MM-01' (MM in 01, 04, 07, 10) to a collection of assets.
        oos_week: anything `pd.to_datetime` can parse.

    Returns:
        The rows of `temp_df` whose asset belongs to that quarter's universe.

    Raises:
        KeyError: if the quarter key is missing from `asset_universe_dict`.
    """
    oos_ts = pd.to_datetime(oos_week)

    # First month of the calendar quarter containing oos_week: 1, 4, 7 or 10
    quarter_start_month = 3 * ((oos_ts.month - 1) // 3) + 1
    quarter_key = str(oos_ts.year) + '-' + '%02d' % quarter_start_month + '-01'

    in_universe = temp_df.asset.isin(asset_universe_dict[quarter_key])
    return temp_df[in_universe]


In [6]:
def subsetToAssetUniverseFull(df, asset_universe_dict, train_or_test):
    """Drop rows whose asset is outside the universe of the quarter they fall in.

    Args:
        df: DataFrame with a datetime-like index and an 'asset' column.
        asset_universe_dict: mapping from quarter-start strings ('%Y-%m-%d')
            to the assets included in that quarter, in insertion order.
        train_or_test: 'train' applies every quarter except the last four;
            'test' applies only the last four.

    Returns:
        `df` without the rows that fall inside an applied quarter's
        three-month window but are not in that quarter's asset list.
        Rows outside every applied window are kept.
    """
    n_quarters = len(asset_universe_dict)

    # Pick the positions of the quarters to apply
    if train_or_test == 'train':
        quarter_positions = range(0, n_quarters - 4)
    elif train_or_test == 'test':
        quarter_positions = range(n_quarters - 4, n_quarters)
    else:
        assert False, 'get wit zee program'

    quarter_keys = list(asset_universe_dict.keys())
    for pos in quarter_positions:
        quarter = quarter_keys[pos]

        # Three-month window [window_start, window_end) for this quarter
        window_start = datetime.strptime(quarter, '%Y-%m-%d')
        window_end   = window_start + relativedelta(months=3)

        # Keep a row if it is outside the window or its asset is included
        in_window = (df.index >= window_start) & (df.index < window_end)
        is_member = df.asset.isin(asset_universe_dict[quarter])
        df = df[~in_window | is_member]

    return df

In [7]:
def _ensembleInitializers(i):
    """Return the (kernel, bias) initializer pair for ensemble member `i`, both seeded with `i`."""
    he_normal = initializers.HeNormal
    glorot    = initializers.GlorotUniform
    uniform   = initializers.RandomUniform
    pairs = {0: (he_normal, glorot),
             1: (glorot, uniform),
             2: (he_normal, uniform),
             3: (uniform, glorot),
             4: (glorot, he_normal)}
    weight_cls, bias_cls = pairs.get(i, (he_normal, uniform))
    return weight_cls(seed=i), bias_cls(seed=i)


def _buildConditionalAutoencoder(n_beta_inputs, n_factor_inputs, number_hidden_layer,
                                 number_factor, l1_penalty, learning_rate,
                                 weight_initializer, bias_initializer):
    """Build and compile one two-input model: dot(beta_net(b_t_1), factor_net(x_t))."""
    def hidden(units):
        # ReLU hidden layer with an L1 penalty on the kernel
        return Dense(units, activation='relu',
                     kernel_regularizer=regularizers.l1(l1=l1_penalty),
                     kernel_initializer=weight_initializer,
                     bias_initializer=bias_initializer)

    # Build the betas model from the t minus 1 covariates
    model_b = tf.keras.models.Sequential()
    model_b.add(tf.keras.Input(shape=(n_beta_inputs,)))
    model_b.add(hidden(6))
    model_b.add(BatchNormalization())
    if number_hidden_layer >= 2:
        model_b.add(hidden(5))
        model_b.add(BatchNormalization())
    if number_hidden_layer == 3:
        model_b.add(hidden(4))
        model_b.add(BatchNormalization())
    model_b.add(Dense(number_factor, activation='linear',
                      kernel_initializer=weight_initializer,
                      bias_initializer=bias_initializer))

    # Form the x model from time t returns (a single linear layer)
    model_x = tf.keras.models.Sequential()
    model_x.add(tf.keras.Input(shape=(n_factor_inputs,)))
    model_x.add(Dense(number_factor, activation='linear',
                      kernel_initializer=weight_initializer,
                      bias_initializer=bias_initializer))

    # Combine the two networks with a dot product over the factor axis
    mergedOut = Dot(axes=(1,1))([model_b.output, model_x.output])
    model = Model([model_b.input, model_x.input], mergedOut)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='mean_squared_error',
                  metrics=['mse'])
    return model


def fitAutoencoder(train_df, hps_yhats_dict, val_df=None, early_stopping=True):
    """Fit an ensemble of conditional-autoencoder models.

    Args:
        train_df: training panel with 'week_idx', 'asset', 'r_tplus7' and
            covariate columns. Columns starting with 'x_' feed the factor
            network; all remaining covariates feed the beta network.
        hps_yhats_dict: hyperparameters; reads the keys 'number_hidden_layer',
            'number_factor', 'learning_rate', 'l1_penalty', 'batch_size',
            'number_ensemble' (at most 5), 'bootstrap_pct' and 'epoch'.
        val_df: validation panel with the same columns; required when
            `early_stopping` is true.
        early_stopping: when true, stop each fit on 'val_mse' with patience 2.

    Returns:
        List of fitted Keras models, one per ensemble member.

    Raises:
        ValueError: if early stopping is requested without `val_df`.
    """
    if early_stopping and val_df is None:
        raise ValueError('early_stopping=True requires val_df to be provided')

    # Obtain the covariates from the frame that was passed in (the original
    # code read a notebook-global `df` here, which only worked by accident).
    covariates = [col for col in train_df.columns.values
                  if col not in ('week_idx', 'asset', 'r_tplus7')]

    # Obtain the covariates on x_t and b_t_1 sides of the network
    x_t   = [covar for covar in covariates if covar[:2] == 'x_']
    b_t_1 = [covar for covar in covariates if covar[:2] != 'x_']

    # Extract the hyperparameters
    number_hidden_layer = hps_yhats_dict['number_hidden_layer']
    number_factor       = hps_yhats_dict['number_factor']
    learning_rate       = hps_yhats_dict['learning_rate']
    l1_penalty          = hps_yhats_dict['l1_penalty']
    batch_size          = hps_yhats_dict['batch_size']
    number_ensemble     = hps_yhats_dict['number_ensemble']
    bootstrap_pct       = hps_yhats_dict['bootstrap_pct']
    epoch               = hps_yhats_dict['epoch']

    # Validation inputs do not change across ensemble members
    if val_df is not None:
        val_b_t_1 = val_df[b_t_1]
        val_x_t   = val_df[x_t]
        val_y     = val_df[['r_tplus7']]

    # Every member draws the same number of rows from the ORIGINAL training set
    n_boot = int(train_df.shape[0]*bootstrap_pct)

    models = []

    # Loop over the ensembles to build models for each
    assert(number_ensemble <= 5)
    for i in range(0, number_ensemble):

        # Bootstrap the rows so different models in the ensemble are less
        # correlated. Always resample from `train_df` itself: reassigning
        # train_df here would make member i a resample of member i-1's
        # sample, compounding the shrinkage and the loss of distinct rows.
        boot_df = sk.utils.resample(train_df, replace = True,
                                    n_samples = n_boot,
                                    random_state = i)
        train_b_t_1 = boot_df[b_t_1]
        train_x_t   = boot_df[x_t]
        train_y     = boot_df[['r_tplus7']]

        # Each ensemble member gets its own seeded initializer pair
        weight_initializer, bias_initializer = _ensembleInitializers(i)

        model = _buildConditionalAutoencoder(len(b_t_1), len(x_t),
                                             number_hidden_layer, number_factor,
                                             l1_penalty, learning_rate,
                                             weight_initializer, bias_initializer)

        # Fit the model
        fit_kwargs = dict(x=[train_b_t_1, train_x_t], y=train_y,
                          batch_size=batch_size, epochs=epoch, workers=4)
        with tf.device('/CPU:0'):
            if early_stopping:
                # Fresh callback per member: EarlyStopping keeps state
                es = EarlyStopping(monitor='val_mse', mode='min', verbose=0, patience = 2)
                model.fit(validation_data=([val_b_t_1, val_x_t], val_y),
                          verbose=0, callbacks=[es], **fit_kwargs)
            else:
                model.fit(verbose=1, **fit_kwargs)

        models.append(model)

    return models


In [8]:
def _printFiveStatSummary(label, values):
    """Print min / Q1 / mean / Q3 / max of `values` under the given label."""
    print('Min %s: %.3f' % (label, values.min()))
    print('Q1 %s: %.3f' % (label, np.percentile(values, 25)))
    print('Mean %s: %.3f' % (label, values.mean()))
    print('Q3 %s: %.3f' % (label, np.percentile(values, 75)))
    print('Max %s: %.3f' % (label, values.max()))
    print('\n')


def genYhats(df, models, oos_week, number_factor, verbose=False):
    """Predict the out-of-sample week's returns as beta_hat @ lambda_hat.

    Args:
        df: panel indexed by date holding the OOS week AND the earlier weeks
            used to average the factors; needs 'asset', 'r_tplus7', 'week_idx',
            'x_*' factor-side columns and the remaining beta-side covariates.
        models: fitted ensemble as returned by `fitAutoencoder`.
        oos_week: index value of the week to predict.
        number_factor: number of latent factors the models were built with.
        verbose: when true, print five-statistic summaries of the beta hats,
            lambda hats and yhats.

    Returns:
        1-D array with one prediction per OOS-week row of `df`.
    """
    # Obtain the covariates on x_t and b_t_1 sides of the network
    covariates = [col for col in df.columns.values
                  if col not in ('asset', 'r_tplus7', 'week_idx')]
    x_t   = [covar for covar in covariates if covar[:2] == 'x_']
    b_t_1 = [covar for covar in covariates if covar[:2] != 'x_']

    # Obtain the oos data
    oos_b_t_1 = df[df.index == oos_week][b_t_1]

    # Build each model's beta and factor sub-networks once, rather than
    # re-creating them for every (week, model) pair.
    beta_nets   = []
    factor_nets = []
    for model in models:
        assert(model.layers[-3].output_shape[1] == number_factor)
        assert(model.layers[-2].output_shape[1] == number_factor)
        beta_nets.append(Model(inputs=model.input[0],
                               outputs=model.get_layer(model.layers[-3]._name).output))
        factor_nets.append(Model(inputs=model.input[1],
                                 outputs=model.get_layer(model.layers[-2]._name).output))

    # Average the beta hats across the ensemble
    b_hats = np.zeros((oos_b_t_1.shape[0], number_factor))
    for beta_net in beta_nets:
        b_hats += beta_net.predict(oos_b_t_1)
    b_hats = b_hats/len(models)
    if verbose:
        _printFiveStatSummary('beta_hats', b_hats)

    # Form the sample average of the estimated factors for BEFORE the OOS week
    weeks = np.unique(df[df.index < oos_week].index)
    lambda_hats = np.zeros(number_factor)
    for prev_week in weeks:
        prev_x_t = df[df.index == prev_week][x_t]
        f_hats = np.zeros(number_factor)
        for factor_net in factor_nets:
            f_hat = factor_net.predict(prev_x_t)
            # Sanity check that rows within a week give the same factor
            # estimate; skipped for single-row weeks, which have no second
            # row to compare against.
            if f_hat.shape[0] > 1:
                assert(all(np.isclose(f_hat[0,:], f_hat[1,:])))
            f_hats += f_hat[0,:]
        lambda_hats += f_hats / len(models)

    # Divide ONCE, after summing every week. Dividing inside the loop (as the
    # previous version did) rescales the running total on every iteration and
    # drives the average toward zero.
    if len(weeks) > 0:
        lambda_hats = lambda_hats / len(weeks)
    else:
        import warnings
        warnings.warn('genYhats: df has no weeks before oos_week, so the factor '
                      'average is zero and every yhat will be zero')
    if verbose:
        _printFiveStatSummary('lambda_hats', lambda_hats)

    # Form yhats
    yhats = np.matmul(b_hats, lambda_hats)
    if verbose:
        _printFiveStatSummary('y_hats', yhats)

    return yhats

In [9]:
def runCV(df, asset_universe_dict, last_train_year=2018, val_end_year=2020):
    """Grid-search the autoencoder hyperparameters with an expanding-window validation.

    For every grid point and every validation week, the ensemble is refit on
    all earlier weeks (restricted to that week's asset universe) and used to
    predict the validation week.

    Args:
        df: preprocessed panel with a datetime index.
        asset_universe_dict: quarter-start key -> included assets.
        last_train_year: validation weeks are those with year > this value...
        val_end_year: ...and year <= this value.

    Returns:
        List with one dict per grid point holding the hyperparameters plus the
        concatenated predictions ('yhats') and realised returns ('ys').

    Side effects:
        After every grid point the cumulative results table is written to a
        new timestamped CSV under '../4-output/'.
    """
    # Initialize hp result objects
    hps_yhats_dict_list = []
    hps_mse_df_list     = []

    # Initialize the hyperparameter grid
    number_hidden_layers = [3, 1]
    number_factors       = [3, 2, 1]
    learning_rates       = [1e-5, 1e-3, 1e-1]
    batch_sizes          = [128] # note: ensure powers of 2 for eff, [64,256]
    l1_penalties         = [1, 1e-1, 1e-2, 1e-3]
    number_ensembles     = [5]
    early_stopping       = [True]
    bootstrap_pcts       = [0.99]
    epochs               = [9]

    # Names in the same order as the lists handed to itertools.product below
    hp_names = ('number_hidden_layer', 'number_factor', 'learning_rate',
                'batch_size', 'l1_penalty', 'number_ensemble',
                'early_stopping', 'bootstrap_pct', 'epoch')

    # Determine the weeks in the validation window
    val_weeks = np.unique(df[(df.index.year > last_train_year)
                             & (df.index.year <= val_end_year)].index.values)

    # Generate yhats for every hyperparameter grid point
    for hps in itertools.product(number_hidden_layers,
                                 number_factors,
                                 learning_rates,
                                 batch_sizes,
                                 l1_penalties,
                                 number_ensembles,
                                 early_stopping,
                                 bootstrap_pcts,
                                 epochs):
        hps_yhats_dict = dict(zip(hp_names, hps))
        hps_yhats_dict['yhats'] = np.array([])
        hps_yhats_dict['ys']    = np.array([])
        print(hps_yhats_dict, '\n')

        # Collect per-week arrays and concatenate once (np.append in a loop
        # re-copies the whole array every week).
        weekly_yhats = []
        weekly_ys    = []

        tic = time.perf_counter()
        for val_week in val_weeks:
            temp_df = df[df.index <= val_week].copy()
            temp_df = subsetToAssetUniversePerWeek(temp_df, asset_universe_dict, oos_week=val_week)
            train_df = temp_df[temp_df.index < val_week].copy()
            val_df   = temp_df[temp_df.index == val_week].copy()

            models = fitAutoencoder(train_df, hps_yhats_dict, val_df=val_df,
                                    early_stopping=hps_yhats_dict['early_stopping'])
            yhats  = genYhats(temp_df, models, val_week, hps_yhats_dict['number_factor'])

            weekly_yhats.append(np.ravel(yhats))
            weekly_ys.append(np.ravel(val_df.r_tplus7.values))
        toc = time.perf_counter()

        if weekly_ys:
            hps_yhats_dict['yhats'] = np.concatenate(weekly_yhats)
            hps_yhats_dict['ys']    = np.concatenate(weekly_ys)

        # MSE over the whole validation window; NaN (instead of a NameError)
        # when the window contains no observations.
        if hps_yhats_dict['ys'].size > 0:
            model_val_mse = np.mean(np.square(hps_yhats_dict['ys'] - hps_yhats_dict['yhats']))
        else:
            model_val_mse = np.nan

        # Update lists of results
        hps_yhats_dict_list.append(hps_yhats_dict)
        cv_results_dict = hps_yhats_dict.copy()
        del cv_results_dict['yhats']
        del cv_results_dict['ys']
        cv_results_dict['runtime_mins'] = round((toc - tic)/60, 0)
        cv_results_dict['mse'] = model_val_mse
        hps_mse_df_list.append(pd.DataFrame(cv_results_dict, index=[0]))

        # Save CV results so far.
        # NOTE(review): the timestamp is taken per grid point, so each grid
        # point produces its own cumulative CSV; confirm that is intended.
        cv_df = pd.concat(hps_mse_df_list, ignore_index=True)
        timestr = time.strftime("%Y%m%d_%H%M%S")
        fp = '../4-output/cv-results-autoencoder-' + timestr + '.csv'
        cv_df.to_csv(fp, index=False)

    return hps_yhats_dict_list


In [10]:
def GenTestYhats(df, opt_hps, test_year=2021, asset_universe_dict=None):
    """Refit the ensemble before every test week and collect its predictions.

    Args:
        df: panel with a datetime index covering the test year and the earlier
            weeks used for training.
        opt_hps: hyperparameter dict accepted by `fitAutoencoder`.
        test_year: calendar year whose weeks are predicted.
        asset_universe_dict: quarter-start key -> included assets. When
            omitted, the notebook-level variable of the same name is used,
            which is what the previous version of this function always did.

    Returns:
        DataFrame indexed by date with columns 'asset', 'r_tplus7' and 'yhat'
        (an empty DataFrame when `df` has no rows in `test_year`).

    Raises:
        ValueError: if no asset universe is passed and none exists globally.
    """
    if asset_universe_dict is None:
        asset_universe_dict = globals().get('asset_universe_dict')
        if asset_universe_dict is None:
            raise ValueError('asset_universe_dict was not passed and is not '
                             'defined at notebook level')

    test_weeks = np.unique(df[df.index.year == test_year].index.values)

    # Collect per-week frames; concatenating a list avoids growing a frame
    # from an empty DataFrame.
    weekly_frames = []
    test_df = pd.DataFrame()

    for test_week in test_weeks:
        print(test_week, '\n')
        temp_df  = df[df.index <= test_week].copy()
        temp_df  = subsetToAssetUniversePerWeek(temp_df, asset_universe_dict, oos_week=test_week)
        train_df = temp_df[temp_df.index < test_week].copy()
        oos_df   = temp_df[temp_df.index == test_week].copy()

        models   = fitAutoencoder(train_df, opt_hps, val_df=None, early_stopping=False)

        # Pass the history AND the test week, exactly as runCV does. genYhats
        # averages the factor estimates over the weeks before test_week;
        # handing it only the test-week rows leaves nothing to average and
        # makes every prediction zero.
        yhats    = genYhats(temp_df, models, test_week, opt_hps['number_factor'])

        # Copy before adding a column so we never write into a slice
        oos_df = oos_df[['asset', 'r_tplus7']].copy()
        oos_df['yhat'] = yhats
        weekly_frames.append(oos_df)
        test_df = pd.concat(weekly_frames)

        # Running comparison against the zero-forecast benchmark
        rw_mse = np.mean(np.square(test_df.r_tplus7.values))
        model_mse = np.mean(np.square(test_df.r_tplus7.values - test_df.yhat.values))
        print('\n test random walk mse: ' + str(rw_mse))
        print('\n test model mse: ' + str(model_mse))
        print('winning?: ' + str(model_mse < rw_mse))
        print('\n')

    return test_df


In [11]:
def labelPortfolioWeights(df):
    """Assign tertile-based portfolio weights within each date from the 'yhat' ranking.

    Within every date the rows are ranked by 'yhat' (ascending). The bottom
    third gets weight 0, the middle third 1/6 and the top third 5/6, each
    scaled by 3 / (number of assets that date) so that a date's weights sum
    to roughly one.

    Args:
        df: DataFrame indexed by 'date' with a 'yhat' column.

    Returns:
        A copy of `df`, sorted by date and yhat, with a 'prtfl_wght' column.
        The caller's frame is not modified.

    Raises:
        AssertionError: if some date's weights do not sum to about 1.
    """
    # Work on a copy: the helper columns below used to be written straight
    # into the caller's frame before the first sort produced a new object.
    df = df.copy()

    # assign tertiles
    np.random.seed(42)
    # NOTE(review): 'rand' is generated but never used in the sort below;
    # presumably it was meant as a tie-breaker for equal yhats. Kept so the
    # global RNG state after this call is unchanged; confirm the intent.
    df['rand']    = np.random.uniform(size=df.shape[0])
    df = df.sort_values(by=['date', 'yhat'])
    df['ranking'] = df.groupby(['date']).cumcount()
    df['counts']  = 1
    df['total_assets_per_week'] = df.groupby('date').counts.transform('sum')
    df['ranking']               = df.ranking/df.total_assets_per_week
    df.loc[df.ranking < 1/3, 'prtfl_wght'] = 0
    df.loc[(df.ranking>=1/3) &
           (df.ranking<2/3), 'prtfl_wght'] = 1/6
    df.loc[df.ranking>=2/3,  'prtfl_wght'] = 5/6
    df['prtfl_wght'] = 3*df.prtfl_wght/df.total_assets_per_week

    # clean up
    df = df.drop(['rand', 'ranking', 'counts',
                  'total_assets_per_week'], axis=1)

    # confirm portfolio weights roughly sum to 1 for each week
    assert(len(np.unique(df.index)) ==
           np.sum(np.isclose(df.groupby(['date']).prtfl_wght.sum(), 1,
                             rtol=1e-2, atol=1e-2)))

    return df


In [12]:
def calcPortfolioReturn(df):
    """Annualise the compounded return of a series of weekly simple returns.

    Args:
        df: DataFrame with one row per week and a column 'r' of simple returns.

    Returns:
        -1 if any weekly return is <= -1 (the portfolio was wiped out),
        otherwise the geometric-average weekly return compounded over 52.18
        weeks.
    """
    weekly_r = df.r.values
    num_wks  = df.shape[0]

    # A single total loss ends the compounding
    if np.any(weekly_r <= -1):
        return -1

    # np.prod replaces the alias np.product, which NumPy 2.0 removed
    tot_ret  = np.prod(weekly_r+1)-1
    wkly_ret = (tot_ret+1)**(1/num_wks)-1
    annl_ret = (wkly_ret+1)**(52.18)-1
    return annl_ret

In [13]:
def calcAnnualTransactionCosts(df, first_week_idx=106, liquidation_week_idx=262,
                               fee_rate=0.002):
    """Return the (negative) transaction cost implied by weekly portfolio turnover.

    Turnover for a week is the sum over assets of |weight - previous week's
    weight|, where an asset absent in either week counts as weight 0.

    Args:
        df: DataFrame with columns 'week_idx', 'asset' and 'prtfl_wght', one
            row per (week, asset).
        first_week_idx: week whose turnover is forced to 1 (buying the initial
            portfolio). Defaults to the previously hard-coded 106.
        liquidation_week_idx: extra week appended with turnover 1 (selling the
            final portfolio). Defaults to the previously hard-coded 262.
        fee_rate: cost per unit of turnover; the default 0.002 is the
            previously hard-coded 20 bps.

    Returns:
        Minus the sum of `fee_rate * turnover` over all weeks. Note that this
        is the total over the sample in `df`; no per-year scaling is applied
        despite the function's name.

    Raises:
        AssertionError: if a week's turnover falls outside [0, 2] (plus a
            small tolerance).
    """
    # merge on the previous week's holdings to the new holdings
    prev_df = df[df.week_idx<np.max(df.week_idx)][['week_idx', 'asset', 'prtfl_wght']].copy()
    prev_df['week_idx'] = prev_df.week_idx+1
    prev_df = prev_df.rename(columns={'prtfl_wght': 'prtfl_wght_tm7'})
    df = df.merge(prev_df,
                  on=['week_idx', 'asset'],
                  how='outer',
                  validate='one_to_one')

    # The outer merge leaves NaN where an asset is held in only one of the
    # two weeks. Treat that as weight 0 so positions that are opened or
    # closed contribute their full weight to turnover instead of being
    # silently dropped by the NaN-skipping sum.
    df['asset_to'] = np.abs(df.prtfl_wght.fillna(0) - df.prtfl_wght_tm7.fillna(0))
    to_df = df.groupby('week_idx')[['asset_to']].sum().reset_index()

    # Turnover between two portfolios that each sum to ~1 lies in [0, 2].
    # The tolerance allows for weights that only roughly sum to one.
    turnover_tol = 2e-2
    assert((np.min(to_df.asset_to)>=0) & (np.max(to_df.asset_to)<=2+turnover_tol))

    # correct the first and last week valid for buying the initial port and liquidating
    to_df.loc[to_df.week_idx==first_week_idx, 'asset_to'] = 1
    to_df = pd.concat((to_df, pd.DataFrame(data={'week_idx': [liquidation_week_idx],
                                                 'asset_to': 1})))
    to_df = to_df.reset_index(drop=True)

    # add transaction costs at the given fee rate per unit of turnover
    to_df['tc'] = to_df.asset_to*fee_rate

    # return the total transaction cost as a negative return
    return -np.sum(to_df.tc)

In [14]:
def calcPortfolioSharpe(df):
    """Annualised Sharpe ratio of the weekly returns in column 'r'.

    Uses the population standard deviation (numpy's default) and scales the
    weekly ratio by sqrt(52.18). No risk-free rate is subtracted.
    """
    weekly_r = df.r.values
    weeks_per_year = 52.18
    return (np.mean(weekly_r) / np.std(weekly_r)) * np.sqrt(weeks_per_year)

In [15]:
def max_draw_down(df):
    """Largest peak-to-trough decline of the cumulative return path of column 'r'.

    Returns a non-positive number: the minimum of wealth / running peak, minus 1.
    """
    wealth = (1 + df.r).cumprod()
    # A window as long as the series with min_periods=1 gives the running peak
    running_peak = wealth.rolling(len(wealth), min_periods=1).max()
    return (wealth / running_peak).min() - 1

In [16]:
def max_1_month_loss(df):
    """Worst compounded return over any four consecutive rows of column 'r'.

    Returns NaN when there are fewer than four rows (no complete window).
    """
    gross_returns = df['r'] + 1
    four_week_growth = gross_returns.rolling(4).apply(np.prod)
    return four_week_growth.min() - 1

In [None]:
# Load the training panel, indexed by date and ordered by (date, asset)
input_fp = '../3-data/clean/panel_train.pkl'
df = (pd.read_pickle(input_fp)
        .set_index('date')
        .sort_values(by=['date', 'asset']))

# Load the mapping from quarter-start date to the assets included that quarter
with open('../3-data/clean/asset_universe_dates_and_lists.pkl', 'rb') as handle:
    asset_universe_dict = pickle.load(handle)

# Trim rows/columns for the conditional autoencoder (CA), add the 'x_*'
# characteristic-managed portfolio returns for the factor side of the CA
# input, then restore the (date, asset) ordering.
df = formPortfolioReturnCovariates(dropRowsAndColsForCA(df))
df = df.sort_values(by=['date', 'asset'])

In [31]:
# Load the training panel, indexed by date and ordered by (date, asset).
# NOTE(review): this replaces `df` with the raw panel again. Run after the
# load-and-preprocess cell, it discards the CA row/column trimming and the
# 'x_*' columns; confirm whether this cell is still needed.
input_fp = '../3-data/clean/panel_train.pkl'
df = (pd.read_pickle(input_fp)
        .set_index('date')
        .sort_values(by=['date', 'asset']))

# Load the mapping from quarter-start date to the assets included that quarter
with open('../3-data/clean/asset_universe_dates_and_lists.pkl', 'rb') as handle:
    asset_universe_dict = pickle.load(handle)


In [None]:
# Run the hyperparameter grid search; validation weeks are those after
# last_train_year and up to (and including) val_end_year.
hps_yhats_list = runCV(
    df,
    asset_universe_dict,
    last_train_year=2018,
    val_end_year=2020,
)

In [None]:
# TODO: add code to select the best hyperparameter combination from the list above.
#       For now the grid was set manually to the best CV point and re-run.

In [None]:
# form validation period results
# NOTE: this cell overwrites `df`; re-run the load cell before running it again.
df = df[['asset', 'week_idx', 'r_tplus7']]
df = subsetToAssetUniverseFull(df, asset_universe_dict, train_or_test='train')

# Copy the filtered rows so the new columns below are not written into a slice
df = df[df.index.year >= 2019].copy()

# The CV predictions are a flat array ordered week by week, so they are only
# meaningful if they line up one-to-one with the rows kept above.
val_yhats = hps_yhats_list[0]['yhats']
assert len(val_yhats) == df.shape[0], \
    'number of CV predictions does not match the number of validation rows'
df['yhat'] = val_yhats

# portfolio weights, transaction costs and weekly portfolio returns
df      = labelPortfolioWeights(df)
annl_tc = calcAnnualTransactionCosts(df)
df['r'] = df.prtfl_wght*df.r_tplus7
r_df    = df.groupby(['date'])[['r']].sum()

# performance statistics
annl_ret = calcPortfolioReturn(r_df)
annl_sharpe = calcPortfolioSharpe(r_df)
max_dd = max_draw_down(r_df)
max_1mo_loss = max_1_month_loss(r_df)
print('annual transaction costs in simple return terms: ' + str(np.round(annl_tc, 4)))
print('annual simple return before trans costs: ' + str(np.round(annl_ret, 4)))
print('annual sharpe: '+str(np.round(annl_sharpe, 2)))
print('max drawdown : '+str(np.round(max_dd, 2)))
print('max one month loss : '+str(np.round(max_1mo_loss, 2)))

In [None]:
# TODO GO SCOPE OLD NOTEBOOK FOR LATEST NUMBERS

In [None]:
# import and clean up data for test fitting
# NOTE: this cell replaces `df` with the out-of-sample panel.
input_fp = '../3-data/clean/panel_oos.pkl'
df = pd.read_pickle(input_fp)
df = df.set_index('date')
df = df.sort_values(by=['date', 'asset'])
# Hyperparameters are taken from the FIRST grid point of the CV run; the
# prediction arrays are removed so only hyperparameters are passed on.
# NOTE(review): nothing here selects the best grid point; confirm that
# hps_yhats_list[0] is the intended one.
opt_hps = hps_yhats_list[0].copy()
del opt_hps['yhats']
del opt_hps['ys']
# Refit before each test week and collect the predictions
test_df = GenTestYhats(df, opt_hps) 
# Attach week_idx (needed for the transaction-cost calculation) by date and asset
test_df = test_df.merge(df[['asset', 'week_idx']],
                        on=['date', 'asset'],
                        how='inner', 
                        validate='one_to_one')

# form test period results
# portfolio weights, transaction costs and weekly portfolio returns
test_df = labelPortfolioWeights(test_df)
annl_tc = calcAnnualTransactionCosts(test_df)
test_df['r'] = test_df.prtfl_wght*test_df.r_tplus7
r_df    = test_df.groupby(['date'])[['r']].sum()
# performance statistics
annl_ret = calcPortfolioReturn(r_df)
annl_sharpe = calcPortfolioSharpe(r_df)
max_dd = max_draw_down(r_df)
max_1mo_loss = max_1_month_loss(r_df)
print('annual transaction costs in simple return terms: ' + str(np.round(annl_tc, 4)))
print('annual simple return before trans costs: ' + str(np.round(annl_ret, 4)))
print('annual sharpe: '+str(np.round(annl_sharpe, 2)))
print('max drawdown : '+str(np.round(max_dd, 2)))
print('max one month loss : '+str(np.round(max_1mo_loss, 2)))

# TODO GO SCOPE OLD NOTEBOOK FOR LATEST NUMBERS


In [None]:
# TODO: Figure out why all the test-period yhats are close to zero.
#       Report the distributions of the beta hats, lambda hats and yhats in genYhats
#       to determine whether the factor side or the beta side is collapsing to zero.