# Main Analysis

Done w/ the data created from the `data_ingestion.ipynb`.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from datetime import datetime

import statsmodels.formula.api as smf
from sklearn.metrics import mean_squared_error

In [2]:
## Import data
ff_data = pd.read_csv("./Data/2018-12-04_Data/ff_dataset_2018-12-04.csv")
ff_data.index = pd.to_datetime( ff_data.Date )
ff_data = ff_data.drop(["Date"], axis=1)

## Rename the "Mkt-RF" column
ff_data.columns = ["Volume", "INDPRO", "Mkt", "SMB", "Mom"]

## Constructing the "Momentum of Factor" Series

In [3]:
## Get the wealth process
ff_data["Vt_mom"] = np.cumprod( np.array(ff_data.Mom) + 1 )

## Populate the momentum of factor series
a = 11; b = 2
val_list = []; date_list = []
for idx in range( a, len(ff_data.index) ):
    
    date_list.append( ff_data.index[idx] )

    ret12 = (ff_data.iloc[idx,5] / ff_data.iloc[idx-a,5]) - 1
    ret2  = (ff_data.iloc[idx,5] / ff_data.iloc[idx-b,5]) - 1

    val_list.append( ret12 - ret2 )
    
## Store the factor momentum series
ff_data["factor_mom"] = pd.Series( data=val_list, index=date_list )

## Remove the nan values
ff_data = ff_data.iloc[a:, :]

## Drop unneeded columns
ff_data = ff_data.drop( ["Volume", "SMB", "Vt_mom"], axis=1 )
ff_data = ff_data.loc[:, ["INDPRO", "Mkt", "factor_mom", "Mom"]]

## Working with Fama-French Data

Using a rolling-window approach, we will loop through the dataframe fetching the last $N$ (to be optimized) dates of data to then regress and predict the next $Mom$ value.

In [4]:
def perform_mv_ff_regerssion( df, factor_name ):
    lm = smf.ols( "{} ~ INDPRO + Mkt + factor_{}".format(factor_name, factor_name.lower()), data=df ).fit()
    betas = np.array(lm.params); rsq = lm.rsquared_adj

    return (betas, rsq)
    
def perform_uv_ff_regerssion( df, colname, factor_name ):
    lm = smf.ols( "{} ~ {}".format(factor_name, colname), data=df ).fit()
    betas = np.array(lm.params); rsq = lm.rsquared_adj

    return (betas, rsq)

def mv_prediction( betas, curr_vals ):
    pred = sum( np.multiply( betas[1:], curr_vals ) ) + betas[0]
    return pred

def uv_prediction( beta, curr_vals ):
    pred = beta[0] + beta[1]*curr_vals #[i]
    return pred

def gather_stats( uv_rsq, uv_preds, mv_rsq, mv_preds, y_vals ):
    
    ## Gather univariate stats

    # INDPRO
    indpro_avg_rsq = np.average( uv_rsq[0] )
    indpro_mse     = mean_squared_error( uv_preds[0], y_vals )

    # MKT
    mkt_avg_rsq    = np.average( uv_rsq[1] )
    mkt_mse        = mean_squared_error( uv_preds[1], y_vals )

    # Factor Momentum
    fmom_avg_rsq   = np.average( uv_rsq[2] )
    fmom_mse       = mean_squared_error( uv_preds[2], y_vals )

    ## Gather multivariate stats
    
    mv_avg_rsq = np.average( mv_rsq )
    mv_mse     = mean_squared_error( mv_preds, y_vals )
    
    ## Form out dataframe
    stats_df = pd.DataFrame( columns = ["MV"] +  ["UV_{}".format(col) for col in ff_data.columns[:-1]], \
                         index = ["Avg_Adj_R^2", "MSE"] )
    stats_df.iloc[0, :] = [mv_avg_rsq, indpro_avg_rsq, mkt_avg_rsq, fmom_avg_rsq]
    stats_df.iloc[1, :] = [mv_mse, indpro_mse, mkt_mse, fmom_mse]
    
    return stats_df

def get_sharpe_ratio( V_t ):
    
    tot_ret = (V_t[-1] / V_t[0]) - 1
    ret_1y  = (1 + tot_ret)**( 1/(len(V_t)/12) ) - 1 # 12 - due to monthly data
    sig_1y  = np.std( (V_t[1:]/V_t[:-1]) - 1 ) * np.sqrt(12)   
    sharpe = ret_1y / sig_1y
    
    return ( sharpe, ret_1y, sig_1y )

"""
We should separate these by trend following / mean reverting strategies
"""
def trading_strategies( idx, df_10d, pred, next_vals_Y, factor ):   

    def trendfollow_strats( idx, df_10d, pred, next_vals_Y, factor ):
        
        """
        The naïve strategy that I already implemented on 12/7/2018
        """
        def strat0( idx, df_10d, pred, next_vals_Y, factor ):

            ## Get historical average
            mom_avg = np.average( df_10d[factor] ); mom_std = np.std( df_10d[factor] )

            # Determine whether to open a position, and long vs. short
            if ( pred < mom_avg - mom_std ):
                return ( 1 - next_vals_Y ) # Shorting 
            elif ( pred > mom_avg + mom_std ):
                return ( 1 + next_vals_Y ) # Longing        
            else:
                return ( 1 ) # Not opening trade
        
        """
        Use all of the data up until the point of
        regression to estimate the historical momentum average / std
        Use these values to determine whether or not to trade
        """
        def strat1( idx, df_10d, pred, next_vals_Y, factor ):
            
            ## Get historical average
            mom_avg = np.average( ff_data.iloc[:idx, 3] ); mom_std = np.std( ff_data.iloc[:idx, 3] )

            # Determine whether to open a position, and long vs. short
            if ( pred < mom_avg - mom_std ):
                return ( 1 - next_vals_Y ) # Shorting
            elif ( pred > mom_avg + mom_std ):
                return ( 1 + next_vals_Y ) # Longing        
            else:
                return ( 1 ) # Not opening trade
        
        """
        Using the median rather than the mean, only on the
        lagged dataframe (w/10 values)
        """
        def strat2( idx, df_10d, pred, next_vals_Y, factor ):

            ## Get historical 25/75 percentils
            mom_med_25 = np.percentile( df_10d[factor], q=0.25 )
            mom_med_75 = np.percentile( df_10d[factor], q=0.75 )

            # Determine whether to open a position, and long vs. short
            if ( pred < mom_med_25 ):
                return ( 1 - next_vals_Y ) # Shorting 
            elif ( pred > mom_med_75 ):
                return ( 1 + next_vals_Y ) # Longing        
            else:
                return ( 1 ) # Not opening trade
        
        """
        Using the median rather than the mean, with the
        entire data set up until that date
        """
        def strat3( idx, df_10d, pred, next_vals_Y, factor ):
            
            ## Get historical 25/75 percentils
            mom_med_25 = np.percentile( ff_data.iloc[:idx, 3], q=0.25 )
            mom_med_75 = np.percentile( ff_data.iloc[:idx, 3], q=0.75 )

            # Determine whether to open a position, and long vs. short
            if ( pred < mom_med_25 ):
                return ( 1 - next_vals_Y ) # Shorting 
            elif ( pred > mom_med_75 ):
                return ( 1 + next_vals_Y ) # Longing        
            else:
                return ( 1 ) # Not opening trade

        
        """
        Use all of the historical data, and the last 10 day data, compare the cross over
        """
        def strat4( idx, df_10d, pred, next_vals_Y, factor ):

            ## Initialize container
            V_t = [1]

            ## Get total historical average
            mom_avg_all = np.average( ff_data.iloc[:idx, 3] )
            mom_avg_10d = np.average( df_10d[factor] )

            ## Determine whether to open a position, and long vs. short
            if ( (mom_avg_10d > mom_avg_all ) and (pred > mom_avg_10d) ):
                return ( 1 + next_vals_Y ) # Longing
            elif ( (mom_avg_10d > mom_avg_all ) and (pred > mom_avg_10d) ):
                return ( 1 - next_vals_Y ) # Shorting
            else:
                return ( 1 ) # Not opening trade

        # Collect all return streams from the trading strategies here
        v_0 = strat0( idx, df_10d, pred, next_vals_Y, factor )
        v_1 = strat1( idx, df_10d, pred, next_vals_Y, factor )
        v_2 = strat2( idx, df_10d, pred, next_vals_Y, factor )
        v_3 = strat3( idx, df_10d, pred, next_vals_Y, factor )
        v_4 = strat4( idx, df_10d, pred, next_vals_Y, factor )
        
        return [v_0, v_1, v_2, v_3, v_4]
        
    
    def meanrevert_strats( idx, df_10d, pred, next_vals_Y, factor ):
        
        """
        The naïve strategy that I already implemented on 12/7/2018
        """
        def strat0( idx, df_10d, pred, next_vals_Y, factor ):

            ## Get historical average
            mom_avg = np.average( df_10d[factor] ); mom_std = np.std( df_10d[factor] )

            # Determine whether to open a position, and long vs. short
            if ( pred < mom_avg - mom_std ):
                return ( 1 + next_vals_Y ) # Longing
            elif ( pred > mom_avg + mom_std ):
                return ( 1 - next_vals_Y ) # Shorting        
            else:
                return ( 1 ) # Not opening trade

        """
        Use all of the data up until the point of
        regression to estimate the historical momentum average / std
        Use these values to determine whether or not to trade
        """
        def strat1( idx, df_10d, pred, next_vals_Y, factor ):

            ## Get historical average
            mom_avg = np.average( ff_data.iloc[:idx, 3] ); mom_std = np.std( ff_data.iloc[:idx, 3] )

            # Determine whether to open a position, and long vs. short
            if ( pred < mom_avg - mom_std ):
                return ( 1 + next_vals_Y ) # Longing
            elif ( pred > mom_avg + mom_std ):
                return ( 1 - next_vals_Y ) # Shorting        
            else:
                return ( 1 ) # Not opening trade

        """
        Using the median rather than the mean, only on the
        lagged dataframe (w/10 values)
        """
        def strat2( idx, df_10d, pred, next_vals_Y, factor ):

            ## Get historical 25/75 percentils
            mom_med_25 = np.percentile( df_10d[factor], q=0.25 )
            mom_med_75 = np.percentile( df_10d[factor], q=0.75 )

            # Determine whether to open a position, and long vs. short
            if ( pred < mom_med_25 ):
                return ( 1 + next_vals_Y ) # Longing 
            elif ( pred > mom_med_75 ):
                return ( 1 - next_vals_Y ) # Shorting        
            else:
                return ( 1 ) # Not opening trade
        
        """
        Using the median rather than the mean, with the
        entire data set up until that date
        """
        def strat3( idx, df_10d, pred, next_vals_Y, factor ):

            ## Get historical 25/75 percentils
            mom_med_25 = np.percentile( ff_data.iloc[:idx, 3], q=0.25 )
            mom_med_75 = np.percentile( ff_data.iloc[:idx, 3], q=0.75 )

            # Determine whether to open a position, and long vs. short
            if ( pred < mom_med_25 ):
                return ( 1 + next_vals_Y ) # Longing 
            elif ( pred > mom_med_75 ):
                return ( 1 - next_vals_Y ) # Shorting         
            else:
                return ( 1 ) # Not opening trade

        """
        Use all of the historical data, and the last 10 day data, compare the cross over
        """
        def strat4( idx, df_10d, pred, next_vals_Y, factor ):

            ## Get total historical average
            mom_avg_all = np.average( ff_data.iloc[:idx, 3] )
            mom_avg_10d = np.average( df_10d[factor] )

            ## Determine whether to open a position, and long vs. short
            if ( (mom_avg_10d > mom_avg_all ) and (pred > mom_avg_10d) ):
                return ( 1 - next_vals_Y ) # Shorting
            elif ( (mom_avg_10d > mom_avg_all ) and (pred > mom_avg_10d) ):
                return ( 1 + next_vals_Y ) # Longing
            else:
                return ( 1 ) # Not opening trade
        
        # Collect all return streams from the trading strategies here
        v_0 = strat0( idx, df_10d, pred, next_vals_Y, factor )
        v_1 = strat1( idx, df_10d, pred, next_vals_Y, factor )
        v_2 = strat2( idx, df_10d, pred, next_vals_Y, factor )
        v_3 = strat3( idx, df_10d, pred, next_vals_Y, factor )
        v_4 = strat4( idx, df_10d, pred, next_vals_Y, factor )
        
        return [v_0, v_1, v_2, v_3, v_4]
        
    ## Get return series from each of the trading strategies, for both beliefs
    V_tf = trendfollow_strats( idx, df_10d, pred, next_vals_Y, factor )
    V_mr = meanrevert_strats( idx, df_10d, pred, next_vals_Y, factor )

    return ( V_tf, V_mr )    

In [5]:
## Define how far we will be looking into the past
N = 10 # To be CV

def trading_algo( N, factor_name ):
    ## Set up needed containers

    # multivariate containers
    mv_rsq = []; mv_preds = []

    # uniivariate containers
    uv_rsq = [[], [], []]
    uv_preds = [[], [], []]

    # Y-value containers
    y_vals = []

    ## Define ALL Wealth container
    tf_vt_0 = []; tf_vt_1 = []; tf_vt_2 = []; tf_vt_3 = []; tf_vt_4 = [];
    mr_vt_0 = []; mr_vt_1 = []; mr_vt_2 = []; mr_vt_3 = []; mr_vt_4 = [];
    

    ## Loop through the dataframe and regress
    for idx in range( N, len(ff_data)-1 ):

        ########## Begin Fetching Data ##########

        # Fetch the last N datapoints
        df = ff_data.iloc[idx-N:idx, [0,1,2,3]]

        # Fetch current values and next values to see how we performed
        curr_vals_X = np.array( ff_data.iloc[idx-1, [0,1,2]] ) 
        next_vals_Y = np.array( ff_data.iloc[idx, 3] )

        # Store the (real) y-val
        y_vals.append( next_vals_Y )

        ########## Finish Fetching Data ##########

        ########## Begin Regressions / Prediction ##########

        # Multivariate #
        mv_betas, mv_rsq = perform_mv_ff_regerssion( df, factor_name )
        mv_pred = mv_prediction( mv_betas, curr_vals_X )
        mv_preds.append( mv_pred )

        # Univariate #
        for i, col in enumerate( df.columns.tolist()[:-1] ):
            # Regress
            uv_beta, uv_rsq_val = perform_uv_ff_regerssion( df, col, factor_name )

            # Predict 
            uv_pred = uv_prediction(uv_beta, curr_vals_X[i]) 

            # Store
            uv_rsq[i].append( uv_rsq_val ); uv_preds[i].append(uv_pred)

        ########## Finish Regressions / Prediction ##########

        ########## Begin Trading / Wealth ##########

        # Extract trading strategy results
        v_tf, v_mr = trading_strategies( idx, df, mv_pred, next_vals_Y, factor_name )
        
        # Store trend following results
        tf_vt_0.append( v_tf[0] ); tf_vt_1.append( v_tf[1] )
        tf_vt_2.append( v_tf[2] ); tf_vt_3.append( v_tf[3] )
        tf_vt_4.append( v_tf[4] ); 

        # Store mean reverting results
        mr_vt_0.append( v_mr[0] ); mr_vt_1.append( v_mr[1] )
        mr_vt_2.append( v_mr[2] ); mr_vt_3.append( v_mr[3] )
        mr_vt_4.append( v_mr[4] ); 

        ########## Finish Trading / Wealth ##########
        
    ## Gather stats
    stats_df = gather_stats( uv_rsq, uv_preds, mv_rsq, mv_preds, y_vals )
    
    out_tf_rets = [tf_vt_0, tf_vt_1, tf_vt_2, tf_vt_3, tf_vt_4]
    out_mr_rets = [mr_vt_0, mr_vt_1, mr_vt_2, mr_vt_3, mr_vt_4]
    
    out_tf_vt = [ np.cumprod(elm) for elm in out_tf_rets ]
    out_mr_vt = [ np.cumprod(elm) for elm in out_mr_rets ]
    
    return ( stats_df, out_tf_vt, out_mr_vt, out_tf_rets, out_mr_rets )

## Timing the Momentum Factor

In [6]:
stats, tf_vt, mr_vt, tf_rets, mr_rets = trading_algo(10, "Mom")

In [7]:
stats

Unnamed: 0,MV,UV_INDPRO,UV_Mkt,UV_factor_mom
Avg_Adj_R^2,-0.0739552,-0.0110322,0.000329836,0.0150152
MSE,0.0037566,0.00331547,0.00334427,0.00298129


In [8]:
print( "\n ====== TREND FOLLOWING STRATEGIES ======\n" )
for i, v_t in enumerate( tf_vt ):
    V_t = np.array(v_t)
    print( "Strategy {}: Sharpe Ratio {}".format( i, get_sharpe_ratio(V_t) ) )



Strategy 0: Sharpe Ratio (-0.2923347998672441, -0.01770328747386596, 0.06055826224556715)
Strategy 1: Sharpe Ratio (-0.06233201730587966, -0.00576893946203505, 0.09255178496350185)
Strategy 2: Sharpe Ratio (0.31282286293821315, 0.05514466021738218, 0.1762807861913661)
Strategy 3: Sharpe Ratio (0.3440593994714535, 0.06058803321676853, 0.17609759625763546)
Strategy 4: Sharpe Ratio (0.213389683622213, 0.019140428756425854, 0.08969706703493802)


In [9]:
print( "\n ====== MEAN REVERTING STRATEGIES ======\n" )
for i, v_t in enumerate( mr_vt ):
    V_t = np.array(v_t)
    print( "Strategy {}: Sharpe Ratio {}".format( i, get_sharpe_ratio(V_t) ) )



Strategy 0: Sharpe Ratio (0.23498116420629847, 0.0142300509647737, 0.06055826224556715)
Strategy 1: Sharpe Ratio (-0.04104429267323796, -0.003798722549472555, 0.09255178496350187)
Strategy 2: Sharpe Ratio (-0.4719311203551078, -0.08319238892437064, 0.17628078619136614)
Strategy 3: Sharpe Ratio (-0.49914265535090985, -0.0878978217969486, 0.17609759625763546)
Strategy 4: Sharpe Ratio (-0.29881212489884124, -0.026802571197903635, 0.08969706703493802)


Using the function above, let's determine the optimal N via the output sharpe ratio. 

## Get the table needed for the powerpoint

In [12]:
ppt_df = pd.DataFrame( columns=["sharpe", "mu", "sigma"], \
                       index=["tf_strat_{}".format(i) for i in range(5)] + \
                             ["mr_strat_{}".format(i) for i in range(5)] )

for i in range( len(tf_vt) ):
    ppt_df.iloc[i,:] = get_sharpe_ratio( np.array(tf_vt[i]) )
    
for j in range( 5, 5+len(tf_vt) ):
    ppt_df.iloc[j,:] = get_sharpe_ratio( np.array(mr_vt[j-5]) )
    
# Save to csv
ppt_df.to_csv( "./ppt_df1.csv" )

## Calculating Optimal Lookback Period

In [13]:
# ## Get data
# Ns = [N for N in range(10,25)]
# sharpes = [ trading_algo(N, "Mom")[2] for N in Ns ] 

# ## Create plot of Sharpes vs. Lookback window
# plt.figure(figsize=(12,8)); plt.title("Sharpe vs. N (Lookback Window Period)")
# plt.xlabel("N"); plt.ylabel("Sharpe")
# plt.plot( Ns, sharpes )

# ## Get optimal N
# N_star = Ns[ np.argmax( sharpes ) ]
# N_star

## Benchmark: Long only, entire time Momentum factor

In [14]:
get_sharpe_ratio( np.cumprod( 1 + np.array(ff_data.Mom) ) )

(0.3658330321810258, 0.06404172541485265, 0.1750572523018172)

## Timing the Value Factor

In [15]:
## Get the value data
all_ff_data = pd.read_csv( "./Data/DataSrcs/ff_data.csv" )
all_ff_data.Date = all_ff_data.Date.apply( lambda x: datetime.strptime( str(x), "%Y%m" ) )
all_ff_data.index = pd.to_datetime( all_ff_data.Date )
all_ff_data = all_ff_data.drop( ["Date", "Mkt-RF", "SMB", "RMW", "CMA", "RF"], axis=1 )
hml_data = all_ff_data[ (all_ff_data.index >= ff_data.index[0]) & (all_ff_data.index <= ff_data.index[-1]) ]

## Constructing the "Momentum of Factor" Series

In [16]:
## Drop the momentum related columns
ff_data = ff_data.drop( ["factor_mom", "Mom"], axis=1 )

## Get the wealth process
ff_data["Val"] = hml_data / 100
ff_data["Vt_val"] = np.cumprod( np.array(ff_data.Val) + 1 )

## Populate the momentum of factor series
a = 11; b = 2
val_list = []; date_list = []
for idx in range( a, len(ff_data.index) ):
    
    date_list.append( ff_data.index[idx] )

    ret12 = (ff_data.iloc[idx,3] / ff_data.iloc[idx-a,3]) - 1
    ret2  = (ff_data.iloc[idx,3] / ff_data.iloc[idx-b,3]) - 1

    val_list.append( ret12 - ret2 )
    
## Store the factor momentum series
ff_data["factor_val"] = pd.Series( data=val_list, index=date_list )

## Remove the nan values
ff_data = ff_data.iloc[a:, :]

## Drop unneeded columns
ff_data = ff_data.drop( ["Vt_val"], axis=1 )
ff_data = ff_data.loc[:, ["INDPRO", "Mkt", "factor_val", "Val"]]

## Now we can do the same analysis as before

In [17]:
stats, tf_vt, mr_vt, tf_rets, mr_rets = trading_algo(10, "Val")

In [18]:
stats

Unnamed: 0,MV,UV_INDPRO,UV_Mkt,UV_factor_val
Avg_Adj_R^2,0.105443,0.0119199,0.00323605,0.0871338
MSE,0.00123204,0.00106734,0.000974514,0.00107057


In [19]:
print( "\n ====== TREND FOLLOWING STRATEGIES ======\n" )
for i, v_t in enumerate( tf_vt ):
    V_t = np.array(v_t)
    print( "Strategy {}: Sharpe Ratio {}".format( i, get_sharpe_ratio(V_t) ) )



Strategy 0: Sharpe Ratio (0.3337078436015478, 0.012285447896600799, 0.03681498092466118)
Strategy 1: Sharpe Ratio (0.05692448173993779, 0.00249157794987509, 0.043769883777914444)
Strategy 2: Sharpe Ratio (0.3205902855864937, 0.031898663942597105, 0.09949978329580732)
Strategy 3: Sharpe Ratio (0.28639208228904, 0.028346122941306673, 0.09897662922363358)
Strategy 4: Sharpe Ratio (0.29068655722841824, 0.011072331396809965, 0.03809027669659127)


In [20]:
print( "\n ====== MEAN REVERTING STRATEGIES ======\n" )
for i, v_t in enumerate( mr_vt ):
    V_t = np.array(v_t)
    print( "Strategy {}: Sharpe Ratio {}".format( i, get_sharpe_ratio(V_t) ) )



Strategy 0: Sharpe Ratio (-0.3663682367664358, -0.013487839647958078, 0.03681498092466116)
Strategy 1: Sharpe Ratio (-0.10043011965074942, -0.004395814664915343, 0.04376988377791444)
Strategy 2: Sharpe Ratio (-0.40771967084345406, -0.04056801889436157, 0.09949978329580732)
Strategy 3: Sharpe Ratio (-0.3751787998452913, -0.03713393296485523, 0.09897662922363358)
Strategy 4: Sharpe Ratio (-0.3254164429652012, -0.012395202354165025, 0.03809027669659126)


In [21]:
ppt_df = pd.DataFrame( columns=["sharpe", "mu", "sigma"], \
                       index=["tf_strat_{}".format(i) for i in range(5)] + \
                             ["mr_strat_{}".format(i) for i in range(5)] )

for i in range( len(tf_vt) ):
    ppt_df.iloc[i,:] = get_sharpe_ratio( np.array(tf_vt[i]) )
    
for j in range( 5, 5+len(tf_vt) ):
    ppt_df.iloc[j,:] = get_sharpe_ratio( np.array(mr_vt[j-5]) )
    
# Save to csv
ppt_df.to_csv( "./ppt_df2.csv" )

## Benchmark: Long only, entire time Value factor

In [22]:
get_sharpe_ratio( np.cumprod( 1 + np.array(ff_data.Val) ) )

(0.3110785042906252, 0.03137513944838055, 0.10085923333059463)