In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date, datetime, timedelta
import statsmodels.api as sm
import pandas_ta as ta
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV

In [2]:
def get_returns(df,stock):
    """Generate Monthly and LookAhead Returns"""
    df = df.resample('M').last()
    df['returns'] = df[stock].pct_change()
    df['lookahead_returns'] = df['returns'].shift(-1)
    return df

def return_corr(df,stock):
    corr = pd.concat([df['returns'],df['lookahead_returns']],axis='columns').corr()
    df.plot(x ='returns', y = 'lookahead_returns', kind = 'scatter')
    return corr

def add_indicators(df, df_instruments):
    df = df.join(df_instruments)
    instrument_names = ['returns']
    imported_instruments = df_instruments.columns
    instrument_names.extend(imported_instruments)
    
    for l in [10, 20, 40, 60]:  
        # Compute moving average using the pandas-ta library
        df['ma' + str(l)] = ta.sma(daily_close, length = l)
        # Compute Relative Strength Indicators
        df['rsi' + str(l)] = ta.rsi(daily_close, l)
        # Update list of instrument names
        instrument_names = instrument_names + ['ma' + str(l), 'rsi' + str(l)]
    return df, instrument_names

def prep_df(df):
    df.fillna(0,inplace=True) 
    features = df[instrument_names] 
    target = df['lookahead_returns']
    return df,features,target

def train_test(df,features, target):
    is_size = int(0.9 * features.shape[0])
    # Add a constant to the instruments
    features = sm.add_constant(features)
    instrument_names.extend(['const'])
    
    #split into train (is_instruments)and test (oos_instruments)
    X_train = features[:is_size]
    y_train = target[:is_size]
    X_test = features[is_size:]
    y_test = target[is_size:]

    return X_train, X_test,y_train, y_test


def train_OLS(y_train, X_train):
    model = sm.OLS(y_train, X_train)
    results = model.fit()
    params = results.params
    r_sq = results.rsquared
    se = results.bse
    return results, params, r_sq, se

def predict_OLS(results, X_train, X_testt):
    # Use the model estimaton to make predictions
    train_preds = results.predict(X_train)
    test_preds = results.predict(X_test)
    return train_preds, test_preds


def train_MLP(X_train,y_train, X_test,y_test):
    #scale the data
    scaler = StandardScaler()
    fit_scaler = scaler.fit(X_train)
    X_train = fit_scaler.transform(X_train)
    X_test = fit_scaler.transform(X_test)
    
    param_distributions =  {
     'hidden_layer_sizes':[(x,) for x in np.arange(1,50,1)],
     'activation':['logistic','relu']
    }
    #train model
    mlp = MLPRegressor(max_iter = 300,learning_rate='adaptive',solver='sgd')
    mlp = GridSearchCV(mlp, param_distributions, n_jobs=-1, cv=5,iid=False,refit=True,scoring="neg_mean_absolute_error")
    mlp_trained = mlp.fit(X_train,y_train)
    
    return mlp_trained
    
    
def predict_MLP(X_train,y_train, X_test,y_test, mlp):

    train_preds = mlp.predict(X_train)
    test_preds = mlp.predict(X_test)
    
    #evaluate accuracy
    train_acc, test_acc = mlp.score(X_train,y_train) , mlp.score(X_test,y_test)
    
    return train_preds, test_preds, train_acc, test_acc

def strategy(train_preds):
    # When the model predicts a positive return (i.e. an increase in prices) we buy 1 unit and vice versa.
    shortPositions = (train_preds < 0).astype(int)  * -1
    longPositions  = (train_preds  > 0).astype(int) * 1

    positions = shortPositions + longPositions
    positions
    return positions

# Momentum

In [3]:
dj_stocks_close = pd.read_csv('dj_stocks_close.csv', parse_dates=['date'], index_col='date')
close = dj_stocks_close.loc[:,['MMM','AXP','AMGN','AAPL','BA','CAT','CVX','CSCO','KO','GS']]

In [4]:
monthly_close = close.resample('M').last()
monthly_returns = monthly_close.pct_change()
# Compute Lookahead returns for how prices change from the current period to the next period
lookahead_monthly_returns = monthly_returns.shift(-1)

In [5]:
# This starter momentum strategy can be implemented, by first identifying two best and worst performing stocks on each period
best = 2
long = (monthly_returns.rank(axis='columns', ascending = False) <= best).astype(int)
short = (monthly_returns.rank(axis='columns', ascending = True) <= best).astype(int)*-1

# Implement the above Strategy Formulation to generate trading postions.
# Invest fifty cents in two stocks that are one of two best performers and vice versa.
Momentum_Strat = (long + short)/best

# Prediction

In [6]:
df = close
df_instruments = pd.read_csv('PredictorData2019.csv', parse_dates=['DATE'], index_col='DATE')
stocks = df.columns
#df

In [7]:
OLS_Positions = pd.DataFrame()
MLP_Positions = pd.DataFrame()
MLP_Preds = pd.DataFrame()
OLS_Preds = pd.DataFrame()
LookAhead = pd.DataFrame()
training_Acc = {}
testing_Acc = {}
r = {}

for s in stocks:
    #Iniatialize shit
    features = pd.DataFrame()
    instrument_names = []
    target = pd.DataFrame()
    X_train = pd.DataFrame()
    X_test = pd.DataFrame()
    y_train = pd.Series()
    y_test = pd.Series()
    s = str(s)
    df_s = df
    daily_close = df_s[s]
    train_preds = pd.Series()
    test_preds = pd.Series()
    positions = pd.Series()
    
    #slice dataset and get returns
    df_s = df_s.loc[:,[s]]
    df_s = get_returns(df_s,s)

    # add additional features
    df_s, instrument_names = add_indicators(df_s, df_instruments) 
    
    #clean data and split into train / test
    df_s, features, target = prep_df(df_s)
    X_train, X_test,y_train, y_test = train_test(df_s,features, target)
    
    #train model
    results, params, r_sq, se = train_OLS(y_train, X_train)
    mlp = train_MLP(X_train,y_train, X_test,y_test)
    
    #Evaluate model
    #training_Acc[s] = train_acc
    #testing_Acc[s] = test_acc
    r[s] = r_sq
    
    #predict
    OLS_train_preds, OLS_test_preds = predict_OLS(results, X_train, X_test)
    MLP_train_preds, MLP_test_preds, train_acc, test_acc = predict_MLP(X_train,y_train, X_test,y_test,mlp)
    
    #formulate strategy
    OLS_positions_train = strategy(OLS_train_preds)
    OLS_positions_test = strategy(OLS_test_preds)
    OLS_positions = OLS_positions_train.append(OLS_positions_test)
    
    MLP_positions_train = strategy(MLP_train_preds)
    MLP_positions_test = strategy(MLP_test_preds)
    MLP_positions = np.concatenate((MLP_positions_train, MLP_positions_test), axis =0)
    
    #Store
    OLS_Positions[s] = OLS_positions
    MLP_Positions[s] = MLP_positions
    MLP_Preds[s] = MLP_train_preds
    OLS_Preds[s] = OLS_train_preds
    LookAhead[s] = target

# Combined Strategy

In [8]:
OLS_Strat = Momentum_Strat.add(OLS_Positions)
OLS_Returns_df = pd.DataFrame()
OLS_Returns_df['Date'] = OLS_Strat.index

for s in stocks:
    OLS_returns = []
    for m in OLS_Strat.index:
        
        look_ahead_return = LookAhead[s][m]
        OLS_position = OLS_Strat[s][m]
        OLS_return = OLS_position * look_ahead_return
        OLS_returns.append(OLS_return)
        
    OLS_Returns_df[s] = OLS_returns
    
OLS_Returns_df = OLS_Returns_df.set_index('Date')
OLS_Returns_df = OLS_Returns_df.mean(axis=1)
OLS_Mean_Return = OLS_Returns_df.mean()
print("The mean OLS monthly return of this strategy is {:.3f} percent".format(100*OLS_Mean_Return))
annConst = 12
OLS_Mean_Return_annual = OLS_Mean_Return * annConst
print("The mean annual return of this strategy is {:.3f} percent".format(100*OLS_Mean_Return_annual))

# The cumulative growth of a dollar invested in the strategy
(1+OLS_Returns_df).cumprod().plot(figsize=(15,5), title="Cumulative Combined OLS Strategy Returns")
None

The mean OLS monthly return of this strategy is 2.457 percent
The mean annual return of this strategy is 29.487 percent


In [9]:
MLP_Positions
train_acc
MLP_Preds

Unnamed: 0,MMM,AXP,AMGN,AAPL,BA,CAT,CVX,CSCO,KO,GS
0,0.543863,-0.510714,-0.485605,0.504824,-0.136824,0.297075,0.378231,0.080311,0.255046,0.088906
1,0.543516,-0.510681,-0.485605,0.504640,-0.136824,0.297073,0.378196,0.081586,0.255046,0.089055
2,0.546424,-0.537313,-0.485605,0.411357,-0.136824,0.297046,0.512364,0.000109,0.255046,0.092679
3,0.539599,-0.518543,-0.485605,0.496307,-0.136824,0.297046,0.537625,0.000306,0.255046,0.094350
4,0.537342,-0.511279,-0.485605,0.505813,-0.136824,0.297096,0.378130,0.089290,0.255046,0.092728
...,...,...,...,...,...,...,...,...,...,...
167,0.555661,-0.509282,-0.485605,0.499483,-0.136824,0.297046,0.377997,0.106958,0.255046,0.088188
168,0.348945,-0.509282,-0.485605,0.499196,-0.289331,0.297046,0.540025,0.000245,0.255046,0.091100
169,0.348062,-0.509282,-0.485605,0.499199,-0.305929,0.297046,0.454789,0.000249,0.255046,0.091100
170,0.542416,-0.509283,-0.485605,0.499598,-0.136824,0.297046,0.377997,0.112977,0.255046,0.093302


In [10]:
MLP_Returns = pd.DataFrame()
MLP_Positions['Date'] = Momentum_Strat.index
MLP_Returns['Date'] = Momentum_Strat.index
MLP_Positions = MLP_Positions.set_index('Date')

MLP_Strat = Momentum_Strat.add(MLP_Positions)

for s in stocks:
    MLP_returns = []
    for m in MLP_Strat.index:
        look_ahead_return = LookAhead[s][m]
        MLP_position = MLP_Strat[s][m]
        MLP_return = MLP_position * look_ahead_return
        MLP_returns.append(MLP_return)
        
    MLP_Returns[s] = MLP_returns

MLP_Returns = MLP_Returns.set_index('Date')
MLP_Returns = MLP_Returns.mean(axis=1)
MLP_Mean_Return = MLP_Returns.mean()

print("The mean MLP monthly return of this strategy is {:.3f} percent".format(100*MLP_Mean_Return))

# The cumulative growth of a dollar invested in the strategy
(1+MLP_Returns).cumprod().plot(figsize=(15,5), title="Cumulative Combined MLP Strategy Returns")
None

The mean MLP monthly return of this strategy is 0.525 percent


In [11]:
training_Acc
testing_Acc

{}

In [12]:
Momentum_Strat.head()

Unnamed: 0_level_0,MMM,AXP,AMGN,AAPL,BA,CAT,CVX,CSCO,KO,GS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2004-01-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-02-29,0.0,0.0,0.0,0.5,0.0,-0.5,0.0,-0.5,0.0,0.5
2004-03-31,0.5,0.0,-0.5,0.5,-0.5,0.0,0.0,0.0,0.0,0.0
2004-04-30,0.5,0.0,0.0,0.0,0.0,0.0,0.5,-0.5,0.0,-0.5
2004-05-31,0.0,0.0,-0.5,0.5,0.5,-0.5,0.0,0.0,0.0,0.0


In [13]:
MLP_Positions.head()

Unnamed: 0_level_0,MMM,AXP,AMGN,AAPL,BA,CAT,CVX,CSCO,KO,GS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2004-01-31,1,-1,-1,1,-1,1,1,1,1,1
2004-02-29,1,-1,-1,1,-1,1,1,1,1,1
2004-03-31,1,-1,-1,1,-1,1,1,1,1,1
2004-04-30,1,-1,-1,1,-1,1,1,1,1,1
2004-05-31,1,-1,-1,1,-1,1,1,1,1,1


In [14]:
MLP_Returns

Date
2004-01-31   -0.002442
2004-02-29    0.042488
2004-03-31   -0.009188
2004-04-30   -0.002301
2004-05-31    0.035250
                ...   
2019-08-31    0.019919
2019-09-30    0.025709
2019-10-31    0.002784
2019-11-30    0.027874
2019-12-31    0.000000
Length: 192, dtype: float64

In [15]:
(1+MLP_Returns).cumprod().plot(figsize=(15,5), title="Cumulative Combined MLP Strategy Returns")

<AxesSubplot:title={'center':'Cumulative Combined MLP Strategy Returns'}, xlabel='Date'>