In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score

In [2]:
raw = pd.read_csv('tr_eikon_eod_data.csv', index_col = 0, parse_dates = True)
data = raw[['AAPL.O', 'MSFT.O', 'INTC.O', 'AMZN.O', 'GS.N']].dropna()
data

Unnamed: 0_level_0,AAPL.O,MSFT.O,INTC.O,AMZN.O,GS.N
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,30.572827,30.950,20.88,133.90,173.08
2010-01-05,30.625684,30.960,20.87,134.69,176.14
2010-01-06,30.138541,30.770,20.80,132.25,174.26
2010-01-07,30.082827,30.452,20.60,130.00,177.67
2010-01-08,30.282827,30.660,20.83,133.52,174.31
...,...,...,...,...,...
2018-06-25,182.170000,98.390,50.71,1663.15,221.54
2018-06-26,184.430000,99.080,49.67,1691.09,221.58
2018-06-27,184.160000,97.540,48.76,1660.51,220.18
2018-06-28,185.500000,98.630,49.25,1701.45,223.42


# Calculate returns for benchmark case

In [3]:
data['Returns'] = data['AAPL.O'] + data['MSFT.O'] + data['INTC.O'] + data['AMZN.O'] + data['GS.N']
log_ret = (np.log(data / data.shift(1))).dropna()
log_ret

Unnamed: 0_level_0,AAPL.O,MSFT.O,INTC.O,AMZN.O,GS.N,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-05,0.001727,0.000323,-0.000479,0.005883,0.017525,0.009973
2010-01-06,-0.016034,-0.006156,-0.003360,-0.018282,-0.010731,-0.012968
2010-01-07,-0.001850,-0.010389,-0.009662,-0.017160,0.019379,0.001509
2010-01-08,0.006626,0.006807,0.011103,0.026717,-0.019093,0.002050
2010-01-11,-0.008861,-0.012802,0.005744,-0.024350,-0.015902,-0.016822
...,...,...,...,...,...,...
2018-06-25,-0.014983,-0.020323,-0.034690,-0.031090,-0.020020,-0.028279
2018-06-26,0.012330,0.006988,-0.020722,0.016660,0.000181,0.013398
2018-06-27,-0.001465,-0.015665,-0.018491,-0.018249,-0.006338,-0.015571
2018-06-28,0.007250,0.011113,0.009999,0.024356,0.014608,0.021077


In [4]:
# split the dataset into 50 (training) / 50 (testing)
from sklearn.model_selection import train_test_split
log_ret_train, log_ret_test = train_test_split(log_ret,
                                         test_size = 0.5,
                                         shuffle = True,
                                         random_state = 0) 

In [5]:
log_ret_train

Unnamed: 0_level_0,AAPL.O,MSFT.O,INTC.O,AMZN.O,GS.N,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-05-14,-0.002336,0.003372,0.004198,-0.000855,0.004067,-0.000119
2013-10-03,-0.012642,-0.001770,-0.012532,-0.018103,-0.011537,-0.014619
2014-03-31,-0.000224,0.016977,0.007544,-0.005707,0.009505,0.000764
2016-05-24,0.015129,0.030705,0.027086,0.010636,0.013673,0.012988
2014-09-26,0.029002,0.008004,0.003509,0.003968,0.005579,0.008269
...,...,...,...,...,...,...
2014-02-12,-0.000075,0.007904,0.003264,-0.035276,-0.005429,-0.019854
2016-11-17,-0.000364,0.016461,0.005153,0.013188,0.016207,0.012375
2013-01-16,0.040671,-0.006267,0.010457,-0.010983,0.039762,0.010348
2013-05-01,-0.007913,-0.011547,0.001669,-0.022230,-0.023972,-0.019173


# AAPL.O stepwise regression

In [6]:
Apple = pd.DataFrame(log_ret['AAPL.O'])
#Adding the actual direction of the stock price movement
Apple['Return']=data['AAPL.O'].pct_change()
Apple['Actual_dir'] = np.sign(Apple['Return'])
for i in range(1,6):
    s = 'lag'+ str(i)
    t = 'lag'+ str(i) + '_dir'
    Apple[s] = Apple['AAPL.O'].shift(i)
    Apple[t] = Apple['Actual_dir'].shift(i)
Apple = Apple.dropna()
Apple

Unnamed: 0_level_0,AAPL.O,Return,Actual_dir,lag1,lag1_dir,lag2,lag2_dir,lag3,lag3_dir,lag4,lag4_dir,lag5,lag5_dir
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-01-12,-0.011440,-0.011375,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.016034,-1.0,0.001727,1.0
2010-01-13,0.014007,0.014106,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.016034,-1.0
2010-01-14,-0.005808,-0.005792,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0
2010-01-15,-0.016853,-0.016712,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0
2010-01-19,0.043288,0.044238,1.0,-0.016853,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-25,-0.014983,-0.014871,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0
2018-06-26,0.012330,0.012406,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0
2018-06-27,-0.001465,-0.001464,-1.0,0.012330,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0
2018-06-28,0.007250,0.007276,1.0,-0.001465,-1.0,0.012330,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0


In [7]:
# Prepare train data 
Apple_train = Apple.merge(log_ret_train, left_index=True, right_index=True).loc[:, 'AAPL.O_x' : 'lag5_dir'].dropna()
Apple_train

Unnamed: 0_level_0,AAPL.O_x,Return,Actual_dir,lag1,lag1_dir,lag2,lag2_dir,lag3,lag3_dir,lag4,lag4_dir,lag5,lag5_dir
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-01-14,-0.005808,-0.005792,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0
2010-01-22,-0.050881,-0.049608,-1.0,-0.017404,-1.0,-0.015536,-1.0,0.043288,1.0,-0.016853,-1.0,-0.005808,-1.0
2010-02-03,0.017060,0.017206,1.0,0.005786,1.0,0.013791,1.0,-0.036938,-1.0,-0.042219,-1.0,0.009395,1.0
2010-02-04,-0.036704,-0.036039,-1.0,0.017060,1.0,0.005786,1.0,0.013791,1.0,-0.036938,-1.0,-0.042219,-1.0
2010-02-09,0.010607,0.010664,1.0,-0.006879,-1.0,0.017600,1.0,-0.036704,-1.0,0.017060,1.0,0.005786,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-12,0.005476,0.005491,1.0,-0.002455,-1.0,-0.009139,-1.0,-0.002684,-1.0,0.003460,1.0,0.007686,1.0
2018-06-14,0.000524,0.000524,1.0,-0.008251,-1.0,0.005476,1.0,-0.002455,-1.0,-0.009139,-1.0,-0.002684,-1.0
2018-06-21,-0.005592,-0.005576,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,0.000524,1.0
2018-06-26,0.012330,0.012406,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0


In [8]:
Apple_train = Apple_train.rename(columns = {'AAPL.O_x' : 'AAPL.O'})
#Added the actual returns as a feature for more accurate predictions

Apple_train

Unnamed: 0_level_0,AAPL.O,Return,Actual_dir,lag1,lag1_dir,lag2,lag2_dir,lag3,lag3_dir,lag4,lag4_dir,lag5,lag5_dir
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-01-14,-0.005808,-0.005792,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0
2010-01-22,-0.050881,-0.049608,-1.0,-0.017404,-1.0,-0.015536,-1.0,0.043288,1.0,-0.016853,-1.0,-0.005808,-1.0
2010-02-03,0.017060,0.017206,1.0,0.005786,1.0,0.013791,1.0,-0.036938,-1.0,-0.042219,-1.0,0.009395,1.0
2010-02-04,-0.036704,-0.036039,-1.0,0.017060,1.0,0.005786,1.0,0.013791,1.0,-0.036938,-1.0,-0.042219,-1.0
2010-02-09,0.010607,0.010664,1.0,-0.006879,-1.0,0.017600,1.0,-0.036704,-1.0,0.017060,1.0,0.005786,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-12,0.005476,0.005491,1.0,-0.002455,-1.0,-0.009139,-1.0,-0.002684,-1.0,0.003460,1.0,0.007686,1.0
2018-06-14,0.000524,0.000524,1.0,-0.008251,-1.0,0.005476,1.0,-0.002455,-1.0,-0.009139,-1.0,-0.002684,-1.0
2018-06-21,-0.005592,-0.005576,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,0.000524,1.0
2018-06-26,0.012330,0.012406,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0


In [9]:
# Stepwise regression select features

from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SequentialFeatureSelector

LR_model = LinearRegression()
SFS_model = SequentialFeatureSelector(LR_model,
                                      scoring = 'r2')

X_train = Apple_train.iloc[:,2:]
y_train = Apple_train.iloc[:,1]

sel_X_bool = SFS_model.fit(X_train, y_train).get_support()
sel_X_train = X_train.loc[:, sel_X_bool]
sel_X_train

Unnamed: 0_level_0,Actual_dir,lag1,lag3,lag4_dir,lag5_dir
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-14,-1.0,0.014007,-0.008861,1.0,-1.0
2010-01-22,-1.0,-0.017404,0.043288,-1.0,-1.0
2010-02-03,1.0,0.005786,-0.036938,-1.0,1.0
2010-02-04,-1.0,0.017060,0.013791,-1.0,-1.0
2010-02-09,1.0,-0.006879,-0.036704,1.0,1.0
...,...,...,...,...,...
2018-06-12,1.0,-0.002455,-0.002684,1.0,1.0
2018-06-14,1.0,-0.008251,-0.002455,-1.0,-1.0
2018-06-21,-1.0,0.004353,-0.000530,-1.0,1.0
2018-06-26,1.0,-0.014983,-0.005592,1.0,-1.0


#### Prediciton on training data

In [40]:
# perform linear regression on the selected variables to predict training data
Apple_train['Predict_Step'] = LR_model.fit(sel_X_train, y_train).predict(sel_X_train)
Apple_train

Unnamed: 0_level_0,AAPL.O,Return,Actual_dir,lag1,lag1_dir,lag2,lag2_dir,lag3,lag3_dir,lag4,lag4_dir,lag5,lag5_dir,Predict_Step,Predict_dir,Returns_Step
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2010-01-14,-0.005808,-0.005792,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.009023,-1.0,0.005808
2010-01-22,-0.050881,-0.049608,-1.0,-0.017404,-1.0,-0.015536,-1.0,0.043288,1.0,-0.016853,-1.0,-0.005808,-1.0,-0.013791,-1.0,0.050881
2010-02-03,0.017060,0.017206,1.0,0.005786,1.0,0.013791,1.0,-0.036938,-1.0,-0.042219,-1.0,0.009395,1.0,0.012626,1.0,0.017060
2010-02-04,-0.036704,-0.036039,-1.0,0.017060,1.0,0.005786,1.0,0.013791,1.0,-0.036938,-1.0,-0.042219,-1.0,-0.010929,-1.0,0.036704
2010-02-09,0.010607,0.010664,1.0,-0.006879,-1.0,0.017600,1.0,-0.036704,-1.0,0.017060,1.0,0.005786,1.0,0.013140,1.0,0.010607
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-12,0.005476,0.005491,1.0,-0.002455,-1.0,-0.009139,-1.0,-0.002684,-1.0,0.003460,1.0,0.007686,1.0,0.011973,1.0,0.005476
2018-06-14,0.000524,0.000524,1.0,-0.008251,-1.0,0.005476,1.0,-0.002455,-1.0,-0.009139,-1.0,-0.002684,-1.0,0.011034,1.0,0.000524
2018-06-21,-0.005592,-0.005576,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,0.000524,1.0,-0.011447,-1.0,0.005592
2018-06-26,0.012330,0.012406,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0,0.011971,1.0,0.012330


### Mean Squared Error

In [43]:
from sklearn.metrics import mean_squared_error


mse = mean_squared_error(Apple_train['Return'], Apple_train['Predict_Step'])
mse

0.0001145351260108257

In [41]:
#Apple_train['dir'] = np.sign(Apple_train['Predict_Step'])
Apple_train['Predict_dir'] = np.sign(Apple_train['Predict_Step'])
Apple_train

Unnamed: 0_level_0,AAPL.O,Return,Actual_dir,lag1,lag1_dir,lag2,lag2_dir,lag3,lag3_dir,lag4,lag4_dir,lag5,lag5_dir,Predict_Step,Predict_dir,Returns_Step
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2010-01-14,-0.005808,-0.005792,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.009023,-1.0,0.005808
2010-01-22,-0.050881,-0.049608,-1.0,-0.017404,-1.0,-0.015536,-1.0,0.043288,1.0,-0.016853,-1.0,-0.005808,-1.0,-0.013791,-1.0,0.050881
2010-02-03,0.017060,0.017206,1.0,0.005786,1.0,0.013791,1.0,-0.036938,-1.0,-0.042219,-1.0,0.009395,1.0,0.012626,1.0,0.017060
2010-02-04,-0.036704,-0.036039,-1.0,0.017060,1.0,0.005786,1.0,0.013791,1.0,-0.036938,-1.0,-0.042219,-1.0,-0.010929,-1.0,0.036704
2010-02-09,0.010607,0.010664,1.0,-0.006879,-1.0,0.017600,1.0,-0.036704,-1.0,0.017060,1.0,0.005786,1.0,0.013140,1.0,0.010607
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-12,0.005476,0.005491,1.0,-0.002455,-1.0,-0.009139,-1.0,-0.002684,-1.0,0.003460,1.0,0.007686,1.0,0.011973,1.0,0.005476
2018-06-14,0.000524,0.000524,1.0,-0.008251,-1.0,0.005476,1.0,-0.002455,-1.0,-0.009139,-1.0,-0.002684,-1.0,0.011034,1.0,0.000524
2018-06-21,-0.005592,-0.005576,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,0.000524,1.0,-0.011447,-1.0,0.005592
2018-06-26,0.012330,0.012406,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0,0.011971,1.0,0.012330


### Accuracy Score

In [42]:
from sklearn.metrics import accuracy_score
train_accuracy = accuracy_score(Apple_train['Actual_dir'], Apple_train['Predict_dir'])
train_accuracy

0.99906191369606

In [44]:
# number of trades in total
(Apple_train['Predict_dir'].diff().dropna() != 0).sum()

554

In [16]:
# number of false prediction
(Apple_train['Predict_dir'] != Apple_train['Actual_dir']).sum()

1

In [17]:
# Calculate return from strategy
Apple_train['Returns_Step'] = Apple_train['Predict_dir'] * Apple_train['AAPL.O']

np.exp(np.sum(Apple_train[['AAPL.O', 'Returns_Step']]))

AAPL.O               2.902430
Returns_Step    176897.722411
dtype: float64

### Accurcy score


#### Testings

In [18]:
# Prepare test data
Apple_test = Apple.merge(log_ret_test, left_index=True, right_index=True).loc[:, 'AAPL.O_x' : 'lag5_dir'].dropna()
Apple_test = Apple_test.rename(columns={'AAPL.O_x' : 'AAPL.O'})
X_test = Apple_test.iloc[:,2:]
sel_X_test = X_test.loc[:, sel_X_bool]
sel_X_test

Unnamed: 0_level_0,Actual_dir,lag1,lag3,lag4_dir,lag5_dir
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-12,-1.0,-0.008861,-0.001850,-1.0,1.0
2010-01-13,1.0,-0.011440,0.006626,-1.0,-1.0
2010-01-15,-1.0,-0.005808,-0.011440,-1.0,1.0
2010-01-19,1.0,-0.016853,0.014007,-1.0,-1.0
2010-01-20,-1.0,0.043288,-0.005808,1.0,-1.0
...,...,...,...,...,...
2018-06-20,1.0,-0.016292,-0.010326,1.0,-1.0
2018-06-22,-1.0,-0.005592,-0.016292,-1.0,-1.0
2018-06-25,-1.0,-0.002916,0.004353,-1.0,-1.0
2018-06-28,1.0,-0.001465,-0.014983,-1.0,-1.0


In [19]:
# perform linear regression on the selected variables to predict test data
Apple_test['Predict_Step'] = LR_model.fit(sel_X_train, y_train).predict(sel_X_test)
Apple_test

Unnamed: 0_level_0,AAPL.O,Return,Actual_dir,lag1,lag1_dir,lag2,lag2_dir,lag3,lag3_dir,lag4,lag4_dir,lag5,lag5_dir,Predict_Step
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2010-01-12,-0.011440,-0.011375,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.016034,-1.0,0.001727,1.0,-0.012032
2010-01-13,0.014007,0.014106,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.016034,-1.0,0.010512
2010-01-15,-0.016853,-0.016712,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.011495
2010-01-19,0.043288,0.044238,1.0,-0.016853,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.009951
2010-01-20,-0.015536,-0.015416,-1.0,0.043288,1.0,-0.016853,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.007732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-20,0.004353,0.004362,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,0.000524,1.0,-0.008251,-1.0,0.012100
2018-06-22,-0.002916,-0.002912,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,-0.010803
2018-06-25,-0.014983,-0.014871,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.011511
2018-06-28,0.007250,0.007276,1.0,-0.001465,-1.0,0.012330,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.011871


In [20]:
Apple_test['Actual_dir'] = np.sign(Apple_test['Predict_Step'])
Apple_test

Unnamed: 0_level_0,AAPL.O,Return,Actual_dir,lag1,lag1_dir,lag2,lag2_dir,lag3,lag3_dir,lag4,lag4_dir,lag5,lag5_dir,Predict_Step
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2010-01-12,-0.011440,-0.011375,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.016034,-1.0,0.001727,1.0,-0.012032
2010-01-13,0.014007,0.014106,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.016034,-1.0,0.010512
2010-01-15,-0.016853,-0.016712,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.011495
2010-01-19,0.043288,0.044238,1.0,-0.016853,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.009951
2010-01-20,-0.015536,-0.015416,-1.0,0.043288,1.0,-0.016853,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.007732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-20,0.004353,0.004362,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,0.000524,1.0,-0.008251,-1.0,0.012100
2018-06-22,-0.002916,-0.002912,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,-0.010803
2018-06-25,-0.014983,-0.014871,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.011511
2018-06-28,0.007250,0.007276,1.0,-0.001465,-1.0,0.012330,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.011871


In [21]:
Apple_test['Predict_dir'] = np.sign(Apple_test['Predict_Step'])
Apple_test

Unnamed: 0_level_0,AAPL.O,Return,Actual_dir,lag1,lag1_dir,lag2,lag2_dir,lag3,lag3_dir,lag4,lag4_dir,lag5,lag5_dir,Predict_Step,Predict_dir
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-01-12,-0.011440,-0.011375,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.016034,-1.0,0.001727,1.0,-0.012032,-1.0
2010-01-13,0.014007,0.014106,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.001850,-1.0,-0.016034,-1.0,0.010512,1.0
2010-01-15,-0.016853,-0.016712,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.006626,1.0,-0.011495,-1.0
2010-01-19,0.043288,0.044238,1.0,-0.016853,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.008861,-1.0,0.009951,1.0
2010-01-20,-0.015536,-0.015416,-1.0,0.043288,1.0,-0.016853,-1.0,-0.005808,-1.0,0.014007,1.0,-0.011440,-1.0,-0.007732,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-20,0.004353,0.004362,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,0.000524,1.0,-0.008251,-1.0,0.012100,1.0
2018-06-22,-0.002916,-0.002912,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.010326,-1.0,-0.010803,-1.0
2018-06-25,-0.014983,-0.014871,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.004353,1.0,-0.016292,-1.0,-0.000530,-1.0,-0.011511,-1.0
2018-06-28,0.007250,0.007276,1.0,-0.001465,-1.0,0.012330,1.0,-0.014983,-1.0,-0.002916,-1.0,-0.005592,-1.0,0.011871,1.0


In [22]:
# number of trades in total
(Apple_test['Predict_dir'].diff().dropna() != 0).sum()

530

In [24]:
# number of false prediction
(Apple_test['Predict_dir'] != Apple_test['Actual_dir']).sum()

0

In [25]:
# Calculate return from strategy
Apple_test['Returns_Step'] = Apple_test['Predict_dir'] * Apple_test['AAPL.O']

np.exp(np.sum(Apple_test[['AAPL.O', 'Returns_Step']]))

AAPL.O               2.124809
Returns_Step    206584.988167
dtype: float64