## Regression

In [93]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression

In [94]:
data = pd.read_csv(filepath_or_buffer='../../resources/train_set.csv', parse_dates=['time'], index_col='time')

In [95]:
data.head(10)

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2009-12-31,1.432706
2010-01-01,1.438994
2010-01-04,1.442398
2010-01-05,1.436596
2010-01-06,1.440403
2010-01-07,1.431803
2010-01-08,1.441109
2010-01-11,1.451126
2010-01-12,1.44766
2010-01-13,1.452391


In [96]:
data.tail(10)

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2020-12-18,1.226272
2020-12-21,1.221613
2020-12-22,1.223691
2020-12-23,1.218665
2020-12-24,1.219141
2020-12-25,1.218472
2020-12-28,1.22051
2020-12-29,1.222345
2020-12-30,1.225295
2020-12-31,1.22999


In [97]:
data.Close.to_frame()

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2009-12-31,1.432706
2010-01-01,1.438994
2010-01-04,1.442398
2010-01-05,1.436596
2010-01-06,1.440403
...,...
2020-12-25,1.218472
2020-12-28,1.220510
2020-12-29,1.222345
2020-12-30,1.225295


Getting more statistical insight into the dataset

In [98]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2866 entries, 2009-12-31 to 2020-12-31
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   2866 non-null   float64
dtypes: float64(1)
memory usage: 44.8 KB


In [99]:
data.describe()

Unnamed: 0,Close
count,2866.0
mean,1.222946
std,0.111835
min,1.039047
25%,1.121604
50%,1.193859
75%,1.323745
max,1.484406


Since the only attribute we need is closing price
we modify the data frame

In [100]:
data = data.Close.to_frame()

In [101]:
data.head(10)

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2009-12-31,1.432706
2010-01-01,1.438994
2010-01-04,1.442398
2010-01-05,1.436596
2010-01-06,1.440403
2010-01-07,1.431803
2010-01-08,1.441109
2010-01-11,1.451126
2010-01-12,1.44766
2010-01-13,1.452391


Since the only attribute we need is closing price
we modify the data frame


In [102]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data.Close, name='Close'))

fig.update_layout(title='EUR/USD', xaxis_title='Time', yaxis_title='Price')

fig.show()

We proceed to define baseline and compute return

In [103]:
data['returns'] = np.log(data.div(data.shift(1)))

In [104]:
data.head(10)

Unnamed: 0_level_0,Close,returns
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-12-31,1.432706,
2010-01-01,1.438994,0.004379
2010-01-04,1.442398,0.002363
2010-01-05,1.436596,-0.004031
2010-01-06,1.440403,0.002647
2010-01-07,1.431803,-0.005989
2010-01-08,1.441109,0.006478
2010-01-11,1.451126,0.006927
2010-01-12,1.44766,-0.002391
2010-01-13,1.452391,0.003262


Predicting returns using linear regression

In [105]:
data['lag1'] = data.returns.shift(1)

In [106]:
data.dropna(inplace=True)

In [107]:
data

Unnamed: 0_level_0,Close,returns,lag1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-04,1.442398,0.002363,0.004379
2010-01-05,1.436596,-0.004031,0.002363
2010-01-06,1.440403,0.002647,-0.004031
2010-01-07,1.431803,-0.005989,0.002647
2010-01-08,1.441109,0.006478,-0.005989
...,...,...,...
2020-12-25,1.218472,-0.000549,0.000390
2020-12-28,1.220510,0.001671,-0.000549
2020-12-29,1.222345,0.001502,0.001671
2020-12-30,1.225295,0.002411,0.001502


Modify the xlim and ylim

In [108]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.returns, y=data.lag1, mode='markers'))

fig.update_layout(title='EUR/USD', xaxis_title='lag1', yaxis_title='Returns')

fig.show()

In [109]:
LR = LinearRegression(fit_intercept=True)

In [110]:
LR.fit(X=data.lag1.to_frame(), y=data.returns)

LinearRegression()

In [111]:
slope = LR.coef_
slope

array([-0.03313165])

In [112]:
intercept = LR.intercept_
intercept

-5.660600823278811e-05

In [113]:
data['predict'] = LR.predict(X=data.lag1.to_frame())

In [114]:
data

Unnamed: 0_level_0,Close,returns,lag1,predict
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,1.442398,0.002363,0.004379,-0.000202
2010-01-05,1.436596,-0.004031,0.002363,-0.000135
2010-01-06,1.440403,0.002647,-0.004031,0.000077
2010-01-07,1.431803,-0.005989,0.002647,-0.000144
2010-01-08,1.441109,0.006478,-0.005989,0.000142
...,...,...,...,...
2020-12-25,1.218472,-0.000549,0.000390,-0.000070
2020-12-28,1.220510,0.001671,-0.000549,-0.000038
2020-12-29,1.222345,0.001502,0.001671,-0.000112
2020-12-30,1.225295,0.002411,0.001502,-0.000106


In [115]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.returns, y=data.lag1, mode='markers'))
fig.add_trace(go.Scatter(x=data.lag1, y=data.predict))

fig.update_layout(title='EUR/USD', xaxis_title='lag1', yaxis_title='Returns')

fig.show()

In [116]:
data.predict = np.sign(data.predict)

In [117]:
data

Unnamed: 0_level_0,Close,returns,lag1,predict
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,1.442398,0.002363,0.004379,-1.0
2010-01-05,1.436596,-0.004031,0.002363,-1.0
2010-01-06,1.440403,0.002647,-0.004031,1.0
2010-01-07,1.431803,-0.005989,0.002647,-1.0
2010-01-08,1.441109,0.006478,-0.005989,1.0
...,...,...,...,...
2020-12-25,1.218472,-0.000549,0.000390,-1.0
2020-12-28,1.220510,0.001671,-0.000549,-1.0
2020-12-29,1.222345,0.001502,0.001671,-1.0
2020-12-30,1.225295,0.002411,0.001502,-1.0


In [118]:
np.sign(data.returns * data.predict)

time
2010-01-04   -1.0
2010-01-05    1.0
2010-01-06    1.0
2010-01-07    1.0
2010-01-08    1.0
             ... 
2020-12-25    1.0
2020-12-28   -1.0
2020-12-29   -1.0
2020-12-30   -1.0
2020-12-31   -1.0
Length: 2864, dtype: float64

In [119]:
hits = np.sign(data.returns * data.predict).value_counts()

In [120]:
hits

 1.0    1453
-1.0    1394
 0.0      17
dtype: int64

In [121]:
hit_ratio = hits[1.0] / sum(hits)
hit_ratio

0.5073324022346368

In [122]:
data.dropna(inplace=True)

In [123]:
lags = 5

In [124]:
cols = []
for lag in range(2, lags+1):

  col = f"lag{lag}"
  data[col] = data.returns.shift(lag)
  cols.append(col)

data.dropna(inplace=True)

In [125]:
data

Unnamed: 0_level_0,Close,returns,lag1,predict,lag2,lag3,lag4,lag5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-11,1.451126,0.006927,0.006478,-1.0,-0.005989,0.002647,-0.004031,0.002363
2010-01-12,1.447660,-0.002391,0.006927,-1.0,0.006478,-0.005989,0.002647,-0.004031
2010-01-13,1.452391,0.003262,-0.002391,1.0,0.006927,0.006478,-0.005989,0.002647
2010-01-14,1.449990,-0.001654,0.003262,-1.0,-0.002391,0.006927,0.006478,-0.005989
2010-01-15,1.439097,-0.007540,-0.001654,-1.0,0.003262,-0.002391,0.006927,0.006478
...,...,...,...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,0.000390,-1.0,-0.004115,0.001699,-0.003806,0.005161
2020-12-28,1.220510,0.001671,-0.000549,-1.0,0.000390,-0.004115,0.001699,-0.003806
2020-12-29,1.222345,0.001502,0.001671,-1.0,-0.000549,0.000390,-0.004115,0.001699
2020-12-30,1.225295,0.002411,0.001502,-1.0,0.001671,-0.000549,0.000390,-0.004115


In [126]:
cols

['lag2', 'lag3', 'lag4', 'lag5']

In [127]:
cols.append('lag1')
cols

['lag2', 'lag3', 'lag4', 'lag5', 'lag1']

In [128]:
LR_diff = LinearRegression(fit_intercept=True)

In [129]:
LR_diff.fit(data[cols], data.returns)

LinearRegression()

In [130]:
LR_diff.coef_

array([ 0.00886454, -0.02793839, -0.00967606,  0.02175438, -0.03196378])

In [131]:
LR_diff.intercept_

-5.754122170213628e-05

In [132]:
data['predict_diff'] = LR_diff.predict(data[cols].values)


X does not have valid feature names, but LinearRegression was fitted with feature names



In [133]:
data

Unnamed: 0_level_0,Close,returns,lag1,predict,lag2,lag3,lag4,lag5,predict_diff
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-11,1.451126,0.006927,0.006478,-1.0,-0.005989,0.002647,-0.004031,0.002363,-0.000301
2010-01-12,1.447660,-0.002391,0.006927,-1.0,0.006478,-0.005989,0.002647,-0.004031,-0.000168
2010-01-13,1.452391,0.003262,-0.002391,1.0,0.006927,0.006478,-0.005989,0.002647,0.000015
2010-01-14,1.449990,-0.001654,0.003262,-1.0,-0.002391,0.006927,0.006478,-0.005989,-0.000570
2010-01-15,1.439097,-0.007540,-0.001654,-1.0,0.003262,-0.002391,0.006927,0.006478,0.000165
...,...,...,...,...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,0.000390,-1.0,-0.004115,0.001699,-0.003806,0.005161,-0.000005
2020-12-28,1.220510,0.001671,-0.000549,-1.0,0.000390,-0.004115,0.001699,-0.003806,-0.000021
2020-12-29,1.222345,0.001502,0.001671,-1.0,-0.000549,0.000390,-0.004115,0.001699,-0.000050
2020-12-30,1.225295,0.002411,0.001502,-1.0,0.001671,-0.000549,0.000390,-0.004115,-0.000169


In [134]:
data.predict_diff = np.sign(data.predict_diff)

In [135]:
data.predict_diff.value_counts()

-1.0    1708
 1.0    1151
Name: predict_diff, dtype: int64

In [136]:
hits = np.sign(data.returns * data.predict_diff).value_counts()
hits

 1.0    1460
-1.0    1382
 0.0      17
dtype: int64

In [137]:
hit_ratio = hits[1.0] / sum(hits)
hit_ratio

0.5106680657572578

In-sample backtesting and look-ahead-bias

In [138]:
data

Unnamed: 0_level_0,Close,returns,lag1,predict,lag2,lag3,lag4,lag5,predict_diff
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-11,1.451126,0.006927,0.006478,-1.0,-0.005989,0.002647,-0.004031,0.002363,-1.0
2010-01-12,1.447660,-0.002391,0.006927,-1.0,0.006478,-0.005989,0.002647,-0.004031,-1.0
2010-01-13,1.452391,0.003262,-0.002391,1.0,0.006927,0.006478,-0.005989,0.002647,1.0
2010-01-14,1.449990,-0.001654,0.003262,-1.0,-0.002391,0.006927,0.006478,-0.005989,-1.0
2010-01-15,1.439097,-0.007540,-0.001654,-1.0,0.003262,-0.002391,0.006927,0.006478,1.0
...,...,...,...,...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,0.000390,-1.0,-0.004115,0.001699,-0.003806,0.005161,-1.0
2020-12-28,1.220510,0.001671,-0.000549,-1.0,0.000390,-0.004115,0.001699,-0.003806,-1.0
2020-12-29,1.222345,0.001502,0.001671,-1.0,-0.000549,0.000390,-0.004115,0.001699,-1.0
2020-12-30,1.225295,0.002411,0.001502,-1.0,0.001671,-0.000549,0.000390,-0.004115,-1.0


In [139]:
data['strategy'] = data.predict_diff * data.returns

In [140]:
data['creturns'] = data['returns'].cumsum().apply(np.exp)
data['cstrategy'] = data['strategy'].cumsum().apply(np.exp)

In [141]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data.creturns, name='Returns (Baseline)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy, name='Returns (Strategy)'))

fig.update_layout(title='EUR/USD', xaxis_title='Time', yaxis_title='Price')

fig.show()

In [142]:
data['trades'] = data.predict_diff.diff().fillna(0).abs()

In [143]:
data.trades.value_counts()

2.0    1552
0.0    1307
Name: trades, dtype: int64

Taking trading costs into consideration

In [144]:
ptc = 0.00007

In [145]:
data['strategy_net'] = data.strategy - data.trades * ptc

In [146]:
data['cstrategy_net'] = data.strategy_net.cumsum().apply(np.exp)

In [147]:
data

Unnamed: 0_level_0,Close,returns,lag1,predict,lag2,lag3,lag4,lag5,predict_diff,strategy,creturns,cstrategy,trades,strategy_net,cstrategy_net
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-01-11,1.451126,0.006927,0.006478,-1.0,-0.005989,0.002647,-0.004031,0.002363,-1.0,-0.006927,1.006951,0.993097,0.0,-0.006927,0.993097
2010-01-12,1.447660,-0.002391,0.006927,-1.0,0.006478,-0.005989,0.002647,-0.004031,-1.0,0.002391,1.004546,0.995475,0.0,0.002391,0.995475
2010-01-13,1.452391,0.003262,-0.002391,1.0,0.006927,0.006478,-0.005989,0.002647,1.0,0.003262,1.007828,0.998728,2.0,0.003122,0.998588
2010-01-14,1.449990,-0.001654,0.003262,-1.0,-0.002391,0.006927,0.006478,-0.005989,-1.0,0.001654,1.006162,1.000381,2.0,0.001514,1.000101
2010-01-15,1.439097,-0.007540,-0.001654,-1.0,0.003262,-0.002391,0.006927,0.006478,1.0,-0.007540,0.998604,0.992866,2.0,-0.007680,0.992450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,0.000390,-1.0,-0.004115,0.001699,-0.003806,0.005161,-1.0,0.000549,0.845510,1.889236,2.0,0.000409,1.520277
2020-12-28,1.220510,0.001671,-0.000549,-1.0,0.000390,-0.004115,0.001699,-0.003806,-1.0,-0.001671,0.846924,1.886083,0.0,-0.001671,1.517739
2020-12-29,1.222345,0.001502,0.001671,-1.0,-0.000549,0.000390,-0.004115,0.001699,-1.0,-0.001502,0.848197,1.883251,0.0,-0.001502,1.515461
2020-12-30,1.225295,0.002411,0.001502,-1.0,0.001671,-0.000549,0.000390,-0.004115,-1.0,-0.002411,0.850245,1.878716,0.0,-0.002411,1.511812


In [148]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data.creturns, name='Returns (Baseline)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy, name='Returns (Strategy)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy_net, name='Returns (Strategy + trading costs)'))

fig.update_layout(title='EUR/USD', xaxis_title='Time', yaxis_title='Price')

fig.show()

Evaluating returns and risks

In [149]:
data[['returns', 'strategy_net']].mean() * (252)

returns        -0.013962
strategy_net    0.036093
dtype: float64

In [150]:
data[['returns', 'strategy_net']].std() * np.sqrt(252)

returns         0.089174
strategy_net    0.089091
dtype: float64

Outsample forward testing

In [151]:
data = pd.read_csv(filepath_or_buffer='../../resources/test_set.csv', parse_dates=['time'], index_col='time')

In [152]:
data

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2020-12-31,1.229990
2021-01-01,1.218027
2021-01-04,1.225070
2021-01-05,1.225160
2021-01-06,1.230027
...,...
2021-12-27,1.132426
2021-12-28,1.133003
2021-12-29,1.131478
2021-12-30,1.136015


In [153]:
data['returns'] = np.log(data.div(data.shift(1)))

In [154]:
data

Unnamed: 0_level_0,Close,returns
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-12-31,1.229990,
2021-01-01,1.218027,-0.009774
2021-01-04,1.225070,0.005766
2021-01-05,1.225160,0.000074
2021-01-06,1.230027,0.003965
...,...,...
2021-12-27,1.132426,-0.000272
2021-12-28,1.133003,0.000510
2021-12-29,1.131478,-0.001347
2021-12-30,1.136015,0.004002


In [155]:
data.dropna(inplace=True)

In [156]:
lags = 5

In [157]:
cols = []
for lag in range(1, lags + 1):

  col = f'lag{lag}'
  data[col] = data.returns.shift(1)
  cols.append(col)

data.dropna(inplace=True)

In [158]:
data

Unnamed: 0_level_0,Close,returns,lag1,lag2,lag3,lag4,lag5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-04,1.225070,0.005766,-0.009774,-0.009774,-0.009774,-0.009774,-0.009774
2021-01-05,1.225160,0.000074,0.005766,0.005766,0.005766,0.005766,0.005766
2021-01-06,1.230027,0.003965,0.000074,0.000074,0.000074,0.000074,0.000074
2021-01-07,1.234111,0.003314,0.003965,0.003965,0.003965,0.003965,0.003965
2021-01-08,1.227144,-0.005661,0.003314,0.003314,0.003314,0.003314,0.003314
...,...,...,...,...,...,...,...
2021-12-27,1.132426,-0.000272,-0.000136,-0.000136,-0.000136,-0.000136,-0.000136
2021-12-28,1.133003,0.000510,-0.000272,-0.000272,-0.000272,-0.000272,-0.000272
2021-12-29,1.131478,-0.001347,0.000510,0.000510,0.000510,0.000510,0.000510
2021-12-30,1.136015,0.004002,-0.001347,-0.001347,-0.001347,-0.001347,-0.001347


In [159]:
data.dropna(inplace=True)

In [160]:
data['pred'] = LR_diff.predict(data[cols].values)


X does not have valid feature names, but LinearRegression was fitted with feature names



In [161]:
data

Unnamed: 0_level_0,Close,returns,lag1,lag2,lag3,lag4,lag5,pred
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-04,1.225070,0.005766,-0.009774,-0.009774,-0.009774,-0.009774,-0.009774,0.000323
2021-01-05,1.225160,0.000074,0.005766,0.005766,0.005766,0.005766,0.005766,-0.000282
2021-01-06,1.230027,0.003965,0.000074,0.000074,0.000074,0.000074,0.000074,-0.000060
2021-01-07,1.234111,0.003314,0.003965,0.003965,0.003965,0.003965,0.003965,-0.000212
2021-01-08,1.227144,-0.005661,0.003314,0.003314,0.003314,0.003314,0.003314,-0.000187
...,...,...,...,...,...,...,...,...
2021-12-27,1.132426,-0.000272,-0.000136,-0.000136,-0.000136,-0.000136,-0.000136,-0.000052
2021-12-28,1.133003,0.000510,-0.000272,-0.000272,-0.000272,-0.000272,-0.000272,-0.000047
2021-12-29,1.131478,-0.001347,0.000510,0.000510,0.000510,0.000510,0.000510,-0.000077
2021-12-30,1.136015,0.004002,-0.001347,-0.001347,-0.001347,-0.001347,-0.001347,-0.000005


In [162]:
data.pred = np.sign(data.pred)

In [163]:
data.pred.value_counts()

-1.0    175
 1.0     85
Name: pred, dtype: int64

In [164]:
hits = np.sign(data.returns * data.pred).value_counts()
hits

 1.0    145
-1.0    113
 0.0      2
dtype: int64

In [165]:
hit_ratio = hits[1.0] / sum(hits)
hit_ratio

0.5576923076923077

In [166]:
data['strategy'] = data.pred * data.returns

In [167]:
data['creturns'] = data['returns'].cumsum().apply(np.exp)
data['cstrategy'] = data['strategy'].cumsum().apply(np.exp)

In [168]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data.creturns, name='Returns (Baseline)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy, name='Returns (Strategy)'))

fig.update_layout(title='EUR/USD', xaxis_title='Time', yaxis_title='Price')

fig.show()

Evaluate return and risk

In [169]:
data[['returns', 'strategy']].mean() * (252)

returns    -0.070562
strategy    0.113951
dtype: float64

In [170]:
data[['returns', 'strategy']].std() * np.sqrt(252)

returns     0.055419
strategy    0.055131
dtype: float64

In [171]:
data['predict_diff'] = LR_diff.predict(data[cols].values)


X does not have valid feature names, but LinearRegression was fitted with feature names



In [172]:
data.predict_diff = np.sign(data.predict_diff)

In [173]:
data.predict_diff.value_counts()


-1.0    175
 1.0     85
Name: predict_diff, dtype: int64

In [174]:
data['trades'] = data.predict_diff.diff().fillna(0).abs()

Including trading costs

In [175]:
ptc=0.00007

In [176]:
data['strategy_net'] = data.strategy - data.trades * ptc

In [177]:
data['cstrategy_net'] = data.strategy_net.cumsum().apply(np.exp)

In [178]:
data

Unnamed: 0_level_0,Close,returns,lag1,lag2,lag3,lag4,lag5,pred,strategy,creturns,cstrategy,predict_diff,trades,strategy_net,cstrategy_net
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2021-01-04,1.225070,0.005766,-0.009774,-0.009774,-0.009774,-0.009774,-0.009774,1.0,0.005766,1.005782,1.005782,1.0,0.0,0.005766,1.005782
2021-01-05,1.225160,0.000074,0.005766,0.005766,0.005766,0.005766,0.005766,-1.0,-0.000074,1.005856,1.005708,-1.0,2.0,-0.000214,1.005567
2021-01-06,1.230027,0.003965,0.000074,0.000074,0.000074,0.000074,0.000074,-1.0,-0.003965,1.009852,1.001728,-1.0,0.0,-0.003965,1.001588
2021-01-07,1.234111,0.003314,0.003965,0.003965,0.003965,0.003965,0.003965,-1.0,-0.003314,1.013205,0.998414,-1.0,0.0,-0.003314,0.998274
2021-01-08,1.227144,-0.005661,0.003314,0.003314,0.003314,0.003314,0.003314,-1.0,0.005661,1.007486,1.004082,-1.0,0.0,0.005661,1.003941
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,1.132426,-0.000272,-0.000136,-0.000136,-0.000136,-0.000136,-0.000136,-1.0,0.000272,0.929722,1.124835,-1.0,0.0,0.000272,1.106250
2021-12-28,1.133003,0.000510,-0.000272,-0.000272,-0.000272,-0.000272,-0.000272,-1.0,-0.000510,0.930196,1.124261,-1.0,0.0,-0.000510,1.105686
2021-12-29,1.131478,-0.001347,0.000510,0.000510,0.000510,0.000510,0.000510,-1.0,0.001347,0.928943,1.125777,-1.0,0.0,0.001347,1.107177
2021-12-30,1.136015,0.004002,-0.001347,-0.001347,-0.001347,-0.001347,-0.001347,-1.0,-0.004002,0.932668,1.121281,-1.0,0.0,-0.004002,1.102755


In [179]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data.creturns, name='Returns (Baseline)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy, name='Returns (Strategy)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy_net, name='Returns (Strategy + trading costs)'))

fig.update_layout(title='EUR/USD', xaxis_title='Time', yaxis_title='Price')

fig.show()

In [180]:
data[['returns', 'strategy_net']].mean() * (252)

returns        -0.070562
strategy_net    0.097803
dtype: float64

In [181]:
data[['returns', 'strategy_net']].std() * np.sqrt(252)

returns         0.055419
strategy_net    0.055204
dtype: float64