## Classification

In [42]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [43]:
data = pd.read_csv(filepath_or_buffer='../../resources/train_set.csv', parse_dates=['time'], index_col='time')

In [44]:
data.head(10)

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2009-12-31,1.432706
2010-01-01,1.438994
2010-01-04,1.442398
2010-01-05,1.436596
2010-01-06,1.440403
2010-01-07,1.431803
2010-01-08,1.441109
2010-01-11,1.451126
2010-01-12,1.44766
2010-01-13,1.452391


In [45]:
data.tail(10)

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2020-12-18,1.226272
2020-12-21,1.221613
2020-12-22,1.223691
2020-12-23,1.218665
2020-12-24,1.219141
2020-12-25,1.218472
2020-12-28,1.22051
2020-12-29,1.222345
2020-12-30,1.225295
2020-12-31,1.22999


In [46]:
data.Close.to_frame()

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2009-12-31,1.432706
2010-01-01,1.438994
2010-01-04,1.442398
2010-01-05,1.436596
2010-01-06,1.440403
...,...
2020-12-25,1.218472
2020-12-28,1.220510
2020-12-29,1.222345
2020-12-30,1.225295


Getting more statistical insight into the dataset

In [47]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2866 entries, 2009-12-31 to 2020-12-31
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   2866 non-null   float64
dtypes: float64(1)
memory usage: 44.8 KB


In [48]:
data.describe()

Unnamed: 0,Close
count,2866.0
mean,1.222946
std,0.111835
min,1.039047
25%,1.121604
50%,1.193859
75%,1.323745
max,1.484406


Since the only attribute we need is closing price
we modify the data frame

In [49]:
data = data.Close.to_frame()

In [50]:
data.head(10)

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2009-12-31,1.432706
2010-01-01,1.438994
2010-01-04,1.442398
2010-01-05,1.436596
2010-01-06,1.440403
2010-01-07,1.431803
2010-01-08,1.441109
2010-01-11,1.451126
2010-01-12,1.44766
2010-01-13,1.452391


Since the only attribute we need is closing price
we modify the data frame


In [51]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data.Close, name='Close'))

fig.update_layout(title='EUR/USD', xaxis_title='Time', yaxis_title='Price')

fig.show()

In [52]:
data['returns'] = np.log(data.div(data.shift(1)))

In [53]:
data.dropna(inplace=True)

In [54]:
data['direction'] = np.sign(data.returns)

In [55]:
data

Unnamed: 0_level_0,Close,returns,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,1.438994,0.004379,1.0
2010-01-04,1.442398,0.002363,1.0
2010-01-05,1.436596,-0.004031,-1.0
2010-01-06,1.440403,0.002647,1.0
2010-01-07,1.431803,-0.005989,-1.0
...,...,...,...
2020-12-25,1.218472,-0.000549,-1.0
2020-12-28,1.220510,0.001671,1.0
2020-12-29,1.222345,0.001502,1.0
2020-12-30,1.225295,0.002411,1.0


In [56]:
data.direction.value_counts()

 1.0    1434
-1.0    1414
 0.0      17
Name: direction, dtype: int64

In [57]:
lags = 5

In [58]:
cols = []
for lag in range(1, lags+1):

  col = f'lag{lag}'
  data[col] = data.returns.shift(lag)
  cols.append(col)

data.dropna(inplace=True)

In [59]:
data

Unnamed: 0_level_0,Close,returns,direction,lag1,lag2,lag3,lag4,lag5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-08,1.441109,0.006478,1.0,-0.005989,0.002647,-0.004031,0.002363,0.004379
2010-01-11,1.451126,0.006927,1.0,0.006478,-0.005989,0.002647,-0.004031,0.002363
2010-01-12,1.447660,-0.002391,-1.0,0.006927,0.006478,-0.005989,0.002647,-0.004031
2010-01-13,1.452391,0.003262,1.0,-0.002391,0.006927,0.006478,-0.005989,0.002647
2010-01-14,1.449990,-0.001654,-1.0,0.003262,-0.002391,0.006927,0.006478,-0.005989
...,...,...,...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,-1.0,0.000390,-0.004115,0.001699,-0.003806,0.005161
2020-12-28,1.220510,0.001671,1.0,-0.000549,0.000390,-0.004115,0.001699,-0.003806
2020-12-29,1.222345,0.001502,1.0,0.001671,-0.000549,0.000390,-0.004115,0.001699
2020-12-30,1.225295,0.002411,1.0,0.001502,0.001671,-0.000549,0.000390,-0.004115


In [60]:
LR = LogisticRegression(C=1e6, max_iter=100000, multi_class='ovr')

In [61]:
LR.fit(data[cols], data.direction)

LogisticRegression(C=1000000.0, max_iter=100000, multi_class='ovr')

In [62]:
data['predict_diff'] = LR.predict(X=data[cols])

In [63]:
data

Unnamed: 0_level_0,Close,returns,direction,lag1,lag2,lag3,lag4,lag5,predict_diff
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-08,1.441109,0.006478,1.0,-0.005989,0.002647,-0.004031,0.002363,0.004379,1.0
2010-01-11,1.451126,0.006927,1.0,0.006478,-0.005989,0.002647,-0.004031,0.002363,-1.0
2010-01-12,1.447660,-0.002391,-1.0,0.006927,0.006478,-0.005989,0.002647,-0.004031,-1.0
2010-01-13,1.452391,0.003262,1.0,-0.002391,0.006927,0.006478,-0.005989,0.002647,1.0
2010-01-14,1.449990,-0.001654,-1.0,0.003262,-0.002391,0.006927,0.006478,-0.005989,-1.0
...,...,...,...,...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,-1.0,0.000390,-0.004115,0.001699,-0.003806,0.005161,1.0
2020-12-28,1.220510,0.001671,1.0,-0.000549,0.000390,-0.004115,0.001699,-0.003806,1.0
2020-12-29,1.222345,0.001502,1.0,0.001671,-0.000549,0.000390,-0.004115,0.001699,1.0
2020-12-30,1.225295,0.002411,1.0,0.001502,0.001671,-0.000549,0.000390,-0.004115,-1.0


In [64]:
data.predict_diff.value_counts()

 1.0    1590
-1.0    1270
Name: predict_diff, dtype: int64

In [65]:
hits = np.sign(data.direction * data.predict_diff).value_counts()
hits

 1.0    1478
-1.0    1365
 0.0      17
dtype: int64

In [66]:
hit_ratio = hits[1.0] / sum(hits)
hit_ratio

0.5167832167832168

In sample backtesting and look ahead bias

In [67]:
data['strategy'] = data.predict_diff * data.returns

In [68]:
data['creturns'] = data['returns'].cumsum().apply(np.exp)
data['cstrategy'] = data['strategy'].cumsum().apply(np.exp)

In [69]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data.creturns, name='Returns (Baseline)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy, name='Returns (Strategy)'))

fig.update_layout(title='EUR/USD', xaxis_title='Time', yaxis_title='Price')

fig.show()

In [70]:
data['trades'] = data.predict_diff.diff().fillna(0).abs()

In [71]:
data.trades.value_counts()

0.0    1765
2.0    1095
Name: trades, dtype: int64

In [72]:
data

Unnamed: 0_level_0,Close,returns,direction,lag1,lag2,lag3,lag4,lag5,predict_diff,strategy,creturns,cstrategy,trades
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2010-01-08,1.441109,0.006478,1.0,-0.005989,0.002647,-0.004031,0.002363,0.004379,1.0,0.006478,1.006499,1.006499,0.0
2010-01-11,1.451126,0.006927,1.0,0.006478,-0.005989,0.002647,-0.004031,0.002363,-1.0,-0.006927,1.013495,0.999552,2.0
2010-01-12,1.447660,-0.002391,-1.0,0.006927,0.006478,-0.005989,0.002647,-0.004031,-1.0,0.002391,1.011075,1.001945,0.0
2010-01-13,1.452391,0.003262,1.0,-0.002391,0.006927,0.006478,-0.005989,0.002647,1.0,0.003262,1.014379,1.005219,2.0
2010-01-14,1.449990,-0.001654,-1.0,0.003262,-0.002391,0.006927,0.006478,-0.005989,-1.0,0.001654,1.012702,1.006883,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,-1.0,0.000390,-0.004115,0.001699,-0.003806,0.005161,1.0,-0.000549,0.851005,1.716040,0.0
2020-12-28,1.220510,0.001671,1.0,-0.000549,0.000390,-0.004115,0.001699,-0.003806,1.0,0.001671,0.852428,1.718910,0.0
2020-12-29,1.222345,0.001502,1.0,0.001671,-0.000549,0.000390,-0.004115,0.001699,1.0,0.001502,0.853710,1.721494,0.0
2020-12-30,1.225295,0.002411,1.0,0.001502,0.001671,-0.000549,0.000390,-0.004115,-1.0,-0.002411,0.855771,1.717348,2.0


Taking trading costs into consideration

In [73]:
ptc = 0.00007

In [74]:
data['strategy_net'] = data.strategy - data.trades * ptc

In [75]:
data['cstrategy_net'] = data.strategy_net.cumsum().apply(np.exp)

In [76]:
data

Unnamed: 0_level_0,Close,returns,direction,lag1,lag2,lag3,lag4,lag5,predict_diff,strategy,creturns,cstrategy,trades,strategy_net,cstrategy_net
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-01-08,1.441109,0.006478,1.0,-0.005989,0.002647,-0.004031,0.002363,0.004379,1.0,0.006478,1.006499,1.006499,0.0,0.006478,1.006499
2010-01-11,1.451126,0.006927,1.0,0.006478,-0.005989,0.002647,-0.004031,0.002363,-1.0,-0.006927,1.013495,0.999552,2.0,-0.007067,0.999412
2010-01-12,1.447660,-0.002391,-1.0,0.006927,0.006478,-0.005989,0.002647,-0.004031,-1.0,0.002391,1.011075,1.001945,0.0,0.002391,1.001805
2010-01-13,1.452391,0.003262,1.0,-0.002391,0.006927,0.006478,-0.005989,0.002647,1.0,0.003262,1.014379,1.005219,2.0,0.003122,1.004937
2010-01-14,1.449990,-0.001654,-1.0,0.003262,-0.002391,0.006927,0.006478,-0.005989,-1.0,0.001654,1.012702,1.006883,2.0,0.001514,1.006460
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,-1.0,0.000390,-0.004115,0.001699,-0.003806,0.005161,1.0,-0.000549,0.851005,1.716040,0.0,-0.000549,1.472349
2020-12-28,1.220510,0.001671,1.0,-0.000549,0.000390,-0.004115,0.001699,-0.003806,1.0,0.001671,0.852428,1.718910,0.0,0.001671,1.474811
2020-12-29,1.222345,0.001502,1.0,0.001671,-0.000549,0.000390,-0.004115,0.001699,1.0,0.001502,0.853710,1.721494,0.0,0.001502,1.477029
2020-12-30,1.225295,0.002411,1.0,0.001502,0.001671,-0.000549,0.000390,-0.004115,-1.0,-0.002411,0.855771,1.717348,2.0,-0.002551,1.473266


In [77]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data.creturns, name='Returns (Baseline)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy, name='Returns (Strategy)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy_net, name='Returns (Strategy + trading costs)'))

fig.update_layout(title='EUR/USD', xaxis_title='Time', yaxis_title='Price')

fig.show()

Evaluate risk and reward

In [78]:
data[['returns', 'strategy_net']].mean() * (252)

returns        -0.013387
strategy_net    0.033805
dtype: float64

In [79]:
data[['returns', 'strategy_net']].std() * np.sqrt(252)

returns         0.089179
strategy_net    0.089101
dtype: float64

Out-sample forward testing

In [80]:
data = pd.read_csv('../../resources/test_set.csv', parse_dates=['time'], index_col='time')

In [81]:
data

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2020-12-31,1.229990
2021-01-01,1.218027
2021-01-04,1.225070
2021-01-05,1.225160
2021-01-06,1.230027
...,...
2021-12-27,1.132426
2021-12-28,1.133003
2021-12-29,1.131478
2021-12-30,1.136015


In [82]:
data['returns'] = np.log(data.div(data.shift(1)))

In [83]:
data['direction'] = np.sign(data.returns)

In [84]:
data

Unnamed: 0_level_0,Close,returns,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-12-31,1.229990,,
2021-01-01,1.218027,-0.009774,-1.0
2021-01-04,1.225070,0.005766,1.0
2021-01-05,1.225160,0.000074,1.0
2021-01-06,1.230027,0.003965,1.0
...,...,...,...
2021-12-27,1.132426,-0.000272,-1.0
2021-12-28,1.133003,0.000510,1.0
2021-12-29,1.131478,-0.001347,-1.0
2021-12-30,1.136015,0.004002,1.0


In [85]:
lags = 5

In [86]:
cols = []

for lag in range(1, lags + 1):

  col = f"lag{lag}"
  data[col] = data.returns.shift(1)
  cols.append(col)

data.dropna(inplace=True)

In [87]:
data

Unnamed: 0_level_0,Close,returns,direction,lag1,lag2,lag3,lag4,lag5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-04,1.225070,0.005766,1.0,-0.009774,-0.009774,-0.009774,-0.009774,-0.009774
2021-01-05,1.225160,0.000074,1.0,0.005766,0.005766,0.005766,0.005766,0.005766
2021-01-06,1.230027,0.003965,1.0,0.000074,0.000074,0.000074,0.000074,0.000074
2021-01-07,1.234111,0.003314,1.0,0.003965,0.003965,0.003965,0.003965,0.003965
2021-01-08,1.227144,-0.005661,-1.0,0.003314,0.003314,0.003314,0.003314,0.003314
...,...,...,...,...,...,...,...,...
2021-12-27,1.132426,-0.000272,-1.0,-0.000136,-0.000136,-0.000136,-0.000136,-0.000136
2021-12-28,1.133003,0.000510,1.0,-0.000272,-0.000272,-0.000272,-0.000272,-0.000272
2021-12-29,1.131478,-0.001347,-1.0,0.000510,0.000510,0.000510,0.000510,0.000510
2021-12-30,1.136015,0.004002,1.0,-0.001347,-0.001347,-0.001347,-0.001347,-0.001347


In [88]:
data['pred'] = LR.predict(X=data[cols])

In [89]:
data.pred.value_counts()

 1.0    151
-1.0    109
Name: pred, dtype: int64

In [90]:
hits = np.sign(data.direction * data.pred).value_counts()

In [91]:
hit_ratio = hits[1.0] / sum(hits)
hit_ratio

0.5307692307692308

In [92]:
data['strategy'] = data.pred * data.returns

In [93]:
data['creturns'] = data['returns'].cumsum().apply(np.exp)
data['cstrategy'] = data['strategy'].cumsum().apply(np.exp)

In [94]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data.creturns, name='Returns (Baseline)'))
fig.add_trace(go.Scatter(x=data.index, y=data.cstrategy, name='Returns (Strategy)'))

fig.update_layout(title='EUR/USD', xaxis_title='Time', yaxis_title='Price')

fig.show()

In [95]:
data['trades'] = data.pred.diff().fillna(0).abs()

In [96]:
data.trades.value_counts()

2.0    135
0.0    125
Name: trades, dtype: int64

In [97]:
data

Unnamed: 0_level_0,Close,returns,direction,lag1,lag2,lag3,lag4,lag5,pred,strategy,creturns,cstrategy,trades
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-01-04,1.225070,0.005766,1.0,-0.009774,-0.009774,-0.009774,-0.009774,-0.009774,1.0,0.005766,1.005782,1.005782,0.0
2021-01-05,1.225160,0.000074,1.0,0.005766,0.005766,0.005766,0.005766,0.005766,-1.0,-0.000074,1.005856,1.005708,2.0
2021-01-06,1.230027,0.003965,1.0,0.000074,0.000074,0.000074,0.000074,0.000074,1.0,0.003965,1.009852,1.009704,2.0
2021-01-07,1.234111,0.003314,1.0,0.003965,0.003965,0.003965,0.003965,0.003965,-1.0,-0.003314,1.013205,1.006363,2.0
2021-01-08,1.227144,-0.005661,-1.0,0.003314,0.003314,0.003314,0.003314,0.003314,-1.0,0.005661,1.007486,1.012076,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,1.132426,-0.000272,-1.0,-0.000136,-0.000136,-0.000136,-0.000136,-0.000136,1.0,-0.000272,0.929722,1.009369,2.0
2021-12-28,1.133003,0.000510,1.0,-0.000272,-0.000272,-0.000272,-0.000272,-0.000272,1.0,0.000510,0.930196,1.009884,0.0
2021-12-29,1.131478,-0.001347,-1.0,0.000510,0.000510,0.000510,0.000510,0.000510,-1.0,0.001347,0.928943,1.011245,2.0
2021-12-30,1.136015,0.004002,1.0,-0.001347,-0.001347,-0.001347,-0.001347,-0.001347,1.0,0.004002,0.932668,1.015300,2.0


Evaluate risk and return

In [99]:
data[['returns', 'strategy']].mean() * (252)

returns    -0.070562
strategy    0.017719
dtype: float64

In [101]:
data[['returns', 'strategy']].std() * np.sqrt(252)

returns     0.055419
strategy    0.055587
dtype: float64