## Trading Bot - Preparation for ML strategy

In [44]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [45]:
# Load the training set; the `time` column is parsed as dates and becomes the index.
data = pd.read_csv(
    "../resources/train_set.csv",
    parse_dates=['time'],
    index_col='time',
)

In [46]:
# Show the freshly loaded frame (pandas truncates the middle rows in the output).
data

Unnamed: 0_level_0,Close
time,Unnamed: 1_level_1
2009-12-31,1.432706
2010-01-01,1.438994
2010-01-04,1.442398
2010-01-05,1.436596
2010-01-06,1.440403
...,...
2020-12-25,1.218472
2020-12-28,1.220510
2020-12-29,1.222345
2020-12-30,1.225295


In [47]:
# Log return of each close relative to the previous row: ln(Close_t / Close_{t-1}).
# Computed from the Close column explicitly instead of dividing the whole frame,
# so the cell keeps working when it is re-run or when the frame has more than
# one column (a multi-column result cannot be assigned to a single column).
data['returns'] = np.log(data['Close'] / data['Close'].shift(1))

In [48]:
# Remove, in place, every row that contains at least one missing value
# (the first row has no previous close, hence a NaN return).
data.dropna(axis=0, how="any", inplace=True)

In [49]:
# Sign of the return: 1.0 for an up move, -1.0 for a down move, 0.0 when unchanged.
data["direction"] = np.sign(data["returns"])

In [50]:
# Inspect the frame now that `returns` and `direction` have been added.
data

Unnamed: 0_level_0,Close,returns,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,1.438994,0.004379,1.0
2010-01-04,1.442398,0.002363,1.0
2010-01-05,1.436596,-0.004031,-1.0
2010-01-06,1.440403,0.002647,1.0
2010-01-07,1.431803,-0.005989,-1.0
...,...,...,...
2020-12-25,1.218472,-0.000549,-1.0
2020-12-28,1.220510,0.001671,1.0
2020-12-29,1.222345,0.001502,1.0
2020-12-30,1.225295,0.002411,1.0


In [51]:
# Number of lagged-return columns (lag1 .. lagN) to build as model features.
lags = 2

In [52]:
# Feature column names, one per lag: "lag1", "lag2", ...
cols = [f"lag{k}" for k in range(1, lags + 1)]

# lagN holds the return observed N rows earlier.
for lag, col in enumerate(cols, start=1):
    data[col] = data["returns"].shift(lag)

# shift() leaves NaN in the leading rows; drop every row with a missing value.
data.dropna(inplace=True)

In [53]:
# Inspect the frame with the lagged-return feature columns in place.
data

Unnamed: 0_level_0,Close,returns,direction,lag1,lag2
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-05,1.436596,-0.004031,-1.0,0.002363,0.004379
2010-01-06,1.440403,0.002647,1.0,-0.004031,0.002363
2010-01-07,1.431803,-0.005989,-1.0,0.002647,-0.004031
2010-01-08,1.441109,0.006478,1.0,-0.005989,0.002647
2010-01-11,1.451126,0.006927,1.0,0.006478,-0.005989
...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,-1.0,0.000390,-0.004115
2020-12-28,1.220510,0.001671,1.0,-0.000549,0.000390
2020-12-29,1.222345,0.001502,1.0,0.001671,-0.000549
2020-12-30,1.225295,0.002411,1.0,0.001502,0.001671


In [54]:
# Ordinary least-squares model; the intercept term is fitted along with the lag coefficients.
lr = LinearRegression(fit_intercept=True)

In [55]:
# Regress the realised direction (-1 / 0 / 1) on the lagged returns.
# fit() returns the estimator itself, which is what the cell displays.
lr.fit(X=data[cols], y=data["direction"])

LinearRegression()

In-sample "prediction" (the model is scored on the same rows it was fitted on)

In [56]:
# Score the same rows the model was fitted on; the output is a continuous
# value whose sign is later compared with the realised direction.
data["pred"] = lr.predict(X=data[cols])

In [57]:
# Inspect the frame with the model output in the `pred` column.
data

Unnamed: 0_level_0,Close,returns,direction,lag1,lag2,pred
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-05,1.436596,-0.004031,-1.0,0.002363,0.004379,-0.012243
2010-01-06,1.440403,0.002647,1.0,-0.004031,0.002363,0.021122
2010-01-07,1.431803,-0.005989,-1.0,0.002647,-0.004031,-0.000053
2010-01-08,1.441109,0.006478,1.0,-0.005989,0.002647,0.029889
2010-01-11,1.451126,0.006927,1.0,0.006478,-0.005989,-0.014955
...,...,...,...,...,...,...
2020-12-25,1.218472,-0.000549,-1.0,0.000390,-0.004115,0.010715
2020-12-28,1.220510,0.001671,1.0,-0.000549,0.000390,0.007890
2020-12-29,1.222345,0.001502,1.0,0.001671,-0.000549,-0.001056
2020-12-30,1.225295,0.002411,1.0,0.001502,0.001671,-0.003832


In [58]:
# direction * pred is positive when the two signs agree, negative when they
# differ and zero when either factor is zero; count how often each case occurs.
hits = (
    np.sign(data["direction"].mul(data["pred"]))
    .value_counts()
)

In [59]:
# Counts per outcome: 1.0 = signs agree, -1.0 = signs differ, 0.0 = product is zero.
hits

 1.0    1464
-1.0    1382
 0.0      17
dtype: int64

In [60]:
# Share of rows where the predicted and realised direction have the same sign.
# Rows whose product is 0 remain in the denominator, so they count as misses.
# .get() avoids a KeyError when no row was predicted correctly, and
# Series.sum() replaces the element-by-element builtin sum().
hit_ratio = hits.get(1.0, 0) / hits.sum()
hit_ratio

0.511351728955641

In [61]:
# Show the fitted estimator's repr before it is saved.
lr

LinearRegression()

Saving the model

In [62]:
import pickle

In [63]:
# Persist the fitted model. The context manager flushes and closes the file
# even if pickling raises; the bare open() call never closed its handle.
with open("linear_reg.p", "wb") as model_file:
    pickle.dump(lr, model_file)