In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd 
import numpy as np
from t_nachine.optimization import Analyzer
from t_nachine.constants import TRADES_ATTRIBUTES

In [2]:
# Backtest logs written by the two strategies.
path0 = "logs/bounce.csv"
path1 = "logs/extreme_rsi.csv"

df0, df1 = (pd.read_csv(log_path) for log_path in (path0, path1))

# Stack both logs and renumber the rows 0..n-1 in the same step.
df = pd.concat([df0, df1], ignore_index=True)

In [3]:
len(df0), len(df1)

(29439, 99731)

In [4]:
# Wrap the combined trade log in the project's Analyzer and keep a handle on its
# `backtest_results` attribute, which the rest of the notebook works on.
analyzer = Analyzer(df)
results = analyzer.backtest_results

In [5]:
len(results)

128703

In [6]:
analyzer.win_rate

0.38480843492381683

In [7]:
analyzer.stats

Unnamed: 0_level_0,Duration,Duration,Duration,Duration,Duration,RiskToReward,RiskToReward,RiskToReward,RiskToReward,RiskToReward
Unnamed: 0_level_1,mean,median,min,max,std,mean,median,min,max,std
WinningTrade,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
False,13.697,6.0,0.0,1854.0,26.216,-1.049,-1.0,-23.132,0.0,0.317
True,22.974,13.0,0.0,2285.0,42.879,2.044,2.0,0.0,35.12,0.413


## Processing

In [8]:
# Order the trades by their EntryTime column.
# NOTE(review): EntryTime is parsed with pd.to_datetime further down, so it may still
# be a string here — confirm its format sorts chronologically.
results = results.sort_values(by="EntryTime")

In [9]:
# SlPrice (presumably the stop-loss price) sits one OneR below the entry price.
# Bracket assignment is used on purpose: attribute-style assignment only updates a
# column that already exists, otherwise it sets a plain Python attribute on the
# frame and no column is created.
results["SlPrice"] = results.EntryPrice - results.OneR

In [10]:
## Adding cohorts as a feature

def compute_cohots(price: float) -> int:
    """Map a price to a price-bucket ("cohort") label between 1 and 7.

    Buckets are closed on the right:
    <= 5 -> 1, <= 10 -> 2, <= 50 -> 3, <= 100 -> 4, <= 500 -> 5, <= 1000 -> 6,
    anything else -> 7.
    """
    upper_bounds = (5, 10, 50, 100, 500, 1000)

    # Walk the bounds in ascending order; the first one that contains the price wins.
    for cohort, bound in enumerate(upper_bounds, start=1):
        if price <= bound:
            return cohort

    # Above every bound (NaN also ends up here, because every comparison is False).
    return len(upper_bounds) + 1

results['cohorts'] = results.EntryPrice.apply(compute_cohots)

In [11]:
# Chronological hold-out: trades entered on or before the cut-off date go to `train`,
# later trades go to `test`.
date = "2017-01-31"

# Parse the column and the cut-off once instead of once per mask.
entry_times = pd.to_datetime(results.EntryTime)
cutoff = pd.to_datetime(date)

# .copy() makes both splits independent frames, so columns added to them later
# (e.g. predictions) are not written into a filtered slice of `results`.
train = results[entry_times <= cutoff].copy()
test = results[entry_times > cutoff].copy()

In [12]:
train.shape, test.shape, len(test)/len(results)

((119320, 37), (9383, 37), 0.072904283505435)

## Evaluate Function

In [13]:
from sklearn.metrics import recall_score, precision_score, accuracy_score

In [14]:
def evaluate(clf, train, test, features = TRADES_ATTRIBUTES):
    """Print recall and precision of ``clf`` on two splits and return its predictions.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict``.
    train, test : frames containing the ``features`` columns and a
        ``WinningTrade`` label column.
    features : columns passed to ``clf.predict``; defaults to ``TRADES_ATTRIBUTES``.

    Returns
    -------
    (preds_train, preds_test) : the outputs of ``clf.predict`` on each split.
    """
    preds_train = clf.predict(train[features])
    preds_test = clf.predict(test[features])

    # zero_division=0 is passed to every metric so an undefined score (no positive
    # predictions / no positive labels) is reported as 0 the same way on both
    # splits; the test-precision call used to be the only one without it.
    recall_train = recall_score(train.WinningTrade, preds_train, zero_division=0)
    recall_test = recall_score(test.WinningTrade, preds_test, zero_division=0)

    precision_train = precision_score(train.WinningTrade, preds_train, zero_division=0)
    precision_test = precision_score(test.WinningTrade, preds_test, zero_division=0)

    print('recall train test: , ', round(recall_train, 2), round(recall_test, 2))
    print('precision train test: , ', round(precision_train, 2), round(precision_test, 2))

    return preds_train, preds_test

## Training

In [103]:
from lightgbm import LGBMClassifier

In [104]:
clf = LGBMClassifier(random_state=2021, class_weight="balanced", n_estimators=200, importance_type="gain")

In [105]:
clf.fit(train[TRADES_ATTRIBUTES], train.WinningTrade)

LGBMClassifier(class_weight='balanced', importance_type='gain',
               n_estimators=200, random_state=2021)

In [106]:
preds_train, preds_test = evaluate(clf, train, test)

recall train test: ,  0.66 0.53
precision train test: ,  0.53 0.47


### Remove correlated features

In [51]:
corr_matrix = train[TRADES_ATTRIBUTES].corr().abs()

# Select upper triangle of correlation matrix (k=1 leaves out the diagonal).
# The builtin `bool` is used because the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

# Find features with correlation greater than 0.95
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]

# Keep the remaining features in their original order. A set difference iterates in
# arbitrary order, which can change the column order fed to the model between runs.
new_features = [feature for feature in TRADES_ATTRIBUTES if feature not in to_drop]


In [52]:
# Refit on the reduced feature list, this time with 100 trees and a depth cap of 10.
# NOTE: this rebinds `clf`; later cells see this model, not an earlier one.
clf= LGBMClassifier(random_state=80, class_weight="balanced", n_estimators=100, importance_type="gain",max_depth=10)

clf.fit(train[new_features], train.WinningTrade)

LGBMClassifier(class_weight='balanced', importance_type='gain', max_depth=10,
               random_state=80)

In [53]:
preds_train, preds_test = evaluate(clf, train, test, features=new_features)

recall train test: ,  0.64 0.54
precision train test: ,  0.51 0.47


### Add the price cohort as a feature

In [54]:
# Feature set = the base trade attributes plus the 'cohorts' column.
# NOTE: this rebinds both `new_features` and `clf`; the cells below use this
# 50-tree, depth-10 model.
new_features = TRADES_ATTRIBUTES + ['cohorts']
clf= LGBMClassifier(random_state=80, class_weight="balanced", n_estimators=50, importance_type="gain",max_depth=10)
clf.fit(train[new_features], train.WinningTrade)

LGBMClassifier(class_weight='balanced', importance_type='gain', max_depth=10,
               n_estimators=50, random_state=80)

In [55]:
preds_train, preds_test = evaluate(clf, train, test, features=new_features)

recall train test: ,  0.62 0.54
precision train test: ,  0.49 0.47


In [56]:
test['preds'] = preds_test

In [57]:
trades_to_take_with_ml = test[test['preds'] == True]

In [58]:
trades_to_take_with_ml.WinningTrade.mean()

0.4704770902220123

In [59]:
trades_to_take_with_ml.shape[0] / len(test)

0.45124160716188855

## Number of trades per year

In [60]:
# Score every trade in `results` with the current `clf` and store the prediction.
# NOTE(review): `results` includes the rows of `train` that `clf` was fitted on, so
# the win rate and selection ratio derived from these predictions are not
# out-of-sample figures.
results['preds'] = clf.predict(results[new_features])

In [61]:
trades_to_take_with_ml = results[results['preds'] == True]

In [62]:
trades_to_take_with_ml.WinningTrade.mean()

0.4913658024392214

In [63]:
len(trades_to_take_with_ml) / len(results)

0.48099111908813313

In [64]:
# Replace EntryTime with its calendar year so trades can be counted per year.
# .assign builds a new frame rather than writing a column into a filtered slice of
# `results`, which is what the in-place assignment did.
trades_to_take_with_ml = trades_to_take_with_ml.assign(
    EntryTime=pd.to_datetime(trades_to_take_with_ml['EntryTime']).dt.year
)

In [65]:
# Mean number of model-selected trades per year, for years from 2010 onwards
# (EntryTime holds the year as a number at this point, not a date).
trades_to_take_with_ml[trades_to_take_with_ml['EntryTime'] >= 2010].groupby("EntryTime").size().mean()

4735.75

In [66]:
x = results[TRADES_ATTRIBUTES + ['WinningTrade', 'EntryTime']]

In [67]:
x.to_csv('data.csv', index=False)

In [68]:
x

Unnamed: 0,VWAP,BB_LOWER,BB_MIDDLE,BB_UPPER,pivot,s1,s2,s3,s4,r1,r2,r3,r4,ATR,RSI,ADX,WILLIAMS,Volume,WinningTrade,EntryTime
52195,1.037,0.973,1.027,1.081,0.988,0.982,0.978,0.972,0.966,0.994,0.997,1.003,1.009,0.077,0.348,0.325,-0.705,520952.000,False,1962-04-06
52194,1.040,0.961,1.015,1.069,0.989,0.983,0.979,0.973,0.967,0.995,0.999,1.005,1.011,0.090,0.382,0.347,-0.695,610767.000,False,1962-04-16
52196,1.258,0.877,1.094,1.311,0.950,0.912,0.889,0.851,0.813,0.989,1.012,1.050,1.088,0.216,0.317,0.654,-0.700,5712478.000,True,1962-06-18
52197,0.991,0.916,0.973,1.030,0.982,0.973,0.968,0.959,0.951,0.990,0.996,1.004,1.013,0.100,0.493,0.230,-0.950,1949069.000,False,1962-09-04
52198,0.968,0.952,0.976,0.999,0.980,0.970,0.964,0.954,0.944,0.990,0.996,1.006,1.016,0.073,0.572,0.128,-0.192,1841284.000,True,1963-01-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20037,0.445,0.924,1.038,1.152,0.969,0.961,0.955,0.947,0.939,0.977,0.982,0.990,0.999,0.764,0.440,0.394,-0.390,101117.000,False,2017-11-10
62400,1.995,0.919,1.002,1.085,0.956,0.923,0.902,0.869,0.835,0.990,1.010,1.044,1.077,0.389,0.427,0.189,-0.633,26866.000,False,2017-11-10
26463,0.793,0.933,0.963,0.993,0.967,0.958,0.952,0.943,0.934,0.976,0.982,0.991,1.000,0.639,0.579,0.144,-0.219,96924.000,False,2017-11-10
4745,0.613,0.978,1.012,1.047,0.977,0.960,0.949,0.931,0.913,0.995,1.006,1.024,1.042,1.744,0.505,0.383,-0.474,2120.000,False,2017-11-10


## Test On 2018-2021

In [85]:
from t_nachine.backtester import Backtest
from t_nachine.strategies import ExtremeRSI

In [86]:
yahoo_path = "../archive/yahoo_stocks/"

In [87]:
bt = Backtest(cash=10_000)

In [88]:
res = bt.run(strategy=ExtremeRSI, stock_path=yahoo_path)

100%|███████████████████████████████████████| 1144/1144 [08:10<00:00,  2.33it/s]


In [89]:
bt.log_results(backtest_results=res, backtest_name="extreme_rsi_yahoo.csv")

In [90]:
ana = Analyzer(res)

In [91]:
res_ana = ana.backtest_results

In [98]:
res_ana.shape

(37177, 36)

In [95]:
ana.win_rate

0.39102133039244696

In [125]:
year = "2018-01-01"
trade_2018_2021 = res_ana[pd.to_datetime(res_ana.EntryTime) >= pd.to_datetime(year)]

In [126]:
trade_2018_2021.shape

(7570, 36)

In [127]:
# Evaluate a baseline model (TRADES_ATTRIBUTES only) on the 2018-2021 trades.
# `clf` is rebound several times above; in notebook order its latest definition is
# fitted on TRADES_ATTRIBUTES + ['cohorts'], and no 'cohorts' column is ever added to
# this frame. Fitting a dedicated baseline here keeps the cell independent of the
# order in which earlier cells were executed.
clf_baseline = LGBMClassifier(
    random_state=2021, class_weight="balanced", n_estimators=200, importance_type="gain"
)
clf_baseline.fit(train[TRADES_ATTRIBUTES], train.WinningTrade)

# The same frame fills both the train and test slots, so both printed numbers on each
# line are the score on the 2018-2021 trades.
_ , _  = evaluate(clf_baseline, trade_2018_2021, trade_2018_2021)

recall train test: ,  0.48 0.48
precision train test: ,  0.49 0.49
