In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd 
import numpy as np
from t_nachine.optimization import Analyzer
from t_nachine.constants import TRADES_ATTRIBUTES

In [2]:
# The two CSV logs that get combined into a single frame.
path0 = "logs/bounce.csv"
path1 = "logs/extreme_rsi.csv"

df0, df1 = (pd.read_csv(log_path) for log_path in (path0, path1))

# Stack both logs and renumber the rows 0..n-1 in one step.
df = pd.concat([df0, df1], ignore_index=True)

In [3]:
len(df0), len(df1)

(29439, 99731)

In [4]:
# Wrap the combined log in the project's Analyzer and keep its
# `backtest_results` table, which the rest of the notebook works on.
# NOTE(review): presumably one row per trade — confirm against Analyzer.
analyzer = Analyzer(df)
results = analyzer.backtest_results

In [5]:
len(results)

128703

In [6]:
analyzer.win_rate

0.38480843492381683

In [7]:
analyzer.stats

Unnamed: 0_level_0,Duration,Duration,Duration,Duration,Duration,RiskToReward,RiskToReward,RiskToReward,RiskToReward,RiskToReward
Unnamed: 0_level_1,mean,median,min,max,std,mean,median,min,max,std
WinningTrade,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
False,13.697,6.0,0.0,1854.0,26.216,-1.049,-1.0,-23.132,0.0,0.317
True,22.974,13.0,0.0,2285.0,42.879,2.044,2.0,0.0,35.12,0.413


## Processing

In [8]:
results = results.sort_values(by="EntryTime")

In [9]:
# SlPrice is set to EntryPrice minus OneR (presumably the stop-loss price).
# Bracket assignment always writes a real column; attribute-style assignment
# (`results.SlPrice = ...`) only does so when the column already exists and
# otherwise sets a plain Python attribute without touching the frame.
results["SlPrice"] = results["EntryPrice"] - results["OneR"]

In [10]:
## Adding cohorts as a feature

def compute_cohots(price: float) -> int:
    """Map a price to a cohort number between 1 and 7.

    The cohort is the 1-based position of the first upper bound in
    (5, 10, 50, 100, 500, 1000) that the price does not exceed; bounds are
    inclusive. A price above every bound gets cohort 7. A value that
    compares False against every bound (e.g. NaN) also ends up in cohort 7.
    """
    upper_bounds = (5, 10, 50, 100, 500, 1000)

    for cohort, bound in enumerate(upper_bounds, start=1):
        if price <= bound:
            return cohort

    # No bound matched: last cohort.
    return len(upper_bounds) + 1

results['cohorts'] = results.EntryPrice.apply(compute_cohots)

In [11]:
date = "2017-01-31"

# Parse the entry timestamps and the cutoff once instead of once per split.
entry_time = pd.to_datetime(results.EntryTime)
cutoff = pd.to_datetime(date)

# Chronological split: everything up to and including the cutoff trains the
# model, everything after it is held out. `.copy()` gives each split its own
# frame, so columns can be added to it later without SettingWithCopyWarning.
train = results[entry_time <= cutoff].copy()
test = results[entry_time > cutoff].copy() # 2017 - 2018

In [12]:
train.shape, test.shape, len(test)/len(results)

((119320, 37), (9383, 37), 0.072904283505435)

## Evaluate Function

In [13]:
from sklearn.metrics import recall_score, precision_score, accuracy_score

In [14]:
def evaluate(clf, train, test, features = TRADES_ATTRIBUTES):
    """Print train/test recall and precision of a fitted classifier.

    Parameters
    ----------
    clf
        Fitted estimator exposing ``predict``.
    train, test
        Frames containing the ``features`` columns and a ``WinningTrade``
        label column.
    features
        Columns passed to ``clf.predict``; defaults to ``TRADES_ATTRIBUTES``.

    Returns
    -------
    tuple
        ``(preds_train, preds_test)`` exactly as returned by ``clf.predict``.
    """
    preds_train = clf.predict(train[features])
    preds_test = clf.predict(test[features])

    # Every metric uses zero_division=0, so a split with no positive
    # predictions/labels scores 0 silently and all four calls agree.
    recall_train = recall_score(train.WinningTrade, preds_train, zero_division=0)
    recall_test = recall_score(test.WinningTrade, preds_test, zero_division=0)

    precision_train = precision_score(train.WinningTrade, preds_train, zero_division=0)
    precision_test = precision_score(test.WinningTrade, preds_test, zero_division=0)

    print('recall train test: , ', round(recall_train, 2), round(recall_test, 2))
    print('precision train test: , ', round(precision_train, 2), round(precision_test, 2))

    return preds_train, preds_test

## Training

In [128]:
from lightgbm import LGBMClassifier

In [129]:
# Baseline model: 200 trees, balanced class weights, gain-based importances.
clf = LGBMClassifier(
    n_estimators=200,
    class_weight="balanced",
    importance_type="gain",
    random_state=2021,
)

In [130]:
clf.fit(train[TRADES_ATTRIBUTES], train.WinningTrade)

LGBMClassifier(class_weight='balanced', importance_type='gain',
               n_estimators=200, random_state=2021)

In [131]:
preds_train, preds_test = evaluate(clf, train, test)

recall train test: ,  0.66 0.53
precision train test: ,  0.53 0.47


### Remove correlated features

In [132]:
# Absolute pairwise correlations between the candidate features.
corr_matrix = train[TRADES_ATTRIBUTES].corr().abs()

# Select upper triangle of correlation matrix (k=1 excludes the diagonal), so
# each pair is inspected once. The builtin `bool` replaces `np.bool`, an alias
# that was deprecated and then removed in NumPy 1.24.
upper_mask = np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
upper = corr_matrix.where(upper_mask)

# Find features with correlation greater than 0.95
to_drop = [column for column in upper.columns if (upper[column] > 0.95).any()]

# Keep the remaining features in their TRADES_ATTRIBUTES order. A set
# difference would give an arbitrary column order that can change from run
# to run.
dropped = set(to_drop)
new_features = [feature for feature in TRADES_ATTRIBUTES if feature not in dropped]


In [133]:
# Depth-limited model (100 trees) refit on the current `new_features` list.
clf = LGBMClassifier(
    n_estimators=100,
    max_depth=10,
    class_weight="balanced",
    importance_type="gain",
    random_state=80,
)

clf.fit(train[new_features], train.WinningTrade)

LGBMClassifier(class_weight='balanced', importance_type='gain', max_depth=10,
               random_state=80)

In [134]:
preds_train, preds_test = evaluate(clf, train, test, features=new_features)

recall train test: ,  0.64 0.54
precision train test: ,  0.51 0.47


### Use cohorts as a feature

In [135]:
# Full attribute list plus the price-cohort column.
new_features = [*TRADES_ATTRIBUTES, 'cohorts']

clf = LGBMClassifier(
    n_estimators=50,
    max_depth=10,
    class_weight="balanced",
    importance_type="gain",
    random_state=80,
)
clf.fit(train[new_features], train.WinningTrade)

LGBMClassifier(class_weight='balanced', importance_type='gain', max_depth=10,
               n_estimators=50, random_state=80)

In [136]:
preds_train, preds_test = evaluate(clf, train, test, features=new_features)

recall train test: ,  0.62 0.54
precision train test: ,  0.49 0.47


In [137]:
# Attach the predictions through `assign`, which returns a new frame, rather
# than writing a column into a frame that was produced by filtering `results`
# (that triggers SettingWithCopyWarning, hidden here by the warnings filter).
test = test.assign(preds=preds_test)

In [138]:
trades_to_take_with_ml = test[test['preds'] == True]

In [139]:
trades_to_take_with_ml.WinningTrade.mean()

0.4704770902220123

In [140]:
trades_to_take_with_ml.shape[0] / len(test)

0.45124160716188855

## Test On 2018-2021

In [151]:
# NOTE(review): the path ends in ".csv.csv" — confirm it matches the file on disk.
yahoo_path = "logs/extreme_rsi_yahoo.csv.csv"

In [152]:
# `res` was not defined by any earlier cell, so a fresh kernel would fail here
# with NameError. Load it from `yahoo_path`, which is otherwise never read.
# NOTE(review): presumably this is the frame the original run used — confirm.
res = pd.read_csv(yahoo_path)
ana = Analyzer(res)

In [153]:
res_ana = ana.backtest_results

In [154]:
res_ana.shape

(37177, 36)

In [155]:
ana.win_rate

0.39102133039244696

In [171]:
# NOTE(review): the cutoff is 2016-01-01, although the variable name and the
# section title say 2018-2021, and the model was trained on trades up to
# 2017-01-31. Confirm whether this overlap with the training period is intended.
year = "2016-01-01"

# Filter and add the cohort column in one chain, so the new column is created
# on a fresh frame instead of being written into a slice of `res_ana`.
trade_2018_2021 = (
    res_ana[pd.to_datetime(res_ana.EntryTime) >= pd.to_datetime(year)]
    .assign(cohorts=lambda frame: frame.EntryPrice.apply(compute_cohots))
)

In [172]:
trade_2018_2021.shape

(11005, 37)

In [173]:
_ , _  = evaluate(clf, trade_2018_2021, trade_2018_2021, features=new_features)

recall train test: ,  0.5 0.5
precision train test: ,  0.5 0.5


In [174]:
# Per-cohort breakdown. sort=False visits the cohorts in order of first
# appearance, the same order `.unique()` produces.
for cohort, cohort_trades in trade_2018_2021.groupby("cohorts", sort=False):
    print(cohort, len(cohort_trades))
    # The same frame is passed as both "train" and "test", so the two printed
    # numbers on each line are identical.
    _ , _  = evaluate(clf, cohort_trades, cohort_trades, features=new_features)
    print('-----------------------------')

1 832
recall train test: ,  0.66 0.66
precision train test: ,  0.49 0.49
-----------------------------
2 1173
recall train test: ,  0.64 0.64
precision train test: ,  0.5 0.5
-----------------------------
3 6224
recall train test: ,  0.5 0.5
precision train test: ,  0.49 0.49
-----------------------------
4 1606
recall train test: ,  0.38 0.38
precision train test: ,  0.52 0.52
-----------------------------
5 1075
recall train test: ,  0.39 0.39
precision train test: ,  0.54 0.54
-----------------------------
6 71
recall train test: ,  0.36 0.36
precision train test: ,  0.6 0.6
-----------------------------
7 24
recall train test: ,  0.56 0.56
precision train test: ,  0.71 0.71
-----------------------------
