In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from t_nachine.backtester import Backtest
from t_nachine.strategies import Bouncing, ExtremeRSI
import warnings
warnings.filterwarnings("ignore")
from tqdm import tqdm
import os 
import pandas as pd
import numpy as np



## Backtest

In [4]:
bt = Backtest(cash=20_000)

In [63]:
results = bt._run(strategy=Bouncing, stock_path="test_stocks/MSFT.csv", symbol="MSFT")

In [64]:
results

Start                    2016-11-04 00:00:00
End                      2021-11-03 00:00:00
Duration                  1825 days 00:00:00
Exposure Time [%]                    20.9062
Equity Final [$]                23060.681192
Equity Peak [$]                 23060.681192
Return [%]                               0.0
Buy & Hold Return [%]             468.897983
Return (Ann.) [%]                        0.0
Volatility (Ann.) [%]                    0.0
Sharpe Ratio                             NaN
Sortino Ratio                            NaN
# Trades                                  19
Win Rate [%]                       57.894737
Best Trade [%]                     13.547814
Worst Trade [%]                   -12.726851
Profit Factor                       2.996353
Expectancy [%]                      3.178271
SQN                                  2.16932
_strategy                           Bouncing
_equity_curve                            ...
_trades                      Size    Volu...
dtype: obj

In [76]:
from t_nachine.backtester.core._plotting import plot
from finta import TA

In [77]:
data = pd.read_csv('test_stocks/MSFT.csv')
data['Date'] = pd.to_datetime(data.Date)

In [78]:
data['ATR']= TA.ATR(data)

In [88]:
TA.VORTEX(data)

Unnamed: 0,VIm,VIp
0,,
1,,
2,,
3,,
4,,
...,...,...
1253,0.590785,1.275162
1254,0.526891,1.313286
1255,0.511747,1.340985
1256,0.507752,1.375758


In [79]:
trades = results['_trades']
trades.EntryTime = pd.to_datetime(trades.EntryTime)

In [80]:
trades = trades.merge(data[['Date', 'Volume', 'ATR']], left_on="EntryTime", right_on="Date")

In [84]:
trades[['PnL', 'ATR']]

Unnamed: 0,PnL,ATR
0,-200.550191,0.782858
1,-198.12,0.601429
2,-198.090213,1.28643
3,388.360876,0.927143
4,-198.900612,0.912855
5,-199.439856,1.876428
6,398.640792,2.487858
7,398.640792,2.487858
8,401.610033,3.065713
9,401.610033,3.065713


In [24]:
plot(results=results, df=df, indicators=results._strategy._indicators)

In [40]:
bt.log_results(backtest_results=results, backtest_name="bounce")

ValueError: file already exist set override to true if you want to override!

###  Extreme Rsi

In [4]:
extreme_results = bt.run(strategy=ExtremeRSI, stock_path="archive/Stocks/")

100%|███████████████████████████████████████| 7124/7124 [20:21<00:00,  5.83it/s]


In [6]:
extreme_results.Volume = extreme_results.Volume.astype(int)
extreme_results.EntryBar = extreme_results.EntryBar.astype(int)
extreme_results.ExitBar = extreme_results.ExitBar.astype(int)
extreme_results.Size = extreme_results.Size.astype(int)

In [8]:
bt.log_results(backtest_results=extreme_results, backtest_name="extreme_rsi")

## Build Dataset 

In [369]:
def STOCHASTICS(data, period, k):
    close, low, high = data.Close, data.Low, data.High
    l_period = low.rolling(window=period).min()
    h_period = high.rolling(window=period).max()
    per_k = 100*(close - l_period) / (h_period - l_period)
    per_k = per_k.rolling(window = k).mean()
    return per_k


def RSI(price, n=14)  -> pd.Series:
    prices = price
    deltas = np.diff(prices)
    seed = deltas[:n+1]
    up = seed[seed >= 0].sum()/n
    down = -seed[seed < 0].sum()/n
    rs = up/down
    rsi = np.zeros_like(prices)
    rsi[:n] = 100. - 100./(1.+rs)
    for i in range(n, len(prices)):
        delta = deltas[i-1]  # The diff is 1 shorter
        if delta > 0:
            upval = delta
            downval = 0.
        else:
            upval = 0.
            downval = -delta
        up = (up*(n-1) + upval)/n
        down = (down*(n-1) + downval)/n
        rs = up/down
        rsi[i] = 100. - 100./(1.+rs)
    return pd.Series(rsi)

In [372]:
FEATURES = ['stochs', 'rsi', 'macd50_100', 'macd50_100_signal', 'bullish']
HISTORY = 10


In [373]:
len(FEATURES)

5

In [429]:
X, Y = build_features(path=path, bt_results=enriched_btr)

100%|███████████████████████████████████████| 5426/5426 [15:42<00:00,  5.76it/s]


In [430]:
for j in range(len(FEATURES)):
    
    for i in range(HISTORY):
        X.rename(columns={j + i * len(FEATURES): FEATURES[j] + f"_{i}"}, inplace=True)

In [431]:
Y.rename(columns={0: "label"}, inplace=True)

In [432]:
X.to_csv('bounce_features.csv', index=False)

In [433]:
Y.to_csv('bounce_labels.csv', index=False)


## Read data 

In [596]:
X = pd.read_csv('bounce_features.csv')
Y = pd.read_csv('bounce_labels.csv')

In [597]:
X.shape, Y.shape

((59019, 53), (59019, 4))

In [598]:
X_sorted = X.sort_values(by=['EntryTime'])
Y_sorted = Y.sort_values(by=['EntryTime'])

In [599]:
X_sorted['EntryTime'] = pd.to_datetime(X_sorted['EntryTime'])
X_sorted['ExitTime'] = pd.to_datetime(X_sorted['ExitTime'])

Y_sorted['EntryTime'] = pd.to_datetime(Y_sorted['EntryTime'])
Y_sorted['ExitTime'] = pd.to_datetime(Y_sorted['ExitTime'])

In [600]:
X_sorted['EntryYear'] = X_sorted['EntryTime'].dt.year
X_sorted['ExitYear'] = X_sorted['ExitTime'].dt.year

Y_sorted['EntryYear'] = Y_sorted['EntryTime'].dt.year
Y_sorted['ExitYear'] = Y_sorted['ExitTime'].dt.year

In [601]:
# train before 2015

X_train = X_sorted[X_sorted.EntryYear <= 2015].drop(columns=['EntryTime', 'EntryYear', 'ExitTime', 'ExitYear', 'Symbol'])
X_test = X_sorted[X_sorted.EntryYear > 2015].drop(columns=['EntryTime', 'EntryYear', 'ExitTime', 'ExitYear', 'Symbol'])

Y_train = Y_sorted[Y_sorted.EntryYear <= 2015]['label']
Y_test = Y_sorted[Y_sorted.EntryYear > 2015]["label"]

In [602]:
Y_train.shape, X_train.shape, X_test.shape, Y_test.shape

((48508,), (48508, 50), (10511, 50), (10511,))

In [603]:
cols2use = [col for col in X_train.columns if "macd" not in col]

In [610]:
from sklearn.metrics import recall_score, precision_score

def evalute(model, x_train, x_test):
    
    y_pred_train = model.predict(x_train)
    y_pred_test = model.predict(x_test)
    pr_train = precision_score(y_pred_train,Y_train)
    pr_test = precision_score(y_pred_test,Y_test)
    re_train = recall_score(y_pred_train,Y_train)
    re_test = recall_score(y_pred_test, Y_test)

    print('Train Precision is:', pr_train*100)
    print('Test Precision is:', pr_test*100)
    print('Train Recall is:', re_train*100)
    print('Test Recall is:', re_test*100)

In [611]:
from lightgbm import LGBMClassifier

In [621]:
clf = LGBMClassifier(random_state = 2,
                     n_estimators = 500,
                     learning_rate=0.6,
                    )

In [622]:
clf.fit(X_train, Y_train)

LGBMClassifier(learning_rate=0.6, n_estimators=500, random_state=2)

In [623]:
evalute(clf, X_train, X_test)

Train Precision is: 98.31823360277762
Test Precision is: 30.014292520247736
Train Recall is: 99.72486656028174
Test Recall is: 40.723981900452486


In [543]:
sample_df = pd.read_csv('yahoo.csv')

## Backtest From 2018

In [145]:
bt = Backtest()

In [545]:
ress = pd.DataFrame()

for s in sample_df.Symbol.unique():
    try:
        data = sample_df[sample_df.Symbol == s]
        data = data[data['Date']>pd.to_datetime('2018-01-01')]
        data.set_index('Date', inplace=True)
        res = bt.run(strategy=Bouncing, data=data)._trades
        res['Symbol'] = [s for _ in range(len(res))]
        ress = ress.append(res)
    
    except:
        pass

In [546]:
ress

In [180]:
(ress.PnL >= 0).mean()

0.3754361479413817

In [181]:
ress.drop_duplicates(inplace=True)

In [200]:
len(ress)

3853

In [215]:
X = np.array(np.zeros((1,history * len(features))))
Y = np.array(np.zeros(1,))

In [216]:
stock_names = ress.Symbol.unique()

for symbol in tqdm(stock_names):
    trades = ress[ress.Symbol == symbol]
    df = sample_df[sample_df.Symbol == symbol]
    df.index = range(len(df))
    ## supports 
    for i in [18,50,100, 150, 200]:
        df['EMA' + str(i)] = df['Close'].ewm(span = i, adjust = False).mean()

    df['stochs'] = STOCHASTICS(df, 5, 3)/100
    df['rsi'] = RSI(df['Close'], n = 2)/100
    df['macd50_100'] = df['EMA50'] - df['EMA100'] 
    
    for f in  ['Open','High','Low','Close']:
        df[f] = (df[f].pct_change(1)).cumsum()
        df.dropna(inplace = True)

    ## supports 
    for i in [18,50,100, 150, 200]:
        df['EMA' + str(i)] = df['Close'].ewm(span = i, adjust = False).mean()
    
    
    for i, t in trades.iterrows():
        try:
        
            df_history = df.iloc[int(t.EntryBar) - history: int(t.EntryBar)]
            X = np.append(X, df_history[features].to_numpy().reshape((1, history * len(features))), axis = 0)
            Y = np.append(Y, t.PnL>0)
        
        except:
            pass 

100%|█████████████████████████████████████████| 856/856 [07:15<00:00,  1.97it/s]


In [217]:
X = X[1:]
Y = Y[1:]
X.shape, Y.shape

((3835, 80), (3835,))

In [218]:
X_df = pd.DataFrame(X)
Y_df = pd.DataFrame(Y)

In [219]:
X_df = X_df.replace([np.inf, -np.inf], np.nan)

In [220]:
Y_df = Y_df.iloc[X_df.index]

In [221]:
clf.predict(X_df)

array([0., 0., 1., ..., 0., 1., 0.])

In [229]:
recall_score(Y_df, clf.predict(X_df)), precision_score(Y_df, clf.predict(X_df))

(0.2364394993045897, 0.396732788798133)

## Checks up

In [91]:
symbol = np.random.choice(stock_names)
symbol

'bcs_d.us.txt'

In [92]:
trades = enriched_btr[enriched_btr.Symbol == symbol]
df = pd.read_csv(f"{path}/{symbol}")



In [93]:
len(trades)

4