In [299]:
import vectorbtpro as vbt
import numpy as np
import pandas as pd

vbt.settings.set_theme('dark')
vbt.settings['plotting']['layout']['width'] = 800
vbt.settings['plotting']['layout']['height'] = 400

In [300]:
df = pd.read_csv('2ySOLdata1h.csv')
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
df.set_index('timestamp', inplace=True)

In [301]:
data = vbt.Data.from_data(df)
data

<vectorbtpro.data.base.Data at 0x2aa503430>

In [302]:
# data.plot().show()

In [303]:
open_price = data.get('Open')
high_price = data.get('High')
low_price = data.get('Low')
close_price = data.get('Close')

In [304]:
# vbt.phelp(vbt.BBANDS.run)

In [305]:
adx = vbt.ADX.run(high_price, low_price, close_price, window=14)
atr = vbt.ATR.run(high_price, low_price, close_price, window=14)
bbands = vbt.BBANDS.run(close_price, window=14)
rsi = vbt.RSI.run(close_price)
sma = vbt.MA.run(close_price, window=20)
strend = vbt.SUPERTREND.run(high_price, low_price, close_price, period=7, multiplier=3)


In [306]:
data.data['symbol']['ADX'] = adx.adx
data.data['symbol']['ATR'] = atr.atr
data.data['symbol']['BBAND'] = bbands.bandwidth
data.data['symbol']['RSI'] = rsi.rsi
data.data['symbol']['SMA'] = sma.ma
data.data['symbol']['STREND'] = strend.trend

In [307]:
data.data['symbol'] = data.data['symbol'].dropna()
predictor_list = ['Close', 'ADX', 'ATR', 'BBAND', 'RSI', 'SMA', 'STREND']
X = data.data['symbol'][predictor_list]
# print(X)

y = data.data['symbol']['signal']
# y

In [308]:
from sklearn.model_selection import train_test_split
split_percentage = 0.7
split = int(split_percentage*len(X))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-split_percentage)


In [309]:
max_depth = [int(round(x, 2)) for x in np.linspace(start=10, stop=20, num=5)]

min_samples_leaf = [int(x) for x in np.linspace(start=5, stop=10, num=5)]

# min_samples_split = [int(round(x, 2)) for x in np.linspace(start=2, stop=20, num=5)]

# max_features = [int(x) for x in np.linspace(start=5, stop=15, num=10)]

# max_leaf_nodes = [int(x) for x in np.linspace(start=20, stop=50, num=10)]



# Save these parameters in a dictionry
param_grid = {
    'max_depth': max_depth,
    'min_samples_leaf': min_samples_leaf,
    # 'min_samples_split': min_samples_split,
    # 'max_features': max_features,
    # 'max_leaf_nodes': max_leaf_nodes,
    }

# Print the dictionary
param_grid

{'max_depth': [10, 12, 15, 17, 20], 'min_samples_leaf': [5, 6, 7, 8, 10]}

In [310]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV
clf = DecisionTreeClassifier()


In [311]:
rf_random = RandomizedSearchCV(estimator=clf,
                               param_distributions=param_grid,
                               n_iter=50,
                               random_state=42,
                               cv=5
                               )

rf_random.fit(X_train, y_train)




The total space of parameters 25 is smaller than n_iter=50. Running 25 iterations. For exhaustive searches, use GridSearchCV.



In [312]:
rf_random.best_params_

{'min_samples_leaf': 10, 'max_depth': 10}

In [313]:
best_dec_tree = rf_random.best_estimator_
best_dec_tree.random_state = 42
best_dec_tree.fit(X_train, y_train)


In [314]:
y_pred = best_dec_tree.predict(X_test)
# y_pred

In [315]:
# from sklearn.metrics import classification_report
# from sklearn.metrics import accuracy_score
# report = classification_report(y_test, y_pred)
# accuracy = accuracy_score(y_test, y_pred)
# accuracy

In [316]:
df_split = data.data['symbol'][split:].copy()
df_split.loc[:, "signal"] = y_pred
signal = df_split['signal']
entries = signal == 'SignalLong'
exits = signal == 'SignalShort'

In [317]:
pf = vbt.Portfolio.from_signals(
    close=df_split.Close, 
    long_entries=entries, 
    long_exits=exits,
    size=100,
    size_type='value',
    init_cash='auto'
)

In [318]:
pf.plot(settings=dict(bm_returns=False)).show()

In [319]:
pf.stats()

Start                         2023-05-26 21:00:00+00:00
End                           2023-12-31 06:00:00+00:00
Period                                218 days 10:00:00
Start Value                                  125.980801
Min Value                                     94.085717
Max Value                                    218.908364
End Value                                    210.893187
Total Return [%]                              67.401053
Benchmark Return [%]                         421.028013
Total Time Exposure [%]                       60.549409
Max Gross Exposure [%]                            100.0
Max Drawdown [%]                              29.195084
Max Drawdown Duration                  69 days 03:00:00
Total Orders                                         32
Total Fees Paid                                     0.0
Total Trades                                         16
Win Rate [%]                                       75.0
Best Trade [%]                                36

In [320]:
# import joblib
# def save_model(model, model_filename='trained_model.joblib'):
#     joblib.dump(model, model_filename)
#     print(f'Model saved as {model_filename}')
# save_model(clf)