In [1]:
import vectorbtpro as vbt
import numpy as np
import pandas as pd

vbt.settings.set_theme('dark')
vbt.settings['plotting']['layout']['width'] = 800
vbt.settings['plotting']['layout']['height'] = 400

In [5]:
df = pd.read_csv('../data/1ySOLdata1hAllHassInd.csv')

In [6]:
def process_data(data, window_size, coin):


    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
    data.set_index('timestamp', inplace=True)
    # pd.set_option('future.no_silent_downcasting', True)

    window_size = window_size
    data_trimmed = data.copy()
    data_trimmed.loc[:, 'signal'] = 'SignalNone'

    rolling_max = data_trimmed.loc[:,'price'].rolling(window=2*window_size+1, center=True, min_periods=1).max()
    rolling_min = data_trimmed.loc[:,'price'].rolling(window=2*window_size+1, center=True, min_periods=1).min()

    is_peak = (data_trimmed.loc[:, 'price'] == rolling_max)

    is_low = (data_trimmed.loc[:, 'price'] == rolling_min)

    data_trimmed.loc[is_peak, 'signal'] = 'SignalShort'
    data_trimmed.loc[is_low, 'signal'] = 'SignalLong'
    df = data_trimmed.copy()

    def filter_pivots(data):
      df_filtered = df[df['signal'] != 'SignalNone']


      for i in range(1, len(df_filtered)):
          current_signal = df_filtered.iloc[i]['signal']
          previous_signal = df_filtered.iloc[i - 1]['signal']
          current_close = df_filtered.iloc[i]['price']
          previous_close = df_filtered.iloc[i - 1]['price']

          if current_signal == previous_signal:
              if current_signal == 'SignalLong':
                  if previous_close > current_close:
                      df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
                  else:
                      df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'
              elif current_signal == 'SignalShort':
                  if previous_close < current_close:
                      df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
                  else:
                      df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'
          elif current_signal != previous_signal:
              if current_signal == 'SignalLong':
                  if previous_close < current_close:
                      df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
                      df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'
              elif current_signal == 'SignalShort':
                  if previous_close > current_close:
                      df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
                      df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'

      return df_filtered



    filter_1 = filter_pivots(df)

    df.update(filter_1)
    next_filter = df[['Close', 'signal']].copy()

    filter_2 = filter_pivots(next_filter)
    df.update(filter_2)


    df_fixed = df.copy()
    df_fixed.loc[:,'signal'] = df_fixed.loc[:,'signal'].replace({'SignalLong': 2, 'SignalShort': 0, 'SignalNone': 1})
    df_fixed = df_fixed.ffill()

    feature_names = [col for col in df_fixed.columns if col != 'signal']

    # Save feature names to a JSON file
    # with open(f"/content/models/{coin}_feature_names.json", 'w') as f:
    #     json.dump(feature_names, f)

    return df_fixed

In [7]:
dfpro = process_data(df, 20, 'SOL')

  df_fixed.loc[:,'signal'] = df_fixed.loc[:,'signal'].replace({'SignalLong': 2, 'SignalShort': 0, 'SignalNone': 1})
  df_fixed = df_fixed.ffill()


In [8]:
data = vbt.Data.from_data(dfpro)


In [22]:
predict_list = ['Close', 'atr', 'bbands_lower', 'bop', 'cci', 'cmo', 'crsi', 'donchian_middle', 'ema', 'ht_trendline', 'ht_trendmode', 'ichi', 'kri', 'macdfix_macd', 'sar', 'stochrsi_fastK', 't3', 'tsi', 'willr']
X = data.data['symbol'][predict_list]
# print(X)

y = data.data['symbol']['signal']
y

timestamp
2023-03-12 13:00:00+00:00    1
2023-03-12 14:00:00+00:00    1
2023-03-12 15:00:00+00:00    1
2023-03-12 16:00:00+00:00    2
2023-03-12 17:00:00+00:00    1
                            ..
2024-03-12 22:00:00+00:00    1
2024-03-12 23:00:00+00:00    1
2024-03-13 00:00:00+00:00    1
2024-03-13 01:00:00+00:00    1
2024-03-13 02:00:00+00:00    1
Freq: h, Name: signal, Length: 8798, dtype: int64

In [23]:
from sklearn.model_selection import train_test_split
split_percentage = 0.8
split = int(split_percentage*len(X))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-split_percentage)


In [24]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(criterion='gini', max_depth=10, min_samples_leaf=5, random_state=13, class_weight='balanced').fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [25]:
y_pred_proba = clf.predict_proba(X_test)

In [26]:
y_pred_proba

array([[0.        , 0.05668606, 0.94331394],
       [0.        , 1.        , 0.        ],
       [0.        , 1.        , 0.        ],
       ...,
       [0.        , 1.        , 0.        ],
       [0.        , 1.        , 0.        ],
       [0.        , 1.        , 0.        ]])

In [27]:
# from sklearn.metrics import classification_report
# from sklearn.metrics import accuracy_score
# report = classification_report(y_test, y_pred)
# accuracy = accuracy_score(y_test, y_pred)
# print(accuracy)

In [28]:
y_pred

array([2, 1, 1, ..., 1, 1, 1])

In [29]:
# Store the test dataset in new dataframe df_split
df_split = data.data['symbol'][split:].copy()

# Store the decision tree's predicted output to signal column of df_split dataframe
df_split.loc[:, "signal"] = y_pred

# df_split

In [34]:
df_split

Unnamed: 0_level_0,price,Open,High,Low,Close,Volume,tema_12,abands_upper,abands_middle,abands_lower,...,trix,tsf,tsi,udrsi,ultosc,var,willr,wws,zlma,signal
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-12-30 19:00:00+00:00,103.300,103.684,103.814,102.850,103.300,535552,106.814433,106.729368,103.230214,99.702939,...,-0.083104,103.309352,-8.162742,40.607483,59.265937,1.002797,-39.516504,103.990916,102.542214,2
2023-12-30 20:00:00+00:00,103.373,103.301,104.200,102.900,103.364,594480,106.814433,106.673523,103.131857,99.646380,...,-0.079810,103.613209,-7.996720,41.741071,57.592126,0.831368,-38.028824,103.946136,102.578071,1
2023-12-30 21:00:00+00:00,103.428,103.360,104.402,103.325,103.428,707664,106.814433,106.676606,103.046571,99.547678,...,-0.076196,103.959824,-7.741808,49.062027,51.960795,0.671749,-36.541144,103.909127,102.546929,1
2023-12-30 22:00:00+00:00,102.559,103.418,103.810,101.820,102.560,737844,106.814433,106.630837,102.975714,99.414765,...,-0.074340,103.909670,-8.891729,60.334884,43.906227,0.665392,-56.717806,103.812761,102.557071,1
2023-12-30 23:00:00+00:00,101.891,102.559,102.679,101.500,101.891,591313,106.814433,106.390841,103.007286,99.605484,...,-0.075067,103.331319,-10.917315,50.937117,44.522624,0.581949,-72.268712,103.675492,102.722786,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-12 22:00:00+00:00,147.501,148.533,148.829,147.201,147.502,586736,150.936160,158.127271,149.632643,141.211914,...,0.038698,147.740286,-1.872457,42.533859,54.852730,2.319365,-49.855565,149.227149,147.722500,1
2024-03-12 23:00:00+00:00,148.511,147.501,148.911,147.100,148.512,472388,150.936160,157.449739,149.518143,141.518668,...,0.026583,147.504912,-2.480677,40.128602,65.884575,2.379338,-41.274427,149.176067,147.842357,0
2024-03-13 00:00:00+00:00,151.201,148.511,151.486,148.061,151.201,1017940,151.845643,157.333346,149.473357,141.357989,...,0.020197,148.391857,-0.226241,50.956550,65.171026,2.198516,-11.917431,149.320705,148.000500,1
2024-03-13 01:00:00+00:00,149.649,151.202,151.852,149.543,149.649,692322,151.845643,157.305340,149.387786,141.284983,...,0.015337,148.683560,-0.021409,47.575559,55.103120,2.058619,-25.897625,149.344155,148.150429,1


In [35]:
signal = df_split['signal']
entries = signal == 2
exits = signal == 0


In [36]:
pf = vbt.Portfolio.from_signals(
    close=df_split.Close, 
    long_entries=entries, 
    long_exits=exits,
    size=100,
    size_type='value',
    init_cash='auto'
)

In [37]:
# import joblib
# def save_model(model, model_filename='trained_model.joblib'):
#     joblib.dump(model, model_filename)
#     print(f'Model saved as {model_filename}')
# save_model(clf)

In [39]:
pf.plot({"orders", "cum_returns"}, settings=dict(bm_returns=False)).show()

In [153]:
pf.stats()

Start                         2023-05-26 21:00:00+00:00
End                           2023-12-31 06:00:00+00:00
Period                                218 days 10:00:00
Start Value                                       100.0
Min Value                                     99.887214
Max Value                                    253.018427
End Value                                    232.796495
Total Return [%]                             132.796495
Benchmark Return [%]                         421.028013
Total Time Exposure [%]                       47.520031
Max Gross Exposure [%]                            100.0
Max Drawdown [%]                              20.299032
Max Drawdown Duration                 110 days 03:00:00
Total Orders                                         45
Total Fees Paid                                     0.0
Total Trades                                         23
Win Rate [%]                                  63.636364
Best Trade [%]                                54