In [1]:
import vectorbtpro as vbt
import numpy as np
import pandas as pd

vbt.settings.set_theme('dark')
vbt.settings['plotting']['layout']['width'] = 800
vbt.settings['plotting']['layout']['height'] = 400

In [2]:
df = pd.read_csv('../data/1ySOLdata1hAllHassInd.csv')

In [3]:
def process_data(data, window_size, coin):


    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
    data.set_index('timestamp', inplace=True)
    # pd.set_option('future.no_silent_downcasting', True)

    window_size = window_size
    data_trimmed = data.copy()
    data_trimmed.loc[:, 'signal'] = 'SignalNone'

    rolling_max = data_trimmed.loc[:,'price'].rolling(window=2*window_size+1, center=True, min_periods=1).max()
    rolling_min = data_trimmed.loc[:,'price'].rolling(window=2*window_size+1, center=True, min_periods=1).min()

    is_peak = (data_trimmed.loc[:, 'price'] == rolling_max)

    is_low = (data_trimmed.loc[:, 'price'] == rolling_min)

    data_trimmed.loc[is_peak, 'signal'] = 'SignalShort'
    data_trimmed.loc[is_low, 'signal'] = 'SignalLong'
    df = data_trimmed.copy()

    def filter_pivots(data):
      df_filtered = df[df['signal'] != 'SignalNone']


      for i in range(1, len(df_filtered)):
          current_signal = df_filtered.iloc[i]['signal']
          previous_signal = df_filtered.iloc[i - 1]['signal']
          current_close = df_filtered.iloc[i]['price']
          previous_close = df_filtered.iloc[i - 1]['price']

          if current_signal == previous_signal:
              if current_signal == 'SignalLong':
                  if previous_close > current_close:
                      df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
                  else:
                      df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'
              elif current_signal == 'SignalShort':
                  if previous_close < current_close:
                      df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
                  else:
                      df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'
          elif current_signal != previous_signal:
              if current_signal == 'SignalLong':
                  if previous_close < current_close:
                      df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
                      df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'
              elif current_signal == 'SignalShort':
                  if previous_close > current_close:
                      df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
                      df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'

      return df_filtered



    filter_1 = filter_pivots(df)

    df.update(filter_1)
    next_filter = df[['Close', 'signal']].copy()

    filter_2 = filter_pivots(next_filter)
    df.update(filter_2)


    df_fixed = df.copy()
    df_fixed.loc[:,'signal'] = df_fixed.loc[:,'signal'].replace({'SignalLong': 2, 'SignalShort': 0, 'SignalNone': 1})
    df_fixed = df_fixed.ffill()

    feature_names = [col for col in df_fixed.columns if col != 'signal']

    # Save feature names to a JSON file
    # with open(f"/content/models/{coin}_feature_names.json", 'w') as f:
    #     json.dump(feature_names, f)

    return df_fixed

In [4]:
dfcopy = df.copy()
dfpro = process_data(dfcopy, 50, 'SOL')

  df_fixed.loc[:,'signal'] = df_fixed.loc[:,'signal'].replace({'SignalLong': 2, 'SignalShort': 0, 'SignalNone': 1})
  df_fixed = df_fixed.ffill()


In [5]:
data = vbt.Data.from_data(dfpro)


In [13]:
predict_list = ['Close', 'abands_upper', 'abands_middle', 'cci', 'cmo', 'donchian_lower', 'ema', 'kama', 'keltner_lower', 'kri', 'mom', 'sar', 'stochrsi_fastK']
X = data.data['symbol'][predict_list]
# print(X)

y = data.data['symbol']['signal']
y

timestamp
2023-03-12 13:00:00+00:00    1
2023-03-12 14:00:00+00:00    1
2023-03-12 15:00:00+00:00    1
2023-03-12 16:00:00+00:00    2
2023-03-12 17:00:00+00:00    1
                            ..
2024-03-12 22:00:00+00:00    1
2024-03-12 23:00:00+00:00    1
2024-03-13 00:00:00+00:00    1
2024-03-13 01:00:00+00:00    1
2024-03-13 02:00:00+00:00    1
Freq: h, Name: signal, Length: 8798, dtype: int64

In [14]:
from sklearn.model_selection import train_test_split
split_percentage = 0.7
split = int(split_percentage*len(X))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-split_percentage)


In [15]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

clf = BaggingClassifier(
        estimator = DecisionTreeClassifier(criterion='gini', max_depth=10, min_samples_leaf=5),
        n_estimators = 10,
        random_state = 42
    ).fit(X_train, y_train)

y_pred = clf.predict(X_test)

In [16]:
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
df_split = data.data['symbol'][split:].copy()
df_split.loc[:, "signal"] = y_pred
signal = df_split['signal']
entries = signal == 'SignalLong'
exits = signal == 'SignalShort'

In [20]:
pf = vbt.Portfolio.from_signals(
    close=df_split.Close, 
    long_entries=entries, 
    long_exits=exits,
    size=100,
    size_type='value',
    init_cash='auto'
)

In [21]:
pf.plot(settings=dict(bm_returns=False)).show()

In [140]:
pf.stats()

Start                         2023-05-26 21:00:00+00:00
End                           2023-12-31 06:00:00+00:00
Period                                218 days 10:00:00
Start Value                                       100.0
Min Value                                     98.836553
Max Value                                    342.182467
End Value                                    305.947321
Total Return [%]                             205.947321
Benchmark Return [%]                         421.028013
Total Time Exposure [%]                       46.127432
Max Gross Exposure [%]                            100.0
Max Drawdown [%]                              13.497222
Max Drawdown Duration                  36 days 04:00:00
Total Orders                                          4
Total Fees Paid                                     0.0
Total Trades                                          2
Win Rate [%]                                      100.0
Best Trade [%]                               137

In [141]:
# import joblib
# def save_model(model, model_filename='trained_model.joblib'):
#     joblib.dump(model, model_filename)
#     print(f'Model saved as {model_filename}')
# save_model(clf)