In [37]:
import vectorbtpro as vbt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
vbt.settings.set_theme('dark')
vbt.settings['plotting']['layout']['width'] = 800
vbt.settings['plotting']['layout']['height'] = 400

In [38]:
sol_data = pd.read_csv('2ySOLdata1h.csv')
sol_data['timestamp'] = pd.to_datetime(sol_data['timestamp'], unit='s')
sol_data.set_index('timestamp', inplace=True)
pd.set_option('future.no_silent_downcasting', True)
data = sol_data.iloc[:, 0:5].copy()

In [39]:

data_trimmed = data.copy()
pd.set_option('future.no_silent_downcasting', True)
data_trimmed.loc[:, 'signal'] = 'SignalNone'


# Define window size
window_size = 10

rolling_max = data_trimmed.loc[:,'Close'].rolling(window=2*window_size+1, center=True, min_periods=1).max()
rolling_min = data_trimmed.loc[:,'Close'].rolling(window=2*window_size+1, center=True, min_periods=1).min()

is_peak = (data_trimmed.loc[:, 'Close'] == rolling_max)

is_low = (data_trimmed.loc[:, 'Close'] == rolling_min) 

# Update signal columns where conditions are met
data_trimmed.loc[is_peak, 'signal'] = 'SignalShort'  # Mark peaks with SignalShort
data_trimmed.loc[is_low, 'signal'] = 'SignalLong'   # Mark lows with SignalLong
df = data_trimmed.copy()
df_filtered = df[df['signal'] != 'SignalNone'].copy()

# Iterate through the DataFrame and adjust the signals
for i in range(1, len(df_filtered)):
    current_signal = df_filtered.iloc[i]['signal']
    previous_signal = df_filtered.iloc[i - 1]['signal']
    current_close = df_filtered.iloc[i]['Close']
    previous_close = df_filtered.iloc[i - 1]['Close']
    
    if current_signal == previous_signal:
        if current_signal == 'SignalLong' and previous_close > current_close:
            df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
        elif current_signal != 'SignalLong' and previous_close < current_close:
            df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
        else:
            df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'


df.update(df_filtered)

df.loc[:,'signal'] = df.loc[:,'signal'].replace({'SignalLong': 1, 'SignalShort': 0, 'SignalNone': 0.5})
df = df.ffill()

df['signal'] = df['signal'].astype(float)
long_signals = df['signal'] == 1
short_signals = df['signal'] == 0

scaler = MinMaxScaler(feature_range=(0, 1))
for idx in df.index[:-1]:
    if short_signals.loc[idx]:
        short_index = idx
        next_long_idx = df.loc[idx:].index[long_signals[idx:]].min()
        bear_slice = df.loc[short_index : next_long_idx].copy()
        bear_slice['signal'] = bear_slice['Close']
        signal_values = bear_slice['signal'].values.reshape(-1, 1)
        scaled_signal_values = scaler.fit_transform(signal_values)
        scaled_signal_values_transformed = 1 - (scaled_signal_values)
        bear_slice['signal'] = scaled_signal_values_transformed.flatten()
        df.update(bear_slice)
    elif long_signals.loc[idx]:
        long_index = idx
        next_short_idx = df.loc[idx:].index[short_signals[idx:]].min()
        bull_slice = df.loc[long_index : next_short_idx].copy()
        bull_slice['signal'] = bull_slice['Close']
        signal_values = bull_slice['signal'].values.reshape(-1, 1)
        scaled_signal_values = scaler.fit_transform(signal_values)
        scaled_signal_values_transformed = 1 - (scaled_signal_values)
        bull_slice['signal'] = scaled_signal_values_transformed.flatten()
        df.update(bull_slice)


In [40]:
data = vbt.Data.from_data(df)

features = data.run("talib", mavp=vbt.run_arg_dict(periods=14))

data.data['symbol'] = pd.concat([data.data['symbol'], features], axis=1)
data.data['symbol'].drop(['Open', 'High', 'Low', ('cosh', 'real'), ('exp', 'real'), ('sinh', 'real')], axis=1, inplace=True)
# This will drop columns from the DataFrame where all values are NaN
data.data['symbol'] = data.data['symbol'].dropna(axis=1, how='all')


data.data['symbol'] = data.data['symbol'].dropna()
predictor_list = data.data['symbol'].drop('signal', axis=1).columns.tolist()
X = data.data['symbol'][predictor_list]

y = data.data['symbol']['signal']

X.columns = X.columns.astype(str)

In [41]:
open_price = data.get('Open')
high_price = data.get('High')
low_price = data.get('Low')
close_price = data.get('Close')

In [42]:
# vbt.IF.list_indicators("vbt") 

In [24]:
# vbt.phelp(vbt.SUPERTREND.run)

In [25]:
# adx = vbt.ADX.run(high_price, low_price, close_price, window=14)
# atr = vbt.ATR.run(high_price, low_price, close_price, window=14)
# bbands = vbt.BBANDS.run(close_price, window=14)
# rsi = vbt.RSI.run(close_price)
# sma = vbt.MA.run(close_price, window=20)
# strend = vbt.SUPERTREND.run(high_price, low_price, close_price, period=7, multiplier=3)


In [44]:
# data.data['symbol']['ADX'] = adx.adx
# data.data['symbol']['ATR'] = atr.atr
# data.data['symbol']['LBBAND'] = bbands.lower
# data.data['symbol']['UBBAND'] = bbands.upper
# data.data['symbol']['RSI'] = rsi.rsi
# data.data['symbol']['SMA'] = sma.ma
# data.data['symbol']['STREND'] = strend.trend

In [45]:
from sklearn.impute import SimpleImputer

# Assuming X is your input DataFrame
# Replace inf/-inf with NaN
X.replace([np.inf, -np.inf], np.nan, inplace=True)

# Now, you can choose to impute the NaN values. Here's an example using median imputation:
imputer = SimpleImputer(strategy='median')
X_imputed = imputer.fit_transform(X)

# X_imputed is now a NumPy array. If you need it back as a DataFrame:
X_clean = pd.DataFrame(X_imputed, columns=X.columns)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.replace([np.inf, -np.inf], np.nan, inplace=True)


In [46]:
train_length = int(len(data.data['symbol'])*0.80)

X_train = X[:train_length]
X_test = X[train_length:]
y_train = y[:train_length]
y_test = y[train_length:]

In [47]:
# Regression tree model
from sklearn.tree import DecisionTreeRegressor
dtr = DecisionTreeRegressor(min_samples_leaf=400)

In [48]:
dtr.fit(X_train, y_train)

In [34]:
# import graphviz
# from sklearn import tree

# dot_data = tree.export_graphviz(dtr,
#                                 out_file=None,
#                                 filled=True,
#                                 feature_names=predictor_list)
# graphviz.Source(dot_data)

In [None]:
# data.data['symbol']['predicted_signal'] = np.where(
#     ((data.data['symbol']['ret20'] > 0.002) & (data.data['symbol']['std20'] > 0.01) & (data.data['symbol']['ret20'] <= 0.049) & (data.data['symbol']['ret40'] <= 0.031)), 1, 0)

In [59]:
dtr.predict(X)

array([0.59333013, 0.52838094, 0.52838094, ..., 0.71377006, 0.64192377,
       0.5569873 ])

In [86]:
data.data['symbol']['predicted_signal'] = np.where(dtr.predict(X) > 0.5, 1, 0)
signal = data.data['symbol']['predicted_signal']
entries = signal == 0
exits = signal == 1


In [87]:
pf = vbt.Portfolio.from_signals(
    close=data.data['symbol']['Close'],
    long_entries=entries, 
    short_entries=exits,
    size=100,
    size_type='value',
    init_cash='auto'
)
pf.plot({"orders", "cum_returns"}, settings=dict(bm_returns=False)).show()

In [70]:
pf.stats()["Total Return [%]"]

121.66228248116195

In [None]:
# # Store the test dataset in new dataframe df_split
# df_split = data.data['symbol'][train_length:].copy()

# # Store the decision tree's predicted output to signal column of df_split dataframe
# df_split.loc[:, "signal"] = y_pred

In [67]:
# import joblib
# def save_model(model, model_filename='trained_model.joblib'):
#     joblib.dump(model, model_filename)
#     print(f'Model saved as {model_filename}')
# save_model(clf)