In [1]:
import vectorbtpro as vbt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
vbt.settings.set_theme('dark')
vbt.settings['plotting']['layout']['width'] = 800
vbt.settings['plotting']['layout']['height'] = 400

In [2]:
sol_data = pd.read_csv('2ySOLdata1h.csv')
sol_data['timestamp'] = pd.to_datetime(sol_data['timestamp'], unit='s')
sol_data.set_index('timestamp', inplace=True)
pd.set_option('future.no_silent_downcasting', True)
data = sol_data.iloc[:, 0:5].copy()

In [3]:

data_trimmed = data.copy()
pd.set_option('future.no_silent_downcasting', True)
data_trimmed.loc[:, 'signal'] = 'SignalNone'


# Define window size
window_size = 10

rolling_max = data_trimmed.loc[:,'Close'].rolling(window=2*window_size+1, center=True, min_periods=1).max()
rolling_min = data_trimmed.loc[:,'Close'].rolling(window=2*window_size+1, center=True, min_periods=1).min()

is_peak = (data_trimmed.loc[:, 'Close'] == rolling_max)

is_low = (data_trimmed.loc[:, 'Close'] == rolling_min) 

# Update signal columns where conditions are met
data_trimmed.loc[is_peak, 'signal'] = 'SignalShort'  # Mark peaks with SignalShort
data_trimmed.loc[is_low, 'signal'] = 'SignalLong'   # Mark lows with SignalLong
df = data_trimmed.copy()
df_filtered = df[df['signal'] != 'SignalNone'].copy()

# Iterate through the DataFrame and adjust the signals
for i in range(1, len(df_filtered)):
    current_signal = df_filtered.iloc[i]['signal']
    previous_signal = df_filtered.iloc[i - 1]['signal']
    current_close = df_filtered.iloc[i]['Close']
    previous_close = df_filtered.iloc[i - 1]['Close']
    
    if current_signal == previous_signal:
        if current_signal == 'SignalLong' and previous_close > current_close:
            df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
        elif current_signal != 'SignalLong' and previous_close < current_close:
            df_filtered.iloc[i - 1, df_filtered.columns.get_loc('signal')] = 'SignalNone'
        else:
            df_filtered.iloc[i, df_filtered.columns.get_loc('signal')] = 'SignalNone'


df.update(df_filtered)

df.loc[:,'signal'] = df.loc[:,'signal'].replace({'SignalLong': 1, 'SignalShort': 0, 'SignalNone': 0.5})
df = df.ffill()

df['signal'] = df['signal'].astype(float)
long_signals = df['signal'] == 1
short_signals = df['signal'] == 0

scaler = MinMaxScaler(feature_range=(0, 1))
for idx in df.index[:-1]:
    if short_signals.loc[idx]:
        short_index = idx
        next_long_idx = df.loc[idx:].index[long_signals[idx:]].min()
        bear_slice = df.loc[short_index : next_long_idx].copy()
        bear_slice['signal'] = bear_slice['Close']
        signal_values = bear_slice['signal'].values.reshape(-1, 1)
        scaled_signal_values = scaler.fit_transform(signal_values)
        scaled_signal_values_transformed = 1 - (scaled_signal_values)
        bear_slice['signal'] = scaled_signal_values_transformed.flatten()
        df.update(bear_slice)
    elif long_signals.loc[idx]:
        long_index = idx
        next_short_idx = df.loc[idx:].index[short_signals[idx:]].min()
        bull_slice = df.loc[long_index : next_short_idx].copy()
        bull_slice['signal'] = bull_slice['Close']
        signal_values = bull_slice['signal'].values.reshape(-1, 1)
        scaled_signal_values = scaler.fit_transform(signal_values)
        scaled_signal_values_transformed = 1 - (scaled_signal_values)
        bull_slice['signal'] = scaled_signal_values_transformed.flatten()
        df.update(bull_slice)


In [4]:
data = vbt.Data.from_data(df)

features = data.run("talib", mavp=vbt.run_arg_dict(periods=14))

data.data['symbol'] = pd.concat([data.data['symbol'], features], axis=1)
data.data['symbol'].drop(['Open', 'High', 'Low', ('cosh', 'real'), ('exp', 'real'), ('sinh', 'real')], axis=1, inplace=True)
# This will drop columns from the DataFrame where all values are NaN
data.data['symbol'] = data.data['symbol'].dropna(axis=1, how='all')


data.data['symbol'] = data.data['symbol'].dropna()
predictor_list = data.data['symbol'].drop('signal', axis=1).columns.tolist()
X = data.data['symbol'][predictor_list]

y = data.data['symbol']['signal']

X.columns = X.columns.astype(str)

In [5]:
data = vbt.Data.from_data(df)
data

<vectorbtpro.data.base.Data at 0x16bfa1e10>

In [6]:
open_price = data.get('Open')
high_price = data.get('High')
low_price = data.get('Low')
close_price = data.get('Close')

In [7]:
# vbt.IF.list_indicators("vbt") 

In [8]:
# vbt.phelp(vbt.SUPERTREND.run)

In [9]:
# adx = vbt.ADX.run(high_price, low_price, close_price, window=14)
# atr = vbt.ATR.run(high_price, low_price, close_price, window=14)
# bbands = vbt.BBANDS.run(close_price, window=14)
# rsi = vbt.RSI.run(close_price)
# sma = vbt.MA.run(close_price, window=20)
# strend = vbt.SUPERTREND.run(high_price, low_price, close_price, period=7, multiplier=3)


In [10]:
# data.data['symbol']['ADX'] = adx.adx
# data.data['symbol']['ATR'] = atr.atr
# data.data['symbol']['LBBAND'] = bbands.lower
# data.data['symbol']['UBBAND'] = bbands.upper
# data.data['symbol']['RSI'] = rsi.rsi
# data.data['symbol']['SMA'] = sma.ma
# data.data['symbol']['STREND'] = strend.trend

In [11]:
from sklearn.impute import SimpleImputer

# Assuming X is your input DataFrame
# Replace inf/-inf with NaN
X.replace([np.inf, -np.inf], np.nan, inplace=True)

# Now, you can choose to impute the NaN values. Here's an example using median imputation:
imputer = SimpleImputer(strategy='median')
X_imputed = imputer.fit_transform(X)

# X_imputed is now a NumPy array. If you need it back as a DataFrame:
X_clean = pd.DataFrame(X_imputed, columns=X.columns)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.replace([np.inf, -np.inf], np.nan, inplace=True)


In [12]:
X

Unnamed: 0_level_0,Close,Volume,"('ad', 'real')","('add', 'real')","('adosc', 'real')","('adx', 'real')","('adxr', 'real')","('apo', 'real')","('aroon', 'aroondown')","('aroon', 'aroonup')",...,"('trange', 'real')","('trima', 'real')","('trix', 'real')","('tsf', 'real')","('typprice', 'real')","('ultosc', 'real')","('var', 'real')","('wclprice', 'real')","('willr', 'real')","('wma', 'real')"
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-04 20:00:00+00:00,168.630,223226,-5.542854e+05,336.920,-78226.605683,24.625521,31.268133,0.905321,0.000000,71.428571,...,2.520,169.158083,-0.068943,170.902747,168.516667,42.940558,3.005456,168.54500,-77.386197,169.718344
2022-01-04 21:00:00+00:00,168.760,111808,-5.686608e+05,337.700,-85589.728297,24.450194,31.073205,0.985705,85.714286,64.285714,...,1.400,169.188625,-0.067684,170.143736,168.820000,41.576409,1.926296,168.80500,-75.477239,169.655140
2022-01-04 22:00:00+00:00,169.140,76645,-5.213408e+05,337.570,-65764.680628,24.287390,30.768574,1.127949,78.571429,57.142857,...,1.150,169.250458,-0.066491,169.654615,168.903333,39.694054,0.721016,168.96250,-69.897210,169.627527
2022-01-04 23:00:00+00:00,168.980,73878,-4.823109e+05,337.400,-39257.226182,24.160373,30.710014,1.033269,71.428571,50.000000,...,1.060,169.324542,-0.065373,169.053846,168.793333,42.900560,0.292016,168.84000,-72.246696,169.593634
2022-01-05 00:00:00+00:00,167.830,99582,-5.412739e+05,336.560,-43605.391719,24.433595,30.193484,0.762885,64.285714,42.857143,...,1.520,169.400000,-0.064491,167.962308,168.130000,41.797605,0.206456,168.05500,-89.133627,169.488301
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-31 02:00:00+00:00,101.411,731765,6.352426e+07,204.271,-218226.323154,14.175673,16.116652,-0.363718,100.000000,35.714286,...,1.855,103.420617,-0.051720,101.954143,101.894000,40.869890,0.225366,101.77325,-94.646624,103.080923
2023-12-31 03:00:00+00:00,100.738,970135,6.329972e+07,201.894,-325294.607736,15.243094,16.575318,-0.434654,100.000000,28.571429,...,1.806,103.332183,-0.053042,101.302714,100.877333,40.527157,0.428556,100.84250,-85.996772,102.895510
2023-12-31 04:00:00+00:00,100.743,858035,6.389982e+07,200.574,-148582.728583,16.472826,16.779804,-0.514019,100.000000,21.428571,...,1.304,103.245583,-0.054629,100.723978,100.439000,46.152872,0.572653,100.51500,-79.347623,102.719424
2023-12-31 05:00:00+00:00,101.974,879783,6.428805e+07,203.065,60743.768042,16.052558,16.319158,-0.525917,92.857143,14.285714,...,2.001,103.160696,-0.056088,100.750209,101.679667,50.303404,0.573200,101.75325,-56.402610,102.630602


In [13]:
np.isfinite(X)

Unnamed: 0_level_0,Close,Volume,"('ad', 'real')","('add', 'real')","('adosc', 'real')","('adx', 'real')","('adxr', 'real')","('apo', 'real')","('aroon', 'aroondown')","('aroon', 'aroonup')",...,"('trange', 'real')","('trima', 'real')","('trix', 'real')","('tsf', 'real')","('typprice', 'real')","('ultosc', 'real')","('var', 'real')","('wclprice', 'real')","('willr', 'real')","('wma', 'real')"
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-04 20:00:00+00:00,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2022-01-04 21:00:00+00:00,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2022-01-04 22:00:00+00:00,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2022-01-04 23:00:00+00:00,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2022-01-05 00:00:00+00:00,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-31 02:00:00+00:00,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2023-12-31 03:00:00+00:00,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2023-12-31 04:00:00+00:00,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2023-12-31 05:00:00+00:00,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True


In [14]:
train_length = int(len(data.data['symbol'])*0.80)

X_train = X[:train_length]
X_test = X[train_length:]
y_train = y[:train_length]
y_test = y[train_length:]

In [15]:
# Regression tree model
from sklearn.tree import DecisionTreeRegressor
dtr = DecisionTreeRegressor(min_samples_leaf=400)

In [16]:
dtr.fit(X_train, y_train)

In [None]:
# import graphviz
# from sklearn import tree

# dot_data = tree.export_graphviz(dtr,
#                                 out_file=None,
#                                 filled=True,
#                                 feature_names=predictor_list)
# graphviz.Source(dot_data)

In [None]:
data.data['symbol']['predicted_signal'] = np.where(
    ((data.data['symbol']['ret20'] > 0.002) & (data.data['symbol']['std20'] > 0.01) & (data.data['symbol']['ret20'] <= 0.049) & (data.data['symbol']['ret40'] <= 0.031)), 1, 0)

In [None]:
# # Store the test dataset in new dataframe df_split
# df_split = data.data['symbol'][train_length:].copy()

# # Store the decision tree's predicted output to signal column of df_split dataframe
# df_split.loc[:, "signal"] = y_pred

In [None]:
signal = data.data['symbol']['predicted_signal']
entries = signal == 1
exits = signal == 0


In [None]:
pf = vbt.Portfolio.from_signals(
    close=data.data['symbol']['Close'],
    long_entries=entries, 
    long_exits=exits,
    size=100,
    size_type='value',
    init_cash='auto'
)

In [None]:
# import joblib
# def save_model(model, model_filename='trained_model.joblib'):
#     joblib.dump(model, model_filename)
#     print(f'Model saved as {model_filename}')
# save_model(clf)

In [None]:
pf.plot({"orders", "cum_returns"}, settings=dict(bm_returns=False)).show()

In [None]:
pf.stats()