In [1]:
from sklearn.preprocessing import StandardScaler

from influxdb.commands import InfluxDBQueries, DataProcessor
import warnings
import pandas as pd
import tensorflow as tf

# Hide every GPU from TensorFlow so all ops in this notebook run on the CPU.
tf.config.set_visible_devices([], 'GPU')

# Seed the Python, NumPy and TensorFlow RNGs in one call so weight
# initialisation and batch shuffling are repeatable across kernel restarts.
tf.keras.utils.set_random_seed(42)

# Silence pandas' SettingWithCopyWarning for the whole session.
# NOTE(review): this also hides warnings from the slice writes near the end of
# the notebook — consider fixing those writes instead of muting the warning.
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

In [2]:
# Query settings shared by the three splits: BTCUSDT aggregated into 5-minute buckets.
influx_query_kwargs = dict(groupby_time='5m', pair='BTCUSDT')

# Training window: 2024-01-01 -> 2025-01-01.
df_train = InfluxDBQueries.get_data_from_influx(
    start="2024-01-01T01:00:00Z",
    stop="2025-01-01T01:00:00Z",
    **influx_query_kwargs,
)

# Validation window: 2025-01-01 -> 2025-03-01.
df_val = InfluxDBQueries.get_data_from_influx(
    start="2025-01-01T01:00:00Z",
    stop="2025-03-01T01:00:00Z",
    **influx_query_kwargs,
)

# Test window: 2025-03-01 -> 2025-04-01.
df_test = InfluxDBQueries.get_data_from_influx(
    start="2025-03-01T01:00:00Z",
    stop="2025-04-01T01:00:00Z",
    **influx_query_kwargs,
)

In [3]:
# Stack the three query windows in chronological order (train -> val -> test).
# The previous order (train, test, val) put March 2025 before Jan-Feb 2025, so
# any order-dependent step applied to the combined frame would see a time jump.
# The stable sort is a cheap guard that keeps the index monotonic even if the
# query windows ever overlap or are reordered.
df_combined = pd.concat([df_train, df_val, df_test]).sort_index(kind='mergesort')

In [4]:
# Run feature engineering on the combined frame and unpack its four results.
# NOTE(review): df_combined is both the input and the output here, so re-running
# this cell feeds already-processed data back into add_features.
# NOTE(review): presumably `scaler` / `scaler_y` are fitted inside add_features on
# train+val+test together; if so, validation/test statistics leak into training — confirm.
feature_bundle = DataProcessor.add_features(df_combined)
df_combined, scaler, columns, scaler_y = feature_bundle

DatetimeIndex(['2024-01-01 01:15:00+00:00', '2024-01-01 01:20:00+00:00',
               '2024-01-01 01:25:00+00:00', '2024-01-01 01:30:00+00:00',
               '2024-01-01 01:35:00+00:00', '2024-01-01 01:40:00+00:00',
               '2024-01-01 01:45:00+00:00', '2024-01-01 01:50:00+00:00',
               '2024-01-01 01:55:00+00:00', '2024-01-01 02:00:00+00:00',
               ...
               '2025-04-01 00:15:00+00:00', '2025-04-01 00:20:00+00:00',
               '2025-04-01 00:25:00+00:00', '2025-04-01 00:30:00+00:00',
               '2025-04-01 00:35:00+00:00', '2025-04-01 00:40:00+00:00',
               '2025-04-01 00:45:00+00:00', '2025-04-01 00:50:00+00:00',
               '2025-04-01 00:55:00+00:00', '2025-04-01 01:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='time', length=131265, freq=None)
DatetimeIndex(['2024-01-01 01:15:00+00:00', '2024-01-01 01:20:00+00:00',
               '2024-01-01 01:25:00+00:00', '2024-01-01 01:30:00+00:00',
               '2024-0

In [5]:
# Split the feature-engineered frame back into train / validation / test by time.
# The boundaries are the first timestamps of the raw validation and test frames;
# capture them once, up front, because df_val and df_test are rebound below.
val_start = df_val.index[0]
test_start = df_test.index[0]

# Compare the DatetimeIndex with the boundary timestamps directly instead of
# formatting every index entry to a string and comparing lexicographically.
# NOTE(review): assumes the raw query frames and df_combined share the same
# (UTC-aware) index type — confirm.
combined_times = df_combined.index
df_train = df_combined.loc[combined_times < val_start]
df_val = df_combined.loc[(combined_times >= val_start) & (combined_times < test_start)]
df_test = df_combined.loc[combined_times >= test_start]

In [6]:
# Inspect the training target column (the recorded output shows NaNs in the
# earliest rows, which are dropped in a later cell).
df_train.loc[:, 'target']

time
2024-01-02 01:10:00+00:00         NaN
2024-01-02 01:15:00+00:00         NaN
2024-01-02 01:20:00+00:00         NaN
2024-01-02 01:25:00+00:00         NaN
2024-01-02 01:30:00+00:00         NaN
                               ...   
2025-01-01 00:40:00+00:00   -0.795257
2025-01-01 00:45:00+00:00   -0.704135
2025-01-01 00:50:00+00:00   -0.774324
2025-01-01 00:55:00+00:00   -1.094592
2025-01-01 01:00:00+00:00   -1.473444
Name: target, Length: 104914, dtype: float64

In [7]:
# Remove every row containing a NaN from each split before building sequences.
df_train, df_val, df_test = (
    split.dropna() for split in (df_train, df_val, df_test)
)

In [8]:
# Build windowed training inputs/targets from the training split.
X_train, y_train = DataProcessor.make_sequences(
    df_train,
    columns_to_select=columns,
    window_size=288,     # 288 rows of 5-minute data (24 h when there are no gaps)
    forecast_horizon=1,  # matches output_steps=1 used for the model
)

In [9]:
# Build windowed validation inputs/targets with the same settings as training.
X_val, y_val = DataProcessor.make_sequences(
    df_val,
    columns_to_select=columns,
    window_size=288,     # 288 rows of 5-minute data (24 h when there are no gaps)
    forecast_horizon=1,
)

In [10]:
from influxdb.commands import LSTMModel

In [11]:
# Sanity-check the windowed training array. The recorded output is
# (104613, 288, 25); axis 1 equals the window_size=288 passed to make_sequences.
# Presumably the last axis is the number of selected feature columns — TODO confirm.
X_train.shape

(104613, 288, 25)

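The model's `input_shape` must match the per-sample shape of the training windows, i.e. `X_train.shape[1:]`. The cell above reports `X_train.shape == (104613, 288, 25)`: 288 time steps per window (the `window_size` passed to `make_sequences`) and 25 features per step. The LSTM below is therefore built with `input_shape=(288, len(columns))`.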



In [12]:
# Take (time steps, features) from the training windows themselves rather than
# repeating the window size and column count by hand, so the model input always
# matches the data it is fitted on.
sequence_shape = tuple(X_train.shape[1:])

# Single-step forecaster (output_steps=1) with 32 LSTM units.
lstm_forecaster = LSTMModel(input_shape=sequence_shape, output_steps=1, lstm_units=32)

In [None]:
# Train the forecaster; the recorded log shows early stopping on val_loss and
# checkpoints written to models/best_model.h5.
lstm_forecaster.fit(
    X_train, y_train,
    X_val, y_val,
    epochs=200,
    batch_size=128,
    patience=10,
)

Training started with EarlyStopping (patience=10)...
Epoch 1/200
[1m818/818[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 543ms/step - directional_accuracy: 0.5007 - loss: 1.2818 - mse: 0.6182
Epoch 1: val_loss improved from inf to 0.80933, saving model to models/best_model.h5




[1m818/818[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m474s[0m 577ms/step - directional_accuracy: 0.5007 - loss: 1.2812 - mse: 0.6179 - val_directional_accuracy: 0.5299 - val_loss: 0.8093 - val_mse: 0.1938 - learning_rate: 0.0010
Epoch 2/200
[1m818/818[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 555ms/step - directional_accuracy: 0.5010 - loss: 0.4062 - mse: 0.1557
Epoch 2: val_loss improved from 0.80933 to 0.61506, saving model to models/best_model.h5




[1m818/818[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m477s[0m 583ms/step - directional_accuracy: 0.5010 - loss: 0.4062 - mse: 0.1557 - val_directional_accuracy: 0.5298 - val_loss: 0.6151 - val_mse: 0.1358 - learning_rate: 0.0010
Epoch 3/200
[1m818/818[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 561ms/step - directional_accuracy: 0.5012 - loss: 0.3433 - mse: 0.1271
Epoch 3: val_loss did not improve from 0.61506
[1m818/818[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m482s[0m 589ms/step - directional_accuracy: 0.5012 - loss: 0.3433 - mse: 0.1271 - val_directional_accuracy: 0.5293 - val_loss: 0.6189 - val_mse: 0.1443 - learning_rate: 0.0010
Epoch 4/200
[1m818/818[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 579ms/step - directional_accuracy: 0.5011 - loss: 0.3222 - mse: 0.1123
Epoch 4: val_loss improved from 0.61506 to 0.53092, saving model to models/best_model.h5




[1m818/818[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m496s[0m 607ms/step - directional_accuracy: 0.5011 - loss: 0.3222 - mse: 0.1123 - val_directional_accuracy: 0.5293 - val_loss: 0.5309 - val_mse: 0.1094 - learning_rate: 0.0010
Epoch 5/200
[1m523/818[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m2:44[0m 557ms/step - directional_accuracy: 0.5012 - loss: 0.3049 - mse: 0.1083

In [34]:
# Build windowed test inputs/targets with the same settings as train/validation.
X_test, y_test = DataProcessor.make_sequences(
    df_test,
    columns_to_select=columns,
    window_size=288,     # 288 rows of 5-minute data (24 h when there are no gaps)
    forecast_horizon=1,
)

In [35]:
# Run the forecaster over the test windows.
# NOTE(review): this uses whatever weights are currently held by lstm_forecaster;
# the recorded training log stops part-way through epoch 5 while the best
# checkpoint is saved to models/best_model.h5 — confirm which weights are intended.
pred = lstm_forecaster.predict(X_test)

[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 68ms/step


In [36]:
# Score the underlying Keras model on the test windows. Per the recorded output
# the returned list lines up with the logged metrics: loss, mse, directional_accuracy.
test_metrics = lstm_forecaster.model.evaluate(X_test, y_test)
test_metrics

[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - directional_accuracy: 0.4861 - loss: 0.0032 - mse: 2.2232e-05


[0.003195805475115776, 2.2317519324133173e-05, 0.4799107015132904]

In [None]:
# Peek at the final entry of the prediction array.
pred[-1]

In [None]:
# NOTE(review): this only binds a second name to the same DataFrame object (no
# copy is made), and the next cell rebinds df_test_pred straight away — this
# cell looks redundant and could be removed.
df_test_pred = df_test

In [None]:
# Take the first 12 test rows as an independent copy. Later cells write the
# prediction and an inverse transform into this frame; without the copy those
# writes target a slice of df_test and may or may not leak back into it
# (the SettingWithCopyWarning that would flag this is suppressed at the top).
df_test_pred = df_test.iloc[:12].copy()

In [None]:
# Put the prediction into the 'close' column by building a new frame with
# assign() instead of writing a column into what may still be a slice of df_test.
# NOTE(review): the model is built with output_steps=1, so pred[0] is presumably a
# single forecast step rather than 12 values — confirm it is meant to fill all rows.
# NOTE(review): add_features also returned scaler_y, which is never used in this
# notebook; confirm that `scaler` (not scaler_y) is the right inverse for predictions.
df_test_pred = df_test_pred.assign(close=pred[0])

In [None]:
# Map the feature columns of the prediction frame back to their original scale.
unscaled_pred_rows = scaler.inverse_transform(df_test_pred[columns])
df_test_pred[columns] = unscaled_pred_rows

In [None]:
# Display the 12-row prediction frame after the inverse transform.
df_test_pred

In [None]:
# Map the feature columns of the test split back to their original scale.
# NOTE(review): this overwrites df_test in place, so running the cell twice applies
# the inverse transform twice, and rebuilding X_test afterwards would use unscaled data.
unscaled_test = scaler.inverse_transform(df_test[columns])
df_test[columns] = unscaled_test

In [None]:
# Display the test frame after its feature columns were inverse-transformed.
df_test

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Overlay the actual and predicted 'close' series for the first 12 test rows.
# Explicit axes plus title/labels let the figure stand on its own, and
# plt.show() keeps the legend repr out of the cell output.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(df_test.iloc[:12]['close'], label='Actual')
ax.plot(df_test_pred['close'], label='Predicted')
ax.set_title('BTCUSDT close: actual vs predicted (first 12 test rows)')
ax.set_xlabel('time')
ax.set_ylabel('close')
ax.legend()
plt.show()