In [1]:
# Verify that we are using the correct Python (/home/ata/miniconda3/envs/ml/bin/)
!which python
!which jupyter

/home/ata/miniconda3/envs/ml-2/bin/python
/home/ata/miniconda3/envs/ml-2/bin/jupyter


In [1]:
import sys
assert sys.version_info >= (3, 7)

In [2]:
from packaging import version
import tensorflow as tf

assert version.parse(tf.__version__) >= version.parse("2.8.0")

2025-01-08 15:41:08.416174: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-08 15:41:08.419008: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-08 15:41:08.425742: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736332868.440260 1538158 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736332868.444153 1538158 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-08 15:41:08.457924: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [3]:
# Import the class from the Python file (module)
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from BinanceClient import BinanceClient
import numpy as np
from typing import Final
import joblib
from BatchFeatures import BatchFeatures
from datetime import datetime, timedelta
%matplotlib widget

#### Fetch pair data

In [4]:
# Initialize Binance client with your API credentials
# dotenv_path = Path('.env-secret')
# load_dotenv(dotenv_path=dotenv_path)
api_secret = os.getenv("BINANCE_SECRET_KEY")
api_key = os.getenv("BINANCE_API_KEY")

# Create Binance client & initialize it
pair = "BTCUSDT"
time_delta = 12
db_name = pair + "_1min_" + str(time_delta) + "weeks.db"
db_name = "BTCUSDT_1min_dry_run.db"             # For dry run testing
binance_client = BinanceClient(db_name)
binance_client.set_interval("1m")
batch_feature = BatchFeatures()


#### Fecth Data from Binance

In [3]:

# Create connection to fetch data
binance_client.make(api_key, api_secret)

# Get current server time
server_time = binance_client.get_server_time()

# Compute start and end time for the last x hours
server_time_dt = datetime.fromtimestamp(server_time['serverTime'] / 1000, tz=datetime.timezone.utc if hasattr(datetime, 'timezone') else None)
end_date = server_time_dt
# start_date = server_time_dt - timedelta(hours=10)
start_date = server_time_dt - timedelta(weeks=time_delta)
start_date_str = int(start_date.timestamp() * 1000)  # Convert to milliseconds
end_date_str = int(end_date.timestamp() * 1000)      # Convert to milliseconds

# Fetch data
data = binance_client.fetch_data(pair, start_date_str, end_date_str)
binance_client.store_data_to_db(pair, data)

# Check if data is fetched
if not data.empty:
    df = data
else:
    print("No data found!!!.")

Fetching data from Binance API...


#### Fetch data from db

In [5]:
#Fetch data from db
df = binance_client.fetch_data_from_db(pair)

Feature engineering

In [6]:
# Feature Engineering (mind the order since some features are dependent on others)
bf = BatchFeatures()

# Must-have features

# EMA: Compute for short-term and long-term spans
bf.calculate_ema(df, spans=[10, 50])  # Short-term (10), Long-term (50)

# MACD: Standard MACD (12-26-9) and Fast MACD (6-13-5)
bf.calculate_macd(df, spans={'standard': (12, 26, 9), 'fast': (6, 13, 5)})

# RSI: Compute for default (14) and shorter-term (7) windows
bf.calculate_rsi(df, windows=[7, 14])

# Bollinger Bands: Compute for default 20-period with 2 standard deviations
bf.calculate_bollinger_bands(df, window=20, num_std_dev=2)

# Volume Features: Compute for default 20-period
bf.calculate_volume_features(df, windows=[20])  # Include backward-compatible volume_ratio

# Candle Features: Include optional 'candle_range' based on compatibility
bf.calculate_candle_features(df, legacy_compatibility=True)  # Default behavior for backward compatibility


# # Optionals
# bf.calculate_sma(df)
# bf.calculate_atr(df)
# bf.calculate_moving_average_crossover(df)
# bf.calculate_historical_volatility(df)
# bf.calculate_money_flow_index(df)
# bf.calculate_roc(df)
# bf.calculate_stochastic_oscillator(df)
# bf.calculate_williams_r(df)

# # Low value fatures
# bf.calculate_lagged_features(df)
# bf.calculate_on_balance_volume(df)
# bf.calculate_croc(df)

# drop NaNs
df.dropna(inplace=True)

#### Define target 

In [7]:
# Number of candles to look ahead for predictions
nn = 10

# Predict future EMA values
df['target_ema_10'] = df['ema_10'].shift(-nn)
df['target_ema_50'] = df['ema_50'].shift(-nn)

# Predict future MACD line and MACD signal
df['target_macd'] = df['macd'].shift(-nn)
df['target_macd_signal'] = df['macd_signal'].shift(-nn)

# Predict future RSI values
df['target_rsi_14'] = df['rsi_14'].shift(-nn)

# Predict future Bollinger Bands
df['target_bollinger_upper'] = df['bollinger_upper_20'].shift(-nn)
df['target_bollinger_lower'] = df['bollinger_lower_20'].shift(-nn)

# Drop rows with NaN values due to shifting
df.dropna(inplace=True)


#### Test/Train Splot

Split the df into three parts, train, validation and testing

In [26]:
from sklearn.preprocessing import StandardScaler
import joblib
import pandas as pd

# Define split ratios
train_ratio = 0.6
val_ratio = 0.2
test_ratio = 0.2

# Use only the last 3 hours of data for training, validation, and testing
df_recent = df.copy().iloc[-1920*4:]  # Adjust slice as needed

# Calculate split indices
n = len(df_recent)
train_end = int(train_ratio * n)
val_end = train_end + int(val_ratio * n)

# Perform the splits
train_data = df_recent.iloc[:train_end]
val_data = df_recent.iloc[train_end:val_end]
test_data = df_recent.iloc[val_end:]

# Identify target columns (derived targets for the model)
target_columns = [
    'target_ema_10',
    'target_ema_50',
    'target_macd',
    'target_macd_signal',
    'target_rsi_14',
    'target_bollinger_upper',
    'target_bollinger_lower',
]

# Separate features (X_*) and targets (y_*)
X_train = train_data.drop(columns=target_columns)
y_train = train_data[target_columns] / 1e5 # normalize

X_valid = val_data.drop(columns=target_columns)
y_valid = val_data[target_columns] / 1e5 # normalize

X_test = test_data.drop(columns=target_columns)
y_test = test_data[target_columns] / 1e5 # normalize

# Initialize the scaler and scale only the X_* components
scaler = StandardScaler()

# Fit the scaler on the training features and transform
X_train_scaled = scaler.fit_transform(X_train)
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)

# Transform validation and test features
X_valid_scaled = scaler.transform(X_valid)
X_valid_scaled = pd.DataFrame(X_valid_scaled, columns=X_valid.columns, index=X_valid.index)

X_test_scaled = scaler.transform(X_test)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

# Save the scaler for later use
joblib.dump(scaler, 'lstm_scaler_derived_features.pkl')


['lstm_scaler_derived_features.pkl']

Randomize the time sequences

In [27]:
# Sequence length (5 hours = 300 instances for 1-minute resolution)
seq_length = 60
batch_size = 32 * 4

# Create time series datasets
tf.random.set_seed(42)  # Ensures reproducibility

# Training dataset
train_ds = tf.keras.utils.timeseries_dataset_from_array(
    data=X_train_scaled.iloc[:-seq_length].to_numpy(),  # Exclude the last 'seq_length' rows for input
    targets=y_train.iloc[seq_length:].to_numpy(),  # Shift target by 'seq_length'
    sequence_length=seq_length,
    batch_size=batch_size,
    shuffle=True,
    seed=42
)

# Validation dataset
valid_ds = tf.keras.utils.timeseries_dataset_from_array(
    data=X_valid_scaled.iloc[:-seq_length].to_numpy(),
    targets=y_valid.iloc[seq_length:].to_numpy(),  # Ensure alignment with shifted targets
    sequence_length=seq_length,
    batch_size=batch_size
)


In [28]:
def mae_per_target(y_true, y_pred):
    """
    Calculate MAE for each target individually and return the mean (scalar).
    """
    # Ensure consistent data types
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Compute absolute error per target
    abs_error = tf.abs(y_true - y_pred)

    # Calculate mean error per target
    per_target_mae = tf.reduce_mean(abs_error, axis=0)

    # Return the aggregated mean for compatibility with Keras
    return tf.reduce_mean(per_target_mae)


In [24]:
def custom_mvar_loss(y_true, y_pred):
    """
    Custom loss function for MVAR: Computes MAE per target and averages them.
    """
    # Ensure both y_true and y_pred are the same data type
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Compute absolute errors for all targets
    abs_error = tf.abs(y_true - y_pred)

    # Take mean across the batch dimension for each target
    mae_per_target = tf.reduce_mean(abs_error, axis=0)

    # Aggregate by averaging across targets
    loss = tf.reduce_mean(mae_per_target)
    return loss


In [22]:
def fit_and_evaluate_mvar(model, train_set, valid_set, learning_rate, epochs=500):
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=50, restore_best_weights=True
    )
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(
        loss=custom_mvar_loss,
        optimizer=opt,
        metrics=[mae_per_target],  # Collective MAE across all targets
    )

    # Train the model
    history = model.fit(
        train_set, validation_data=valid_set, epochs=epochs, callbacks=[early_stopping_cb]
    )

    # Evaluate the model
    results = model.evaluate(valid_set, return_dict=True)

    # Extract predictions and calculate per-target MAE manually
    y_true = []
    y_pred = []
    for x_batch, y_batch in valid_set:
        y_true.append(y_batch.numpy())
        y_pred.append(model.predict(x_batch))

    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    per_target_mae = np.mean(np.abs(y_true - y_pred), axis=0)
    per_target_metrics = {f"target_{i}_mae": mae for i, mae in enumerate(per_target_mae)}

    return per_target_metrics


Multivar LSTM

In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, activation="relu", return_sequences=True, input_shape=[None, X_train_scaled.shape[1]]),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(64, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(len(target_columns))  # Number of output neurons matches the number of target variables
])


  super().__init__(**kwargs)


In [29]:
# Fit and evaluate the model
metrics = fit_and_evaluate_mvar(model, train_ds, valid_ds, learning_rate=0.001, epochs=500)

# Display per-target MAE
for target, mae in metrics.items():
    print(f"{target}: {mae:.4f}")

Epoch 1/500


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 52ms/step - loss: 15865.4707 - mae_per_target: 15858.1055 - val_loss: 1011.1753 - val_mae_per_target: 1076.2124
Epoch 2/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - loss: 553.8893 - mae_per_target: 553.5843 - val_loss: 287.3329 - val_mae_per_target: 301.6217
Epoch 3/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - loss: 1653.8323 - mae_per_target: 1652.7106 - val_loss: 1204.1383 - val_mae_per_target: 1239.8429
Epoch 4/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - loss: 1732.1566 - mae_per_target: 1730.9668 - val_loss: 536.7826 - val_mae_per_target: 572.3749
Epoch 5/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step - loss: 9234.9287 - mae_per_target: 9219.1045 - val_loss: 755.4918 - val_mae_per_target: 798.2376
Epoch 6/500
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - 

2025-01-08 16:01:26.217885: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Save the trtained model

In [125]:
# Save the model
joblib.dump(model, 'lstm_multivar_10candles_1min.joblib')

['lstm_10candles_1min.joblib']

Load previously trained model

In [16]:
# load the best model
model = joblib.load('lstm_multivar_10candles_1min.joblib')

In [30]:
# Iterate through each target column in y_test
for target in y_test.columns:
    print(f"Statistics for {target}:")
    print(f"  Absolute Mean: {y_test[target].abs().mean():.4f}")
    print(f"  Standard Deviation: {y_test[target].std():.4f}")
    print(f"  Minimum Value: {y_test[target].min():.4f}")
    print(f"  Maximum Value: {y_test[target].max():.4f}")
    print("-" * 40)


Statistics for target_ema_10:
  Absolute Mean: 0.9504
  Standard Deviation: 0.0151
  Minimum Value: 0.9182
  Maximum Value: 0.9719
----------------------------------------
Statistics for target_ema_50:
  Absolute Mean: 0.9502
  Standard Deviation: 0.0147
  Minimum Value: 0.9223
  Maximum Value: 0.9710
----------------------------------------
Statistics for target_macd:
  Absolute Mean: 0.0006
  Standard Deviation: 0.0015
  Minimum Value: -0.0021
  Maximum Value: 0.0132
----------------------------------------
Statistics for target_macd_signal:
  Absolute Mean: 0.0006
  Standard Deviation: 0.0014
  Minimum Value: -0.0018
  Maximum Value: 0.0116
----------------------------------------
Statistics for target_rsi_14:
  Absolute Mean: 0.0005
  Standard Deviation: 0.0002
  Minimum Value: 0.0000
  Maximum Value: 0.0010
----------------------------------------
Statistics for target_bollinger_upper:
  Absolute Mean: 0.9526
  Standard Deviation: 0.0156
  Minimum Value: 0.9229
  Maximum Value: 1.

In [14]:
INITIAL_BALANCE=1000

In [33]:
# Generate sequences for LSTM input
X_test_copy = X_test_scaled.copy()
y_test_copy = y_test.copy()
X_test_list = []
y_test_list = []

# Create sequences for test data
for i in range(seq_length, len(X_test_copy)):
    X_test_list.append(X_test_copy.iloc[i - seq_length:i].to_numpy())  # Create sequence
    y_test_list.append(y_test_copy.iloc[i])

X_test_list = np.array(X_test_list)
y_test_list = np.array(y_test_list)

# Generate predictions
predictions_scaled = model.predict(X_test_list)  # Predictions are in scaled space

# Unscale predictions and actual values
X_test_reversed = scaler.inverse_transform(X_test_scaled)  # Reverse scale inputs if needed
predictions_unscaled = predictions_scaled * 1e5  # Assuming y_* scaling was 1e5
actuals = y_test_list * 1e5

# Ensure lengths match
results_length = len(X_test_copy) - seq_length
predictions = predictions[:results_length]  # Adjust predictions to match available rows
actuals = actuals[:results_length]  # Adjust actuals similarly

# Convert predictions and actuals to a DataFrame for analysis
results_df = X_test.iloc[seq_length:seq_length + results_length].copy()  # Align indices with available rows
for i, target_name in enumerate(y_test.columns):
    results_df[f"Predicted {target_name}"] = predictions[:, i]
    results_df[f"Actual {target_name}"] = actuals[:, i]

# Display or save the results
print(results_df.head())

# Optional: Save the results to a CSV file for detailed analysis
results_df.to_csv("predictions_vs_actuals.csv", index=False)


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
                         open      high       low     close   volume  \
timestamp                                                              
2024-12-30 03:20:00  93970.01  94023.30  93966.66  94023.30  4.86591   
2024-12-30 03:21:00  94023.30  94023.30  93964.42  93964.42  4.72563   
2024-12-30 03:22:00  93964.43  93964.43  93896.58  93952.52  5.19245   
2024-12-30 03:23:00  93952.51  93955.32  93860.62  93860.63  4.21085   
2024-12-30 03:24:00  93860.62  93860.63  93824.61  93851.09  3.45070   

                           ema_10        ema_50       macd  macd_signal  \
timestamp                                                                 
2024-12-30 03:20:00  94002.906588  93858.902875  56.272919    69.077061   
2024-12-30 03:21:00  93995.909026  93863.040802  49.398179    65.141284   
2024-12-30 03:22:00  93988.020112  93866.549790  42.499757    60.612979   
2024-12-30 03:23:00  93964.858274  93866.317641

In [None]:
results_df.to_csv('results_df.csv', index=False)

In [None]:
# results_df['Actual Future Price'] = results_df['close']  # Actual future close price

plt.clf()
plt.figure(figsize=(12, 7))
# results_df['Predicted Price'].plot()
# results_df['close'].plot()
results_df['Predicted Change'].plot()
results_df['Actual Change'].plot()
plt.legend()
plt.grid()
plt.show()

In [48]:
predict_ch_min = results_df["Predicted Change"].min()
predict_ch_max = results_df["Predicted Change"].max()
print(f'Min Change: {predict_ch_min}')
print(f'Max Change: {predict_ch_max}')

Min Change: -0.5142806172370911
Max Change: 0.9654271602630615


In [69]:
def simulate_trades(signals, prices, initial_balance=1000):
    """
    Simulates trades based on signals and actual price changes.

    Args:
        actual_changes (list or pd.Series): Actual percentage changes (not predicted).
        signals (list): List of trading signals ("Buy", "Sell", "Hold").
        prices (list or pd.Series): Actual price values for the asset.
        initial_balance (float): Starting balance of the trading account.

    Returns:
        float: Final balance or cumulative profit.
    """
    balance = INITIAL_BALANCE
    position = 0  # Tracks the number of stocks held
    entry_price = None  # Store the price when a "Buy" was executed

    for i, signal in enumerate(signals):
        if signal == 1 and balance > 0:
            # Execute a buy
            entry_price = prices.iloc[i]  # Use .iloc for positional indexing
            position = balance / entry_price  # Buy with all available balance
            balance = 0  # All balance used to buy
        elif signal == -1 and position > 0:
            # Execute a sell
            exit_price = prices.iloc[i]  # Use .iloc for positional indexing
            balance = position * exit_price  # Convert position to cash
            position = 0  # Clear position
            entry_price = None  # Reset entry price after selling

    # If there's a remaining position at the end, calculate its value
    if position > 0 and entry_price is not None:
        balance += position * prices.iloc[-1]  # Use .iloc for positional indexing

    return balance - INITIAL_BALANCE  # Return cumulative profit


In [None]:
plt.clf()
results_df["Predicted Change"].plot()
plt.show()

In [112]:
# Define buy and sell thresholds (e.g., absolute differences in predicted vs. actual price)
sell_thresholds = np.arange(predict_ch_min, -0.0001, 0.005)  # Thresholds for when to "Sell"; Sell when price is predicted to go down
buy_thresholds = np.arange(0.0001, predict_ch_max, 0.005)   # Thresholds for when to "Buy"; buy when price is predicted to go up

best_buy_threshold = None
best_sell_threshold = None
best_performance = -np.inf

ii = 0
jj = 0
performance = np.zeros((len(buy_thresholds), len(sell_thresholds)))
# -1 = Sell
# 0 = Hold
# 1 = Buy
for buy_th in buy_thresholds:
    for sell_th in sell_thresholds:
        # Generate signals
        trading_signals = [
            -1 if pred < sell_th else 1 if pred > buy_th else 0
            for pred in results_df['Predicted Change']
        ]

        # Simulate trades and calculate performance
        performance[ii, jj] = simulate_trades(
            signals=trading_signals,
            prices=results_df['close'],  # Use the computed predicted prices
            initial_balance=INITIAL_BALANCE
        )
        # Update best thresholds if current performance is better
        if performance[ii, jj] > best_performance:
            best_performance = performance[ii, jj]
            best_buy_threshold = buy_th
            best_sell_threshold = sell_th
        jj += 1
    ii += 1
    jj = 0
print(f"Best Buy Threshold: {best_buy_threshold}, Best Sell Threshold: {best_sell_threshold}")
print(f"Best Performance: {best_performance}")


Best Buy Threshold: 0.3051, Best Sell Threshold: -0.054280617237090656
Best Performance: 11.989245237638102


#### Beck Testing 

In [124]:
# Define buy & sell thrsholds
buy_threshold = best_buy_threshold
sell_threshold = best_sell_threshold
# Generate signals with reversed logic
trading_signals = [
    -1 if pred < sell_threshold else 1 if pred > buy_threshold else 0
    for pred in results_df['Predicted Change']
]

balance = INITIAL_BALANCE
position = 0  # No stock initially
trading_log = []  # To store completed trades



results_df['Signal'] = trading_signals

# Add 'open' and 'close' prices from the original DataFrame to results DataFrame
# results_df['open'] = df.loc[results_df.index, 'open']
# results_df['close'] = df.loc[results_df.index, 'close']

results_df.dropna(inplace=True)

# Variables to track ongoing trades
buy_price = None
buy_date = None
buy_volume = None

# Iterate over results_df for backtesting
for index, row in results_df.iterrows():
    signal = row['Signal']
    price = row['close']  # Use 'open' price for Buy

    if signal == 1 and balance > 0:
        # Record Buy details
        buy_price = price
        buy_date = index
        buy_volume = balance / price
        position = buy_volume  # Update position
        balance = 0  # All money is invested

    elif signal == -1 and position > 0:
        # Calculate profit/loss for the completed trade
        sell_price = price  # Use 'close' price for Sell
        profit_loss = (sell_price - buy_price) * buy_volume
        balance = sell_price * buy_volume  # Update balance after selling
        position = 0  # No stock left

        # Record the completed trade in the log
        trading_log.append({
            "Buy Date": buy_date,
            "Buy Price": buy_price,
            "Buy Volume": buy_volume,
            "Sell Date": index,
            "Sell Price": sell_price,
            "Profit/Loss": profit_loss
        })

        # Reset Buy details
        buy_price = None
        buy_date = None
        buy_volume = None

# Final portfolio value
if position > 0:
    final_price = results_df.iloc[-1]['Predicted Price']
    final_profit_loss = (final_price - buy_price) * buy_volume
    balance = final_price * buy_volume  # Update balance with remaining shares
    trading_log.append({
        "Buy Date": buy_date,
        "Buy Price": buy_price,
        "Buy Volume": buy_volume,
        "Sell Date": results_df.index[-1],
        "Sell Price": final_price,
        "Profit/Loss": final_profit_loss
    })

# Convert trading log to a DataFrame for better analysis
trading_log_df = pd.DataFrame(trading_log)

# Print the final results
print(f"Final Balance: ${balance:.2f}")
print(f"Net Profit: ${balance - INITIAL_BALANCE:.2f}")



Final Balance: $1016.15
Net Profit: $16.15


In [None]:
# Plot the predicted prices
plt.clf()
plt.figure(figsize=(14, 7))
plt.plot(results_df.index, results_df['Predicted Change'], label='Predicted Change', color='blue', alpha=0.7)
plt.plot(results_df.index, y_test/100, label='Actual Change', color='red', alpha=0.7)


# Use trading_log_df for Buy and Sell points
buy_signals = trading_log_df.dropna(subset=['Buy Date'])
sell_signals = trading_log_df.dropna(subset=['Sell Date'])

# Map Buy/Sell signals to values from results_df['close']
buy_close_prices = [results_df.loc[row['Buy Date'], 'Predicted Change'] for _, row in buy_signals.iterrows()]
sell_close_prices = [results_df.loc[row['Sell Date'], 'Predicted Change'] for _, row in sell_signals.iterrows()]

# Plot Buy signals as green squares at actual 'close' prices
plt.scatter(
    buy_signals['Buy Date'],
    buy_close_prices,
    label='Buy Signal',
    color='green',
    marker='s',
    alpha=0.6
)

# Plot Sell signals as red circles at actual 'close' prices
plt.scatter(
    sell_signals['Sell Date'],
    sell_close_prices,
    label='Sell Signal',
    color='red',
    marker='o',
    alpha=0.6
)

# Add labels, title, legend, and grid
plt.title("Trading Signals Over Predicted Prices (Using Actual Close Prices)")
plt.xlabel("Time")
plt.ylabel("Price")
plt.legend()
plt.grid(True)
plt.show()


In [27]:
# Save the model
joblib.dump(best_model, 'best_model_10candles_1min.joblib')

['best_model_10candles_1min.joblib']