<a href="https://colab.research.google.com/github/ifeLight/ml-bot/blob/main/binance-multi-timeframe-grade.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install pandas-ta
!pip install backtrader[plotting]
!pip install plotly
!pip install --upgrade firebase-admin



In [3]:
from google.colab import auth
auth.authenticate_user()
#Configure Google cloud project
project_id = 'ifelight'
!gcloud config set project {project_id}

Updated property [core/project].


In [82]:
import numpy as np
import pandas as pd
import datetime
import requests
import json
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import backtrader as bt
import pandas_ta as ta
from imblearn.over_sampling import SMOTE
import subprocess
import firebase_admin
from firebase_admin import firestore

In [5]:
# Working GCP Bucket name
bucket_name = 'ife-storage'

In [6]:
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects

binance_base_url = 'https://52on3577u3.execute-api.eu-central-1.amazonaws.com'

def get_binance_candles(symbol: str, interval='1h', limit: int = 50, **kwargs):
    url = f'{binance_base_url}/api/v3/uiKlines?symbol={symbol}&interval={interval}&limit={limit}'
    for key, value in kwargs.items():
        url += f'&{key}={value}'
    response = requests.get(url)
    result = json.loads(response.text)
    # print(result)
    def map_result(x):
        return {
            'Date': x[0],
            'Open': x[1],
            'High': x[2],
            'Low': x[3],
            'Close': x[4],
            'Volume': x[5],
        }
    mappeded_result = []
    for x in result:
        mappeded_result.append(map_result(x))
    return mappeded_result


def candles_to_df(data):
    df =  pd.DataFrame(data)
    df['Date'] = pd.to_datetime(df['Date'], unit='ms')
    df['Open'] = df['Open'].astype(float)
    df['High'] = df['High'].astype(float)
    df['Low'] = df['Low'].astype(float)
    df['Close'] = df['Close'].astype(float)
    df['Volume'] = df['Volume'].astype(float)
    df.set_index('Date', inplace=True)
    return df

def get_all_binance_candles(symbol: str, interval='1h', start_date=None, end_date=None, limit=1000):
  try:
    return load_candles_from_cloud_storage(symbol, interval, start_date, end_date)
  except FileNotFoundError:
    pass
  result = []
  raw_start_date = start_date
  raw_end_date = end_date
  start_date = pd.to_datetime(start_date) if start_date else pd.to_datetime('2015-01-01')
  end_date = pd.to_datetime(end_date) if end_date else pd.to_datetime('today')
  while True:
    candles = get_binance_candles(symbol, interval, limit, startTime=int(start_date.timestamp() * 1000), endTime=int(end_date.timestamp() * 1000))
    if len(candles) <= 1:
      break;
    result += candles
    start_date = pd.to_datetime(datetime.datetime.fromtimestamp(candles[-1]['Date'] / 1000))
  candles_df = candles_to_df(result)
  save_candles_to_cloud_storage(candles_df, symbol, interval, raw_start_date, raw_end_date)
  return candles_df

def candles_storage_file_name(symbol: str, interval='1h', start_date=None, end_date=None):
  file_name = f'binance_{symbol}_{interval}_{start_date}_{end_date}.csv'
  return file_name

def load_candles_from_cloud_storage(symbol: str, interval: str, start_date=None, end_date=None):
  file_name = candles_storage_file_name(symbol, interval, start_date, end_date)
  try:
    # Download the file from cloud storage.
    subprocess.run(['gsutil', 'cp', f'gs://{bucket_name}/trade/candles/{file_name}', f'/tmp/{file_name}'], check=True)

    # Load the data into a Pandas DataFrame.
    with open(f'/tmp/{file_name}', 'r') as f:
      return pd.read_csv(f, index_col=0, parse_dates=True)
  except subprocess.CalledProcessError:
    # Raise a FileNotFoundError if the file is not found in cloud storage.
    raise FileNotFoundError(f"File not found: gs://{bucket_name}/trade/candles/{file_name}")

def save_candles_to_cloud_storage(df: pd.DataFrame, symbol: str, interval: str, start_date, end_date):
  file_name = candles_storage_file_name(symbol, interval, start_date, end_date)
  df.to_csv(f'/tmp/{file_name}')
  !gsutil cp /tmp/{file_name} gs://{bucket_name}/trade/candles/{file_name}


In [7]:
timeframe = '15m'
symbol = 'BTCUSDT'
exchange = 'binance'
start_date = '01-01-2024'
end_date = '28-02-2025'
raw_data = get_all_binance_candles(symbol, timeframe, start_date, end_date)
raw_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01 00:00:00,42283.58,42488.09,42261.02,42488.00,431.71082
2024-01-01 00:15:00,42488.00,42554.57,42412.02,42419.73,392.24889
2024-01-01 00:30:00,42419.73,42447.82,42354.19,42441.32,319.90644
2024-01-01 00:45:00,42441.32,42490.74,42422.45,42475.23,127.81493
2024-01-01 01:00:00,42475.23,42475.23,42431.65,42466.33,188.76099
...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222


# New Section

In [8]:
data = raw_data.copy()
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01 00:00:00,42283.58,42488.09,42261.02,42488.00,431.71082
2024-01-01 00:15:00,42488.00,42554.57,42412.02,42419.73,392.24889
2024-01-01 00:30:00,42419.73,42447.82,42354.19,42441.32,319.90644
2024-01-01 00:45:00,42441.32,42490.74,42422.45,42475.23,127.81493
2024-01-01 01:00:00,42475.23,42475.23,42431.65,42466.33,188.76099
...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222


In [9]:
def add_pivots(df, window=5):
    """
    Calculate the probability of price going up and down based on higher and lower pivots.
    """
    df['Higher_Pivot'] = df['High'].rolling(window=2 * window + 1, center=True).apply(
        lambda x: 1 if x.iloc[window] == x.max() else 0, raw=False
    )
    df['Lower_Pivot'] = df['Low'].rolling(window=2 * window + 1, center=True).apply(
        lambda x: 1 if x.iloc[window] == x.min() else 0, raw=False
    )

    # Step 2: Ensure no two successive pivots of the same type
    pivot_type = None  # Tracks the type of the last pivot
    last_pivot_index = None  # Tracks the index of the last pivot

    # Remove duplicated index
    df = df[~df.index.duplicated(keep='first')]

    for i in range(len(df.index)):
      if df.loc[df.index[i], 'Higher_Pivot'].item() == 1:
          if pivot_type == 'higher':
              # Compare current higher pivot with the last higher pivot
              if df.loc[df.index[i], 'High'].item() > df.loc[df.index[last_pivot_index], 'High'].item():
                  # Remove the last higher pivot
                  df.loc[df.index[last_pivot_index], 'Higher_Pivot'] = 0
                  # Update the last pivot
                  last_pivot_index = i
              else:
                  # Remove the current higher pivot
                  df.loc[df.index[i], 'Higher_Pivot'] = 0
          else:
              # Update pivot type and index
              pivot_type = 'higher'
              last_pivot_index = i

      elif df.loc[df.index[i], 'Lower_Pivot'].item() == 1:
          if pivot_type == 'lower':
              # Compare current lower pivot with the last lower pivot
              if df.loc[df.index[i], 'Low'].item() < df.loc[df.index[last_pivot_index], 'Low'].item():
                  # Remove the last lower pivot
                  df.loc[df.index[last_pivot_index], 'Lower_Pivot'] = 0
                  # Update the last pivot
                  last_pivot_index = i
              else:
                  # Remove the current lower pivot
                  df.loc[df.index[i], 'Lower_Pivot'] = 0
          else:
              # Update pivot type and index
              pivot_type = 'lower'
              last_pivot_index = i
    return df

In [10]:
data = add_pivots(data, 10)

In [11]:
def plot_pivots(df):
  candlestick = go.Candlestick(
      x=df.index,
      open=df['Open'],
      high=df['High'],
      low=df['Low'],
      close=df['Close'],
      name='Candlestick'
  )

  # Step 3: Add markers for higher and lower pivots
  higher_pivots_df = df[df['Higher_Pivot'] == 1]
  lower_pivots_df = df[df['Lower_Pivot'] == 1]

  higher_pivots = go.Scatter(
      x=higher_pivots_df.index,
      y=higher_pivots_df['High'],
      mode='markers',
      marker=dict(color='red', size=10, symbol='triangle-down'),
      name='Higher Pivot'
  )

  lower_pivots = go.Scatter(
      x=lower_pivots_df.index,
      y=lower_pivots_df['Low'],
      mode='markers',
      marker=dict(color='green', size=10, symbol='triangle-up'),
      name='Lower Pivot'
  )

  # Step 4: Create the figure
  fig = go.Figure(data=[candlestick, higher_pivots, lower_pivots])

  # Update layout
  fig.update_layout(
      title='Candlestick Chart with Higher and Lower Pivots',
      xaxis_title='Date',
      yaxis_title='Price',
      xaxis_rangeslider_visible=False,
      template='plotly_dark'
  )

  # Show the plot
  fig.show()

In [12]:
plot_pivots(data[:1000])

In [13]:
data['Pivot'] = np.where(data['Higher_Pivot'] == 1, -1, np.where(data['Lower_Pivot'] == 1, 1, 0))
del data['Higher_Pivot']
del data['Lower_Pivot']
data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-01 00:00:00,42283.58,42488.09,42261.02,42488.00,431.71082,0
2024-01-01 00:15:00,42488.00,42554.57,42412.02,42419.73,392.24889,0
2024-01-01 00:30:00,42419.73,42447.82,42354.19,42441.32,319.90644,0
2024-01-01 00:45:00,42441.32,42490.74,42422.45,42475.23,127.81493,0
2024-01-01 01:00:00,42475.23,42475.23,42431.65,42466.33,188.76099,0
...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682,0
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437,0
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427,0
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222,0


In [14]:
def calculate_pivot_proximity(df):
    """
    Calculates the pivot proximity using a loop-based approach, finding the closest
    previous and next pivots without generating intermediate lists of all pivots.

    Args:
        df (pd.DataFrame): DataFrame with 'Pivot' and 'Close' columns.

    Returns:
        pd.DataFrame: DataFrame with 'Pivot_Proximity' column added.
    """
    df = df.copy()

    pivot_values = df['Pivot'].values
    close_values = df['Close'].values
    proximity_values = [0.0] * len(df)

    for i in range(len(df)):
      pivot = pivot_values[i]
      if pivot == -1:
        proximity_values[i] = -1.0
      elif pivot == 1:
        proximity_values[i] = 1.0
      else:
        closest_previous_pivot_index = None
        for j in range(i - 1, -1, -1):
          if pivot_values[j] != 0:
            closest_previous_pivot_index = j
            break

        if closest_previous_pivot_index is not None:
          closest_previous_pivot_value = pivot_values[closest_previous_pivot_index]
          closest_previous_close = close_values[closest_previous_pivot_index]

          closest_next_pivot_index = None
          if closest_previous_pivot_value == -1:
            for j in range(i + 1, len(df)):
              if pivot_values[j] == 1:
                closest_next_pivot_index = j
                break
          else:
            for j in range(i + 1, len(df)):
              if pivot_values[j] == -1:
                closest_next_pivot_index = j
                break

          if closest_next_pivot_index is not None:
            closest_next_close = close_values[closest_next_pivot_index]

            distance_to_previous = abs(close_values[i] - closest_previous_close)
            distance_to_next = abs(close_values[i] - closest_next_close)

            if distance_to_previous + distance_to_next != 0:
              if closest_previous_pivot_value == -1:
                proximity_values[i] = (distance_to_previous - distance_to_next) / (distance_to_previous + distance_to_next)
              else:
                proximity_values[i] = (distance_to_next - distance_to_previous) / (distance_to_previous + distance_to_next)

    df['Pivot_Proximity'] = proximity_values
    return df

In [15]:
data = calculate_pivot_proximity(data)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-01 00:00:00,42283.58,42488.09,42261.02,42488.00,431.71082,0,0.0
2024-01-01 00:15:00,42488.00,42554.57,42412.02,42419.73,392.24889,0,0.0
2024-01-01 00:30:00,42419.73,42447.82,42354.19,42441.32,319.90644,0,0.0
2024-01-01 00:45:00,42441.32,42490.74,42422.45,42475.23,127.81493,0,0.0
2024-01-01 01:00:00,42475.23,42475.23,42431.65,42466.33,188.76099,0,0.0
...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682,0,0.0
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437,0,0.0
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427,0,0.0
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222,0,0.0


In [16]:
def plot_candlestick_with_proximity(df, col='Pivot_Proximity', limit=0.0):
    # Create the candlestick trace
    candlestick = go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
        name='Candlesticks',
        increasing=dict(line=dict(color='green'), fillcolor='green'),
        decreasing=dict(line=dict(color='red'), fillcolor='red'),
    )

    # Create a bar trace for Pivot_Proximity coloring
    # Create a color list based on Pivot_Proximity
    colors = []
    for proximity in df[col]:
        if proximity > (0 + limit):
            colors.append('green')  # Positive proximity (closer to lower pivot)
        elif proximity < (0 + (-1 * limit)):
            colors.append('red')    # Negative proximity (closer to higher pivot)
        else:
            colors.append('gray')   # Neutral (middle or no pivot)
    proximity_trace = go.Bar(
        x=df.index,
        y=df['Close'],
        marker=dict(color=colors),
        opacity=0.3,
        name=col,
        yaxis='y2'
    )

    # Create pivot indicators
    higher_pivots_df = df[df[col] == -1]
    lower_pivots_df = df[df[col] == 1]

    higher_pivots = go.Scatter(
        x=higher_pivots_df.index,
        y=higher_pivots_df['High'],
        mode='markers',
        marker=dict(color='red', size=10, symbol='triangle-down'),
        name='Higher Pivot'
    )

    lower_pivots = go.Scatter(
        x=lower_pivots_df.index,
        y=lower_pivots_df['Low'],
        mode='markers',
        marker=dict(color='green', size=10, symbol='triangle-up'),
        name='Lower Pivot'
    )

    # Create the figure
    fig = go.Figure(data=[
        candlestick,
        higher_pivots,
        lower_pivots,
        proximity_trace
        ])

    # Update layout
    fig.update_layout(
        title='Candlestick Chart with Pivot Proximity',
        xaxis_title='Date',
        yaxis_title='Price',
        yaxis2=dict(title='Pivot Proximity', overlaying='y', side='right'),
        template='plotly_dark',
        showlegend=True,
        xaxis_rangeslider_visible=False,
    )

    # Show the figure
    fig.show()

In [17]:
plot_candlestick_with_proximity(data[0:500], 'Pivot_Proximity', 0.7)

In [18]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-01 00:00:00,42283.58,42488.09,42261.02,42488.00,431.71082,0,0.0
2024-01-01 00:15:00,42488.00,42554.57,42412.02,42419.73,392.24889,0,0.0
2024-01-01 00:30:00,42419.73,42447.82,42354.19,42441.32,319.90644,0,0.0
2024-01-01 00:45:00,42441.32,42490.74,42422.45,42475.23,127.81493,0,0.0
2024-01-01 01:00:00,42475.23,42475.23,42431.65,42466.33,188.76099,0,0.0
...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682,0,0.0
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437,0,0.0
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427,0,0.0
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222,0,0.0


In [19]:
z = ta.bbands(data['Close'])
z

Unnamed: 0_level_0,BBL_5_2.0,BBM_5_2.0,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01 00:00:00,,,,,
2024-01-01 00:15:00,,,,,
2024-01-01 00:30:00,,,,,
2024-01-01 00:45:00,,,,,
2024-01-01 01:00:00,42409.058754,42458.122,42507.185246,0.231114,0.583647
...,...,...,...,...,...
2025-02-27 23:00:00,83848.249468,84346.110,84843.970532,1.180518,0.706825
2025-02-27 23:15:00,83925.986399,84426.678,84927.369601,1.186098,0.665293
2025-02-27 23:30:00,84209.558479,84555.218,84900.877521,0.817595,0.687297
2025-02-27 23:45:00,84518.203267,84650.774,84783.344733,0.313218,0.718019


In [20]:
z.columns[1]

'BBM_5_2.0'

In [21]:
features_columns = []

def add_scaled_rsi(df, window=14, prefix = ''):
  series = ta.rsi(df['Close'], length=window)
  column_name = f"{prefix}RSI_{window}"
  df[column_name] = series / 100
  if(column_name not in features_columns):
    features_columns.append(column_name)
  return df

def add_scaled_ema(df, window=50, prefix = ''):
  series = ta.ema(df['Close'], length=window)
  column_name = f"{prefix}EMA_{window}"
  df[column_name] = series / df['Close']
  if column_name not in features_columns:
    features_columns.append(column_name)
  return df

def add_scaled_sma(df, window=50, prefix = ''):
  series = ta.sma(df['Close'], length=window)
  column_name = f"{prefix}SMA_{window}"
  df[column_name] = series / df['Close']
  if column_name not in features_columns:
    features_columns.append(column_name)
  return df

def add_scaled_macd(df, prefix= '', fast = 12, slow = 26, signal=9):
  macd_df = ta.macd(df['Close'], fast=fast, slow=slow, signal=signal)
  suffix = f"{fast}_{slow}_{signal}"
  macd_column_name = f"{prefix}MACD_{suffix}"
  macds_column_name = f"{prefix}MACDs_{suffix}"
  macdh_column_name = f"{prefix}MACDh_{suffix}"
  df[macd_column_name] = macd_df[macd_df.columns[0]] / df['Close']
  df[macds_column_name] = macd_df[macd_df.columns[2]] / df['Close']
  df[macdh_column_name] = macd_df[macd_df.columns[1]] / df['Close']
  if macd_column_name not in features_columns:
    features_columns.append(macd_column_name)
  if macds_column_name not in features_columns:
    features_columns.append(macds_column_name)
  if macdh_column_name not in features_columns:
    features_columns.append(macdh_column_name)
  return df

def add_scaled_bbands(df, window=20, std=2.0, prefix= ''):
  bbands_df = ta.bbands(df['Close'], length=window, std=std)
  suffix = f"{window}_{std}"
  bbl_column_name = f"{prefix}BBL_{suffix}"
  bbm_column_name = f"{prefix}BBM_{suffix}"
  bbu_column_name = f"{prefix}BBU_{suffix}"
  bbb_column_name = f"{prefix}BBB_{suffix}"
  bbp_column_name = f"{prefix}BBP_{suffix}"
  df[bbl_column_name] = bbands_df[bbands_df.columns[0]] / df['Close']
  df[bbm_column_name] = bbands_df[bbands_df.columns[1]] / df['Close']
  df[bbu_column_name] = bbands_df[bbands_df.columns[2]] / df['Close']
  df[bbb_column_name] = bbands_df[bbands_df.columns[3]]
  df[bbp_column_name] = bbands_df[bbands_df.columns[4]]
  if bbl_column_name not in features_columns: features_columns.append(bbl_column_name)
  if bbm_column_name not in features_columns: features_columns.append(bbm_column_name)
  if bbu_column_name not in features_columns: features_columns.append(bbu_column_name)
  if bbb_column_name not in features_columns: features_columns.append(bbb_column_name)
  if bbp_column_name not in features_columns: features_columns.append(bbp_column_name)
  return df

In [22]:
# # Add technical indicators
lower_timeframe = timeframe
lower_timeframe_prefix = f"{lower_timeframe}_"
add_scaled_rsi(data, 14, prefix=lower_timeframe_prefix)
add_scaled_rsi(data, 6, prefix=lower_timeframe_prefix)
add_scaled_ema(data, 5, prefix=lower_timeframe_prefix)
add_scaled_ema(data, 21, prefix=lower_timeframe_prefix)
add_scaled_sma(data, 50, prefix=lower_timeframe_prefix)
add_scaled_macd(data, prefix=lower_timeframe_prefix)
add_scaled_bbands(data, prefix=lower_timeframe_prefix)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,15m_EMA_21,15m_SMA_50,15m_MACD_12_26_9,15m_MACDs_12_26_9,15m_MACDh_12_26_9,15m_BBL_20_2.0,15m_BBM_20_2.0,15m_BBU_20_2.0,15m_BBB_20_2.0,15m_BBP_20_2.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-01-01 00:00:00,42283.58,42488.09,42261.02,42488.00,431.71082,0,0.0,,,,,,,,,,,,,
2024-01-01 00:15:00,42488.00,42554.57,42412.02,42419.73,392.24889,0,0.0,,,,,,,,,,,,,
2024-01-01 00:30:00,42419.73,42447.82,42354.19,42441.32,319.90644,0,0.0,,,,,,,,,,,,,
2024-01-01 00:45:00,42441.32,42490.74,42422.45,42475.23,127.81493,0,0.0,,,,,,,,,,,,,
2024-01-01 01:00:00,42475.23,42475.23,42431.65,42466.33,188.76099,0,0.0,,,0.999807,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682,0,0.0,0.522309,0.624015,0.998218,0.996985,1.006491,-0.001826,-0.003695,0.001870,0.982790,0.994341,1.005892,2.323422,0.744955
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437,0,0.0,0.527919,0.635524,0.998496,0.996829,1.005441,-0.001347,-0.003224,0.001877,0.982314,0.993877,1.005439,2.326750,0.764784
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427,0,0.0,0.541285,0.663960,0.998270,0.996127,1.003851,-0.000869,-0.002750,0.001881,0.981310,0.992750,1.004190,2.304696,0.816875
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222,0,0.0,0.544868,0.671890,0.998659,0.996224,1.003111,-0.000463,-0.002292,0.001829,0.980974,0.992511,1.004047,2.324784,0.824588


In [23]:
features_columns

['15m_RSI_14',
 '15m_RSI_6',
 '15m_EMA_5',
 '15m_EMA_21',
 '15m_SMA_50',
 '15m_MACD_12_26_9',
 '15m_MACDs_12_26_9',
 '15m_MACDh_12_26_9',
 '15m_BBL_20_2.0',
 '15m_BBM_20_2.0',
 '15m_BBU_20_2.0',
 '15m_BBB_20_2.0',
 '15m_BBP_20_2.0']

In [24]:
def resample_candles(df, interval='1h'):
    return df.resample(interval).agg({'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum'}).ffill()

In [25]:
middle_timeframe = '1h'
middle_timeframe_prefix = f"{middle_timeframe}_"
middle_data = resample_candles(data, middle_timeframe)
add_scaled_rsi(middle_data, 14, prefix=middle_timeframe_prefix)
add_scaled_rsi(middle_data, 6, prefix=middle_timeframe_prefix)
add_scaled_ema(middle_data, 5, prefix=middle_timeframe_prefix)
add_scaled_ema(middle_data, 21, prefix=middle_timeframe_prefix)
add_scaled_macd(middle_data, prefix=middle_timeframe_prefix)
add_scaled_bbands(middle_data, prefix=middle_timeframe_prefix)
middle_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,1h_RSI_14,1h_RSI_6,1h_EMA_5,1h_EMA_21,1h_MACD_12_26_9,1h_MACDs_12_26_9,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-01-01 00:00:00,42283.58,42554.57,42261.02,42475.23,1271.68108,,,,,,,,,,,,
2024-01-01 01:00:00,42475.23,42775.00,42431.65,42613.56,1196.37856,,,,,,,,,,,,
2024-01-01 02:00:00,42613.57,42638.41,42500.00,42581.10,685.21980,,,,,,,,,,,,
2024-01-01 03:00:00,42581.09,42586.64,42230.08,42330.49,794.80391,,,,,,,,,,,,
2024-01-01 04:00:00,42330.50,42399.99,42209.46,42399.99,715.41760,,,1.001889,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 20:00:00,84113.98,84164.97,82716.49,83524.98,3204.30191,0.327430,0.217424,1.009057,1.021438,-0.007418,-0.005666,-1.752023e-03,1.000023,1.022504,1.044986,4.397252,-0.000521
2025-02-27 21:00:00,83522.02,84433.97,83223.74,84409.18,1878.95127,0.414654,0.431693,0.998991,1.009762,-0.007269,-0.005939,-1.330089e-03,0.988979,1.011584,1.034188,4.469106,0.243770
2025-02-27 22:00:00,84409.18,84758.63,83881.08,84716.34,994.53745,0.441739,0.489913,0.996913,1.005546,-0.006815,-0.006097,-7.184214e-04,0.986112,1.008165,1.030218,4.374853,0.314885
2025-02-27 23:00:00,84716.01,84927.84,84422.02,84708.58,789.30768,0.441184,0.488396,0.998003,1.005126,-0.006411,-0.006160,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980


In [26]:
# add 4h data
higher_timeframe = '4h'
higher_timeframe_prefix = f"{higher_timeframe}_"
higher_data = resample_candles(data, higher_timeframe)
add_scaled_rsi(higher_data, 14, prefix=higher_timeframe_prefix)
add_scaled_rsi(higher_data, 6, prefix=higher_timeframe_prefix)
add_scaled_ema(higher_data, 5, prefix=higher_timeframe_prefix)
add_scaled_ema(higher_data, 21, prefix=higher_timeframe_prefix)
higher_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-01-01 00:00:00,42283.58,42775.00,42230.08,42330.49,3948.08335,,,,
2024-01-01 04:00:00,42330.50,42500.00,42180.77,42492.46,2706.99880,,,,
2024-01-01 08:00:00,42492.46,42762.39,42452.58,42690.20,2948.80274,,,,
2024-01-01 12:00:00,42690.21,42847.07,42580.00,42783.05,2936.44406,,,,
2024-01-01 16:00:00,42783.05,43550.00,42664.42,43517.99,5686.97164,,,0.982647,
...,...,...,...,...,...,...,...,...,...
2025-02-27 08:00:00,86272.01,87078.46,85846.87,86698.60,4102.58827,0.354086,0.453377,0.994006,1.027267
2025-02-27 12:00:00,86698.60,86795.90,84640.75,85500.84,12382.79075,0.319968,0.362436,1.005287,1.037871
2025-02-27 16:00:00,85500.84,85691.51,83614.75,84113.98,6757.93459,0.285648,0.283440,1.014575,1.049984
2025-02-27 20:00:00,84113.98,84927.84,82716.49,84708.58,6867.09831,0.319357,0.355691,1.004969,1.038740


In [27]:
features_columns

['15m_RSI_14',
 '15m_RSI_6',
 '15m_EMA_5',
 '15m_EMA_21',
 '15m_SMA_50',
 '15m_MACD_12_26_9',
 '15m_MACDs_12_26_9',
 '15m_MACDh_12_26_9',
 '15m_BBL_20_2.0',
 '15m_BBM_20_2.0',
 '15m_BBU_20_2.0',
 '15m_BBB_20_2.0',
 '15m_BBP_20_2.0',
 '1h_RSI_14',
 '1h_RSI_6',
 '1h_EMA_5',
 '1h_EMA_21',
 '1h_MACD_12_26_9',
 '1h_MACDs_12_26_9',
 '1h_MACDh_12_26_9',
 '1h_BBL_20_2.0',
 '1h_BBM_20_2.0',
 '1h_BBU_20_2.0',
 '1h_BBB_20_2.0',
 '1h_BBP_20_2.0',
 '4h_RSI_14',
 '4h_RSI_6',
 '4h_EMA_5',
 '4h_EMA_21']

In [28]:
def merge_candlesticks_data(df1, df2):
  """
  Merges two candlestick DataFrames with forward fill, handling different timeframes,
  and prevents duplicate OHLCV columns.
  Ensure both DataFrames have a datetime index.
  And ensure the second DataFrame is the larger timeframe

  Args:
      df1: First candlestick DataFrame with datetime index.
      df2: Second candlestick DataFrame with datetime index.

  Returns:
      Merged DataFrame with forward-filled values, and no duplicate OHLCV columns.
  """
  # Ensure both DataFrames have a datetime index
  if not isinstance(df1.index, pd.DatetimeIndex) or not isinstance(df2.index, pd.DatetimeIndex):
      raise ValueError("DataFrames must have a datetime index.")
  # Identify OHLCV columns
  ohlcv_cols = ['open', 'high', 'low', 'close', 'volume']

  # Rename columns in df2 that conflict with df1's OHLCV columns
  for col in df2.columns:
    if col.lower() in ohlcv_cols and col.lower() in df1.columns.str.lower():
      del df2[col]

  # Merge the DataFrames using outer join, which preserves all dates
  merged_df = pd.merge(df1, df2, how='outer', left_index=True, right_index=True, suffixes=('_df1', '_df2'))

  # Forward fill the missing values for each column
  for col in merged_df.columns:
    merged_df[col] = merged_df[col].ffill()

  return merged_df

In [29]:
data = merge_candlesticks_data(data, middle_data)
data = merge_candlesticks_data(data, higher_data)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 00:00:00,42283.58,42488.09,42261.02,42488.00,431.71082,0,0.0,,,,...,,,,,,,,,,
2024-01-01 00:15:00,42488.00,42554.57,42412.02,42419.73,392.24889,0,0.0,,,,...,,,,,,,,,,
2024-01-01 00:30:00,42419.73,42447.82,42354.19,42441.32,319.90644,0,0.0,,,,...,,,,,,,,,,
2024-01-01 00:45:00,42441.32,42490.74,42422.45,42475.23,127.81493,0,0.0,,,,...,,,,,,,,,,
2024-01-01 01:00:00,42475.23,42475.23,42431.65,42466.33,188.76099,0,0.0,,,0.999807,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682,0,0.0,0.522309,0.624015,0.998218,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437,0,0.0,0.527919,0.635524,0.998496,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427,0,0.0,0.541285,0.663960,0.998270,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222,0,0.0,0.544868,0.671890,0.998659,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740


In [30]:
# Drop rows with NaN values (due to rolling calculations)
data.dropna(inplace=True)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-04 08:00:00,42763.08,42916.82,42762.96,42859.29,478.21032,0,0.675702,0.426390,0.329146,1.001711,...,1.772461e-03,0.986742,0.999564,1.012385,2.565426,0.517016,0.465249,0.428477,0.998686,1.015678
2024-01-04 08:15:00,42859.30,42975.38,42699.83,42939.24,550.03522,0,0.318966,0.466852,0.431833,0.999897,...,1.772461e-03,0.986742,0.999564,1.012385,2.565426,0.517016,0.465249,0.428477,0.998686,1.015678
2024-01-04 08:30:00,42939.24,42953.08,42779.20,42791.11,368.35744,0,0.979921,0.409251,0.322185,1.002239,...,1.772461e-03,0.986742,0.999564,1.012385,2.565426,0.517016,0.465249,0.428477,0.998686,1.015678
2024-01-04 08:45:00,42791.10,42886.00,42787.66,42849.66,253.72563,0,0.718671,0.438729,0.395043,1.000580,...,1.772461e-03,0.986742,0.999564,1.012385,2.565426,0.517016,0.465249,0.428477,0.998686,1.015678
2024-01-04 09:00:00,42849.67,42943.21,42849.66,42871.12,285.47558,0,0.622917,0.449570,0.422352,1.000052,...,1.842024e-03,0.983976,0.996446,1.008915,2.502843,0.642515,0.465249,0.428477,0.998686,1.015678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682,0,0.000000,0.522309,0.624015,0.998218,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437,0,0.000000,0.527919,0.635524,0.998496,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427,0,0.000000,0.541285,0.663960,0.998270,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222,0,0.000000,0.544868,0.671890,0.998659,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740


In [31]:
# delete Pivot_Porximity that have zero at the beginning and ending of the dataframe
non_zero_indices = data[data['Pivot_Proximity'] != 0].index
non_zero_at_begining = non_zero_indices[0]
non_zero_at_end = non_zero_indices[-1]
data = data.loc[non_zero_at_begining:non_zero_at_end]
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-04 08:00:00,42763.08,42916.82,42762.96,42859.29,478.21032,0,0.675702,0.426390,0.329146,1.001711,...,0.001772,0.986742,0.999564,1.012385,2.565426,0.517016,0.465249,0.428477,0.998686,1.015678
2024-01-04 08:15:00,42859.30,42975.38,42699.83,42939.24,550.03522,0,0.318966,0.466852,0.431833,0.999897,...,0.001772,0.986742,0.999564,1.012385,2.565426,0.517016,0.465249,0.428477,0.998686,1.015678
2024-01-04 08:30:00,42939.24,42953.08,42779.20,42791.11,368.35744,0,0.979921,0.409251,0.322185,1.002239,...,0.001772,0.986742,0.999564,1.012385,2.565426,0.517016,0.465249,0.428477,0.998686,1.015678
2024-01-04 08:45:00,42791.10,42886.00,42787.66,42849.66,253.72563,0,0.718671,0.438729,0.395043,1.000580,...,0.001772,0.986742,0.999564,1.012385,2.565426,0.517016,0.465249,0.428477,0.998686,1.015678
2024-01-04 09:00:00,42849.67,42943.21,42849.66,42871.12,285.47558,0,0.622917,0.449570,0.422352,1.000052,...,0.001842,0.983976,0.996446,1.008915,2.502843,0.642515,0.465249,0.428477,0.998686,1.015678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 19:30:00,84026.42,84026.42,83614.75,83938.01,623.48627,0,0.685986,0.325334,0.245913,1.003426,...,-0.001088,0.995850,1.016068,1.036286,3.979673,0.102629,0.285648,0.283440,1.014575,1.049984
2025-02-27 19:45:00,83937.33,84203.53,83722.23,84113.98,446.61424,0,0.581060,0.361770,0.349479,1.000885,...,-0.001088,0.995850,1.016068,1.036286,3.979673,0.102629,0.285648,0.283440,1.014575,1.049984
2025-02-27 20:00:00,84113.98,84164.97,83052.00,83219.97,899.63601,0,0.897560,0.279256,0.190213,1.007758,...,-0.001752,1.000023,1.022504,1.044986,4.397252,-0.000521,0.319357,0.355691,1.004969,1.038740
2025-02-27 20:15:00,83219.97,83307.57,82831.36,82923.01,833.69040,0,0.774473,0.258190,0.160973,1.007578,...,-0.001752,1.000023,1.022504,1.044986,4.397252,-0.000521,0.319357,0.355691,1.004969,1.038740


In [32]:
data[features_columns].describe()

Unnamed: 0,15m_RSI_14,15m_RSI_6,15m_EMA_5,15m_EMA_21,15m_SMA_50,15m_MACD_12_26_9,15m_MACDs_12_26_9,15m_MACDh_12_26_9,15m_BBL_20_2.0,15m_BBM_20_2.0,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
count,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0,...,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0,40371.0
mean,0.506894,0.507508,0.999975,0.999871,0.999677,9.2e-05,9.3e-05,-8.646865e-07,0.992096,0.999879,...,5.38476e-07,0.983307,0.999493,1.01568,3.239316,0.518051,0.522207,0.523279,0.999574,0.997812
std,0.110081,0.165186,0.002494,0.005983,0.010939,0.003085,0.002904,0.0009284513,0.009128,0.006819,...,0.001831076,0.017371,0.01343,0.017861,2.274402,0.337058,0.127732,0.183337,0.009612,0.023332
min,0.097674,0.022921,0.973352,0.948889,0.931,-0.028191,-0.024185,-0.009334411,0.877323,0.943581,...,-0.01195553,0.882055,0.936592,0.979851,0.284103,-0.508752,0.12985,0.027477,0.952863,0.90577
25%,0.437165,0.393571,0.998937,0.997347,0.994785,-0.001188,-0.00112,-0.0004202979,0.989753,0.99695,...,-0.0009745695,0.977223,0.993056,1.004622,1.646174,0.263478,0.429917,0.391686,0.994964,0.983407
50%,0.508083,0.509872,0.999945,0.999773,0.999528,0.000125,0.000139,-2.425896e-06,0.994597,0.999781,...,-4.518024e-05,0.988166,0.99937,1.010516,2.650738,0.536162,0.518367,0.525652,0.999341,0.997757
75%,0.576636,0.621931,1.000948,1.002226,1.004325,0.001465,0.001396,0.0004306563,0.99751,1.002604,...,0.0009301303,0.994637,1.005757,1.021316,4.090743,0.77437,0.614323,0.652654,1.003924,1.010718
max,0.926807,0.987247,1.039333,1.074902,1.127769,0.016693,0.014574,0.009552534,1.031753,1.078095,...,0.009758859,1.038156,1.11221,1.224434,22.157655,1.486907,0.909582,0.964218,1.06722,1.155367


In [33]:
def create_sequences(data, features_columns, target_col, seq_length):
    """
    Create sequences of `seq_length` time steps for LSTM input, optimized for performance.
    """
    num_samples = len(data) - seq_length
    num_features = len(features_columns)

    X = np.zeros((num_samples, seq_length, num_features))
    y = np.zeros(num_samples)

    features_data = data[features_columns].values
    target_data = data[target_col].values

    for i in range(num_samples):
        X[i] = features_data[i:i + seq_length]
        y[i] = target_data[i + seq_length]

    return X, y

In [34]:
def calc_class_ratios(df, column):
  class_counts = df[column].value_counts()
  total_count = len(df)
  class_ratios = class_counts / total_count
  return class_ratios.to_dict()

def class_ratio_to_class_weights(class_ratios):
  class_weights = {}
  for key, value in class_ratios.items():
    class_weights[key] = 1 / value
  return class_weights

class_ratios = calc_class_ratios(data, 'Pivot')
class_weights = class_ratio_to_class_weights(class_ratios)
class_weights

{0: 1.0570815113508418, 1: 37.00366636113657, -1: 37.07162534435262}

In [35]:
# select only first 80 % of the data
training_data_ratio = 0.8 # 80%
training_data = data[:int(len(data) * training_data_ratio)]
training_data_start_date = training_data.index[0]
training_data_end_date = training_data.index[-1]
training_data.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-12-05 15:30:00,102097.78,102387.45,101230.36,101368.02,2307.13938,0,-0.544304,0.34576,0.212034,1.009834,...,-0.001041,0.959651,0.999352,1.039054,7.945464,0.508156,0.64997,0.661386,0.997654,0.969591
2024-12-05 15:45:00,101368.02,101806.1,100925.97,101292.01,1708.9684,0,-0.528118,0.339286,0.204963,1.007061,...,-0.001041,0.959651,0.999352,1.039054,7.945464,0.508156,0.64997,0.661386,0.997654,0.969591
2024-12-05 16:00:00,101292.0,101910.09,101272.0,101453.99,1376.85662,0,-0.562611,0.366508,0.267438,1.003636,...,-0.002293,0.964801,1.003003,1.041205,7.617514,0.4607,0.543652,0.472865,1.013642,0.992507
2024-12-05 16:15:00,101453.99,101999.0,101272.0,101272.01,744.85667,0,-0.523859,0.349106,0.241819,1.003626,...,-0.002293,0.964801,1.003003,1.041205,7.617514,0.4607,0.543652,0.472865,1.013642,0.992507
2024-12-05 16:30:00,101272.01,101403.82,100568.02,101176.47,1503.69724,0,-0.503514,0.339979,0.228056,1.003049,...,-0.002293,0.964801,1.003003,1.041205,7.617514,0.4607,0.543652,0.472865,1.013642,0.992507
2024-12-05 16:45:00,101176.46,101492.0,101037.5,101037.51,883.43882,0,-0.473923,0.326604,0.207449,1.002953,...,-0.002293,0.964801,1.003003,1.041205,7.617514,0.4607,0.543652,0.472865,1.013642,0.992507
2024-12-05 17:00:00,101037.5,101280.01,100632.26,100654.31,724.08616,0,-0.392323,0.292438,0.159697,1.004514,...,-0.002928,0.967998,1.002351,1.036704,6.854462,0.465785,0.543652,0.472865,1.013642,0.992507
2024-12-05 17:15:00,100654.31,100964.0,100533.41,100803.99,580.22153,0,-0.424196,0.322261,0.241532,1.002015,...,-0.002928,0.967998,1.002351,1.036704,6.854462,0.465785,0.543652,0.472865,1.013642,0.992507
2024-12-05 17:30:00,100803.99,101199.32,100784.0,100956.68,587.02093,0,-0.456711,0.352255,0.322321,1.000333,...,-0.002928,0.967998,1.002351,1.036704,6.854462,0.465785,0.543652,0.472865,1.013642,0.992507
2024-12-05 17:45:00,100956.68,101356.0,100844.01,101282.36,522.80417,0,-0.526063,0.412025,0.467498,0.998078,...,-0.002928,0.967998,1.002351,1.036704,6.854462,0.465785,0.543652,0.472865,1.013642,0.992507


In [36]:
seq_length = 100
target_col = 'Pivot_Proximity'

# Create sequences
X, y = create_sequences(training_data, features_columns, target_col, seq_length)

In [37]:
print(X.shape)
print(y.shape)

(32196, 100, 29)
(32196,)


In [38]:
# Split into training and testing sets
train_validate_split_ratio = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=train_validate_split_ratio, shuffle=False)

In [39]:
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)

X_train shape: (25756, 100, 29)
y_train shape: (25756,)
X_test shape: (6440, 100, 29)
y_test shape: (6440,)


In [40]:
# since the class weights index are now +1 to categorise dat
# adding one to the key of class weights
class_weights = {k+1: v for k, v in class_weights.items()}
class_weights

{1: 1.0570815113508418, 2: 37.00366636113657, 0: 37.07162534435262}

In [41]:
# Step 4: Build the LSTM model
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(50, activation='relu'),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='tanh')
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [65]:
model.summary()

In [None]:
class CustomEarlyStopping(tf.keras.callbacks.Callback):
    def __init__(self, patience=2):
        super(CustomEarlyStopping, self).__init__()
        self.patience = patience  # Number of consecutive increases allowed
        self.best_val_mae = float('inf')  # Track the best validation MAE
        self.increase_count = 0  # Counter for consecutive increases

    def on_epoch_end(self, epoch, logs=None):
        current_val_mae = logs.get('val_mae')  # Get validation MAE for the current epoch

        if current_val_mae < self.best_val_mae:
            # If validation MAE improves, reset the counter
            self.best_val_mae = current_val_mae
            self.increase_count = 0
        else:
            # If validation MAE increases, increment the counter
            self.increase_count += 1

        # Stop training if validation MAE increases consecutively for 'patience' epochs
        if self.increase_count >= self.patience:
            print(f"\nEarly stopping: Validation MAE increased {self.patience} times in a row.")
            self.model.stop_training = True

In [42]:
# Define early stopping
custom_early_stopping = CustomEarlyStopping(patience=2)

# Step 5: Train the model
train_epochs = 12
history = model.fit(
    X_train, y_train,
    epochs=train_epochs,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1,
    callbacks=[custom_early_stopping]
    # class_weight=class_weights
)

Epoch 1/12
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 150ms/step - loss: 0.3600 - mae: 0.5180 - val_loss: 0.2702 - val_mae: 0.4445
Epoch 2/12
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 178ms/step - loss: 0.2976 - mae: 0.4618 - val_loss: 0.2562 - val_mae: 0.4297
Epoch 3/12
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 161ms/step - loss: 0.2849 - mae: 0.4501 - val_loss: 0.2543 - val_mae: 0.4269
Epoch 4/12
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 150ms/step - loss: 0.2798 - mae: 0.4462 - val_loss: 0.2456 - val_mae: 0.4197
Epoch 5/12
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 149ms/step - loss: 0.2741 - mae: 0.4399 - val_loss: 0.2401 - val_mae: 0.4135
Epoch 6/12
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 148ms/step - loss: 0.2691 - mae: 0.4328 - val_loss: 0.2311 - val_mae: 0.4026
Epoch 7/12
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [73]:
train_loss = history.history['loss'][-1]
train_mae = history.history['mae'][-1]
val_loss = history.history['val_loss'][-1]
val_mae = history.history['val_mae'][-1]
print(f"Train Loss: {train_loss:.4f}")
print(f"Train MAE: {train_mae:.4f}")
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation MAE: {val_mae:.4f}")

Train Loss: 0.2388
Train MAE: 0.4012
Validation Loss: 0.2305
Validation MAE: 0.3967


In [83]:
px.line(history.history, y=['loss', 'val_loss'], title='Loss Over Epochs')

In [84]:
px.line(history.history, y=['mae', 'val_mae'], title='MAE Over Epochs')

In [44]:
test_data = data[int(len(data) * training_data_ratio):]
test_data.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-12-05 18:00:00,101282.37,101416.0,101100.97,101264.49,452.55352,0,-0.522258,0.409791,0.460995,0.998836,...,-0.004042,0.98253,1.014389,1.046248,6.281445,0.274183,0.543652,0.472865,1.013642,0.992507
2024-12-05 18:15:00,101264.5,101521.14,100900.0,101076.0,1032.38452,0,-0.48212,0.386012,0.391984,1.000466,...,-0.004042,0.98253,1.014389,1.046248,6.281445,0.274183,0.543652,0.472865,1.013642,0.992507
2024-12-05 18:30:00,101076.0,101266.46,100942.46,101164.01,416.25355,0,-0.500861,0.403419,0.439037,0.99973,...,-0.004042,0.98253,1.014389,1.046248,6.281445,0.274183,0.543652,0.472865,1.013642,0.992507
2024-12-05 18:45:00,101164.01,101164.01,100172.67,100179.24,1869.92391,0,-0.291159,0.300694,0.215311,1.006372,...,-0.004042,0.98253,1.014389,1.046248,6.281445,0.274183,0.543652,0.472865,1.013642,0.992507
2024-12-05 19:00:00,100179.24,100750.0,100055.0,100560.01,809.75482,0,-0.372242,0.367734,0.365365,1.001707,...,-0.005423,0.994972,1.026404,1.057836,6.124711,0.079987,0.543652,0.472865,1.013642,0.992507
2024-12-05 19:15:00,100560.0,100932.0,100424.95,100568.89,482.42833,0,-0.374133,0.369252,0.368743,1.001079,...,-0.005423,0.994972,1.026404,1.057836,6.124711,0.079987,0.543652,0.472865,1.013642,0.992507
2024-12-05 19:30:00,100568.88,100744.0,99611.0,99735.99,858.20155,0,-0.196771,0.297157,0.230589,1.006293,...,-0.005423,0.994972,1.026404,1.057836,6.124711,0.079987,0.543652,0.472865,1.013642,0.992507
2024-12-05 19:45:00,99735.99,99906.62,98599.36,99028.0,1944.15761,0,-0.046008,0.252099,0.166831,1.008992,...,-0.005423,0.994972,1.026404,1.057836,6.124711,0.079987,0.543652,0.472865,1.013642,0.992507
2024-12-05 20:00:00,99028.01,99424.0,98252.0,98583.95,1978.04837,0,0.04855,0.228678,0.138093,1.009024,...,-0.006052,0.996451,1.026331,1.056211,5.822693,0.059383,0.467849,0.359701,1.02361,1.012569
2024-12-05 20:15:00,98583.95,98627.99,98000.0,98313.83,1415.83533,0,0.106071,0.215559,0.122669,1.007864,...,-0.006052,0.996451,1.026331,1.056211,5.822693,0.059383,0.467849,0.359701,1.02361,1.012569


In [45]:
def predict_in_batches(model, test_data, features_columns, seq_length, batch_size=128):
    """
    Predicts in batches to speed up inference.
    """
    num_samples = len(test_data) - seq_length
    feature_array = test_data[features_columns].values
    predicted_values = np.full(len(test_data), np.nan)

    for start_idx in range(0, num_samples, batch_size):
        end_idx = min(start_idx + batch_size, num_samples)
        batch_indices = range(start_idx + seq_length, end_idx + seq_length)
        batch_input = np.array([feature_array[i - seq_length:i] for i in batch_indices])

        if len(batch_input) > 0 :
            predictions = model.predict(batch_input, verbose=0)
            predicted_values[batch_indices] = predictions.flatten() #Flatten to 1d array.

    test_data['Predicted_Value'] = predicted_values
    return test_data

In [46]:
# Predict in batches
batch_size = 128
predict_in_batches(model, test_data, features_columns, seq_length, batch_size)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21,Predicted_Value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-12-05 18:00:00,101282.37,101416.00,101100.97,101264.49,452.55352,0,-0.522258,0.409791,0.460995,0.998836,...,0.982530,1.014389,1.046248,6.281445,0.274183,0.543652,0.472865,1.013642,0.992507,
2024-12-05 18:15:00,101264.50,101521.14,100900.00,101076.00,1032.38452,0,-0.482120,0.386012,0.391984,1.000466,...,0.982530,1.014389,1.046248,6.281445,0.274183,0.543652,0.472865,1.013642,0.992507,
2024-12-05 18:30:00,101076.00,101266.46,100942.46,101164.01,416.25355,0,-0.500861,0.403419,0.439037,0.999730,...,0.982530,1.014389,1.046248,6.281445,0.274183,0.543652,0.472865,1.013642,0.992507,
2024-12-05 18:45:00,101164.01,101164.01,100172.67,100179.24,1869.92391,0,-0.291159,0.300694,0.215311,1.006372,...,0.982530,1.014389,1.046248,6.281445,0.274183,0.543652,0.472865,1.013642,0.992507,
2024-12-05 19:00:00,100179.24,100750.00,100055.00,100560.01,809.75482,0,-0.372242,0.367734,0.365365,1.001707,...,0.994972,1.026404,1.057836,6.124711,0.079987,0.543652,0.472865,1.013642,0.992507,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 19:30:00,84026.42,84026.42,83614.75,83938.01,623.48627,0,0.685986,0.325334,0.245913,1.003426,...,0.995850,1.016068,1.036286,3.979673,0.102629,0.285648,0.283440,1.014575,1.049984,0.533244
2025-02-27 19:45:00,83937.33,84203.53,83722.23,84113.98,446.61424,0,0.581060,0.361770,0.349479,1.000885,...,0.995850,1.016068,1.036286,3.979673,0.102629,0.285648,0.283440,1.014575,1.049984,0.549174
2025-02-27 20:00:00,84113.98,84164.97,83052.00,83219.97,899.63601,0,0.897560,0.279256,0.190213,1.007758,...,1.000023,1.022504,1.044986,4.397252,-0.000521,0.319357,0.355691,1.004969,1.038740,0.443744
2025-02-27 20:15:00,83219.97,83307.57,82831.36,82923.01,833.69040,0,0.774473,0.258190,0.160973,1.007578,...,1.000023,1.022504,1.044986,4.397252,-0.000521,0.319357,0.355691,1.004969,1.038740,0.659132


In [47]:
test_data[['Open', 'High', 'Low', 'Close', 'Pivot', 'Predicted_Value']]

Unnamed: 0_level_0,Open,High,Low,Close,Pivot,Predicted_Value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-12-05 18:00:00,101282.37,101416.00,101100.97,101264.49,0,
2024-12-05 18:15:00,101264.50,101521.14,100900.00,101076.00,0,
2024-12-05 18:30:00,101076.00,101266.46,100942.46,101164.01,0,
2024-12-05 18:45:00,101164.01,101164.01,100172.67,100179.24,0,
2024-12-05 19:00:00,100179.24,100750.00,100055.00,100560.01,0,
...,...,...,...,...,...,...
2025-02-27 19:30:00,84026.42,84026.42,83614.75,83938.01,0,0.533244
2025-02-27 19:45:00,83937.33,84203.53,83722.23,84113.98,0,0.549174
2025-02-27 20:00:00,84113.98,84164.97,83052.00,83219.97,0,0.443744
2025-02-27 20:15:00,83219.97,83307.57,82831.36,82923.01,0,0.659132


In [48]:
test_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8075 entries, 2024-12-05 18:00:00 to 2025-02-27 20:30:00
Data columns (total 37 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Open               8075 non-null   float64
 1   High               8075 non-null   float64
 2   Low                8075 non-null   float64
 3   Close              8075 non-null   float64
 4   Volume             8075 non-null   float64
 5   Pivot              8075 non-null   int64  
 6   Pivot_Proximity    8075 non-null   float64
 7   15m_RSI_14         8075 non-null   float64
 8   15m_RSI_6          8075 non-null   float64
 9   15m_EMA_5          8075 non-null   float64
 10  15m_EMA_21         8075 non-null   float64
 11  15m_SMA_50         8075 non-null   float64
 12  15m_MACD_12_26_9   8075 non-null   float64
 13  15m_MACDs_12_26_9  8075 non-null   float64
 14  15m_MACDh_12_26_9  8075 non-null   float64
 15  15m_BBL_20_2.0     8075 non-null   f

In [49]:
test_data['Predicted_Value']

Unnamed: 0_level_0,Predicted_Value
Date,Unnamed: 1_level_1
2024-12-05 18:00:00,
2024-12-05 18:15:00,
2024-12-05 18:30:00,
2024-12-05 18:45:00,
2024-12-05 19:00:00,
...,...
2025-02-27 19:30:00,0.533244
2025-02-27 19:45:00,0.549174
2025-02-27 20:00:00,0.443744
2025-02-27 20:15:00,0.659132


In [50]:
test_data.dropna(inplace=True)
test_data



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21,Predicted_Value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-12-06 19:00:00,101363.95,101460.87,101120.16,101424.00,336.15984,0,-0.805907,0.698055,0.720993,0.997350,...,0.948992,0.972579,0.996165,4.850370,1.081287,0.619127,0.687690,0.981929,0.971226,-0.427290
2024-12-06 19:15:00,101424.00,101500.00,101223.43,101344.25,353.85657,0,-0.767884,0.685178,0.690679,0.998756,...,0.948992,0.972579,0.996165,4.850370,1.081287,0.619127,0.687690,0.981929,0.971226,-0.441734
2024-12-06 19:30:00,101344.26,101700.00,101214.89,101668.25,340.99282,0,-0.922361,0.708688,0.743297,0.997049,...,0.948992,0.972579,0.996165,4.850370,1.081287,0.619127,0.687690,0.981929,0.971226,-0.424112
2024-12-06 19:45:00,101668.26,101799.00,101360.00,101439.99,426.10196,0,-0.813531,0.670689,0.649843,0.999528,...,0.948992,0.972579,0.996165,4.850370,1.081287,0.619127,0.687690,0.981929,0.971226,-0.470915
2024-12-06 20:00:00,101439.99,101667.98,101272.17,101666.15,303.26351,0,-0.921360,0.688510,0.695380,0.998203,...,0.949508,0.975225,1.000941,5.274051,0.981695,0.550011,0.537908,0.999105,0.988883,-0.424157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 19:30:00,84026.42,84026.42,83614.75,83938.01,623.48627,0,0.685986,0.325334,0.245913,1.003426,...,0.995850,1.016068,1.036286,3.979673,0.102629,0.285648,0.283440,1.014575,1.049984,0.533244
2025-02-27 19:45:00,83937.33,84203.53,83722.23,84113.98,446.61424,0,0.581060,0.361770,0.349479,1.000885,...,0.995850,1.016068,1.036286,3.979673,0.102629,0.285648,0.283440,1.014575,1.049984,0.549174
2025-02-27 20:00:00,84113.98,84164.97,83052.00,83219.97,899.63601,0,0.897560,0.279256,0.190213,1.007758,...,1.000023,1.022504,1.044986,4.397252,-0.000521,0.319357,0.355691,1.004969,1.038740,0.443744
2025-02-27 20:15:00,83219.97,83307.57,82831.36,82923.01,833.69040,0,0.774473,0.258190,0.160973,1.007578,...,1.000023,1.022504,1.044986,4.397252,-0.000521,0.319357,0.355691,1.004969,1.038740,0.659132


In [51]:
plot_candlestick_with_proximity(test_data, 'Predicted_Value', 0.5)

In [85]:
test_data_start_date = test_data.index[0]
test_data_end_date = test_data.index[-1]
print(test_data_start_date)
print(test_data_end_date)

2024-12-06 19:00:00
2025-02-27 20:30:00


In [52]:
trade_leverage = 10
trade_margin = 1000
trade_buy_threshold = 0.6
trade_sell_threshold = -0.6
broker_commision = 0.002

In [53]:
class PredictedValueStrategy(bt.Strategy):
    params = (
        ('buy_threshold', trade_buy_threshold),
        ('sell_threshold', trade_sell_threshold),
        ('leverage', trade_leverage),  # Leverage ratio
        ('margin', trade_margin)
    )

    def __init__(self):
        # To keep track of pending orders and buy price/commission
        self.order = None
        self.buyprice = None
        self.buycomm = None

        # Add the Predicted_Value as a data feed
        self.predicted_value = self.datas[0].predicted_value

         # Set leverage
        # self.broker.set_leverage(self.params.leverage)

    def next(self):
        # Check if an order is pending ... if yes, we cannot send a 2nd one
        if self.order:
            return

        # # Check if we are in the market
        # if not self.position:
        #   # Buy signal
        #   if self.predicted_value[0] > self.params.buy_threshold:
        #     self.log('BUY CREATE, %.2f' % self.datas[0].close[0])
        #     self.order = self.buy()
        # else:
        #   # Sell signal
        #   if self.predicted_value[0] < self.params.sell_threshold:
        #     self.log('SELL CREATE, %.2f' % self.datas[0].close[0])
        #     self.order = self.sell()



         # # Check if we are in the market
        if not self.position:
          cash = self.broker.getcash()
          position_size = (self.params.margin * self.params.leverage) / self.data.close[0]
          # Long signal
          if self.predicted_value[0] > self.params.buy_threshold:
            self.log('LONG POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.buy(size=position_size)

          # Short signal
          elif self.predicted_value[0] < self.params.sell_threshold:
            self.log('SHORT POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.sell(size=position_size)

        else:
          if self.predicted_value[0] > self.params.buy_threshold and self.position.size < 0:
            self.log('CLOSE SHORT POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.close()
          elif self.predicted_value[0] < self.params.sell_threshold and self.position.size > 0:
            self.log('CLOSE LONG POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.close()

    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return
        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            if order.status == order.Canceled:
                self.log('Order Canceled')
            elif order.status == order.Margin:
                self.log(f'Order Margin Not Enough - Available cash: {self.broker.getcash()}')
            elif order.status == order.Rejected:
                self.log('Order Rejected')

        # Write down: no pending order
        self.order = None

    def notify_trade(self, trade):
        if not trade.isclosed:
            return
        self.log(f'TRADE COMPLETED, GROSS {trade.pnl:.2f}, NET {trade.pnlcomm:.2f}, Available Cash {self.broker.getcash():.2f}')

    def log(self, txt, dt=None):
        dt = dt or self.datas[0].datetime.date(0)
        time = self.datas[0].datetime.time()
        print(f'{dt.isoformat()} {time.isoformat()}, {txt}')


In [54]:
test_data.index[0].date

<bound method Timestamp.date of Timestamp('2024-12-06 19:00:00')>

In [55]:
# Extend PandasData to include the custom column
class CustomPandasData(bt.feeds.PandasData):
    # Add custom columns
    lines = ('predicted_value',)  # Add the custom line
    params = (
        ('predicted_value', 'Predicted_Value'),  # Map the column name
    )


# Load your data into a Pandas DataFrame
backtest_data = CustomPandasData(
    dataname=test_data,
    datetime=None,  # Use the index as the datetime
    open='Open',         # Column index for Open
    high='High',         # Column index for High
    low='Low',          # Column index for Low
    close='Close',        # Column index for Close
    volume='Volume',       # Column index for Volume
    openinterest=None,# No open interest column
    predicted_value='Predicted_Value'  # Column index for Predicted_Value
)

# Create a Cerebro engine instance
cerebro = bt.Cerebro()

# Add the strategy
cerebro.addstrategy(PredictedValueStrategy)

# Add the data feed
cerebro.adddata(backtest_data)

# Set the initial cash
cerebro.broker.set_cash(1000.0)

# Set the commission
cerebro.broker.setcommission(commission=broker_commision)

# Add analyzers
cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe')
cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name='tradeanalyzer')
cerebro.addanalyzer(bt.analyzers.Returns, _name='returns')
cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')

# Run the backtest
print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
backtest_result = cerebro.run()
print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())

Starting Portfolio Value: 1000.00
2024-12-06 20:30:00, SHORT POSITION CREATED, 101831.09
2024-12-06 20:45:00, SELL EXECUTED, 101831.09
2024-12-08 08:45:00, CLOSE SHORT POSITION CREATED, 98980.01
2024-12-08 09:00:00, BUY EXECUTED, 98980.02
2024-12-08 09:00:00, TRADE COMPLETED, GROSS 279.98, NET 240.54, Available Cash 1240.54
2024-12-08 09:00:00, LONG POSITION CREATED, 99169.82
2024-12-08 09:15:00, Order Margin - Available cash: 1240.5402691849806
2024-12-08 09:15:00, LONG POSITION CREATED, 99283.99
2024-12-08 09:30:00, Order Margin - Available cash: 1240.5402691849806
2024-12-08 09:45:00, LONG POSITION CREATED, 98884.02
2024-12-08 10:00:00, Order Margin - Available cash: 1240.5402691849806
2024-12-08 10:00:00, LONG POSITION CREATED, 99407.58
2024-12-08 10:15:00, Order Margin - Available cash: 1240.5402691849806
2024-12-08 13:15:00, SHORT POSITION CREATED, 100226.62
2024-12-08 13:30:00, SELL EXECUTED, 100226.63
2024-12-08 21:00:00, CLOSE SHORT POSITION CREATED, 99744.41
2024-12-08 21:15:

In [56]:
strat = backtest_result[0]

# Print analyzers
print(f"Sharpe Ratio: {strat.analyzers.sharpe.get_analysis()}")
print(f"Max Drawdown: {strat.analyzers.drawdown.get_analysis()}")

Sharpe Ratio: OrderedDict([('sharperatio', 3.6841877512405548)])
Max Drawdown: AutoOrderedDict([('len', 274), ('drawdown', 1.3601747803289252), ('moneydown', 87.52126907377897), ('max', AutoOrderedDict([('len', 1087), ('drawdown', 23.03160022860916), ('moneydown', 936.5959407503651)]))])


In [58]:
cerebro.plot(start=len(test_data)-1000)

<IPython.core.display.Javascript object>

[[<Figure size 640x480 with 4 Axes>]]

In [59]:
(strat.analyzers.pyfolio.get_analysis()).keys()

odict_keys(['returns', 'positions', 'transactions', 'gross_lev'])

In [60]:
def make_dict_even(data):
    """
    Recursively makes all numeric values in a dictionary even.

    Args:
        data (dict): The input dictionary (can have nested dictionaries or lists).

    Returns:
        dict: A new dictionary with all numeric values made even.
    """
    if isinstance(data, dict):
        new_dict = {}
        for key, value in data.items():
            new_dict[key] = make_dict_even(value)
        return new_dict
    elif isinstance(data, list):
        return [make_dict_even(item) for item in data]
    elif isinstance(data, (int, float)):
        if isinstance(data, int):
            return data if data % 2 == 0 else data + 1
        else: # float. We will round to an int, and then make even.
            int_value = round(data)
            return int_value if int_value % 2 == 0 else int_value + 1

    else:
        return data  # Return non-numeric values as they are

In [61]:
json.dumps(make_dict_even(strat.analyzers.tradeanalyzer.get_analysis()))

'{"total": {"total": 40, "open": 0, "closed": 40}, "streak": {"won": {"current": 2, "longest": 8}, "lost": {"current": 0, "longest": 4}}, "pnl": {"gross": {"total": 6934, "average": 174}, "net": {"total": 5348, "average": 134}}, "won": {"total": 32, "pnl": {"total": 6082, "average": 196, "max": 704}}, "lost": {"total": 10, "pnl": {"total": -734, "average": -82, "max": -208}}, "long": {"total": 0, "pnl": {"total": 0, "average": 0, "won": {"total": 0, "average": 0, "max": 0}, "lost": {"total": 0, "average": 0, "max": 0}}, "won": 0, "lost": 0}, "short": {"total": 40, "pnl": {"total": 5348, "average": 134, "won": {"total": 6082, "average": 196, "max": 704}, "lost": {"total": -734, "average": -82, "max": -208}}, "won": 32, "lost": 10}, "len": {"total": 3852, "average": 96, "max": 556, "min": 10, "won": {"total": 2696, "average": 88, "max": 556, "min": 12}, "lost": {"total": 1158, "average": 130, "max": 402, "min": 10}, "long": {"total": 0, "average": 0, "max": 0, "min": 9223372036854775808,

In [86]:
json.dumps(make_dict_even(strat.analyzers.drawdown.get_analysis()))

'{"len": 274, "drawdown": 2, "moneydown": 88, "max": {"len": 1088, "drawdown": 24, "moneydown": 938}}'

In [87]:
json.dumps(strat.analyzers.sharpe.get_analysis())

'{"sharperatio": 3.6841877512405548}'

In [91]:
def plot_pyfolio_results(results):
    """
    Extracts PyFolio analyzer results from Backtrader and generates interactive plots using Plotly.

    Parameters:
        results (list): The results object returned by Backtrader's cerebro.run().

    Plots:
        1. Cumulative Returns
        2. Drawdowns
        3. Daily Returns
        4. Rolling Sharpe Ratio (252-day)
    """
    # Extract the PyFolio analyzer results
    pyfoliozer = results[0].analyzers.getbyname('pyfolio')
    returns, positions, transactions, gross_lev = pyfoliozer.get_pf_items()

    # Ensure returns is a pandas Series with a datetime index
    if not isinstance(returns, pd.Series):
        returns = pd.Series(returns, index=pd.date_range(start='2020-01-01', periods=len(returns)))

    # Plot cumulative returns
    cumulative_returns = (1 + returns).cumprod()
    fig = px.line(cumulative_returns, title='Cumulative Returns', labels={'value': 'Cumulative Returns', 'index': 'Date'})
    fig.show()

    # Plot drawdowns
    drawdown = returns / (1 + returns).cumprod() - 1
    fig = px.area(drawdown, title='Drawdowns', labels={'value': 'Drawdown', 'index': 'Date'})
    fig.show()

    # Plot daily returns
    fig = px.bar(returns, title='Daily Returns', labels={'value': 'Daily Returns', 'index': 'Date'})
    fig.show()

In [92]:
plot_pyfolio_results(backtest_result)