<a href="https://colab.research.google.com/github/ifeLight/ml-bot/blob/main/binance-multi-timeframe-grade.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Install the third-party packages used by this notebook.
# NOTE(review): no versions are pinned, so a fresh run may pull different releases.
!pip install pandas-ta
!pip install backtrader[plotting]
!pip install plotly
!pip install --upgrade firebase-admin



In [4]:
from google.colab import auth
# Authenticate the current Colab user.
auth.authenticate_user()
# Select the Google Cloud project that subsequent gcloud commands operate on.
project_id = 'ifelight'
!gcloud config set project {project_id}

Updated property [core/project].


In [5]:
import numpy as np
import pandas as pd
import datetime
import requests
import json
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import backtrader as bt
import pandas_ta as ta
from imblearn.over_sampling import SMOTE
import subprocess
import firebase_admin
from firebase_admin import firestore

In [6]:
# Google Cloud Storage bucket used to cache downloaded candle CSVs
# (files are stored under gs://<bucket_name>/trade/candles/).
bucket_name = 'ife-storage'

In [7]:
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects

# Base URL that the candle requests (/api/v3/uiKlines) are sent to.
# NOTE(review): this is an AWS API Gateway host, presumably a proxy in front of the Binance REST API — confirm.
binance_base_url = 'https://52on3577u3.execute-api.eu-central-1.amazonaws.com'

def get_binance_candles(symbol: str, interval='1h', limit: int = 50, **kwargs):
    """
    Fetch one page of candles from the `/api/v3/uiKlines` endpoint.

    Args:
        symbol: Trading pair, e.g. 'BTCUSDT'.
        interval: Candle interval string understood by the API (e.g. '15m', '1h').
        limit: Maximum number of candles requested for this page.
        **kwargs: Extra query parameters forwarded verbatim (e.g. startTime, endTime).

    Returns:
        list[dict]: One dict per candle with the keys 'Date', 'Open', 'High',
        'Low', 'Close' and 'Volume'. Values are passed through exactly as the
        API returned them ('Date' is later parsed as epoch milliseconds by
        candles_to_df).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the payload is not a JSON list of candles.
    """
    query = {'symbol': symbol, 'interval': interval, 'limit': limit}
    query.update(kwargs)
    # Let requests build (and escape) the query string, and never wait forever
    # on a stalled connection.
    response = requests.get(f'{binance_base_url}/api/v3/uiKlines', params=query, timeout=30)
    response.raise_for_status()
    result = response.json()
    # An error payload is a JSON object; iterating it would silently produce
    # garbage "candles" built from its keys.
    if not isinstance(result, list):
        raise ValueError(f'Unexpected candle response: {result}')
    return [
        {
            'Date': row[0],
            'Open': row[1],
            'High': row[2],
            'Low': row[3],
            'Close': row[4],
            'Volume': row[5],
        }
        for row in result
    ]


def candles_to_df(data):
    """
    Convert a list of candle dicts into a float OHLCV DataFrame indexed by date.

    Args:
        data: Iterable of dicts with the keys 'Date' (epoch milliseconds),
            'Open', 'High', 'Low', 'Close' and 'Volume'.

    Returns:
        pd.DataFrame: Frame indexed by a datetime 'Date' index whose OHLCV
        columns are floats. Empty input yields an empty frame with the same
        columns and an empty DatetimeIndex instead of raising KeyError.
    """
    numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    df = pd.DataFrame(data)
    if df.empty:
        # Nothing to convert: return a correctly shaped empty frame so callers
        # can still rely on the column layout and index type.
        return pd.DataFrame(
            columns=numeric_columns,
            index=pd.DatetimeIndex([], name='Date'),
            dtype=float,
        )
    df['Date'] = pd.to_datetime(df['Date'], unit='ms')
    df = df.astype({column: float for column in numeric_columns})
    return df.set_index('Date')

def get_all_binance_candles(symbol: str, interval='1h', start_date=None, end_date=None, limit=1000):
  """
  Return every candle between start_date and end_date, using the cloud cache when possible.

  The cached CSV is tried first; on a cache miss the candles are downloaded
  page by page, converted to a DataFrame and uploaded to the cache.

  Args:
      symbol: Trading pair, e.g. 'BTCUSDT'.
      interval: Candle interval string (e.g. '15m').
      start_date: Anything pd.to_datetime accepts; defaults to 2015-01-01.
      end_date: Anything pd.to_datetime accepts; defaults to today.
      limit: Page size requested from the API.

  Returns:
      pd.DataFrame: OHLCV frame indexed by candle date.
  """
  try:
    return load_candles_from_cloud_storage(symbol, interval, start_date, end_date)
  except FileNotFoundError:
    # Cache miss: fall through and download from the API.
    pass
  result = []
  # The raw start_date/end_date arguments are kept untouched because they form the cache key.
  range_start = pd.to_datetime(start_date) if start_date else pd.to_datetime('2015-01-01')
  range_end = pd.to_datetime(end_date) if end_date else pd.to_datetime('today')
  cursor_ms = int(range_start.timestamp() * 1000)
  end_ms = int(range_end.timestamp() * 1000)
  while cursor_ms <= end_ms:
    candles = get_binance_candles(symbol, interval, limit, startTime=cursor_ms, endTime=end_ms)
    if not candles:
      break
    result += candles
    # Advance in epoch milliseconds, just past the last candle received. Staying
    # in integer ms avoids the local-timezone round trip of
    # datetime.fromtimestamp (which shifts the cursor outside UTC) and stops the
    # last candle of a page from being fetched again as the first of the next.
    next_cursor_ms = int(candles[-1]['Date']) + 1
    if next_cursor_ms <= cursor_ms:
      # Defensive: the server returned nothing newer, so stop instead of looping forever.
      break
    cursor_ms = next_cursor_ms
  candles_df = candles_to_df(result)
  if not candles_df.empty:
    # Do not cache an empty download; a later run should retry the API.
    save_candles_to_cloud_storage(candles_df, symbol, interval, start_date, end_date)
  return candles_df

def candles_storage_file_name(symbol: str, interval='1h', start_date=None, end_date=None):
  """Build the CSV file name that identifies one cached candle download.

  The name embeds the symbol, interval and the start/end dates exactly as
  they were passed in (None is rendered as the text 'None').
  """
  return 'binance_{}_{}_{}_{}.csv'.format(symbol, interval, start_date, end_date)

def load_candles_from_cloud_storage(symbol: str, interval: str, start_date=None, end_date=None):
  """Download a cached candle CSV with gsutil and load it as a DataFrame.

  Returns:
      pd.DataFrame read from the CSV, using the first column as a parsed date index.

  Raises:
      FileNotFoundError: If the gsutil copy exits with a non-zero status.
  """
  file_name = candles_storage_file_name(symbol, interval, start_date, end_date)
  remote_uri = f'gs://{bucket_name}/trade/candles/{file_name}'
  local_path = f'/tmp/{file_name}'

  try:
    # Copy the cached file from cloud storage to local disk.
    subprocess.run(['gsutil', 'cp', remote_uri, local_path], check=True)
  except subprocess.CalledProcessError:
    # Any failed copy is reported to the caller as a missing cache file.
    raise FileNotFoundError(f"File not found: {remote_uri}")

  # Parse the downloaded CSV into a DataFrame.
  with open(local_path, 'r') as cached_file:
    return pd.read_csv(cached_file, index_col=0, parse_dates=True)

def save_candles_to_cloud_storage(df: pd.DataFrame, symbol: str, interval: str, start_date, end_date):
  """Write the candles to a local CSV and upload it to the cloud cache.

  The upload is best-effort: a failed copy is reported with a warning but
  does not raise, so freshly downloaded data is still returned to the caller.

  Args:
      df: Candle frame to persist.
      symbol, interval, start_date, end_date: Values that make up the cache file name.
  """
  file_name = candles_storage_file_name(symbol, interval, start_date, end_date)
  local_path = f'/tmp/{file_name}'
  remote_uri = f'gs://{bucket_name}/trade/candles/{file_name}'
  df.to_csv(local_path)
  # Use an argument list (no shell) like load_candles_from_cloud_storage does,
  # so the file name is never interpreted by a shell and the function remains
  # valid Python outside IPython.
  try:
    completed = subprocess.run(['gsutil', 'cp', local_path, remote_uri], check=False)
  except OSError as error:
    print(f'Warning: could not run gsutil to upload {remote_uri}: {error}')
    return
  if completed.returncode != 0:
    print(f'Warning: upload to {remote_uri} failed with exit code {completed.returncode}')


In [8]:
# Download (or load from the cloud cache) the base-timeframe candles.
timeframe = '15m'
symbol = 'BTCUSDT'
exchange = 'binance'
# Dates are written day-first (DD-MM-YYYY). The raw strings are also embedded in
# the cache file name, so changing their format results in a different cache entry.
start_date = '01-01-2015'
end_date = '28-02-2025'
raw_data = get_all_binance_candles(symbol, timeframe, start_date, end_date)
raw_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-08-17 04:00:00,4261.48,4280.56,4261.48,4261.48,2.189061
2017-08-17 04:15:00,4261.48,4270.41,4261.32,4261.45,9.119865
2017-08-17 04:30:00,4280.00,4310.07,4267.99,4310.07,21.923552
2017-08-17 04:45:00,4310.07,4313.62,4291.37,4308.83,13.948531
2017-08-17 05:00:00,4308.83,4328.69,4304.31,4304.31,5.101153
...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.136820
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.904370
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.514270
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.752220


# Pivot labelling and feature engineering

In [9]:
# Work on a copy so the downloaded candles in raw_data stay untouched.
data = raw_data.copy()
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-08-17 04:00:00,4261.48,4280.56,4261.48,4261.48,2.189061
2017-08-17 04:15:00,4261.48,4270.41,4261.32,4261.45,9.119865
2017-08-17 04:30:00,4280.00,4310.07,4267.99,4310.07,21.923552
2017-08-17 04:45:00,4310.07,4313.62,4291.37,4308.83,13.948531
2017-08-17 05:00:00,4308.83,4328.69,4304.31,4304.31,5.101153
...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.136820
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.904370
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.514270
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.752220


In [10]:
def add_pivots(df, window=5):
    """
    Flag swing-high and swing-low pivots and enforce that pivot types alternate.

    A row is a higher pivot when its 'High' equals the maximum of the centered
    window of ``2 * window + 1`` rows around it, and a lower pivot when its
    'Low' equals the minimum of that window. Rows without a full window (the
    first and last ``window`` rows) get NaN flags. When two pivots of the same
    type follow each other, only the more extreme one (the higher high or the
    lower low) is kept; on a tie the earlier one wins.

    Args:
        df (pd.DataFrame): Candles with 'High' and 'Low' columns.
        window (int): Number of rows on each side of the candidate pivot.

    Returns:
        pd.DataFrame: A new frame (the input is left untouched) with duplicated
        index entries removed (first occurrence kept) and float columns
        'Higher_Pivot' and 'Lower_Pivot' holding 1, 0 or NaN.
    """
    # De-duplicate first so repeated rows cannot distort the rolling windows,
    # and copy so later column assignments by the caller do not trigger
    # SettingWithCopyWarning or touch the caller's original frame.
    df = df[~df.index.duplicated(keep='first')].copy()

    # Step 1: raw pivot candidates. Comparing against the centered rolling
    # extreme is equivalent to checking the middle element of each window,
    # without a slow Python callback per row.
    span = 2 * window + 1
    rolling_high = df['High'].rolling(window=span, center=True).max()
    rolling_low = df['Low'].rolling(window=span, center=True).min()
    higher = (df['High'] == rolling_high).astype(float).where(rolling_high.notna())
    lower = (df['Low'] == rolling_low).astype(float).where(rolling_low.notna())

    high_values = df['High'].to_numpy()
    low_values = df['Low'].to_numpy()
    higher_flags = higher.to_numpy(copy=True)
    lower_flags = lower.to_numpy(copy=True)

    # Step 2: ensure no two successive pivots of the same type.
    pivot_type = None  # Type of the last kept pivot ('higher' / 'lower')
    last_pivot_index = None  # Position of the last kept pivot

    for i in range(len(df)):
        if higher_flags[i] == 1:
            if pivot_type == 'higher':
                if high_values[i] > high_values[last_pivot_index]:
                    # The new high is more extreme: drop the previous one.
                    higher_flags[last_pivot_index] = 0
                    last_pivot_index = i
                else:
                    # The previous high stays; drop the current one.
                    higher_flags[i] = 0
            else:
                pivot_type = 'higher'
                last_pivot_index = i
        elif lower_flags[i] == 1:
            if pivot_type == 'lower':
                if low_values[i] < low_values[last_pivot_index]:
                    # The new low is more extreme: drop the previous one.
                    lower_flags[last_pivot_index] = 0
                    last_pivot_index = i
                else:
                    # The previous low stays; drop the current one.
                    lower_flags[i] = 0
            else:
                pivot_type = 'lower'
                last_pivot_index = i

    df['Higher_Pivot'] = higher_flags
    df['Lower_Pivot'] = lower_flags
    return df

In [11]:
# Flag pivots using 10 candles on each side of the candidate (a 21-row window).
data = add_pivots(data, 10)

In [12]:
def plot_pivots(df):
  """Show a candlestick chart of df with its pivot rows marked.

  Rows whose 'Higher_Pivot' equals 1 are drawn as red down-triangles at the
  'High' price; rows whose 'Lower_Pivot' equals 1 as green up-triangles at
  the 'Low' price.

  Args:
      df: Frame with 'Open', 'High', 'Low', 'Close', 'Higher_Pivot' and
          'Lower_Pivot' columns.
  """
  def marker_trace(flag_column, price_column, color, symbol, label):
    # Scatter trace for the rows flagged in flag_column, placed at price_column.
    flagged = df[df[flag_column] == 1]
    return go.Scatter(
        x=flagged.index,
        y=flagged[price_column],
        mode='markers',
        marker=dict(color=color, size=10, symbol=symbol),
        name=label
    )

  price_trace = go.Candlestick(
      x=df.index,
      open=df['Open'],
      high=df['High'],
      low=df['Low'],
      close=df['Close'],
      name='Candlestick'
  )
  traces = [
      price_trace,
      marker_trace('Higher_Pivot', 'High', 'red', 'triangle-down', 'Higher Pivot'),
      marker_trace('Lower_Pivot', 'Low', 'green', 'triangle-up', 'Lower Pivot'),
  ]

  fig = go.Figure(data=traces)
  fig.update_layout(
      title='Candlestick Chart with Higher and Lower Pivots',
      xaxis_title='Date',
      yaxis_title='Price',
      xaxis_rangeslider_visible=False,
      template='plotly_dark'
  )
  fig.show()

In [13]:
# Visual check on the first 1000 candles only.
plot_pivots(data[:1000])

In [14]:
# Collapse the two flag columns into one signed label:
# -1 = higher pivot, 1 = lower pivot, 0 = no pivot (a higher pivot wins if both are set).
data['Pivot'] = np.where(data['Higher_Pivot'] == 1, -1, np.where(data['Lower_Pivot'] == 1, 1, 0))
# The individual flag columns are no longer needed.
del data['Higher_Pivot']
del data['Lower_Pivot']
data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-08-17 04:00:00,4261.48,4280.56,4261.48,4261.48,2.189061,0
2017-08-17 04:15:00,4261.48,4270.41,4261.32,4261.45,9.119865,0
2017-08-17 04:30:00,4280.00,4310.07,4267.99,4310.07,21.923552,0
2017-08-17 04:45:00,4310.07,4313.62,4291.37,4308.83,13.948531,0
2017-08-17 05:00:00,4308.83,4328.69,4304.31,4304.31,5.101153,0
...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.136820,0
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.904370,0
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.514270,0
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.752220,0


In [15]:
def calculate_pivot_proximity(df):
    """
    Add a 'Pivot_Proximity' column describing where each row sits between pivots.

    Pivot rows get their own label (-1.0 for Pivot == -1, 1.0 for Pivot == 1).
    Every other row is placed between the closest previous pivot and the next
    pivot of the opposite type, using the 'Close' prices of those rows:
    the value approaches the label of whichever of the two pivots is nearer
    in price. Rows lacking a previous pivot or a following opposite pivot,
    and rows where both price distances are zero, keep 0.0.

    The neighbouring pivots are looked up from tables built in two linear
    passes instead of being searched again for every row.

    Args:
        df (pd.DataFrame): DataFrame with 'Pivot' and 'Close' columns.

    Returns:
        pd.DataFrame: Copy of df with the 'Pivot_Proximity' column added.
    """
    df = df.copy()

    pivot_values = df['Pivot'].values
    close_values = df['Close'].values
    row_count = len(df)
    proximity_values = [0.0] * row_count

    # Position of the closest earlier row with a non-zero pivot, per row.
    previous_pivot = [None] * row_count
    last_seen = None
    for i in range(row_count):
        previous_pivot[i] = last_seen
        if pivot_values[i] != 0:
            last_seen = i

    # Position of the closest later row with Pivot == 1 / Pivot == -1, per row.
    next_positive = [None] * row_count
    next_negative = [None] * row_count
    upcoming_positive = None
    upcoming_negative = None
    for i in range(row_count - 1, -1, -1):
        next_positive[i] = upcoming_positive
        next_negative[i] = upcoming_negative
        if pivot_values[i] == 1:
            upcoming_positive = i
        elif pivot_values[i] == -1:
            upcoming_negative = i

    for i in range(row_count):
        pivot = pivot_values[i]
        if pivot == -1:
            proximity_values[i] = -1.0
            continue
        if pivot == 1:
            proximity_values[i] = 1.0
            continue

        previous_index = previous_pivot[i]
        if previous_index is None:
            continue

        # After a -1 pivot the next target is a 1 pivot, and vice versa.
        previous_is_negative = pivot_values[previous_index] == -1
        next_index = next_positive[i] if previous_is_negative else next_negative[i]
        if next_index is None:
            continue

        distance_to_previous = abs(close_values[i] - close_values[previous_index])
        distance_to_next = abs(close_values[i] - close_values[next_index])

        if distance_to_previous + distance_to_next != 0:
            if previous_is_negative:
                proximity_values[i] = (distance_to_previous - distance_to_next) / (distance_to_previous + distance_to_next)
            else:
                proximity_values[i] = (distance_to_next - distance_to_previous) / (distance_to_previous + distance_to_next)

    df['Pivot_Proximity'] = proximity_values
    return df

In [16]:
# Add the Pivot_Proximity column derived from the Pivot labels.
data = calculate_pivot_proximity(data)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-08-17 04:00:00,4261.48,4280.56,4261.48,4261.48,2.189061,0,0.0
2017-08-17 04:15:00,4261.48,4270.41,4261.32,4261.45,9.119865,0,0.0
2017-08-17 04:30:00,4280.00,4310.07,4267.99,4310.07,21.923552,0,0.0
2017-08-17 04:45:00,4310.07,4313.62,4291.37,4308.83,13.948531,0,0.0
2017-08-17 05:00:00,4308.83,4328.69,4304.31,4304.31,5.101153,0,0.0
...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.136820,0,0.0
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.904370,0,0.0
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.514270,0,0.0
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.752220,0,0.0


In [17]:
def plot_candlestick_with_proximity(df, col='Pivot_Proximity', limit=0.0):
    """Show candles with pivot markers and a proximity-coloured overlay.

    Args:
        df: Frame with 'Open', 'High', 'Low', 'Close' and the column named by col.
        col: Proximity column. Rows equal to -1 are marked as higher pivots,
            rows equal to 1 as lower pivots.
        limit: Threshold for the overlay colour: values above +limit are green,
            values below -limit are red, everything else is gray.
    """
    upper_threshold = 0 + limit
    lower_threshold = 0 + (-1 * limit)

    def overlay_color(value):
        # Green above the upper threshold, red below the lower one, gray otherwise.
        if value > upper_threshold:
            return 'green'
        if value < lower_threshold:
            return 'red'
        return 'gray'

    def marker_trace(label_value, price_column, color, symbol, label):
        # Scatter trace for the rows whose proximity equals label_value.
        selected = df[df[col] == label_value]
        return go.Scatter(
            x=selected.index,
            y=selected[price_column],
            mode='markers',
            marker=dict(color=color, size=10, symbol=symbol),
            name=label
        )

    # Price candles
    price_trace = go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
        name='Candlesticks',
        increasing=dict(line=dict(color='green'), fillcolor='green'),
        decreasing=dict(line=dict(color='red'), fillcolor='red'),
    )

    # Semi-transparent bars on the secondary axis, coloured by proximity
    bar_colors = [overlay_color(value) for value in df[col]]
    overlay_trace = go.Bar(
        x=df.index,
        y=df['Close'],
        marker=dict(color=bar_colors),
        opacity=0.3,
        name=col,
        yaxis='y2'
    )

    # Pivot markers
    higher_trace = marker_trace(-1, 'High', 'red', 'triangle-down', 'Higher Pivot')
    lower_trace = marker_trace(1, 'Low', 'green', 'triangle-up', 'Lower Pivot')

    fig = go.Figure(data=[price_trace, higher_trace, lower_trace, overlay_trace])
    fig.update_layout(
        title='Candlestick Chart with Pivot Proximity',
        xaxis_title='Date',
        yaxis_title='Price',
        yaxis2=dict(title='Pivot Proximity', overlaying='y', side='right'),
        template='plotly_dark',
        showlegend=True,
        xaxis_rangeslider_visible=False,
    )
    fig.show()

In [18]:
# Visual check on the first 500 candles; only proximities beyond +/-0.7 are coloured.
plot_candlestick_with_proximity(data[0:500], 'Pivot_Proximity', 0.7)

In [19]:
# Inspect the frame after the Pivot and Pivot_Proximity columns were added.
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-08-17 04:00:00,4261.48,4280.56,4261.48,4261.48,2.189061,0,0.0
2017-08-17 04:15:00,4261.48,4270.41,4261.32,4261.45,9.119865,0,0.0
2017-08-17 04:30:00,4280.00,4310.07,4267.99,4310.07,21.923552,0,0.0
2017-08-17 04:45:00,4310.07,4313.62,4291.37,4308.83,13.948531,0,0.0
2017-08-17 05:00:00,4308.83,4328.69,4304.31,4304.31,5.101153,0,0.0
...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.136820,0,0.0
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.904370,0,0.0
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.514270,0,0.0
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.752220,0,0.0


In [20]:
# Scratch check: look at the columns pandas-ta returns for Bollinger Bands with default settings.
z = ta.bbands(data['Close'])
z

Unnamed: 0_level_0,BBL_5_2.0,BBM_5_2.0,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-08-17 04:00:00,,,,,
2017-08-17 04:15:00,,,,,
2017-08-17 04:30:00,,,,,
2017-08-17 04:45:00,,,,,
2017-08-17 05:00:00,4243.729315,4289.228,4334.726685,2.121533,0.665741
...,...,...,...,...,...
2025-02-27 23:00:00,83848.249467,84346.110,84843.970533,1.180518,0.706825
2025-02-27 23:15:00,83925.986398,84426.678,84927.369602,1.186098,0.665293
2025-02-27 23:30:00,84209.558479,84555.218,84900.877521,0.817595,0.687297
2025-02-27 23:45:00,84518.203266,84650.774,84783.344734,0.313218,0.718019


In [21]:
# Scratch check: name of the second Bollinger Bands column.
z.columns[1]

'BBM_5_2.0'

In [22]:
# Registry of feature column names; each add_scaled_* helper appends the columns it creates.
features_columns = []

def add_scaled_rsi(df, window=14, prefix = ''):
  """Add the RSI of 'Close', divided by 100, as column '<prefix>RSI_<window>'.

  The column is written into df in place and its name is registered in
  features_columns (once). Returns the same df.
  """
  column_name = f"{prefix}RSI_{window}"
  df[column_name] = ta.rsi(df['Close'], length=window) / 100
  already_registered = column_name in features_columns
  if not already_registered:
    features_columns.append(column_name)
  return df

def add_scaled_ema(df, window=50, prefix = ''):
  """Add the EMA of 'Close' relative to 'Close' as column '<prefix>EMA_<window>'.

  The column (EMA divided by the row's close) is written into df in place
  and its name is registered in features_columns (once). Returns the same df.
  """
  column_name = f"{prefix}EMA_{window}"
  moving_average = ta.ema(df['Close'], length=window)
  df[column_name] = moving_average / df['Close']
  already_registered = column_name in features_columns
  if not already_registered:
    features_columns.append(column_name)
  return df

def add_scaled_sma(df, window=50, prefix = ''):
  """Add the SMA of 'Close' relative to 'Close' as column '<prefix>SMA_<window>'.

  The column (SMA divided by the row's close) is written into df in place
  and its name is registered in features_columns (once). Returns the same df.
  """
  column_name = f"{prefix}SMA_{window}"
  moving_average = ta.sma(df['Close'], length=window)
  df[column_name] = moving_average / df['Close']
  already_registered = column_name in features_columns
  if not already_registered:
    features_columns.append(column_name)
  return df

def add_scaled_macd(df, prefix= '', fast = 12, slow = 26, signal=9):
  """Add the three MACD series of 'Close', each divided by 'Close'.

  Creates '<prefix>MACD_<f>_<s>_<sig>', '<prefix>MACDs_<f>_<s>_<sig>' and
  '<prefix>MACDh_<f>_<s>_<sig>' in df (in place) and registers the names in
  features_columns (once each). Returns the same df.
  """
  macd_df = ta.macd(df['Close'], fast=fast, slow=slow, signal=signal)
  suffix = f"{fast}_{slow}_{signal}"
  # (output column, position in the frame returned by ta.macd).
  # NOTE(review): presumably pandas-ta orders its columns MACD, histogram, signal — confirm.
  outputs = [
      (f"{prefix}MACD_{suffix}", 0),
      (f"{prefix}MACDs_{suffix}", 2),
      (f"{prefix}MACDh_{suffix}", 1),
  ]
  for column_name, position in outputs:
    df[column_name] = macd_df[macd_df.columns[position]] / df['Close']
  for column_name, _ in outputs:
    if column_name not in features_columns:
      features_columns.append(column_name)
  return df

def add_scaled_bbands(df, window=20, std=2.0, prefix= ''):
  """Add the five Bollinger Bands series of 'Close' as feature columns.

  The first three columns returned by ta.bbands (stored as BBL, BBM, BBU) are
  divided by 'Close'; the last two (stored as BBB, BBP) are copied unchanged.
  Columns are named '<prefix><label>_<window>_<std>', written into df in
  place and registered in features_columns (once each). Returns the same df.
  """
  bbands_df = ta.bbands(df['Close'], length=window, std=std)
  suffix = f"{window}_{std}"
  # (label, divide by Close?) listed in the positional order of ta.bbands' columns.
  band_specs = [('BBL', True), ('BBM', True), ('BBU', True), ('BBB', False), ('BBP', False)]
  column_names = [f"{prefix}{label}_{suffix}" for label, _ in band_specs]
  for position, (_, relative_to_close) in enumerate(band_specs):
    band = bbands_df[bbands_df.columns[position]]
    if relative_to_close:
      df[column_names[position]] = band / df['Close']
    else:
      df[column_names[position]] = band
  for column_name in column_names:
    if column_name not in features_columns:
      features_columns.append(column_name)
  return df

In [23]:
# Add technical indicators on the base (15m) timeframe.
# Each helper writes its columns into `data` in place and registers them in features_columns.
lower_timeframe = timeframe
lower_timeframe_prefix = f"{lower_timeframe}_"
add_scaled_rsi(data, 14, prefix=lower_timeframe_prefix)
add_scaled_rsi(data, 6, prefix=lower_timeframe_prefix)
add_scaled_ema(data, 5, prefix=lower_timeframe_prefix)
add_scaled_ema(data, 21, prefix=lower_timeframe_prefix)
add_scaled_sma(data, 50, prefix=lower_timeframe_prefix)
add_scaled_macd(data, prefix=lower_timeframe_prefix)
add_scaled_bbands(data, prefix=lower_timeframe_prefix)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,15m_EMA_21,15m_SMA_50,15m_MACD_12_26_9,15m_MACDs_12_26_9,15m_MACDh_12_26_9,15m_BBL_20_2.0,15m_BBM_20_2.0,15m_BBU_20_2.0,15m_BBB_20_2.0,15m_BBP_20_2.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2017-08-17 04:00:00,4261.48,4280.56,4261.48,4261.48,2.189061,0,0.0,,,,,,,,,,,,,
2017-08-17 04:15:00,4261.48,4270.41,4261.32,4261.45,9.119865,0,0.0,,,,,,,,,,,,,
2017-08-17 04:30:00,4280.00,4310.07,4267.99,4310.07,21.923552,0,0.0,,,,,,,,,,,,,
2017-08-17 04:45:00,4310.07,4313.62,4291.37,4308.83,13.948531,0,0.0,,,,,,,,,,,,,
2017-08-17 05:00:00,4308.83,4328.69,4304.31,4304.31,5.101153,0,0.0,,,0.996496,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.136820,0,0.0,0.522309,0.624015,0.998218,0.996985,1.006491,-0.001826,-0.003695,0.001870,0.982790,0.994341,1.005892,2.323422,0.744955
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.904370,0,0.0,0.527919,0.635524,0.998496,0.996829,1.005441,-0.001347,-0.003224,0.001877,0.982314,0.993877,1.005439,2.326750,0.764784
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.514270,0,0.0,0.541285,0.663960,0.998270,0.996127,1.003851,-0.000869,-0.002750,0.001881,0.981310,0.992750,1.004190,2.304696,0.816875
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.752220,0,0.0,0.544868,0.671890,0.998659,0.996224,1.003111,-0.000463,-0.002292,0.001829,0.980974,0.992511,1.004047,2.324784,0.824588


In [24]:
# Feature columns registered so far (base timeframe only).
features_columns

['15m_RSI_14',
 '15m_RSI_6',
 '15m_EMA_5',
 '15m_EMA_21',
 '15m_SMA_50',
 '15m_MACD_12_26_9',
 '15m_MACDs_12_26_9',
 '15m_MACDh_12_26_9',
 '15m_BBL_20_2.0',
 '15m_BBM_20_2.0',
 '15m_BBU_20_2.0',
 '15m_BBB_20_2.0',
 '15m_BBP_20_2.0']

In [25]:
def resample_candles(df, interval='1h'):
    """Aggregate candles into a coarser interval.

    Only the OHLCV columns are kept: first 'Open', highest 'High', lowest
    'Low', last 'Close' and summed 'Volume' per bucket. Missing values in
    the aggregated result are forward-filled.
    """
    ohlcv_rules = {
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume': 'sum',
    }
    aggregated = df.resample(interval).agg(ohlcv_rules)
    return aggregated.ffill()

In [26]:
# Build 1h candles from the base candles and compute the 1h indicators on them.
# resample_candles keeps only the OHLCV columns, so the 15m features are not carried over.
middle_timeframe = '1h'
middle_timeframe_prefix = f"{middle_timeframe}_"
middle_data = resample_candles(data, middle_timeframe)
add_scaled_rsi(middle_data, 14, prefix=middle_timeframe_prefix)
add_scaled_rsi(middle_data, 6, prefix=middle_timeframe_prefix)
add_scaled_ema(middle_data, 5, prefix=middle_timeframe_prefix)
add_scaled_ema(middle_data, 21, prefix=middle_timeframe_prefix)
add_scaled_macd(middle_data, prefix=middle_timeframe_prefix)
add_scaled_bbands(middle_data, prefix=middle_timeframe_prefix)
middle_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,1h_RSI_14,1h_RSI_6,1h_EMA_5,1h_EMA_21,1h_MACD_12_26_9,1h_MACDs_12_26_9,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-08-17 04:00:00,4261.48,4313.62,4261.32,4308.83,47.181009,,,,,,,,,,,,
2017-08-17 05:00:00,4308.83,4328.69,4291.37,4315.32,23.234916,,,,,,,,,,,,
2017-08-17 06:00:00,4330.29,4345.45,4309.37,4324.35,7.229691,,,,,,,,,,,,
2017-08-17 07:00:00,4316.62,4349.99,4287.41,4349.99,4.443249,,,,,,,,,,,,
2017-08-17 08:00:00,4333.32,4377.85,4333.32,4360.69,0.972807,,,0.993383,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 20:00:00,84113.98,84164.97,82716.49,83524.98,3204.301910,0.327430,0.217424,1.009057,1.021438,-0.007418,-0.005666,-1.752023e-03,1.000023,1.022504,1.044986,4.397252,-0.000521
2025-02-27 21:00:00,83522.02,84433.97,83223.74,84409.18,1878.951270,0.414654,0.431693,0.998991,1.009762,-0.007269,-0.005939,-1.330089e-03,0.988979,1.011584,1.034188,4.469106,0.243770
2025-02-27 22:00:00,84409.18,84758.63,83881.08,84716.34,994.537450,0.441739,0.489913,0.996913,1.005546,-0.006815,-0.006097,-7.184214e-04,0.986112,1.008165,1.030218,4.374853,0.314885
2025-02-27 23:00:00,84716.01,84927.84,84422.02,84708.58,789.307680,0.441184,0.488396,0.998003,1.005126,-0.006411,-0.006160,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980


In [27]:
# Build 4h candles from the base candles and compute the 4h indicators (RSI and EMA only).
higher_timeframe = '4h'
higher_timeframe_prefix = f"{higher_timeframe}_"
higher_data = resample_candles(data, higher_timeframe)
add_scaled_rsi(higher_data, 14, prefix=higher_timeframe_prefix)
add_scaled_rsi(higher_data, 6, prefix=higher_timeframe_prefix)
add_scaled_ema(higher_data, 5, prefix=higher_timeframe_prefix)
add_scaled_ema(higher_data, 21, prefix=higher_timeframe_prefix)
higher_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-08-17 04:00:00,4261.48,4349.99,4261.32,4349.99,82.088865,,,,
2017-08-17 08:00:00,4333.32,4485.39,4333.32,4427.30,63.619882,,,,
2017-08-17 12:00:00,4436.06,4485.39,4333.42,4352.34,174.562001,,,,
2017-08-17 16:00:00,4352.33,4354.84,4200.74,4325.23,225.109716,,,,
2017-08-17 20:00:00,4307.56,4369.69,4258.56,4285.08,249.769913,,,1.014681,
...,...,...,...,...,...,...,...,...,...
2025-02-27 08:00:00,86272.01,87078.46,85846.87,86698.60,4102.588270,0.354086,0.453377,0.994006,1.027267
2025-02-27 12:00:00,86698.60,86795.90,84640.75,85500.84,12382.790750,0.319968,0.362436,1.005287,1.037871
2025-02-27 16:00:00,85500.84,85691.51,83614.75,84113.98,6757.934590,0.285648,0.283440,1.014575,1.049984
2025-02-27 20:00:00,84113.98,84927.84,82716.49,84708.58,6867.098310,0.319357,0.355691,1.004969,1.038740


In [28]:
# Feature columns registered across all three timeframes.
features_columns

['15m_RSI_14',
 '15m_RSI_6',
 '15m_EMA_5',
 '15m_EMA_21',
 '15m_SMA_50',
 '15m_MACD_12_26_9',
 '15m_MACDs_12_26_9',
 '15m_MACDh_12_26_9',
 '15m_BBL_20_2.0',
 '15m_BBM_20_2.0',
 '15m_BBU_20_2.0',
 '15m_BBB_20_2.0',
 '15m_BBP_20_2.0',
 '1h_RSI_14',
 '1h_RSI_6',
 '1h_EMA_5',
 '1h_EMA_21',
 '1h_MACD_12_26_9',
 '1h_MACDs_12_26_9',
 '1h_MACDh_12_26_9',
 '1h_BBL_20_2.0',
 '1h_BBM_20_2.0',
 '1h_BBU_20_2.0',
 '1h_BBB_20_2.0',
 '1h_BBP_20_2.0',
 '4h_RSI_14',
 '4h_RSI_6',
 '4h_EMA_5',
 '4h_EMA_21']

In [29]:
def merge_candlesticks_data(df1, df2):
  """
  Merge a larger-timeframe frame into a smaller-timeframe one with forward fill.

  OHLCV columns of df2 that df1 already has (compared case-insensitively) are
  left out of the merge so the result has no duplicate price columns. The
  frames are outer-joined on their datetime index and every column is then
  forward-filled, so each df2 row is repeated on the following df1 rows until
  the next df2 row. Neither input frame is modified.

  NOTE(review): if df2 rows are labelled with the candle's open time (the
  pandas resample default), values computed from that candle's close end up
  on df1 rows that occur before that close — confirm whether df2 should be
  shifted by one period to avoid look-ahead in downstream models.

  Args:
      df1: First candlestick DataFrame with datetime index (smaller timeframe).
      df2: Second candlestick DataFrame with datetime index (larger timeframe).

  Returns:
      Merged DataFrame with forward-filled values, and no duplicate OHLCV columns.

  Raises:
      ValueError: If either frame does not have a DatetimeIndex.
  """
  # Ensure both DataFrames have a datetime index
  if not isinstance(df1.index, pd.DatetimeIndex) or not isinstance(df2.index, pd.DatetimeIndex):
      raise ValueError("DataFrames must have a datetime index.")

  ohlcv_cols = ['open', 'high', 'low', 'close', 'volume']
  df1_lower_names = set(df1.columns.str.lower())

  # Leave out df2's OHLCV columns that df1 already provides. drop() returns a
  # new frame, so the caller's df2 keeps all of its columns.
  conflicting = [
      col for col in df2.columns
      if col.lower() in ohlcv_cols and col.lower() in df1_lower_names
  ]
  df2_features = df2.drop(columns=conflicting)

  # Outer join preserves every timestamp present in either frame
  merged_df = pd.merge(df1, df2_features, how='outer', left_index=True, right_index=True, suffixes=('_df1', '_df2'))

  # Forward fill the missing values of every column
  return merged_df.ffill()

In [30]:
# Attach the 1h and then the 4h indicator columns to the base-timeframe rows.
data = merge_candlesticks_data(data, middle_data)
data = merge_candlesticks_data(data, higher_data)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-08-17 04:00:00,4261.48,4280.56,4261.48,4261.48,2.189061,0.0,0.0,,,,...,,,,,,,,,,
2017-08-17 04:15:00,4261.48,4270.41,4261.32,4261.45,9.119865,0.0,0.0,,,,...,,,,,,,,,,
2017-08-17 04:30:00,4280.00,4310.07,4267.99,4310.07,21.923552,0.0,0.0,,,,...,,,,,,,,,,
2017-08-17 04:45:00,4310.07,4313.62,4291.37,4308.83,13.948531,0.0,0.0,,,,...,,,,,,,,,,
2017-08-17 05:00:00,4308.83,4328.69,4304.31,4304.31,5.101153,0.0,0.0,,,0.996496,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.136820,0.0,0.0,0.522309,0.624015,0.998218,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.904370,0.0,0.0,0.527919,0.635524,0.998496,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.514270,0.0,0.0,0.541285,0.663960,0.998270,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.752220,0.0,0.0,0.544868,0.671890,0.998659,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740


In [31]:
# Drop rows with NaN values (the leading rows where the indicators have no value yet).
# This modifies `data` in place.
data.dropna(inplace=True)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-08-20 12:00:00,4106.53,4125.43,4088.09,4125.43,1.887378,0.0,0.011544,0.482216,0.473387,0.998519,...,8.797011e-04,0.982763,1.000211,1.017659,3.488878,0.493963,0.439101,0.504541,0.997276,1.015345
2017-08-20 12:15:00,4111.66,4111.66,4080.00,4080.00,0.020565,0.0,0.734894,0.377117,0.278992,1.006425,...,8.797011e-04,0.982763,1.000211,1.017659,3.488878,0.493963,0.439101,0.504541,0.997276,1.015345
2017-08-20 12:30:00,4080.00,4125.43,4080.00,4096.33,0.790217,0.0,0.474883,0.425580,0.387487,1.001609,...,8.797011e-04,0.982763,1.000211,1.017659,3.488878,0.493963,0.439101,0.504541,0.997276,1.015345
2017-08-20 12:45:00,4125.00,4125.43,4108.47,4108.47,0.067680,0.0,0.281586,0.459263,0.459979,0.999099,...,8.797011e-04,0.982763,1.000211,1.017659,3.488878,0.493963,0.439101,0.504541,0.997276,1.015345
2017-08-20 13:00:00,4125.00,4125.00,4125.00,4125.00,0.012544,0.0,0.018390,0.502077,0.547487,0.996730,...,-3.327751e-04,0.995801,1.011760,1.027719,3.154664,0.131555,0.439101,0.504541,0.997276,1.015345
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.136820,0.0,0.000000,0.522309,0.624015,0.998218,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.904370,0.0,0.000000,0.527919,0.635524,0.998496,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.514270,0.0,0.000000,0.541285,0.663960,0.998270,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.752220,0.0,0.000000,0.544868,0.671890,0.998659,...,-2.506865e-04,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740


In [32]:
# Trim the leading and trailing rows whose Pivot_Proximity is zero: keep the label
# range from the first to the last non-zero value (both ends inclusive).
non_zero_indices = data.index[data['Pivot_Proximity'].ne(0).to_numpy()]
non_zero_at_begining, non_zero_at_end = non_zero_indices[0], non_zero_indices[-1]
data = data.loc[non_zero_at_begining:non_zero_at_end]
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-08-20 12:00:00,4106.53,4125.43,4088.09,4125.43,1.887378,0.0,0.011544,0.482216,0.473387,0.998519,...,0.000880,0.982763,1.000211,1.017659,3.488878,0.493963,0.439101,0.504541,0.997276,1.015345
2017-08-20 12:15:00,4111.66,4111.66,4080.00,4080.00,0.020565,0.0,0.734894,0.377117,0.278992,1.006425,...,0.000880,0.982763,1.000211,1.017659,3.488878,0.493963,0.439101,0.504541,0.997276,1.015345
2017-08-20 12:30:00,4080.00,4125.43,4080.00,4096.33,0.790217,0.0,0.474883,0.425580,0.387487,1.001609,...,0.000880,0.982763,1.000211,1.017659,3.488878,0.493963,0.439101,0.504541,0.997276,1.015345
2017-08-20 12:45:00,4125.00,4125.43,4108.47,4108.47,0.067680,0.0,0.281586,0.459263,0.459979,0.999099,...,0.000880,0.982763,1.000211,1.017659,3.488878,0.493963,0.439101,0.504541,0.997276,1.015345
2017-08-20 13:00:00,4125.00,4125.00,4125.00,4125.00,0.012544,0.0,0.018390,0.502077,0.547487,0.996730,...,-0.000333,0.995801,1.011760,1.027719,3.154664,0.131555,0.439101,0.504541,0.997276,1.015345
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 19:30:00,84026.42,84026.42,83614.75,83938.01,623.486270,0.0,0.685986,0.325334,0.245913,1.003426,...,-0.001088,0.995850,1.016068,1.036286,3.979673,0.102629,0.285648,0.283440,1.014575,1.049984
2025-02-27 19:45:00,83937.33,84203.53,83722.23,84113.98,446.614240,0.0,0.581060,0.361770,0.349479,1.000885,...,-0.001088,0.995850,1.016068,1.036286,3.979673,0.102629,0.285648,0.283440,1.014575,1.049984
2025-02-27 20:00:00,84113.98,84164.97,83052.00,83219.97,899.636010,0.0,0.897560,0.279256,0.190213,1.007758,...,-0.001752,1.000023,1.022504,1.044986,4.397252,-0.000521,0.319357,0.355691,1.004969,1.038740
2025-02-27 20:15:00,83219.97,83307.57,82831.36,82923.01,833.690400,0.0,0.774473,0.258190,0.160973,1.007578,...,-0.001752,1.000023,1.022504,1.044986,4.397252,-0.000521,0.319357,0.355691,1.004969,1.038740


In [33]:
# Summary statistics (count, mean, std, min, quartiles, max) of the feature columns.
data[features_columns].describe()

Unnamed: 0,15m_RSI_14,15m_RSI_6,15m_EMA_5,15m_EMA_21,15m_SMA_50,15m_MACD_12_26_9,15m_MACDs_12_26_9,15m_MACDh_12_26_9,15m_BBL_20_2.0,15m_BBM_20_2.0,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
count,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0,...,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0,263420.0
mean,0.505371,0.506152,0.999992,0.999963,0.999901,2.8e-05,2.9e-05,-1.450609e-06,0.989891,0.999967,...,2e-06,0.979153,0.999829,1.020505,4.131243,0.514063,0.514151,0.514914,0.99986,0.999309
std,0.111119,0.164499,0.003651,0.008516,0.015264,0.004316,0.004047,0.001323846,0.013544,0.00972,...,0.002541,0.02471,0.018917,0.028144,3.652772,0.334744,0.132856,0.187941,0.013658,0.03366
min,0.042269,0.006777,0.881157,0.869566,0.82369,-0.114603,-0.091486,-0.02631208,0.69766,0.847431,...,-0.03794,0.680883,0.817549,0.932431,2.85257e-18,-0.575809,0.053691,0.007924,0.888935,0.798163
25%,0.436266,0.394804,0.998824,0.997072,0.994279,-0.001343,-0.001274,-0.0004890957,0.987434,0.996699,...,-0.001067,0.972288,0.992302,1.005162,1.728972,0.266368,0.424045,0.382841,0.994319,0.982654
50%,0.506157,0.508165,0.999945,0.999807,0.999659,0.0001,9.6e-05,3.208597e-07,0.993927,0.999815,...,-7e-06,0.986862,0.99953,1.012029,3.051393,0.525737,0.511002,0.516111,0.999584,0.998423
75%,0.573473,0.618215,1.001053,1.002506,1.004883,0.001597,0.001532,0.0004960272,0.997331,1.002896,...,0.001065,0.994315,1.006412,1.025506,5.270096,0.764667,0.604868,0.648263,1.004639,1.01295
max,0.984326,0.997805,1.107652,1.274927,1.453253,0.046615,0.040588,0.02399701,1.084521,1.304489,...,0.025451,1.130515,1.466192,1.882351,56.76729,1.589725,0.949899,0.984297,1.242355,1.50797


In [34]:
def create_sequences(data, features_columns, target_col, seq_length):
    """
    Build sliding-window samples for a sequence model.

    Sample ``i`` holds rows ``i .. i + seq_length - 1`` of the feature columns;
    its label is the target value of row ``i + seq_length``, i.e. the row
    immediately after the window.

    Args:
        data: DataFrame containing the feature columns and the target column.
        features_columns: column labels used as model inputs.
        target_col: label of the column to predict.
        seq_length: number of consecutive rows in each window.

    Returns:
        Tuple ``(X, y)`` of float arrays with shapes
        ``(len(data) - seq_length, seq_length, len(features_columns))`` and
        ``(len(data) - seq_length,)``.
    """
    window_count = len(data) - seq_length
    X = np.zeros((window_count, seq_length, len(features_columns)))
    y = np.zeros(window_count)

    feature_matrix = data[features_columns].values
    target_vector = data[target_col].values

    # Fill one time step at a time: step `offset` of window `i` is row `i + offset`.
    for offset in range(seq_length):
        X[:, offset, :] = feature_matrix[offset:offset + window_count]

    # The label of window `i` is the row right after the window.
    y[:] = target_vector[seq_length:]

    return X, y

In [35]:
def calc_class_ratios(df, column):
  """Return a dict mapping each distinct value of ``df[column]`` to its count divided by the total number of rows."""
  return (df[column].value_counts() / len(df)).to_dict()

def class_ratio_to_class_weights(class_ratios):
  """Return a dict with the same keys whose values are the reciprocals of the given ratios."""
  return {label: 1 / ratio for label, ratio in class_ratios.items()}

# Inverse-frequency weights for the values of the 'Pivot' column; shown as the cell output.
class_ratios = calc_class_ratios(df=data, column='Pivot')
class_weights = class_ratio_to_class_weights(class_ratios=class_ratios)
class_weights

{0.0: 1.058706739599619, 1.0: 36.00109334426678, -1.0: 36.13443072702332}

In [36]:
# Keep the first 80 % of the rows (in index order) as the training portion.
training_data_ratio = 0.8
train_row_count = int(len(data) * training_data_ratio)
training_data = data.iloc[:train_row_count]
training_data_start_date = training_data.index[0]
training_data_end_date = training_data.index[-1]
training_data.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Pivot,Pivot_Proximity,15m_RSI_14,15m_RSI_6,15m_EMA_5,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-08-28 23:15:00,26074.41,26178.83,26074.4,26132.46,470.21982,0.0,-0.940498,0.636244,0.840896,0.997851,...,5.1e-05,0.990836,0.996934,1.003032,1.223389,0.751383,0.516433,0.586514,0.997668,0.997957
2023-08-28 23:30:00,26132.46,26140.63,26104.33,26118.21,169.30577,0.0,-0.876222,0.606474,0.751106,0.998931,...,5.1e-05,0.990836,0.996934,1.003032,1.223389,0.751383,0.516433,0.586514,0.997668,0.997957
2023-08-28 23:45:00,26118.22,26127.88,26111.95,26120.0,98.55972,0.0,-0.899718,0.608949,0.755049,0.999241,...,5.1e-05,0.990836,0.996934,1.003032,1.223389,0.751383,0.516433,0.586514,0.997668,0.997957
2023-08-29 00:00:00,26120.0,26135.2,26110.02,26122.87,129.2069,0.0,-0.937389,0.613151,0.762294,0.999421,...,0.000218,0.99006,0.996372,1.002685,1.267069,0.787354,0.497624,0.535356,0.999384,0.999423
2023-08-29 00:15:00,26122.88,26132.38,26092.91,26092.91,115.61893,0.0,-0.544136,0.547073,0.556218,1.000379,...,0.000218,0.99006,0.996372,1.002685,1.267069,0.787354,0.497624,0.535356,0.999384,0.999423
2023-08-29 00:30:00,26092.92,26121.82,26084.07,26121.82,155.61425,0.0,-0.923607,0.592688,0.662018,0.999515,...,0.000218,0.99006,0.996372,1.002685,1.267069,0.787354,0.497624,0.535356,0.999384,0.999423
2023-08-29 00:45:00,26121.82,26165.99,26121.81,26140.44,217.36743,0.0,-0.856156,0.619283,0.714605,0.999202,...,0.000218,0.99006,0.996372,1.002685,1.267069,0.787354,0.497624,0.535356,0.999384,0.999423
2023-08-29 01:00:00,26140.44,26149.73,26105.78,26116.64,121.00872,0.0,-0.855615,0.568213,0.576922,1.000075,...,0.000225,0.991725,0.997948,1.004171,1.247218,0.66485,0.497624,0.535356,0.999384,0.999423
2023-08-29 01:15:00,26116.65,26206.24,26116.64,26127.64,474.85079,-1.0,-1.0,0.585237,0.617767,0.999769,...,0.000225,0.991725,0.997948,1.004171,1.247218,0.66485,0.497624,0.535356,0.999384,0.999423
2023-08-29 01:30:00,26127.64,26153.48,26127.0,26134.31,230.54611,0.0,-0.920491,0.595648,0.642856,0.999676,...,0.000225,0.991725,0.997948,1.004171,1.247218,0.66485,0.497624,0.535356,0.999384,0.999423


In [37]:
# Window length (rows per sample) and the column used as the prediction target.
seq_length = 100
target_col = 'Pivot_Proximity'

# Build the training windows and their labels.
X, y = create_sequences(
    data=training_data,
    features_columns=features_columns,
    target_col=target_col,
    seq_length=seq_length,
)

In [38]:
# Shapes of the feature windows and of the label vector, one per line.
print(X.shape, y.shape, sep='\n')

(210636, 100, 29)
(210636,)


In [39]:
# Split without shuffling so the original order is kept: the last 20 % of the
# windows become the hold-out set.
train_validate_split_ratio = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=train_validate_split_ratio,
    shuffle=False,
)

In [40]:
# Report the shape of every array produced by the split.
for _name, _array in (
    ('X_train', X_train),
    ('y_train', y_train),
    ('X_test', X_test),
    ('y_test', y_test),
):
    print(f'{_name} shape:', _array.shape)

X_train shape: (168508, 100, 29)
y_train shape: (168508,)
X_test shape: (42128, 100, 29)
y_test shape: (42128,)


In [41]:
# Shift the class labels by +1 (e.g. -1/0/1 -> 0/1/2) so the keys can serve as
# non-negative class indices. The mapping is rebuilt from `class_ratios` every
# time, so re-running this cell does not shift the keys a second time.
class_weights = {
    label + 1: weight
    for label, weight in class_ratio_to_class_weights(class_ratios).items()
}
class_weights

{1.0: 1.058706739599619, 2.0: 36.00109334426678, 0.0: 36.13443072702332}

In [42]:
# Step 4: Build the LSTM model
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` layer argument, which Keras warns against for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),  # (time steps, features)
    LSTM(100, return_sequences=True),   # emits the full sequence for the next LSTM
    Dropout(0.2),
    LSTM(50, return_sequences=False),   # emits only the last step
    Dropout(0.2),
    Dense(50, activation='relu'),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='tanh')         # single output bounded to (-1, 1)
])
# Regression setup: mean squared error as the loss, mean absolute error as the metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [None]:
# Step 5: Train the model
train_epochs = 12
fit_options = dict(
    epochs=train_epochs,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1,
    # class_weight=class_weights  (currently disabled)
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Step 6: Evaluate the model
# `evaluate` returns the loss first, followed by the compiled metrics. The model is
# compiled with loss='mse' and metrics=['mae'], so the second value is the MAE.
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss (MSE): {loss:.4f}")
print(f"Test MAE: {mae:.4f}")

In [None]:
# Rows after the training portion form the test set. Take an explicit copy: later
# cells add a 'Predicted_Value' column and drop rows in place, which would otherwise
# operate on a slice of `data` (pandas SettingWithCopyWarning).
test_data = data[int(len(data) * training_data_ratio):].copy()
test_data.head(10)

In [None]:
def predict_in_batches(model, test_data, features_columns, seq_length, batch_size=128):
    """
    Run the model over every sliding window of `test_data`, `batch_size` windows per call.

    The prediction stored for row ``i`` is computed from the ``seq_length`` rows
    that precede it (rows ``i - seq_length .. i - 1``). Rows without a complete
    preceding window keep NaN.

    Args:
        model: object exposing ``predict(batch, verbose=0)`` whose result has ``flatten()``.
        test_data: DataFrame; it is modified in place and gains a 'Predicted_Value' column.
        features_columns: column labels fed to the model.
        seq_length: number of rows per input window.
        batch_size: number of windows passed to each ``predict`` call.

    Returns:
        The same `test_data` object, with the 'Predicted_Value' column added.
    """
    window_count = len(test_data) - seq_length
    feature_array = test_data[features_columns].values
    predicted_values = np.full(len(test_data), np.nan)

    for first_window in range(0, window_count, batch_size):
        last_window = min(first_window + batch_size, window_count)
        batch_input = np.array([
            feature_array[window_start:window_start + seq_length]
            for window_start in range(first_window, last_window)
        ])
        if len(batch_input) == 0:
            continue

        # The window starting at row `w` yields the prediction for row `w + seq_length`.
        target_rows = np.arange(first_window + seq_length, last_window + seq_length)
        predictions = model.predict(batch_input, verbose=0)
        predicted_values[target_rows] = predictions.flatten()

    test_data['Predicted_Value'] = predicted_values
    return test_data

In [None]:
# Predict in batches; the call adds 'Predicted_Value' to `test_data` in place
# and the returned frame is shown as the cell output.
batch_size = 128
predict_in_batches(model, test_data, features_columns, seq_length, batch_size=batch_size)

In [None]:
# Show prices next to the pivot label and the model prediction.
test_data[['Open', 'High', 'Low', 'Close', 'Pivot', 'Predicted_Value']]

In [None]:
# Column dtypes and non-null counts of the test frame.
test_data.info()

In [None]:
# Inspect the prediction column on its own.
test_data['Predicted_Value']

In [None]:
# Remove, in place, every test row that contains a NaN in any column, then show the frame.
test_data.dropna(axis=0, how='any', inplace=True)
test_data

In [None]:
# Plot the test candles together with the 'Predicted_Value' column.
# NOTE(review): the helper is defined outside this section; the third argument (0.5)
# is presumably a threshold - confirm against its definition.
plot_candlestick_with_proximity(test_data, 'Predicted_Value', 0.5)

In [None]:
# Backtest configuration; the trade_* values are the default params of PredictedValueStrategy.
trade_leverage = 10  # multiplies the margin when the strategy sizes a position
trade_margin = 1000  # margin per trade, combined with the leverage for position sizing
trade_buy_threshold = 0.6  # prediction above this is treated as a long / close-short signal
trade_sell_threshold = -0.6  # prediction below this is treated as a short / close-long signal
broker_commision = 0.002  # passed to cerebro.broker.setcommission(commission=...)

In [None]:
class PredictedValueStrategy(bt.Strategy):
    """
    Threshold strategy driven by the `predicted_value` line of the first data feed.

    While flat, it opens a long when the prediction is above `buy_threshold`
    and a short when it is below `sell_threshold`. While in a position, it only
    closes: a short is closed on a buy signal and a long on a sell signal; the
    opposite position is not opened in the same call.

    Params:
        buy_threshold: prediction level above which a buy signal is raised.
        sell_threshold: prediction level below which a sell signal is raised.
        leverage: multiplier applied to `margin` when sizing a position.
        margin: amount that, multiplied by `leverage`, gives the position's notional value.
    """
    params = (
        ('buy_threshold', trade_buy_threshold),
        ('sell_threshold', trade_sell_threshold),
        ('leverage', trade_leverage),  # Leverage ratio
        ('margin', trade_margin)
    )

    def __init__(self):
        # To keep track of pending orders and buy price/commission
        self.order = None
        self.buyprice = None
        self.buycomm = None

        # Shortcut to the custom prediction line of the first data feed
        self.predicted_value = self.datas[0].predicted_value

        # Leverage is used only for position sizing in `next`; this class does
        # not configure any leverage on the broker.

    def next(self):
        """Evaluate the current bar and submit at most one order."""
        # Check if an order is pending ... if yes, we cannot send a 2nd one
        if self.order:
            return

        if not self.position:
            # Size the position so that its notional value is margin * leverage
            position_size = (self.params.margin * self.params.leverage) / self.data.close[0]

            # Long signal
            if self.predicted_value[0] > self.params.buy_threshold:
                self.log('LONG POSITION CREATED, %.2f' % self.datas[0].close[0])
                self.order = self.buy(size=position_size)

            # Short signal
            elif self.predicted_value[0] < self.params.sell_threshold:
                self.log('SHORT POSITION CREATED, %.2f' % self.datas[0].close[0])
                self.order = self.sell(size=position_size)

        else:
            # In the market: only close on a signal opposite to the open position
            if self.predicted_value[0] > self.params.buy_threshold and self.position.size < 0:
                self.log('CLOSE SHORT POSITION CREATED, %.2f' % self.datas[0].close[0])
                self.order = self.close()
            elif self.predicted_value[0] < self.params.sell_threshold and self.position.size > 0:
                self.log('CLOSE LONG POSITION CREATED, %.2f' % self.datas[0].close[0])
                self.order = self.close()

    def notify_order(self, order):
        """Log the outcome of an order and clear the pending-order marker once it is resolved."""
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return

        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status == order.Completed:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)
        elif order.status == order.Canceled:
            self.log('Order Canceled')
        elif order.status == order.Margin:
            self.log(f'Order Margin - Available cash: {self.broker.getcash()}')
        elif order.status == order.Rejected:
            self.log('Order Rejected')

        # Write down: no pending order
        self.order = None

    def notify_trade(self, trade):
        """Log gross/net profit and the available cash when a trade closes."""
        if not trade.isclosed:
            return
        self.log(f'TRADE COMPLETED, GROSS {trade.pnl:.2f}, NET {trade.pnlcomm:.2f}, Available Cash {self.broker.getcash():.2f}')

    def log(self, txt, dt=None):
        """Print `txt` prefixed with the date (or `dt` if given) and time of the current bar."""
        dt = dt or self.datas[0].datetime.date(0)
        bar_time = self.datas[0].datetime.time()
        print(f'{dt.isoformat()} {bar_time.isoformat()}, {txt}')


In [None]:
# Date of the first test row. `date` is a method and must be called; the bare
# attribute only displays the bound method object.
test_data.index[0].date()

In [None]:
# Extend PandasData to include the custom column
class CustomPandasData(bt.feeds.PandasData):
    """PandasData feed with one extra line, `predicted_value`, mapped by default to the 'Predicted_Value' column."""
    # Declare the additional line so it is available on the feed as `predicted_value`
    lines = ('predicted_value',)  # Add the custom line
    params = (
        ('predicted_value', 'Predicted_Value'),  # Default source column for the custom line
    )


# Wrap the test DataFrame in the custom feed. The keyword values are column
# names of `test_data`; `datetime=None` means the index is used as the datetime.
feed_columns = dict(
    open='Open',
    high='High',
    low='Low',
    close='Close',
    volume='Volume',
    openinterest=None,  # no open interest column
    predicted_value='Predicted_Value',
)
backtest_data = CustomPandasData(dataname=test_data, datetime=None, **feed_columns)

# Engine, strategy and data feed
cerebro = bt.Cerebro()
cerebro.addstrategy(PredictedValueStrategy)
cerebro.adddata(backtest_data)

# Broker: starting cash and commission
cerebro.broker.set_cash(1000.0)
cerebro.broker.setcommission(commission=broker_commision)

# Analyzers, registered under the names used to read them back later
for analyzer_cls, analyzer_name in (
    (bt.analyzers.SharpeRatio, 'sharpe'),
    (bt.analyzers.DrawDown, 'drawdown'),
    (bt.analyzers.TradeAnalyzer, 'tradeanalyzer'),
    (bt.analyzers.Returns, 'returns'),
    (bt.analyzers.PyFolio, 'pyfolio'),
):
    cerebro.addanalyzer(analyzer_cls, _name=analyzer_name)

# Run the backtest, reporting the portfolio value before and after
print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
backtest_result = cerebro.run()
print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())

In [None]:
# First element of the run result; its analyzers are read below.
strat = backtest_result[0]

# Print analyzers
sharpe_analysis = strat.analyzers.sharpe.get_analysis()
drawdown_analysis = strat.analyzers.drawdown.get_analysis()
print(f"Sharpe Ratio: {sharpe_analysis}")
print(f"Max Drawdown: {drawdown_analysis}")

In [None]:
# Print the drawdown analysis on its own.
print(strat.analyzers.drawdown.get_analysis())

In [None]:
# Plot starting 1000 rows before the end of the test data.
plot_start = len(test_data) - 1000
cerebro.plot(start=plot_start)

In [None]:
# List the keys available in the PyFolio analyzer output.
(strat.analyzers.pyfolio.get_analysis()).keys()

In [None]:
def make_dict_even(data):
    """
    Recursively replace every numeric value in a nested structure with an even integer.

    Dicts and lists are rebuilt as new plain dicts/lists; the input is not
    modified. Odd integers are increased by one. Floats are first rounded with
    the built-in ``round`` and then increased by one if the result is odd.

    Booleans and non-finite floats (NaN, +/-inf) are returned unchanged:
    ``bool`` is a subclass of ``int`` and would otherwise be turned into 0/2,
    and ``round`` raises on NaN and infinity.

    Args:
        data: The input value (a dict, a list, a number, or anything else).

    Returns:
        A new structure of the same shape with numeric values made even; any
        other value is returned as it is.
    """
    if isinstance(data, dict):
        return {key: make_dict_even(value) for key, value in data.items()}
    if isinstance(data, list):
        return [make_dict_even(item) for item in data]

    # bool is an int subclass; leave flags untouched instead of converting them.
    if isinstance(data, bool):
        return data

    if isinstance(data, float):
        try:
            data = round(data)
        except (ValueError, OverflowError):
            # NaN or +/-inf cannot be rounded to an integer; keep the value as is.
            return data
    elif not isinstance(data, int):
        return data  # Return non-numeric values as they are

    return data if data % 2 == 0 else data + 1

In [None]:
# Serialize the trade analyzer results, after passing them through make_dict_even, to a JSON string.
json.dumps(make_dict_even(strat.analyzers.tradeanalyzer.get_analysis()))

In [None]:
# Look up the 'gross_lev' entry of the PyFolio analyzer output.
(strat.analyzers.pyfolio.get_analysis())['gross_lev']

In [None]:
# Scratch value: a tuple holding a single (name, value) pair.
k = (('key', 2),)

In [None]:
# A plain tuple of (name, value) pairs has no attribute access (`k.key` raises
# AttributeError); convert the pairs to a dict to look a value up by name.
dict(k)['key']