<a href="https://colab.research.google.com/github/ifeLight/ml-bot/blob/main/binance-multi-timeframe-grade-uncover.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas-ta
!pip install backtrader[plotting]
!pip install plotly
!pip install --upgrade firebase-admin

Collecting pandas-ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m112.6/115.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pandas-ta
  Building wheel for pandas-ta (setup.py) ... [?25l[?25hdone
  Created wheel for pandas-ta: filename=pandas_ta-0.3.14b0-py3-none-any.whl size=218909 sha256=1a703f32af80aac0da30a654f0e01ece3560c1a9c5e5112b69eef2b2c5b49426
  Stored in directory: /root/.cache/pip/wheels/7f/33/8b/50b245c5c65433cd8f5cb24ac15d97e5a3db2d41a8b6ae957d
Successfully built pandas-ta
Installing collected packages: pandas-ta
Successfully installed pandas-ta-0.3.14b0
Collecting b

In [36]:
import numpy as np
import pandas as pd
import datetime
import requests
import json
import os
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import backtrader as bt
import pandas_ta as ta
from imblearn.over_sampling import SMOTE
import subprocess
import firebase_admin
from firebase_admin import firestore
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
from google.colab import auth
import google.auth
import itertools
import hashlib
import re

In [3]:
auth.authenticate_user()
#Configure Google cloud project
project_id = 'ifelight'
!gcloud config set project {project_id}

Updated property [core/project].


In [4]:
# Working GCP Bucket name
bucket_name = 'ife-storage'
# Working on Firestore name
firestore_collection_name = 'trade-models'

In [5]:
binance_base_url = 'https://52on3577u3.execute-api.eu-central-1.amazonaws.com'

def get_binance_candles(symbol: str, interval='1h', limit: int = 50, **kwargs):
    url = f'{binance_base_url}/api/v3/uiKlines?symbol={symbol}&interval={interval}&limit={limit}'
    for key, value in kwargs.items():
        url += f'&{key}={value}'
    response = requests.get(url)
    result = json.loads(response.text)
    # print(result)
    def map_result(x):
        return {
            'Date': x[0],
            'Open': x[1],
            'High': x[2],
            'Low': x[3],
            'Close': x[4],
            'Volume': x[5],
        }
    mappeded_result = []
    for x in result:
        mappeded_result.append(map_result(x))
    return mappeded_result


def candles_to_df(data):
    df =  pd.DataFrame(data)
    df['Date'] = pd.to_datetime(df['Date'], unit='ms')
    df['Open'] = df['Open'].astype(float)
    df['High'] = df['High'].astype(float)
    df['Low'] = df['Low'].astype(float)
    df['Close'] = df['Close'].astype(float)
    df['Volume'] = df['Volume'].astype(float)
    df.set_index('Date', inplace=True)
    return df

def get_all_binance_candles(symbol: str, interval='1h', start_date=None, end_date=None, limit=1000):
  try:
    return load_candles_from_cloud_storage(symbol, interval, start_date, end_date)
  except FileNotFoundError:
    pass
  result = []
  raw_start_date = start_date
  raw_end_date = end_date
  start_date = pd.to_datetime(start_date) if start_date else pd.to_datetime('2015-01-01')
  end_date = pd.to_datetime(end_date) if end_date else pd.to_datetime('today')
  while True:
    candles = get_binance_candles(symbol, interval, limit, startTime=int(start_date.timestamp() * 1000), endTime=int(end_date.timestamp() * 1000))
    if len(candles) <= 1:
      break;
    result += candles
    start_date = pd.to_datetime(datetime.datetime.fromtimestamp(candles[-1]['Date'] / 1000))
  candles_df = candles_to_df(result)
  save_candles_to_cloud_storage(candles_df, symbol, interval, raw_start_date, raw_end_date)
  return candles_df

def candles_storage_file_name(symbol: str, interval='1h', start_date=None, end_date=None):
  file_name = f'binance_{symbol}_{interval}_{start_date}_{end_date}.csv'
  return file_name

def load_candles_from_cloud_storage(symbol: str, interval: str, start_date=None, end_date=None):
  file_name = candles_storage_file_name(symbol, interval, start_date, end_date)
  try:
    # Download the file from cloud storage.
    subprocess.run(['gsutil', 'cp', f'gs://{bucket_name}/trade/candles/{file_name}', f'/tmp/{file_name}'], check=True)

    # Load the data into a Pandas DataFrame.
    with open(f'/tmp/{file_name}', 'r') as f:
      return pd.read_csv(f, index_col=0, parse_dates=True)
  except subprocess.CalledProcessError:
    # Raise a FileNotFoundError if the file is not found in cloud storage.
    raise FileNotFoundError(f"File not found: gs://{bucket_name}/trade/candles/{file_name}")

def save_candles_to_cloud_storage(df: pd.DataFrame, symbol: str, interval: str, start_date, end_date):
  file_name = candles_storage_file_name(symbol, interval, start_date, end_date)
  df.to_csv(f'/tmp/{file_name}')
  !gsutil cp /tmp/{file_name} gs://{bucket_name}/trade/candles/{file_name}


In [6]:
def add_scaled_rsi(df, window=14, prefix = '', features_columns=[]):
  series = ta.rsi(df['Close'], length=window)
  column_name = f"{prefix}RSI_{window}"
  df[column_name] = series / 100
  if(column_name not in features_columns):
    features_columns.append(column_name)
  return df

def add_scaled_ema(df, window=50, prefix = '', features_columns=[]):
  series = ta.ema(df['Close'], length=window)
  column_name = f"{prefix}EMA_{window}"
  df[column_name] = series / df['Close']
  if column_name not in features_columns:
    features_columns.append(column_name)
  return df

def add_scaled_sma(df, window=50, prefix = '', features_columns=[]):
  series = ta.sma(df['Close'], length=window)
  column_name = f"{prefix}SMA_{window}"
  df[column_name] = series / df['Close']
  if column_name not in features_columns:
    features_columns.append(column_name)
  return df

def add_scaled_macd(df, prefix= '', features_columns=[], fast = 12, slow = 26, signal=9):
  macd_df = ta.macd(df['Close'], fast=fast, slow=slow, signal=signal)
  suffix = f"{fast}_{slow}_{signal}"
  macd_column_name = f"{prefix}MACD_{suffix}"
  macds_column_name = f"{prefix}MACDs_{suffix}"
  macdh_column_name = f"{prefix}MACDh_{suffix}"
  df[macd_column_name] = macd_df[macd_df.columns[0]] / df['Close']
  df[macds_column_name] = macd_df[macd_df.columns[2]] / df['Close']
  df[macdh_column_name] = macd_df[macd_df.columns[1]] / df['Close']
  if macd_column_name not in features_columns:
    features_columns.append(macd_column_name)
  if macds_column_name not in features_columns:
    features_columns.append(macds_column_name)
  if macdh_column_name not in features_columns:
    features_columns.append(macdh_column_name)
  return df

def add_scaled_bbands(df, window=20, std=2.0, prefix= '', features_columns=[]):
  bbands_df = ta.bbands(df['Close'], length=window, std=std)
  suffix = f"{window}_{std}"
  bbl_column_name = f"{prefix}BBL_{suffix}"
  bbm_column_name = f"{prefix}BBM_{suffix}"
  bbu_column_name = f"{prefix}BBU_{suffix}"
  bbb_column_name = f"{prefix}BBB_{suffix}"
  bbp_column_name = f"{prefix}BBP_{suffix}"
  df[bbl_column_name] = bbands_df[bbands_df.columns[0]] / df['Close']
  df[bbm_column_name] = bbands_df[bbands_df.columns[1]] / df['Close']
  df[bbu_column_name] = bbands_df[bbands_df.columns[2]] / df['Close']
  df[bbb_column_name] = bbands_df[bbands_df.columns[3]]
  df[bbp_column_name] = bbands_df[bbands_df.columns[4]]
  if bbl_column_name not in features_columns: features_columns.append(bbl_column_name)
  if bbm_column_name not in features_columns: features_columns.append(bbm_column_name)
  if bbu_column_name not in features_columns: features_columns.append(bbu_column_name)
  if bbb_column_name not in features_columns: features_columns.append(bbb_column_name)
  if bbp_column_name not in features_columns: features_columns.append(bbp_column_name)
  return df

In [7]:
def merge_candlesticks_data(df1, df2):
  """
  Merges two candlestick DataFrames with forward fill, handling different timeframes,
  and prevents duplicate OHLCV columns.
  Ensure both DataFrames have a datetime index.
  And ensure the second DataFrame is the larger timeframe

  Args:
      df1: First candlestick DataFrame with datetime index.
      df2: Second candlestick DataFrame with datetime index.

  Returns:
      Merged DataFrame with forward-filled values, and no duplicate OHLCV columns.
  """
  # Ensure both DataFrames have a datetime index
  if not isinstance(df1.index, pd.DatetimeIndex) or not isinstance(df2.index, pd.DatetimeIndex):
      raise ValueError("DataFrames must have a datetime index.")
  # Identify OHLCV columns
  ohlcv_cols = ['open', 'high', 'low', 'close', 'volume']

  # Rename columns in df2 that conflict with df1's OHLCV columns
  for col in df2.columns:
    if col.lower() in ohlcv_cols and col.lower() in df1.columns.str.lower():
      del df2[col]

  # Merge the DataFrames using outer join, which preserves all dates
  merged_df = pd.merge(df1, df2, how='outer', left_index=True, right_index=True, suffixes=('_df1', '_df2'))

  # Forward fill the missing values for each column
  for col in merged_df.columns:
    merged_df[col] = merged_df[col].ffill()

  return merged_df

In [8]:
def resample_candles(df, interval='1h'):
    return df.resample(interval).agg({'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum'}).ffill()

In [9]:
def create_sequences(data, features_columns, target_col, seq_length):
    """
    Create sequences of `seq_length` time steps for LSTM input, optimized for performance.
    """
    num_samples = len(data) - seq_length
    num_features = len(features_columns)

    X = np.zeros((num_samples, seq_length, num_features))
    y = np.zeros(num_samples)

    features_data = data[features_columns].values
    target_data = data[target_col].values

    for i in range(num_samples):
        X[i] = features_data[i:i + seq_length]
        y[i] = target_data[i + seq_length]

    return X, y

In [10]:
class CustomEarlyStopping(tf.keras.callbacks.Callback):
    def __init__(self, patience=2):
        super(CustomEarlyStopping, self).__init__()
        self.patience = patience  # Number of consecutive increases allowed
        self.best_val_mae = float('inf')  # Track the best validation MAE
        self.increase_count = 0  # Counter for consecutive increases

    def on_epoch_end(self, epoch, logs=None):
        current_val_mae = logs.get('val_mae')  # Get validation MAE for the current epoch

        if current_val_mae < self.best_val_mae:
            # If validation MAE improves, reset the counter
            self.best_val_mae = current_val_mae
            self.increase_count = 0
        else:
            # If validation MAE increases, increment the counter
            self.increase_count += 1

        # Stop training if validation MAE increases consecutively for 'patience' epochs
        if self.increase_count >= self.patience:
            print(f"\nEarly stopping: Validation MAE increased {self.patience} times in a row.")
            self.model.stop_training = True

In [11]:
def predict_in_batches(model, test_data, features_columns, seq_length, batch_size=128):
    """
    Predicts in batches to speed up inference.
    """
    num_samples = len(test_data) - seq_length
    feature_array = test_data[features_columns].values
    predicted_values = np.full(len(test_data), np.nan)

    for start_idx in range(0, num_samples, batch_size):
        end_idx = min(start_idx + batch_size, num_samples)
        batch_indices = range(start_idx + seq_length, end_idx + seq_length)
        batch_input = np.array([feature_array[i - seq_length:i] for i in batch_indices])

        if len(batch_input) > 0 :
            predictions = model.predict(batch_input, verbose=0)
            predicted_values[batch_indices] = predictions.flatten() #Flatten to 1d array.

    test_data['Predicted_Value'] = predicted_values
    return test_data

In [12]:
class PredictedValueStrategy(bt.Strategy):
    params = (
        ('buy_threshold', 0.6),
        ('sell_threshold', 0.6),
        ('leverage', 1),  # Leverage ratio
        ('margin', 1000),
        ('log', True)
    )

    def __init__(self):
        # To keep track of pending orders and buy price/commission
        self.order = None
        self.buyprice = None
        self.buycomm = None

        # Add the Predicted_Value as a data feed
        self.predicted_value = self.datas[0].predicted_value

    def next(self):
        # Check if an order is pending ... if yes, we cannot send a 2nd one
        if self.order:
            return
         # # Check if we are in the market
        if not self.position:
          cash = self.broker.getcash()
          position_size = (self.params.margin * self.params.leverage) / self.data.close[0]
          # Long signal
          if self.predicted_value[0] > self.params.buy_threshold:
            self.log('LONG POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.buy(size=position_size)

          # Short signal
          elif self.predicted_value[0] < self.params.sell_threshold:
            self.log('SHORT POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.sell(size=position_size)

        else:
          if self.predicted_value[0] > self.params.buy_threshold and self.position.size < 0:
            self.log('CLOSE SHORT POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.close()
          elif self.predicted_value[0] < self.params.sell_threshold and self.position.size > 0:
            self.log('CLOSE LONG POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.close()

    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return
        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            if order.status == order.Canceled:
                self.log('Order Canceled')
            elif order.status == order.Margin:
                self.log(f'Order Margin Not Enough - Available cash: {self.broker.getcash()}')
            elif order.status == order.Rejected:
                self.log('Order Rejected')

        # Write down: no pending order
        self.order = None

    def notify_trade(self, trade):
        if not trade.isclosed:
            return
        self.log(f'TRADE COMPLETED, GROSS {trade.pnl:.2f}, NET {trade.pnlcomm:.2f}, Available Cash {self.broker.getcash():.2f}')

    def log(self, txt, dt=None):
        if not self.params.log:
          return
        dt = dt or self.datas[0].datetime.date(0)
        time = self.datas[0].datetime.time()
        print(f'{dt.isoformat()} {time.isoformat()}, {txt}')


In [13]:
# Extend PandasData to include the custom column
class CustomPandasData(bt.feeds.PandasData):
    # Add custom columns
    lines = ('predicted_value',)  # Add the custom line
    params = (
        ('predicted_value', 'Predicted_Value'),  # Map the column name
    )


In [14]:
def get_model_cloud_storage_path(bucket_name, local_file_name):
    """
    Returns the cloud storage path for a given model name.

    Parameters:
        local_file_name (str): The name of the model.

    Returns:
        str: The cloud storage path. (e.g., gs://<bucket_name>/trade/models/<model_name>.h5).
    """
    return f'gs://{bucket_name}/trade/models/{local_file_name}'

def save_model_to_cloud_storage(model: tf.keras.Model, model_name: str, bucket_name: str):
    """
    Saves a TensorFlow model to Google Cloud Storage and returns the cloud storage file path.

    Parameters:
        model (tf.keras.Model): The TensorFlow model to save.
        model_name (str): The name of the model (used to create the file name).
        bucket_name (str): The name of the Google Cloud Storage bucket.

    Returns:
        str: The cloud storage file path (e.g., gs://<bucket_name>/trade/models/<model_name>.h5).
    """
    # Define the local and cloud storage file paths
    local_file_name = f'{model_name}.h5'
    local_file_path = f'/tmp/{local_file_name}'
    cloud_file_path = get_model_cloud_storage_path(bucket_name, local_file_name)

    # Save the model locally
    model.save(local_file_path)

    try:
        # Upload the model to Google Cloud Storage
        subprocess.run(['gsutil', 'cp', local_file_path, cloud_file_path], check=True)
        print(f"Model saved to {cloud_file_path}")
    except subprocess.CalledProcessError as e:
        # Handle errors during the upload process
        raise RuntimeError(f"Failed to upload model to Google Cloud Storage: {e}")
    finally:
        # Clean up the local file
        if os.path.exists(local_file_path):
            os.remove(local_file_path)
    return cloud_file_path

def load_model_from_cloud_storage(model_name: str, bucket_name: str):
    """
    Loads a TensorFlow model from Google Cloud Storage.

    Parameters:
        model_name (str): The name of the model (used to create the file name).
        bucket_name (str): The name of the Google Cloud Storage bucket.

    Returns:
        tf.keras.Model: The loaded TensorFlow model.
    """
    # Define the local and cloud storage file paths
    local_file_name = f'{model_name}.h5'
    local_file_path = f'/tmp/{local_file_name}'
    cloud_file_path = get_model_cloud_storage_path(bucket_name, local_file_name)

    try:
        # Download the model from Google Cloud Storage
        subprocess.run(['gsutil', 'cp', cloud_file_path, local_file_path], check=True)

        # Load the model
        model = tf.keras.models.load_model(local_file_path)
        print(f"Model loaded from {cloud_file_path}")
        return model
    except subprocess.CalledProcessError as e:
        raise FileNotFoundError(f"Model not found in Google Cloud Storage: {e}")
    finally:
        # Clean up the local file
        if os.path.exists(local_file_path):
            os.remove(local_file_path)


In [15]:
def initialize_firestore(project_id):
    """
    Initialize the Firestore client using the service account key.

    Parameters:
        project_id (str): The Google Cloud project ID.

    Returns:
        firestore.Client: Initialized Firestore client.
    """
    if not firebase_admin._apps:
        # cred = credentials.Certificate(service_account_key_path)
        cred = firebase_admin.credentials.ApplicationDefault()
        cred._project_id = project_id  # Add this line
        os.environ["GOOGLE_CLOUD_PROJECT"] = project_id
        firebase_admin.initialize_app(cred, {'projectId': project_id})
    return firestore.client()


def save_to_firestore(project_id, collection_name, data):
    """
    Save data to a Firestore collection.

    Parameters:
        project_id (str): The Google Cloud project ID.
        service_account_key_path (str): Path to the Firebase service account key JSON file.
        collection_name (str): Name of the Firestore collection.
        data (dict): Data to save in the document.

    Returns:
        None
    """
    # Initialize Firestore client
    db = initialize_firestore(project_id)

    # Save data to Firestore
    doc_ref = db.collection(collection_name).document()
    doc_ref.set(data)
    print(f"Data saved to Firestore: Collection={collection_name}, Document ID={doc_ref.id}")


def delete_documents_by_field(project_id, collection_name, field_name, field_value, batch_size=500):
  """
  Deletes documents in a Firestore collection where a specific field matches a value.

  Args:
      collection_name: The name of the Firestore collection.
      field_name: The name of the field to filter by.
      field_value: The value to filter the field against.
      batch_size: The number of documents to delete in each batch.
  """
  db = initialize_firestore(project_id)
  collection_ref = db.collection(collection_name)

  try:
      while True:
          query = collection_ref.where(field_name, '==', field_value).limit(batch_size)
          docs = query.stream()

          deleted_count = 0
          batch = db.batch()

          for doc in docs:
              batch.delete(doc.reference)
              deleted_count += 1

          if deleted_count == 0:
              print(f"No documents found with {field_name} == {field_value} in {collection_name}.")
              break

          batch.commit()
          print(f"Deleted {deleted_count} documents from {collection_name} where {field_name} == {field_value}.")
          if deleted_count < batch_size: # if less than batch size deleted, then there are no more documents matching.
              break

  except Exception as e:
      print(f"An error occurred: {e}")


In [16]:
def generate_param_grid(params):
    """
    Generate a grid of all possible hyperparameter combinations.

    Args:
        params (dict): A dictionary where keys are hyperparameter names and values are lists of possible values.

    Returns:
        list: A list of dictionaries, where each dictionary represents a unique combination of hyperparameters.
    """
    keys = params.keys()
    values = params.values()

    # Generate all possible combinations of hyperparameters
    param_grid = [dict(zip(keys, combination)) for combination in itertools.product(*values)]

    return param_grid


In [110]:
def add_indicators_and_normalize(df, indicators, default_timeframe='15m'):
    """
    Adds technical indicators to the DataFrame based on the provided list of indicators.
    Handles higher timeframes by resampling the DataFrame and merging results.

    Parameters:
    df (pd.DataFrame): The candlestick DataFrame with columns like 'Open', 'High', 'Low', 'Close', 'Volume'.
    indicators (list): A list of indicator strings in the format 'TIMEFRAME_INDICATOR_PARAMS' or 'INDICATOR_PARAMS'.
    default_timeframe (str): The default timeframe to use if not specified in the indicator string (e.g., '15m').

    Returns:
    pd.DataFrame: The DataFrame with the added indicator columns.
    """
    # Dictionary to store resampled DataFrames for higher timeframes
    resampled_dfs = {}

    # Ensure indicators are unique
    indicators = list(set(indicators))

    # Drop duplicates
    df = df.drop_duplicates(keep='first')

    # timeframe regex patter
    timeframe_pattern = re.compile(r'^\d+[mhdw]$')

    def apply_indicator(target_df, indicator_name, params, full_indicator_name):
        """
        Applies the specified indicator to the target DataFrame.
        """
        if target_df.columns.str.contains(full_indicator_name).any():
          del target_df[full_indicator_name]
        if indicator_name == 'RSI':
            target_df[full_indicator_name] = ta.rsi(target_df['Close'], length=params[0]) / 100
        elif indicator_name == 'EMA':
            target_df[full_indicator_name] = ta.ema(target_df['Close'], length=params[0]) / target_df['Close']
        elif indicator_name == 'SMA':
            target_df[full_indicator_name] = ta.sma(target_df['Close'], length=params[0]) / target_df['Close']
        elif indicator_name == 'MACD':
            print(target_df.columns)
            print(f"{full_indicator_name}")
            macd = ta.macd(target_df['Close'], fast=params[0], slow=params[1], signal=params[2])
            target_df[full_indicator_name] = macd[macd.columns[0]] / target_df['Close']
        elif indicator_name == 'MACDs':
            macd = ta.macd(target_df['Close'], fast=params[0], slow=params[1], signal=params[2])
            target_df[full_indicator_name] = macd[macd.columns[1]] / target_df['Close']
        elif indicator_name == 'MACDh':
            macd = ta.macd(target_df['Close'], fast=params[0], slow=params[1], signal=params[2])
            target_df[full_indicator_name] = macd[macd.columns[2]] / target_df['Close']
        elif indicator_name == 'BBL':
            bb = ta.bbands(target_df['Close'], length=params[0], std=params[1])
            target_df[full_indicator_name] = bb[bb.columns[0]] / target_df['Close']
        elif indicator_name == 'BBM':
            bb = ta.bbands(target_df['Close'], length=params[0], std=params[1])
            target_df[full_indicator_name] = bb[bb.columns[1]] / target_df['Close']
        elif indicator_name == 'BBU':
            bb = ta.bbands(target_df['Close'], length=params[0], std=params[1])
            target_df[full_indicator_name] = bb[bb.columns[2]] / target_df['Close']
        elif indicator_name == 'BBB':
            bb = ta.bbands(target_df['Close'], length=params[0], std=params[1])
            target_df[full_indicator_name] = bb[bb.columns[3]]
        elif indicator_name == 'BBP':
            bb = ta.bbands(target_df['Close'], length=params[0], std=params[1])
            target_df[full_indicator_name] = bb[bb.columns[4]]
        else:
            raise ValueError(f"Unsupported indicator: {indicator_name}")

    for indicator in indicators:
        # Split the indicator string into components
        parts = indicator.split('_')

        # Check if the first part is a timeframe (e.g., '15m', '1h', '4h')
        if timeframe_pattern.match(parts[0]):
            timeframe = parts[0]
            indicator_name = parts[1]
            params = parts[2:]
        else:
            # If no timeframe is specified, use the default timeframe
            timeframe = default_timeframe
            indicator_name = parts[0]
            params = parts[1:]

        # Convert parameters to appropriate types
        params = [float(p) if '.' in p else int(p) for p in params]

        # Construct the full indicator name with timeframe
        full_indicator_name = f"{timeframe}_{indicator_name}_{'_'.join(map(str, params))}"

        # Check if the timeframe is higher than the default timeframe
        if timeframe != default_timeframe:
            # Resample the DataFrame to the higher timeframe
            if timeframe not in resampled_dfs:
                resample_rule = timeframe
                resampled_df = df.resample(resample_rule).agg({
                    'Open': 'first',
                    'High': 'max',
                    'Low': 'min',
                    'Close': 'last',
                    'Volume': 'sum'
                }).dropna()
                resampled_dfs[timeframe] = resampled_df
            else:
                # resampled_df =
                pass

            # Apply the indicator to the resampled DataFrame
            apply_indicator(resampled_dfs[timeframe], indicator_name, params, full_indicator_name)

            # Merge the resampled DataFrame back into the original DataFrame
            if full_indicator_name in df.columns:
                del df[full_indicator_name]
            df = df.merge(resampled_dfs[timeframe][[full_indicator_name]], how='left', left_index=True, right_index=True)
        else:
            # Apply the indicator to the original DataFrame
            apply_indicator(df, indicator_name, params, full_indicator_name)
    df.ffill(inplace=True)
    return df

In [18]:
timeframe = '15m'
symbol = 'BTCUSDT'
exchange = 'binance'
start_date = '01-01-2024'
end_date = '28-02-2025'
raw_data = get_all_binance_candles(symbol, timeframe, start_date, end_date)
raw_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01 00:00:00,42283.58,42488.09,42261.02,42488.00,431.71082
2024-01-01 00:15:00,42488.00,42554.57,42412.02,42419.73,392.24889
2024-01-01 00:30:00,42419.73,42447.82,42354.19,42441.32,319.90644
2024-01-01 00:45:00,42441.32,42490.74,42422.45,42475.23,127.81493
2024-01-01 01:00:00,42475.23,42475.23,42431.65,42466.33,188.76099
...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222


In [19]:
indicators = ['15m_RSI_14',
 '15m_RSI_6',
 '15m_EMA_5',
 '15m_EMA_21',
 '15m_SMA_50',
 '15m_MACD_12_26_9',
 '15m_MACDs_12_26_9',
 '15m_MACDh_12_26_9',
 '15m_BBL_20_2.0',
 '15m_BBM_20_2.0',
 '15m_BBU_20_2.0',
 '15m_BBB_20_2.0',
 '15m_BBP_20_2.0',
 '1h_RSI_14',
 '1h_RSI_6',
 '1h_EMA_5',
 '1h_EMA_21',
 '1h_MACD_12_26_9',
 '1h_MACDs_12_26_9',
 '1h_MACDh_12_26_9',
 '1h_BBL_20_2.0',
 '1h_BBM_20_2.0',
 '1h_BBU_20_2.0',
 '1h_BBB_20_2.0',
 '1h_BBP_20_2.0',
 '4h_RSI_14',
 '4h_RSI_6',
 '4h_EMA_5',
 '4h_EMA_21']

In [111]:
data = raw_data.copy()

In [114]:
data = add_indicators_and_normalize(data, indicators)
data

Index(['Open', 'High', 'Low', 'Close', 'Volume', '1h_RSI_14', '1h_BBP_20_2.0'], dtype='object')
1h_MACD_12_26_9
Index(['Open', 'High', 'Low', 'Close', 'Volume', '15m_BBL_20_2.0',
       '15m_BBU_20_2.0', '15m_MACDs_12_26_9', '15m_EMA_5', '1h_RSI_14',
       '15m_MACDh_12_26_9', '4h_RSI_14', '15m_BBP_20_2.0', '15m_EMA_21',
       '1h_BBP_20_2.0', '1h_MACD_12_26_9', '1h_EMA_21', '1h_BBM_20_2.0',
       '4h_EMA_5', '1h_MACDs_12_26_9', '4h_EMA_21', '1h_RSI_6',
       '1h_BBL_20_2.0', '15m_BBM_20_2.0', '15m_SMA_50', '1h_MACDh_12_26_9',
       '15m_BBB_20_2.0', '4h_RSI_6', '1h_BBU_20_2.0', '15m_RSI_6', '1h_EMA_5',
       '1h_BBB_20_2.0', '15m_RSI_14'],
      dtype='object')
15m_MACD_12_26_9


Unnamed: 0_level_0,Open,High,Low,Close,Volume,1h_RSI_14,15m_MACDh_12_26_9,4h_RSI_14,15m_BBP_20_2.0,15m_EMA_21,...,1h_BBU_20_2.0,15m_RSI_6,1h_EMA_5,1h_BBB_20_2.0,15m_RSI_14,15m_MACD_12_26_9,15m_BBL_20_2.0,15m_BBU_20_2.0,15m_MACDs_12_26_9,15m_EMA_5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 00:00:00,42283.58,42488.09,42261.02,42488.00,431.71082,,,,,,...,,,,,,,,,,
2024-01-01 00:15:00,42488.00,42554.57,42412.02,42419.73,392.24889,,,,,,...,,,,,,,,,,
2024-01-01 00:30:00,42419.73,42447.82,42354.19,42441.32,319.90644,,,,,,...,,,,,,,,,,
2024-01-01 00:45:00,42441.32,42490.74,42422.45,42475.23,127.81493,,,,,,...,,,,,,,,,,
2024-01-01 01:00:00,42475.23,42475.23,42431.65,42466.33,188.76099,,,,,,...,,,,,,,,,,0.999807
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 23:00:00,84716.01,84735.85,84422.02,84552.05,326.13682,0.441184,-0.003695,0.319357,0.744955,0.996985,...,1.030284,0.624015,0.998003,4.431350,0.522309,-0.001826,0.982790,1.005892,0.001870,0.998218
2025-02-27 23:15:00,84552.06,84798.00,84531.44,84592.20,156.90437,0.441184,-0.003224,0.319357,0.764784,0.996829,...,1.030284,0.635524,0.998003,4.431350,0.527919,-0.001347,0.982314,1.005439,0.001877,0.998496
2025-02-27 23:30:00,84592.19,84927.84,84494.31,84684.70,181.51427,0.441184,-0.002750,0.319357,0.816875,0.996127,...,1.030284,0.663960,0.998003,4.431350,0.541285,-0.000869,0.981310,1.004190,0.001881,0.998270
2025-02-27 23:45:00,84684.69,84853.17,84622.64,84708.58,124.75222,0.441184,-0.002292,0.319357,0.824588,0.996224,...,1.030284,0.671890,0.998003,4.431350,0.544868,-0.000463,0.980974,1.004047,0.001829,0.998659


In [115]:
data[indicators]

Unnamed: 0_level_0,15m_RSI_14,15m_RSI_6,15m_EMA_5,15m_EMA_21,15m_SMA_50,15m_MACD_12_26_9,15m_MACDs_12_26_9,15m_MACDh_12_26_9,15m_BBL_20_2.0,15m_BBM_20_2.0,...,1h_MACDh_12_26_9,1h_BBL_20_2.0,1h_BBM_20_2.0,1h_BBU_20_2.0,1h_BBB_20_2.0,1h_BBP_20_2.0,4h_RSI_14,4h_RSI_6,4h_EMA_5,4h_EMA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-01 00:00:00,,,,,,,,,,,...,,,,,,,,,,
2024-01-01 00:15:00,,,,,,,,,,,...,,,,,,,,,,
2024-01-01 00:30:00,,,,,,,,,,,...,,,,,,,,,,
2024-01-01 00:45:00,,,,,,,,,,,...,,,,,,,,,,
2024-01-01 01:00:00,,,0.999807,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-27 23:00:00,0.522309,0.624015,0.998218,0.996985,1.006491,-0.001826,0.001870,-0.003695,0.982790,0.994341,...,-0.006160,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:15:00,0.527919,0.635524,0.998496,0.996829,1.005441,-0.001347,0.001877,-0.003224,0.982314,0.993877,...,-0.006160,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:30:00,0.541285,0.663960,0.998270,0.996127,1.003851,-0.000869,0.001881,-0.002750,0.981310,0.992750,...,-0.006160,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740
2025-02-27 23:45:00,0.544868,0.671890,0.998659,0.996224,1.003111,-0.000463,0.001829,-0.002292,0.980974,0.992511,...,-0.006160,0.985618,1.007951,1.030284,4.431350,0.321980,0.319357,0.355691,1.004969,1.038740


In [42]:
timeframe_pattern = re.compile(r'^\d+[mhdw]$')
timeframe_pattern.match('15h')