<a href="https://colab.research.google.com/github/ifeLight/ml-bot/blob/main/binance-multi-timeframe-grade-opt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install pandas-ta
!pip install backtrader[plotting]
!pip install plotly
!pip install --upgrade firebase-admin

Collecting pandas-ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pandas-ta
  Building wheel for pandas-ta (setup.py) ... [?25l[?25hdone
  Created wheel for pandas-ta: filename=pandas_ta-0.3.14b0-py3-none-any.whl size=218909 sha256=109622ec3041de1164d2c15d0f3367997724361fb2c079e37ee4dadd97d61163
  Stored in directory: /root/.cache/pip/wheels/7f/33/8b/50b245c5c65433cd8f5cb24ac15d97e5a3db2d41a8b6ae957d
Successfully built pandas-ta
Installing collected packages: pandas-ta
Successfully installed pandas-ta-0.3.14b0
Collecting backtrader[plotting]
  Downloading backtrader-1.9.78.123-py2.py3-none-any.whl.metadata (6.8 kB)
Downloading backtrader-1.9.78.123-py

In [8]:
import numpy as np
import pandas as pd
import datetime
import requests
import json
import os
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import backtrader as bt
import pandas_ta as ta
from imblearn.over_sampling import SMOTE
import subprocess
import firebase_admin
from firebase_admin import firestore
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
from google.colab import auth
import google.auth

In [9]:
auth.authenticate_user()
#Configure Google cloud project
project_id = 'ifelight'
!gcloud config set project {project_id}

Updated property [core/project].


In [10]:
# Working GCP Bucket name
bucket_name = 'ife-storage'
# Working on Firestore name
firestore_collection_name = 'trade-models'

In [11]:
binance_base_url = 'https://52on3577u3.execute-api.eu-central-1.amazonaws.com'

def get_binance_candles(symbol: str, interval='1h', limit: int = 50, **kwargs):
    url = f'{binance_base_url}/api/v3/uiKlines?symbol={symbol}&interval={interval}&limit={limit}'
    for key, value in kwargs.items():
        url += f'&{key}={value}'
    response = requests.get(url)
    result = json.loads(response.text)
    # print(result)
    def map_result(x):
        return {
            'Date': x[0],
            'Open': x[1],
            'High': x[2],
            'Low': x[3],
            'Close': x[4],
            'Volume': x[5],
        }
    mappeded_result = []
    for x in result:
        mappeded_result.append(map_result(x))
    return mappeded_result


def candles_to_df(data):
    df =  pd.DataFrame(data)
    df['Date'] = pd.to_datetime(df['Date'], unit='ms')
    df['Open'] = df['Open'].astype(float)
    df['High'] = df['High'].astype(float)
    df['Low'] = df['Low'].astype(float)
    df['Close'] = df['Close'].astype(float)
    df['Volume'] = df['Volume'].astype(float)
    df.set_index('Date', inplace=True)
    return df

def get_all_binance_candles(symbol: str, interval='1h', start_date=None, end_date=None, limit=1000):
  try:
    return load_candles_from_cloud_storage(symbol, interval, start_date, end_date)
  except FileNotFoundError:
    pass
  result = []
  raw_start_date = start_date
  raw_end_date = end_date
  start_date = pd.to_datetime(start_date) if start_date else pd.to_datetime('2015-01-01')
  end_date = pd.to_datetime(end_date) if end_date else pd.to_datetime('today')
  while True:
    candles = get_binance_candles(symbol, interval, limit, startTime=int(start_date.timestamp() * 1000), endTime=int(end_date.timestamp() * 1000))
    if len(candles) <= 1:
      break;
    result += candles
    start_date = pd.to_datetime(datetime.datetime.fromtimestamp(candles[-1]['Date'] / 1000))
  candles_df = candles_to_df(result)
  save_candles_to_cloud_storage(candles_df, symbol, interval, raw_start_date, raw_end_date)
  return candles_df

def candles_storage_file_name(symbol: str, interval='1h', start_date=None, end_date=None):
  file_name = f'binance_{symbol}_{interval}_{start_date}_{end_date}.csv'
  return file_name

def load_candles_from_cloud_storage(symbol: str, interval: str, start_date=None, end_date=None):
  file_name = candles_storage_file_name(symbol, interval, start_date, end_date)
  try:
    # Download the file from cloud storage.
    subprocess.run(['gsutil', 'cp', f'gs://{bucket_name}/trade/candles/{file_name}', f'/tmp/{file_name}'], check=True)

    # Load the data into a Pandas DataFrame.
    with open(f'/tmp/{file_name}', 'r') as f:
      return pd.read_csv(f, index_col=0, parse_dates=True)
  except subprocess.CalledProcessError:
    # Raise a FileNotFoundError if the file is not found in cloud storage.
    raise FileNotFoundError(f"File not found: gs://{bucket_name}/trade/candles/{file_name}")

def save_candles_to_cloud_storage(df: pd.DataFrame, symbol: str, interval: str, start_date, end_date):
  file_name = candles_storage_file_name(symbol, interval, start_date, end_date)
  df.to_csv(f'/tmp/{file_name}')
  !gsutil cp /tmp/{file_name} gs://{bucket_name}/trade/candles/{file_name}


In [12]:
def add_pivots(df, window=5):
    """
    Calculate the probability of price going up and down based on higher and lower pivots.
    """
    df['Higher_Pivot'] = df['High'].rolling(window=2 * window + 1, center=True).apply(
        lambda x: 1 if x.iloc[window] == x.max() else 0, raw=False
    )
    df['Lower_Pivot'] = df['Low'].rolling(window=2 * window + 1, center=True).apply(
        lambda x: 1 if x.iloc[window] == x.min() else 0, raw=False
    )

    # Step 2: Ensure no two successive pivots of the same type
    pivot_type = None  # Tracks the type of the last pivot
    last_pivot_index = None  # Tracks the index of the last pivot

    # Remove duplicated index
    df = df[~df.index.duplicated(keep='first')]

    for i in range(len(df.index)):
      if df.loc[df.index[i], 'Higher_Pivot'].item() == 1:
          if pivot_type == 'higher':
              # Compare current higher pivot with the last higher pivot
              if df.loc[df.index[i], 'High'].item() > df.loc[df.index[last_pivot_index], 'High'].item():
                  # Remove the last higher pivot
                  df.loc[df.index[last_pivot_index], 'Higher_Pivot'] = 0
                  # Update the last pivot
                  last_pivot_index = i
              else:
                  # Remove the current higher pivot
                  df.loc[df.index[i], 'Higher_Pivot'] = 0
          else:
              # Update pivot type and index
              pivot_type = 'higher'
              last_pivot_index = i

      elif df.loc[df.index[i], 'Lower_Pivot'].item() == 1:
          if pivot_type == 'lower':
              # Compare current lower pivot with the last lower pivot
              if df.loc[df.index[i], 'Low'].item() < df.loc[df.index[last_pivot_index], 'Low'].item():
                  # Remove the last lower pivot
                  df.loc[df.index[last_pivot_index], 'Lower_Pivot'] = 0
                  # Update the last pivot
                  last_pivot_index = i
              else:
                  # Remove the current lower pivot
                  df.loc[df.index[i], 'Lower_Pivot'] = 0
          else:
              # Update pivot type and index
              pivot_type = 'lower'
              last_pivot_index = i
    return df

In [13]:
def calculate_pivot_proximity(df):
    """
    Calculates the pivot proximity using a loop-based approach, finding the closest
    previous and next pivots without generating intermediate lists of all pivots.

    Args:
        df (pd.DataFrame): DataFrame with 'Pivot' and 'Close' columns.

    Returns:
        pd.DataFrame: DataFrame with 'Pivot_Proximity' column added.
    """
    df = df.copy()

    pivot_values = df['Pivot'].values
    close_values = df['Close'].values
    proximity_values = [0.0] * len(df)

    for i in range(len(df)):
      pivot = pivot_values[i]
      if pivot == -1:
        proximity_values[i] = -1.0
      elif pivot == 1:
        proximity_values[i] = 1.0
      else:
        closest_previous_pivot_index = None
        for j in range(i - 1, -1, -1):
          if pivot_values[j] != 0:
            closest_previous_pivot_index = j
            break

        if closest_previous_pivot_index is not None:
          closest_previous_pivot_value = pivot_values[closest_previous_pivot_index]
          closest_previous_close = close_values[closest_previous_pivot_index]

          closest_next_pivot_index = None
          if closest_previous_pivot_value == -1:
            for j in range(i + 1, len(df)):
              if pivot_values[j] == 1:
                closest_next_pivot_index = j
                break
          else:
            for j in range(i + 1, len(df)):
              if pivot_values[j] == -1:
                closest_next_pivot_index = j
                break

          if closest_next_pivot_index is not None:
            closest_next_close = close_values[closest_next_pivot_index]

            distance_to_previous = abs(close_values[i] - closest_previous_close)
            distance_to_next = abs(close_values[i] - closest_next_close)

            if distance_to_previous + distance_to_next != 0:
              if closest_previous_pivot_value == -1:
                proximity_values[i] = (distance_to_previous - distance_to_next) / (distance_to_previous + distance_to_next)
              else:
                proximity_values[i] = (distance_to_next - distance_to_previous) / (distance_to_previous + distance_to_next)

    df['Pivot_Proximity'] = proximity_values
    return df

In [14]:
def add_scaled_rsi(df, window=14, prefix = '', features_columns=[]):
  series = ta.rsi(df['Close'], length=window)
  column_name = f"{prefix}RSI_{window}"
  df[column_name] = series / 100
  if(column_name not in features_columns):
    features_columns.append(column_name)
  return df

def add_scaled_ema(df, window=50, prefix = '', features_columns=[]):
  series = ta.ema(df['Close'], length=window)
  column_name = f"{prefix}EMA_{window}"
  df[column_name] = series / df['Close']
  if column_name not in features_columns:
    features_columns.append(column_name)
  return df

def add_scaled_sma(df, window=50, prefix = '', features_columns=[]):
  series = ta.sma(df['Close'], length=window)
  column_name = f"{prefix}SMA_{window}"
  df[column_name] = series / df['Close']
  if column_name not in features_columns:
    features_columns.append(column_name)
  return df

def add_scaled_macd(df, prefix= '', features_columns=[], fast = 12, slow = 26, signal=9):
  macd_df = ta.macd(df['Close'], fast=fast, slow=slow, signal=signal)
  suffix = f"{fast}_{slow}_{signal}"
  macd_column_name = f"{prefix}MACD_{suffix}"
  macds_column_name = f"{prefix}MACDs_{suffix}"
  macdh_column_name = f"{prefix}MACDh_{suffix}"
  df[macd_column_name] = macd_df[macd_df.columns[0]] / df['Close']
  df[macds_column_name] = macd_df[macd_df.columns[2]] / df['Close']
  df[macdh_column_name] = macd_df[macd_df.columns[1]] / df['Close']
  if macd_column_name not in features_columns:
    features_columns.append(macd_column_name)
  if macds_column_name not in features_columns:
    features_columns.append(macds_column_name)
  if macdh_column_name not in features_columns:
    features_columns.append(macdh_column_name)
  return df

def add_scaled_bbands(df, window=20, std=2.0, prefix= '', features_columns=[]):
  bbands_df = ta.bbands(df['Close'], length=window, std=std)
  suffix = f"{window}_{std}"
  bbl_column_name = f"{prefix}BBL_{suffix}"
  bbm_column_name = f"{prefix}BBM_{suffix}"
  bbu_column_name = f"{prefix}BBU_{suffix}"
  bbb_column_name = f"{prefix}BBB_{suffix}"
  bbp_column_name = f"{prefix}BBP_{suffix}"
  df[bbl_column_name] = bbands_df[bbands_df.columns[0]] / df['Close']
  df[bbm_column_name] = bbands_df[bbands_df.columns[1]] / df['Close']
  df[bbu_column_name] = bbands_df[bbands_df.columns[2]] / df['Close']
  df[bbb_column_name] = bbands_df[bbands_df.columns[3]]
  df[bbp_column_name] = bbands_df[bbands_df.columns[4]]
  if bbl_column_name not in features_columns: features_columns.append(bbl_column_name)
  if bbm_column_name not in features_columns: features_columns.append(bbm_column_name)
  if bbu_column_name not in features_columns: features_columns.append(bbu_column_name)
  if bbb_column_name not in features_columns: features_columns.append(bbb_column_name)
  if bbp_column_name not in features_columns: features_columns.append(bbp_column_name)
  return df

In [15]:
def merge_candlesticks_data(df1, df2):
  """
  Merges two candlestick DataFrames with forward fill, handling different timeframes,
  and prevents duplicate OHLCV columns.
  Ensure both DataFrames have a datetime index.
  And ensure the second DataFrame is the larger timeframe

  Args:
      df1: First candlestick DataFrame with datetime index.
      df2: Second candlestick DataFrame with datetime index.

  Returns:
      Merged DataFrame with forward-filled values, and no duplicate OHLCV columns.
  """
  # Ensure both DataFrames have a datetime index
  if not isinstance(df1.index, pd.DatetimeIndex) or not isinstance(df2.index, pd.DatetimeIndex):
      raise ValueError("DataFrames must have a datetime index.")
  # Identify OHLCV columns
  ohlcv_cols = ['open', 'high', 'low', 'close', 'volume']

  # Rename columns in df2 that conflict with df1's OHLCV columns
  for col in df2.columns:
    if col.lower() in ohlcv_cols and col.lower() in df1.columns.str.lower():
      del df2[col]

  # Merge the DataFrames using outer join, which preserves all dates
  merged_df = pd.merge(df1, df2, how='outer', left_index=True, right_index=True, suffixes=('_df1', '_df2'))

  # Forward fill the missing values for each column
  for col in merged_df.columns:
    merged_df[col] = merged_df[col].ffill()

  return merged_df

In [16]:
def resample_candles(df, interval='1h'):
    return df.resample(interval).agg({'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum'}).ffill()

In [17]:
def create_sequences(data, features_columns, target_col, seq_length):
    """
    Create sequences of `seq_length` time steps for LSTM input, optimized for performance.
    """
    num_samples = len(data) - seq_length
    num_features = len(features_columns)

    X = np.zeros((num_samples, seq_length, num_features))
    y = np.zeros(num_samples)

    features_data = data[features_columns].values
    target_data = data[target_col].values

    for i in range(num_samples):
        X[i] = features_data[i:i + seq_length]
        y[i] = target_data[i + seq_length]

    return X, y

In [18]:
class CustomEarlyStopping(tf.keras.callbacks.Callback):
    def __init__(self, patience=2):
        super(CustomEarlyStopping, self).__init__()
        self.patience = patience  # Number of consecutive increases allowed
        self.best_val_mae = float('inf')  # Track the best validation MAE
        self.increase_count = 0  # Counter for consecutive increases

    def on_epoch_end(self, epoch, logs=None):
        current_val_mae = logs.get('val_mae')  # Get validation MAE for the current epoch

        if current_val_mae < self.best_val_mae:
            # If validation MAE improves, reset the counter
            self.best_val_mae = current_val_mae
            self.increase_count = 0
        else:
            # If validation MAE increases, increment the counter
            self.increase_count += 1

        # Stop training if validation MAE increases consecutively for 'patience' epochs
        if self.increase_count >= self.patience:
            print(f"\nEarly stopping: Validation MAE increased {self.patience} times in a row.")
            self.model.stop_training = True

In [19]:
def predict_in_batches(model, test_data, features_columns, seq_length, batch_size=128):
    """
    Predicts in batches to speed up inference.
    """
    num_samples = len(test_data) - seq_length
    feature_array = test_data[features_columns].values
    predicted_values = np.full(len(test_data), np.nan)

    for start_idx in range(0, num_samples, batch_size):
        end_idx = min(start_idx + batch_size, num_samples)
        batch_indices = range(start_idx + seq_length, end_idx + seq_length)
        batch_input = np.array([feature_array[i - seq_length:i] for i in batch_indices])

        if len(batch_input) > 0 :
            predictions = model.predict(batch_input, verbose=0)
            predicted_values[batch_indices] = predictions.flatten() #Flatten to 1d array.

    test_data['Predicted_Value'] = predicted_values
    return test_data

In [20]:
class PredictedValueStrategy(bt.Strategy):
    params = (
        ('buy_threshold', 0.6),
        ('sell_threshold', 0.6),
        ('leverage', 1),  # Leverage ratio
        ('margin', 1000),
        ('log', True)
    )

    def __init__(self):
        # To keep track of pending orders and buy price/commission
        self.order = None
        self.buyprice = None
        self.buycomm = None

        # Add the Predicted_Value as a data feed
        self.predicted_value = self.datas[0].predicted_value

    def next(self):
        # Check if an order is pending ... if yes, we cannot send a 2nd one
        if self.order:
            return
         # # Check if we are in the market
        if not self.position:
          cash = self.broker.getcash()
          position_size = (self.params.margin * self.params.leverage) / self.data.close[0]
          # Long signal
          if self.predicted_value[0] > self.params.buy_threshold:
            self.log('LONG POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.buy(size=position_size)

          # Short signal
          elif self.predicted_value[0] < self.params.sell_threshold:
            self.log('SHORT POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.sell(size=position_size)

        else:
          if self.predicted_value[0] > self.params.buy_threshold and self.position.size < 0:
            self.log('CLOSE SHORT POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.close()
          elif self.predicted_value[0] < self.params.sell_threshold and self.position.size > 0:
            self.log('CLOSE LONG POSITION CREATED, %.2f' % self.datas[0].close[0])
            self.order = self.close()

    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return
        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            if order.status == order.Canceled:
                self.log('Order Canceled')
            elif order.status == order.Margin:
                self.log(f'Order Margin Not Enough - Available cash: {self.broker.getcash()}')
            elif order.status == order.Rejected:
                self.log('Order Rejected')

        # Write down: no pending order
        self.order = None

    def notify_trade(self, trade):
        if not trade.isclosed:
            return
        self.log(f'TRADE COMPLETED, GROSS {trade.pnl:.2f}, NET {trade.pnlcomm:.2f}, Available Cash {self.broker.getcash():.2f}')

    def log(self, txt, dt=None):
        if not self.params.log:
          return
        dt = dt or self.datas[0].datetime.date(0)
        time = self.datas[0].datetime.time()
        print(f'{dt.isoformat()} {time.isoformat()}, {txt}')


In [21]:
# Extend PandasData to include the custom column
class CustomPandasData(bt.feeds.PandasData):
    # Add custom columns
    lines = ('predicted_value',)  # Add the custom line
    params = (
        ('predicted_value', 'Predicted_Value'),  # Map the column name
    )


In [22]:
def make_dict_even(data):
    """
    Recursively makes all numeric values in a dictionary even.

    Args:
        data (dict): The input dictionary (can have nested dictionaries or lists).

    Returns:
        dict: A new dictionary with all numeric values made even.
    """
    if isinstance(data, dict):
        new_dict = {}
        for key, value in data.items():
            new_dict[key] = make_dict_even(value)
        return new_dict
    elif isinstance(data, list):
        return [make_dict_even(item) for item in data]
    elif isinstance(data, (int, float)):
        if isinstance(data, int):
            return data if data % 2 == 0 else data + 1
        else: # float. We will round to an int, and then make even.
            int_value = round(data)
            return int_value if int_value % 2 == 0 else int_value + 1

    else:
        return data  # Return non-numeric values as they are

In [23]:
def get_model_cloud_storage_path(bucket_name, local_file_name):
    """
    Returns the cloud storage path for a given model name.

    Parameters:
        local_file_name (str): The name of the model.

    Returns:
        str: The cloud storage path. (e.g., gs://<bucket_name>/trade/models/<model_name>.h5).
    """
    return f'gs://{bucket_name}/trade/models/{local_file_name}'

def save_model_to_cloud_storage(model: tf.keras.Model, model_name: str, bucket_name: str):
    """
    Saves a TensorFlow model to Google Cloud Storage and returns the cloud storage file path.

    Parameters:
        model (tf.keras.Model): The TensorFlow model to save.
        model_name (str): The name of the model (used to create the file name).
        bucket_name (str): The name of the Google Cloud Storage bucket.

    Returns:
        str: The cloud storage file path (e.g., gs://<bucket_name>/trade/models/<model_name>.h5).
    """
    # Define the local and cloud storage file paths
    local_file_name = f'{model_name}.h5'
    local_file_path = f'/tmp/{local_file_name}'
    cloud_file_path = get_model_cloud_storage_path(bucket_name, local_file_name)

    # Save the model locally
    model.save(local_file_path)

    try:
        # Upload the model to Google Cloud Storage
        subprocess.run(['gsutil', 'cp', local_file_path, cloud_file_path], check=True)
        print(f"Model saved to {cloud_file_path}")
    except subprocess.CalledProcessError as e:
        # Handle errors during the upload process
        raise RuntimeError(f"Failed to upload model to Google Cloud Storage: {e}")
    finally:
        # Clean up the local file
        if os.path.exists(local_file_path):
            os.remove(local_file_path)
    return cloud_file_path

def load_model_from_cloud_storage(model_name: str, bucket_name: str):
    """
    Loads a TensorFlow model from Google Cloud Storage.

    Parameters:
        model_name (str): The name of the model (used to create the file name).
        bucket_name (str): The name of the Google Cloud Storage bucket.

    Returns:
        tf.keras.Model: The loaded TensorFlow model.
    """
    # Define the local and cloud storage file paths
    local_file_name = f'{model_name}.h5'
    local_file_path = f'/tmp/{local_file_name}'
    cloud_file_path = get_model_cloud_storage_path(bucket_name, local_file_name)

    try:
        # Download the model from Google Cloud Storage
        subprocess.run(['gsutil', 'cp', cloud_file_path, local_file_path], check=True)

        # Load the model
        model = tf.keras.models.load_model(local_file_path)
        print(f"Model loaded from {cloud_file_path}")
        return model
    except subprocess.CalledProcessError as e:
        raise FileNotFoundError(f"Model not found in Google Cloud Storage: {e}")
    finally:
        # Clean up the local file
        if os.path.exists(local_file_path):
            os.remove(local_file_path)


In [24]:
def initialize_firestore(project_id):
    """
    Initialize the Firestore client using the service account key.

    Parameters:
        project_id (str): The Google Cloud project ID.

    Returns:
        firestore.Client: Initialized Firestore client.
    """
    if not firebase_admin._apps:
        # cred = credentials.Certificate(service_account_key_path)
        cred = firebase_admin.credentials.ApplicationDefault()
        cred._project_id = project_id  # Add this line
        os.environ["GOOGLE_CLOUD_PROJECT"] = project_id
        firebase_admin.initialize_app(cred, {'projectId': project_id})
    return firestore.client()


def save_to_firestore(project_id, collection_name, data):
    """
    Save data to a Firestore collection.

    Parameters:
        project_id (str): The Google Cloud project ID.
        service_account_key_path (str): Path to the Firebase service account key JSON file.
        collection_name (str): Name of the Firestore collection.
        data (dict): Data to save in the document.

    Returns:
        None
    """
    # Initialize Firestore client
    db = initialize_firestore(project_id)

    # Save data to Firestore
    doc_ref = db.collection(collection_name).document()
    doc_ref.set(data)
    print(f"Data saved to Firestore: Collection={collection_name}, Document ID={doc_ref.id}")

In [25]:
options = {
  'timeframe': '15m',
  'symbol': 'BTCUSDT',
  'exchange': 'binance',
  'start_date': '01-01-2024',
  'end_date': '28-02-2025' ,
  'pivot_windows': 10
}

In [27]:
def generate_model(**options):
  # Extract options
  timeframe = options['timeframe'] if 'timeframe' in options else '15m'
  symbol = options['symbol'] if 'symbol' in options else 'BTCUSDT'
  exchange = options['exchange'] if 'exchange' in options else 'binance'
  start_date = options['start_date'] if 'start_date' in options else '01-01-2024'
  end_date = options['end_date'] if 'end_date' in options else '28-02-2025'
  pivot_windows = options['pivot_windows'] if 'pivot_windows' in options else 10

  # Get raw candles
  data = get_all_binance_candles(symbol, timeframe, start_date, end_date)
  # Add pivots
  data = add_pivots(data, int(pivot_windows))
  data['Pivot'] = np.where(data['Higher_Pivot'] == 1, -1, np.where(data['Lower_Pivot'] == 1, 1, 0))
  del data['Higher_Pivot']
  del data['Lower_Pivot']
  # Calculate pivot proximity
  data = calculate_pivot_proximity(data)
  # Declare feature columns
  features_columns = []

  # # Add technical indicators on lower timeframe
  lower_timeframe = timeframe
  lower_timeframe_prefix = f"{lower_timeframe}_"
  add_scaled_rsi(data, 14, prefix=lower_timeframe_prefix, features_columns=features_columns)
  add_scaled_rsi(data, 6, prefix=lower_timeframe_prefix, features_columns=features_columns)
  add_scaled_ema(data, 5, prefix=lower_timeframe_prefix, features_columns=features_columns)
  add_scaled_ema(data, 21, prefix=lower_timeframe_prefix, features_columns=features_columns)
  add_scaled_sma(data, 50, prefix=lower_timeframe_prefix, features_columns=features_columns)
  add_scaled_macd(data, prefix=lower_timeframe_prefix, features_columns=features_columns)
  add_scaled_bbands(data, prefix=lower_timeframe_prefix, features_columns=features_columns)

  # Add technical indicators on middle timeframe
  middle_timeframe = '1h'
  middle_timeframe_prefix = f"{middle_timeframe}_"
  middle_data = resample_candles(data, middle_timeframe)
  add_scaled_rsi(middle_data, 14, prefix=middle_timeframe_prefix, features_columns=features_columns)
  add_scaled_rsi(middle_data, 6, prefix=middle_timeframe_prefix, features_columns=features_columns)
  add_scaled_ema(middle_data, 5, prefix=middle_timeframe_prefix, features_columns=features_columns)
  add_scaled_ema(middle_data, 21, prefix=middle_timeframe_prefix, features_columns=features_columns)
  add_scaled_macd(middle_data, prefix=middle_timeframe_prefix, features_columns=features_columns)
  add_scaled_bbands(middle_data, prefix=middle_timeframe_prefix, features_columns=features_columns)

  # Add technical indicators on higher timeframe
  higher_timeframe = '4h'
  higher_timeframe_prefix = f"{higher_timeframe}_"
  higher_data = resample_candles(data, higher_timeframe)
  add_scaled_rsi(higher_data, 14, prefix=higher_timeframe_prefix, features_columns=features_columns)
  add_scaled_rsi(higher_data, 6, prefix=higher_timeframe_prefix, features_columns=features_columns)
  add_scaled_ema(higher_data, 5, prefix=higher_timeframe_prefix, features_columns=features_columns)
  add_scaled_ema(higher_data, 21, prefix=higher_timeframe_prefix, features_columns=features_columns)

  # Merge middle and higher timeframe data to lower timeframe data
  data = merge_candlesticks_data(data, middle_data)
  data = merge_candlesticks_data(data, higher_data)

  # Drop rows with NaN values (due to rolling calculations)
  data.dropna(inplace=True)

  # delete Pivot_Porximity that have zero at the beginning and ending of the dataframe
  non_zero_indices = data[data['Pivot_Proximity'] != 0].index
  non_zero_at_begining = non_zero_indices[0]
  non_zero_at_end = non_zero_indices[-1]
  data = data.loc[non_zero_at_begining:non_zero_at_end]

  # select only first 80 % of the data
  training_data_ratio = 0.8 # 80%
  training_data = data[:int(len(data) * training_data_ratio)]
  training_data_start_date = training_data.index[0]
  training_data_end_date = training_data.index[-1]

  # Create training sequences
  seq_length = 100
  target_col = 'Pivot_Proximity'
  X, y = create_sequences(training_data, features_columns, target_col, seq_length)

  # Split into training and testing sets
  train_validate_split_ratio = 0.2
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=train_validate_split_ratio, shuffle=False)
  train_candles_length = len(X_train)
  val_candles_length = len(X_test)

  # Build the LSTM model
  model_input_shape = (X_train.shape[1], X_train.shape[2])
  model = Sequential([
      LSTM(100, return_sequences=True, input_shape=model_input_shape),
      Dropout(0.2),
      LSTM(50, return_sequences=False),
      Dropout(0.2),
      Dense(50, activation='relu'),
      Dropout(0.2),
      Dense(25, activation='relu'),
      Dropout(0.2),
      Dense(10, activation='relu'),
      Dropout(0.2),
      Dense(1, activation='tanh')
  ])
  model.compile(optimizer='adam', loss='mse', metrics=['mae'])

  # Define early stopping
  custom_early_stopping = CustomEarlyStopping(patience=3)

  # Step 5: Train the model
  train_epochs = 20
  history = model.fit(
      X_train, y_train,
      epochs=train_epochs,
      batch_size=32,
      validation_data=(X_test, y_test),
      verbose=1,
      callbacks=[custom_early_stopping]
  )
  final_epoch = len(history.history['loss'])
  train_loss = history.history['loss'][-1]
  train_mae = history.history['mae'][-1]
  val_loss = history.history['val_loss'][-1]
  val_mae = history.history['val_mae'][-1]

  # test data
  test_data = data[int(len(data) * training_data_ratio):]

  # Predict in batches
  batch_size = 128
  predict_in_batches(model, test_data, features_columns, seq_length, batch_size)
  test_data.dropna(inplace=True) # Early predicted values wont be availble due to sequencing
  test_data_start_date = test_data.index[0]
  test_data_end_date = test_data.index[-1]

  # backtest configuration
  trade_leverage = 10
  trade_margin = 1000
  trade_buy_threshold = 0.6
  trade_sell_threshold = -0.6
  broker_commision = 0.02 # In percentage

  # Load data into a Pandas DataFrame
  backtest_data = CustomPandasData(
      dataname=test_data,
      datetime=None,  # Use the index as the datetime
      open='Open',         # Column index for Open
      high='High',         # Column index for High
      low='Low',          # Column index for Low
      close='Close',        # Column index for Close
      volume='Volume',       # Column index for Volume
      openinterest=None,# No open interest column
      predicted_value='Predicted_Value'  # Column index for Predicted_Value
  )

  # Create a Cerebro engine instance
  cerebro = bt.Cerebro()

  # Add the strategy
  cerebro.addstrategy(
      PredictedValueStrategy,
      buy_threshold=trade_buy_threshold,
      sell_threshold=trade_sell_threshold,
      leverage=trade_leverage,
      margin=trade_margin,
      log=False
  )

  # Add the data feed
  cerebro.adddata(backtest_data)

  # Set the initial cash
  cerebro.broker.set_cash(10000.0)

  # Set the commission
  cerebro.broker.setcommission(commission=broker_commision / 100)

  # Add analyzers
  cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe')
  cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
  cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name='tradeanalyzer')
  cerebro.addanalyzer(bt.analyzers.Returns, _name='returns')
  cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')

  # Run the backtest
  starting_portfolio_value = cerebro.broker.getvalue()
  print('Starting Portfolio Value: %.2f' % starting_portfolio_value)
  backtest_result = cerebro.run()
  final_portfolio_value = cerebro.broker.getvalue()
  print('Final Portfolio Value: %.2f' % final_portfolio_value)

  # Capture analysis
  strat = backtest_result[0]

  trade_analysis = make_dict_even(strat.analyzers.tradeanalyzer.get_analysis())
  # Errors of large numbers, fix
  trade_analysis['len']['short']['lost']['min'] = str(trade_analysis['len']['short']['lost']['min'])
  trade_analysis['len']['short']['lost']['max'] = str(trade_analysis['len']['short']['lost']['max'])
  trade_analysis['len']['short']['won']['min'] = str(trade_analysis['len']['short']['won']['min'])
  trade_analysis['len']['short']['won']['max'] = str(trade_analysis['len']['short']['won']['max'])
  trade_analysis['len']['long']['lost']['min'] = str(trade_analysis['len']['long']['lost']['min'])
  trade_analysis['len']['long']['lost']['max'] = str(trade_analysis['len']['long']['lost']['max'])
  trade_analysis['len']['long']['won']['min'] = str(trade_analysis['len']['long']['won']['min'])
  trade_analysis['len']['long']['won']['max'] = str(trade_analysis['len']['long']['won']['max'])

  drawdown_analysis = make_dict_even(strat.analyzers.drawdown.get_analysis())
  sharpe_analysis = make_dict_even(strat.analyzers.sharpe.get_analysis())

  # Save the model
  todays_date = datetime.datetime.now().strftime("%Y-%m-%d")
  model_name = f"binance_mtf_{timeframe}_{start_date}_{end_date}_{todays_date}"
  model_path = save_model_to_cloud_storage(model, model_name, bucket_name)
  analysis_result = {
      'timeframe': timeframe,
      'start_date': start_date,
      'end_date': end_date,
      'symbol': symbol,
      'exchange': exchange,
      'features_columns': features_columns,
      'lower_timeframe': lower_timeframe,
      'middle_timeframe': middle_timeframe,
      'higher_timeframe': higher_timeframe,
      'seq_length': seq_length,
      'training_data_ratio': training_data_ratio,
      'epochs': train_epochs,
      'model_input_shape': model_input_shape,
      'model_name': model_name,
      'model_path': model_path,
      'train_loss': train_loss,
      'train_mae': train_mae,
      'val_loss': val_loss,
      'val_mae': val_mae,
      'final_epoch': final_epoch,
      'training_data_start_date': training_data_start_date,
      'training_data_end_date': training_data_end_date,
      'test_data_start_date': test_data_start_date,
      'test_data_end_date': test_data_end_date,
      'trade_leverage': trade_leverage,
      'trade_margin': trade_margin,
      'trade_buy_threshold': trade_buy_threshold,
      'trade_sell_threshold': trade_sell_threshold,
      'starting_portfolio_value': starting_portfolio_value,
      'final_portfolio_value': final_portfolio_value,
      'broker_commision': broker_commision,
      'trade_analysis': trade_analysis,
      'drawdown_analysis': drawdown_analysis,
      'sharpe_analysis': sharpe_analysis,
  }
  save_to_firestore(project_id, firestore_collection_name, analysis_result)
  return analysis_result

In [28]:
generate_model(**options)

  super().__init__(**kwargs)


Epoch 1/20
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 83ms/step - loss: 0.3510 - mae: 0.5090 - val_loss: 0.2740 - val_mae: 0.4491
Epoch 2/20
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 80ms/step - loss: 0.2903 - mae: 0.4564 - val_loss: 0.2542 - val_mae: 0.4289
Epoch 3/20
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 81ms/step - loss: 0.2803 - mae: 0.4476 - val_loss: 0.2532 - val_mae: 0.4278
Epoch 4/20
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 81ms/step - loss: 0.2794 - mae: 0.4457 - val_loss: 0.2683 - val_mae: 0.4413
Epoch 5/20
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 81ms/step - loss: 0.2752 - mae: 0.4419 - val_loss: 0.2565 - val_mae: 0.4295
Epoch 6/20
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 79ms/step - loss: 0.2653 - mae: 0.4306 - val_loss: 0.2280 - val_mae: 0.4016
Epoch 7/20
[1m805/805[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['Predicted_Value'] = predicted_values
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data.dropna(inplace=True) # Early predicted values wont be availble due to sequencing


Starting Portfolio Value: 10000.00




Final Portfolio Value: 23482.39
Model saved to gs://ife-storage/trade/models/binance_mtf_15m_01-01-2024_28-02-2025_2025-03-08.h5
Data saved to Firestore: Collection=trade-models, Document ID=q6AMxOT6HTi60oRIgjWC


{'timeframe': '15m',
 'start_date': '01-01-2024',
 'end_date': '28-02-2025',
 'symbol': 'BTCUSDT',
 'exchange': 'binance',
 'features_columns': ['15m_RSI_14',
  '15m_RSI_6',
  '15m_EMA_5',
  '15m_EMA_21',
  '15m_SMA_50',
  '15m_MACD_12_26_9',
  '15m_MACDs_12_26_9',
  '15m_MACDh_12_26_9',
  '15m_BBL_20_2.0',
  '15m_BBM_20_2.0',
  '15m_BBU_20_2.0',
  '15m_BBB_20_2.0',
  '15m_BBP_20_2.0',
  '1h_RSI_14',
  '1h_RSI_6',
  '1h_EMA_5',
  '1h_EMA_21',
  '1h_MACD_12_26_9',
  '1h_MACDs_12_26_9',
  '1h_MACDh_12_26_9',
  '1h_BBL_20_2.0',
  '1h_BBM_20_2.0',
  '1h_BBU_20_2.0',
  '1h_BBB_20_2.0',
  '1h_BBP_20_2.0',
  '4h_RSI_14',
  '4h_RSI_6',
  '4h_EMA_5',
  '4h_EMA_21'],
 'lower_timeframe': '15m',
 'middle_timeframe': '1h',
 'higher_timeframe': '4h',
 'seq_length': 100,
 'training_data_ratio': 0.8,
 'epochs': 20,
 'model_input_shape': (100, 29),
 'model_name': 'binance_mtf_15m_01-01-2024_28-02-2025_2025-03-08',
 'model_path': 'gs://ife-storage/trade/models/binance_mtf_15m_01-01-2024_28-02-2025_2025-