In [1]:
import os.path

import pandas as pd

from PanderaDFM.OHLCV import MultiTimeframeOHLCV
from helper.data_preparation import single_timeframe
from helper.helper import date_range_to_string
from Config import config

config.processing_date_range = date_range_to_string(start=pd.to_datetime('03-01-24'),
                                                    end=pd.to_datetime('09-01-24'))
# devided by rolling mean, std
n_mt_ohlcv = pd.read_csv(
    os.path.join(r"C:\Code\dl-forcasting\data\Kucoin\Spot\BTCUSDT",
                 f"n_mt_ohlcv.{config.processing_date_range}.csv.zip"), compression='zip')
n_mt_ohlcv

[92mDEBUG@[94m10-11.14:02:03:[92m...Starting


Unnamed: 0,timeframe,date,open,close,high,low,volume
0,15min,2024-03-01 00:00:00+00:00,,,,,
1,1D,2024-03-01 00:00:00+00:00,,,,,
2,1h,2024-03-01 00:00:00+00:00,,,,,
3,1min,2024-03-01 00:00:00+00:00,,,,,
4,4h,2024-03-01 00:00:00+00:00,,,,,
...,...,...,...,...,...,...,...
341347,1D,2024-09-01 00:00:00+00:00,-0.684385,-1.265078,-1.114403,-0.900461,0.298451
341348,1h,2024-09-01 00:00:00+00:00,-0.081138,-0.165222,-0.038223,-0.001254,-0.095335
341349,1min,2024-09-01 00:00:00+00:00,-0.018025,-0.275889,-0.122945,-0.169871,-0.171993
341350,4h,2024-09-01 00:00:00+00:00,-0.116954,-0.577484,-0.395726,-0.480249,0.044290


In [2]:
n_mt_ohlcv.describe()

Unnamed: 0,open,close,high,low,volume
count,340668.0,340668.0,340668.0,340668.0,340668.0
mean,0.002571,0.002537,0.000586,0.004814,0.011528
std,1.080184,1.080047,1.081013,1.083676,1.070377
min,-11.82925,-11.796538,-10.789105,-12.27137,-4.911268
25%,-0.556785,-0.557024,-0.575532,-0.53149,-0.405402
50%,0.010991,0.010835,-0.014932,0.036296,-0.136793
75%,0.575594,0.574331,0.553501,0.591531,0.143095
max,13.730087,13.773854,13.686195,12.303279,15.969589


# Multi timeframe modelling


structure_timeframes = {
    '1W':{        pattern: '1D',        trigger: '4h',        double: '15min',    }, 
    '1D':{        pattern: '4h',        trigger: '1h',        double: '5min',    }, 
    '4h':{        pattern: '1h',        trigger: '15min',        double: '1min',    }, 
}
n_mt_ohlcv include open, high, low, close, and volume of all timeframes.
single_timeframe(n_mt_ohlcv, timeframe) will return data of specified timeframe.
using tensorflow create 4 parallel CNN-LSTM models each fed with structure, pattern, trigger, and double timeframe data.
join these parallel models together.


In [3]:

from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Conv1D, LeakyReLU, Flatten, Dense, Concatenate
from tensorflow.python.keras import Input
import tensorflow as tf

model_input_lengths = {
    'structure': 128,
    'pattern': 256,
    'trigger': 256,
    'double': 256,
}


def create_cnn_lstm(input_shape, name_prefix):
    input_layer = Input(shape=input_shape)

    # CNN Layer with ReLU activation
    conv = Conv1D(filters=64, kernel_size=3, padding='same')(input_layer)
    conv = LeakyReLU()(conv)
    conv = Conv1D(filters=64, kernel_size=3, padding='same')(conv)
    conv = LeakyReLU()(conv)

    # Flatten the CNN output
    flatten = Flatten()(conv)

    # LSTM Layer (LSTM has built-in activations)
    lstm = LSTM(64, return_sequences=False)(tf.expand_dims(flatten, axis=1))

    # Fully connected layer with ReLU activation
    dense = Dense(64)(lstm)
    dense = LeakyReLU()(dense)

    return Model(inputs=input_layer, outputs=dense)


def build_model(input_shapes):
    structure_model = create_cnn_lstm((model_input_lengths['structure'], 5), 'structure_model')
    pattern_model = create_cnn_lstm((model_input_lengths['pattern'], 5), 'pattern_model')
    trigger_model = create_cnn_lstm((model_input_lengths['trigger'], 5), 'trigger_model')
    double_model = create_cnn_lstm((model_input_lengths['double'], 5), 'double_model')

    combined_output = Concatenate()(
        [structure_model.output, pattern_model.output, trigger_model.output, double_model.output])

    # Add an additional Dense layer with ReLU activation
    combined_dense = Dense(128)(combined_output)
    combined_dense = LeakyReLU()(combined_dense)

    # Final output layer (for regression tasks, use linear activation; for classification, consider sigmoid/softmax)
    final_output = Dense(1, activation='linear')(combined_dense)

    # Define the final model
    model = Model(inputs=[structure_model.input, pattern_model.input, trigger_model.input, double_model.input],
                  outputs=final_output)

    # Compile the model with mean squared error loss for regression tasks
    model.compile(optimizer='adam', loss='mse')

    # Model summary
    model.summary()

    return model



Check if model is not trained yet, try loading it from 'cnn_lstm_model.h5'. 
If it is already partially trained, or loaded from disk, continue training.
after completing training on each set of data save model into 'cnn_lstm_model.h5' to prevent loosing data in case of computer restart.


In [4]:
from tensorflow.python.keras.models import load_model


def train_model(structure_data, pattern_data, trigger_data, double_data, target_data, input_shapes, model=None):
    '''
    Check if the model is already trained or partially trained. If not, build a new model. 
    Continue training the model and save the trained model to 'cnn_lstm_model.h5' after each session.

    Args:
        structure_data: Data for the structure timeframe.
        pattern_data: Data for the pattern timeframe.
        trigger_data: Data for the trigger timeframe.
        double_data: Data for the double timeframe.
        target_data: The labels or target values for training.
        input_shapes: A dictionary containing the input shapes for structure, pattern, trigger, and double timeframe data.
    Returns:
        The trained model.
    '''
    # Check if the model already exists, load if it does
    model_path = 'cnn_lstm_model.h5'

    if model is None:
        if os.path.exists(model_path):
            print("Loading existing model from disk...")
            model = load_model(model_path)
        else:
            print("Building new model...")
            model = build_model(input_shapes)

    # Train the model
    history = model.fit([structure_data, pattern_data, trigger_data, double_data],
                        target_data,
                        epochs=10,
                        batch_size=32)
    print(history)
    # Save the model after each training session to avoid losing progress
    model.save(model_path)
    print("Model saved to disk.")

    return model

In [5]:
n_mt_ohlcv.index.names

FrozenList([None])

In [6]:
from datetime import timedelta
import numpy as np
from helper.data_preparation import pattern_timeframe, trigger_timeframe
from helper.importer import pt


def prepare_train_n_test(t_structure_timeframe, mt_ohlcv: pt.DataFrame[MultiTimeframeOHLCV], forecast_horizon: int = 20,
                         batch_size: int = 1000):
    """
    Prepares input and output data for multi-step forecasting.
    
    Args:
        mt_ohlcv: 
        t_structure_timeframe: 
        df (pd.DataFrame): DataFrame containing 'high' and 'low' columns.
        window_size (int): Number of past time steps to use as input.
        forecast_horizon (int): Number of future time steps to predict.
        
    Returns:
        X (np.array): Input features.
        y (np.array): Output targets (high and low).
    """
    pattern_tf = pattern_timeframe(t_structure_timeframe)
    trigger_tf = trigger_timeframe(t_structure_timeframe)
    double_tf = pattern_timeframe(trigger_timeframe(t_structure_timeframe))

    structure_df = single_timeframe(mt_ohlcv, t_structure_timeframe)
    pattern_df = single_timeframe(mt_ohlcv, pattern_tf)
    trigger_df = single_timeframe(mt_ohlcv, trigger_tf)
    double_df = single_timeframe(mt_ohlcv, double_tf)

    length_of_training = (
            model_input_lengths['structure'] * pd.to_timedelta(t_structure_timeframe)
            + model_input_lengths['pattern'] * pd.to_timedelta(pattern_tf)
            + model_input_lengths['trigger'] * pd.to_timedelta(trigger_tf)
            + model_input_lengths['double'] * pd.to_timedelta(double_tf)
    )

    input_start = mt_ohlcv.index.get_level_values(
        level='date').min() + length_of_training * 2  # * 2 for simple safeside.
    input_end = mt_ohlcv.index.get_level_values(level='date').max() - forecast_horizon * pd.to_timedelta(
        trigger_tf)
    duration_seconds = (input_end - input_start) / timedelta(seconds=1)

    X = []
    y = []

    for relative_double_end in np.random.randint(0, duration_seconds, size=batch_size):
        double_end = input_end - relative_double_end * timedelta(seconds=1)
        trigger_end = double_end - model_input_lengths['double'] * pd.to_timedelta(double_tf)
        pattern_end = trigger_end - model_input_lengths['trigger'] * pd.to_timedelta(double_tf)
        structure_end = pattern_end - model_input_lengths['pattern'] * pd.to_timedelta(double_tf)

        double_slice = structure_df.loc[(slice(None), slice(None, double_end)), :] \
                           .iloc[:-model_input_lengths['double']]
        trigger_slice = pattern_df.loc[(slice(None), slice(None, trigger_end)), :] \
                            .iloc[:-model_input_lengths['trigger']]
        pattern_slice = trigger_df.loc[(slice(None), slice(None, pattern_end)), :] \
                            .iloc[:-model_input_lengths['pattern']]
        structure_slice = double_df.loc[(slice(None), slice(None, structure_end)), :] \
                              .iloc[:-model_input_lengths['structure']]

        X['double'].append(double_slice)
        X['trigger'].append(trigger_slice)
        X['pattern'].append(pattern_slice)
        X['structure'].append(structure_slice)

        future_slice = trigger_df.loc[(slice(None), slice(pattern_end, None)), :] \
                           .iloc[:-model_input_lengths['pattern']]
        y.append(future_slice)
    return np.array(X), np.array(y)


a = prepare_train_n_test('4h', n_mt_ohlcv, 10)
a

Exception: multi_timeframe_data expected to have "timeframe" in indexes:[[None]]

structure_timeframes = {
    '1W': {'pattern': '1D', 'trigger': '4h', 'double': '15min'},
    '1D': {'pattern': '4h', 'trigger': '1h', 'double': '5min'},
    '4h': {'pattern': '1h', 'trigger': '15min', 'double': '1min'}
}
loop over structure timeframe in ['1D', '4h']:
collect information from already prepared function read_ohlcv_features(start, end, timeframe)
create required iteration to pass data to train_model
choose double_timeframe_end in the range of start and end
structure_timeframe_end= structure_timeframe_end = trigger_timeframe_end = double_timeframe_end
calculate trigger_timeframe_start according to trigger_timeframe_end and number of bars shall be passed for taining 'trigger_model' 
calculate pattern_timeframe_start according to pattern_timeframe_end and number of bars shall be passed for taining 'pattern_model' 
calculate structure_timeframe_start according to structure_timeframe_end and number of bars shall be passed for taining 'structure_model' 

for:
```python
def create_cnn_lstm(input_shape, name_prefix):
    input_layer = Input(shape=input_shape)

    # CNN Layer with ReLU activation
    conv = Conv1D(filters=64, kernel_size=3, padding='same')(input_layer)
    conv = LeakyReLU()(conv)
    conv = Conv1D(filters=64, kernel_size=3, padding='same')(conv)
    conv = LeakyReLU()(conv)

    # Flatten the CNN output
    flatten = Flatten()(conv)

    # LSTM Layer (LSTM has built-in activations)
    lstm = LSTM(64, return_sequences=False)(tf.expand_dims(flatten, axis=1))

    # Fully connected layer with ReLU activation
    dense = Dense(64)(lstm)
    dense = LeakyReLU()(dense)

    return Model(inputs=input_layer, outputs=dense)

def build_model(input_shapes):
    structure_model = create_cnn_lstm((128, 5), 'structure_model')
    pattern_model = create_cnn_lstm((256, 5), 'pattern_model')
    trigger_model = create_cnn_lstm((256, 5), 'trigger_model')
    double_model = create_cnn_lstm((256, 5), 'double_model')
    
    combined_output = Concatenate()(
        [structure_model.output, pattern_model.output, trigger_model.output, double_model.output])
    
    # Add an additional Dense layer with ReLU activation
    combined_dense = Dense(128)(combined_output)
    combined_dense = LeakyReLU()(combined_dense)
    
    # Final output layer (for regression tasks, use linear activation; for classification, consider sigmoid/softmax)
    final_output = Dense(1, activation='linear')(combined_dense)
    
    # Define the final model
    model = Model(inputs=[structure_model.input, pattern_model.input, trigger_model.input, double_model.input],
                  outputs=final_output)
    
    # Compile the model with mean squared error loss for regression tasks
    model.compile(optimizer='adam', loss='mse')
    
    # Model summary
    model.summary()

    return model
def train_model(structure_data, pattern_data, trigger_data, double_data, target_data, input_shapes, model = None):
    '''
    Check if the model is already trained or partially trained. If not, build a new model. 
    Continue training the model and save the trained model to 'cnn_lstm_model.h5' after each session.

    Args:
        structure_data: Data for the structure timeframe.
        pattern_data: Data for the pattern timeframe.
        trigger_data: Data for the trigger timeframe.
        double_data: Data for the double timeframe.
        target_data: The labels or target values for training.
        input_shapes: A dictionary containing the input shapes for structure, pattern, trigger, and double timeframe data.
    Returns:
        The trained model.
    '''
    # Check if the model already exists, load if it does
    model_path = 'cnn_lstm_model.h5'
    
    if model is None:
        if os.path.exists(model_path):
            print("Loading existing model from disk...")
            model = load_model(model_path)
        else:
            print("Building new model...")
            model = build_model(input_shapes)

    # Train the model
    history = model.fit([structure_data, pattern_data, trigger_data, double_data],
                        target_data,
                        epochs=10,
                        batch_size=32)
    print(history)
    # Save the model after each training session to avoid losing progress
    model.save(model_path)
    print("Model saved to disk.")
    
    return model
```

In [None]:
structure_timeframes = {
    '1W': {'pattern': '1D', 'trigger': '4h', 'double': '15min'},
    '1D': {'pattern': '4h', 'trigger': '1h', 'double': '5min'},
    '4h': {'pattern': '1h', 'trigger': '15min', 'double': '1min'}
}


def 