In [2]:
import pandas as pd
import pandas_ta as ta
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Conv1D, MaxPooling1D, BatchNormalization, LeakyReLU, Dropout, 
    Bidirectional, LSTM, Dense, Flatten, Add, Layer, MultiHeadAttention, 
    Concatenate
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt

# Suppress TensorFlow warnings for cleaner output
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# Import joblib for saving the scaler
import joblib

Matplotlib is building the font cache; this may take a moment.


In [3]:
# Select the compute device: use the first GPU TensorFlow reports, else the CPU.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU found. Training will be performed on the CPU.")
else:
    try:
        # Only the first detected GPU is made visible to TensorFlow.
        tf.config.set_visible_devices(gpus[0], 'GPU')
        # Memory growth is switched on for every detected GPU so memory is
        # allocated on demand rather than reserved up front.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"GPU '{gpus[0].name}' is being used for training.")
    except RuntimeError as e:
        # The configuration calls above can raise RuntimeError; report it and carry on.
        print(e)

No GPU found. Training will be performed on the CPU.


In [4]:
# Define Custom Multi-Head Self-Attention Layer
class MultiHeadSelfAttention(Layer):
    """Self-attention block: attention -> dropout -> residual add -> normalisation.

    The layer attends from the input sequence to itself, applies dropout to
    the attention output, adds the original input back (residual connection)
    and normalises the sum.

    Args:
        embed_dim: Passed to ``MultiHeadAttention`` as ``key_dim``.
        num_heads: Number of attention heads.
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``,
            ``trainable`` ...). Accepting them is required so the layer can be
            re-created from a saved model's config.
    """

    def __init__(self, embed_dim, num_heads=4, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can serialise them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        # NOTE(review): the attribute is called `layernorm` but it is a
        # BatchNormalization layer. Kept as-is to preserve behaviour; confirm
        # whether LayerNormalization was intended.
        self.layernorm = BatchNormalization()
        self.dropout = Dropout(0.1)

    def call(self, inputs, training=False):
        """Apply self-attention with a residual connection.

        Args:
            inputs: Tensor used as both query and value of the attention.
            training: Forwarded to the dropout and normalisation sub-layers.

        Returns:
            The normalised sum of ``inputs`` and the attention output.
        """
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout(attn_output, training=training)
        return self.layernorm(inputs + attn_output, training=training)

    def get_config(self):
        """Return the constructor arguments so the layer can be deserialised."""
        config = super().get_config()
        config.update({"embed_dim": self.embed_dim, "num_heads": self.num_heads})
        return config

In [5]:
def load_data(csv_file):
    """
    Read a CSV file into a DataFrame indexed by its parsed 'timestamp' column.

    The 'timestamp' column is converted with pd.to_datetime and then becomes
    the index; all other columns are returned unchanged.
    """
    return (
        pd.read_csv(csv_file)
        .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
        .set_index('timestamp')
    )

In [13]:
def compute_fibonacci_retracement(df, period=100):
    """
    Add rolling Fibonacci retracement levels to ``df`` and return it.

    The rolling maximum of 'high' and the rolling minimum of 'low' over
    ``period`` rows define a range; each level is the rolling maximum minus a
    fixed fraction of that range. The frame is modified in place: the helper
    columns 'Fib_Max', 'Fib_Min', 'Fib_Diff' and the four level columns are
    written onto the passed DataFrame, which is also the return value.
    """
    rolling_high = df['high'].rolling(window=period).max()
    rolling_low = df['low'].rolling(window=period).min()

    df['Fib_Max'] = rolling_high
    df['Fib_Min'] = rolling_low
    df['Fib_Diff'] = df['Fib_Max'] - df['Fib_Min']

    # Column name -> fraction of the range subtracted from the rolling maximum.
    retracement_levels = (
        ('Fib_23.6', 0.236),
        ('Fib_38.2', 0.382),
        ('Fib_50.0', 0.5),
        ('Fib_61.8', 0.618),
    )
    for column_name, fraction in retracement_levels:
        df[column_name] = df['Fib_Max'] - df['Fib_Diff'] * fraction

    return df

def compute_indicators(df):
    """
    Build the technical-indicator feature table from OHLCV data.

    Reads the 'close', 'high', 'low' and 'volume' columns and returns a new
    DataFrame (the caller's frame is not modified) that is sorted by index,
    has the former index moved into a regular column, carries one column per
    indicator, and has every row containing a NaN removed.
    """
    # sort_index() orders the rows; reset_index() turns the index into a
    # column, leaving a fresh positional index on the working copy.
    df = df.sort_index().reset_index()

    close = df['close']
    high = df['high']
    low = df['low']
    volume = df['volume']

    # Relative Strength Index
    df['RSI'] = ta.rsi(close, length=14)

    # MACD line, signal line and histogram
    macd = ta.macd(close)
    for feature_name, macd_column in (
        ('MACD', 'MACD_12_26_9'),
        ('MACD_signal', 'MACDs_12_26_9'),
        ('MACD_hist', 'MACDh_12_26_9'),
    ):
        df[feature_name] = macd[macd_column]

    # Bollinger Bands: only the upper and lower bands are kept
    bb = ta.bbands(close, length=20, std=2)
    df['Bollinger_High'] = bb['BBU_20_2.0']
    df['Bollinger_Low'] = bb['BBL_20_2.0']

    # Raw volume is exposed under a capitalised feature name
    df['Volume'] = volume

    # On-Balance Volume
    df['OBV'] = ta.obv(close, volume)

    # Simple moving averages over 50 and 200 rows
    df['MA50'] = ta.sma(close, length=50)
    df['MA200'] = ta.sma(close, length=200)

    # Fibonacci retracement levels (also adds helper columns, removed below)
    df = compute_fibonacci_retracement(df, period=100)

    # Ichimoku: take the 'ISA_9' and 'ISB_26' columns of the first returned
    # frame and shift them up by one row, so each row holds the following
    # row's value. The last row therefore becomes NaN and is dropped below.
    # NOTE(review): confirm the one-row upward shift is intended.
    ichimoku = ta.ichimoku(high, low, close)
    cloud = ichimoku[0][['ISA_9', 'ISB_26']].shift(-1)
    df['Ichimoku_Cloud_a'] = cloud['ISA_9']
    df['Ichimoku_Cloud_b'] = cloud['ISB_26']

    # Stochastic Oscillator (%K column)
    stoch = ta.stoch(high, low, close)
    df['Stochastic_Oscillator'] = stoch['STOCHk_14_3_3']

    # Average Directional Index
    adx = ta.adx(high, low, close)
    df['ADX'] = adx['ADX_14']

    # Remove the intermediate Fibonacci columns, then every row that still
    # contains a NaN in any column.
    df = df.drop(columns=['Fib_Max', 'Fib_Min', 'Fib_Diff'])
    return df.dropna()

In [8]:
def prepare_dataset(df, target_column='close'):
    """
    Split an indicator DataFrame into a feature matrix and a shifted target.

    Returns ``(X, y)`` where ``X`` holds the 18 indicator columns and ``y`` is
    ``target_column`` shifted up by one row, i.e. ``y`` at a given row is the
    target value of the following row. The final row has no following value,
    so it is removed from both ``X`` and ``y``.
    """
    feature_columns = [
        'RSI',
        'MACD', 'MACD_signal', 'MACD_hist',
        'Bollinger_High', 'Bollinger_Low',
        'Volume', 'OBV',
        'MA50', 'MA200',
        'Fib_23.6', 'Fib_38.2', 'Fib_50.0', 'Fib_61.8',
        'Ichimoku_Cloud_a', 'Ichimoku_Cloud_b',
        'Stochastic_Oscillator', 'ADX',
    ]

    next_value = df[target_column].shift(-1)

    # Trim the trailing row, whose shifted target is NaN.
    X = df[feature_columns].iloc[:-1]
    y = next_value.iloc[:-1]
    return X, y

def normalize_features(X_train, X_test):
    """
    Standardise features with a StandardScaler fitted on the training data only.

    Returns ``(X_train_scaled, X_test_scaled, scaler)``. The test set is
    transformed with the statistics learned from ``X_train``; the fitted
    scaler is returned so the caller can reuse or persist it.
    """
    scaler = StandardScaler()
    scaler.fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test), scaler

def create_sequences(X, y, sequence_length):
    """
    Slice ``X`` into overlapping windows and pair each with one target value.

    Window ``i`` is ``X[i:i+sequence_length]`` and its target is
    ``y[i+sequence_length]``, i.e. the element of ``y`` one position past the
    window's last row. ``len(X) - sequence_length`` pairs are produced; when
    that is not positive both returned arrays are empty.

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays.
    """
    window_starts = range(len(X) - sequence_length)
    windows = [X[start:start + sequence_length] for start in window_starts]
    targets = [y[start + sequence_length] for start in window_starts]
    return np.array(windows), np.array(targets)

In [15]:
def build_enhanced_cnn_lstm_attention_model(input_shape, embed_dim=64, num_heads=4, dropout_rate=0.2, l2_reg=1e-4):
    """
    Build and compile an Enhanced Hybrid CNN-LSTM model with Multi-Head Self-Attention.

    Architecture, in order: two convolutional blocks (each two Conv1D ->
    BatchNormalization -> LeakyReLU stacks followed by max-pooling and
    dropout), a 1x1-convolution residual branch from the input that is pooled
    twice and added to the convolutional output, a bidirectional LSTM that
    returns sequences, the MultiHeadSelfAttention layer, a second
    bidirectional LSTM that returns its final state, two dense layers and a
    single linear output unit.

    Args:
        input_shape: Shape of one sample, passed to ``Input(shape=...)``.
        embed_dim: Forwarded to ``MultiHeadSelfAttention`` (which uses it as
            the attention ``key_dim``). Previously this argument was ignored
            and 256 was always used; pass ``embed_dim=256`` to reproduce the
            earlier architecture.
        num_heads: Number of attention heads.
        dropout_rate: Rate used by every Dropout layer created here.
        l2_reg: L2 factor for the regularised Conv1D, LSTM and Dense kernels.

    Returns:
        A compiled Keras ``Model`` (Adam with learning rate 1e-4, MSE loss,
        MAE metric).
    """
    def _conv_bn_leaky(tensor, filters):
        """Conv1D (kernel 3, 'same' padding, L2) -> BatchNormalization -> LeakyReLU(0.1)."""
        tensor = Conv1D(filters=filters, kernel_size=3, padding='same',
                        kernel_regularizer=l2(l2_reg))(tensor)
        tensor = BatchNormalization()(tensor)
        return LeakyReLU(alpha=0.1)(tensor)

    # Input layer
    inputs = Input(shape=input_shape)

    # First convolutional block (128 filters), then pool by 2
    x = _conv_bn_leaky(inputs, 128)
    x = _conv_bn_leaky(x, 128)
    x = MaxPooling1D(pool_size=2)(x)
    x = Dropout(dropout_rate)(x)

    # Second convolutional block (256 filters), then pool by 2
    x = _conv_bn_leaky(x, 256)
    x = _conv_bn_leaky(x, 256)
    x = MaxPooling1D(pool_size=2)(x)
    x = Dropout(dropout_rate)(x)

    # Residual connection: project the raw input to 256 channels and pool it
    # twice so its shape matches the output of the two pooled blocks above.
    res = Conv1D(filters=256, kernel_size=1, padding='same')(inputs)
    res = MaxPooling1D(pool_size=2)(res)
    res = MaxPooling1D(pool_size=2)(res)
    x = Add()([x, res])
    x = Dropout(dropout_rate)(x)

    # Bidirectional LSTM that keeps the sequence dimension for the attention layer
    x = Bidirectional(LSTM(100, return_sequences=True, kernel_regularizer=l2(l2_reg)))(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Dropout(dropout_rate)(x)

    # Multi-Head Self-Attention; honours the caller-supplied embed_dim
    x = MultiHeadSelfAttention(embed_dim=embed_dim, num_heads=num_heads)(x)

    # Second Bidirectional LSTM collapses the sequence to a single vector
    x = Bidirectional(LSTM(100, return_sequences=False, kernel_regularizer=l2(l2_reg)))(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Dropout(dropout_rate)(x)

    # Fully connected layers with Dropout and L2 Regularization
    for units in (128, 64):
        x = Dense(units, activation='relu', kernel_regularizer=l2(l2_reg))(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rate)(x)

    # Output layer: one linear unit for regression
    output = Dense(1, activation='linear')(x)

    # Define the model
    model = Model(inputs=inputs, outputs=output)

    # Compile the model with Adam optimizer
    optimizer = Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])

    return model

In [10]:
def plot_training_history(history):
    """
    Draw training and validation loss per epoch on a single figure.

    Reads the 'loss' and 'val_loss' series from ``history.history`` and shows
    the figure with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
        ax.plot(history.history[history_key], label=curve_label)
    ax.set_title('Model Loss During Training')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss (MSE)')
    ax.legend()
    plt.show()

def plot_predictions(y_true, y_pred):
    """
    Overlay actual and predicted values on one figure and display it.

    ``y_true`` and ``y_pred`` are each plotted against their position index.
    """
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(y_true, label='Actual Price')
    ax.plot(y_pred, label='Predicted Price')
    ax.set_title('Actual vs Predicted Close Prices')
    ax.set_xlabel('Time')
    ax.set_ylabel('Price')
    ax.legend()
    plt.show()

In [17]:
def main():
    """
    Run the end-to-end pipeline: load data, engineer features, train, evaluate, save.

    Steps: read the CSV, compute indicators, build features and target,
    split chronologically 80/20 (no shuffling), standardise the features,
    window them into sequences of 60 rows, train the CNN-LSTM-attention model
    with early stopping, checkpointing and learning-rate reduction, reload
    the best checkpoint, plot and score the test predictions, then save the
    model and the feature scaler to disk.

    Returns None. It returns early if indicator computation raises
    TypeError/KeyError or if either sequence array is empty.
    """
    # Load the data
    csv_file = 'BTC_USDT_1h_data.csv'  # Replace with your CSV file path
    df = load_data(csv_file)
    print(f"Loaded data with {len(df)} records.")

    # Indicator computation; a TypeError or KeyError is reported and aborts the run.
    try:
        df = compute_indicators(df)
        print(f"Computed indicators. Dataset now has {len(df)} records.")
    except (TypeError, KeyError) as e:
        print(f"Error computing indicators: {e}")
        return

    # Feature matrix and target
    X, y = prepare_dataset(df, target_column='close')
    print(f"Prepared dataset with {X.shape[0]} samples.")

    # Chronological 80/20 split: shuffle=False keeps the test rows after the training rows.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    print(f"Split data into {X_train.shape[0]} training and {X_test.shape[0]} testing samples.")

    # Only the features are standardised; the targets stay unscaled.
    X_train_scaled, X_test_scaled, scaler = normalize_features(X_train, X_test)
    print("Normalized feature data.")

    # Window the data for the sequence model
    sequence_length = 60  # Number of past hours to consider
    X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train.values, sequence_length)
    X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test.values, sequence_length)
    print(f"Created sequences with sequence length {sequence_length}.")

    # Abort if either split was too short to yield any window.
    if 0 in (X_train_seq.shape[0], X_test_seq.shape[0]):
        print("Error: Sequence creation resulted in empty arrays. Adjust the sequence length or check the data.")
        return

    # Build the model; the input shape is (timesteps, features) of one window.
    input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])
    model = build_enhanced_cnn_lstm_attention_model(input_shape)
    model.summary()

    # Callbacks all monitor validation loss: stop early, keep the best
    # checkpoint on disk, and halve the learning rate on plateaus.
    checkpoint_path = 'best_enhanced_trading_model.keras'
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
        ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True, verbose=1),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1),
    ]

    # Train the model
    history = model.fit(
        X_train_seq,
        y_train_seq,
        epochs=200,
        batch_size=32,
        validation_split=0.1,
        callbacks=callbacks,
        verbose=1,
    )

    # Plot training history
    plot_training_history(history)

    # Reload the best checkpoint; the custom layer must be supplied for deserialisation.
    model = load_model(checkpoint_path, custom_objects={'MultiHeadSelfAttention': MultiHeadSelfAttention})

    # Predict on the test windows and flatten to a 1-D array
    y_pred = model.predict(X_test_seq).flatten()

    # The targets were never scaled in this function, so the predictions are
    # compared with y_test_seq directly and no inverse transform is applied.
    # NOTE(review): training on unscaled targets may explain the very large
    # loss values in the run log; consider scaling the target too.

    # Plot actual vs predicted
    plot_predictions(y_test_seq, y_pred)

    # Evaluate the model
    mse_value = tf.keras.losses.MeanSquaredError()(y_test_seq, y_pred).numpy()
    mae_value = tf.keras.losses.MeanAbsoluteError()(y_test_seq, y_pred).numpy()
    print(f"Model Evaluation:\nMSE: {mse_value:.4f}\nMAE: {mae_value:.4f}")

    # Persist the model and the feature scaler for future use
    model.save('enhanced_cnn_lstm_attention_trading_model.h5')
    print("Saved Enhanced CNN-LSTM-Attention model to 'enhanced_cnn_lstm_attention_trading_model.h5'.")

    joblib.dump(scaler, 'scaler.joblib')
    print("Saved scaler to 'scaler.joblib'.")

In [18]:
# Execute the full pipeline defined by main().
main()

Loaded data with 8344 records.
Computed indicators. Dataset now has 8144 records.
Prepared dataset with 8143 samples.
Split data into 6514 training and 1629 testing samples.
Normalized feature data.
Created sequences with sequence length 60.




Epoch 1/200
[1m182/182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step - loss: 3725594880.0000 - mae: 60441.7891
Epoch 1: val_loss improved from inf to 3819419392.00000, saving model to best_enhanced_trading_model.keras
[1m182/182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 161ms/step - loss: 3725686528.0000 - mae: 60442.5742 - val_loss: 3819419392.0000 - val_mae: 61748.1914 - learning_rate: 0.0010
Epoch 2/200
[1m152/182[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m4s[0m 153ms/step - loss: 3748440320.0000 - mae: 60649.4102

KeyboardInterrupt: 