In [None]:
from keras.preprocessing.sequence import TimeseriesGenerator

# Number of consecutive time steps contained in every generated window.
sequence_length = 24

# Sliding-window batch generators for the training and the test split.
train_data_gen = TimeseriesGenerator(
    data=X_train.values,
    targets=y_train.values,
    length=sequence_length,
    batch_size=32,
)
test_data_gen = TimeseriesGenerator(
    data=X_test.values,
    targets=y_test.values,
    length=sequence_length,
    batch_size=32,
)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Concatenate
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Flatten, Conv1D, MaxPooling1D, BatchNormalization
from tensorflow.keras.layers import GRU
from scipy.optimize import differential_evolution
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization, Dropout, LSTM, Dense, Flatten
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
import numpy as np

def huber_loss(y_true, y_pred, delta=1.0):
    """Compute the element-wise Huber loss between targets and predictions.

    Residuals whose magnitude is below ``delta`` are penalised
    quadratically; all remaining residuals are penalised linearly.

    Args:
        y_true: Target values.
        y_pred: Predicted values.
        delta: Threshold separating the quadratic and the linear regime.

    Returns:
        A tensor of per-element losses; no reduction is applied here.
    """
    residual = y_true - y_pred
    magnitude = tf.abs(residual)
    quadratic_branch = 0.5 * tf.square(residual)
    linear_branch = delta * (magnitude - 0.5 * delta)
    return tf.where(magnitude < delta, quadratic_branch, linear_branch)

class MultiHeadAttentionLayer(Layer):
    """Self-attention layer that runs attention over several feature heads.

    The input is projected to query, key and value tensors of width
    ``key_dim``. Each projection is cut into ``num_heads`` equal slices along
    the feature axis, the slices are stacked along the batch axis, attention
    is applied once to the stacked tensors, and the heads are then merged
    back and passed through a final dense projection.

    Args:
        num_heads: Number of attention heads.
        key_dim: Width of the query/key/value/output projections. Must be a
            positive multiple of ``num_heads`` so the feature axis can be
            split evenly.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.

    Raises:
        ValueError: If ``num_heads`` is not positive or does not divide
            ``key_dim``.
    """

    def __init__(self, num_heads, key_dim, **kwargs):
        super(MultiHeadAttentionLayer, self).__init__(**kwargs)
        if num_heads <= 0 or key_dim <= 0 or key_dim % num_heads != 0:
            raise ValueError(
                "key_dim (%r) must be a positive multiple of num_heads (%r)"
                % (key_dim, num_heads)
            )
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.head_dim = key_dim // num_heads

        # Query, key and value projections
        self.query_projection = Dense(key_dim)
        self.key_projection = Dense(key_dim)
        self.value_projection = Dense(key_dim)

        # Dot-product attention; use_scale=True adds a learned scalar scale
        self.attention = tf.keras.layers.Attention(use_scale=True)

        # Output projection applied after the heads are merged
        self.output_projection = Dense(key_dim)

    def _split_heads(self, tensor):
        """Cut the last axis into heads and stack them along the batch axis."""
        return tf.concat(tf.split(tensor, self.num_heads, axis=-1), axis=0)

    def _merge_heads(self, tensor):
        """Undo `_split_heads`: unstack the batch axis, rejoin the features."""
        return tf.concat(tf.split(tensor, self.num_heads, axis=0), axis=-1)

    def call(self, inputs):
        """Apply multi-head self-attention to ``inputs``.

        Args:
            inputs: Input tensor; assumed to be (batch, steps, features),
                which is what the Keras ``Attention`` layer operates on —
                TODO confirm against the caller.

        Returns:
            Tensor whose last axis has width ``key_dim``.
        """
        query = self.query_projection(inputs)
        key = self.key_projection(inputs)
        value = self.value_projection(inputs)

        # All three projections have width key_dim, which __init__ guarantees
        # is divisible by num_heads, so no padding is required before the
        # split.
        query_heads = self._split_heads(query)
        key_heads = self._split_heads(key)
        value_heads = self._split_heads(value)

        # Keras Attention expects its inputs ordered [query, value, key].
        attention_output = self.attention([query_heads, value_heads, key_heads])

        # Concatenate heads and apply the final linear transformation
        attention_output = self._merge_heads(attention_output)
        return self.output_projection(attention_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(MultiHeadAttentionLayer, self).get_config()
        config.update({"num_heads": self.num_heads, "key_dim": self.key_dim})
        return config



class TabularLSTMGRUModel:
    """Thin wrapper around a Keras Conv1D + GRU regression network.

    The wrapped ``tf.keras.Sequential`` model is built eagerly in
    ``__init__`` and exposed as ``self.model``.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv1D
            layer as ``input_shape``.
        lstm_units: Sequence of unit counts. Stored on the instance but not
            consumed by `build_model` (no LSTM layer is added).
        gru_units: Sequence of unit counts; one GRU layer is added per entry.
        output_units: Width of the final Dense layer.
        num_heads: Stored on the instance but not consumed by `build_model`
            (no attention layer is added).
    """

    def __init__(self, input_shape, lstm_units=(64, 32), gru_units=(64, 32), output_units=1, num_heads=8):
        self.input_shape = input_shape
        # Copy into fresh lists so instances never share (or mutate) the
        # caller's sequence, and coerce to int because an optimiser may hand
        # over NumPy floats while Keras layers need integer unit counts.
        self.lstm_units = [int(units) for units in lstm_units]
        self.gru_units = [int(units) for units in gru_units]
        self.output_units = output_units
        self.num_heads = num_heads
        self.model = self.build_model()

    def build_model(self):
        """Assemble and return the (uncompiled) Sequential network.

        Three Conv1D -> MaxPooling1D -> BatchNormalization stages are
        followed by one GRU layer per entry of ``self.gru_units``, then a
        Flatten and a Dense output layer.
        """
        model = tf.keras.Sequential()

        # Convolutional stages with 256, 128 and 64 filters. Only the first
        # layer receives the input shape.
        for stage, filters in enumerate((256, 128, 64)):
            conv_kwargs = {"input_shape": self.input_shape} if stage == 0 else {}
            model.add(Conv1D(filters=filters, kernel_size=2, activation='relu', **conv_kwargs))
            model.add(MaxPooling1D(pool_size=2))
            model.add(BatchNormalization())

        # GRU layers; every layer returns the full sequence so that the
        # Flatten below sees all remaining time steps.
        for units in self.gru_units:
            model.add(GRU(units, return_sequences=True, activation='relu'))

        model.add(Flatten())
        model.add(Dense(self.output_units))

        return model

    def compile(self, learning_rate=0.001):
        """Compile the model with Adam and the module's ``huber_loss``."""
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        self.model.compile(loss=huber_loss, optimizer=optimizer)

    def fit(self, train_data_gen, epochs=10):
        """Train on ``train_data_gen`` and return what Keras ``fit`` returns."""
        return self.model.fit(train_data_gen, epochs=epochs)

    def evaluate(self, test_data_gen):
        """Return the result of Keras ``evaluate`` on ``test_data_gen``."""
        return self.model.evaluate(test_data_gen)

    def predict(self, data_gen):
        """Return the model's predictions for ``data_gen``."""
        return self.model.predict(data_gen)

    def summary(self):
        """Print the Keras summary of the wrapped model."""
        return self.model.summary()

def fitness_function(hyperparameters):
    """Train one model for a hyperparameter vector and score it.

    Vector layout (by index):
        0: kernel number        - currently ignored, see note below
        1: activation function  - currently ignored, see note below
        2: number of training epochs (truncated to int)
        3: Adam learning rate
        4+: layer unit counts; LSTM entries first, GRU entries after them

    The model is trained on the module-level ``train_data_gen`` and scored
    on the module-level ``test_data_gen``.

    Args:
        hyperparameters: 1-D sequence of numbers laid out as above.

    Returns:
        The negated evaluation loss as a float, so that a larger value means
        a better model (``PSO_optimization`` maximises this score).
    """
    # NOTE: indices 0 and 1 are not used because TabularLSTMGRUModel has no
    # parameters for the kernel number or the activation function.
    epochs = int(hyperparameters[2])
    learning_rate = float(hyperparameters[3])

    # Everything after the four scalar settings is a unit count.
    unit_values = [int(value) for value in hyperparameters[4:]]
    lstm_count = len(lstm_units)
    if lstm_count >= len(unit_values):
        # The vector is too short to hold one entry per element of the global
        # `lstm_units` and still leave any GRU entry (which would silently
        # build a model without GRU layers), so split the tail evenly.
        lstm_count = len(unit_values) // 2

    # Many models are built during a search; drop the Keras global state of
    # the previous ones so memory use does not grow with every evaluation.
    tf.keras.backend.clear_session()

    # Set the hyperparameters in the model
    lstm_gru_model = TabularLSTMGRUModel(
        input_shape,
        lstm_units=unit_values[:lstm_count],
        gru_units=unit_values[lstm_count:],
        output_units=1,
        num_heads=8,
    )

    # Compile (Adam + huber_loss) with the candidate learning rate
    lstm_gru_model.compile(learning_rate=learning_rate)

    # Train the model
    lstm_gru_model.fit(train_data_gen, epochs=epochs)

    # Evaluate; Keras returns the loss (lower is better), possibly as the
    # first element of a list when extra metrics are configured.
    loss = lstm_gru_model.evaluate(test_data_gen)
    if isinstance(loss, (list, tuple)):
        loss = loss[0]

    # Negate so that higher fitness corresponds to lower loss.
    return -float(loss)

def PSO_optimization(num_particles, max_iterations, w, c1, c2, hyperparameter_bounds, fitness_fn=None):
    """Maximise a fitness function with particle swarm optimisation.

    Args:
        num_particles: Number of particles in the swarm (at least 1).
        max_iterations: Number of sweeps over the whole swarm.
        w: Inertia weight applied to the previous velocity.
        c1: Cognitive coefficient (pull towards a particle's own best).
        c2: Social coefficient (pull towards the swarm's best).
        hyperparameter_bounds: Array-like of shape (n_dims, 2), one pair of
            limits per dimension. The two limits of a row may be given in
            either order.
        fitness_fn: Callable taking a 1-D position array and returning a
            number where larger is better. Defaults to the module-level
            ``fitness_function``.

    Returns:
        1-D NumPy array: the best-scoring position that was evaluated.

    Raises:
        ValueError: If ``num_particles`` is below 1 or the bounds are not of
            shape (n_dims, 2).
    """
    if fitness_fn is None:
        fitness_fn = fitness_function
    if num_particles < 1:
        raise ValueError("num_particles must be at least 1")

    bounds = np.asarray(hyperparameter_bounds, dtype=float)
    if bounds.ndim != 2 or bounds.shape[1] != 2:
        raise ValueError("hyperparameter_bounds must have shape (n_dims, 2)")

    # Sort each pair: np.random.uniform is undefined for high < low and
    # np.clip with min > max collapses the dimension to a single value.
    lower = bounds.min(axis=1)
    upper = bounds.max(axis=1)
    span = upper - lower
    n_dims = len(bounds)

    # PSO initialization. Velocities are drawn relative to each dimension's
    # range; unit-scale velocities would throw narrow dimensions (such as a
    # learning rate) straight onto a bound on the first move.
    particles = np.random.uniform(low=lower, high=upper, size=(num_particles, n_dims))
    velocities = np.random.uniform(low=-span, high=span, size=(num_particles, n_dims))
    pbest = particles.copy()
    pbest_values = np.array([fitness_fn(position) for position in particles], dtype=float)
    gbest_index = np.argmax(pbest_values)
    gbest = pbest[gbest_index].copy()
    gbest_value = pbest_values[gbest_index]

    # PSO main loop
    for _ in range(max_iterations):
        for i in range(num_particles):
            r1, r2 = np.random.rand(), np.random.rand()
            cognitive = c1 * r1 * (pbest[i] - particles[i])
            social = c2 * r2 * (gbest - particles[i])
            velocities[i] = w * velocities[i] + cognitive + social
            # Move, then project back into the search box.
            particles[i] = np.clip(particles[i] + velocities[i], lower, upper)

            fitness_i = fitness_fn(particles[i])

            # Update personal best
            if fitness_i > pbest_values[i]:
                pbest[i] = particles[i].copy()
                pbest_values[i] = fitness_i

            # Update global best
            if fitness_i > gbest_value:
                gbest = particles[i].copy()
                gbest_value = fitness_i

    return gbest

# Requires train_data_gen and test_data_gen to be defined already; they are
# read by fitness_function during the search and by the final fit below.
input_shape = (24, 11)
lstm_units = [64, 32, 16, 8]
gru_units = [64, 32, 16, 8]  # Define the units for each GRU layer

# Define hyperparameter bounds
activation_functions = ['relu', 'tanh']
activation_function_mapping = {act_func: i for i, act_func in enumerate(activation_functions)}

# One [low, high] row per search dimension; low must not exceed high,
# otherwise sampling is undefined and clipping pins the value to one bound.
hyperparameter_bounds = np.array([
    [3, 5],  # Kernel number
    [0, len(activation_functions) - 1],  # Activation function (mapped to integers)
    [50, 100],  # Epochs
    [0.0001, 0.001],  # Learning rate
    [8, 16],  # LSTM units
    [32, 64],  # GRU units
])

# Run PSO optimization
optimal_hyperparameters = PSO_optimization(
    num_particles=10,
    max_iterations=50,
    w=0.5,
    c1=1.5,
    c2=1.5,
    hyperparameter_bounds=hyperparameter_bounds
)

# Extract optimal hyperparameters
optimal_kernel_number = int(optimal_hyperparameters[0])
# Round instead of truncating so that the upper half of the [0, 1] range maps
# to the second activation function rather than only the exact value 1.0.
optimal_activation_function_index = int(round(optimal_hyperparameters[1]))
optimal_activation_function = activation_functions[optimal_activation_function_index]
optimal_epochs = int(optimal_hyperparameters[2])
optimal_learning_rate = optimal_hyperparameters[3]
# NOTE(review): the final model uses the fixed unit lists defined above, not
# the unit values found by the search; the kernel number and activation
# function are extracted but not passed to the model either - confirm intent.
optimal_lstm_units = lstm_units
optimal_gru_units = gru_units

# Use these optimal hyperparameters to train the final model
lstm_gru_model_optimized = TabularLSTMGRUModel(
    input_shape,
    lstm_units=optimal_lstm_units,
    gru_units=optimal_gru_units,
    output_units=1,
    num_heads=8
)

# Compile the model with the optimal learning rate
optimizer = tf.keras.optimizers.Adam(learning_rate=optimal_learning_rate)
lstm_gru_model_optimized.model.compile(loss=huber_loss, optimizer=optimizer)

# Train the final model with optimal hyperparameters
lstm_gru_model_optimized.fit(train_data_gen, epochs=optimal_epochs)



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 54/68
Epoch 55/68
Epoch 56/68
Epoch 57/68
Epoch 58/68
Epoch 59/68
Epoch 60/68
Epoch 61/68
Epoch 62/68
Epoch 63/68
Epoch 64/68
Epoch 65/68
Epoch 66/68
Epoch 67/68
Epoch 68/68
Epoch 1/92
Epoch 2/92
Epoch 3/92
Epoch 4/92
Epoch 5/92
Epoch 6/92
Epoch 7/92
Epoch 8/92
Epoch 9/92
Epoch 10/92
Epoch 11/92
Epoch 12/92
Epoch 13/92
Epoch 14/92
Epoch 15/92
Epoch 16/92
Epoch 17/92
Epoch 18/92
Epoch 19/92
Epoch 20/92
Epoch 21/92
Epoch 22/92
Epoch 23/92
Epoch 24/92
Epoch 25/92
Epoch 26/92
Epoch 27/92
Epoch 28/92
Epoch 29/92
Epoch 30/92
Epoch 31/92
Epoch 32/92
Epoch 33/92
Epoch 34/92
Epoch 35/92
Epoch 36/92
Epoch 37/92
Epoch 38/92
Epoch 39/92
Epoch 40/92
Epoch 41/92
Epoch 42/92
Epoch 43/92
Epoch 44/92
Epoch 45/92
Epoch 46/92
Epoch 47/92
Epoch 48/92
Epoch 49/92
Epoch 50/92
Epoch 51/92
Epoch 52/92
Epoch 53/92
Epoch 54/92
Epoch 55/92
Epoch 56/92
Epoch 57/92
Epoch 58/92
Epoch 59/92
Epoch 60/92
Epoch 61/92
Epoch 62/92
Epoch 63/92
Epoch 64