# In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
# Assumes `X_train`, `y_train` and `X_test` are already defined (e.g. in an
# earlier cell): training inputs, training targets and held-out inputs.

# Positional encoding layer
class PositionalEncoding(layers.Layer):
    """Add a fixed (non-trainable) sinusoidal positional encoding to its input.

    The encoding table is computed once at construction time with shape
    ``(1, sequence_length, output_dim)`` and added to the input in ``call``.
    Sine components occupy the first part of the feature axis and cosine
    components the remainder (they are concatenated, not interleaved).

    Args:
        sequence_length: Number of positions to precompute encodings for.
        output_dim: Size of the feature axis of the encoding table.
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, sequence_length, output_dim, **kwargs):
        # Forward the base-layer kwargs so the layer can be rebuilt from its
        # config, which includes entries such as ``name`` and ``dtype``.
        super().__init__(**kwargs)
        self.sequence_length = sequence_length
        self.output_dim = output_dim
        self.pos_encoding = self.positional_encoding(sequence_length, output_dim)

    def call(self, inputs):
        """Return ``inputs`` plus the encoding for its first ``inputs.shape[1]`` positions."""
        # Slice the table to the runtime length of axis 1 of the input.
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

    def get_config(self):
        """Return the layer config, including the constructor arguments.

        Without ``sequence_length`` and ``output_dim`` the layer could not be
        re-instantiated by ``from_config`` when a saved model is loaded.
        """
        config = super().get_config()
        config.update(
            {
                "sequence_length": self.sequence_length,
                "output_dim": self.output_dim,
            }
        )
        return config

    def positional_encoding(self, sequence_length, output_dim):
        """Build the ``(1, sequence_length, output_dim)`` float32 encoding table."""
        angle_rads = self.get_angles(
            np.arange(sequence_length)[:, np.newaxis],
            np.arange(output_dim)[np.newaxis, :],
            output_dim,
        )
        # Even feature indices feed the sine half, odd indices the cosine half.
        sines = np.sin(angle_rads[:, 0::2])
        cosines = np.cos(angle_rads[:, 1::2])

        pos_encoding = np.concatenate([sines, cosines], axis=-1)
        # Leading axis of size 1 lets the table broadcast over the batch axis.
        pos_encoding = pos_encoding[np.newaxis, ...]
        return tf.cast(pos_encoding, dtype=tf.float32)

    def get_angles(self, positions, i, d_model):
        """Return ``positions / 10000 ** (2 * (i // 2) / d_model)``."""
        angles = 1 / np.power(10000.0, (2 * (i // 2)) / np.float32(d_model))
        return positions * angles

# Transformer model
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one Transformer encoder block to ``inputs``.

    The block consists of two residual sub-blocks, each preceded by layer
    normalization: multi-head self-attention, then a position-wise
    feed-forward network built from two kernel-size-1 convolutions.

    Args:
        inputs: Tensor to transform; its last-axis size is reused as the
            number of output channels so the residual additions line up.
        head_size: ``key_dim`` passed to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Number of filters in the hidden feed-forward convolution.
        dropout: Dropout rate used inside attention and after each sub-layer.

    Returns:
        A tensor produced by adding the feed-forward output to the
        attention residual.
    """
    # Self-attention sub-block: normalize, attend (query = value), drop, add.
    normalized_inputs = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attention_out = layers.MultiHeadAttention(
        key_dim=head_size,
        num_heads=num_heads,
        dropout=dropout,
    )(normalized_inputs, normalized_inputs)
    attention_out = layers.Dropout(dropout)(attention_out)
    attention_residual = attention_out + inputs

    # Feed-forward sub-block: normalize, expand to ff_dim, drop, project back.
    hidden = layers.LayerNormalization(epsilon=1e-6)(attention_residual)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    projected = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)
    return projected + attention_residual

# Build the Transformer model
def build_model(sequence_length, num_heads, ff_dim, dropout=0, mlp_units=(128,),
                *, num_features=None):
    """Build a sequence-to-sequence Transformer encoder model.

    The model adds positional encodings to the input, applies a stack of
    ``transformer_encoder`` blocks and finishes with a linear kernel-size-1
    convolution that maps back to ``num_features`` channels per time step.

    Args:
        sequence_length: Number of time steps in each input sequence.
        num_heads: Number of attention heads in every encoder block.
        ff_dim: Hidden width of the feed-forward part of every encoder block.
        dropout: Dropout rate forwarded to every encoder block.
        mlp_units: Controls the number of stacked encoder blocks. An ``int``
            is used directly; a sequence contributes one block per entry
            (so the default builds a single block).
        num_features: Size of the last (feature) axis of the input. Required;
            it cannot be inferred before the input tensor exists.

    Returns:
        An uncompiled ``keras.Model``.

    Raises:
        ValueError: If ``num_features`` is not provided.
    """
    if num_features is None:
        raise ValueError(
            "num_features must be provided (e.g. num_features=X_train.shape[-1])"
        )

    # The original looped over range(mlp_units), which fails for a list;
    # interpret the argument as a block count instead.
    num_blocks = mlp_units if isinstance(mlp_units, int) else len(mlp_units)

    inputs = keras.Input(shape=(sequence_length, num_features))
    x = PositionalEncoding(sequence_length, num_features)(inputs)
    for _ in range(num_blocks):
        # The feature count doubles as the attention head size, as before.
        x = transformer_encoder(x, num_features, num_heads, ff_dim, dropout)

    outputs = layers.Conv1D(filters=num_features, kernel_size=1, activation="linear")(x)
    return keras.Model(inputs=inputs, outputs=outputs)

# Define hyperparameters
sequence_length = 64
num_heads = 2
ff_dim = 32
dropout = 0.25

# Build and compile the model
# NOTE(review): assumes X_train is an array shaped (samples, sequence_length,
# features) and y_train matches the model output shape - confirm against the data.
model = build_model(
    sequence_length,
    num_heads,
    ff_dim,
    dropout,
    num_features=X_train.shape[-1],
)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Make predictions
predictions = model.predict(X_test)
