# Import Libraries

In [1]:
import numpy as np
import tensorflow as tf
import keras
from keras import layers

# Prepare Data

In [6]:
num_samples = 1000
input_sequence_length = 10  # 입력 시퀀스 길이
target_sequence_length = 10  # 출력 시퀀스 길이
feature_dim = 4  # 특성의 차원

x_train = np.random.rand(num_samples, input_sequence_length, feature_dim)
y_train = np.random.rand(num_samples, target_sequence_length, feature_dim)  # 예를 들어, target도 같은 차원을 가질 수 있음

# Transformer

## Positional Encoding

In [11]:
class PositionalEncoding(layers.Layer):
    def __init__(self, position, d_model):
        super(PositionalEncoding, self).__init__()
        self.pos_encoding = self.positional_encoding(position, d_model)

    def get_angles(self, pos, i, d_model):
        angles = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return pos * angles

    def positional_encoding(self, position, d_model):
        angle_rads = self.get_angles(np.arange(position)[:, np.newaxis],
                                     np.arange(d_model)[np.newaxis, :],
                                     d_model)
        # dimensions of pos_encoding should be (1, position, d_model)
        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
        pos_encoding = angle_rads[np.newaxis, ...]
        return tf.cast(pos_encoding, dtype=tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

## Transformer Encoder Layer

In [3]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=head_size, dropout=dropout)(inputs, inputs)
    x = layers.LayerNormalization(epsilon=1e-6)(x + inputs)
    ff = layers.Dense(ff_dim, activation="relu")(x)
    ff = layers.Dense(inputs.shape[-1])(ff)
    return layers.LayerNormalization(epsilon=1e-6)(ff + x)


## Transformer Decoder Layer

In [4]:
def transformer_decoder(inputs, enc_outputs, head_size, num_heads, ff_dim, dropout=0):
    x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=head_size, dropout=dropout)(inputs, inputs)
    x = layers.LayerNormalization(epsilon=1e-6)(x + inputs)
    x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=head_size, dropout=dropout)(x, enc_outputs)
    x = layers.LayerNormalization(epsilon=1e-6)(x + inputs)
    ff = layers.Dense(ff_dim, activation="relu")(x)
    ff = layers.Dense(inputs.shape[-1])(ff)
    return layers.LayerNormalization(epsilon=1e-6)(ff + x)


## Build Model

In [12]:
def transformer_model(input_shape, target_shape, head_size, num_heads, ff_dim, num_blocks, dropout=0):
    inputs = keras.Input(shape=input_shape)
    enc_outputs = PositionalEncoding(input_shape[0], input_shape[1])(inputs)  # input_shape should be like (sequence_length, feature_dim)
    for _ in range(num_blocks):
        enc_outputs = transformer_encoder(enc_outputs, head_size, num_heads, ff_dim, dropout)
    
    dec_inputs = keras.Input(shape=target_shape)
    dec_outputs = PositionalEncoding(target_shape[0], target_shape[1])(dec_inputs)
    for _ in range(num_blocks):
        dec_outputs = transformer_decoder(dec_outputs, enc_outputs, head_size, num_heads, ff_dim, dropout)
    
    outputs = layers.Dense(target_shape[1])(dec_outputs)  # Adjusted output dimension
    return keras.Model(inputs=[inputs, dec_inputs], outputs=outputs)


In [13]:
# 입력 차원과 출력 차원 조정
input_sequence_length = 10
target_sequence_length = 10
feature_dim = 4  # 특성의 차원

# 모델 생성 및 컴파일
model = transformer_model(
    input_shape=(input_sequence_length, feature_dim),
    target_shape=(target_sequence_length, feature_dim),
    head_size=64,
    num_heads=2,
    ff_dim=128,
    num_blocks=2,
    dropout=0.1
)
model.compile(optimizer="adam", loss="mse")

In [14]:
model.fit([x_train, y_train], y_train, batch_size=32, epochs=10, validation_split=0.2)

Epoch 1/10


2024-04-13 12:45:03.034699: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - loss: 0.7036 - val_loss: 0.1102
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 76ms/step - loss: 0.1064 - val_loss: 0.0915
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 72ms/step - loss: 0.0903 - val_loss: 0.0865
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 70ms/step - loss: 0.0851 - val_loss: 0.0846
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 72ms/step - loss: 0.0844 - val_loss: 0.0840
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 71ms/step - loss: 0.0837 - val_loss: 0.0836
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 70ms/step - loss: 0.0832 - val_loss: 0.0833
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 76ms/step - loss: 0.0837 - val_loss: 0.0831
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x305368eb0>

In [15]:
# 추론 함수
def predict_sequence(input_sequence):
    target_seq = np.zeros((1, target_sequence_length, feature_dim))
    for t in range(target_sequence_length):
        prediction = model.predict([input_sequence, target_seq])
        target_seq[0, t, :] = prediction[0, t, :]
    return target_seq

# 새로운 데이터 준비 및 추론
x_new = np.random.rand(1, input_sequence_length, feature_dim)
predicted_sequence = predict_sequence(x_new)
print("Predicted Sequence:", predicted_sequence)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 931ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Predicted Sequence: [[[0.47234696 0.54780543 0.52983218 0.54366386]
  [0.50043285 0.505651   0.51049489 0.50870204]
  [0.52304906 0.47687182 0.49968171 0.47915438]
  [0.52890801 0.46879721 0.49634266 0.47143605]
  [0.51117218 0.49157482 0.50498033 0.49479645]
  [0.47399431 0.547