<a href="https://colab.research.google.com/github/ganjiron/ganpython/blob/master/tranaformerTes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, Input, LayerNormalization, Dropout
from tensorflow.keras.layers import MultiHeadAttention, GlobalAveragePooling1D


In [2]:
def positional_encoding(position, d_model):
    """Build a sinusoidal positional-encoding table.

    Args:
        position: Number of positions to encode (rows 0 .. position - 1).
        d_model: Number of channels in the encoding of each position.

    Returns:
        A float32 tensor of shape (1, position, d_model). Along the last
        axis all sine channels come first, followed by all cosine channels
        (the two halves are concatenated, not interleaved).
    """
    pos_idx = np.arange(position)[:, np.newaxis]   # column vector of positions
    dim_idx = np.arange(d_model)[np.newaxis, :]    # row vector of channel indices

    # Channels 2k and 2k+1 share the same rate 1 / 10000^(2k / d_model).
    rates = 1 / np.power(10000, (2 * (dim_idx // 2)) / np.float32(d_model))
    angles = pos_idx * rates

    # Sine is taken from the even-indexed columns (2i) and cosine from the
    # odd-indexed columns (2i+1); the two halves are then joined side by side.
    table = np.concatenate(
        [np.sin(angles[:, 0::2]), np.cos(angles[:, 1::2])],
        axis=-1,
    )

    # Prepend a batch axis of size 1 so the table broadcasts over a batch.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

class TransformerBlock(tf.keras.layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual addition
    and layer normalization.

    Args:
        embed_dim: Output width of the feed-forward network; also used as
            `key_dim` of the attention layer. Because the feed-forward output
            is added back onto the block's intermediate result, the last
            dimension of the inputs must be compatible with this value.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward layer.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Forwarded to the base `Layer` constructor (e.g. `name`).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential(
            [Dense(ff_dim, activation="relu"), Dense(embed_dim)]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to `inputs`.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Forwarded to the dropout layers. Defaults to None so the
                layer can be called as `layer(x)` without the caller (or the
                framework) having to supply it.

        Returns:
            The layer-normalized output of the second residual connection.
        """
        # Self-attention sub-layer with residual connection.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        # Position-wise feed-forward sub-layer with residual connection.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config


In [3]:
def create_transformer_model(input_shape, num_classes, embed_dim, num_heads, ff_dim,
                             dropout_rate=0.1):
    """Build a single-block Transformer classifier over feature sequences.

    Pipeline: (optional projection to `embed_dim`) -> scale by sqrt(embed_dim)
    -> add sinusoidal positional encoding -> TransformerBlock -> global average
    pooling over the sequence axis -> dropout -> softmax classifier.

    Args:
        input_shape: `(sequence_length, feature_dim)` of one sample, without
            the batch axis. The sequence length must be a fixed integer
            because the positional-encoding table is built eagerly.
        num_classes: Number of output classes.
        embed_dim: Channel width used inside the Transformer block.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the block's feed-forward network.
        dropout_rate: Dropout rate used inside the block and before the
            classifier. Defaults to 0.1, the previously hard-coded value.

    Returns:
        An uncompiled Keras `Model` mapping inputs of shape `input_shape` to
        class probabilities of shape `(num_classes,)` per sample.

    Raises:
        ValueError: If `input_shape` is not a 2-tuple with a fixed sequence
            length.
    """
    if len(input_shape) != 2 or input_shape[0] is None:
        raise ValueError(
            "input_shape must be (sequence_length, feature_dim) with a fixed "
            f"sequence_length, got {input_shape!r}"
        )
    seq_len, feature_dim = input_shape

    inputs = Input(shape=input_shape)
    x = inputs

    # Both the positional encoding and the block's residual connections work
    # on `embed_dim` channels. Project the raw features only when their width
    # differs, so the graph is unchanged when the widths already match.
    if feature_dim != embed_dim:
        x = Dense(embed_dim)(x)

    # Scale the features, then add the positional encoding, which is built for
    # exactly `seq_len` positions and so needs no further slicing.
    x = x * tf.math.sqrt(tf.cast(embed_dim, tf.float32))
    x = x + positional_encoding(seq_len, embed_dim)

    x = TransformerBlock(embed_dim, num_heads, ff_dim, rate=dropout_rate)(x)
    x = GlobalAveragePooling1D()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation="softmax")(x)

    return Model(inputs=inputs, outputs=outputs)


In [4]:
# Example input size: (sequence length, number of features).
input_shape = (120, 128)
# Example number of classes.
num_classes = 10

model = create_transformer_model(
    input_shape=input_shape,
    num_classes=num_classes,
    embed_dim=128,
    num_heads=4,
    ff_dim=128,
)
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Train on real data, for example:
# model.fit(X_train, y_train, epochs=10, batch_size=32)
