In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from keras.callbacks import Callback, ModelCheckpoint
from keras.models import load_model
import pickle

# 1. 전처리 데이터 로드 (sentences: 10분 종가+거래량, abs: 11분 종가)
with open(r'C:/TJ_FInal_Project/Gina/트랜스포머 주가예측/train_X.pkl', 'rb') as f:
    sentences = pickle.load(f)

with open(r'C:/TJ_FInal_Project/Gina/트랜스포머 주가예측/train_y.pkl', 'rb') as f:
    abs = pickle.load(f)

# Dataset 생성 (입력: 10분간 데이터, 출력: 11분 종가 예측)
dataset = tf.data.Dataset.from_tensor_slices((
    {
        'inputs': sentences,
        'dec_inputs': abs[:, :-1]  # 11분 데이터는 첫번째 값을 제외
    },
    {
        'outputs': abs[:, 1:]  # 11분 데이터를 예측
    }
))

# 데이터 타입 변환
def convert_to_int16(inputs, outputs):
    inputs = {key: tf.cast(value, tf.int16) for key, value in inputs.items()}
    outputs = {key: tf.cast(value, tf.int16) for key, value in outputs.items()}
    return inputs, outputs

dataset = dataset.map(convert_to_int16)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Hyperparameters 설정
SEN_MAX_LENGTH = 79  # 10분 데이터의 시퀀스 길이
ABS_MAX_LENGTH = 11  # 11분 종가 예측
VOCAB_SIZE = 2
BATCH_SIZE = 128
BUFFER_SIZE = 20000
D_MODEL = 256
NUM_LAYERS = 2
NUM_HEADS = 2
DFF = 256
DROPOUT = 0.3
EPOCHS = 2000

dataset = dataset.cache()
dataset = dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

# 2. 트랜스포머 모델 구성 (디코더 제거)
class PositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, position, d_model):
        super(PositionalEncoding, self).__init__()
        self.pos_encoding = self.positional_encoding(position, d_model)

    def get_angles(self, position, i, d_model):
        angles = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return position * angles

    def positional_encoding(self, position, d_model):
        angle_rads = self.get_angles(
            position=tf.range(position, dtype=tf.float32)[:, tf.newaxis],
            i=tf.range(d_model, tf.float32)[tf.newaxis, :],
            d_model=d_model)

        sines = tf.math.sin(angle_rads[:, 0::2])
        cosines = tf.math.cos(angle_rads[:, 1::2])

        pos_encoding = tf.concat([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[tf.newaxis, ...]

        return tf.cast(pos_encoding, tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

# 패딩 마스크
def create_padding_mask(x):
    mask = tf.cast(tf.math.equal(x, 0), tf.float32)
    return mask[:, tf.newaxis, tf.newaxis, :]

# scaled_dot_product_attention 함수 정의
def scaled_dot_product_attention(query, key, value, mask):
    matmul_qk = tf.matmul(query, key, transpose_b=True)

    dk = tf.cast(tf.shape(key)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

    if mask is not None:
        scaled_attention_logits += (mask * -1e9)

    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
    output = tf.matmul(attention_weights, value)

    return output, attention_weights

# MultiHead Attention Layer
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % self.num_heads == 0

        self.depth = d_model // self.num_heads

        self.query_dense = tf.keras.layers.Dense(d_model)
        self.key_dense = tf.keras.layers.Dense(d_model)
        self.value_dense = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        query, key, value, mask = inputs['query'], inputs['key'], inputs['value'], inputs['mask']
        batch_size = tf.shape(query)[0]

        query = self.query_dense(query)
        key = self.key_dense(key)
        value = self.value_dense(value)

        query = self.split_heads(query, batch_size)
        key = self.split_heads(key, batch_size)
        value = self.split_heads(value, batch_size)

        scaled_attention, _ = scaled_dot_product_attention(query, key, value, mask)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])

        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
        return self.dense(concat_attention)

# 인코더 레이어 정의
def encoder_layer(d_model, num_heads, dff, dropout, name="encoder_layer"):
    inputs = tf.keras.Input(shape=(None, d_model), name="inputs")

    attn_output = MultiHeadAttention(
        d_model, num_heads)(inputs={
            'query': inputs, 
            'key': inputs, 
            'value': inputs, 
            'mask': None
        })
    
    attn_output = tf.keras.layers.Dropout(rate=dropout)(attn_output)
    out1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(inputs + attn_output)

    ffn_output = point_wise_feed_forward_network(d_model, dff)(out1)
    ffn_output = tf.keras.layers.Dropout(rate=dropout)(ffn_output)
    out2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

    return tf.keras.Model(inputs=inputs, outputs=out2, name=name)

# 포인트 와이즈 피드포워드 네트워크
def point_wise_feed_forward_network(d_model, dff):
    return tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),
        tf.keras.layers.Dense(d_model)
    ])

# 인코더 블록 정의
def encoder(vocab_size, num_layers, dff, d_model, num_heads, dropout, name="encoder"):
    inputs = tf.keras.Input(shape=(None, 2), name="inputs")  # 2는 종가와 거래량
    padding_mask = tf.keras.Input(shape=(1, 1, None), name="padding_mask")

    # 입력 데이터를 평탄화 (Flatten)하여 모델에 맞춤
    flatten_inputs = tf.keras.layers.Flatten()(inputs)
    dense_inputs = tf.keras.layers.Dense(d_model)(flatten_inputs)  # d_model 차원으로 변환
    
    embeddings = PositionalEncoding(vocab_size, d_model)(dense_inputs)

    output = tf.keras.layers.Dropout(rate=dropout)(embeddings)

    for i in range(num_layers):
        output = encoder_layer(d_model=d_model, num_heads=num_heads, dff=dff, dropout=dropout, name=f"encoder_layer_{i}")(
            inputs=output)

    return tf.keras.Model(inputs=[inputs, padding_mask], outputs=output, name=name)

# 트랜스포머 구성 (디코더 없이 인코더와 Dense로 종가 예측)
def transformer(vocab_size, num_layers, dff, d_model, num_heads, dropout, name="transformer"):
    inputs = tf.keras.Input(shape=(None, 2), name="inputs")  # 2는 종가와 거래량
    enc_padding_mask = tf.keras.layers.Lambda(create_padding_mask, name='enc_padding_mask')(inputs)

    enc_outputs = encoder(vocab_size=vocab_size, num_layers=num_layers, dff=dff,
                          d_model=d_model, num_heads=num_heads, dropout=dropout)(inputs=[inputs, enc_padding_mask])

    outputs = tf.keras.layers.Dense(units=1, name="outputs")(enc_outputs)
    return tf.keras.Model(inputs=inputs, outputs=outputs, name=name)

# Learning rate 스케쥴러 및 옵티마이저
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=1000):
        super(CustomSchedule, self).__init__()
        self.d_model = d_model
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        arg1 = tf.math.rsqrt(tf.cast(step, tf.float32))
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

learning_rate = CustomSchedule(D_MODEL)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

# Loss 함수 (MSE 사용)
def loss_function(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred))


In [10]:
# 2. transformer 모델 정의 및 빌드 (이전 코드 그대로 유지)
model = transformer(
    vocab_size=VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    dff=DFF,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dropout=DROPOUT
)

# 3. 모델 컴파일 (이전 코드 그대로 유지)
model.compile(optimizer=optimizer, loss=loss_function)


# 5. 모델 학습
model.fit(processed_sentences, epochs=EPOCHS)

# 6. 모델 저장
model.save_weights('./transformer_stock_prediction.h5')


ValueError: The last dimension of the inputs to a Dense layer should be defined. Found None. Full input shape received: (None, None)