In [1]:
import requests
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
# 1단계: 업비트 API에서 데이터 가져오기
# 이 함수는 특정 시장의 일별 캔들 데이터를 가져옵니다.
def fetch_upbit_data(market="KRW-BTC", count=200):
    url = "https://api.upbit.com/v1/candles/days"
    params = {"market": market, "count": count}
    response = requests.get(url, params=params)
    data = response.json()

    # 데이터프레임으로 변환하고 필요한 열만 선택
    df = pd.DataFrame(data)
    df = df[["candle_date_time_kst", "trade_price"]]
    df.columns = ["date", "price"]
    df["date"] = pd.to_datetime(df["date"])
    df.sort_values(by="date", inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

In [3]:
# BTC/KRW 시장 데이터 가져오기
df = fetch_upbit_data()
print("Sample data:")
print(df.head())

Sample data:
                 date       price
0 2024-05-25 09:00:00  96240000.0
1 2024-05-26 09:00:00  95610000.0
2 2024-05-27 09:00:00  96050000.0
3 2024-05-28 09:00:00  94600000.0
4 2024-05-29 09:00:00  93910000.0


In [4]:
# 2단계: 가격 변화율과 변동성 플래그 추가 (고급 피처)
def add_roc_features(df, window=1):
    df['price_change'] = df['price'].pct_change(periods=window) # 가격 변화율 계산
    df['volatility_flag'] = (df['price_change'].abs() > 0.05).astype(int)  # 5% 이상 변화 시 플래그 설정
    return df

In [6]:
df = add_roc_features(df)
df

Unnamed: 0,date,price,price_change,volatility_flag
0,2024-05-25 09:00:00,96240000.0,,0
1,2024-05-26 09:00:00,95610000.0,-0.006546,0
2,2024-05-27 09:00:00,96050000.0,0.004602,0
3,2024-05-28 09:00:00,94600000.0,-0.015096,0
4,2024-05-29 09:00:00,93910000.0,-0.007294,0
...,...,...,...,...
195,2024-12-06 09:00:00,139340000.0,0.013374,0
196,2024-12-07 09:00:00,138960000.0,-0.002727,0
197,2024-12-08 09:00:00,141005000.0,0.014716,0
198,2024-12-09 09:00:00,138864000.0,-0.015184,0


In [7]:
# 3단계: 데이터를 정규화하고 GRU 입력용 시퀀스 생성
def preprocess_data(df, seq_length=30):
    # 가격 데이터를 정규화
    scaler = MinMaxScaler(feature_range=(0, 1))
    df["price_normalized"] = scaler.fit_transform(df[["price"]])

    # 시계열 입력 데이터 생성
    def create_sequences(data, seq_length):
        X, y = [], []
        for i in range(len(data) - seq_length):
            X.append(data[i:i + seq_length])
            y.append(data[i + seq_length])
        return np.array(X), np.array(y)

    X, y = create_sequences(df["price_normalized"].values, seq_length)

    # 데이터를 학습, 검증, 테스트 세트로 분리
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    return X_train, X_val, X_test, y_train, y_val, y_test, scaler

In [8]:
# 데이터 전처리 실행
X_train, X_val, X_test, y_train, y_val, y_test, scaler = preprocess_data(df)
print("Training data shape:", X_train.shape)

Training data shape: (136, 30)


In [9]:
# 4단계: GRU 모델 정의
def build_gru_model(seq_length, input_dim, learning_rate):
    model = Sequential([
        GRU(128, return_sequences=True, input_shape=(seq_length, input_dim), kernel_regularizer=l2(0.01)), # 첫 번째 GRU 레이어
        Dropout(0.2), # 드롭아웃 추가
        GRU(64, return_sequences=False, kernel_regularizer=l2(0.01)), # 두 번째 GRU 레이어
        Dropout(0.2),
        Dense(32, activation='relu', kernel_regularizer=l2(0.01)), # 완전 연결 레이어
        Dense(1) # 출력 레이어
    ])

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss="mean_squared_error", metrics=["mae"])
    return model

In [10]:
# 5단계: 학습률 스케줄 및 조기 종료 설정
lr_schedule = ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=1000,
    decay_rate=0.9
)
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

In [11]:
# 6단계: 모델 입력용 데이터 형태 준비
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [12]:
# 7단계: 모델 생성 및 학습
model = build_gru_model(seq_length=X_train.shape[1], input_dim=X_train.shape[2], learning_rate=lr_schedule)
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)

  super().__init__(**kwargs)


Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 203ms/step - loss: 1.7905 - mae: 0.2695 - val_loss: 0.8478 - val_mae: 0.2186
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.6934 - mae: 0.1483 - val_loss: 0.3098 - val_mae: 0.0461
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 122ms/step - loss: 0.2810 - mae: 0.1015 - val_loss: 0.1727 - val_mae: 0.0611
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 132ms/step - loss: 0.1776 - mae: 0.1073 - val_loss: 0.1346 - val_mae: 0.0745
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 129ms/step - loss: 0.1330 - mae: 0.0923 - val_loss: 0.0941 - val_mae: 0.0429
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 125ms/step - loss: 0.0908 - mae: 0.0701 - val_loss: 0.0615 - val_mae: 0.0587
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 115ms/step - loss: 0.

In [13]:
# 8단계: 테스트 데이터로 평가
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test Loss (MSE): {test_loss:.4f}, Test MAE: {test_mae:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 0.0130 - mae: 0.0398
Test Loss (MSE): 0.0130, Test MAE: 0.0398


In [14]:
# 9단계: 미래 데이터 예측
def predict_future(model, X_test, scaler):
    last_sequence = X_test[-1].reshape(1, X_test.shape[1], X_test.shape[2])
    predicted_price = model.predict(last_sequence)
    predicted_price_original = scaler.inverse_transform(predicted_price)
    return predicted_price_original[0][0]

predicted_price = predict_future(model, X_test, scaler)
print(f"Predicted BTC Price: {predicted_price:.2f} KRW")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322ms/step
Predicted BTC Price: 146564048.00 KRW
