In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)



# 데이터 불러오기 & 리샘플링

In [2]:
price_data_4h = pd.read_csv("./data/BTC_4h_data_all.csv", encoding="utf-8-sig", index_col=0, parse_dates=True)
price_data_4h.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-09-25 12:00:00,4216000.0,4242000.0,4191000.0,4235000.0,164525000.0
2017-09-25 16:00:00,4235000.0,4235000.0,4187000.0,4204000.0,25226000.0
2017-09-25 20:00:00,4204000.0,4298000.0,4175000.0,4291000.0,935253000.0
2017-09-26 00:00:00,4297000.0,4332000.0,4228000.0,4313000.0,882110000.0
2017-09-26 04:00:00,4308000.0,4332000.0,4303000.0,4305000.0,941631000.0


In [3]:
# 리샘플링 규칙 정의
ohlc_dict = {
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum'
}

# 4시간봉 데이터를 일봉으로 리샘플링
price_data_1d = price_data_4h.resample('D').apply(ohlc_dict)
price_data_1d.dropna(inplace=True)
price_data_1d.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-09-25,4216000.0,4298000.0,4175000.0,4291000.0,1125004000.0
2017-09-26,4297000.0,4418000.0,4228000.0,4380000.0,5722396000.0
2017-09-27,4377000.0,4536000.0,4318000.0,4527000.0,5866831000.0
2017-09-28,4531000.0,4772000.0,4495000.0,4560000.0,6322376000.0
2017-09-29,4560000.0,4709000.0,4476000.0,4669000.0,6281084000.0


# 피쳐 추가

In [4]:
import pandas_ta as ta

# 4시간봉 데이터에 피처 추가
features_4h = price_data_4h.copy()
features_4h.ta.rsi(length=14, append=True, col_names=('RSI_14_4H',))
features_4h.ta.macd(fast=12, slow=26, signal=9, append=True, col_names=('MACD_12_26_9_4H', 'MACDh_12_26_9_4H', 'MACDs_12_26_9_4H'))
features_4h.ta.bbands(length=20, std=2, append=True, col_names=('BBL_20_2.0_4H', 'BBM_20_2.0_4H', 'BBU_20_2.0_4H', 'BBB_20_2.0_4H', 'BBP_20_2.0_4H'))

# 일봉 데이터에 피처 추가
features_1d = price_data_1d.copy()
features_1d.ta.rsi(length=14, append=True, col_names=('RSI_14_1D',))
features_1d.ta.sma(length=50, append=True, col_names=('SMA_50_1D',))
features_1d.ta.adx(length=14, append=True, col_names=('ADX_14_1D', 'DMP_14_1D', 'DMN_14_1D'))

# 불필요한 컬럼 및 NaN 값 제거
features_4h.drop(['Open', 'High', 'Low', 'Close', 'Volume'], axis=1, inplace=True)
features_1d.drop(['Open', 'High', 'Low', 'Close', 'Volume'], axis=1, inplace=True)

  from pkg_resources import get_distribution, DistributionNotFound


In [5]:
# 일봉 피처를 4시간봉 인덱스에 맞게 재정렬하고 ffill로 채우기
aligned_features_1d = features_1d.reindex(features_4h.index, method='ffill')

# 4시간봉 원본 데이터와 두 타임프레임의 피처를 결합
final_features = pd.concat([price_data_4h, features_4h, aligned_features_1d], axis=1)
final_features.dropna(inplace=True)

In [6]:
final_features.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,RSI_14_4H,MACD_12_26_9_4H,MACDh_12_26_9_4H,MACDs_12_26_9_4H,BBL_20_2.0_4H,BBM_20_2.0_4H,BBU_20_2.0_4H,BBB_20_2.0_4H,BBP_20_2.0_4H,RSI_14_1D,SMA_50_1D,ADX_14_1D,DMP_14_1D,DMN_14_1D
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2017-11-13 00:00:00,7349000.0,7649000.0,7141000.0,7297000.0,1738097000.0,43.131008,-242868.273287,-16272.417052,-226595.856235,6795309.0,7615400.0,8435491.0,21.537695,0.305875,53.516834,6376620.0,39.136314,17.438522,22.35027
2017-11-13 04:00:00,7300000.0,7344000.0,6845000.0,6860000.0,1669486000.0,36.923241,-267891.8952,-33036.831172,-234855.064028,6706035.0,7551200.0,8396365.0,22.384931,0.091086,53.516834,6376620.0,39.136314,17.438522,22.35027
2017-11-13 08:00:00,6878000.0,7361000.0,6850000.0,7104000.0,1699718000.0,41.94737,-264980.010429,-24099.95712,-240880.053308,6673706.0,7501700.0,8329694.0,22.074849,0.259841,53.516834,6376620.0,39.136314,17.438522,22.35027
2017-11-13 12:00:00,7139000.0,7670000.0,7106000.0,7400000.0,1755850000.0,47.418887,-236066.368395,3850.947931,-239917.316326,6685051.0,7466650.0,8248249.0,20.93574,0.457363,53.516834,6376620.0,39.136314,17.438522,22.35027
2017-11-13 16:00:00,7410000.0,7470000.0,7183000.0,7470000.0,1759918000.0,48.651439,-205138.969441,27822.677508,-232961.646949,6714276.0,7433400.0,8152524.0,19.348447,0.525448,53.516834,6376620.0,39.136314,17.438522,22.35027


# 데이터 전처리

In [7]:
from sklearn.preprocessing import MinMaxScaler

# 시간 순서에 따른 데이터 분할
train_data = final_features.loc[:'2024-06']
validation_data = final_features.loc['2024-07':'2024-12']
test_data = final_features.loc['2025':]

# 스케일러 훈련 및 적용
scaler = MinMaxScaler()
scaled_train_features = scaler.fit_transform(train_data)
scaled_validation_features = scaler.transform(validation_data)
scaled_test_features = scaler.transform(test_data)

# 3D 시퀀스 데이터 생성
def create_sequences(data, lookback_window):
    X, y = [], []
    for i in range(lookback_window, len(data)):
        X.append(data[i-lookback_window:i, :])
        y.append(data[i, 3]) # 종가(인덱스 3)를 임시 타겟으로 설정
    return np.array(X), np.array(y)

lookback = 30
X_train, _ = create_sequences(scaled_train_features, lookback)
X_val, _ = create_sequences(scaled_validation_features, lookback)
X_test, _ = create_sequences(scaled_test_features, lookback)

# 라벨(정답지)

In [8]:
def get_triple_barrier_labels(prices, entries, profit_take_pct, stop_loss_pct, max_hold_periods):
    """
    금융 시계열 데이터 레이블링을 위한 삼중 장벽 기법을 구현합니다.

    Args:
        prices (pd.Series): 가격 시계열 (예: 'Close').
        entries (pd.DatetimeIndex): 거래를 시작하는 타임스탬프.
        profit_take_pct (float): 익절 장벽의 비율.
        stop_loss_pct (float): 손절 장벽의 비율.
        max_hold_periods (int): 최대 포지션 보유 기간.

    Returns:
        pd.Series: 각 진입 시점에 대한 레이블 (1: 익절, -1: 손절, 0: 기간 만료).
    """
    results = pd.Series(index=entries, dtype='int8')
    
    for entry_time in entries:
        entry_price = prices.loc[entry_time]
        
        # 1. 수직 장벽 설정
        end_of_window_idx = prices.index.get_loc(entry_time) + max_hold_periods
        if end_of_window_idx >= len(prices.index):
            end_of_window_idx = len(prices.index) - 1
        end_of_window = prices.index[end_of_window_idx]
        
        # 2. 수평 장벽 설정
        profit_take_level = entry_price * (1 + profit_take_pct)
        stop_loss_level = entry_price * (1 - stop_loss_pct)
        
        # 3. 장벽 도달 시간 계산
        price_path = prices.loc[entry_time:end_of_window]
        
        profit_hit_time = price_path[price_path >= profit_take_level].first_valid_index()
        stop_loss_hit_time = price_path[price_path <= stop_loss_level].first_valid_index()
        
        # 4. 레이블 결정
        if profit_hit_time is not None and (stop_loss_hit_time is None or profit_hit_time <= stop_loss_hit_time):
            results.loc[entry_time] = 1
        elif stop_loss_hit_time is not None:
            results.loc[entry_time] = -1
        else:
            results.loc[entry_time] = 0
            
    return results

# 레이블링을 위한 진입 시점 정의 (시퀀싱으로 인해 앞부분 제외)
# 여기서는 모든 시점을 진입 후보로 간주하여 레이블링합니다.
train_entries = train_data.index[lookback:]
val_entries = validation_data.index[lookback:]
test_entries = test_data.index[lookback:]

# 각 데이터셋에 대한 레이블 생성
labels_train = get_triple_barrier_labels(
    prices=train_data['Close'], entries=train_entries,
    profit_take_pct=0.02, stop_loss_pct=0.01, max_hold_periods=12 # 12 * 4H = 2일
)
labels_val = get_triple_barrier_labels(
    prices=validation_data['Close'], entries=val_entries,
    profit_take_pct=0.02, stop_loss_pct=0.01, max_hold_periods=12
)
labels_test = get_triple_barrier_labels(
    prices=test_data['Close'], entries=test_entries,
    profit_take_pct=0.02, stop_loss_pct=0.01, max_hold_periods=12
)

# 모델

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, Bidirectional, Conv1D, MaxPooling1D
from tensorflow.keras.utils import to_categorical

# 삼중 장벽 기법으로 생성된 레이블 (labels_train, labels_val, labels_test)
# 레이블을 원-핫 인코딩으로 변환: -1 -> , 0 -> , 1 -> 
y_train_cat = to_categorical(labels_train + 1, num_classes=3)
y_val_cat = to_categorical(labels_val + 1, num_classes=3)
y_test_cat = to_categorical(labels_test + 1, num_classes=3)

model = Sequential([
    LSTM(units=64, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    Dense(units=32, activation='relu'),
    Dense(units=3, activation='softmax')
])


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

  super().__init__(**kwargs)


# 훈련

In [10]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

checkpoint_cb = ModelCheckpoint("./lstm/best_mtf_model.h5", save_best_only=True)
early_stopping_cb = EarlyStopping(patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train_cat,
    epochs=100,
    batch_size=64,
    validation_data=(X_val, y_val_cat),
    callbacks=[checkpoint_cb, early_stopping_cb]
)

Epoch 1/100
[1m224/227[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.4877 - loss: 1.0174



[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.4919 - loss: 1.0021 - val_accuracy: 0.4832 - val_loss: 1.0127
Epoch 2/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5001 - loss: 0.9726 - val_accuracy: 0.4832 - val_loss: 1.0208
Epoch 3/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5039 - loss: 0.9629 - val_accuracy: 0.4832 - val_loss: 1.0199
Epoch 4/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5049 - loss: 0.9564 - val_accuracy: 0.4832 - val_loss: 1.0198
Epoch 5/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5071 - loss: 0.9513 - val_accuracy: 0.4832 - val_loss: 1.0243
Epoch 6/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5099 - loss: 0.9459 - val_accuracy: 0.4832 - val_loss: 1.0195
Epoch 7/100
[1m227/227[0m 



[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5278 - loss: 0.9220 - val_accuracy: 0.4832 - val_loss: 1.0113
Epoch 12/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5313 - loss: 0.9167 - val_accuracy: 0.4953 - val_loss: 1.0222
Epoch 13/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5408 - loss: 0.9060 - val_accuracy: 0.5037 - val_loss: 1.0217
Epoch 14/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.5496 - loss: 0.8985 - val_accuracy: 0.5112 - val_loss: 1.0193
Epoch 15/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5626 - loss: 0.8884 - val_accuracy: 0.5065 - val_loss: 1.0119
Epoch 16/100
[1m224/227[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.5657 - loss: 0.8749



[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5670 - loss: 0.8782 - val_accuracy: 0.5074 - val_loss: 1.0101
Epoch 17/100
[1m226/227[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.5694 - loss: 0.8710



[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.5707 - loss: 0.8723 - val_accuracy: 0.5130 - val_loss: 1.0035
Epoch 18/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5794 - loss: 0.8645 - val_accuracy: 0.5047 - val_loss: 1.0186
Epoch 19/100
[1m222/227[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.5823 - loss: 0.8532



[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.5845 - loss: 0.8557 - val_accuracy: 0.5112 - val_loss: 1.0005
Epoch 20/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5869 - loss: 0.8504 - val_accuracy: 0.4953 - val_loss: 1.0081
Epoch 21/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.5972 - loss: 0.8422 - val_accuracy: 0.4981 - val_loss: 1.0028
Epoch 22/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.6021 - loss: 0.8366 - val_accuracy: 0.4991 - val_loss: 1.0050
Epoch 23/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.6057 - loss: 0.8276 - val_accuracy: 0.5037 - val_loss: 1.0096
Epoch 24/100
[1m225/227[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.6061 - loss: 0.8272



[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.6094 - loss: 0.8246 - val_accuracy: 0.4870 - val_loss: 0.9985
Epoch 25/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.6154 - loss: 0.8162 - val_accuracy: 0.4497 - val_loss: 1.0577
Epoch 26/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.6192 - loss: 0.8097 - val_accuracy: 0.4935 - val_loss: 1.0208
Epoch 27/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.6253 - loss: 0.8022 - val_accuracy: 0.4730 - val_loss: 1.0375
Epoch 28/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.6304 - loss: 0.7961 - val_accuracy: 0.4683 - val_loss: 1.0261
Epoch 29/100
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.6376 - loss: 0.7819 - val_accuracy: 0.4683 - val_loss: 1.0455
Epoch 30/100
[1m227/22

검증셋에 대한 예측을 수행합니다...
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step

--- 최적 Threshold 탐색 완료 ---
최적 Entry Threshold: 0.16
해당 Threshold에서의 F1-Score: 0.5467

--- 최적 Threshold 탐색 완료 (손절/Exit) ---
최적 Exit Threshold: 0.24
해당 Threshold에서의 F1-Score: 0.6555


# 테스트

# 검증 셋으로 threshold 값 찾기

In [31]:
from sklearn.metrics import f1_score

# 1. 훈련된 모델로 검증셋(X_val)에 대한 예측 확률 계산
print("검증셋에 대한 예측을 수행합니다...")
val_predictions = model.predict(X_val)

# '익절'(P=2) 클래스에 대한 예측 확률과 실제 정답을 추출
# y_val_cat은 to_categorical로 변환된 검증셋의 정답 레이블입니다.
prob_profit_val = val_predictions[:, 2] 
y_true_val = y_val_cat[:, 2]

# 2. 최적의 Threshold를 찾기 위한 반복문
best_f1 = -1.0
best_threshold = 0

# 0.05부터 0.95까지 0.01 간격으로 모든 Threshold 후보를 테스트
for threshold in np.arange(0.05, 0.95, 0.01):
    
    # 현재 Threshold를 기준으로 예측값을 0 또는 1로 변환
    y_pred = (prob_profit_val > threshold).astype(int)
    
    # F1-Score 계산
    current_f1 = f1_score(y_true_val, y_pred)
    
    # 만약 현재 F1-Score가 역대 최고 점수라면?
    if current_f1 > best_f1:
        best_f1 = current_f1
        best_threshold = threshold

print("\n--- 최적 Threshold 탐색 완료 ---")
print(f"최적 Entry Threshold: {best_threshold:.2f}")
print(f"해당 Threshold에서의 F1-Score: {best_f1:.4f}")

# '손절'(P=0) 클래스에 대한 예측 확률과 실제 정답을 추출
# y_val_cat은 to_categorical로 변환된 검증셋의 정답 레이블입니다.
prob_loss_val = val_predictions[:, 0] 
y_true_val_loss = y_val_cat[:, 0]

# 2. 최적의 Threshold를 찾기 위한 반복문
best_f1_exit = -1.0
best_threshold_exit = 0

# 0.05부터 0.95까지 0.01 간격으로 모든 Threshold 후보를 테스트
for threshold in np.arange(0.05, 0.95, 0.01):
    
    # 현재 Threshold를 기준으로 예측값을 0 또는 1로 변환
    y_pred_loss = (prob_loss_val > threshold).astype(int)
    
    # F1-Score 계산
    current_f1_exit = f1_score(y_true_val_loss, y_pred_loss)
    
    # 만약 현재 F1-Score가 역대 최고 점수라면?
    if current_f1_exit > best_f1_exit:
        best_f1_exit = current_f1_exit
        best_threshold_exit = threshold

print("\n--- 최적 Threshold 탐색 완료 (손절/Exit) ---")
print(f"최적 Exit Threshold: {best_threshold_exit:.2f}")
print(f"해당 Threshold에서의 F1-Score: {best_f1_exit:.4f}")

검증셋에 대한 예측을 수행합니다...
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

--- 최적 Threshold 탐색 완료 ---
최적 Entry Threshold: 0.16
해당 Threshold에서의 F1-Score: 0.5467

--- 최적 Threshold 탐색 완료 (손절/Exit) ---
최적 Exit Threshold: 0.24
해당 Threshold에서의 F1-Score: 0.6555


## 테스트셋으로 확인

In [36]:
# 테스트 데이터에 대한 예측 수행
predictions = model.predict(X_test)

# 확률을 신호로 변환 (예시 로직)
prob_profit = predictions[:, 2] # P(익절)
prob_loss = predictions[:, 0]   # P(손절)

# '익절' 확률의 최댓값과 평균값 확인
print(f"최대 익절 예측 확률: {np.max(prob_profit):.4f}")
print(f"평균 익절 예측 확률: {np.mean(prob_profit):.4f}") 


print(f"최대 손절 예측 확률: {np.max(prob_loss):.4f}")
print(f"평균 손절 예측 확률: {np.mean(prob_loss):.4f}")

entry_threshold = 0.16
exit_threshold = 0.24

# test_data의 길이에 맞는 불리언 배열 생성
entries = pd.Series(False, index=test_data.index)
exits = pd.Series(False, index=test_data.index)

# 예측 결과가 시작되는 인덱스부터 신호 생성
signal_index = test_data.index[lookback:]

entries.loc[signal_index] = prob_profit > entry_threshold
exits.loc[signal_index] = prob_loss > exit_threshold


# 테스트 기간의 종가 데이터 사용
price_close_test = test_data['Close'][lookback:]
entries_aligned = entries[lookback:]
exits_aligned = exits[lookback:]

# 진입 우선
conflicting_signals = entries_aligned & exits_aligned
exits_aligned[conflicting_signals] = False


[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
최대 익절 예측 확률: 0.6276
평균 익절 예측 확률: 0.1647
최대 손절 예측 확률: 0.9670
평균 손절 예측 확률: 0.4902


In [37]:
import vectorbt as vbt

portfolio = vbt.Portfolio.from_signals(
    close=price_close_test,
    entries=entries_aligned,
    exits=exits_aligned,
    init_cash=100000,
    fees=0.001,      # 0.1% 수수료
    slippage=0.0005, # 0.05% 슬리피지
    freq='4h',        # 데이터 빈도 명시
    # stop_exit=True,   # 진입 시 청산 고려
)

In [38]:
# 성과 통계 출력
print(portfolio.stats())

# 벤치마크(단순 보유)와 함께 성과 시각화
portfolio.plot().show()

Start                               2025-01-06 00:00:00
End                                 2025-08-17 20:00:00
Period                                224 days 00:00:00
Start Value                                    100000.0
End Value                                 202830.677791
Total Return [%]                             102.830678
Benchmark Return [%]                          13.286311
Max Gross Exposure [%]                            100.0
Total Fees Paid                            29306.969108
Max Drawdown [%]                               9.543224
Max Drawdown Duration                  30 days 00:00:00
Total Trades                                         96
Total Closed Trades                                  96
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  64.583333
Best Trade [%]                                  11.9598
Worst Trade [%]                               -5

# 테스트 - 전체 기간

In [40]:
import numpy as np
import pandas as pd
import vectorbt as vbt
from keras.models import load_model

# ---------------------------------
# 1. 저장된 모델 불러오기
# ---------------------------------
# 'my_lstm_model.h5' 부분에 실제 저장한 모델 파일 경로를 입력하세요.
print("모델을 불러오는 중입니다...")
model = load_model('./lstm/best_mtf_model.h5')
print("모델 로딩 완료!")

# ==============================================================================
# 2. 전체 기간 데이터 준비 (KeyError 해결 최종안)
# ==============================================================================
# (1) 피처를 처음부터 다시 생성하여 데이터 일관성을 보장합니다.

# 4시간봉 피처 생성: 원본 가격 데이터(OHLCV 포함)를 복사해서 시작합니다.
print("4시간봉 피처를 생성합니다...")
features_4h_new = price_data_4h.copy()
features_4h_new.ta.rsi(length=14, append=True, col_names=('RSI_14_4H',))
features_4h_new.ta.macd(fast=12, slow=26, signal=9, append=True, col_names=('MACD_12_26_9_4H', 'MACDh_12_26_9_4H', 'MACDs_12_26_9_4H'))
features_4h_new.ta.bbands(length=20, std=2, append=True, col_names=('BBL_20_2.0_4H', 'BBM_20_2.0_4H', 'BBU_20_2.0_4H', 'BBB_20_2.0_4H', 'BBP_20_2.0_4H'))

# 일봉 피처 생성
print("일봉 피처를 생성합니다...")
features_1d_new = price_data_1d.copy()
features_1d_new.ta.rsi(length=14, append=True, col_names=('RSI_14_1D',))
features_1d_new.ta.sma(length=50, append=True, col_names=('SMA_50_1D',))
features_1d_new.ta.adx(length=14, append=True, col_names=('ADX_14_1D', 'DMP_14_1D', 'DMN_14_1D'))

# 사용할 일봉 지표 컬럼만 선택합니다 (OHLCV 중복 방지)
daily_indicator_cols = ['RSI_14_1D', 'SMA_50_1D', 'ADX_14_1D', 'DMP_14_1D', 'DMN_14_1D']
features_1d_to_merge = features_1d_new[daily_indicator_cols]


# (2) 두 시간대 피처 결합
# 4시간봉 피처(OHLCV 포함)를 기준으로 일봉 지표를 합칩니다.
print("다중 시간대 피처를 결합합니다...")
final_features = pd.merge(features_4h_new, features_1d_to_merge, left_index=True, right_index=True, how='left')
final_features.fillna(method='ffill', inplace=True)
final_features.dropna(inplace=True)
print("데이터 준비 완료!")


# (3) Scaler 적용 (이전과 동일한 정렬 로직 사용)
fit_feature_names = scaler.feature_names_in_
final_features_aligned = final_features[fit_feature_names]
scaled_features_full = scaler.transform(final_features_aligned)


# (4) 시퀀스 데이터 생성 (lookback 값 확인!)
lookback = 30 # 훈련 시 사용했던 값으로 반드시 통일해야 합니다!
X_full = []
for i in range(lookback, len(scaled_features_full)):
    X_full.append(scaled_features_full[i-lookback:i])
X_full = np.array(X_full)


# ==============================================================================
# 3. 전체 기간에 대한 예측 수행 (★★★ 이 부분이 추가되었습니다 ★★★)
# ==============================================================================
print(f"총 {len(X_full)}개의 시퀀스에 대한 예측을 수행합니다...")
predictions_full = model.predict(X_full)

# 예측 결과를 각 확률 변수에 할당
prob_loss_full = predictions_full[:, 0]
prob_hold_full = predictions_full[:, 1]
prob_profit_full = predictions_full[:, 2]
print("예측 완료!")

# ==============================================================================
# 4. 시그널 생성 및 VectorBT 백테스팅
# ==============================================================================
# 예측이 시작되는 시점에 맞춰 인덱스를 정렬합니다.
signal_index_full = final_features.index[lookback:]

# 시그널 생성을 위한 빈 Series 생성
entries_full = pd.Series(False, index=price_data_4h.index) # 기준이 되는 4시간봉 인덱스 사용
exits_full = pd.Series(False, index=price_data_4h.index)

# 설정한 임계값(threshold)에 따라 진입/청산 신호 생성
# entry_threshold = 0.3 # 직접 최적화한 진입 임계값
# exit_threshold = 0.6  # 직접 최적화한 청산 임계값

# loc를 사용하여 정확한 위치에 시그널 할당
entries_full.loc[signal_index_full] = prob_profit_full > entry_threshold
exits_full.loc[signal_index_full] = prob_loss_full > exit_threshold

# 진입과 청산 신호가 겹칠 경우 진입을 우선 (ipynb 로직 반영)
conflicting_signals_full = entries_full & exits_full
exits_full[conflicting_signals_full] = False

# 백테스팅 실행 (ipynb의 설정값 반영)
full_portfolio = vbt.Portfolio.from_signals(
    close=price_data_4h['Close'], # 백테스팅 기준 가격: 4시간봉 종가
    entries=entries_full,
    exits=exits_full,
    init_cash=100000,
    fees=0.001,
    slippage=0.0005,
    freq='4h' # 봉의 주기
)

# 최종 성과 출력
print("\n--- 전체 기간 최종 백테스팅 결과 ---")
print(full_portfolio.stats())

# 누적 수익률 그래프 시각화
full_portfolio.plot().show()



모델을 불러오는 중입니다...
모델 로딩 완료!
4시간봉 피처를 생성합니다...
일봉 피처를 생성합니다...
다중 시간대 피처를 결합합니다...
데이터 준비 완료!
총 16980개의 시퀀스에 대한 예측을 수행합니다...



DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.



[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
예측 완료!

--- 전체 기간 최종 백테스팅 결과 ---
Start                               2017-09-25 12:00:00
End                                 2025-08-17 20:00:00
Period                               2883 days 12:00:00
Start Value                                    100000.0
End Value                            17398714910.892887
Total Return [%]                        17398614.910893
Benchmark Return [%]                        3760.802834
Max Gross Exposure [%]                            100.0
Total Fees Paid                       3506042633.281916
Max Drawdown [%]                               66.51127
Max Drawdown Duration                 282 days 04:00:00
Total Trades                                        672
Total Closed Trades                                 672
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  56.696

# 이상적인 전략

In [18]:
import pandas as pd
import vectorbt as vbt
import numpy as np

# --- 사전 준비 (이전 단계에서 이미 실행되었다고 가정) ---
# test_data: 훈련 데이터프레임 (OHLCV 및 피처 포함)
# labels_train: 삼중 장벽 기법으로 생성된 훈련 데이터의 레이블 (-1, 0, 1)
# lookback: 시퀀스 생성을 위한 lookback 기간 (예: 50)
# ---------------------------------------------------------

# 1. 백테스팅에 사용할 가격 데이터 준비
# labels_train은 lookback 기간 이후부터 생성되었으므로, 가격 데이터도 동일하게 맞춰줍니다.
price_close_test = test_data['Close'][lookback:]

# labels_test의 인덱스와 가격 데이터의 인덱스가 일치하는지 확인
# (get_triple_barrier_labels 함수에서 올바르게 생성했다면 일치해야 합니다)
aligned_labels_test = labels_test.reindex(price_close_test.index).dropna()
price_close_test = price_close_test.reindex(aligned_labels_test.index)

# 2. '정답' 레이블을 'entries'와 'exits' 신호로 변환
# '익절'(1)이 발생한 시점을 진입 신호로 간주
entries_from_labels = (aligned_labels_test == 1)

# '손절'(-1)이 발생한 시점을 청산 신호로 간주
exits_from_labels = (aligned_labels_test == -1)

# 3. vectorbt 포트폴리오 실행
# "Perfect Foresight Strategy" (완벽한 예측 전략)
perfect_portfolio = vbt.Portfolio.from_signals(
    close=price_close_test,
    entries=entries_from_labels,
    exits=exits_from_labels,
    init_cash=100000,
    fees=0.001,
    slippage=0.0005,
    freq='4h'
)

# 4. 결과 확인
print("--- 훈련 데이터와 정답 레이블을 이용한 백테스팅 결과 (이론적 상한선) ---")
print(perfect_portfolio.stats())

# 시각화
perfect_portfolio.plot(title="Performance with Perfect Foresight (testing Data)").show()

--- 훈련 데이터와 정답 레이블을 이용한 백테스팅 결과 (이론적 상한선) ---
Start                               2025-01-01 16:00:00
End                                 2025-08-17 20:00:00
Period                                228 days 08:00:00
Start Value                                    100000.0
End Value                                 597164.799341
Total Return [%]                             497.164799
Benchmark Return [%]                          16.932946
Max Gross Exposure [%]                            100.0
Total Fees Paid                            33773.163196
Max Drawdown [%]                               1.230422
Max Drawdown Duration                  11 days 16:00:00
Total Trades                                         60
Total Closed Trades                                  60
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  98.333333
Best Trade [%]                                11.935452
Wo