In [2]:
# 라이브러리 설치
!pip install -q torch torchvision pandas scikit-learn

# 파일 업로드
from google.colab import files
uploaded = files.upload()

# - best_val_model_pytorch
# - AAPL_minute_orderbook_2019_01-07_combined.csv


Saving AAPL_minute_orderbook_2019_01-07_combined.csv to AAPL_minute_orderbook_2019_01-07_combined (1).csv
Saving best_val_model_pytorch to best_val_model_pytorch (1)


In [3]:
# 3: 데이터 로드 및 전처리
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score

df = pd.read_csv("AAPL_minute_orderbook_2019_01-07_combined.csv")

# Feature extraction: for each of the 10 book levels take, in this order,
# ask price, ask size, bid price, bid size (40 columns in total).
features = [
    f"{prefix}_{lvl:02d}"
    for lvl in range(10)
    for prefix in ("ask_px", "ask_sz", "bid_px", "bid_sz")
]
X = df[features].values.astype(np.float32)

# Normalisation: standardise every feature column.
# NOTE(review): the scaler is fit on this evaluation file itself; confirm that this
# matches the normalisation the saved checkpoint was trained with.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Sequence construction: overlapping windows of T consecutive rows,
# window k covering rows k .. k + T - 1.
T = 100
if len(X_scaled) < T:
    # Fail with a readable message instead of a negative-dimension error (or an empty
    # tensor that only breaks later, during inference).
    raise ValueError(
        f"Need at least {T} rows to build one input window, got {len(X_scaled)}"
    )

# sliding_window_view returns a read-only view of shape (num_windows, num_features, T).
# Move the time axis to the middle and copy into a writable, contiguous float32 array so
# the result can be handed to torch.from_numpy.
windows = np.lib.stride_tricks.sliding_window_view(X_scaled, T, axis=0)
X_seq = np.array(windows.transpose(0, 2, 1), dtype=np.float32, order="C")
x_input = torch.from_numpy(X_seq).unsqueeze(1)  # (N, 1, T, 40)


In [4]:
# Label generation: direction of the mid price from one row to the next.
#   2 = next mid price is higher, 1 = unchanged, 0 = otherwise.
df['mid_price'] = (df['bid_px_00'] + df['ask_px_00']) / 2
df['mid_price_future'] = df['mid_price'].shift(-1)
df['label'] = (df['mid_price_future'] > df['mid_price']).astype(int) * 2 + \
              (df['mid_price_future'] == df['mid_price']).astype(int) * 1

# The final row has no successor: its mid_price_future is NaN, both comparisons above are
# False, and it ends up with label 0. The label column is an int column and never holds
# NaN, so dropna(subset=['label']) cannot remove that row. Exclude it explicitly instead.
# df itself keeps its length, so re-running this cell produces the same result.
y_true = df['label'].astype(int).values[T-1:-1]

# Window k ends at row k + T - 1, so the last window is the one without a real label.
# Drop it as well to keep inputs and labels the same length.
x_input = x_input[:len(y_true)]


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class deeplob(nn.Module):
    """Convolution blocks + inception module + LSTM classifier over order-book windows.

    The network takes a tensor of shape (B, 1, T, W):

    * conv1 and conv2 each halve the width with a (1, 2) kernel of stride (1, 2), and
      conv3 collapses 10 remaining columns with a (1, 10) kernel, so W must be 40 for the
      width to become 1 before it is squeezed away in ``forward``.
    * every block also applies two unpadded (4, 1) convolutions, each shortening the
      time axis by 3, i.e. by 18 over the three blocks.
    * three inception branches with 64 channels each are concatenated to 192 channels
      and fed, one time step at a time, to a single-layer LSTM with 64 hidden units.
    * the hidden state of the last time step goes through a linear layer that produces
      ``y_len`` raw scores (no softmax).

    Args:
        y_len: number of output scores per sample.
    """

    @staticmethod
    def _conv_unit(in_ch, out_ch, kernel, activation, **conv_kwargs):
        """Return the [Conv2d, activation, BatchNorm2d] triple the network is built from."""
        return [
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel, **conv_kwargs),
            activation(),
            nn.BatchNorm2d(out_ch),
        ]

    def __init__(self, y_len):
        super().__init__()
        self.y_len = y_len
        # Field added to prevent an error; forward() does not apply it.
        self.dropout = nn.Dropout(p=0.2)

        def leaky():
            return nn.LeakyReLU(0.01)

        unit = self._conv_unit
        along_time = (4, 1)   # kernel spanning 4 time steps, 1 column
        pair_cols = (1, 2)    # kernel merging 2 neighbouring columns

        # Convolution block 1 (LeakyReLU)
        self.conv1 = nn.Sequential(
            *unit(1, 32, pair_cols, leaky, stride=(1, 2)),
            *unit(32, 32, along_time, leaky),
            *unit(32, 32, along_time, leaky),
        )

        # Convolution block 2 (Tanh)
        self.conv2 = nn.Sequential(
            *unit(32, 32, pair_cols, nn.Tanh, stride=(1, 2)),
            *unit(32, 32, along_time, nn.Tanh),
            *unit(32, 32, along_time, nn.Tanh),
        )

        # Convolution block 3 (LeakyReLU); the (1, 10) kernel merges 10 columns into one
        self.conv3 = nn.Sequential(
            *unit(32, 32, (1, 10), leaky),
            *unit(32, 32, along_time, leaky),
            *unit(32, 32, along_time, leaky),
        )

        # Inception branches: 1x1 -> 3x1, 1x1 -> 5x1, and max-pool -> 1x1.
        # All of them keep the time length unchanged.
        self.inp1 = nn.Sequential(
            *unit(32, 64, (1, 1), leaky, padding='same'),
            *unit(64, 64, (3, 1), leaky, padding='same'),
        )
        self.inp2 = nn.Sequential(
            *unit(32, 64, (1, 1), leaky, padding='same'),
            *unit(64, 64, (5, 1), leaky, padding='same'),
        )
        self.inp3 = nn.Sequential(
            nn.MaxPool2d((3, 1), stride=(1, 1), padding=(1, 0)),
            *unit(32, 64, (1, 1), leaky, padding='same'),
        )

        # Recurrent layer and output layer
        self.lstm = nn.LSTM(input_size=192, hidden_size=64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, self.y_len)

    def forward(self, x):
        """Map a (B, 1, T, 40) batch to (B, y_len) raw scores."""
        feature_map = self.conv3(self.conv2(self.conv1(x)))  # (B, 32, T - 18, 1)

        # Run the three inception branches on the same feature map and stack their
        # channels: 3 x 64 = 192.
        branches = [branch(feature_map) for branch in (self.inp1, self.inp2, self.inp3)]
        stacked = torch.cat(branches, dim=1)                 # (B, 192, T - 18, 1)

        # Drop the width axis and put time second, as the batch_first LSTM expects.
        sequence = stacked.squeeze(-1).permute(0, 2, 1)      # (B, T - 18, 192)

        hidden_states, _ = self.lstm(sequence)
        final_state = hidden_states[:, -1, :]                # last time step only

        # Raw scores are returned: CrossEntropyLoss already includes the softmax.
        return self.fc1(final_state)


In [6]:
# Load the model and run inference (uses the Colab GPU when one is available).
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# SECURITY: weights_only=False unpickles arbitrary Python objects and can execute code
# embedded in the file. Only load checkpoints that come from a trusted source.
checkpoint = torch.load("best_val_model_pytorch", map_location=device, weights_only=False)
if isinstance(checkpoint, nn.Module):
    # The file holds a complete pickled model object: use it directly.
    model = checkpoint
else:
    # Otherwise treat the file as a state_dict and load it into a freshly built network
    # (previously a fresh network was built and then unconditionally thrown away).
    model = deeplob(y_len=3)
    model.load_state_dict(checkpoint)
model = model.to(device)
model.eval()  # inference mode: BatchNorm uses running statistics

# Predict in batches to keep it fast.
batch_size = 256
loader = DataLoader(TensorDataset(x_input), batch_size=batch_size)

preds = []
with torch.no_grad():
    for (xb,) in loader:
        out = model(xb.to(device))
        # Predicted class = index of the largest score in each row.
        preds.append(torch.argmax(out, dim=1).cpu().numpy())

y_pred = np.concatenate(preds)


In [7]:
# Report accuracy.
from sklearn.metrics import accuracy_score

# Compare only the positions that have both a label and a prediction. Slicing y_true
# alone fails with a length-mismatch error whenever y_true is the shorter array.
n_eval = min(len(y_true), len(y_pred))
acc = accuracy_score(y_true[:n_eval], y_pred[:n_eval])
print(f"\n DeepLOB Prediction Accuracy: {acc:.4f}")



✅ DeepLOB Prediction Accuracy: 0.4483
