In [1]:
# limit gpu memory
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
    # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
        print(e)

2025-07-14 05:27:06.458874: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752470826.705936      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752470826.777408      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


1 Physical GPUs, 1 Logical GPUs


I0000 00:00:1752470841.169262      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import (Input, Conv2D, Dense, Dropout, 
                                     Reshape, MaxPooling2D, Flatten, 
                                     LayerNormalization, MultiHeadAttention,
                                     GlobalAveragePooling1D)
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras import utils as np_utils
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau

np.random.seed(1)
tf.random.set_seed(2)

# Input data files are available in the read-only "../input/" directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


/kaggle/input/fi-2010/Train_Dst_NoAuction_DecPre_CF_7.txt
/kaggle/input/fi-2010/Test_Dst_NoAuction_DecPre_CF_9.txt
/kaggle/input/fi-2010/Test_Dst_NoAuction_DecPre_CF_8.txt
/kaggle/input/fi-2010/Test_Dst_NoAuction_DecPre_CF_7.txt


Data preparation

In [6]:
def prepare_x(data):
    df1 = data[:40, :].T
    return np.array(df1)

def get_label(data):
    lob = data[-5:, :].T
    return lob

def data_classification(X, Y, T):
    [N, D] = X.shape
    df = np.array(X)
    dY = np.array(Y)
    dataY = dY[T - 1:N]
    dataX = np.zeros((N - T + 1, T, D))
    for i in range(T, N + 1):
        dataX[i - T] = df[i - T:i, :]
    return dataX.reshape(dataX.shape + (1,)), dataY

def prepare_x_y(data, k, T):
    x = prepare_x(data)
    y = get_label(data)
    x, y = data_classification(x, y, T=T)
    y = y[:,k] - 1
    y = np_utils.to_categorical(y, 3)
    return x, y

In [4]:
dec_data = np.loadtxt('/kaggle/input/fi-2010/Train_Dst_NoAuction_DecPre_CF_7.txt')
dec_train = dec_data[:, :int(np.floor(dec_data.shape[1] * 0.8))]
dec_val = dec_data[:, int(np.floor(dec_data.shape[1] * 0.8)):]
dec_test1 = np.loadtxt('/kaggle/input/fi-2010/Test_Dst_NoAuction_DecPre_CF_7.txt')
dec_test2 = np.loadtxt('/kaggle/input/fi-2010/Test_Dst_NoAuction_DecPre_CF_8.txt')
dec_test3 = np.loadtxt('/kaggle/input/fi-2010/Test_Dst_NoAuction_DecPre_CF_9.txt')
dec_test = np.hstack((dec_test1, dec_test2, dec_test3))

k = 4  # 预测时间范围
T = 100  # 单个输入的长度
n_hiddens = 64
checkpoint_filepath = 'my_model_weights.weights.h5'

trainX_CNN, trainY_CNN = prepare_x_y(dec_train, k, T)
valX_CNN, valY_CNN = prepare_x_y(dec_val, k, T)
testX_CNN, testY_CNN = prepare_x_y(dec_test, k, T)

print(trainX_CNN.shape, trainY_CNN.shape)
print(valX_CNN.shape, valY_CNN.shape)
print(testX_CNN.shape, testY_CNN.shape)

(203701, 100, 40, 1) (203701, 3)
(50851, 100, 40, 1) (50851, 3)
(139488, 100, 40, 1) (139488, 3)


Model Architecture

In [7]:
class TransformerEncoderBlock(layers.Layer):
    def __init__(self, d_model, num_heads, ff_dim, dropout_rate=0.1):
        super(TransformerEncoderBlock, self).__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = keras.Sequential([
            Dense(ff_dim, activation=None),
            layers.LeakyReLU(negative_slope=0.01),
            Dense(d_model),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(dropout_rate)
        self.dropout2 = Dropout(dropout_rate)

    def call(self, inputs, training=None):
        # 多头注意力
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        
        # 前馈网络
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [8]:

# 模型架构

def create_deeplob_transformer(T, NF, n_transformer_blocks=2, d_model=64, num_heads=4, ff_dim=96):
    input_lmd = Input(shape=(T, NF, 1))
    
    # 卷积块1
    
    conv_first1 = Conv2D(32, (1, 2), strides=(1, 2))(input_lmd)
    conv_first1 = layers.LeakyReLU(negative_slope=0.01)(conv_first1)
    conv_first1 = Conv2D(32, (4, 1), padding='same')(conv_first1)
    conv_first1 = layers.LeakyReLU(negative_slope=0.01)(conv_first1)
    conv_first1 = Conv2D(32, (4, 1), padding='same')(conv_first1)
    conv_first1 = layers.LeakyReLU(negative_slope=0.01)(conv_first1)
    
    # 卷积块2

    conv_first2 = Conv2D(32, (1, 2), strides=(1, 2))(conv_first1)
    conv_first2 = layers.LeakyReLU(negative_slope=0.01)(conv_first2)
    conv_first2 = Conv2D(32, (4, 1), padding='same')(conv_first2)
    conv_first2 = layers.LeakyReLU(negative_slope=0.01)(conv_first2)
    conv_first2 = Conv2D(32, (4, 1), padding='same')(conv_first2)
    conv_first2 = layers.LeakyReLU(negative_slope=0.01)(conv_first2)

    # reshape数据以适应Transformer输入
    
    target_shape = (int(conv_first2.shape[1]), int(conv_first2.shape[2]) * int(conv_first2.shape[3]))
    conv_reshape = Reshape(target_shape)(conv_first2)

    
    # 位置编码
    positions = tf.range(start=0, limit=conv_reshape.shape[1], delta=1)
    position_embedding = layers.Embedding(input_dim=conv_reshape.shape[1], output_dim=d_model)(positions)
    
    # 投影到d_model维度
    transformer_input = Dense(d_model)(conv_reshape)
    transformer_input = transformer_input + position_embedding
    
    # 应用多个transformer encoder块
    x = transformer_input
    for _ in range(n_transformer_blocks):
        x = TransformerEncoderBlock(d_model, num_heads, ff_dim, dropout_rate=0.2)(x)
    
    # 全局平均池化
    x = GlobalAveragePooling1D()(x)
    

    x = Dropout(0.4)(x)
    out = Dense(3, activation='softmax')(x)
    
    model = Model(inputs=input_lmd, outputs=out)
    
    # 使用带有weight_decay的Adam优化器
    adam = keras.optimizers.Adam(learning_rate=0.0003, weight_decay=3e-4)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [9]:
deeplob = create_deeplob_transformer(trainX_CNN.shape[1], trainX_CNN.shape[2])

deeplob.summary()

Model Training

In [10]:

# 模型检查点回调
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1
)

# 早停回调
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,  # 如果验证损失在5个epoch内没有改善，则停止训练
    restore_best_weights=True,  # 恢复最佳权重
    verbose=1
)

# 降低学习率
reduce_learningrate_callback = ReduceLROnPlateau(
    monitor='val_loss',  
    factor=0.2,          
    patience=2,
    min_lr=1e-6
)

In [11]:
# 训练模型
history = deeplob.fit(
    trainX_CNN, trainY_CNN, 
    validation_data=(valX_CNN, valY_CNN), 
    epochs=50,
    batch_size=128, 
    verbose=2, 
    callbacks=[model_checkpoint_callback, early_stopping_callback, reduce_learningrate_callback]
)

Epoch 1/50


I0000 00:00:1752471033.585963      94 service.cc:148] XLA service 0x79ed4c004630 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1752471033.586707      94 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1752471034.648971      94 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1752471041.725542      94 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.



Epoch 1: val_loss improved from inf to 1.08936, saving model to my_model_weights.weights.h5
1592/1592 - 65s - 41ms/step - accuracy: 0.4155 - loss: 1.0337 - val_accuracy: 0.3723 - val_loss: 1.0894 - learning_rate: 3.0000e-04
Epoch 2/50

Epoch 2: val_loss did not improve from 1.08936
1592/1592 - 34s - 22ms/step - accuracy: 0.4253 - loss: 1.0152 - val_accuracy: 0.3723 - val_loss: 1.0895 - learning_rate: 3.0000e-04
Epoch 3/50

Epoch 3: val_loss did not improve from 1.08936
1592/1592 - 35s - 22ms/step - accuracy: 0.5742 - loss: 0.8442 - val_accuracy: 0.3723 - val_loss: 1.0954 - learning_rate: 3.0000e-04
Epoch 4/50

Epoch 4: val_loss improved from 1.08936 to 1.07659, saving model to my_model_weights.weights.h5
1592/1592 - 35s - 22ms/step - accuracy: 0.6467 - loss: 0.7418 - val_accuracy: 0.3781 - val_loss: 1.0766 - learning_rate: 6.0000e-05
Epoch 5/50

Epoch 5: val_loss improved from 1.07659 to 0.93086, saving model to my_model_weights.weights.h5
1592/1592 - 35s - 22ms/step - accuracy: 0.683

Model Testing

In [12]:
deeplob.load_weights(checkpoint_filepath)
pred = deeplob.predict(testX_CNN, batch_size=128)

[1m1090/1090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step


In [13]:
print('accuracy_score:', accuracy_score(np.argmax(testY_CNN, axis=1), np.argmax(pred, axis=1)))
print(classification_report(np.argmax(testY_CNN, axis=1), np.argmax(pred, axis=1), digits=4))

accuracy_score: 0.7711917871071347
              precision    recall  f1-score   support

           0     0.7368    0.7750    0.7554     47915
           1     0.8377    0.7937    0.8151     48050
           2     0.7414    0.7422    0.7418     43523

    accuracy                         0.7712    139488
   macro avg     0.7720    0.7703    0.7708    139488
weighted avg     0.7730    0.7712    0.7717    139488

