### 01.悟空模型训练 --- 开始啦

In [3]:
## 02.MLP

# 替换 WukongLayer 为简单 MLP
class SimpleMLP(tf.keras.Model):
    def __init__(self, hidden_units, output_dim, name="simple_mlp"):
        super().__init__(name=name)
        self.dense_layers = [tf.keras.layers.Dense(units, activation="relu") for units in hidden_units]
        self.output_layer = tf.keras.layers.Dense(output_dim)

    def call(self, inputs):
        # 假设 inputs 是嵌入向量列表 [input_0, input_1]
        x = tf.concat(inputs, axis=1)  # 拼接两个输入 (BATCH_SIZE, 2 * INPUT_DIM)
        for layer in self.dense_layers:
            x = layer(x)
        outputs = self.output_layer(x)  # (BATCH_SIZE, 1)
        return outputs

# 替换模型中的 interaction_layers 为简单 MLP
mlp_model = SimpleMLP(
    hidden_units=[64, 32],
    output_dim=1,
)

# 定义损失函数和优化器
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# AUC 指标
auc_metric = tf.keras.metrics.AUC(num_thresholds=200)


# 自定义训练步骤
@tf.function
def train_step(inputs, labels):
    with tf.GradientTape() as tape:
        logits = mlp_model([inputs['input_0'], inputs['input_1']])
        loss_value = loss_fn(labels, logits)
    
    gradients = tape.gradient(loss_value, mlp_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, mlp_model.trainable_variables))
    
    auc_metric.update_state(labels, tf.sigmoid(logits))
    return loss_value

# 训练循环
for epoch in range(10):  # 训练 10 轮
    for batch in dataset:
        inputs_batch, labels_batch = batch
        loss_value = train_step(inputs_batch, labels_batch)
    
    auc_result = auc_metric.result().numpy()
    print(f"Epoch {epoch}, Loss: {loss_value:.4f}, AUC: {auc_result:.4f}")
    auc_metric.reset_state()


Epoch 0, Loss: 0.6884, AUC: 0.5784
Epoch 1, Loss: 0.6500, AUC: 0.7078
Epoch 2, Loss: 0.6259, AUC: 0.8000
Epoch 3, Loss: 0.5993, AUC: 0.8588
Epoch 4, Loss: 0.5729, AUC: 0.9020
Epoch 5, Loss: 0.5493, AUC: 0.9294
Epoch 6, Loss: 0.5269, AUC: 0.9608
Epoch 7, Loss: 0.5041, AUC: 0.9725
Epoch 8, Loss: 0.4821, AUC: 0.9843
Epoch 9, Loss: 0.4602, AUC: 0.9922


2025-07-03 21:05:57.121434: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [None]:
# 03.Final MLP


import numpy as np
import tensorflow as tf


class FinalMLP(tf.keras.Model):
    def __init__(
        self,
        hidden_units=[64, 32],
        output_dim=1,
        dropout_rate=0.2,
        kernel_regularizer=None,
        activation="relu",
        name="final_mlp"
    ):
        super().__init__(name=name)
        self.hidden_units = hidden_units
        self.output_dim = output_dim
        self.dropout_rate = dropout_rate
        self.kernel_regularizer = kernel_regularizer
        self.activation = activation

        # 构建隐藏层
        self.dense_layers = []
        for units in hidden_units:
            self.dense_layers.append(
                tf.keras.layers.Dense(
                    units,
                    activation=activation,
                    kernel_regularizer=kernel_regularizer,
                    kernel_initializer="he_normal"
                )
            )
            self.dense_layers.append(
                tf.keras.layers.Dropout(dropout_rate)
            )

        # 输出层
        self.output_layer = tf.keras.layers.Dense(
            output_dim,
            activation=None,
            kernel_regularizer=kernel_regularizer,
            kernel_initializer="he_normal"
        )

    def call(self, inputs):
        # 假设 inputs 是两个输入特征 [input_0, input_1]
        x = tf.concat(inputs, axis=1)  # (BATCH_SIZE, 2 * INPUT_DIM)
        for layer in self.dense_layers:
            x = layer(x)
        outputs = self.output_layer(x)  # (BATCH_SIZE, 1)
        return outputs

    def get_config(self):
        config = super().get_config()
        config.update({
            "hidden_units": self.hidden_units,
            "output_dim": self.output_dim,
            "dropout_rate": self.dropout_rate,
            "kernel_regularizer": tf.keras.regularizers.serialize(self.kernel_regularizer),
            "activation": self.activation
        })
        return config


# 初始化 FinalMLP 模型
final_mlp_model = FinalMLP(
    hidden_units=[64, 32, 16],  # 增加隐藏层深度
    output_dim=1,
    dropout_rate=0.2,
    kernel_regularizer=tf.keras.regularizers.l2(1e-4),  # L2 正则化
    activation="relu"
)

# # 使用 CosineDecay 动态调整学习率
# learning_rate = tf.keras.optimizers.schedules.CosineDecay(
#     initial_learning_rate=0.001,
#     decay_steps=1000
# )
# optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)


# 定义损失函数和指标
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
auc_metric = tf.keras.metrics.AUC(num_thresholds=200)


# 自定义训练步骤
@tf.function
def train_step(inputs, labels):
    with tf.GradientTape() as tape:
        logits = final_mlp_model([inputs['input_0'], inputs['input_1']])
        loss_value = loss_fn(labels, logits)
    
    gradients = tape.gradient(loss_value, final_mlp_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, final_mlp_model.trainable_variables))
    
    auc_metric.update_state(labels, tf.sigmoid(logits))
    return loss_value


# 训练循环（增加训练轮数）
for epoch in range(10):  # 增加到 20 轮
    for batch in dataset:
        inputs_batch, labels_batch = batch
        loss_value = train_step(inputs_batch, labels_batch)
    
    auc_result = auc_metric.result().numpy()
    print(f"Epoch {epoch}, Loss: {loss_value:.4f}, AUC: {auc_result:.4f}")
    auc_metric.reset_state()

In [None]:
# 04 DLRM 
import numpy as np
import tensorflow as tf


# DLRM 模型定义（仅使用密集特征）
class DLRMModel(tf.keras.Model):
    def __init__(
        self,
        num_dense_features=128,
        bottom_mlp_units=[64, 32],
        top_mlp_units=[64, 32],
        dropout_rate=0.2,
        kernel_regularizer=None,
        name="dlrm"
    ):
        super().__init__(name=name)
        self.num_dense_features = num_dense_features
        self.bottom_mlp_units = bottom_mlp_units
        self.top_mlp_units = top_mlp_units
        self.dropout_rate = dropout_rate
        self.kernel_regularizer = kernel_regularizer

        # Bottom MLP
        self.bottom_mlp = tf.keras.Sequential(name="bottom_mlp")
        for units in bottom_mlp_units:
            self.bottom_mlp.add(
                tf.keras.layers.Dense(
                    units,
                    activation="relu",
                    kernel_regularizer=kernel_regularizer,
                    kernel_initializer="he_normal"
                )
            )
            self.bottom_mlp.add(tf.keras.layers.Dropout(dropout_rate))

        # Interaction Layer (内积)
        self.interaction = tf.keras.layers.Dot(axes=1, name="interaction")

        # Top MLP
        self.top_mlp = tf.keras.Sequential(name="top_mlp")
        for units in top_mlp_units:
            self.top_mlp.add(
                tf.keras.layers.Dense(
                    units,
                    activation="relu",
                    kernel_regularizer=kernel_regularizer,
                    kernel_initializer="he_normal"
                )
            )
            self.top_mlp.add(tf.keras.layers.Dropout(dropout_rate))
        self.top_mlp.add(tf.keras.layers.Dense(1, activation=None, name="output"))

    def call(self, inputs):
        # inputs: [input_0, input_1]
        input_0, input_1 = inputs  # 直接解包两个密集特征

        # Bottom MLP: (B, num_dense * 2) -> (B, bottom_output_dim)
        bottom_input = tf.concat([input_0, input_1], axis=1)  # (B, 2 * num_dense)
        bottom_output = self.bottom_mlp(bottom_input)  # (B, bottom_output_dim)

        # Interaction Layer: (B, num_dense) -> (B, 1) 内积
        interactions = self.interaction([input_0, input_1])  # (B, 1)

        # Concatenate Bottom Output and Interactions: (B, bottom_output_dim + 1)
        combined = tf.concat([bottom_output, interactions], axis=1)

        # Top MLP: (B, ...) -> (B, 1)
        output = self.top_mlp(combined)  # (B, 1)
        return output

    def get_config(self):
        config = super().get_config()
        config.update({
            "num_dense_features": self.num_dense_features,
            "bottom_mlp_units": self.bottom_mlp_units,
            "top_mlp_units": self.top_mlp_units,
            "dropout_rate": self.dropout_rate,
            "kernel_regularizer": tf.keras.regularizers.serialize(self.kernel_regularizer),
        })
        return config



# 初始化 DLRM 模型（仅使用密集特征）
dlrm_model = DLRMModel(
    num_dense_features=INPUT_DIM,
    bottom_mlp_units=[64, 32],
    top_mlp_units=[64, 32],
    dropout_rate=0.2,
    kernel_regularizer=tf.keras.regularizers.l2(1e-4)
)

# 定义损失函数和优化器
learning_rate = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=0.001,
    decay_steps=1000
)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
auc_metric = tf.keras.metrics.AUC(num_thresholds=200)


# 自定义训练步骤
@tf.function
def train_step(inputs, labels):
    with tf.GradientTape() as tape:
        logits = dlrm_model([inputs['input_0'], inputs['input_1']])
        loss_value = loss_fn(labels, logits)
    
    gradients = tape.gradient(loss_value, dlrm_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, dlrm_model.trainable_variables))
    
    auc_metric.update_state(labels, tf.sigmoid(logits))
    return loss_value


# 训练循环
for epoch in range(10):  # 训练 10 轮
    for batch in dataset:
        inputs_batch, labels_batch = batch
        loss_value = train_step(inputs_batch, labels_batch)
    
    auc_result = auc_metric.result().numpy()
    print(f"Epoch {epoch}, Loss: {loss_value:.4f}, AUC: {auc_result:.4f}")
    auc_metric.reset_state()

In [None]:
# 01 wukong

import numpy as np
import tensorflow as tf
from wukong import Wukong  # 替换为你的实际模块路径

# 模型参数
BATCH_SIZE = 32
INPUT_DIM = 128


# 生成数据（每个样本的结构）
def generate_data():
    inputs_0 = np.random.rand(BATCH_SIZE, INPUT_DIM).astype(np.float32)
    inputs_1 = np.random.rand(BATCH_SIZE, INPUT_DIM).astype(np.float32)
    labels = np.random.randint(0, 2, (BATCH_SIZE, 1)).astype(np.float32)
    
    # 将每个样本的输入和标签打包为元组
    dataset = tf.data.Dataset.from_tensor_slices(({'input_0': inputs_0, 'input_1': inputs_1}, labels))
    return dataset.shuffle(1000).batch(BATCH_SIZE)

import numpy as np
import tensorflow as tf


def generate_data_v1(BATCH_SIZE=32, INPUT_DIM=128):
    # 生成两个输入特征
    inputs_0 = np.random.rand(BATCH_SIZE, INPUT_DIM).astype(np.float32)
    inputs_1 = np.random.rand(BATCH_SIZE, INPUT_DIM).astype(np.float32)

    # 生成伪标签：正样本的特征均值偏高
    feature_mean = (np.mean(inputs_0, axis=1) + np.mean(inputs_1, axis=1)) / 2

    # 生成标签：正样本的概率与均值相关
    # 通过 sigmoid 引入非线性关系，加入噪声使模型难以完美拟合
    prob = 1 / (1 + np.exp(-feature_mean + np.random.normal(0, 1.0, size=BATCH_SIZE)))
    
    # 将 prob 重塑为 (BATCH_SIZE, 1) 以匹配 size=(BATCH_SIZE, 1)
    prob = prob.reshape(BATCH_SIZE, 1)
    
    # 生成二项式分布的标签
    labels = np.random.binomial(1, prob, size=(BATCH_SIZE, 1)).astype(np.float32)

    # 构建 Dataset
    dataset = tf.data.Dataset.from_tensor_slices(
        ({'input_0': inputs_0, 'input_1': inputs_1}, labels)
    ).shuffle(1000).batch(BATCH_SIZE)

    return dataset


# 创建 Dataset
dataset = generate_data_v1(BATCH_SIZE, INPUT_DIM)




# 初始化模型
model = Wukong(
    num_layers=2,
    # num_sparse_emb=10000,
    num_sparse_emb=256,
    dim_emb=128,
    num_emb_lcb=16,
    num_emb_fmb=16,
    rank_fmb=8,
    num_hidden_wukong=2,
    dim_hidden_wukong=16,
    num_hidden_head=2,
    dim_hidden_head=32,
    dim_output=1,
    dropout=0.1,  # 增加 Dropout
    kernel_regularizer=tf.keras.regularizers.l2(1e-3)  # L2 正则化
)

# 定义损失函数和优化器
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# AUC 指标
auc_metric = tf.keras.metrics.AUC(num_thresholds=200)

# 自定义训练步骤
@tf.function
def train_step(inputs, labels):
    with tf.GradientTape() as tape:
        logits = model([inputs['input_0'], inputs['input_1']])
        loss_value = loss_fn(labels, logits)
    
    gradients = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    auc_metric.update_state(labels, tf.sigmoid(logits))
    return loss_value

# 训练循环
for epoch in range(10):  # 训练 10 轮
    for batch in dataset:
        inputs_batch, labels_batch = batch
        loss_value = train_step(inputs_batch, labels_batch)
    
    auc_result = auc_metric.result().numpy()
    print(f"Epoch {epoch}, Loss: {loss_value:.4f}, AUC: {auc_result:.4f}")
    auc_metric.reset_state()  # ✅ 修复点：使用 reset_state()


2025-07-04 16:28:12.350351: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-04 16:28:12.364852: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751617692.382208   48353 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751617692.386953   48353 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1751617692.400801   48353 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

dim_emb ****: 128


**2025-07-04 16:28:27,290 - DEBUG - f02-01:inputs : Tensor("Placeholder:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,290 - DEBUG - f02-01:inputs : Tensor("Placeholder:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,306 - DEBUG - f02-01-01:inputs : Tensor("Placeholder:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,306 - DEBUG - f02-01-01:inputs : Tensor("Placeholder:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,310 - DEBUG - f02-01-02:outputs : Tensor("lcb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,310 - DEBUG - f02-01-02:outputs : Tensor("lcb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,318 - DEBUG - f02-01-03:outputs : Tensor("lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,318 - DEBUG - f02-01-03:outputs : Tensor("lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,323 - DEBUG - f02-01-04:outputs : Tensor("lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,323 - DEBUG - f02-01-04:outputs : Tensor("lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,326 - DEBUG - f02-02:lcb : Tensor("lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,326 - DEBUG - f02-02:lcb : Tensor("lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

input_shape:{} (32, 256, 128)


**2025-07-04 16:28:27,330 - DEBUG - f02-02-01:inputs : Tensor("Placeholder:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,330 - DEBUG - f02-02-01:inputs : Tensor("Placeholder:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,335 - DEBUG - f02-02-02:outputs : Tensor("fmb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,335 - DEBUG - f02-02-02:outputs : Tensor("fmb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,342 - DEBUG - f02-02-03:outputs : Tensor("fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,342 - DEBUG - f02-02-03:outputs : Tensor("fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,346 - DEBUG - f02-02-03-01:outputs : Tensor("fmb_1/matmul_1:0", shape=(32, 256, 8), dtype=float32)**

**2025-07-04 16:28:27,346 - DEBUG - f02-02-03-01:outputs : Tensor("fmb_1/matmul_1:0", shape=(32, 256, 8), dtype=float32)**

**2025-07-04 16:28:27,350 - DEBUG - f02-02-03-02:num_emb_in : 256**

**2025-07-04 16:28:27,350 - DEBUG - f02-02-03-02:num_emb_in : 256**

**2025-07-04 16:28:27,353 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:27,353 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:27,356 - DEBUG - f02-02-04:outputs : Tensor("fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,356 - DEBUG - f02-02-04:outputs : Tensor("fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,360 - DEBUG - f02-02-05:outputs : Tensor("fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,360 - DEBUG - f02-02-05:outputs : Tensor("fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,405 - DEBUG - f02-02-06:outputs : Tensor("fmb_1/mlp_1_1/dense_6_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,405 - DEBUG - f02-02-06:outputs : Tensor("fmb_1/mlp_1_1/dense_6_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,409 - DEBUG - f02-02-07:outputs : Tensor("fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,409 - DEBUG - f02-02-07:outputs : Tensor("fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,412 - DEBUG - f02-03:fmb : Tensor("fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,412 - DEBUG - f02-03:fmb : Tensor("fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,417 - DEBUG - f02-04-01:inputs : Tensor("Placeholder:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,417 - DEBUG - f02-04-01:inputs : Tensor("Placeholder:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,421 - DEBUG - f02-04-02:outputs : Tensor("residual_projection_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,421 - DEBUG - f02-04-02:outputs : Tensor("residual_projection_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,428 - DEBUG - f02-04-03:outputs : Tensor("residual_projection_1/matmul:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,428 - DEBUG - f02-04-03:outputs : Tensor("residual_projection_1/matmul:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,432 - DEBUG - f02-04-04:outputs : Tensor("residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,432 - DEBUG - f02-04-04:outputs : Tensor("residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,435 - DEBUG - f02-05-01:outputs : Tensor("concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,435 - DEBUG - f02-05-01:outputs : Tensor("concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,438 - DEBUG - f02-05-02:residual_res : Tensor("residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,438 - DEBUG - f02-05-02:residual_res : Tensor("residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,448 - DEBUG - f02-05:outputs : Tensor("layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,448 - DEBUG - f02-05:outputs : Tensor("layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

dim_emb ****: 128


**2025-07-04 16:28:27,458 - DEBUG - f02-01:inputs : Tensor("Placeholder:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,458 - DEBUG - f02-01:inputs : Tensor("Placeholder:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,462 - DEBUG - f02-01-01:inputs : Tensor("Placeholder:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,462 - DEBUG - f02-01-01:inputs : Tensor("Placeholder:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,466 - DEBUG - f02-01-02:outputs : Tensor("lcb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,466 - DEBUG - f02-01-02:outputs : Tensor("lcb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,474 - DEBUG - f02-01-03:outputs : Tensor("lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,474 - DEBUG - f02-01-03:outputs : Tensor("lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,479 - DEBUG - f02-01-04:outputs : Tensor("lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,479 - DEBUG - f02-01-04:outputs : Tensor("lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,482 - DEBUG - f02-02:lcb : Tensor("lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,482 - DEBUG - f02-02:lcb : Tensor("lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

input_shape:{} (32, 32, 128)


**2025-07-04 16:28:27,485 - DEBUG - f02-02-01:inputs : Tensor("Placeholder:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,485 - DEBUG - f02-02-01:inputs : Tensor("Placeholder:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,490 - DEBUG - f02-02-02:outputs : Tensor("fmb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,490 - DEBUG - f02-02-02:outputs : Tensor("fmb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,497 - DEBUG - f02-02-03:outputs : Tensor("fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,497 - DEBUG - f02-02-03:outputs : Tensor("fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,502 - DEBUG - f02-02-03-01:outputs : Tensor("fmb_1/matmul_1:0", shape=(32, 32, 8), dtype=float32)**

**2025-07-04 16:28:27,502 - DEBUG - f02-02-03-01:outputs : Tensor("fmb_1/matmul_1:0", shape=(32, 32, 8), dtype=float32)**

**2025-07-04 16:28:27,507 - DEBUG - f02-02-03-02:num_emb_in : 32**

**2025-07-04 16:28:27,507 - DEBUG - f02-02-03-02:num_emb_in : 32**

**2025-07-04 16:28:27,510 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:27,510 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:27,513 - DEBUG - f02-02-04:outputs : Tensor("fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:27,513 - DEBUG - f02-02-04:outputs : Tensor("fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:27,515 - DEBUG - f02-02-05:outputs : Tensor("fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:27,515 - DEBUG - f02-02-05:outputs : Tensor("fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:27,560 - DEBUG - f02-02-06:outputs : Tensor("fmb_1/mlp_2_1/dense_9_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,560 - DEBUG - f02-02-06:outputs : Tensor("fmb_1/mlp_2_1/dense_9_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,565 - DEBUG - f02-02-07:outputs : Tensor("fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,565 - DEBUG - f02-02-07:outputs : Tensor("fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,569 - DEBUG - f02-03:fmb : Tensor("fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,569 - DEBUG - f02-03:fmb : Tensor("fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,575 - DEBUG - f02-05-01:outputs : Tensor("concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,575 - DEBUG - f02-05-01:outputs : Tensor("concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,579 - DEBUG - f02-05-02:residual_res : Tensor("Placeholder:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,579 - DEBUG - f02-05-02:residual_res : Tensor("Placeholder:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,589 - DEBUG - f02-05:outputs : Tensor("layer_normalization_1_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,589 - DEBUG - f02-05:outputs : Tensor("layer_normalization_1_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,595 - DEBUG - f02-01:inputs : Tensor("embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,595 - DEBUG - f02-01:inputs : Tensor("embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,598 - DEBUG - f02-01-01:inputs : Tensor("embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,598 - DEBUG - f02-01-01:inputs : Tensor("embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,602 - DEBUG - f02-01-02:outputs : Tensor("sequential_1/wukong_0_1/lcb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,602 - DEBUG - f02-01-02:outputs : Tensor("sequential_1/wukong_0_1/lcb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,610 - DEBUG - f02-01-03:outputs : Tensor("sequential_1/wukong_0_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,610 - DEBUG - f02-01-03:outputs : Tensor("sequential_1/wukong_0_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,614 - DEBUG - f02-01-04:outputs : Tensor("sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,614 - DEBUG - f02-01-04:outputs : Tensor("sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,618 - DEBUG - f02-02:lcb : Tensor("sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,618 - DEBUG - f02-02:lcb : Tensor("sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,621 - DEBUG - f02-02-01:inputs : Tensor("embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,621 - DEBUG - f02-02-01:inputs : Tensor("embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,625 - DEBUG - f02-02-02:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,625 - DEBUG - f02-02-02:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,633 - DEBUG - f02-02-03:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,633 - DEBUG - f02-02-03:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,638 - DEBUG - f02-02-03-01:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/matmul_1:0", shape=(32, 256, 8), dtype=float32)**

**2025-07-04 16:28:27,638 - DEBUG - f02-02-03-01:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/matmul_1:0", shape=(32, 256, 8), dtype=float32)**

**2025-07-04 16:28:27,642 - DEBUG - f02-02-03-02:num_emb_in : 256**

**2025-07-04 16:28:27,642 - DEBUG - f02-02-03-02:num_emb_in : 256**

**2025-07-04 16:28:27,645 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:27,645 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:27,648 - DEBUG - f02-02-04:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,648 - DEBUG - f02-02-04:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,650 - DEBUG - f02-02-05:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,650 - DEBUG - f02-02-05:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,675 - DEBUG - f02-02-06:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/mlp_1_1/dense_6_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,675 - DEBUG - f02-02-06:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/mlp_1_1/dense_6_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,679 - DEBUG - f02-02-07:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,679 - DEBUG - f02-02-07:outputs : Tensor("sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,682 - DEBUG - f02-03:fmb : Tensor("sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,682 - DEBUG - f02-03:fmb : Tensor("sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,687 - DEBUG - f02-04-01:inputs : Tensor("embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,687 - DEBUG - f02-04-01:inputs : Tensor("embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,691 - DEBUG - f02-04-02:outputs : Tensor("sequential_1/wukong_0_1/residual_projection_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,691 - DEBUG - f02-04-02:outputs : Tensor("sequential_1/wukong_0_1/residual_projection_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,698 - DEBUG - f02-04-03:outputs : Tensor("sequential_1/wukong_0_1/residual_projection_1/matmul:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,698 - DEBUG - f02-04-03:outputs : Tensor("sequential_1/wukong_0_1/residual_projection_1/matmul:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,702 - DEBUG - f02-04-04:outputs : Tensor("sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,702 - DEBUG - f02-04-04:outputs : Tensor("sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,705 - DEBUG - f02-05-01:outputs : Tensor("sequential_1/wukong_0_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,705 - DEBUG - f02-05-01:outputs : Tensor("sequential_1/wukong_0_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,708 - DEBUG - f02-05-02:residual_res : Tensor("sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,708 - DEBUG - f02-05-02:residual_res : Tensor("sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,718 - DEBUG - f02-05:outputs : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,718 - DEBUG - f02-05:outputs : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,722 - DEBUG - f02-01:inputs : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,722 - DEBUG - f02-01:inputs : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,726 - DEBUG - f02-01-01:inputs : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,726 - DEBUG - f02-01-01:inputs : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,730 - DEBUG - f02-01-02:outputs : Tensor("sequential_1/wukong_1_1/lcb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,730 - DEBUG - f02-01-02:outputs : Tensor("sequential_1/wukong_1_1/lcb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,738 - DEBUG - f02-01-03:outputs : Tensor("sequential_1/wukong_1_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,738 - DEBUG - f02-01-03:outputs : Tensor("sequential_1/wukong_1_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,742 - DEBUG - f02-01-04:outputs : Tensor("sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,742 - DEBUG - f02-01-04:outputs : Tensor("sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,745 - DEBUG - f02-02:lcb : Tensor("sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,745 - DEBUG - f02-02:lcb : Tensor("sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,748 - DEBUG - f02-02-01:inputs : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,748 - DEBUG - f02-02-01:inputs : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,752 - DEBUG - f02-02-02:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,752 - DEBUG - f02-02-02:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:27,759 - DEBUG - f02-02-03:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,759 - DEBUG - f02-02-03:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,763 - DEBUG - f02-02-03-01:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/matmul_1:0", shape=(32, 32, 8), dtype=float32)**

**2025-07-04 16:28:27,763 - DEBUG - f02-02-03-01:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/matmul_1:0", shape=(32, 32, 8), dtype=float32)**

**2025-07-04 16:28:27,767 - DEBUG - f02-02-03-02:num_emb_in : 32**

**2025-07-04 16:28:27,767 - DEBUG - f02-02-03-02:num_emb_in : 32**

**2025-07-04 16:28:27,770 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:27,770 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:27,772 - DEBUG - f02-02-04:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:27,772 - DEBUG - f02-02-04:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:27,775 - DEBUG - f02-02-05:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:27,775 - DEBUG - f02-02-05:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:27,803 - DEBUG - f02-02-06:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/mlp_2_1/dense_9_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,803 - DEBUG - f02-02-06:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/mlp_2_1/dense_9_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:27,807 - DEBUG - f02-02-07:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,807 - DEBUG - f02-02-07:outputs : Tensor("sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,810 - DEBUG - f02-03:fmb : Tensor("sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,810 - DEBUG - f02-03:fmb : Tensor("sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,814 - DEBUG - f02-05-01:outputs : Tensor("sequential_1/wukong_1_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,814 - DEBUG - f02-05-01:outputs : Tensor("sequential_1/wukong_1_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,817 - DEBUG - f02-05-02:residual_res : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,817 - DEBUG - f02-05-02:residual_res : Tensor("sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,826 - DEBUG - f02-05:outputs : Tensor("sequential_1/wukong_1_1/layer_normalization_1_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,826 - DEBUG - f02-05:outputs : Tensor("sequential_1/wukong_1_1/layer_normalization_1_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:27,958 - DEBUG - f02-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,958 - DEBUG - f02-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,963 - DEBUG - f02-01-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,963 - DEBUG - f02-01-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,968 - DEBUG - f02-01-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,968 - DEBUG - f02-01-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,973 - DEBUG - f02-01-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,973 - DEBUG - f02-01-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:27,978 - DEBUG - f02-01-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,978 - DEBUG - f02-01-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,981 - DEBUG - f02-02:lcb : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,981 - DEBUG - f02-02:lcb : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:27,985 - DEBUG - f02-02-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,985 - DEBUG - f02-02-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:27,989 - DEBUG - f02-02-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,989 - DEBUG - f02-02-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:27,994 - DEBUG - f02-02-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,994 - DEBUG - f02-02-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:27,998 - DEBUG - f02-02-03-01:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/matmul_1:0", shape=(32, 256, 8), dtype=float32)**

**2025-07-04 16:28:27,998 - DEBUG - f02-02-03-01:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/matmul_1:0", shape=(32, 256, 8), dtype=float32)**

**2025-07-04 16:28:28,002 - DEBUG - f02-02-03-02:num_emb_in : 256**

**2025-07-04 16:28:28,002 - DEBUG - f02-02-03-02:num_emb_in : 256**

**2025-07-04 16:28:28,005 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:28,005 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:28,008 - DEBUG - f02-02-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:28,008 - DEBUG - f02-02-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:28,012 - DEBUG - f02-02-05:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:28,012 - DEBUG - f02-02-05:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:28,031 - DEBUG - f02-02-06:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/mlp_1_1/dense_6_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:28,031 - DEBUG - f02-02-06:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/mlp_1_1/dense_6_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:28,035 - DEBUG - f02-02-07:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,035 - DEBUG - f02-02-07:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,038 - DEBUG - f02-03:fmb : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,038 - DEBUG - f02-03:fmb : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,043 - DEBUG - f02-04-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:28,043 - DEBUG - f02-04-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:28,047 - DEBUG - f02-04-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:28,047 - DEBUG - f02-04-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:28,052 - DEBUG - f02-04-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/matmul:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:28,052 - DEBUG - f02-04-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/matmul:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:28,057 - DEBUG - f02-04-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,057 - DEBUG - f02-04-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,060 - DEBUG - f02-05-01:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,060 - DEBUG - f02-05-01:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,064 - DEBUG - f02-05-02:residual_res : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,064 - DEBUG - f02-05-02:residual_res : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,077 - DEBUG - f02-05:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,077 - DEBUG - f02-05:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,081 - DEBUG - f02-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,081 - DEBUG - f02-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,085 - DEBUG - f02-01-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,085 - DEBUG - f02-01-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,091 - DEBUG - f02-01-02:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:28,091 - DEBUG - f02-01-02:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:28,096 - DEBUG - f02-01-03:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:28,096 - DEBUG - f02-01-03:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:28,100 - DEBUG - f02-01-04:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,100 - DEBUG - f02-01-04:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,103 - DEBUG - f02-02:lcb : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,103 - DEBUG - f02-02:lcb : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,106 - DEBUG - f02-02-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,106 - DEBUG - f02-02-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,110 - DEBUG - f02-02-02:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:28,110 - DEBUG - f02-02-02:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:28,115 - DEBUG - f02-02-03:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:28,115 - DEBUG - f02-02-03:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:28,119 - DEBUG - f02-02-03-01:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/matmul_1:0", shape=(32, 32, 8), dtype=float32)**

**2025-07-04 16:28:28,119 - DEBUG - f02-02-03-01:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/matmul_1:0", shape=(32, 32, 8), dtype=float32)**

**2025-07-04 16:28:28,123 - DEBUG - f02-02-03-02:num_emb_in : 32**

**2025-07-04 16:28:28,123 - DEBUG - f02-02-03-02:num_emb_in : 32**

**2025-07-04 16:28:28,126 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:28,126 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:28,129 - DEBUG - f02-02-04:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:28,129 - DEBUG - f02-02-04:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:28,132 - DEBUG - f02-02-05:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:28,132 - DEBUG - f02-02-05:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:28,149 - DEBUG - f02-02-06:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/mlp_2_1/dense_9_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:28,149 - DEBUG - f02-02-06:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/mlp_2_1/dense_9_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:28,153 - DEBUG - f02-02-07:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,153 - DEBUG - f02-02-07:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,157 - DEBUG - f02-03:fmb : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,157 - DEBUG - f02-03:fmb : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:28,161 - DEBUG - f02-05-01:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,161 - DEBUG - f02-05-01:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,164 - DEBUG - f02-05-02:residual_res : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,164 - DEBUG - f02-05-02:residual_res : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,174 - DEBUG - f02-05:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/layer_normalization_1_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:28,174 - DEBUG - f02-05:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/layer_normalization_1_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,475 - DEBUG - f02-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:29,475 - DEBUG - f02-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:29,479 - DEBUG - f02-01-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:29,479 - DEBUG - f02-01-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:29,485 - DEBUG - f02-01-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:29,485 - DEBUG - f02-01-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:29,491 - DEBUG - f02-01-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:29,491 - DEBUG - f02-01-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:29,496 - DEBUG - f02-01-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,496 - DEBUG - f02-01-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,500 - DEBUG - f02-02:lcb : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,500 - DEBUG - f02-02:lcb : Tensor("wukong_1/sequential_1/wukong_0_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,504 - DEBUG - f02-02-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:29,504 - DEBUG - f02-02-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:29,508 - DEBUG - f02-02-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:29,508 - DEBUG - f02-02-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:29,513 - DEBUG - f02-02-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:29,513 - DEBUG - f02-02-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:29,517 - DEBUG - f02-02-03-01:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/matmul_1:0", shape=(32, 256, 8), dtype=float32)**

**2025-07-04 16:28:29,517 - DEBUG - f02-02-03-01:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/matmul_1:0", shape=(32, 256, 8), dtype=float32)**

**2025-07-04 16:28:29,521 - DEBUG - f02-02-03-02:num_emb_in : 256**

**2025-07-04 16:28:29,521 - DEBUG - f02-02-03-02:num_emb_in : 256**

**2025-07-04 16:28:29,545 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:29,545 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:29,549 - DEBUG - f02-02-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:29,549 - DEBUG - f02-02-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:29,552 - DEBUG - f02-02-05:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:29,552 - DEBUG - f02-02-05:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:29,572 - DEBUG - f02-02-06:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/mlp_1_1/dense_6_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:29,572 - DEBUG - f02-02-06:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/mlp_1_1/dense_6_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:29,576 - DEBUG - f02-02-07:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,576 - DEBUG - f02-02-07:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,581 - DEBUG - f02-03:fmb : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,581 - DEBUG - f02-03:fmb : Tensor("wukong_1/sequential_1/wukong_0_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,586 - DEBUG - f02-04-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:29,586 - DEBUG - f02-04-01:inputs : Tensor("wukong_1/embedding_1/concat:0", shape=(32, 256, 128), dtype=float32)**

**2025-07-04 16:28:29,590 - DEBUG - f02-04-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:29,590 - DEBUG - f02-04-02:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose:0", shape=(32, 128, 256), dtype=float32)**

**2025-07-04 16:28:29,595 - DEBUG - f02-04-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/matmul:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:29,595 - DEBUG - f02-04-03:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/matmul:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:29,599 - DEBUG - f02-04-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,599 - DEBUG - f02-04-04:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,602 - DEBUG - f02-05-01:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,602 - DEBUG - f02-05-01:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,605 - DEBUG - f02-05-02:residual_res : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,605 - DEBUG - f02-05-02:residual_res : Tensor("wukong_1/sequential_1/wukong_0_1/residual_projection_1/transpose_1:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,616 - DEBUG - f02-05:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,616 - DEBUG - f02-05:outputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,620 - DEBUG - f02-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,620 - DEBUG - f02-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,623 - DEBUG - f02-01-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,623 - DEBUG - f02-01-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,626 - DEBUG - f02-01-02:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:29,626 - DEBUG - f02-01-02:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:29,631 - DEBUG - f02-01-03:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:29,631 - DEBUG - f02-01-03:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/matmul:0", shape=(32, 128, 16), dtype=float32)**

**2025-07-04 16:28:29,635 - DEBUG - f02-01-04:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,635 - DEBUG - f02-01-04:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,638 - DEBUG - f02-02:lcb : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,638 - DEBUG - f02-02:lcb : Tensor("wukong_1/sequential_1/wukong_1_1/lcb_1/transpose_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,642 - DEBUG - f02-02-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,642 - DEBUG - f02-02-01:inputs : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,645 - DEBUG - f02-02-02:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:29,645 - DEBUG - f02-02-02:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/transpose:0", shape=(32, 128, 32), dtype=float32)**

**2025-07-04 16:28:29,651 - DEBUG - f02-02-03:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:29,651 - DEBUG - f02-02-03:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/matmul:0", shape=(32, 128, 8), dtype=float32)**

**2025-07-04 16:28:29,654 - DEBUG - f02-02-03-01:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/matmul_1:0", shape=(32, 32, 8), dtype=float32)**

**2025-07-04 16:28:29,654 - DEBUG - f02-02-03-01:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/matmul_1:0", shape=(32, 32, 8), dtype=float32)**

**2025-07-04 16:28:29,658 - DEBUG - f02-02-03-02:num_emb_in : 32**

**2025-07-04 16:28:29,658 - DEBUG - f02-02-03-02:num_emb_in : 32**

**2025-07-04 16:28:29,661 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:29,661 - DEBUG - f02-02-03-03:rank : 8**

**2025-07-04 16:28:29,665 - DEBUG - f02-02-04:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:29,665 - DEBUG - f02-02-04:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:29,669 - DEBUG - f02-02-05:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:29,669 - DEBUG - f02-02-05:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape:0", shape=(32, 256), dtype=float32)**

**2025-07-04 16:28:29,689 - DEBUG - f02-02-06:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/mlp_2_1/dense_9_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:29,689 - DEBUG - f02-02-06:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/mlp_2_1/dense_9_1/BiasAdd:0", shape=(32, 2048), dtype=float32)**

**2025-07-04 16:28:29,694 - DEBUG - f02-02-07:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,694 - DEBUG - f02-02-07:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,697 - DEBUG - f02-03:fmb : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,697 - DEBUG - f02-03:fmb : Tensor("wukong_1/sequential_1/wukong_1_1/fmb_1/Reshape_1:0", shape=(32, 16, 128), dtype=float32)**

**2025-07-04 16:28:29,701 - DEBUG - f02-05-01:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,701 - DEBUG - f02-05-01:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/concat:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,704 - DEBUG - f02-05-02:residual_res : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,704 - DEBUG - f02-05-02:residual_res : Tensor("wukong_1/sequential_1/wukong_0_1/layer_normalization_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,715 - DEBUG - f02-05:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/layer_normalization_1_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

**2025-07-04 16:28:29,715 - DEBUG - f02-05:outputs : Tensor("wukong_1/sequential_1/wukong_1_1/layer_normalization_1_1/add_2:0", shape=(32, 32, 128), dtype=float32)**

Epoch 0, Loss: 0.6922, AUC: 0.4251
Epoch 1, Loss: 0.6338, AUC: 0.7206
Epoch 2, Loss: 1.0043, AUC: 0.9332
Epoch 3, Loss: 0.5349, AUC: 0.9757
Epoch 4, Loss: 0.7269, AUC: 0.9717
Epoch 5, Loss: 0.5720, AUC: 0.9858
Epoch 6, Loss: 0.4816, AUC: 0.9879
Epoch 7, Loss: 0.4959, AUC: 0.9899
Epoch 8, Loss: 0.4254, AUC: 0.9919
Epoch 9, Loss: 0.3752, AUC: 0.9960


2025-07-04 16:28:30.856344: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-07-04 16:28:30.884663: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-07-04 16:28:30.915302: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-07-04 16:28:30.971612: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
