In [4]:
import torch
from torch import nn
from torch.nn import functional as F
import math
import transformer_test


### TokenEmbedding Test

In [5]:
from transformer_test import TokenEmbedding
# Deliberately tiny sizes (10-token vocabulary, 5-dim embeddings) so the whole
# result can be printed and inspected by eye.
vocab_size = 10
d_model = 5

# Instantiate the embedding layer under test.
token_embedding = TokenEmbedding(vocab_size, d_model)

# Five token indices; index 1 appears twice, so the first two output rows are
# looked up from the same table entry.
input_indices = torch.tensor([1,1,2,3,4])

# Look up the embedding vector for every index.
embedded_output = token_embedding(input_indices)

# Sanity check: one d_model-sized row per input index.
assert embedded_output.shape == (input_indices.shape[0], d_model)

# NOTE(review): the recorded output shows all-zero rows for index 1, which is
# what an embedding built with padding_idx=1 would produce -- confirm against
# the TokenEmbedding definition in transformer_test.
print(embedded_output)  # show the embedded tensor


tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [-1.3036,  1.2301,  0.0877,  0.6346, -0.5307],
        [ 0.5070,  1.0961,  0.6329, -1.6454, -0.4972],
        [ 1.7762,  2.5230,  0.5927,  0.2253,  0.2891]],
       grad_fn=<EmbeddingBackward0>)


### PositionalEmbedding Test

In [6]:
from transformer_test import PositionalEmbedding
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Assume an embedding dimension of 512 and a maximum sequence length of 1024.
max_len = 1024
d_model = 512

# Instantiate the PositionalEmbedding under test.
positional_embedding = PositionalEmbedding(d_model, max_len, device)

# Build a dummy input tensor of shape (batch_size, sequence_length).
sequence_length = 50
batch_size = 128
x = torch.zeros((batch_size, sequence_length), dtype=torch.long, device=device)

# Fetch the positional encoding for that input.
pos_encoding = positional_embedding(x)

print(pos_encoding.shape)  # shape of the positional encoding

torch.Size([50, 512])


### Embedding Test

In [8]:
# End-to-end check of the embedding pipeline: token embedding + positional
# embedding, combined by broadcasting over the batch dimension.
#
# NOTE: no RNG seed is set in this cell, so the sampled ids (and any randomly
# initialised weights) differ from run to run; only the shapes are stable.

# Configuration
batch_size = 128
max_len = 50  # actual sequence length of the batch (NOT the positional table size)
vocab_size = 8000  # uses dec_voc_size
d_model = 512
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Random token ids of shape (batch_size, sequence_length)
input_ids = torch.randint(
    low=0,
    high=vocab_size,
    size=(batch_size, max_len),
    device=device,
)

print("[输入] input_ids.shape:", input_ids.shape)
# Example output: torch.Size([128, 50])

# Build the embedding layers; the positional table is sized for up to 1024
# positions, independent of the 50-token batch used here.
token_embedding = TokenEmbedding(vocab_size, d_model).to(device)
position_embedding = PositionalEmbedding(d_model, max_len=1024, device=device)

# Forward pass
token_embeddings = token_embedding(input_ids)  # (B, L) -> (B, L, D)
pos_embeddings = position_embedding(input_ids) # (B, L) -> (L, D)

# Turn the shapes that used to live only in comments into hard checks.
assert token_embeddings.shape == (batch_size, max_len, d_model)
assert pos_embeddings.shape == (max_len, d_model)

print("\n[Token Embedding] 输出形状:", token_embeddings.shape)
# Expected: torch.Size([128, 50, 512])

print("[Position Embedding] 输出形状:", pos_embeddings.shape)
# Expected: torch.Size([50, 512])

# Combine the embeddings; unsqueeze(0) adds a leading batch axis so the
# positional encoding broadcasts across every sequence in the batch.
final_embeddings = token_embeddings + pos_embeddings.unsqueeze(0)
assert final_embeddings.shape == token_embeddings.shape
print("\n[最终嵌入] 输出形状:", final_embeddings.shape)
# Expected: torch.Size([128, 50, 512])

# Peek at a few values. detach() is used instead of .data: it also drops the
# autograd history for printing, without .data's ability to silently bypass
# autograd's in-place modification checks.
print("\n样例数据查看：")
print("输入 IDs:", input_ids[0, :5])
print("Token 嵌入[0,0,:5]:", token_embeddings[0,0,:5].detach())
print("位置嵌入[0,:5]:", pos_embeddings[0,:5].detach())
print("最终嵌入[0,0,:5]:", final_embeddings[0,0,:5].detach())

[输入] input_ids.shape: torch.Size([128, 50])

[Token Embedding] 输出形状: torch.Size([128, 50, 512])
[Position Embedding] 输出形状: torch.Size([50, 512])

[最终嵌入] 输出形状: torch.Size([128, 50, 512])

样例数据查看：
输入 IDs: tensor([1051, 5754, 6552, 2956, 2453])
Token 嵌入[0,0,:5]: tensor([ 0.3116, -2.2355,  0.2612,  0.1022, -0.3265])
位置嵌入[0,:5]: tensor([0., 1., 0., 1., 0.])
最终嵌入[0,0,:5]: tensor([ 0.3116, -1.2355,  0.2612,  1.1022, -0.3265])
