In [1]:
from skystar.dataloader import SeqDataloader
from skystar.dataset import TranslationDataset
from skystar.model import Transformer
from skystar.core import softmaxwithloss, softmax
from skystar.optimizer import Adam
import numpy as np
from skystar.cuda import get_array_module
from skystar import no_grad

In [2]:
# Toy parallel corpus. Each row is [source sentence, decoder input, decoder target].
# Special tokens:
#   S - start symbol
#   E - end symbol
#   P - padding symbol, used when a sentence is shorter than the fixed length
# In every row the decoder target is the decoder input shifted left by one token
# with E appended. The last row previously had the target 'I like a boy E', which
# did not line up with its decoder input 'S I like boy P'.
sentences = [['我 是 学 生 P', 'S I am a student', 'I am a student E'],
             ['我 喜 欢 学 习', 'S I like learning P', 'I like learning P E'],
             ['我 是 男 生 P', 'S I am a boy', 'I am a boy E'],
             ['我 喜 欢 男 生', 'S I like boy P', 'I like boy P E']
             ]
# Source vocabulary: character -> index.
src_vocab = {'P': 0, '我': 1, '是': 2, '学': 3, '生': 4, '喜': 5, '欢': 6, '习': 7, '男': 8}
# Target vocabulary: word -> index.
tgt_vocab = {'P': 0, 'S': 1, 'E': 2, 'I': 3, 'am': 4, 'a': 5, 'student': 6, 'like': 7, 'learning': 8, 'boy': 9}
dataset = TranslationDataset(sentences, src_vocab, tgt_vocab)
# NOTE(review): the second positional argument is presumably the batch size - confirm.
loader = SeqDataloader(dataset, 2)

In [15]:
# Transformer constructor arguments used below:
#   word_num      - total number of words
#   embedding_dim - dimension of the word embeddings
#   dff           - hidden-layer dimension of the feed-forward layer
#   dkv           - dimension of k and v in the self-attention layer
#   n_heads       - number of heads in multi-head attention
# NOTE(review): only the target vocabulary size is passed as word_num; presumably
# the source side shares it - confirm it is always >= len(src_vocab).
tgt_vocab_size = len(tgt_vocab)  # size of the target vocabulary
model = Transformer(
    word_num=tgt_vocab_size,
    embedding_dim=512,
    dff=2048,
    dkv=64,
    n_heads=8,
)

# Training schedule: number of passes over the loader and the Adam learning rate.
epoch = 300
lr = 0.01
optimizer = Adam(lr).setup(model)

In [16]:
# Move the model and the loader to the GPU before training.
model.to_gpu()
loader.to_gpu()

# Divisor for the reported loss: the number of training sentence triples.
# This used to be a literal 4, which silently goes stale if `sentences` changes.
num_samples = len(sentences)

for i in range(epoch):
    sum_loss = 0.0
    for enc_inputs, dec_inputs, dec_outputs in loader:
        # The model call returns the prediction plus two further values that are
        # unpacked as attention outputs and are not used during training.
        predict, atten1, atten2 = model(enc_inputs, dec_inputs)
        loss = softmaxwithloss(predict, dec_outputs, axis=-1)

        # Clear the gradients, back-propagate this batch, then apply the update.
        model.cleangrads()
        loss.backward()
        optimizer.update()
        sum_loss += loss.data

    print(f'epoch{i}:', f'{sum_loss/num_samples}')

# Persist the trained weights; the inference cell loads this same file.
model.save_weights('TransformerTest.npz')

model saved and simplified successfully--Path:D:\Programing\pythonProject\skystar_proj\model_params\Transformer2024.12.10-114032.onnx
epoch0: 0.0


In [28]:
# Target sentence length, as reported by the dataset.
tgt_len = dataset.tgt_len

# Build a fresh model with the same constructor arguments as the training cell,
# then restore the weights written by the training cell.
model = Transformer(
    word_num=tgt_vocab_size,
    embedding_dim=512,
    dff=2048,
    dkv=64,
    n_heads=8,
)
model.load_weights('TransformerTest.npz')
def test(model, enc_input, start_symbol, tgt_len):
    enc_outputs = model.Encoder(enc_input)#编码器输出
    dec_input = np.zeros((1, tgt_len)).astype(np.int32)
    next_symbol = start_symbol
    for i in range(0, tgt_len):
        dec_input[0][i] = next_symbol#当前时间步的解码器输入
        dec_outputs= model.Decoder(dec_input, enc_input, enc_outputs)#解码器输出
        dec_outputs=dec_outputs.data.reshape(-1,512)
        projected = model.Gemm(dec_outputs)#映射分类

        prob=softmax(projected,axis=0)#归一化
        prob=np.argmax(prob.data,axis=1)#获得标签值
        next_word = prob.data[i]
        next_symbol = next_word
    return dec_input

def get_key(val, dict):
    """Reverse lookup: return the first key of ``dict`` whose value equals ``val``.

    :param val: value to search for
    :param dict: mapping to search, scanned in its iteration order
    :return: the first key whose value compares equal to ``val``
    :raises KeyError: if no value in the mapping equals ``val``
    """
    not_found = object()  # sentinel, so that any real key (even None) can be returned
    hit = next((key for key, value in dict.items() if val == value), not_found)
    if hit is not_found:
        raise KeyError('The dictionary has the wrong value')
    return hit
        
with no_grad():
    # Turn the training-mode flag off before running inference.
    model.set_trainingmode(False)
    enc_inputs = dataset.enc_inputs

    # Take the source sentence at index 3 and reshape it to a single row.
    enc_input = enc_inputs[3].reshape(1, -1)
    # A hand-written id sequence can be used instead, e.g.
    # np.array([1,5,6,3,4]).reshape(1,-1)

    # Step 1: greedily generate the decoder input, starting from the S token.
    predict_dec_input = test(
        model,
        enc_input,
        start_symbol=tgt_vocab["S"],
        tgt_len=dataset.tgt_len,
    )

    # Step 2: run the full model on that decoder input. The first element of
    # the result is the per-position score array; take the best index on axis 2.
    predict = model(enc_input, predict_dec_input)
    prob = np.argmax(predict[0].data, axis=2)

    # Map ids back to tokens and show source -> prediction.
    source_tokens = [get_key(n, src_vocab) for n in enc_input.squeeze()]
    predicted_tokens = [get_key(n, tgt_vocab) for n in prob.squeeze()]
    print(source_tokens, '->', predicted_tokens)

The network parameters are loaded successfully！The params type:np.ndarray path:D:\Programing\pythonProject\skystar_proj\model_params\TransformerTest.npz
['我', '喜', '欢', '男', '生'] -> ['I', 'like', 'boy', 'P', 'E']
