In [None]:
import numpy as np
import pickle
import operator
from keras_transformer import get_model, decode
# main_path = '/content/drive/My Drive/Colab Notebooks/'    # Google Colab file path
main_path = './'
# Directory holding the preprocessed pickles. It is resolved against main_path
# so that switching main_path (e.g. to the Colab path above) relocates the data
# together with the model checkpoints. Previously a second assignment
# overwrote this value with a bare relative path and main_path was ignored.
path = main_path + 'middle_data/'


def _load_pickle(name):
    """Load and return one pickled artifact stored under ``path``.

    NOTE: ``pickle.load`` can execute arbitrary code embedded in the file;
    only load files that were produced by this project.
    """
    with open(path + name, 'rb') as f:
        return pickle.load(f)


encode_input = _load_pickle('encode_input.pkl')
decode_input = _load_pickle('decode_input.pkl')
decode_output = _load_pickle('decode_output.pkl')
source_token_dict = _load_pickle('source_token_dict.pkl')
target_token_dict = _load_pickle('target_token_dict.pkl')
source_tokens = _load_pickle('source_tokens.pkl')
print('Done')

In [None]:
from keras.optimizers import Adam
from keras.optimizers.schedules import ExponentialDecay

# Report the source/target vocabulary sizes and the number of training pairs.
for size in (len(source_token_dict), len(target_token_dict), len(encode_input)):
    print(size)

# -------------------- Build the enlarged Transformer model --------------------
# The trailing notes record the previous (smaller) settings.
model_config = dict(
    # One size covers both vocabularies.
    token_num=max(len(source_token_dict), len(target_token_dict)),
    embed_dim=256,        # was 64
    encoder_num=4,        # was 2
    decoder_num=4,
    head_num=8,           # was 4
    hidden_dim=1024,      # was 256 (feed-forward width)
    dropout_rate=0.1,     # was 0.05
    # Encoder and decoder share a single embedding table.
    # NOTE(review): source and target ids come from two separate dictionaries,
    # so a shared table presumably maps the same id to unrelated words in the
    # two languages - confirm this is intended. Changing it would invalidate
    # previously saved weights.
    use_same_embed=True,
)
model = get_model(**model_config)

# -------------------- Learning rate: exponential decay schedule --------------------
lr_schedule = ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=5000,
    decay_rate=0.9,
)
optimizer = Adam(learning_rate=lr_schedule)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy')
print('Done')

In [None]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# -------------------- Training setup --------------------
# Checkpoint file name pattern; Keras fills in the epoch number and the
# training loss. Note that `{epoch:3d}` pads with spaces, so epochs below 100
# produce file names that contain blanks.
filepath = main_path + "models/W-" + "-{epoch:3d}-{loss:.4f}-.weights.h5"

# Save weights only, once per epoch, and only when the training loss improves.
checkpoint = ModelCheckpoint(filepath,
                             monitor='loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min',
                             save_weights_only=True,
                             save_freq='epoch')

# Plateau-based learning-rate reduction. It is intentionally NOT registered
# below: the optimizer is compiled with an ExponentialDecay schedule, and Keras
# raises a TypeError ("learning rate is not settable") as soon as this callback
# tries to assign a new learning rate on the first plateau. Add it back to
# callbacks_list only if the optimizer is built with a plain float learning rate.
reduce_lr = ReduceLROnPlateau(monitor='loss',
                              factor=0.2,
                              patience=2,
                              verbose=1,
                              mode='min',
                              min_delta=0.0001,
                              cooldown=0,
                              min_lr=0)
callbacks_list = [checkpoint]

# Upper bound on the number of sentence pairs used for training.
TRAIN_LIMIT = 1000000

# Train the model. Other fit() options (class_weight, shuffle, initial_epoch)
# are left at their defaults.
model.fit(
    x=[np.array(encode_input[:TRAIN_LIMIT]), np.array(decode_input[:TRAIN_LIMIT])],
    y=np.array(decode_output[:TRAIN_LIMIT]),
    epochs=10,
    batch_size=64,
    verbose=1,
    callbacks=callbacks_list,
)

In [None]:
# Load trained weights.
# The path is resolved against main_path, the same root the training cell uses
# for its checkpoints, so both follow a change of main_path.
# NOTE: the file name refers to epoch 150, while the training cell in this
# notebook runs 10 epochs - this checkpoint must already exist on disk.
model.load_weights(main_path + 'models/W--150-0.0405-.weights.h5')
# Reverse lookup (token id -> token string) used to render decoded output.
target_token_dict_inv = {v: k for k, v in target_token_dict.items()}
print('Done')

In [None]:
from keras.preprocessing import sequence
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import jieba
import requests
# -------------------- Input preprocessing --------------------
def get_input(seq, max_len=34):
    """Segment a Chinese sentence and encode it as source token ids.

    Args:
        seq: Raw input sentence.
        max_len: Length the sequence is right-padded to with '<PAD>'.
            Longer sequences are left unpadded and are not truncated.
            The default of 34 presumably matches the length used when the
            training data was prepared - TODO confirm.

    Returns:
        (True, ids) when every token is in ``source_token_dict``;
        (False, []) when the sentence has no tokens or contains an
        out-of-vocabulary token (the first one found is printed).
    """
    # jieba returns whitespace as separate tokens. The previous join/split
    # round trip turned those into empty strings, which are never in the
    # vocabulary, so any input containing a space (even a trailing one) was
    # rejected. Drop whitespace-only tokens instead.
    tokens = [tok for tok in jieba.lcut(seq, cut_all=False) if tok.strip()]
    print("Segmented:", tokens)
    if not tokens:
        # Nothing to translate; report failure as before.
        return False, []

    tokens = ['<START>'] + tokens + ['<END>']
    # A negative count yields no padding.
    tokens += ['<PAD>'] * (max_len - len(tokens))

    for tok in tokens:
        if tok not in source_token_dict:
            print(f"OOV Token: {tok}")
            return False, []
    return True, [source_token_dict[tok] for tok in tokens]

# -------------------- Translation / inference --------------------
def get_ans(seq):
    """Decode one encoded source sentence, print and return the translation.

    Args:
        seq: List of source token ids as produced by ``get_input``.

    Returns:
        The translated sentence as a space-joined string (also printed).
    """
    end_id = target_token_dict['<END>']
    decoded = decode(
        model,
        [seq],
        start_token=target_token_dict['<START>'],
        end_token=end_id,
        pad_token=target_token_dict['<PAD>'],
        # Top-k sampling with temperature: this is stochastic sampling, not
        # beam search, so repeated calls may return different translations.
        top_k=5,
        temperature=0.9
    )
    # Drop the leading start marker.
    output_ids = list(decoded[0][1:])
    # Drop the end marker only if it is actually there: when decoding stops
    # for another reason (e.g. a length limit) the last id is a real token
    # and must not be discarded.
    if output_ids and output_ids[-1] == end_id:
        output_ids = output_ids[:-1]
    translation = ' '.join(target_token_dict_inv[x] for x in output_ids)
    print('翻译结果:', translation)
    return translation

# -------------------- Interactive prompt --------------------
# Keep reading sentences until the user enters a lone "x".
while True:
    sentence = input("请输入中文句子（输入 x 退出）：")
    if sentence.strip() == 'x':
        break
    ok, token_ids = get_input(sentence)
    if not ok:
        # get_input rejected the sentence (out-of-vocabulary token).
        print('输入中包含未登录词，无法翻译。')
        continue
    get_ans(token_ids)

Segmented: ['你', '是不是', '有病']
OOV Token: 有病
输入中包含未登录词，无法翻译。
Segmented: ['你', '看起来', '很漂亮']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
翻译结果: You look pretty beautiful .
Segmented: ['今天', '的', '天气', '不太好']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 