<a href="https://colab.research.google.com/github/bluetinue/Country_Name/blob/main/%E5%9F%BA%E4%BA%8EGRU%E7%9A%84seq2seq%E7%9A%84%E8%8B%B1%E8%AF%91%E6%B3%95%E6%A1%88%E4%BE%8B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title 导包
from google.colab import drive
drive.mount('/content/drive')

# 用于正则表达式
import re
# 用于构建网络结构和函数的torch工具包
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
# torch中预定义的优化方法工具包
import torch.optim as optim
import time
# 用于随机生成数据
import random
import matplotlib.pyplot as plt

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
#@title 全局变量

#开始字符标注
SOS_TOKEN = 0
EOS_TOKEN = 1

#最大句子长度
MAX_LENGTH = 10

data_path = "/content/drive/MyDrive/NLP/data/eng-fra-v2.txt"

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
import torch.cuda

# 默认没有设置过cuda时,CUDA available结果是False,如果设置后是True
print("CUDA是否可用:", torch.cuda.is_available())
print("CUDA版本:", torch.version.cuda)
print("PyTorch版本:", torch.__version__)
print("电脑上GPU数量:", torch.cuda.device_count())
if torch.cuda.is_available():
    	print("当前使用的GPU版本:", torch.cuda.get_device_name(torch.cuda.current_device()))

CUDA是否可用: False
CUDA版本: 12.4
PyTorch版本: 2.6.0+cu124
电脑上GPU数量: 0


In [4]:
#@title 文本清洗工具函数
def normal2String(s):
  s1 = s.lower().strip()
  s2 = re.sub(r"([.!?])", r" \1 ", s1)
  s3 = re.sub(r"[^a-zA-Z.!?]+", r" ", s2)
  return s3.strip()

In [5]:
#@title 对原始数据进行预处理
#构建出[[英文,法文，.....的列表对]]
def wash_data():
  with open(data_path,"r",encoding="utf-8") as fr:
    lines = fr.readlines()
  #列表推导式
  my_pairs = [[ normal2String(s) for s in i.strip().split("\t")] for i in lines]

  #初始化词表，默认有开始和结束分隔符和初始长度
  english_word2index = {"SOS":0,"EOS":1}
  english_word2index_n = 2
  fres_word2index = {"SOS":0,"EOS":1}
  fres_word2index_n = 2

  #构造两个词表的word2index表
  for pair in my_pairs:
    for word in pair[0].split(" "):
      if word not in english_word2index:
        english_word2index[word] = len(english_word2index)
        #english_word2index[word] = english_word2index_n
        #english_word2index_n += 1

    for word in pair[1].split(" "):
      if word not in fres_word2index:
        fres_word2index[word] = len(fres_word2index)
        #fres_word2index[word] = fres_word2index_n
        #fres_word2index_n += 1

  #构造两个词表的index2word表
  english_index2word = {v:k for k,v in english_word2index.items()}
  fres_index2word = {v:k for k,v in fres_word2index.items()}
  return english_word2index,english_index2word,\
    len(english_word2index),fres_word2index,fres_index2word,\
    len(fres_word2index),my_pairs

In [6]:
english_word2index, english_index2word,  english_word_n, french_word2index, french_index2word, french_word_n, my_pairs = wash_data()

In [7]:
#@title 构建数据源对象
class SeqDataset(Dataset):
  def __init__(self,my_pairs):
    super().__init__()
    self.my_pairs = my_pairs
    self.sample_len = len(my_pairs)

  def __len__(self):
    return self.sample_len

  def __getitem__(self, index):
    index = min(max(0,index),self.sample_len-1)

    x = self.my_pairs[index][0]
    y = self.my_pairs[index][1]

    #文本索引张量化，给后续的embedding层处理
    x = [english_word2index[word] for word in x.split(" ")]
    x.append(EOS_TOKEN)
    tensor_x = torch.tensor(x,dtype=torch.long).to(device)

    y = [french_word2index[word] for word in y.split(" ")]
    y.append(EOS_TOKEN)
    tensor_y = torch.tensor(y,dtype=torch.long).to(device)
    return tensor_x,tensor_y

In [11]:
def use_dataset():
  my_dataset = SeqDataset(my_pairs)
  my_dataloader = DataLoader(dataset=my_dataset,batch_size=1,shuffle=True)
  return my_dataloader

In [12]:
my_dataloader = use_dataset()

In [25]:
#@title 构建基于GRU的编码器
class EncodeGru(nn.Module):
  def __init__(self,vocb_size,hidden_size):
    super().__init__()
    self.vocb_size = vocb_size
    self.hidden_size = hidden_size

    #将输入进embedding词嵌入层转换成词向量
    self.embed = nn.Embedding(vocb_size,hidden_size).to(device)

    #实例化GRU层
    self.gru = nn.GRU(hidden_size,hidden_size,batch_first=True).to(device)

  def forward(self,vocb_size,hidden):
    #数据经过词嵌入层
    output = self.embed(vocb_size)
    output,hidden = self.gru(output,hidden)
    return output,hidden

  def inithidden(self):
    return torch.zeros(1,1,self.hidden_size).to(device)

In [13]:
vocb_size = english_word_n
hidden_size = 256
encoder = EncodeGru(vocb_size,hidden_size)
for x,y in my_dataloader:
  hidden = encoder.inithidden()
  output,hidden = encoder(x,hidden)
  print(output.shape)
  print(hidden.shape)
  break

torch.Size([1, 9, 256])
torch.Size([1, 1, 256])


In [14]:
#@title 构建基于GRU的解码器（不带注意力机制）
class DecodeGruN(nn.Module):
  def __init__(self,vocab_size,hidden_size):
    super().__init__()
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size

    #将输入进embedding词嵌入层转换成词向量
    self.embedding = nn.Embedding(vocb_size,hidden_size)
    self.gru = nn.GRU(hidden_size,hidden_size,batch_first=True)

    self.out = nn.Linear(hidden_size,vocab_size)
    self.softmax = nn.LogSoftmax(dim=-1)

  def forward(self,input,h0):
    #input[1,1]
    output = self.embedding(input)
    #input[1,1,256]
    output = F.relu(output)
    output,hn = self.gru(output,h0)
    #input[1,1,256]
    output = self.out(output[0])
    #最终输出[1,fre_word_n]
    return self.softmax(output),hn

  def inithidden(self):
    return torch.zeros(1,1,self.hidden_size)

In [None]:
vocab_size = french_word_n
hidden_size = 256
degrun = DecodeGruN(vocab_size,hidden_size)
degrun

DecodeGruN(
  (embedding): Embedding(2803, 256)
  (gru): GRU(256, 256, batch_first=True)
  (out): Linear(in_features=256, out_features=4345, bias=True)
  (softmax): LogSoftmax(dim=-1)
)

In [None]:
def test_demo():
  #实例化 数据集对象
  #实例化 数据加载器对象
  #实例化 编码器对象
  encoder = EncodeGru(english_word_n,hidden_size)
  #实例化 解码器对象
  decoder = DecodeGruN(french_word_n,hidden_size)
  for x,y in my_dataloader:
    #数据投入编码器
    encoder_output,encoder_hidden = encoder(x,encoder.inithidden())
    print(encoder_output.shape)
    #编码器的结果投入解码器
    hidden = encoder_hidden
    for i in range(y.shape[1]):
      temp_vec = y[0][i].reshape(1,-1)
      output,hidden = decoder(temp_vec,hidden)
      print(output.shape)
    break


In [28]:
#@title 构建基于GRU的解码器（带注意力机制）
class DecodeGruAtten(nn.Module):
  def __init__(self,fre_vocab_size,hidden_size,p=0.1,max_len=MAX_LENGTH):
    super().__init__()
    self.fre_vocab_size = fre_vocab_size
    self.hidden_size = hidden_size
    self.p = p
    self.max_len = max_len

    self.embed = nn.Embedding(fre_vocab_size,hidden_size).to(device)
    self.drop = nn.Dropout(p=p).to(device)
    # 注意力机制的线性层，输入为连接后的嵌入输入和隐藏状态，输出为注意力权重
    # 输入形状: (batch_size, hidden_size + hidden_size)
    # 输出形状: (batch_size, max_len) - 在编码器输出上的注意力权重
    self.attn = nn.Linear(hidden_size + hidden_size,max_len).to(device)

    # 线性层用于结合注意力上下文向量和嵌入输入
    # 输入形状: (batch_size, 1, hidden_size + hidden_size)
    # 输出形状: (batch_size, 1, hidden_size)
    self.attn_combine = nn.Linear(hidden_size + hidden_size,hidden_size).to(device)
    self.gru = nn.GRU(hidden_size,hidden_size,batch_first=True).to(device)
    self.out = nn.Linear(hidden_size,fre_vocab_size).to(device)
    self.softmax = nn.LogSoftmax(dim=-1).to(device)

  def forward(self,input,hidden,encoder_output):
    # input shape: (batch_size, 1)
    # hidden shape: (1, batch_size, hidden_size) - 来自 GRU，batch_first=True，num_layers=1
    # encoder_output shape: (batch_size, MAX_LENGTH, hidden_size) - 填充后的编码器输出

    # 嵌入输入并应用 dropout
    embed_y1 = self.embed(input) # shape: (batch_size, 1, hidden_size)
    embed_y2 = self.drop(embed_y1) # shape: (batch_size, 1, hidden_size)

    # 计算注意力权重
    # 连接嵌入输入（压缩）和隐藏状态（压缩）
    # embed_y2_squeezed shape: (batch_size, hidden_size)
    # hidden_squeezed shape: (batch_size, hidden_size)
    # 连接后的形状: (batch_size, hidden_size + hidden_size)
    attn_weights = F.softmax(self.attn(torch.cat((embed_y2.squeeze(0),hidden.squeeze(0)),dim=-1)),dim=-1) # shape: (batch_size, MAX_LENGTH)

    atten1 = torch.bmm(attn_weights.unsqueeze(dim=0), encoder_output.unsqueeze(dim=0))
        # 5.需要将embed_y1和atten1需要再次拼接:temp_vec-->[1, 512]
    temp_vec = torch.cat((embed_y1[0], atten1[0]), dim=-1)
    # 6. 将拼接后的结果进行线性变换,按照指定尺寸输出:combin_output-->[1,1, 256]
    combin_output = self.attn_combine(temp_vec).unsqueeze(dim=0)
    # 7. 将上述结果经过relu:relu_output-->[1,1,256]
    relu_output = F.relu(combin_output)

    # 通过 GRU
    # gru_output shape: (batch_size, 1, hidden_size)
    # hn shape: (1, batch_size, hidden_size)
    gru_output,hn = self.gru(relu_output,hidden)

    # 最终的线性层和 softmax，用于输出概率
    # result shape: (batch_size, fre_vocab_size)
    result = self.out(gru_output[0]) # 在线性层之前压缩时间步维度
    output = self.softmax(result) # shape: (batch_size, fre_vocab_size)

    return output,hn,attn_weights

In [None]:
def demo_test_att():
  #实例化 数据集对象
  #实例化 数据加载器对象
  #实例化 编码器对象
  encoder = EncodeGru(english_word_n,hidden_size).to(device)
  #实例化 解码器对象
  atten_decoder = DecodeGruAtten(french_word_n,hidden_size).to(device)
  for x,y in my_dataloader:
    #数据投入编码器
    encoder_output,encoder_hidden = encoder(x,encoder.inithidden())
    print(encoder_output.shape)
    # Ensure encoder_output_c is on the same device as other tensors and has a batch dimension
    encoder_output_c = torch.zeros(MAX_LENGTH,hidden_size).to(device)
    for i in range(encoder_output.shape[1]):
      encoder_output_c[i] = encoder_output[0,i]
    # Add batch dimension
    encoder_output_c = encoder_output_c.unsqueeze(0)

    #编码器的结果投入解码器
    for i in range(y.shape[1]):
      temp_vec = y[0][i].reshape(1,-1).to(device)
      output,hidden,atten_weights = atten_decoder(temp_vec,encoder_hidden,encoder_output_c)
      print(output.shape)
      # print(encoder_output_c) # This print statement might produce a large output, commenting out for now

    break
demo_test_att()

torch.Size([1, 9, 256])
torch.Size([1, 4345])
torch.Size([1, 4345])
torch.Size([1, 4345])
torch.Size([1, 4345])
torch.Size([1, 4345])
torch.Size([1, 4345])
torch.Size([1, 4345])
torch.Size([1, 4345])
torch.Size([1, 4345])


In [None]:
#@title 构建模型训练函数
from tqdm import tqdm
lr = 1e-4
epochs = 2
teach_forcing_ratio = 0.5
hidden_size = 255

def train_model():
  my_data = SeqDataset(my_pairs=my_pairs)
  my_dataloader = DataLoader(dataset=my_data,batch_size=1,shuffle=True)
  encoder = EncodeGru(english_word_n,hidden_size).to(device)
  decoder = DecodeGruAtten(french_word_n,hidden_size).to(device)
  plot_loss_list = []

  loss_fn = nn.NLLLoss()
  encoder_optimizer = optim.Adam(encoder.parameters(),lr=lr)
  decoder_optimizer = optim.Adam(decoder.parameters(),lr=lr)

  for epoch in range(epochs):
    starttime = time.time()
    print_loss_total,plot_loss_total = 0.0,0.0
    for i,(x,y) in tqdm(enumerate(my_dataloader,start=1)):
      loss = train_teacher_forcing(x,y,encoder,decoder,loss_fn,encoder_optimizer,decoder_optimizer)
      print_loss_total += loss
      plot_loss_total += loss
      #每1000轮打印一次损失
      if i % 1000 == 0:
        print_loss_avg = print_loss_total/1000
        print_loss_total = 0.0
        #打印轮次，平均损失，所用训练时间
        print('当前训练的轮次为：%s, 平均损失为：%s, 训练耗时：%s' % (epoch+1, print_loss_avg, time.time()-starttime))
      if i % 100 == 0:
        plot_loss_list.append(plot_loss_total/100)
        plot_loss_total = 0.0

      #保存模型
      torch.save(encoder.state_dict(),"/content/drive/MyDrive/NLP/model/encoder.pth")
      torch.save(decoder.state_dict(),"/content/drive/MyDrive/NLP/model/decoder.pth")
  plt.figure()
  plt.plot(plot_loss_list)
  plt.savefig("/content/drive/MyDrive/NLP/model/loss.png")
  plt.show()

def train_teacher_forcing(x,y,encoder,decoder,loss_fn,encoder_optimizer,decoder_optimizer):
  encoder_hidden = encoder.inithidden()
  encoder_output,encoder_hidden = encoder(x,encoder_hidden)

  encoder_output_padded = torch.zeros(encoder_output.shape[0], MAX_LENGTH, hidden_size).to(device)
  seq_len = encoder_output.shape[1]
  encoder_output_padded[:, :seq_len, :] = encoder_output[:, :seq_len, :]

  decoder_hidden = encoder_hidden
  input_y = torch.tensor([[SOS_TOKEN]],dtype=torch.long).to(device)

  my_loss = 0.0
  y_len = y.shape[1]
  use_teacher_forcing = 1 if random.random() < teach_forcing_ratio else 0
  if use_teacher_forcing:
    for i in range(y_len):
      output_y,decoder_hidden,atten_weights = decoder(input_y,decoder_hidden,encoder_output_padded)
      target_y = y[0][i].view(1)
      my_loss = my_loss + loss_fn(output_y,target_y)
      input_y = y[0][i].view(1,-1)
  else:
    for i in range(y_len):
      output_y,decoder_hidden,atten_weights = decoder(input_y,decoder_hidden,encoder_output_padded)
      target_y = y[0][i].view(1)
      my_loss = my_loss + loss_fn(output_y,target_y)
      topv,topi = output_y.topk(1)
      if topi.squeeze(1) == EOS_TOKEN:
        break
      input_y = topi.detach()
  #梯度清零
  encoder_optimizer.zero_grad()
  decoder_optimizer.zero_grad()
  #反向传播
  my_loss.backward()
  #梯度更新
  encoder_optimizer.step()
  decoder_optimizer.step()

  return my_loss.item()/y_len

train_model()

1002it [02:04, 10.32it/s]

当前训练的轮次为：0, 平均损失为：4.021194262384987, 训练耗时：124.11559915542603


2001it [04:09,  7.70it/s]

当前训练的轮次为：0, 平均损失为：3.661198564737186, 训练耗时：249.38450646400452


3001it [06:13,  9.61it/s]

当前训练的轮次为：0, 平均损失为：3.4523617556923916, 训练耗时：373.6092622280121


4001it [08:22,  9.91it/s]

当前训练的轮次为：0, 平均损失为：3.3079223645104294, 训练耗时：502.3656086921692


5000it [10:37,  7.28it/s]

当前训练的轮次为：0, 平均损失为：3.2856225761807147, 训练耗时：636.993305683136


6000it [12:57,  9.78it/s]

当前训练的轮次为：0, 平均损失为：3.191547954574467, 训练耗时：777.1756322383881


6999it [15:09,  9.90it/s]

当前训练的轮次为：0, 平均损失为：3.0775495722589077, 训练耗时：909.0458691120148


8000it [17:17,  6.88it/s]

当前训练的轮次为：0, 平均损失为：3.0212512742897832, 训练耗时：1037.6473650932312


9001it [19:26,  7.65it/s]

当前训练的轮次为：0, 平均损失为：2.9832370890288167, 训练耗时：1166.2478213310242


10001it [21:33,  9.19it/s]

当前训练的轮次为：0, 平均损失为：2.942836958563135, 训练耗时：1293.8016946315765


11001it [23:41,  8.57it/s]

当前训练的轮次为：0, 平均损失为：2.9205502190726116, 训练耗时：1421.7534267902374


12001it [25:50,  9.25it/s]

当前训练的轮次为：0, 平均损失为：2.8871494023039235, 训练耗时：1550.272677898407


13001it [27:57,  9.29it/s]

当前训练的轮次为：0, 平均损失为：2.8372564709046526, 训练耗时：1677.4068076610565


14001it [30:05,  9.79it/s]

当前训练的轮次为：0, 平均损失为：2.8501881523777546, 训练耗时：1805.7273972034454


15001it [32:12,  9.72it/s]

当前训练的轮次为：0, 平均损失为：2.800535208777398, 训练耗时：1932.3646359443665


16000it [34:18,  8.05it/s]

当前训练的轮次为：0, 平均损失为：2.7307099197928872, 训练耗时：2058.0349972248077


17000it [36:23,  9.64it/s]

当前训练的轮次为：0, 平均损失为：2.714087877810189, 训练耗时：2183.552980899811


17998it [38:29,  9.92it/s]

当前训练的轮次为：0, 平均损失为：2.6221546267399747, 训练耗时：2309.1070840358734


19001it [40:35,  7.44it/s]

当前训练的轮次为：0, 平均损失为：2.6732841259460582, 训练耗时：2435.546188354492


20002it [42:42,  7.67it/s]

当前训练的轮次为：0, 平均损失为：2.5828984836008813, 训练耗时：2562.0197076797485


21001it [44:48,  8.02it/s]

当前训练的轮次为：0, 平均损失为：2.5789597879737154, 训练耗时：2688.3246603012085


22001it [46:54,  7.85it/s]

当前训练的轮次为：0, 平均损失为：2.610603003252497, 训练耗时：2814.1969015598297


23002it [49:01,  8.37it/s]

当前训练的轮次为：0, 平均损失为：2.593225075268178, 训练耗时：2941.3684265613556


24000it [51:06,  6.99it/s]

当前训练的轮次为：0, 平均损失为：2.483364989281174, 训练耗时：3066.7591784000397


25001it [53:12,  8.03it/s]

当前训练的轮次为：0, 平均损失为：2.4386004473289793, 训练耗时：3191.9943459033966


26002it [55:18,  8.77it/s]

当前训练的轮次为：0, 平均损失为：2.4571577307834542, 训练耗时：3318.323675632477


27001it [57:25,  8.89it/s]

当前训练的轮次为：0, 平均损失为：2.470478063972598, 训练耗时：3445.0041592121124


28001it [59:42,  7.56it/s]

当前训练的轮次为：0, 平均损失为：2.4789369333367466, 训练耗时：3581.890130996704


29001it [1:01:57, 11.24it/s]

当前训练的轮次为：0, 平均损失为：2.3592175031671396, 训练耗时：3717.6590180397034


30001it [1:04:11, 10.30it/s]

当前训练的轮次为：0, 平均损失为：2.3522247069501665, 训练耗时：3851.6611733436584


31000it [1:06:20,  9.81it/s]

当前训练的轮次为：0, 平均损失为：2.4023057767854796, 训练耗时：3980.945950984955


31411it [1:07:12,  7.79it/s]


KeyboardInterrupt: 

In [29]:
#@title 构建模型评估函数
encoder_path = "/content/drive/MyDrive/NLP/model/encoder_1.pth"
decoder_path = "/content/drive/MyDrive/NLP/model/decoder_1.pth"
def eval_model():
  hidden_size = 256 # Correct hidden size to match the saved model
  encoder_model = EncodeGru(english_word_n,hidden_size)
  decoder_model = DecodeGruAtten(french_word_n,hidden_size)
  encoder_model.load_state_dict(torch.load(encoder_path,map_location="cpu"))
  decoder_model.load_state_dict(torch.load(decoder_path,map_location="cpu"))
  print(encoder_model)
  print(decoder_model)
  my_pair = [
      ['i m .', 'j ai ans .'],
      ['i m ok .','je vais bien .'],
      ['i m ok .',      'ca va .'],
      ['i m fat .',     'je suis gras .'],
      ['i m fat .',     'je suis gros .']
  ]
  for i,pair in enumerate(my_pair):
    x = pair[0]
    y = pair[1]
    x_encoder =[english_word2index[i] for i in x.split(" ")]
    x_encoder.append(EOS_TOKEN)
    x_tensor = torch.tensor(x_encoder,dtype=torch.long).view(1,-1)
    decoder_word ,atten_weights = predict_word(x_tensor,encoder_model,decoder_model)
    output_word = " ".join(decoder_word)
    print(f"x--->{x}")
    print(f"y--->{y}")
    print(f'predict--->{output_word}')


def predict_word(x_tensor,encoder_model,decoder_model):
  with torch.no_grad():
    #数据经过编码器处理
    encoder_output,encoder_hidden = encoder_model(x_tensor,encoder_model.inithidden())

    encoder_output_c = torch.zeros(MAX_LENGTH,encoder_model.hidden_size)
    x_len = x_tensor.shape[1] # Use x_tensor here instead of x
    for i in range(x_len):
      encoder_output_c[i] = encoder_output[0,i]

    #解码
    decoder_hidden = encoder_hidden
    input_y = torch.tensor([[SOS_TOKEN]],dtype=torch.long)
    decoder_word = []

    decoder_atten = torch.zeros(MAX_LENGTH,MAX_LENGTH)
    for i in range(MAX_LENGTH):
      output_y,decoder_hidden,atten_weights = decoder_model(input_y,decoder_hidden,encoder_output_c)
      topv,topi = torch.topk(output_y,k=1)
      decoder_atten[i] = atten_weights
      if topi.item() == EOS_TOKEN:
        break
      else:
        decoder_word.append(french_index2word[topi.item()])
      input_y = topi
  return decoder_word ,decoder_atten[:i+1]

eval_model()

EncodeGru(
  (embed): Embedding(2803, 256)
  (gru): GRU(256, 256, batch_first=True)
)
DecodeGruAtten(
  (embed): Embedding(4345, 256)
  (drop): Dropout(p=0.1, inplace=False)
  (attn): Linear(in_features=512, out_features=10, bias=True)
  (attn_combine): Linear(in_features=512, out_features=256, bias=True)
  (gru): GRU(256, 256, batch_first=True)
  (out): Linear(in_features=256, out_features=4345, bias=True)
  (softmax): LogSoftmax(dim=-1)
)
x--->i m .
y--->j ai ans .
predict--->je suis .
x--->i m ok .
y--->je vais bien .
predict--->je vais .
x--->i m ok .
y--->ca va .
predict--->je vais .
x--->i m fat .
y--->je suis gras .
predict--->je suis impressionnee .
x--->i m fat .
y--->je suis gros .
predict--->je suis impressionnee .


In [43]:
#@title 构建模型测试函数

#@title 构建模型评估函数
encoder_path = "/content/drive/MyDrive/NLP/model/encoder_1.pth"
decoder_path = "/content/drive/MyDrive/NLP/model/decoder_1.pth"
def eval_model():
  hidden_size = 256 # Correct hidden size to match the saved model
  encoder_model = EncodeGru(english_word_n,hidden_size)
  decoder_model = DecodeGruAtten(french_word_n,hidden_size)
  encoder_model.load_state_dict(torch.load(encoder_path,map_location="cpu"))
  decoder_model.load_state_dict(torch.load(decoder_path,map_location="cpu"))
  print(encoder_model)
  print(decoder_model)
  my_pair = [
      ['i love you .'],
  ]
  for i,pair in enumerate(my_pair):
    x = pair
    x_encoder =[english_word2index[i] for i in pair[0].split(" ")]
    x_encoder.append(EOS_TOKEN)
    x_tensor = torch.tensor(x_encoder,dtype=torch.long).view(1,-1)
    decoder_word ,atten_weights = predict_word(x_tensor,encoder_model,decoder_model)
    output_word = " ".join(decoder_word)
    print(f"x--->{x}")
    print(f'predict--->{output_word}')


def predict_word(x_tensor,encoder_model,decoder_model):
  with torch.no_grad():
    #数据经过编码器处理
    encoder_output,encoder_hidden = encoder_model(x_tensor,encoder_model.inithidden())

    encoder_output_c = torch.zeros(MAX_LENGTH,encoder_model.hidden_size)
    x_len = x_tensor.shape[1] # Use x_tensor here instead of x
    for i in range(x_len):
      encoder_output_c[i] = encoder_output[0,i]

    #解码
    decoder_hidden = encoder_hidden
    input_y = torch.tensor([[SOS_TOKEN]],dtype=torch.long)
    decoder_word = []

    decoder_atten = torch.zeros(MAX_LENGTH,MAX_LENGTH)
    for i in range(MAX_LENGTH):
      output_y,decoder_hidden,atten_weights = decoder_model(input_y,decoder_hidden,encoder_output_c)
      topv,topi = torch.topk(output_y,k=1)
      decoder_atten[i] = atten_weights
      if topi.item() == EOS_TOKEN:
        break
      else:
        decoder_word.append(french_index2word[topi.item()])
      input_y = topi
  return decoder_word ,decoder_atten[:i+1]

eval_model()

EncodeGru(
  (embed): Embedding(2803, 256)
  (gru): GRU(256, 256, batch_first=True)
)
DecodeGruAtten(
  (embed): Embedding(4345, 256)
  (drop): Dropout(p=0.1, inplace=False)
  (attn): Linear(in_features=512, out_features=10, bias=True)
  (attn_combine): Linear(in_features=512, out_features=256, bias=True)
  (gru): GRU(256, 256, batch_first=True)
  (out): Linear(in_features=256, out_features=4345, bias=True)
  (softmax): LogSoftmax(dim=-1)
)
x--->['i love you .']
predict--->je vous vous .
