In [None]:
from __future__ import unicode_literals, print_function, division
import random
import time

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import rnn_utils

# Special vocabulary indices marking sentence start / end, re-exported from rnn_utils.
SOS_token = rnn_utils.SOS_token
EOS_token = rnn_utils.EOS_token

# Passed to prepareData and used as the default max_length of Trainer / Evaluator.
MAX_LENGTH = 10

# Seed the RNGs this notebook draws from (teacher forcing uses `random`,
# parameter initialisation uses torch) so Restart & Run All is reproducible.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# NOTE(review): the meaning of the trailing `True` flag is defined in rnn_utils,
# not in this notebook — confirm against prepareData's signature.
dataset = rnn_utils.prepareData('eng', 'cmn', MAX_LENGTH, device, True)

# Inspect the data: vocabulary sizes and a few random sentence pairs.
print('input vocabulary size: ', dataset.input_lang.n_words)
print('output vocabulary size:', dataset.output_lang.n_words)
for _ in range(3):
  sample_pair = dataset.randomPair()
  print('>', sample_pair[0])
  print('=', sample_pair[1])




Encoder网络结构:
<img src="images/encoder-network.png" style="width:200px;height:200px;">

In [None]:
class EncoderRNN(nn.Module):
  """GRU encoder that consumes one token index per call.

  Args:
    input_size: number of entries in the embedding table (source vocabulary size).
    hidden_size: width of both the embedding vectors and the GRU hidden state.
  """

  def __init__(self, input_size, hidden_size):
    super(EncoderRNN, self).__init__()
    # Stored because initHidden() needs it to size the initial state.
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(input_size, hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size)

  def forward(self, input, hidden):
    """Encode a single token.

    Args:
      input: integer tensor holding one token index.
      hidden: previous hidden state of shape (1, 1, hidden_size).

    Returns:
      (output, hidden), both of shape (1, 1, hidden_size).
    """
    # The GRU expects (seq_len, batch, features); one token -> (1, 1, hidden_size).
    embedded = self.embedding(input).view(1, 1, -1)
    output, hidden = self.gru(embedded, hidden)
    return output, hidden

  def initHidden(self):
    """Return an all-zero initial hidden state of shape (1, 1, hidden_size)."""
    # Allocate on the device that holds this module's weights so the state
    # always matches the model, independent of notebook-level globals.
    return torch.zeros(1, 1, self.hidden_size, device=next(self.parameters()).device)

Decoder网络结构:
<img src="images/decoder-network.png" style="width:200px;height:200px;">

In [None]:
class DecoderRNN(nn.Module):
  """GRU decoder that emits log-probabilities over the output vocabulary, one token per call.

  Args:
    hidden_size: width of both the embedding vectors and the GRU hidden state.
    output_size: number of entries in the target vocabulary.
  """

  def __init__(self, hidden_size, output_size):
    super(DecoderRNN, self).__init__()
    # Stored because initHidden() needs it to size the initial state.
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(output_size, hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size)
    self.out = nn.Linear(hidden_size, output_size)
    # Log-probabilities, so the output can be fed straight into nn.NLLLoss.
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, input, hidden):
    """Decode a single step.

    Args:
      input: integer tensor holding one token index (the previous output token).
      hidden: previous hidden state of shape (1, 1, hidden_size).

    Returns:
      (output, hidden): output has shape (1, output_size) and holds
      log-probabilities; hidden has shape (1, 1, hidden_size).
    """
    # The GRU expects (seq_len, batch, features); one token -> (1, 1, hidden_size).
    embedded = self.embedding(input).view(1, 1, -1)
    embedded = F.relu(embedded)
    output, hidden = self.gru(embedded, hidden)
    # output[0] drops the length-1 sequence axis before the projection.
    output = self.softmax(self.out(output[0]))
    return output, hidden

  def initHidden(self):
    """Return an all-zero initial hidden state of shape (1, 1, hidden_size)."""
    # Allocate on the device that holds this module's weights so the state
    # always matches the model, independent of notebook-level globals.
    return torch.zeros(1, 1, self.hidden_size, device=next(self.parameters()).device)


In [None]:
class Trainer():
  """Trains an encoder/decoder pair on sentence pairs drawn from `dataset`.

  Args:
    encoder: module called as encoder(token, hidden) -> (output, hidden),
      exposing initHidden() and hidden_size.
    decoder: module called as decoder(token, hidden) -> (log_probs, hidden).
    dataset: object providing randomPair() and sentence-to-tensor helpers.
    teacher_forcing_ratio: probability, per decoding step, of feeding the
      ground-truth token instead of the model's own prediction.
    learning_rate: step size for both SGD optimizers.
    max_length: number of rows reserved for the per-token encoder outputs.
  """

  def __init__(self, encoder, decoder, dataset, teacher_forcing_ratio=0.5, learning_rate=0.01, max_length=MAX_LENGTH):
    self.encoder = encoder
    self.decoder = decoder
    self.max_length = max_length
    self.teacher_forcing_ratio = teacher_forcing_ratio
    self.dataset = dataset

    self.encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    self.decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    # Negative log-likelihood; expects log-probabilities from the decoder.
    self.criterion = nn.NLLLoss()

  def _tensorsFromPair(self, pair):
    """Convert an (input sentence, target sentence) pair into index tensors."""
    input_tensor = self.dataset.sentenceToTensorFromInputLang(pair[0])
    # NOTE(review): sentenceToTensorFromOutputLang is assumed to exist by
    # symmetry with sentenceToTensorFromInputLang (used by Evaluator) —
    # confirm the actual helper name in rnn_utils.
    target_tensor = self.dataset.sentenceToTensorFromOutputLang(pair[1])
    return input_tensor, target_tensor

  def train(self, n_iters, print_every=1000):
    """Run `n_iters` single-pair updates, printing the mean loss every `print_every` steps."""
    start = time.time()
    print_loss_total = 0  # Reset every print_every

    # One randomly drawn sentence pair per iteration.
    training_pairs = [self._tensorsFromPair(self.dataset.randomPair())
                      for _ in range(n_iters)]

    # `step` rather than `iter`, to avoid shadowing the builtin.
    for step in range(1, n_iters + 1):
      input_tensor, target_tensor = training_pairs[step - 1]

      loss = self.trainStep(input_tensor, target_tensor)
      print_loss_total += loss

      if step % print_every == 0:
        print_loss_avg = print_loss_total / print_every
        print_loss_total = 0
        print('%s (%d %d%%) %.4f' % (rnn_utils.timeSince(start, step / n_iters),
                                     step, step / n_iters * 100, print_loss_avg))

  def trainStep(self, input_tensor, target_tensor):
    """Run one forward/backward/update pass on a single sentence pair.

    Args:
      input_tensor: source token indices, iterated along dim 0.
      target_tensor: target token indices, iterated along dim 0.
        # assumes each target_tensor[di] has shape (1,) so it can serve as
        # the NLLLoss target and as the next decoder input — TODO confirm

    Returns:
      The summed loss divided by the target length, as a Python float.
    """
    input_length, target_length = input_tensor.size(0), target_tensor.size(0)
    if target_length == 0:
      # Nothing to predict: no loss tensor would exist to back-propagate.
      return 0.0

    loss = 0

    self.encoder_optimizer.zero_grad()
    self.decoder_optimizer.zero_grad()

    encoder_hidden = self.encoder.initHidden()
    # Per-token encoder outputs; the plain decoder only consumes the final
    # hidden state, so these are recorded but not read further in this method.
    encoder_outputs = torch.zeros(self.max_length, self.encoder.hidden_size, device=device)

    for ei in range(input_length):
      encoder_output, encoder_hidden = self.encoder(input_tensor[ei], encoder_hidden)
      encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    # The encoder's final state seeds the decoder.
    decoder_hidden = encoder_hidden

    def get_decoder_input(di, decoder_output):
      if random.random() < self.teacher_forcing_ratio:
        # Teacher forcing: Feed the target as the next input
        return target_tensor[di]
      else:
        # Without teacher forcing: use its own predictions as the next input
        topv, topi = decoder_output.topk(1)
        return topi.squeeze().detach()

    for di in range(target_length):
      decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
      loss += self.criterion(decoder_output, target_tensor[di])
      decoder_input = get_decoder_input(di, decoder_output)

      if decoder_input.item() == EOS_token:
        break

    loss.backward()

    self.encoder_optimizer.step()
    self.decoder_optimizer.step()

    return loss.item() / target_length
    


In [None]:
# Hidden-state size handed to both networks.
hidden_size = 256

# Vocabulary sizes come from the prepared dataset; both models are moved to `device`.
encoder = EncoderRNN(input_size=dataset.input_lang.n_words, hidden_size=hidden_size).to(device)
decoder = DecoderRNN(hidden_size=hidden_size, output_size=dataset.output_lang.n_words).to(device)

# Short training run: 1000 single-pair updates, reporting the mean loss every 100.
trainer = Trainer(encoder=encoder, decoder=decoder, dataset=dataset)
trainer.train(n_iters=1000, print_every=100)

In [None]:
class Evaluator():
  """Greedy-decodes sentences with a trained encoder/decoder pair.

  Args:
    encoder: module called as encoder(token, hidden) -> (output, hidden),
      exposing initHidden() and hidden_size.
    decoder: module called as decoder(token, hidden) -> (scores, hidden).
    dataset: object providing randomPair(), sentenceToTensorFromInputLang()
      and wordFromOutputLang().
    max_length: maximum number of tokens to decode, and number of rows
      reserved for the per-token encoder outputs.
  """

  def __init__(self, encoder, decoder, dataset, max_length=MAX_LENGTH):
    self.decoder = decoder
    self.encoder = encoder
    self.max_length = max_length
    self.dataset = dataset

  def evaluateRandomly(self, n=10):
    """Print `n` random pairs: source ('>'), reference ('=') and model output ('<')."""
    for _ in range(n):
      # Use the dataset given to this evaluator, not the notebook-level global.
      pair = self.dataset.randomPair()
      print('>', pair[0])
      print('=', pair[1])
      output_words = self.evaluate(pair[0])
      output_sentence = ' '.join(output_words)
      print('<', output_sentence)
      print('')

  def evaluate(self, sentence):
    """Translate one sentence greedily.

    Args:
      sentence: source sentence accepted by dataset.sentenceToTensorFromInputLang.

    Returns:
      List of decoded words; ends with '<EOS>' when the decoder emits the
      end token before `max_length` steps.
    """
    with torch.no_grad():
      # Use the dataset given to this evaluator, not the notebook-level global.
      input_tensor = self.dataset.sentenceToTensorFromInputLang(sentence)
      input_length = input_tensor.size()[0]
      encoder_hidden = self.encoder.initHidden()

      # Per-token encoder outputs; the plain decoder only consumes the final
      # hidden state, so these are recorded but not read further in this method.
      encoder_outputs = torch.zeros(self.max_length, self.encoder.hidden_size, device=device)

      for ei in range(input_length):
        encoder_output, encoder_hidden = self.encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] += encoder_output[0, 0]

      decoder_input = torch.tensor([[SOS_token]], device=device)
      # The encoder's final state seeds the decoder.
      decoder_hidden = encoder_hidden
      decoded_words = []

      for _ in range(self.max_length):
        decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
        # Greedy choice: the highest-scoring token at this step.
        _, topi = decoder_output.topk(1)
        token = topi.item()

        word = '<EOS>' if token == EOS_token else self.dataset.wordFromOutputLang(token)
        decoded_words.append(word)

        if word == '<EOS>':
          break

        # Feed the prediction back in as the next decoder input.
        decoder_input = topi.squeeze().detach()

      return decoded_words

In [None]:
# Restore encoder/decoder weights from checkpoint files. map_location remaps
# the stored tensors onto `device`, so a checkpoint saved on a GPU still loads
# on a CPU-only machine.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
encoder.load_state_dict(torch.load('rnn-encoder-seq2seq.pkl', map_location=device))
decoder.load_state_dict(torch.load('rnn-decoder-seq2seq.pkl', map_location=device))

In [None]:
# Translate ten random sentence pairs with the current models and print the results.
evaluator = Evaluator(encoder=encoder, decoder=decoder, dataset=dataset)
evaluator.evaluateRandomly(n=10)