In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from itertools import chain
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#torch.cuda.current_device()

# Prepare dataset

In [2]:
PAD_token = 0
SOS_token = 1
EOS_token = 2


class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "PAD", 1: "SOS", 2: "EOS"}
        self.n_words = 3  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [3]:
# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

In [4]:
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [5]:
MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [6]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))

Reading lines...
Read 135842 sentence pairs
Trimmed to 10599 sentence pairs
Counting words...
Counted words:
fra 4346
eng 2804
['je m en vais le mois prochain .', 'i am moving next month .']


# Set up model: encoder and decoder

In [43]:
from torch.nn.utils.rnn import pack_padded_sequence
from torch.nn.utils.rnn import pad_packed_sequence
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, device):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.device = device
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)

    # input shape: (B, largest seq length)
    # input is padded
    # hidden shape: (B, hidden_size)
    # outputs shape: (B, sequence length, hidden_size)
    def forward(self, inputs, hidden, batch_size=1, input_lengths=None):
        embedded = self.embedding(inputs).view(batch_size, -1, self.hidden_size)
        # in training
        if input_lengths:
            #print(embedded)
            #outputs = pack_padded_sequence(embedded, input_lengths,
            #                                batch_first=True, enforce_sorted=False)
            #print(outputs)
            #outputs, output_lengths = pad_packed_sequence(outputs, batch_first=True)
            #print(outputs)
            outputs, hidden = self.gru(outputs, hidden)
            
        # in prediction
        else:
            outputs, hidden = self.gru(embedded, hidden)
        
        return outputs, hidden

    def initHidden(self, batch_size=1):
        return torch.zeros(1, batch_size, self.hidden_size, device=self.device)

SyntaxError: invalid syntax (<ipython-input-43-99a52cf2a958>, line 21)

In [44]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, device):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.device=device
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        #self.softmax = nn.LogSoftmax(dim=1)

    # input shape: (B, largest seq length)
    # input is padded
    # embedded shape: (B, seq length, hidden size)
    # outputs shape: (B, seq length, output size)
    def forward(self, inputs, hidden, batch_size=1, input_lengths=None):
        embedded = self.embedding(inputs).view(batch_size, -1, self.hidden_size)
        embedded = F.relu(embedded)
        # in training
        if input_lengths:
            #print(embedded)
            #outputs = pack_padded_sequence(embedded, input_lengths,
            #                               batch_first=True, enforce_sorted=False)
            #print(outputs)
            #outputs, output_lengths = pad_packed_sequence(outputs, batch_first=True)
            #print(outputs)
            outputs, hidden = self.gru(outputs, hidden)
            
        # in prediction
        else:
            outputs, hidden = self.gru(embedded, hidden)
        
        outputs = self.out(outputs)
        #output = self.softmax(self.out(output[0]))
        return outputs, hidden

    def initHidden(self, batch_size=1):
        return torch.zeros(1, batch_size, self.hidden_size, device=self.device)

SyntaxError: invalid syntax (<ipython-input-44-d778a37036d0>, line 22)

# Data generator

In [20]:
# dataset for translation
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

def pad_sequence_seq2seq(batch):
    # batch is a list of data
    input_tokens = [pair[0] for pair in batch]
    output_tokens = [pair[1] for pair in batch]
    input_tokens_padded = pad_sequence(input_tokens, batch_first=True)
    input_lengths = [len(pair[0]) for pair in batch]
    output_lengths = [len(pair[1]) for pair in batch]
    output_tokens_padded = pad_sequence(output_tokens, batch_first=True)
    return input_tokens_padded, input_lengths, output_tokens_padded, output_lengths

class dataset(Dataset):
    def __init__(self, device):
        super(dataset, self).__init__()
        self.data = pairs
        self.device = device
    
    def __len__(self):
        return len(pairs)
    
    def __getitem__(self, idx):
        input, output = pairs[idx]
        input_token = torch.tensor([input_lang.word2index[word] for word in input.split()] +
                                   [EOS_token],
                                   device=self.device)
        output_token = torch.tensor([SOS_token] + 
                                    [output_lang.word2index[word] for word in output.split()] +
                                    [EOS_token],
                                    device=self.device)
        return input_token, output_token
    
data = dataset(device)
dataloader = DataLoader(data, collate_fn=pad_sequence_seq2seq ,batch_size=1)

# Construct model class

In [40]:
from torch.nn.utils.rnn import pack_padded_sequence
class seq2seq():
    def __init__(self, input_size, hidden_size, output_size, device):
        super(seq2seq, self).__init__()
        self.device = device
        self.encoder = EncoderRNN(input_size, hidden_size, self.device).to(self.device)
        self.decoder = DecoderRNN(hidden_size,output_size, self.device).to(self.device)
    def train(self, dataloader, epochs=5, encoder_lr=0.01, decoder_lr=0.01):
        criterion = nn.CrossEntropyLoss()
        encoder_optimizer = torch.optim.SGD(self.encoder.parameters(), lr=0.01)
        decoder_optimizer = torch.optim.SGD(self.decoder.parameters(), lr=0.01)
        for epoch in range(epochs):
            epoch_loss = 0
            for token_fra, token_fra_len, token_eng, token_eng_len in dataloader:
                batch_size = len(token_fra_len)
                # start running encoder
                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()
                loss = 0
                # init encoder hidden
                encoder_hidden = self.encoder.initHidden(batch_size)
                # pass input senteces through encoder
                encoder_outputs, encoder_hidden = self.encoder.forward(token_fra,
                                                      encoder_hidden,
                                                    input_lengths=token_fra_len,
                                                    batch_size=batch_size
                                                    )
                # encoder ouput is first hidden of decoder
                decoder_hidden = encoder_outputs[:,-1, :].view(1,1,7)
                # decoder_outputs shape: (B, largest seq lengths, output vocab size)
                decoder_outputs, decoder_hidden = self.decoder.forward(token_eng,
                                                                       decoder_hidden,
                                                                       batch_size=batch_size,
                                                                      input_lengths=token_eng_len)
                for batch_index in range(batch_size):
                    output_length = token_eng_len[batch_index]
                    for i in range(output_length-1):
                        loss += criterion(decoder_outputs[batch_index, i, :].view(1,-1),
                                         token_eng[batch_index,i+1].view(-1))
                    #loss += criterion(decoder_outputs[batch_index,0:output_length-1,:],
                    #         token_eng[batch_index, 1:output_length])
                    
                
                epoch_loss += loss
                loss.backward()
                decoder_optimizer.step()
                encoder_optimizer.step()
            print("epoch:", epoch, epoch_loss/len(dataloader))
    # data is a encoded sentence like nn.tensor([13,24,56,...])
    def predict(model, inputs):
        with torch.no_grad():
            
            encoder_hidden = model.encoder.initHidden(1)
            loss = 0
            encoder_outputs, encoder_hidden = model.encoder.forward(inputs,encoder_hidden)
            decoder_hidden = encoder_outputs[:,-1,:].view(1,1,7)
            decoder_output = None
            eng_output = "SOS"
            decoder_token = torch.tensor([SOS_token], device=device)
    
            while eng_output != "EOS":
            
                decoder_output,decoder_hidden = model.decoder.forward(decoder_token,
                                                                     decoder_hidden)
                decoder_token = torch.argmax(decoder_output)
                eng_output = output_lang.index2word[int(decoder_token.numpy())]
                print(eng_output)
    def save(self, model_path):
        encoder_path = model_path + "_encoder"
        decoder_path = model_path + "_decoder"
        torch.save(self.encoder.state_dict(), encoder_path)
        torch.save(self.decoder.state_dict(), decoder_path)
    
    def load(self, model_path):
        self.encoder.load_state_dict(torch.load(model_path + "_encoder"))
        self.decoder.load_state_dict(torch.load(model_path + "_decoder"))

In [41]:
model = seq2seq(input_size=input_lang.n_words, hidden_size=7,output_size=output_lang.n_words, device=device)

In [42]:
import time
start = time.time()
model.train(dataloader,epochs=2)
end = time.time()
print(end-start)

tensor([[[-8.2828e-01,  3.0757e-01,  1.6936e-01,  5.3508e-01, -5.8696e-01,
          -1.8521e-04, -5.5467e-01],
         [ 1.3796e+00, -6.0167e-02, -1.0047e+00,  3.2672e-01,  4.1770e-01,
           1.2731e+00, -1.0519e+00],
         [ 4.8803e-01,  9.4790e-01, -1.1649e-01, -2.6626e+00, -1.8363e+00,
           1.5068e-01,  1.2089e+00],
         [-9.0882e-01,  1.1819e-01, -5.6615e-01,  9.5172e-01, -1.8574e+00,
           4.7345e-01, -1.4960e-01],
         [ 1.3625e+00, -1.7420e-01, -3.1181e-01, -1.1311e+00,  1.2371e-01,
           7.7320e-01,  1.0019e-01]]], grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[-8.2828e-01,  3.0757e-01,  1.6936e-01,  5.3508e-01, -5.8696e-01,
         -1.8521e-04, -5.5467e-01],
        [ 1.3796e+00, -6.0167e-02, -1.0047e+00,  3.2672e-01,  4.1770e-01,
          1.2731e+00, -1.0519e+00],
        [ 4.8803e-01,  9.4790e-01, -1.1649e-01, -2.6626e+00, -1.8363e+00,
          1.5068e-01,  1.2089e+00],
        [-9.0882e-01,  1.1819e-01, -5.6615e-01,  9.5172e-01, -1.

       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9443,  0.4292, -0.4171, -1.2101, -0.4963, -1.7165,  0.5858],
         [ 0.1941, -0.3064,  0.1554,  1.9709, -0.2715, -0.4652, -1.0704],
         [ 1.1466,  0.7560, -1.5430, -1.3873, -0.1116,  0.5811, -0.7157],
         [ 1.2823, -0.2385, -0.2949,  1.8148, -0.4888,  0.8630,  0.2619],
         [ 1.3590, -0.1776, -0.3116, -1.1302,  0.1208,  0.7756,  0.1011]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.5807, 0.6435, 1.6039, 0.0000, 0.1173],
         [0.4340, 0.0000, 0.0000, 0.0000, 0.0000, 1.4263, 0.2140],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.5801, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.3945, 0.0000, 0.0000],
         [0.8217, 0.0000, 0.0000, 0.0000, 0.1478, 0.0000, 0.0000],
         [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]]],
       grad_fn=<ReluBackward0>)
PackedSequenc

tensor([[[ 1.9434,  0.4286, -0.4177, -1.2102, -0.4970, -1.7155,  0.5857],
         [ 0.1935, -0.3070,  0.1550,  1.9707, -0.2720, -0.4646, -1.0703],
         [ 1.5069, -0.9481,  0.2151,  0.7381, -1.3562, -0.0026,  0.2318],
         [-0.9099,  0.1162, -0.5632,  0.9515, -1.8563,  0.4722, -0.1495],
         [ 1.3561, -0.1807, -0.3120, -1.1299,  0.1178,  0.7779,  0.1022]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.5809, 0.6395, 1.6062, 0.0000, 0.1207],
         [0.4343, 0.0000, 0.0000, 0.0000, 0.0000, 1.4301, 0.2139],
         [0.0000, 0.0000, 0.0000, 0.5209, 0.0000, 0.0000, 0.0000],
         [0.8826, 0.1772, 0.0000, 0.0000, 0.6232, 0.7602, 0.6080],
         [0.3960, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]]],
       grad_fn=<ReluBackward0>)
PackedSequence(data=tensor([[0.0000, 0.0000, 1.5809, 0.6395, 1.6062, 0.0000, 0.1207],
        [0.4343, 0.0000, 0.0000, 0.0000, 0.0000, 1.4301, 0.2139],
  

           7.8056e-01,  1.0448e-01]]], grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[-8.2836e-01,  3.0748e-01,  1.6938e-01,  5.3509e-01, -5.8704e-01,
         -1.5323e-04, -5.5467e-01],
        [-6.0778e-01, -2.0233e-01, -6.4550e-01, -7.9738e-01,  3.9232e-01,
         -2.8246e-02, -5.1475e-02],
        [ 1.3795e+00, -6.0291e-02, -1.0047e+00,  3.2677e-01,  4.1757e-01,
          1.2732e+00, -1.0518e+00],
        [ 6.2180e-01, -7.7466e-01, -2.4101e-01, -1.7460e+00, -1.3199e-01,
         -8.4337e-02, -3.9239e-02],
        [-9.1011e-01,  1.1564e-01, -5.6153e-01,  9.5121e-01, -1.8552e+00,
          4.7145e-01, -1.4870e-01],
        [ 1.3532e+00, -1.8312e-01, -3.1241e-01, -1.1292e+00,  1.1511e-01,
          7.8056e-01,  1.0448e-01]], grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[-8.2836e-01,  3.0748e-01,  1.6938e-01,  5.3509e-01, -5.8704e-01,
          -1.5323e-04, -5.5467e-01],
       

       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9419,  0.4273, -0.4188, -1.2104, -0.4985, -1.7138,  0.5855],
         [ 0.1925, -0.3078,  0.1542,  1.9707, -0.2731, -0.4638, -1.0700],
         [-0.5081, -0.8379, -1.2639, -0.5465,  0.1756,  0.8845, -0.2270],
         [ 1.2819, -0.2391, -0.2949,  1.8147, -0.4890,  0.8633,  0.2621],
         [ 1.3506, -0.1856, -0.3131, -1.1287,  0.1123,  0.7834,  0.1063]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.5909, 0.6339, 1.6153, 0.0000, 0.1270],
         [0.4372, 0.0000, 0.0000, 0.0000, 0.0000, 1.4396, 0.2133],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.5884, 0.0000],
         [0.1055, 0.0000, 1.2496, 0.3084, 1.2074, 0.0000, 0.1564],
         [0.4150, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]]],
       grad_fn=<ReluBackward0>)
PackedSequenc

       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.5982, 0.6326, 1.6200, 0.0000, 0.1283],
         [0.4386, 0.0000, 0.0000, 0.0000, 0.0000, 1.4429, 0.2126],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.5874, 0.0000],
         [0.8384, 0.0000, 1.5149, 0.0000, 0.0000, 2.0201, 0.0000],
         [0.4264, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]]],
       grad_fn=<ReluBackward0>)
PackedSequence(data=tensor([[0.0000, 0.0000, 1.5982, 0.6326, 1.6200, 0.0000, 0.1283],
        [0.4386, 0.0000, 0.0000, 0.0000, 0.0000, 1.4429, 0.2126],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.5874, 0.0000],
        [0.8384, 0.0000, 1.5149, 0.0000, 0.0000, 2.0201, 0.0000],
        [0.4264, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([

           7.8760e-01,  1.1020e-01]]], grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[-8.2840e-01,  3.0744e-01,  1.6935e-01,  5.3510e-01, -5.8714e-01,
         -1.3983e-04, -5.5465e-01],
        [-6.0771e-01, -2.0232e-01, -6.4547e-01, -7.9739e-01,  3.9197e-01,
         -2.8285e-02, -5.1214e-02],
        [ 1.9135e-01, -3.0905e-01,  1.5284e-01,  1.9703e+00, -2.7477e-01,
         -4.6237e-01, -1.0698e+00],
        [ 1.4008e+00,  1.0054e+00, -8.8513e-01, -1.5416e+00, -2.7830e-01,
         -6.9577e-01, -9.2713e-01],
        [-9.1016e-01,  1.1505e-01, -5.5829e-01,  9.5151e-01, -1.8530e+00,
          4.6981e-01, -1.4667e-01],
        [ 1.3479e+00, -1.8922e-01, -3.1412e-01, -1.1284e+00,  1.0677e-01,
          7.8760e-01,  1.1020e-01]], grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[-8.2840e-01,  3.0744e-01,  1.6935e-01,  5.3510e-01, -5.8714e-01,
          -1.3983e-04, -5.5465e-01],
       

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9394,  0.4246, -0.4215, -1.2113, -0.5014, -1.7103,  0.5853],
        [ 0.1905, -0.3099,  0.1518,  1.9701, -0.2761, -0.4614, -1.0696],
        [ 0.5304,  1.7168,  0.1188, -0.7593, -2.0790,  1.9747,  2.3912],
        [-0.9097,  0.1155, -0.5573,  0.9515, -1.8521,  0.4691, -0.1454],
        [ 1.3472, -0.1903, -0.3152, -1.1286,  0.1042,  0.7897,  0.1128]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9394,  0.4246, -0.4215, -1.2113, -0.5014, -1.7103,  0.5853],
         [ 0.1905, -0.3099,  0.1518,  1.9701, -0.2761, -0.4614, -1.0696],
         [ 0.5304,  1.7168,  0.1188, -0.7593, -2.0790,  1.9747,  2.3912],
         [-0.9097,  0.1155, -0.5573,  0.9515, -1.8521,  0.4691, -0.1454],
         [ 1.3472, -0.1903, -0.3152, -1.1286,  0.1042,  0.7897,  0.1128]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.6212

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[-0.5615,  1.3349, -0.2938,  1.5282, -0.7977,  0.0110,  0.6825],
        [ 0.5453, -0.1154,  0.1302, -1.2159, -0.3806,  1.1264,  0.7336],
        [ 0.8362,  0.2028,  0.8577,  1.3454,  0.1794,  0.1173,  1.0034],
        [-0.9099,  0.1152, -0.5561,  0.9517, -1.8516,  0.4681, -0.1451],
        [ 1.3470, -0.1921, -0.3155, -1.1297,  0.1020,  0.7905,  0.1141]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[-0.5615,  1.3349, -0.2938,  1.5282, -0.7977,  0.0110,  0.6825],
         [ 0.5453, -0.1154,  0.1302, -1.2159, -0.3806,  1.1264,  0.7336],
         [ 0.8362,  0.2028,  0.8577,  1.3454,  0.1794,  0.1173,  1.0034],
         [-0.9099,  0.1152, -0.5561,  0.9517, -1.8516,  0.4681, -0.1451],
         [ 1.3470, -0.1921, -0.3155, -1.1297,  0.1020,  0.7905,  0.1141]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.6287

       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.6367, 0.6349, 1.6478, 0.0000, 0.1363],
         [0.4461, 0.0000, 0.0000, 0.0000, 0.0000, 1.4548, 0.2094],
         [0.0000, 0.0000, 0.0000, 0.5218, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.1442, 0.0000, 0.0000, 0.0000],
         [0.4764, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]]],
       grad_fn=<ReluBackward0>)
PackedSequence(data=tensor([[0.0000, 0.0000, 1.6367, 0.6349, 1.6478, 0.0000, 0.1363],
        [0.4461, 0.0000, 0.0000, 0.0000, 0.0000, 1.4548, 0.2094],
        [0.0000, 0.0000, 0.0000, 0.5218, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.1442, 0.0000, 0.0000, 0.0000],
        [0.4764, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9379,  0.4227, -0.4237, -1.2120, -0.5041, -1.7076,  0.5853],
        [ 0.1896, -0.3113,  0.1498,  1.9697, -0.2784, -0.4596, -1.0691],
        [-0.3839, -1.7751, -0.8348,  1.4647, -0.2400, -0.8275,  0.4633],
        [-0.9089,  0.1155, -0.5549,  0.9519, -1.8510,  0.4672, -0.1431],
        [ 1.3476, -0.1933, -0.3166, -1.1301,  0.0969,  0.7936,  0.1187]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9379,  0.4227, -0.4237, -1.2120, -0.5041, -1.7076,  0.5853],
         [ 0.1896, -0.3113,  0.1498,  1.9697, -0.2784, -0.4596, -1.0691],
         [-0.3839, -1.7751, -0.8348,  1.4647, -0.2400, -0.8275,  0.4633],
         [-0.9089,  0.1155, -0.5549,  0.9519, -1.8510,  0.4672, -0.1431],
         [ 1.3476, -0.1933, -0.3166, -1.1301,  0.0969,  0.7936,  0.1187]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.6531

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9373,  0.4217, -0.4249, -1.2126, -0.5055, -1.7061,  0.5854],
        [ 0.1892, -0.3123,  0.1484,  1.9693, -0.2800, -0.4584, -1.0690],
        [ 0.5673,  1.6656, -1.5315, -0.2226, -1.4837, -1.0774, -0.1756],
        [-0.9081,  0.1160, -0.5542,  0.9519, -1.8504,  0.4670, -0.1417],
        [ 1.3483, -0.1937, -0.3169, -1.1301,  0.0941,  0.7952,  0.1214]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9373,  0.4217, -0.4249, -1.2126, -0.5055, -1.7061,  0.5854],
         [ 0.1892, -0.3123,  0.1484,  1.9693, -0.2800, -0.4584, -1.0690],
         [ 0.5673,  1.6656, -1.5315, -0.2226, -1.4837, -1.0774, -0.1756],
         [-0.9081,  0.1160, -0.5542,  0.9519, -1.8504,  0.4670, -0.1417],
         [ 1.3483, -0.1937, -0.3169, -1.1301,  0.0941,  0.7952,  0.1214]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.6691

       grad_fn=<ReluBackward0>)
PackedSequence(data=tensor([[0.0000, 0.0000, 1.6867, 0.6391, 1.6682, 0.0000, 0.1368],
        [0.4525, 0.0000, 0.0000, 0.0000, 0.0000, 1.4658, 0.2042],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.5961, 0.0000],
        [0.0000, 0.0000, 1.5199, 0.5621, 0.8793, 1.2228, 1.1267],
        [0.5184, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[0.0000, 0.0000, 1.6867, 0.6391, 1.6682, 0.0000, 0.1368],
         [0.4525, 0.0000, 0.0000, 0.0000, 0.0000, 1.4658, 0.2042],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.5961, 0.0000],
         [0.0000, 0.0000, 1.5199, 0.5621, 0.8793, 1.2228, 1.1267],
         [0.5184, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]]],
   

       grad_fn=<ReluBackward0>)
PackedSequence(data=tensor([[0.0000, 0.0000, 1.7057, 0.6425, 1.6764, 0.0000, 0.1377],
        [0.4550, 0.0000, 0.0000, 0.0000, 0.0000, 1.4719, 0.2027],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.5983, 0.0000],
        [0.0000, 0.0940, 0.4702, 0.0000, 0.0000, 1.8651, 1.2653],
        [0.5320, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[0.0000, 0.0000, 1.7057, 0.6425, 1.6764, 0.0000, 0.1377],
         [0.4550, 0.0000, 0.0000, 0.0000, 0.0000, 1.4719, 0.2027],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.5983, 0.0000],
         [0.0000, 0.0940, 0.4702, 0.0000, 0.0000, 1.8651, 1.2653],
         [0.5320, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]]],
   

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9359,  0.4192, -0.4276, -1.2137, -0.5095, -1.7031,  0.5860],
        [ 0.1883, -0.3142,  0.1454,  1.9687, -0.2840, -0.4562, -1.0683],
        [-0.2014,  2.0128,  0.3349, -1.3826,  0.4208, -1.6103,  0.6756],
        [-0.9039,  0.1191, -0.5531,  0.9513, -1.8480,  0.4666, -0.1354],
        [ 1.3540, -0.1929, -0.3185, -1.1312,  0.0847,  0.7993,  0.1317]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9359,  0.4192, -0.4276, -1.2137, -0.5095, -1.7031,  0.5860],
         [ 0.1883, -0.3142,  0.1454,  1.9687, -0.2840, -0.4562, -1.0683],
         [-0.2014,  2.0128,  0.3349, -1.3826,  0.4208, -1.6103,  0.6756],
         [-0.9039,  0.1191, -0.5531,  0.9513, -1.8480,  0.4666, -0.1354],
         [ 1.3540, -0.1929, -0.3185, -1.1312,  0.0847,  0.7993,  0.1317]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.7295

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9357,  0.4187, -0.4283, -1.2141, -0.5105, -1.7024,  0.5862],
        [ 0.1881, -0.3149,  0.1444,  1.9685, -0.2854, -0.4554, -1.0681],
        [-1.4670,  1.3273,  0.4209, -0.8221,  0.4986,  1.5533, -1.7112],
        [-0.9026,  0.1199, -0.5530,  0.9512, -1.8475,  0.4670, -0.1333],
        [ 1.3557, -0.1931, -0.3192, -1.1310,  0.0807,  0.8016,  0.1349]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9357,  0.4187, -0.4283, -1.2141, -0.5105, -1.7024,  0.5862],
         [ 0.1881, -0.3149,  0.1444,  1.9685, -0.2854, -0.4554, -1.0681],
         [-1.4670,  1.3273,  0.4209, -0.8221,  0.4986,  1.5533, -1.7112],
         [-0.9026,  0.1199, -0.5530,  0.9512, -1.8475,  0.4670, -0.1333],
         [ 1.3557, -0.1931, -0.3192, -1.1310,  0.0807,  0.8016,  0.1349]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.7509

       grad_fn=<ReluBackward0>)
PackedSequence(data=tensor([[0.0000, 0.0000, 1.7709, 0.6562, 1.7039, 0.0000, 0.1425],
        [0.4593, 0.0000, 0.0000, 0.0000, 0.0000, 1.5015, 0.1993],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.6111, 0.0000],
        [0.3753, 0.0000, 0.0000, 0.0000, 0.0524, 0.0000, 0.4975],
        [0.5684, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[0.0000, 0.0000, 1.7709, 0.6562, 1.7039, 0.0000, 0.1425],
         [0.4593, 0.0000, 0.0000, 0.0000, 0.0000, 1.5015, 0.1993],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.6111, 0.0000],
         [0.3753, 0.0000, 0.0000, 0.0000, 0.0524, 0.0000, 0.4975],
         [0.5684, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]]],
   

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9355,  0.4181, -0.4290, -1.2145, -0.5118, -1.7014,  0.5866],
        [ 0.1879, -0.3160,  0.1430,  1.9683, -0.2875, -0.4541, -1.0676],
        [-0.7166,  0.7809,  0.0795, -0.2958,  0.5572,  0.2138, -1.2492],
        [-0.9001,  0.1211, -0.5531,  0.9504, -1.8471,  0.4679, -0.1304],
        [ 1.3600, -0.1931, -0.3203, -1.1319,  0.0730,  0.8063,  0.1408]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9355,  0.4181, -0.4290, -1.2145, -0.5118, -1.7014,  0.5866],
         [ 0.1879, -0.3160,  0.1430,  1.9683, -0.2875, -0.4541, -1.0676],
         [-0.7166,  0.7809,  0.0795, -0.2958,  0.5572,  0.2138, -1.2492],
         [-0.9001,  0.1211, -0.5531,  0.9504, -1.8471,  0.4679, -0.1304],
         [ 1.3600, -0.1931, -0.3203, -1.1319,  0.0730,  0.8063,  0.1408]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.7919

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 0.2219, -0.6899,  1.6430,  1.0700,  0.6853, -1.1431,  0.3816],
        [ 0.5455, -0.1153,  0.1303, -1.2159, -0.3808,  1.1261,  0.7336],
        [ 0.5490, -1.4227, -0.2948,  1.2871,  0.1592,  1.2275, -0.0178],
        [-0.8988,  0.1215, -0.5535,  0.9500, -1.8471,  0.4690, -0.1290],
        [ 1.3615, -0.1937, -0.3211, -1.1317,  0.0692,  0.8089,  0.1427]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 0.2219, -0.6899,  1.6430,  1.0700,  0.6853, -1.1431,  0.3816],
         [ 0.5455, -0.1153,  0.1303, -1.2159, -0.3808,  1.1261,  0.7336],
         [ 0.5490, -1.4227, -0.2948,  1.2871,  0.1592,  1.2275, -0.0178],
         [-0.8988,  0.1215, -0.5535,  0.9500, -1.8471,  0.4690, -0.1290],
         [ 1.3615, -0.1937, -0.3211, -1.1317,  0.0692,  0.8089,  0.1427]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.8091

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[-0.5617,  1.3345, -0.2938,  1.5283, -0.7981,  0.0110,  0.6823],
        [ 0.7014, -1.8377,  0.5632, -1.5536, -0.4101, -0.6087, -0.9440],
        [ 0.7014, -1.8377,  0.5632, -1.5536, -0.4101, -0.6087, -0.9440],
        [-0.5975,  0.7064, -0.1961, -0.6633, -0.3301, -1.0932, -0.4472],
        [-0.8986,  0.1217, -0.5537,  0.9499, -1.8471,  0.4691, -0.1288],
        [ 1.3620, -0.1935, -0.3212, -1.1322,  0.0691,  0.8086,  0.1427]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[-0.5617,  1.3345, -0.2938,  1.5283, -0.7981,  0.0110,  0.6823],
         [ 0.7014, -1.8377,  0.5632, -1.5536, -0.4101, -0.6087, -0.9440],
         [ 0.7014, -1.8377,  0.5632, -1.5536, -0.4101, -0.6087, -0.9440],
         [-0.5975,  0.7064, -0.1961, -0.6633, -0.3301, -1.0932, -0.4472],
         [-0.8986,  0.1217, -0.5537,  0.9499, -1.8471,  0.4691, -0.128

PackedSequence(data=tensor([[-0.5617,  1.3343, -0.2939,  1.5284, -0.7984,  0.0111,  0.6822],
        [ 0.5457, -0.1153,  0.1303, -1.2161, -0.3809,  1.1258,  0.7334],
        [-0.7853, -1.1163,  2.9628, -1.1634, -0.6359, -0.5876,  1.3116],
        [-0.8990,  0.1213, -0.5532,  0.9501, -1.8472,  0.4684, -0.1293],
        [ 1.3625, -0.1943, -0.3207, -1.1338,  0.0693,  0.8071,  0.1419]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[-0.5617,  1.3343, -0.2939,  1.5284, -0.7984,  0.0111,  0.6822],
         [ 0.5457, -0.1153,  0.1303, -1.2161, -0.3809,  1.1258,  0.7334],
         [-0.7853, -1.1163,  2.9628, -1.1634, -0.6359, -0.5876,  1.3116],
         [-0.8990,  0.1213, -0.5532,  0.9501, -1.8472,  0.4684, -0.1293],
         [ 1.3625, -0.1943, -0.3207, -1.1338,  0.0693,  0.8071,  0.1419]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.8087, 0.6668, 1.7215, 0.0000, 0.145

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[-0.5617,  1.3340, -0.2939,  1.5285, -0.7990,  0.0111,  0.6820],
        [ 0.5460, -0.1152,  0.1305, -1.2162, -0.3811,  1.1254,  0.7332],
        [-0.4088, -1.1540, -0.3932, -0.2710,  1.3335, -1.1420, -0.5930],
        [-0.8995,  0.1206, -0.5521,  0.9503, -1.8477,  0.4672, -0.1300],
        [ 1.3634, -0.1952, -0.3196, -1.1358,  0.0689,  0.8054,  0.1414]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[-0.5617,  1.3340, -0.2939,  1.5285, -0.7990,  0.0111,  0.6820],
         [ 0.5460, -0.1152,  0.1305, -1.2162, -0.3811,  1.1254,  0.7332],
         [-0.4088, -1.1540, -0.3932, -0.2710,  1.3335, -1.1420, -0.5930],
         [-0.8995,  0.1206, -0.5521,  0.9503, -1.8477,  0.4672, -0.1300],
         [ 1.3634, -0.1952, -0.3196, -1.1358,  0.0689,  0.8054,  0.1414]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.8084

       grad_fn=<IndexSelectBackward>)
tensor([[[-8.2837e-01,  3.0725e-01,  1.6926e-01,  5.3510e-01, -5.8773e-01,
          -7.2489e-05, -5.5453e-01],
         [ 1.3796e+00, -6.0404e-02, -1.0047e+00,  3.2679e-01,  4.1723e-01,
           1.2733e+00, -1.0517e+00],
         [ 3.5035e-01, -5.0794e-01, -7.2867e-01, -1.6198e+00,  1.0054e+00,
           1.7128e+00,  2.5622e+00],
         [-8.9938e-01,  1.2029e-01, -5.5177e-01,  9.5036e-01, -1.8482e+00,
           4.6709e-01, -1.2974e-01],
         [ 1.3645e+00, -1.9562e-01, -3.1908e-01, -1.1367e+00,  6.7255e-02,
           8.0534e-01,  1.4184e-01]]], grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[-8.2837e-01,  3.0725e-01,  1.6926e-01,  5.3510e-01, -5.8773e-01,
         -7.2489e-05, -5.5453e-01],
        [ 1.3796e+00, -6.0404e-02, -1.0047e+00,  3.2679e-01,  4.1723e-01,
          1.2733e+00, -1.0517e+00],
        [ 3.5035e-01, -5.0794e-01, -7.2867e-01, -1.6198e+00,  1.0054e+00,
          1.7128e+00,  2.5622e+00],
        [-8.9938e-01,  1.2

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9354,  0.4176, -0.4294, -1.2148, -0.5127, -1.7007,  0.5869],
        [ 0.1879, -0.3169,  0.1416,  1.9681, -0.2893, -0.4528, -1.0672],
        [ 0.4597, -1.8046,  0.6321,  1.0525, -0.7433,  0.4503,  1.6498],
        [-0.8982,  0.1208, -0.5521,  0.9499, -1.8483,  0.4679, -0.1287],
        [ 1.3663, -0.1950, -0.3192, -1.1369,  0.0647,  0.8068,  0.1434]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9354,  0.4176, -0.4294, -1.2148, -0.5127, -1.7007,  0.5869],
         [ 0.1879, -0.3169,  0.1416,  1.9681, -0.2893, -0.4528, -1.0672],
         [ 0.4597, -1.8046,  0.6321,  1.0525, -0.7433,  0.4503,  1.6498],
         [-0.8982,  0.1208, -0.5521,  0.9499, -1.8483,  0.4679, -0.1287],
         [ 1.3663, -0.1950, -0.3192, -1.1369,  0.0647,  0.8068,  0.1434]]],
       grad_fn=<IndexSelectBackward>)
tensor([[[0.0000, 0.0000, 1.8282

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 0.5081, -0.4364, -1.8904,  0.7483, -0.4980,  0.7259, -1.1441],
        [ 0.5462, -0.1152,  0.1305, -1.2163, -0.3812,  1.1252,  0.7332],
        [-1.6690, -1.4304,  0.5323, -0.9099, -1.0354, -0.5949, -0.2508],
        [ 0.7523,  0.5085, -0.2659, -0.2957, -0.4614, -0.8845, -0.7316],
        [ 0.7942,  1.5213, -2.2998,  1.0065, -1.3871, -0.6289, -0.8581],
        [-0.8969,  0.1214, -0.5525,  0.9495, -1.8483,  0.4688, -0.1276],
        [ 1.3680, -0.1949, -0.3194, -1.1369,  0.0619,  0.8087,  0.1450]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 0.5081, -0.4364, -1.8904,  0.7483, -0.4980,  0.7259, -1.1441],
         [ 0.5462, -0.1152,  0.1305, -1.2163, -0.3812,  1.1252,  0.7332],
         [-1.6690, -1.4304,  0.5323, -0.9099, -1.0354, -0.5949, -0.2508],
         [ 0.7523,  0.5085, -0.2659, -0.2957, -0.4614, -0.8845, -0.7

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9353,  0.4173, -0.4294, -1.2152, -0.5135, -1.7002,  0.5872],
        [ 0.1880, -0.3173,  0.1410,  1.9680, -0.2903, -0.4522, -1.0669],
        [ 0.8648,  0.2724, -2.0504,  0.5677,  1.5321,  0.8669, -1.2534],
        [ 0.0343, -0.4188, -1.9074, -0.1409, -0.6813, -0.5903, -0.5216],
        [-0.8953,  0.1220, -0.5531,  0.9488, -1.8483,  0.4701, -0.1260],
        [ 1.3699, -0.1946, -0.3198, -1.1370,  0.0590,  0.8105,  0.1466]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9353,  0.4173, -0.4294, -1.2152, -0.5135, -1.7002,  0.5872],
         [ 0.1880, -0.3173,  0.1410,  1.9680, -0.2903, -0.4522, -1.0669],
         [ 0.8648,  0.2724, -2.0504,  0.5677,  1.5321,  0.8669, -1.2534],
         [ 0.0343, -0.4188, -1.9074, -0.1409, -0.6813, -0.5903, -0.5216],
         [-0.8953,  0.1220, -0.5531,  0.9488, -1.8483,  0.4701, -0.126

       grad_fn=<ReluBackward0>)
PackedSequence(data=tensor([[0.0000, 0.0000, 1.8696, 0.6755, 1.7402, 0.0000, 0.1492],
        [0.4584, 0.0000, 0.0000, 0.0000, 0.0000, 1.5391, 0.1979],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.6162, 0.0000],
        [0.0000, 0.0000, 0.1997, 0.1490, 0.0000, 0.0000, 0.8780],
        [0.6572, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[0.0000, 0.0000, 1.8696, 0.6755, 1.7402, 0.0000, 0.1492],
         [0.4584, 0.0000, 0.0000, 0.0000, 0.0000, 1.5391, 0.1979],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.6162, 0.0000],
         [0.0000, 0.0000, 0.1997, 0.1490, 0.0000, 0.0000, 0.8780],
         [0.6572, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [1.1944, 0.3493, 0.0000, 1.0468, 0.4070, 0.0000, 0.0000]]],
   

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9352,  0.4170, -0.4296, -1.2155, -0.5143, -1.6997,  0.5875],
        [ 0.1882, -0.3179,  0.1402,  1.9677, -0.2917, -0.4513, -1.0666],
        [-0.4843,  0.5193,  0.7805,  0.6789, -0.7838,  0.6869, -0.6135],
        [ 0.0966,  1.1689,  0.0250,  0.8177,  2.0776, -1.0384,  1.5099],
        [-0.8926,  0.1232, -0.5540,  0.9478, -1.8485,  0.4722, -0.1238],
        [ 1.3736, -0.1939, -0.3204, -1.1375,  0.0535,  0.8142,  0.1492]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9352,  0.4170, -0.4296, -1.2155, -0.5143, -1.6997,  0.5875],
         [ 0.1882, -0.3179,  0.1402,  1.9677, -0.2917, -0.4513, -1.0666],
         [-0.4843,  0.5193,  0.7805,  0.6789, -0.7838,  0.6869, -0.6135],
         [ 0.0966,  1.1689,  0.0250,  0.8177,  2.0776, -1.0384,  1.5099],
         [-0.8926,  0.1232, -0.5540,  0.9478, -1.8485,  0.4722, -0.123

       grad_fn=<ViewBackward>)
PackedSequence(data=tensor([[ 1.9352,  0.4169, -0.4296, -1.2155, -0.5146, -1.6995,  0.5877],
        [ 0.1882, -0.3181,  0.1399,  1.9676, -0.2921, -0.4510, -1.0665],
        [-0.6074, -0.2023, -0.6453, -0.7973,  0.3912, -0.0284, -0.0510],
        [ 2.1638,  0.2780,  1.4231, -0.1224, -0.4734,  0.5719,  1.6691],
        [ 1.5571, -0.0086,  1.0826, -1.9302, -1.4012, -1.0234,  0.4391],
        [-1.2949,  0.0128,  1.6501, -0.6350, -0.5053,  0.4450,  1.0898],
        [-0.8910,  0.1240, -0.5547,  0.9472, -1.8486,  0.4736, -0.1226],
        [ 1.3757, -0.1935, -0.3207, -1.1377,  0.0505,  0.8163,  0.1506]],
       grad_fn=<PackPaddedSequenceBackward>), batch_sizes=tensor([1, 1, 1, 1, 1, 1, 1, 1]), sorted_indices=tensor([0]), unsorted_indices=tensor([0]))
tensor([[[ 1.9352,  0.4169, -0.4296, -1.2155, -0.5146, -1.6995,  0.5877],
         [ 0.1882, -0.3181,  0.1399,  1.9676, -0.2921, -0.4510, -1.0665],
         [-0.6074, -0.2023, -0.6453, -0.7973,  0.3912, -0.0284, -0

KeyboardInterrupt: 

In [57]:
model.save("seq2seq")

# Make prediction

In [26]:
def predict(model, inputs):
    with torch.no_grad():
            
        encoder_hidden = model.encoder.initHidden(1)
        encoder_hidden = model.encoder.forward(inputs,encoder_hidden)
        decoder_hidden = encoder_hidden
        decoder_output = None
        eng_output = "SOS"
        decoder_token = torch.tensor([SOS_token], device=device)
    
        while eng_output != "EOS":
            
            decoder_output,decoder_hidden = model.decoder.forward(decoder_token,
                                                                     decoder_hidden)
            decoder_token = torch.argmax(decoder_output)
            #print(decoder_token)
            eng_output = output_lang.index2word[int(decoder_token.numpy())]
            print(eng_output)

In [32]:
#model = seq2seq(input_size=input_lang.n_words, hidden_size=5,output_size=output_lang.n_words, device=device)
#model.load("seq2seq")
input_sentence = "tu est pas bon ."
input_token = torch.tensor([input_lang.word2index[word] for word in input_sentence.split()] +
                                   [EOS_token],
                                   device=device)
#predict(model, input_token)
model.predict(input_token)

i
m
sorry
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to

the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to
the
to
be
to

KeyboardInterrupt: 

# seq2seq with attention

In [42]:
output_lang.index2word[int(2)]

KeyError: 2