In [None]:
%matplotlib inline

from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
!wget https://www.manythings.org/anki/rus-eng.zip
!unzip rus-eng.zip

--2024-07-22 08:22:27--  https://www.manythings.org/anki/rus-eng.zip
Resolving www.manythings.org (www.manythings.org)... 173.254.30.110
Connecting to www.manythings.org (www.manythings.org)|173.254.30.110|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16305013 (16M) [application/zip]
Saving to: ‘rus-eng.zip’


2024-07-22 08:22:31 (6.26 MB/s) - ‘rus-eng.zip’ saved [16305013/16305013]

Archive:  rus-eng.zip
  inflating: rus.txt                 
  inflating: _about.txt              


In [None]:
!tail eng-rus.txt

We need to uphold laws against discrimination — in hiring, and in housing, and in education, and in the criminal justice system. That is what our Constitution and our highest ideals require.	Нам нужно отстаивать законы против дискриминации при найме на работу, в жилищной сфере, в сфере образования и правоохранительной системе. Этого требуют наша Конституция и высшие идеалы.	CC-BY 2.0 (France) Attribution: tatoeba.org #5762728 (BHO) & #6390439 (odexed)
I've heard that you should never date anyone who is less than half your age plus seven. Tom is now 30 years old and Mary is 17. How many years will Tom need to wait until he can start dating Mary?	Я слышал, что никогда не следует встречаться с кем-то вдвое младше вас плюс семь лет. Тому 30 лет, a Мэри 17. Сколько лет Тому нужно ждать до тех пор, пока он сможет начать встречаться с Мэри?	CC-BY 2.0 (France) Attribution: tatoeba.org #10068197 (CK) & #10644473 (notenoughsun)
I do have one final ask of you as your president, the same thing I a

In [None]:
sep = 'CC-BY'
with open('/content/eng-rus.txt') as file:
    for line in file:
        print(line.split(sep, 1)[0])

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
We were surprised at the ease with which he solved the problem.	Нас удивила та лёгкость, с которой он решил проблему.	
We were surprised at the ease with which he solved the problem.	Нас удивила та лёгкость, с которой он решил задачу.	
We were wakened by the whistle of the steam locomotive at dawn.	На рассвете нас разбудил свисток локомотива.	
We weren't able to buy tickets, so we didn't go to the concert.	Мы не смогли купить билеты, так что не пошли на концерт.	
We will investigate the problem and deal with it as we see fit.	Мы изучим вопрос и решим его, как посчитаем нужным.	
We wouldn't be in this mess if you'd just done what I told you.	Мы бы не попали в такие неприятности, если бы ты просто сделал то, что я тебе говорил.	
We'd better brainstorm about it together and get a better idea.	Мы лучше ещё хорошенько подумаем об этом вместе и придумаем что-нибудь получше.	
What would the world be like if we a

In [None]:
SOS_token = 0
EOS_token = 1


class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [None]:
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = s.lower()
    s = re.sub('[.!?]','',s)
#    s = re.sub(r"([.!?])", r" \1", s)
#    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

In [None]:
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    lines = open('%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')
    lines = [i.split('\tCC-BY', 1)[0] for i in lines]

    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]


    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [None]:
MAX_LENGTH = 20

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)

def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [None]:
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'rus', True)
print(random.choice(pairs))

Reading lines...
Read 496059 sentence pairs
Trimmed to 5114 sentence pairs
Counting words...
Counted words:
rus 4793
eng 2444
['он беспечен во всем', 'he is careless in everything']


# Encoder

In [None]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

# Decoder

In [None]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [None]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [None]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [None]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [None]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [None]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words

In [None]:
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

# GRU 1 layer

In [None]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)

trainIters(encoder1, decoder1, 75000, print_every=5000)

1m 2s (- 14m 39s) (5000 6%) 3.2404
2m 7s (- 13m 46s) (10000 13%) 2.7620
3m 5s (- 12m 22s) (15000 20%) 2.2945
4m 7s (- 11m 21s) (20000 26%) 1.9407
5m 4s (- 10m 9s) (25000 33%) 1.5647
6m 9s (- 9m 14s) (30000 40%) 1.2485
7m 12s (- 8m 13s) (35000 46%) 1.0359
8m 9s (- 7m 8s) (40000 53%) 0.8469
9m 7s (- 6m 4s) (45000 60%) 0.6426
10m 8s (- 5m 4s) (50000 66%) 0.5115
11m 6s (- 4m 2s) (55000 73%) 0.3942
12m 7s (- 3m 1s) (60000 80%) 0.3096
13m 12s (- 2m 1s) (65000 86%) 0.2384
14m 12s (- 1m 0s) (70000 93%) 0.1968
15m 11s (- 0m 0s) (75000 100%) 0.1427


In [None]:
evaluateRandomly(encoder1, decoder1)

> мне так жаль, что я заставил тебя ждать
= i am so sorry to have kept you waiting
< i am so sorry to have kept you waiting <EOS>

> я все еще одинок
= i am still alone
< i am still alone <EOS>

> она очень боится темноты
= she is very afraid of the dark
< she is very afraid of the dark <EOS>

> я уверен, что мы будем очень счастливы здесь
= i am sure that we will be very happy here
< i am sure that we will be very happy here <EOS>

> она уже замужем
= she is already married
< she is already married <EOS>

> она известнее тебя
= she is more famous than you
< she is more famous than you <EOS>

> ты сама не своя
= you aren't yourself
< you aren't yourself <EOS>

> мы с неи примерно ровесники
= she is about my age
< she is about my age <EOS>

> она не менее красива, чем ее мать
= she is no less beautiful than her mother
< she is no less beautiful than her mother <EOS>

> ее сеичас нет дома
= she isn't at home now
< she isn't at home now <EOS>



# GRU 2 layers

In [None]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)
trainIters(encoder1, decoder1, 75000, print_every=5000)

0m 58s (- 13m 42s) (5000 6%) 3.2176
1m 55s (- 12m 28s) (10000 13%) 2.7449
2m 52s (- 11m 28s) (15000 20%) 2.2991
3m 49s (- 10m 30s) (20000 26%) 1.9310
4m 46s (- 9m 32s) (25000 33%) 1.5933
5m 43s (- 8m 34s) (30000 40%) 1.2846
6m 39s (- 7m 36s) (35000 46%) 1.0613
7m 36s (- 6m 39s) (40000 53%) 0.8509
8m 33s (- 5m 42s) (45000 60%) 0.6778
9m 31s (- 4m 45s) (50000 66%) 0.5307
10m 28s (- 3m 48s) (55000 73%) 0.4161
11m 25s (- 2m 51s) (60000 80%) 0.3272
12m 23s (- 1m 54s) (65000 86%) 0.2208
13m 21s (- 0m 57s) (70000 93%) 0.1718
14m 20s (- 0m 0s) (75000 100%) 0.1270


In [None]:
evaluateRandomly(encoder1, decoder1)

> она покупает своему ребенку игрушку
= she is buying a toy for her child
< she is buying for her her time for a new dress <EOS>

> я очень удивлен, что он стал членом парламента
= i am very surprised that she became a diet member
< i am very surprised that she became a diet member <EOS>

> она поливает цветы
= she is watering the flowers
< she is watering the flowers <EOS>

> я близнец
= i am a twin
< i am a twin <EOS>

> на нем нет шляпы
= he is not wearing a hat
< he is not wearing a hat <EOS>

> он слишком мал, чтобы идти туда одному
= he is too young to go there alone
< he is too young to go there alone <EOS>

> они не боятся смерти
= they aren't afraid of death
< they aren't afraid of death <EOS>

> он лучше всех годится для этого проекта
= he is the best for this project
< he is the best for this project <EOS>

> мы рады вам помочь
= we are glad to help you
< we are glad to help you <EOS>

> она расстелила скатерть на столе
= she spread a cloth over the table
< she spread a clot

# LSTM 1 layer

In [None]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.lstm(output, hidden)
        return output, hidden

    def initHidden(self):
        return (torch.zeros(1, 1, self.hidden_size, device=device),
                torch.zeros(1, 1, self.hidden_size, device=device))

In [None]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.lstm(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return (torch.zeros(1, 1, self.hidden_size, device=device),
                torch.zeros(1, 1, self.hidden_size, device=device))

In [None]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)

trainIters(encoder1, decoder1, 75000, print_every=5000)

0m 58s (- 13m 32s) (5000 6%) 3.3295
1m 53s (- 12m 17s) (10000 13%) 2.9002
2m 49s (- 11m 18s) (15000 20%) 2.5721
3m 46s (- 10m 21s) (20000 26%) 2.2142
4m 42s (- 9m 24s) (25000 33%) 1.8649
5m 41s (- 8m 32s) (30000 40%) 1.6165
6m 40s (- 7m 37s) (35000 46%) 1.3083
7m 38s (- 6m 40s) (40000 53%) 1.0981
8m 36s (- 5m 44s) (45000 60%) 0.9109
9m 35s (- 4m 47s) (50000 66%) 0.7362
10m 33s (- 3m 50s) (55000 73%) 0.6090
11m 32s (- 2m 53s) (60000 80%) 0.4866
12m 30s (- 1m 55s) (65000 86%) 0.3954
13m 28s (- 0m 57s) (70000 93%) 0.3101
14m 27s (- 0m 0s) (75000 100%) 0.2570


In [None]:
evaluateRandomly(encoder1, decoder1)

> ты сегодня сам не свои
= you aren't yourself today
< you aren't yourself today <EOS>

> он мои друг вы знаете его, ребята
= he is my friend do you guys know him
< he is my friend do you guys know him <EOS>

> вы врач
= you are a doctor
< you are the doctor <EOS>

> она все еще любила его
= she still loved him
< she still loved him <EOS>

> он лучше всех годится для этого проекта
= he is the best for this project
< he is the best for this project <EOS>

> она вдруг потеряла сознание
= she suddenly lost consciousness
< she suddenly lost consciousness <EOS>

> он дипломированныи фармацевт
= he is licensed as a pharmacist
< he is licensed as a pharmacist <EOS>

> я собираюсь специализироваться по французскому
= i am going to major in french
< i am going to major in french <EOS>

> я оставлю книги здесь
= i am leaving the books here
< i am leaving books here <EOS>

> она едва ли могла сказать слово на англииском
= she spoke scarcely a word of english
< she spoke scarcely a word of english

# LSTM 2 layers

In [None]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.lstm(output, hidden)
        return output, hidden

    def initHidden(self):
        return (torch.zeros(1, 1, self.hidden_size, device=device),
                torch.zeros(1, 1, self.hidden_size, device=device))

In [None]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.lstm(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return (torch.zeros(1, 1, self.hidden_size, device=device),
                torch.zeros(1, 1, self.hidden_size, device=device))

In [None]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)

trainIters(encoder1, decoder1, 75000, print_every=5000)

1m 1s (- 14m 14s) (5000 6%) 3.2941
1m 58s (- 12m 49s) (10000 13%) 2.9176
2m 55s (- 11m 43s) (15000 20%) 2.6538
3m 53s (- 10m 42s) (20000 26%) 2.2710
4m 52s (- 9m 45s) (25000 33%) 1.9775
5m 52s (- 8m 48s) (30000 40%) 1.6517
6m 51s (- 7m 50s) (35000 46%) 1.4341
7m 51s (- 6m 52s) (40000 53%) 1.2132
8m 51s (- 5m 54s) (45000 60%) 1.0252
9m 50s (- 4m 55s) (50000 66%) 0.8338
10m 50s (- 3m 56s) (55000 73%) 0.6936
11m 50s (- 2m 57s) (60000 80%) 0.5300
12m 50s (- 1m 58s) (65000 86%) 0.4410
13m 51s (- 0m 59s) (70000 93%) 0.3619
14m 50s (- 0m 0s) (75000 100%) 0.2864


In [None]:
evaluateRandomly(encoder1, decoder1)

> мне от тебя тошно
= i am disgusted with you
< i am disgusted with you <EOS>

> ты такого же роста, как я
= you are as tall as i am
< you are as tall as i am <EOS>

> он точно выиграет
= he is sure to win
< he is sure to win <EOS>

> она дала ему пощечину
= she slapped his face
< she slapped him face <EOS>

> мы далеко не поедем
= we aren't going far
< we aren't going far <EOS>

> она сеичас ужинает
= she is having dinner now
< she is having dinner now <EOS>

> он всего лишь ребенок
= he is only a child
< he is only a child <EOS>

> нас всех беспокоит твое здоровье
= we are all anxious about your health
< we are all anxious about your health <EOS>

> я слеп на правыи глаз
= i am blind in the right eye
< i am blind in the right eye <EOS>

> мы собираемся в следующее воскресенье в гости к дяде
= we are going to visit our uncle next sunday
< we are going to visit our uncle next sunday <EOS>



По результатам проведенных экспериментов сеть с двумя ячейками GRU справляется лучше всего: loss 0.1270

GRU 1 layer - 0.1427
LSTM 1 layer - 0.2570
LSTM 2 layer - 0.2864