In [1]:
%matplotlib inline

In [2]:
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [4]:
import pandas as pd

df = pd.read_csv("rus.txt", sep='\t', header=None).drop(2, axis=1)

In [5]:
df[0] = df[0].apply(lambda x: re.sub(r'[^a-z ]+', '', x.lower()))
df[1] = df[1].apply(lambda x: re.sub(r'[^а-яё ]+', '', x.lower()))
df.sample(10)

Unnamed: 0,0,1
24971,they wont come,они не придут
299980,tom didnt know what he was doing,том не знал что делает
194532,did you get your money back,ты вернул свои деньги
297786,my french teacher was very strict,мой учитель французского был очень строгий
22642,i won the event,я выиграл в этом мероприятии
106487,what a touching story,какая трогательная история
105502,tom was wearing jeans,том был в джинсах
376807,it is getting colder and colder day by day,с каждым днём холодает
73391,tom hid his weapons,том спрятал своё оружие
344728,we have had a lot of rain this summer,этим летом у нас было много дождей


In [6]:
MAX_LENGTH = 10

eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[0].startswith(eng_prefixes)

pairs = [p for p in zip(df[0].tolist(), df[1].tolist()) if filterPair(p)]
print(len(pairs), random.choice(pairs))

4650 ('he is still young', 'он ещё молодой')


In [7]:
SOS_token = 0
EOS_token = 1


class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [8]:
def prepareData(lang1, lang2, pairs, reverse=False):
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'rus', pairs)
print(random.choice(pairs))

Counted words:
eng 2190
rus 4163
('i am so sorry to have kept you waiting', 'мне так жаль что я заставила вас ждать')


The Encoder
-----------





In [9]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [10]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)


def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [11]:
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [12]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [13]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [14]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [15]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]

In [16]:
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

# DecoderScalar



In [46]:
# class AttnDecoderRNN(nn.Module):
#     def __init__(self, hidden_size, output_size, dropout_p=0.1,
#                  max_length=MAX_LENGTH):
#         super(AttnDecoderRNN, self).__init__()
#         self.hidden_size = hidden_size
#         self.output_size = output_size
#         self.dropout_p = dropout_p
#         self.max_length = max_length + 2

#         self.embedding = nn.Embedding(self.output_size, self.hidden_size)
#         self.dropout = nn.Dropout(self.dropout_p)
#         self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
#         self.gru = nn.GRU(self.hidden_size, self.hidden_size)
#         self.out = nn.Linear(self.hidden_size, self.output_size)

#     def forward(self, input, hidden, encoder_outputs):
#         embedded = self.embedding(input).view(1, 1, -1)
#         embedded = self.dropout(embedded)

#         weights = []
#         for i in range(len(encoder_outputs)):
#             weights.append(
#                 torch.div(torch.matmul(hidden[0][0], encoder_outputs[i]),
#                           torch.sqrt(
#                               torch.tensor(self.max_length, dtype=torch.float,
#                                            device=device))
#                           )
#                 )
#         attn_weights = F.softmax(torch.tensor(weights, device=device))

#         attn_applied = torch.bmm(attn_weights.unsqueeze(0).unsqueeze(0),
#                                  encoder_outputs.view(1, -1, self.hidden_size)
#                                  )

#         output = torch.cat((attn_applied[0], embedded[0]), 1)
#         output = self.attn_combine(output).unsqueeze(0)

#         output = F.relu(output)
#         output, hidden = self.gru(output, hidden)

#         output = F.log_softmax(self.out(output[0]), dim=1)
#         return output, hidden, attn_weights

#     def initHidden(self):
#         return torch.zeros(1, 1, self.hidden_size, device=device)


class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)
        weights = torch.div(
            torch.matmul(encoder_outputs, hidden[0][0]),
            torch.sqrt(torch.tensor(self.max_length, dtype=torch.float, device=device))
        )
        
        attn_weights = F.softmax(weights)
        attn_applied = torch.bmm(attn_weights.view(1, 1, -1), encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [47]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 75000, print_every=5000)



0m 58s (- 13m 32s) (5000 6%) 4.1367
1m 55s (- 12m 28s) (10000 13%) 3.5649
2m 53s (- 11m 32s) (15000 20%) 2.9518
3m 51s (- 10m 35s) (20000 26%) 2.4082
4m 49s (- 9m 39s) (25000 33%) 1.9368
5m 48s (- 8m 42s) (30000 40%) 1.5792
6m 47s (- 7m 45s) (35000 46%) 1.3195
7m 46s (- 6m 47s) (40000 53%) 1.0611
8m 45s (- 5m 50s) (45000 60%) 0.9149
9m 45s (- 4m 52s) (50000 66%) 0.7642
10m 44s (- 3m 54s) (55000 73%) 0.6736
11m 43s (- 2m 55s) (60000 80%) 0.6037
12m 43s (- 1m 57s) (65000 86%) 0.5245
13m 42s (- 0m 58s) (70000 93%) 0.4769
14m 42s (- 0m 0s) (75000 100%) 0.4651


In [48]:
evaluateRandomly(encoder1, attn_decoder1)

> you arent allowed to go into that room
= тебе в эту комнату нельзя
< вам в нельзя комнату нельзя <EOS>

> you arent alone
= вы не одни
< ты не один <EOS>

> she seldom if ever goes to movies by herself
= она если и ходит одна в кино то редко
< она редко если вообще одна на то редко <EOS>

> you are in part responsible for it
= частично за это ответственны вы
< частично за это отвечаете вы <EOS>

> i am too busy to go
= я слишком занят чтобы идти
< я слишком занят чтобы идти <EOS>

> he is ready to work
= он готов к работе
< он готов к работе <EOS>

> he is boiling with rage
= в нём кипит ярость
< в нём кипит ярость <EOS>

> they are supposed to obey the orders
= они обязаны подчиняться приказам
< они обязаны подчиняться приказам <EOS>

> he is a close friend of my brother
= он близкий друг моего брата
< он близкий друг моего брата <EOS>

> they are taking a walk
= они прогуливаются
< они совершают пешую прогулку <EOS>





# DecoderMLP



In [None]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [49]:
hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 75000, print_every=5000)



0m 59s (- 13m 54s) (5000 6%) 4.1034
1m 57s (- 12m 43s) (10000 13%) 3.5429
2m 55s (- 11m 43s) (15000 20%) 2.9226
3m 55s (- 10m 47s) (20000 26%) 2.3744
4m 54s (- 9m 49s) (25000 33%) 1.9125
5m 54s (- 8m 51s) (30000 40%) 1.5583
6m 54s (- 7m 53s) (35000 46%) 1.2843
7m 53s (- 6m 54s) (40000 53%) 1.0651
8m 53s (- 5m 55s) (45000 60%) 0.8865
9m 53s (- 4m 56s) (50000 66%) 0.7709
10m 53s (- 3m 57s) (55000 73%) 0.6567
11m 52s (- 2m 58s) (60000 80%) 0.5772
12m 52s (- 1m 58s) (65000 86%) 0.5130
13m 53s (- 0m 59s) (70000 93%) 0.4866
14m 53s (- 0m 0s) (75000 100%) 0.4426


In [50]:
evaluateRandomly(encoder1, attn_decoder1)

> i am too short
= я слишком низкий
< я слишком низкий <EOS>

> i am glad that you have returned safe
= я рад что ты вернулся целым и невредимым
< я рад что вы вернулись целыми и невредимыми <EOS>

> they are manufactured in various sizes
= они произведены в различных размерах
< они произведены в различных размерах <EOS>

> he is a famous composer
= он знаменитый композитор
< он известный композитор <EOS>

> he is respected by everyone
= его все уважают
< его все уважают <EOS>

> i am allowed  yen a month for books
= мне положено   иен в месяц на книги
< мне положено   иен в месяц на книги <EOS>

> i am a stranger here
= я здесь человек новый
< я здесь человек новый <EOS>

> i am painting my garage
= я крашу свой гараж
< я крашу свой гараж <EOS>

> he is at the office
= он в офисе
< он в офисе <EOS>

> she is toms older sister
= она старшая сестра тома
< она старшая сестра тома <EOS>



