Train the sequence to sequence model (Model 1) for a language pair (excluding French-English), where the output is English and the input is a language of your choice.

In [1]:
# Import the libraries required for building the neural network
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Reserved vocabulary indices: SOS_token is fed to the decoder as its first
# input, EOS_token is appended to every sentence tensor and ends decoding.
SOS_token = 0
EOS_token = 1


class Lang:
    """Vocabulary of one language: word <-> index lookups and word counts.

    Indices 0 and 1 are pre-assigned to the "SOS" and "EOS" markers, so the
    first real word receives index 2.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # SOS and EOS are already counted

    def addSentence(self, sentence):
        """Register every single-space-separated token of ``sentence``."""
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Count one occurrence of ``word``; unseen words get the next index."""
        if word in self.word2index:
            self.word2count[word] += 1
            return
        new_index = self.n_words
        self.word2index[word] = new_index
        self.word2count[word] = 1
        self.index2word[new_index] = word
        self.n_words = new_index + 1

In [3]:
# Strip combining marks (accents) from a Unicode string after NFD normalization

def unicodeToAscii(s):
    """Return ``s`` NFD-normalized with every category-'Mn' character removed."""
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    """Lowercase and trim ``s``, strip combining marks, then tidy punctuation.

    ``.``, ``!`` and ``?`` get a space in front of them; every run of
    characters other than ASCII letters and those three marks collapses to a
    single space.
    """
    cleaned = unicodeToAscii(s.lower().strip())
    # Space out sentence punctuation so it becomes its own token when split.
    spaced = re.sub(r"([.!?])", r" \1", cleaned)
    return re.sub(r"[^a-zA-Z.!?]+", r" ", spaced)

In [4]:
from google.colab import files
uploaded = files.upload()

Saving hin-eng.zip to hin-eng.zip


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
!unzip hin-eng.zip -d hin-eng

Archive:  hin-eng.zip
  inflating: hin-eng/hin.txt         
  inflating: hin-eng/_about.txt      


In [7]:
def readLangs(lang1, lang2, reverse=False, path='/content/hin-eng/hin.txt'):
    """Read the tab-separated corpus and create empty vocabularies.

    Args:
        lang1: Name for the language in the file's first column.
        lang2: Name for the language in the file's second column.
        reverse: When true, swap each pair and the two vocabularies so that
            ``lang2`` becomes the input side.
        path: Location of the corpus file (defaults to the Colab path the
            notebook unzips to).

    Returns:
        ``(input_lang, output_lang, pairs)`` where ``pairs`` is a list of
        ``[input_sentence, output_sentence]`` lists. The vocabularies are
        empty; no text normalization is applied to the sentences.
    """
    print("Reading lines...")

    # Read the file and split into lines; the context manager guarantees the
    # handle is closed (the bare open(...).read() left it to the GC).
    with open(path, encoding='utf-8') as corpus:
        lines = corpus.read().strip().split('\n')

    # Keep only the first two tab-separated fields of each line; any further
    # columns are discarded.
    pairs = [line.split('\t')[0:2] for line in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [8]:
# There are many sentence pairs; to train quickly we can restrict the data to short, simple sentences (e.g. those starting with "i am") using the filter below.

# Sentence-length bound: filterPair keeps sentences with fewer tokens than this,
# and it is the default size of the encoder-output / attention buffers.
MAX_LENGTH = 26

# Prefixes that filterPair requires the second sentence of a pair to start with.
# NOTE(review): the "... is" / "... are" entries have no trailing space while
# "i am " does, so e.g. "he is" also matches words that merely begin with "is".
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    """Tell whether pair ``p`` is short enough and has an accepted prefix.

    Both sentences must have fewer than MAX_LENGTH space-separated tokens and
    ``p[1]`` must start with one of ``eng_prefixes``.
    """
    if len(p[0].split(' ')) >= MAX_LENGTH:
        return False
    if len(p[1].split(' ')) >= MAX_LENGTH:
        return False
    return p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    """Return a new list holding only the pairs accepted by ``filterPair``."""
    return list(filter(filterPair, pairs))

In [9]:
def prepareData(lang1, lang2, reverse=False):
    """Load the corpus via ``readLangs`` and fill both vocabularies.

    Returns ``(input_lang, output_lang, pairs)`` with every sentence of every
    pair registered in the matching ``Lang``.
    """
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    # Filtering through filterPairs is disabled, so the "Trimmed" figure
    # printed next is the same as the "Read" figure.
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for entry in pairs:
        input_lang.addSentence(entry[0])
        output_lang.addSentence(entry[1])
    print("Counted words:")
    for vocab in (input_lang, output_lang):
        print(vocab.name, vocab.n_words)
    return input_lang, output_lang, pairs


In [10]:
# Model 1 data (Hindi -> English): reverse=True swaps each pair so the Hindi
# sentence is the input side and the English sentence the output side.
input_lang_hin, output_lang_eng, pairs = prepareData('eng','hin',True)
# Show one random pair as a sanity check of the column order.
print(random.choice(pairs))

Reading lines...
Read 2934 sentence pairs
Trimmed to 2934 sentence pairs
Counting words...
Counted words:
hin 3179
eng 3530
['आठ से दस बजे के बीच हुआ।', 'It happened between eight and ten.']


In [11]:

class EncoderRNN(nn.Module):
    """GRU encoder that consumes one token index per ``forward`` call.

    Args:
        input_size: Number of entries in the input vocabulary.
        hidden_size: Width of both the embedding and the GRU hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Embed one token and advance the GRU; returns ``(output, hidden)``."""
        # Reshape to (seq_len=1, batch=1, hidden_size) as the GRU is fed here.
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial hidden state of shape (1, 1, hidden_size).

        The tensor is created on the device that holds this module's weights
        rather than on a notebook-level global, so it always matches the
        model even if the model is moved after construction.
        """
        own_device = next(self.parameters()).device
        return torch.zeros(1, 1, self.hidden_size, device=own_device)

In [12]:
class DecoderRNN(nn.Module):
    """Attention-free GRU decoder emitting log-probabilities over the output vocabulary.

    Args:
        hidden_size: Width of the embedding and of the GRU hidden state.
        output_size: Number of entries in the output vocabulary.
    """

    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one decoding step; returns ``(log_probs, hidden)``."""
        embedded = F.relu(self.embedding(input).view(1, 1, -1))
        output, hidden = self.gru(embedded, hidden)
        # output[0] drops the length-1 sequence axis before the projection.
        return self.softmax(self.out(output[0])), hidden

    def initHidden(self):
        """Return a zero hidden state of shape (1, 1, hidden_size).

        Allocated on the device of this module's own parameters instead of a
        notebook-level global so it cannot disagree with the weights.
        """
        own_device = next(self.parameters()).device
        return torch.zeros(1, 1, self.hidden_size, device=own_device)





In [13]:
class AttnDecoderRNN(nn.Module):
    """GRU decoder with learned attention over a fixed number of encoder positions.

    Args:
        hidden_size: Width of the embedding and of the GRU hidden state.
        output_size: Number of entries in the output vocabulary.
        dropout_p: Dropout probability applied to the embedded input token.
        max_length: Number of encoder positions the attention layer scores;
            ``encoder_outputs`` passed to ``forward`` must have this many rows.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Returns ``(log_probs, hidden, attn_weights)`` where ``attn_weights``
        holds one weight per encoder position.
        """
        embedded = self.dropout(self.embedding(input).view(1, 1, -1))

        # Attention scores come from the current input embedding and the
        # previous hidden state, normalized over the encoder positions.
        attn_scores = self.attn(torch.cat((embedded[0], hidden[0]), 1))
        attn_weights = F.softmax(attn_scores, dim=1)
        # Weighted sum of the encoder outputs (the context vector).
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        combined = torch.cat((embedded[0], attn_applied[0]), 1)
        gru_input = F.relu(self.attn_combine(combined).unsqueeze(0))
        output, hidden = self.gru(gru_input, hidden)

        log_probs = F.log_softmax(self.out(output[0]), dim=1)
        return log_probs, hidden, attn_weights

    def initHidden(self):
        """Return a zero hidden state of shape (1, 1, hidden_size).

        Allocated on the device of this module's own parameters instead of a
        notebook-level global so it cannot disagree with the weights.
        """
        own_device = next(self.parameters()).device
        return torch.zeros(1, 1, self.hidden_size, device=own_device)

In [14]:
def indexesFromSentence(lang, sentence):
    """Map each single-space-separated word of ``sentence`` to its index in ``lang``.

    Raises KeyError for a word missing from ``lang.word2index``.
    """
    lookup = lang.word2index
    indexes = []
    for token in sentence.split(' '):
        indexes.append(lookup[token])
    return indexes


def tensorFromSentence(lang, sentence):
    """Encode ``sentence`` as a column tensor of word indices ending in EOS_token."""
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    flat = torch.tensor(token_ids, dtype=torch.long, device=device)
    # One index per row: shape (number_of_tokens + 1, 1).
    return flat.view(-1, 1)


def tensorsFromPair(pair):
    """Tensorize a Model 1 pair: ``pair[0]`` with the Hindi vocabulary, ``pair[1]`` with the English one."""
    source = tensorFromSentence(input_lang_hin, pair[0])
    target = tensorFromSentence(output_lang_eng, pair[1])
    return (source, target)

In [15]:
# Probability that a training step feeds the ground-truth target token (rather
# than the decoder's own prediction) as the next decoder input.
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimization step on a single sentence pair.

    Encodes ``input_tensor`` token by token, decodes ``target_tensor`` either
    with teacher forcing or with the decoder's own predictions (chosen at
    random per call), back-propagates the summed loss and steps both
    optimizers.

    Returns:
        The summed loss divided by the target length.
    """
    hidden = encoder.initHidden()

    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    # Rows beyond the actual input length stay zero.
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    for position in range(input_length):
        step_output, hidden = encoder(input_tensor[position], hidden)
        encoder_outputs[position] = step_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: the ground-truth token is the next input.
            decoder_input = target_tensor[di]
            continue

        # Free running: feed back the most likely token, cut from the graph.
        decoder_input = decoder_output.topk(1)[1].squeeze().detach()
        if decoder_input.item() == EOS_token:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [16]:
import time
import math


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Return ``'<elapsed> (- <remaining>)'`` for a task started at ``since``.

    ``percent`` is the completed fraction (callers pass ``iter / n_iters``);
    the remaining time is extrapolated linearly from the elapsed time.
    """
    elapsed = time.time() - since
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [17]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train Model 1 for ``n_iters`` single-pair SGD steps.

    Pairs are sampled with replacement from the global ``pairs``. The average
    loss is printed every ``print_every`` steps and recorded every
    ``plot_every`` steps; the recorded averages are passed to ``showPlot``.
    """
    started_at = time.time()
    plot_losses = []
    running_print_loss = 0  # Reset every print_every
    running_plot_loss = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    # All training examples are drawn and tensorized up front.
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    for step, (input_tensor, target_tensor) in enumerate(training_pairs, start=1):
        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        running_print_loss += loss
        running_plot_loss += loss

        if step % print_every == 0:
            print_loss_avg = running_print_loss / print_every
            running_print_loss = 0
            print('%s (%d %d%%) %.4f' % (timeSince(started_at, step / n_iters),
                                         step, step / n_iters * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_losses.append(running_plot_loss / plot_every)
            running_plot_loss = 0

    showPlot(plot_losses)

In [18]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    """Draw ``points`` as a line chart with y-axis major ticks every 0.2."""
    # plt.subplots() already creates the figure; the extra plt.figure() call
    # that used to precede it opened an additional, never-used empty figure
    # on every invocation.
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    ax.plot(points)

In [19]:
def evaluate_eng(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Greedily translate a Hindi ``sentence`` to English with Model 1.

    Args:
        encoder: Trained encoder module.
        decoder: Trained attention decoder module.
        sentence: Space-separated Hindi sentence; every word must be in
            ``input_lang_hin`` (unknown words raise KeyError).
        max_length: Maximum number of decoding steps and size of the
            encoder-output buffer.

    Returns:
        ``(decoded_words, attentions)``: the decoded tokens (ending in
        ``'<EOS>'`` when the decoder emitted the end marker) and the attention
        weights of each performed decoding step.
    """
    # Inference must run in eval mode: otherwise the decoder's dropout layer
    # stays active and translations become randomly perturbed. The previous
    # modes are restored afterwards so training can continue unaffected.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_tensor = tensorFromSentence(input_lang_hin, sentence)
            input_length = input_tensor.size()[0]
            encoder_hidden = encoder.initHidden()

            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                         encoder_hidden)
                encoder_outputs[ei] += encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

            decoder_hidden = encoder_hidden

            decoded_words = []
            decoder_attentions = torch.zeros(max_length, max_length)

            for di in range(max_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                decoder_attentions[di] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1)
                if topi.item() == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                else:
                    decoded_words.append(output_lang_eng.index2word[topi.item()])

                decoder_input = topi.squeeze().detach()

            return decoded_words, decoder_attentions[:di + 1]
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

In [20]:
def evaluateRandomly(encoder, decoder, n=20):
    """Print source (>), reference (=) and model output (<) for ``n`` random pairs."""
    for _ in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        decoded, _attn = evaluate_eng(encoder, decoder, pair[0])
        print('<', ' '.join(decoded))
        print('')

In [21]:
# Model 1 (Hindi -> English): embedding and GRU width shared by encoder and decoder.
hidden_size = 100
# Vocabulary sizes come from the Lang objects built by prepareData.
encoder_eng = EncoderRNN(input_lang_hin.n_words, hidden_size).to(device)
attn_decoder_eng = AttnDecoderRNN(hidden_size, output_lang_eng.n_words, dropout_p=0.1).to(device)

# 75000 single-pair SGD steps, reporting the average loss every 5000 steps.
trainIters(encoder_eng, attn_decoder_eng, 75000, print_every=5000)

1m 42s (- 23m 55s) (5000 6%) 4.7785
3m 21s (- 21m 48s) (10000 13%) 4.5101
5m 1s (- 20m 6s) (15000 20%) 4.1837
6m 41s (- 18m 24s) (20000 26%) 3.8734
8m 27s (- 16m 55s) (25000 33%) 3.5735
10m 9s (- 15m 13s) (30000 40%) 3.2903
11m 51s (- 13m 33s) (35000 46%) 3.0551
13m 34s (- 11m 52s) (40000 53%) 2.8097
15m 18s (- 10m 12s) (45000 60%) 2.5975
17m 1s (- 8m 30s) (50000 66%) 2.4165
18m 45s (- 6m 49s) (55000 73%) 2.2291
20m 32s (- 5m 8s) (60000 80%) 2.0467
22m 18s (- 3m 25s) (65000 86%) 1.9539
24m 2s (- 1m 43s) (70000 93%) 1.8074
25m 46s (- 0m 0s) (75000 100%) 1.6891


In [22]:
evaluate_eng(encoder_eng, attn_decoder_eng, "उसने दरवाज़े के पास एक कुत्ता देखा।")

(['He', 'saw', 'a', 'dog', 'near', 'the', 'door.', '<EOS>'],
 tensor([[2.3266e-05, 3.4521e-06, 4.4568e-04, 9.1040e-06, 9.9706e-01, 7.9903e-04,
          1.6165e-04, 4.4120e-08, 5.8997e-04, 1.7568e-05, 4.2125e-09, 3.9673e-05,
          4.5501e-05, 4.1066e-05, 6.0750e-05, 1.9340e-04, 5.5782e-05, 4.7673e-05,
          4.1301e-05, 5.1636e-05, 6.2603e-05, 8.5850e-05, 1.1590e-05, 4.0929e-05,
          7.5914e-05, 4.1254e-05],
         [3.9088e-05, 1.5984e-05, 1.1676e-04, 3.6429e-05, 7.0703e-04, 9.9848e-01,
          1.4601e-04, 1.1208e-06, 7.9547e-07, 4.1566e-06, 2.0903e-07, 5.7608e-06,
          7.4587e-05, 1.2837e-05, 3.5362e-05, 6.8020e-06, 8.0225e-06, 3.7287e-05,
          9.1120e-06, 7.2973e-05, 2.9273e-05, 4.6223e-05, 2.1511e-05, 2.5770e-05,
          3.4320e-05, 3.3608e-05],
         [2.0943e-08, 8.1613e-08, 1.2803e-06, 2.5005e-08, 7.6300e-08, 9.9243e-08,
          9.9978e-01, 2.0733e-04, 2.6190e-07, 1.4579e-09, 6.3712e-10, 9.6988e-06,
          8.5041e-08, 8.8413e-08, 7.6236e-08, 1.7

In [23]:
evaluateRandomly(encoder_eng, attn_decoder_eng)

> वह रूसी बोलना और लिखना दोनो जानता है।
= He can both speak and write Russian.
< He treats that and and and <EOS>

> हो सकता है कि उसकी कहानी सच न हो।
= His story may not be true.
< His story may not be true. <EOS>

> मैं तुम्हारे बारे में सोच रहा हूँ।
= I'm thinking about you.
< I'm make your <EOS>

> उसने उसका प्रस्ताव खुशी से स्वीकार कर लिया।
= She gladly accepted his proposal.
< She accepted not sure of the <EOS>

> वह बीमारी की वजह से नहीं आ सका।
= She could not come because of her illness.
< He could not not because to illness. <EOS>

> यह चाकू तेज़ नहीं है।
= The knife is not sharp.
< This isn't not very <EOS>

> पापा बाथरूम में दाढ़ी बना रहे हैं।
= Dad is shaving in the bathroom.
< It is shaving in rooms. <EOS>

> उसने अपनी पत्नी को उसे सुबह सात बजे उठाने की बात का याद दिलाया।
= He reminded his wife to wake him up at 7:00 a.m.
< He reminded his wife to wake the money. <EOS>

> पागल मत बनो।
= Don't be absurd.
< Don't be absurd. <EOS>

> मुझे उसकी झलक दिखाई दी।
= I caught a glimp

Now train another model (Model 2) for the reverse (i.e., from English to the language you chose). In this model, use the GloVe 100 dimensional embeddings. See notebook 4, cell 2 for an example while training.


In [24]:
# Reserved vocabulary indices (same values as defined for Model 1 earlier in
# the notebook): decoder start marker and end-of-sentence marker.
SOS_token = 0
EOS_token = 1


class Lang:
    """Word/index vocabulary with occurrence counts for a single language.

    "SOS" and "EOS" occupy indices 0 and 1; real words are numbered from 2 in
    order of first appearance.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # the two markers above are already included

    def addSentence(self, sentence):
        """Add each token obtained by splitting ``sentence`` on single spaces."""
        for piece in sentence.split(' '):
            self.addWord(piece)

    def addWord(self, word):
        """Record one occurrence of ``word``, indexing it on first sight."""
        already_known = word in self.word2index
        if not already_known:
            slot = self.n_words
            self.word2index[word] = slot
            self.index2word[slot] = word
            self.word2count[word] = 0
            self.n_words = slot + 1
        self.word2count[word] += 1


In [25]:
def unicodeToAscii(s):
    """NFD-normalize ``s`` and drop all nonspacing-mark ('Mn') characters."""
    def _is_not_mark(ch):
        return unicodedata.category(ch) != 'Mn'

    return ''.join(filter(_is_not_mark, unicodedata.normalize('NFD', s)))

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    """Lowercase, trim and de-accent ``s``, then separate ``.!?`` and blank out other non-letters."""
    text = unicodeToAscii(s.lower().strip())
    # First pass inserts a space before . ! ?; second pass replaces every run
    # of characters outside ASCII letters and . ! ? with one space.
    for pattern, replacement in ((r"([.!?])", r" \1"), (r"[^a-zA-Z.!?]+", r" ")):
        text = re.sub(pattern, replacement, text)
    return text

In [29]:
from google.colab import files
uploaded = files.upload()

Saving hin-engR.zip to hin-engR.zip


In [30]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [31]:
!unzip hin-eng.zip -d hin-eng

Archive:  hin-eng.zip
replace hin-eng/hin.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: hin-eng/hin.txt         
  inflating: hin-eng/_about.txt      


In [35]:
def readLangs(lang1, lang2, reverse=True, path='/content/hin-eng/hin.txt'):
    """Read the tab-separated corpus and create empty vocabularies.

    Args:
        lang1: Name for the language in the file's first column.
        lang2: Name for the language in the file's second column.
        reverse: When true (the default of this definition), swap each pair
            and the two vocabularies so that ``lang2`` becomes the input side.
        path: Location of the corpus file (defaults to the Colab path the
            notebook unzips to).

    Returns:
        ``(input_lang, output_lang, pairs)`` where ``pairs`` is a list of
        ``[input_sentence, output_sentence]`` lists. The vocabularies are
        empty; no text normalization is applied to the sentences.
    """
    print("Reading lines...")

    # Read the file and split into lines; the context manager guarantees the
    # handle is closed (the bare open(...).read() left it to the GC).
    with open(path, encoding='utf-8') as corpus:
        lines = corpus.read().strip().split('\n')

    # Keep only the first two tab-separated fields of each line; any further
    # columns are discarded.
    pairs = [line.split('\t')[0:2] for line in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [33]:
# Sentence-length bound used by filterPair and as the default size of the
# encoder-output / attention buffers.
MAX_LENGTH = 26

# Prefixes that filterPair requires p[1] to start with.
# NOTE(review): filterPair tests p[1]; confirm that is still the English side
# for the pair orientation used in this section.
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)


def filterPair(p):
    """Accept ``p`` only if both sentences are under MAX_LENGTH tokens and ``p[1]`` has an allowed prefix."""
    first_is_short = len(p[0].split(' ')) < MAX_LENGTH
    if not first_is_short:
        return False
    second_is_short = len(p[1].split(' ')) < MAX_LENGTH
    if not second_is_short:
        return False
    return p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    """Collect, in order, the pairs for which ``filterPair`` is true."""
    accepted = []
    for candidate in pairs:
        if filterPair(candidate):
            accepted.append(candidate)
    return accepted

In [36]:
def prepareData(lang1, lang2, reverse=True):
    """Read the corpus with ``readLangs`` and populate both vocabularies.

    Returns ``(input_lang, output_lang, pairs)``.
    """
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    pair_count = len(pairs)
    print("Read %s sentence pairs" % pair_count)
    # No filtering is applied (filterPairs is not called), so the number
    # reported as "Trimmed" is identical to the number read.
    print("Trimmed to %s sentence pairs" % pair_count)
    print("Counting words...")
    for item in pairs:
        input_lang.addSentence(item[0])
        output_lang.addSentence(item[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs

In [58]:
# Model 2 data (English -> Hindi): reverse=False keeps the file's column order,
# so the English sentence is the input side.
# NOTE(review): this rebinds the global `pairs`, which trainIters and
# evaluateRandomly (Model 1) also read; after this cell those helpers see
# English-first pairs. Consider a separate name for each direction.
input_lang_engl, output_lang_hind, pairs = prepareData('eng','hin',False)
# Show one random pair as a sanity check of the column order.
print(random.choice(pairs))

Reading lines...
Read 2934 sentence pairs
Trimmed to 2934 sentence pairs
Counting words...
Counted words:
eng 3530
hin 3179
["I'm longing to see him.", 'मैं उससे मिलने के लिए उत्सुक हूँ।']


In [62]:
class EncoderRNN1(nn.Module):
    """GRU encoder for Model 2 that consumes one token index per ``forward`` call.

    Args:
        input_size: Number of entries in the input vocabulary.
        hidden_size: Width of both the embedding and the GRU hidden state.
        pretrained_embeddings: Optional ``(input_size, hidden_size)`` matrix
            (e.g. GloVe vectors arranged in vocabulary order) used to
            initialise the embedding table. When omitted the table is
            randomly initialised, as before.
        freeze_embeddings: When pretrained vectors are given, keep them fixed
            during training instead of fine-tuning them.

    Raises:
        ValueError: If ``pretrained_embeddings`` does not have shape
            ``(input_size, hidden_size)``.
    """

    def __init__(self, input_size, hidden_size, pretrained_embeddings=None,
                 freeze_embeddings=False):
        super(EncoderRNN1, self).__init__()
        self.hidden_size = hidden_size

        if pretrained_embeddings is None:
            self.embedding = nn.Embedding(input_size, hidden_size)
        else:
            # Copy so training never modifies the caller's matrix in place.
            weights = torch.as_tensor(pretrained_embeddings, dtype=torch.float).clone()
            if tuple(weights.shape) != (input_size, hidden_size):
                raise ValueError(
                    "pretrained_embeddings must have shape (%d, %d), got %s"
                    % (input_size, hidden_size, tuple(weights.shape)))
            self.embedding = nn.Embedding.from_pretrained(
                weights, freeze=freeze_embeddings)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Embed one token and advance the GRU; returns ``(output, hidden)``."""
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        """Return a zero hidden state of shape (1, 1, hidden_size).

        Allocated on the device of this module's own parameters instead of a
        notebook-level global so it cannot disagree with the weights.
        """
        own_device = next(self.parameters()).device
        return torch.zeros(1, 1, self.hidden_size, device=own_device)

In [65]:
class DecoderRNN1(nn.Module):
    """Attention-free GRU decoder for Model 2; emits log-probabilities over the output vocabulary.

    Args:
        hidden_size: Width of the embedding and of the GRU hidden state.
        output_size: Number of entries in the output vocabulary.
    """

    def __init__(self, hidden_size, output_size):
        super(DecoderRNN1, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one decoding step; returns ``(log_probs, hidden)``."""
        step_input = self.embedding(input).view(1, 1, -1)
        step_input = F.relu(step_input)
        output, hidden = self.gru(step_input, hidden)
        log_probs = self.softmax(self.out(output[0]))
        return log_probs, hidden

    def initHidden(self):
        """Return a zero hidden state of shape (1, 1, hidden_size).

        Allocated on the device of this module's own parameters instead of a
        notebook-level global so it cannot disagree with the weights.
        """
        own_device = next(self.parameters()).device
        return torch.zeros(1, 1, self.hidden_size, device=own_device)





In [64]:
class AttnDecoderRNN1(nn.Module):
    """Attention GRU decoder for Model 2, attending over a fixed number of encoder positions.

    Args:
        hidden_size: Width of the embedding and of the GRU hidden state.
        output_size: Number of entries in the output vocabulary.
        dropout_p: Dropout probability applied to the embedded input token.
        max_length: Number of encoder positions the attention layer scores;
            ``encoder_outputs`` passed to ``forward`` must have this many rows.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN1, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step; returns ``(log_probs, hidden, attn_weights)``."""
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Score every encoder position from the input embedding and the
        # previous hidden state, then normalize the scores.
        query = torch.cat((embedded[0], hidden[0]), 1)
        attn_weights = F.softmax(self.attn(query), dim=1)
        # Attention-weighted sum of the encoder outputs.
        context = torch.bmm(attn_weights.unsqueeze(0),
                            encoder_outputs.unsqueeze(0))

        merged = torch.cat((embedded[0], context[0]), 1)
        merged = self.attn_combine(merged).unsqueeze(0)

        output, hidden = self.gru(F.relu(merged), hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        """Return a zero hidden state of shape (1, 1, hidden_size).

        Allocated on the device of this module's own parameters instead of a
        notebook-level global so it cannot disagree with the weights.
        """
        own_device = next(self.parameters()).device
        return torch.zeros(1, 1, self.hidden_size, device=own_device)

In [66]:
def indexesFromSentence(lang, sentence):
    """Return the ``lang.word2index`` entry of each single-space-separated word.

    A word absent from the vocabulary raises KeyError.
    """
    return list(map(lang.word2index.__getitem__, sentence.split(' ')))


def tensorFromSentence(lang, sentence):
    """Turn ``sentence`` into a long column tensor of indices terminated by EOS_token."""
    ids = indexesFromSentence(lang, sentence)
    ids.append(EOS_token)
    as_tensor = torch.tensor(ids, dtype=torch.long, device=device)
    # Shape (len(ids), 1): one token index per row.
    return as_tensor.view(-1, 1)


def tensorsFromPair1(pair):
    """Tensorize a Model 2 pair: ``pair[0]`` with the English vocabulary, ``pair[1]`` with the Hindi one."""
    english_side = tensorFromSentence(input_lang_engl, pair[0])
    hindi_side = tensorFromSentence(output_lang_hind, pair[1])
    return (english_side, hindi_side)

In [67]:
# Probability with which train1 uses teacher forcing (ground-truth tokens as
# decoder inputs) for a given training step.
teacher_forcing_ratio = 0.5


def train1(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Perform one training step for Model 2 on a single sentence pair.

    The input is encoded token by token; the target is decoded with teacher
    forcing or with the decoder's own predictions, picked at random per call.
    The summed loss is back-propagated and both optimizers are stepped.

    Returns:
        The summed loss divided by the target length.
    """
    encoder_state = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    n_source = input_tensor.size(0)
    n_target = target_tensor.size(0)

    # One row per encoder step; unused rows remain zero.
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    for idx in range(n_source):
        enc_out, encoder_state = encoder(input_tensor[idx], encoder_state)
        encoder_outputs[idx] = enc_out[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_state = encoder_state

    teacher_forced = random.random() < teacher_forcing_ratio

    total_loss = 0
    for idx in range(n_target):
        dec_out, decoder_state, _ = decoder(
            decoder_input, decoder_state, encoder_outputs)
        total_loss += criterion(dec_out, target_tensor[idx])

        if teacher_forced:
            # Next input is the ground-truth token.
            decoder_input = target_tensor[idx]
        else:
            # Next input is the decoder's best guess, detached from history.
            _, best = dec_out.topk(1)
            decoder_input = best.squeeze().detach()
            if decoder_input.item() == EOS_token:
                break

    total_loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return total_loss.item() / n_target

In [68]:
import time
import math


def asMinutes(s):
    """Render ``s`` seconds as ``'<m>m <s>s'`` with both parts truncated to integers."""
    minutes = math.floor(s / 60)
    return '%dm %ds' % (minutes, s - minutes * 60)


def timeSince(since, percent):
    """Format elapsed time since ``since`` and the linearly extrapolated time left.

    ``percent`` is the fraction of the work already done (callers pass
    ``iter / n_iters``).
    """
    spent = time.time() - since
    estimated_total = spent / (percent)
    return '%s (- %s)' % (asMinutes(spent), asMinutes(estimated_total - spent))

In [69]:
def trainIters1(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train Model 2 (English -> Hindi) for ``n_iters`` single-pair SGD steps.

    Pairs are sampled with replacement from the global ``pairs``. The average
    loss is printed every ``print_every`` steps and recorded every
    ``plot_every`` steps; the recorded averages are passed to ``showPlot``.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    # Use the Model 2 tensorizer: tensorsFromPair is bound to the Model 1
    # vocabularies (Hindi input) and cannot index English input words.
    training_pairs = [tensorsFromPair1(random.choice(pairs))
                      for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    for step in range(1, n_iters + 1):
        input_tensor, target_tensor = training_pairs[step - 1]

        # Likewise call this section's own training step, train1.
        loss = train1(input_tensor, target_tensor, encoder,
                      decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters),
                                         step, step / n_iters * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [70]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    """Plot ``points`` as a line with y-axis major ticks spaced 0.2 apart."""
    # A single plt.subplots() call is enough; the plt.figure() that used to
    # come first created a second, empty figure that was never drawn on.
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)

In [71]:
def evaluate_hind(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Greedily translate an English ``sentence`` to Hindi with Model 2.

    Args:
        encoder: Trained encoder module.
        decoder: Trained attention decoder module.
        sentence: Space-separated English sentence; every word must be in
            ``input_lang_engl`` (unknown words raise KeyError).
        max_length: Maximum number of decoding steps and size of the
            encoder-output buffer.

    Returns:
        ``(decoded_words, attentions)``: the decoded tokens (ending in
        ``'<EOS>'`` when the decoder emitted the end marker) and the attention
        weights of each performed decoding step.
    """
    # Switch to eval mode for inference so the decoder's dropout is disabled
    # and the translation is deterministic; restore the prior modes on exit.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_tensor = tensorFromSentence(input_lang_engl, sentence)
            input_length = input_tensor.size()[0]
            encoder_hidden = encoder.initHidden()

            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                         encoder_hidden)
                encoder_outputs[ei] += encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

            decoder_hidden = encoder_hidden

            decoded_words = []
            decoder_attentions = torch.zeros(max_length, max_length)

            for di in range(max_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                decoder_attentions[di] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1)
                if topi.item() == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                else:
                    decoded_words.append(output_lang_hind.index2word[topi.item()])

                decoder_input = topi.squeeze().detach()

            return decoded_words, decoder_attentions[:di + 1]
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

In [72]:
def evaluateRandomly1(encoder, decoder, n=20):
    """For ``n`` random pairs print the input (>), the reference (=) and Model 2's output (<)."""
    remaining = n
    while remaining > 0:
        remaining -= 1
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        words, _ = evaluate_hind(encoder, decoder, pair[0])
        output_sentence = ' '.join(words)
        print('<', output_sentence)
        print('')

In [73]:
# Model 2 (English -> Hindi): embedding and GRU width shared by encoder and decoder.
# NOTE(review): the exercise text asks for GloVe 100-dimensional embeddings;
# the encoder below is created with a randomly initialised embedding table.
hidden_size = 100
encoder_hind = EncoderRNN1(input_lang_engl.n_words, hidden_size).to(device)
attn_decoder_hind = AttnDecoderRNN1(hidden_size, output_lang_hind.n_words, dropout_p=0.1).to(device)

# 75000 single-pair SGD steps, reporting the average loss every 5000 steps.
trainIters1(encoder_hind, attn_decoder_hind, 75000, print_every=5000)

1m 47s (- 24m 58s) (5000 6%) 4.9423
3m 29s (- 22m 41s) (10000 13%) 4.7145
5m 13s (- 20m 54s) (15000 20%) 4.3446
6m 58s (- 19m 10s) (20000 26%) 4.0552
8m 43s (- 17m 27s) (25000 33%) 3.7258
10m 29s (- 15m 44s) (30000 40%) 3.4265
12m 16s (- 14m 1s) (35000 46%) 3.1160
14m 3s (- 12m 18s) (40000 53%) 2.8685
15m 50s (- 10m 33s) (45000 60%) 2.6487
17m 38s (- 8m 49s) (50000 66%) 2.4687
19m 25s (- 7m 3s) (55000 73%) 2.2793
21m 13s (- 5m 18s) (60000 80%) 2.0878
23m 0s (- 3m 32s) (65000 86%) 1.9977
24m 48s (- 1m 46s) (70000 93%) 1.8841
26m 35s (- 0m 0s) (75000 100%) 1.7308


In [75]:
evaluate_hind(encoder_hind, attn_decoder_hind, "He reminded his wife to wake the money.")

(['उसने', 'मेरी', 'पैसों', 'के', 'लिए', 'कहा।', '<EOS>'],
 tensor([[1.6726e-07, 2.1061e-07, 3.7565e-08, 1.7129e-06, 3.5030e-03, 9.9544e-01,
          4.8545e-06, 1.3129e-07, 4.3596e-09, 3.0264e-10, 1.0003e-03, 1.1400e-06,
          2.3294e-05, 8.3562e-07, 7.3721e-07, 1.5057e-06, 2.4991e-06, 1.6206e-06,
          6.8037e-07, 6.1450e-07, 9.2637e-07, 7.3787e-07, 1.1199e-05, 1.9549e-06,
          4.5299e-06, 8.8837e-07],
         [3.0154e-08, 3.3110e-10, 1.5241e-09, 4.0350e-09, 9.9996e-01, 1.4760e-06,
          3.3667e-07, 1.7651e-08, 4.8406e-12, 1.8393e-06, 3.2252e-05, 2.6598e-07,
          3.8362e-08, 1.1188e-07, 5.4164e-09, 1.4502e-07, 3.4157e-08, 4.7439e-08,
          7.3407e-08, 5.2747e-08, 2.4752e-08, 2.4757e-08, 1.0059e-07, 2.1607e-08,
          1.1306e-07, 2.6350e-08],
         [1.3806e-10, 4.1029e-11, 4.2814e-09, 3.2923e-09, 1.2322e-09, 1.7962e-09,
          1.0000e+00, 4.1646e-08, 1.1681e-11, 3.0454e-08, 1.8289e-07, 7.3502e-08,
          2.5566e-07, 5.9163e-09, 3.9816e-09, 1.8180

In [76]:
evaluateRandomly1(encoder_hind, attn_decoder_hind)

> I am able to drive a car.
= मैं गाड़ी चला सकता हूँ।
< मैं गाड़ी चला सकता हूँ। <EOS>

> He deals in furniture.
= वो फ़र्निचर का व्यापार करता है।
< उसके कपड़े का आग <EOS>

> I will explain it to him.
= मैं उसको यह बात समझाउँगा।
< मैं उसको यह बात समझाउँगा। यह तो मैं <EOS>

> I understand.
= मैं समझता हूँ।
< मैं समझता हूँ। <EOS>

> They were scolded by the teacher.
= उन्हें अपनी टीचर से डाँट पड़ी।
< उन्हें एक से का पालन <EOS>

> Will you turn on the light?
= बत्ती चालू कर दोगे क्या?
< बत्ती चालू कर क्या? <EOS>

> Tom wants to know why you didn't call him back.
= टॉम पूछ रहा है कि तुमने उसे कॉल बैक क्यों नहीं किया।
< टॉम पूछ कि कि टॉम वह क्यों <EOS>

> London is one of the largest cities in the world.
= लंदन दुनिया के सबसे बड़े शहरों में से एक है।
< दुनिया दुनिया से ऑस्ट्रेलिया से के में बहुत में है। <EOS>

> He is accustomed to hard work.
= उसे मेहनत करने की आदत है।
< उसको मेहनत करने की आदत है। है। <EOS>

> I was waiting for a taxi.
= मैं टैक्सी का इंतेज़ार कर रहा था।
< मैं कल एक का इंते

Input 5 well formed sentences from the English vocab to Model 2, and input the resultant translated sentences to Model 1. Display all model outputs in each case.

In [77]:
def translate_eng(input_sentence):
    """Translate a Hindi sentence with Model 1 and print the input and the decoded output."""
    decoded, _attn = evaluate_eng(encoder_eng, attn_decoder_eng, input_sentence)
    print('input =', input_sentence)
    print('output =', ' '.join(decoded))

In [80]:
translate_eng("मेरी चिंता मत करो।")

input = मेरी चिंता मत करो।
output = Don't worry about me. <EOS>


In [81]:
def translate_hind(input_sentence):
    """Translate an English sentence with Model 2 and print the input and the decoded output."""
    result = evaluate_hind(encoder_hind, attn_decoder_hind, input_sentence)
    decoded = result[0]
    print('input =', input_sentence)
    print('output =', ' '.join(decoded))

In [82]:
translate_hind("How are you?")

input = How are you?
output = तुम कैसे हो? <EOS>


In [107]:
translate_hind("His story may not be true.")

input = His story may not be true.
output = घड़ी नहीं कि यह सच सच है। <EOS>


In [106]:
translate_hind("It happened between eight and ten.")

input = It happened between eight and ten.
output = आठ और दस बजे बजे <EOS>


In [108]:
translate_hind("Don't be absurd.")

input = Don't be absurd.
output = पागल मत बनो। <EOS>


In [92]:
translate_hind("They were scolded by the teacher.")

input = They were scolded by the teacher.
output = उन्हें एक अपनी से से डाँट <EOS>


Hindi-English-Hindi

In [165]:
def translate_all(input_sentence):
    """Round-trip a Hindi sentence: Hindi -> English (Model 1) -> Hindi (Model 2).

    Prints the original input, the intermediate English translation (without
    the end marker) and the final Hindi translation.
    """
    output_words_eng, attentions = evaluate_eng(
        encoder_eng, attn_decoder_eng, input_sentence)
    # Remove the end marker only when it is really there: if decoding stopped
    # at max_length without emitting '<EOS>', the last token is a real word
    # and an unconditional pop() would silently drop it.
    if output_words_eng and output_words_eng[-1] == '<EOS>':
        output_words_eng.pop()
    output = ' '.join(output_words_eng)
    output_words_hind, attentions = evaluate_hind(
        encoder_hind, attn_decoder_hind, output)

    print('input =', input_sentence)
    print('output =', output)
    print('output1 =', ' '.join(output_words_hind))
    

In [166]:
translate_all("तुम कैसे हो?")

input = तुम कैसे हो?
output = How are you?
output1 = तुम कैसे हो? <EOS>
