In [140]:
import tensorflow as tf

In [141]:
import numpy as np

import os
import pickle
import copy
import numpy as np

In [142]:
CODES = {'<PAD>': 0, '<EOS>': 1, '<UNK>': 2, '<GO>': 3 }
def create_lookup_table(vocab):
    # make a list of unique characters
    
    vocab = set(list(vocab))
    vocab_to_int = copy.copy(CODES)
    for v_i, v in enumerate(vocab, len(CODES)):
            vocab_to_int[v] = v_i
            
    int_to_vocab = {v_i: v for v, v_i in vocab_to_int.items()}
        
    return vocab_to_int, int_to_vocab

In [143]:
def text_to_ids(source_words, target_words, source_vocab_to_int, target_vocab_to_int):
    
        #1st, 2nd args: raw string text to be converted
        #3rd, 4th args: lookup tables for 1st and 2nd args respectively
    
        #return: A tuple of lists (source_id_text, target_id_text) converted
    
    # empty list of converted words
    source_text_id = []
    target_text_id = []
    
    max_source_word_length = max([len(word) for word in source_words])
    max_target_word_length = max([len(word) for word in target_words])
    
    # iterating through each word (# of words in source&target is the same)
    for i in range(len(source_words)):
        # extract words one by one
        source_word = source_words[i]
        target_word = target_words[i]
        
        # make a list of characters (extraction) from the chosen word
        source_tokens = list(source_word)
        target_tokens = list(target_word)
        
        # empty list of converted words to index in the chosen word
        source_token_id = []
        target_token_id = []
        
        for index, token in enumerate(source_tokens):
                source_token_id.append(source_vocab_to_int[token])
        
        for index, token in enumerate(target_tokens):
                target_token_id.append(target_vocab_to_int[token])
                
        # put <EOS> token at the end of the chosen target word
        # this token suggests when to stop creating a sequence
        target_token_id.append(target_vocab_to_int['<EOS>'])
            
        # add each converted words in the final list
        source_text_id.append(source_token_id)
        target_text_id.append(target_token_id)
    
    return source_text_id, target_text_id

In [148]:
def preprocess(source_text, target_text):


    Englishvocab = 'abcdefghijklmnopqrstuvwxyz'    
    Hindivocab = 'ँंॉॆॊॏऺऻॎःािीुूेैोौअआइईउऊएऐओऔकखगघचछजझटठडढणतथदधनपफबभमयरलवशषसहज्ञक्षश्रज़रफ़ड़ढ़ख़क़ग़ळृृ़़ऑ'
    # create lookup tables for English and Hindi data
    source_vocab_to_int, source_int_to_vocab = create_lookup_table(Hindivocab)
    target_vocab_to_int, target_int_to_vocab = create_lookup_table(Englishvocab)

    # create list of words whose characters are represented in index
    source_text, target_text = text_to_ids(source_text, target_text, source_vocab_to_int, target_vocab_to_int)
    
     # Save data for later use
    #pickle.dump((
        #(source_text, target_text),
      #  (source_vocab_to_int, target_vocab_to_int),
       # (source_int_to_vocab, target_int_to_vocab)), open('preprocessTest.p', 'wb'))
    
    return source_text,target_text,source_vocab_to_int,target_vocab_to_int,source_int_to_vocab,target_int_to_vocab

In [149]:
import pandas as pd
dataset = pd.read_csv("transliteration.txt",delimiter = "\t",header=None,encoding='utf-8',na_filter = False)
X = dataset.iloc[:,-1]
y = dataset.iloc[:,0]


In [150]:
def load_preprocess():
    with open('preprocessTest.p', mode='rb') as in_file:
        return pickle.load(in_file)

In [151]:
source_int_text, target_int_text, source_vocab_to_int, target_vocab_to_int,source_int_to_vocab,target_int_to_vocab = preprocess(X,y)

In [152]:

from distutils.version import LooseVersion
import warnings
import tensorflow as tf
from tensorflow.python.layers.core import Dense

In [153]:
# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.1'), 'Please use TensorFlow version 1.1 or newer'
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.14.0


  import sys


In [154]:
def enc_dec_model_inputs():
    inputs = tf.placeholder(tf.int32, [None, None], name='input')
    targets = tf.placeholder(tf.int32, [None, None], name='targets') 
    
    target_sequence_length = tf.placeholder(tf.int32, [None], name='target_sequence_length')
    max_target_len = tf.reduce_max(target_sequence_length)    
    
    return inputs, targets, target_sequence_length, max_target_len

In [155]:
def hyperparam_inputs():
    lr_rate = tf.placeholder(tf.float32, name='lr_rate')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    return lr_rate, keep_prob

In [156]:
def process_decoder_input(target_data, target_vocab_to_int, batch_size):
    """
    Preprocess target data for encoding
    :return: Preprocessed target data
    """
    # get '<GO>' id
    go_id = target_vocab_to_int['<GO>']
    
    after_slice = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
    after_concat = tf.concat( [tf.fill([batch_size, 1], go_id), after_slice], 1)
    
    return after_concat

In [157]:

def encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob, 
                   source_vocab_size, 
                   encoding_embedding_size):
    """
    :return: tuple (RNN output, RNN state)
    """
    embed = tf.contrib.layers.embed_sequence(rnn_inputs, 
                                             vocab_size=source_vocab_size, 
                                             embed_dim=encoding_embedding_size)
    
    stacked_cells = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(rnn_size), keep_prob) for _ in range(num_layers)])
    
    outputs, state = tf.nn.dynamic_rnn(stacked_cells, 
                                       embed, 
                                       dtype=tf.float32)
    return outputs, state

In [158]:
def decoding_layer_train(encoder_state, dec_cell, dec_embed_input, 
                         target_sequence_length, max_summary_length, 
                         output_layer, keep_prob):
    """
    Create a training process in decoding layer 
    :return: BasicDecoderOutput containing training logits and sample_id
    """
    dec_cell = tf.contrib.rnn.DropoutWrapper(dec_cell, 
                                             output_keep_prob=keep_prob)
    
    # for only input layer
    helper = tf.contrib.seq2seq.TrainingHelper(dec_embed_input, 
                                               target_sequence_length)
    
    decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, 
                                              helper, 
                                              encoder_state, 
                                              output_layer)

    # unrolling the decoder layer
    outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, 
                                                      impute_finished=True, 
                                                      maximum_iterations=max_summary_length)
    return outputs

In [159]:
def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                         end_of_sequence_id, max_target_sequence_length,
                         vocab_size, output_layer, batch_size, keep_prob):
    """
    Create a inference process in decoding layer 
    :return: BasicDecoderOutput containing inference logits and sample_id
    """
    dec_cell = tf.contrib.rnn.DropoutWrapper(dec_cell, 
                                             output_keep_prob=keep_prob)
    
    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(dec_embeddings, 
                                                      tf.fill([batch_size], start_of_sequence_id), 
                                                      end_of_sequence_id)
    
    decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, 
                                              helper, 
                                              encoder_state, 
                                              output_layer)
    
    outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, 
                                                      impute_finished=True, 
                                                      maximum_iterations=max_target_sequence_length)
    return outputs

In [160]:
def decoding_layer(dec_input, encoder_state,
                   target_sequence_length, max_target_sequence_length,
                   rnn_size,
                   num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size):
    """
    Create decoding layer
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    target_vocab_size = len(target_vocab_to_int)
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)
    
    cells = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.LSTMCell(rnn_size) for _ in range(num_layers)])
    
    with tf.variable_scope("decode"):
        output_layer = tf.layers.Dense(target_vocab_size)
        train_output = decoding_layer_train(encoder_state, 
                                            cells, 
                                            dec_embed_input, 
                                            target_sequence_length, 
                                            max_target_sequence_length, 
                                            output_layer, 
                                            keep_prob)

    with tf.variable_scope("decode", reuse=True):
        infer_output = decoding_layer_infer(encoder_state, 
                                            cells, 
                                            dec_embeddings, 
                                            target_vocab_to_int['<GO>'], 
                                            target_vocab_to_int['<EOS>'], 
                                            max_target_sequence_length, 
                                            target_vocab_size, 
                                            output_layer,
                                            batch_size,
                                            keep_prob)

    return (train_output, infer_output)

In [161]:
def seq2seq_model(input_data, target_data, keep_prob, batch_size,
                  target_sequence_length,
                  max_target_sentence_length,
                  source_vocab_size, target_vocab_size,
                  enc_embedding_size, dec_embedding_size,
                  rnn_size, num_layers, target_vocab_to_int):
    """
    Build the Sequence-to-Sequence model
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    enc_outputs, enc_states = encoding_layer(input_data, 
                                             rnn_size, 
                                             num_layers, 
                                             keep_prob, 
                                             source_vocab_size, 
                                             enc_embedding_size)
    
    dec_input = process_decoder_input(target_data, 
                                      target_vocab_to_int, 
                                      batch_size)
    
    train_output, infer_output = decoding_layer(dec_input,
                                               enc_states, 
                                               target_sequence_length, 
                                               max_target_sentence_length,
                                               rnn_size,
                                              num_layers,
                                              target_vocab_to_int,
                                              target_vocab_size,
                                              batch_size,
                                              keep_prob,
                                              dec_embedding_size)
    
    return train_output, infer_output

In [197]:
display_step = 200

epochs = 60
batch_size = 30

rnn_size = 64
num_layers = 2

encoding_embedding_size = 50
decoding_embedding_size = 50

learning_rate = 0.001
keep_probability = 0.5

In [198]:
save_path = 'checkpointsNew2/dev'
source_int_text, target_int_text, source_vocab_to_int, target_vocab_to_int,source_int_to_vocab,target_int_to_vocab = preprocess(X,y)
max_target_sentence_length = max([len(sentence) for sentence in source_int_text])

train_graph = tf.Graph()
with train_graph.as_default():
    input_data, targets, target_sequence_length, max_target_sequence_length = enc_dec_model_inputs()
    lr, keep_prob = hyperparam_inputs()
    
    train_logits, inference_logits = seq2seq_model(tf.reverse(input_data, [-1]),
                                                   targets,
                                                   keep_prob,
                                                   batch_size,
                                                   target_sequence_length,
                                                   max_target_sequence_length,
                                                   len(source_vocab_to_int),
                                                   len(target_vocab_to_int),
                                                   encoding_embedding_size,
                                                   decoding_embedding_size,
                                                   rnn_size,
                                                   num_layers,
                                                   target_vocab_to_int)
    
    training_logits = tf.identity(train_logits.rnn_output, name='logits')
    inference_logits = tf.identity(inference_logits.sample_id, name='predictions')

    # https://www.tensorflow.org/api_docs/python/tf/sequence_mask
    # - Returns a mask tensor representing the first N positions of each cell.
    masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):
        # Loss function - weighted softmax cross entropy
        cost = tf.contrib.seq2seq.sequence_loss(
            training_logits,
            targets,
            masks)

        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)





In [199]:
def pad_word_batch(word_batch, pad_int):
    
    max_word = max([len(word) for word in word_batch])
    return [word + [pad_int] * (max_word - len(word)) for word in word_batch]


def get_batches(sources, targets, batch_size, source_pad_int, target_pad_int):
    
    for batch_i in range(0, len(sources)//batch_size):
        start_i = batch_i * batch_size

        # Slice the right amount for the batch
        sources_batch = sources[start_i:start_i + batch_size]
        targets_batch = targets[start_i:start_i + batch_size]

        # Pad
        pad_sources_batch = np.array(pad_word_batch(sources_batch, source_pad_int))
        pad_targets_batch = np.array(pad_word_batch(targets_batch, target_pad_int))
        
        
        # Need the lengths for the _lengths parameters
        pad_targets_lengths = []
        for target in pad_targets_batch:
            pad_targets_lengths.append(len(target))

        pad_source_lengths = []
        for source in pad_sources_batch:
            pad_source_lengths.append(len(source))

        yield pad_sources_batch, pad_targets_batch, pad_source_lengths, pad_targets_lengths
        
def get_accuracy(target, logits):
    
    #Calculate accuracy
    
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(
            target,
            [(0,0),(0,max_seq - target.shape[1])],
            'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(
            logits,
            [(0,0),(0,max_seq - logits.shape[1])],
            'constant')

    return np.mean(np.equal(target, logits))

In [200]:
train_source = source_int_text[batch_size:]
train_target = target_int_text[batch_size:]
valid_source = source_int_text[:batch_size]
valid_target = target_int_text[:batch_size]

In [201]:
(valid_sources_batch, valid_targets_batch, valid_sources_lengths, valid_targets_lengths ) = next(get_batches(valid_source,
                                                                                                             valid_target,
                                                                                                             batch_size,
                                                                                                             source_vocab_to_int['<PAD>'],
                                                                                                             target_vocab_to_int['<PAD>']))

In [202]:
with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())
    
    for epoch_i in range(epochs):
        
        for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in enumerate(
                get_batches(train_source, train_target, batch_size,
                            source_vocab_to_int['<PAD>'],
                            target_vocab_to_int['<PAD>'])):

            _, loss = sess.run(
                [train_op, cost],
                {input_data: source_batch,
                 targets: target_batch,
                 lr: learning_rate,
                 target_sequence_length: targets_lengths,
                 keep_prob: keep_probability})
            

            if batch_i % display_step == 0 and batch_i > 0:
                
                batch_train_logits = sess.run(
                    inference_logits,
                    {input_data: source_batch,
                     target_sequence_length: targets_lengths,
                     keep_prob: 1.0})

                batch_valid_logits = sess.run(
                    inference_logits,
                    {input_data: valid_sources_batch,
                     target_sequence_length: valid_targets_lengths,
                     keep_prob: 1.0})

                train_acc = get_accuracy(target_batch, batch_train_logits)
                valid_acc = get_accuracy(valid_targets_batch, batch_valid_logits)
                print('Epoch {:>3} Batch {:>3}/{} - Train Accuracy: {:>6.4f}, Validation Accuracy: {:>6.4f}, Loss: {:>6.4f}'
                      .format(epoch_i+1, batch_i, len(source_int_text) // batch_size, train_acc, valid_acc, loss))

    # Save Model
    saver = tf.train.Saver()
    saver.save(sess, save_path)
    print('Model Trained and saved')
    


Epoch   1 Batch 200/1027 - Train Accuracy: 0.4267, Validation Accuracy: 0.3519, Loss: 2.0066
Epoch   1 Batch 400/1027 - Train Accuracy: 0.3667, Validation Accuracy: 0.1889, Loss: 1.6517
Epoch   1 Batch 600/1027 - Train Accuracy: 0.4667, Validation Accuracy: 0.3556, Loss: 1.4345
Epoch   1 Batch 800/1027 - Train Accuracy: 0.4424, Validation Accuracy: 0.4148, Loss: 1.5155
Epoch   1 Batch 1000/1027 - Train Accuracy: 0.5467, Validation Accuracy: 0.4815, Loss: 1.1068
Epoch   2 Batch 200/1027 - Train Accuracy: 0.4867, Validation Accuracy: 0.5000, Loss: 1.3517
Epoch   2 Batch 400/1027 - Train Accuracy: 0.5455, Validation Accuracy: 0.5407, Loss: 1.0412
Epoch   2 Batch 600/1027 - Train Accuracy: 0.6697, Validation Accuracy: 0.6333, Loss: 0.9529
Epoch   2 Batch 800/1027 - Train Accuracy: 0.5576, Validation Accuracy: 0.6407, Loss: 1.0882
Epoch   2 Batch 1000/1027 - Train Accuracy: 0.6467, Validation Accuracy: 0.6630, Loss: 0.7956
Epoch   3 Batch 200/1027 - Train Accuracy: 0.6167, Validation Accura

Epoch  18 Batch 800/1027 - Train Accuracy: 0.7939, Validation Accuracy: 0.7556, Loss: 0.2389
Epoch  18 Batch 1000/1027 - Train Accuracy: 0.8778, Validation Accuracy: 0.7630, Loss: 0.1702
Epoch  19 Batch 200/1027 - Train Accuracy: 0.7567, Validation Accuracy: 0.7370, Loss: 0.3205
Epoch  19 Batch 400/1027 - Train Accuracy: 0.8606, Validation Accuracy: 0.7519, Loss: 0.1822
Epoch  19 Batch 600/1027 - Train Accuracy: 0.8788, Validation Accuracy: 0.7556, Loss: 0.2327
Epoch  19 Batch 800/1027 - Train Accuracy: 0.8273, Validation Accuracy: 0.7630, Loss: 0.2177
Epoch  19 Batch 1000/1027 - Train Accuracy: 0.8689, Validation Accuracy: 0.7667, Loss: 0.1780
Epoch  20 Batch 200/1027 - Train Accuracy: 0.7467, Validation Accuracy: 0.7333, Loss: 0.3115
Epoch  20 Batch 400/1027 - Train Accuracy: 0.8303, Validation Accuracy: 0.7667, Loss: 0.1759
Epoch  20 Batch 600/1027 - Train Accuracy: 0.8879, Validation Accuracy: 0.7222, Loss: 0.2222
Epoch  20 Batch 800/1027 - Train Accuracy: 0.8030, Validation Accura

Epoch  36 Batch 400/1027 - Train Accuracy: 0.8697, Validation Accuracy: 0.7519, Loss: 0.1827
Epoch  36 Batch 600/1027 - Train Accuracy: 0.8636, Validation Accuracy: 0.7630, Loss: 0.2497
Epoch  36 Batch 800/1027 - Train Accuracy: 0.8333, Validation Accuracy: 0.7519, Loss: 0.1779
Epoch  36 Batch 1000/1027 - Train Accuracy: 0.8733, Validation Accuracy: 0.7667, Loss: 0.1574
Epoch  37 Batch 200/1027 - Train Accuracy: 0.7433, Validation Accuracy: 0.7519, Loss: 0.3296
Epoch  37 Batch 400/1027 - Train Accuracy: 0.8697, Validation Accuracy: 0.7519, Loss: 0.2081
Epoch  37 Batch 600/1027 - Train Accuracy: 0.8636, Validation Accuracy: 0.7741, Loss: 0.2212
Epoch  37 Batch 800/1027 - Train Accuracy: 0.8000, Validation Accuracy: 0.7593, Loss: 0.1780
Epoch  37 Batch 1000/1027 - Train Accuracy: 0.8756, Validation Accuracy: 0.7593, Loss: 0.1384
Epoch  38 Batch 200/1027 - Train Accuracy: 0.7767, Validation Accuracy: 0.7704, Loss: 0.3057
Epoch  38 Batch 400/1027 - Train Accuracy: 0.8606, Validation Accura

Epoch  53 Batch 1000/1027 - Train Accuracy: 0.8889, Validation Accuracy: 0.7630, Loss: 0.1012
Epoch  54 Batch 200/1027 - Train Accuracy: 0.7567, Validation Accuracy: 0.7741, Loss: 0.2693
Epoch  54 Batch 400/1027 - Train Accuracy: 0.8727, Validation Accuracy: 0.7593, Loss: 0.1384
Epoch  54 Batch 600/1027 - Train Accuracy: 0.8788, Validation Accuracy: 0.7593, Loss: 0.1798
Epoch  54 Batch 800/1027 - Train Accuracy: 0.8576, Validation Accuracy: 0.7593, Loss: 0.1646
Epoch  54 Batch 1000/1027 - Train Accuracy: 0.9022, Validation Accuracy: 0.7741, Loss: 0.1078
Epoch  55 Batch 200/1027 - Train Accuracy: 0.7567, Validation Accuracy: 0.7741, Loss: 0.2604
Epoch  55 Batch 400/1027 - Train Accuracy: 0.8212, Validation Accuracy: 0.7593, Loss: 0.1670
Epoch  55 Batch 600/1027 - Train Accuracy: 0.8939, Validation Accuracy: 0.7593, Loss: 0.2029
Epoch  55 Batch 800/1027 - Train Accuracy: 0.8394, Validation Accuracy: 0.7556, Loss: 0.1358
Epoch  55 Batch 1000/1027 - Train Accuracy: 0.8844, Validation Accur

In [203]:
# Save the parameters   
def save_params(params):
    with open('paramsTestNew.p', 'wb') as out_file:
        pickle.dump(params, out_file)

save_params(save_path)
def load_params():
    with open('paramsTestNew.p', mode='rb') as in_file:
        return pickle.load(in_file)

In [204]:
source_int_text, target_int_text, source_vocab_to_int, target_vocab_to_int,source_int_to_vocab,target_int_to_vocab = preprocess(X,y)


In [205]:
load_path = load_params()


In [206]:
load_path

'checkpointsNew2/dev'

In [207]:
batch_size = 30

#converting the words to vectors of integers
def word_to_seq(word, vocab_to_int):
    results = []
    for word in list(word):
        if word in vocab_to_int:
            results.append(vocab_to_int[word])
        else:
            results.append(vocab_to_int['<UNK>'])
            
    return results

#taking user input for prediction
print("\n Enter word to be transliterated:")
transliterate_word = 'जामिया'
transliterate_word = word_to_seq(transliterate_word, source_vocab_to_int)

#initialising the graph
loaded_graph = tf.Graph()

#initialising the session
with tf.Session(graph=loaded_graph) as sess:
        
    # Load saved model
    loader = tf.train.import_meta_graph(load_path + '.meta')
    
    #tf.train.Saver.restore(sess,load_path)
    loader.restore(sess, load_path)

#providing placeholder names from the loaded graph
    input_data = loaded_graph.get_tensor_by_name('input:0')
    logits = loaded_graph.get_tensor_by_name('predictions:0')
    target_sequence_length = loaded_graph.get_tensor_by_name('target_sequence_length:0')
    keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')

#transliterating the given word
    transliterate_logits = sess.run(logits, {input_data: [transliterate_word]*batch_size,
                                         target_sequence_length: [len(transliterate_word)]*batch_size,
                                         keep_prob: 1.0})[0]

print('Input')
print('  Word Ids:      {}'.format([i for i in transliterate_word]))
print('  Hindi Word: {}'.format([source_int_to_vocab[i] for i in transliterate_word]))

print('\nPrediction')
print('  Word Id:      {}'.format([i for i in transliterate_logits]))

#showing the output
output = ""
for i in transliterate_logits:
        if target_int_to_vocab[i]!= '<EOS>':
                output = output + target_int_to_vocab[i]
print('  English Word:      {}'.format(output))


 Enter word to be transliterated:
INFO:tensorflow:Restoring parameters from checkpointsNew2/dev
Input
  Word Ids:      [31, 74, 26, 52, 11, 74]
  Hindi Word: ['ज', 'ा', 'म', 'ि', 'य', 'ा']

Prediction
  Word Id:      [15, 12, 12, 21, 18, 27]
  English Word:      jaamiy


In [211]:
create_lookup_table('ॉॆॊॏऺऻॎःािीुूेैोौअआइईउऊएऐओऔकखगघचछजझटठडढणतथदधनपफबभमयरलवशषसहज्ञक्षश्रज़रफ़ड़ढ़ख़क़ग़ळृृ़़ऑ')


({'<PAD>': 0,
  '<EOS>': 1,
  '<UNK>': 2,
  '<GO>': 3,
  'ॉ': 4,
  'ख': 5,
  'ू': 6,
  'ॆ': 7,
  'ः': 8,
  'स': 9,
  'ह': 10,
  'य': 11,
  'छ': 12,
  'ल': 13,
  'श': 14,
  'ऻ': 15,
  'ण': 16,
  'ॊ': 17,
  'ु': 18,
  'ई': 19,
  'त': 20,
  'ॏ': 21,
  'ट': 22,
  'ृ': 23,
  'ो': 24,
  'म': 25,
  'ड': 26,
  'ौ': 27,
  'उ': 28,
  'ए': 29,
  'ज': 30,
  'ै': 31,
  'ञ': 32,
  'च': 33,
  'ग़': 34,
  'प': 35,
  'ऑ': 36,
  'ठ': 37,
  'ळ': 38,
  'औ': 39,
  'ध': 40,
  'व': 41,
  'थ': 42,
  'ऐ': 43,
  'न': 44,
  '़': 45,
  'क़': 46,
  'द': 47,
  'ऺ': 48,
  'अ': 49,
  'ि': 50,
  'ड़': 51,
  'क': 52,
  'इ': 53,
  'र': 54,
  'भ': 55,
  'आ': 56,
  'े': 57,
  'घ': 58,
  'ख़': 59,
  'ॎ': 60,
  'ढ': 61,
  'ज़': 62,
  'ी': 63,
  'फ़': 64,
  'फ': 65,
  '्': 66,
  'झ': 67,
  'ग': 68,
  'ऊ': 69,
  'ष': 70,
  'ढ़': 71,
  'ा': 72,
  'ओ': 73,
  'ब': 74},
 {0: '<PAD>',
  1: '<EOS>',
  2: '<UNK>',
  3: '<GO>',
  4: 'ॉ',
  5: 'ख',
  6: 'ू',
  7: 'ॆ',
  8: 'ः',
  9: 'स',
  10: 'ह',
  11: 'य',
  12: 'छ',
  13: 'ल',
  14: 'श',