# Neural machine translation

In [1]:
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import unicodedata
import re
import numpy as np
import os
import io
import time
# 3. Бибилотека чтобы разделять грамотно иероглифы
import jieba

## Download and prepare the dataset

We'll use a language dataset provided by http://www.manythings.org/anki/

In [2]:
!wget http://www.manythings.org/anki/cmn-eng.zip

--2021-10-10 10:31:18--  http://www.manythings.org/anki/cmn-eng.zip
Resolving www.manythings.org (www.manythings.org)... 104.21.92.44, 172.67.186.54, 2606:4700:3033::ac43:ba36, ...
Connecting to www.manythings.org (www.manythings.org)|104.21.92.44|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1153006 (1.1M) [application/zip]
Saving to: ‘cmn-eng.zip’


2021-10-10 10:31:19 (3.88 MB/s) - ‘cmn-eng.zip’ saved [1153006/1153006]



In [3]:
!mkdir cn-eng
!unzip cmn-eng.zip -d cn-eng/

Archive:  cmn-eng.zip
  inflating: cn-eng/cmn.txt          
  inflating: cn-eng/_about.txt       


In [4]:

!ls /content/cn-eng/ -lah

total 3.7M
drwxr-xr-x 2 root root 4.0K Oct 10 10:31 .
drwxr-xr-x 1 root root 4.0K Oct 10 10:31 ..
-rw-r--r-- 1 root root 1.5K Jul 14 10:16 _about.txt
-rw-r--r-- 1 root root 3.6M Jul 14 10:16 cmn.txt


In [5]:
# Download the file
path_to_file = "/content/cn-eng/cmn.txt"

In [6]:
# 2. Потом я заметил что модель выплевывает пустые строки, думаю что надо разделять иероглифы каждый между собой. Добавим процедуру cutword и unicode_to_ascii в функцию preprocess_sentence
def unicode_to_ascii(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')
    
def cutword(sentence):
    output = []
    for word in jieba.cut(sentence, cut_all=False):
        output.append(word)
    output = ' '.join(output)
    return output

def preprocess_sentence(w):
  w = unicode_to_ascii(w.lower().strip())

  # creating a space between a word and the punctuation following it
  # eg: "he is a boy." => "he is a boy ."
  # Reference:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation
  w = re.sub(r"([?.。!,])", r" \1 ", w)
  w = re.sub(r'[" "]+', " ", w)

  # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
  w = re.sub(r"[^a-zA-Z?.!,？。！，\u4e00-\u9FFF]+", " ", w)
  if len(re.findall('([a-z])',w)) == 0:
        try:
            w = cutword(w)
        except:
            pass
  

  # adding a start and an end token to the sentence
  # so that the model know when to start and stop predicting.
  w = '<start> ' + w + ' <end>'
  return w

In [7]:
def unicode_to_ascii(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')

# cut chinese
def cutword(sentence):
    output = []
    for word in jieba.cut(sentence, cut_all=False):
        output.append(word)
    output = ' '.join(output)
    return output

def preprocess_sentence(w):
    w = unicode_to_ascii(w.lower().strip())

    # creating a space between a word and the punctuation following it
    w = re.sub(r"([?.!,？。！，])", r" \1 ", w)
    # delete extra spaces
    w = re.sub(r'[" "]+', " ", w)

    # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
    # as well as Chinese characters
    w = re.sub(r"[^a-zA-Z?.!,？。！，\u4e00-\u9FFF]+", " ", w)

    # cut words
    if len(re.findall('([a-z])',w)) == 0:
        try:
            w = cutword(w)
        except:
            pass

    # adding a start and an end token to the sentence
    # so that the model know when to start and stop predicting.
    w = '<start> ' + w + ' <end>'
    return w

In [8]:
cutword("我是你大爷。")

Building prefix dict from the default dictionary ...
Dumping model to file cache /tmp/jieba.cache
Loading model cost 0.830 seconds.
Prefix dict has been built successfully.


'我 是 你 大爷 。'

In [9]:
en_sentence = u"May I borrow this book?"
zh_sentence = u"我可以借这本书吗？"
print(preprocess_sentence(en_sentence))
print(preprocess_sentence(zh_sentence))

<start> may i borrow this book ?  <end>
<start> 我 可以 借 这 本书 吗   ？   <end>


In [10]:
def create_dataset(path, num_examples):
    lines = io.open(path, encoding='UTF-8').read().strip().split('\n')
    word_pairs = []
    for line in lines[:num_examples]:
        sentences = line.split('\t')[:2]
        word_pairs.append((preprocess_sentence(sentences[0]),
                           preprocess_sentence(sentences[1])))
    return word_pairs

In [11]:
for l,k in create_dataset(path_to_file, 10):
    print(l)
    print(k)
    print('English length: {}; Chinese length: {}'.format(len(l),len(k)))
    print('--' *30)

<start> hi .  <end>
<start> 嗨   。   <end>
English length: 19; Chinese length: 21
------------------------------------------------------------
<start> hi .  <end>
<start> 你好   。   <end>
English length: 19; Chinese length: 22
------------------------------------------------------------
<start> run .  <end>
<start> 你 用 跑 的   。   <end>
English length: 20; Chinese length: 27
------------------------------------------------------------
<start> wait !  <end>
<start> 等等   ！   <end>
English length: 21; Chinese length: 22
------------------------------------------------------------
<start> wait !  <end>
<start> 等 一下   ！   <end>
English length: 21; Chinese length: 24
------------------------------------------------------------
<start> begin .  <end>
<start> 开始   ！   <end>
English length: 22; Chinese length: 22
------------------------------------------------------------
<start> hello !  <end>
<start> 你好   。   <end>
English length: 22; Chinese length: 22
-------------------------------------------

In [12]:
english = []
chinese = []

for en, zh in create_dataset(path_to_file, None):
    english.append(en)
    chinese.append(zh)

print('--------- Original ---------')
with open (path_to_file) as f:
    for i in (f.read().split('\n')[-2].split('\t')):
        print(i)
print('')
print('--------- Processed ---------')
print(english[-1])
print(chinese[-1])
print('')
print('Number of English Sentences: ', len(english))
print('Number of Chinese sentences: ', len(chinese))

--------- Original ---------
If a person has not had a chance to acquire his target language by the time he's an adult, he's unlikely to be able to reach native speaker level in that language.
如果一個人在成人前沒有機會習得目標語言，他對該語言的認識達到母語者程度的機會是相當小的。
CC-BY 2.0 (France) Attribution: tatoeba.org #1230633 (alec) & #1205914 (cienias)

--------- Processed ---------
<start> if a person has not had a chance to acquire his target language by the time he s an adult , he s unlikely to be able to reach native speaker level in that language .  <end>
<start> 如果 一個 人 在 成人 前 沒 有 機會習 得 目標 語言   ，   他 對 該 語言 的 認識 達 到 母語者 程度 的 機會 是 相當 小 的   。   <end>

Number of English Sentences:  26388
Number of Chinese sentences:  26388


# Tokenization

In [13]:
# create a helper function to get the padded tensor length
# the default level(0.98) means that 98% of all sentences have fewer than n tokens
def get_pad_len(tensor, level=0.98): 
    n = 0
    while True:
        count = 0
        for i in tensor:
            if len(i) < n:
                count += 1
        if count / len(tensor) >= level:
            break
        n += 1
    return n

In [14]:
def to_tensor(lang, return_tensor=True, return_tokenizer=False):
    # Assigns the index (sequence) of each word in a text to X
    tokenizer = Tokenizer(filters=' ', oov_token='<OOV>') 
    tokenizer.fit_on_texts(lang)
    lang_tensor = tokenizer.texts_to_sequences(lang)
    lang_tensor = pad_sequences(lang_tensor,
                                maxlen=get_pad_len(lang_tensor), # use the previously created function
                                padding='post',
                                truncating='post') 
    if return_tensor:
        print('\nShape of data tensor:', lang_tensor.shape)
        return lang_tensor
    if return_tokenizer:
        return tokenizer

In [15]:
english_tokenizer = to_tensor(english, False, True)
english_tensor = to_tensor(english)
print('\nOriginal sentence:')
print(english[500])
print('\nTensor of the sentence:')
print(english_tensor[500])


Shape of data tensor: (26388, 17)

Original sentence:
<start> tom laughed .  <end>

Tensor of the sentence:
[  2  12 944   4   3   0   0   0   0   0   0   0   0   0   0   0   0]


In [16]:

chinese_tokenizer = to_tensor(chinese, False, True)
chinese_tensor = to_tensor(chinese)
print('\nOriginal sentence:')
print(chinese[500])
print('\nTensor of the sentence:')
print(chinese_tensor[500])


Shape of data tensor: (26388, 16)

Original sentence:
<start> 汤姆 笑 了   。   <end>

Tensor of the sentence:
[  2  14 324   7   4   3   0   0   0   0   0   0   0   0   0   0]


In [17]:
english_vocab_size = len(english_tokenizer.word_index) + 1
chinese_vocab_size = len(chinese_tokenizer.word_index) + 1

print('Found {} unique tokens in English.\n'.format(english_vocab_size))
print('Found {} unique tokens in Chinese.\n'.format(chinese_vocab_size))

name_dict = ['Chinese', 'English']
for idx, lang in enumerate([chinese_tokenizer, english_tokenizer]):
    print('The 10 most frequent tokens in {} are:'.format(name_dict[idx]))
    for idx, word in enumerate(lang.word_index):
        if word not in ['<OOV>' , '<start>', '<end>']:
            print(word, end='|')
        if idx == 13:
            break
    print('\n')

Found 6756 unique tokens in English.

Found 15400 unique tokens in Chinese.

The 10 most frequent tokens in Chinese are:
。|我|的|了|你|他|？|在|是|她|汤姆|

The 10 most frequent tokens in English are:
.|i|the|to|you|a|?|is|tom|t|he|



# Create Dataset

In [18]:
english_train, english_test, chinese_train, chinese_test = train_test_split(
    english_tensor, chinese_tensor, test_size=0.1)

In [19]:
BUFFER_SIZE = len(english_train)
BATCH_SIZE = 64
STEPS_PER_EPOCH = len(english_train)//BATCH_SIZE
EMBEDDING_DIM = 128
ENC_HIDDEN_DIM = 1024
DEC_HIDDEN_DIM = 1024

# creating a TensorFlow Dataset object 
dataset = tf.data.Dataset.from_tensor_slices((english_train, chinese_train)).shuffle(BUFFER_SIZE)

# batching
dataset = dataset.batch(BATCH_SIZE, drop_remainder=False)

In [20]:
print(chinese_vocab_size)
print(english_vocab_size)

15400
6756


# Encoder-decoder + Attantion

In [21]:
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_hidden_dim, batch_sz):
    super(Encoder, self).__init__()
    self.batch_sz = batch_sz
    self.enc_hidden_dim = enc_hidden_dim
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # 1. Добавил  tf.keras.layers.Dropout в encoder для нормализации.
    self.dropout = tf.keras.layers.Dropout(0.1)
    self.gru = tf.keras.layers.GRU(self.enc_hidden_dim,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    

  def call(self, x, init_state, training=True):
    x = self.embedding(x)
    x = self.dropout(x)
    output, hidden = self.gru(x, initial_state = init_state)
    return output, hidden

  def initialize_hidden_state(self):
    return tf.zeros((self.batch_sz, self.enc_hidden_dim))

In [22]:
class Attention(tf.keras.layers.Layer):
    """ bahdanau-style assistive attention """

    def __init__(self, units):
        super(Attention, self).__init__()
        self.d1 = tf.keras.layers.Dense(units)
        self.d2 = tf.keras.layers.Dense(units)
        self.d3 = tf.keras.layers.Dense(1)

    def call(self, query, values):
        query_3d = tf.expand_dims(query, 1)

        scores = self.d3(tf.nn.tanh(
            self.d1(query_3d) + self.d2(values)))
        # values.shape == (64, 17, 1024)

        # tfa.seq2seq.LuongAttention
        # tfa.seq2seq.BahdanauAttention

        # axis 1 is the time axis, i.e. across multiple time steps
        attention_weights = tf.nn.softmax(scores, axis=1)

        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [39]:
class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units):
    super(Decoder, self).__init__()
    self.dec_units = dec_units

    self.attention =  Attention(self.dec_units)
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc = tf.keras.layers.Dense(vocab_size)
    self.dropout = tf.keras.layers.Dropout(0.1)

  def call(self, x, query, value, drop=True):
    # enc_output shape == (batch_size, max_length, hidden_size)

    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
    x = self.embedding(x)
    x = self.dropout(x)
    context_vector, attention_weights = self.attention(query, value)
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # passing the concatenated vector to the GRU
    output, state = self.gru(x, training=drop)

    # output shape == (batch_size * 1, hidden_size)
    output = tf.reshape(output, (-1, output.shape[2]))

    # output shape == (batch_size, vocab)
    x = self.fc(output)

    return x, state, attention_weights

# Instantiate the models with sample batches

In [33]:

encoder = Encoder(english_vocab_size, EMBEDDING_DIM, ENC_HIDDEN_DIM, BATCH_SIZE)
sample_hidden = encoder.initialize_hidden_state()
example_input_batch, example_target_batch = next(iter(dataset))
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)

In [34]:
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))

Encoder output shape: (batch size, sequence length, units) (64, 17, 1024)
Encoder Hidden state shape: (batch size, units) (64, 1024)


In [35]:
encoder.summary()

Model: "encoder_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      multiple                  864768    
_________________________________________________________________
dropout_2 (Dropout)          multiple                  0         
_________________________________________________________________
gru_2 (GRU)                  multiple                  3545088   
Total params: 4,409,856
Trainable params: 4,409,856
Non-trainable params: 0
_________________________________________________________________


In [36]:
attention_layer = Attention(1024)
context_vector, attention_weights = attention_layer(sample_hidden, sample_output)

print("Context vector shape: (batch size, units) {}".format(context_vector.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))

Context vector shape: (batch size, units) (64, 1024)
Attention weights shape: (batch_size, sequence_length, 1) (64, 17, 1)


In [42]:
decoder = Decoder(chinese_vocab_size, EMBEDDING_DIM, DEC_HIDDEN_DIM)

sample_decoder_output, _, l_  = decoder(tf.random.uniform((BATCH_SIZE, 1)),sample_hidden, sample_output)

print('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

Decoder output shape: (batch_size, vocab size) (64, 15400)


In [43]:
attention_layer.count_params()

2100225

In [44]:
decoder.summary()

Model: "decoder_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
attention_5 (Attention)      multiple                  2100225   
_________________________________________________________________
embedding_6 (Embedding)      multiple                  1971200   
_________________________________________________________________
gru_6 (GRU)                  multiple                  6690816   
_________________________________________________________________
dense_22 (Dense)             multiple                  15785000  
_________________________________________________________________
dropout_6 (Dropout)          multiple                  0         
Total params: 26,547,241
Trainable params: 26,547,241
Non-trainable params: 0
_________________________________________________________________


## Define the optimizer and the loss function

In [45]:
optimizer = tf.keras.optimizers.Adam()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

## Checkpoints (Object-based saving)

In [46]:
checkpoint_dir = './training_nmt_checkpoints'

checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

Backpropagation

In [57]:
@tf.function
def train_step(inp, targ, enc_hidden):
  loss = 0

  with tf.GradientTape() as tape:

    enc_hidden = encoder.initialize_hidden_state()
    enc_output, enc_hidden = encoder(inp, enc_hidden)
    dec_hidden = enc_hidden

    dec_input = tf.expand_dims([chinese_tokenizer.word_index['<start>']] * BATCH_SIZE, 1)
    random_number = np.random.rand()
    # Teacher forcing - feeding the target as the next input
    for t in range(1, targ.shape[1]):
      # passing enc_output to the decoder
      predictions, dec_hidden, att_weights = decoder(dec_input, dec_hidden, enc_output)
      
      loss += loss_function(targ[:, t], predictions)

      # using teacher forcing
      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))

  variables = encoder.trainable_variables + decoder.trainable_variables

  gradients = tape.gradient(loss, variables)

  optimizer.apply_gradients(zip(gradients, variables))

  return batch_loss

In [58]:
steps_per_epoch = len(english_train)//BATCH_SIZE

with tf.device("/gpu:0"):
    EPOCHS = 50

    for epoch in range(EPOCHS):
        start = time.time()

        enc_hidden = encoder.initialize_hidden_state()
        total_loss = 0

        for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
            batch_loss = train_step(inp, targ, enc_hidden)
            total_loss += batch_loss

            if batch % 100 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                        batch,
                                                        batch_loss.numpy()))
    # saving (checkpoint) the model every 2 epochs
    if (epoch + 1) % 2 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix)

    print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                        total_loss / steps_per_epoch))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 4.6705
Epoch 1 Batch 100 Loss 2.5703
Epoch 1 Batch 200 Loss 2.4626
Epoch 1 Batch 300 Loss 2.4660
Epoch 2 Batch 0 Loss 2.3195
Epoch 2 Batch 100 Loss 2.3288
Epoch 2 Batch 200 Loss 2.2043
Epoch 2 Batch 300 Loss 2.1493
Epoch 3 Batch 0 Loss 2.0991
Epoch 3 Batch 100 Loss 2.2093
Epoch 3 Batch 200 Loss 2.1201
Epoch 3 Batch 300 Loss 2.1164
Epoch 4 Batch 0 Loss 1.7717
Epoch 4 Batch 100 Loss 1.9765
Epoch 4 Batch 200 Loss 1.7857
Epoch 4 Batch 300 Loss 1.8709
Epoch 5 Batch 0 Loss 1.4785
Epoch 5 Batch 100 Loss 1.8309
Epoch 5 Batch 200 Loss 1.6413
Epoch 5 Batch 300 Loss 1.5224
Epoch 6 Batch 0 Loss 1.4537
Epoch 6 Batch 100 Loss 1.5193
Epoch 6 Batch 200 Loss 1.5739
Epoch 6 Batch 300 Loss 1.4957
Epoch 7 Batch 0 Loss 1.2186
Epoch 7 Batch 100 Loss 1.3600
Epoch 7 Batch 200 Loss 1.3333
Epoch 7 Batch 300 Loss 1.2933
Epoch 8 Batch 0 Loss 1.1288
Epoch 8 Batch 100 Loss 1.1426
Epoch 8 Batch 200 Loss 1.1907
Epoch 8 Batch 300 Loss 1.1583
Epoch 9 Batch 0 Loss 0.8946
Epoch 9 Batch 100 Loss 1.066

## Translate

* The evaluate function is similar to the training loop, except we don't use *teacher forcing* here. The input to the decoder at each time step is its previous predictions along with the hidden state and the encoder output.
* Stop predicting when the model predicts the *end token*.
* And store the *attention weights for every time step*.

Note: The encoder output is calculated only once for one input.

In [70]:
def get_eval_tensor(sentence):
    processed_sentence = preprocess_sentence(sentence)
    try:
        inputs = [english_tokenizer.word_index[i] for i in processed_sentence.split(' ')]
        inputs = pad_sequences([inputs], maxlen=english_tensor.shape[1], padding='post')
        inputs = tf.convert_to_tensor(inputs)
        return inputs
    except:
        return [0 for _ in processed_sentence.split(' ')]
        # print('The Neural Network has not learned the word yet!')
    
def evaluate(sentence):
    result = ''
    # initialize encoder hidden layer
    hidden = [tf.zeros((1, DEC_HIDDEN_DIM))]
    enc_out, enc_hidden = encoder(get_eval_tensor(sentence), hidden, False)
    # False means not using dropout
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([chinese_tokenizer.word_index['<start>']], 0)

    attention_plot = np.zeros((chinese_tensor.shape[1], english_tensor.shape[1]))
    

    for t in range(english_tensor.shape[1]):
        predictions, dec_hidden, attention_weights  = decoder(
            dec_input, dec_hidden, enc_out) 
        # False means not using dropout 

        predicted_id = tf.argmax(predictions[0]).numpy()
        attention_weights = tf.reshape(attention_weights, (-1,))
        attention_plot[t] = attention_weights.numpy()

        try:
            new_word = chinese_tokenizer.index_word[predicted_id] 
        except:
            pass
       
        result += new_word + ' '
        if chinese_tokenizer.index_word[predicted_id] == '<end>':
            return result, attention_plot

        dec_input = tf.expand_dims([predicted_id], 0)

    return result, attention_plot

In [71]:
def plot_attention(attention, sentence, predicted_sentence):
    fig = go.Figure(data = go.Heatmap(z=attention[:-1,:],
                                      x=sentence,
                                      y=[i for i in predicted_sentence[:-1][::-1]]))
    fig.update_xaxes(side="top")
    fig.update_layout(
        autosize=False,
        width=700,
        height=500,
        margin=dict(l=50, r=50, b=5, t=2, pad=4)
        )
    fig.show()

In [72]:
def translate(sentence, plot=False, score=False):
    result, attention_plot = evaluate(sentence)
    if score == False:
        print(result.replace(' ', '').strip('<end>'))
    if score:
        return result.strip('<end> ')
    if plot:
        attention_plot = attention_plot[:len(result.split(' ')), :len(sentence.split(' '))]
        plot_attention(attention_plot, sentence.split(' '), result.split(' '))

## Restore the latest checkpoint and test

In [73]:
# restoring the latest checkpoint in checkpoint_dir
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fdc326a3250>

In [74]:
ENC_HIDDEN_DIM = 1024
DEC_HIDDEN_DIM = 1024

In [75]:
translate('He is the only person I know')

他是我所知道的人。


In [76]:
# Перевел как он моя единственная квартира ---

In [77]:
translate('I am very hungry')
# зачет

我很饿。


In [78]:
translate(u'The default initial hidden')

AttributeError: ignored

In [79]:
translate(u'It goes without saying')
# don't correct

不言而喻。


In [80]:
translate(u'We must keep books clean ')
# don't correct

我们必须快自己的车。
