In [0]:
import tensorflow as tf
import numpy as np
import unicodedata
import re
from tensorflow import keras
import time

In [0]:
# Toy parallel corpus: 20 (English, French) sentence pairs. This tuple is the
# only data the notebook trains on; each element is (english, french).
raw_data = (
    ('What a ridiculous concept!', 'Quel concept ridicule !'),
    ('Your idea is not entirely crazy.', "Votre idée n'est pas complètement folle."),
    ("A man's worth lies in what he is.", "La valeur d'un homme réside dans ce qu'il est."),
    ('What he did is very wrong.', "Ce qu'il a fait est très mal."),
    ("All three of you need to do that.", "Vous avez besoin de faire cela, tous les trois."),
    ("Are you giving me another chance?", "Me donnez-vous une autre chance ?"),
    ("Both Tom and Mary work as models.", "Tom et Mary travaillent tous les deux comme mannequins."),
    ("Can I have a few minutes, please?", "Puis-je avoir quelques minutes, je vous prie ?"),
    ("Could you close the door, please?", "Pourriez-vous fermer la porte, s'il vous plaît ?"),
    ("Did you plant pumpkins this year?", "Cette année, avez-vous planté des citrouilles ?"),
    ("Do you ever study in the library?", "Est-ce que vous étudiez à la bibliothèque des fois ?"),
    ("Don't be deceived by appearances.", "Ne vous laissez pas abuser par les apparences."),
    ("Excuse me. Can you speak English?", "Je vous prie de m'excuser ! Savez-vous parler anglais ?"),
    ("Few people know the true meaning.", "Peu de gens savent ce que cela veut réellement dire."),
    ("Germany produced many scientists.", "L'Allemagne a produit beaucoup de scientifiques."),
    ("Guess whose birthday it is today.", "Devine de qui c'est l'anniversaire, aujourd'hui !"),
    ("He acted like he owned the place.", "Il s'est comporté comme s'il possédait l'endroit."),
    ("Honesty will pay in the long run.", "L'honnêteté paye à la longue."),
    ("How do we know this isn't a trap?", "Comment savez-vous qu'il ne s'agit pas d'un piège ?"),
    ("I can't believe you're giving up.", "Je n'arrive pas à croire que vous abandonniez."),
)

In [0]:
def unicode_to_ascii(s):
    """Strip combining marks (accents) from ``s``.

    The string is decomposed with Unicode NFD normalization, then every
    character whose Unicode category is 'Mn' (nonspacing mark) is dropped.

    Args:
        s: Input text.

    Returns:
        The decomposed text with all 'Mn' characters removed.
    """
    decomposed = unicodedata.normalize('NFD', s)
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(base_chars)

def normalize_string(s):
    """Clean a sentence so punctuation and words become separate tokens.

    Steps, applied in order:
      1. Remove accents via ``unicode_to_ascii``.
      2. Insert a space before each of ``!``, ``.`` and ``?``.
      3. Replace every run of characters other than ASCII letters and
         ``.``, ``!``, ``?`` with a single space (this removes digits,
         apostrophes, commas, hyphens, ...).
      4. Collapse runs of whitespace into one space.

    Case is left untouched and the result is not stripped.

    Args:
        s: Raw sentence.

    Returns:
        The normalized sentence.
    """
    text = unicode_to_ascii(s)
    substitutions = (
        (r'([!.?])', r' \1'),
        (r'[^a-zA-Z.!?]+', r' '),
        (r'\s+', r' '),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text

In [0]:
# Split the (english, french) pairs into two parallel lists.
raw_data_en,raw_data_fr=(list(column) for column in zip(*raw_data))

# Normalize each sentence before wrapping it in the sequence markers.
# The tokenizers below are created with filters="" and split on whitespace
# only, so without this step punctuation stays glued to the preceding word
# ("concept!", "crazy.") and accented/unaccented spellings become separate
# vocabulary entries. The markers are added *after* normalization because
# normalize_string would otherwise strip the "<" and ">" characters.
train=["<start> "+normalize_string(data)+" <end>" for data in raw_data_en]
target=["<start> "+normalize_string(data)+" <end>" for data in raw_data_fr]

In [0]:
# English side: fit a word-level tokenizer on the marked-up sentences, then
# turn them into integer id sequences padded at the end ("post") to a common
# length. filters="" turns off the tokenizer's character filtering.
# NOTE: `train` is rebound here from a list of strings to a padded integer
# array, so this cell cannot be re-run on its own without re-running the
# cell that builds the string list first.
en_tokenizer=keras.preprocessing.text.Tokenizer(filters="")
en_tokenizer.fit_on_texts(train)
train=keras.preprocessing.sequence.pad_sequences(
    en_tokenizer.texts_to_sequences(train),
    padding="post",
)

In [0]:
# French side: same procedure as for English. fit a word-level tokenizer,
# convert sentences to integer id sequences and pad them at the end ("post").
# NOTE: `target` is rebound here from a list of strings to a padded integer
# array, so this cell cannot be re-run on its own without re-running the
# cell that builds the string list first.
fr_tokenizer=keras.preprocessing.text.Tokenizer(filters="")
fr_tokenizer.fit_on_texts(target)
target=keras.preprocessing.sequence.pad_sequences(
    fr_tokenizer.texts_to_sequences(target),
    padding="post",
)

In [0]:
# --- Hyperparameters -------------------------------------------------------
BUFFER_SIZE=20       # shuffle buffer size
BATCH_SIZE=5         # sentences per training batch
embedding_dim = 256  # width of the token embeddings
units = 1024         # GRU state size, shared by encoder and decoder

# Vocabulary sizes: one more than the number of indexed words, leaving id 0
# free (it is the value the loss function treats as padding).
vocab_input_len=len(en_tokenizer.word_index)+1
vocab_target_len=len(fr_tokenizer.word_index)+1

# (source ids, target ids) pairs, shuffled and grouped into batches.
dataset = (
    tf.data.Dataset.from_tensor_slices((train,target))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

In [0]:
class Encoder(keras.Model):
  """Sequence encoder: an embedding layer followed by a single GRU."""

  def __init__(self,vocal_size,embedding_size,enc_units,batch_size):
    """Create the encoder layers.

    Args:
      vocal_size: Size of the input vocabulary (passed to the embedding).
      embedding_size: Dimension of the token embeddings.
      enc_units: Number of GRU units.
      batch_size: Batch size used by ``init_states`` for the zero state.
    """
    super().__init__()
    self.batch_size=batch_size
    self.enc_units=enc_units
    self.embedding=keras.layers.Embedding(vocal_size,embedding_size)
    # The GRU returns both the per-timestep outputs and the final state.
    self.gru=keras.layers.GRU(
        self.enc_units,
        return_sequences=True,
        return_state=True,
        recurrent_initializer="glorot_uniform",
    )

  def call(self,x,hidden):
    """Encode a batch of token ids.

    Args:
      x: Token ids to embed (in this notebook, a batch of padded sequences).
      hidden: Initial GRU state.

    Returns:
      ``(output, state)``: the GRU outputs for every timestep and the final
      GRU state.
    """
    embedded=self.embedding(x)
    sequence_output,final_state=self.gru(embedded,initial_state=hidden)
    return sequence_output,final_state

  def init_states(self):
    """Return an all-zero initial state of shape (batch_size, enc_units)."""
    state_shape=(self.batch_size,self.enc_units)
    return tf.zeros(state_shape)

In [0]:
# Instantiate the encoder and push one batch through it as a smoke test.
encoder=Encoder(vocab_input_len, embedding_dim, units, BATCH_SIZE)

example_input_batch,example_target_batch=next(iter(dataset))

# Start from the all-zero state; `sample_hidden` ends up holding the
# encoder's final state for the example batch.
sample_output,sample_hidden=encoder(example_input_batch,encoder.init_states())

In [0]:
class Attention(keras.layers.Layer):
  """Additive attention: score = V(tanh(W1(enc_output) + W2(query)))."""

  def __init__(self,units):
    """Create the three dense projections.

    Args:
      units: Output width of the W1 and W2 projections.
    """
    super().__init__()
    self.W1=keras.layers.Dense(units)
    self.W2=keras.layers.Dense(units)
    self.V=keras.layers.Dense(1)

  def call(self,query,enc_output):
    """Compute the attention context for one decoder step.

    Args:
      query: Decoder state used to score the encoder outputs.
      enc_output: Encoder outputs; axis 1 is the axis attended over.

    Returns:
      ``(context_vector, attention_weights)``: the sum over axis 1 of the
      weighted encoder outputs, and the softmax weights themselves (which
      keep a trailing dimension of size 1 from the ``V`` projection).
    """
    # Add an axis at position 1 so the projected query broadcasts against
    # every position along axis 1 of enc_output.
    query_with_time_axis=tf.expand_dims(query,1)

    projected=self.W1(enc_output)+self.W2(query_with_time_axis)
    score=self.V(tf.nn.tanh(projected))

    # Normalize the scores across axis 1.
    attention_weights=tf.nn.softmax(score,axis=1)

    # Weighted sum of the encoder outputs.
    context_vector=tf.reduce_sum(attention_weights*enc_output,axis=1)

    return context_vector, attention_weights

In [0]:
class Decoder(keras.Model):
  """Single-step decoder: attention + embedding + GRU + dense output layer."""

  def __init__(self,vocab_size,embedding_dim,dec_units,batch_size):
    """Create the decoder layers.

    Args:
      vocab_size: Size of the target vocabulary; also the width of the
        final dense layer.
      embedding_dim: Dimension of the token embeddings.
      dec_units: Number of GRU units (also used as the attention width).
      batch_size: Stored on the instance; not read by any method here.
    """
    super().__init__()
    self.batch_size=batch_size
    self.dec_units=dec_units
    self.embedding=keras.layers.Embedding(vocab_size,embedding_dim)
    self.gru=keras.layers.GRU(
        self.dec_units,
        return_sequences=True,
        return_state=True,
        recurrent_initializer='glorot_uniform',
    )
    self.fc=keras.layers.Dense(vocab_size)
    self.attention=Attention(self.dec_units)

  def call(self,x,hidden,enc_output):
    """Run one decoding step.

    Args:
      x: Token ids fed to the decoder for this step.
      hidden: Decoder state, used as the attention query.
      enc_output: Encoder outputs to attend over.

    Returns:
      ``(logits, state, attention_weights)``: unnormalized scores over the
      target vocabulary, the GRU state after this step, and the attention
      weights.
    """
    context_vector, attention_weights = self.attention(hidden, enc_output)
    embedded=self.embedding(x)

    # Join the context vector (given a length-1 axis at position 1) with the
    # embedded input along the last (feature) axis.
    gru_input=tf.concat([tf.expand_dims(context_vector,1),embedded],axis=-1)

    # NOTE(review): the GRU is called without an initial state, so `hidden`
    # influences this step only through the attention context above.
    gru_output,state=self.gru(gru_input)

    # Merge the leading axes so the dense layer sees a 2-D tensor.
    flat_output=tf.reshape(gru_output,(-1,gru_output.shape[2]))
    logits=self.fc(flat_output)

    return logits,state,attention_weights

In [0]:
# Decoder over the French vocabulary, using the same embedding width and GRU
# size as the encoder.
decoder=Decoder(
    vocab_size=vocab_target_len,
    embedding_dim=embedding_dim,
    dec_units=units,
    batch_size=BATCH_SIZE,
)

In [0]:
optimizer=keras.optimizers.Adam()
# reduction="none" keeps one loss value per example so padding can be masked
# out below before averaging; from_logits=True because the decoder's dense
# layer has no activation.
loss_object=keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    reduction="none",
)

def loss_function(real,pred):
  """Cross-entropy that ignores padded target positions.

  Args:
    real: Target token ids; id 0 is treated as padding.
    pred: Logits passed to ``loss_object`` together with ``real``.

  Returns:
    The mean of the per-example losses after zeroing those whose target is
    0. The mean is taken over all entries, including the zeroed ones.
  """
  per_example_loss=loss_object(real,pred)

  # 1 where the target is a real token, 0 where it is padding.
  is_real_token=tf.math.logical_not(tf.math.equal(real,0))
  weights=tf.cast(is_real_token,dtype=per_example_loss.dtype)

  masked_loss=per_example_loss*weights
  return tf.reduce_mean(masked_loss)

In [0]:
@tf.function
def train_step(input,target,enc_hidden):
  """Run one optimization step on a single batch with teacher forcing.

  Args:
    input: Batch of source token ids fed to the encoder.
    target: Batch of target token ids; column 0 is skipped and columns
      1..end are predicted one step at a time.
    enc_hidden: Initial encoder state.

  Returns:
    The summed per-step loss divided by the target sequence length.

  Note:
    The first decoder input is built for exactly ``BATCH_SIZE`` rows.
  """
  start_id=fr_tokenizer.word_index["<start>"]
  target_len=target.shape[1]
  loss=0

  with tf.GradientTape() as tape:
    enc_output,enc_state=encoder(input,enc_hidden)

    # The decoder starts from the encoder's final state and the <start> token.
    dec_hidden=enc_state
    dec_input=tf.expand_dims([start_id] * BATCH_SIZE,1)

    # Teacher forcing: the ground-truth token at step t is fed as the
    # decoder input for the next step.
    for t in range(1, target_len):
      # Pass the encoder output (enc_output) to the decoder.
      predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
      step_target=target[:, t]

      loss += loss_function(step_target, predictions)

      # Apply teacher forcing.
      dec_input = tf.expand_dims(step_target, 1)

  variables = encoder.trainable_variables + decoder.trainable_variables

  # Gradients are taken of the summed (not length-averaged) loss.
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))

  return loss / int(target_len)

In [15]:
EPOCHS=50

for epoch in range(EPOCHS):
  start=time.time()

  # Every batch in the epoch starts from the same all-zero encoder state.
  enc_hidden=encoder.init_states()
  total_loss=0

  # The loop variables are deliberately NOT called `input`/`target`: those
  # names would shadow the `input` builtin and overwrite the module-level
  # padded `target` array that other cells read.
  # take(5) caps the number of batches per epoch at five.
  for (batch,(input_batch,target_batch)) in enumerate(dataset.take(5)):
    batch_loss=train_step(input_batch,target_batch,enc_hidden)
    total_loss+=batch_loss

    print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,batch,batch_loss.numpy()))

  print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 3.1386
Epoch 1 Batch 1 Loss 3.5275
Epoch 1 Batch 2 Loss 3.8363
Epoch 1 Batch 3 Loss 2.9638
Time taken for 1 epoch 16.38884973526001 sec

Epoch 2 Batch 0 Loss 3.1315
Epoch 2 Batch 1 Loss 3.8064
Epoch 2 Batch 2 Loss 3.2061
Epoch 2 Batch 3 Loss 3.1021
Time taken for 1 epoch 4.554631233215332 sec

Epoch 3 Batch 0 Loss 3.2553
Epoch 3 Batch 1 Loss 3.4383
Epoch 3 Batch 2 Loss 3.2938
Epoch 3 Batch 3 Loss 2.8412
Time taken for 1 epoch 4.643236398696899 sec

Epoch 4 Batch 0 Loss 3.2268
Epoch 4 Batch 1 Loss 3.0467
Epoch 4 Batch 2 Loss 3.0070
Epoch 4 Batch 3 Loss 3.4539
Time taken for 1 epoch 4.601182699203491 sec

Epoch 5 Batch 0 Loss 3.5145
Epoch 5 Batch 1 Loss 2.7776
Epoch 5 Batch 2 Loss 3.0394
Epoch 5 Batch 3 Loss 2.9858
Time taken for 1 epoch 4.619771957397461 sec

Epoch 6 Batch 0 Loss 3.1777
Epoch 6 Batch 1 Loss 2.6194
Epoch 6 Batch 2 Loss 2.7143
Epoch 6 Batch 3 Loss 3.3713
Time taken for 1 epoch 4.574537754058838 sec

Epoch 7 Batch 0 Loss 2.3614
Epoch 7 Batch 1 Loss 3.3

In [18]:
# Greedy decoding of the first training sentence.
# attention_plot holds one row per decoding step and one column per source
# position, so its second dimension is the padded source length
# (train.shape[1]), not the number of sentences (train.shape[0]).
attention_plot=np.zeros((target.shape[1],train.shape[1]))
hidden=[tf.zeros((1,units))]
enc_out,enc_hidden=encoder(tf.expand_dims(train[0,],0),hidden)

# The decoder starts from the encoder's final state and the <start> token.
dec_hidden=enc_hidden
dec_input=tf.expand_dims([fr_tokenizer.word_index["<start>"]],0)
result=""

for t in range(target.shape[1]):
  predictions, dec_hidden, attention_weights = decoder(dec_input,dec_hidden,enc_out)

  # Store the attention weights for plotting later: flatten the
  # (1, source_len, 1) weights into one row of the plot matrix.
  attention_plot[t] = tf.reshape(attention_weights, (-1,)).numpy()

  predicted_id = tf.argmax(predictions[0]).numpy()

  # Id 0 is padding and has no entry in index_word, so it is skipped.
  if predicted_id != 0:
    predicted_word = fr_tokenizer.index_word[predicted_id]
    result += predicted_word + ' '

    if predicted_word == '<end>':
      break

  # The predicted id is fed back into the model as the next input.
  dec_input = tf.expand_dims([predicted_id], 0)

# Print the translation even when the length limit is reached before <end>.
print(result)

quel concept ridicule ! <end> 
