In [1]:
% tensorflow_version 2.x

import io
import re
import os
import unicodedata
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

from sklearn.model_selection import train_test_split

TensorFlow 2.x selected.


In [0]:
path_to_zip = tf.keras.utils.get_file(
    'spa-eng.zip', origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip',
    extract=True)

path_to_file = os.path.dirname(path_to_zip)+"/spa-eng/spa.txt"

In [0]:
def unicode_to_ascii(s):
  return ''.join(c for c in unicodedata.normalize('NFD', s)
      if unicodedata.category(c) != 'Mn')

def preprocess_sentence(w):
  w = unicode_to_ascii(w.lower().strip())
  w = re.sub(r"([?.!,¿])", r" \1 ", w)
  w = re.sub(r'[" "]+', " ", w)
  w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)
  w = w.rstrip().strip()
  return w


In [0]:
def create_dataset(path, num_examples):
  lines = io.open(path, encoding='UTF-8').read().strip().split('\n')
  word_pairs = [[preprocess_sentence(w) for w in l.split('\t')]  for l in lines[:num_examples]]
  return zip(*word_pairs)

In [0]:
en, sp = create_dataset(path_to_file, None)
en=list(en)
sp=list(sp)

In [0]:
token_en=tfds.features.text.SubwordTextEncoder.build_from_corpus((i for i in en),target_vocab_size=8000)
token_sp=tfds.features.text.SubwordTextEncoder.build_from_corpus((i for i in sp),target_vocab_size=8000)

In [0]:
for idx,i in enumerate(en):
  en[idx]=[token_en.vocab_size]+token_en.encode(i)+[token_en.vocab_size+1]

for idx,i in enumerate(sp):
  sp[idx]=[token_sp.vocab_size]+token_sp.encode(i)+[token_sp.vocab_size+1]

In [0]:
x_train,x_test,y_train,y_test=train_test_split(en,sp,test_size=0.3,random_state=42)

In [0]:
def generate_data():
  for _ in range(len(x_train)):
    yield x_train[_],y_train[_]

def generate_valid_data():
  for _ in range(len(x_test)):
    yield x_test[_],y_test[_]

In [0]:
batch_size=96
buffer=30000
dataset=tf.data.Dataset.from_generator(generate_data,output_types=(tf.int32,tf.int32))
x_t=dataset.cache().shuffle(buffer).padded_batch(batch_size,padded_shapes=([-1], [-1])).prefetch(tf.data.experimental.AUTOTUNE)

valid_dataset=tf.data.Dataset.from_generator(generate_valid_data,output_types=(tf.int32,tf.int32))
x_v=valid_dataset.cache().shuffle(buffer).padded_batch(batch_size,padded_shapes=([-1], [-1])).prefetch(tf.data.experimental.AUTOTUNE)

In [11]:
from nmt_transformer import *

Code is built using tensorflow:2.0.0


In [0]:
embed_dim=600
num_coders=6
num_attn_head=4
higher_dim=1024
input_vocab_size=token_en.vocab_size+2
target_vocab_size=token_sp.vocab_size+2
max_length_input=500
max_length_target=500


transformer=Transformer(num_coders=num_coders,num_attn_head=num_attn_head,embed_dim=embed_dim,higher_dim=higher_dim,\
                        input_vocab_size=input_vocab_size,target_vocab_size=target_vocab_size\
                        ,max_length_input=max_length_input,max_length_target=max_length_target,drop_rate=0.05)

lr=LearningRate(embed_dim,warmup_steps=9000)
optimizer=tf.keras.optimizers.Adam(lr,epsilon=1e-9)

In [0]:
#creating train  graph
inp_signature=[tf.TensorSpec(shape=(None,None),dtype=tf.int32),tf.TensorSpec(shape=(None,None),dtype=tf.int32),tf.TensorSpec(shape=(None),dtype=tf.bool)]
@tf.function(input_signature=inp_signature)
def train(inp,target,flag):
    decoder_input=target[:,:-1] #example for output '<start> I am a student <eos>' the values that should be fed to the decoder should be '<start> I am a'
    decoder_output=target[:,1:] # 'I am a student <eos>'
    encoder_mask,decoder_look_ahead=create_mask(inp,decoder_input)
    with tf.GradientTape() as tape:
        output,attn_dict,encoder_op=transformer(inp,decoder_input,encoder_mask,decoder_look_ahead,True)
        if flag==True:
            print(transformer.summary())
            flag=False
        loss_=calculate_loss(decoder_output,output)
    
    gradients=tape.gradient(loss_,transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients,transformer.trainable_variables))
    
    train_loss.update_state(loss_)
    train_accuracy.update_state(decoder_output,output)


#creating validation graph
inp_signature=[tf.TensorSpec(shape=(None,None),dtype=tf.int32),tf.TensorSpec(shape=(None,None),dtype=tf.int32)]
@tf.function(input_signature=inp_signature)
def validate(inp,target):
    decoder_input=target[:,:-1] #example for output '<start> I am a student <eos>' the values that should be fed to the decoder should be '<start> I am a'
    decoder_output=target[:,1:] # 'I am a student <eos>'
    encoder_mask,decoder_look_ahead=create_mask(inp,decoder_input)
    output,attn_dict,encoder_op=transformer(inp,decoder_input,encoder_mask,decoder_look_ahead,True)
    loss_valid=calculate_loss(decoder_output,output)
    valid_loss.update_state(loss_valid)
    valid_accuracy.update_state(decoder_output,output)



In [0]:
path="./en_sp/train/"
ckpt=tf.train.Checkpoint(transformer=transformer,optimizer=optimizer)
ckpt_manage=tf.train.CheckpointManager(checkpoint=ckpt,directory=path,max_to_keep=4)
if ckpt_manage.latest_checkpoint:
    ckpt.restore(ckpt_manage.latest_checkpoint)

In [15]:
epochs=30

for epoch in range(epochs):
    train_loss.reset_states();train_accuracy.reset_states();valid_loss.reset_states();valid_accuracy.reset_states()
    
    for (batch,(inp,target)) in enumerate(x_t):
        
        
        if batch ==0 and epoch ==0:
          with tf.device("/gpu:0"):
            train(inp,target,True)
        else:
          with tf.device("/gpu:0"):
            train(inp,target,False)
        
        if batch % 100 == 0:
              print (' For epoch {} with  batch {}  the loss is  {:.4f}  with accuracy {:.4f}'.format(epoch + 1, batch, train_loss.result(), train_accuracy.result()))
    
    
    if (epoch + 1) % 2 == 0:
        ckpt_save = ckpt_manage.save()
        print ('Saving checkpoint for epoch {} at {}'.format(epoch+1,ckpt_save))

    
    for (valid_batch,(inp,target)) in enumerate(x_v):
      with tf.device("/gpu:0"):
        validate(inp,target)
    
    print ('For epoch {} the validation loss is {:.4f} with accuracy of {:.4f}'.format(epoch + 1, 
                                                valid_loss.result(), 
                                                valid_accuracy.result()))
    

    
    print ('For epoch {} the loss is {:.4f} with accuracy of {:.4f}'.format(epoch + 1, 
                                                train_loss.result(), 
                                                train_accuracy.result()))

Model: "transformer"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_nx (encoder_nx)      multiple                  20941344  
_________________________________________________________________
decoder_nx (decoder_nx)      multiple                  29455344  
_________________________________________________________________
dense_96 (Dense)             multiple                  4750304   
Total params: 55,146,992
Trainable params: 55,146,992
Non-trainable params: 0
_________________________________________________________________
None
Model: "transformer"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_nx (encoder_nx)      multiple                  20941344  
_________________________________________________________________
decoder_nx (decoder_nx)      multiple                  29455344  
_________________

KeyboardInterrupt: ignored

In [16]:
transformer.summary()

Model: "transformer"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_nx (encoder_nx)      multiple                  20941344  
_________________________________________________________________
decoder_nx (decoder_nx)      multiple                  29455344  
_________________________________________________________________
dense_96 (Dense)             multiple                  4750304   
Total params: 55,146,992
Trainable params: 55,146,992
Non-trainable params: 0
_________________________________________________________________


In [18]:
transformer.save("en_sp_model.h5")

NotImplementedError: ignored

In [19]:
!mkdir transformer_en_sp
tf.saved_model.save(transformer,"transformer_en_sp/")

TypeError: ignored

In [0]:
transformer.save_weights("transformer_en_sp/")

In [22]:
transformer.layers

[<nmt_transformer.encoder_nx at 0x7f7e157a99e8>,
 <nmt_transformer.decoder_nx at 0x7f7e14abad68>,
 <tensorflow.python.keras.layers.core.Dense at 0x7f7e15677588>]