<a href="https://colab.research.google.com/github/h4ck4l1/datasets/blob/main/NLP_with_RNN_and_Attention/Bahdanau_Attention_with_tpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
from google.colab import auth
auth.authenticate_user()
import os,warnings
# TensorFlow's native (C++) logger reads TF_CPP_MIN_LOG_LEVEL; "3" filters out
# INFO, WARNING and ERROR messages. It has to be set before tensorflow is
# imported, which is why it sits above the `import tensorflow` line.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
# Silence Python-level warnings for the rest of the notebook.
warnings.filterwarnings("ignore")
import tensorflow as tf
from tensorflow import keras
import plotly.graph_objects as go
import plotly.io as pio
from zipfile import ZipFile
pio.templates.default = "plotly_dark"
tf.get_logger().setLevel("ERROR")

In [9]:
# Location of the zipped English/Spanish sentence-pair corpus (spa-eng.zip);
# it is downloaded and unpacked by the data-loading cell.
url = "https://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip"

In [10]:
# TPU bootstrap: resolve the cluster, connect to it, initialise the TPU system,
# then build the distribution strategy used by the later `strategy.scope()` cell.
# No address is passed to the resolver — presumably it is discovered from the
# Colab runtime environment; verify when running elsewhere.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)

In [11]:
# Download, unpack and parse the corpus on the local host device.
with tf.device("/job:localhost"):
    # Download only: the archive is unpacked explicitly just below, so Keras is
    # not asked to extract it as well and `file_path` always names the zip file.
    file_path = keras.utils.get_file(fname="spa-eng.zip",origin=url)
    with ZipFile(file_path,"r") as f:
        f.extractall("spa-eng")
    # Explicit UTF-8: the corpus contains non-ASCII Spanish characters, so the
    # platform's default encoding must not be relied on.
    with open("spa-eng/spa-eng/spa.txt","r",encoding="utf-8") as f:
        raw_text = f.read()
    # Drop the inverted punctuation marks that only open Spanish sentences.
    raw_text = raw_text.replace("¡","").replace("¿","")
    # One tab-separated record per line; `text` ends up as a list of field lists.
    text = [line.split("\t") for line in raw_text.splitlines()]
    # Transpose the rows into two parallel tuples (first column, second column).
    en_text,es_text = zip(*text)
    total_size = len(en_text)

In [12]:
def get_layers(vocab_size=1000,seq_length=50):
    """Build and adapt the English and Spanish text-vectorization layers.

    Reads the module-level ``en_text`` and ``es_text`` sequences to fit the
    vocabularies. The Spanish layer is adapted on sentences wrapped in the
    ``soseq`` / ``eoseq`` marker tokens so both markers enter its vocabulary.

    Args:
        vocab_size: maximum number of tokens kept by each layer.
        seq_length: length every vectorized sequence is padded/truncated to.

    Returns:
        ``(en_vec_layer, es_vec_layer)``, both already adapted.
    """
    # `seq_length` is now honoured; previously the length was hard-coded to 50
    # and the argument was silently ignored.
    en_vec_layer = keras.layers.TextVectorization(vocab_size,output_sequence_length=seq_length)
    es_vec_layer = keras.layers.TextVectorization(vocab_size,output_sequence_length=seq_length)
    en_vec_layer.adapt(en_text)
    es_vec_layer.adapt([f"soseq {s} eoseq" for s in es_text])
    return en_vec_layer,es_vec_layer

In [13]:
def get_data(es_vec_layer,train_size=100_000,full=True):
    """Vectorize the corpus into training and validation arrays.

    Reads the module-level ``en_text`` / ``es_text`` sequences and the
    module-level ``en_vec_layer`` (the English vectorizer is not a parameter).

    Args:
        es_vec_layer: adapted Spanish vectorization layer.
        train_size: number of leading sentence pairs used for training when
            ``full`` is true; every remaining pair is used for validation.
        full: when false, ``train_size`` is ignored and a small fixed subset is
            used instead (first 1000 pairs for training, next 500 for validation).

    Returns:
        ``(X_train, X_dec_train), y_train, (X_valid, X_dec_valid), y_valid,
        tsize, vsize`` where the last two are the number of training and
        validation examples.

    Raises:
        ValueError: if either split would contain no examples.
    """
    if full:
        # Honour the caller's `train_size` (it used to be overwritten with a
        # hard-coded 100_000 here).
        train_slice = slice(None,train_size)
        valid_slice = slice(train_size,None)
    else:
        train_slice = slice(None,1000)
        valid_slice = slice(1000,1500)

    en_train,en_valid = en_text[train_slice],en_text[valid_slice]
    es_train,es_valid = es_text[train_slice],es_text[valid_slice]

    # Sizes are measured on the actual splits so they stay correct even when
    # the corpus is shorter than the requested slice bounds.
    tsize,vsize = len(en_train),len(en_valid)
    if tsize == 0 or vsize == 0:
        raise ValueError(
            f"empty split: {tsize} training and {vsize} validation examples"
        )

    X_train = en_vec_layer(tf.constant(en_train))
    X_valid = en_vec_layer(tf.constant(en_valid))
    # Decoder inputs start with the start marker; targets end with the end
    # marker, so each target token is the decoder input shifted by one position.
    X_dec_train = es_vec_layer(tf.constant([f"soseq {s}" for s in es_train]))
    X_dec_valid = es_vec_layer(tf.constant([f"soseq {s}" for s in es_valid]))
    y_train = es_vec_layer(tf.constant([f"{s} eoseq" for s in es_train]))
    y_valid = es_vec_layer(tf.constant([f"{s} eoseq" for s in es_valid]))

    return (X_train,X_dec_train),y_train,(X_valid,X_dec_valid),y_valid,tsize,vsize

In [14]:
# @title Earlier functional-API draft of the model (commented out; see the AttentionModel class below)
# def get_model(vocab_size=1000,embed_size=128):

#     encoder_inputs = keras.layers.Input(shape=(),dtype=tf.string)
#     decoder_inputs = keras.layers.Input(shape=(),dtype=tf.string)
#     en_vec_out = en_vec_layer(encoder_inputs)
#     es_vec_out = es_vec_layer(decoder_inputs)
#     en_embed = keras.layers.Embedding(vocab_size,embed_size,mask_zero=True)
#     es_embed = keras.layers.Embedding(vocab_size,embed_size,mask_zero=True)
#     en_embed_out = en_embed(en_vec_out)
#     es_embed_out = es_embed(es_vec_out)
#     encoder = keras.layers.Bidirectional(keras.layers.LSTM(256,return_state=True,return_sequences=True))
#     decoder = keras.layers.LSTM(512,return_sequences=True)
#     encoder_out,*encoder_state = encoder(en_embed_out)
#     initial_state = [tf.concat(encoder_state[::2],axis=-1),tf.concat(encoder_state[1::2],axis=-1)]
#     decoder_out = decoder(es_embed_out,initial_state=initial_state)
#     attention = keras.layers.Attention()
#     attention_out = attention([decoder_out,encoder_out])
#     out_layer = keras.layers.Dense(vocab_size,"softmax")
#     out = out_layer(attention_out)
#     return keras.Model(inputs=[encoder_inputs,decoder_inputs],outputs=[out])


In [19]:
class AttentionModel(keras.Model):
    """Encoder-decoder translation model with an attention layer on top.

    A bidirectional LSTM encodes the source tokens, a unidirectional LSTM
    decodes the target tokens starting from the encoder's final states, and a
    ``keras.layers.Attention`` layer attends from the decoder outputs over the
    encoder outputs before the softmax projection.
    """

    def __init__(self,vocab_size=1000,embed_size=128,**kwargs):
        """Create the layers.

        Args:
            vocab_size: size of both embedding tables and of the output layer.
            embed_size: embedding dimension for both languages.
            **kwargs: forwarded to ``keras.Model``.
        """
        super().__init__(**kwargs)
        # Token id 0 is treated as padding by both embeddings (mask_zero=True).
        self.en_embed = keras.layers.Embedding(vocab_size,embed_size,mask_zero=True)
        self.es_embed = keras.layers.Embedding(vocab_size,embed_size,mask_zero=True)
        # 256 units per direction, so the concatenated states are 512 wide and
        # match the decoder's 512 units.
        self.encoder = keras.layers.Bidirectional(
            keras.layers.LSTM(256,return_state=True,return_sequences=True)
        )
        self.decoder = keras.layers.LSTM(512,return_sequences=True)
        self.attention = keras.layers.Attention()
        self.out = keras.layers.Dense(vocab_size,activation="softmax")

    def call(self,inputs):
        """Run the forward pass.

        Args:
            inputs: indexable pair ``(encoder token ids, decoder token ids)``.

        Returns:
            Softmax scores over the vocabulary for each decoder position.
        """
        source_ids,target_ids = inputs[0],inputs[1]
        source_vecs = self.en_embed(source_ids)
        target_vecs = self.es_embed(target_ids)

        # The wrapped LSTM returns its sequence output followed by the
        # forward (h, c) and backward (h, c) final states.
        encoder_out,fwd_h,fwd_c,bwd_h,bwd_c = self.encoder(source_vecs)
        hidden_state = tf.concat([fwd_h,bwd_h],axis=-1)
        cell_state = tf.concat([fwd_c,bwd_c],axis=-1)

        decoder_out = self.decoder(target_vecs,initial_state=[hidden_state,cell_state])
        # Query = decoder outputs, value = encoder outputs.
        context = self.attention([decoder_out,encoder_out])
        return self.out(context)

In [20]:
# Everything below is created inside the TPU strategy scope.
with strategy.scope():

    # Run configuration.
    full = True
    epochs = 10
    BATCH_SIZE = 50*8

    # Vectorization layers, then the tokenized train/validation splits.
    en_vec_layer,es_vec_layer = get_layers()
    (
        X_train,y_train,
        X_valid,y_valid,
        train_size,valid_size,
    ) = get_data(es_vec_layer,full=full)

    # Number of whole batches in each split.
    steps_per_epoch = train_size//BATCH_SIZE
    validation_steps = valid_size//BATCH_SIZE
    num_train_steps = steps_per_epoch * epochs

    model = AttentionModel()
    model.compile(
        optimizer="nadam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
        steps_per_execution=25,
    )

In [21]:
# Train with the settings computed in the previous cell; the returned History
# object is left as the cell's displayed value.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=(X_valid,y_valid),
    validation_steps=validation_steps,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x781d102ff2b0>