In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding,GRU,Dense,AdditiveAttention,StringLookup,TextVectorization
from tensorflow.keras.losses import SparseCategoricalCrossentropy
import csv

In [2]:
from google.colab import drive
drive.mount('/content/drive')

# Parallel lists: input_data[i] comes from CSV column 0 and target_data[i]
# from column 1 of the same row.
input_data = list()
target_data = list()

# newline='' is what the csv module expects so that it can handle line endings
# (including newlines inside quoted fields) itself; the explicit encoding keeps
# the read independent of the runtime's locale default.
with open("/content/drive/MyDrive/data/eng-ind.csv", newline='', encoding='utf-8') as f:
  csv_reader = csv.reader(f)
  # Discard the first row (presumably the header); the default avoids a
  # StopIteration on an empty file.
  next(csv_reader, None)

  for row in csv_reader:
    # Blank lines come back as [] and malformed rows may lack a second
    # column; skip them instead of failing with IndexError.
    if len(row) < 2:
      continue
    input_data.append(row[0])
    target_data.append(row[1])

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Shuffle buffer is as large as the number of examples; batches hold 64 pairs.
BATCH_SIZE = 64
BUFFER_SIZE = len(input_data)

# Pair up the two lists element-wise, shuffle the pairs, then group into batches.
dataset = (
    tf.data.Dataset.from_tensor_slices((input_data, target_data))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

In [None]:
# Peek at one batch; take(1) yields at most a single element, so the loop
# body runs once (or not at all for an empty dataset).
for input_batch, target_batch in dataset.take(1):
  print(input_batch, "\n")
  print(target_batch)

In [5]:
def tf_lower_and_split_punct(text):
  """Standardize text for tokenization.

  Lowercases the input, removes every character other than a space, a-z and
  the punctuation marks . ? ! , ¿, surrounds each of those punctuation marks
  with spaces, strips leading/trailing whitespace, and finally wraps the
  result as '[START] <text> [END]'.

  Args:
    text: value accepted by the tf.strings ops used below.

  Returns:
    The standardized text produced by tf.strings.join.
  """
  cleaned = tf.strings.lower(text)
  # (pattern, rewrite) passes, applied in this order: first drop unsupported
  # characters, then pad the punctuation that was kept.
  for pattern, rewrite in (('[^ a-z.?!,¿]', ''), ('[.?!,¿]', r' \0 ')):
    cleaned = tf.strings.regex_replace(cleaned, pattern, rewrite)
  cleaned = tf.strings.strip(cleaned)

  return tf.strings.join(['[START]', cleaned, '[END]'], separator=' ')

In [6]:
# Upper bound on the number of tokens each vectorizer keeps.
max_vocab_size = 5000

# Two identically configured vectorizers, created in order: the first for the
# input text, the second for the target text. Both use the custom
# standardization function instead of the layer default.
input_text_processor, output_text_processor = (
    TextVectorization(
        standardize=tf_lower_and_split_punct,
        max_tokens=max_vocab_size)
    for _ in range(2)
)

In [7]:
# Fit each vectorizer's vocabulary on its own side of the sentence pairs.
input_text_processor.adapt(data=input_data)
output_text_processor.adapt(data=target_data)

In [8]:
# Model hyperparameters.
# Both vocabularies use the same size; it is passed as the first argument of
# the encoder and decoder Embedding layers.
vocab_size_input = vocab_size_output = 5000
units = 64            # width of the GRU and Dense layers
embedding_dim = 512   # size of each embedding vector

In [18]:
class BatchLogs(tf.keras.callbacks.Callback):
  """Keras callback that collects one logged value per training batch.

  Attributes:
    key: name of the entry to read from the per-batch logs dict.
    logs: list of the collected values, in batch order.
  """

  def __init__(self, key):
    # Let the Keras base class initialise its own state before adding ours.
    super().__init__()
    self.key = key
    self.logs = []

  def on_train_batch_end(self, n, logs=None):
    """Append logs[self.key] for the batch that just finished.

    Args:
      n: batch index supplied by Keras (unused).
      logs: dict of values for this batch; Keras declares it optional, so a
        missing dict is ignored rather than raising TypeError.

    Raises:
      KeyError: if `logs` is given but has no entry named `self.key`.
    """
    if logs is None:
      return
    self.logs.append(logs[self.key])

# NOTE(review): the key must match a name present in the training logs; stock
# Keras fit() reports the loss under 'loss' — confirm 'batch_loss' is emitted.
batch_loss = BatchLogs('batch_loss')

In [19]:
# Model input: fixed-length sequences of 64 token ids per example; id 0 is
# treated as padding when the attention mask is built below.
inputs = tf.keras.Input(shape=(64,))

# --- Encoder: embedding -> GRU ---
embedding_enc = Embedding(vocab_size_input, embedding_dim)(inputs)
output_enc, state_enc = GRU(
    units,
    return_sequences=True,
    return_state=True,
    recurrent_initializer='glorot_uniform')(embedding_enc)

# --- Decoder: embedding -> GRU -> additive attention -> two Dense layers ---
# NOTE(review): the decoder embedding is fed the encoder's final GRU state
# (shape (batch, units)), so each float in that state is looked up as if it
# were a token id. A teacher-forced decoder would normally embed target token
# ids from a second model input — TODO confirm the intended design.
embedding_dec = Embedding(vocab_size_output, embedding_dim)(state_enc)
rnn_output, state_dec = GRU(
    units,
    return_sequences=True,
    return_state=True,
    recurrent_initializer='glorot_uniform')(embedding_dec)

# Feed the attention layer: project the decoder output (query) and the
# encoder output (key) to `units` dimensions.
x1 = Dense(units, use_bias=False)(rnn_output)
x2 = Dense(units, use_bias=False)(output_enc)

# Every query position is attended from; value positions holding id 0 are
# masked out.
query_mask = tf.ones(tf.shape(rnn_output)[:-1], dtype=bool)
value_mask = (inputs != 0)

context_vector, attention_weights = AdditiveAttention()(
    inputs=[x1, output_enc, x2],  # [query, value, key]
    mask=[query_mask, value_mask],
    return_attention_scores=True,
)

context_and_rnn_output = tf.concat([context_vector, rnn_output], axis=-1)

x3 = Dense(units, activation=tf.math.tanh, use_bias=False)(context_and_rnn_output)
# No activation here, so the model outputs unnormalised logits over the
# output vocabulary.
x4 = Dense(vocab_size_output)(x3)

model = tf.keras.Model(inputs=inputs, outputs=x4)
model.compile(
    optimizer=tf.optimizers.Adam(),
    # Keras needs a loss *instance*, and from_logits=True because the final
    # Dense layer applies no softmax.
    loss=SparseCategoricalCrossentropy(from_logits=True),
    # batch_loss is a Callback, not a metric: pass it at training time via
    # model.fit(..., callbacks=[batch_loss]) instead of compile(metrics=...).
)


In [20]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections).
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 64)]         0           []                               
                                                                                                  
 embedding_6 (Embedding)        (None, 64, 512)      2560000     ['input_4[0][0]']                
                                                                                                  
 gru_6 (GRU)                    [(None, 64, 64),     110976      ['embedding_6[0][0]']            
                                 (None, 64)]                                                      
                                                                                                  
 embedding_7 (Embedding)        (None, 64, 512)      2560000     ['gru_6[0][1]']            