Commit

0.2.2: Transformer

Jaime Sendra committed Aug 26, 2020
1 parent be7dc97 commit 4502d21
Showing 6 changed files with 67 additions and 120 deletions.
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
0.2.1
0.2.2
2 changes: 1 addition & 1 deletion mlearner/__init__.py
@@ -40,7 +40,7 @@
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#
__version__ = '0.2.1'
__version__ = '0.2.2'


# On OSX, we can get a runtime error due to multiple OpenMP libraries loaded
4 changes: 2 additions & 2 deletions mlearner/nlp/__init__.py
@@ -9,7 +9,7 @@
from .utils import Processor_data, open_txt
from mlearner.nlp import helpers
from .cnn_advanced import DCNN_Advanced
from .transformer import Transformer, train_transformer
from .transformer import Transformer, Transformer_train

__all__ = ["DCNN", "Processor_data", "open_txt", "helpers", "DCNN_Advanced",
"Transformer", "train_transformer"]
"Transformer", "Transformer_train"]
172 changes: 60 additions & 112 deletions mlearner/nlp/transformer.py
@@ -404,6 +404,15 @@ class Transformer(tf.keras.Model):
---------
```python
BATCH_SIZE = 64
BUFFER_SIZE = 20000
dataset = tf.data.Dataset.from_tensor_slices((inputs, outputs))
dataset = dataset.cache()
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
tf.keras.backend.clear_session()
# Hyperparameters
@@ -413,90 +422,32 @@ class Transformer(tf.keras.Model):
NB_PROJ = 8 # 8
DROPOUT_RATE = 0.1 # 0.1
transformer = Transformer(vocab_size_enc=VOCAB_SIZE_EN,
model_Transformer = Transformer(vocab_size_enc=VOCAB_SIZE_EN,
vocab_size_dec=VOCAB_SIZE_ES,
d_model=D_MODEL,
nb_layers=NB_LAYERS,
FFN_units=FFN_UNITS,
nb_proj=NB_PROJ,
dropout_rate=DROPOUT_RATE)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
reduction="none")
def loss_function(target, pred):
mask = tf.math.logical_not(tf.math.equal(target, 0))
loss_ = loss_object(target, pred)
mask = tf.cast(mask, dtype=loss_.dtype)
loss_ *= mask
return tf.reduce_mean(loss_)
train_loss = tf.keras.metrics.Mean(name="train_loss")
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="train_accuracy")
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
def __init__(self, d_model, warmup_steps=4000):
super(CustomSchedule, self).__init__()
self.d_model = tf.cast(d_model, tf.float32)
self.warmup_steps = warmup_steps
def __call__(self, step):
arg1 = tf.math.rsqrt(step)
arg2 = step * (self.warmup_steps**-1.5)
return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
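# This is the "Attention Is All You Need" warmup schedule:
# lr(step) = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5),
# i.e. linear warmup for warmup_steps steps, then inverse square-root decay.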
learning_rate = CustomSchedule(D_MODEL)
optimizer = tf.keras.optimizers.Adam(learning_rate,
beta_1=0.9,
beta_2=0.98,
epsilon=1e-9)
EPOCHS = 10
for epoch in range(EPOCHS):
print("Inicio del epoch {}".format(epoch+1))
start = time.time()
train_loss.reset_states()
train_accuracy.reset_states()
for (batch, (enc_inputs, targets)) in enumerate(dataset):
dec_inputs = targets[:, :-1]
dec_outputs_real = targets[:, 1:]
with tf.GradientTape() as tape:
predictions = transformer(enc_inputs, dec_inputs, True)
loss = loss_function(dec_outputs_real, predictions)
gradients = tape.gradient(loss, transformer.trainable_variables)
optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
train_loss(loss)
train_accuracy(dec_outputs_real, predictions)
if batch % 50 == 0:
print("Epoch {} Lote {} Pérdida {:.4f} Precisión {:.4f}".format(
epoch+1, batch, train_loss.result(), train_accuracy.result()))
ckpt_save_path = ckpt_manager.save()
print("Guardando checkpoint para el epoch {} en {}".format(epoch+1,
ckpt_save_path))
print("Tiempo que ha tardado 1 epoch: {} segs\n".format(time.time() - start))
Transformer_train(model_Transformer,
dataset,
d_model=D_MODEL,
train=TRAIN,
epochs=1,
checkpoint_path="ckpt/",
max_to_keep=5)
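# TRAIN is assumed to be a boolean defined earlier in this example; when it
# is False, Transformer_train only builds the loss, optimizer and checkpoint
# manager and skips the training loop.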
# Evaluate
def evaluate(inp_sentence):
inp_sentence = \
[VOCAB_SIZE_EN-2] + tokenizer_en.encode(inp_sentence) + [VOCAB_SIZE_EN-1]
[VOCAB_SIZE_EN-2] + processor_en.tokenizer.encode(inp_sentence) + [VOCAB_SIZE_EN-1]
enc_input = tf.expand_dims(inp_sentence, axis=0)
output = tf.expand_dims([VOCAB_SIZE_ES-2], axis=0)
for _ in range(MAX_LENGTH):
predictions = transformer(enc_input, output, False) #(1, seq_length, VOCAB_SIZE_ES)
predictions = model_Transformer(enc_input, output, False) #(1, seq_length, VOCAB_SIZE_ES)
prediction = predictions[:, -1:, :]
@@ -512,7 +463,7 @@ def evaluate(inp_sentence):
def translate(sentence):
output = evaluate(sentence).numpy()
predicted_sentence = tokenizer_es.decode(
predicted_sentence = processor_es.tokenizer.decode(
[i for i in output if i < VOCAB_SIZE_ES-2]
)
@@ -629,15 +580,11 @@ def loss_function(target, pred):
return tf.reduce_mean(loss_)


def train_transformer(dataset,
def Transformer_train(Transformer,
dataset,
d_model,
epochs,
vocab_size_enc,
vocab_size_dec,
nb_layers,
nb_proj,
FFN_units,
dropout_rate=0.1,
train=True,
beta_1=0.9,
beta_2=0.98,
epsilon=1e-9,
@@ -646,13 +593,6 @@ def train_transformer(dataset,
"""
Custom Transformer training.
"""
transformer = Transformer(vocab_size_enc=vocab_size_enc,
vocab_size_dec=vocab_size_dec,
d_model=d_model,
nb_layers=nb_layers,
FFN_units=FFN_units,
nb_proj=nb_proj,
dropout_rate=dropout_rate)
# Custom Learning Rate Schedule
leaning_rate = CustomSchedule_transformer(d_model)
# Loss function
@@ -668,32 +608,40 @@ def train_transformer(dataset,
optimizer,
checkpoint_path="ckpt/",
max_to_keep=5)
# Static graph: compile the training step with tf.function
@tf.function
def train_step(enc_inputs, dec_inputs, dec_outputs_real):
with tf.GradientTape() as tape:
predictions = Transformer(enc_inputs, dec_inputs, True)
loss = loss_function(dec_outputs_real, predictions)

gradients = tape.gradient(loss, Transformer.trainable_variables)
optimizer.apply_gradients(zip(gradients, Transformer.trainable_variables))

train_accuracy(dec_outputs_real, predictions)
return loss

# Training loop
for epoch in range(epochs):
print("Inicio del epoch {}".format(epoch+1))
start = time.time()

train_loss.reset_states()
train_accuracy.reset_states()

for (batch, (enc_inputs, targets)) in enumerate(dataset):
dec_inputs = targets[:, :-1]
dec_outputs_real = targets[:, 1:]
with tf.GradientTape() as tape:
predictions = transformer(enc_inputs, dec_inputs, True)
loss = loss_function(dec_outputs_real, predictions)

gradients = tape.gradient(loss, transformer.trainable_variables)
optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

train_loss(loss)
train_accuracy(dec_outputs_real, predictions)

if batch % 50 == 0:
print("Epoch {} Lote {} Pérdida {:.4f} Precisión {:.4f}".format(
epoch+1, batch, train_loss.result(), train_accuracy.result()))

ckpt_save_path = ckpt_manager.save()
print("Guardando checkpoint para el epoch {} en {}".format(epoch+1,
ckpt_save_path))
print("Tiempo que ha tardado 1 epoch: {} segs\n".format(time.time() - start))
if train:
for epoch in range(epochs):
print("Inicio del epoch {}".format(epoch+1))
start = time.time()

train_loss.reset_states()
train_accuracy.reset_states()

for (batch, (enc_inputs, targets)) in enumerate(dataset):
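# Teacher forcing: the decoder input is the target sequence without its
# last token, and the expected output is the same sequence shifted by one.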
dec_inputs = targets[:, :-1]
dec_outputs_real = targets[:, 1:]

loss = train_step(enc_inputs, dec_inputs, dec_outputs_real)
train_loss(loss)

if batch % 50 == 0:
print("Epoch {} Lote {} Pérdida {:.4f} Precisión {:.4f}".format(
epoch+1, batch, train_loss.result(), train_accuracy.result()))

ckpt_save_path = ckpt_manager.save()
print("Guardando checkpoint para el epoch {} en {}".format(epoch+1,
ckpt_save_path))
print("Tiempo que ha tardado 1 epoch: {} segs\n".format(time.time() - start))
6 changes: 3 additions & 3 deletions mlearner/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -6,8 +6,8 @@
"""

from .params_manager import ParamsManager
from .keras import keras_checkpoint, MyCustomCallback, EarlyStoppingAtMinLoss,
LearningRateScheduler, CustomSchedule_transformer
from .keras import keras_checkpoint, MyCustomCallback, EarlyStoppingAtMinLoss, \
LearningRateScheduler

__all__ = ["ParamsManager", "keras_checkpoint", "MyCustomCallback",
"EarlyStoppingAtMinLoss", "LearningRateScheduler", "CustomSchedule_transformer"]
"EarlyStoppingAtMinLoss", "LearningRateScheduler"]
1 change: 0 additions & 1 deletion mlearner/utils/keras.py
@@ -152,4 +152,3 @@ def on_epoch_begin(self, epoch, logs=None):
# Set the value on the optimizer before the epoch begins
tf.keras.backend.set_value(self.model.optimizer.lr, scheduled_lr)
print('\nEpoch %05d: Learning rate is %6.4f.' % (epoch, scheduled_lr))
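The fragment above is the tail of a per-epoch learning-rate scheduler callback. A minimal self-contained sketch of the same pattern; the class name and the `schedule` callable are placeholders, not this module's API:

```python
import tensorflow as tf

class SimpleLRScheduler(tf.keras.callbacks.Callback):
    """Set the optimizer learning rate from schedule(epoch) at each epoch."""

    def __init__(self, schedule):
        super().__init__()
        self.schedule = schedule

    def on_epoch_begin(self, epoch, logs=None):
        scheduled_lr = self.schedule(epoch)
        # Set the value on the optimizer before the epoch begins
        tf.keras.backend.set_value(self.model.optimizer.lr, scheduled_lr)
        print('\nEpoch %05d: Learning rate is %6.4f.' % (epoch, scheduled_lr))

# Usage: model.fit(x, y, callbacks=[SimpleLRScheduler(lambda e: 1e-3 * 0.5**e)])
```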
