In [1]:
from pathlib import Path
import os
from collections import Counter
import numpy as np
import time
from tqdm.notebook import tqdm

np.random.seed(8080)

data_path = Path("/content/drive/My Drive/Adv Projects in ML/data/bpe")
print(data_path)
print(os.listdir(data_path))

os.chdir("/content/drive/My Drive/Adv Projects in ML/")

!nvidia-smi

/content/drive/My Drive/Adv Projects in ML/data/bpe
['split_val_bpe.lang1', 'split_val_bpe.lang2', 'codes.file', 'split_train_bpe.lang1', 'split_train_bpe_d.lang1', 'split_train_bpe_d.lang2', 'split_train_bpe.lang2']
Wed Apr  8 20:07:39 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+------------------------------

In [2]:
%tensorflow_version 2.x
import tensorflow as tf
from transformer import Transformer, CustomSchedule, create_masks

print("Tensorflow version " + tf.__version__)

tf.random.set_seed(8080)
# make sure numpy seeded

Tensorflow version 2.2.0-rc2


In [3]:
# read data
with open(data_path/"split_train_bpe.lang1","r") as f:
    english = f.read()
print(len(english.split("\n")), english[:200])
    
with open(data_path/"split_train_bpe.lang2","r") as f:
    french = f.read()
print(len(french.split("\n")), french[:200])

with open(data_path/"split_val_bpe.lang1","r") as f:
    english_val = f.read()
print(len(english_val.split("\n")), english_val[:200])

with open(data_path/"split_val_bpe.lang2","r") as f:
    french_val = f.read()
print(len(french_val.split("\n")), french_val[:200])

# create vocab
english_vocab = list(set(english.replace("\n", " <eos> ").split()))
french_vocab = list(set(french.replace("\n", " <eos> ").split()))
len(english_vocab), len(french_vocab)

english_counter = Counter(english.replace("\n", " <eos> ").split())
french_counter = Counter(french.replace("\n", " <eos> ").split())
len(english_counter), len(french_counter)

english_counter.update({"<unk>":0})
french_counter.update({"<unk>":0})
english_counter.update({"<start>":0})
french_counter.update({"<start>":0})
len(english_counter), len(french_counter)

english_vocab = list(english_counter.keys())
french_vocab = list(french_counter.keys())

# # trim vocab to 10k+2, 12k+2
# english_vocab = ["<start>","<unk>"]
# for i in english_counter.most_common(10000):
#   english_vocab.append(i[0])
# french_vocab = ["<start>","<unk>"]
# for i in french_counter.most_common(12000):
#   french_vocab.append(i[0])

english_word2id = {}
english_id2word = {}
french_word2id = {}
french_id2word = {}

# start enumerate from 1 so that 0 is reserved for padding seqs 
for i, w in enumerate(english_vocab, start=1):
  english_word2id[w] = i
  english_id2word[i] = w

for i, w in enumerate(french_vocab, start=1):
  french_word2id[w] = i
  french_id2word[i] = w

len(english_word2id), len(english_id2word), len(french_word2id), len(french_id2word)

def transform_data(english_lang1, french_lang2):
  english_lines = english_lang1.split("\n")
  french_lines = french_lang2.split("\n")

  data_english = []
  data_french = []

  for line in english_lines:
    line2id = [english_word2id["<start>"]]
    for word in line.split():
      try:
        line2id.append(english_word2id[word])
      except:
        line2id.append(english_word2id["<unk>"])
    line2id.append(english_word2id["<eos>"])
    data_english.append(line2id)

  for line in french_lines:
    line2id = [french_word2id["<start>"]]
    for word in line.split():
      try:
        line2id.append(french_word2id[word])
      except:
        line2id.append(french_word2id["<unk>"])
    line2id.append(french_word2id["<eos>"])
    data_french.append(line2id)

  print(len(data_english), len(data_french))
  return data_english, data_french

data_english, data_french = transform_data(english, french)
data_english_val, data_french_val = transform_data(english_val, french_val)

english_id2word[54], len(data_english), len(data_french), len(data_english_val), len(data_french_val), len(english_vocab), len(french_vocab)

8800 as mr de castro is not present mr le fol@@ l who is re@@ placing mr de castro has the floor
on the other hand if you 're visiting an under@@ developed country and 25 dollars bu@@ ys you a gour@@ met m
8800 Comme M. De Castro est absent , M. Le F@@ ol@@ l , qui le remplace , a la parole .
D' un autre côté , si vous êtes dans un pays en voie de développement , où 25 dollars peuvent vous obtenir un repas d
2200 what action does the council intend to take in the face of this seriously discriminatory attitude which runs contrary to the principles of the eu
where would you like to go next
if that were not enoug
2200 Quelles mesures le Conseil comp@@ te-@@ t-il adopter face à cette attitude qui constitue une grave discrimination et est contraire aux principes sur lesquels l' Union européenne est fondée ?
Où souhai
8800 8800
2200 2200


('consideration', 8800, 8800, 2200, 2200, 10927, 13658)

In [0]:
np.savez("data_and_vocab_bpe.npz", data_english=data_english, data_french=data_french, data_english_val=data_english_val, data_french_val=data_french_val, 
         english_word2id=english_word2id, english_id2word=english_id2word, french_word2id=french_word2id,french_id2word=french_id2word)

In [16]:
BUFFER_SIZE = len(data_english)
BATCH_SIZE = 64
EPOCHS = 50
print("No. of training batches: ", np.ceil(len(data_english)/BATCH_SIZE))
print("No. of validation batches: ", np.ceil(len(data_english_val)/BATCH_SIZE))

# transformer hyperparams
num_layers = 4
d_model = 256
dff = 512
num_heads = 8
input_vocab_size = len(english_vocab) + 1
target_vocab_size = len(french_vocab) + 1
dropout_rate = 0.3
pe_input = max(max([len(i) for i in data_english]),max([len(i) for i in data_english_val]))
pe_target = max(max([len(i) for i in data_french]),max([len(i) for i in data_french_val]))

No. of training batches:  138.0
No. of validation batches:  35.0


In [0]:
tensor_train = tf.data.Dataset.from_tensor_slices((
    tf.keras.preprocessing.sequence.pad_sequences(data_english, padding='post'),
    tf.keras.preprocessing.sequence.pad_sequences(data_french, padding='post')
)).shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=False)
tensor_val = tf.data.Dataset.from_tensor_slices((
    tf.keras.preprocessing.sequence.pad_sequences(data_english_val, padding='post'),
    tf.keras.preprocessing.sequence.pad_sequences(data_french_val, padding='post')
)).batch(BATCH_SIZE, drop_remainder=False)

In [19]:
transformer = Transformer(
    num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, 
    input_vocab_size=input_vocab_size, target_vocab_size=target_vocab_size, 
    pe_input=pe_input, pe_target=pe_target, rate=dropout_rate)

temp_input = tf.random.uniform((BATCH_SIZE, pe_input), dtype=tf.int64, minval=0, maxval=200)
temp_target = tf.random.uniform((BATCH_SIZE, pe_target), dtype=tf.int64, minval=0, maxval=200)

fn_out, _ = transformer(temp_input, temp_target, training=False, 
                               enc_padding_mask=None, 
                               look_ahead_mask=None,
                               dec_padding_mask=None)

fn_out.shape  # (batch_size, tar_seq_len, target_vocab_size)

TensorShape([64, 130, 13659])

In [20]:
transformer.summary()

Model: "transformer_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_1 (Encoder)          multiple                  4905984   
_________________________________________________________________
decoder_1 (Decoder)          multiple                  6659840   
_________________________________________________________________
dense_129 (Dense)            multiple                  3510363   
Total params: 15,076,187
Trainable params: 15,076,187
Non-trainable params: 0
_________________________________________________________________


In [0]:
learning_rate = CustomSchedule(d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, 
                                     epsilon=1e-9)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

val_loss = tf.keras.metrics.Mean(name='loss')
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='val_accuracy')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask
  
  return tf.reduce_sum(loss_)/tf.reduce_sum(mask)

In [0]:
experiment_number = "bpe_2"

checkpoint_path = "./checkpoints/train"+experiment_number

ckpt = tf.train.Checkpoint(transformer=transformer,
                           optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=1)

writer_train = tf.summary.create_file_writer("log_dir/"+experiment_number)

# # if a checkpoint exists, restore the latest checkpoint.
# if ckpt_manager.latest_checkpoint:
#   ckpt.restore(ckpt_manager.latest_checkpoint)
#   print ('Latest checkpoint restored!!')

In [0]:
# The @tf.function trace-compiles train_step into a TF graph for faster
# execution. The function specializes to the precise shape of the argument
# tensors. To avoid re-tracing due to the variable sequence lengths or variable
# batch sizes (the last batch is smaller), use input_signature to specify
# more generic shapes.

train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int32),
    tf.TensorSpec(shape=(None, None), dtype=tf.int32),
]

@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
  
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]
  
  enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

  with tf.GradientTape() as tape:
    predictions, _ = transformer(inp, tar_inp, 
                                 True, 
                                 enc_padding_mask, 
                                 combined_mask, 
                                 dec_padding_mask)
    loss = loss_function(tar_real, predictions)

  gradients = tape.gradient(loss, transformer.trainable_variables)
  optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
  
  train_loss(loss)
  train_accuracy(tar_real, predictions)

@tf.function(input_signature=train_step_signature)
def val_step(inp, tar):
  print(inp.get_shape(), tar.get_shape())
  
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]
  
  enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

  predictions, _ = transformer(inp, tar_inp, 
                                False, 
                                enc_padding_mask, 
                                combined_mask, 
                                dec_padding_mask)
  loss = loss_function(tar_real, predictions)
  
  val_loss(loss)
  val_accuracy(tar_real, predictions)


In [24]:
best_val_loss = np.inf

for epoch in range(EPOCHS):
  start = time.time()
  
  train_loss.reset_states()
  train_accuracy.reset_states()
  
  val_loss.reset_states()
  val_accuracy.reset_states()
  
  for (batch, (inp, tar)) in tqdm(enumerate(tensor_train)):
    train_step(inp, tar)
    if batch % 50 == 0:
      print ('Epoch {} Batch {} Training Loss {:.4f} Accuracy {:.4f}'.format(
          epoch + 1, batch, train_loss.result(), train_accuracy.result()))
    
  print ('Epoch {} Training Loss {:.4f} Accuracy {:.4f}'.format(epoch + 1, 
                                                train_loss.result(), 
                                                train_accuracy.result()))

  print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

  with writer_train.as_default():
    tf.summary.scalar('train_loss', train_loss.result(), step=epoch)

  print("validating")
  for (batch, (inp, tar)) in enumerate(tensor_val):
    val_step(inp, tar)
  
  print ('Epoch {} Validation Loss {:.4f} Accuracy {:.4f}'.format(epoch + 1, 
                                            val_loss.result(), 
                                            val_accuracy.result()))
  if best_val_loss > val_loss.result():
    best_val_loss = val_loss.result()
    ckpt_save_path = ckpt_manager.save()
    print ('Saving checkpoint for epoch {} at {}'.format(epoch+1,
                                                         ckpt_save_path))
  with writer_train.as_default():
    tf.summary.scalar('val_loss', val_loss.result(), step=epoch)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 Batch 0 Training Loss 9.5188 Accuracy 0.0000
Epoch 1 Batch 50 Training Loss 9.4604 Accuracy 0.0012
Epoch 1 Batch 100 Training Loss 9.3506 Accuracy 0.0045

Epoch 1 Training Loss 9.2534 Accuracy 0.0054
Time taken for 1 epoch: 44.70581364631653 secs

validating
(None, None) (None, None)
Epoch 1 Validation Loss 8.7876 Accuracy 0.0076
Saving checkpoint for epoch 1 at ./checkpoints/trainbpe_2/ckpt-1


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 Batch 0 Training Loss 8.8391 Accuracy 0.0070
Epoch 2 Batch 50 Training Loss 8.6264 Accuracy 0.0075
Epoch 2 Batch 100 Training Loss 8.3885 Accuracy 0.0075

Epoch 2 Training Loss 8.2025 Accuracy 0.0075
Time taken for 1 epoch: 34.225117683410645 secs

validating
Epoch 2 Validation Loss 7.4580 Accuracy 0.0076
Saving checkpoint for epoch 2 at ./checkpoints/trainbpe_2/ckpt-2


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 Batch 0 Training Loss 7.4792 Accuracy 0.0044
Epoch 3 Batch 50 Training Loss 7.2553 Accuracy 0.0076
Epoch 3 Batch 100 Training Loss 7.0925 Accuracy 0.0092

Epoch 3 Training Loss 7.0157 Accuracy 0.0114
Time taken for 1 epoch: 33.97651672363281 secs

validating
Epoch 3 Validation Loss 6.8168 Accuracy 0.0215
Saving checkpoint for epoch 3 at ./checkpoints/trainbpe_2/ckpt-3


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 Batch 0 Training Loss 6.6251 Accuracy 0.0179
Epoch 4 Batch 50 Training Loss 6.6956 Accuracy 0.0202
Epoch 4 Batch 100 Training Loss 6.6593 Accuracy 0.0202

Epoch 4 Training Loss 6.6312 Accuracy 0.0203
Time taken for 1 epoch: 33.924625873565674 secs

validating
Epoch 4 Validation Loss 6.5866 Accuracy 0.0227
Saving checkpoint for epoch 4 at ./checkpoints/trainbpe_2/ckpt-4


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 Batch 0 Training Loss 6.4798 Accuracy 0.0199
Epoch 5 Batch 50 Training Loss 6.4208 Accuracy 0.0219
Epoch 5 Batch 100 Training Loss 6.3721 Accuracy 0.0228

Epoch 5 Training Loss 6.3330 Accuracy 0.0232
Time taken for 1 epoch: 33.805588245391846 secs

validating
Epoch 5 Validation Loss 6.2512 Accuracy 0.0269
Saving checkpoint for epoch 5 at ./checkpoints/trainbpe_2/ckpt-5


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 6 Batch 0 Training Loss 6.0969 Accuracy 0.0258
Epoch 6 Batch 50 Training Loss 6.0823 Accuracy 0.0254
Epoch 6 Batch 100 Training Loss 6.0388 Accuracy 0.0258

Epoch 6 Training Loss 6.0100 Accuracy 0.0261
Time taken for 1 epoch: 33.77644062042236 secs

validating
Epoch 6 Validation Loss 5.9972 Accuracy 0.0291
Saving checkpoint for epoch 6 at ./checkpoints/trainbpe_2/ckpt-6


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 7 Batch 0 Training Loss 5.9800 Accuracy 0.0281
Epoch 7 Batch 50 Training Loss 5.8122 Accuracy 0.0279
Epoch 7 Batch 100 Training Loss 5.7730 Accuracy 0.0285

Epoch 7 Training Loss 5.7534 Accuracy 0.0288
Time taken for 1 epoch: 33.774372577667236 secs

validating
Epoch 7 Validation Loss 5.8184 Accuracy 0.0324
Saving checkpoint for epoch 7 at ./checkpoints/trainbpe_2/ckpt-7


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 8 Batch 0 Training Loss 5.6908 Accuracy 0.0253
Epoch 8 Batch 50 Training Loss 5.5810 Accuracy 0.0309
Epoch 8 Batch 100 Training Loss 5.5596 Accuracy 0.0312

Epoch 8 Training Loss 5.5469 Accuracy 0.0314
Time taken for 1 epoch: 33.721468448638916 secs

validating
Epoch 8 Validation Loss 5.6711 Accuracy 0.0348
Saving checkpoint for epoch 8 at ./checkpoints/trainbpe_2/ckpt-8


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 9 Batch 0 Training Loss 5.2787 Accuracy 0.0346
Epoch 9 Batch 50 Training Loss 5.3861 Accuracy 0.0335
Epoch 9 Batch 100 Training Loss 5.3613 Accuracy 0.0338

Epoch 9 Training Loss 5.3480 Accuracy 0.0340
Time taken for 1 epoch: 33.66844964027405 secs

validating
Epoch 9 Validation Loss 5.5088 Accuracy 0.0375
Saving checkpoint for epoch 9 at ./checkpoints/trainbpe_2/ckpt-9


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 10 Batch 0 Training Loss 5.1215 Accuracy 0.0357
Epoch 10 Batch 50 Training Loss 5.1941 Accuracy 0.0359
Epoch 10 Batch 100 Training Loss 5.1747 Accuracy 0.0364

Epoch 10 Training Loss 5.1627 Accuracy 0.0366
Time taken for 1 epoch: 33.61831307411194 secs

validating
Epoch 10 Validation Loss 5.3566 Accuracy 0.0399
Saving checkpoint for epoch 10 at ./checkpoints/trainbpe_2/ckpt-10


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 11 Batch 0 Training Loss 4.9946 Accuracy 0.0346
Epoch 11 Batch 50 Training Loss 5.0135 Accuracy 0.0386
Epoch 11 Batch 100 Training Loss 4.9989 Accuracy 0.0386

Epoch 11 Training Loss 4.9962 Accuracy 0.0388
Time taken for 1 epoch: 33.66914248466492 secs

validating
Epoch 11 Validation Loss 5.2579 Accuracy 0.0419
Saving checkpoint for epoch 11 at ./checkpoints/trainbpe_2/ckpt-11


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 12 Batch 0 Training Loss 4.9462 Accuracy 0.0419
Epoch 12 Batch 50 Training Loss 4.8480 Accuracy 0.0412
Epoch 12 Batch 100 Training Loss 4.8517 Accuracy 0.0409

Epoch 12 Training Loss 4.8428 Accuracy 0.0409
Time taken for 1 epoch: 33.628501176834106 secs

validating
Epoch 12 Validation Loss 5.1629 Accuracy 0.0431
Saving checkpoint for epoch 12 at ./checkpoints/trainbpe_2/ckpt-12


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 13 Batch 0 Training Loss 4.6854 Accuracy 0.0442
Epoch 13 Batch 50 Training Loss 4.7050 Accuracy 0.0423
Epoch 13 Batch 100 Training Loss 4.7060 Accuracy 0.0426

Epoch 13 Training Loss 4.7028 Accuracy 0.0426
Time taken for 1 epoch: 33.36833071708679 secs

validating
Epoch 13 Validation Loss 5.1305 Accuracy 0.0444
Saving checkpoint for epoch 13 at ./checkpoints/trainbpe_2/ckpt-13


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 14 Batch 0 Training Loss 4.4655 Accuracy 0.0397
Epoch 14 Batch 50 Training Loss 4.5343 Accuracy 0.0441
Epoch 14 Batch 100 Training Loss 4.5640 Accuracy 0.0440

Epoch 14 Training Loss 4.5729 Accuracy 0.0442
Time taken for 1 epoch: 33.66933536529541 secs

validating
Epoch 14 Validation Loss 5.0556 Accuracy 0.0454
Saving checkpoint for epoch 14 at ./checkpoints/trainbpe_2/ckpt-14


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 15 Batch 0 Training Loss 4.4289 Accuracy 0.0529
Epoch 15 Batch 50 Training Loss 4.4239 Accuracy 0.0457
Epoch 15 Batch 100 Training Loss 4.4364 Accuracy 0.0460

Epoch 15 Training Loss 4.4503 Accuracy 0.0458
Time taken for 1 epoch: 33.55754327774048 secs

validating
Epoch 15 Validation Loss 5.0134 Accuracy 0.0457
Saving checkpoint for epoch 15 at ./checkpoints/trainbpe_2/ckpt-15


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 16 Batch 0 Training Loss 4.1485 Accuracy 0.0537
Epoch 16 Batch 50 Training Loss 4.2796 Accuracy 0.0475
Epoch 16 Batch 100 Training Loss 4.3183 Accuracy 0.0475

Epoch 16 Training Loss 4.3319 Accuracy 0.0474
Time taken for 1 epoch: 33.41011953353882 secs

validating
Epoch 16 Validation Loss 4.9745 Accuracy 0.0460
Saving checkpoint for epoch 16 at ./checkpoints/trainbpe_2/ckpt-16


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 17 Batch 0 Training Loss 4.0757 Accuracy 0.0514
Epoch 17 Batch 50 Training Loss 4.1636 Accuracy 0.0489
Epoch 17 Batch 100 Training Loss 4.1960 Accuracy 0.0487

Epoch 17 Training Loss 4.2123 Accuracy 0.0485
Time taken for 1 epoch: 33.42615556716919 secs

validating
Epoch 17 Validation Loss 4.9885 Accuracy 0.0468


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 18 Batch 0 Training Loss 3.9035 Accuracy 0.0514
Epoch 18 Batch 50 Training Loss 4.0513 Accuracy 0.0512
Epoch 18 Batch 100 Training Loss 4.0808 Accuracy 0.0505

Epoch 18 Training Loss 4.0936 Accuracy 0.0503
Time taken for 1 epoch: 33.28453588485718 secs

validating
Epoch 18 Validation Loss 4.9569 Accuracy 0.0474
Saving checkpoint for epoch 18 at ./checkpoints/trainbpe_2/ckpt-17


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 19 Batch 0 Training Loss 3.7973 Accuracy 0.0512
Epoch 19 Batch 50 Training Loss 3.9216 Accuracy 0.0523
Epoch 19 Batch 100 Training Loss 3.9645 Accuracy 0.0520

Epoch 19 Training Loss 3.9801 Accuracy 0.0518
Time taken for 1 epoch: 33.6994731426239 secs

validating
Epoch 19 Validation Loss 4.9450 Accuracy 0.0482
Saving checkpoint for epoch 19 at ./checkpoints/trainbpe_2/ckpt-18


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 20 Batch 0 Training Loss 3.7706 Accuracy 0.0547
Epoch 20 Batch 50 Training Loss 3.8085 Accuracy 0.0537
Epoch 20 Batch 100 Training Loss 3.8384 Accuracy 0.0537

Epoch 20 Training Loss 3.8582 Accuracy 0.0535
Time taken for 1 epoch: 33.39351439476013 secs

validating
Epoch 20 Validation Loss 4.9558 Accuracy 0.0486


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 21 Batch 0 Training Loss 3.5587 Accuracy 0.0544
Epoch 21 Batch 50 Training Loss 3.6654 Accuracy 0.0568
Epoch 21 Batch 100 Training Loss 3.7131 Accuracy 0.0560

Epoch 21 Training Loss 3.7365 Accuracy 0.0553
Time taken for 1 epoch: 33.305376291275024 secs

validating
Epoch 21 Validation Loss 4.9837 Accuracy 0.0477


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 22 Batch 0 Training Loss 3.4806 Accuracy 0.0624
Epoch 22 Batch 50 Training Loss 3.5425 Accuracy 0.0584
Epoch 22 Batch 100 Training Loss 3.5871 Accuracy 0.0578

Epoch 22 Training Loss 3.6143 Accuracy 0.0572
Time taken for 1 epoch: 33.240694522857666 secs

validating
Epoch 22 Validation Loss 5.0099 Accuracy 0.0483


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 23 Batch 0 Training Loss 3.4536 Accuracy 0.0595
Epoch 23 Batch 50 Training Loss 3.4108 Accuracy 0.0611
Epoch 23 Batch 100 Training Loss 3.4508 Accuracy 0.0601

Epoch 23 Training Loss 3.4867 Accuracy 0.0594
Time taken for 1 epoch: 33.35658407211304 secs

validating
Epoch 23 Validation Loss 5.0238 Accuracy 0.0482


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 24 Batch 0 Training Loss 3.2378 Accuracy 0.0683
Epoch 24 Batch 50 Training Loss 3.2851 Accuracy 0.0638
Epoch 24 Batch 100 Training Loss 3.3279 Accuracy 0.0625

Epoch 24 Training Loss 3.3651 Accuracy 0.0618
Time taken for 1 epoch: 33.311408281326294 secs

validating
Epoch 24 Validation Loss 5.0509 Accuracy 0.0485


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 25 Batch 0 Training Loss 3.0973 Accuracy 0.0762
Epoch 25 Batch 50 Training Loss 3.1342 Accuracy 0.0661
Epoch 25 Batch 100 Training Loss 3.1989 Accuracy 0.0653

Epoch 25 Training Loss 3.2311 Accuracy 0.0646
Time taken for 1 epoch: 33.20367121696472 secs

validating
Epoch 25 Validation Loss 5.1157 Accuracy 0.0482


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 26 Batch 0 Training Loss 2.9715 Accuracy 0.0646
Epoch 26 Batch 50 Training Loss 3.0024 Accuracy 0.0699
Epoch 26 Batch 100 Training Loss 3.0652 Accuracy 0.0683

Epoch 26 Training Loss 3.1010 Accuracy 0.0674
Time taken for 1 epoch: 33.230204582214355 secs

validating
Epoch 26 Validation Loss 5.1579 Accuracy 0.0488


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 27 Batch 0 Training Loss 2.7565 Accuracy 0.0770
Epoch 27 Batch 50 Training Loss 2.8497 Accuracy 0.0738
Epoch 27 Batch 100 Training Loss 2.9248 Accuracy 0.0716

Epoch 27 Training Loss 2.9687 Accuracy 0.0707
Time taken for 1 epoch: 33.292380809783936 secs

validating
Epoch 27 Validation Loss 5.2175 Accuracy 0.0477


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 28 Batch 0 Training Loss 2.8273 Accuracy 0.0757
Epoch 28 Batch 50 Training Loss 2.7364 Accuracy 0.0773
Epoch 28 Batch 100 Training Loss 2.7985 Accuracy 0.0750

Epoch 28 Training Loss 2.8371 Accuracy 0.0739
Time taken for 1 epoch: 33.29198622703552 secs

validating
Epoch 28 Validation Loss 5.3188 Accuracy 0.0483


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 29 Batch 0 Training Loss 2.6254 Accuracy 0.0854
Epoch 29 Batch 50 Training Loss 2.6060 Accuracy 0.0800
Epoch 29 Batch 100 Training Loss 2.6645 Accuracy 0.0786

Epoch 29 Training Loss 2.7131 Accuracy 0.0775
Time taken for 1 epoch: 33.28731417655945 secs

validating
Epoch 29 Validation Loss 5.3720 Accuracy 0.0481


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 30 Batch 0 Training Loss 2.3880 Accuracy 0.0876
Epoch 30 Batch 50 Training Loss 2.4703 Accuracy 0.0849
Epoch 30 Batch 100 Training Loss 2.5391 Accuracy 0.0825

Epoch 30 Training Loss 2.5841 Accuracy 0.0809
Time taken for 1 epoch: 33.28516411781311 secs

validating
Epoch 30 Validation Loss 5.4708 Accuracy 0.0476


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 31 Batch 0 Training Loss 2.2382 Accuracy 0.0910
Epoch 31 Batch 50 Training Loss 2.3263 Accuracy 0.0892
Epoch 31 Batch 100 Training Loss 2.3953 Accuracy 0.0871

Epoch 31 Training Loss 2.4429 Accuracy 0.0854
Time taken for 1 epoch: 33.23005557060242 secs

validating


KeyboardInterrupt: ignored

Evaluate best model

In [25]:
# load model

# if a checkpoint exists, restore the latest checkpoint.
if ckpt_manager.latest_checkpoint:
  ckpt.restore(ckpt_manager.latest_checkpoint)
  print ('Latest checkpoint restored!!')

Latest checkpoint restored!!


In [26]:
transformer

<transformer.Transformer at 0x7f33712a78d0>

In [27]:
val_loss.reset_states()
val_accuracy.reset_states()
  
for (batch, (inp, tar)) in enumerate(tensor_val):
  val_step(inp, tar)
  
print ('Validation Loss {:.4f} Accuracy {:.4f}'.format(
                                          val_loss.result(), 
                                          val_accuracy.result()))
  

Validation Loss 4.9450 Accuracy 0.0482


In [0]:
def generate_predictions(inp_sentences):

  if len(inp_sentences.get_shape())==1:
    encoder_input = tf.expand_dims(inp_sentences, 0)
    decoder_input = [french_word2id["<start>"]]
    output = tf.expand_dims(decoder_input, 0)

  else:
    encoder_input = inp_sentences
    decoder_input = [french_word2id["<start>"]]*inp_sentences.get_shape()[0]
    output = tf.expand_dims(decoder_input, -1)


  # encoder_input = tf.expand_dims(inp_sentence, 0)
  
  # decoder_input = [french_word2id["<start>"]]
  # output = tf.expand_dims(decoder_input, 0)
    
  for i in range(pe_target):
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        encoder_input, output)
  
    # predictions.shape == (batch_size, seq_len, vocab_size)
    predictions, attention_weights = transformer(encoder_input, 
                                                 output,
                                                 False,
                                                 enc_padding_mask,
                                                 combined_mask,
                                                 dec_padding_mask)
    
    # select the last word from the seq_len dimension
    predictions = predictions[: ,-1:, :]  # (batch_size, 1, vocab_size)

    predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
    
    # # return the result if the predicted_id is equal to the end token
    # if predicted_id == french_word2id["<eos>"]:
    #   return tf.squeeze(output, axis=0), attention_weights
    
    # concatentate the predicted_id to the output which is given to the decoder
    # as its input.
    output = tf.concat([output, predicted_id], axis=-1)

  # return tf.squeeze(output, axis=0), attention_weights
  return output, attention_weights

In [29]:
all_preds = []
for (batch_i, (inp, tar)) in tqdm(enumerate(tensor_val)):

  preds, attention = generate_predictions(inp)
  all_preds.append(preds)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [30]:
translated_sentences = []

for k in tqdm(all_preds):
  for i in k:
    sentence_french = []
    for j in i.numpy()[1:]:
      if j==0 or j==french_word2id["<eos>"]:
        break
      sentence_french.append(french_id2word[j])

    sentence_french = " ".join(sentence_french)

    translated_sentences.append(sentence_french)

translated_sentences = "\n".join(translated_sentences)

with open("predictions.txt","w") as f:
  f.write(translated_sentences)

HBox(children=(IntProgress(value=0, max=35), HTML(value='')))


