In [1]:
from pathlib import Path
import os
from collections import Counter
import numpy as np
import time
from tqdm.notebook import tqdm

np.random.seed(8080)

data_path = Path("/content/drive/My Drive/Adv Projects in ML/data")
print(data_path)
print(os.listdir(data_path))

os.chdir("/content/drive/My Drive/Adv Projects in ML/")

!nvidia-smi

/content/drive/My Drive/Adv Projects in ML/data
['train.lang2', 'unaligned.en', 'unaligned.fr', 'train.lang1', 'split_train.lang1', 'split_val.lang2', 'split_train.lang2', 'split_val.lang1', 'unalignedtry.en', 'split_train_unaligned_tokenized_rempunc.en', 'split_val_unaligned_tokenized_rempunc.en', 'split_train_unaligned_tokenized.en', 'split_val_unaligned_tokenized.en', 'split_train_unaligned_tokenized_rempunc.fr', 'split_val_unaligned_tokenized_rempunc.fr', 'split_train_unaligned_tokenized.fr', 'split_val_unaligned_tokenized.fr', 'bpe', 'unaligned_tokenized_rempunc.en']
Wed Apr 15 06:05:48 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Co

In [2]:
%tensorflow_version 2.x
import tensorflow as tf
from transformer import Transformer, CustomSchedule, create_masks

print("Tensorflow version " + tf.__version__)

tf.random.set_seed(8080)
# make sure numpy seeded

Tensorflow version 2.2.0-rc2


In [3]:
# read data
with open(data_path/"split_train.lang1","r") as f:
    english = f.read()
print(len(english.split("\n")), english[:200])
    
with open(data_path/"split_train.lang2","r") as f:
    french = f.read()
print(len(french.split("\n")), french[:200])

with open(data_path/"split_val.lang1","r") as f:
    english_val = f.read()
print(len(english_val.split("\n")), english_val[:200])

with open(data_path/"split_val.lang2","r") as f:
    french_val = f.read()
print(len(french_val.split("\n")), french_val[:200])

with open("predictions_english_st.txt","r") as f:
    french_st = f.read().strip()
print(len(french_st.split("\n")), french_st[:200])

with open(data_path/"unaligned.en","r") as f:
    english_monolingual = f.read().strip()
# english_monolingual = english_monolingual.split("\n")[:len(french_st.split("\n"))]
print(len(english_monolingual.split("\n")), english_monolingual[:200])

# create vocab
english_vocab = list(set(english.replace("\n", " <eos> ").split()))
french_vocab = list(set(french.replace("\n", " <eos> ").split()))
len(english_vocab), len(french_vocab)

english_counter = Counter(english.replace("\n", " <eos> ").split())
french_counter = Counter(french.replace("\n", " <eos> ").split())
len(english_counter), len(french_counter)

english_counter.update({"<unk>":0})
french_counter.update({"<unk>":0})
english_counter.update({"<start>":0})
french_counter.update({"<start>":0})
len(english_counter), len(french_counter)

english_vocab = list(english_counter.keys())
french_vocab = list(french_counter.keys())

# # trim vocab to 10k+2, 12k+2
# english_vocab = ["<start>","<unk>"]
# for i in english_counter.most_common(10000):
#   english_vocab.append(i[0])
# french_vocab = ["<start>","<unk>"]
# for i in french_counter.most_common(12000):
#   french_vocab.append(i[0])

english_word2id = {}
english_id2word = {}
french_word2id = {}
french_id2word = {}

# start enumerate from 1 so that 0 is reserved for padding seqs 
for i, w in enumerate(english_vocab, start=1):
  english_word2id[w] = i
  english_id2word[i] = w

for i, w in enumerate(french_vocab, start=1):
  french_word2id[w] = i
  french_id2word[i] = w

len(english_word2id), len(english_id2word), len(french_word2id), len(french_id2word)

def transform_data(english_lang1, french_lang2):
  english_lines = english_lang1.split("\n")
  french_lines = french_lang2.split("\n")

  data_english = []
  data_french = []

  for line in english_lines:
    line2id = [english_word2id["<start>"]]
    for word in line.split():
      try:
        line2id.append(english_word2id[word])
      except:
        line2id.append(english_word2id["<unk>"])
    line2id.append(english_word2id["<eos>"])
    data_english.append(line2id)

  for line in french_lines:
    line2id = [french_word2id["<start>"]]
    for word in line.split():
      try:
        line2id.append(french_word2id[word])
      except:
        line2id.append(french_word2id["<unk>"])
    line2id.append(french_word2id["<eos>"])
    data_french.append(line2id)

  print(len(data_english), len(data_french))
  return data_english, data_french

data_english, data_french = transform_data(english, french)
data_english_val, data_french_val = transform_data(english_val, french_val)

data_english_monolingual, data_french_st = transform_data(english_monolingual, french_st)
data_english_monolingual = data_english_monolingual[:len(data_french_st)]

english_id2word[54], len(data_english), len(data_french), len(data_english_val), len(data_french_val), len(data_english_monolingual), len(data_french_st)

8800 as mr de castro is not present mr le foll who is replacing mr de castro has the floor
on the other hand if you 're visiting an underdeveloped country and 25 dollars buys you a gourmet meal it 's exorb
8800 Comme M. De Castro est absent , M. Le Foll , qui le remplace , a la parole .
D' un autre côté , si vous êtes dans un pays en voie de développement , où 25 dollars peuvent vous obtenir un repas de luxe
2200 what action does the council intend to take in the face of this seriously discriminatory attitude which runs contrary to the principles of the eu
where would you like to go next
if that were not enoug
2200 Quelles mesures le Conseil compte-t-il adopter face à cette attitude qui constitue une grave discrimination et est contraire aux principes sur lesquels l' Union européenne est fondée ?
Où souhaiteriez
200000 Pour la deuxième fois , nous avons des frais d’ être prises par rapport à des petites entreprises et que c' est vrai .
La dernière séance , j' ai été dit que j' étais le

('gas', 8800, 8800, 2200, 2200, 200000, 200000)

In [0]:
np.savez("data_and_vocab_st.npz", data_english=data_english, data_french=data_french, data_english_val=data_english_val, data_french_val=data_french_val, 
         data_english_monolingual=data_english_monolingual, data_french_st=data_french_st,
         english_word2id=english_word2id, english_id2word=english_id2word, french_word2id=french_word2id,french_id2word=french_id2word)

In [5]:
BUFFER_SIZE = len(data_english)
BATCH_SIZE = 64
EPOCHS = 5
print("No. of batches: ", np.ceil(len(data_english_monolingual)/BATCH_SIZE))
print("No. of batches: ", np.ceil(len(data_english)/BATCH_SIZE))
repeat_factor = len(data_english_monolingual) // len(data_english) + 1
print(repeat_factor)

# transformer hyperparams
num_layers = 1
d_model = 256
dff = 256
num_heads = 8
input_vocab_size = len(english_vocab) + 1
target_vocab_size = len(french_vocab) + 1
dropout_rate = 0.4
p_wd_st = 0.3
pe_input = max(max([len(i) for i in data_english]),max([len(i) for i in data_english_val]),max([len(i) for i in data_english_monolingual]))
pe_target = max(max([len(i) for i in data_french]),max([len(i) for i in data_french_val]),max([len(i) for i in data_french_st]))

# pe_input = 200
# pe_target = 230
pe_input, pe_target

No. of batches:  3125.0
No. of batches:  138.0
23


(112, 222)

In [0]:
tensor_train = tf.data.Dataset.from_tensor_slices((
    tf.keras.preprocessing.sequence.pad_sequences(data_english, padding='post'),
    tf.keras.preprocessing.sequence.pad_sequences(data_french, padding='post')
)).shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=False)
tensor_val = tf.data.Dataset.from_tensor_slices((
    tf.keras.preprocessing.sequence.pad_sequences(data_english_val, padding='post'),
    tf.keras.preprocessing.sequence.pad_sequences(data_french_val, padding='post')
)).batch(BATCH_SIZE, drop_remainder=False)

In [0]:
class DatasetGenerator_ST(tf.data.Dataset):
  def _generator():
    inp_pad = tf.keras.preprocessing.sequence.pad_sequences(data_english_monolingual, padding='post').shape[1]
    tar_pad = tf.keras.preprocessing.sequence.pad_sequences(data_french_st, padding='post').shape[1]
    indexes = np.arange(len(data_french_st))
    np.random.shuffle(indexes)
    data1 = np.array(data_english_monolingual)[indexes]
    data2 = np.array(data_french_st)[indexes]
    for i in range(len(data2)):
      tar = data2[i]
      tar = np.pad(tar, (0,tar_pad-len(tar)))
      aug = data1[i]
      if np.random.choice(['drop','swap']) == 'drop':
        drop_idxs = np.random.binomial(1,0.3,len(aug))
        drop_idxs = np.where(drop_idxs==1)
        aug = np.delete(aug,drop_idxs)
      else:
        swap_idx = np.random.choice(np.arange(1,len(aug)))
        tmp = aug[swap_idx]
        aug[swap_idx] = aug[swap_idx-1]
        aug[swap_idx-1] = tmp
      aug = np.pad(aug, (0,inp_pad-len(aug)))
      yield aug, tar

  def __new__(cls):
      return tf.data.Dataset.from_generator(
          cls._generator,
          output_types=(tf.dtypes.int32,tf.dtypes.int32),
          output_shapes=(None,None)
      )
tensor_st = DatasetGenerator_ST().batch(BATCH_SIZE, drop_remainder=False).prefetch(tf.data.experimental.AUTOTUNE)


In [8]:
transformer = Transformer(
    num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, 
    input_vocab_size=input_vocab_size, target_vocab_size=target_vocab_size, 
    pe_input=pe_input, pe_target=pe_target, rate=dropout_rate)

temp_input = tf.random.uniform((BATCH_SIZE, pe_input), dtype=tf.int64, minval=0, maxval=200)
temp_target = tf.random.uniform((BATCH_SIZE, pe_target), dtype=tf.int64, minval=0, maxval=200)

fn_out, _ = transformer(temp_input, temp_target, training=False, 
                               enc_padding_mask=None, 
                               look_ahead_mask=None,
                               dec_padding_mask=None)

fn_out.shape  # (batch_size, tar_seq_len, target_vocab_size)

TensorShape([64, 222, 16315])

In [9]:
transformer.summary()

Model: "transformer"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder (Encoder)            multiple                  3560704   
_________________________________________________________________
decoder (Decoder)            multiple                  4836096   
_________________________________________________________________
dense_16 (Dense)             multiple                  4192955   
Total params: 12,589,755
Trainable params: 12,589,755
Non-trainable params: 0
_________________________________________________________________


In [0]:
learning_rate = CustomSchedule(d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, 
                                     epsilon=1e-9)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

train_loss = tf.keras.metrics.Mean(name='loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

val_loss = tf.keras.metrics.Mean(name='loss')
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='val_accuracy')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask
  
  return tf.reduce_sum(loss_)/tf.reduce_sum(mask)

In [0]:
experiment_number = "st_3_"

checkpoint_path = "./checkpoints/train"+experiment_number

ckpt = tf.train.Checkpoint(transformer=transformer,
                           optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=3)

writer_train = tf.summary.create_file_writer("log_dir/"+experiment_number+"_train")
writer_val = tf.summary.create_file_writer("log_dir/"+experiment_number+"_val")

# # if a checkpoint exists, restore the latest checkpoint.
# if ckpt_manager.latest_checkpoint:
#   ckpt.restore(ckpt_manager.latest_checkpoint)
#   print ('Latest checkpoint restored!!')

In [0]:
# The @tf.function trace-compiles train_step into a TF graph for faster
# execution. The function specializes to the precise shape of the argument
# tensors. To avoid re-tracing due to the variable sequence lengths or variable
# batch sizes (the last batch is smaller), use input_signature to specify
# more generic shapes.

train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int32),
    tf.TensorSpec(shape=(None, None), dtype=tf.int32),
]

@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
  
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]
  
  enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

  with tf.GradientTape() as tape:
    predictions, _ = transformer(inp, tar_inp, 
                                 True, 
                                 enc_padding_mask, 
                                 combined_mask, 
                                 dec_padding_mask)
    loss = loss_function(tar_real, predictions)

  gradients = tape.gradient(loss, transformer.trainable_variables)
  optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
  
  train_loss(loss)
  train_accuracy(tar_real, predictions)

@tf.function(input_signature=train_step_signature)
def val_step(inp, tar):
  
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]
  
  enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

  predictions, _ = transformer(inp, tar_inp, 
                                False, 
                                enc_padding_mask, 
                                combined_mask, 
                                dec_padding_mask)
  loss = loss_function(tar_real, predictions)
  
  val_loss(loss)
  val_accuracy(tar_real, predictions)


In [13]:
best_val_loss = np.inf

for epoch in range(EPOCHS):
  start = time.time()

  train_loss.reset_states()
  train_accuracy.reset_states()

  print("training ST data")
  tensor_st = DatasetGenerator_ST().batch(BATCH_SIZE, drop_remainder=False).prefetch(tf.data.experimental.AUTOTUNE)
  for (batch, (inp, tar)) in tqdm(enumerate(tensor_st), total=len(data_english_monolingual)//BATCH_SIZE+1):
    train_step(inp, tar)
    if batch % 100 == 0:
      print ('Epoch {} Batch {} Training Loss {:.4f} Accuracy {:.4f}'.format(
          epoch + 1, batch, train_loss.result(), train_accuracy.result()))

  for iteration_i in range(repeat_factor):

    train_loss.reset_states()
    train_accuracy.reset_states()
    
    val_loss.reset_states()
    val_accuracy.reset_states()

    print("training Parallel data")

    for (batch, (inp, tar)) in tqdm(enumerate(tensor_train)):
      train_step(inp, tar)
      if batch % 50 == 0:
        print ('Epoch {} iteration_i {} Batch {} Training Loss {:.4f} Accuracy {:.4f}'.format(
            epoch + 1, iteration_i, batch, train_loss.result(), train_accuracy.result()))
      
    print ('Epoch {} iteration_i {} Training Loss {:.4f} Accuracy {:.4f}'.format(epoch + 1, 
                                                  iteration_i,
                                                  train_loss.result(), 
                                                  train_accuracy.result()))
    print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

    with writer_train.as_default():
      tf.summary.scalar('train_loss', train_loss.result(), step=epoch)

    print("validating")
    for (batch, (inp, tar)) in enumerate(tensor_val):
      val_step(inp, tar)
    
    print ('Epoch {} iteration_i {} Validation Loss {:.4f} Accuracy {:.4f}'.format(epoch + 1,
                                              iteration_i,
                                              val_loss.result(), 
                                              val_accuracy.result()))
    if best_val_loss > val_loss.result():
      best_val_loss = val_loss.result()
      ckpt_save_path = ckpt_manager.save()
      print ('Saving checkpoint for epoch {} iteration_i {} at {}'.format(epoch+1,
                                                          iteration_i,
                                                          ckpt_save_path))
    with writer_val.as_default():
      tf.summary.scalar('val_loss', val_loss.result(), step=epoch)

training ST data


HBox(children=(IntProgress(value=0, max=3126), HTML(value='')))

Epoch 1 Batch 0 Training Loss 9.7294 Accuracy 0.0000
Epoch 1 Batch 100 Training Loss 9.5965 Accuracy 0.0005
Epoch 1 Batch 200 Training Loss 9.1716 Accuracy 0.0022
Epoch 1 Batch 300 Training Loss 8.5456 Accuracy 0.0030
Epoch 1 Batch 400 Training Loss 7.9554 Accuracy 0.0035
Epoch 1 Batch 500 Training Loss 7.5317 Accuracy 0.0047
Epoch 1 Batch 600 Training Loss 7.1994 Accuracy 0.0062
Epoch 1 Batch 700 Training Loss 6.8948 Accuracy 0.0079
Epoch 1 Batch 800 Training Loss 6.6048 Accuracy 0.0097
Epoch 1 Batch 900 Training Loss 6.3348 Accuracy 0.0115
Epoch 1 Batch 1000 Training Loss 6.0867 Accuracy 0.0132
Epoch 1 Batch 1100 Training Loss 5.8643 Accuracy 0.0148
Epoch 1 Batch 1200 Training Loss 5.6647 Accuracy 0.0162
Epoch 1 Batch 1300 Training Loss 5.4850 Accuracy 0.0176
Epoch 1 Batch 1400 Training Loss 5.3239 Accuracy 0.0188
Epoch 1 Batch 1500 Training Loss 5.1778 Accuracy 0.0199
Epoch 1 Batch 1600 Training Loss 5.0457 Accuracy 0.0210
Epoch 1 Batch 1700 Training Loss 4.9247 Accuracy 0.0220
Epoc

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 0 Batch 0 Training Loss 6.2153 Accuracy 0.0558
Epoch 1 iteration_i 0 Batch 50 Training Loss 4.9058 Accuracy 0.0594
Epoch 1 iteration_i 0 Batch 100 Training Loss 4.7728 Accuracy 0.0611

Epoch 1 iteration_i 0 Training Loss 4.7054 Accuracy 0.0615
Time taken for 1 epoch: 765.4688374996185 secs

validating
Epoch 1 iteration_i 0 Validation Loss 4.5016 Accuracy 0.0685
Saving checkpoint for epoch 1 iteration_i 0 at ./checkpoints/trainst_3_/ckpt-1
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 1 Batch 0 Training Loss 4.1501 Accuracy 0.0769
Epoch 1 iteration_i 1 Batch 50 Training Loss 4.1392 Accuracy 0.0680
Epoch 1 iteration_i 1 Batch 100 Training Loss 4.1260 Accuracy 0.0682

Epoch 1 iteration_i 1 Training Loss 4.1224 Accuracy 0.0687
Time taken for 1 epoch: 791.8378891944885 secs

validating
Epoch 1 iteration_i 1 Validation Loss 4.4314 Accuracy 0.0715
Saving checkpoint for epoch 1 iteration_i 1 at ./checkpoints/trainst_3_/ckpt-2
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 2 Batch 0 Training Loss 3.5178 Accuracy 0.0706
Epoch 1 iteration_i 2 Batch 50 Training Loss 3.7842 Accuracy 0.0736
Epoch 1 iteration_i 2 Batch 100 Training Loss 3.7886 Accuracy 0.0736

Epoch 1 iteration_i 2 Training Loss 3.8003 Accuracy 0.0739
Time taken for 1 epoch: 817.3789081573486 secs

validating
Epoch 1 iteration_i 2 Validation Loss 4.4193 Accuracy 0.0739
Saving checkpoint for epoch 1 iteration_i 2 at ./checkpoints/trainst_3_/ckpt-3
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 3 Batch 0 Training Loss 3.4158 Accuracy 0.0831
Epoch 1 iteration_i 3 Batch 50 Training Loss 3.5193 Accuracy 0.0789
Epoch 1 iteration_i 3 Batch 100 Training Loss 3.5330 Accuracy 0.0788

Epoch 1 iteration_i 3 Training Loss 3.5313 Accuracy 0.0788
Time taken for 1 epoch: 842.8950083255768 secs

validating
Epoch 1 iteration_i 3 Validation Loss 4.4198 Accuracy 0.0754
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 4 Batch 0 Training Loss 3.2091 Accuracy 0.0831
Epoch 1 iteration_i 4 Batch 50 Training Loss 3.2530 Accuracy 0.0833
Epoch 1 iteration_i 4 Batch 100 Training Loss 3.2851 Accuracy 0.0827

Epoch 1 iteration_i 4 Training Loss 3.2976 Accuracy 0.0831
Time taken for 1 epoch: 867.8197505474091 secs

validating
Epoch 1 iteration_i 4 Validation Loss 4.4517 Accuracy 0.0765
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 5 Batch 0 Training Loss 2.9852 Accuracy 0.0801
Epoch 1 iteration_i 5 Batch 50 Training Loss 3.0078 Accuracy 0.0887
Epoch 1 iteration_i 5 Batch 100 Training Loss 3.0437 Accuracy 0.0886

Epoch 1 iteration_i 5 Training Loss 3.0783 Accuracy 0.0878
Time taken for 1 epoch: 892.728922367096 secs

validating
Epoch 1 iteration_i 5 Validation Loss 4.5178 Accuracy 0.0769
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 6 Batch 0 Training Loss 2.7517 Accuracy 0.0932
Epoch 1 iteration_i 6 Batch 50 Training Loss 2.8088 Accuracy 0.0940
Epoch 1 iteration_i 6 Batch 100 Training Loss 2.8518 Accuracy 0.0927

Epoch 1 iteration_i 6 Training Loss 2.8783 Accuracy 0.0922
Time taken for 1 epoch: 917.576699256897 secs

validating
Epoch 1 iteration_i 6 Validation Loss 4.5930 Accuracy 0.0779
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 7 Batch 0 Training Loss 2.5240 Accuracy 0.0995
Epoch 1 iteration_i 7 Batch 50 Training Loss 2.5966 Accuracy 0.0990
Epoch 1 iteration_i 7 Batch 100 Training Loss 2.6506 Accuracy 0.0973

Epoch 1 iteration_i 7 Training Loss 2.6843 Accuracy 0.0967
Time taken for 1 epoch: 942.4697165489197 secs

validating
Epoch 1 iteration_i 7 Validation Loss 4.6581 Accuracy 0.0786
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 8 Batch 0 Training Loss 2.2670 Accuracy 0.1102
Epoch 1 iteration_i 8 Batch 50 Training Loss 2.4228 Accuracy 0.1036
Epoch 1 iteration_i 8 Batch 100 Training Loss 2.4720 Accuracy 0.1022

Epoch 1 iteration_i 8 Training Loss 2.5074 Accuracy 0.1011
Time taken for 1 epoch: 967.3429026603699 secs

validating
Epoch 1 iteration_i 8 Validation Loss 4.7117 Accuracy 0.0793
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 9 Batch 0 Training Loss 2.2932 Accuracy 0.1091
Epoch 1 iteration_i 9 Batch 50 Training Loss 2.2832 Accuracy 0.1082
Epoch 1 iteration_i 9 Batch 100 Training Loss 2.3202 Accuracy 0.1069

Epoch 1 iteration_i 9 Training Loss 2.3537 Accuracy 0.1054
Time taken for 1 epoch: 992.1935932636261 secs

validating
Epoch 1 iteration_i 9 Validation Loss 4.8113 Accuracy 0.0797
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 10 Batch 0 Training Loss 2.0559 Accuracy 0.1147
Epoch 1 iteration_i 10 Batch 50 Training Loss 2.1428 Accuracy 0.1121
Epoch 1 iteration_i 10 Batch 100 Training Loss 2.1847 Accuracy 0.1111

Epoch 1 iteration_i 10 Training Loss 2.2131 Accuracy 0.1094
Time taken for 1 epoch: 1017.0388584136963 secs

validating
Epoch 1 iteration_i 10 Validation Loss 4.8577 Accuracy 0.0797
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 11 Batch 0 Training Loss 2.0050 Accuracy 0.1304
Epoch 1 iteration_i 11 Batch 50 Training Loss 1.9989 Accuracy 0.1168
Epoch 1 iteration_i 11 Batch 100 Training Loss 2.0497 Accuracy 0.1147

Epoch 1 iteration_i 11 Training Loss 2.0820 Accuracy 0.1135
Time taken for 1 epoch: 1041.9408628940582 secs

validating
Epoch 1 iteration_i 11 Validation Loss 4.9647 Accuracy 0.0795
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 12 Batch 0 Training Loss 1.9102 Accuracy 0.1311
Epoch 1 iteration_i 12 Batch 50 Training Loss 1.8947 Accuracy 0.1208
Epoch 1 iteration_i 12 Batch 100 Training Loss 1.9346 Accuracy 0.1181

Epoch 1 iteration_i 12 Training Loss 1.9620 Accuracy 0.1174
Time taken for 1 epoch: 1066.8108081817627 secs

validating
Epoch 1 iteration_i 12 Validation Loss 5.0257 Accuracy 0.0802
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 13 Batch 0 Training Loss 1.7644 Accuracy 0.1357
Epoch 1 iteration_i 13 Batch 50 Training Loss 1.7736 Accuracy 0.1246
Epoch 1 iteration_i 13 Batch 100 Training Loss 1.8325 Accuracy 0.1224

Epoch 1 iteration_i 13 Training Loss 1.8614 Accuracy 0.1212
Time taken for 1 epoch: 1091.6171414852142 secs

validating
Epoch 1 iteration_i 13 Validation Loss 5.1184 Accuracy 0.0804
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 14 Batch 0 Training Loss 1.7247 Accuracy 0.1440
Epoch 1 iteration_i 14 Batch 50 Training Loss 1.6820 Accuracy 0.1299
Epoch 1 iteration_i 14 Batch 100 Training Loss 1.7365 Accuracy 0.1261

Epoch 1 iteration_i 14 Training Loss 1.7743 Accuracy 0.1242
Time taken for 1 epoch: 1116.4698462486267 secs

validating
Epoch 1 iteration_i 14 Validation Loss 5.1541 Accuracy 0.0796
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 15 Batch 0 Training Loss 1.5548 Accuracy 0.1376
Epoch 1 iteration_i 15 Batch 50 Training Loss 1.6228 Accuracy 0.1311
Epoch 1 iteration_i 15 Batch 100 Training Loss 1.6600 Accuracy 0.1281

Epoch 1 iteration_i 15 Training Loss 1.6894 Accuracy 0.1271
Time taken for 1 epoch: 1141.3282525539398 secs

validating
Epoch 1 iteration_i 15 Validation Loss 5.2414 Accuracy 0.0802
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 16 Batch 0 Training Loss 1.4542 Accuracy 0.1362
Epoch 1 iteration_i 16 Batch 50 Training Loss 1.5256 Accuracy 0.1336
Epoch 1 iteration_i 16 Batch 100 Training Loss 1.5767 Accuracy 0.1311

Epoch 1 iteration_i 16 Training Loss 1.6117 Accuracy 0.1303
Time taken for 1 epoch: 1166.2549858093262 secs

validating
Epoch 1 iteration_i 16 Validation Loss 5.2980 Accuracy 0.0801
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 17 Batch 0 Training Loss 1.3537 Accuracy 0.1496
Epoch 1 iteration_i 17 Batch 50 Training Loss 1.4752 Accuracy 0.1381
Epoch 1 iteration_i 17 Batch 100 Training Loss 1.5117 Accuracy 0.1345

Epoch 1 iteration_i 17 Training Loss 1.5396 Accuracy 0.1333
Time taken for 1 epoch: 1191.157592535019 secs

validating
Epoch 1 iteration_i 17 Validation Loss 5.3746 Accuracy 0.0796
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 18 Batch 0 Training Loss 1.2976 Accuracy 0.1558
Epoch 1 iteration_i 18 Batch 50 Training Loss 1.4032 Accuracy 0.1402
Epoch 1 iteration_i 18 Batch 100 Training Loss 1.4462 Accuracy 0.1370

Epoch 1 iteration_i 18 Training Loss 1.4810 Accuracy 0.1356
Time taken for 1 epoch: 1215.9778671264648 secs

validating
Epoch 1 iteration_i 18 Validation Loss 5.4383 Accuracy 0.0800
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 19 Batch 0 Training Loss 1.2728 Accuracy 0.1373
Epoch 1 iteration_i 19 Batch 50 Training Loss 1.3488 Accuracy 0.1404
Epoch 1 iteration_i 19 Batch 100 Training Loss 1.3892 Accuracy 0.1397

Epoch 1 iteration_i 19 Training Loss 1.4219 Accuracy 0.1380
Time taken for 1 epoch: 1240.9304745197296 secs

validating
Epoch 1 iteration_i 19 Validation Loss 5.4881 Accuracy 0.0799
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 20 Batch 0 Training Loss 1.2209 Accuracy 0.1339
Epoch 1 iteration_i 20 Batch 50 Training Loss 1.2939 Accuracy 0.1431
Epoch 1 iteration_i 20 Batch 100 Training Loss 1.3404 Accuracy 0.1421

Epoch 1 iteration_i 20 Training Loss 1.3683 Accuracy 0.1403
Time taken for 1 epoch: 1265.8498067855835 secs

validating
Epoch 1 iteration_i 20 Validation Loss 5.5692 Accuracy 0.0802
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 21 Batch 0 Training Loss 1.1979 Accuracy 0.1489
Epoch 1 iteration_i 21 Batch 50 Training Loss 1.2500 Accuracy 0.1456
Epoch 1 iteration_i 21 Batch 100 Training Loss 1.2889 Accuracy 0.1436

Epoch 1 iteration_i 21 Training Loss 1.3195 Accuracy 0.1423
Time taken for 1 epoch: 1290.7067866325378 secs

validating
Epoch 1 iteration_i 21 Validation Loss 5.6144 Accuracy 0.0802
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 1 iteration_i 22 Batch 0 Training Loss 1.1424 Accuracy 0.1401
Epoch 1 iteration_i 22 Batch 50 Training Loss 1.2110 Accuracy 0.1471
Epoch 1 iteration_i 22 Batch 100 Training Loss 1.2462 Accuracy 0.1451

Epoch 1 iteration_i 22 Training Loss 1.2785 Accuracy 0.1441
Time taken for 1 epoch: 1315.5914611816406 secs

validating
Epoch 1 iteration_i 22 Validation Loss 5.6659 Accuracy 0.0800
training ST data


HBox(children=(IntProgress(value=0, max=3126), HTML(value='')))

Epoch 2 Batch 0 Training Loss 4.5291 Accuracy 0.0333
Epoch 2 Batch 100 Training Loss 2.9013 Accuracy 0.0435
Epoch 2 Batch 200 Training Loss 2.6907 Accuracy 0.0456
Epoch 2 Batch 300 Training Loss 2.5797 Accuracy 0.0469
Epoch 2 Batch 400 Training Loss 2.5070 Accuracy 0.0476
Epoch 2 Batch 500 Training Loss 2.4569 Accuracy 0.0482
Epoch 2 Batch 600 Training Loss 2.4173 Accuracy 0.0487
Epoch 2 Batch 700 Training Loss 2.3855 Accuracy 0.0492
Epoch 2 Batch 800 Training Loss 2.3589 Accuracy 0.0496
Epoch 2 Batch 900 Training Loss 2.3338 Accuracy 0.0500
Epoch 2 Batch 1000 Training Loss 2.3144 Accuracy 0.0502
Epoch 2 Batch 1100 Training Loss 2.2964 Accuracy 0.0505
Epoch 2 Batch 1200 Training Loss 2.2809 Accuracy 0.0507
Epoch 2 Batch 1300 Training Loss 2.2670 Accuracy 0.0510
Epoch 2 Batch 1400 Training Loss 2.2549 Accuracy 0.0511
Epoch 2 Batch 1500 Training Loss 2.2428 Accuracy 0.0513
Epoch 2 Batch 1600 Training Loss 2.2323 Accuracy 0.0515
Epoch 2 Batch 1700 Training Loss 2.2225 Accuracy 0.0516
Epoc

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 0 Batch 0 Training Loss 4.7432 Accuracy 0.0967
Epoch 2 iteration_i 0 Batch 50 Training Loss 3.7270 Accuracy 0.0916
Epoch 2 iteration_i 0 Batch 100 Training Loss 3.5407 Accuracy 0.0949

Epoch 2 iteration_i 0 Training Loss 3.4571 Accuracy 0.0968
Time taken for 1 epoch: 756.7500467300415 secs

validating
Epoch 2 iteration_i 0 Validation Loss 4.5613 Accuracy 0.0782
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 1 Batch 0 Training Loss 2.8870 Accuracy 0.1062
Epoch 2 iteration_i 1 Batch 50 Training Loss 2.7894 Accuracy 0.1091
Epoch 2 iteration_i 1 Batch 100 Training Loss 2.7715 Accuracy 0.1097

Epoch 2 iteration_i 1 Training Loss 2.7623 Accuracy 0.1097
Time taken for 1 epoch: 781.6642150878906 secs

validating
Epoch 2 iteration_i 1 Validation Loss 4.6355 Accuracy 0.0801
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 2 Batch 0 Training Loss 2.3769 Accuracy 0.1078
Epoch 2 iteration_i 2 Batch 50 Training Loss 2.4124 Accuracy 0.1170
Epoch 2 iteration_i 2 Batch 100 Training Loss 2.4192 Accuracy 0.1167

Epoch 2 iteration_i 2 Training Loss 2.4191 Accuracy 0.1166
Time taken for 1 epoch: 806.6466720104218 secs

validating
Epoch 2 iteration_i 2 Validation Loss 4.7129 Accuracy 0.0812
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 3 Batch 0 Training Loss 2.0971 Accuracy 0.1327
Epoch 2 iteration_i 3 Batch 50 Training Loss 2.1392 Accuracy 0.1229
Epoch 2 iteration_i 3 Batch 100 Training Loss 2.1666 Accuracy 0.1221

Epoch 2 iteration_i 3 Training Loss 2.1717 Accuracy 0.1221
Time taken for 1 epoch: 831.5813744068146 secs

validating
Epoch 2 iteration_i 3 Validation Loss 4.8287 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 4 Batch 0 Training Loss 1.9970 Accuracy 0.1430
Epoch 2 iteration_i 4 Batch 50 Training Loss 1.9426 Accuracy 0.1295
Epoch 2 iteration_i 4 Batch 100 Training Loss 1.9666 Accuracy 0.1273

Epoch 2 iteration_i 4 Training Loss 1.9776 Accuracy 0.1268
Time taken for 1 epoch: 856.640294790268 secs

validating
Epoch 2 iteration_i 4 Validation Loss 4.9033 Accuracy 0.0819
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 5 Batch 0 Training Loss 1.6475 Accuracy 0.1316
Epoch 2 iteration_i 5 Batch 50 Training Loss 1.7728 Accuracy 0.1319
Epoch 2 iteration_i 5 Batch 100 Training Loss 1.7978 Accuracy 0.1307

Epoch 2 iteration_i 5 Training Loss 1.8145 Accuracy 0.1308
Time taken for 1 epoch: 881.4358286857605 secs

validating
Epoch 2 iteration_i 5 Validation Loss 4.9751 Accuracy 0.0819
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 6 Batch 0 Training Loss 1.6484 Accuracy 0.1271
Epoch 2 iteration_i 6 Batch 50 Training Loss 1.6386 Accuracy 0.1372
Epoch 2 iteration_i 6 Batch 100 Training Loss 1.6612 Accuracy 0.1356

Epoch 2 iteration_i 6 Training Loss 1.6802 Accuracy 0.1344
Time taken for 1 epoch: 906.2340993881226 secs

validating
Epoch 2 iteration_i 6 Validation Loss 5.0661 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 7 Batch 0 Training Loss 1.4282 Accuracy 0.1582
Epoch 2 iteration_i 7 Batch 50 Training Loss 1.5088 Accuracy 0.1402
Epoch 2 iteration_i 7 Batch 100 Training Loss 1.5458 Accuracy 0.1389

Epoch 2 iteration_i 7 Training Loss 1.5643 Accuracy 0.1377
Time taken for 1 epoch: 931.0311615467072 secs

validating
Epoch 2 iteration_i 7 Validation Loss 5.1708 Accuracy 0.0818
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 8 Batch 0 Training Loss 1.3889 Accuracy 0.1589
Epoch 2 iteration_i 8 Batch 50 Training Loss 1.4072 Accuracy 0.1448
Epoch 2 iteration_i 8 Batch 100 Training Loss 1.4439 Accuracy 0.1424

Epoch 2 iteration_i 8 Training Loss 1.4619 Accuracy 0.1409
Time taken for 1 epoch: 955.9969971179962 secs

validating
Epoch 2 iteration_i 8 Validation Loss 5.3071 Accuracy 0.0819
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 9 Batch 0 Training Loss 1.3197 Accuracy 0.1390
Epoch 2 iteration_i 9 Batch 50 Training Loss 1.3215 Accuracy 0.1462
Epoch 2 iteration_i 9 Batch 100 Training Loss 1.3512 Accuracy 0.1447

Epoch 2 iteration_i 9 Training Loss 1.3761 Accuracy 0.1440
Time taken for 1 epoch: 980.7789018154144 secs

validating
Epoch 2 iteration_i 9 Validation Loss 5.3428 Accuracy 0.0818
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 10 Batch 0 Training Loss 1.2024 Accuracy 0.1636
Epoch 2 iteration_i 10 Batch 50 Training Loss 1.2465 Accuracy 0.1502
Epoch 2 iteration_i 10 Batch 100 Training Loss 1.2802 Accuracy 0.1479

Epoch 2 iteration_i 10 Training Loss 1.2958 Accuracy 0.1467
Time taken for 1 epoch: 1005.6522755622864 secs

validating
Epoch 2 iteration_i 10 Validation Loss 5.4761 Accuracy 0.0820
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 11 Batch 0 Training Loss 1.0490 Accuracy 0.1582
Epoch 2 iteration_i 11 Batch 50 Training Loss 1.1685 Accuracy 0.1515
Epoch 2 iteration_i 11 Batch 100 Training Loss 1.2078 Accuracy 0.1499

Epoch 2 iteration_i 11 Training Loss 1.2355 Accuracy 0.1489
Time taken for 1 epoch: 1030.449030160904 secs

validating
Epoch 2 iteration_i 11 Validation Loss 5.5296 Accuracy 0.0815
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 12 Batch 0 Training Loss 1.1089 Accuracy 0.1691
Epoch 2 iteration_i 12 Batch 50 Training Loss 1.1209 Accuracy 0.1551
Epoch 2 iteration_i 12 Batch 100 Training Loss 1.1501 Accuracy 0.1530

Epoch 2 iteration_i 12 Training Loss 1.1744 Accuracy 0.1514
Time taken for 1 epoch: 1055.355260848999 secs

validating
Epoch 2 iteration_i 12 Validation Loss 5.6101 Accuracy 0.0818
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 13 Batch 0 Training Loss 1.0052 Accuracy 0.1470
Epoch 2 iteration_i 13 Batch 50 Training Loss 1.0685 Accuracy 0.1563
Epoch 2 iteration_i 13 Batch 100 Training Loss 1.0982 Accuracy 0.1546

Epoch 2 iteration_i 13 Training Loss 1.1192 Accuracy 0.1536
Time taken for 1 epoch: 1080.0959839820862 secs

validating
Epoch 2 iteration_i 13 Validation Loss 5.6633 Accuracy 0.0816
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 14 Batch 0 Training Loss 1.0646 Accuracy 0.1568
Epoch 2 iteration_i 14 Batch 50 Training Loss 1.0235 Accuracy 0.1591
Epoch 2 iteration_i 14 Batch 100 Training Loss 1.0537 Accuracy 0.1571

Epoch 2 iteration_i 14 Training Loss 1.0771 Accuracy 0.1556
Time taken for 1 epoch: 1105.3425033092499 secs

validating
Epoch 2 iteration_i 14 Validation Loss 5.7484 Accuracy 0.0815
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 15 Batch 0 Training Loss 0.9764 Accuracy 0.1522
Epoch 2 iteration_i 15 Batch 50 Training Loss 0.9928 Accuracy 0.1580
Epoch 2 iteration_i 15 Batch 100 Training Loss 1.0205 Accuracy 0.1574

Epoch 2 iteration_i 15 Training Loss 1.0378 Accuracy 0.1567
Time taken for 1 epoch: 1130.1132237911224 secs

validating
Epoch 2 iteration_i 15 Validation Loss 5.8100 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 16 Batch 0 Training Loss 1.0449 Accuracy 0.1525
Epoch 2 iteration_i 16 Batch 50 Training Loss 0.9553 Accuracy 0.1598
Epoch 2 iteration_i 16 Batch 100 Training Loss 0.9813 Accuracy 0.1588

Epoch 2 iteration_i 16 Training Loss 1.0003 Accuracy 0.1585
Time taken for 1 epoch: 1154.9166581630707 secs

validating
Epoch 2 iteration_i 16 Validation Loss 5.8599 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 17 Batch 0 Training Loss 0.8810 Accuracy 0.1525
Epoch 2 iteration_i 17 Batch 50 Training Loss 0.9306 Accuracy 0.1622
Epoch 2 iteration_i 17 Batch 100 Training Loss 0.9544 Accuracy 0.1600

Epoch 2 iteration_i 17 Training Loss 0.9693 Accuracy 0.1597
Time taken for 1 epoch: 1179.7077898979187 secs

validating
Epoch 2 iteration_i 17 Validation Loss 5.9002 Accuracy 0.0814
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 18 Batch 0 Training Loss 0.8829 Accuracy 0.1542
Epoch 2 iteration_i 18 Batch 50 Training Loss 0.8890 Accuracy 0.1653
Epoch 2 iteration_i 18 Batch 100 Training Loss 0.9143 Accuracy 0.1631

Epoch 2 iteration_i 18 Training Loss 0.9382 Accuracy 0.1610
Time taken for 1 epoch: 1204.5100395679474 secs

validating
Epoch 2 iteration_i 18 Validation Loss 5.9814 Accuracy 0.0818
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 19 Batch 0 Training Loss 0.7941 Accuracy 0.1674
Epoch 2 iteration_i 19 Batch 50 Training Loss 0.8636 Accuracy 0.1646
Epoch 2 iteration_i 19 Batch 100 Training Loss 0.8950 Accuracy 0.1632

Epoch 2 iteration_i 19 Training Loss 0.9123 Accuracy 0.1620
Time taken for 1 epoch: 1229.2802107334137 secs

validating
Epoch 2 iteration_i 19 Validation Loss 6.0204 Accuracy 0.0813
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 20 Batch 0 Training Loss 0.9124 Accuracy 0.1758
Epoch 2 iteration_i 20 Batch 50 Training Loss 0.8543 Accuracy 0.1656
Epoch 2 iteration_i 20 Batch 100 Training Loss 0.8755 Accuracy 0.1646

Epoch 2 iteration_i 20 Training Loss 0.8933 Accuracy 0.1628
Time taken for 1 epoch: 1253.9412479400635 secs

validating
Epoch 2 iteration_i 20 Validation Loss 6.0719 Accuracy 0.0813
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 21 Batch 0 Training Loss 0.7554 Accuracy 0.1811
Epoch 2 iteration_i 21 Batch 50 Training Loss 0.8241 Accuracy 0.1676
Epoch 2 iteration_i 21 Batch 100 Training Loss 0.8477 Accuracy 0.1653

Epoch 2 iteration_i 21 Training Loss 0.8675 Accuracy 0.1641
Time taken for 1 epoch: 1278.5121817588806 secs

validating
Epoch 2 iteration_i 21 Validation Loss 6.1076 Accuracy 0.0810
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 2 iteration_i 22 Batch 0 Training Loss 0.8023 Accuracy 0.1691
Epoch 2 iteration_i 22 Batch 50 Training Loss 0.8013 Accuracy 0.1668
Epoch 2 iteration_i 22 Batch 100 Training Loss 0.8311 Accuracy 0.1666

Epoch 2 iteration_i 22 Training Loss 0.8469 Accuracy 0.1650
Time taken for 1 epoch: 1303.204889535904 secs

validating
Epoch 2 iteration_i 22 Validation Loss 6.1389 Accuracy 0.0814
training ST data


HBox(children=(IntProgress(value=0, max=3126), HTML(value='')))

Epoch 3 Batch 0 Training Loss 4.5001 Accuracy 0.0338
Epoch 3 Batch 100 Training Loss 2.9108 Accuracy 0.0439
Epoch 3 Batch 200 Training Loss 2.6661 Accuracy 0.0463
Epoch 3 Batch 300 Training Loss 2.5401 Accuracy 0.0477
Epoch 3 Batch 400 Training Loss 2.4580 Accuracy 0.0488
Epoch 3 Batch 500 Training Loss 2.4012 Accuracy 0.0495
Epoch 3 Batch 600 Training Loss 2.3541 Accuracy 0.0502
Epoch 3 Batch 700 Training Loss 2.3195 Accuracy 0.0506
Epoch 3 Batch 800 Training Loss 2.2901 Accuracy 0.0511
Epoch 3 Batch 900 Training Loss 2.2652 Accuracy 0.0514
Epoch 3 Batch 1000 Training Loss 2.2445 Accuracy 0.0517
Epoch 3 Batch 1100 Training Loss 2.2270 Accuracy 0.0519
Epoch 3 Batch 1200 Training Loss 2.2115 Accuracy 0.0522
Epoch 3 Batch 1300 Training Loss 2.1977 Accuracy 0.0524
Epoch 3 Batch 1400 Training Loss 2.1852 Accuracy 0.0525
Epoch 3 Batch 1500 Training Loss 2.1741 Accuracy 0.0527
Epoch 3 Batch 1600 Training Loss 2.1633 Accuracy 0.0529
Epoch 3 Batch 1700 Training Loss 2.1543 Accuracy 0.0531
Epoc

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 0 Batch 0 Training Loss 4.2005 Accuracy 0.0919
Epoch 3 iteration_i 0 Batch 50 Training Loss 3.3953 Accuracy 0.0987
Epoch 3 iteration_i 0 Batch 100 Training Loss 3.1491 Accuracy 0.1050

Epoch 3 iteration_i 0 Training Loss 3.0444 Accuracy 0.1079
Time taken for 1 epoch: 753.3331656455994 secs

validating
Epoch 3 iteration_i 0 Validation Loss 4.8866 Accuracy 0.0788
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 1 Batch 0 Training Loss 2.3378 Accuracy 0.1370
Epoch 3 iteration_i 1 Batch 50 Training Loss 2.3540 Accuracy 0.1239
Epoch 3 iteration_i 1 Batch 100 Training Loss 2.3286 Accuracy 0.1245

Epoch 3 iteration_i 1 Training Loss 2.3088 Accuracy 0.1246
Time taken for 1 epoch: 778.205394744873 secs

validating
Epoch 3 iteration_i 1 Validation Loss 5.0119 Accuracy 0.0801
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 2 Batch 0 Training Loss 2.0389 Accuracy 0.1327
Epoch 3 iteration_i 2 Batch 50 Training Loss 1.9996 Accuracy 0.1327
Epoch 3 iteration_i 2 Batch 100 Training Loss 1.9879 Accuracy 0.1326

Epoch 3 iteration_i 2 Training Loss 1.9846 Accuracy 0.1322
Time taken for 1 epoch: 803.0944323539734 secs

validating
Epoch 3 iteration_i 2 Validation Loss 5.1496 Accuracy 0.0810
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 3 Batch 0 Training Loss 1.7346 Accuracy 0.1353
Epoch 3 iteration_i 3 Batch 50 Training Loss 1.7346 Accuracy 0.1385
Epoch 3 iteration_i 3 Batch 100 Training Loss 1.7467 Accuracy 0.1386

Epoch 3 iteration_i 3 Training Loss 1.7550 Accuracy 0.1379
Time taken for 1 epoch: 827.8587567806244 secs

validating
Epoch 3 iteration_i 3 Validation Loss 5.2808 Accuracy 0.0812
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 4 Batch 0 Training Loss 1.5355 Accuracy 0.1401
Epoch 3 iteration_i 4 Batch 50 Training Loss 1.5536 Accuracy 0.1425
Epoch 3 iteration_i 4 Batch 100 Training Loss 1.5731 Accuracy 0.1427

Epoch 3 iteration_i 4 Training Loss 1.5807 Accuracy 0.1424
Time taken for 1 epoch: 852.656051158905 secs

validating
Epoch 3 iteration_i 4 Validation Loss 5.3947 Accuracy 0.0816
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 5 Batch 0 Training Loss 1.3467 Accuracy 0.1567
Epoch 3 iteration_i 5 Batch 50 Training Loss 1.4065 Accuracy 0.1479
Epoch 3 iteration_i 5 Batch 100 Training Loss 1.4257 Accuracy 0.1478

Epoch 3 iteration_i 5 Training Loss 1.4371 Accuracy 0.1464
Time taken for 1 epoch: 877.4862802028656 secs

validating
Epoch 3 iteration_i 5 Validation Loss 5.4835 Accuracy 0.0821
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 6 Batch 0 Training Loss 1.2350 Accuracy 0.1535
Epoch 3 iteration_i 6 Batch 50 Training Loss 1.2828 Accuracy 0.1530
Epoch 3 iteration_i 6 Batch 100 Training Loss 1.3078 Accuracy 0.1502

Epoch 3 iteration_i 6 Training Loss 1.3219 Accuracy 0.1497
Time taken for 1 epoch: 902.2180335521698 secs

validating
Epoch 3 iteration_i 6 Validation Loss 5.6007 Accuracy 0.0818
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 7 Batch 0 Training Loss 1.1710 Accuracy 0.1490
Epoch 3 iteration_i 7 Batch 50 Training Loss 1.1825 Accuracy 0.1540
Epoch 3 iteration_i 7 Batch 100 Training Loss 1.2052 Accuracy 0.1537

Epoch 3 iteration_i 7 Training Loss 1.2240 Accuracy 0.1529
Time taken for 1 epoch: 927.0998411178589 secs

validating
Epoch 3 iteration_i 7 Validation Loss 5.6676 Accuracy 0.0820
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 8 Batch 0 Training Loss 1.0990 Accuracy 0.1642
Epoch 3 iteration_i 8 Batch 50 Training Loss 1.0920 Accuracy 0.1565
Epoch 3 iteration_i 8 Batch 100 Training Loss 1.1215 Accuracy 0.1565

Epoch 3 iteration_i 8 Training Loss 1.1388 Accuracy 0.1558
Time taken for 1 epoch: 951.9727625846863 secs

validating
Epoch 3 iteration_i 8 Validation Loss 5.7704 Accuracy 0.0818
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 9 Batch 0 Training Loss 1.0166 Accuracy 0.1717
Epoch 3 iteration_i 9 Batch 50 Training Loss 1.0200 Accuracy 0.1617
Epoch 3 iteration_i 9 Batch 100 Training Loss 1.0456 Accuracy 0.1597

Epoch 3 iteration_i 9 Training Loss 1.0640 Accuracy 0.1586
Time taken for 1 epoch: 976.7972159385681 secs

validating
Epoch 3 iteration_i 9 Validation Loss 5.8561 Accuracy 0.0819
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 10 Batch 0 Training Loss 1.0177 Accuracy 0.1632
Epoch 3 iteration_i 10 Batch 50 Training Loss 0.9620 Accuracy 0.1626
Epoch 3 iteration_i 10 Batch 100 Training Loss 0.9925 Accuracy 0.1614

Epoch 3 iteration_i 10 Training Loss 1.0059 Accuracy 0.1607
Time taken for 1 epoch: 1001.6385788917542 secs

validating
Epoch 3 iteration_i 10 Validation Loss 5.9162 Accuracy 0.0816
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 11 Batch 0 Training Loss 0.9192 Accuracy 0.1696
Epoch 3 iteration_i 11 Batch 50 Training Loss 0.9055 Accuracy 0.1660
Epoch 3 iteration_i 11 Batch 100 Training Loss 0.9272 Accuracy 0.1636

Epoch 3 iteration_i 11 Training Loss 0.9483 Accuracy 0.1628
Time taken for 1 epoch: 1026.4920647144318 secs

validating
Epoch 3 iteration_i 11 Validation Loss 5.9932 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 12 Batch 0 Training Loss 0.8597 Accuracy 0.1790
Epoch 3 iteration_i 12 Batch 50 Training Loss 0.8655 Accuracy 0.1660
Epoch 3 iteration_i 12 Batch 100 Training Loss 0.8882 Accuracy 0.1655

Epoch 3 iteration_i 12 Training Loss 0.9020 Accuracy 0.1647
Time taken for 1 epoch: 1051.434360742569 secs

validating
Epoch 3 iteration_i 12 Validation Loss 6.0722 Accuracy 0.0818
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 13 Batch 0 Training Loss 0.7975 Accuracy 0.1847
Epoch 3 iteration_i 13 Batch 50 Training Loss 0.8146 Accuracy 0.1692
Epoch 3 iteration_i 13 Batch 100 Training Loss 0.8468 Accuracy 0.1674

Epoch 3 iteration_i 13 Training Loss 0.8615 Accuracy 0.1666
Time taken for 1 epoch: 1076.2883405685425 secs

validating
Epoch 3 iteration_i 13 Validation Loss 6.1268 Accuracy 0.0816
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 14 Batch 0 Training Loss 0.7760 Accuracy 0.1861
Epoch 3 iteration_i 14 Batch 50 Training Loss 0.7876 Accuracy 0.1682
Epoch 3 iteration_i 14 Batch 100 Training Loss 0.8106 Accuracy 0.1685

Epoch 3 iteration_i 14 Training Loss 0.8275 Accuracy 0.1677
Time taken for 1 epoch: 1101.0133893489838 secs

validating
Epoch 3 iteration_i 14 Validation Loss 6.1847 Accuracy 0.0819
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 15 Batch 0 Training Loss 0.6906 Accuracy 0.1814
Epoch 3 iteration_i 15 Batch 50 Training Loss 0.7623 Accuracy 0.1717
Epoch 3 iteration_i 15 Batch 100 Training Loss 0.7832 Accuracy 0.1699

Epoch 3 iteration_i 15 Training Loss 0.7974 Accuracy 0.1688
Time taken for 1 epoch: 1125.9219496250153 secs

validating
Epoch 3 iteration_i 15 Validation Loss 6.2363 Accuracy 0.0815
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 16 Batch 0 Training Loss 0.7759 Accuracy 0.1786
Epoch 3 iteration_i 16 Batch 50 Training Loss 0.7388 Accuracy 0.1687
Epoch 3 iteration_i 16 Batch 100 Training Loss 0.7607 Accuracy 0.1705

Epoch 3 iteration_i 16 Training Loss 0.7739 Accuracy 0.1697
Time taken for 1 epoch: 1150.751445531845 secs

validating
Epoch 3 iteration_i 16 Validation Loss 6.2665 Accuracy 0.0814
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 17 Batch 0 Training Loss 0.6863 Accuracy 0.1822
Epoch 3 iteration_i 17 Batch 50 Training Loss 0.7176 Accuracy 0.1708
Epoch 3 iteration_i 17 Batch 100 Training Loss 0.7310 Accuracy 0.1708

Epoch 3 iteration_i 17 Training Loss 0.7458 Accuracy 0.1710
Time taken for 1 epoch: 1175.6122040748596 secs

validating
Epoch 3 iteration_i 17 Validation Loss 6.3168 Accuracy 0.0815
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 18 Batch 0 Training Loss 0.6966 Accuracy 0.1606
Epoch 3 iteration_i 18 Batch 50 Training Loss 0.6907 Accuracy 0.1736
Epoch 3 iteration_i 18 Batch 100 Training Loss 0.7123 Accuracy 0.1726

Epoch 3 iteration_i 18 Training Loss 0.7275 Accuracy 0.1717
Time taken for 1 epoch: 1200.539468050003 secs

validating
Epoch 3 iteration_i 18 Validation Loss 6.3531 Accuracy 0.0814
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 19 Batch 0 Training Loss 0.6252 Accuracy 0.1745
Epoch 3 iteration_i 19 Batch 50 Training Loss 0.6814 Accuracy 0.1747
Epoch 3 iteration_i 19 Batch 100 Training Loss 0.6977 Accuracy 0.1738

Epoch 3 iteration_i 19 Training Loss 0.7105 Accuracy 0.1723
Time taken for 1 epoch: 1225.5305318832397 secs

validating
Epoch 3 iteration_i 19 Validation Loss 6.4068 Accuracy 0.0815
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 20 Batch 0 Training Loss 0.6821 Accuracy 0.1851
Epoch 3 iteration_i 20 Batch 50 Training Loss 0.6641 Accuracy 0.1736
Epoch 3 iteration_i 20 Batch 100 Training Loss 0.6852 Accuracy 0.1737

Epoch 3 iteration_i 20 Training Loss 0.6984 Accuracy 0.1730
Time taken for 1 epoch: 1250.4960424900055 secs

validating
Epoch 3 iteration_i 20 Validation Loss 6.4176 Accuracy 0.0813
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 21 Batch 0 Training Loss 0.6245 Accuracy 0.1802
Epoch 3 iteration_i 21 Batch 50 Training Loss 0.6451 Accuracy 0.1756
Epoch 3 iteration_i 21 Batch 100 Training Loss 0.6678 Accuracy 0.1747

Epoch 3 iteration_i 21 Training Loss 0.6777 Accuracy 0.1740
Time taken for 1 epoch: 1275.4412939548492 secs

validating
Epoch 3 iteration_i 21 Validation Loss 6.4736 Accuracy 0.0811
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 3 iteration_i 22 Batch 0 Training Loss 0.6210 Accuracy 0.1662
Epoch 3 iteration_i 22 Batch 50 Training Loss 0.6383 Accuracy 0.1747
Epoch 3 iteration_i 22 Batch 100 Training Loss 0.6519 Accuracy 0.1746

Epoch 3 iteration_i 22 Training Loss 0.6625 Accuracy 0.1745
Time taken for 1 epoch: 1300.4064736366272 secs

validating
Epoch 3 iteration_i 22 Validation Loss 6.5221 Accuracy 0.0814
training ST data


HBox(children=(IntProgress(value=0, max=3126), HTML(value='')))

Epoch 4 Batch 0 Training Loss 4.4557 Accuracy 0.0322
Epoch 4 Batch 100 Training Loss 2.9116 Accuracy 0.0447
Epoch 4 Batch 200 Training Loss 2.6477 Accuracy 0.0473
Epoch 4 Batch 300 Training Loss 2.5147 Accuracy 0.0488
Epoch 4 Batch 400 Training Loss 2.4320 Accuracy 0.0498
Epoch 4 Batch 500 Training Loss 2.3722 Accuracy 0.0505
Epoch 4 Batch 600 Training Loss 2.3284 Accuracy 0.0511
Epoch 4 Batch 700 Training Loss 2.2933 Accuracy 0.0515
Epoch 4 Batch 800 Training Loss 2.2638 Accuracy 0.0519
Epoch 4 Batch 900 Training Loss 2.2399 Accuracy 0.0522
Epoch 4 Batch 1000 Training Loss 2.2207 Accuracy 0.0524
Epoch 4 Batch 1100 Training Loss 2.2026 Accuracy 0.0526
Epoch 4 Batch 1200 Training Loss 2.1877 Accuracy 0.0528
Epoch 4 Batch 1300 Training Loss 2.1742 Accuracy 0.0530
Epoch 4 Batch 1400 Training Loss 2.1613 Accuracy 0.0532
Epoch 4 Batch 1500 Training Loss 2.1492 Accuracy 0.0534
Epoch 4 Batch 1600 Training Loss 2.1390 Accuracy 0.0535
Epoch 4 Batch 1700 Training Loss 2.1294 Accuracy 0.0537
Epoc

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 0 Batch 0 Training Loss 4.2942 Accuracy 0.0957
Epoch 4 iteration_i 0 Batch 50 Training Loss 3.2610 Accuracy 0.1020
Epoch 4 iteration_i 0 Batch 100 Training Loss 2.9990 Accuracy 0.1110

Epoch 4 iteration_i 0 Training Loss 2.8785 Accuracy 0.1139
Time taken for 1 epoch: 756.374470949173 secs

validating
Epoch 4 iteration_i 0 Validation Loss 5.0821 Accuracy 0.0790
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 1 Batch 0 Training Loss 2.1744 Accuracy 0.1278
Epoch 4 iteration_i 1 Batch 50 Training Loss 2.1695 Accuracy 0.1306
Epoch 4 iteration_i 1 Batch 100 Training Loss 2.1448 Accuracy 0.1320

Epoch 4 iteration_i 1 Training Loss 2.1267 Accuracy 0.1324
Time taken for 1 epoch: 781.0543756484985 secs

validating
Epoch 4 iteration_i 1 Validation Loss 5.2515 Accuracy 0.0804
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 2 Batch 0 Training Loss 1.8676 Accuracy 0.1473
Epoch 4 iteration_i 2 Batch 50 Training Loss 1.8208 Accuracy 0.1402
Epoch 4 iteration_i 2 Batch 100 Training Loss 1.8142 Accuracy 0.1399

Epoch 4 iteration_i 2 Training Loss 1.8055 Accuracy 0.1402
Time taken for 1 epoch: 805.7964055538177 secs

validating
Epoch 4 iteration_i 2 Validation Loss 5.4169 Accuracy 0.0809
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 3 Batch 0 Training Loss 1.6076 Accuracy 0.1514
Epoch 4 iteration_i 3 Batch 50 Training Loss 1.5738 Accuracy 0.1484
Epoch 4 iteration_i 3 Batch 100 Training Loss 1.5840 Accuracy 0.1465

Epoch 4 iteration_i 3 Training Loss 1.5857 Accuracy 0.1457
Time taken for 1 epoch: 830.549106836319 secs

validating
Epoch 4 iteration_i 3 Validation Loss 5.5665 Accuracy 0.0815
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 4 Batch 0 Training Loss 1.3636 Accuracy 0.1503
Epoch 4 iteration_i 4 Batch 50 Training Loss 1.4010 Accuracy 0.1481
Epoch 4 iteration_i 4 Batch 100 Training Loss 1.4153 Accuracy 0.1490

Epoch 4 iteration_i 4 Training Loss 1.4212 Accuracy 0.1500
Time taken for 1 epoch: 855.3334136009216 secs

validating
Epoch 4 iteration_i 4 Validation Loss 5.6845 Accuracy 0.0819
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 5 Batch 0 Training Loss 1.1745 Accuracy 0.1673
Epoch 4 iteration_i 5 Batch 50 Training Loss 1.2476 Accuracy 0.1529
Epoch 4 iteration_i 5 Batch 100 Training Loss 1.2713 Accuracy 0.1537

Epoch 4 iteration_i 5 Training Loss 1.2802 Accuracy 0.1538
Time taken for 1 epoch: 880.0718698501587 secs

validating
Epoch 4 iteration_i 5 Validation Loss 5.8059 Accuracy 0.0815
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 6 Batch 0 Training Loss 1.0793 Accuracy 0.1645
Epoch 4 iteration_i 6 Batch 50 Training Loss 1.1342 Accuracy 0.1586
Epoch 4 iteration_i 6 Batch 100 Training Loss 1.1497 Accuracy 0.1574

Epoch 4 iteration_i 6 Training Loss 1.1617 Accuracy 0.1575
Time taken for 1 epoch: 904.7703976631165 secs

validating
Epoch 4 iteration_i 6 Validation Loss 5.8956 Accuracy 0.0816
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 7 Batch 0 Training Loss 1.1011 Accuracy 0.1703
Epoch 4 iteration_i 7 Batch 50 Training Loss 1.0360 Accuracy 0.1623
Epoch 4 iteration_i 7 Batch 100 Training Loss 1.0532 Accuracy 0.1609

Epoch 4 iteration_i 7 Training Loss 1.0700 Accuracy 0.1602
Time taken for 1 epoch: 929.5290985107422 secs

validating
Epoch 4 iteration_i 7 Validation Loss 5.9835 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 8 Batch 0 Training Loss 0.8248 Accuracy 0.1486
Epoch 4 iteration_i 8 Batch 50 Training Loss 0.9434 Accuracy 0.1637
Epoch 4 iteration_i 8 Batch 100 Training Loss 0.9710 Accuracy 0.1635

Epoch 4 iteration_i 8 Training Loss 0.9850 Accuracy 0.1632
Time taken for 1 epoch: 954.2877688407898 secs

validating
Epoch 4 iteration_i 8 Validation Loss 6.0359 Accuracy 0.0822
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 9 Batch 0 Training Loss 0.9068 Accuracy 0.1606
Epoch 4 iteration_i 9 Batch 50 Training Loss 0.8849 Accuracy 0.1662
Epoch 4 iteration_i 9 Batch 100 Training Loss 0.9045 Accuracy 0.1659

Epoch 4 iteration_i 9 Training Loss 0.9171 Accuracy 0.1659
Time taken for 1 epoch: 979.168089389801 secs

validating
Epoch 4 iteration_i 9 Validation Loss 6.1149 Accuracy 0.0821
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 10 Batch 0 Training Loss 0.7795 Accuracy 0.1674
Epoch 4 iteration_i 10 Batch 50 Training Loss 0.8308 Accuracy 0.1714
Epoch 4 iteration_i 10 Batch 100 Training Loss 0.8462 Accuracy 0.1698

Epoch 4 iteration_i 10 Training Loss 0.8585 Accuracy 0.1682
Time taken for 1 epoch: 1003.8469803333282 secs

validating
Epoch 4 iteration_i 10 Validation Loss 6.1676 Accuracy 0.0818
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 11 Batch 0 Training Loss 0.7763 Accuracy 0.1733
Epoch 4 iteration_i 11 Batch 50 Training Loss 0.7662 Accuracy 0.1739
Epoch 4 iteration_i 11 Batch 100 Training Loss 0.7890 Accuracy 0.1719

Epoch 4 iteration_i 11 Training Loss 0.8078 Accuracy 0.1703
Time taken for 1 epoch: 1028.6786987781525 secs

validating
Epoch 4 iteration_i 11 Validation Loss 6.2425 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 12 Batch 0 Training Loss 0.6752 Accuracy 0.1694
Epoch 4 iteration_i 12 Batch 50 Training Loss 0.7313 Accuracy 0.1736
Epoch 4 iteration_i 12 Batch 100 Training Loss 0.7542 Accuracy 0.1719

Epoch 4 iteration_i 12 Training Loss 0.7675 Accuracy 0.1718
Time taken for 1 epoch: 1053.3983614444733 secs

validating
Epoch 4 iteration_i 12 Validation Loss 6.3042 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 13 Batch 0 Training Loss 0.6607 Accuracy 0.1634
Epoch 4 iteration_i 13 Batch 50 Training Loss 0.6903 Accuracy 0.1724
Epoch 4 iteration_i 13 Batch 100 Training Loss 0.7112 Accuracy 0.1735

Epoch 4 iteration_i 13 Training Loss 0.7275 Accuracy 0.1736
Time taken for 1 epoch: 1078.0774731636047 secs

validating
Epoch 4 iteration_i 13 Validation Loss 6.3363 Accuracy 0.0820
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 14 Batch 0 Training Loss 0.6478 Accuracy 0.1809
Epoch 4 iteration_i 14 Batch 50 Training Loss 0.6525 Accuracy 0.1796
Epoch 4 iteration_i 14 Batch 100 Training Loss 0.6806 Accuracy 0.1763

Epoch 4 iteration_i 14 Training Loss 0.6950 Accuracy 0.1747
Time taken for 1 epoch: 1102.8978250026703 secs

validating
Epoch 4 iteration_i 14 Validation Loss 6.3874 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 15 Batch 0 Training Loss 0.5899 Accuracy 0.1782
Epoch 4 iteration_i 15 Batch 50 Training Loss 0.6365 Accuracy 0.1763
Epoch 4 iteration_i 15 Batch 100 Training Loss 0.6550 Accuracy 0.1764

Epoch 4 iteration_i 15 Training Loss 0.6731 Accuracy 0.1756
Time taken for 1 epoch: 1127.6162736415863 secs

validating
Epoch 4 iteration_i 15 Validation Loss 6.4581 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 16 Batch 0 Training Loss 0.6389 Accuracy 0.1829
Epoch 4 iteration_i 16 Batch 50 Training Loss 0.6120 Accuracy 0.1792
Epoch 4 iteration_i 16 Batch 100 Training Loss 0.6310 Accuracy 0.1773

Epoch 4 iteration_i 16 Training Loss 0.6453 Accuracy 0.1764
Time taken for 1 epoch: 1152.353045463562 secs

validating
Epoch 4 iteration_i 16 Validation Loss 6.5176 Accuracy 0.0815
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 17 Batch 0 Training Loss 0.5926 Accuracy 0.1922
Epoch 4 iteration_i 17 Batch 50 Training Loss 0.6032 Accuracy 0.1810
Epoch 4 iteration_i 17 Batch 100 Training Loss 0.6151 Accuracy 0.1776

Epoch 4 iteration_i 17 Training Loss 0.6288 Accuracy 0.1772
Time taken for 1 epoch: 1177.1186277866364 secs

validating
Epoch 4 iteration_i 17 Validation Loss 6.5299 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 18 Batch 0 Training Loss 0.6543 Accuracy 0.1607
Epoch 4 iteration_i 18 Batch 50 Training Loss 0.5845 Accuracy 0.1794
Epoch 4 iteration_i 18 Batch 100 Training Loss 0.5994 Accuracy 0.1787

Epoch 4 iteration_i 18 Training Loss 0.6125 Accuracy 0.1778
Time taken for 1 epoch: 1201.8699278831482 secs

validating
Epoch 4 iteration_i 18 Validation Loss 6.5965 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 19 Batch 0 Training Loss 0.5459 Accuracy 0.1955
Epoch 4 iteration_i 19 Batch 50 Training Loss 0.5728 Accuracy 0.1790
Epoch 4 iteration_i 19 Batch 100 Training Loss 0.5866 Accuracy 0.1790

Epoch 4 iteration_i 19 Training Loss 0.5953 Accuracy 0.1785
Time taken for 1 epoch: 1226.6162037849426 secs

validating
Epoch 4 iteration_i 19 Validation Loss 6.6157 Accuracy 0.0812
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 20 Batch 0 Training Loss 0.5534 Accuracy 0.1924
Epoch 4 iteration_i 20 Batch 50 Training Loss 0.5510 Accuracy 0.1828
Epoch 4 iteration_i 20 Batch 100 Training Loss 0.5655 Accuracy 0.1809

Epoch 4 iteration_i 20 Training Loss 0.5819 Accuracy 0.1792
Time taken for 1 epoch: 1251.3952741622925 secs

validating
Epoch 4 iteration_i 20 Validation Loss 6.6605 Accuracy 0.0812
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 21 Batch 0 Training Loss 0.5178 Accuracy 0.1674
Epoch 4 iteration_i 21 Batch 50 Training Loss 0.5573 Accuracy 0.1788
Epoch 4 iteration_i 21 Batch 100 Training Loss 0.5632 Accuracy 0.1789

Epoch 4 iteration_i 21 Training Loss 0.5716 Accuracy 0.1793
Time taken for 1 epoch: 1276.1649150848389 secs

validating
Epoch 4 iteration_i 21 Validation Loss 6.6787 Accuracy 0.0810
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 4 iteration_i 22 Batch 0 Training Loss 0.5101 Accuracy 0.2073
Epoch 4 iteration_i 22 Batch 50 Training Loss 0.5341 Accuracy 0.1813
Epoch 4 iteration_i 22 Batch 100 Training Loss 0.5507 Accuracy 0.1811

Epoch 4 iteration_i 22 Training Loss 0.5629 Accuracy 0.1800
Time taken for 1 epoch: 1301.0542764663696 secs

validating
Epoch 4 iteration_i 22 Validation Loss 6.7227 Accuracy 0.0812
training ST data


HBox(children=(IntProgress(value=0, max=3126), HTML(value='')))

Epoch 5 Batch 0 Training Loss 4.7149 Accuracy 0.0380
Epoch 5 Batch 100 Training Loss 2.9335 Accuracy 0.0450
Epoch 5 Batch 200 Training Loss 2.6492 Accuracy 0.0475
Epoch 5 Batch 300 Training Loss 2.5077 Accuracy 0.0491
Epoch 5 Batch 400 Training Loss 2.4229 Accuracy 0.0499
Epoch 5 Batch 500 Training Loss 2.3601 Accuracy 0.0507
Epoch 5 Batch 600 Training Loss 2.3150 Accuracy 0.0512
Epoch 5 Batch 700 Training Loss 2.2803 Accuracy 0.0516
Epoch 5 Batch 800 Training Loss 2.2501 Accuracy 0.0520
Epoch 5 Batch 900 Training Loss 2.2241 Accuracy 0.0524
Epoch 5 Batch 1000 Training Loss 2.2030 Accuracy 0.0527
Epoch 5 Batch 1100 Training Loss 2.1853 Accuracy 0.0529
Epoch 5 Batch 1200 Training Loss 2.1699 Accuracy 0.0531
Epoch 5 Batch 1300 Training Loss 2.1568 Accuracy 0.0533
Epoch 5 Batch 1400 Training Loss 2.1446 Accuracy 0.0534
Epoch 5 Batch 1500 Training Loss 2.1334 Accuracy 0.0536
Epoch 5 Batch 1600 Training Loss 2.1232 Accuracy 0.0538
Epoch 5 Batch 1700 Training Loss 2.1139 Accuracy 0.0539
Epoc

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 0 Batch 0 Training Loss 4.2111 Accuracy 0.1014
Epoch 5 iteration_i 0 Batch 50 Training Loss 3.3409 Accuracy 0.1050
Epoch 5 iteration_i 0 Batch 100 Training Loss 3.0758 Accuracy 0.1132

Epoch 5 iteration_i 0 Training Loss 2.9613 Accuracy 0.1166
Time taken for 1 epoch: 754.6557536125183 secs

validating
Epoch 5 iteration_i 0 Validation Loss 5.1717 Accuracy 0.0782
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 1 Batch 0 Training Loss 2.2494 Accuracy 0.1304
Epoch 5 iteration_i 1 Batch 50 Training Loss 2.2188 Accuracy 0.1345
Epoch 5 iteration_i 1 Batch 100 Training Loss 2.1672 Accuracy 0.1355

Epoch 5 iteration_i 1 Training Loss 2.1413 Accuracy 0.1359
Time taken for 1 epoch: 779.5338587760925 secs

validating
Epoch 5 iteration_i 1 Validation Loss 5.3411 Accuracy 0.0804
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 2 Batch 0 Training Loss 1.9067 Accuracy 0.1338
Epoch 5 iteration_i 2 Batch 50 Training Loss 1.8212 Accuracy 0.1431
Epoch 5 iteration_i 2 Batch 100 Training Loss 1.8010 Accuracy 0.1442

Epoch 5 iteration_i 2 Training Loss 1.7952 Accuracy 0.1441
Time taken for 1 epoch: 804.4910817146301 secs

validating
Epoch 5 iteration_i 2 Validation Loss 5.5298 Accuracy 0.0807
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 3 Batch 0 Training Loss 1.5659 Accuracy 0.1459
Epoch 5 iteration_i 3 Batch 50 Training Loss 1.5572 Accuracy 0.1480
Epoch 5 iteration_i 3 Batch 100 Training Loss 1.5569 Accuracy 0.1488

Epoch 5 iteration_i 3 Training Loss 1.5522 Accuracy 0.1492
Time taken for 1 epoch: 829.3613345623016 secs

validating
Epoch 5 iteration_i 3 Validation Loss 5.6449 Accuracy 0.0813
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 4 Batch 0 Training Loss 1.3519 Accuracy 0.1543
Epoch 5 iteration_i 4 Batch 50 Training Loss 1.3538 Accuracy 0.1543
Epoch 5 iteration_i 4 Batch 100 Training Loss 1.3612 Accuracy 0.1540

Epoch 5 iteration_i 4 Training Loss 1.3653 Accuracy 0.1537
Time taken for 1 epoch: 854.2687029838562 secs

validating
Epoch 5 iteration_i 4 Validation Loss 5.7623 Accuracy 0.0813
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 5 Batch 0 Training Loss 1.2458 Accuracy 0.1639
Epoch 5 iteration_i 5 Batch 50 Training Loss 1.2035 Accuracy 0.1606
Epoch 5 iteration_i 5 Batch 100 Training Loss 1.2085 Accuracy 0.1583

Epoch 5 iteration_i 5 Training Loss 1.2141 Accuracy 0.1577
Time taken for 1 epoch: 879.1650333404541 secs

validating
Epoch 5 iteration_i 5 Validation Loss 5.8679 Accuracy 0.0816
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 6 Batch 0 Training Loss 1.0372 Accuracy 0.1628
Epoch 5 iteration_i 6 Batch 50 Training Loss 1.0653 Accuracy 0.1612
Epoch 5 iteration_i 6 Batch 100 Training Loss 1.0773 Accuracy 0.1612

Epoch 5 iteration_i 6 Training Loss 1.0880 Accuracy 0.1609
Time taken for 1 epoch: 904.0575838088989 secs

validating
Epoch 5 iteration_i 6 Validation Loss 5.9484 Accuracy 0.0816
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 7 Batch 0 Training Loss 0.9096 Accuracy 0.1812
Epoch 5 iteration_i 7 Batch 50 Training Loss 0.9565 Accuracy 0.1659
Epoch 5 iteration_i 7 Batch 100 Training Loss 0.9667 Accuracy 0.1642

Epoch 5 iteration_i 7 Training Loss 0.9776 Accuracy 0.1647
Time taken for 1 epoch: 929.1433446407318 secs

validating
Epoch 5 iteration_i 7 Validation Loss 6.0079 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 8 Batch 0 Training Loss 0.8310 Accuracy 0.1823
Epoch 5 iteration_i 8 Batch 50 Training Loss 0.8610 Accuracy 0.1700
Epoch 5 iteration_i 8 Batch 100 Training Loss 0.8815 Accuracy 0.1679

Epoch 5 iteration_i 8 Training Loss 0.8914 Accuracy 0.1678
Time taken for 1 epoch: 953.9578468799591 secs

validating
Epoch 5 iteration_i 8 Validation Loss 6.0793 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 9 Batch 0 Training Loss 0.8535 Accuracy 0.1789
Epoch 5 iteration_i 9 Batch 50 Training Loss 0.7713 Accuracy 0.1731
Epoch 5 iteration_i 9 Batch 100 Training Loss 0.7953 Accuracy 0.1724

Epoch 5 iteration_i 9 Training Loss 0.8112 Accuracy 0.1709
Time taken for 1 epoch: 978.7525205612183 secs

validating
Epoch 5 iteration_i 9 Validation Loss 6.1401 Accuracy 0.0818
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 10 Batch 0 Training Loss 0.7990 Accuracy 0.1677
Epoch 5 iteration_i 10 Batch 50 Training Loss 0.7169 Accuracy 0.1754
Epoch 5 iteration_i 10 Batch 100 Training Loss 0.7387 Accuracy 0.1735

Epoch 5 iteration_i 10 Training Loss 0.7526 Accuracy 0.1732
Time taken for 1 epoch: 1003.6056303977966 secs

validating
Epoch 5 iteration_i 10 Validation Loss 6.2353 Accuracy 0.0819
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 11 Batch 0 Training Loss 0.6809 Accuracy 0.1913
Epoch 5 iteration_i 11 Batch 50 Training Loss 0.6729 Accuracy 0.1764
Epoch 5 iteration_i 11 Batch 100 Training Loss 0.6905 Accuracy 0.1760

Epoch 5 iteration_i 11 Training Loss 0.7022 Accuracy 0.1753
Time taken for 1 epoch: 1028.3548414707184 secs

validating
Epoch 5 iteration_i 11 Validation Loss 6.2699 Accuracy 0.0814
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 12 Batch 0 Training Loss 0.5897 Accuracy 0.1832
Epoch 5 iteration_i 12 Batch 50 Training Loss 0.6306 Accuracy 0.1795
Epoch 5 iteration_i 12 Batch 100 Training Loss 0.6466 Accuracy 0.1781

Epoch 5 iteration_i 12 Training Loss 0.6609 Accuracy 0.1771
Time taken for 1 epoch: 1053.2091464996338 secs

validating
Epoch 5 iteration_i 12 Validation Loss 6.3308 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 13 Batch 0 Training Loss 0.6151 Accuracy 0.1892
Epoch 5 iteration_i 13 Batch 50 Training Loss 0.5945 Accuracy 0.1800
Epoch 5 iteration_i 13 Batch 100 Training Loss 0.6145 Accuracy 0.1790

Epoch 5 iteration_i 13 Training Loss 0.6271 Accuracy 0.1783
Time taken for 1 epoch: 1077.9979746341705 secs

validating
Epoch 5 iteration_i 13 Validation Loss 6.3837 Accuracy 0.0821
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 14 Batch 0 Training Loss 0.6041 Accuracy 0.1826
Epoch 5 iteration_i 14 Batch 50 Training Loss 0.5783 Accuracy 0.1806
Epoch 5 iteration_i 14 Batch 100 Training Loss 0.5863 Accuracy 0.1795

Epoch 5 iteration_i 14 Training Loss 0.5998 Accuracy 0.1793
Time taken for 1 epoch: 1102.793336391449 secs

validating
Epoch 5 iteration_i 14 Validation Loss 6.4559 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 15 Batch 0 Training Loss 0.5139 Accuracy 0.1773
Epoch 5 iteration_i 15 Batch 50 Training Loss 0.5458 Accuracy 0.1844
Epoch 5 iteration_i 15 Batch 100 Training Loss 0.5654 Accuracy 0.1822

Epoch 5 iteration_i 15 Training Loss 0.5786 Accuracy 0.1801
Time taken for 1 epoch: 1127.6860845088959 secs

validating
Epoch 5 iteration_i 15 Validation Loss 6.4933 Accuracy 0.0814
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 16 Batch 0 Training Loss 0.4590 Accuracy 0.1773
Epoch 5 iteration_i 16 Batch 50 Training Loss 0.5373 Accuracy 0.1823
Epoch 5 iteration_i 16 Batch 100 Training Loss 0.5443 Accuracy 0.1815

Epoch 5 iteration_i 16 Training Loss 0.5528 Accuracy 0.1810
Time taken for 1 epoch: 1152.4528715610504 secs

validating
Epoch 5 iteration_i 16 Validation Loss 6.5502 Accuracy 0.0816
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 17 Batch 0 Training Loss 0.5230 Accuracy 0.1751
Epoch 5 iteration_i 17 Batch 50 Training Loss 0.5173 Accuracy 0.1829
Epoch 5 iteration_i 17 Batch 100 Training Loss 0.5284 Accuracy 0.1821

Epoch 5 iteration_i 17 Training Loss 0.5413 Accuracy 0.1816
Time taken for 1 epoch: 1177.2006614208221 secs

validating
Epoch 5 iteration_i 17 Validation Loss 6.6082 Accuracy 0.0813
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 18 Batch 0 Training Loss 0.4633 Accuracy 0.1719
Epoch 5 iteration_i 18 Batch 50 Training Loss 0.4859 Accuracy 0.1846
Epoch 5 iteration_i 18 Batch 100 Training Loss 0.5110 Accuracy 0.1838

Epoch 5 iteration_i 18 Training Loss 0.5247 Accuracy 0.1823
Time taken for 1 epoch: 1201.9836168289185 secs

validating
Epoch 5 iteration_i 18 Validation Loss 6.6094 Accuracy 0.0817
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 19 Batch 0 Training Loss 0.4316 Accuracy 0.1946
Epoch 5 iteration_i 19 Batch 50 Training Loss 0.4856 Accuracy 0.1860
Epoch 5 iteration_i 19 Batch 100 Training Loss 0.4964 Accuracy 0.1839

Epoch 5 iteration_i 19 Training Loss 0.5090 Accuracy 0.1830
Time taken for 1 epoch: 1226.8338775634766 secs

validating
Epoch 5 iteration_i 19 Validation Loss 6.6705 Accuracy 0.0819
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 20 Batch 0 Training Loss 0.5278 Accuracy 0.1848
Epoch 5 iteration_i 20 Batch 50 Training Loss 0.4911 Accuracy 0.1842
Epoch 5 iteration_i 20 Batch 100 Training Loss 0.4966 Accuracy 0.1841

Epoch 5 iteration_i 20 Training Loss 0.5054 Accuracy 0.1828
Time taken for 1 epoch: 1251.6097049713135 secs

validating
Epoch 5 iteration_i 20 Validation Loss 6.6985 Accuracy 0.0814
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 21 Batch 0 Training Loss 0.4912 Accuracy 0.1860
Epoch 5 iteration_i 21 Batch 50 Training Loss 0.4714 Accuracy 0.1834
Epoch 5 iteration_i 21 Batch 100 Training Loss 0.4844 Accuracy 0.1844

Epoch 5 iteration_i 21 Training Loss 0.4952 Accuracy 0.1835
Time taken for 1 epoch: 1276.3148612976074 secs

validating
Epoch 5 iteration_i 21 Validation Loss 6.7422 Accuracy 0.0819
training Parallel data


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Epoch 5 iteration_i 22 Batch 0 Training Loss 0.4222 Accuracy 0.1971
Epoch 5 iteration_i 22 Batch 50 Training Loss 0.4570 Accuracy 0.1836
Epoch 5 iteration_i 22 Batch 100 Training Loss 0.4751 Accuracy 0.1848

Epoch 5 iteration_i 22 Training Loss 0.4833 Accuracy 0.1840
Time taken for 1 epoch: 1301.1362941265106 secs

validating
Epoch 5 iteration_i 22 Validation Loss 6.7671 Accuracy 0.0814


Evaluate best model

In [0]:
# load model

# if a checkpoint exists, restore the latest checkpoint.
if ckpt_manager.latest_checkpoint:
  ckpt.restore(ckpt_manager.latest_checkpoint)
  print ('Latest checkpoint restored!!')

Latest checkpoint restored!!


In [0]:
transformer

<transformer.Transformer at 0x7f2bf53d4fd0>

In [14]:
val_loss.reset_states()
val_accuracy.reset_states()
  
for (batch, (inp, tar)) in tqdm(enumerate(tensor_val)):
  val_step(inp, tar)
  
print ('Validation Loss {:.4f} Accuracy {:.4f}'.format(
                                          val_loss.result(), 
                                          val_accuracy.result()))
  

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Validation Loss 6.7671 Accuracy 0.0814


In [0]:
def generate_predictions(inp_sentences):

  if len(inp_sentences.get_shape())==1:
    encoder_input = tf.expand_dims(inp_sentences, 0)
    decoder_input = [french_word2id["<start>"]]
    output = tf.expand_dims(decoder_input, 0)

  else:
    encoder_input = inp_sentences
    decoder_input = [french_word2id["<start>"]]*inp_sentences.get_shape()[0]
    output = tf.expand_dims(decoder_input, -1)


  # encoder_input = tf.expand_dims(inp_sentence, 0)
  
  # decoder_input = [french_word2id["<start>"]]
  # output = tf.expand_dims(decoder_input, 0)
  
  for i in range(pe_target):
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        encoder_input, output)
  
    # predictions.shape == (batch_size, seq_len, vocab_size)
    predictions, attention_weights = transformer(encoder_input, 
                                                 output,
                                                 False,
                                                 enc_padding_mask,  
                                                 combined_mask,
                                                 dec_padding_mask)
    
    # select the last word from the seq_len dimension
    predictions = predictions[: ,-1:, :]  # (batch_size, 1, vocab_size)

    predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
    
    # # return the result if all the seqs has the end token
    if tf.reduce_sum(tf.cast((tf.reduce_sum(tf.cast(output == french_word2id["<eos>"], tf.float32),axis=1)>0), tf.float32)) == inp.get_shape()[0]:
      return output, attention_weights
    
    # concatentate the predicted_id to the output which is given to the decoder
    # as its input.
    output = tf.concat([output, predicted_id], axis=-1)

  # return tf.squeeze(output, axis=0), attention_weights
  return output, attention_weights

In [16]:
all_preds = []
for (batch_i, (inp, tar)) in tqdm(enumerate(tensor_val)):
  preds, attention = generate_predictions(inp)
  all_preds.append(preds)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [17]:
translated_sentences = []

for k in tqdm(all_preds):
  for i in k:
    sentence_french = []
    for j in i.numpy()[1:]:
      if j==0 or j==french_word2id["<eos>"]:
        break
      sentence_french.append(french_id2word[j])

    sentence_french = " ".join(sentence_french)

    translated_sentences.append(sentence_french)

translated_sentences = "\n".join(translated_sentences)

with open("predictions.txt","w") as f:
  f.write(translated_sentences)

HBox(children=(IntProgress(value=0, max=35), HTML(value='')))




In [18]:
!pip install sacrebleu
!python evaluator.py --input-file-path ./predictions.txt --target-file-path ./data/split_val.lang2 --do-not-run-model 

Collecting sacrebleu
[?25l  Downloading https://files.pythonhosted.org/packages/f5/58/5c6cc352ea6271125325950715cf8b59b77abe5e93cf29f6e60b491a31d9/sacrebleu-1.4.6-py3-none-any.whl (59kB)
[K     |████████████████████████████████| 61kB 3.2MB/s 
[?25hCollecting portalocker
  Downloading https://files.pythonhosted.org/packages/53/84/7b3146ec6378d28abc73ab484f09f47dfa008ad6f03f33d90a369f880e25/portalocker-1.7.0-py2.py3-none-any.whl
Collecting mecab-python3
[?25l  Downloading https://files.pythonhosted.org/packages/18/49/b55a839a77189042960bf96490640c44816073f917d489acbc5d79fa5cc3/mecab_python3-0.996.5-cp36-cp36m-manylinux2010_x86_64.whl (17.1MB)
[K     |████████████████████████████████| 17.1MB 200kB/s 
Installing collected packages: portalocker, mecab-python3, sacrebleu
Successfully installed mecab-python3-0.996.5 portalocker-1.7.0 sacrebleu-1.4.6
final avg bleu score: 11.16


In [19]:
ckpt_save_path = ckpt_manager.save()
print ('Saving checkpoint for epoch {} iteration_i {} at {}'.format(epoch+1,
                                                    iteration_i,
                                                    ckpt_save_path))

Saving checkpoint for epoch 5 iteration_i 22 at ./checkpoints/trainst_3_/ckpt-4


Self-Training Monolingual Data Generation

In [0]:
amount_data_start = 100000
amount_data_end = 200000

In [0]:
with open(data_path/"unaligned_tokenized_rempunc.en","r") as f:
    english_monolingual = f.read().strip().lower()
print(len(english_monolingual.split("\n")), english_monolingual[:200])

474000 for the second phase of the trials we just had different sizes small medium large and extra - large it 's true
geng had been my host the previous january when i was the first us defense secretary to v


In [0]:
def transform_test_data(lang1, dict_word2id, amount_data_start=None,amount_data_end=None):
  lines = lang1.split("\n")
  if amount_data_start or amount_data_end:
    lines = lines[amount_data_start:amount_data_end]
  data = []

  for line in lines:
    line2id = [dict_word2id["<start>"]]
    for word in line.split():
      try:
        line2id.append(dict_word2id[word])
      except:
        line2id.append(dict_word2id["<unk>"])
    line2id.append(dict_word2id["<eos>"])
    data.append(line2id)

  return data

english_monolingual_data = transform_test_data(english_monolingual, english_word2id, amount_data_start, amount_data_end)
len(english_monolingual_data)

100000

In [0]:
max([len(i) for i in english_monolingual_data])

112

In [0]:
with open(data_path/"unaligned.fr","r") as f:
    french_monolingual = f.read().strip().lower()
print(len(french_monolingual.split("\n")), french_monolingual[:200])
french_monolingual_data = transform_test_data(french_monolingual, french_word2id)
print(len(french_monolingual_data))
max([len(i) for i in french_monolingual_data])

474000 nous n’aurions pas pu dégager d’accord sur un calendrier de conclusion de la cig sans l’engagement politique de mes collègues du conseil européen.
(de) madame la présidente, monsieur le commissaire, m
474000


220

In [0]:
pe_input = max([len(i) for i in english_monolingual_data])
pe_target = max([len(i) for i in french_monolingual_data])

In [0]:
tensor_test = tf.data.Dataset.from_tensor_slices((
    tf.keras.preprocessing.sequence.pad_sequences(english_monolingual_data, padding='post')
)).batch(BATCH_SIZE, drop_remainder=False)

In [0]:
transformer = Transformer(
    num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, 
    input_vocab_size=input_vocab_size, target_vocab_size=target_vocab_size, 
    pe_input=pe_input, pe_target=pe_target, rate=dropout_rate)

In [0]:
experiment_number = "7_smaller_1_1024_"

checkpoint_path = "./checkpoints/train"+experiment_number

ckpt = tf.train.Checkpoint(transformer=transformer,
                           optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=1)

if ckpt_manager.latest_checkpoint:
  ckpt.restore(ckpt_manager.latest_checkpoint)
  print ('Latest checkpoint restored!!')

Latest checkpoint restored!!


In [0]:
all_preds = []
for batch_i, inp in tqdm(enumerate(tensor_test.unbatch().batch(128)),total=len(english_monolingual_data) // 128 + 1):
  preds, attention = generate_predictions(inp)
  all_preds.append(preds)

HBox(children=(IntProgress(value=0, max=782), HTML(value='')))




In [0]:
translated_sentences = []

for k in tqdm(all_preds):
  for i in k:
    sentence_french = []
    for j in i.numpy()[1:]:
      if j==0 or j==french_word2id["<eos>"]:
        break
      sentence_french.append(french_id2word[j])

    sentence_french = " ".join(sentence_french)

    translated_sentences.append(sentence_french)

translated_sentences = "\n".join(translated_sentences)

with open("predictions_english_monolingual_"+str(amount_data_start)+"_"+str(amount_data_end)+".txt","w") as f:
  f.write(translated_sentences)

HBox(children=(IntProgress(value=0, max=782), HTML(value='')))


