In [1]:
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

import pickle # to save/load objects
import os
import sys
sys.path.append("../helper/") # go to parent dir
from functions_preprocess import *
from functions_model import *

%load_ext autoreload
%autoreload 2

### Data Preprocessing
Here, I'm getting, cleaning, and processing the data. I intentionally mixed bands with different characteristics, thinking this might prevent overfitting and provide a better word pool. The data contains almost the entire discography of each band, even demos for some.  

I'm also creating two dictionaries here, `vocab_to_int` and `int_to_vocab`. The model needs a numerical representation of the characters to be able to compute weights, biases, etc.

In [2]:
# Prepare training data.
# Every sub-folder of ../data/ is one artist (each artist folder holds albums).
# os.walk reports sub-directories in an arbitrary, filesystem-dependent order,
# so sort them: the order of the training text and `folder_name` (which names
# the checkpoint and log directories) are then reproducible across machines.
artist_list = sorted(next(os.walk('../data/'))[1])

# Build the corpus with a single join instead of repeated `text = text + ...`.
text = ''.join(combine_songs(artist) for artist in artist_list)
# First letter of every artist folder. Not the greatest folder name, but compact.
folder_name = ''.join(artist[:1] for artist in artist_list)

vocab = set(text)
# A set has no guaranteed iteration order, so enumerate the characters in
# sorted order: the same corpus then always yields the same integer ids.
sorted_vocab = sorted(vocab)
vocab_to_int = {c: i for i, c in enumerate(sorted_vocab)}
int_to_vocab = dict(enumerate(sorted_vocab))
# Encode the whole corpus as an int32 array of character ids.
chars = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
print(folder_name, "ready with", len(chars), "chars")

aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw ready with 6292269 chars


### Saving
I need to save the objects I created so far. Remember that Python sets have no guaranteed order, so dictionaries built by enumerating a set can differ from session to session. Because I need to use my model later, I have to save these lookup dictionaries.

In [3]:
# Saving the objects
# Persist the vocabulary, both lookup dictionaries and the artist list so that
# a later session can decode/encode characters exactly as this run did.
directory = 'checkpoints/{}'.format(folder_name)
os.makedirs(directory, exist_ok=True) # create folder first (no-op if it already exists)
# The `with` block closes the file even if pickling raises.
with open('{}/vars.pckl'.format(directory), 'wb') as f:
    pickle.dump([vocab, vocab_to_int, int_to_vocab, artist_list], f, protocol=2)

In [4]:
# hyperparameters
# These values are consumed by the training cell below.
batch_size = 100  # passed to split_data() and build_rnn()
num_steps = 100  # passed to split_data(), build_rnn() and get_batch(); presumably the sequence length per batch -- TODO confirm in functions_model.py
lstm_size = 512  # passed to build_rnn(); also embedded in the checkpoint file names
num_layers = 2  # passed to build_rnn()
learning_rate = 0.001  # passed to build_rnn()
keep_prob_train = 0.7  # fed to model.keep_prob for training batches (validation feeds 1.0)

I highly encourage you to check the function `split_data` in `functions_model.py`. The target is the next character in the sequence.  
`x = chars[: n_batches*slice_size]`  
`y = chars[1: n_batches*slice_size + 1]`

Here is the main graph of the model below, from <a href="https://www.tensorflow.org/get_started/summaries_and_tensorboard" target="_blank">TensorBoard</a>:
<img src="assets/main_graph.PNG" width="600">

In [5]:
# Train the character-level RNN.
# Runs `epochs` passes over the training split, logs a TensorBoard summary for
# every training batch, and every `save_every_n` iterations (plus the very last
# one) evaluates on the validation split and writes a checkpoint.
start_time = time.time() # always measure the time
epochs = 100
save_every_n = 2000
# Split training and validation sets
train_x, train_y, val_x, val_y = split_data(chars, batch_size, num_steps) # default fraction for training is 0.9

model = build_rnn(len(vocab),
                  batch_size=batch_size,
                  num_steps=num_steps,
                  learning_rate=learning_rate,
                  lstm_size=lstm_size,
                  num_layers=num_layers)

saver = tf.train.Saver(max_to_keep=100)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter('./logs/{}/train'.format(folder_name), sess.graph)
    test_writer = tf.summary.FileWriter('./logs/{}/test'.format(folder_name))

    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/lyr20.ckpt')

    try:
        n_batches = train_x.shape[1] // num_steps
        iterations = n_batches * epochs
        for e in range(epochs):

            # Train network
            # The recurrent state starts fresh each epoch and is then carried
            # from one training batch to the next.
            new_state = sess.run(model.initial_state)
            loss = 0
            for b, (x, y) in enumerate(get_batch([train_x, train_y], num_steps), 1):
                iteration = e*n_batches + b
                start = time.time()
                feed = {model.inputs: x,
                        model.targets: y,
                        model.keep_prob: keep_prob_train,
                        model.initial_state: new_state}
                summary, batch_loss, new_state, _ = sess.run([model.merged, model.cost,
                                                              model.final_state, model.optimizer],
                                                              feed_dict=feed)
                loss += batch_loss
                end = time.time()
                # `loss/b` is the running mean of the training loss within this epoch.
                print('Epoch {}/{} '.format(e+1, epochs),
                      'Iteration {}/{}'.format(iteration, iterations),
                      'Training loss: {:.4f}'.format(loss/b),
                      '{:.4f} sec/batch'.format((end-start)))

                train_writer.add_summary(summary, iteration)

                if (iteration%save_every_n == 0) or (iteration == iterations):
                    # Check performance. keep_prob is fed as 1 because this is
                    # validation, not training, so we need everything.
                    # Validation uses its own state/summary variables: reusing
                    # `new_state` here would overwrite the training state that
                    # the next training batch must continue from.
                    val_loss = []
                    val_summary = None
                    val_state = sess.run(model.initial_state)
                    for val_x_batch, val_y_batch in get_batch([val_x, val_y], num_steps):
                        val_feed = {model.inputs: val_x_batch,
                                    model.targets: val_y_batch,
                                    model.keep_prob: 1.,
                                    model.initial_state: val_state}
                        val_summary, val_batch_loss, val_state = sess.run([model.merged, model.cost,
                                                                           model.final_state],
                                                                          feed_dict=val_feed)
                        val_loss.append(val_batch_loss)

                    # Only the summary of the last validation batch is logged;
                    # skip it entirely if the validation split produced no batch.
                    if val_summary is not None:
                        test_writer.add_summary(val_summary, iteration)

                    mean_val_loss = np.mean(val_loss)
                    print('Validation loss:', mean_val_loss,
                          'Saving checkpoint!')
                    saver.save(sess, "checkpoints/{}/i{}_l{}_{:.3f}.ckpt".format(folder_name, iteration, lstm_size, mean_val_loss))
    finally:
        # Flush and close the event files even if training is interrupted.
        train_writer.close()
        test_writer.close()

# Convert to minutes first, then round (rounding the seconds before dividing
# leaves an unrounded number of minutes).
print(epochs, "epoch done in", round((time.time() - start_time)/60, 2), "mins")

Epoch 1/100  Iteration 1/56600 Training loss: 4.4892 0.8543 sec/batch
Epoch 1/100  Iteration 2/56600 Training loss: 4.4489 0.4667 sec/batch
Epoch 1/100  Iteration 3/56600 Training loss: 4.2830 0.4863 sec/batch
Epoch 1/100  Iteration 4/56600 Training loss: 4.4983 0.4636 sec/batch
Epoch 1/100  Iteration 5/56600 Training loss: 4.4387 0.4756 sec/batch
Epoch 1/100  Iteration 6/56600 Training loss: 4.3224 0.4835 sec/batch
Epoch 1/100  Iteration 7/56600 Training loss: 4.2203 0.4727 sec/batch
Epoch 1/100  Iteration 8/56600 Training loss: 4.1332 0.4716 sec/batch
Epoch 1/100  Iteration 9/56600 Training loss: 4.0553 0.4653 sec/batch
Epoch 1/100  Iteration 10/56600 Training loss: 3.9902 0.4770 sec/batch
Epoch 1/100  Iteration 11/56600 Training loss: 3.9355 0.4649 sec/batch
Epoch 1/100  Iteration 12/56600 Training loss: 3.8880 0.4790 sec/batch
Epoch 1/100  Iteration 13/56600 Training loss: 3.8463 0.4742 sec/batch
Epoch 1/100  Iteration 14/56600 Training loss: 3.8079 0.4637 sec/batch
Epoch 1/100  It

Epoch 1/100  Iteration 117/56600 Training loss: 3.2900 0.4678 sec/batch
Epoch 1/100  Iteration 118/56600 Training loss: 3.2894 0.4678 sec/batch
Epoch 1/100  Iteration 119/56600 Training loss: 3.2900 0.4730 sec/batch
Epoch 1/100  Iteration 120/56600 Training loss: 3.2923 0.4653 sec/batch
Epoch 1/100  Iteration 121/56600 Training loss: 3.2942 0.4862 sec/batch
Epoch 1/100  Iteration 122/56600 Training loss: 3.2950 0.4701 sec/batch
Epoch 1/100  Iteration 123/56600 Training loss: 3.2950 0.4654 sec/batch
Epoch 1/100  Iteration 124/56600 Training loss: 3.2937 0.4802 sec/batch
Epoch 1/100  Iteration 125/56600 Training loss: 3.2928 0.4701 sec/batch
Epoch 1/100  Iteration 126/56600 Training loss: 3.2917 0.4565 sec/batch
Epoch 1/100  Iteration 127/56600 Training loss: 3.2908 0.4718 sec/batch
Epoch 1/100  Iteration 128/56600 Training loss: 3.2901 0.4760 sec/batch
Epoch 1/100  Iteration 129/56600 Training loss: 3.2893 0.4718 sec/batch
Epoch 1/100  Iteration 130/56600 Training loss: 3.2883 0.4835 se

Epoch 1/100  Iteration 231/56600 Training loss: 3.0727 0.4954 sec/batch
Epoch 1/100  Iteration 232/56600 Training loss: 3.0700 0.4743 sec/batch
Epoch 1/100  Iteration 233/56600 Training loss: 3.0675 0.4669 sec/batch
Epoch 1/100  Iteration 234/56600 Training loss: 3.0649 0.4853 sec/batch
Epoch 1/100  Iteration 235/56600 Training loss: 3.0624 0.4777 sec/batch
Epoch 1/100  Iteration 236/56600 Training loss: 3.0597 0.4648 sec/batch
Epoch 1/100  Iteration 237/56600 Training loss: 3.0571 0.4743 sec/batch
Epoch 1/100  Iteration 238/56600 Training loss: 3.0547 0.4717 sec/batch
Epoch 1/100  Iteration 239/56600 Training loss: 3.0523 0.4774 sec/batch
Epoch 1/100  Iteration 240/56600 Training loss: 3.0500 0.4770 sec/batch
Epoch 1/100  Iteration 241/56600 Training loss: 3.0475 0.4715 sec/batch
Epoch 1/100  Iteration 242/56600 Training loss: 3.0450 0.4868 sec/batch
Epoch 1/100  Iteration 243/56600 Training loss: 3.0427 0.4733 sec/batch
Epoch 1/100  Iteration 244/56600 Training loss: 3.0404 0.4823 se

Epoch 1/100  Iteration 345/56600 Training loss: 2.8261 0.4766 sec/batch
Epoch 1/100  Iteration 346/56600 Training loss: 2.8243 0.4701 sec/batch
Epoch 1/100  Iteration 347/56600 Training loss: 2.8226 0.4869 sec/batch
Epoch 1/100  Iteration 348/56600 Training loss: 2.8209 0.4853 sec/batch
Epoch 1/100  Iteration 349/56600 Training loss: 2.8193 0.4724 sec/batch
Epoch 1/100  Iteration 350/56600 Training loss: 2.8176 0.4894 sec/batch
Epoch 1/100  Iteration 351/56600 Training loss: 2.8158 0.4824 sec/batch
Epoch 1/100  Iteration 352/56600 Training loss: 2.8140 0.4784 sec/batch
Epoch 1/100  Iteration 353/56600 Training loss: 2.8123 0.4739 sec/batch
Epoch 1/100  Iteration 354/56600 Training loss: 2.8106 0.4733 sec/batch
Epoch 1/100  Iteration 355/56600 Training loss: 2.8089 0.4676 sec/batch
Epoch 1/100  Iteration 356/56600 Training loss: 2.8072 0.4755 sec/batch
Epoch 1/100  Iteration 357/56600 Training loss: 2.8055 0.4794 sec/batch
Epoch 1/100  Iteration 358/56600 Training loss: 2.8038 0.4784 se

Epoch 1/100  Iteration 459/56600 Training loss: 2.6530 0.4903 sec/batch
Epoch 1/100  Iteration 460/56600 Training loss: 2.6516 0.4793 sec/batch
Epoch 1/100  Iteration 461/56600 Training loss: 2.6504 0.4763 sec/batch
Epoch 1/100  Iteration 462/56600 Training loss: 2.6490 0.4711 sec/batch
Epoch 1/100  Iteration 463/56600 Training loss: 2.6477 0.4901 sec/batch
Epoch 1/100  Iteration 464/56600 Training loss: 2.6464 0.4877 sec/batch
Epoch 1/100  Iteration 465/56600 Training loss: 2.6452 0.4707 sec/batch
Epoch 1/100  Iteration 466/56600 Training loss: 2.6439 0.4873 sec/batch
Epoch 1/100  Iteration 467/56600 Training loss: 2.6427 0.4945 sec/batch
Epoch 1/100  Iteration 468/56600 Training loss: 2.6414 0.4674 sec/batch
Epoch 1/100  Iteration 469/56600 Training loss: 2.6402 0.4684 sec/batch
Epoch 1/100  Iteration 470/56600 Training loss: 2.6390 0.4931 sec/batch
Epoch 1/100  Iteration 471/56600 Training loss: 2.6377 0.4779 sec/batch
Epoch 1/100  Iteration 472/56600 Training loss: 2.6365 0.4759 se

Epoch 2/100  Iteration 573/56600 Training loss: 1.9552 0.4807 sec/batch
Epoch 2/100  Iteration 574/56600 Training loss: 1.9572 0.4698 sec/batch
Epoch 2/100  Iteration 575/56600 Training loss: 1.9567 0.4718 sec/batch
Epoch 2/100  Iteration 576/56600 Training loss: 1.9540 0.4835 sec/batch
Epoch 2/100  Iteration 577/56600 Training loss: 1.9540 0.4793 sec/batch
Epoch 2/100  Iteration 578/56600 Training loss: 1.9531 0.4760 sec/batch
Epoch 2/100  Iteration 579/56600 Training loss: 1.9551 0.4824 sec/batch
Epoch 2/100  Iteration 580/56600 Training loss: 1.9531 0.4953 sec/batch
Epoch 2/100  Iteration 581/56600 Training loss: 1.9525 0.4872 sec/batch
Epoch 2/100  Iteration 582/56600 Training loss: 1.9511 0.4790 sec/batch
Epoch 2/100  Iteration 583/56600 Training loss: 1.9496 0.4784 sec/batch
Epoch 2/100  Iteration 584/56600 Training loss: 1.9482 0.4769 sec/batch
Epoch 2/100  Iteration 585/56600 Training loss: 1.9483 0.4824 sec/batch
Epoch 2/100  Iteration 586/56600 Training loss: 1.9489 0.5014 se

Epoch 2/100  Iteration 687/56600 Training loss: 1.9009 0.4782 sec/batch
Epoch 2/100  Iteration 688/56600 Training loss: 1.9008 0.4768 sec/batch
Epoch 2/100  Iteration 689/56600 Training loss: 1.9001 0.4656 sec/batch
Epoch 2/100  Iteration 690/56600 Training loss: 1.8992 0.4800 sec/batch
Epoch 2/100  Iteration 691/56600 Training loss: 1.8986 0.4835 sec/batch
Epoch 2/100  Iteration 692/56600 Training loss: 1.8981 0.4717 sec/batch
Epoch 2/100  Iteration 693/56600 Training loss: 1.8975 0.4846 sec/batch
Epoch 2/100  Iteration 694/56600 Training loss: 1.8975 0.4817 sec/batch
Epoch 2/100  Iteration 695/56600 Training loss: 1.8971 0.4694 sec/batch
Epoch 2/100  Iteration 696/56600 Training loss: 1.8968 0.4767 sec/batch
Epoch 2/100  Iteration 697/56600 Training loss: 1.8964 0.4683 sec/batch
Epoch 2/100  Iteration 698/56600 Training loss: 1.8960 0.4793 sec/batch
Epoch 2/100  Iteration 699/56600 Training loss: 1.8955 0.4669 sec/batch
Epoch 2/100  Iteration 700/56600 Training loss: 1.8951 0.4771 se

Epoch 2/100  Iteration 801/56600 Training loss: 1.8613 0.4695 sec/batch
Epoch 2/100  Iteration 802/56600 Training loss: 1.8610 0.4809 sec/batch
Epoch 2/100  Iteration 803/56600 Training loss: 1.8608 0.4913 sec/batch
Epoch 2/100  Iteration 804/56600 Training loss: 1.8608 0.4753 sec/batch
Epoch 2/100  Iteration 805/56600 Training loss: 1.8607 0.4762 sec/batch
Epoch 2/100  Iteration 806/56600 Training loss: 1.8607 0.4860 sec/batch
Epoch 2/100  Iteration 807/56600 Training loss: 1.8606 0.4773 sec/batch
Epoch 2/100  Iteration 808/56600 Training loss: 1.8605 0.4712 sec/batch
Epoch 2/100  Iteration 809/56600 Training loss: 1.8606 0.4843 sec/batch
Epoch 2/100  Iteration 810/56600 Training loss: 1.8606 0.4728 sec/batch
Epoch 2/100  Iteration 811/56600 Training loss: 1.8604 0.4611 sec/batch
Epoch 2/100  Iteration 812/56600 Training loss: 1.8602 0.4698 sec/batch
Epoch 2/100  Iteration 813/56600 Training loss: 1.8600 0.4842 sec/batch
Epoch 2/100  Iteration 814/56600 Training loss: 1.8598 0.4837 se

Epoch 2/100  Iteration 915/56600 Training loss: 1.8294 0.4815 sec/batch
Epoch 2/100  Iteration 916/56600 Training loss: 1.8291 0.4831 sec/batch
Epoch 2/100  Iteration 917/56600 Training loss: 1.8288 0.4785 sec/batch
Epoch 2/100  Iteration 918/56600 Training loss: 1.8284 0.4866 sec/batch
Epoch 2/100  Iteration 919/56600 Training loss: 1.8282 0.4711 sec/batch
Epoch 2/100  Iteration 920/56600 Training loss: 1.8279 0.4908 sec/batch
Epoch 2/100  Iteration 921/56600 Training loss: 1.8277 0.4917 sec/batch
Epoch 2/100  Iteration 922/56600 Training loss: 1.8274 0.4741 sec/batch
Epoch 2/100  Iteration 923/56600 Training loss: 1.8271 0.4829 sec/batch
Epoch 2/100  Iteration 924/56600 Training loss: 1.8268 0.4726 sec/batch
Epoch 2/100  Iteration 925/56600 Training loss: 1.8266 0.4805 sec/batch
Epoch 2/100  Iteration 926/56600 Training loss: 1.8263 0.4817 sec/batch
Epoch 2/100  Iteration 927/56600 Training loss: 1.8261 0.4696 sec/batch
Epoch 2/100  Iteration 928/56600 Training loss: 1.8258 0.4784 se

Epoch 2/100  Iteration 1029/56600 Training loss: 1.7956 0.4844 sec/batch
Epoch 2/100  Iteration 1030/56600 Training loss: 1.7953 0.4804 sec/batch
Epoch 2/100  Iteration 1031/56600 Training loss: 1.7950 0.4814 sec/batch
Epoch 2/100  Iteration 1032/56600 Training loss: 1.7948 0.4823 sec/batch
Epoch 2/100  Iteration 1033/56600 Training loss: 1.7946 0.4825 sec/batch
Epoch 2/100  Iteration 1034/56600 Training loss: 1.7944 0.4825 sec/batch
Epoch 2/100  Iteration 1035/56600 Training loss: 1.7942 0.4815 sec/batch
Epoch 2/100  Iteration 1036/56600 Training loss: 1.7940 0.4816 sec/batch
Epoch 2/100  Iteration 1037/56600 Training loss: 1.7938 0.4827 sec/batch
Epoch 2/100  Iteration 1038/56600 Training loss: 1.7936 0.4824 sec/batch
Epoch 2/100  Iteration 1039/56600 Training loss: 1.7933 0.4794 sec/batch
Epoch 2/100  Iteration 1040/56600 Training loss: 1.7931 0.4823 sec/batch
Epoch 2/100  Iteration 1041/56600 Training loss: 1.7928 0.4883 sec/batch
Epoch 2/100  Iteration 1042/56600 Training loss: 1.

Epoch 3/100  Iteration 1142/56600 Training loss: 1.6477 0.4864 sec/batch
Epoch 3/100  Iteration 1143/56600 Training loss: 1.6480 0.4846 sec/batch
Epoch 3/100  Iteration 1144/56600 Training loss: 1.6474 0.4834 sec/batch
Epoch 3/100  Iteration 1145/56600 Training loss: 1.6496 0.4844 sec/batch
Epoch 3/100  Iteration 1146/56600 Training loss: 1.6477 0.4883 sec/batch
Epoch 3/100  Iteration 1147/56600 Training loss: 1.6468 0.4865 sec/batch
Epoch 3/100  Iteration 1148/56600 Training loss: 1.6454 0.4847 sec/batch
Epoch 3/100  Iteration 1149/56600 Training loss: 1.6443 0.4833 sec/batch
Epoch 3/100  Iteration 1150/56600 Training loss: 1.6433 0.4916 sec/batch
Epoch 3/100  Iteration 1151/56600 Training loss: 1.6441 0.5024 sec/batch
Epoch 3/100  Iteration 1152/56600 Training loss: 1.6443 0.4874 sec/batch
Epoch 3/100  Iteration 1153/56600 Training loss: 1.6458 0.4893 sec/batch
Epoch 3/100  Iteration 1154/56600 Training loss: 1.6447 0.4893 sec/batch
Epoch 3/100  Iteration 1155/56600 Training loss: 1.

Epoch 3/100  Iteration 1255/56600 Training loss: 1.6246 0.4977 sec/batch
Epoch 3/100  Iteration 1256/56600 Training loss: 1.6239 0.4751 sec/batch
Epoch 3/100  Iteration 1257/56600 Training loss: 1.6236 0.4908 sec/batch
Epoch 3/100  Iteration 1258/56600 Training loss: 1.6234 0.4926 sec/batch
Epoch 3/100  Iteration 1259/56600 Training loss: 1.6232 0.4939 sec/batch
Epoch 3/100  Iteration 1260/56600 Training loss: 1.6234 0.4842 sec/batch
Epoch 3/100  Iteration 1261/56600 Training loss: 1.6233 0.4980 sec/batch
Epoch 3/100  Iteration 1262/56600 Training loss: 1.6231 0.5169 sec/batch
Epoch 3/100  Iteration 1263/56600 Training loss: 1.6230 0.4884 sec/batch
Epoch 3/100  Iteration 1264/56600 Training loss: 1.6228 0.5214 sec/batch
Epoch 3/100  Iteration 1265/56600 Training loss: 1.6227 0.4936 sec/batch
Epoch 3/100  Iteration 1266/56600 Training loss: 1.6225 0.4924 sec/batch
Epoch 3/100  Iteration 1267/56600 Training loss: 1.6224 0.4884 sec/batch
Epoch 3/100  Iteration 1268/56600 Training loss: 1.

Epoch 3/100  Iteration 1368/56600 Training loss: 1.6096 0.4745 sec/batch
Epoch 3/100  Iteration 1369/56600 Training loss: 1.6095 0.4951 sec/batch
Epoch 3/100  Iteration 1370/56600 Training loss: 1.6096 0.4779 sec/batch
Epoch 3/100  Iteration 1371/56600 Training loss: 1.6098 0.4824 sec/batch
Epoch 3/100  Iteration 1372/56600 Training loss: 1.6100 0.4786 sec/batch
Epoch 3/100  Iteration 1373/56600 Training loss: 1.6101 0.4858 sec/batch
Epoch 3/100  Iteration 1374/56600 Training loss: 1.6102 0.4945 sec/batch
Epoch 3/100  Iteration 1375/56600 Training loss: 1.6104 0.4698 sec/batch
Epoch 3/100  Iteration 1376/56600 Training loss: 1.6107 0.4823 sec/batch
Epoch 3/100  Iteration 1377/56600 Training loss: 1.6107 0.4657 sec/batch
Epoch 3/100  Iteration 1378/56600 Training loss: 1.6106 0.4753 sec/batch
Epoch 3/100  Iteration 1379/56600 Training loss: 1.6107 0.4830 sec/batch
Epoch 3/100  Iteration 1380/56600 Training loss: 1.6106 0.4735 sec/batch
Epoch 3/100  Iteration 1381/56600 Training loss: 1.

Epoch 3/100  Iteration 1481/56600 Training loss: 1.5971 0.5024 sec/batch
Epoch 3/100  Iteration 1482/56600 Training loss: 1.5970 0.5124 sec/batch
Epoch 3/100  Iteration 1483/56600 Training loss: 1.5968 0.4954 sec/batch
Epoch 3/100  Iteration 1484/56600 Training loss: 1.5966 0.4974 sec/batch
Epoch 3/100  Iteration 1485/56600 Training loss: 1.5964 0.5294 sec/batch
Epoch 3/100  Iteration 1486/56600 Training loss: 1.5963 0.4903 sec/batch
Epoch 3/100  Iteration 1487/56600 Training loss: 1.5962 0.4873 sec/batch
Epoch 3/100  Iteration 1488/56600 Training loss: 1.5961 0.4983 sec/batch
Epoch 3/100  Iteration 1489/56600 Training loss: 1.5960 0.4923 sec/batch
Epoch 3/100  Iteration 1490/56600 Training loss: 1.5959 0.4863 sec/batch
Epoch 3/100  Iteration 1491/56600 Training loss: 1.5957 0.4893 sec/batch
Epoch 3/100  Iteration 1492/56600 Training loss: 1.5956 0.4834 sec/batch
Epoch 3/100  Iteration 1493/56600 Training loss: 1.5955 0.4865 sec/batch
Epoch 3/100  Iteration 1494/56600 Training loss: 1.

Epoch 3/100  Iteration 1594/56600 Training loss: 1.5798 0.4732 sec/batch
Epoch 3/100  Iteration 1595/56600 Training loss: 1.5796 0.4824 sec/batch
Epoch 3/100  Iteration 1596/56600 Training loss: 1.5795 0.4923 sec/batch
Epoch 3/100  Iteration 1597/56600 Training loss: 1.5793 0.4859 sec/batch
Epoch 3/100  Iteration 1598/56600 Training loss: 1.5792 0.4775 sec/batch
Epoch 3/100  Iteration 1599/56600 Training loss: 1.5792 0.4887 sec/batch
Epoch 3/100  Iteration 1600/56600 Training loss: 1.5791 0.4765 sec/batch
Epoch 3/100  Iteration 1601/56600 Training loss: 1.5790 0.4829 sec/batch
Epoch 3/100  Iteration 1602/56600 Training loss: 1.5790 0.4866 sec/batch
Epoch 3/100  Iteration 1603/56600 Training loss: 1.5789 0.4920 sec/batch
Epoch 3/100  Iteration 1604/56600 Training loss: 1.5789 0.4880 sec/batch
Epoch 3/100  Iteration 1605/56600 Training loss: 1.5788 0.4848 sec/batch
Epoch 3/100  Iteration 1606/56600 Training loss: 1.5786 0.4881 sec/batch
Epoch 3/100  Iteration 1607/56600 Training loss: 1.

Epoch 4/100  Iteration 1707/56600 Training loss: 1.5207 0.4690 sec/batch
Epoch 4/100  Iteration 1708/56600 Training loss: 1.5174 0.4866 sec/batch
Epoch 4/100  Iteration 1709/56600 Training loss: 1.5183 0.4756 sec/batch
Epoch 4/100  Iteration 1710/56600 Training loss: 1.5179 0.4920 sec/batch
Epoch 4/100  Iteration 1711/56600 Training loss: 1.5197 0.4932 sec/batch
Epoch 4/100  Iteration 1712/56600 Training loss: 1.5183 0.4708 sec/batch
Epoch 4/100  Iteration 1713/56600 Training loss: 1.5176 0.4769 sec/batch
Epoch 4/100  Iteration 1714/56600 Training loss: 1.5166 0.4968 sec/batch
Epoch 4/100  Iteration 1715/56600 Training loss: 1.5162 0.4825 sec/batch
Epoch 4/100  Iteration 1716/56600 Training loss: 1.5154 0.4799 sec/batch
Epoch 4/100  Iteration 1717/56600 Training loss: 1.5159 0.4738 sec/batch
Epoch 4/100  Iteration 1718/56600 Training loss: 1.5157 0.4829 sec/batch
Epoch 4/100  Iteration 1719/56600 Training loss: 1.5173 0.4822 sec/batch
Epoch 4/100  Iteration 1720/56600 Training loss: 1.

Epoch 4/100  Iteration 1820/56600 Training loss: 1.5027 0.4873 sec/batch
Epoch 4/100  Iteration 1821/56600 Training loss: 1.5024 0.4854 sec/batch
Epoch 4/100  Iteration 1822/56600 Training loss: 1.5018 0.4893 sec/batch
Epoch 4/100  Iteration 1823/56600 Training loss: 1.5017 0.4914 sec/batch
Epoch 4/100  Iteration 1824/56600 Training loss: 1.5015 0.4953 sec/batch
Epoch 4/100  Iteration 1825/56600 Training loss: 1.5013 0.4904 sec/batch
Epoch 4/100  Iteration 1826/56600 Training loss: 1.5014 0.4923 sec/batch
Epoch 4/100  Iteration 1827/56600 Training loss: 1.5014 0.4934 sec/batch
Epoch 4/100  Iteration 1828/56600 Training loss: 1.5013 0.4934 sec/batch
Epoch 4/100  Iteration 1829/56600 Training loss: 1.5013 0.4903 sec/batch
Epoch 4/100  Iteration 1830/56600 Training loss: 1.5011 0.4893 sec/batch
Epoch 4/100  Iteration 1831/56600 Training loss: 1.5011 0.4924 sec/batch
Epoch 4/100  Iteration 1832/56600 Training loss: 1.5009 0.4863 sec/batch
Epoch 4/100  Iteration 1833/56600 Training loss: 1.

Epoch 4/100  Iteration 1933/56600 Training loss: 1.4942 0.4793 sec/batch
Epoch 4/100  Iteration 1934/56600 Training loss: 1.4942 0.4753 sec/batch
Epoch 4/100  Iteration 1935/56600 Training loss: 1.4942 0.4733 sec/batch
Epoch 4/100  Iteration 1936/56600 Training loss: 1.4944 0.4758 sec/batch
Epoch 4/100  Iteration 1937/56600 Training loss: 1.4945 0.4709 sec/batch
Epoch 4/100  Iteration 1938/56600 Training loss: 1.4948 0.4861 sec/batch
Epoch 4/100  Iteration 1939/56600 Training loss: 1.4950 0.4740 sec/batch
Epoch 4/100  Iteration 1940/56600 Training loss: 1.4952 0.4701 sec/batch
Epoch 4/100  Iteration 1941/56600 Training loss: 1.4955 0.4901 sec/batch
Epoch 4/100  Iteration 1942/56600 Training loss: 1.4957 0.4613 sec/batch
Epoch 4/100  Iteration 1943/56600 Training loss: 1.4959 0.4780 sec/batch
Epoch 4/100  Iteration 1944/56600 Training loss: 1.4959 0.4800 sec/batch
Epoch 4/100  Iteration 1945/56600 Training loss: 1.4960 0.4824 sec/batch
Epoch 4/100  Iteration 1946/56600 Training loss: 1.

Epoch 4/100  Iteration 2045/56600 Training loss: 1.4878 0.4744 sec/batch
Epoch 4/100  Iteration 2046/56600 Training loss: 1.4877 0.4764 sec/batch
Epoch 4/100  Iteration 2047/56600 Training loss: 1.4877 0.4764 sec/batch
Epoch 4/100  Iteration 2048/56600 Training loss: 1.4877 0.4764 sec/batch
Epoch 4/100  Iteration 2049/56600 Training loss: 1.4875 0.4744 sec/batch
Epoch 4/100  Iteration 2050/56600 Training loss: 1.4874 0.4755 sec/batch
Epoch 4/100  Iteration 2051/56600 Training loss: 1.4873 0.4724 sec/batch
Epoch 4/100  Iteration 2052/56600 Training loss: 1.4873 0.4794 sec/batch
Epoch 4/100  Iteration 2053/56600 Training loss: 1.4872 0.4754 sec/batch
Epoch 4/100  Iteration 2054/56600 Training loss: 1.4871 0.4753 sec/batch
Epoch 4/100  Iteration 2055/56600 Training loss: 1.4871 0.4754 sec/batch
Epoch 4/100  Iteration 2056/56600 Training loss: 1.4870 0.4774 sec/batch
Epoch 4/100  Iteration 2057/56600 Training loss: 1.4870 0.4764 sec/batch
Epoch 4/100  Iteration 2058/56600 Training loss: 1.

Epoch 4/100  Iteration 2158/56600 Training loss: 1.4766 0.4764 sec/batch
Epoch 4/100  Iteration 2159/56600 Training loss: 1.4766 0.4755 sec/batch
Epoch 4/100  Iteration 2160/56600 Training loss: 1.4765 0.4765 sec/batch
Epoch 4/100  Iteration 2161/56600 Training loss: 1.4764 0.4764 sec/batch
Epoch 4/100  Iteration 2162/56600 Training loss: 1.4763 0.4764 sec/batch
Epoch 4/100  Iteration 2163/56600 Training loss: 1.4762 0.4774 sec/batch
Epoch 4/100  Iteration 2164/56600 Training loss: 1.4761 0.4755 sec/batch
Epoch 4/100  Iteration 2165/56600 Training loss: 1.4761 0.4764 sec/batch
Epoch 4/100  Iteration 2166/56600 Training loss: 1.4760 0.4744 sec/batch
Epoch 4/100  Iteration 2167/56600 Training loss: 1.4760 0.4764 sec/batch
Epoch 4/100  Iteration 2168/56600 Training loss: 1.4760 0.4764 sec/batch
Epoch 4/100  Iteration 2169/56600 Training loss: 1.4760 0.4774 sec/batch
Epoch 4/100  Iteration 2170/56600 Training loss: 1.4760 0.4764 sec/batch
Epoch 4/100  Iteration 2171/56600 Training loss: 1.

Epoch 5/100  Iteration 2271/56600 Training loss: 1.4465 0.4760 sec/batch
Epoch 5/100  Iteration 2272/56600 Training loss: 1.4494 0.4809 sec/batch
Epoch 5/100  Iteration 2273/56600 Training loss: 1.4489 0.4750 sec/batch
Epoch 5/100  Iteration 2274/56600 Training loss: 1.4466 0.4839 sec/batch
Epoch 5/100  Iteration 2275/56600 Training loss: 1.4468 0.4909 sec/batch
Epoch 5/100  Iteration 2276/56600 Training loss: 1.4468 0.4644 sec/batch
Epoch 5/100  Iteration 2277/56600 Training loss: 1.4484 0.4877 sec/batch
Epoch 5/100  Iteration 2278/56600 Training loss: 1.4467 0.4610 sec/batch
Epoch 5/100  Iteration 2279/56600 Training loss: 1.4457 0.4764 sec/batch
Epoch 5/100  Iteration 2280/56600 Training loss: 1.4448 0.4864 sec/batch
Epoch 5/100  Iteration 2281/56600 Training loss: 1.4443 0.4633 sec/batch
Epoch 5/100  Iteration 2282/56600 Training loss: 1.4434 0.4781 sec/batch
Epoch 5/100  Iteration 2283/56600 Training loss: 1.4433 0.4864 sec/batch
Epoch 5/100  Iteration 2284/56600 Training loss: 1.

Epoch 5/100  Iteration 2384/56600 Training loss: 1.4321 0.4796 sec/batch
Epoch 5/100  Iteration 2385/56600 Training loss: 1.4323 0.4702 sec/batch
Epoch 5/100  Iteration 2386/56600 Training loss: 1.4324 0.4744 sec/batch
Epoch 5/100  Iteration 2387/56600 Training loss: 1.4321 0.4819 sec/batch
Epoch 5/100  Iteration 2388/56600 Training loss: 1.4316 0.4743 sec/batch
Epoch 5/100  Iteration 2389/56600 Training loss: 1.4316 0.4836 sec/batch
Epoch 5/100  Iteration 2390/56600 Training loss: 1.4314 0.4891 sec/batch
Epoch 5/100  Iteration 2391/56600 Training loss: 1.4313 0.4774 sec/batch
Epoch 5/100  Iteration 2392/56600 Training loss: 1.4314 0.4886 sec/batch
Epoch 5/100  Iteration 2393/56600 Training loss: 1.4314 0.4813 sec/batch
Epoch 5/100  Iteration 2394/56600 Training loss: 1.4313 0.4832 sec/batch
Epoch 5/100  Iteration 2395/56600 Training loss: 1.4312 0.4820 sec/batch
Epoch 5/100  Iteration 2396/56600 Training loss: 1.4310 0.4749 sec/batch
Epoch 5/100  Iteration 2397/56600 Training loss: 1.

Epoch 5/100  Iteration 2497/56600 Training loss: 1.4270 0.4763 sec/batch
Epoch 5/100  Iteration 2498/56600 Training loss: 1.4269 0.4733 sec/batch
Epoch 5/100  Iteration 2499/56600 Training loss: 1.4269 0.4743 sec/batch
Epoch 5/100  Iteration 2500/56600 Training loss: 1.4270 0.4803 sec/batch
Epoch 5/100  Iteration 2501/56600 Training loss: 1.4270 0.4794 sec/batch
Epoch 5/100  Iteration 2502/56600 Training loss: 1.4272 0.4804 sec/batch
Epoch 5/100  Iteration 2503/56600 Training loss: 1.4273 0.4816 sec/batch
Epoch 5/100  Iteration 2504/56600 Training loss: 1.4276 0.4736 sec/batch
Epoch 5/100  Iteration 2505/56600 Training loss: 1.4278 0.4804 sec/batch
Epoch 5/100  Iteration 2506/56600 Training loss: 1.4280 0.4873 sec/batch
Epoch 5/100  Iteration 2507/56600 Training loss: 1.4283 0.4683 sec/batch
Epoch 5/100  Iteration 2508/56600 Training loss: 1.4285 0.4862 sec/batch
Epoch 5/100  Iteration 2509/56600 Training loss: 1.4287 0.4720 sec/batch
Epoch 5/100  Iteration 2510/56600 Training loss: 1.

Epoch 5/100  Iteration 2610/56600 Training loss: 1.4223 0.4690 sec/batch
Epoch 5/100  Iteration 2611/56600 Training loss: 1.4222 0.4867 sec/batch
Epoch 5/100  Iteration 2612/56600 Training loss: 1.4222 0.4855 sec/batch
Epoch 5/100  Iteration 2613/56600 Training loss: 1.4222 0.4648 sec/batch
Epoch 5/100  Iteration 2614/56600 Training loss: 1.4222 0.4619 sec/batch
Epoch 5/100  Iteration 2615/56600 Training loss: 1.4220 0.4676 sec/batch
Epoch 5/100  Iteration 2616/56600 Training loss: 1.4219 0.4706 sec/batch
Epoch 5/100  Iteration 2617/56600 Training loss: 1.4218 0.4873 sec/batch
Epoch 5/100  Iteration 2618/56600 Training loss: 1.4218 0.4835 sec/batch
Epoch 5/100  Iteration 2619/56600 Training loss: 1.4218 0.4729 sec/batch
Epoch 5/100  Iteration 2620/56600 Training loss: 1.4218 0.4745 sec/batch
Epoch 5/100  Iteration 2621/56600 Training loss: 1.4218 0.4840 sec/batch
Epoch 5/100  Iteration 2622/56600 Training loss: 1.4217 0.4788 sec/batch
Epoch 5/100  Iteration 2623/56600 Training loss: 1.

Epoch 5/100  Iteration 2723/56600 Training loss: 1.4132 0.4811 sec/batch
Epoch 5/100  Iteration 2724/56600 Training loss: 1.4132 0.4876 sec/batch
Epoch 5/100  Iteration 2725/56600 Training loss: 1.4132 0.4846 sec/batch
Epoch 5/100  Iteration 2726/56600 Training loss: 1.4132 0.4876 sec/batch
Epoch 5/100  Iteration 2727/56600 Training loss: 1.4131 0.4760 sec/batch
Epoch 5/100  Iteration 2728/56600 Training loss: 1.4130 0.4743 sec/batch
Epoch 5/100  Iteration 2729/56600 Training loss: 1.4129 0.4987 sec/batch
Epoch 5/100  Iteration 2730/56600 Training loss: 1.4129 0.4812 sec/batch
Epoch 5/100  Iteration 2731/56600 Training loss: 1.4129 0.4928 sec/batch
Epoch 5/100  Iteration 2732/56600 Training loss: 1.4128 0.4816 sec/batch
Epoch 5/100  Iteration 2733/56600 Training loss: 1.4128 0.4918 sec/batch
Epoch 5/100  Iteration 2734/56600 Training loss: 1.4129 0.4966 sec/batch
Epoch 5/100  Iteration 2735/56600 Training loss: 1.4129 0.4708 sec/batch
Epoch 5/100  Iteration 2736/56600 Training loss: 1.

Epoch 6/100  Iteration 2836/56600 Training loss: 1.3991 0.4870 sec/batch
Epoch 6/100  Iteration 2837/56600 Training loss: 1.3971 0.4907 sec/batch
Epoch 6/100  Iteration 2838/56600 Training loss: 1.3987 0.4833 sec/batch
Epoch 6/100  Iteration 2839/56600 Training loss: 1.3984 0.4822 sec/batch
Epoch 6/100  Iteration 2840/56600 Training loss: 1.3965 0.4832 sec/batch
Epoch 6/100  Iteration 2841/56600 Training loss: 1.3971 0.4834 sec/batch
Epoch 6/100  Iteration 2842/56600 Training loss: 1.3985 0.4809 sec/batch
Epoch 6/100  Iteration 2843/56600 Training loss: 1.3998 0.4936 sec/batch
Epoch 6/100  Iteration 2844/56600 Training loss: 1.3972 0.4783 sec/batch
Epoch 6/100  Iteration 2845/56600 Training loss: 1.3969 0.4843 sec/batch
Epoch 6/100  Iteration 2846/56600 Training loss: 1.3958 0.4853 sec/batch
Epoch 6/100  Iteration 2847/56600 Training loss: 1.3949 0.4841 sec/batch
Epoch 6/100  Iteration 2848/56600 Training loss: 1.3937 0.4881 sec/batch
Epoch 6/100  Iteration 2849/56600 Training loss: 1.

Epoch 6/100  Iteration 2949/56600 Training loss: 1.3820 0.4853 sec/batch
Epoch 6/100  Iteration 2950/56600 Training loss: 1.3824 0.4833 sec/batch
Epoch 6/100  Iteration 2951/56600 Training loss: 1.3827 0.4659 sec/batch
Epoch 6/100  Iteration 2952/56600 Training loss: 1.3829 0.4851 sec/batch
Epoch 6/100  Iteration 2953/56600 Training loss: 1.3826 0.4873 sec/batch
Epoch 6/100  Iteration 2954/56600 Training loss: 1.3822 0.4713 sec/batch
Epoch 6/100  Iteration 2955/56600 Training loss: 1.3821 0.4932 sec/batch
Epoch 6/100  Iteration 2956/56600 Training loss: 1.3820 0.4731 sec/batch
Epoch 6/100  Iteration 2957/56600 Training loss: 1.3819 0.4946 sec/batch
Epoch 6/100  Iteration 2958/56600 Training loss: 1.3820 0.4778 sec/batch
Epoch 6/100  Iteration 2959/56600 Training loss: 1.3821 0.4839 sec/batch
Epoch 6/100  Iteration 2960/56600 Training loss: 1.3820 0.4906 sec/batch
Epoch 6/100  Iteration 2961/56600 Training loss: 1.3821 0.4804 sec/batch
Epoch 6/100  Iteration 2962/56600 Training loss: 1.

Epoch 6/100  Iteration 3062/56600 Training loss: 1.3791 0.4989 sec/batch
Epoch 6/100  Iteration 3063/56600 Training loss: 1.3792 0.4850 sec/batch
Epoch 6/100  Iteration 3064/56600 Training loss: 1.3792 0.4803 sec/batch
Epoch 6/100  Iteration 3065/56600 Training loss: 1.3793 0.4838 sec/batch
Epoch 6/100  Iteration 3066/56600 Training loss: 1.3794 0.4741 sec/batch
Epoch 6/100  Iteration 3067/56600 Training loss: 1.3794 0.4826 sec/batch
Epoch 6/100  Iteration 3068/56600 Training loss: 1.3796 0.4863 sec/batch
Epoch 6/100  Iteration 3069/56600 Training loss: 1.3797 0.5010 sec/batch
Epoch 6/100  Iteration 3070/56600 Training loss: 1.3800 0.4981 sec/batch
Epoch 6/100  Iteration 3071/56600 Training loss: 1.3802 0.4803 sec/batch
Epoch 6/100  Iteration 3072/56600 Training loss: 1.3804 0.5039 sec/batch
Epoch 6/100  Iteration 3073/56600 Training loss: 1.3807 0.4862 sec/batch
Epoch 6/100  Iteration 3074/56600 Training loss: 1.3810 0.4881 sec/batch
Epoch 6/100  Iteration 3075/56600 Training loss: 1.

Epoch 6/100  Iteration 3175/56600 Training loss: 1.3762 0.4813 sec/batch
Epoch 6/100  Iteration 3176/56600 Training loss: 1.3762 0.4774 sec/batch
Epoch 6/100  Iteration 3177/56600 Training loss: 1.3762 0.4874 sec/batch
Epoch 6/100  Iteration 3178/56600 Training loss: 1.3762 0.4954 sec/batch
Epoch 6/100  Iteration 3179/56600 Training loss: 1.3762 0.4717 sec/batch
Epoch 6/100  Iteration 3180/56600 Training loss: 1.3761 0.4919 sec/batch
Epoch 6/100  Iteration 3181/56600 Training loss: 1.3760 0.4774 sec/batch
Epoch 6/100  Iteration 3182/56600 Training loss: 1.3759 0.4797 sec/batch
Epoch 6/100  Iteration 3183/56600 Training loss: 1.3759 0.4649 sec/batch
Epoch 6/100  Iteration 3184/56600 Training loss: 1.3759 0.4690 sec/batch
Epoch 6/100  Iteration 3185/56600 Training loss: 1.3758 0.4720 sec/batch
Epoch 6/100  Iteration 3186/56600 Training loss: 1.3758 0.4716 sec/batch
Epoch 6/100  Iteration 3187/56600 Training loss: 1.3759 0.4848 sec/batch
Epoch 6/100  Iteration 3188/56600 Training loss: 1.

Epoch 6/100  Iteration 3288/56600 Training loss: 1.3689 0.4918 sec/batch
Epoch 6/100  Iteration 3289/56600 Training loss: 1.3688 0.4717 sec/batch
Epoch 6/100  Iteration 3290/56600 Training loss: 1.3688 0.4879 sec/batch
Epoch 6/100  Iteration 3291/56600 Training loss: 1.3687 0.4747 sec/batch
Epoch 6/100  Iteration 3292/56600 Training loss: 1.3687 0.4748 sec/batch
Epoch 6/100  Iteration 3293/56600 Training loss: 1.3686 0.4832 sec/batch
Epoch 6/100  Iteration 3294/56600 Training loss: 1.3685 0.4786 sec/batch
Epoch 6/100  Iteration 3295/56600 Training loss: 1.3685 0.4795 sec/batch
Epoch 6/100  Iteration 3296/56600 Training loss: 1.3684 0.4666 sec/batch
Epoch 6/100  Iteration 3297/56600 Training loss: 1.3685 0.4689 sec/batch
Epoch 6/100  Iteration 3298/56600 Training loss: 1.3684 0.4706 sec/batch
Epoch 6/100  Iteration 3299/56600 Training loss: 1.3684 0.4825 sec/batch
Epoch 6/100  Iteration 3300/56600 Training loss: 1.3685 0.4933 sec/batch
Epoch 6/100  Iteration 3301/56600 Training loss: 1.

Epoch 7/100  Iteration 3401/56600 Training loss: 1.3608 0.4793 sec/batch
Epoch 7/100  Iteration 3402/56600 Training loss: 1.3613 0.4821 sec/batch
Epoch 7/100  Iteration 3403/56600 Training loss: 1.3591 0.4824 sec/batch
Epoch 7/100  Iteration 3404/56600 Training loss: 1.3602 0.4844 sec/batch
Epoch 7/100  Iteration 3405/56600 Training loss: 1.3594 0.4850 sec/batch
Epoch 7/100  Iteration 3406/56600 Training loss: 1.3576 0.4876 sec/batch
Epoch 7/100  Iteration 3407/56600 Training loss: 1.3590 0.4801 sec/batch
Epoch 7/100  Iteration 3408/56600 Training loss: 1.3595 0.4852 sec/batch
Epoch 7/100  Iteration 3409/56600 Training loss: 1.3610 0.4837 sec/batch
Epoch 7/100  Iteration 3410/56600 Training loss: 1.3592 0.4808 sec/batch
Epoch 7/100  Iteration 3411/56600 Training loss: 1.3589 0.4800 sec/batch
Epoch 7/100  Iteration 3412/56600 Training loss: 1.3581 0.4882 sec/batch
Epoch 7/100  Iteration 3413/56600 Training loss: 1.3566 0.4825 sec/batch
Epoch 7/100  Iteration 3414/56600 Training loss: 1.

Epoch 7/100  Iteration 3514/56600 Training loss: 1.3457 0.4843 sec/batch
Epoch 7/100  Iteration 3515/56600 Training loss: 1.3456 0.4881 sec/batch
Epoch 7/100  Iteration 3516/56600 Training loss: 1.3461 0.4900 sec/batch
Epoch 7/100  Iteration 3517/56600 Training loss: 1.3463 0.4850 sec/batch
Epoch 7/100  Iteration 3518/56600 Training loss: 1.3466 0.4925 sec/batch
Epoch 7/100  Iteration 3519/56600 Training loss: 1.3463 0.4725 sec/batch
Epoch 7/100  Iteration 3520/56600 Training loss: 1.3459 0.4883 sec/batch
Epoch 7/100  Iteration 3521/56600 Training loss: 1.3458 0.4902 sec/batch
Epoch 7/100  Iteration 3522/56600 Training loss: 1.3457 0.4781 sec/batch
Epoch 7/100  Iteration 3523/56600 Training loss: 1.3456 0.5036 sec/batch
Epoch 7/100  Iteration 3524/56600 Training loss: 1.3457 0.4795 sec/batch
Epoch 7/100  Iteration 3525/56600 Training loss: 1.3458 0.4819 sec/batch
Epoch 7/100  Iteration 3526/56600 Training loss: 1.3456 0.4876 sec/batch
Epoch 7/100  Iteration 3527/56600 Training loss: 1.

Epoch 7/100  Iteration 3627/56600 Training loss: 1.3432 0.4744 sec/batch
Epoch 7/100  Iteration 3628/56600 Training loss: 1.3433 0.4748 sec/batch
Epoch 7/100  Iteration 3629/56600 Training loss: 1.3434 0.4688 sec/batch
Epoch 7/100  Iteration 3630/56600 Training loss: 1.3434 0.4835 sec/batch
Epoch 7/100  Iteration 3631/56600 Training loss: 1.3434 0.4807 sec/batch
Epoch 7/100  Iteration 3632/56600 Training loss: 1.3435 0.4892 sec/batch
Epoch 7/100  Iteration 3633/56600 Training loss: 1.3436 0.4628 sec/batch
Epoch 7/100  Iteration 3634/56600 Training loss: 1.3438 0.4786 sec/batch
Epoch 7/100  Iteration 3635/56600 Training loss: 1.3439 0.4924 sec/batch
Epoch 7/100  Iteration 3636/56600 Training loss: 1.3442 0.4796 sec/batch
Epoch 7/100  Iteration 3637/56600 Training loss: 1.3445 0.4772 sec/batch
Epoch 7/100  Iteration 3638/56600 Training loss: 1.3447 0.4781 sec/batch
Epoch 7/100  Iteration 3639/56600 Training loss: 1.3450 0.4844 sec/batch
Epoch 7/100  Iteration 3640/56600 Training loss: 1.

Epoch 7/100  Iteration 3740/56600 Training loss: 1.3405 0.4767 sec/batch
Epoch 7/100  Iteration 3741/56600 Training loss: 1.3406 0.4853 sec/batch
Epoch 7/100  Iteration 3742/56600 Training loss: 1.3406 0.4935 sec/batch
Epoch 7/100  Iteration 3743/56600 Training loss: 1.3405 0.4864 sec/batch
Epoch 7/100  Iteration 3744/56600 Training loss: 1.3405 0.4925 sec/batch
Epoch 7/100  Iteration 3745/56600 Training loss: 1.3405 0.4842 sec/batch
Epoch 7/100  Iteration 3746/56600 Training loss: 1.3405 0.4764 sec/batch
Epoch 7/100  Iteration 3747/56600 Training loss: 1.3404 0.4709 sec/batch
Epoch 7/100  Iteration 3748/56600 Training loss: 1.3403 0.4812 sec/batch
Epoch 7/100  Iteration 3749/56600 Training loss: 1.3402 0.4833 sec/batch
Epoch 7/100  Iteration 3750/56600 Training loss: 1.3402 0.4915 sec/batch
Epoch 7/100  Iteration 3751/56600 Training loss: 1.3402 0.4851 sec/batch
Epoch 7/100  Iteration 3752/56600 Training loss: 1.3402 0.4977 sec/batch
Epoch 7/100  Iteration 3753/56600 Training loss: 1.

Epoch 7/100  Iteration 3853/56600 Training loss: 1.3340 0.4753 sec/batch
Epoch 7/100  Iteration 3854/56600 Training loss: 1.3340 0.4877 sec/batch
Epoch 7/100  Iteration 3855/56600 Training loss: 1.3339 0.4711 sec/batch
Epoch 7/100  Iteration 3856/56600 Training loss: 1.3339 0.4722 sec/batch
Epoch 7/100  Iteration 3857/56600 Training loss: 1.3339 0.4892 sec/batch
Epoch 7/100  Iteration 3858/56600 Training loss: 1.3339 0.4764 sec/batch
Epoch 7/100  Iteration 3859/56600 Training loss: 1.3338 0.4779 sec/batch
Epoch 7/100  Iteration 3860/56600 Training loss: 1.3337 0.4737 sec/batch
Epoch 7/100  Iteration 3861/56600 Training loss: 1.3337 0.4845 sec/batch
Epoch 7/100  Iteration 3862/56600 Training loss: 1.3336 0.4730 sec/batch
Epoch 7/100  Iteration 3863/56600 Training loss: 1.3337 0.4893 sec/batch
Epoch 7/100  Iteration 3864/56600 Training loss: 1.3336 0.4629 sec/batch
Epoch 7/100  Iteration 3865/56600 Training loss: 1.3336 0.4834 sec/batch
Epoch 7/100  Iteration 3866/56600 Training loss: 1.

Epoch 8/100  Iteration 3966/56600 Training loss: 1.3427 0.4918 sec/batch
Epoch 8/100  Iteration 3967/56600 Training loss: 1.3320 0.4883 sec/batch
Epoch 8/100  Iteration 3968/56600 Training loss: 1.3325 0.4779 sec/batch
Epoch 8/100  Iteration 3969/56600 Training loss: 1.3303 0.4882 sec/batch
Epoch 8/100  Iteration 3970/56600 Training loss: 1.3283 0.4830 sec/batch
Epoch 8/100  Iteration 3971/56600 Training loss: 1.3278 0.4903 sec/batch
Epoch 8/100  Iteration 3972/56600 Training loss: 1.3247 0.4805 sec/batch
Epoch 8/100  Iteration 3973/56600 Training loss: 1.3256 0.4830 sec/batch
Epoch 8/100  Iteration 3974/56600 Training loss: 1.3267 0.4794 sec/batch
Epoch 8/100  Iteration 3975/56600 Training loss: 1.3275 0.4770 sec/batch
Epoch 8/100  Iteration 3976/56600 Training loss: 1.3256 0.4835 sec/batch
Epoch 8/100  Iteration 3977/56600 Training loss: 1.3249 0.4950 sec/batch
Epoch 8/100  Iteration 3978/56600 Training loss: 1.3238 0.4797 sec/batch
Epoch 8/100  Iteration 3979/56600 Training loss: 1.

Epoch 8/100  Iteration 4078/56600 Training loss: 1.3153 0.4700 sec/batch
Epoch 8/100  Iteration 4079/56600 Training loss: 1.3157 0.4893 sec/batch
Epoch 8/100  Iteration 4080/56600 Training loss: 1.3158 0.4848 sec/batch
Epoch 8/100  Iteration 4081/56600 Training loss: 1.3158 0.4796 sec/batch
Epoch 8/100  Iteration 4082/56600 Training loss: 1.3163 0.4643 sec/batch
Epoch 8/100  Iteration 4083/56600 Training loss: 1.3166 0.4820 sec/batch
Epoch 8/100  Iteration 4084/56600 Training loss: 1.3167 0.4786 sec/batch
Epoch 8/100  Iteration 4085/56600 Training loss: 1.3164 0.4838 sec/batch
Epoch 8/100  Iteration 4086/56600 Training loss: 1.3161 0.4746 sec/batch
Epoch 8/100  Iteration 4087/56600 Training loss: 1.3160 0.4850 sec/batch
Epoch 8/100  Iteration 4088/56600 Training loss: 1.3160 0.4829 sec/batch
Epoch 8/100  Iteration 4089/56600 Training loss: 1.3159 0.4887 sec/batch
Epoch 8/100  Iteration 4090/56600 Training loss: 1.3160 0.4649 sec/batch
Epoch 8/100  Iteration 4091/56600 Training loss: 1.

Epoch 8/100  Iteration 4191/56600 Training loss: 1.3145 0.4748 sec/batch
Epoch 8/100  Iteration 4192/56600 Training loss: 1.3146 0.4811 sec/batch
Epoch 8/100  Iteration 4193/56600 Training loss: 1.3146 0.4869 sec/batch
Epoch 8/100  Iteration 4194/56600 Training loss: 1.3147 0.4744 sec/batch
Epoch 8/100  Iteration 4195/56600 Training loss: 1.3148 0.4744 sec/batch
Epoch 8/100  Iteration 4196/56600 Training loss: 1.3148 0.4755 sec/batch
Epoch 8/100  Iteration 4197/56600 Training loss: 1.3149 0.4720 sec/batch
Epoch 8/100  Iteration 4198/56600 Training loss: 1.3150 0.4880 sec/batch
Epoch 8/100  Iteration 4199/56600 Training loss: 1.3150 0.4750 sec/batch
Epoch 8/100  Iteration 4200/56600 Training loss: 1.3152 0.4839 sec/batch
Epoch 8/100  Iteration 4201/56600 Training loss: 1.3154 0.4958 sec/batch
Epoch 8/100  Iteration 4202/56600 Training loss: 1.3156 0.4773 sec/batch
Epoch 8/100  Iteration 4203/56600 Training loss: 1.3158 0.4892 sec/batch
Epoch 8/100  Iteration 4204/56600 Training loss: 1.

Epoch 8/100  Iteration 4304/56600 Training loss: 1.3121 0.4943 sec/batch
Epoch 8/100  Iteration 4305/56600 Training loss: 1.3120 0.4772 sec/batch
Epoch 8/100  Iteration 4306/56600 Training loss: 1.3120 0.4741 sec/batch
Epoch 8/100  Iteration 4307/56600 Training loss: 1.3121 0.4777 sec/batch
Epoch 8/100  Iteration 4308/56600 Training loss: 1.3121 0.4667 sec/batch
Epoch 8/100  Iteration 4309/56600 Training loss: 1.3121 0.5022 sec/batch
Epoch 8/100  Iteration 4310/56600 Training loss: 1.3120 0.4619 sec/batch
Epoch 8/100  Iteration 4311/56600 Training loss: 1.3121 0.4638 sec/batch
Epoch 8/100  Iteration 4312/56600 Training loss: 1.3120 0.4769 sec/batch
Epoch 8/100  Iteration 4313/56600 Training loss: 1.3119 0.4735 sec/batch
Epoch 8/100  Iteration 4314/56600 Training loss: 1.3118 0.4827 sec/batch
Epoch 8/100  Iteration 4315/56600 Training loss: 1.3117 0.4748 sec/batch
Epoch 8/100  Iteration 4316/56600 Training loss: 1.3117 0.4845 sec/batch
Epoch 8/100  Iteration 4317/56600 Training loss: 1.

Epoch 8/100  Iteration 4417/56600 Training loss: 1.3061 0.4762 sec/batch
Epoch 8/100  Iteration 4418/56600 Training loss: 1.3061 0.4833 sec/batch
Epoch 8/100  Iteration 4419/56600 Training loss: 1.3060 0.4884 sec/batch
Epoch 8/100  Iteration 4420/56600 Training loss: 1.3059 0.4760 sec/batch
Epoch 8/100  Iteration 4421/56600 Training loss: 1.3059 0.4866 sec/batch
Epoch 8/100  Iteration 4422/56600 Training loss: 1.3059 0.4704 sec/batch
Epoch 8/100  Iteration 4423/56600 Training loss: 1.3058 0.4844 sec/batch
Epoch 8/100  Iteration 4424/56600 Training loss: 1.3058 0.4905 sec/batch
Epoch 8/100  Iteration 4425/56600 Training loss: 1.3058 0.4654 sec/batch
Epoch 8/100  Iteration 4426/56600 Training loss: 1.3057 0.4794 sec/batch
Epoch 8/100  Iteration 4427/56600 Training loss: 1.3056 0.4672 sec/batch
Epoch 8/100  Iteration 4428/56600 Training loss: 1.3056 0.4841 sec/batch
Epoch 8/100  Iteration 4429/56600 Training loss: 1.3056 0.4725 sec/batch
Epoch 8/100  Iteration 4430/56600 Training loss: 1.

Epoch 9/100  Iteration 4530/56600 Training loss: 1.3361 0.4719 sec/batch
Epoch 9/100  Iteration 4531/56600 Training loss: 1.3289 0.4722 sec/batch
Epoch 9/100  Iteration 4532/56600 Training loss: 1.3207 0.4865 sec/batch
Epoch 9/100  Iteration 4533/56600 Training loss: 1.3090 0.4860 sec/batch
Epoch 9/100  Iteration 4534/56600 Training loss: 1.3097 0.4844 sec/batch
Epoch 9/100  Iteration 4535/56600 Training loss: 1.3067 0.4721 sec/batch
Epoch 9/100  Iteration 4536/56600 Training loss: 1.3054 0.4804 sec/batch
Epoch 9/100  Iteration 4537/56600 Training loss: 1.3052 0.4847 sec/batch
Epoch 9/100  Iteration 4538/56600 Training loss: 1.3026 0.4776 sec/batch
Epoch 9/100  Iteration 4539/56600 Training loss: 1.3023 0.4774 sec/batch
Epoch 9/100  Iteration 4540/56600 Training loss: 1.3034 0.4790 sec/batch
Epoch 9/100  Iteration 4541/56600 Training loss: 1.3041 0.4702 sec/batch
Epoch 9/100  Iteration 4542/56600 Training loss: 1.3014 0.4954 sec/batch
Epoch 9/100  Iteration 4543/56600 Training loss: 1.

Epoch 9/100  Iteration 4643/56600 Training loss: 1.2884 0.4749 sec/batch
Epoch 9/100  Iteration 4644/56600 Training loss: 1.2887 0.4809 sec/batch
Epoch 9/100  Iteration 4645/56600 Training loss: 1.2891 0.4834 sec/batch
Epoch 9/100  Iteration 4646/56600 Training loss: 1.2892 0.4764 sec/batch
Epoch 9/100  Iteration 4647/56600 Training loss: 1.2892 0.4872 sec/batch
Epoch 9/100  Iteration 4648/56600 Training loss: 1.2897 0.4824 sec/batch
Epoch 9/100  Iteration 4649/56600 Training loss: 1.2900 0.4726 sec/batch
Epoch 9/100  Iteration 4650/56600 Training loss: 1.2902 0.4747 sec/batch
Epoch 9/100  Iteration 4651/56600 Training loss: 1.2899 0.4757 sec/batch
Epoch 9/100  Iteration 4652/56600 Training loss: 1.2895 0.4780 sec/batch
Epoch 9/100  Iteration 4653/56600 Training loss: 1.2895 0.4796 sec/batch
Epoch 9/100  Iteration 4654/56600 Training loss: 1.2895 0.4753 sec/batch
Epoch 9/100  Iteration 4655/56600 Training loss: 1.2895 0.4782 sec/batch
Epoch 9/100  Iteration 4656/56600 Training loss: 1.

Epoch 9/100  Iteration 4756/56600 Training loss: 1.2888 0.4749 sec/batch
Epoch 9/100  Iteration 4757/56600 Training loss: 1.2889 0.4782 sec/batch
Epoch 9/100  Iteration 4758/56600 Training loss: 1.2890 0.4790 sec/batch
Epoch 9/100  Iteration 4759/56600 Training loss: 1.2890 0.4752 sec/batch
Epoch 9/100  Iteration 4760/56600 Training loss: 1.2891 0.4683 sec/batch
Epoch 9/100  Iteration 4761/56600 Training loss: 1.2892 0.4884 sec/batch
Epoch 9/100  Iteration 4762/56600 Training loss: 1.2892 0.4894 sec/batch
Epoch 9/100  Iteration 4763/56600 Training loss: 1.2893 0.4703 sec/batch
Epoch 9/100  Iteration 4764/56600 Training loss: 1.2893 0.4895 sec/batch
Epoch 9/100  Iteration 4765/56600 Training loss: 1.2894 0.4789 sec/batch
Epoch 9/100  Iteration 4766/56600 Training loss: 1.2896 0.4750 sec/batch
Epoch 9/100  Iteration 4767/56600 Training loss: 1.2898 0.4753 sec/batch
Epoch 9/100  Iteration 4768/56600 Training loss: 1.2901 0.4843 sec/batch
Epoch 9/100  Iteration 4769/56600 Training loss: 1.

Epoch 9/100  Iteration 4869/56600 Training loss: 1.2872 0.4671 sec/batch
Epoch 9/100  Iteration 4870/56600 Training loss: 1.2872 0.4767 sec/batch
Epoch 9/100  Iteration 4871/56600 Training loss: 1.2872 0.4680 sec/batch
Epoch 9/100  Iteration 4872/56600 Training loss: 1.2872 0.4785 sec/batch
Epoch 9/100  Iteration 4873/56600 Training loss: 1.2873 0.4715 sec/batch
Epoch 9/100  Iteration 4874/56600 Training loss: 1.2873 0.4725 sec/batch
Epoch 9/100  Iteration 4875/56600 Training loss: 1.2873 0.4746 sec/batch
Epoch 9/100  Iteration 4876/56600 Training loss: 1.2872 0.4783 sec/batch
Epoch 9/100  Iteration 4877/56600 Training loss: 1.2872 0.4687 sec/batch
Epoch 9/100  Iteration 4878/56600 Training loss: 1.2872 0.4787 sec/batch
Epoch 9/100  Iteration 4879/56600 Training loss: 1.2871 0.4641 sec/batch
Epoch 9/100  Iteration 4880/56600 Training loss: 1.2869 0.4763 sec/batch
Epoch 9/100  Iteration 4881/56600 Training loss: 1.2869 0.4793 sec/batch
Epoch 9/100  Iteration 4882/56600 Training loss: 1.

Epoch 9/100  Iteration 4982/56600 Training loss: 1.2820 0.4752 sec/batch
Epoch 9/100  Iteration 4983/56600 Training loss: 1.2820 0.4726 sec/batch
Epoch 9/100  Iteration 4984/56600 Training loss: 1.2819 0.4860 sec/batch
Epoch 9/100  Iteration 4985/56600 Training loss: 1.2818 0.4683 sec/batch
Epoch 9/100  Iteration 4986/56600 Training loss: 1.2818 0.4902 sec/batch
Epoch 9/100  Iteration 4987/56600 Training loss: 1.2817 0.4757 sec/batch
Epoch 9/100  Iteration 4988/56600 Training loss: 1.2817 0.4789 sec/batch
Epoch 9/100  Iteration 4989/56600 Training loss: 1.2817 0.4768 sec/batch
Epoch 9/100  Iteration 4990/56600 Training loss: 1.2817 0.4736 sec/batch
Epoch 9/100  Iteration 4991/56600 Training loss: 1.2816 0.4658 sec/batch
Epoch 9/100  Iteration 4992/56600 Training loss: 1.2816 0.4841 sec/batch
Epoch 9/100  Iteration 4993/56600 Training loss: 1.2815 0.4880 sec/batch
Epoch 9/100  Iteration 4994/56600 Training loss: 1.2815 0.4752 sec/batch
Epoch 9/100  Iteration 4995/56600 Training loss: 1.

Epoch 10/100  Iteration 5095/56600 Training loss: 1.3591 0.4697 sec/batch
Epoch 10/100  Iteration 5096/56600 Training loss: 1.3182 0.4904 sec/batch
Epoch 10/100  Iteration 5097/56600 Training loss: 1.3110 0.4752 sec/batch
Epoch 10/100  Iteration 5098/56600 Training loss: 1.3027 0.4784 sec/batch
Epoch 10/100  Iteration 5099/56600 Training loss: 1.2906 0.4718 sec/batch
Epoch 10/100  Iteration 5100/56600 Training loss: 1.2901 0.4842 sec/batch
Epoch 10/100  Iteration 5101/56600 Training loss: 1.2874 0.4891 sec/batch
Epoch 10/100  Iteration 5102/56600 Training loss: 1.2860 0.4763 sec/batch
Epoch 10/100  Iteration 5103/56600 Training loss: 1.2853 0.4794 sec/batch
Epoch 10/100  Iteration 5104/56600 Training loss: 1.2826 0.4794 sec/batch
Epoch 10/100  Iteration 5105/56600 Training loss: 1.2831 0.4779 sec/batch
Epoch 10/100  Iteration 5106/56600 Training loss: 1.2833 0.4748 sec/batch
Epoch 10/100  Iteration 5107/56600 Training loss: 1.2846 0.4746 sec/batch
Epoch 10/100  Iteration 5108/56600 Tra

Epoch 10/100  Iteration 5206/56600 Training loss: 1.2660 0.4780 sec/batch
Epoch 10/100  Iteration 5207/56600 Training loss: 1.2662 0.4747 sec/batch
Epoch 10/100  Iteration 5208/56600 Training loss: 1.2662 0.4797 sec/batch
Epoch 10/100  Iteration 5209/56600 Training loss: 1.2662 0.4827 sec/batch
Epoch 10/100  Iteration 5210/56600 Training loss: 1.2665 0.4710 sec/batch
Epoch 10/100  Iteration 5211/56600 Training loss: 1.2669 0.4798 sec/batch
Epoch 10/100  Iteration 5212/56600 Training loss: 1.2670 0.4727 sec/batch
Epoch 10/100  Iteration 5213/56600 Training loss: 1.2671 0.4866 sec/batch
Epoch 10/100  Iteration 5214/56600 Training loss: 1.2676 0.4805 sec/batch
Epoch 10/100  Iteration 5215/56600 Training loss: 1.2679 0.4631 sec/batch
Epoch 10/100  Iteration 5216/56600 Training loss: 1.2682 0.4808 sec/batch
Epoch 10/100  Iteration 5217/56600 Training loss: 1.2679 0.4703 sec/batch
Epoch 10/100  Iteration 5218/56600 Training loss: 1.2676 0.4797 sec/batch
Epoch 10/100  Iteration 5219/56600 Tra

Epoch 10/100  Iteration 5317/56600 Training loss: 1.2675 0.4727 sec/batch
Epoch 10/100  Iteration 5318/56600 Training loss: 1.2676 0.4844 sec/batch
Epoch 10/100  Iteration 5319/56600 Training loss: 1.2675 0.4787 sec/batch
Epoch 10/100  Iteration 5320/56600 Training loss: 1.2675 0.4734 sec/batch
Epoch 10/100  Iteration 5321/56600 Training loss: 1.2675 0.4758 sec/batch
Epoch 10/100  Iteration 5322/56600 Training loss: 1.2674 0.4774 sec/batch
Epoch 10/100  Iteration 5323/56600 Training loss: 1.2675 0.4766 sec/batch
Epoch 10/100  Iteration 5324/56600 Training loss: 1.2676 0.4726 sec/batch
Epoch 10/100  Iteration 5325/56600 Training loss: 1.2676 0.4831 sec/batch
Epoch 10/100  Iteration 5326/56600 Training loss: 1.2677 0.4676 sec/batch
Epoch 10/100  Iteration 5327/56600 Training loss: 1.2678 0.4909 sec/batch
Epoch 10/100  Iteration 5328/56600 Training loss: 1.2678 0.4871 sec/batch
Epoch 10/100  Iteration 5329/56600 Training loss: 1.2679 0.4766 sec/batch
Epoch 10/100  Iteration 5330/56600 Tra

Epoch 10/100  Iteration 5428/56600 Training loss: 1.2665 0.4741 sec/batch
Epoch 10/100  Iteration 5429/56600 Training loss: 1.2664 0.4783 sec/batch
Epoch 10/100  Iteration 5430/56600 Training loss: 1.2664 0.4767 sec/batch
Epoch 10/100  Iteration 5431/56600 Training loss: 1.2664 0.4853 sec/batch
Epoch 10/100  Iteration 5432/56600 Training loss: 1.2664 0.4843 sec/batch
Epoch 10/100  Iteration 5433/56600 Training loss: 1.2664 0.4734 sec/batch
Epoch 10/100  Iteration 5434/56600 Training loss: 1.2663 0.4938 sec/batch
Epoch 10/100  Iteration 5435/56600 Training loss: 1.2662 0.4789 sec/batch
Epoch 10/100  Iteration 5436/56600 Training loss: 1.2662 0.4832 sec/batch
Epoch 10/100  Iteration 5437/56600 Training loss: 1.2661 0.4814 sec/batch
Epoch 10/100  Iteration 5438/56600 Training loss: 1.2662 0.4794 sec/batch
Epoch 10/100  Iteration 5439/56600 Training loss: 1.2663 0.4835 sec/batch
Epoch 10/100  Iteration 5440/56600 Training loss: 1.2663 0.4784 sec/batch
Epoch 10/100  Iteration 5441/56600 Tra

Epoch 10/100  Iteration 5539/56600 Training loss: 1.2618 0.4819 sec/batch
Epoch 10/100  Iteration 5540/56600 Training loss: 1.2617 0.4747 sec/batch
Epoch 10/100  Iteration 5541/56600 Training loss: 1.2617 0.4831 sec/batch
Epoch 10/100  Iteration 5542/56600 Training loss: 1.2616 0.4775 sec/batch
Epoch 10/100  Iteration 5543/56600 Training loss: 1.2616 0.4711 sec/batch
Epoch 10/100  Iteration 5544/56600 Training loss: 1.2615 0.4906 sec/batch
Epoch 10/100  Iteration 5545/56600 Training loss: 1.2615 0.4653 sec/batch
Epoch 10/100  Iteration 5546/56600 Training loss: 1.2615 0.4690 sec/batch
Epoch 10/100  Iteration 5547/56600 Training loss: 1.2615 0.4922 sec/batch
Epoch 10/100  Iteration 5548/56600 Training loss: 1.2614 0.4658 sec/batch
Epoch 10/100  Iteration 5549/56600 Training loss: 1.2614 0.4746 sec/batch
Epoch 10/100  Iteration 5550/56600 Training loss: 1.2613 0.4730 sec/batch
Epoch 10/100  Iteration 5551/56600 Training loss: 1.2612 0.4843 sec/batch
Epoch 10/100  Iteration 5552/56600 Tra

Epoch 10/100  Iteration 5650/56600 Training loss: 1.2583 0.4845 sec/batch
Epoch 10/100  Iteration 5651/56600 Training loss: 1.2583 0.4873 sec/batch
Epoch 10/100  Iteration 5652/56600 Training loss: 1.2583 0.4751 sec/batch
Epoch 10/100  Iteration 5653/56600 Training loss: 1.2584 0.4698 sec/batch
Epoch 10/100  Iteration 5654/56600 Training loss: 1.2584 0.4910 sec/batch
Epoch 10/100  Iteration 5655/56600 Training loss: 1.2584 0.4732 sec/batch
Epoch 10/100  Iteration 5656/56600 Training loss: 1.2584 0.4887 sec/batch
Epoch 10/100  Iteration 5657/56600 Training loss: 1.2584 0.4827 sec/batch
Epoch 10/100  Iteration 5658/56600 Training loss: 1.2584 0.4695 sec/batch
Epoch 10/100  Iteration 5659/56600 Training loss: 1.2585 0.4910 sec/batch
Epoch 10/100  Iteration 5660/56600 Training loss: 1.2585 0.4666 sec/batch
Epoch 11/100  Iteration 5661/56600 Training loss: 1.3418 0.4791 sec/batch
Epoch 11/100  Iteration 5662/56600 Training loss: 1.2983 0.4643 sec/batch
Epoch 11/100  Iteration 5663/56600 Tra

Epoch 11/100  Iteration 5761/56600 Training loss: 1.2470 0.4692 sec/batch
Epoch 11/100  Iteration 5762/56600 Training loss: 1.2476 0.4880 sec/batch
Epoch 11/100  Iteration 5763/56600 Training loss: 1.2476 0.4678 sec/batch
Epoch 11/100  Iteration 5764/56600 Training loss: 1.2479 0.4785 sec/batch
Epoch 11/100  Iteration 5765/56600 Training loss: 1.2481 0.4811 sec/batch
Epoch 11/100  Iteration 5766/56600 Training loss: 1.2480 0.4688 sec/batch
Epoch 11/100  Iteration 5767/56600 Training loss: 1.2477 0.4723 sec/batch
Epoch 11/100  Iteration 5768/56600 Training loss: 1.2477 0.4748 sec/batch
Epoch 11/100  Iteration 5769/56600 Training loss: 1.2477 0.4820 sec/batch
Epoch 11/100  Iteration 5770/56600 Training loss: 1.2478 0.4829 sec/batch
Epoch 11/100  Iteration 5771/56600 Training loss: 1.2479 0.4775 sec/batch
Epoch 11/100  Iteration 5772/56600 Training loss: 1.2479 0.4877 sec/batch
Epoch 11/100  Iteration 5773/56600 Training loss: 1.2481 0.4900 sec/batch
Epoch 11/100  Iteration 5774/56600 Tra

Epoch 11/100  Iteration 5872/56600 Training loss: 1.2495 0.4862 sec/batch
Epoch 11/100  Iteration 5873/56600 Training loss: 1.2494 0.4809 sec/batch
Epoch 11/100  Iteration 5874/56600 Training loss: 1.2493 0.4829 sec/batch
Epoch 11/100  Iteration 5875/56600 Training loss: 1.2493 0.4871 sec/batch
Epoch 11/100  Iteration 5876/56600 Training loss: 1.2493 0.4891 sec/batch
Epoch 11/100  Iteration 5877/56600 Training loss: 1.2493 0.5062 sec/batch
Epoch 11/100  Iteration 5878/56600 Training loss: 1.2493 0.4849 sec/batch
Epoch 11/100  Iteration 5879/56600 Training loss: 1.2495 0.4790 sec/batch
Epoch 11/100  Iteration 5880/56600 Training loss: 1.2495 0.4846 sec/batch
Epoch 11/100  Iteration 5881/56600 Training loss: 1.2496 0.4864 sec/batch
Epoch 11/100  Iteration 5882/56600 Training loss: 1.2496 0.4863 sec/batch
Epoch 11/100  Iteration 5883/56600 Training loss: 1.2495 0.4984 sec/batch
Epoch 11/100  Iteration 5884/56600 Training loss: 1.2496 0.4846 sec/batch
Epoch 11/100  Iteration 5885/56600 Tra

Epoch 11/100  Iteration 5983/56600 Training loss: 1.2487 0.4779 sec/batch
Epoch 11/100  Iteration 5984/56600 Training loss: 1.2487 0.4901 sec/batch
Epoch 11/100  Iteration 5985/56600 Training loss: 1.2486 0.4811 sec/batch
Epoch 11/100  Iteration 5986/56600 Training loss: 1.2486 0.4751 sec/batch
Epoch 11/100  Iteration 5987/56600 Training loss: 1.2484 0.4685 sec/batch
Epoch 11/100  Iteration 5988/56600 Training loss: 1.2483 0.4878 sec/batch
Epoch 11/100  Iteration 5989/56600 Training loss: 1.2483 0.4759 sec/batch
Epoch 11/100  Iteration 5990/56600 Training loss: 1.2483 0.4888 sec/batch
Epoch 11/100  Iteration 5991/56600 Training loss: 1.2483 0.4748 sec/batch
Epoch 11/100  Iteration 5992/56600 Training loss: 1.2484 0.4844 sec/batch
Epoch 11/100  Iteration 5993/56600 Training loss: 1.2484 0.4727 sec/batch
Epoch 11/100  Iteration 5994/56600 Training loss: 1.2484 0.4880 sec/batch
Epoch 11/100  Iteration 5995/56600 Training loss: 1.2483 0.4909 sec/batch
Epoch 11/100  Iteration 5996/56600 Tra

Epoch 11/100  Iteration 6094/56600 Training loss: 1.2450 0.4793 sec/batch
Epoch 11/100  Iteration 6095/56600 Training loss: 1.2450 0.4758 sec/batch
Epoch 11/100  Iteration 6096/56600 Training loss: 1.2449 0.4742 sec/batch
Epoch 11/100  Iteration 6097/56600 Training loss: 1.2448 0.4842 sec/batch
Epoch 11/100  Iteration 6098/56600 Training loss: 1.2447 0.4878 sec/batch
Epoch 11/100  Iteration 6099/56600 Training loss: 1.2446 0.4743 sec/batch
Epoch 11/100  Iteration 6100/56600 Training loss: 1.2445 0.4862 sec/batch
Epoch 11/100  Iteration 6101/56600 Training loss: 1.2444 0.4758 sec/batch
Epoch 11/100  Iteration 6102/56600 Training loss: 1.2443 0.4770 sec/batch
Epoch 11/100  Iteration 6103/56600 Training loss: 1.2443 0.4753 sec/batch
Epoch 11/100  Iteration 6104/56600 Training loss: 1.2443 0.4767 sec/batch
Epoch 11/100  Iteration 6105/56600 Training loss: 1.2441 0.4692 sec/batch
Epoch 11/100  Iteration 6106/56600 Training loss: 1.2440 0.4877 sec/batch
Epoch 11/100  Iteration 6107/56600 Tra

Epoch 11/100  Iteration 6205/56600 Training loss: 1.2409 0.4845 sec/batch
Epoch 11/100  Iteration 6206/56600 Training loss: 1.2409 0.4732 sec/batch
Epoch 11/100  Iteration 6207/56600 Training loss: 1.2409 0.4741 sec/batch
Epoch 11/100  Iteration 6208/56600 Training loss: 1.2409 0.4829 sec/batch
Epoch 11/100  Iteration 6209/56600 Training loss: 1.2409 0.4753 sec/batch
Epoch 11/100  Iteration 6210/56600 Training loss: 1.2409 0.4718 sec/batch
Epoch 11/100  Iteration 6211/56600 Training loss: 1.2409 0.4879 sec/batch
Epoch 11/100  Iteration 6212/56600 Training loss: 1.2409 0.4792 sec/batch
Epoch 11/100  Iteration 6213/56600 Training loss: 1.2409 0.4748 sec/batch
Epoch 11/100  Iteration 6214/56600 Training loss: 1.2409 0.4702 sec/batch
Epoch 11/100  Iteration 6215/56600 Training loss: 1.2408 0.4911 sec/batch
Epoch 11/100  Iteration 6216/56600 Training loss: 1.2409 0.4820 sec/batch
Epoch 11/100  Iteration 6217/56600 Training loss: 1.2409 0.4770 sec/batch
Epoch 11/100  Iteration 6218/56600 Tra

Epoch 12/100  Iteration 6316/56600 Training loss: 1.2297 0.4838 sec/batch
Epoch 12/100  Iteration 6317/56600 Training loss: 1.2299 0.4811 sec/batch
Epoch 12/100  Iteration 6318/56600 Training loss: 1.2299 0.4598 sec/batch
Epoch 12/100  Iteration 6319/56600 Training loss: 1.2296 0.4897 sec/batch
Epoch 12/100  Iteration 6320/56600 Training loss: 1.2297 0.4779 sec/batch
Epoch 12/100  Iteration 6321/56600 Training loss: 1.2296 0.4760 sec/batch
Epoch 12/100  Iteration 6322/56600 Training loss: 1.2296 0.4801 sec/batch
Epoch 12/100  Iteration 6323/56600 Training loss: 1.2293 0.4746 sec/batch
Epoch 12/100  Iteration 6324/56600 Training loss: 1.2292 0.4691 sec/batch
Epoch 12/100  Iteration 6325/56600 Training loss: 1.2295 0.4781 sec/batch
Epoch 12/100  Iteration 6326/56600 Training loss: 1.2301 0.4761 sec/batch
Epoch 12/100  Iteration 6327/56600 Training loss: 1.2303 0.4787 sec/batch
Epoch 12/100  Iteration 6328/56600 Training loss: 1.2308 0.4797 sec/batch
Epoch 12/100  Iteration 6329/56600 Tra

Epoch 12/100  Iteration 6427/56600 Training loss: 1.2332 0.4792 sec/batch
Epoch 12/100  Iteration 6428/56600 Training loss: 1.2334 0.4839 sec/batch
Epoch 12/100  Iteration 6429/56600 Training loss: 1.2334 0.4886 sec/batch
Epoch 12/100  Iteration 6430/56600 Training loss: 1.2334 0.4749 sec/batch
Epoch 12/100  Iteration 6431/56600 Training loss: 1.2334 0.4882 sec/batch
Epoch 12/100  Iteration 6432/56600 Training loss: 1.2335 0.4819 sec/batch
Epoch 12/100  Iteration 6433/56600 Training loss: 1.2335 0.4722 sec/batch
Epoch 12/100  Iteration 6434/56600 Training loss: 1.2333 0.4726 sec/batch
Epoch 12/100  Iteration 6435/56600 Training loss: 1.2333 0.4743 sec/batch
Epoch 12/100  Iteration 6436/56600 Training loss: 1.2333 0.4948 sec/batch
Epoch 12/100  Iteration 6437/56600 Training loss: 1.2333 0.4790 sec/batch
Epoch 12/100  Iteration 6438/56600 Training loss: 1.2333 0.4805 sec/batch
Epoch 12/100  Iteration 6439/56600 Training loss: 1.2331 0.4877 sec/batch
Epoch 12/100  Iteration 6440/56600 Tra

Epoch 12/100  Iteration 6538/56600 Training loss: 1.2335 0.4931 sec/batch
Epoch 12/100  Iteration 6539/56600 Training loss: 1.2333 0.4841 sec/batch
Epoch 12/100  Iteration 6540/56600 Training loss: 1.2332 0.4767 sec/batch
Epoch 12/100  Iteration 6541/56600 Training loss: 1.2332 0.4797 sec/batch
Epoch 12/100  Iteration 6542/56600 Training loss: 1.2332 0.4727 sec/batch
Epoch 12/100  Iteration 6543/56600 Training loss: 1.2333 0.4842 sec/batch
Epoch 12/100  Iteration 6544/56600 Training loss: 1.2333 0.4884 sec/batch
Epoch 12/100  Iteration 6545/56600 Training loss: 1.2332 0.4746 sec/batch
Epoch 12/100  Iteration 6546/56600 Training loss: 1.2331 0.4823 sec/batch
Epoch 12/100  Iteration 6547/56600 Training loss: 1.2331 0.4774 sec/batch
Epoch 12/100  Iteration 6548/56600 Training loss: 1.2330 0.4688 sec/batch
Epoch 12/100  Iteration 6549/56600 Training loss: 1.2328 0.4717 sec/batch
Epoch 12/100  Iteration 6550/56600 Training loss: 1.2328 0.4901 sec/batch
Epoch 12/100  Iteration 6551/56600 Tra

Epoch 12/100  Iteration 6649/56600 Training loss: 1.2302 0.4724 sec/batch
Epoch 12/100  Iteration 6650/56600 Training loss: 1.2302 0.4723 sec/batch
Epoch 12/100  Iteration 6651/56600 Training loss: 1.2301 0.4946 sec/batch
Epoch 12/100  Iteration 6652/56600 Training loss: 1.2300 0.4692 sec/batch
Epoch 12/100  Iteration 6653/56600 Training loss: 1.2299 0.4688 sec/batch
Epoch 12/100  Iteration 6654/56600 Training loss: 1.2298 0.4727 sec/batch
Epoch 12/100  Iteration 6655/56600 Training loss: 1.2297 0.4749 sec/batch
Epoch 12/100  Iteration 6656/56600 Training loss: 1.2296 0.4914 sec/batch
Epoch 12/100  Iteration 6657/56600 Training loss: 1.2295 0.4793 sec/batch
Epoch 12/100  Iteration 6658/56600 Training loss: 1.2293 0.4721 sec/batch
Epoch 12/100  Iteration 6659/56600 Training loss: 1.2293 0.4877 sec/batch
Epoch 12/100  Iteration 6660/56600 Training loss: 1.2292 0.4911 sec/batch
Epoch 12/100  Iteration 6661/56600 Training loss: 1.2291 0.4931 sec/batch
Epoch 12/100  Iteration 6662/56600 Tra

Epoch 12/100  Iteration 6760/56600 Training loss: 1.2257 0.4677 sec/batch
Epoch 12/100  Iteration 6761/56600 Training loss: 1.2256 0.4719 sec/batch
Epoch 12/100  Iteration 6762/56600 Training loss: 1.2256 0.4760 sec/batch
Epoch 12/100  Iteration 6763/56600 Training loss: 1.2256 0.4769 sec/batch
Epoch 12/100  Iteration 6764/56600 Training loss: 1.2256 0.4775 sec/batch
Epoch 12/100  Iteration 6765/56600 Training loss: 1.2256 0.4742 sec/batch
Epoch 12/100  Iteration 6766/56600 Training loss: 1.2256 0.4867 sec/batch
Epoch 12/100  Iteration 6767/56600 Training loss: 1.2255 0.4750 sec/batch
Epoch 12/100  Iteration 6768/56600 Training loss: 1.2255 0.4698 sec/batch
Epoch 12/100  Iteration 6769/56600 Training loss: 1.2255 0.4919 sec/batch
Epoch 12/100  Iteration 6770/56600 Training loss: 1.2255 0.4794 sec/batch
Epoch 12/100  Iteration 6771/56600 Training loss: 1.2254 0.4771 sec/batch
Epoch 12/100  Iteration 6772/56600 Training loss: 1.2254 0.4660 sec/batch
Epoch 12/100  Iteration 6773/56600 Tra

Epoch 13/100  Iteration 6871/56600 Training loss: 1.2147 0.4876 sec/batch
Epoch 13/100  Iteration 6872/56600 Training loss: 1.2146 0.4770 sec/batch
Epoch 13/100  Iteration 6873/56600 Training loss: 1.2146 0.4842 sec/batch
Epoch 13/100  Iteration 6874/56600 Training loss: 1.2146 0.4721 sec/batch
Epoch 13/100  Iteration 6875/56600 Training loss: 1.2150 0.4754 sec/batch
Epoch 13/100  Iteration 6876/56600 Training loss: 1.2152 0.4948 sec/batch
Epoch 13/100  Iteration 6877/56600 Training loss: 1.2151 0.4812 sec/batch
Epoch 13/100  Iteration 6878/56600 Training loss: 1.2152 0.4835 sec/batch
Epoch 13/100  Iteration 6879/56600 Training loss: 1.2150 0.4881 sec/batch
Epoch 13/100  Iteration 6880/56600 Training loss: 1.2146 0.4910 sec/batch
Epoch 13/100  Iteration 6881/56600 Training loss: 1.2147 0.4913 sec/batch
Epoch 13/100  Iteration 6882/56600 Training loss: 1.2149 0.4799 sec/batch
Epoch 13/100  Iteration 6883/56600 Training loss: 1.2151 0.4698 sec/batch
Epoch 13/100  Iteration 6884/56600 Tra

Epoch 13/100  Iteration 6982/56600 Training loss: 1.2188 0.4723 sec/batch
Epoch 13/100  Iteration 6983/56600 Training loss: 1.2186 0.4843 sec/batch
Epoch 13/100  Iteration 6984/56600 Training loss: 1.2186 0.4728 sec/batch
Epoch 13/100  Iteration 6985/56600 Training loss: 1.2186 0.4751 sec/batch
Epoch 13/100  Iteration 6986/56600 Training loss: 1.2187 0.4875 sec/batch
Epoch 13/100  Iteration 6987/56600 Training loss: 1.2187 0.4772 sec/batch
Epoch 13/100  Iteration 6988/56600 Training loss: 1.2188 0.4768 sec/batch
Epoch 13/100  Iteration 6989/56600 Training loss: 1.2188 0.4772 sec/batch
Epoch 13/100  Iteration 6990/56600 Training loss: 1.2189 0.4757 sec/batch
Epoch 13/100  Iteration 6991/56600 Training loss: 1.2189 0.4688 sec/batch
Epoch 13/100  Iteration 6992/56600 Training loss: 1.2189 0.4720 sec/batch
Epoch 13/100  Iteration 6993/56600 Training loss: 1.2189 0.4897 sec/batch
Epoch 13/100  Iteration 6994/56600 Training loss: 1.2191 0.4842 sec/batch
Epoch 13/100  Iteration 6995/56600 Tra

Epoch 13/100  Iteration 7093/56600 Training loss: 1.2202 0.4812 sec/batch
Epoch 13/100  Iteration 7094/56600 Training loss: 1.2202 0.4782 sec/batch
Epoch 13/100  Iteration 7095/56600 Training loss: 1.2202 0.4875 sec/batch
Epoch 13/100  Iteration 7096/56600 Training loss: 1.2202 0.4747 sec/batch
Epoch 13/100  Iteration 7097/56600 Training loss: 1.2201 0.4700 sec/batch
Epoch 13/100  Iteration 7098/56600 Training loss: 1.2200 0.4817 sec/batch
Epoch 13/100  Iteration 7099/56600 Training loss: 1.2198 0.4812 sec/batch
Epoch 13/100  Iteration 7100/56600 Training loss: 1.2196 0.4786 sec/batch
Epoch 13/100  Iteration 7101/56600 Training loss: 1.2195 0.4715 sec/batch
Epoch 13/100  Iteration 7102/56600 Training loss: 1.2195 0.4845 sec/batch
Epoch 13/100  Iteration 7103/56600 Training loss: 1.2195 0.4877 sec/batch
Epoch 13/100  Iteration 7104/56600 Training loss: 1.2194 0.4817 sec/batch
Epoch 13/100  Iteration 7105/56600 Training loss: 1.2193 0.4910 sec/batch
Epoch 13/100  Iteration 7106/56600 Tra

Epoch 13/100  Iteration 7204/56600 Training loss: 1.2163 0.4859 sec/batch
Epoch 13/100  Iteration 7205/56600 Training loss: 1.2163 0.4778 sec/batch
Epoch 13/100  Iteration 7206/56600 Training loss: 1.2162 0.4843 sec/batch
Epoch 13/100  Iteration 7207/56600 Training loss: 1.2162 0.4879 sec/batch
Epoch 13/100  Iteration 7208/56600 Training loss: 1.2161 0.4901 sec/batch
Epoch 13/100  Iteration 7209/56600 Training loss: 1.2161 0.4833 sec/batch
Epoch 13/100  Iteration 7210/56600 Training loss: 1.2160 0.4778 sec/batch
Epoch 13/100  Iteration 7211/56600 Training loss: 1.2160 0.4680 sec/batch
Epoch 13/100  Iteration 7212/56600 Training loss: 1.2160 0.4727 sec/batch
Epoch 13/100  Iteration 7213/56600 Training loss: 1.2160 0.4741 sec/batch
Epoch 13/100  Iteration 7214/56600 Training loss: 1.2160 0.4706 sec/batch
Epoch 13/100  Iteration 7215/56600 Training loss: 1.2159 0.4890 sec/batch
Epoch 13/100  Iteration 7216/56600 Training loss: 1.2159 0.4661 sec/batch
Epoch 13/100  Iteration 7217/56600 Tra

Epoch 13/100  Iteration 7315/56600 Training loss: 1.2118 0.4845 sec/batch
Epoch 13/100  Iteration 7316/56600 Training loss: 1.2118 0.4720 sec/batch
Epoch 13/100  Iteration 7317/56600 Training loss: 1.2118 0.4879 sec/batch
Epoch 13/100  Iteration 7318/56600 Training loss: 1.2118 0.4781 sec/batch
Epoch 13/100  Iteration 7319/56600 Training loss: 1.2118 0.4789 sec/batch
Epoch 13/100  Iteration 7320/56600 Training loss: 1.2118 0.4782 sec/batch
Epoch 13/100  Iteration 7321/56600 Training loss: 1.2117 0.4673 sec/batch
Epoch 13/100  Iteration 7322/56600 Training loss: 1.2117 0.4703 sec/batch
Epoch 13/100  Iteration 7323/56600 Training loss: 1.2117 0.4710 sec/batch
Epoch 13/100  Iteration 7324/56600 Training loss: 1.2116 0.4725 sec/batch
Epoch 13/100  Iteration 7325/56600 Training loss: 1.2116 0.4933 sec/batch
Epoch 13/100  Iteration 7326/56600 Training loss: 1.2115 0.4716 sec/batch
Epoch 13/100  Iteration 7327/56600 Training loss: 1.2115 0.4844 sec/batch
Epoch 13/100  Iteration 7328/56600 Tra

Epoch 14/100  Iteration 7426/56600 Training loss: 1.2023 0.4688 sec/batch
Epoch 14/100  Iteration 7427/56600 Training loss: 1.2026 0.4715 sec/batch
Epoch 14/100  Iteration 7428/56600 Training loss: 1.2023 0.4725 sec/batch
Epoch 14/100  Iteration 7429/56600 Training loss: 1.2021 0.4838 sec/batch
Epoch 14/100  Iteration 7430/56600 Training loss: 1.2015 0.4800 sec/batch
Epoch 14/100  Iteration 7431/56600 Training loss: 1.2011 0.4797 sec/batch
Epoch 14/100  Iteration 7432/56600 Training loss: 1.2017 0.4874 sec/batch
Epoch 14/100  Iteration 7433/56600 Training loss: 1.2016 0.4805 sec/batch
Epoch 14/100  Iteration 7434/56600 Training loss: 1.2011 0.4887 sec/batch
Epoch 14/100  Iteration 7435/56600 Training loss: 1.2012 0.4805 sec/batch
Epoch 14/100  Iteration 7436/56600 Training loss: 1.2012 0.4774 sec/batch
Epoch 14/100  Iteration 7437/56600 Training loss: 1.2007 0.4767 sec/batch
Epoch 14/100  Iteration 7438/56600 Training loss: 1.2005 0.4781 sec/batch
Epoch 14/100  Iteration 7439/56600 Tra

Epoch 14/100  Iteration 7537/56600 Training loss: 1.2049 0.4696 sec/batch
Epoch 14/100  Iteration 7538/56600 Training loss: 1.2048 0.4718 sec/batch
Epoch 14/100  Iteration 7539/56600 Training loss: 1.2048 0.4743 sec/batch
Epoch 14/100  Iteration 7540/56600 Training loss: 1.2048 0.4892 sec/batch
Epoch 14/100  Iteration 7541/56600 Training loss: 1.2049 0.4750 sec/batch
Epoch 14/100  Iteration 7542/56600 Training loss: 1.2051 0.4846 sec/batch
Epoch 14/100  Iteration 7543/56600 Training loss: 1.2051 0.4887 sec/batch
Epoch 14/100  Iteration 7544/56600 Training loss: 1.2051 0.4890 sec/batch
Epoch 14/100  Iteration 7545/56600 Training loss: 1.2051 0.4870 sec/batch
Epoch 14/100  Iteration 7546/56600 Training loss: 1.2053 0.4812 sec/batch
Epoch 14/100  Iteration 7547/56600 Training loss: 1.2051 0.4893 sec/batch
Epoch 14/100  Iteration 7548/56600 Training loss: 1.2051 0.4728 sec/batch
Epoch 14/100  Iteration 7549/56600 Training loss: 1.2050 0.4723 sec/batch
Epoch 14/100  Iteration 7550/56600 Tra

Epoch 14/100  Iteration 7648/56600 Training loss: 1.2059 0.4896 sec/batch
Epoch 14/100  Iteration 7649/56600 Training loss: 1.2060 0.4840 sec/batch
Epoch 14/100  Iteration 7650/56600 Training loss: 1.2062 0.4694 sec/batch
Epoch 14/100  Iteration 7651/56600 Training loss: 1.2064 0.4677 sec/batch
Epoch 14/100  Iteration 7652/56600 Training loss: 1.2064 0.4753 sec/batch
Epoch 14/100  Iteration 7653/56600 Training loss: 1.2066 0.4873 sec/batch
Epoch 14/100  Iteration 7654/56600 Training loss: 1.2066 0.4816 sec/batch
Epoch 14/100  Iteration 7655/56600 Training loss: 1.2066 0.4689 sec/batch
Epoch 14/100  Iteration 7656/56600 Training loss: 1.2066 0.4880 sec/batch
Epoch 14/100  Iteration 7657/56600 Training loss: 1.2066 0.4901 sec/batch
Epoch 14/100  Iteration 7658/56600 Training loss: 1.2065 0.4916 sec/batch
Epoch 14/100  Iteration 7659/56600 Training loss: 1.2064 0.4829 sec/batch
Epoch 14/100  Iteration 7660/56600 Training loss: 1.2064 0.4847 sec/batch
Epoch 14/100  Iteration 7661/56600 Tra

Epoch 14/100  Iteration 7759/56600 Training loss: 1.2034 0.4763 sec/batch
Epoch 14/100  Iteration 7760/56600 Training loss: 1.2034 0.5026 sec/batch
Epoch 14/100  Iteration 7761/56600 Training loss: 1.2034 0.4896 sec/batch
Epoch 14/100  Iteration 7762/56600 Training loss: 1.2034 0.4809 sec/batch
Epoch 14/100  Iteration 7763/56600 Training loss: 1.2034 0.4879 sec/batch
Epoch 14/100  Iteration 7764/56600 Training loss: 1.2034 0.4872 sec/batch
Epoch 14/100  Iteration 7765/56600 Training loss: 1.2034 0.4823 sec/batch
Epoch 14/100  Iteration 7766/56600 Training loss: 1.2033 0.4840 sec/batch
Epoch 14/100  Iteration 7767/56600 Training loss: 1.2032 0.4840 sec/batch
Epoch 14/100  Iteration 7768/56600 Training loss: 1.2031 0.4884 sec/batch
Epoch 14/100  Iteration 7769/56600 Training loss: 1.2031 0.4667 sec/batch
Epoch 14/100  Iteration 7770/56600 Training loss: 1.2031 0.4856 sec/batch
Epoch 14/100  Iteration 7771/56600 Training loss: 1.2030 0.4816 sec/batch
Epoch 14/100  Iteration 7772/56600 Tra

Epoch 14/100  Iteration 7870/56600 Training loss: 1.1988 0.4676 sec/batch
Epoch 14/100  Iteration 7871/56600 Training loss: 1.1987 0.4843 sec/batch
Epoch 14/100  Iteration 7872/56600 Training loss: 1.1988 0.4889 sec/batch
Epoch 14/100  Iteration 7873/56600 Training loss: 1.1988 0.4895 sec/batch
Epoch 14/100  Iteration 7874/56600 Training loss: 1.1988 0.4874 sec/batch
Epoch 14/100  Iteration 7875/56600 Training loss: 1.1987 0.4771 sec/batch
Epoch 14/100  Iteration 7876/56600 Training loss: 1.1988 0.4846 sec/batch
Epoch 14/100  Iteration 7877/56600 Training loss: 1.1988 0.4887 sec/batch
Epoch 14/100  Iteration 7878/56600 Training loss: 1.1987 0.4894 sec/batch
Epoch 14/100  Iteration 7879/56600 Training loss: 1.1987 0.4896 sec/batch
Epoch 14/100  Iteration 7880/56600 Training loss: 1.1987 0.4789 sec/batch
Epoch 14/100  Iteration 7881/56600 Training loss: 1.1987 0.4804 sec/batch
Epoch 14/100  Iteration 7882/56600 Training loss: 1.1987 0.4741 sec/batch
Epoch 14/100  Iteration 7883/56600 Tra

Epoch 15/100  Iteration 7981/56600 Training loss: 1.1943 0.4742 sec/batch
Epoch 15/100  Iteration 7982/56600 Training loss: 1.1935 0.4835 sec/batch
Epoch 15/100  Iteration 7983/56600 Training loss: 1.1933 0.4809 sec/batch
Epoch 15/100  Iteration 7984/56600 Training loss: 1.1929 0.4852 sec/batch
Epoch 15/100  Iteration 7985/56600 Training loss: 1.1925 0.4875 sec/batch
Epoch 15/100  Iteration 7986/56600 Training loss: 1.1924 0.4906 sec/batch
Epoch 15/100  Iteration 7987/56600 Training loss: 1.1922 0.4778 sec/batch
Epoch 15/100  Iteration 7988/56600 Training loss: 1.1916 0.4821 sec/batch
Epoch 15/100  Iteration 7989/56600 Training loss: 1.1914 0.4699 sec/batch
Epoch 15/100  Iteration 7990/56600 Training loss: 1.1913 0.4736 sec/batch
Epoch 15/100  Iteration 7991/56600 Training loss: 1.1911 0.4746 sec/batch
Epoch 15/100  Iteration 7992/56600 Training loss: 1.1916 0.4863 sec/batch
Epoch 15/100  Iteration 7993/56600 Training loss: 1.1920 0.4821 sec/batch
Epoch 15/100  Iteration 7994/56600 Tra

Epoch 15/100  Iteration 8092/56600 Training loss: 1.1956 0.4841 sec/batch
Epoch 15/100  Iteration 8093/56600 Training loss: 1.1956 0.4781 sec/batch
Epoch 15/100  Iteration 8094/56600 Training loss: 1.1955 0.4745 sec/batch
Epoch 15/100  Iteration 8095/56600 Training loss: 1.1954 0.4776 sec/batch
Epoch 15/100  Iteration 8096/56600 Training loss: 1.1952 0.4648 sec/batch
Epoch 15/100  Iteration 8097/56600 Training loss: 1.1952 0.4683 sec/batch
Epoch 15/100  Iteration 8098/56600 Training loss: 1.1951 0.4728 sec/batch
Epoch 15/100  Iteration 8099/56600 Training loss: 1.1949 0.4893 sec/batch
Epoch 15/100  Iteration 8100/56600 Training loss: 1.1948 0.4818 sec/batch
Epoch 15/100  Iteration 8101/56600 Training loss: 1.1948 0.4770 sec/batch
Epoch 15/100  Iteration 8102/56600 Training loss: 1.1949 0.4703 sec/batch
Epoch 15/100  Iteration 8103/56600 Training loss: 1.1948 0.4865 sec/batch
Epoch 15/100  Iteration 8104/56600 Training loss: 1.1948 0.4792 sec/batch
Epoch 15/100  Iteration 8105/56600 Tra

Epoch 15/100  Iteration 8203/56600 Training loss: 1.1962 0.4643 sec/batch
Epoch 15/100  Iteration 8204/56600 Training loss: 1.1961 0.4760 sec/batch
Epoch 15/100  Iteration 8205/56600 Training loss: 1.1960 0.4712 sec/batch
Epoch 15/100  Iteration 8206/56600 Training loss: 1.1959 0.4841 sec/batch
Epoch 15/100  Iteration 8207/56600 Training loss: 1.1959 0.4885 sec/batch
Epoch 15/100  Iteration 8208/56600 Training loss: 1.1958 0.4740 sec/batch
Epoch 15/100  Iteration 8209/56600 Training loss: 1.1957 0.4876 sec/batch
Epoch 15/100  Iteration 8210/56600 Training loss: 1.1957 0.4769 sec/batch
Epoch 15/100  Iteration 8211/56600 Training loss: 1.1958 0.4749 sec/batch
Epoch 15/100  Iteration 8212/56600 Training loss: 1.1958 0.4782 sec/batch
Epoch 15/100  Iteration 8213/56600 Training loss: 1.1958 0.4621 sec/batch
Epoch 15/100  Iteration 8214/56600 Training loss: 1.1959 0.4833 sec/batch
Epoch 15/100  Iteration 8215/56600 Training loss: 1.1960 0.4721 sec/batch
Epoch 15/100  Iteration 8216/56600 Tra

Epoch 15/100  Iteration 8314/56600 Training loss: 1.1931 0.4672 sec/batch
Epoch 15/100  Iteration 8315/56600 Training loss: 1.1930 0.4690 sec/batch
Epoch 15/100  Iteration 8316/56600 Training loss: 1.1929 0.4730 sec/batch
Epoch 15/100  Iteration 8317/56600 Training loss: 1.1930 0.4890 sec/batch
Epoch 15/100  Iteration 8318/56600 Training loss: 1.1930 0.4888 sec/batch
Epoch 15/100  Iteration 8319/56600 Training loss: 1.1930 0.4765 sec/batch
Epoch 15/100  Iteration 8320/56600 Training loss: 1.1930 0.4838 sec/batch
Epoch 15/100  Iteration 8321/56600 Training loss: 1.1930 0.4881 sec/batch
Epoch 15/100  Iteration 8322/56600 Training loss: 1.1929 0.4898 sec/batch
Epoch 15/100  Iteration 8323/56600 Training loss: 1.1930 0.4943 sec/batch
Epoch 15/100  Iteration 8324/56600 Training loss: 1.1930 0.4853 sec/batch
Epoch 15/100  Iteration 8325/56600 Training loss: 1.1929 0.4837 sec/batch
Epoch 15/100  Iteration 8326/56600 Training loss: 1.1929 0.4888 sec/batch
Epoch 15/100  Iteration 8327/56600 Tra

Epoch 15/100  Iteration 8425/56600 Training loss: 1.1888 0.4703 sec/batch
Epoch 15/100  Iteration 8426/56600 Training loss: 1.1887 0.4737 sec/batch
Epoch 15/100  Iteration 8427/56600 Training loss: 1.1887 0.4895 sec/batch
Epoch 15/100  Iteration 8428/56600 Training loss: 1.1886 0.4764 sec/batch
Epoch 15/100  Iteration 8429/56600 Training loss: 1.1885 0.4772 sec/batch
Epoch 15/100  Iteration 8430/56600 Training loss: 1.1885 0.4857 sec/batch
Epoch 15/100  Iteration 8431/56600 Training loss: 1.1884 0.4685 sec/batch
Epoch 15/100  Iteration 8432/56600 Training loss: 1.1883 0.4856 sec/batch
Epoch 15/100  Iteration 8433/56600 Training loss: 1.1882 0.4866 sec/batch
Epoch 15/100  Iteration 8434/56600 Training loss: 1.1882 0.4744 sec/batch
Epoch 15/100  Iteration 8435/56600 Training loss: 1.1882 0.4853 sec/batch
Epoch 15/100  Iteration 8436/56600 Training loss: 1.1881 0.4740 sec/batch
Epoch 15/100  Iteration 8437/56600 Training loss: 1.1881 0.4737 sec/batch
Epoch 15/100  Iteration 8438/56600 Tra

Epoch 16/100  Iteration 8536/56600 Training loss: 1.1874 0.4877 sec/batch
Epoch 16/100  Iteration 8537/56600 Training loss: 1.1868 0.4743 sec/batch
Epoch 16/100  Iteration 8538/56600 Training loss: 1.1868 0.4925 sec/batch
Epoch 16/100  Iteration 8539/56600 Training loss: 1.1866 0.4771 sec/batch
Epoch 16/100  Iteration 8540/56600 Training loss: 1.1860 0.4790 sec/batch
Epoch 16/100  Iteration 8541/56600 Training loss: 1.1856 0.4763 sec/batch
Epoch 16/100  Iteration 8542/56600 Training loss: 1.1851 0.4667 sec/batch
Epoch 16/100  Iteration 8543/56600 Training loss: 1.1849 0.4696 sec/batch
Epoch 16/100  Iteration 8544/56600 Training loss: 1.1842 0.4873 sec/batch
Epoch 16/100  Iteration 8545/56600 Training loss: 1.1835 0.4752 sec/batch
Epoch 16/100  Iteration 8546/56600 Training loss: 1.1829 0.4899 sec/batch
Epoch 16/100  Iteration 8547/56600 Training loss: 1.1824 0.4745 sec/batch
Epoch 16/100  Iteration 8548/56600 Training loss: 1.1815 0.4770 sec/batch
Epoch 16/100  Iteration 8549/56600 Tra

Epoch 16/100  Iteration 8647/56600 Training loss: 1.1829 0.4752 sec/batch
Epoch 16/100  Iteration 8648/56600 Training loss: 1.1830 0.4684 sec/batch
Epoch 16/100  Iteration 8649/56600 Training loss: 1.1830 0.4698 sec/batch
Epoch 16/100  Iteration 8650/56600 Training loss: 1.1830 0.4719 sec/batch
Epoch 16/100  Iteration 8651/56600 Training loss: 1.1831 0.4754 sec/batch
Epoch 16/100  Iteration 8652/56600 Training loss: 1.1830 0.4860 sec/batch
Epoch 16/100  Iteration 8653/56600 Training loss: 1.1829 0.4757 sec/batch
Epoch 16/100  Iteration 8654/56600 Training loss: 1.1828 0.4835 sec/batch
Epoch 16/100  Iteration 8655/56600 Training loss: 1.1827 0.4795 sec/batch
Epoch 16/100  Iteration 8656/56600 Training loss: 1.1826 0.4774 sec/batch
Epoch 16/100  Iteration 8657/56600 Training loss: 1.1827 0.4846 sec/batch
Epoch 16/100  Iteration 8658/56600 Training loss: 1.1828 0.4789 sec/batch
Epoch 16/100  Iteration 8659/56600 Training loss: 1.1828 0.4816 sec/batch
Epoch 16/100  Iteration 8660/56600 Tra

Epoch 16/100  Iteration 8758/56600 Training loss: 1.1846 0.4793 sec/batch
Epoch 16/100  Iteration 8759/56600 Training loss: 1.1845 0.4770 sec/batch
Epoch 16/100  Iteration 8760/56600 Training loss: 1.1845 0.4880 sec/batch
Epoch 16/100  Iteration 8761/56600 Training loss: 1.1845 0.4737 sec/batch
Epoch 16/100  Iteration 8762/56600 Training loss: 1.1846 0.4851 sec/batch
Epoch 16/100  Iteration 8763/56600 Training loss: 1.1845 0.4839 sec/batch
Epoch 16/100  Iteration 8764/56600 Training loss: 1.1844 0.4819 sec/batch
Epoch 16/100  Iteration 8765/56600 Training loss: 1.1843 0.4802 sec/batch
Epoch 16/100  Iteration 8766/56600 Training loss: 1.1843 0.4754 sec/batch
Epoch 16/100  Iteration 8767/56600 Training loss: 1.1843 0.5013 sec/batch
Epoch 16/100  Iteration 8768/56600 Training loss: 1.1842 0.4798 sec/batch
Epoch 16/100  Iteration 8769/56600 Training loss: 1.1840 0.4816 sec/batch
Epoch 16/100  Iteration 8770/56600 Training loss: 1.1839 0.4817 sec/batch
Epoch 16/100  Iteration 8771/56600 Tra

Epoch 16/100  Iteration 8869/56600 Training loss: 1.1814 0.4754 sec/batch
Epoch 16/100  Iteration 8870/56600 Training loss: 1.1814 0.4726 sec/batch
Epoch 16/100  Iteration 8871/56600 Training loss: 1.1815 0.4808 sec/batch
Epoch 16/100  Iteration 8872/56600 Training loss: 1.1814 0.4758 sec/batch
Epoch 16/100  Iteration 8873/56600 Training loss: 1.1813 0.4689 sec/batch
Epoch 16/100  Iteration 8874/56600 Training loss: 1.1813 0.4725 sec/batch
Epoch 16/100  Iteration 8875/56600 Training loss: 1.1812 0.4743 sec/batch
Epoch 16/100  Iteration 8876/56600 Training loss: 1.1813 0.4758 sec/batch
Epoch 16/100  Iteration 8877/56600 Training loss: 1.1813 0.4750 sec/batch
Epoch 16/100  Iteration 8878/56600 Training loss: 1.1813 0.4732 sec/batch
Epoch 16/100  Iteration 8879/56600 Training loss: 1.1812 0.4736 sec/batch
Epoch 16/100  Iteration 8880/56600 Training loss: 1.1812 0.4665 sec/batch
Epoch 16/100  Iteration 8881/56600 Training loss: 1.1811 0.4847 sec/batch
Epoch 16/100  Iteration 8882/56600 Tra

Epoch 16/100  Iteration 8980/56600 Training loss: 1.1775 0.4791 sec/batch
Epoch 16/100  Iteration 8981/56600 Training loss: 1.1774 0.4755 sec/batch
Epoch 16/100  Iteration 8982/56600 Training loss: 1.1774 0.4743 sec/batch
Epoch 16/100  Iteration 8983/56600 Training loss: 1.1774 0.4714 sec/batch
Epoch 16/100  Iteration 8984/56600 Training loss: 1.1774 0.4890 sec/batch
Epoch 16/100  Iteration 8985/56600 Training loss: 1.1773 0.4801 sec/batch
Epoch 16/100  Iteration 8986/56600 Training loss: 1.1773 0.4756 sec/batch
Epoch 16/100  Iteration 8987/56600 Training loss: 1.1773 0.4655 sec/batch
Epoch 16/100  Iteration 8988/56600 Training loss: 1.1772 0.4698 sec/batch
Epoch 16/100  Iteration 8989/56600 Training loss: 1.1772 0.4872 sec/batch
Epoch 16/100  Iteration 8990/56600 Training loss: 1.1771 0.4745 sec/batch
Epoch 16/100  Iteration 8991/56600 Training loss: 1.1771 0.4809 sec/batch
Epoch 16/100  Iteration 8992/56600 Training loss: 1.1770 0.4777 sec/batch
Epoch 16/100  Iteration 8993/56600 Tra

Epoch 17/100  Iteration 9091/56600 Training loss: 1.1753 0.4876 sec/batch
Epoch 17/100  Iteration 9092/56600 Training loss: 1.1757 0.4740 sec/batch
Epoch 17/100  Iteration 9093/56600 Training loss: 1.1766 0.4791 sec/batch
Epoch 17/100  Iteration 9094/56600 Training loss: 1.1773 0.4769 sec/batch
Epoch 17/100  Iteration 9095/56600 Training loss: 1.1773 0.4727 sec/batch
Epoch 17/100  Iteration 9096/56600 Training loss: 1.1780 0.4892 sec/batch
Epoch 17/100  Iteration 9097/56600 Training loss: 1.1783 0.4673 sec/batch
Epoch 17/100  Iteration 9098/56600 Training loss: 1.1788 0.4688 sec/batch
Epoch 17/100  Iteration 9099/56600 Training loss: 1.1783 0.4927 sec/batch
Epoch 17/100  Iteration 9100/56600 Training loss: 1.1779 0.4782 sec/batch
Epoch 17/100  Iteration 9101/56600 Training loss: 1.1784 0.4756 sec/batch
Epoch 17/100  Iteration 9102/56600 Training loss: 1.1789 0.4673 sec/batch
Epoch 17/100  Iteration 9103/56600 Training loss: 1.1780 0.4689 sec/batch
Epoch 17/100  Iteration 9104/56600 Tra

Epoch 17/100  Iteration 9202/56600 Training loss: 1.1716 0.4783 sec/batch
Epoch 17/100  Iteration 9203/56600 Training loss: 1.1715 0.4790 sec/batch
Epoch 17/100  Iteration 9204/56600 Training loss: 1.1715 0.4612 sec/batch
Epoch 17/100  Iteration 9205/56600 Training loss: 1.1716 0.4719 sec/batch
Epoch 17/100  Iteration 9206/56600 Training loss: 1.1717 0.4744 sec/batch
Epoch 17/100  Iteration 9207/56600 Training loss: 1.1720 0.4891 sec/batch
Epoch 17/100  Iteration 9208/56600 Training loss: 1.1724 0.4753 sec/batch
Epoch 17/100  Iteration 9209/56600 Training loss: 1.1725 0.4843 sec/batch
Epoch 17/100  Iteration 9210/56600 Training loss: 1.1725 0.4729 sec/batch
Epoch 17/100  Iteration 9211/56600 Training loss: 1.1726 0.4700 sec/batch
Epoch 17/100  Iteration 9212/56600 Training loss: 1.1725 0.4782 sec/batch
Epoch 17/100  Iteration 9213/56600 Training loss: 1.1727 0.4799 sec/batch
Epoch 17/100  Iteration 9214/56600 Training loss: 1.1728 0.4777 sec/batch
Epoch 17/100  Iteration 9215/56600 Tra

Epoch 17/100  Iteration 9313/56600 Training loss: 1.1760 0.4804 sec/batch
Epoch 17/100  Iteration 9314/56600 Training loss: 1.1759 0.4759 sec/batch
Epoch 17/100  Iteration 9315/56600 Training loss: 1.1759 0.4697 sec/batch
Epoch 17/100  Iteration 9316/56600 Training loss: 1.1758 0.4875 sec/batch
Epoch 17/100  Iteration 9317/56600 Training loss: 1.1757 0.4754 sec/batch
Epoch 17/100  Iteration 9318/56600 Training loss: 1.1757 0.4747 sec/batch
Epoch 17/100  Iteration 9319/56600 Training loss: 1.1756 0.4820 sec/batch
Epoch 17/100  Iteration 9320/56600 Training loss: 1.1755 0.4757 sec/batch
Epoch 17/100  Iteration 9321/56600 Training loss: 1.1754 0.4770 sec/batch
Epoch 17/100  Iteration 9322/56600 Training loss: 1.1754 0.4721 sec/batch
Epoch 17/100  Iteration 9323/56600 Training loss: 1.1752 0.4855 sec/batch
Epoch 17/100  Iteration 9324/56600 Training loss: 1.1751 0.4876 sec/batch
Epoch 17/100  Iteration 9325/56600 Training loss: 1.1749 0.4752 sec/batch
Epoch 17/100  Iteration 9326/56600 Tra

Epoch 17/100  Iteration 9424/56600 Training loss: 1.1725 0.4853 sec/batch
Epoch 17/100  Iteration 9425/56600 Training loss: 1.1724 0.4841 sec/batch
Epoch 17/100  Iteration 9426/56600 Training loss: 1.1723 0.4873 sec/batch
Epoch 17/100  Iteration 9427/56600 Training loss: 1.1723 0.4898 sec/batch
Epoch 17/100  Iteration 9428/56600 Training loss: 1.1723 0.4980 sec/batch
Epoch 17/100  Iteration 9429/56600 Training loss: 1.1722 0.4857 sec/batch
Epoch 17/100  Iteration 9430/56600 Training loss: 1.1722 0.4731 sec/batch
Epoch 17/100  Iteration 9431/56600 Training loss: 1.1722 0.4720 sec/batch
Epoch 17/100  Iteration 9432/56600 Training loss: 1.1722 0.4897 sec/batch
Epoch 17/100  Iteration 9433/56600 Training loss: 1.1722 0.4970 sec/batch
Epoch 17/100  Iteration 9434/56600 Training loss: 1.1722 0.4822 sec/batch
Epoch 17/100  Iteration 9435/56600 Training loss: 1.1721 0.4855 sec/batch
Epoch 17/100  Iteration 9436/56600 Training loss: 1.1722 0.4720 sec/batch
Epoch 17/100  Iteration 9437/56600 Tra

Epoch 17/100  Iteration 9535/56600 Training loss: 1.1687 0.4832 sec/batch
Epoch 17/100  Iteration 9536/56600 Training loss: 1.1687 0.4742 sec/batch
Epoch 17/100  Iteration 9537/56600 Training loss: 1.1687 0.4850 sec/batch
Epoch 17/100  Iteration 9538/56600 Training loss: 1.1686 0.4721 sec/batch
Epoch 17/100  Iteration 9539/56600 Training loss: 1.1686 0.4843 sec/batch
Epoch 17/100  Iteration 9540/56600 Training loss: 1.1685 0.4842 sec/batch
Epoch 17/100  Iteration 9541/56600 Training loss: 1.1685 0.4725 sec/batch
Epoch 17/100  Iteration 9542/56600 Training loss: 1.1685 0.4700 sec/batch
Epoch 17/100  Iteration 9543/56600 Training loss: 1.1684 0.4890 sec/batch
Epoch 17/100  Iteration 9544/56600 Training loss: 1.1684 0.4741 sec/batch
Epoch 17/100  Iteration 9545/56600 Training loss: 1.1684 0.4832 sec/batch
Epoch 17/100  Iteration 9546/56600 Training loss: 1.1683 0.4726 sec/batch
Epoch 17/100  Iteration 9547/56600 Training loss: 1.1683 0.4846 sec/batch
Epoch 17/100  Iteration 9548/56600 Tra

Epoch 18/100  Iteration 9646/56600 Training loss: 1.1667 0.4998 sec/batch
Epoch 18/100  Iteration 9647/56600 Training loss: 1.1665 0.4971 sec/batch
Epoch 18/100  Iteration 9648/56600 Training loss: 1.1652 0.4866 sec/batch
Epoch 18/100  Iteration 9649/56600 Training loss: 1.1653 0.4873 sec/batch
Epoch 18/100  Iteration 9650/56600 Training loss: 1.1653 0.4913 sec/batch
Epoch 18/100  Iteration 9651/56600 Training loss: 1.1646 0.4959 sec/batch
Epoch 18/100  Iteration 9652/56600 Training loss: 1.1643 0.4871 sec/batch
Epoch 18/100  Iteration 9653/56600 Training loss: 1.1635 0.4852 sec/batch
Epoch 18/100  Iteration 9654/56600 Training loss: 1.1631 0.4880 sec/batch
Epoch 18/100  Iteration 9655/56600 Training loss: 1.1635 0.4740 sec/batch
Epoch 18/100  Iteration 9656/56600 Training loss: 1.1646 0.4909 sec/batch
Epoch 18/100  Iteration 9657/56600 Training loss: 1.1656 0.4723 sec/batch
Epoch 18/100  Iteration 9658/56600 Training loss: 1.1657 0.4850 sec/batch
Epoch 18/100  Iteration 9659/56600 Tra

Epoch 18/100  Iteration 9757/56600 Training loss: 1.1612 0.4797 sec/batch
Epoch 18/100  Iteration 9758/56600 Training loss: 1.1612 0.4830 sec/batch
Epoch 18/100  Iteration 9759/56600 Training loss: 1.1613 0.4854 sec/batch
Epoch 18/100  Iteration 9760/56600 Training loss: 1.1616 0.4877 sec/batch
Epoch 18/100  Iteration 9761/56600 Training loss: 1.1617 0.4807 sec/batch
Epoch 18/100  Iteration 9762/56600 Training loss: 1.1617 0.4879 sec/batch
Epoch 18/100  Iteration 9763/56600 Training loss: 1.1617 0.4691 sec/batch
Epoch 18/100  Iteration 9764/56600 Training loss: 1.1618 0.4855 sec/batch
Epoch 18/100  Iteration 9765/56600 Training loss: 1.1620 0.4929 sec/batch
Epoch 18/100  Iteration 9766/56600 Training loss: 1.1619 0.4809 sec/batch
Epoch 18/100  Iteration 9767/56600 Training loss: 1.1620 0.4958 sec/batch
Epoch 18/100  Iteration 9768/56600 Training loss: 1.1619 0.4694 sec/batch
Epoch 18/100  Iteration 9769/56600 Training loss: 1.1618 0.4835 sec/batch
Epoch 18/100  Iteration 9770/56600 Tra

Epoch 18/100  Iteration 9868/56600 Training loss: 1.1662 0.4860 sec/batch
Epoch 18/100  Iteration 9869/56600 Training loss: 1.1663 0.4792 sec/batch
Epoch 18/100  Iteration 9870/56600 Training loss: 1.1663 0.4770 sec/batch
Epoch 18/100  Iteration 9871/56600 Training loss: 1.1663 0.4835 sec/batch
Epoch 18/100  Iteration 9872/56600 Training loss: 1.1664 0.4742 sec/batch
Epoch 18/100  Iteration 9873/56600 Training loss: 1.1662 0.4849 sec/batch
Epoch 18/100  Iteration 9874/56600 Training loss: 1.1662 0.4883 sec/batch
Epoch 18/100  Iteration 9875/56600 Training loss: 1.1662 0.4682 sec/batch
Epoch 18/100  Iteration 9876/56600 Training loss: 1.1661 0.4922 sec/batch
Epoch 18/100  Iteration 9877/56600 Training loss: 1.1662 0.4740 sec/batch
Epoch 18/100  Iteration 9878/56600 Training loss: 1.1662 0.4834 sec/batch
Epoch 18/100  Iteration 9879/56600 Training loss: 1.1661 0.4879 sec/batch
Epoch 18/100  Iteration 9880/56600 Training loss: 1.1661 0.4739 sec/batch
Epoch 18/100  Iteration 9881/56600 Tra

Epoch 18/100  Iteration 9979/56600 Training loss: 1.1629 0.4686 sec/batch
Epoch 18/100  Iteration 9980/56600 Training loss: 1.1628 0.4724 sec/batch
Epoch 18/100  Iteration 9981/56600 Training loss: 1.1629 0.4933 sec/batch
Epoch 18/100  Iteration 9982/56600 Training loss: 1.1629 0.4816 sec/batch
Epoch 18/100  Iteration 9983/56600 Training loss: 1.1629 0.4739 sec/batch
Epoch 18/100  Iteration 9984/56600 Training loss: 1.1629 0.4707 sec/batch
Epoch 18/100  Iteration 9985/56600 Training loss: 1.1628 0.4709 sec/batch
Epoch 18/100  Iteration 9986/56600 Training loss: 1.1628 0.4739 sec/batch
Epoch 18/100  Iteration 9987/56600 Training loss: 1.1627 0.4853 sec/batch
Epoch 18/100  Iteration 9988/56600 Training loss: 1.1627 0.4719 sec/batch
Epoch 18/100  Iteration 9989/56600 Training loss: 1.1626 0.4583 sec/batch
Epoch 18/100  Iteration 9990/56600 Training loss: 1.1626 0.4782 sec/batch
Epoch 18/100  Iteration 9991/56600 Training loss: 1.1626 0.4728 sec/batch
Epoch 18/100  Iteration 9992/56600 Tra

Epoch 18/100  Iteration 10088/56600 Training loss: 1.1596 0.4786 sec/batch
Epoch 18/100  Iteration 10089/56600 Training loss: 1.1597 0.4618 sec/batch
Epoch 18/100  Iteration 10090/56600 Training loss: 1.1597 0.4698 sec/batch
Epoch 18/100  Iteration 10091/56600 Training loss: 1.1597 0.4721 sec/batch
Epoch 18/100  Iteration 10092/56600 Training loss: 1.1597 0.4726 sec/batch
Epoch 18/100  Iteration 10093/56600 Training loss: 1.1598 0.4856 sec/batch
Epoch 18/100  Iteration 10094/56600 Training loss: 1.1599 0.4741 sec/batch
Epoch 18/100  Iteration 10095/56600 Training loss: 1.1599 0.4741 sec/batch
Epoch 18/100  Iteration 10096/56600 Training loss: 1.1599 0.4751 sec/batch
Epoch 18/100  Iteration 10097/56600 Training loss: 1.1598 0.4737 sec/batch
Epoch 18/100  Iteration 10098/56600 Training loss: 1.1598 0.4843 sec/batch
Epoch 18/100  Iteration 10099/56600 Training loss: 1.1597 0.4730 sec/batch
Epoch 18/100  Iteration 10100/56600 Training loss: 1.1597 0.4748 sec/batch
Epoch 18/100  Iteration 1

Epoch 19/100  Iteration 10198/56600 Training loss: 1.1737 0.4787 sec/batch
Epoch 19/100  Iteration 10199/56600 Training loss: 1.1728 0.4853 sec/batch
Epoch 19/100  Iteration 10200/56600 Training loss: 1.1727 0.4725 sec/batch
Epoch 19/100  Iteration 10201/56600 Training loss: 1.1726 0.4848 sec/batch
Epoch 19/100  Iteration 10202/56600 Training loss: 1.1698 0.4798 sec/batch
Epoch 19/100  Iteration 10203/56600 Training loss: 1.1682 0.4745 sec/batch
Epoch 19/100  Iteration 10204/56600 Training loss: 1.1671 0.4693 sec/batch
Epoch 19/100  Iteration 10205/56600 Training loss: 1.1655 0.4722 sec/batch
Epoch 19/100  Iteration 10206/56600 Training loss: 1.1637 0.4868 sec/batch
Epoch 19/100  Iteration 10207/56600 Training loss: 1.1617 0.4976 sec/batch
Epoch 19/100  Iteration 10208/56600 Training loss: 1.1611 0.4741 sec/batch
Epoch 19/100  Iteration 10209/56600 Training loss: 1.1606 0.4855 sec/batch
Epoch 19/100  Iteration 10210/56600 Training loss: 1.1586 0.4803 sec/batch
Epoch 19/100  Iteration 1

Epoch 19/100  Iteration 10308/56600 Training loss: 1.1518 0.4863 sec/batch
Epoch 19/100  Iteration 10309/56600 Training loss: 1.1524 0.4769 sec/batch
Epoch 19/100  Iteration 10310/56600 Training loss: 1.1527 0.4854 sec/batch
Epoch 19/100  Iteration 10311/56600 Training loss: 1.1526 0.4877 sec/batch
Epoch 19/100  Iteration 10312/56600 Training loss: 1.1524 0.4735 sec/batch
Epoch 19/100  Iteration 10313/56600 Training loss: 1.1526 0.4827 sec/batch
Epoch 19/100  Iteration 10314/56600 Training loss: 1.1527 0.4818 sec/batch
Epoch 19/100  Iteration 10315/56600 Training loss: 1.1528 0.4845 sec/batch
Epoch 19/100  Iteration 10316/56600 Training loss: 1.1528 0.4721 sec/batch
Epoch 19/100  Iteration 10317/56600 Training loss: 1.1529 0.4754 sec/batch
Epoch 19/100  Iteration 10318/56600 Training loss: 1.1528 0.4862 sec/batch
Epoch 19/100  Iteration 10319/56600 Training loss: 1.1530 0.4781 sec/batch
Epoch 19/100  Iteration 10320/56600 Training loss: 1.1530 0.4846 sec/batch
Epoch 19/100  Iteration 1

Epoch 19/100  Iteration 10418/56600 Training loss: 1.1557 0.4721 sec/batch
Epoch 19/100  Iteration 10419/56600 Training loss: 1.1558 0.4700 sec/batch
Epoch 19/100  Iteration 10420/56600 Training loss: 1.1559 0.4834 sec/batch
Epoch 19/100  Iteration 10421/56600 Training loss: 1.1561 0.4791 sec/batch
Epoch 19/100  Iteration 10422/56600 Training loss: 1.1561 0.4692 sec/batch
Epoch 19/100  Iteration 10423/56600 Training loss: 1.1563 0.4719 sec/batch
Epoch 19/100  Iteration 10424/56600 Training loss: 1.1564 0.4685 sec/batch
Epoch 19/100  Iteration 10425/56600 Training loss: 1.1565 0.4832 sec/batch
Epoch 19/100  Iteration 10426/56600 Training loss: 1.1567 0.4813 sec/batch
Epoch 19/100  Iteration 10427/56600 Training loss: 1.1568 0.4759 sec/batch
Epoch 19/100  Iteration 10428/56600 Training loss: 1.1571 0.4855 sec/batch
Epoch 19/100  Iteration 10429/56600 Training loss: 1.1572 0.4735 sec/batch
Epoch 19/100  Iteration 10430/56600 Training loss: 1.1574 0.4702 sec/batch
Epoch 19/100  Iteration 1

Epoch 19/100  Iteration 10528/56600 Training loss: 1.1554 0.4791 sec/batch
Epoch 19/100  Iteration 10529/56600 Training loss: 1.1553 0.4804 sec/batch
Epoch 19/100  Iteration 10530/56600 Training loss: 1.1552 0.4879 sec/batch
Epoch 19/100  Iteration 10531/56600 Training loss: 1.1551 0.4900 sec/batch
Epoch 19/100  Iteration 10532/56600 Training loss: 1.1552 0.4890 sec/batch
Epoch 19/100  Iteration 10533/56600 Training loss: 1.1553 0.4804 sec/batch
Epoch 19/100  Iteration 10534/56600 Training loss: 1.1552 0.4833 sec/batch
Epoch 19/100  Iteration 10535/56600 Training loss: 1.1552 0.4830 sec/batch
Epoch 19/100  Iteration 10536/56600 Training loss: 1.1551 0.4737 sec/batch
Epoch 19/100  Iteration 10537/56600 Training loss: 1.1551 0.4863 sec/batch
Epoch 19/100  Iteration 10538/56600 Training loss: 1.1551 0.4802 sec/batch
Epoch 19/100  Iteration 10539/56600 Training loss: 1.1549 0.4815 sec/batch
Epoch 19/100  Iteration 10540/56600 Training loss: 1.1548 0.4818 sec/batch
Epoch 19/100  Iteration 1

Epoch 19/100  Iteration 10638/56600 Training loss: 1.1515 0.4734 sec/batch
Epoch 19/100  Iteration 10639/56600 Training loss: 1.1515 0.4847 sec/batch
Epoch 19/100  Iteration 10640/56600 Training loss: 1.1515 0.4878 sec/batch
Epoch 19/100  Iteration 10641/56600 Training loss: 1.1514 0.4753 sec/batch
Epoch 19/100  Iteration 10642/56600 Training loss: 1.1514 0.4711 sec/batch
Epoch 19/100  Iteration 10643/56600 Training loss: 1.1514 0.4932 sec/batch
Epoch 19/100  Iteration 10644/56600 Training loss: 1.1513 0.4844 sec/batch
Epoch 19/100  Iteration 10645/56600 Training loss: 1.1513 0.4865 sec/batch
Epoch 19/100  Iteration 10646/56600 Training loss: 1.1512 0.4732 sec/batch
Epoch 19/100  Iteration 10647/56600 Training loss: 1.1512 0.4694 sec/batch
Epoch 19/100  Iteration 10648/56600 Training loss: 1.1512 0.4802 sec/batch
Epoch 19/100  Iteration 10649/56600 Training loss: 1.1512 0.4813 sec/batch
Epoch 19/100  Iteration 10650/56600 Training loss: 1.1512 0.4812 sec/batch
Epoch 19/100  Iteration 1

Epoch 19/100  Iteration 10748/56600 Training loss: 1.1495 0.4694 sec/batch
Epoch 19/100  Iteration 10749/56600 Training loss: 1.1495 0.4720 sec/batch
Epoch 19/100  Iteration 10750/56600 Training loss: 1.1496 0.4812 sec/batch
Epoch 19/100  Iteration 10751/56600 Training loss: 1.1495 0.4825 sec/batch
Epoch 19/100  Iteration 10752/56600 Training loss: 1.1496 0.4733 sec/batch
Epoch 19/100  Iteration 10753/56600 Training loss: 1.1497 0.4759 sec/batch
Epoch 19/100  Iteration 10754/56600 Training loss: 1.1498 0.4726 sec/batch
Epoch 20/100  Iteration 10755/56600 Training loss: 1.2531 0.4693 sec/batch
Epoch 20/100  Iteration 10756/56600 Training loss: 1.2105 0.4697 sec/batch
Epoch 20/100  Iteration 10757/56600 Training loss: 1.1990 0.4719 sec/batch
Epoch 20/100  Iteration 10758/56600 Training loss: 1.1892 0.4745 sec/batch
Epoch 20/100  Iteration 10759/56600 Training loss: 1.1762 0.4844 sec/batch
Epoch 20/100  Iteration 10760/56600 Training loss: 1.1751 0.4726 sec/batch
Epoch 20/100  Iteration 1

Epoch 20/100  Iteration 10858/56600 Training loss: 1.1416 0.4800 sec/batch
Epoch 20/100  Iteration 10859/56600 Training loss: 1.1418 0.4731 sec/batch
Epoch 20/100  Iteration 10860/56600 Training loss: 1.1418 0.4844 sec/batch
Epoch 20/100  Iteration 10861/56600 Training loss: 1.1415 0.4725 sec/batch
Epoch 20/100  Iteration 10862/56600 Training loss: 1.1416 0.4710 sec/batch
Epoch 20/100  Iteration 10863/56600 Training loss: 1.1417 0.4819 sec/batch
Epoch 20/100  Iteration 10864/56600 Training loss: 1.1418 0.4826 sec/batch
Epoch 20/100  Iteration 10865/56600 Training loss: 1.1417 0.4844 sec/batch
Epoch 20/100  Iteration 10866/56600 Training loss: 1.1417 0.4877 sec/batch
Epoch 20/100  Iteration 10867/56600 Training loss: 1.1420 0.4913 sec/batch
Epoch 20/100  Iteration 10868/56600 Training loss: 1.1421 0.4880 sec/batch
Epoch 20/100  Iteration 10869/56600 Training loss: 1.1422 0.4752 sec/batch
Epoch 20/100  Iteration 10870/56600 Training loss: 1.1426 0.4848 sec/batch
Epoch 20/100  Iteration 1

Epoch 20/100  Iteration 10968/56600 Training loss: 1.1468 0.4721 sec/batch
Epoch 20/100  Iteration 10969/56600 Training loss: 1.1469 0.4743 sec/batch
Epoch 20/100  Iteration 10970/56600 Training loss: 1.1469 0.4856 sec/batch
Epoch 20/100  Iteration 10971/56600 Training loss: 1.1469 0.4736 sec/batch
Epoch 20/100  Iteration 10972/56600 Training loss: 1.1469 0.4595 sec/batch
Epoch 20/100  Iteration 10973/56600 Training loss: 1.1471 0.4777 sec/batch
Epoch 20/100  Iteration 10974/56600 Training loss: 1.1472 0.4685 sec/batch
Epoch 20/100  Iteration 10975/56600 Training loss: 1.1472 0.4859 sec/batch
Epoch 20/100  Iteration 10976/56600 Training loss: 1.1472 0.4866 sec/batch
Epoch 20/100  Iteration 10977/56600 Training loss: 1.1472 0.4745 sec/batch
Epoch 20/100  Iteration 10978/56600 Training loss: 1.1473 0.4869 sec/batch
Epoch 20/100  Iteration 10979/56600 Training loss: 1.1473 0.4754 sec/batch
Epoch 20/100  Iteration 10980/56600 Training loss: 1.1475 0.4800 sec/batch
Epoch 20/100  Iteration 1

Epoch 20/100  Iteration 11078/56600 Training loss: 1.1477 0.4874 sec/batch
Epoch 20/100  Iteration 11079/56600 Training loss: 1.1476 0.4908 sec/batch
Epoch 20/100  Iteration 11080/56600 Training loss: 1.1476 0.4887 sec/batch
Epoch 20/100  Iteration 11081/56600 Training loss: 1.1475 0.4813 sec/batch
Epoch 20/100  Iteration 11082/56600 Training loss: 1.1474 0.4714 sec/batch
Epoch 20/100  Iteration 11083/56600 Training loss: 1.1475 0.4730 sec/batch
Epoch 20/100  Iteration 11084/56600 Training loss: 1.1475 0.4889 sec/batch
Epoch 20/100  Iteration 11085/56600 Training loss: 1.1475 0.4862 sec/batch
Epoch 20/100  Iteration 11086/56600 Training loss: 1.1476 0.4784 sec/batch
Epoch 20/100  Iteration 11087/56600 Training loss: 1.1476 0.4764 sec/batch
Epoch 20/100  Iteration 11088/56600 Training loss: 1.1476 0.4903 sec/batch
Epoch 20/100  Iteration 11089/56600 Training loss: 1.1475 0.4747 sec/batch
Epoch 20/100  Iteration 11090/56600 Training loss: 1.1475 0.4898 sec/batch
Epoch 20/100  Iteration 1

Epoch 20/100  Iteration 11188/56600 Training loss: 1.1444 0.4686 sec/batch
Epoch 20/100  Iteration 11189/56600 Training loss: 1.1443 0.4721 sec/batch
Epoch 20/100  Iteration 11190/56600 Training loss: 1.1443 0.4891 sec/batch
Epoch 20/100  Iteration 11191/56600 Training loss: 1.1442 0.4910 sec/batch
Epoch 20/100  Iteration 11192/56600 Training loss: 1.1441 0.4800 sec/batch
Epoch 20/100  Iteration 11193/56600 Training loss: 1.1440 0.4780 sec/batch
Epoch 20/100  Iteration 11194/56600 Training loss: 1.1439 0.4795 sec/batch
Epoch 20/100  Iteration 11195/56600 Training loss: 1.1438 0.4649 sec/batch
Epoch 20/100  Iteration 11196/56600 Training loss: 1.1437 0.4845 sec/batch
Epoch 20/100  Iteration 11197/56600 Training loss: 1.1438 0.4721 sec/batch
Epoch 20/100  Iteration 11198/56600 Training loss: 1.1437 0.4683 sec/batch
Epoch 20/100  Iteration 11199/56600 Training loss: 1.1436 0.4786 sec/batch
Epoch 20/100  Iteration 11200/56600 Training loss: 1.1435 0.4671 sec/batch
Epoch 20/100  Iteration 1

Epoch 20/100  Iteration 11298/56600 Training loss: 1.1416 0.4780 sec/batch
Epoch 20/100  Iteration 11299/56600 Training loss: 1.1416 0.4792 sec/batch
Epoch 20/100  Iteration 11300/56600 Training loss: 1.1415 0.4713 sec/batch
Epoch 20/100  Iteration 11301/56600 Training loss: 1.1416 0.4846 sec/batch
Epoch 20/100  Iteration 11302/56600 Training loss: 1.1416 0.4883 sec/batch
Epoch 20/100  Iteration 11303/56600 Training loss: 1.1416 0.4748 sec/batch
Epoch 20/100  Iteration 11304/56600 Training loss: 1.1415 0.4823 sec/batch
Epoch 20/100  Iteration 11305/56600 Training loss: 1.1415 0.4764 sec/batch
Epoch 20/100  Iteration 11306/56600 Training loss: 1.1415 0.4689 sec/batch
Epoch 20/100  Iteration 11307/56600 Training loss: 1.1415 0.4722 sec/batch
Epoch 20/100  Iteration 11308/56600 Training loss: 1.1415 0.4902 sec/batch
Epoch 20/100  Iteration 11309/56600 Training loss: 1.1415 0.4918 sec/batch
Epoch 20/100  Iteration 11310/56600 Training loss: 1.1415 0.4803 sec/batch
Epoch 20/100  Iteration 1

Epoch 21/100  Iteration 11408/56600 Training loss: 1.1332 0.4816 sec/batch
Epoch 21/100  Iteration 11409/56600 Training loss: 1.1333 0.4797 sec/batch
Epoch 21/100  Iteration 11410/56600 Training loss: 1.1334 0.4664 sec/batch
Epoch 21/100  Iteration 11411/56600 Training loss: 1.1336 0.4690 sec/batch
Epoch 21/100  Iteration 11412/56600 Training loss: 1.1335 0.4719 sec/batch
Epoch 21/100  Iteration 11413/56600 Training loss: 1.1332 0.4746 sec/batch
Epoch 21/100  Iteration 11414/56600 Training loss: 1.1332 0.4891 sec/batch
Epoch 21/100  Iteration 11415/56600 Training loss: 1.1331 0.4733 sec/batch
Epoch 21/100  Iteration 11416/56600 Training loss: 1.1330 0.4852 sec/batch
Epoch 21/100  Iteration 11417/56600 Training loss: 1.1325 0.4879 sec/batch
Epoch 21/100  Iteration 11418/56600 Training loss: 1.1324 0.4912 sec/batch
Epoch 21/100  Iteration 11419/56600 Training loss: 1.1326 0.4987 sec/batch
Epoch 21/100  Iteration 11420/56600 Training loss: 1.1332 0.4797 sec/batch
Epoch 21/100  Iteration 1

Epoch 21/100  Iteration 11518/56600 Training loss: 1.1391 0.4813 sec/batch
Epoch 21/100  Iteration 11519/56600 Training loss: 1.1392 0.4749 sec/batch
Epoch 21/100  Iteration 11520/56600 Training loss: 1.1392 0.4854 sec/batch
Epoch 21/100  Iteration 11521/56600 Training loss: 1.1392 0.4981 sec/batch
Epoch 21/100  Iteration 11522/56600 Training loss: 1.1394 0.4829 sec/batch
Epoch 21/100  Iteration 11523/56600 Training loss: 1.1394 0.4790 sec/batch
Epoch 21/100  Iteration 11524/56600 Training loss: 1.1395 0.4667 sec/batch
Epoch 21/100  Iteration 11525/56600 Training loss: 1.1396 0.4698 sec/batch
Epoch 21/100  Iteration 11526/56600 Training loss: 1.1398 0.4720 sec/batch
Epoch 21/100  Iteration 11527/56600 Training loss: 1.1397 0.4673 sec/batch
Epoch 21/100  Iteration 11528/56600 Training loss: 1.1396 0.4802 sec/batch
Epoch 21/100  Iteration 11529/56600 Training loss: 1.1397 0.4626 sec/batch
Epoch 21/100  Iteration 11530/56600 Training loss: 1.1397 0.4849 sec/batch
Epoch 21/100  Iteration 1

Epoch 21/100  Iteration 11628/56600 Training loss: 1.1413 0.4748 sec/batch
Epoch 21/100  Iteration 11629/56600 Training loss: 1.1412 0.4818 sec/batch
Epoch 21/100  Iteration 11630/56600 Training loss: 1.1412 0.4728 sec/batch
Epoch 21/100  Iteration 11631/56600 Training loss: 1.1411 0.4717 sec/batch
Epoch 21/100  Iteration 11632/56600 Training loss: 1.1411 0.4737 sec/batch
Epoch 21/100  Iteration 11633/56600 Training loss: 1.1411 0.4660 sec/batch
Epoch 21/100  Iteration 11634/56600 Training loss: 1.1409 0.4846 sec/batch
Epoch 21/100  Iteration 11635/56600 Training loss: 1.1410 0.4725 sec/batch
Epoch 21/100  Iteration 11636/56600 Training loss: 1.1410 0.4742 sec/batch
Epoch 21/100  Iteration 11637/56600 Training loss: 1.1411 0.4845 sec/batch
Epoch 21/100  Iteration 11638/56600 Training loss: 1.1411 0.4718 sec/batch
Epoch 21/100  Iteration 11639/56600 Training loss: 1.1410 0.4728 sec/batch
Epoch 21/100  Iteration 11640/56600 Training loss: 1.1409 0.4883 sec/batch
Epoch 21/100  Iteration 1

Epoch 21/100  Iteration 11738/56600 Training loss: 1.1382 0.4614 sec/batch
Epoch 21/100  Iteration 11739/56600 Training loss: 1.1383 0.4761 sec/batch
Epoch 21/100  Iteration 11740/56600 Training loss: 1.1383 0.4675 sec/batch
Epoch 21/100  Iteration 11741/56600 Training loss: 1.1382 0.4699 sec/batch
Epoch 21/100  Iteration 11742/56600 Training loss: 1.1382 0.4872 sec/batch
Epoch 21/100  Iteration 11743/56600 Training loss: 1.1381 0.4753 sec/batch
Epoch 21/100  Iteration 11744/56600 Training loss: 1.1382 0.4896 sec/batch
Epoch 21/100  Iteration 11745/56600 Training loss: 1.1381 0.4781 sec/batch
Epoch 21/100  Iteration 11746/56600 Training loss: 1.1380 0.4749 sec/batch
Epoch 21/100  Iteration 11747/56600 Training loss: 1.1379 0.4877 sec/batch
Epoch 21/100  Iteration 11748/56600 Training loss: 1.1378 0.4743 sec/batch
Epoch 21/100  Iteration 11749/56600 Training loss: 1.1378 0.4690 sec/batch
Epoch 21/100  Iteration 11750/56600 Training loss: 1.1376 0.4802 sec/batch
Epoch 21/100  Iteration 1

Epoch 21/100  Iteration 11848/56600 Training loss: 1.1348 0.4865 sec/batch
Epoch 21/100  Iteration 11849/56600 Training loss: 1.1348 0.4768 sec/batch
Epoch 21/100  Iteration 11850/56600 Training loss: 1.1348 0.4809 sec/batch
Epoch 21/100  Iteration 11851/56600 Training loss: 1.1347 0.4747 sec/batch
Epoch 21/100  Iteration 11852/56600 Training loss: 1.1347 0.4740 sec/batch
Epoch 21/100  Iteration 11853/56600 Training loss: 1.1346 0.4692 sec/batch
Epoch 21/100  Iteration 11854/56600 Training loss: 1.1346 0.4719 sec/batch
Epoch 21/100  Iteration 11855/56600 Training loss: 1.1346 0.4751 sec/batch
Epoch 21/100  Iteration 11856/56600 Training loss: 1.1346 0.4858 sec/batch
Epoch 21/100  Iteration 11857/56600 Training loss: 1.1346 0.4754 sec/batch
Epoch 21/100  Iteration 11858/56600 Training loss: 1.1346 0.4761 sec/batch
Epoch 21/100  Iteration 11859/56600 Training loss: 1.1346 0.4758 sec/batch
Epoch 21/100  Iteration 11860/56600 Training loss: 1.1346 0.4642 sec/batch
Epoch 21/100  Iteration 1

Epoch 22/100  Iteration 11958/56600 Training loss: 1.1264 0.4900 sec/batch
Epoch 22/100  Iteration 11959/56600 Training loss: 1.1259 0.4911 sec/batch
Epoch 22/100  Iteration 11960/56600 Training loss: 1.1265 0.4813 sec/batch
Epoch 22/100  Iteration 11961/56600 Training loss: 1.1265 0.4776 sec/batch
Epoch 22/100  Iteration 11962/56600 Training loss: 1.1259 0.4808 sec/batch
Epoch 22/100  Iteration 11963/56600 Training loss: 1.1258 0.4745 sec/batch
Epoch 22/100  Iteration 11964/56600 Training loss: 1.1259 0.4835 sec/batch
Epoch 22/100  Iteration 11965/56600 Training loss: 1.1255 0.4807 sec/batch
Epoch 22/100  Iteration 11966/56600 Training loss: 1.1255 0.4810 sec/batch
Epoch 22/100  Iteration 11967/56600 Training loss: 1.1253 0.4796 sec/batch
Epoch 22/100  Iteration 11968/56600 Training loss: 1.1255 0.4729 sec/batch
Epoch 22/100  Iteration 11969/56600 Training loss: 1.1258 0.4846 sec/batch
Epoch 22/100  Iteration 11970/56600 Training loss: 1.1260 0.4879 sec/batch
Epoch 22/100  Iteration 1

Epoch 22/100  Iteration 12067/56600 Training loss: 1.1320 0.4780 sec/batch
Epoch 22/100  Iteration 12068/56600 Training loss: 1.1320 0.4686 sec/batch
Epoch 22/100  Iteration 12069/56600 Training loss: 1.1321 0.4881 sec/batch
Epoch 22/100  Iteration 12070/56600 Training loss: 1.1322 0.4743 sec/batch
Epoch 22/100  Iteration 12071/56600 Training loss: 1.1322 0.4684 sec/batch
Epoch 22/100  Iteration 12072/56600 Training loss: 1.1323 0.4724 sec/batch
Epoch 22/100  Iteration 12073/56600 Training loss: 1.1323 0.4754 sec/batch
Epoch 22/100  Iteration 12074/56600 Training loss: 1.1325 0.4736 sec/batch
Epoch 22/100  Iteration 12075/56600 Training loss: 1.1324 0.4751 sec/batch
Epoch 22/100  Iteration 12076/56600 Training loss: 1.1324 0.4756 sec/batch
Epoch 22/100  Iteration 12077/56600 Training loss: 1.1323 0.4737 sec/batch
Epoch 22/100  Iteration 12078/56600 Training loss: 1.1322 0.4678 sec/batch
Epoch 22/100  Iteration 12079/56600 Training loss: 1.1323 0.4856 sec/batch
Epoch 22/100  Iteration 1

Epoch 22/100  Iteration 12177/56600 Training loss: 1.1345 0.4848 sec/batch
Epoch 22/100  Iteration 12178/56600 Training loss: 1.1346 0.4775 sec/batch
Epoch 22/100  Iteration 12179/56600 Training loss: 1.1348 0.4726 sec/batch
Epoch 22/100  Iteration 12180/56600 Training loss: 1.1349 0.4753 sec/batch
Epoch 22/100  Iteration 12181/56600 Training loss: 1.1350 0.4723 sec/batch
Epoch 22/100  Iteration 12182/56600 Training loss: 1.1350 0.4691 sec/batch
Epoch 22/100  Iteration 12183/56600 Training loss: 1.1351 0.4720 sec/batch
Epoch 22/100  Iteration 12184/56600 Training loss: 1.1351 0.4745 sec/batch
Epoch 22/100  Iteration 12185/56600 Training loss: 1.1350 0.4759 sec/batch
Epoch 22/100  Iteration 12186/56600 Training loss: 1.1349 0.4741 sec/batch
Epoch 22/100  Iteration 12187/56600 Training loss: 1.1349 0.4741 sec/batch
Epoch 22/100  Iteration 12188/56600 Training loss: 1.1349 0.4764 sec/batch
Epoch 22/100  Iteration 12189/56600 Training loss: 1.1348 0.4653 sec/batch
Epoch 22/100  Iteration 1

Epoch 22/100  Iteration 12287/56600 Training loss: 1.1319 0.4897 sec/batch
Epoch 22/100  Iteration 12288/56600 Training loss: 1.1319 0.4707 sec/batch
Epoch 22/100  Iteration 12289/56600 Training loss: 1.1319 0.4755 sec/batch
Epoch 22/100  Iteration 12290/56600 Training loss: 1.1318 0.4766 sec/batch
Epoch 22/100  Iteration 12291/56600 Training loss: 1.1318 0.4756 sec/batch
Epoch 22/100  Iteration 12292/56600 Training loss: 1.1318 0.4667 sec/batch
Epoch 22/100  Iteration 12293/56600 Training loss: 1.1318 0.4855 sec/batch
Epoch 22/100  Iteration 12294/56600 Training loss: 1.1318 0.4724 sec/batch
Epoch 22/100  Iteration 12295/56600 Training loss: 1.1317 0.4736 sec/batch
Epoch 22/100  Iteration 12296/56600 Training loss: 1.1316 0.4869 sec/batch
Epoch 22/100  Iteration 12297/56600 Training loss: 1.1315 0.4755 sec/batch
Epoch 22/100  Iteration 12298/56600 Training loss: 1.1315 0.4799 sec/batch
Epoch 22/100  Iteration 12299/56600 Training loss: 1.1315 0.4806 sec/batch
Epoch 22/100  Iteration 1

Epoch 22/100  Iteration 12397/56600 Training loss: 1.1281 0.4893 sec/batch
Epoch 22/100  Iteration 12398/56600 Training loss: 1.1280 0.4758 sec/batch
Epoch 22/100  Iteration 12399/56600 Training loss: 1.1280 0.4856 sec/batch
Epoch 22/100  Iteration 12400/56600 Training loss: 1.1280 0.4872 sec/batch
Epoch 22/100  Iteration 12401/56600 Training loss: 1.1281 0.4740 sec/batch
Epoch 22/100  Iteration 12402/56600 Training loss: 1.1281 0.4874 sec/batch
Epoch 22/100  Iteration 12403/56600 Training loss: 1.1281 0.4741 sec/batch
Epoch 22/100  Iteration 12404/56600 Training loss: 1.1281 0.4731 sec/batch
Epoch 22/100  Iteration 12405/56600 Training loss: 1.1281 0.4755 sec/batch
Epoch 22/100  Iteration 12406/56600 Training loss: 1.1281 0.4700 sec/batch
Epoch 22/100  Iteration 12407/56600 Training loss: 1.1281 0.4698 sec/batch
Epoch 22/100  Iteration 12408/56600 Training loss: 1.1281 0.4719 sec/batch
Epoch 22/100  Iteration 12409/56600 Training loss: 1.1280 0.4744 sec/batch
Epoch 22/100  Iteration 1

Epoch 23/100  Iteration 12507/56600 Training loss: 1.1270 0.4784 sec/batch
Epoch 23/100  Iteration 12508/56600 Training loss: 1.1262 0.4857 sec/batch
Epoch 23/100  Iteration 12509/56600 Training loss: 1.1256 0.4865 sec/batch
Epoch 23/100  Iteration 12510/56600 Training loss: 1.1248 0.4749 sec/batch
Epoch 23/100  Iteration 12511/56600 Training loss: 1.1244 0.4785 sec/batch
Epoch 23/100  Iteration 12512/56600 Training loss: 1.1240 0.4750 sec/batch
Epoch 23/100  Iteration 12513/56600 Training loss: 1.1234 0.4698 sec/batch
Epoch 23/100  Iteration 12514/56600 Training loss: 1.1229 0.4720 sec/batch
Epoch 23/100  Iteration 12515/56600 Training loss: 1.1228 0.4741 sec/batch
Epoch 23/100  Iteration 12516/56600 Training loss: 1.1221 0.4780 sec/batch
Epoch 23/100  Iteration 12517/56600 Training loss: 1.1217 0.4843 sec/batch
Epoch 23/100  Iteration 12518/56600 Training loss: 1.1217 0.4787 sec/batch
Epoch 23/100  Iteration 12519/56600 Training loss: 1.1213 0.4786 sec/batch
Epoch 23/100  Iteration 1

Epoch 23/100  Iteration 12617/56600 Training loss: 1.1251 0.4736 sec/batch
Epoch 23/100  Iteration 12618/56600 Training loss: 1.1251 0.4799 sec/batch
Epoch 23/100  Iteration 12619/56600 Training loss: 1.1251 0.4828 sec/batch
Epoch 23/100  Iteration 12620/56600 Training loss: 1.1253 0.4766 sec/batch
Epoch 23/100  Iteration 12621/56600 Training loss: 1.1254 0.4881 sec/batch
Epoch 23/100  Iteration 12622/56600 Training loss: 1.1253 0.4753 sec/batch
Epoch 23/100  Iteration 12623/56600 Training loss: 1.1253 0.4849 sec/batch
Epoch 23/100  Iteration 12624/56600 Training loss: 1.1252 0.4812 sec/batch
Epoch 23/100  Iteration 12625/56600 Training loss: 1.1251 0.4735 sec/batch
Epoch 23/100  Iteration 12626/56600 Training loss: 1.1251 0.4719 sec/batch
Epoch 23/100  Iteration 12627/56600 Training loss: 1.1250 0.4745 sec/batch
Epoch 23/100  Iteration 12628/56600 Training loss: 1.1249 0.4846 sec/batch
Epoch 23/100  Iteration 12629/56600 Training loss: 1.1250 0.4782 sec/batch
Epoch 23/100  Iteration 1

Epoch 23/100  Iteration 12727/56600 Training loss: 1.1277 0.4907 sec/batch
Epoch 23/100  Iteration 12728/56600 Training loss: 1.1277 0.4750 sec/batch
Epoch 23/100  Iteration 12729/56600 Training loss: 1.1277 0.4733 sec/batch
Epoch 23/100  Iteration 12730/56600 Training loss: 1.1276 0.4835 sec/batch
Epoch 23/100  Iteration 12731/56600 Training loss: 1.1275 0.4758 sec/batch
Epoch 23/100  Iteration 12732/56600 Training loss: 1.1274 0.4754 sec/batch
Epoch 23/100  Iteration 12733/56600 Training loss: 1.1273 0.4765 sec/batch
Epoch 23/100  Iteration 12734/56600 Training loss: 1.1272 0.4667 sec/batch
Epoch 23/100  Iteration 12735/56600 Training loss: 1.1272 0.4686 sec/batch
Epoch 23/100  Iteration 12736/56600 Training loss: 1.1271 0.4730 sec/batch
Epoch 23/100  Iteration 12737/56600 Training loss: 1.1270 0.4889 sec/batch
Epoch 23/100  Iteration 12738/56600 Training loss: 1.1270 0.4848 sec/batch
Epoch 23/100  Iteration 12739/56600 Training loss: 1.1271 0.4758 sec/batch
Epoch 23/100  Iteration 1

Epoch 23/100  Iteration 12837/56600 Training loss: 1.1249 0.4721 sec/batch
Epoch 23/100  Iteration 12838/56600 Training loss: 1.1250 0.4897 sec/batch
Epoch 23/100  Iteration 12839/56600 Training loss: 1.1250 0.4881 sec/batch
Epoch 23/100  Iteration 12840/56600 Training loss: 1.1250 0.4776 sec/batch
Epoch 23/100  Iteration 12841/56600 Training loss: 1.1249 0.4764 sec/batch
Epoch 23/100  Iteration 12842/56600 Training loss: 1.1250 0.4805 sec/batch
Epoch 23/100  Iteration 12843/56600 Training loss: 1.1249 0.4754 sec/batch
Epoch 23/100  Iteration 12844/56600 Training loss: 1.1248 0.4854 sec/batch
Epoch 23/100  Iteration 12845/56600 Training loss: 1.1249 0.4726 sec/batch
Epoch 23/100  Iteration 12846/56600 Training loss: 1.1249 0.4709 sec/batch
Epoch 23/100  Iteration 12847/56600 Training loss: 1.1249 0.4884 sec/batch
Epoch 23/100  Iteration 12848/56600 Training loss: 1.1249 0.4770 sec/batch
Epoch 23/100  Iteration 12849/56600 Training loss: 1.1249 0.4698 sec/batch
Epoch 23/100  Iteration 1

Epoch 23/100  Iteration 12947/56600 Training loss: 1.1218 0.4788 sec/batch
Epoch 23/100  Iteration 12948/56600 Training loss: 1.1218 0.4851 sec/batch
Epoch 23/100  Iteration 12949/56600 Training loss: 1.1218 0.4877 sec/batch
Epoch 23/100  Iteration 12950/56600 Training loss: 1.1218 0.4899 sec/batch
Epoch 23/100  Iteration 12951/56600 Training loss: 1.1217 0.4930 sec/batch
Epoch 23/100  Iteration 12952/56600 Training loss: 1.1216 0.4715 sec/batch
Epoch 23/100  Iteration 12953/56600 Training loss: 1.1216 0.4846 sec/batch
Epoch 23/100  Iteration 12954/56600 Training loss: 1.1215 0.4876 sec/batch
Epoch 23/100  Iteration 12955/56600 Training loss: 1.1215 0.4891 sec/batch
Epoch 23/100  Iteration 12956/56600 Training loss: 1.1214 0.4838 sec/batch
Epoch 23/100  Iteration 12957/56600 Training loss: 1.1214 0.4777 sec/batch
Epoch 23/100  Iteration 12958/56600 Training loss: 1.1213 0.4859 sec/batch
Epoch 23/100  Iteration 12959/56600 Training loss: 1.1212 0.4899 sec/batch
Epoch 23/100  Iteration 1

Epoch 24/100  Iteration 13057/56600 Training loss: 1.1237 0.4899 sec/batch
Epoch 24/100  Iteration 13058/56600 Training loss: 1.1245 0.4749 sec/batch
Epoch 24/100  Iteration 13059/56600 Training loss: 1.1245 0.4853 sec/batch
Epoch 24/100  Iteration 13060/56600 Training loss: 1.1249 0.4879 sec/batch
Epoch 24/100  Iteration 13061/56600 Training loss: 1.1244 0.4734 sec/batch
Epoch 24/100  Iteration 13062/56600 Training loss: 1.1242 0.4805 sec/batch
Epoch 24/100  Iteration 13063/56600 Training loss: 1.1245 0.4769 sec/batch
Epoch 24/100  Iteration 13064/56600 Training loss: 1.1250 0.4698 sec/batch
Epoch 24/100  Iteration 13065/56600 Training loss: 1.1243 0.4722 sec/batch
Epoch 24/100  Iteration 13066/56600 Training loss: 1.1244 0.4901 sec/batch
Epoch 24/100  Iteration 13067/56600 Training loss: 1.1243 0.4878 sec/batch
Epoch 24/100  Iteration 13068/56600 Training loss: 1.1237 0.4799 sec/batch
Epoch 24/100  Iteration 13069/56600 Training loss: 1.1233 0.4800 sec/batch
Epoch 24/100  Iteration 1

Epoch 24/100  Iteration 13167/56600 Training loss: 1.1179 0.4807 sec/batch
Epoch 24/100  Iteration 13168/56600 Training loss: 1.1180 0.4835 sec/batch
Epoch 24/100  Iteration 13169/56600 Training loss: 1.1184 0.4745 sec/batch
Epoch 24/100  Iteration 13170/56600 Training loss: 1.1188 0.4691 sec/batch
Epoch 24/100  Iteration 13171/56600 Training loss: 1.1189 0.4800 sec/batch
Epoch 24/100  Iteration 13172/56600 Training loss: 1.1190 0.4828 sec/batch
Epoch 24/100  Iteration 13173/56600 Training loss: 1.1191 0.4792 sec/batch
Epoch 24/100  Iteration 13174/56600 Training loss: 1.1192 0.4796 sec/batch
Epoch 24/100  Iteration 13175/56600 Training loss: 1.1194 0.4855 sec/batch
Epoch 24/100  Iteration 13176/56600 Training loss: 1.1194 0.4923 sec/batch
Epoch 24/100  Iteration 13177/56600 Training loss: 1.1194 0.4828 sec/batch
Epoch 24/100  Iteration 13178/56600 Training loss: 1.1194 0.4789 sec/batch
Epoch 24/100  Iteration 13179/56600 Training loss: 1.1194 0.4798 sec/batch
Epoch 24/100  Iteration 1

Epoch 24/100  Iteration 13277/56600 Training loss: 1.1227 0.4757 sec/batch
Epoch 24/100  Iteration 13278/56600 Training loss: 1.1226 0.4731 sec/batch
Epoch 24/100  Iteration 13279/56600 Training loss: 1.1225 0.4688 sec/batch
Epoch 24/100  Iteration 13280/56600 Training loss: 1.1224 0.4847 sec/batch
Epoch 24/100  Iteration 13281/56600 Training loss: 1.1223 0.4731 sec/batch
Epoch 24/100  Iteration 13282/56600 Training loss: 1.1222 0.4893 sec/batch
Epoch 24/100  Iteration 13283/56600 Training loss: 1.1222 0.4861 sec/batch
Epoch 24/100  Iteration 13284/56600 Training loss: 1.1221 0.4791 sec/batch
Epoch 24/100  Iteration 13285/56600 Training loss: 1.1220 0.4770 sec/batch
Epoch 24/100  Iteration 13286/56600 Training loss: 1.1218 0.4846 sec/batch
Epoch 24/100  Iteration 13287/56600 Training loss: 1.1217 0.4741 sec/batch
Epoch 24/100  Iteration 13288/56600 Training loss: 1.1217 0.4687 sec/batch
Epoch 24/100  Iteration 13289/56600 Training loss: 1.1217 0.4810 sec/batch
Epoch 24/100  Iteration 1

Epoch 24/100  Iteration 13387/56600 Training loss: 1.1194 0.4722 sec/batch
Epoch 24/100  Iteration 13388/56600 Training loss: 1.1193 0.4737 sec/batch
Epoch 24/100  Iteration 13389/56600 Training loss: 1.1193 0.4879 sec/batch
Epoch 24/100  Iteration 13390/56600 Training loss: 1.1193 0.4740 sec/batch
Epoch 24/100  Iteration 13391/56600 Training loss: 1.1192 0.4763 sec/batch
Epoch 24/100  Iteration 13392/56600 Training loss: 1.1192 0.4794 sec/batch
Epoch 24/100  Iteration 13393/56600 Training loss: 1.1191 0.4747 sec/batch
Epoch 24/100  Iteration 13394/56600 Training loss: 1.1191 0.4849 sec/batch
Epoch 24/100  Iteration 13395/56600 Training loss: 1.1192 0.4722 sec/batch
Epoch 24/100  Iteration 13396/56600 Training loss: 1.1192 0.4749 sec/batch
Epoch 24/100  Iteration 13397/56600 Training loss: 1.1191 0.4900 sec/batch
Epoch 24/100  Iteration 13398/56600 Training loss: 1.1191 0.4770 sec/batch
Epoch 24/100  Iteration 13399/56600 Training loss: 1.1191 0.4749 sec/batch
Epoch 24/100  Iteration 1

Epoch 24/100  Iteration 13497/56600 Training loss: 1.1162 0.4859 sec/batch
Epoch 24/100  Iteration 13498/56600 Training loss: 1.1162 0.4770 sec/batch
Epoch 24/100  Iteration 13499/56600 Training loss: 1.1162 0.4775 sec/batch
Epoch 24/100  Iteration 13500/56600 Training loss: 1.1162 0.4824 sec/batch
Epoch 24/100  Iteration 13501/56600 Training loss: 1.1162 0.4740 sec/batch
Epoch 24/100  Iteration 13502/56600 Training loss: 1.1161 0.4847 sec/batch
Epoch 24/100  Iteration 13503/56600 Training loss: 1.1161 0.4894 sec/batch
Epoch 24/100  Iteration 13504/56600 Training loss: 1.1160 0.4670 sec/batch
Epoch 24/100  Iteration 13505/56600 Training loss: 1.1160 0.4779 sec/batch
Epoch 24/100  Iteration 13506/56600 Training loss: 1.1160 0.4683 sec/batch
Epoch 24/100  Iteration 13507/56600 Training loss: 1.1159 0.4699 sec/batch
Epoch 24/100  Iteration 13508/56600 Training loss: 1.1159 0.4719 sec/batch
Epoch 24/100  Iteration 13509/56600 Training loss: 1.1159 0.4899 sec/batch
Epoch 24/100  Iteration 1

Epoch 25/100  Iteration 13607/56600 Training loss: 1.1181 0.4869 sec/batch
Epoch 25/100  Iteration 13608/56600 Training loss: 1.1177 0.4905 sec/batch
Epoch 25/100  Iteration 13609/56600 Training loss: 1.1172 0.4892 sec/batch
Epoch 25/100  Iteration 13610/56600 Training loss: 1.1156 0.4810 sec/batch
Epoch 25/100  Iteration 13611/56600 Training loss: 1.1157 0.4812 sec/batch
Epoch 25/100  Iteration 13612/56600 Training loss: 1.1155 0.4823 sec/batch
Epoch 25/100  Iteration 13613/56600 Training loss: 1.1147 0.4741 sec/batch
Epoch 25/100  Iteration 13614/56600 Training loss: 1.1142 0.4951 sec/batch
Epoch 25/100  Iteration 13615/56600 Training loss: 1.1134 0.4845 sec/batch
Epoch 25/100  Iteration 13616/56600 Training loss: 1.1129 0.4806 sec/batch
Epoch 25/100  Iteration 13617/56600 Training loss: 1.1129 0.4790 sec/batch
Epoch 25/100  Iteration 13618/56600 Training loss: 1.1144 0.4665 sec/batch
Epoch 25/100  Iteration 13619/56600 Training loss: 1.1156 0.4804 sec/batch
Epoch 25/100  Iteration 1

Epoch 25/100  Iteration 13717/56600 Training loss: 1.1120 0.4846 sec/batch
Epoch 25/100  Iteration 13718/56600 Training loss: 1.1119 0.4730 sec/batch
Epoch 25/100  Iteration 13719/56600 Training loss: 1.1116 0.4735 sec/batch
Epoch 25/100  Iteration 13720/56600 Training loss: 1.1115 0.4915 sec/batch
Epoch 25/100  Iteration 13721/56600 Training loss: 1.1116 0.4855 sec/batch
Epoch 25/100  Iteration 13722/56600 Training loss: 1.1119 0.4808 sec/batch
Epoch 25/100  Iteration 13723/56600 Training loss: 1.1121 0.4767 sec/batch
Epoch 25/100  Iteration 13724/56600 Training loss: 1.1120 0.4662 sec/batch
Epoch 25/100  Iteration 13725/56600 Training loss: 1.1121 0.4698 sec/batch
Epoch 25/100  Iteration 13726/56600 Training loss: 1.1123 0.4722 sec/batch
Epoch 25/100  Iteration 13727/56600 Training loss: 1.1124 0.4901 sec/batch
Epoch 25/100  Iteration 13728/56600 Training loss: 1.1123 0.4860 sec/batch
Epoch 25/100  Iteration 13729/56600 Training loss: 1.1124 0.4784 sec/batch
Epoch 25/100  Iteration 1

Epoch 25/100  Iteration 13827/56600 Training loss: 1.1172 0.4915 sec/batch
Epoch 25/100  Iteration 13828/56600 Training loss: 1.1174 0.4799 sec/batch
Epoch 25/100  Iteration 13829/56600 Training loss: 1.1176 0.4770 sec/batch
Epoch 25/100  Iteration 13830/56600 Training loss: 1.1177 0.4792 sec/batch
Epoch 25/100  Iteration 13831/56600 Training loss: 1.1178 0.4847 sec/batch
Epoch 25/100  Iteration 13832/56600 Training loss: 1.1177 0.4880 sec/batch
Epoch 25/100  Iteration 13833/56600 Training loss: 1.1178 0.4883 sec/batch
Epoch 25/100  Iteration 13834/56600 Training loss: 1.1178 0.4881 sec/batch
Epoch 25/100  Iteration 13835/56600 Training loss: 1.1177 0.4787 sec/batch
Epoch 25/100  Iteration 13836/56600 Training loss: 1.1175 0.4845 sec/batch
Epoch 25/100  Iteration 13837/56600 Training loss: 1.1175 0.4923 sec/batch
Epoch 25/100  Iteration 13838/56600 Training loss: 1.1175 0.4787 sec/batch
Epoch 25/100  Iteration 13839/56600 Training loss: 1.1175 0.4885 sec/batch
Epoch 25/100  Iteration 1

Epoch 25/100  Iteration 13937/56600 Training loss: 1.1144 0.4882 sec/batch
Epoch 25/100  Iteration 13938/56600 Training loss: 1.1144 0.4722 sec/batch
Epoch 25/100  Iteration 13939/56600 Training loss: 1.1143 0.4845 sec/batch
Epoch 25/100  Iteration 13940/56600 Training loss: 1.1144 0.4889 sec/batch
Epoch 25/100  Iteration 13941/56600 Training loss: 1.1145 0.4696 sec/batch
Epoch 25/100  Iteration 13942/56600 Training loss: 1.1145 0.4821 sec/batch
Epoch 25/100  Iteration 13943/56600 Training loss: 1.1145 0.4799 sec/batch
Epoch 25/100  Iteration 13944/56600 Training loss: 1.1145 0.4853 sec/batch
Epoch 25/100  Iteration 13945/56600 Training loss: 1.1145 0.4875 sec/batch
Epoch 25/100  Iteration 13946/56600 Training loss: 1.1145 0.4710 sec/batch
Epoch 25/100  Iteration 13947/56600 Training loss: 1.1145 0.4826 sec/batch
Epoch 25/100  Iteration 13948/56600 Training loss: 1.1144 0.4787 sec/batch
Epoch 25/100  Iteration 13949/56600 Training loss: 1.1144 0.4834 sec/batch
Epoch 25/100  Iteration 1

Epoch 25/100  Iteration 14046/56600 Training loss: 1.1115 0.4692 sec/batch
Epoch 25/100  Iteration 14047/56600 Training loss: 1.1115 0.4757 sec/batch
Epoch 25/100  Iteration 14048/56600 Training loss: 1.1114 0.4770 sec/batch
Epoch 25/100  Iteration 14049/56600 Training loss: 1.1114 0.4766 sec/batch
Epoch 25/100  Iteration 14050/56600 Training loss: 1.1114 0.4661 sec/batch
Epoch 25/100  Iteration 14051/56600 Training loss: 1.1115 0.4689 sec/batch
Epoch 25/100  Iteration 14052/56600 Training loss: 1.1115 0.4885 sec/batch
Epoch 25/100  Iteration 14053/56600 Training loss: 1.1115 0.4742 sec/batch
Epoch 25/100  Iteration 14054/56600 Training loss: 1.1116 0.4895 sec/batch
Epoch 25/100  Iteration 14055/56600 Training loss: 1.1117 0.4770 sec/batch
Epoch 25/100  Iteration 14056/56600 Training loss: 1.1118 0.4780 sec/batch
Epoch 25/100  Iteration 14057/56600 Training loss: 1.1118 0.4855 sec/batch
Epoch 25/100  Iteration 14058/56600 Training loss: 1.1117 0.4744 sec/batch
Epoch 25/100  Iteration 1

Epoch 26/100  Iteration 14156/56600 Training loss: 1.1402 0.4727 sec/batch
Epoch 26/100  Iteration 14157/56600 Training loss: 1.1353 0.4713 sec/batch
Epoch 26/100  Iteration 14158/56600 Training loss: 1.1305 0.4700 sec/batch
Epoch 26/100  Iteration 14159/56600 Training loss: 1.1280 0.4720 sec/batch
Epoch 26/100  Iteration 14160/56600 Training loss: 1.1276 0.4735 sec/batch
Epoch 26/100  Iteration 14161/56600 Training loss: 1.1274 0.4766 sec/batch
Epoch 26/100  Iteration 14162/56600 Training loss: 1.1279 0.4763 sec/batch
Epoch 26/100  Iteration 14163/56600 Training loss: 1.1278 0.4748 sec/batch
Epoch 26/100  Iteration 14164/56600 Training loss: 1.1256 0.4794 sec/batch
Epoch 26/100  Iteration 14165/56600 Training loss: 1.1238 0.4730 sec/batch
Epoch 26/100  Iteration 14166/56600 Training loss: 1.1228 0.4849 sec/batch
Epoch 26/100  Iteration 14167/56600 Training loss: 1.1217 0.4723 sec/batch
Epoch 26/100  Iteration 14168/56600 Training loss: 1.1201 0.4755 sec/batch
Epoch 26/100  Iteration 1

Epoch 26/100  Iteration 14266/56600 Training loss: 1.1035 0.4690 sec/batch
Epoch 26/100  Iteration 14267/56600 Training loss: 1.1040 0.4730 sec/batch
Epoch 26/100  Iteration 14268/56600 Training loss: 1.1043 0.4737 sec/batch
Epoch 26/100  Iteration 14269/56600 Training loss: 1.1044 0.4785 sec/batch
Epoch 26/100  Iteration 14270/56600 Training loss: 1.1049 0.4749 sec/batch
Epoch 26/100  Iteration 14271/56600 Training loss: 1.1055 0.4753 sec/batch
Epoch 26/100  Iteration 14272/56600 Training loss: 1.1060 0.4803 sec/batch
Epoch 26/100  Iteration 14273/56600 Training loss: 1.1059 0.4601 sec/batch
Epoch 26/100  Iteration 14274/56600 Training loss: 1.1058 0.4698 sec/batch
Epoch 26/100  Iteration 14275/56600 Training loss: 1.1061 0.4721 sec/batch
Epoch 26/100  Iteration 14276/56600 Training loss: 1.1062 0.4745 sec/batch
Epoch 26/100  Iteration 14277/56600 Training loss: 1.1063 0.4862 sec/batch
Epoch 26/100  Iteration 14278/56600 Training loss: 1.1063 0.4740 sec/batch
Epoch 26/100  Iteration 1

Epoch 26/100  Iteration 14376/56600 Training loss: 1.1098 0.4902 sec/batch
Epoch 26/100  Iteration 14377/56600 Training loss: 1.1099 0.4760 sec/batch
Epoch 26/100  Iteration 14378/56600 Training loss: 1.1098 0.4689 sec/batch
Epoch 26/100  Iteration 14379/56600 Training loss: 1.1099 0.4721 sec/batch
Epoch 26/100  Iteration 14380/56600 Training loss: 1.1101 0.4895 sec/batch
Epoch 26/100  Iteration 14381/56600 Training loss: 1.1102 0.4875 sec/batch
Epoch 26/100  Iteration 14382/56600 Training loss: 1.1103 0.4775 sec/batch
Epoch 26/100  Iteration 14383/56600 Training loss: 1.1106 0.4778 sec/batch
Epoch 26/100  Iteration 14384/56600 Training loss: 1.1106 0.4780 sec/batch
Epoch 26/100  Iteration 14385/56600 Training loss: 1.1107 0.4747 sec/batch
Epoch 26/100  Iteration 14386/56600 Training loss: 1.1108 0.4847 sec/batch
Epoch 26/100  Iteration 14387/56600 Training loss: 1.1108 0.4725 sec/batch
Epoch 26/100  Iteration 14388/56600 Training loss: 1.1110 0.4734 sec/batch
Epoch 26/100  Iteration 1

Epoch 26/100  Iteration 14486/56600 Training loss: 1.1098 0.4784 sec/batch
Epoch 26/100  Iteration 14487/56600 Training loss: 1.1098 0.4780 sec/batch
Epoch 26/100  Iteration 14488/56600 Training loss: 1.1098 0.4795 sec/batch
Epoch 26/100  Iteration 14489/56600 Training loss: 1.1098 0.4669 sec/batch
Epoch 26/100  Iteration 14490/56600 Training loss: 1.1097 0.4698 sec/batch
Epoch 26/100  Iteration 14491/56600 Training loss: 1.1096 0.4883 sec/batch
Epoch 26/100  Iteration 14492/56600 Training loss: 1.1096 0.4894 sec/batch
Epoch 26/100  Iteration 14493/56600 Training loss: 1.1094 0.4869 sec/batch
Epoch 26/100  Iteration 14494/56600 Training loss: 1.1095 0.4782 sec/batch
Epoch 26/100  Iteration 14495/56600 Training loss: 1.1095 0.4844 sec/batch
Epoch 26/100  Iteration 14496/56600 Training loss: 1.1095 0.4728 sec/batch
Epoch 26/100  Iteration 14497/56600 Training loss: 1.1094 0.4735 sec/batch
Epoch 26/100  Iteration 14498/56600 Training loss: 1.1094 0.4811 sec/batch
Epoch 26/100  Iteration 1

Epoch 26/100  Iteration 14596/56600 Training loss: 1.1061 0.4662 sec/batch
Epoch 26/100  Iteration 14597/56600 Training loss: 1.1061 0.4999 sec/batch
Epoch 26/100  Iteration 14598/56600 Training loss: 1.1060 0.4731 sec/batch
Epoch 26/100  Iteration 14599/56600 Training loss: 1.1060 0.4692 sec/batch
Epoch 26/100  Iteration 14600/56600 Training loss: 1.1060 0.4717 sec/batch
Epoch 26/100  Iteration 14601/56600 Training loss: 1.1060 0.4900 sec/batch
Epoch 26/100  Iteration 14602/56600 Training loss: 1.1061 0.4825 sec/batch
Epoch 26/100  Iteration 14603/56600 Training loss: 1.1060 0.4765 sec/batch
Epoch 26/100  Iteration 14604/56600 Training loss: 1.1060 0.4761 sec/batch
Epoch 26/100  Iteration 14605/56600 Training loss: 1.1060 0.4787 sec/batch
Epoch 26/100  Iteration 14606/56600 Training loss: 1.1059 0.4795 sec/batch
Epoch 26/100  Iteration 14607/56600 Training loss: 1.1059 0.4906 sec/batch
Epoch 26/100  Iteration 14608/56600 Training loss: 1.1058 0.4747 sec/batch
Epoch 26/100  Iteration 1

Epoch 26/100  Iteration 14706/56600 Training loss: 1.1045 0.4841 sec/batch
Epoch 26/100  Iteration 14707/56600 Training loss: 1.1045 0.4743 sec/batch
Epoch 26/100  Iteration 14708/56600 Training loss: 1.1046 0.4777 sec/batch
Epoch 26/100  Iteration 14709/56600 Training loss: 1.1047 0.4771 sec/batch
Epoch 26/100  Iteration 14710/56600 Training loss: 1.1047 0.4641 sec/batch
Epoch 26/100  Iteration 14711/56600 Training loss: 1.1047 0.4716 sec/batch
Epoch 26/100  Iteration 14712/56600 Training loss: 1.1047 0.5063 sec/batch
Epoch 26/100  Iteration 14713/56600 Training loss: 1.1047 0.4793 sec/batch
Epoch 26/100  Iteration 14714/56600 Training loss: 1.1047 0.4792 sec/batch
Epoch 26/100  Iteration 14715/56600 Training loss: 1.1048 0.4744 sec/batch
Epoch 26/100  Iteration 14716/56600 Training loss: 1.1049 0.4762 sec/batch
Epoch 27/100  Iteration 14717/56600 Training loss: 1.2125 0.4776 sec/batch
Epoch 27/100  Iteration 14718/56600 Training loss: 1.1745 0.4694 sec/batch
Epoch 27/100  Iteration 1

Epoch 27/100  Iteration 14816/56600 Training loss: 1.0968 0.4740 sec/batch
Epoch 27/100  Iteration 14817/56600 Training loss: 1.0969 0.4822 sec/batch
Epoch 27/100  Iteration 14818/56600 Training loss: 1.0974 0.4799 sec/batch
Epoch 27/100  Iteration 14819/56600 Training loss: 1.0973 0.4707 sec/batch
Epoch 27/100  Iteration 14820/56600 Training loss: 1.0975 0.4727 sec/batch
Epoch 27/100  Iteration 14821/56600 Training loss: 1.0976 0.4825 sec/batch
Epoch 27/100  Iteration 14822/56600 Training loss: 1.0976 0.4791 sec/batch
Epoch 27/100  Iteration 14823/56600 Training loss: 1.0972 0.4786 sec/batch
Epoch 27/100  Iteration 14824/56600 Training loss: 1.0973 0.4785 sec/batch
Epoch 27/100  Iteration 14825/56600 Training loss: 1.0974 0.4758 sec/batch
Epoch 27/100  Iteration 14826/56600 Training loss: 1.0976 0.4712 sec/batch
Epoch 27/100  Iteration 14827/56600 Training loss: 1.0976 0.4854 sec/batch
Epoch 27/100  Iteration 14828/56600 Training loss: 1.0974 0.4845 sec/batch
Epoch 27/100  Iteration 1

Epoch 27/100  Iteration 14926/56600 Training loss: 1.1045 0.4886 sec/batch
Epoch 27/100  Iteration 14927/56600 Training loss: 1.1044 0.4887 sec/batch
Epoch 27/100  Iteration 14928/56600 Training loss: 1.1044 0.4802 sec/batch
Epoch 27/100  Iteration 14929/56600 Training loss: 1.1042 0.4786 sec/batch
Epoch 27/100  Iteration 14930/56600 Training loss: 1.1040 0.4874 sec/batch
Epoch 27/100  Iteration 14931/56600 Training loss: 1.1040 0.4866 sec/batch
Epoch 27/100  Iteration 14932/56600 Training loss: 1.1040 0.4741 sec/batch
Epoch 27/100  Iteration 14933/56600 Training loss: 1.1039 0.4800 sec/batch
Epoch 27/100  Iteration 14934/56600 Training loss: 1.1039 0.4801 sec/batch
Epoch 27/100  Iteration 14935/56600 Training loss: 1.1041 0.4791 sec/batch
Epoch 27/100  Iteration 14936/56600 Training loss: 1.1042 0.4886 sec/batch
Epoch 27/100  Iteration 14937/56600 Training loss: 1.1043 0.4809 sec/batch
Epoch 27/100  Iteration 14938/56600 Training loss: 1.1043 0.4795 sec/batch
Epoch 27/100  Iteration 1

Epoch 27/100  Iteration 15036/56600 Training loss: 1.1045 0.4801 sec/batch
Epoch 27/100  Iteration 15037/56600 Training loss: 1.1045 0.4738 sec/batch
Epoch 27/100  Iteration 15038/56600 Training loss: 1.1044 0.4768 sec/batch
Epoch 27/100  Iteration 15039/56600 Training loss: 1.1043 0.4755 sec/batch
Epoch 27/100  Iteration 15040/56600 Training loss: 1.1042 0.4628 sec/batch
Epoch 27/100  Iteration 15041/56600 Training loss: 1.1042 0.4719 sec/batch
Epoch 27/100  Iteration 15042/56600 Training loss: 1.1042 0.4737 sec/batch
Epoch 27/100  Iteration 15043/56600 Training loss: 1.1040 0.4752 sec/batch
Epoch 27/100  Iteration 15044/56600 Training loss: 1.1040 0.4750 sec/batch
Epoch 27/100  Iteration 15045/56600 Training loss: 1.1041 0.4752 sec/batch
Epoch 27/100  Iteration 15046/56600 Training loss: 1.1040 0.4795 sec/batch
Epoch 27/100  Iteration 15047/56600 Training loss: 1.1040 0.4613 sec/batch
Epoch 27/100  Iteration 15048/56600 Training loss: 1.1041 0.4798 sec/batch
Epoch 27/100  Iteration 1

Epoch 27/100  Iteration 15146/56600 Training loss: 1.1018 0.4791 sec/batch
Epoch 27/100  Iteration 15147/56600 Training loss: 1.1018 0.4926 sec/batch
Epoch 27/100  Iteration 15148/56600 Training loss: 1.1017 0.4796 sec/batch
Epoch 27/100  Iteration 15149/56600 Training loss: 1.1016 0.4802 sec/batch
Epoch 27/100  Iteration 15150/56600 Training loss: 1.1015 0.4799 sec/batch
Epoch 27/100  Iteration 15151/56600 Training loss: 1.1015 0.4744 sec/batch
Epoch 27/100  Iteration 15152/56600 Training loss: 1.1014 0.4790 sec/batch
Epoch 27/100  Iteration 15153/56600 Training loss: 1.1014 0.4774 sec/batch
Epoch 27/100  Iteration 15154/56600 Training loss: 1.1013 0.4787 sec/batch
Epoch 27/100  Iteration 15155/56600 Training loss: 1.1011 0.4803 sec/batch
Epoch 27/100  Iteration 15156/56600 Training loss: 1.1010 0.4694 sec/batch
Epoch 27/100  Iteration 15157/56600 Training loss: 1.1010 0.4879 sec/batch
Epoch 27/100  Iteration 15158/56600 Training loss: 1.1009 0.4906 sec/batch
Epoch 27/100  Iteration 1

Epoch 27/100  Iteration 15256/56600 Training loss: 1.0993 0.4862 sec/batch
Epoch 27/100  Iteration 15257/56600 Training loss: 1.0992 0.4670 sec/batch
Epoch 27/100  Iteration 15258/56600 Training loss: 1.0992 0.4658 sec/batch
Epoch 27/100  Iteration 15259/56600 Training loss: 1.0992 0.4788 sec/batch
Epoch 27/100  Iteration 15260/56600 Training loss: 1.0992 0.4761 sec/batch
Epoch 27/100  Iteration 15261/56600 Training loss: 1.0991 0.4823 sec/batch
Epoch 27/100  Iteration 15262/56600 Training loss: 1.0992 0.4740 sec/batch
Epoch 27/100  Iteration 15263/56600 Training loss: 1.0992 0.4629 sec/batch
Epoch 27/100  Iteration 15264/56600 Training loss: 1.0992 0.4759 sec/batch
Epoch 27/100  Iteration 15265/56600 Training loss: 1.0992 0.4781 sec/batch
Epoch 27/100  Iteration 15266/56600 Training loss: 1.0992 0.4762 sec/batch
Epoch 27/100  Iteration 15267/56600 Training loss: 1.0991 0.4772 sec/batch
Epoch 27/100  Iteration 15268/56600 Training loss: 1.0991 0.4753 sec/batch
Epoch 27/100  Iteration 1

Epoch 28/100  Iteration 15366/56600 Training loss: 1.0917 0.4880 sec/batch
Epoch 28/100  Iteration 15367/56600 Training loss: 1.0916 0.4742 sec/batch
Epoch 28/100  Iteration 15368/56600 Training loss: 1.0915 0.4777 sec/batch
Epoch 28/100  Iteration 15369/56600 Training loss: 1.0912 0.4765 sec/batch
Epoch 28/100  Iteration 15370/56600 Training loss: 1.0908 0.4777 sec/batch
Epoch 28/100  Iteration 15371/56600 Training loss: 1.0909 0.4774 sec/batch
Epoch 28/100  Iteration 15372/56600 Training loss: 1.0911 0.4783 sec/batch
Epoch 28/100  Iteration 15373/56600 Training loss: 1.0912 0.4741 sec/batch
Epoch 28/100  Iteration 15374/56600 Training loss: 1.0911 0.4830 sec/batch
Epoch 28/100  Iteration 15375/56600 Training loss: 1.0908 0.4718 sec/batch
Epoch 28/100  Iteration 15376/56600 Training loss: 1.0907 0.4857 sec/batch
Epoch 28/100  Iteration 15377/56600 Training loss: 1.0905 0.4769 sec/batch
Epoch 28/100  Iteration 15378/56600 Training loss: 1.0904 0.4749 sec/batch
Epoch 28/100  Iteration 1

Epoch 28/100  Iteration 15476/56600 Training loss: 1.0981 0.4877 sec/batch
Epoch 28/100  Iteration 15477/56600 Training loss: 1.0982 0.4837 sec/batch
Epoch 28/100  Iteration 15478/56600 Training loss: 1.0983 0.4800 sec/batch
Epoch 28/100  Iteration 15479/56600 Training loss: 1.0984 0.4789 sec/batch
Epoch 28/100  Iteration 15480/56600 Training loss: 1.0985 0.4762 sec/batch
Epoch 28/100  Iteration 15481/56600 Training loss: 1.0986 0.4860 sec/batch
Epoch 28/100  Iteration 15482/56600 Training loss: 1.0986 0.4889 sec/batch
Epoch 28/100  Iteration 15483/56600 Training loss: 1.0986 0.4654 sec/batch
Epoch 28/100  Iteration 15484/56600 Training loss: 1.0988 0.4797 sec/batch
Epoch 28/100  Iteration 15485/56600 Training loss: 1.0989 0.4802 sec/batch
Epoch 28/100  Iteration 15486/56600 Training loss: 1.0989 0.4837 sec/batch
Epoch 28/100  Iteration 15487/56600 Training loss: 1.0991 0.4896 sec/batch
Epoch 28/100  Iteration 15488/56600 Training loss: 1.0993 0.4884 sec/batch
Epoch 28/100  Iteration 1

Epoch 28/100  Iteration 15586/56600 Training loss: 1.1007 0.4637 sec/batch
Epoch 28/100  Iteration 15587/56600 Training loss: 1.1006 0.4730 sec/batch
Epoch 28/100  Iteration 15588/56600 Training loss: 1.1006 0.4733 sec/batch
Epoch 28/100  Iteration 15589/56600 Training loss: 1.1004 0.4865 sec/batch
Epoch 28/100  Iteration 15590/56600 Training loss: 1.1003 0.4759 sec/batch
Epoch 28/100  Iteration 15591/56600 Training loss: 1.1002 0.4767 sec/batch
Epoch 28/100  Iteration 15592/56600 Training loss: 1.1002 0.4760 sec/batch
Epoch 28/100  Iteration 15593/56600 Training loss: 1.1001 0.4639 sec/batch
Epoch 28/100  Iteration 15594/56600 Training loss: 1.1000 0.4691 sec/batch
Epoch 28/100  Iteration 15595/56600 Training loss: 1.1000 0.4725 sec/batch
Epoch 28/100  Iteration 15596/56600 Training loss: 1.0999 0.4740 sec/batch
Epoch 28/100  Iteration 15597/56600 Training loss: 1.0998 0.4746 sec/batch
Epoch 28/100  Iteration 15598/56600 Training loss: 1.0998 0.4759 sec/batch
Epoch 28/100  Iteration 1

Epoch 28/100  Iteration 15696/56600 Training loss: 1.0974 0.4826 sec/batch
Epoch 28/100  Iteration 15697/56600 Training loss: 1.0973 0.4880 sec/batch
Epoch 28/100  Iteration 15698/56600 Training loss: 1.0973 0.4820 sec/batch
Epoch 28/100  Iteration 15699/56600 Training loss: 1.0973 0.4859 sec/batch
Epoch 28/100  Iteration 15700/56600 Training loss: 1.0972 0.4936 sec/batch
Epoch 28/100  Iteration 15701/56600 Training loss: 1.0973 0.4824 sec/batch
Epoch 28/100  Iteration 15702/56600 Training loss: 1.0973 0.4828 sec/batch
Epoch 28/100  Iteration 15703/56600 Training loss: 1.0972 0.4820 sec/batch
Epoch 28/100  Iteration 15704/56600 Training loss: 1.0972 0.4931 sec/batch
Epoch 28/100  Iteration 15705/56600 Training loss: 1.0972 0.4796 sec/batch
Epoch 28/100  Iteration 15706/56600 Training loss: 1.0973 0.4798 sec/batch
Epoch 28/100  Iteration 15707/56600 Training loss: 1.0972 0.4836 sec/batch
Epoch 28/100  Iteration 15708/56600 Training loss: 1.0971 0.4812 sec/batch
Epoch 28/100  Iteration 1

Epoch 28/100  Iteration 15806/56600 Training loss: 1.0943 0.4725 sec/batch
Epoch 28/100  Iteration 15807/56600 Training loss: 1.0943 0.4822 sec/batch
Epoch 28/100  Iteration 15808/56600 Training loss: 1.0943 0.4868 sec/batch
Epoch 28/100  Iteration 15809/56600 Training loss: 1.0943 0.4768 sec/batch
Epoch 28/100  Iteration 15810/56600 Training loss: 1.0943 0.4738 sec/batch
Epoch 28/100  Iteration 15811/56600 Training loss: 1.0943 0.4890 sec/batch
Epoch 28/100  Iteration 15812/56600 Training loss: 1.0943 0.4791 sec/batch
Epoch 28/100  Iteration 15813/56600 Training loss: 1.0943 0.4620 sec/batch
Epoch 28/100  Iteration 15814/56600 Training loss: 1.0942 0.4663 sec/batch
Epoch 28/100  Iteration 15815/56600 Training loss: 1.0942 0.4889 sec/batch
Epoch 28/100  Iteration 15816/56600 Training loss: 1.0942 0.4741 sec/batch
Epoch 28/100  Iteration 15817/56600 Training loss: 1.0941 0.4748 sec/batch
Epoch 28/100  Iteration 15818/56600 Training loss: 1.0942 0.4752 sec/batch
Epoch 28/100  Iteration 1

Epoch 29/100  Iteration 15916/56600 Training loss: 1.0902 0.4741 sec/batch
Epoch 29/100  Iteration 15917/56600 Training loss: 1.0905 0.4752 sec/batch
Epoch 29/100  Iteration 15918/56600 Training loss: 1.0901 0.4782 sec/batch
Epoch 29/100  Iteration 15919/56600 Training loss: 1.0900 0.4627 sec/batch
Epoch 29/100  Iteration 15920/56600 Training loss: 1.0895 0.4721 sec/batch
Epoch 29/100  Iteration 15921/56600 Training loss: 1.0890 0.4822 sec/batch
Epoch 29/100  Iteration 15922/56600 Training loss: 1.0895 0.4853 sec/batch
Epoch 29/100  Iteration 15923/56600 Training loss: 1.0897 0.4771 sec/batch
Epoch 29/100  Iteration 15924/56600 Training loss: 1.0891 0.4776 sec/batch
Epoch 29/100  Iteration 15925/56600 Training loss: 1.0888 0.4738 sec/batch
Epoch 29/100  Iteration 15926/56600 Training loss: 1.0889 0.4786 sec/batch
Epoch 29/100  Iteration 15927/56600 Training loss: 1.0885 0.4723 sec/batch
Epoch 29/100  Iteration 15928/56600 Training loss: 1.0883 0.4775 sec/batch
Epoch 29/100  Iteration 1

Epoch 29/100  Iteration 16025/56600 Training loss: 1.0953 0.4834 sec/batch
Epoch 29/100  Iteration 16026/56600 Training loss: 1.0953 0.4731 sec/batch
Epoch 29/100  Iteration 16027/56600 Training loss: 1.0952 0.4659 sec/batch
Epoch 29/100  Iteration 16028/56600 Training loss: 1.0952 0.4872 sec/batch
Epoch 29/100  Iteration 16029/56600 Training loss: 1.0951 0.4663 sec/batch
Epoch 29/100  Iteration 16030/56600 Training loss: 1.0952 0.4646 sec/batch
Epoch 29/100  Iteration 16031/56600 Training loss: 1.0953 0.4895 sec/batch
Epoch 29/100  Iteration 16032/56600 Training loss: 1.0954 0.4765 sec/batch
Epoch 29/100  Iteration 16033/56600 Training loss: 1.0954 0.4754 sec/batch
Epoch 29/100  Iteration 16034/56600 Training loss: 1.0954 0.4776 sec/batch
Epoch 29/100  Iteration 16035/56600 Training loss: 1.0955 0.4720 sec/batch
Epoch 29/100  Iteration 16036/56600 Training loss: 1.0957 0.4726 sec/batch
Epoch 29/100  Iteration 16037/56600 Training loss: 1.0955 0.4661 sec/batch
Epoch 29/100  Iteration 1

Epoch 29/100  Iteration 16135/56600 Training loss: 1.0968 0.4737 sec/batch
Epoch 29/100  Iteration 16136/56600 Training loss: 1.0969 0.4800 sec/batch
Epoch 29/100  Iteration 16137/56600 Training loss: 1.0968 0.4731 sec/batch
Epoch 29/100  Iteration 16138/56600 Training loss: 1.0969 0.4821 sec/batch
Epoch 29/100  Iteration 16139/56600 Training loss: 1.0970 0.4778 sec/batch
Epoch 29/100  Iteration 16140/56600 Training loss: 1.0971 0.4920 sec/batch
Epoch 29/100  Iteration 16141/56600 Training loss: 1.0973 0.4718 sec/batch
Epoch 29/100  Iteration 16142/56600 Training loss: 1.0973 0.4865 sec/batch
Epoch 29/100  Iteration 16143/56600 Training loss: 1.0975 0.4772 sec/batch
Epoch 29/100  Iteration 16144/56600 Training loss: 1.0975 0.4742 sec/batch
Epoch 29/100  Iteration 16145/56600 Training loss: 1.0976 0.4721 sec/batch
Epoch 29/100  Iteration 16146/56600 Training loss: 1.0976 0.4893 sec/batch
Epoch 29/100  Iteration 16147/56600 Training loss: 1.0975 0.4768 sec/batch
Epoch 29/100  Iteration 1

Epoch 29/100  Iteration 16245/56600 Training loss: 1.0944 0.4747 sec/batch
Epoch 29/100  Iteration 16246/56600 Training loss: 1.0943 0.4743 sec/batch
Epoch 29/100  Iteration 16247/56600 Training loss: 1.0944 0.4819 sec/batch
Epoch 29/100  Iteration 16248/56600 Training loss: 1.0944 0.4772 sec/batch
Epoch 29/100  Iteration 16249/56600 Training loss: 1.0943 0.4886 sec/batch
Epoch 29/100  Iteration 16250/56600 Training loss: 1.0943 0.4594 sec/batch
Epoch 29/100  Iteration 16251/56600 Training loss: 1.0943 0.4788 sec/batch
Epoch 29/100  Iteration 16252/56600 Training loss: 1.0942 0.4754 sec/batch
Epoch 29/100  Iteration 16253/56600 Training loss: 1.0942 0.4674 sec/batch
Epoch 29/100  Iteration 16254/56600 Training loss: 1.0942 0.4868 sec/batch
Epoch 29/100  Iteration 16255/56600 Training loss: 1.0942 0.4804 sec/batch
Epoch 29/100  Iteration 16256/56600 Training loss: 1.0942 0.4851 sec/batch
Epoch 29/100  Iteration 16257/56600 Training loss: 1.0941 0.4785 sec/batch
Epoch 29/100  Iteration 1

Epoch 29/100  Iteration 16355/56600 Training loss: 1.0909 0.4772 sec/batch
Epoch 29/100  Iteration 16356/56600 Training loss: 1.0908 0.4721 sec/batch
Epoch 29/100  Iteration 16357/56600 Training loss: 1.0908 0.4757 sec/batch
Epoch 29/100  Iteration 16358/56600 Training loss: 1.0908 0.4866 sec/batch
Epoch 29/100  Iteration 16359/56600 Training loss: 1.0908 0.4770 sec/batch
Epoch 29/100  Iteration 16360/56600 Training loss: 1.0907 0.4737 sec/batch
Epoch 29/100  Iteration 16361/56600 Training loss: 1.0907 0.4745 sec/batch
Epoch 29/100  Iteration 16362/56600 Training loss: 1.0908 0.4704 sec/batch
Epoch 29/100  Iteration 16363/56600 Training loss: 1.0908 0.4874 sec/batch
Epoch 29/100  Iteration 16364/56600 Training loss: 1.0908 0.4816 sec/batch
Epoch 29/100  Iteration 16365/56600 Training loss: 1.0908 0.4763 sec/batch
Epoch 29/100  Iteration 16366/56600 Training loss: 1.0908 0.4760 sec/batch
Epoch 29/100  Iteration 16367/56600 Training loss: 1.0908 0.4772 sec/batch
Epoch 29/100  Iteration 1

Epoch 30/100  Iteration 16465/56600 Training loss: 1.0933 0.4747 sec/batch
Epoch 30/100  Iteration 16466/56600 Training loss: 1.0926 0.4759 sec/batch
Epoch 30/100  Iteration 16467/56600 Training loss: 1.0923 0.4734 sec/batch
Epoch 30/100  Iteration 16468/56600 Training loss: 1.0917 0.4720 sec/batch
Epoch 30/100  Iteration 16469/56600 Training loss: 1.0910 0.4742 sec/batch
Epoch 30/100  Iteration 16470/56600 Training loss: 1.0905 0.4869 sec/batch
Epoch 30/100  Iteration 16471/56600 Training loss: 1.0899 0.4803 sec/batch
Epoch 30/100  Iteration 16472/56600 Training loss: 1.0892 0.4768 sec/batch
Epoch 30/100  Iteration 16473/56600 Training loss: 1.0887 0.4755 sec/batch
Epoch 30/100  Iteration 16474/56600 Training loss: 1.0883 0.4685 sec/batch
Epoch 30/100  Iteration 16475/56600 Training loss: 1.0876 0.4767 sec/batch
Epoch 30/100  Iteration 16476/56600 Training loss: 1.0870 0.4738 sec/batch
Epoch 30/100  Iteration 16477/56600 Training loss: 1.0868 0.4764 sec/batch
Epoch 30/100  Iteration 1

Epoch 30/100  Iteration 16575/56600 Training loss: 1.0904 0.4960 sec/batch
Epoch 30/100  Iteration 16576/56600 Training loss: 1.0904 0.4787 sec/batch
Epoch 30/100  Iteration 16577/56600 Training loss: 1.0903 0.4685 sec/batch
Epoch 30/100  Iteration 16578/56600 Training loss: 1.0902 0.4672 sec/batch
Epoch 30/100  Iteration 16579/56600 Training loss: 1.0901 0.4800 sec/batch
Epoch 30/100  Iteration 16580/56600 Training loss: 1.0901 0.4911 sec/batch
Epoch 30/100  Iteration 16581/56600 Training loss: 1.0903 0.4810 sec/batch
Epoch 30/100  Iteration 16582/56600 Training loss: 1.0904 0.4821 sec/batch
Epoch 30/100  Iteration 16583/56600 Training loss: 1.0905 0.4827 sec/batch
Epoch 30/100  Iteration 16584/56600 Training loss: 1.0905 0.4799 sec/batch
Epoch 30/100  Iteration 16585/56600 Training loss: 1.0903 0.4843 sec/batch
Epoch 30/100  Iteration 16586/56600 Training loss: 1.0903 0.4671 sec/batch
Epoch 30/100  Iteration 16587/56600 Training loss: 1.0902 0.4762 sec/batch
Epoch 30/100  Iteration 1

Epoch 30/100  Iteration 16685/56600 Training loss: 1.0927 0.4802 sec/batch
Epoch 30/100  Iteration 16686/56600 Training loss: 1.0928 0.4806 sec/batch
Epoch 30/100  Iteration 16687/56600 Training loss: 1.0927 0.4691 sec/batch
Epoch 30/100  Iteration 16688/56600 Training loss: 1.0927 0.4666 sec/batch
Epoch 30/100  Iteration 16689/56600 Training loss: 1.0927 0.4841 sec/batch
Epoch 30/100  Iteration 16690/56600 Training loss: 1.0926 0.4820 sec/batch
Epoch 30/100  Iteration 16691/56600 Training loss: 1.0926 0.4705 sec/batch
Epoch 30/100  Iteration 16692/56600 Training loss: 1.0925 0.4725 sec/batch
Epoch 30/100  Iteration 16693/56600 Training loss: 1.0923 0.4822 sec/batch
Epoch 30/100  Iteration 16694/56600 Training loss: 1.0923 0.4801 sec/batch
Epoch 30/100  Iteration 16695/56600 Training loss: 1.0922 0.4845 sec/batch
Epoch 30/100  Iteration 16696/56600 Training loss: 1.0921 0.4797 sec/batch
Epoch 30/100  Iteration 16697/56600 Training loss: 1.0921 0.4783 sec/batch
Epoch 30/100  Iteration 1

Epoch 30/100  Iteration 16795/56600 Training loss: 1.0900 0.4823 sec/batch
Epoch 30/100  Iteration 16796/56600 Training loss: 1.0900 0.4806 sec/batch
Epoch 30/100  Iteration 16797/56600 Training loss: 1.0899 0.4916 sec/batch
Epoch 30/100  Iteration 16798/56600 Training loss: 1.0899 0.4786 sec/batch
Epoch 30/100  Iteration 16799/56600 Training loss: 1.0898 0.4892 sec/batch
Epoch 30/100  Iteration 16800/56600 Training loss: 1.0899 0.4814 sec/batch
Epoch 30/100  Iteration 16801/56600 Training loss: 1.0899 0.4800 sec/batch
Epoch 30/100  Iteration 16802/56600 Training loss: 1.0899 0.4890 sec/batch
Epoch 30/100  Iteration 16803/56600 Training loss: 1.0898 0.4796 sec/batch
Epoch 30/100  Iteration 16804/56600 Training loss: 1.0899 0.4906 sec/batch
Epoch 30/100  Iteration 16805/56600 Training loss: 1.0898 0.4821 sec/batch
Epoch 30/100  Iteration 16806/56600 Training loss: 1.0897 0.4910 sec/batch
Epoch 30/100  Iteration 16807/56600 Training loss: 1.0898 0.4799 sec/batch
Epoch 30/100  Iteration 1

Epoch 30/100  Iteration 16905/56600 Training loss: 1.0869 0.4679 sec/batch
Epoch 30/100  Iteration 16906/56600 Training loss: 1.0868 0.4714 sec/batch
Epoch 30/100  Iteration 16907/56600 Training loss: 1.0869 0.4828 sec/batch
Epoch 30/100  Iteration 16908/56600 Training loss: 1.0869 0.4797 sec/batch
Epoch 30/100  Iteration 16909/56600 Training loss: 1.0869 0.4770 sec/batch
Epoch 30/100  Iteration 16910/56600 Training loss: 1.0868 0.4691 sec/batch
Epoch 30/100  Iteration 16911/56600 Training loss: 1.0868 0.4882 sec/batch
Epoch 30/100  Iteration 16912/56600 Training loss: 1.0868 0.4725 sec/batch
Epoch 30/100  Iteration 16913/56600 Training loss: 1.0867 0.4895 sec/batch
Epoch 30/100  Iteration 16914/56600 Training loss: 1.0867 0.4875 sec/batch
Epoch 30/100  Iteration 16915/56600 Training loss: 1.0867 0.4785 sec/batch
Epoch 30/100  Iteration 16916/56600 Training loss: 1.0866 0.4786 sec/batch
Epoch 30/100  Iteration 16917/56600 Training loss: 1.0865 0.4768 sec/batch
Epoch 30/100  Iteration 1

Epoch 31/100  Iteration 17015/56600 Training loss: 1.0879 0.4824 sec/batch
Epoch 31/100  Iteration 17016/56600 Training loss: 1.0887 0.4800 sec/batch
Epoch 31/100  Iteration 17017/56600 Training loss: 1.0898 0.4907 sec/batch
Epoch 31/100  Iteration 17018/56600 Training loss: 1.0902 0.4720 sec/batch
Epoch 31/100  Iteration 17019/56600 Training loss: 1.0903 0.4882 sec/batch
Epoch 31/100  Iteration 17020/56600 Training loss: 1.0908 0.4812 sec/batch
Epoch 31/100  Iteration 17021/56600 Training loss: 1.0910 0.4648 sec/batch
Epoch 31/100  Iteration 17022/56600 Training loss: 1.0916 0.4816 sec/batch
Epoch 31/100  Iteration 17023/56600 Training loss: 1.0913 0.4644 sec/batch
Epoch 31/100  Iteration 17024/56600 Training loss: 1.0910 0.4792 sec/batch
Epoch 31/100  Iteration 17025/56600 Training loss: 1.0915 0.4801 sec/batch
Epoch 31/100  Iteration 17026/56600 Training loss: 1.0921 0.4717 sec/batch
Epoch 31/100  Iteration 17027/56600 Training loss: 1.0911 0.4874 sec/batch
Epoch 31/100  Iteration 1

Epoch 31/100  Iteration 17125/56600 Training loss: 1.0837 0.4813 sec/batch
Epoch 31/100  Iteration 17126/56600 Training loss: 1.0837 0.4968 sec/batch
Epoch 31/100  Iteration 17127/56600 Training loss: 1.0836 0.4781 sec/batch
Epoch 31/100  Iteration 17128/56600 Training loss: 1.0837 0.4809 sec/batch
Epoch 31/100  Iteration 17129/56600 Training loss: 1.0839 0.4790 sec/batch
Epoch 31/100  Iteration 17130/56600 Training loss: 1.0840 0.4799 sec/batch
Epoch 31/100  Iteration 17131/56600 Training loss: 1.0843 0.4810 sec/batch
Epoch 31/100  Iteration 17132/56600 Training loss: 1.0847 0.4839 sec/batch
Epoch 31/100  Iteration 17133/56600 Training loss: 1.0849 0.4826 sec/batch
Epoch 31/100  Iteration 17134/56600 Training loss: 1.0850 0.4919 sec/batch
Epoch 31/100  Iteration 17135/56600 Training loss: 1.0851 0.4790 sec/batch
Epoch 31/100  Iteration 17136/56600 Training loss: 1.0852 0.4789 sec/batch
Epoch 31/100  Iteration 17137/56600 Training loss: 1.0854 0.4873 sec/batch
Epoch 31/100  Iteration 1

Epoch 31/100  Iteration 17235/56600 Training loss: 1.0889 0.4909 sec/batch
Epoch 31/100  Iteration 17236/56600 Training loss: 1.0889 0.4695 sec/batch
Epoch 31/100  Iteration 17237/56600 Training loss: 1.0888 0.4769 sec/batch
Epoch 31/100  Iteration 17238/56600 Training loss: 1.0887 0.4767 sec/batch
Epoch 31/100  Iteration 17239/56600 Training loss: 1.0886 0.4636 sec/batch
Epoch 31/100  Iteration 17240/56600 Training loss: 1.0884 0.4720 sec/batch
Epoch 31/100  Iteration 17241/56600 Training loss: 1.0883 0.4663 sec/batch
Epoch 31/100  Iteration 17242/56600 Training loss: 1.0882 0.4808 sec/batch
Epoch 31/100  Iteration 17243/56600 Training loss: 1.0881 0.4879 sec/batch
Epoch 31/100  Iteration 17244/56600 Training loss: 1.0881 0.4750 sec/batch
Epoch 31/100  Iteration 17245/56600 Training loss: 1.0880 0.4826 sec/batch
Epoch 31/100  Iteration 17246/56600 Training loss: 1.0879 0.4814 sec/batch
Epoch 31/100  Iteration 17247/56600 Training loss: 1.0878 0.4809 sec/batch
Epoch 31/100  Iteration 1

Epoch 31/100  Iteration 17345/56600 Training loss: 1.0857 0.4822 sec/batch
Epoch 31/100  Iteration 17346/56600 Training loss: 1.0856 0.4799 sec/batch
Epoch 31/100  Iteration 17347/56600 Training loss: 1.0856 0.4688 sec/batch
Epoch 31/100  Iteration 17348/56600 Training loss: 1.0856 0.4645 sec/batch
Epoch 31/100  Iteration 17349/56600 Training loss: 1.0855 0.4773 sec/batch
Epoch 31/100  Iteration 17350/56600 Training loss: 1.0854 0.4788 sec/batch
Epoch 31/100  Iteration 17351/56600 Training loss: 1.0854 0.4746 sec/batch
Epoch 31/100  Iteration 17352/56600 Training loss: 1.0853 0.4877 sec/batch
Epoch 31/100  Iteration 17353/56600 Training loss: 1.0853 0.4823 sec/batch
Epoch 31/100  Iteration 17354/56600 Training loss: 1.0852 0.4801 sec/batch
Epoch 31/100  Iteration 17355/56600 Training loss: 1.0851 0.4916 sec/batch
Epoch 31/100  Iteration 17356/56600 Training loss: 1.0852 0.4704 sec/batch
Epoch 31/100  Iteration 17357/56600 Training loss: 1.0852 0.4883 sec/batch
Epoch 31/100  Iteration 1

Epoch 31/100  Iteration 17455/56600 Training loss: 1.0827 0.4800 sec/batch
Epoch 31/100  Iteration 17456/56600 Training loss: 1.0826 0.4786 sec/batch
Epoch 31/100  Iteration 17457/56600 Training loss: 1.0827 0.4716 sec/batch
Epoch 31/100  Iteration 17458/56600 Training loss: 1.0826 0.4746 sec/batch
Epoch 31/100  Iteration 17459/56600 Training loss: 1.0826 0.4746 sec/batch
Epoch 31/100  Iteration 17460/56600 Training loss: 1.0826 0.4600 sec/batch
Epoch 31/100  Iteration 17461/56600 Training loss: 1.0826 0.4720 sec/batch
Epoch 31/100  Iteration 17462/56600 Training loss: 1.0826 0.4825 sec/batch
Epoch 31/100  Iteration 17463/56600 Training loss: 1.0826 0.4647 sec/batch
Epoch 31/100  Iteration 17464/56600 Training loss: 1.0826 0.4843 sec/batch
Epoch 31/100  Iteration 17465/56600 Training loss: 1.0825 0.4614 sec/batch
Epoch 31/100  Iteration 17466/56600 Training loss: 1.0825 0.4845 sec/batch
Epoch 31/100  Iteration 17467/56600 Training loss: 1.0825 0.4738 sec/batch
Epoch 31/100  Iteration 1

Epoch 32/100  Iteration 17565/56600 Training loss: 1.0900 0.4821 sec/batch
Epoch 32/100  Iteration 17566/56600 Training loss: 1.0883 0.4799 sec/batch
Epoch 32/100  Iteration 17567/56600 Training loss: 1.0871 0.4927 sec/batch
Epoch 32/100  Iteration 17568/56600 Training loss: 1.0845 0.4671 sec/batch
Epoch 32/100  Iteration 17569/56600 Training loss: 1.0841 0.4743 sec/batch
Epoch 32/100  Iteration 17570/56600 Training loss: 1.0836 0.4763 sec/batch
Epoch 32/100  Iteration 17571/56600 Training loss: 1.0833 0.4651 sec/batch
Epoch 32/100  Iteration 17572/56600 Training loss: 1.0823 0.4818 sec/batch
Epoch 32/100  Iteration 17573/56600 Training loss: 1.0821 0.4662 sec/batch
Epoch 32/100  Iteration 17574/56600 Training loss: 1.0819 0.4653 sec/batch
Epoch 32/100  Iteration 17575/56600 Training loss: 1.0815 0.4835 sec/batch
Epoch 32/100  Iteration 17576/56600 Training loss: 1.0808 0.4775 sec/batch
Epoch 32/100  Iteration 17577/56600 Training loss: 1.0801 0.4841 sec/batch
Epoch 32/100  Iteration 1

Epoch 32/100  Iteration 17675/56600 Training loss: 1.0791 0.4874 sec/batch
Epoch 32/100  Iteration 17676/56600 Training loss: 1.0790 0.4864 sec/batch
Epoch 32/100  Iteration 17677/56600 Training loss: 1.0791 0.4768 sec/batch
Epoch 32/100  Iteration 17678/56600 Training loss: 1.0790 0.4846 sec/batch
Epoch 32/100  Iteration 17679/56600 Training loss: 1.0790 0.4767 sec/batch
Epoch 32/100  Iteration 17680/56600 Training loss: 1.0790 0.4762 sec/batch
Epoch 32/100  Iteration 17681/56600 Training loss: 1.0787 0.4762 sec/batch
Epoch 32/100  Iteration 17682/56600 Training loss: 1.0786 0.4649 sec/batch
Epoch 32/100  Iteration 17683/56600 Training loss: 1.0787 0.4694 sec/batch
Epoch 32/100  Iteration 17684/56600 Training loss: 1.0791 0.4662 sec/batch
Epoch 32/100  Iteration 17685/56600 Training loss: 1.0793 0.4647 sec/batch
Epoch 32/100  Iteration 17686/56600 Training loss: 1.0792 0.4766 sec/batch
Epoch 32/100  Iteration 17687/56600 Training loss: 1.0794 0.4770 sec/batch
Epoch 32/100  Iteration 1

Epoch 32/100  Iteration 17785/56600 Training loss: 1.0837 0.4824 sec/batch
Epoch 32/100  Iteration 17786/56600 Training loss: 1.0839 0.4786 sec/batch
Epoch 32/100  Iteration 17787/56600 Training loss: 1.0841 0.4861 sec/batch
Epoch 32/100  Iteration 17788/56600 Training loss: 1.0843 0.4828 sec/batch
Epoch 32/100  Iteration 17789/56600 Training loss: 1.0845 0.4852 sec/batch
Epoch 32/100  Iteration 17790/56600 Training loss: 1.0848 0.4717 sec/batch
Epoch 32/100  Iteration 17791/56600 Training loss: 1.0850 0.5107 sec/batch
Epoch 32/100  Iteration 17792/56600 Training loss: 1.0851 0.4803 sec/batch
Epoch 32/100  Iteration 17793/56600 Training loss: 1.0852 0.4778 sec/batch
Epoch 32/100  Iteration 17794/56600 Training loss: 1.0852 0.4797 sec/batch
Epoch 32/100  Iteration 17795/56600 Training loss: 1.0852 0.4864 sec/batch
Epoch 32/100  Iteration 17796/56600 Training loss: 1.0853 0.4743 sec/batch
Epoch 32/100  Iteration 17797/56600 Training loss: 1.0851 0.5011 sec/batch
Epoch 32/100  Iteration 1

Epoch 32/100  Iteration 17895/56600 Training loss: 1.0820 0.4694 sec/batch
Epoch 32/100  Iteration 17896/56600 Training loss: 1.0820 0.4819 sec/batch
Epoch 32/100  Iteration 17897/56600 Training loss: 1.0818 0.4963 sec/batch
Epoch 32/100  Iteration 17898/56600 Training loss: 1.0817 0.4786 sec/batch
Epoch 32/100  Iteration 17899/56600 Training loss: 1.0817 0.4796 sec/batch
Epoch 32/100  Iteration 17900/56600 Training loss: 1.0816 0.4796 sec/batch
Epoch 32/100  Iteration 17901/56600 Training loss: 1.0816 0.4939 sec/batch
Epoch 32/100  Iteration 17902/56600 Training loss: 1.0817 0.4896 sec/batch
Epoch 32/100  Iteration 17903/56600 Training loss: 1.0818 0.4787 sec/batch
Epoch 32/100  Iteration 17904/56600 Training loss: 1.0817 0.4797 sec/batch
Epoch 32/100  Iteration 17905/56600 Training loss: 1.0817 0.4798 sec/batch
Epoch 32/100  Iteration 17906/56600 Training loss: 1.0818 0.4779 sec/batch
Epoch 32/100  Iteration 17907/56600 Training loss: 1.0817 0.4841 sec/batch
Epoch 32/100  Iteration 1

Epoch 32/100  Iteration 18004/56600 Training loss: 1.0789 0.4726 sec/batch
Epoch 32/100  Iteration 18005/56600 Training loss: 1.0789 0.4686 sec/batch
Epoch 32/100  Iteration 18006/56600 Training loss: 1.0789 0.4699 sec/batch
Epoch 32/100  Iteration 18007/56600 Training loss: 1.0789 0.4857 sec/batch
Epoch 32/100  Iteration 18008/56600 Training loss: 1.0789 0.4820 sec/batch
Epoch 32/100  Iteration 18009/56600 Training loss: 1.0789 0.4792 sec/batch
Epoch 32/100  Iteration 18010/56600 Training loss: 1.0788 0.4782 sec/batch
Epoch 32/100  Iteration 18011/56600 Training loss: 1.0788 0.4743 sec/batch
Epoch 32/100  Iteration 18012/56600 Training loss: 1.0788 0.4791 sec/batch
Epoch 32/100  Iteration 18013/56600 Training loss: 1.0789 0.4809 sec/batch
Epoch 32/100  Iteration 18014/56600 Training loss: 1.0789 0.4829 sec/batch
Epoch 32/100  Iteration 18015/56600 Training loss: 1.0789 0.4716 sec/batch
Epoch 32/100  Iteration 18016/56600 Training loss: 1.0790 0.4866 sec/batch
Epoch 32/100  Iteration 1

Epoch 33/100  Iteration 18114/56600 Training loss: 1.1525 0.4789 sec/batch
Epoch 33/100  Iteration 18115/56600 Training loss: 1.1413 0.4636 sec/batch
Epoch 33/100  Iteration 18116/56600 Training loss: 1.1296 0.4785 sec/batch
Epoch 33/100  Iteration 18117/56600 Training loss: 1.1168 0.4792 sec/batch
Epoch 33/100  Iteration 18118/56600 Training loss: 1.1133 0.4788 sec/batch
Epoch 33/100  Iteration 18119/56600 Training loss: 1.1085 0.4798 sec/batch
Epoch 33/100  Iteration 18120/56600 Training loss: 1.1028 0.4844 sec/batch
Epoch 33/100  Iteration 18121/56600 Training loss: 1.1010 0.4655 sec/batch
Epoch 33/100  Iteration 18122/56600 Training loss: 1.0990 0.4805 sec/batch
Epoch 33/100  Iteration 18123/56600 Training loss: 1.0986 0.4794 sec/batch
Epoch 33/100  Iteration 18124/56600 Training loss: 1.0986 0.4789 sec/batch
Epoch 33/100  Iteration 18125/56600 Training loss: 1.0980 0.4743 sec/batch
Epoch 33/100  Iteration 18126/56600 Training loss: 1.0952 0.4834 sec/batch
Epoch 33/100  Iteration 1

Epoch 33/100  Iteration 18224/56600 Training loss: 1.0704 0.4820 sec/batch
Epoch 33/100  Iteration 18225/56600 Training loss: 1.0709 0.4786 sec/batch
Epoch 33/100  Iteration 18226/56600 Training loss: 1.0711 0.4688 sec/batch
Epoch 33/100  Iteration 18227/56600 Training loss: 1.0714 0.4784 sec/batch
Epoch 33/100  Iteration 18228/56600 Training loss: 1.0717 0.4796 sec/batch
Epoch 33/100  Iteration 18229/56600 Training loss: 1.0722 0.4779 sec/batch
Epoch 33/100  Iteration 18230/56600 Training loss: 1.0725 0.4800 sec/batch
Epoch 33/100  Iteration 18231/56600 Training loss: 1.0726 0.4886 sec/batch
Epoch 33/100  Iteration 18232/56600 Training loss: 1.0732 0.4797 sec/batch
Epoch 33/100  Iteration 18233/56600 Training loss: 1.0738 0.4786 sec/batch
Epoch 33/100  Iteration 18234/56600 Training loss: 1.0743 0.4794 sec/batch
Epoch 33/100  Iteration 18235/56600 Training loss: 1.0742 0.4791 sec/batch
Epoch 33/100  Iteration 18236/56600 Training loss: 1.0741 0.4875 sec/batch
Epoch 33/100  Iteration 1

Epoch 33/100  Iteration 18334/56600 Training loss: 1.0781 0.4782 sec/batch
Epoch 33/100  Iteration 18335/56600 Training loss: 1.0780 0.4953 sec/batch
Epoch 33/100  Iteration 18336/56600 Training loss: 1.0781 0.4942 sec/batch
Epoch 33/100  Iteration 18337/56600 Training loss: 1.0782 0.4849 sec/batch
Epoch 33/100  Iteration 18338/56600 Training loss: 1.0783 0.4819 sec/batch
Epoch 33/100  Iteration 18339/56600 Training loss: 1.0784 0.4842 sec/batch
Epoch 33/100  Iteration 18340/56600 Training loss: 1.0783 0.4897 sec/batch
Epoch 33/100  Iteration 18341/56600 Training loss: 1.0784 0.4869 sec/batch
Epoch 33/100  Iteration 18342/56600 Training loss: 1.0786 0.4807 sec/batch
Epoch 33/100  Iteration 18343/56600 Training loss: 1.0787 0.4849 sec/batch
Epoch 33/100  Iteration 18344/56600 Training loss: 1.0788 0.4888 sec/batch
Epoch 33/100  Iteration 18345/56600 Training loss: 1.0791 0.4800 sec/batch
Epoch 33/100  Iteration 18346/56600 Training loss: 1.0790 0.4800 sec/batch
Epoch 33/100  Iteration 1

Epoch 33/100  Iteration 18444/56600 Training loss: 1.0783 0.4657 sec/batch
Epoch 33/100  Iteration 18445/56600 Training loss: 1.0784 0.4899 sec/batch
Epoch 33/100  Iteration 18446/56600 Training loss: 1.0783 0.4737 sec/batch
Epoch 33/100  Iteration 18447/56600 Training loss: 1.0782 0.4848 sec/batch
Epoch 33/100  Iteration 18448/56600 Training loss: 1.0782 0.4861 sec/batch
Epoch 33/100  Iteration 18449/56600 Training loss: 1.0782 0.4899 sec/batch
Epoch 33/100  Iteration 18450/56600 Training loss: 1.0783 0.4786 sec/batch
Epoch 33/100  Iteration 18451/56600 Training loss: 1.0782 0.4795 sec/batch
Epoch 33/100  Iteration 18452/56600 Training loss: 1.0782 0.4792 sec/batch
Epoch 33/100  Iteration 18453/56600 Training loss: 1.0781 0.4889 sec/batch
Epoch 33/100  Iteration 18454/56600 Training loss: 1.0780 0.4823 sec/batch
Epoch 33/100  Iteration 18455/56600 Training loss: 1.0779 0.4782 sec/batch
Epoch 33/100  Iteration 18456/56600 Training loss: 1.0779 0.4931 sec/batch
Epoch 33/100  Iteration 1

Epoch 33/100  Iteration 18554/56600 Training loss: 1.0749 0.4916 sec/batch
Epoch 33/100  Iteration 18555/56600 Training loss: 1.0749 0.4947 sec/batch
Epoch 33/100  Iteration 18556/56600 Training loss: 1.0749 0.4948 sec/batch
Epoch 33/100  Iteration 18557/56600 Training loss: 1.0748 0.4902 sec/batch
Epoch 33/100  Iteration 18558/56600 Training loss: 1.0747 0.4845 sec/batch
Epoch 33/100  Iteration 18559/56600 Training loss: 1.0746 0.4819 sec/batch
Epoch 33/100  Iteration 18560/56600 Training loss: 1.0746 0.4752 sec/batch
Epoch 33/100  Iteration 18561/56600 Training loss: 1.0746 0.4731 sec/batch
Epoch 33/100  Iteration 18562/56600 Training loss: 1.0745 0.4843 sec/batch
Epoch 33/100  Iteration 18563/56600 Training loss: 1.0745 0.4915 sec/batch
Epoch 33/100  Iteration 18564/56600 Training loss: 1.0745 0.4933 sec/batch
Epoch 33/100  Iteration 18565/56600 Training loss: 1.0745 0.4888 sec/batch
Epoch 33/100  Iteration 18566/56600 Training loss: 1.0745 0.4850 sec/batch
Epoch 33/100  Iteration 1

Epoch 33/100  Iteration 18664/56600 Training loss: 1.0731 0.4835 sec/batch
Epoch 33/100  Iteration 18665/56600 Training loss: 1.0732 0.4739 sec/batch
Epoch 33/100  Iteration 18666/56600 Training loss: 1.0732 0.4741 sec/batch
Epoch 33/100  Iteration 18667/56600 Training loss: 1.0731 0.4921 sec/batch
Epoch 33/100  Iteration 18668/56600 Training loss: 1.0731 0.4799 sec/batch
Epoch 33/100  Iteration 18669/56600 Training loss: 1.0732 0.4795 sec/batch
Epoch 33/100  Iteration 18670/56600 Training loss: 1.0732 0.4881 sec/batch
Epoch 33/100  Iteration 18671/56600 Training loss: 1.0732 0.4854 sec/batch
Epoch 33/100  Iteration 18672/56600 Training loss: 1.0732 0.4880 sec/batch
Epoch 33/100  Iteration 18673/56600 Training loss: 1.0733 0.4814 sec/batch
Epoch 33/100  Iteration 18674/56600 Training loss: 1.0733 0.4780 sec/batch
Epoch 33/100  Iteration 18675/56600 Training loss: 1.0733 0.4798 sec/batch
Epoch 33/100  Iteration 18676/56600 Training loss: 1.0734 0.4790 sec/batch
Epoch 33/100  Iteration 1

Epoch 34/100  Iteration 18774/56600 Training loss: 1.0653 0.4791 sec/batch
Epoch 34/100  Iteration 18775/56600 Training loss: 1.0649 0.4796 sec/batch
Epoch 34/100  Iteration 18776/56600 Training loss: 1.0648 0.4786 sec/batch
Epoch 34/100  Iteration 18777/56600 Training loss: 1.0651 0.4955 sec/batch
Epoch 34/100  Iteration 18778/56600 Training loss: 1.0658 0.4800 sec/batch
Epoch 34/100  Iteration 18779/56600 Training loss: 1.0659 0.4740 sec/batch
Epoch 34/100  Iteration 18780/56600 Training loss: 1.0664 0.4832 sec/batch
Epoch 34/100  Iteration 18781/56600 Training loss: 1.0663 0.4786 sec/batch
Epoch 34/100  Iteration 18782/56600 Training loss: 1.0665 0.4920 sec/batch
Epoch 34/100  Iteration 18783/56600 Training loss: 1.0666 0.4777 sec/batch
Epoch 34/100  Iteration 18784/56600 Training loss: 1.0665 0.4702 sec/batch
Epoch 34/100  Iteration 18785/56600 Training loss: 1.0662 0.4733 sec/batch
Epoch 34/100  Iteration 18786/56600 Training loss: 1.0662 0.4795 sec/batch
Epoch 34/100  Iteration 1

Epoch 34/100  Iteration 18884/56600 Training loss: 1.0741 0.4779 sec/batch
Epoch 34/100  Iteration 18885/56600 Training loss: 1.0742 0.4629 sec/batch
Epoch 34/100  Iteration 18886/56600 Training loss: 1.0741 0.4777 sec/batch
Epoch 34/100  Iteration 18887/56600 Training loss: 1.0741 0.4657 sec/batch
Epoch 34/100  Iteration 18888/56600 Training loss: 1.0741 0.4787 sec/batch
Epoch 34/100  Iteration 18889/56600 Training loss: 1.0741 0.4797 sec/batch
Epoch 34/100  Iteration 18890/56600 Training loss: 1.0740 0.4733 sec/batch
Epoch 34/100  Iteration 18891/56600 Training loss: 1.0738 0.4640 sec/batch
Epoch 34/100  Iteration 18892/56600 Training loss: 1.0736 0.4627 sec/batch
Epoch 34/100  Iteration 18893/56600 Training loss: 1.0737 0.4798 sec/batch
Epoch 34/100  Iteration 18894/56600 Training loss: 1.0736 0.4627 sec/batch
Epoch 34/100  Iteration 18895/56600 Training loss: 1.0736 0.4718 sec/batch
Epoch 34/100  Iteration 18896/56600 Training loss: 1.0736 0.4682 sec/batch
Epoch 34/100  Iteration 1

Epoch 34/100  Iteration 18994/56600 Training loss: 1.0748 0.4883 sec/batch
Epoch 34/100  Iteration 18995/56600 Training loss: 1.0748 0.4827 sec/batch
Epoch 34/100  Iteration 18996/56600 Training loss: 1.0748 0.4803 sec/batch
Epoch 34/100  Iteration 18997/56600 Training loss: 1.0748 0.4689 sec/batch
Epoch 34/100  Iteration 18998/56600 Training loss: 1.0746 0.4698 sec/batch
Epoch 34/100  Iteration 18999/56600 Training loss: 1.0747 0.4935 sec/batch
Epoch 34/100  Iteration 19000/56600 Training loss: 1.0746 0.4789 sec/batch
Epoch 34/100  Iteration 19001/56600 Training loss: 1.0744 0.4883 sec/batch
Epoch 34/100  Iteration 19002/56600 Training loss: 1.0743 0.4806 sec/batch
Epoch 34/100  Iteration 19003/56600 Training loss: 1.0743 0.4788 sec/batch
Epoch 34/100  Iteration 19004/56600 Training loss: 1.0743 0.4789 sec/batch
Epoch 34/100  Iteration 19005/56600 Training loss: 1.0742 0.4758 sec/batch
Epoch 34/100  Iteration 19006/56600 Training loss: 1.0741 0.4653 sec/batch
Epoch 34/100  Iteration 1

Epoch 34/100  Iteration 19104/56600 Training loss: 1.0720 0.4750 sec/batch
Epoch 34/100  Iteration 19105/56600 Training loss: 1.0719 0.4793 sec/batch
Epoch 34/100  Iteration 19106/56600 Training loss: 1.0719 0.4848 sec/batch
Epoch 34/100  Iteration 19107/56600 Training loss: 1.0718 0.4884 sec/batch
Epoch 34/100  Iteration 19108/56600 Training loss: 1.0717 0.4803 sec/batch
Epoch 34/100  Iteration 19109/56600 Training loss: 1.0716 0.4940 sec/batch
Epoch 34/100  Iteration 19110/56600 Training loss: 1.0715 0.4781 sec/batch
Epoch 34/100  Iteration 19111/56600 Training loss: 1.0715 0.4586 sec/batch
Epoch 34/100  Iteration 19112/56600 Training loss: 1.0714 0.4741 sec/batch
Epoch 34/100  Iteration 19113/56600 Training loss: 1.0714 0.4677 sec/batch
Epoch 34/100  Iteration 19114/56600 Training loss: 1.0713 0.4601 sec/batch
Epoch 34/100  Iteration 19115/56600 Training loss: 1.0713 0.4775 sec/batch
Epoch 34/100  Iteration 19116/56600 Training loss: 1.0712 0.4785 sec/batch
Epoch 34/100  Iteration 1

Epoch 34/100  Iteration 19214/56600 Training loss: 1.0695 0.4780 sec/batch
Epoch 34/100  Iteration 19215/56600 Training loss: 1.0695 0.4799 sec/batch
Epoch 34/100  Iteration 19216/56600 Training loss: 1.0695 0.4947 sec/batch
Epoch 34/100  Iteration 19217/56600 Training loss: 1.0695 0.4732 sec/batch
Epoch 34/100  Iteration 19218/56600 Training loss: 1.0695 0.4794 sec/batch
Epoch 34/100  Iteration 19219/56600 Training loss: 1.0695 0.4899 sec/batch
Epoch 34/100  Iteration 19220/56600 Training loss: 1.0695 0.4794 sec/batch
Epoch 34/100  Iteration 19221/56600 Training loss: 1.0695 0.4895 sec/batch
Epoch 34/100  Iteration 19222/56600 Training loss: 1.0695 0.4688 sec/batch
Epoch 34/100  Iteration 19223/56600 Training loss: 1.0694 0.4714 sec/batch
Epoch 34/100  Iteration 19224/56600 Training loss: 1.0694 0.4813 sec/batch
Epoch 34/100  Iteration 19225/56600 Training loss: 1.0694 0.4793 sec/batch
Epoch 34/100  Iteration 19226/56600 Training loss: 1.0694 0.4794 sec/batch
Epoch 34/100  Iteration 1

Epoch 35/100  Iteration 19324/56600 Training loss: 1.0632 0.4800 sec/batch
Epoch 35/100  Iteration 19325/56600 Training loss: 1.0630 0.4789 sec/batch
Epoch 35/100  Iteration 19326/56600 Training loss: 1.0630 0.4943 sec/batch
Epoch 35/100  Iteration 19327/56600 Training loss: 1.0633 0.4890 sec/batch
Epoch 35/100  Iteration 19328/56600 Training loss: 1.0636 0.4762 sec/batch
Epoch 35/100  Iteration 19329/56600 Training loss: 1.0634 0.4800 sec/batch
Epoch 35/100  Iteration 19330/56600 Training loss: 1.0631 0.4796 sec/batch
Epoch 35/100  Iteration 19331/56600 Training loss: 1.0628 0.4786 sec/batch
Epoch 35/100  Iteration 19332/56600 Training loss: 1.0623 0.4950 sec/batch
Epoch 35/100  Iteration 19333/56600 Training loss: 1.0623 0.4761 sec/batch
Epoch 35/100  Iteration 19334/56600 Training loss: 1.0626 0.4772 sec/batch
Epoch 35/100  Iteration 19335/56600 Training loss: 1.0626 0.4791 sec/batch
Epoch 35/100  Iteration 19336/56600 Training loss: 1.0625 0.4790 sec/batch
Epoch 35/100  Iteration 1

Epoch 35/100  Iteration 19434/56600 Training loss: 1.0689 0.4792 sec/batch
Epoch 35/100  Iteration 19435/56600 Training loss: 1.0688 0.4638 sec/batch
Epoch 35/100  Iteration 19436/56600 Training loss: 1.0688 0.4794 sec/batch
Epoch 35/100  Iteration 19437/56600 Training loss: 1.0689 0.4791 sec/batch
Epoch 35/100  Iteration 19438/56600 Training loss: 1.0690 0.4743 sec/batch
Epoch 35/100  Iteration 19439/56600 Training loss: 1.0691 0.4836 sec/batch
Epoch 35/100  Iteration 19440/56600 Training loss: 1.0692 0.4750 sec/batch
Epoch 35/100  Iteration 19441/56600 Training loss: 1.0693 0.4892 sec/batch
Epoch 35/100  Iteration 19442/56600 Training loss: 1.0695 0.4785 sec/batch
Epoch 35/100  Iteration 19443/56600 Training loss: 1.0695 0.4925 sec/batch
Epoch 35/100  Iteration 19444/56600 Training loss: 1.0695 0.4842 sec/batch
Epoch 35/100  Iteration 19445/56600 Training loss: 1.0694 0.4832 sec/batch
Epoch 35/100  Iteration 19446/56600 Training loss: 1.0697 0.4801 sec/batch
Epoch 35/100  Iteration 1

Epoch 35/100  Iteration 19544/56600 Training loss: 1.0719 0.4843 sec/batch
Epoch 35/100  Iteration 19545/56600 Training loss: 1.0718 0.4883 sec/batch
Epoch 35/100  Iteration 19546/56600 Training loss: 1.0719 0.4894 sec/batch
Epoch 35/100  Iteration 19547/56600 Training loss: 1.0718 0.4824 sec/batch
Epoch 35/100  Iteration 19548/56600 Training loss: 1.0718 0.4909 sec/batch
Epoch 35/100  Iteration 19549/56600 Training loss: 1.0717 0.4833 sec/batch
Epoch 35/100  Iteration 19550/56600 Training loss: 1.0717 0.4827 sec/batch
Epoch 35/100  Iteration 19551/56600 Training loss: 1.0716 0.4916 sec/batch
Epoch 35/100  Iteration 19552/56600 Training loss: 1.0714 0.4882 sec/batch
Epoch 35/100  Iteration 19553/56600 Training loss: 1.0714 0.4899 sec/batch
Epoch 35/100  Iteration 19554/56600 Training loss: 1.0714 0.4793 sec/batch
Epoch 35/100  Iteration 19555/56600 Training loss: 1.0713 0.4892 sec/batch
Epoch 35/100  Iteration 19556/56600 Training loss: 1.0713 0.4864 sec/batch
Epoch 35/100  Iteration 1

Epoch 35/100  Iteration 19654/56600 Training loss: 1.0690 0.4783 sec/batch
Epoch 35/100  Iteration 19655/56600 Training loss: 1.0689 0.4796 sec/batch
Epoch 35/100  Iteration 19656/56600 Training loss: 1.0689 0.4838 sec/batch
Epoch 35/100  Iteration 19657/56600 Training loss: 1.0689 0.4755 sec/batch
Epoch 35/100  Iteration 19658/56600 Training loss: 1.0689 0.4911 sec/batch
Epoch 35/100  Iteration 19659/56600 Training loss: 1.0688 0.4750 sec/batch
Epoch 35/100  Iteration 19660/56600 Training loss: 1.0688 0.4833 sec/batch
Epoch 35/100  Iteration 19661/56600 Training loss: 1.0688 0.4897 sec/batch
Epoch 35/100  Iteration 19662/56600 Training loss: 1.0688 0.4845 sec/batch
Epoch 35/100  Iteration 19663/56600 Training loss: 1.0688 0.4739 sec/batch
Epoch 35/100  Iteration 19664/56600 Training loss: 1.0688 0.4790 sec/batch
Epoch 35/100  Iteration 19665/56600 Training loss: 1.0687 0.5013 sec/batch
Epoch 35/100  Iteration 19666/56600 Training loss: 1.0687 0.4882 sec/batch
Epoch 35/100  Iteration 1

Epoch 35/100  Iteration 19764/56600 Training loss: 1.0662 0.4940 sec/batch
Epoch 35/100  Iteration 19765/56600 Training loss: 1.0662 0.4794 sec/batch
Epoch 35/100  Iteration 19766/56600 Training loss: 1.0662 0.4943 sec/batch
Epoch 35/100  Iteration 19767/56600 Training loss: 1.0662 0.4737 sec/batch
Epoch 35/100  Iteration 19768/56600 Training loss: 1.0662 0.4670 sec/batch
Epoch 35/100  Iteration 19769/56600 Training loss: 1.0662 0.4876 sec/batch
Epoch 35/100  Iteration 19770/56600 Training loss: 1.0662 0.5013 sec/batch
Epoch 35/100  Iteration 19771/56600 Training loss: 1.0662 0.4892 sec/batch
Epoch 35/100  Iteration 19772/56600 Training loss: 1.0662 0.4797 sec/batch
Epoch 35/100  Iteration 19773/56600 Training loss: 1.0662 0.4786 sec/batch
Epoch 35/100  Iteration 19774/56600 Training loss: 1.0662 0.4798 sec/batch
Epoch 35/100  Iteration 19775/56600 Training loss: 1.0662 0.4801 sec/batch
Epoch 35/100  Iteration 19776/56600 Training loss: 1.0662 0.4780 sec/batch
Epoch 35/100  Iteration 1

Epoch 36/100  Iteration 19874/56600 Training loss: 1.0617 0.4795 sec/batch
Epoch 36/100  Iteration 19875/56600 Training loss: 1.0613 0.4791 sec/batch
Epoch 36/100  Iteration 19876/56600 Training loss: 1.0610 0.4786 sec/batch
Epoch 36/100  Iteration 19877/56600 Training loss: 1.0607 0.4800 sec/batch
Epoch 36/100  Iteration 19878/56600 Training loss: 1.0612 0.4730 sec/batch
Epoch 36/100  Iteration 19879/56600 Training loss: 1.0615 0.4948 sec/batch
Epoch 36/100  Iteration 19880/56600 Training loss: 1.0610 0.4844 sec/batch
Epoch 36/100  Iteration 19881/56600 Training loss: 1.0607 0.4877 sec/batch
Epoch 36/100  Iteration 19882/56600 Training loss: 1.0600 0.4865 sec/batch
Epoch 36/100  Iteration 19883/56600 Training loss: 1.0594 0.4799 sec/batch
Epoch 36/100  Iteration 19884/56600 Training loss: 1.0600 0.4787 sec/batch
Epoch 36/100  Iteration 19885/56600 Training loss: 1.0600 0.4942 sec/batch
Epoch 36/100  Iteration 19886/56600 Training loss: 1.0593 0.4785 sec/batch
Epoch 36/100  Iteration 1

Epoch 36/100  Iteration 19984/56600 Training loss: 1.0650 0.4862 sec/batch
Epoch 36/100  Iteration 19985/56600 Training loss: 1.0649 0.4837 sec/batch
Epoch 36/100  Iteration 19986/56600 Training loss: 1.0648 0.4957 sec/batch
Epoch 36/100  Iteration 19987/56600 Training loss: 1.0648 0.4789 sec/batch
Epoch 36/100  Iteration 19988/56600 Training loss: 1.0648 0.4874 sec/batch
Epoch 36/100  Iteration 19989/56600 Training loss: 1.0647 0.4968 sec/batch
Epoch 36/100  Iteration 19990/56600 Training loss: 1.0647 0.4943 sec/batch
Epoch 36/100  Iteration 19991/56600 Training loss: 1.0646 0.4953 sec/batch
Epoch 36/100  Iteration 19992/56600 Training loss: 1.0647 0.4944 sec/batch
Epoch 36/100  Iteration 19993/56600 Training loss: 1.0648 0.4894 sec/batch
Epoch 36/100  Iteration 19994/56600 Training loss: 1.0649 0.4844 sec/batch
Epoch 36/100  Iteration 19995/56600 Training loss: 1.0649 0.4763 sec/batch
Epoch 36/100  Iteration 19996/56600 Training loss: 1.0650 0.4777 sec/batch
Epoch 36/100  Iteration 1

Epoch 36/100  Iteration 20093/56600 Training loss: 1.0680 0.4843 sec/batch
Epoch 36/100  Iteration 20094/56600 Training loss: 1.0679 0.4844 sec/batch
Epoch 36/100  Iteration 20095/56600 Training loss: 1.0679 0.4722 sec/batch
Epoch 36/100  Iteration 20096/56600 Training loss: 1.0680 0.4800 sec/batch
Epoch 36/100  Iteration 20097/56600 Training loss: 1.0681 0.4690 sec/batch
Epoch 36/100  Iteration 20098/56600 Training loss: 1.0681 0.4634 sec/batch
Epoch 36/100  Iteration 20099/56600 Training loss: 1.0681 0.4629 sec/batch
Epoch 36/100  Iteration 20100/56600 Training loss: 1.0682 0.4773 sec/batch
Epoch 36/100  Iteration 20101/56600 Training loss: 1.0683 0.4815 sec/batch
Epoch 36/100  Iteration 20102/56600 Training loss: 1.0684 0.4785 sec/batch
Epoch 36/100  Iteration 20103/56600 Training loss: 1.0686 0.4923 sec/batch
Epoch 36/100  Iteration 20104/56600 Training loss: 1.0687 0.4759 sec/batch
Epoch 36/100  Iteration 20105/56600 Training loss: 1.0688 0.4643 sec/batch
Epoch 36/100  Iteration 2

Epoch 36/100  Iteration 20203/56600 Training loss: 1.0661 0.4795 sec/batch
Epoch 36/100  Iteration 20204/56600 Training loss: 1.0661 0.4794 sec/batch
Epoch 36/100  Iteration 20205/56600 Training loss: 1.0661 0.4789 sec/batch
Epoch 36/100  Iteration 20206/56600 Training loss: 1.0661 0.4792 sec/batch
Epoch 36/100  Iteration 20207/56600 Training loss: 1.0661 0.4751 sec/batch
Epoch 36/100  Iteration 20208/56600 Training loss: 1.0661 0.4674 sec/batch
Epoch 36/100  Iteration 20209/56600 Training loss: 1.0662 0.4740 sec/batch
Epoch 36/100  Iteration 20210/56600 Training loss: 1.0663 0.4767 sec/batch
Epoch 36/100  Iteration 20211/56600 Training loss: 1.0661 0.4869 sec/batch
Epoch 36/100  Iteration 20212/56600 Training loss: 1.0661 0.4868 sec/batch
Epoch 36/100  Iteration 20213/56600 Training loss: 1.0661 0.4821 sec/batch
Epoch 36/100  Iteration 20214/56600 Training loss: 1.0660 0.4785 sec/batch
Epoch 36/100  Iteration 20215/56600 Training loss: 1.0660 0.4858 sec/batch
Epoch 36/100  Iteration 2

Epoch 36/100  Iteration 20313/56600 Training loss: 1.0633 0.4707 sec/batch
Epoch 36/100  Iteration 20314/56600 Training loss: 1.0633 0.4634 sec/batch
Epoch 36/100  Iteration 20315/56600 Training loss: 1.0632 0.4794 sec/batch
Epoch 36/100  Iteration 20316/56600 Training loss: 1.0632 0.4633 sec/batch
Epoch 36/100  Iteration 20317/56600 Training loss: 1.0631 0.4859 sec/batch
Epoch 36/100  Iteration 20318/56600 Training loss: 1.0630 0.4824 sec/batch
Epoch 36/100  Iteration 20319/56600 Training loss: 1.0630 0.4796 sec/batch
Epoch 36/100  Iteration 20320/56600 Training loss: 1.0629 0.4795 sec/batch
Epoch 36/100  Iteration 20321/56600 Training loss: 1.0629 0.4784 sec/batch
Epoch 36/100  Iteration 20322/56600 Training loss: 1.0629 0.4804 sec/batch
Epoch 36/100  Iteration 20323/56600 Training loss: 1.0629 0.4781 sec/batch
Epoch 36/100  Iteration 20324/56600 Training loss: 1.0629 0.4881 sec/batch
Epoch 36/100  Iteration 20325/56600 Training loss: 1.0629 0.4644 sec/batch
Epoch 36/100  Iteration 2

Epoch 37/100  Iteration 20423/56600 Training loss: 1.0695 0.4688 sec/batch
Epoch 37/100  Iteration 20424/56600 Training loss: 1.0693 0.4798 sec/batch
Epoch 37/100  Iteration 20425/56600 Training loss: 1.0689 0.4785 sec/batch
Epoch 37/100  Iteration 20426/56600 Training loss: 1.0681 0.4765 sec/batch
Epoch 37/100  Iteration 20427/56600 Training loss: 1.0675 0.4816 sec/batch
Epoch 37/100  Iteration 20428/56600 Training loss: 1.0668 0.4860 sec/batch
Epoch 37/100  Iteration 20429/56600 Training loss: 1.0662 0.4881 sec/batch
Epoch 37/100  Iteration 20430/56600 Training loss: 1.0656 0.4789 sec/batch
Epoch 37/100  Iteration 20431/56600 Training loss: 1.0650 0.4791 sec/batch
Epoch 37/100  Iteration 20432/56600 Training loss: 1.0644 0.4884 sec/batch
Epoch 37/100  Iteration 20433/56600 Training loss: 1.0637 0.4688 sec/batch
Epoch 37/100  Iteration 20434/56600 Training loss: 1.0626 0.4791 sec/batch
Epoch 37/100  Iteration 20435/56600 Training loss: 1.0622 0.4743 sec/batch
Epoch 37/100  Iteration 2

Epoch 37/100  Iteration 20533/56600 Training loss: 1.0622 0.4673 sec/batch
Epoch 37/100  Iteration 20534/56600 Training loss: 1.0623 0.4793 sec/batch
Epoch 37/100  Iteration 20535/56600 Training loss: 1.0623 0.4637 sec/batch
Epoch 37/100  Iteration 20536/56600 Training loss: 1.0623 0.4748 sec/batch
Epoch 37/100  Iteration 20537/56600 Training loss: 1.0622 0.4687 sec/batch
Epoch 37/100  Iteration 20538/56600 Training loss: 1.0623 0.4620 sec/batch
Epoch 37/100  Iteration 20539/56600 Training loss: 1.0622 0.4640 sec/batch
Epoch 37/100  Iteration 20540/56600 Training loss: 1.0621 0.4733 sec/batch
Epoch 37/100  Iteration 20541/56600 Training loss: 1.0621 0.4847 sec/batch
Epoch 37/100  Iteration 20542/56600 Training loss: 1.0621 0.4796 sec/batch
Epoch 37/100  Iteration 20543/56600 Training loss: 1.0622 0.4782 sec/batch
Epoch 37/100  Iteration 20544/56600 Training loss: 1.0624 0.4704 sec/batch
Epoch 37/100  Iteration 20545/56600 Training loss: 1.0624 0.4728 sec/batch
Epoch 37/100  Iteration 2

Epoch 37/100  Iteration 20643/56600 Training loss: 1.0654 0.4889 sec/batch
Epoch 37/100  Iteration 20644/56600 Training loss: 1.0653 0.4690 sec/batch
Epoch 37/100  Iteration 20645/56600 Training loss: 1.0652 0.4689 sec/batch
Epoch 37/100  Iteration 20646/56600 Training loss: 1.0651 0.4790 sec/batch
Epoch 37/100  Iteration 20647/56600 Training loss: 1.0652 0.4793 sec/batch
Epoch 37/100  Iteration 20648/56600 Training loss: 1.0652 0.4768 sec/batch
Epoch 37/100  Iteration 20649/56600 Training loss: 1.0652 0.4858 sec/batch
Epoch 37/100  Iteration 20650/56600 Training loss: 1.0651 0.4790 sec/batch
Epoch 37/100  Iteration 20651/56600 Training loss: 1.0651 0.4796 sec/batch
Epoch 37/100  Iteration 20652/56600 Training loss: 1.0651 0.4768 sec/batch
Epoch 37/100  Iteration 20653/56600 Training loss: 1.0650 0.4670 sec/batch
Epoch 37/100  Iteration 20654/56600 Training loss: 1.0650 0.4775 sec/batch
Epoch 37/100  Iteration 20655/56600 Training loss: 1.0648 0.4744 sec/batch
Epoch 37/100  Iteration 2

Epoch 37/100  Iteration 20753/56600 Training loss: 1.0630 0.4786 sec/batch
Epoch 37/100  Iteration 20754/56600 Training loss: 1.0630 0.4818 sec/batch
Epoch 37/100  Iteration 20755/56600 Training loss: 1.0629 0.4688 sec/batch
Epoch 37/100  Iteration 20756/56600 Training loss: 1.0629 0.4806 sec/batch
Epoch 37/100  Iteration 20757/56600 Training loss: 1.0629 0.4783 sec/batch
Epoch 37/100  Iteration 20758/56600 Training loss: 1.0629 0.4749 sec/batch
Epoch 37/100  Iteration 20759/56600 Training loss: 1.0628 0.4619 sec/batch
Epoch 37/100  Iteration 20760/56600 Training loss: 1.0628 0.4650 sec/batch
Epoch 37/100  Iteration 20761/56600 Training loss: 1.0628 0.4622 sec/batch
Epoch 37/100  Iteration 20762/56600 Training loss: 1.0629 0.4740 sec/batch
Epoch 37/100  Iteration 20763/56600 Training loss: 1.0629 0.4793 sec/batch
Epoch 37/100  Iteration 20764/56600 Training loss: 1.0628 0.4688 sec/batch
Epoch 37/100  Iteration 20765/56600 Training loss: 1.0628 0.4787 sec/batch
Epoch 37/100  Iteration 2

Epoch 37/100  Iteration 20863/56600 Training loss: 1.0605 0.4777 sec/batch
Epoch 37/100  Iteration 20864/56600 Training loss: 1.0605 0.4647 sec/batch
Epoch 37/100  Iteration 20865/56600 Training loss: 1.0604 0.4744 sec/batch
Epoch 37/100  Iteration 20866/56600 Training loss: 1.0604 0.4829 sec/batch
Epoch 37/100  Iteration 20867/56600 Training loss: 1.0604 0.4912 sec/batch
Epoch 37/100  Iteration 20868/56600 Training loss: 1.0604 0.4753 sec/batch
Epoch 37/100  Iteration 20869/56600 Training loss: 1.0604 0.4798 sec/batch
Epoch 37/100  Iteration 20870/56600 Training loss: 1.0604 0.4747 sec/batch
Epoch 37/100  Iteration 20871/56600 Training loss: 1.0603 0.4785 sec/batch
Epoch 37/100  Iteration 20872/56600 Training loss: 1.0603 0.4688 sec/batch
Epoch 37/100  Iteration 20873/56600 Training loss: 1.0603 0.4790 sec/batch
Epoch 37/100  Iteration 20874/56600 Training loss: 1.0603 0.4840 sec/batch
Epoch 37/100  Iteration 20875/56600 Training loss: 1.0602 0.4900 sec/batch
Epoch 37/100  Iteration 2

Epoch 38/100  Iteration 20973/56600 Training loss: 1.0581 0.4926 sec/batch
Epoch 38/100  Iteration 20974/56600 Training loss: 1.0576 0.4780 sec/batch
Epoch 38/100  Iteration 20975/56600 Training loss: 1.0582 0.4805 sec/batch
Epoch 38/100  Iteration 20976/56600 Training loss: 1.0596 0.4947 sec/batch
Epoch 38/100  Iteration 20977/56600 Training loss: 1.0609 0.4937 sec/batch
Epoch 38/100  Iteration 20978/56600 Training loss: 1.0617 0.4807 sec/batch
Epoch 38/100  Iteration 20979/56600 Training loss: 1.0625 0.4703 sec/batch
Epoch 38/100  Iteration 20980/56600 Training loss: 1.0631 0.4791 sec/batch
Epoch 38/100  Iteration 20981/56600 Training loss: 1.0632 0.4951 sec/batch
Epoch 38/100  Iteration 20982/56600 Training loss: 1.0635 0.4946 sec/batch
Epoch 38/100  Iteration 20983/56600 Training loss: 1.0636 0.4726 sec/batch
Epoch 38/100  Iteration 20984/56600 Training loss: 1.0644 0.4734 sec/batch
Epoch 38/100  Iteration 20985/56600 Training loss: 1.0640 0.4825 sec/batch
Epoch 38/100  Iteration 2

Epoch 38/100  Iteration 21083/56600 Training loss: 1.0564 0.4787 sec/batch
Epoch 38/100  Iteration 21084/56600 Training loss: 1.0565 0.4696 sec/batch
Epoch 38/100  Iteration 21085/56600 Training loss: 1.0567 0.4788 sec/batch
Epoch 38/100  Iteration 21086/56600 Training loss: 1.0566 0.4748 sec/batch
Epoch 38/100  Iteration 21087/56600 Training loss: 1.0567 0.4678 sec/batch
Epoch 38/100  Iteration 21088/56600 Training loss: 1.0567 0.4722 sec/batch
Epoch 38/100  Iteration 21089/56600 Training loss: 1.0566 0.4810 sec/batch
Epoch 38/100  Iteration 21090/56600 Training loss: 1.0568 0.4790 sec/batch
Epoch 38/100  Iteration 21091/56600 Training loss: 1.0569 0.4636 sec/batch
Epoch 38/100  Iteration 21092/56600 Training loss: 1.0571 0.4799 sec/batch
Epoch 38/100  Iteration 21093/56600 Training loss: 1.0575 0.4782 sec/batch
Epoch 38/100  Iteration 21094/56600 Training loss: 1.0579 0.4788 sec/batch
Epoch 38/100  Iteration 21095/56600 Training loss: 1.0582 0.4733 sec/batch
Epoch 38/100  Iteration 2

Epoch 38/100  Iteration 21193/56600 Training loss: 1.0631 0.4741 sec/batch
Epoch 38/100  Iteration 21194/56600 Training loss: 1.0630 0.4791 sec/batch
Epoch 38/100  Iteration 21195/56600 Training loss: 1.0630 0.4854 sec/batch
Epoch 38/100  Iteration 21196/56600 Training loss: 1.0630 0.4730 sec/batch
Epoch 38/100  Iteration 21197/56600 Training loss: 1.0629 0.4787 sec/batch
Epoch 38/100  Iteration 21198/56600 Training loss: 1.0630 0.4795 sec/batch
Epoch 38/100  Iteration 21199/56600 Training loss: 1.0628 0.4896 sec/batch
Epoch 38/100  Iteration 21200/56600 Training loss: 1.0628 0.5022 sec/batch
Epoch 38/100  Iteration 21201/56600 Training loss: 1.0627 0.4762 sec/batch
Epoch 38/100  Iteration 21202/56600 Training loss: 1.0625 0.4977 sec/batch
Epoch 38/100  Iteration 21203/56600 Training loss: 1.0624 0.4796 sec/batch
Epoch 38/100  Iteration 21204/56600 Training loss: 1.0624 0.4786 sec/batch
Epoch 38/100  Iteration 21205/56600 Training loss: 1.0623 0.4947 sec/batch
Epoch 38/100  Iteration 2

Epoch 38/100  Iteration 21303/56600 Training loss: 1.0598 0.4836 sec/batch
Epoch 38/100  Iteration 21304/56600 Training loss: 1.0597 0.4771 sec/batch
Epoch 38/100  Iteration 21305/56600 Training loss: 1.0597 0.4766 sec/batch
Epoch 38/100  Iteration 21306/56600 Training loss: 1.0597 0.4789 sec/batch
Epoch 38/100  Iteration 21307/56600 Training loss: 1.0596 0.4794 sec/batch
Epoch 38/100  Iteration 21308/56600 Training loss: 1.0596 0.4748 sec/batch
Epoch 38/100  Iteration 21309/56600 Training loss: 1.0596 0.4896 sec/batch
Epoch 38/100  Iteration 21310/56600 Training loss: 1.0595 0.4773 sec/batch
Epoch 38/100  Iteration 21311/56600 Training loss: 1.0595 0.4848 sec/batch
Epoch 38/100  Iteration 21312/56600 Training loss: 1.0594 0.4797 sec/batch
Epoch 38/100  Iteration 21313/56600 Training loss: 1.0593 0.4794 sec/batch
Epoch 38/100  Iteration 21314/56600 Training loss: 1.0592 0.4863 sec/batch
Epoch 38/100  Iteration 21315/56600 Training loss: 1.0592 0.4826 sec/batch
Epoch 38/100  Iteration 2

Epoch 38/100  Iteration 21413/56600 Training loss: 1.0568 0.4797 sec/batch
Epoch 38/100  Iteration 21414/56600 Training loss: 1.0569 0.4785 sec/batch
Epoch 38/100  Iteration 21415/56600 Training loss: 1.0570 0.4805 sec/batch
Epoch 38/100  Iteration 21416/56600 Training loss: 1.0569 0.4881 sec/batch
Epoch 38/100  Iteration 21417/56600 Training loss: 1.0569 0.4799 sec/batch
Epoch 38/100  Iteration 21418/56600 Training loss: 1.0568 0.4772 sec/batch
Epoch 38/100  Iteration 21419/56600 Training loss: 1.0569 0.4688 sec/batch
Epoch 38/100  Iteration 21420/56600 Training loss: 1.0568 0.4711 sec/batch
Epoch 38/100  Iteration 21421/56600 Training loss: 1.0568 0.4927 sec/batch
Epoch 38/100  Iteration 21422/56600 Training loss: 1.0568 0.4738 sec/batch
Epoch 38/100  Iteration 21423/56600 Training loss: 1.0568 0.4794 sec/batch
Epoch 38/100  Iteration 21424/56600 Training loss: 1.0568 0.4734 sec/batch
Epoch 38/100  Iteration 21425/56600 Training loss: 1.0568 0.4796 sec/batch
Epoch 38/100  Iteration 2

Epoch 39/100  Iteration 21523/56600 Training loss: 1.0720 0.4800 sec/batch
Epoch 39/100  Iteration 21524/56600 Training loss: 1.0706 0.4783 sec/batch
Epoch 39/100  Iteration 21525/56600 Training loss: 1.0683 0.4877 sec/batch
Epoch 39/100  Iteration 21526/56600 Training loss: 1.0666 0.4733 sec/batch
Epoch 39/100  Iteration 21527/56600 Training loss: 1.0641 0.4749 sec/batch
Epoch 39/100  Iteration 21528/56600 Training loss: 1.0625 0.4636 sec/batch
Epoch 39/100  Iteration 21529/56600 Training loss: 1.0610 0.4853 sec/batch
Epoch 39/100  Iteration 21530/56600 Training loss: 1.0587 0.4673 sec/batch
Epoch 39/100  Iteration 21531/56600 Training loss: 1.0574 0.4677 sec/batch
Epoch 39/100  Iteration 21532/56600 Training loss: 1.0569 0.4695 sec/batch
Epoch 39/100  Iteration 21533/56600 Training loss: 1.0563 0.4788 sec/batch
Epoch 39/100  Iteration 21534/56600 Training loss: 1.0548 0.4796 sec/batch
Epoch 39/100  Iteration 21535/56600 Training loss: 1.0549 0.4827 sec/batch
Epoch 39/100  Iteration 2

Epoch 39/100  Iteration 21633/56600 Training loss: 1.0525 0.4761 sec/batch
Epoch 39/100  Iteration 21634/56600 Training loss: 1.0525 0.4992 sec/batch
Epoch 39/100  Iteration 21635/56600 Training loss: 1.0526 0.4775 sec/batch
Epoch 39/100  Iteration 21636/56600 Training loss: 1.0526 0.4794 sec/batch
Epoch 39/100  Iteration 21637/56600 Training loss: 1.0528 0.4867 sec/batch
Epoch 39/100  Iteration 21638/56600 Training loss: 1.0527 0.4822 sec/batch
Epoch 39/100  Iteration 21639/56600 Training loss: 1.0527 0.4791 sec/batch
Epoch 39/100  Iteration 21640/56600 Training loss: 1.0526 0.4792 sec/batch
Epoch 39/100  Iteration 21641/56600 Training loss: 1.0526 0.4900 sec/batch
Epoch 39/100  Iteration 21642/56600 Training loss: 1.0525 0.4663 sec/batch
Epoch 39/100  Iteration 21643/56600 Training loss: 1.0523 0.4791 sec/batch
Epoch 39/100  Iteration 21644/56600 Training loss: 1.0522 0.4896 sec/batch
Epoch 39/100  Iteration 21645/56600 Training loss: 1.0523 0.4795 sec/batch
Epoch 39/100  Iteration 2

Epoch 39/100  Iteration 21743/56600 Training loss: 1.0578 0.4835 sec/batch
Epoch 39/100  Iteration 21744/56600 Training loss: 1.0578 0.4792 sec/batch
Epoch 39/100  Iteration 21745/56600 Training loss: 1.0579 0.4942 sec/batch
Epoch 39/100  Iteration 21746/56600 Training loss: 1.0580 0.4810 sec/batch
Epoch 39/100  Iteration 21747/56600 Training loss: 1.0581 0.4778 sec/batch
Epoch 39/100  Iteration 21748/56600 Training loss: 1.0584 0.4737 sec/batch
Epoch 39/100  Iteration 21749/56600 Training loss: 1.0585 0.4860 sec/batch
Epoch 39/100  Iteration 21750/56600 Training loss: 1.0588 0.4699 sec/batch
Epoch 39/100  Iteration 21751/56600 Training loss: 1.0590 0.4796 sec/batch
Epoch 39/100  Iteration 21752/56600 Training loss: 1.0592 0.4874 sec/batch
Epoch 39/100  Iteration 21753/56600 Training loss: 1.0594 0.4908 sec/batch
Epoch 39/100  Iteration 21754/56600 Training loss: 1.0596 0.4833 sec/batch
Epoch 39/100  Iteration 21755/56600 Training loss: 1.0597 0.4811 sec/batch
Epoch 39/100  Iteration 2

Epoch 39/100  Iteration 21853/56600 Training loss: 1.0572 0.4848 sec/batch
Epoch 39/100  Iteration 21854/56600 Training loss: 1.0572 0.4678 sec/batch
Epoch 39/100  Iteration 21855/56600 Training loss: 1.0571 0.4797 sec/batch
Epoch 39/100  Iteration 21856/56600 Training loss: 1.0570 0.4853 sec/batch
Epoch 39/100  Iteration 21857/56600 Training loss: 1.0570 0.4690 sec/batch
Epoch 39/100  Iteration 21858/56600 Training loss: 1.0569 0.4774 sec/batch
Epoch 39/100  Iteration 21859/56600 Training loss: 1.0568 0.4779 sec/batch
Epoch 39/100  Iteration 21860/56600 Training loss: 1.0566 0.4788 sec/batch
Epoch 39/100  Iteration 21861/56600 Training loss: 1.0567 0.4739 sec/batch
Epoch 39/100  Iteration 21862/56600 Training loss: 1.0566 0.4793 sec/batch
Epoch 39/100  Iteration 21863/56600 Training loss: 1.0567 0.4648 sec/batch
Epoch 39/100  Iteration 21864/56600 Training loss: 1.0568 0.4790 sec/batch
Epoch 39/100  Iteration 21865/56600 Training loss: 1.0569 0.4786 sec/batch
Epoch 39/100  Iteration 2

Epoch 39/100  Iteration 21963/56600 Training loss: 1.0538 0.4883 sec/batch
Epoch 39/100  Iteration 21964/56600 Training loss: 1.0538 0.4784 sec/batch
Epoch 39/100  Iteration 21965/56600 Training loss: 1.0537 0.4794 sec/batch
Epoch 39/100  Iteration 21966/56600 Training loss: 1.0537 0.4736 sec/batch
Epoch 39/100  Iteration 21967/56600 Training loss: 1.0536 0.4798 sec/batch
Epoch 39/100  Iteration 21968/56600 Training loss: 1.0536 0.4796 sec/batch
Epoch 39/100  Iteration 21969/56600 Training loss: 1.0536 0.4942 sec/batch
Epoch 39/100  Iteration 21970/56600 Training loss: 1.0536 0.4757 sec/batch
Epoch 39/100  Iteration 21971/56600 Training loss: 1.0536 0.4666 sec/batch
Epoch 39/100  Iteration 21972/56600 Training loss: 1.0535 0.4779 sec/batch
Epoch 39/100  Iteration 21973/56600 Training loss: 1.0535 0.4858 sec/batch
Epoch 39/100  Iteration 21974/56600 Training loss: 1.0535 0.4888 sec/batch
Epoch 39/100  Iteration 21975/56600 Training loss: 1.0535 0.4793 sec/batch
Epoch 39/100  Iteration 2

Epoch 39/100  Iteration 22072/56600 Training loss: 1.0536 0.4860 sec/batch
Epoch 39/100  Iteration 22073/56600 Training loss: 1.0537 0.4583 sec/batch
Epoch 39/100  Iteration 22074/56600 Training loss: 1.0538 0.4726 sec/batch
Epoch 40/100  Iteration 22075/56600 Training loss: 1.1909 0.4791 sec/batch
Epoch 40/100  Iteration 22076/56600 Training loss: 1.1393 0.4793 sec/batch
Epoch 40/100  Iteration 22077/56600 Training loss: 1.1277 0.4794 sec/batch
Epoch 40/100  Iteration 22078/56600 Training loss: 1.1137 0.4815 sec/batch
Epoch 40/100  Iteration 22079/56600 Training loss: 1.0982 0.4726 sec/batch
Epoch 40/100  Iteration 22080/56600 Training loss: 1.0936 0.4800 sec/batch
Epoch 40/100  Iteration 22081/56600 Training loss: 1.0881 0.4827 sec/batch
Epoch 40/100  Iteration 22082/56600 Training loss: 1.0829 0.4749 sec/batch
Epoch 40/100  Iteration 22083/56600 Training loss: 1.0799 0.4614 sec/batch
Epoch 40/100  Iteration 22084/56600 Training loss: 1.0781 0.4692 sec/batch
Epoch 40/100  Iteration 2

Epoch 40/100  Iteration 22182/56600 Training loss: 1.0460 0.4746 sec/batch
Epoch 40/100  Iteration 22183/56600 Training loss: 1.0460 0.4786 sec/batch
Epoch 40/100  Iteration 22184/56600 Training loss: 1.0461 0.4787 sec/batch
Epoch 40/100  Iteration 22185/56600 Training loss: 1.0460 0.4844 sec/batch
Epoch 40/100  Iteration 22186/56600 Training loss: 1.0459 0.4786 sec/batch
Epoch 40/100  Iteration 22187/56600 Training loss: 1.0463 0.4689 sec/batch
Epoch 40/100  Iteration 22188/56600 Training loss: 1.0466 0.4744 sec/batch
Epoch 40/100  Iteration 22189/56600 Training loss: 1.0468 0.4854 sec/batch
Epoch 40/100  Iteration 22190/56600 Training loss: 1.0471 0.4830 sec/batch
Epoch 40/100  Iteration 22191/56600 Training loss: 1.0475 0.4798 sec/batch
Epoch 40/100  Iteration 22192/56600 Training loss: 1.0479 0.4786 sec/batch
Epoch 40/100  Iteration 22193/56600 Training loss: 1.0480 0.4791 sec/batch
Epoch 40/100  Iteration 22194/56600 Training loss: 1.0486 0.4851 sec/batch
Epoch 40/100  Iteration 2

Epoch 40/100  Iteration 22292/56600 Training loss: 1.0542 0.4864 sec/batch
Epoch 40/100  Iteration 22293/56600 Training loss: 1.0544 0.4718 sec/batch
Epoch 40/100  Iteration 22294/56600 Training loss: 1.0546 0.4759 sec/batch
Epoch 40/100  Iteration 22295/56600 Training loss: 1.0546 0.4769 sec/batch
Epoch 40/100  Iteration 22296/56600 Training loss: 1.0546 0.4793 sec/batch
Epoch 40/100  Iteration 22297/56600 Training loss: 1.0545 0.4787 sec/batch
Epoch 40/100  Iteration 22298/56600 Training loss: 1.0547 0.4797 sec/batch
Epoch 40/100  Iteration 22299/56600 Training loss: 1.0548 0.4738 sec/batch
Epoch 40/100  Iteration 22300/56600 Training loss: 1.0549 0.4860 sec/batch
Epoch 40/100  Iteration 22301/56600 Training loss: 1.0550 0.4709 sec/batch
Epoch 40/100  Iteration 22302/56600 Training loss: 1.0549 0.4796 sec/batch
Epoch 40/100  Iteration 22303/56600 Training loss: 1.0549 0.4794 sec/batch
Epoch 40/100  Iteration 22304/56600 Training loss: 1.0552 0.4643 sec/batch
Epoch 40/100  Iteration 2

Epoch 40/100  Iteration 22402/56600 Training loss: 1.0547 0.4682 sec/batch
Epoch 40/100  Iteration 22403/56600 Training loss: 1.0547 0.4794 sec/batch
Epoch 40/100  Iteration 22404/56600 Training loss: 1.0547 0.4846 sec/batch
Epoch 40/100  Iteration 22405/56600 Training loss: 1.0547 0.4785 sec/batch
Epoch 40/100  Iteration 22406/56600 Training loss: 1.0547 0.4744 sec/batch
Epoch 40/100  Iteration 22407/56600 Training loss: 1.0548 0.4688 sec/batch
Epoch 40/100  Iteration 22408/56600 Training loss: 1.0547 0.4765 sec/batch
Epoch 40/100  Iteration 22409/56600 Training loss: 1.0546 0.4756 sec/batch
Epoch 40/100  Iteration 22410/56600 Training loss: 1.0547 0.4795 sec/batch
Epoch 40/100  Iteration 22411/56600 Training loss: 1.0546 0.4785 sec/batch
Epoch 40/100  Iteration 22412/56600 Training loss: 1.0547 0.4792 sec/batch
Epoch 40/100  Iteration 22413/56600 Training loss: 1.0546 0.4947 sec/batch
Epoch 40/100  Iteration 22414/56600 Training loss: 1.0546 0.4743 sec/batch
Epoch 40/100  Iteration 2

Epoch 40/100  Iteration 22512/56600 Training loss: 1.0517 0.4769 sec/batch
Epoch 40/100  Iteration 22513/56600 Training loss: 1.0516 0.4856 sec/batch
Epoch 40/100  Iteration 22514/56600 Training loss: 1.0515 0.4837 sec/batch
Epoch 40/100  Iteration 22515/56600 Training loss: 1.0514 0.4797 sec/batch
Epoch 40/100  Iteration 22516/56600 Training loss: 1.0514 0.4739 sec/batch
Epoch 40/100  Iteration 22517/56600 Training loss: 1.0514 0.4696 sec/batch
Epoch 40/100  Iteration 22518/56600 Training loss: 1.0514 0.4679 sec/batch
Epoch 40/100  Iteration 22519/56600 Training loss: 1.0513 0.4785 sec/batch
Epoch 40/100  Iteration 22520/56600 Training loss: 1.0512 0.4794 sec/batch
Epoch 40/100  Iteration 22521/56600 Training loss: 1.0512 0.4737 sec/batch
Epoch 40/100  Iteration 22522/56600 Training loss: 1.0511 0.4844 sec/batch
Epoch 40/100  Iteration 22523/56600 Training loss: 1.0511 0.4630 sec/batch
Epoch 40/100  Iteration 22524/56600 Training loss: 1.0512 0.4795 sec/batch
Epoch 40/100  Iteration 2

Epoch 40/100  Iteration 22622/56600 Training loss: 1.0501 0.4790 sec/batch
Epoch 40/100  Iteration 22623/56600 Training loss: 1.0501 0.4786 sec/batch
Epoch 40/100  Iteration 22624/56600 Training loss: 1.0500 0.4863 sec/batch
Epoch 40/100  Iteration 22625/56600 Training loss: 1.0500 0.4825 sec/batch
Epoch 40/100  Iteration 22626/56600 Training loss: 1.0500 0.4901 sec/batch
Epoch 40/100  Iteration 22627/56600 Training loss: 1.0501 0.4832 sec/batch
Epoch 40/100  Iteration 22628/56600 Training loss: 1.0501 0.4840 sec/batch
Epoch 40/100  Iteration 22629/56600 Training loss: 1.0500 0.4957 sec/batch
Epoch 40/100  Iteration 22630/56600 Training loss: 1.0500 0.4942 sec/batch
Epoch 40/100  Iteration 22631/56600 Training loss: 1.0500 0.4791 sec/batch
Epoch 40/100  Iteration 22632/56600 Training loss: 1.0501 0.4788 sec/batch
Epoch 40/100  Iteration 22633/56600 Training loss: 1.0501 0.4688 sec/batch
Epoch 40/100  Iteration 22634/56600 Training loss: 1.0501 0.4663 sec/batch
Epoch 40/100  Iteration 2

Epoch 41/100  Iteration 22732/56600 Training loss: 1.0441 0.4779 sec/batch
Epoch 41/100  Iteration 22733/56600 Training loss: 1.0438 0.4804 sec/batch
Epoch 41/100  Iteration 22734/56600 Training loss: 1.0437 0.4899 sec/batch
Epoch 41/100  Iteration 22735/56600 Training loss: 1.0435 0.4946 sec/batch
Epoch 41/100  Iteration 22736/56600 Training loss: 1.0432 0.4874 sec/batch
Epoch 41/100  Iteration 22737/56600 Training loss: 1.0428 0.4814 sec/batch
Epoch 41/100  Iteration 22738/56600 Training loss: 1.0425 0.4785 sec/batch
Epoch 41/100  Iteration 22739/56600 Training loss: 1.0427 0.4791 sec/batch
Epoch 41/100  Iteration 22740/56600 Training loss: 1.0434 0.4805 sec/batch
Epoch 41/100  Iteration 22741/56600 Training loss: 1.0434 0.4688 sec/batch
Epoch 41/100  Iteration 22742/56600 Training loss: 1.0438 0.4781 sec/batch
Epoch 41/100  Iteration 22743/56600 Training loss: 1.0437 0.4739 sec/batch
Epoch 41/100  Iteration 22744/56600 Training loss: 1.0439 0.4714 sec/batch
Epoch 41/100  Iteration 2

Epoch 41/100  Iteration 22842/56600 Training loss: 1.0510 0.4785 sec/batch
Epoch 41/100  Iteration 22843/56600 Training loss: 1.0511 0.4689 sec/batch
Epoch 41/100  Iteration 22844/56600 Training loss: 1.0511 0.4787 sec/batch
Epoch 41/100  Iteration 22845/56600 Training loss: 1.0513 0.4802 sec/batch
Epoch 41/100  Iteration 22846/56600 Training loss: 1.0515 0.4778 sec/batch
Epoch 41/100  Iteration 22847/56600 Training loss: 1.0515 0.4636 sec/batch
Epoch 41/100  Iteration 22848/56600 Training loss: 1.0514 0.4825 sec/batch
Epoch 41/100  Iteration 22849/56600 Training loss: 1.0515 0.4702 sec/batch
Epoch 41/100  Iteration 22850/56600 Training loss: 1.0514 0.4756 sec/batch
Epoch 41/100  Iteration 22851/56600 Training loss: 1.0514 0.4662 sec/batch
Epoch 41/100  Iteration 22852/56600 Training loss: 1.0513 0.4702 sec/batch
Epoch 41/100  Iteration 22853/56600 Training loss: 1.0512 0.4830 sec/batch
Epoch 41/100  Iteration 22854/56600 Training loss: 1.0510 0.4789 sec/batch
Epoch 41/100  Iteration 2

Epoch 41/100  Iteration 22952/56600 Training loss: 1.0526 0.4805 sec/batch
Epoch 41/100  Iteration 22953/56600 Training loss: 1.0525 0.4740 sec/batch
Epoch 41/100  Iteration 22954/56600 Training loss: 1.0524 0.4805 sec/batch
Epoch 41/100  Iteration 22955/56600 Training loss: 1.0524 0.4634 sec/batch
Epoch 41/100  Iteration 22956/56600 Training loss: 1.0524 0.4692 sec/batch
Epoch 41/100  Iteration 22957/56600 Training loss: 1.0525 0.4737 sec/batch
Epoch 41/100  Iteration 22958/56600 Training loss: 1.0525 0.4787 sec/batch
Epoch 41/100  Iteration 22959/56600 Training loss: 1.0524 0.4790 sec/batch
Epoch 41/100  Iteration 22960/56600 Training loss: 1.0523 0.4839 sec/batch
Epoch 41/100  Iteration 22961/56600 Training loss: 1.0523 0.4907 sec/batch
Epoch 41/100  Iteration 22962/56600 Training loss: 1.0522 0.4844 sec/batch
Epoch 41/100  Iteration 22963/56600 Training loss: 1.0520 0.4876 sec/batch
Epoch 41/100  Iteration 22964/56600 Training loss: 1.0520 0.4699 sec/batch
Epoch 41/100  Iteration 2

Epoch 41/100  Iteration 23062/56600 Training loss: 1.0497 0.4850 sec/batch
Epoch 41/100  Iteration 23063/56600 Training loss: 1.0497 0.4886 sec/batch
Epoch 41/100  Iteration 23064/56600 Training loss: 1.0498 0.4892 sec/batch
Epoch 41/100  Iteration 23065/56600 Training loss: 1.0497 0.4796 sec/batch
Epoch 41/100  Iteration 23066/56600 Training loss: 1.0497 0.4763 sec/batch
Epoch 41/100  Iteration 23067/56600 Training loss: 1.0496 0.4653 sec/batch
Epoch 41/100  Iteration 23068/56600 Training loss: 1.0495 0.4784 sec/batch
Epoch 41/100  Iteration 23069/56600 Training loss: 1.0494 0.4842 sec/batch
Epoch 41/100  Iteration 23070/56600 Training loss: 1.0494 0.4793 sec/batch
Epoch 41/100  Iteration 23071/56600 Training loss: 1.0493 0.4789 sec/batch
Epoch 41/100  Iteration 23072/56600 Training loss: 1.0492 0.4738 sec/batch
Epoch 41/100  Iteration 23073/56600 Training loss: 1.0491 0.4788 sec/batch
Epoch 41/100  Iteration 23074/56600 Training loss: 1.0491 0.4791 sec/batch
Epoch 41/100  Iteration 2

Epoch 41/100  Iteration 23172/56600 Training loss: 1.0476 0.4694 sec/batch
Epoch 41/100  Iteration 23173/56600 Training loss: 1.0475 0.4871 sec/batch
Epoch 41/100  Iteration 23174/56600 Training loss: 1.0474 0.4828 sec/batch
Epoch 41/100  Iteration 23175/56600 Training loss: 1.0474 0.4753 sec/batch
Epoch 41/100  Iteration 23176/56600 Training loss: 1.0475 0.4794 sec/batch
Epoch 41/100  Iteration 23177/56600 Training loss: 1.0475 0.4940 sec/batch
Epoch 41/100  Iteration 23178/56600 Training loss: 1.0474 0.4731 sec/batch
Epoch 41/100  Iteration 23179/56600 Training loss: 1.0475 0.4854 sec/batch
Epoch 41/100  Iteration 23180/56600 Training loss: 1.0475 0.4906 sec/batch
Epoch 41/100  Iteration 23181/56600 Training loss: 1.0474 0.4931 sec/batch
Epoch 41/100  Iteration 23182/56600 Training loss: 1.0474 0.4938 sec/batch
Epoch 41/100  Iteration 23183/56600 Training loss: 1.0474 0.4727 sec/batch
Epoch 41/100  Iteration 23184/56600 Training loss: 1.0474 0.4820 sec/batch
Epoch 41/100  Iteration 2

Epoch 42/100  Iteration 23282/56600 Training loss: 1.0404 0.4799 sec/batch
Epoch 42/100  Iteration 23283/56600 Training loss: 1.0403 0.4794 sec/batch
Epoch 42/100  Iteration 23284/56600 Training loss: 1.0402 0.4780 sec/batch
Epoch 42/100  Iteration 23285/56600 Training loss: 1.0398 0.4743 sec/batch
Epoch 42/100  Iteration 23286/56600 Training loss: 1.0397 0.4789 sec/batch
Epoch 42/100  Iteration 23287/56600 Training loss: 1.0396 0.4737 sec/batch
Epoch 42/100  Iteration 23288/56600 Training loss: 1.0396 0.4792 sec/batch
Epoch 42/100  Iteration 23289/56600 Training loss: 1.0399 0.4793 sec/batch
Epoch 42/100  Iteration 23290/56600 Training loss: 1.0402 0.4844 sec/batch
Epoch 42/100  Iteration 23291/56600 Training loss: 1.0399 0.4820 sec/batch
Epoch 42/100  Iteration 23292/56600 Training loss: 1.0397 0.4764 sec/batch
Epoch 42/100  Iteration 23293/56600 Training loss: 1.0393 0.4896 sec/batch
Epoch 42/100  Iteration 23294/56600 Training loss: 1.0388 0.4839 sec/batch
Epoch 42/100  Iteration 2

Epoch 42/100  Iteration 23392/56600 Training loss: 1.0466 0.4808 sec/batch
Epoch 42/100  Iteration 23393/56600 Training loss: 1.0468 0.4786 sec/batch
Epoch 42/100  Iteration 23394/56600 Training loss: 1.0470 0.4778 sec/batch
Epoch 42/100  Iteration 23395/56600 Training loss: 1.0469 0.4792 sec/batch
Epoch 42/100  Iteration 23396/56600 Training loss: 1.0469 0.4791 sec/batch
Epoch 42/100  Iteration 23397/56600 Training loss: 1.0470 0.4688 sec/batch
Epoch 42/100  Iteration 23398/56600 Training loss: 1.0469 0.4791 sec/batch
Epoch 42/100  Iteration 23399/56600 Training loss: 1.0471 0.4951 sec/batch
Epoch 42/100  Iteration 23400/56600 Training loss: 1.0471 0.4944 sec/batch
Epoch 42/100  Iteration 23401/56600 Training loss: 1.0473 0.4945 sec/batch
Epoch 42/100  Iteration 23402/56600 Training loss: 1.0473 0.4793 sec/batch
Epoch 42/100  Iteration 23403/56600 Training loss: 1.0474 0.4788 sec/batch
Epoch 42/100  Iteration 23404/56600 Training loss: 1.0475 0.4902 sec/batch
Epoch 42/100  Iteration 2

Epoch 42/100  Iteration 23502/56600 Training loss: 1.0502 0.4832 sec/batch
Epoch 42/100  Iteration 23503/56600 Training loss: 1.0502 0.4801 sec/batch
Epoch 42/100  Iteration 23504/56600 Training loss: 1.0503 0.4903 sec/batch
Epoch 42/100  Iteration 23505/56600 Training loss: 1.0502 0.4706 sec/batch
Epoch 42/100  Iteration 23506/56600 Training loss: 1.0502 0.4667 sec/batch
Epoch 42/100  Iteration 23507/56600 Training loss: 1.0501 0.4814 sec/batch
Epoch 42/100  Iteration 23508/56600 Training loss: 1.0501 0.4738 sec/batch
Epoch 42/100  Iteration 23509/56600 Training loss: 1.0501 0.4672 sec/batch
Epoch 42/100  Iteration 23510/56600 Training loss: 1.0500 0.4723 sec/batch
Epoch 42/100  Iteration 23511/56600 Training loss: 1.0500 0.4768 sec/batch
Epoch 42/100  Iteration 23512/56600 Training loss: 1.0499 0.4934 sec/batch
Epoch 42/100  Iteration 23513/56600 Training loss: 1.0498 0.4682 sec/batch
Epoch 42/100  Iteration 23514/56600 Training loss: 1.0496 0.4806 sec/batch
Epoch 42/100  Iteration 2

Epoch 42/100  Iteration 23612/56600 Training loss: 1.0474 0.4688 sec/batch
Epoch 42/100  Iteration 23613/56600 Training loss: 1.0473 0.4746 sec/batch
Epoch 42/100  Iteration 23614/56600 Training loss: 1.0473 0.4774 sec/batch
Epoch 42/100  Iteration 23615/56600 Training loss: 1.0472 0.4789 sec/batch
Epoch 42/100  Iteration 23616/56600 Training loss: 1.0471 0.4801 sec/batch
Epoch 42/100  Iteration 23617/56600 Training loss: 1.0470 0.4732 sec/batch
Epoch 42/100  Iteration 23618/56600 Training loss: 1.0470 0.4786 sec/batch
Epoch 42/100  Iteration 23619/56600 Training loss: 1.0470 0.4795 sec/batch
Epoch 42/100  Iteration 23620/56600 Training loss: 1.0470 0.4941 sec/batch
Epoch 42/100  Iteration 23621/56600 Training loss: 1.0470 0.4793 sec/batch
Epoch 42/100  Iteration 23622/56600 Training loss: 1.0469 0.4794 sec/batch
Epoch 42/100  Iteration 23623/56600 Training loss: 1.0470 0.4824 sec/batch
Epoch 42/100  Iteration 23624/56600 Training loss: 1.0470 0.4683 sec/batch
Epoch 42/100  Iteration 2

Epoch 42/100  Iteration 23722/56600 Training loss: 1.0447 0.4631 sec/batch
Epoch 42/100  Iteration 23723/56600 Training loss: 1.0447 0.4793 sec/batch
Epoch 42/100  Iteration 23724/56600 Training loss: 1.0448 0.4791 sec/batch
Epoch 42/100  Iteration 23725/56600 Training loss: 1.0448 0.4844 sec/batch
Epoch 42/100  Iteration 23726/56600 Training loss: 1.0448 0.4717 sec/batch
Epoch 42/100  Iteration 23727/56600 Training loss: 1.0448 0.4863 sec/batch
Epoch 42/100  Iteration 23728/56600 Training loss: 1.0448 0.4796 sec/batch
Epoch 42/100  Iteration 23729/56600 Training loss: 1.0448 0.4790 sec/batch
Epoch 42/100  Iteration 23730/56600 Training loss: 1.0449 0.4742 sec/batch
Epoch 42/100  Iteration 23731/56600 Training loss: 1.0449 0.4787 sec/batch
Epoch 42/100  Iteration 23732/56600 Training loss: 1.0449 0.4800 sec/batch
Epoch 42/100  Iteration 23733/56600 Training loss: 1.0449 0.4789 sec/batch
Epoch 42/100  Iteration 23734/56600 Training loss: 1.0449 0.4763 sec/batch
Epoch 42/100  Iteration 2

Epoch 43/100  Iteration 23832/56600 Training loss: 1.0421 0.4656 sec/batch
Epoch 43/100  Iteration 23833/56600 Training loss: 1.0416 0.4793 sec/batch
Epoch 43/100  Iteration 23834/56600 Training loss: 1.0412 0.4788 sec/batch
Epoch 43/100  Iteration 23835/56600 Training loss: 1.0410 0.4761 sec/batch
Epoch 43/100  Iteration 23836/56600 Training loss: 1.0402 0.4928 sec/batch
Epoch 43/100  Iteration 23837/56600 Training loss: 1.0396 0.4740 sec/batch
Epoch 43/100  Iteration 23838/56600 Training loss: 1.0394 0.4836 sec/batch
Epoch 43/100  Iteration 23839/56600 Training loss: 1.0391 0.4793 sec/batch
Epoch 43/100  Iteration 23840/56600 Training loss: 1.0395 0.4793 sec/batch
Epoch 43/100  Iteration 23841/56600 Training loss: 1.0397 0.4865 sec/batch
Epoch 43/100  Iteration 23842/56600 Training loss: 1.0393 0.4713 sec/batch
Epoch 43/100  Iteration 23843/56600 Training loss: 1.0392 0.4787 sec/batch
Epoch 43/100  Iteration 23844/56600 Training loss: 1.0385 0.4740 sec/batch
Epoch 43/100  Iteration 2

Epoch 43/100  Iteration 23942/56600 Training loss: 1.0437 0.4760 sec/batch
Epoch 43/100  Iteration 23943/56600 Training loss: 1.0435 0.4667 sec/batch
Epoch 43/100  Iteration 23944/56600 Training loss: 1.0435 0.4638 sec/batch
Epoch 43/100  Iteration 23945/56600 Training loss: 1.0435 0.4784 sec/batch
Epoch 43/100  Iteration 23946/56600 Training loss: 1.0436 0.4773 sec/batch
Epoch 43/100  Iteration 23947/56600 Training loss: 1.0434 0.4868 sec/batch
Epoch 43/100  Iteration 23948/56600 Training loss: 1.0433 0.4789 sec/batch
Epoch 43/100  Iteration 23949/56600 Training loss: 1.0433 0.4794 sec/batch
Epoch 43/100  Iteration 23950/56600 Training loss: 1.0432 0.4888 sec/batch
Epoch 43/100  Iteration 23951/56600 Training loss: 1.0432 0.4629 sec/batch
Epoch 43/100  Iteration 23952/56600 Training loss: 1.0432 0.4737 sec/batch
Epoch 43/100  Iteration 23953/56600 Training loss: 1.0432 0.4796 sec/batch
Epoch 43/100  Iteration 23954/56600 Training loss: 1.0433 0.4787 sec/batch
Epoch 43/100  Iteration 2

Epoch 43/100  Iteration 24051/56600 Training loss: 1.0478 0.4799 sec/batch
Epoch 43/100  Iteration 24052/56600 Training loss: 1.0478 0.4739 sec/batch
Epoch 43/100  Iteration 24053/56600 Training loss: 1.0477 0.4749 sec/batch
Epoch 43/100  Iteration 24054/56600 Training loss: 1.0476 0.4695 sec/batch
Epoch 43/100  Iteration 24055/56600 Training loss: 1.0476 0.4740 sec/batch
Epoch 43/100  Iteration 24056/56600 Training loss: 1.0475 0.4627 sec/batch
Epoch 43/100  Iteration 24057/56600 Training loss: 1.0475 0.4743 sec/batch
Epoch 43/100  Iteration 24058/56600 Training loss: 1.0476 0.4797 sec/batch
Epoch 43/100  Iteration 24059/56600 Training loss: 1.0477 0.4730 sec/batch
Epoch 43/100  Iteration 24060/56600 Training loss: 1.0477 0.4636 sec/batch
Epoch 43/100  Iteration 24061/56600 Training loss: 1.0476 0.4791 sec/batch
Epoch 43/100  Iteration 24062/56600 Training loss: 1.0477 0.4792 sec/batch
Epoch 43/100  Iteration 24063/56600 Training loss: 1.0478 0.4845 sec/batch
Epoch 43/100  Iteration 2

Epoch 43/100  Iteration 24161/56600 Training loss: 1.0454 0.4766 sec/batch
Epoch 43/100  Iteration 24162/56600 Training loss: 1.0454 0.4797 sec/batch
Epoch 43/100  Iteration 24163/56600 Training loss: 1.0453 0.4891 sec/batch
Epoch 43/100  Iteration 24164/56600 Training loss: 1.0453 0.4779 sec/batch
Epoch 43/100  Iteration 24165/56600 Training loss: 1.0453 0.4794 sec/batch
Epoch 43/100  Iteration 24166/56600 Training loss: 1.0453 0.4795 sec/batch
Epoch 43/100  Iteration 24167/56600 Training loss: 1.0453 0.4794 sec/batch
Epoch 43/100  Iteration 24168/56600 Training loss: 1.0453 0.4627 sec/batch
Epoch 43/100  Iteration 24169/56600 Training loss: 1.0454 0.4749 sec/batch
Epoch 43/100  Iteration 24170/56600 Training loss: 1.0454 0.4688 sec/batch
Epoch 43/100  Iteration 24171/56600 Training loss: 1.0454 0.4760 sec/batch
Epoch 43/100  Iteration 24172/56600 Training loss: 1.0454 0.4866 sec/batch
Epoch 43/100  Iteration 24173/56600 Training loss: 1.0453 0.4780 sec/batch
Epoch 43/100  Iteration 2

Epoch 43/100  Iteration 24271/56600 Training loss: 1.0427 0.4677 sec/batch
Epoch 43/100  Iteration 24272/56600 Training loss: 1.0426 0.4638 sec/batch
Epoch 43/100  Iteration 24273/56600 Training loss: 1.0426 0.4798 sec/batch
Epoch 43/100  Iteration 24274/56600 Training loss: 1.0425 0.4785 sec/batch
Epoch 43/100  Iteration 24275/56600 Training loss: 1.0425 0.4933 sec/batch
Epoch 43/100  Iteration 24276/56600 Training loss: 1.0424 0.4759 sec/batch
Epoch 43/100  Iteration 24277/56600 Training loss: 1.0424 0.4787 sec/batch
Epoch 43/100  Iteration 24278/56600 Training loss: 1.0424 0.4784 sec/batch
Epoch 43/100  Iteration 24279/56600 Training loss: 1.0423 0.4798 sec/batch
Epoch 43/100  Iteration 24280/56600 Training loss: 1.0422 0.4785 sec/batch
Epoch 43/100  Iteration 24281/56600 Training loss: 1.0422 0.4808 sec/batch
Epoch 43/100  Iteration 24282/56600 Training loss: 1.0422 0.4780 sec/batch
Epoch 43/100  Iteration 24283/56600 Training loss: 1.0422 0.4736 sec/batch
Epoch 43/100  Iteration 2

Epoch 44/100  Iteration 24381/56600 Training loss: 1.0482 0.4894 sec/batch
Epoch 44/100  Iteration 24382/56600 Training loss: 1.0479 0.4787 sec/batch
Epoch 44/100  Iteration 24383/56600 Training loss: 1.0486 0.4797 sec/batch
Epoch 44/100  Iteration 24384/56600 Training loss: 1.0490 0.4862 sec/batch
Epoch 44/100  Iteration 24385/56600 Training loss: 1.0482 0.4827 sec/batch
Epoch 44/100  Iteration 24386/56600 Training loss: 1.0483 0.4969 sec/batch
Epoch 44/100  Iteration 24387/56600 Training loss: 1.0480 0.4944 sec/batch
Epoch 44/100  Iteration 24388/56600 Training loss: 1.0474 0.4841 sec/batch
Epoch 44/100  Iteration 24389/56600 Training loss: 1.0469 0.4743 sec/batch
Epoch 44/100  Iteration 24390/56600 Training loss: 1.0460 0.4789 sec/batch
Epoch 44/100  Iteration 24391/56600 Training loss: 1.0453 0.4894 sec/batch
Epoch 44/100  Iteration 24392/56600 Training loss: 1.0445 0.4857 sec/batch
Epoch 44/100  Iteration 24393/56600 Training loss: 1.0440 0.4784 sec/batch
Epoch 44/100  Iteration 2

Epoch 44/100  Iteration 24491/56600 Training loss: 1.0414 0.4785 sec/batch
Epoch 44/100  Iteration 24492/56600 Training loss: 1.0414 0.4774 sec/batch
Epoch 44/100  Iteration 24493/56600 Training loss: 1.0416 0.4938 sec/batch
Epoch 44/100  Iteration 24494/56600 Training loss: 1.0417 0.4876 sec/batch
Epoch 44/100  Iteration 24495/56600 Training loss: 1.0419 0.4883 sec/batch
Epoch 44/100  Iteration 24496/56600 Training loss: 1.0420 0.4952 sec/batch
Epoch 44/100  Iteration 24497/56600 Training loss: 1.0420 0.4870 sec/batch
Epoch 44/100  Iteration 24498/56600 Training loss: 1.0419 0.4881 sec/batch
Epoch 44/100  Iteration 24499/56600 Training loss: 1.0419 0.4865 sec/batch
Epoch 44/100  Iteration 24500/56600 Training loss: 1.0419 0.4965 sec/batch
Epoch 44/100  Iteration 24501/56600 Training loss: 1.0419 0.4791 sec/batch
Epoch 44/100  Iteration 24502/56600 Training loss: 1.0418 0.4800 sec/batch
Epoch 44/100  Iteration 24503/56600 Training loss: 1.0418 0.4921 sec/batch
Epoch 44/100  Iteration 2

Epoch 44/100  Iteration 24601/56600 Training loss: 1.0459 0.4951 sec/batch
Epoch 44/100  Iteration 24602/56600 Training loss: 1.0458 0.4954 sec/batch
Epoch 44/100  Iteration 24603/56600 Training loss: 1.0457 0.4935 sec/batch
Epoch 44/100  Iteration 24604/56600 Training loss: 1.0457 0.4954 sec/batch
Epoch 44/100  Iteration 24605/56600 Training loss: 1.0456 0.4944 sec/batch
Epoch 44/100  Iteration 24606/56600 Training loss: 1.0454 0.4947 sec/batch
Epoch 44/100  Iteration 24607/56600 Training loss: 1.0453 0.4905 sec/batch
Epoch 44/100  Iteration 24608/56600 Training loss: 1.0453 0.4935 sec/batch
Epoch 44/100  Iteration 24609/56600 Training loss: 1.0453 0.5023 sec/batch
Epoch 44/100  Iteration 24610/56600 Training loss: 1.0453 0.4738 sec/batch
Epoch 44/100  Iteration 24611/56600 Training loss: 1.0453 0.4757 sec/batch
Epoch 44/100  Iteration 24612/56600 Training loss: 1.0452 0.4859 sec/batch
Epoch 44/100  Iteration 24613/56600 Training loss: 1.0452 0.4948 sec/batch
Epoch 44/100  Iteration 2

Epoch 44/100  Iteration 24711/56600 Training loss: 1.0430 0.4781 sec/batch
Epoch 44/100  Iteration 24712/56600 Training loss: 1.0429 0.4800 sec/batch
Epoch 44/100  Iteration 24713/56600 Training loss: 1.0428 0.4864 sec/batch
Epoch 44/100  Iteration 24714/56600 Training loss: 1.0428 0.4712 sec/batch
Epoch 44/100  Iteration 24715/56600 Training loss: 1.0428 0.4797 sec/batch
Epoch 44/100  Iteration 24716/56600 Training loss: 1.0429 0.4737 sec/batch
Epoch 44/100  Iteration 24717/56600 Training loss: 1.0428 0.4846 sec/batch
Epoch 44/100  Iteration 24718/56600 Training loss: 1.0428 0.4690 sec/batch
Epoch 44/100  Iteration 24719/56600 Training loss: 1.0427 0.4647 sec/batch
Epoch 44/100  Iteration 24720/56600 Training loss: 1.0427 0.4682 sec/batch
Epoch 44/100  Iteration 24721/56600 Training loss: 1.0427 0.4584 sec/batch
Epoch 44/100  Iteration 24722/56600 Training loss: 1.0427 0.4780 sec/batch
Epoch 44/100  Iteration 24723/56600 Training loss: 1.0426 0.4871 sec/batch
Epoch 44/100  Iteration 2

Epoch 44/100  Iteration 24821/56600 Training loss: 1.0406 0.4647 sec/batch
Epoch 44/100  Iteration 24822/56600 Training loss: 1.0406 0.4780 sec/batch
Epoch 44/100  Iteration 24823/56600 Training loss: 1.0405 0.4786 sec/batch
Epoch 44/100  Iteration 24824/56600 Training loss: 1.0405 0.4794 sec/batch
Epoch 44/100  Iteration 24825/56600 Training loss: 1.0404 0.4924 sec/batch
Epoch 44/100  Iteration 24826/56600 Training loss: 1.0405 0.4844 sec/batch
Epoch 44/100  Iteration 24827/56600 Training loss: 1.0404 0.4873 sec/batch
Epoch 44/100  Iteration 24828/56600 Training loss: 1.0404 0.4816 sec/batch
Epoch 44/100  Iteration 24829/56600 Training loss: 1.0404 0.4856 sec/batch
Epoch 44/100  Iteration 24830/56600 Training loss: 1.0404 0.4886 sec/batch
Epoch 44/100  Iteration 24831/56600 Training loss: 1.0404 0.4888 sec/batch
Epoch 44/100  Iteration 24832/56600 Training loss: 1.0405 0.4801 sec/batch
Epoch 44/100  Iteration 24833/56600 Training loss: 1.0404 0.4947 sec/batch
Epoch 44/100  Iteration 2

Epoch 45/100  Iteration 24931/56600 Training loss: 1.0418 0.4788 sec/batch
Epoch 45/100  Iteration 24932/56600 Training loss: 1.0416 0.4794 sec/batch
Epoch 45/100  Iteration 24933/56600 Training loss: 1.0412 0.4797 sec/batch
Epoch 45/100  Iteration 24934/56600 Training loss: 1.0408 0.4785 sec/batch
Epoch 45/100  Iteration 24935/56600 Training loss: 1.0402 0.4797 sec/batch
Epoch 45/100  Iteration 24936/56600 Training loss: 1.0396 0.4829 sec/batch
Epoch 45/100  Iteration 24937/56600 Training loss: 1.0399 0.4739 sec/batch
Epoch 45/100  Iteration 24938/56600 Training loss: 1.0411 0.4938 sec/batch
Epoch 45/100  Iteration 24939/56600 Training loss: 1.0427 0.4892 sec/batch
Epoch 45/100  Iteration 24940/56600 Training loss: 1.0433 0.4804 sec/batch
Epoch 45/100  Iteration 24941/56600 Training loss: 1.0444 0.4731 sec/batch
Epoch 45/100  Iteration 24942/56600 Training loss: 1.0448 0.4693 sec/batch
Epoch 45/100  Iteration 24943/56600 Training loss: 1.0449 0.4792 sec/batch
Epoch 45/100  Iteration 2

Epoch 45/100  Iteration 25041/56600 Training loss: 1.0363 0.4791 sec/batch
Epoch 45/100  Iteration 25042/56600 Training loss: 1.0367 0.4795 sec/batch
Epoch 45/100  Iteration 25043/56600 Training loss: 1.0369 0.4794 sec/batch
Epoch 45/100  Iteration 25044/56600 Training loss: 1.0369 0.4948 sec/batch
Epoch 45/100  Iteration 25045/56600 Training loss: 1.0371 0.4792 sec/batch
Epoch 45/100  Iteration 25046/56600 Training loss: 1.0372 0.4879 sec/batch
Epoch 45/100  Iteration 25047/56600 Training loss: 1.0374 0.4752 sec/batch
Epoch 45/100  Iteration 25048/56600 Training loss: 1.0373 0.4875 sec/batch
Epoch 45/100  Iteration 25049/56600 Training loss: 1.0375 0.4725 sec/batch
Epoch 45/100  Iteration 25050/56600 Training loss: 1.0374 0.4816 sec/batch
Epoch 45/100  Iteration 25051/56600 Training loss: 1.0373 0.4937 sec/batch
Epoch 45/100  Iteration 25052/56600 Training loss: 1.0375 0.4796 sec/batch
Epoch 45/100  Iteration 25053/56600 Training loss: 1.0376 0.4734 sec/batch
Epoch 45/100  Iteration 2

Epoch 45/100  Iteration 25151/56600 Training loss: 1.0442 0.4778 sec/batch
Epoch 45/100  Iteration 25152/56600 Training loss: 1.0442 0.4607 sec/batch
Epoch 45/100  Iteration 25153/56600 Training loss: 1.0442 0.4756 sec/batch
Epoch 45/100  Iteration 25154/56600 Training loss: 1.0443 0.4801 sec/batch
Epoch 45/100  Iteration 25155/56600 Training loss: 1.0441 0.4639 sec/batch
Epoch 45/100  Iteration 25156/56600 Training loss: 1.0440 0.4784 sec/batch
Epoch 45/100  Iteration 25157/56600 Training loss: 1.0440 0.4797 sec/batch
Epoch 45/100  Iteration 25158/56600 Training loss: 1.0440 0.4900 sec/batch
Epoch 45/100  Iteration 25159/56600 Training loss: 1.0439 0.4727 sec/batch
Epoch 45/100  Iteration 25160/56600 Training loss: 1.0439 0.4692 sec/batch
Epoch 45/100  Iteration 25161/56600 Training loss: 1.0438 0.4895 sec/batch
Epoch 45/100  Iteration 25162/56600 Training loss: 1.0437 0.4750 sec/batch
Epoch 45/100  Iteration 25163/56600 Training loss: 1.0436 0.4820 sec/batch
Epoch 45/100  Iteration 2

Epoch 45/100  Iteration 25261/56600 Training loss: 1.0409 0.4795 sec/batch
Epoch 45/100  Iteration 25262/56600 Training loss: 1.0409 0.4794 sec/batch
Epoch 45/100  Iteration 25263/56600 Training loss: 1.0409 0.4739 sec/batch
Epoch 45/100  Iteration 25264/56600 Training loss: 1.0409 0.4795 sec/batch
Epoch 45/100  Iteration 25265/56600 Training loss: 1.0409 0.4939 sec/batch
Epoch 45/100  Iteration 25266/56600 Training loss: 1.0409 0.4799 sec/batch
Epoch 45/100  Iteration 25267/56600 Training loss: 1.0408 0.4936 sec/batch
Epoch 45/100  Iteration 25268/56600 Training loss: 1.0408 0.4733 sec/batch
Epoch 45/100  Iteration 25269/56600 Training loss: 1.0407 0.4774 sec/batch
Epoch 45/100  Iteration 25270/56600 Training loss: 1.0407 0.4801 sec/batch
Epoch 45/100  Iteration 25271/56600 Training loss: 1.0407 0.4699 sec/batch
Epoch 45/100  Iteration 25272/56600 Training loss: 1.0406 0.4723 sec/batch
Epoch 45/100  Iteration 25273/56600 Training loss: 1.0406 0.4791 sec/batch
Epoch 45/100  Iteration 2

Epoch 45/100  Iteration 25371/56600 Training loss: 1.0376 0.4688 sec/batch
Epoch 45/100  Iteration 25372/56600 Training loss: 1.0377 0.4768 sec/batch
Epoch 45/100  Iteration 25373/56600 Training loss: 1.0377 0.4818 sec/batch
Epoch 45/100  Iteration 25374/56600 Training loss: 1.0377 0.4795 sec/batch
Epoch 45/100  Iteration 25375/56600 Training loss: 1.0378 0.4789 sec/batch
Epoch 45/100  Iteration 25376/56600 Training loss: 1.0380 0.4658 sec/batch
Epoch 45/100  Iteration 25377/56600 Training loss: 1.0380 0.4768 sec/batch
Epoch 45/100  Iteration 25378/56600 Training loss: 1.0379 0.4736 sec/batch
Epoch 45/100  Iteration 25379/56600 Training loss: 1.0379 0.4638 sec/batch
Epoch 45/100  Iteration 25380/56600 Training loss: 1.0379 0.4827 sec/batch
Epoch 45/100  Iteration 25381/56600 Training loss: 1.0379 0.4749 sec/batch
Epoch 45/100  Iteration 25382/56600 Training loss: 1.0378 0.4878 sec/batch
Epoch 45/100  Iteration 25383/56600 Training loss: 1.0378 0.4808 sec/batch
Epoch 45/100  Iteration 2

Epoch 46/100  Iteration 25481/56600 Training loss: 1.0627 0.4794 sec/batch
Epoch 46/100  Iteration 25482/56600 Training loss: 1.0630 0.4788 sec/batch
Epoch 46/100  Iteration 25483/56600 Training loss: 1.0624 0.4794 sec/batch
Epoch 46/100  Iteration 25484/56600 Training loss: 1.0602 0.4869 sec/batch
Epoch 46/100  Iteration 25485/56600 Training loss: 1.0584 0.4597 sec/batch
Epoch 46/100  Iteration 25486/56600 Training loss: 1.0575 0.4690 sec/batch
Epoch 46/100  Iteration 25487/56600 Training loss: 1.0555 0.4641 sec/batch
Epoch 46/100  Iteration 25488/56600 Training loss: 1.0536 0.4876 sec/batch
Epoch 46/100  Iteration 25489/56600 Training loss: 1.0511 0.4654 sec/batch
Epoch 46/100  Iteration 25490/56600 Training loss: 1.0498 0.4723 sec/batch
Epoch 46/100  Iteration 25491/56600 Training loss: 1.0483 0.4708 sec/batch
Epoch 46/100  Iteration 25492/56600 Training loss: 1.0460 0.4630 sec/batch
Epoch 46/100  Iteration 25493/56600 Training loss: 1.0448 0.4808 sec/batch
Epoch 46/100  Iteration 2

Epoch 46/100  Iteration 25591/56600 Training loss: 1.0341 0.4678 sec/batch
Epoch 46/100  Iteration 25592/56600 Training loss: 1.0346 0.4605 sec/batch
Epoch 46/100  Iteration 25593/56600 Training loss: 1.0345 0.4774 sec/batch
Epoch 46/100  Iteration 25594/56600 Training loss: 1.0345 0.4627 sec/batch
Epoch 46/100  Iteration 25595/56600 Training loss: 1.0348 0.4637 sec/batch
Epoch 46/100  Iteration 25596/56600 Training loss: 1.0350 0.4794 sec/batch
Epoch 46/100  Iteration 25597/56600 Training loss: 1.0350 0.4844 sec/batch
Epoch 46/100  Iteration 25598/56600 Training loss: 1.0351 0.4714 sec/batch
Epoch 46/100  Iteration 25599/56600 Training loss: 1.0352 0.4657 sec/batch
Epoch 46/100  Iteration 25600/56600 Training loss: 1.0351 0.4637 sec/batch
Epoch 46/100  Iteration 25601/56600 Training loss: 1.0352 0.4846 sec/batch
Epoch 46/100  Iteration 25602/56600 Training loss: 1.0351 0.4736 sec/batch
Epoch 46/100  Iteration 25603/56600 Training loss: 1.0350 0.4791 sec/batch
Epoch 46/100  Iteration 2

Epoch 46/100  Iteration 25701/56600 Training loss: 1.0401 0.4641 sec/batch
Epoch 46/100  Iteration 25702/56600 Training loss: 1.0402 0.4787 sec/batch
Epoch 46/100  Iteration 25703/56600 Training loss: 1.0404 0.4739 sec/batch
Epoch 46/100  Iteration 25704/56600 Training loss: 1.0404 0.4637 sec/batch
Epoch 46/100  Iteration 25705/56600 Training loss: 1.0405 0.4687 sec/batch
Epoch 46/100  Iteration 25706/56600 Training loss: 1.0406 0.4790 sec/batch
Epoch 46/100  Iteration 25707/56600 Training loss: 1.0407 0.4879 sec/batch
Epoch 46/100  Iteration 25708/56600 Training loss: 1.0408 0.4799 sec/batch
Epoch 46/100  Iteration 25709/56600 Training loss: 1.0409 0.4665 sec/batch
Epoch 46/100  Iteration 25710/56600 Training loss: 1.0411 0.4878 sec/batch
Epoch 46/100  Iteration 25711/56600 Training loss: 1.0413 0.4797 sec/batch
Epoch 46/100  Iteration 25712/56600 Training loss: 1.0415 0.4774 sec/batch
Epoch 46/100  Iteration 25713/56600 Training loss: 1.0418 0.4913 sec/batch
Epoch 46/100  Iteration 2

Epoch 46/100  Iteration 25811/56600 Training loss: 1.0395 0.4712 sec/batch
Epoch 46/100  Iteration 25812/56600 Training loss: 1.0394 0.4845 sec/batch
Epoch 46/100  Iteration 25813/56600 Training loss: 1.0393 0.4718 sec/batch
Epoch 46/100  Iteration 25814/56600 Training loss: 1.0393 0.4944 sec/batch
Epoch 46/100  Iteration 25815/56600 Training loss: 1.0393 0.4738 sec/batch
Epoch 46/100  Iteration 25816/56600 Training loss: 1.0393 0.4798 sec/batch
Epoch 46/100  Iteration 25817/56600 Training loss: 1.0392 0.4786 sec/batch
Epoch 46/100  Iteration 25818/56600 Training loss: 1.0392 0.4797 sec/batch
Epoch 46/100  Iteration 25819/56600 Training loss: 1.0391 0.4943 sec/batch
Epoch 46/100  Iteration 25820/56600 Training loss: 1.0390 0.4798 sec/batch
Epoch 46/100  Iteration 25821/56600 Training loss: 1.0388 0.4857 sec/batch
Epoch 46/100  Iteration 25822/56600 Training loss: 1.0387 0.4844 sec/batch
Epoch 46/100  Iteration 25823/56600 Training loss: 1.0387 0.4867 sec/batch
Epoch 46/100  Iteration 2

Epoch 46/100  Iteration 25921/56600 Training loss: 1.0357 0.4804 sec/batch
Epoch 46/100  Iteration 25922/56600 Training loss: 1.0357 0.4619 sec/batch
Epoch 46/100  Iteration 25923/56600 Training loss: 1.0357 0.4746 sec/batch
Epoch 46/100  Iteration 25924/56600 Training loss: 1.0357 0.4844 sec/batch
Epoch 46/100  Iteration 25925/56600 Training loss: 1.0357 0.4755 sec/batch
Epoch 46/100  Iteration 25926/56600 Training loss: 1.0356 0.4667 sec/batch
Epoch 46/100  Iteration 25927/56600 Training loss: 1.0356 0.4737 sec/batch
Epoch 46/100  Iteration 25928/56600 Training loss: 1.0355 0.4848 sec/batch
Epoch 46/100  Iteration 25929/56600 Training loss: 1.0355 0.4788 sec/batch
Epoch 46/100  Iteration 25930/56600 Training loss: 1.0355 0.4796 sec/batch
Epoch 46/100  Iteration 25931/56600 Training loss: 1.0355 0.4740 sec/batch
Epoch 46/100  Iteration 25932/56600 Training loss: 1.0355 0.4791 sec/batch
Epoch 46/100  Iteration 25933/56600 Training loss: 1.0355 0.4763 sec/batch
Epoch 46/100  Iteration 2

Epoch 46/100  Iteration 26030/56600 Training loss: 1.0349 0.4790 sec/batch
Epoch 46/100  Iteration 26031/56600 Training loss: 1.0349 0.4638 sec/batch
Epoch 46/100  Iteration 26032/56600 Training loss: 1.0349 0.4794 sec/batch
Epoch 46/100  Iteration 26033/56600 Training loss: 1.0349 0.4785 sec/batch
Epoch 46/100  Iteration 26034/56600 Training loss: 1.0350 0.4793 sec/batch
Epoch 46/100  Iteration 26035/56600 Training loss: 1.0351 0.4793 sec/batch
Epoch 46/100  Iteration 26036/56600 Training loss: 1.0351 0.4688 sec/batch
Epoch 47/100  Iteration 26037/56600 Training loss: 1.1542 0.4792 sec/batch
Epoch 47/100  Iteration 26038/56600 Training loss: 1.1160 0.4793 sec/batch
Epoch 47/100  Iteration 26039/56600 Training loss: 1.1033 0.4788 sec/batch
Epoch 47/100  Iteration 26040/56600 Training loss: 1.0921 0.4797 sec/batch
Epoch 47/100  Iteration 26041/56600 Training loss: 1.0776 0.4628 sec/batch
Epoch 47/100  Iteration 26042/56600 Training loss: 1.0732 0.4638 sec/batch
Epoch 47/100  Iteration 2

Epoch 47/100  Iteration 26140/56600 Training loss: 1.0268 0.4726 sec/batch
Epoch 47/100  Iteration 26141/56600 Training loss: 1.0268 0.4734 sec/batch
Epoch 47/100  Iteration 26142/56600 Training loss: 1.0268 0.4803 sec/batch
Epoch 47/100  Iteration 26143/56600 Training loss: 1.0265 0.4741 sec/batch
Epoch 47/100  Iteration 26144/56600 Training loss: 1.0266 0.4864 sec/batch
Epoch 47/100  Iteration 26145/56600 Training loss: 1.0268 0.4631 sec/batch
Epoch 47/100  Iteration 26146/56600 Training loss: 1.0269 0.4598 sec/batch
Epoch 47/100  Iteration 26147/56600 Training loss: 1.0269 0.4679 sec/batch
Epoch 47/100  Iteration 26148/56600 Training loss: 1.0269 0.4592 sec/batch
Epoch 47/100  Iteration 26149/56600 Training loss: 1.0273 0.4873 sec/batch
Epoch 47/100  Iteration 26150/56600 Training loss: 1.0275 0.4719 sec/batch
Epoch 47/100  Iteration 26151/56600 Training loss: 1.0278 0.4788 sec/batch
Epoch 47/100  Iteration 26152/56600 Training loss: 1.0280 0.4795 sec/batch
Epoch 47/100  Iteration 2

Epoch 47/100  Iteration 26250/56600 Training loss: 1.0358 0.4844 sec/batch
Epoch 47/100  Iteration 26251/56600 Training loss: 1.0358 0.4756 sec/batch
Epoch 47/100  Iteration 26252/56600 Training loss: 1.0358 0.4716 sec/batch
Epoch 47/100  Iteration 26253/56600 Training loss: 1.0357 0.4827 sec/batch
Epoch 47/100  Iteration 26254/56600 Training loss: 1.0357 0.4697 sec/batch
Epoch 47/100  Iteration 26255/56600 Training loss: 1.0359 0.4731 sec/batch
Epoch 47/100  Iteration 26256/56600 Training loss: 1.0360 0.4814 sec/batch
Epoch 47/100  Iteration 26257/56600 Training loss: 1.0361 0.4786 sec/batch
Epoch 47/100  Iteration 26258/56600 Training loss: 1.0361 0.4791 sec/batch
Epoch 47/100  Iteration 26259/56600 Training loss: 1.0361 0.4784 sec/batch
Epoch 47/100  Iteration 26260/56600 Training loss: 1.0363 0.4878 sec/batch
Epoch 47/100  Iteration 26261/56600 Training loss: 1.0364 0.4629 sec/batch
Epoch 47/100  Iteration 26262/56600 Training loss: 1.0365 0.4814 sec/batch
Epoch 47/100  Iteration 2

Epoch 47/100  Iteration 26360/56600 Training loss: 1.0367 0.4950 sec/batch
Epoch 47/100  Iteration 26361/56600 Training loss: 1.0366 0.4800 sec/batch
Epoch 47/100  Iteration 26362/56600 Training loss: 1.0367 0.4864 sec/batch
Epoch 47/100  Iteration 26363/56600 Training loss: 1.0366 0.4696 sec/batch
Epoch 47/100  Iteration 26364/56600 Training loss: 1.0365 0.4856 sec/batch
Epoch 47/100  Iteration 26365/56600 Training loss: 1.0365 0.4816 sec/batch
Epoch 47/100  Iteration 26366/56600 Training loss: 1.0365 0.4722 sec/batch
Epoch 47/100  Iteration 26367/56600 Training loss: 1.0365 0.4866 sec/batch
Epoch 47/100  Iteration 26368/56600 Training loss: 1.0366 0.4894 sec/batch
Epoch 47/100  Iteration 26369/56600 Training loss: 1.0366 0.4796 sec/batch
Epoch 47/100  Iteration 26370/56600 Training loss: 1.0365 0.4871 sec/batch
Epoch 47/100  Iteration 26371/56600 Training loss: 1.0365 0.4876 sec/batch
Epoch 47/100  Iteration 26372/56600 Training loss: 1.0365 0.4781 sec/batch
Epoch 47/100  Iteration 2

Epoch 47/100  Iteration 26470/56600 Training loss: 1.0337 0.4844 sec/batch
Epoch 47/100  Iteration 26471/56600 Training loss: 1.0337 0.4764 sec/batch
Epoch 47/100  Iteration 26472/56600 Training loss: 1.0337 0.4816 sec/batch
Epoch 47/100  Iteration 26473/56600 Training loss: 1.0337 0.4947 sec/batch
Epoch 47/100  Iteration 26474/56600 Training loss: 1.0336 0.4794 sec/batch
Epoch 47/100  Iteration 26475/56600 Training loss: 1.0334 0.4799 sec/batch
Epoch 47/100  Iteration 26476/56600 Training loss: 1.0334 0.4789 sec/batch
Epoch 47/100  Iteration 26477/56600 Training loss: 1.0333 0.4789 sec/batch
Epoch 47/100  Iteration 26478/56600 Training loss: 1.0332 0.4790 sec/batch
Epoch 47/100  Iteration 26479/56600 Training loss: 1.0333 0.4899 sec/batch
Epoch 47/100  Iteration 26480/56600 Training loss: 1.0333 0.4885 sec/batch
Epoch 47/100  Iteration 26481/56600 Training loss: 1.0331 0.4774 sec/batch
Epoch 47/100  Iteration 26482/56600 Training loss: 1.0331 0.4707 sec/batch
Epoch 47/100  Iteration 2

Epoch 47/100  Iteration 26580/56600 Training loss: 1.0320 0.4630 sec/batch
Epoch 47/100  Iteration 26581/56600 Training loss: 1.0320 0.4788 sec/batch
Epoch 47/100  Iteration 26582/56600 Training loss: 1.0319 0.4800 sec/batch
Epoch 47/100  Iteration 26583/56600 Training loss: 1.0319 0.4844 sec/batch
Epoch 47/100  Iteration 26584/56600 Training loss: 1.0319 0.4711 sec/batch
Epoch 47/100  Iteration 26585/56600 Training loss: 1.0319 0.4789 sec/batch
Epoch 47/100  Iteration 26586/56600 Training loss: 1.0319 0.4714 sec/batch
Epoch 47/100  Iteration 26587/56600 Training loss: 1.0319 0.4580 sec/batch
Epoch 47/100  Iteration 26588/56600 Training loss: 1.0319 0.4650 sec/batch
Epoch 47/100  Iteration 26589/56600 Training loss: 1.0319 0.4783 sec/batch
Epoch 47/100  Iteration 26590/56600 Training loss: 1.0319 0.4749 sec/batch
Epoch 47/100  Iteration 26591/56600 Training loss: 1.0318 0.4844 sec/batch
Epoch 47/100  Iteration 26592/56600 Training loss: 1.0319 0.4726 sec/batch
Epoch 47/100  Iteration 2

Epoch 48/100  Iteration 26690/56600 Training loss: 1.0240 0.4795 sec/batch
Epoch 48/100  Iteration 26691/56600 Training loss: 1.0242 0.4834 sec/batch
Epoch 48/100  Iteration 26692/56600 Training loss: 1.0244 0.4685 sec/batch
Epoch 48/100  Iteration 26693/56600 Training loss: 1.0244 0.4771 sec/batch
Epoch 48/100  Iteration 26694/56600 Training loss: 1.0243 0.4696 sec/batch
Epoch 48/100  Iteration 26695/56600 Training loss: 1.0241 0.4799 sec/batch
Epoch 48/100  Iteration 26696/56600 Training loss: 1.0240 0.4790 sec/batch
Epoch 48/100  Iteration 26697/56600 Training loss: 1.0239 0.4868 sec/batch
Epoch 48/100  Iteration 26698/56600 Training loss: 1.0236 0.4815 sec/batch
Epoch 48/100  Iteration 26699/56600 Training loss: 1.0233 0.4883 sec/batch
Epoch 48/100  Iteration 26700/56600 Training loss: 1.0230 0.4865 sec/batch
Epoch 48/100  Iteration 26701/56600 Training loss: 1.0232 0.4788 sec/batch
Epoch 48/100  Iteration 26702/56600 Training loss: 1.0237 0.4844 sec/batch
Epoch 48/100  Iteration 2

Epoch 48/100  Iteration 26800/56600 Training loss: 1.0328 0.4863 sec/batch
Epoch 48/100  Iteration 26801/56600 Training loss: 1.0329 0.4877 sec/batch
Epoch 48/100  Iteration 26802/56600 Training loss: 1.0329 0.4788 sec/batch
Epoch 48/100  Iteration 26803/56600 Training loss: 1.0328 0.4795 sec/batch
Epoch 48/100  Iteration 26804/56600 Training loss: 1.0331 0.4735 sec/batch
Epoch 48/100  Iteration 26805/56600 Training loss: 1.0332 0.4792 sec/batch
Epoch 48/100  Iteration 26806/56600 Training loss: 1.0332 0.4789 sec/batch
Epoch 48/100  Iteration 26807/56600 Training loss: 1.0334 0.4795 sec/batch
Epoch 48/100  Iteration 26808/56600 Training loss: 1.0335 0.4931 sec/batch
Epoch 48/100  Iteration 26809/56600 Training loss: 1.0335 0.4860 sec/batch
Epoch 48/100  Iteration 26810/56600 Training loss: 1.0335 0.4745 sec/batch
Epoch 48/100  Iteration 26811/56600 Training loss: 1.0335 0.4788 sec/batch
Epoch 48/100  Iteration 26812/56600 Training loss: 1.0335 0.4957 sec/batch
Epoch 48/100  Iteration 2

Epoch 48/100  Iteration 26910/56600 Training loss: 1.0347 0.4757 sec/batch
Epoch 48/100  Iteration 26911/56600 Training loss: 1.0346 0.4749 sec/batch
Epoch 48/100  Iteration 26912/56600 Training loss: 1.0346 0.4775 sec/batch
Epoch 48/100  Iteration 26913/56600 Training loss: 1.0346 0.4782 sec/batch
Epoch 48/100  Iteration 26914/56600 Training loss: 1.0345 0.4800 sec/batch
Epoch 48/100  Iteration 26915/56600 Training loss: 1.0344 0.4799 sec/batch
Epoch 48/100  Iteration 26916/56600 Training loss: 1.0343 0.4789 sec/batch
Epoch 48/100  Iteration 26917/56600 Training loss: 1.0343 0.4894 sec/batch
Epoch 48/100  Iteration 26918/56600 Training loss: 1.0343 0.4946 sec/batch
Epoch 48/100  Iteration 26919/56600 Training loss: 1.0344 0.4908 sec/batch
Epoch 48/100  Iteration 26920/56600 Training loss: 1.0344 0.4844 sec/batch
Epoch 48/100  Iteration 26921/56600 Training loss: 1.0344 0.4873 sec/batch
Epoch 48/100  Iteration 26922/56600 Training loss: 1.0343 0.4815 sec/batch
Epoch 48/100  Iteration 2

Epoch 48/100  Iteration 27020/56600 Training loss: 1.0317 0.4786 sec/batch
Epoch 48/100  Iteration 27021/56600 Training loss: 1.0318 0.4795 sec/batch
Epoch 48/100  Iteration 27022/56600 Training loss: 1.0318 0.4901 sec/batch
Epoch 48/100  Iteration 27023/56600 Training loss: 1.0317 0.4994 sec/batch
Epoch 48/100  Iteration 27024/56600 Training loss: 1.0317 0.4788 sec/batch
Epoch 48/100  Iteration 27025/56600 Training loss: 1.0317 0.4799 sec/batch
Epoch 48/100  Iteration 27026/56600 Training loss: 1.0318 0.4813 sec/batch
Epoch 48/100  Iteration 27027/56600 Training loss: 1.0317 0.4693 sec/batch
Epoch 48/100  Iteration 27028/56600 Training loss: 1.0317 0.4714 sec/batch
Epoch 48/100  Iteration 27029/56600 Training loss: 1.0316 0.4709 sec/batch
Epoch 48/100  Iteration 27030/56600 Training loss: 1.0315 0.4786 sec/batch
Epoch 48/100  Iteration 27031/56600 Training loss: 1.0315 0.4950 sec/batch
Epoch 48/100  Iteration 27032/56600 Training loss: 1.0314 0.4798 sec/batch
Epoch 48/100  Iteration 2

Epoch 48/100  Iteration 27130/56600 Training loss: 1.0297 0.4965 sec/batch
Epoch 48/100  Iteration 27131/56600 Training loss: 1.0297 0.4779 sec/batch
Epoch 48/100  Iteration 27132/56600 Training loss: 1.0297 0.4787 sec/batch
Epoch 48/100  Iteration 27133/56600 Training loss: 1.0297 0.4744 sec/batch
Epoch 48/100  Iteration 27134/56600 Training loss: 1.0297 0.4787 sec/batch
Epoch 48/100  Iteration 27135/56600 Training loss: 1.0296 0.4951 sec/batch
Epoch 48/100  Iteration 27136/56600 Training loss: 1.0296 0.4772 sec/batch
Epoch 48/100  Iteration 27137/56600 Training loss: 1.0296 0.4765 sec/batch
Epoch 48/100  Iteration 27138/56600 Training loss: 1.0296 0.4910 sec/batch
Epoch 48/100  Iteration 27139/56600 Training loss: 1.0296 0.4817 sec/batch
Epoch 48/100  Iteration 27140/56600 Training loss: 1.0296 0.4724 sec/batch
Epoch 48/100  Iteration 27141/56600 Training loss: 1.0296 0.4856 sec/batch
Epoch 48/100  Iteration 27142/56600 Training loss: 1.0296 0.4795 sec/batch
Epoch 48/100  Iteration 2

Epoch 49/100  Iteration 27240/56600 Training loss: 1.0231 0.4720 sec/batch
Epoch 49/100  Iteration 27241/56600 Training loss: 1.0225 0.4755 sec/batch
Epoch 49/100  Iteration 27242/56600 Training loss: 1.0229 0.4790 sec/batch
Epoch 49/100  Iteration 27243/56600 Training loss: 1.0231 0.4789 sec/batch
Epoch 49/100  Iteration 27244/56600 Training loss: 1.0226 0.4750 sec/batch
Epoch 49/100  Iteration 27245/56600 Training loss: 1.0225 0.4783 sec/batch
Epoch 49/100  Iteration 27246/56600 Training loss: 1.0225 0.4801 sec/batch
Epoch 49/100  Iteration 27247/56600 Training loss: 1.0222 0.4789 sec/batch
Epoch 49/100  Iteration 27248/56600 Training loss: 1.0221 0.4891 sec/batch
Epoch 49/100  Iteration 27249/56600 Training loss: 1.0220 0.4797 sec/batch
Epoch 49/100  Iteration 27250/56600 Training loss: 1.0220 0.4785 sec/batch
Epoch 49/100  Iteration 27251/56600 Training loss: 1.0224 0.5022 sec/batch
Epoch 49/100  Iteration 27252/56600 Training loss: 1.0226 0.4777 sec/batch
Epoch 49/100  Iteration 2

Epoch 49/100  Iteration 27350/56600 Training loss: 1.0291 0.4798 sec/batch
Epoch 49/100  Iteration 27351/56600 Training loss: 1.0293 0.4736 sec/batch
Epoch 49/100  Iteration 27352/56600 Training loss: 1.0294 0.4803 sec/batch
Epoch 49/100  Iteration 27353/56600 Training loss: 1.0295 0.4780 sec/batch
Epoch 49/100  Iteration 27354/56600 Training loss: 1.0296 0.4844 sec/batch
Epoch 49/100  Iteration 27355/56600 Training loss: 1.0296 0.4690 sec/batch
Epoch 49/100  Iteration 27356/56600 Training loss: 1.0298 0.4678 sec/batch
Epoch 49/100  Iteration 27357/56600 Training loss: 1.0297 0.4661 sec/batch
Epoch 49/100  Iteration 27358/56600 Training loss: 1.0298 0.4768 sec/batch
Epoch 49/100  Iteration 27359/56600 Training loss: 1.0297 0.4790 sec/batch
Epoch 49/100  Iteration 27360/56600 Training loss: 1.0297 0.4794 sec/batch
Epoch 49/100  Iteration 27361/56600 Training loss: 1.0297 0.4893 sec/batch
Epoch 49/100  Iteration 27362/56600 Training loss: 1.0299 0.4801 sec/batch
Epoch 49/100  Iteration 2

Epoch 49/100  Iteration 27460/56600 Training loss: 1.0326 0.4786 sec/batch
Epoch 49/100  Iteration 27461/56600 Training loss: 1.0327 0.4949 sec/batch
Epoch 49/100  Iteration 27462/56600 Training loss: 1.0328 0.4952 sec/batch
Epoch 49/100  Iteration 27463/56600 Training loss: 1.0329 0.4790 sec/batch
Epoch 49/100  Iteration 27464/56600 Training loss: 1.0329 0.4786 sec/batch
Epoch 49/100  Iteration 27465/56600 Training loss: 1.0330 0.4783 sec/batch
Epoch 49/100  Iteration 27466/56600 Training loss: 1.0330 0.4837 sec/batch
Epoch 49/100  Iteration 27467/56600 Training loss: 1.0330 0.4957 sec/batch
Epoch 49/100  Iteration 27468/56600 Training loss: 1.0329 0.4791 sec/batch
Epoch 49/100  Iteration 27469/56600 Training loss: 1.0329 0.4815 sec/batch
Epoch 49/100  Iteration 27470/56600 Training loss: 1.0328 0.4922 sec/batch
Epoch 49/100  Iteration 27471/56600 Training loss: 1.0328 0.4891 sec/batch
Epoch 49/100  Iteration 27472/56600 Training loss: 1.0328 0.4796 sec/batch
Epoch 49/100  Iteration 2

Epoch 49/100  Iteration 27570/56600 Training loss: 1.0299 0.4761 sec/batch
Epoch 49/100  Iteration 27571/56600 Training loss: 1.0299 0.4794 sec/batch
Epoch 49/100  Iteration 27572/56600 Training loss: 1.0298 0.4791 sec/batch
Epoch 49/100  Iteration 27573/56600 Training loss: 1.0297 0.4788 sec/batch
Epoch 49/100  Iteration 27574/56600 Training loss: 1.0298 0.4794 sec/batch
Epoch 49/100  Iteration 27575/56600 Training loss: 1.0297 0.4693 sec/batch
Epoch 49/100  Iteration 27576/56600 Training loss: 1.0297 0.4884 sec/batch
Epoch 49/100  Iteration 27577/56600 Training loss: 1.0296 0.4788 sec/batch
Epoch 49/100  Iteration 27578/56600 Training loss: 1.0295 0.4791 sec/batch
Epoch 49/100  Iteration 27579/56600 Training loss: 1.0294 0.4866 sec/batch
Epoch 49/100  Iteration 27580/56600 Training loss: 1.0294 0.4823 sec/batch
Epoch 49/100  Iteration 27581/56600 Training loss: 1.0294 0.4786 sec/batch
Epoch 49/100  Iteration 27582/56600 Training loss: 1.0294 0.4798 sec/batch
Epoch 49/100  Iteration 2

Epoch 49/100  Iteration 27680/56600 Training loss: 1.0271 0.4784 sec/batch
Epoch 49/100  Iteration 27681/56600 Training loss: 1.0271 0.4796 sec/batch
Epoch 49/100  Iteration 27682/56600 Training loss: 1.0271 0.4914 sec/batch
Epoch 49/100  Iteration 27683/56600 Training loss: 1.0271 0.4826 sec/batch
Epoch 49/100  Iteration 27684/56600 Training loss: 1.0272 0.5043 sec/batch
Epoch 49/100  Iteration 27685/56600 Training loss: 1.0272 0.4673 sec/batch
Epoch 49/100  Iteration 27686/56600 Training loss: 1.0272 0.4791 sec/batch
Epoch 49/100  Iteration 27687/56600 Training loss: 1.0272 0.4792 sec/batch
Epoch 49/100  Iteration 27688/56600 Training loss: 1.0272 0.4745 sec/batch
Epoch 49/100  Iteration 27689/56600 Training loss: 1.0272 0.4785 sec/batch
Epoch 49/100  Iteration 27690/56600 Training loss: 1.0273 0.4799 sec/batch
Epoch 49/100  Iteration 27691/56600 Training loss: 1.0272 0.4748 sec/batch
Epoch 49/100  Iteration 27692/56600 Training loss: 1.0273 0.4932 sec/batch
Epoch 49/100  Iteration 2

Epoch 50/100  Iteration 27790/56600 Training loss: 1.0282 0.4773 sec/batch
Epoch 50/100  Iteration 27791/56600 Training loss: 1.0275 0.4809 sec/batch
Epoch 50/100  Iteration 27792/56600 Training loss: 1.0267 0.4789 sec/batch
Epoch 50/100  Iteration 27793/56600 Training loss: 1.0262 0.4796 sec/batch
Epoch 50/100  Iteration 27794/56600 Training loss: 1.0258 0.4628 sec/batch
Epoch 50/100  Iteration 27795/56600 Training loss: 1.0252 0.4744 sec/batch
Epoch 50/100  Iteration 27796/56600 Training loss: 1.0249 0.4755 sec/batch
Epoch 50/100  Iteration 27797/56600 Training loss: 1.0247 0.4822 sec/batch
Epoch 50/100  Iteration 27798/56600 Training loss: 1.0240 0.4722 sec/batch
Epoch 50/100  Iteration 27799/56600 Training loss: 1.0236 0.4703 sec/batch
Epoch 50/100  Iteration 27800/56600 Training loss: 1.0234 0.4742 sec/batch
Epoch 50/100  Iteration 27801/56600 Training loss: 1.0231 0.4782 sec/batch
Epoch 50/100  Iteration 27802/56600 Training loss: 1.0235 0.4800 sec/batch
Epoch 50/100  Iteration 2

Epoch 50/100  Iteration 27900/56600 Training loss: 1.0272 0.4928 sec/batch
Epoch 50/100  Iteration 27901/56600 Training loss: 1.0273 0.4799 sec/batch
Epoch 50/100  Iteration 27902/56600 Training loss: 1.0275 0.4795 sec/batch
Epoch 50/100  Iteration 27903/56600 Training loss: 1.0275 0.4782 sec/batch
Epoch 50/100  Iteration 27904/56600 Training loss: 1.0276 0.4720 sec/batch
Epoch 50/100  Iteration 27905/56600 Training loss: 1.0275 0.4735 sec/batch
Epoch 50/100  Iteration 27906/56600 Training loss: 1.0274 0.4943 sec/batch
Epoch 50/100  Iteration 27907/56600 Training loss: 1.0274 0.4791 sec/batch
Epoch 50/100  Iteration 27908/56600 Training loss: 1.0274 0.4797 sec/batch
Epoch 50/100  Iteration 27909/56600 Training loss: 1.0273 0.4797 sec/batch
Epoch 50/100  Iteration 27910/56600 Training loss: 1.0272 0.4782 sec/batch
Epoch 50/100  Iteration 27911/56600 Training loss: 1.0272 0.4879 sec/batch
Epoch 50/100  Iteration 27912/56600 Training loss: 1.0272 0.4830 sec/batch
Epoch 50/100  Iteration 2

Epoch 50/100  Iteration 28009/56600 Training loss: 1.0316 0.4783 sec/batch
Epoch 50/100  Iteration 28010/56600 Training loss: 1.0315 0.4802 sec/batch
Epoch 50/100  Iteration 28011/56600 Training loss: 1.0315 0.4717 sec/batch
Epoch 50/100  Iteration 28012/56600 Training loss: 1.0314 0.4864 sec/batch
Epoch 50/100  Iteration 28013/56600 Training loss: 1.0312 0.4793 sec/batch
Epoch 50/100  Iteration 28014/56600 Training loss: 1.0311 0.4744 sec/batch
Epoch 50/100  Iteration 28015/56600 Training loss: 1.0311 0.4794 sec/batch
Epoch 50/100  Iteration 28016/56600 Training loss: 1.0310 0.4852 sec/batch
Epoch 50/100  Iteration 28017/56600 Training loss: 1.0310 0.4674 sec/batch
Epoch 50/100  Iteration 28018/56600 Training loss: 1.0309 0.4823 sec/batch
Epoch 50/100  Iteration 28019/56600 Training loss: 1.0309 0.4757 sec/batch
Epoch 50/100  Iteration 28020/56600 Training loss: 1.0309 0.4688 sec/batch
Epoch 50/100  Iteration 28021/56600 Training loss: 1.0311 0.4794 sec/batch
Epoch 50/100  Iteration 2

Epoch 50/100  Iteration 28119/56600 Training loss: 1.0284 0.4899 sec/batch
Epoch 50/100  Iteration 28120/56600 Training loss: 1.0285 0.4732 sec/batch
Epoch 50/100  Iteration 28121/56600 Training loss: 1.0285 0.4844 sec/batch
Epoch 50/100  Iteration 28122/56600 Training loss: 1.0285 0.4627 sec/batch
Epoch 50/100  Iteration 28123/56600 Training loss: 1.0284 0.4801 sec/batch
Epoch 50/100  Iteration 28124/56600 Training loss: 1.0284 0.4784 sec/batch
Epoch 50/100  Iteration 28125/56600 Training loss: 1.0283 0.4799 sec/batch
Epoch 50/100  Iteration 28126/56600 Training loss: 1.0283 0.4734 sec/batch
Epoch 50/100  Iteration 28127/56600 Training loss: 1.0284 0.4848 sec/batch
Epoch 50/100  Iteration 28128/56600 Training loss: 1.0283 0.4731 sec/batch
Epoch 50/100  Iteration 28129/56600 Training loss: 1.0283 0.4861 sec/batch
Epoch 50/100  Iteration 28130/56600 Training loss: 1.0283 0.4723 sec/batch
Epoch 50/100  Iteration 28131/56600 Training loss: 1.0283 0.4788 sec/batch
Epoch 50/100  Iteration 2

Epoch 50/100  Iteration 28229/56600 Training loss: 1.0261 0.4787 sec/batch
Epoch 50/100  Iteration 28230/56600 Training loss: 1.0261 0.4792 sec/batch
Epoch 50/100  Iteration 28231/56600 Training loss: 1.0260 0.4748 sec/batch
Epoch 50/100  Iteration 28232/56600 Training loss: 1.0260 0.4774 sec/batch
Epoch 50/100  Iteration 28233/56600 Training loss: 1.0260 0.4799 sec/batch
Epoch 50/100  Iteration 28234/56600 Training loss: 1.0259 0.4785 sec/batch
Epoch 50/100  Iteration 28235/56600 Training loss: 1.0259 0.4637 sec/batch
Epoch 50/100  Iteration 28236/56600 Training loss: 1.0259 0.4793 sec/batch
Epoch 50/100  Iteration 28237/56600 Training loss: 1.0258 0.4719 sec/batch
Epoch 50/100  Iteration 28238/56600 Training loss: 1.0257 0.4652 sec/batch
Epoch 50/100  Iteration 28239/56600 Training loss: 1.0257 0.4714 sec/batch
Epoch 50/100  Iteration 28240/56600 Training loss: 1.0257 0.4864 sec/batch
Epoch 50/100  Iteration 28241/56600 Training loss: 1.0256 0.4848 sec/batch
Epoch 50/100  Iteration 2

Epoch 51/100  Iteration 28339/56600 Training loss: 1.0329 0.4802 sec/batch
Epoch 51/100  Iteration 28340/56600 Training loss: 1.0336 0.4937 sec/batch
Epoch 51/100  Iteration 28341/56600 Training loss: 1.0335 0.4921 sec/batch
Epoch 51/100  Iteration 28342/56600 Training loss: 1.0339 0.4789 sec/batch
Epoch 51/100  Iteration 28343/56600 Training loss: 1.0336 0.4808 sec/batch
Epoch 51/100  Iteration 28344/56600 Training loss: 1.0332 0.4724 sec/batch
Epoch 51/100  Iteration 28345/56600 Training loss: 1.0338 0.4809 sec/batch
Epoch 51/100  Iteration 28346/56600 Training loss: 1.0343 0.4794 sec/batch
Epoch 51/100  Iteration 28347/56600 Training loss: 1.0335 0.4786 sec/batch
Epoch 51/100  Iteration 28348/56600 Training loss: 1.0334 0.4946 sec/batch
Epoch 51/100  Iteration 28349/56600 Training loss: 1.0331 0.4798 sec/batch
Epoch 51/100  Iteration 28350/56600 Training loss: 1.0323 0.4735 sec/batch
Epoch 51/100  Iteration 28351/56600 Training loss: 1.0316 0.4835 sec/batch
Epoch 51/100  Iteration 2

Epoch 51/100  Iteration 28449/56600 Training loss: 1.0254 0.4944 sec/batch
Epoch 51/100  Iteration 28450/56600 Training loss: 1.0254 0.4807 sec/batch
Epoch 51/100  Iteration 28451/56600 Training loss: 1.0259 0.4783 sec/batch
Epoch 51/100  Iteration 28452/56600 Training loss: 1.0262 0.4792 sec/batch
Epoch 51/100  Iteration 28453/56600 Training loss: 1.0263 0.4947 sec/batch
Epoch 51/100  Iteration 28454/56600 Training loss: 1.0264 0.4893 sec/batch
Epoch 51/100  Iteration 28455/56600 Training loss: 1.0265 0.4848 sec/batch
Epoch 51/100  Iteration 28456/56600 Training loss: 1.0265 0.4773 sec/batch
Epoch 51/100  Iteration 28457/56600 Training loss: 1.0267 0.4887 sec/batch
Epoch 51/100  Iteration 28458/56600 Training loss: 1.0269 0.4812 sec/batch
Epoch 51/100  Iteration 28459/56600 Training loss: 1.0268 0.4828 sec/batch
Epoch 51/100  Iteration 28460/56600 Training loss: 1.0268 0.4948 sec/batch
Epoch 51/100  Iteration 28461/56600 Training loss: 1.0268 0.4799 sec/batch
Epoch 51/100  Iteration 2

Epoch 51/100  Iteration 28559/56600 Training loss: 1.0307 0.4771 sec/batch
Epoch 51/100  Iteration 28560/56600 Training loss: 1.0306 0.4789 sec/batch
Epoch 51/100  Iteration 28561/56600 Training loss: 1.0304 0.4790 sec/batch
Epoch 51/100  Iteration 28562/56600 Training loss: 1.0304 0.4905 sec/batch
Epoch 51/100  Iteration 28563/56600 Training loss: 1.0303 0.4666 sec/batch
Epoch 51/100  Iteration 28564/56600 Training loss: 1.0303 0.4724 sec/batch
Epoch 51/100  Iteration 28565/56600 Training loss: 1.0302 0.4868 sec/batch
Epoch 51/100  Iteration 28566/56600 Training loss: 1.0301 0.4825 sec/batch
Epoch 51/100  Iteration 28567/56600 Training loss: 1.0300 0.4847 sec/batch
Epoch 51/100  Iteration 28568/56600 Training loss: 1.0299 0.4893 sec/batch
Epoch 51/100  Iteration 28569/56600 Training loss: 1.0298 0.4757 sec/batch
Epoch 51/100  Iteration 28570/56600 Training loss: 1.0297 0.4761 sec/batch
Epoch 51/100  Iteration 28571/56600 Training loss: 1.0298 0.4791 sec/batch
Epoch 51/100  Iteration 2

Epoch 51/100  Iteration 28669/56600 Training loss: 1.0275 0.4895 sec/batch
Epoch 51/100  Iteration 28670/56600 Training loss: 1.0274 0.4839 sec/batch
Epoch 51/100  Iteration 28671/56600 Training loss: 1.0273 0.4759 sec/batch
Epoch 51/100  Iteration 28672/56600 Training loss: 1.0272 0.4775 sec/batch
Epoch 51/100  Iteration 28673/56600 Training loss: 1.0271 0.4885 sec/batch
Epoch 51/100  Iteration 28674/56600 Training loss: 1.0270 0.4851 sec/batch
Epoch 51/100  Iteration 28675/56600 Training loss: 1.0269 0.4916 sec/batch
Epoch 51/100  Iteration 28676/56600 Training loss: 1.0269 0.4688 sec/batch
Epoch 51/100  Iteration 28677/56600 Training loss: 1.0270 0.4717 sec/batch
Epoch 51/100  Iteration 28678/56600 Training loss: 1.0270 0.4663 sec/batch
Epoch 51/100  Iteration 28679/56600 Training loss: 1.0269 0.4832 sec/batch
Epoch 51/100  Iteration 28680/56600 Training loss: 1.0269 0.4800 sec/batch
Epoch 51/100  Iteration 28681/56600 Training loss: 1.0269 0.4944 sec/batch
Epoch 51/100  Iteration 2

Epoch 51/100  Iteration 28779/56600 Training loss: 1.0247 0.4649 sec/batch
Epoch 51/100  Iteration 28780/56600 Training loss: 1.0247 0.4741 sec/batch
Epoch 51/100  Iteration 28781/56600 Training loss: 1.0247 0.4843 sec/batch
Epoch 51/100  Iteration 28782/56600 Training loss: 1.0247 0.4827 sec/batch
Epoch 51/100  Iteration 28783/56600 Training loss: 1.0247 0.4666 sec/batch
Epoch 51/100  Iteration 28784/56600 Training loss: 1.0246 0.4784 sec/batch
Epoch 51/100  Iteration 28785/56600 Training loss: 1.0246 0.4800 sec/batch
Epoch 51/100  Iteration 28786/56600 Training loss: 1.0246 0.4793 sec/batch
Epoch 51/100  Iteration 28787/56600 Training loss: 1.0245 0.4875 sec/batch
Epoch 51/100  Iteration 28788/56600 Training loss: 1.0245 0.4643 sec/batch
Epoch 51/100  Iteration 28789/56600 Training loss: 1.0245 0.4747 sec/batch
Epoch 51/100  Iteration 28790/56600 Training loss: 1.0245 0.4833 sec/batch
Epoch 51/100  Iteration 28791/56600 Training loss: 1.0245 0.4799 sec/batch
Epoch 51/100  Iteration 2

Epoch 52/100  Iteration 28889/56600 Training loss: 1.0283 0.4639 sec/batch
Epoch 52/100  Iteration 28890/56600 Training loss: 1.0277 0.4788 sec/batch
Epoch 52/100  Iteration 28891/56600 Training loss: 1.0269 0.4866 sec/batch
Epoch 52/100  Iteration 28892/56600 Training loss: 1.0251 0.4699 sec/batch
Epoch 52/100  Iteration 28893/56600 Training loss: 1.0249 0.4705 sec/batch
Epoch 52/100  Iteration 28894/56600 Training loss: 1.0246 0.4834 sec/batch
Epoch 52/100  Iteration 28895/56600 Training loss: 1.0237 0.4790 sec/batch
Epoch 52/100  Iteration 28896/56600 Training loss: 1.0232 0.4789 sec/batch
Epoch 52/100  Iteration 28897/56600 Training loss: 1.0228 0.4736 sec/batch
Epoch 52/100  Iteration 28898/56600 Training loss: 1.0226 0.4946 sec/batch
Epoch 52/100  Iteration 28899/56600 Training loss: 1.0232 0.4801 sec/batch
Epoch 52/100  Iteration 28900/56600 Training loss: 1.0248 0.4791 sec/batch
Epoch 52/100  Iteration 28901/56600 Training loss: 1.0267 0.4785 sec/batch
Epoch 52/100  Iteration 2

Epoch 52/100  Iteration 28999/56600 Training loss: 1.0216 0.4709 sec/batch
Epoch 52/100  Iteration 29000/56600 Training loss: 1.0215 0.4827 sec/batch
Epoch 52/100  Iteration 29001/56600 Training loss: 1.0212 0.4787 sec/batch
Epoch 52/100  Iteration 29002/56600 Training loss: 1.0211 0.4788 sec/batch
Epoch 52/100  Iteration 29003/56600 Training loss: 1.0212 0.4653 sec/batch
Epoch 52/100  Iteration 29004/56600 Training loss: 1.0216 0.4872 sec/batch
Epoch 52/100  Iteration 29005/56600 Training loss: 1.0219 0.4770 sec/batch
Epoch 52/100  Iteration 29006/56600 Training loss: 1.0218 0.4798 sec/batch
Epoch 52/100  Iteration 29007/56600 Training loss: 1.0220 0.4791 sec/batch
Epoch 52/100  Iteration 29008/56600 Training loss: 1.0221 0.4788 sec/batch
Epoch 52/100  Iteration 29009/56600 Training loss: 1.0222 0.4744 sec/batch
Epoch 52/100  Iteration 29010/56600 Training loss: 1.0222 0.4721 sec/batch
Epoch 52/100  Iteration 29011/56600 Training loss: 1.0224 0.4699 sec/batch
Epoch 52/100  Iteration 2

Epoch 52/100  Iteration 29109/56600 Training loss: 1.0288 0.4798 sec/batch
Epoch 52/100  Iteration 29110/56600 Training loss: 1.0290 0.4790 sec/batch
Epoch 52/100  Iteration 29111/56600 Training loss: 1.0293 0.4740 sec/batch
Epoch 52/100  Iteration 29112/56600 Training loss: 1.0294 0.4792 sec/batch
Epoch 52/100  Iteration 29113/56600 Training loss: 1.0295 0.4877 sec/batch
Epoch 52/100  Iteration 29114/56600 Training loss: 1.0295 0.4715 sec/batch
Epoch 52/100  Iteration 29115/56600 Training loss: 1.0296 0.4696 sec/batch
Epoch 52/100  Iteration 29116/56600 Training loss: 1.0296 0.4719 sec/batch
Epoch 52/100  Iteration 29117/56600 Training loss: 1.0294 0.4688 sec/batch
Epoch 52/100  Iteration 29118/56600 Training loss: 1.0293 0.4688 sec/batch
Epoch 52/100  Iteration 29119/56600 Training loss: 1.0293 0.4723 sec/batch
Epoch 52/100  Iteration 29120/56600 Training loss: 1.0293 0.4811 sec/batch
Epoch 52/100  Iteration 29121/56600 Training loss: 1.0292 0.4789 sec/batch
Epoch 52/100  Iteration 2

Epoch 52/100  Iteration 29219/56600 Training loss: 1.0257 0.4782 sec/batch
Epoch 52/100  Iteration 29220/56600 Training loss: 1.0257 0.4802 sec/batch
Epoch 52/100  Iteration 29221/56600 Training loss: 1.0257 0.4861 sec/batch
Epoch 52/100  Iteration 29222/56600 Training loss: 1.0258 0.4774 sec/batch
Epoch 52/100  Iteration 29223/56600 Training loss: 1.0259 0.4737 sec/batch
Epoch 52/100  Iteration 29224/56600 Training loss: 1.0259 0.4796 sec/batch
Epoch 52/100  Iteration 29225/56600 Training loss: 1.0259 0.4832 sec/batch
Epoch 52/100  Iteration 29226/56600 Training loss: 1.0260 0.4850 sec/batch
Epoch 52/100  Iteration 29227/56600 Training loss: 1.0259 0.4927 sec/batch
Epoch 52/100  Iteration 29228/56600 Training loss: 1.0259 0.4796 sec/batch
Epoch 52/100  Iteration 29229/56600 Training loss: 1.0259 0.4846 sec/batch
Epoch 52/100  Iteration 29230/56600 Training loss: 1.0258 0.4816 sec/batch
Epoch 52/100  Iteration 29231/56600 Training loss: 1.0257 0.4783 sec/batch
Epoch 52/100  Iteration 2

Epoch 52/100  Iteration 29329/56600 Training loss: 1.0225 0.4848 sec/batch
Epoch 52/100  Iteration 29330/56600 Training loss: 1.0224 0.4893 sec/batch
Epoch 52/100  Iteration 29331/56600 Training loss: 1.0224 0.4790 sec/batch
Epoch 52/100  Iteration 29332/56600 Training loss: 1.0224 0.4903 sec/batch
Epoch 52/100  Iteration 29333/56600 Training loss: 1.0225 0.4803 sec/batch
Epoch 52/100  Iteration 29334/56600 Training loss: 1.0225 0.4891 sec/batch
Epoch 52/100  Iteration 29335/56600 Training loss: 1.0225 0.4954 sec/batch
Epoch 52/100  Iteration 29336/56600 Training loss: 1.0226 0.4796 sec/batch
Epoch 52/100  Iteration 29337/56600 Training loss: 1.0227 0.4786 sec/batch
Epoch 52/100  Iteration 29338/56600 Training loss: 1.0228 0.4792 sec/batch
Epoch 52/100  Iteration 29339/56600 Training loss: 1.0229 0.4790 sec/batch
Epoch 52/100  Iteration 29340/56600 Training loss: 1.0228 0.4895 sec/batch
Epoch 52/100  Iteration 29341/56600 Training loss: 1.0227 0.4803 sec/batch
Epoch 52/100  Iteration 2

Epoch 53/100  Iteration 29439/56600 Training loss: 1.0600 0.4795 sec/batch
Epoch 53/100  Iteration 29440/56600 Training loss: 1.0530 0.4885 sec/batch
Epoch 53/100  Iteration 29441/56600 Training loss: 1.0496 0.4800 sec/batch
Epoch 53/100  Iteration 29442/56600 Training loss: 1.0477 0.4791 sec/batch
Epoch 53/100  Iteration 29443/56600 Training loss: 1.0469 0.4736 sec/batch
Epoch 53/100  Iteration 29444/56600 Training loss: 1.0464 0.4893 sec/batch
Epoch 53/100  Iteration 29445/56600 Training loss: 1.0451 0.4844 sec/batch
Epoch 53/100  Iteration 29446/56600 Training loss: 1.0429 0.4888 sec/batch
Epoch 53/100  Iteration 29447/56600 Training loss: 1.0412 0.4855 sec/batch
Epoch 53/100  Iteration 29448/56600 Training loss: 1.0402 0.4745 sec/batch
Epoch 53/100  Iteration 29449/56600 Training loss: 1.0383 0.4789 sec/batch
Epoch 53/100  Iteration 29450/56600 Training loss: 1.0362 0.4929 sec/batch
Epoch 53/100  Iteration 29451/56600 Training loss: 1.0327 0.4808 sec/batch
Epoch 53/100  Iteration 2

Epoch 53/100  Iteration 29549/56600 Training loss: 1.0160 0.4766 sec/batch
Epoch 53/100  Iteration 29550/56600 Training loss: 1.0163 0.4791 sec/batch
Epoch 53/100  Iteration 29551/56600 Training loss: 1.0165 0.4792 sec/batch
Epoch 53/100  Iteration 29552/56600 Training loss: 1.0171 0.4786 sec/batch
Epoch 53/100  Iteration 29553/56600 Training loss: 1.0177 0.4789 sec/batch
Epoch 53/100  Iteration 29554/56600 Training loss: 1.0182 0.4788 sec/batch
Epoch 53/100  Iteration 29555/56600 Training loss: 1.0181 0.4896 sec/batch
Epoch 53/100  Iteration 29556/56600 Training loss: 1.0181 0.4790 sec/batch
Epoch 53/100  Iteration 29557/56600 Training loss: 1.0184 0.4794 sec/batch
Epoch 53/100  Iteration 29558/56600 Training loss: 1.0186 0.4785 sec/batch
Epoch 53/100  Iteration 29559/56600 Training loss: 1.0187 0.4803 sec/batch
Epoch 53/100  Iteration 29560/56600 Training loss: 1.0188 0.4622 sec/batch
Epoch 53/100  Iteration 29561/56600 Training loss: 1.0190 0.4798 sec/batch
Epoch 53/100  Iteration 2

Epoch 53/100  Iteration 29659/56600 Training loss: 1.0245 0.4782 sec/batch
Epoch 53/100  Iteration 29660/56600 Training loss: 1.0244 0.4787 sec/batch
Epoch 53/100  Iteration 29661/56600 Training loss: 1.0245 0.4917 sec/batch
Epoch 53/100  Iteration 29662/56600 Training loss: 1.0247 0.4777 sec/batch
Epoch 53/100  Iteration 29663/56600 Training loss: 1.0248 0.4784 sec/batch
Epoch 53/100  Iteration 29664/56600 Training loss: 1.0249 0.4794 sec/batch
Epoch 53/100  Iteration 29665/56600 Training loss: 1.0251 0.4873 sec/batch
Epoch 53/100  Iteration 29666/56600 Training loss: 1.0251 0.4873 sec/batch
Epoch 53/100  Iteration 29667/56600 Training loss: 1.0252 0.4790 sec/batch
Epoch 53/100  Iteration 29668/56600 Training loss: 1.0253 0.4844 sec/batch
Epoch 53/100  Iteration 29669/56600 Training loss: 1.0253 0.4867 sec/batch
Epoch 53/100  Iteration 29670/56600 Training loss: 1.0255 0.4866 sec/batch
Epoch 53/100  Iteration 29671/56600 Training loss: 1.0256 0.4789 sec/batch
Epoch 53/100  Iteration 2

Epoch 53/100  Iteration 29769/56600 Training loss: 1.0243 0.4802 sec/batch
Epoch 53/100  Iteration 29770/56600 Training loss: 1.0243 0.4775 sec/batch
Epoch 53/100  Iteration 29771/56600 Training loss: 1.0243 0.4798 sec/batch
Epoch 53/100  Iteration 29772/56600 Training loss: 1.0243 0.4798 sec/batch
Epoch 53/100  Iteration 29773/56600 Training loss: 1.0241 0.4730 sec/batch
Epoch 53/100  Iteration 29774/56600 Training loss: 1.0241 0.4796 sec/batch
Epoch 53/100  Iteration 29775/56600 Training loss: 1.0239 0.4787 sec/batch
Epoch 53/100  Iteration 29776/56600 Training loss: 1.0239 0.4779 sec/batch
Epoch 53/100  Iteration 29777/56600 Training loss: 1.0240 0.4810 sec/batch
Epoch 53/100  Iteration 29778/56600 Training loss: 1.0239 0.4784 sec/batch
Epoch 53/100  Iteration 29779/56600 Training loss: 1.0238 0.4756 sec/batch
Epoch 53/100  Iteration 29780/56600 Training loss: 1.0238 0.4855 sec/batch
Epoch 53/100  Iteration 29781/56600 Training loss: 1.0237 0.4768 sec/batch
Epoch 53/100  Iteration 2

Epoch 53/100  Iteration 29879/56600 Training loss: 1.0206 0.4825 sec/batch
Epoch 53/100  Iteration 29880/56600 Training loss: 1.0206 0.4835 sec/batch
Epoch 53/100  Iteration 29881/56600 Training loss: 1.0206 0.4810 sec/batch
Epoch 53/100  Iteration 29882/56600 Training loss: 1.0206 0.4756 sec/batch
Epoch 53/100  Iteration 29883/56600 Training loss: 1.0206 0.4931 sec/batch
Epoch 53/100  Iteration 29884/56600 Training loss: 1.0206 0.4938 sec/batch
Epoch 53/100  Iteration 29885/56600 Training loss: 1.0205 0.4797 sec/batch
Epoch 53/100  Iteration 29886/56600 Training loss: 1.0205 0.4949 sec/batch
Epoch 53/100  Iteration 29887/56600 Training loss: 1.0206 0.4790 sec/batch
Epoch 53/100  Iteration 29888/56600 Training loss: 1.0205 0.4793 sec/batch
Epoch 53/100  Iteration 29889/56600 Training loss: 1.0204 0.4893 sec/batch
Epoch 53/100  Iteration 29890/56600 Training loss: 1.0204 0.4806 sec/batch
Epoch 53/100  Iteration 29891/56600 Training loss: 1.0203 0.4930 sec/batch
Epoch 53/100  Iteration 2

Epoch 53/100  Iteration 29989/56600 Training loss: 1.0195 0.4767 sec/batch
Epoch 53/100  Iteration 29990/56600 Training loss: 1.0196 0.4791 sec/batch
Epoch 53/100  Iteration 29991/56600 Training loss: 1.0196 0.4795 sec/batch
Epoch 53/100  Iteration 29992/56600 Training loss: 1.0196 0.4734 sec/batch
Epoch 53/100  Iteration 29993/56600 Training loss: 1.0197 0.4796 sec/batch
Epoch 53/100  Iteration 29994/56600 Training loss: 1.0197 0.4791 sec/batch
Epoch 53/100  Iteration 29995/56600 Training loss: 1.0196 0.4912 sec/batch
Epoch 53/100  Iteration 29996/56600 Training loss: 1.0197 0.4809 sec/batch
Epoch 53/100  Iteration 29997/56600 Training loss: 1.0198 0.4781 sec/batch
Epoch 53/100  Iteration 29998/56600 Training loss: 1.0199 0.4886 sec/batch
Epoch 54/100  Iteration 29999/56600 Training loss: 1.1564 0.4807 sec/batch
Epoch 54/100  Iteration 30000/56600 Training loss: 1.1085 0.4738 sec/batch
Validation loss: 1.2015 Saving checkpoint!
Epoch 54/100  Iteration 30001/56600 Training loss: 1.1724

Epoch 54/100  Iteration 30098/56600 Training loss: 1.0148 0.4895 sec/batch
Epoch 54/100  Iteration 30099/56600 Training loss: 1.0149 0.4639 sec/batch
Epoch 54/100  Iteration 30100/56600 Training loss: 1.0153 0.4681 sec/batch
Epoch 54/100  Iteration 30101/56600 Training loss: 1.0152 0.4858 sec/batch
Epoch 54/100  Iteration 30102/56600 Training loss: 1.0153 0.4680 sec/batch
Epoch 54/100  Iteration 30103/56600 Training loss: 1.0154 0.4781 sec/batch
Epoch 54/100  Iteration 30104/56600 Training loss: 1.0152 0.4801 sec/batch
Epoch 54/100  Iteration 30105/56600 Training loss: 1.0149 0.4860 sec/batch
Epoch 54/100  Iteration 30106/56600 Training loss: 1.0150 0.4727 sec/batch
Epoch 54/100  Iteration 30107/56600 Training loss: 1.0150 0.4725 sec/batch
Epoch 54/100  Iteration 30108/56600 Training loss: 1.0152 0.4895 sec/batch
Epoch 54/100  Iteration 30109/56600 Training loss: 1.0151 0.4772 sec/batch
Epoch 54/100  Iteration 30110/56600 Training loss: 1.0150 0.4796 sec/batch
Epoch 54/100  Iteration 3

Epoch 54/100  Iteration 30208/56600 Training loss: 1.0233 0.4786 sec/batch
Epoch 54/100  Iteration 30209/56600 Training loss: 1.0234 0.4688 sec/batch
Epoch 54/100  Iteration 30210/56600 Training loss: 1.0232 0.4905 sec/batch
Epoch 54/100  Iteration 30211/56600 Training loss: 1.0231 0.4688 sec/batch
Epoch 54/100  Iteration 30212/56600 Training loss: 1.0229 0.4786 sec/batch
Epoch 54/100  Iteration 30213/56600 Training loss: 1.0229 0.4790 sec/batch
Epoch 54/100  Iteration 30214/56600 Training loss: 1.0228 0.4795 sec/batch
Epoch 54/100  Iteration 30215/56600 Training loss: 1.0228 0.4792 sec/batch
Epoch 54/100  Iteration 30216/56600 Training loss: 1.0227 0.4788 sec/batch
Epoch 54/100  Iteration 30217/56600 Training loss: 1.0229 0.4799 sec/batch
Epoch 54/100  Iteration 30218/56600 Training loss: 1.0231 0.4757 sec/batch
Epoch 54/100  Iteration 30219/56600 Training loss: 1.0231 0.4844 sec/batch
Epoch 54/100  Iteration 30220/56600 Training loss: 1.0231 0.4791 sec/batch
Epoch 54/100  Iteration 3

Epoch 54/100  Iteration 30318/56600 Training loss: 1.0237 0.4948 sec/batch
Epoch 54/100  Iteration 30319/56600 Training loss: 1.0237 0.4949 sec/batch
Epoch 54/100  Iteration 30320/56600 Training loss: 1.0235 0.4949 sec/batch
Epoch 54/100  Iteration 30321/56600 Training loss: 1.0234 0.4763 sec/batch
Epoch 54/100  Iteration 30322/56600 Training loss: 1.0233 0.4746 sec/batch
Epoch 54/100  Iteration 30323/56600 Training loss: 1.0233 0.4787 sec/batch
Epoch 54/100  Iteration 30324/56600 Training loss: 1.0233 0.4792 sec/batch
Epoch 54/100  Iteration 30325/56600 Training loss: 1.0232 0.4794 sec/batch
Epoch 54/100  Iteration 30326/56600 Training loss: 1.0231 0.4888 sec/batch
Epoch 54/100  Iteration 30327/56600 Training loss: 1.0232 0.4798 sec/batch
Epoch 54/100  Iteration 30328/56600 Training loss: 1.0232 0.4943 sec/batch
Epoch 54/100  Iteration 30329/56600 Training loss: 1.0232 0.4794 sec/batch
Epoch 54/100  Iteration 30330/56600 Training loss: 1.0233 0.4791 sec/batch
Epoch 54/100  Iteration 3

Epoch 54/100  Iteration 30428/56600 Training loss: 1.0202 0.4945 sec/batch
Epoch 54/100  Iteration 30429/56600 Training loss: 1.0202 0.4790 sec/batch
Epoch 54/100  Iteration 30430/56600 Training loss: 1.0201 0.4795 sec/batch
Epoch 54/100  Iteration 30431/56600 Training loss: 1.0200 0.4776 sec/batch
Epoch 54/100  Iteration 30432/56600 Training loss: 1.0199 0.4810 sec/batch
Epoch 54/100  Iteration 30433/56600 Training loss: 1.0199 0.4882 sec/batch
Epoch 54/100  Iteration 30434/56600 Training loss: 1.0199 0.4807 sec/batch
Epoch 54/100  Iteration 30435/56600 Training loss: 1.0199 0.4783 sec/batch
Epoch 54/100  Iteration 30436/56600 Training loss: 1.0198 0.4844 sec/batch
Epoch 54/100  Iteration 30437/56600 Training loss: 1.0197 0.5044 sec/batch
Epoch 54/100  Iteration 30438/56600 Training loss: 1.0196 0.4910 sec/batch
Epoch 54/100  Iteration 30439/56600 Training loss: 1.0195 0.4822 sec/batch
Epoch 54/100  Iteration 30440/56600 Training loss: 1.0194 0.4864 sec/batch
Epoch 54/100  Iteration 3

Epoch 54/100  Iteration 30538/56600 Training loss: 1.0182 0.4706 sec/batch
Epoch 54/100  Iteration 30539/56600 Training loss: 1.0182 0.4784 sec/batch
Epoch 54/100  Iteration 30540/56600 Training loss: 1.0181 0.4766 sec/batch
Epoch 54/100  Iteration 30541/56600 Training loss: 1.0182 0.4923 sec/batch
Epoch 54/100  Iteration 30542/56600 Training loss: 1.0181 0.4797 sec/batch
Epoch 54/100  Iteration 30543/56600 Training loss: 1.0181 0.4801 sec/batch
Epoch 54/100  Iteration 30544/56600 Training loss: 1.0181 0.4866 sec/batch
Epoch 54/100  Iteration 30545/56600 Training loss: 1.0181 0.4718 sec/batch
Epoch 54/100  Iteration 30546/56600 Training loss: 1.0181 0.4780 sec/batch
Epoch 54/100  Iteration 30547/56600 Training loss: 1.0181 0.4912 sec/batch
Epoch 54/100  Iteration 30548/56600 Training loss: 1.0180 0.4832 sec/batch
Epoch 54/100  Iteration 30549/56600 Training loss: 1.0180 0.4780 sec/batch
Epoch 54/100  Iteration 30550/56600 Training loss: 1.0180 0.4791 sec/batch
Epoch 54/100  Iteration 3

Epoch 55/100  Iteration 30648/56600 Training loss: 1.0109 0.4793 sec/batch
Epoch 55/100  Iteration 30649/56600 Training loss: 1.0107 0.4788 sec/batch
Epoch 55/100  Iteration 30650/56600 Training loss: 1.0106 0.4947 sec/batch
Epoch 55/100  Iteration 30651/56600 Training loss: 1.0103 0.4796 sec/batch
Epoch 55/100  Iteration 30652/56600 Training loss: 1.0099 0.4941 sec/batch
Epoch 55/100  Iteration 30653/56600 Training loss: 1.0099 0.4906 sec/batch
Epoch 55/100  Iteration 30654/56600 Training loss: 1.0102 0.4813 sec/batch
Epoch 55/100  Iteration 30655/56600 Training loss: 1.0102 0.4716 sec/batch
Epoch 55/100  Iteration 30656/56600 Training loss: 1.0101 0.4807 sec/batch
Epoch 55/100  Iteration 30657/56600 Training loss: 1.0099 0.4877 sec/batch
Epoch 55/100  Iteration 30658/56600 Training loss: 1.0100 0.4722 sec/batch
Epoch 55/100  Iteration 30659/56600 Training loss: 1.0098 0.4712 sec/batch
Epoch 55/100  Iteration 30660/56600 Training loss: 1.0097 0.4811 sec/batch
Epoch 55/100  Iteration 3

Epoch 55/100  Iteration 30758/56600 Training loss: 1.0180 0.4795 sec/batch
Epoch 55/100  Iteration 30759/56600 Training loss: 1.0183 0.4744 sec/batch
Epoch 55/100  Iteration 30760/56600 Training loss: 1.0183 0.4825 sec/batch
Epoch 55/100  Iteration 30761/56600 Training loss: 1.0184 0.4786 sec/batch
Epoch 55/100  Iteration 30762/56600 Training loss: 1.0186 0.4844 sec/batch
Epoch 55/100  Iteration 30763/56600 Training loss: 1.0187 0.4876 sec/batch
Epoch 55/100  Iteration 30764/56600 Training loss: 1.0187 0.4865 sec/batch
Epoch 55/100  Iteration 30765/56600 Training loss: 1.0186 0.5031 sec/batch
Epoch 55/100  Iteration 30766/56600 Training loss: 1.0189 0.4794 sec/batch
Epoch 55/100  Iteration 30767/56600 Training loss: 1.0190 0.4794 sec/batch
Epoch 55/100  Iteration 30768/56600 Training loss: 1.0191 0.4672 sec/batch
Epoch 55/100  Iteration 30769/56600 Training loss: 1.0193 0.4861 sec/batch
Epoch 55/100  Iteration 30770/56600 Training loss: 1.0195 0.4875 sec/batch
Epoch 55/100  Iteration 3

Epoch 55/100  Iteration 30868/56600 Training loss: 1.0211 0.4938 sec/batch
Epoch 55/100  Iteration 30869/56600 Training loss: 1.0211 0.4687 sec/batch
Epoch 55/100  Iteration 30870/56600 Training loss: 1.0210 0.4785 sec/batch
Epoch 55/100  Iteration 30871/56600 Training loss: 1.0208 0.4793 sec/batch
Epoch 55/100  Iteration 30872/56600 Training loss: 1.0207 0.4791 sec/batch
Epoch 55/100  Iteration 30873/56600 Training loss: 1.0206 0.4682 sec/batch
Epoch 55/100  Iteration 30874/56600 Training loss: 1.0206 0.4733 sec/batch
Epoch 55/100  Iteration 30875/56600 Training loss: 1.0206 0.4798 sec/batch
Epoch 55/100  Iteration 30876/56600 Training loss: 1.0205 0.4785 sec/batch
Epoch 55/100  Iteration 30877/56600 Training loss: 1.0204 0.4801 sec/batch
Epoch 55/100  Iteration 30878/56600 Training loss: 1.0203 0.4802 sec/batch
Epoch 55/100  Iteration 30879/56600 Training loss: 1.0203 0.4779 sec/batch
Epoch 55/100  Iteration 30880/56600 Training loss: 1.0203 0.4788 sec/batch
Epoch 55/100  Iteration 3

Epoch 55/100  Iteration 30978/56600 Training loss: 1.0176 0.4794 sec/batch
Epoch 55/100  Iteration 30979/56600 Training loss: 1.0175 0.4816 sec/batch
Epoch 55/100  Iteration 30980/56600 Training loss: 1.0175 0.4934 sec/batch
Epoch 55/100  Iteration 30981/56600 Training loss: 1.0175 0.4780 sec/batch
Epoch 55/100  Iteration 30982/56600 Training loss: 1.0175 0.4790 sec/batch
Epoch 55/100  Iteration 30983/56600 Training loss: 1.0175 0.4877 sec/batch
Epoch 55/100  Iteration 30984/56600 Training loss: 1.0176 0.4757 sec/batch
Epoch 55/100  Iteration 30985/56600 Training loss: 1.0175 0.4909 sec/batch
Epoch 55/100  Iteration 30986/56600 Training loss: 1.0175 0.4883 sec/batch
Epoch 55/100  Iteration 30987/56600 Training loss: 1.0175 0.4839 sec/batch
Epoch 55/100  Iteration 30988/56600 Training loss: 1.0176 0.4738 sec/batch
Epoch 55/100  Iteration 30989/56600 Training loss: 1.0175 0.4860 sec/batch
Epoch 55/100  Iteration 30990/56600 Training loss: 1.0174 0.4713 sec/batch
Epoch 55/100  Iteration 3

Epoch 55/100  Iteration 31088/56600 Training loss: 1.0154 0.4746 sec/batch
Epoch 55/100  Iteration 31089/56600 Training loss: 1.0154 0.4626 sec/batch
Epoch 55/100  Iteration 31090/56600 Training loss: 1.0154 0.4756 sec/batch
Epoch 55/100  Iteration 31091/56600 Training loss: 1.0155 0.4663 sec/batch
Epoch 55/100  Iteration 31092/56600 Training loss: 1.0155 0.4797 sec/batch
Epoch 55/100  Iteration 31093/56600 Training loss: 1.0155 0.4733 sec/batch
Epoch 55/100  Iteration 31094/56600 Training loss: 1.0155 0.4642 sec/batch
Epoch 55/100  Iteration 31095/56600 Training loss: 1.0155 0.4625 sec/batch
Epoch 55/100  Iteration 31096/56600 Training loss: 1.0154 0.4691 sec/batch
Epoch 55/100  Iteration 31097/56600 Training loss: 1.0154 0.4841 sec/batch
Epoch 55/100  Iteration 31098/56600 Training loss: 1.0153 0.4691 sec/batch
Epoch 55/100  Iteration 31099/56600 Training loss: 1.0153 0.4742 sec/batch
Epoch 55/100  Iteration 31100/56600 Training loss: 1.0153 0.4864 sec/batch
Epoch 55/100  Iteration 3

Epoch 56/100  Iteration 31198/56600 Training loss: 1.0110 0.4850 sec/batch
Epoch 56/100  Iteration 31199/56600 Training loss: 1.0112 0.4707 sec/batch
Epoch 56/100  Iteration 31200/56600 Training loss: 1.0107 0.4751 sec/batch
Epoch 56/100  Iteration 31201/56600 Training loss: 1.0105 0.4931 sec/batch
Epoch 56/100  Iteration 31202/56600 Training loss: 1.0099 0.4791 sec/batch
Epoch 56/100  Iteration 31203/56600 Training loss: 1.0093 0.4794 sec/batch
Epoch 56/100  Iteration 31204/56600 Training loss: 1.0098 0.4700 sec/batch
Epoch 56/100  Iteration 31205/56600 Training loss: 1.0099 0.4739 sec/batch
Epoch 56/100  Iteration 31206/56600 Training loss: 1.0094 0.4782 sec/batch
Epoch 56/100  Iteration 31207/56600 Training loss: 1.0092 0.4945 sec/batch
Epoch 56/100  Iteration 31208/56600 Training loss: 1.0091 0.4898 sec/batch
Epoch 56/100  Iteration 31209/56600 Training loss: 1.0088 0.4845 sec/batch
Epoch 56/100  Iteration 31210/56600 Training loss: 1.0088 0.4791 sec/batch
Epoch 56/100  Iteration 3

Epoch 56/100  Iteration 31308/56600 Training loss: 1.0152 0.4789 sec/batch
Epoch 56/100  Iteration 31309/56600 Training loss: 1.0150 0.4944 sec/batch
Epoch 56/100  Iteration 31310/56600 Training loss: 1.0151 0.4795 sec/batch
Epoch 56/100  Iteration 31311/56600 Training loss: 1.0151 0.4785 sec/batch
Epoch 56/100  Iteration 31312/56600 Training loss: 1.0151 0.5000 sec/batch
Epoch 56/100  Iteration 31313/56600 Training loss: 1.0152 0.4653 sec/batch
Epoch 56/100  Iteration 31314/56600 Training loss: 1.0154 0.4718 sec/batch
Epoch 56/100  Iteration 31315/56600 Training loss: 1.0154 0.4721 sec/batch
Epoch 56/100  Iteration 31316/56600 Training loss: 1.0155 0.4812 sec/batch
Epoch 56/100  Iteration 31317/56600 Training loss: 1.0157 0.4785 sec/batch
Epoch 56/100  Iteration 31318/56600 Training loss: 1.0159 0.4794 sec/batch
Epoch 56/100  Iteration 31319/56600 Training loss: 1.0159 0.4736 sec/batch
Epoch 56/100  Iteration 31320/56600 Training loss: 1.0160 0.4693 sec/batch
Epoch 56/100  Iteration 3

Epoch 56/100  Iteration 31418/56600 Training loss: 1.0188 0.4836 sec/batch
Epoch 56/100  Iteration 31419/56600 Training loss: 1.0187 0.4748 sec/batch
Epoch 56/100  Iteration 31420/56600 Training loss: 1.0188 0.4724 sec/batch
Epoch 56/100  Iteration 31421/56600 Training loss: 1.0189 0.4808 sec/batch
Epoch 56/100  Iteration 31422/56600 Training loss: 1.0191 0.4786 sec/batch
Epoch 56/100  Iteration 31423/56600 Training loss: 1.0192 0.4795 sec/batch
Epoch 56/100  Iteration 31424/56600 Training loss: 1.0193 0.4892 sec/batch
Epoch 56/100  Iteration 31425/56600 Training loss: 1.0194 0.4680 sec/batch
Epoch 56/100  Iteration 31426/56600 Training loss: 1.0194 0.4700 sec/batch
Epoch 56/100  Iteration 31427/56600 Training loss: 1.0195 0.4664 sec/batch
Epoch 56/100  Iteration 31428/56600 Training loss: 1.0196 0.4714 sec/batch
Epoch 56/100  Iteration 31429/56600 Training loss: 1.0196 0.4718 sec/batch
Epoch 56/100  Iteration 31430/56600 Training loss: 1.0196 0.4951 sec/batch
Epoch 56/100  Iteration 3

Epoch 56/100  Iteration 31528/56600 Training loss: 1.0161 0.4688 sec/batch
Epoch 56/100  Iteration 31529/56600 Training loss: 1.0163 0.4752 sec/batch
Epoch 56/100  Iteration 31530/56600 Training loss: 1.0163 0.4830 sec/batch
Epoch 56/100  Iteration 31531/56600 Training loss: 1.0162 0.4785 sec/batch
Epoch 56/100  Iteration 31532/56600 Training loss: 1.0161 0.4798 sec/batch
Epoch 56/100  Iteration 31533/56600 Training loss: 1.0161 0.4702 sec/batch
Epoch 56/100  Iteration 31534/56600 Training loss: 1.0160 0.4724 sec/batch
Epoch 56/100  Iteration 31535/56600 Training loss: 1.0160 0.4706 sec/batch
Epoch 56/100  Iteration 31536/56600 Training loss: 1.0161 0.4671 sec/batch
Epoch 56/100  Iteration 31537/56600 Training loss: 1.0161 0.4786 sec/batch
Epoch 56/100  Iteration 31538/56600 Training loss: 1.0160 0.5000 sec/batch
Epoch 56/100  Iteration 31539/56600 Training loss: 1.0159 0.4887 sec/batch
Epoch 56/100  Iteration 31540/56600 Training loss: 1.0158 0.4772 sec/batch
Epoch 56/100  Iteration 3

Epoch 56/100  Iteration 31638/56600 Training loss: 1.0136 0.4791 sec/batch
Epoch 56/100  Iteration 31639/56600 Training loss: 1.0136 0.4844 sec/batch
Epoch 56/100  Iteration 31640/56600 Training loss: 1.0136 0.4793 sec/batch
Epoch 56/100  Iteration 31641/56600 Training loss: 1.0136 0.4789 sec/batch
Epoch 56/100  Iteration 31642/56600 Training loss: 1.0135 0.4640 sec/batch
Epoch 56/100  Iteration 31643/56600 Training loss: 1.0135 0.4646 sec/batch
Epoch 56/100  Iteration 31644/56600 Training loss: 1.0136 0.4621 sec/batch
Epoch 56/100  Iteration 31645/56600 Training loss: 1.0136 0.4740 sec/batch
Epoch 56/100  Iteration 31646/56600 Training loss: 1.0136 0.4636 sec/batch
Epoch 56/100  Iteration 31647/56600 Training loss: 1.0136 0.4755 sec/batch
Epoch 56/100  Iteration 31648/56600 Training loss: 1.0136 0.4688 sec/batch
Epoch 56/100  Iteration 31649/56600 Training loss: 1.0136 0.4773 sec/batch
Epoch 56/100  Iteration 31650/56600 Training loss: 1.0136 0.4656 sec/batch
Epoch 56/100  Iteration 3

Epoch 57/100  Iteration 31748/56600 Training loss: 1.0173 0.4799 sec/batch
Epoch 57/100  Iteration 31749/56600 Training loss: 1.0168 0.4735 sec/batch
Epoch 57/100  Iteration 31750/56600 Training loss: 1.0162 0.4796 sec/batch
Epoch 57/100  Iteration 31751/56600 Training loss: 1.0155 0.4771 sec/batch
Epoch 57/100  Iteration 31752/56600 Training loss: 1.0148 0.4653 sec/batch
Epoch 57/100  Iteration 31753/56600 Training loss: 1.0140 0.4708 sec/batch
Epoch 57/100  Iteration 31754/56600 Training loss: 1.0131 0.4658 sec/batch
Epoch 57/100  Iteration 31755/56600 Training loss: 1.0126 0.4787 sec/batch
Epoch 57/100  Iteration 31756/56600 Training loss: 1.0122 0.4794 sec/batch
Epoch 57/100  Iteration 31757/56600 Training loss: 1.0116 0.4636 sec/batch
Epoch 57/100  Iteration 31758/56600 Training loss: 1.0111 0.4800 sec/batch
Epoch 57/100  Iteration 31759/56600 Training loss: 1.0109 0.4780 sec/batch
Epoch 57/100  Iteration 31760/56600 Training loss: 1.0102 0.4794 sec/batch
Epoch 57/100  Iteration 3

Epoch 57/100  Iteration 31858/56600 Training loss: 1.0144 0.4679 sec/batch
Epoch 57/100  Iteration 31859/56600 Training loss: 1.0143 0.4629 sec/batch
Epoch 57/100  Iteration 31860/56600 Training loss: 1.0143 0.4851 sec/batch
Epoch 57/100  Iteration 31861/56600 Training loss: 1.0142 0.4843 sec/batch
Epoch 57/100  Iteration 31862/56600 Training loss: 1.0143 0.4631 sec/batch
Epoch 57/100  Iteration 31863/56600 Training loss: 1.0144 0.4742 sec/batch
Epoch 57/100  Iteration 31864/56600 Training loss: 1.0146 0.4791 sec/batch
Epoch 57/100  Iteration 31865/56600 Training loss: 1.0146 0.4793 sec/batch
Epoch 57/100  Iteration 31866/56600 Training loss: 1.0146 0.4793 sec/batch
Epoch 57/100  Iteration 31867/56600 Training loss: 1.0144 0.4773 sec/batch
Epoch 57/100  Iteration 31868/56600 Training loss: 1.0143 0.4854 sec/batch
Epoch 57/100  Iteration 31869/56600 Training loss: 1.0143 0.4812 sec/batch
Epoch 57/100  Iteration 31870/56600 Training loss: 1.0143 0.4618 sec/batch
Epoch 57/100  Iteration 3

Epoch 57/100  Iteration 31968/56600 Training loss: 1.0177 0.4805 sec/batch
Epoch 57/100  Iteration 31969/56600 Training loss: 1.0177 0.4775 sec/batch
Epoch 57/100  Iteration 31970/56600 Training loss: 1.0177 0.4772 sec/batch
Epoch 57/100  Iteration 31971/56600 Training loss: 1.0176 0.4646 sec/batch
Epoch 57/100  Iteration 31972/56600 Training loss: 1.0176 0.4793 sec/batch
Epoch 57/100  Iteration 31973/56600 Training loss: 1.0176 0.4817 sec/batch
Epoch 57/100  Iteration 31974/56600 Training loss: 1.0175 0.4769 sec/batch
Epoch 57/100  Iteration 31975/56600 Training loss: 1.0173 0.4629 sec/batch
Epoch 57/100  Iteration 31976/56600 Training loss: 1.0172 0.4759 sec/batch
Epoch 57/100  Iteration 31977/56600 Training loss: 1.0171 0.4777 sec/batch
Epoch 57/100  Iteration 31978/56600 Training loss: 1.0170 0.4639 sec/batch
Epoch 57/100  Iteration 31979/56600 Training loss: 1.0171 0.4783 sec/batch
Epoch 57/100  Iteration 31980/56600 Training loss: 1.0170 0.4844 sec/batch
Epoch 57/100  Iteration 3

Epoch 57/100  Iteration 32077/56600 Training loss: 1.0153 0.4597 sec/batch
Epoch 57/100  Iteration 32078/56600 Training loss: 1.0153 0.4684 sec/batch
Epoch 57/100  Iteration 32079/56600 Training loss: 1.0152 0.4846 sec/batch
Epoch 57/100  Iteration 32080/56600 Training loss: 1.0152 0.4792 sec/batch
Epoch 57/100  Iteration 32081/56600 Training loss: 1.0152 0.4790 sec/batch
Epoch 57/100  Iteration 32082/56600 Training loss: 1.0153 0.4787 sec/batch
Epoch 57/100  Iteration 32083/56600 Training loss: 1.0153 0.4725 sec/batch
Epoch 57/100  Iteration 32084/56600 Training loss: 1.0153 0.4813 sec/batch
Epoch 57/100  Iteration 32085/56600 Training loss: 1.0152 0.4784 sec/batch
Epoch 57/100  Iteration 32086/56600 Training loss: 1.0152 0.4844 sec/batch
Epoch 57/100  Iteration 32087/56600 Training loss: 1.0151 0.4794 sec/batch
Epoch 57/100  Iteration 32088/56600 Training loss: 1.0151 0.4795 sec/batch
Epoch 57/100  Iteration 32089/56600 Training loss: 1.0152 0.4637 sec/batch
Epoch 57/100  Iteration 3

Epoch 57/100  Iteration 32187/56600 Training loss: 1.0128 0.4766 sec/batch
Epoch 57/100  Iteration 32188/56600 Training loss: 1.0128 0.4692 sec/batch
Epoch 57/100  Iteration 32189/56600 Training loss: 1.0128 0.4737 sec/batch
Epoch 57/100  Iteration 32190/56600 Training loss: 1.0128 0.4844 sec/batch
Epoch 57/100  Iteration 32191/56600 Training loss: 1.0128 0.4741 sec/batch
Epoch 57/100  Iteration 32192/56600 Training loss: 1.0128 0.4796 sec/batch
Epoch 57/100  Iteration 32193/56600 Training loss: 1.0127 0.4938 sec/batch
Epoch 57/100  Iteration 32194/56600 Training loss: 1.0127 0.4802 sec/batch
Epoch 57/100  Iteration 32195/56600 Training loss: 1.0127 0.4958 sec/batch
Epoch 57/100  Iteration 32196/56600 Training loss: 1.0126 0.4703 sec/batch
Epoch 57/100  Iteration 32197/56600 Training loss: 1.0126 0.4714 sec/batch
Epoch 57/100  Iteration 32198/56600 Training loss: 1.0126 0.4865 sec/batch
Epoch 57/100  Iteration 32199/56600 Training loss: 1.0125 0.4739 sec/batch
Epoch 57/100  Iteration 3

Epoch 58/100  Iteration 32297/56600 Training loss: 1.0153 0.4825 sec/batch
Epoch 58/100  Iteration 32298/56600 Training loss: 1.0163 0.4916 sec/batch
Epoch 58/100  Iteration 32299/56600 Training loss: 1.0171 0.4791 sec/batch
Epoch 58/100  Iteration 32300/56600 Training loss: 1.0176 0.4799 sec/batch
Epoch 58/100  Iteration 32301/56600 Training loss: 1.0178 0.4904 sec/batch
Epoch 58/100  Iteration 32302/56600 Training loss: 1.0185 0.4749 sec/batch
Epoch 58/100  Iteration 32303/56600 Training loss: 1.0186 0.4956 sec/batch
Epoch 58/100  Iteration 32304/56600 Training loss: 1.0190 0.4786 sec/batch
Epoch 58/100  Iteration 32305/56600 Training loss: 1.0187 0.4708 sec/batch
Epoch 58/100  Iteration 32306/56600 Training loss: 1.0184 0.4721 sec/batch
Epoch 58/100  Iteration 32307/56600 Training loss: 1.0188 0.4790 sec/batch
Epoch 58/100  Iteration 32308/56600 Training loss: 1.0195 0.4785 sec/batch
Epoch 58/100  Iteration 32309/56600 Training loss: 1.0186 0.4797 sec/batch
Epoch 58/100  Iteration 3

Epoch 58/100  Iteration 32407/56600 Training loss: 1.0102 0.4799 sec/batch
Epoch 58/100  Iteration 32408/56600 Training loss: 1.0102 0.4926 sec/batch
Epoch 58/100  Iteration 32409/56600 Training loss: 1.0102 0.4805 sec/batch
Epoch 58/100  Iteration 32410/56600 Training loss: 1.0104 0.4797 sec/batch
Epoch 58/100  Iteration 32411/56600 Training loss: 1.0105 0.4802 sec/batch
Epoch 58/100  Iteration 32412/56600 Training loss: 1.0107 0.4703 sec/batch
Epoch 58/100  Iteration 32413/56600 Training loss: 1.0111 0.4867 sec/batch
Epoch 58/100  Iteration 32414/56600 Training loss: 1.0114 0.4792 sec/batch
Epoch 58/100  Iteration 32415/56600 Training loss: 1.0117 0.4796 sec/batch
Epoch 58/100  Iteration 32416/56600 Training loss: 1.0117 0.4894 sec/batch
Epoch 58/100  Iteration 32417/56600 Training loss: 1.0119 0.4844 sec/batch
Epoch 58/100  Iteration 32418/56600 Training loss: 1.0120 0.4795 sec/batch
Epoch 58/100  Iteration 32419/56600 Training loss: 1.0123 0.4786 sec/batch
Epoch 58/100  Iteration 3

Epoch 58/100  Iteration 32517/56600 Training loss: 1.0170 0.4947 sec/batch
Epoch 58/100  Iteration 32518/56600 Training loss: 1.0170 0.4759 sec/batch
Epoch 58/100  Iteration 32519/56600 Training loss: 1.0168 0.4784 sec/batch
Epoch 58/100  Iteration 32520/56600 Training loss: 1.0167 0.4784 sec/batch
Epoch 58/100  Iteration 32521/56600 Training loss: 1.0166 0.4800 sec/batch
Epoch 58/100  Iteration 32522/56600 Training loss: 1.0164 0.4868 sec/batch
Epoch 58/100  Iteration 32523/56600 Training loss: 1.0163 0.4818 sec/batch
Epoch 58/100  Iteration 32524/56600 Training loss: 1.0162 0.4947 sec/batch
Epoch 58/100  Iteration 32525/56600 Training loss: 1.0162 0.4800 sec/batch
Epoch 58/100  Iteration 32526/56600 Training loss: 1.0162 0.4874 sec/batch
Epoch 58/100  Iteration 32527/56600 Training loss: 1.0161 0.5020 sec/batch
Epoch 58/100  Iteration 32528/56600 Training loss: 1.0160 0.4786 sec/batch
Epoch 58/100  Iteration 32529/56600 Training loss: 1.0159 0.4844 sec/batch
Epoch 58/100  Iteration 3

Epoch 58/100  Iteration 32627/56600 Training loss: 1.0139 0.4787 sec/batch
Epoch 58/100  Iteration 32628/56600 Training loss: 1.0139 0.4762 sec/batch
Epoch 58/100  Iteration 32629/56600 Training loss: 1.0138 0.4673 sec/batch
Epoch 58/100  Iteration 32630/56600 Training loss: 1.0138 0.4792 sec/batch
Epoch 58/100  Iteration 32631/56600 Training loss: 1.0137 0.4786 sec/batch
Epoch 58/100  Iteration 32632/56600 Training loss: 1.0136 0.4803 sec/batch
Epoch 58/100  Iteration 32633/56600 Training loss: 1.0136 0.4770 sec/batch
Epoch 58/100  Iteration 32634/56600 Training loss: 1.0135 0.4657 sec/batch
Epoch 58/100  Iteration 32635/56600 Training loss: 1.0134 0.4610 sec/batch
Epoch 58/100  Iteration 32636/56600 Training loss: 1.0134 0.4722 sec/batch
Epoch 58/100  Iteration 32637/56600 Training loss: 1.0133 0.4708 sec/batch
Epoch 58/100  Iteration 32638/56600 Training loss: 1.0133 0.4863 sec/batch
Epoch 58/100  Iteration 32639/56600 Training loss: 1.0133 0.4666 sec/batch
Epoch 58/100  Iteration 3

Epoch 58/100  Iteration 32737/56600 Training loss: 1.0113 0.4834 sec/batch
Epoch 58/100  Iteration 32738/56600 Training loss: 1.0113 0.4907 sec/batch
Epoch 58/100  Iteration 32739/56600 Training loss: 1.0113 0.4761 sec/batch
Epoch 58/100  Iteration 32740/56600 Training loss: 1.0113 0.4766 sec/batch
Epoch 58/100  Iteration 32741/56600 Training loss: 1.0113 0.4911 sec/batch
Epoch 58/100  Iteration 32742/56600 Training loss: 1.0112 0.4830 sec/batch
Epoch 58/100  Iteration 32743/56600 Training loss: 1.0112 0.4870 sec/batch
Epoch 58/100  Iteration 32744/56600 Training loss: 1.0112 0.4715 sec/batch
Epoch 58/100  Iteration 32745/56600 Training loss: 1.0112 0.4898 sec/batch
Epoch 58/100  Iteration 32746/56600 Training loss: 1.0111 0.4844 sec/batch
Epoch 58/100  Iteration 32747/56600 Training loss: 1.0111 0.4889 sec/batch
Epoch 58/100  Iteration 32748/56600 Training loss: 1.0111 0.4942 sec/batch
Epoch 58/100  Iteration 32749/56600 Training loss: 1.0111 0.4955 sec/batch
Epoch 58/100  Iteration 3

Epoch 59/100  Iteration 32847/56600 Training loss: 1.0231 0.4789 sec/batch
Epoch 59/100  Iteration 32848/56600 Training loss: 1.0213 0.4738 sec/batch
Epoch 59/100  Iteration 32849/56600 Training loss: 1.0200 0.4687 sec/batch
Epoch 59/100  Iteration 32850/56600 Training loss: 1.0182 0.4799 sec/batch
Epoch 59/100  Iteration 32851/56600 Training loss: 1.0173 0.4861 sec/batch
Epoch 59/100  Iteration 32852/56600 Training loss: 1.0164 0.4688 sec/batch
Epoch 59/100  Iteration 32853/56600 Training loss: 1.0156 0.4636 sec/batch
Epoch 59/100  Iteration 32854/56600 Training loss: 1.0137 0.4718 sec/batch
Epoch 59/100  Iteration 32855/56600 Training loss: 1.0132 0.4814 sec/batch
Epoch 59/100  Iteration 32856/56600 Training loss: 1.0129 0.4788 sec/batch
Epoch 59/100  Iteration 32857/56600 Training loss: 1.0122 0.4757 sec/batch
Epoch 59/100  Iteration 32858/56600 Training loss: 1.0117 0.4761 sec/batch
Epoch 59/100  Iteration 32859/56600 Training loss: 1.0108 0.4864 sec/batch
Epoch 59/100  Iteration 3

Epoch 59/100  Iteration 32957/56600 Training loss: 1.0077 0.4942 sec/batch
Epoch 59/100  Iteration 32958/56600 Training loss: 1.0076 0.4957 sec/batch
Epoch 59/100  Iteration 32959/56600 Training loss: 1.0078 0.4782 sec/batch
Epoch 59/100  Iteration 32960/56600 Training loss: 1.0076 0.4970 sec/batch
Epoch 59/100  Iteration 32961/56600 Training loss: 1.0076 0.4749 sec/batch
Epoch 59/100  Iteration 32962/56600 Training loss: 1.0076 0.4785 sec/batch
Epoch 59/100  Iteration 32963/56600 Training loss: 1.0073 0.4952 sec/batch
Epoch 59/100  Iteration 32964/56600 Training loss: 1.0072 0.4875 sec/batch
Epoch 59/100  Iteration 32965/56600 Training loss: 1.0073 0.4816 sec/batch
Epoch 59/100  Iteration 32966/56600 Training loss: 1.0077 0.4939 sec/batch
Epoch 59/100  Iteration 32967/56600 Training loss: 1.0080 0.4792 sec/batch
Epoch 59/100  Iteration 32968/56600 Training loss: 1.0078 0.4739 sec/batch
Epoch 59/100  Iteration 32969/56600 Training loss: 1.0080 0.4846 sec/batch
Epoch 59/100  Iteration 3

Epoch 59/100  Iteration 33067/56600 Training loss: 1.0142 0.4943 sec/batch
Epoch 59/100  Iteration 33068/56600 Training loss: 1.0144 0.4916 sec/batch
Epoch 59/100  Iteration 33069/56600 Training loss: 1.0146 0.4921 sec/batch
Epoch 59/100  Iteration 33070/56600 Training loss: 1.0148 0.4777 sec/batch
Epoch 59/100  Iteration 33071/56600 Training loss: 1.0150 0.4851 sec/batch
Epoch 59/100  Iteration 33072/56600 Training loss: 1.0152 0.4792 sec/batch
Epoch 59/100  Iteration 33073/56600 Training loss: 1.0153 0.4930 sec/batch
Epoch 59/100  Iteration 33074/56600 Training loss: 1.0155 0.4811 sec/batch
Epoch 59/100  Iteration 33075/56600 Training loss: 1.0156 0.4690 sec/batch
Epoch 59/100  Iteration 33076/56600 Training loss: 1.0155 0.4785 sec/batch
Epoch 59/100  Iteration 33077/56600 Training loss: 1.0156 0.4798 sec/batch
Epoch 59/100  Iteration 33078/56600 Training loss: 1.0157 0.4946 sec/batch
Epoch 59/100  Iteration 33079/56600 Training loss: 1.0154 0.4790 sec/batch
Epoch 59/100  Iteration 3

Epoch 59/100  Iteration 33177/56600 Training loss: 1.0122 0.4888 sec/batch
Epoch 59/100  Iteration 33178/56600 Training loss: 1.0121 0.4780 sec/batch
Epoch 59/100  Iteration 33179/56600 Training loss: 1.0119 0.4790 sec/batch
Epoch 59/100  Iteration 33180/56600 Training loss: 1.0117 0.4797 sec/batch
Epoch 59/100  Iteration 33181/56600 Training loss: 1.0118 0.4687 sec/batch
Epoch 59/100  Iteration 33182/56600 Training loss: 1.0117 0.4777 sec/batch
Epoch 59/100  Iteration 33183/56600 Training loss: 1.0117 0.4797 sec/batch
Epoch 59/100  Iteration 33184/56600 Training loss: 1.0118 0.4735 sec/batch
Epoch 59/100  Iteration 33185/56600 Training loss: 1.0120 0.4795 sec/batch
Epoch 59/100  Iteration 33186/56600 Training loss: 1.0119 0.4784 sec/batch
Epoch 59/100  Iteration 33187/56600 Training loss: 1.0119 0.4790 sec/batch
Epoch 59/100  Iteration 33188/56600 Training loss: 1.0120 0.4791 sec/batch
Epoch 59/100  Iteration 33189/56600 Training loss: 1.0120 0.4801 sec/batch
Epoch 59/100  Iteration 3

Epoch 59/100  Iteration 33287/56600 Training loss: 1.0087 0.4922 sec/batch
Epoch 59/100  Iteration 33288/56600 Training loss: 1.0088 0.4798 sec/batch
Epoch 59/100  Iteration 33289/56600 Training loss: 1.0088 0.4790 sec/batch
Epoch 59/100  Iteration 33290/56600 Training loss: 1.0087 0.4849 sec/batch
Epoch 59/100  Iteration 33291/56600 Training loss: 1.0087 0.4670 sec/batch
Epoch 59/100  Iteration 33292/56600 Training loss: 1.0087 0.4780 sec/batch
Epoch 59/100  Iteration 33293/56600 Training loss: 1.0087 0.4946 sec/batch
Epoch 59/100  Iteration 33294/56600 Training loss: 1.0087 0.4795 sec/batch
Epoch 59/100  Iteration 33295/56600 Training loss: 1.0087 0.4955 sec/batch
Epoch 59/100  Iteration 33296/56600 Training loss: 1.0088 0.4688 sec/batch
Epoch 59/100  Iteration 33297/56600 Training loss: 1.0088 0.4705 sec/batch
Epoch 59/100  Iteration 33298/56600 Training loss: 1.0089 0.4763 sec/batch
Epoch 59/100  Iteration 33299/56600 Training loss: 1.0090 0.4896 sec/batch
Epoch 59/100  Iteration 3

Epoch 60/100  Iteration 33397/56600 Training loss: 1.0894 0.4638 sec/batch
Epoch 60/100  Iteration 33398/56600 Training loss: 1.0772 0.4798 sec/batch
Epoch 60/100  Iteration 33399/56600 Training loss: 1.0604 0.4725 sec/batch
Epoch 60/100  Iteration 33400/56600 Training loss: 1.0548 0.4637 sec/batch
Epoch 60/100  Iteration 33401/56600 Training loss: 1.0490 0.4784 sec/batch
Epoch 60/100  Iteration 33402/56600 Training loss: 1.0408 0.4795 sec/batch
Epoch 60/100  Iteration 33403/56600 Training loss: 1.0379 0.4798 sec/batch
Epoch 60/100  Iteration 33404/56600 Training loss: 1.0365 0.4785 sec/batch
Epoch 60/100  Iteration 33405/56600 Training loss: 1.0353 0.4879 sec/batch
Epoch 60/100  Iteration 33406/56600 Training loss: 1.0346 0.4699 sec/batch
Epoch 60/100  Iteration 33407/56600 Training loss: 1.0334 0.4721 sec/batch
Epoch 60/100  Iteration 33408/56600 Training loss: 1.0308 0.4812 sec/batch
Epoch 60/100  Iteration 33409/56600 Training loss: 1.0292 0.4720 sec/batch
Epoch 60/100  Iteration 3

Epoch 60/100  Iteration 33507/56600 Training loss: 1.0015 0.4786 sec/batch
Epoch 60/100  Iteration 33508/56600 Training loss: 1.0018 0.4692 sec/batch
Epoch 60/100  Iteration 33509/56600 Training loss: 1.0020 0.4739 sec/batch
Epoch 60/100  Iteration 33510/56600 Training loss: 1.0024 0.4712 sec/batch
Epoch 60/100  Iteration 33511/56600 Training loss: 1.0029 0.4768 sec/batch
Epoch 60/100  Iteration 33512/56600 Training loss: 1.0033 0.4768 sec/batch
Epoch 60/100  Iteration 33513/56600 Training loss: 1.0034 0.4651 sec/batch
Epoch 60/100  Iteration 33514/56600 Training loss: 1.0040 0.4797 sec/batch
Epoch 60/100  Iteration 33515/56600 Training loss: 1.0045 0.4792 sec/batch
Epoch 60/100  Iteration 33516/56600 Training loss: 1.0051 0.4897 sec/batch
Epoch 60/100  Iteration 33517/56600 Training loss: 1.0050 0.4786 sec/batch
Epoch 60/100  Iteration 33518/56600 Training loss: 1.0050 0.4795 sec/batch
Epoch 60/100  Iteration 33519/56600 Training loss: 1.0054 0.4795 sec/batch
Epoch 60/100  Iteration 3

Epoch 60/100  Iteration 33617/56600 Training loss: 1.0107 0.4671 sec/batch
Epoch 60/100  Iteration 33618/56600 Training loss: 1.0109 0.4786 sec/batch
Epoch 60/100  Iteration 33619/56600 Training loss: 1.0110 0.4787 sec/batch
Epoch 60/100  Iteration 33620/56600 Training loss: 1.0112 0.4799 sec/batch
Epoch 60/100  Iteration 33621/56600 Training loss: 1.0113 0.4786 sec/batch
Epoch 60/100  Iteration 33622/56600 Training loss: 1.0112 0.4844 sec/batch
Epoch 60/100  Iteration 33623/56600 Training loss: 1.0114 0.4880 sec/batch
Epoch 60/100  Iteration 33624/56600 Training loss: 1.0116 0.4860 sec/batch
Epoch 60/100  Iteration 33625/56600 Training loss: 1.0118 0.4795 sec/batch
Epoch 60/100  Iteration 33626/56600 Training loss: 1.0119 0.4803 sec/batch
Epoch 60/100  Iteration 33627/56600 Training loss: 1.0121 0.4783 sec/batch
Epoch 60/100  Iteration 33628/56600 Training loss: 1.0121 0.4789 sec/batch
Epoch 60/100  Iteration 33629/56600 Training loss: 1.0123 0.4801 sec/batch
Epoch 60/100  Iteration 3

Epoch 60/100  Iteration 33727/56600 Training loss: 1.0111 0.4812 sec/batch
Epoch 60/100  Iteration 33728/56600 Training loss: 1.0111 0.4699 sec/batch
Epoch 60/100  Iteration 33729/56600 Training loss: 1.0110 0.4785 sec/batch
Epoch 60/100  Iteration 33730/56600 Training loss: 1.0110 0.4785 sec/batch
Epoch 60/100  Iteration 33731/56600 Training loss: 1.0110 0.4874 sec/batch
Epoch 60/100  Iteration 33732/56600 Training loss: 1.0110 0.4881 sec/batch
Epoch 60/100  Iteration 33733/56600 Training loss: 1.0110 0.4866 sec/batch
Epoch 60/100  Iteration 33734/56600 Training loss: 1.0110 0.4806 sec/batch
Epoch 60/100  Iteration 33735/56600 Training loss: 1.0109 0.4737 sec/batch
Epoch 60/100  Iteration 33736/56600 Training loss: 1.0108 0.4695 sec/batch
Epoch 60/100  Iteration 33737/56600 Training loss: 1.0107 0.4787 sec/batch
Epoch 60/100  Iteration 33738/56600 Training loss: 1.0107 0.4867 sec/batch
Epoch 60/100  Iteration 33739/56600 Training loss: 1.0108 0.4696 sec/batch
Epoch 60/100  Iteration 3

Epoch 60/100  Iteration 33837/56600 Training loss: 1.0078 0.5008 sec/batch
Epoch 60/100  Iteration 33838/56600 Training loss: 1.0078 0.4703 sec/batch
Epoch 60/100  Iteration 33839/56600 Training loss: 1.0076 0.4775 sec/batch
Epoch 60/100  Iteration 33840/56600 Training loss: 1.0076 0.4805 sec/batch
Epoch 60/100  Iteration 33841/56600 Training loss: 1.0075 0.4796 sec/batch
Epoch 60/100  Iteration 33842/56600 Training loss: 1.0075 0.4786 sec/batch
Epoch 60/100  Iteration 33843/56600 Training loss: 1.0075 0.4715 sec/batch
Epoch 60/100  Iteration 33844/56600 Training loss: 1.0075 0.4715 sec/batch
Epoch 60/100  Iteration 33845/56600 Training loss: 1.0075 0.4846 sec/batch
Epoch 60/100  Iteration 33846/56600 Training loss: 1.0076 0.4811 sec/batch
Epoch 60/100  Iteration 33847/56600 Training loss: 1.0075 0.4881 sec/batch
Epoch 60/100  Iteration 33848/56600 Training loss: 1.0075 0.4830 sec/batch
Epoch 60/100  Iteration 33849/56600 Training loss: 1.0075 0.4818 sec/batch
Epoch 60/100  Iteration 3

Epoch 60/100  Iteration 33947/56600 Training loss: 1.0067 0.4765 sec/batch
Epoch 60/100  Iteration 33948/56600 Training loss: 1.0067 0.4783 sec/batch
Epoch 60/100  Iteration 33949/56600 Training loss: 1.0066 0.4719 sec/batch
Epoch 60/100  Iteration 33950/56600 Training loss: 1.0067 0.4815 sec/batch
Epoch 60/100  Iteration 33951/56600 Training loss: 1.0066 0.4798 sec/batch
Epoch 60/100  Iteration 33952/56600 Training loss: 1.0067 0.4784 sec/batch
Epoch 60/100  Iteration 33953/56600 Training loss: 1.0067 0.4792 sec/batch
Epoch 60/100  Iteration 33954/56600 Training loss: 1.0067 0.4745 sec/batch
Epoch 60/100  Iteration 33955/56600 Training loss: 1.0068 0.4769 sec/batch
Epoch 60/100  Iteration 33956/56600 Training loss: 1.0068 0.4646 sec/batch
Epoch 60/100  Iteration 33957/56600 Training loss: 1.0067 0.4737 sec/batch
Epoch 60/100  Iteration 33958/56600 Training loss: 1.0068 0.4799 sec/batch
Epoch 60/100  Iteration 33959/56600 Training loss: 1.0069 0.4792 sec/batch
Epoch 60/100  Iteration 3

Epoch 61/100  Iteration 34056/56600 Training loss: 1.0004 0.4713 sec/batch
Epoch 61/100  Iteration 34057/56600 Training loss: 0.9999 0.4797 sec/batch
Epoch 61/100  Iteration 34058/56600 Training loss: 0.9997 0.4807 sec/batch
Epoch 61/100  Iteration 34059/56600 Training loss: 0.9999 0.4617 sec/batch
Epoch 61/100  Iteration 34060/56600 Training loss: 1.0005 0.4640 sec/batch
Epoch 61/100  Iteration 34061/56600 Training loss: 1.0006 0.4634 sec/batch
Epoch 61/100  Iteration 34062/56600 Training loss: 1.0010 0.4742 sec/batch
Epoch 61/100  Iteration 34063/56600 Training loss: 1.0009 0.4667 sec/batch
Epoch 61/100  Iteration 34064/56600 Training loss: 1.0010 0.4752 sec/batch
Epoch 61/100  Iteration 34065/56600 Training loss: 1.0009 0.4792 sec/batch
Epoch 61/100  Iteration 34066/56600 Training loss: 1.0009 0.4793 sec/batch
Epoch 61/100  Iteration 34067/56600 Training loss: 1.0005 0.4807 sec/batch
Epoch 61/100  Iteration 34068/56600 Training loss: 1.0005 0.4626 sec/batch
Epoch 61/100  Iteration 3

Epoch 61/100  Iteration 34166/56600 Training loss: 1.0099 0.4641 sec/batch
Epoch 61/100  Iteration 34167/56600 Training loss: 1.0099 0.4788 sec/batch
Epoch 61/100  Iteration 34168/56600 Training loss: 1.0098 0.4648 sec/batch
Epoch 61/100  Iteration 34169/56600 Training loss: 1.0098 0.4680 sec/batch
Epoch 61/100  Iteration 34170/56600 Training loss: 1.0098 0.4704 sec/batch
Epoch 61/100  Iteration 34171/56600 Training loss: 1.0098 0.4660 sec/batch
Epoch 61/100  Iteration 34172/56600 Training loss: 1.0097 0.4788 sec/batch
Epoch 61/100  Iteration 34173/56600 Training loss: 1.0095 0.4796 sec/batch
Epoch 61/100  Iteration 34174/56600 Training loss: 1.0094 0.4817 sec/batch
Epoch 61/100  Iteration 34175/56600 Training loss: 1.0094 0.4770 sec/batch
Epoch 61/100  Iteration 34176/56600 Training loss: 1.0093 0.4786 sec/batch
Epoch 61/100  Iteration 34177/56600 Training loss: 1.0092 0.4844 sec/batch
Epoch 61/100  Iteration 34178/56600 Training loss: 1.0092 0.4790 sec/batch
Epoch 61/100  Iteration 3

Epoch 61/100  Iteration 34276/56600 Training loss: 1.0104 0.4841 sec/batch
Epoch 61/100  Iteration 34277/56600 Training loss: 1.0104 0.4891 sec/batch
Epoch 61/100  Iteration 34278/56600 Training loss: 1.0105 0.4792 sec/batch
Epoch 61/100  Iteration 34279/56600 Training loss: 1.0104 0.4804 sec/batch
Epoch 61/100  Iteration 34280/56600 Training loss: 1.0103 0.4759 sec/batch
Epoch 61/100  Iteration 34281/56600 Training loss: 1.0103 0.4688 sec/batch
Epoch 61/100  Iteration 34282/56600 Training loss: 1.0101 0.4712 sec/batch
Epoch 61/100  Iteration 34283/56600 Training loss: 1.0100 0.4778 sec/batch
Epoch 61/100  Iteration 34284/56600 Training loss: 1.0099 0.4719 sec/batch
Epoch 61/100  Iteration 34285/56600 Training loss: 1.0099 0.4814 sec/batch
Epoch 61/100  Iteration 34286/56600 Training loss: 1.0099 0.4878 sec/batch
Epoch 61/100  Iteration 34287/56600 Training loss: 1.0098 0.4708 sec/batch
Epoch 61/100  Iteration 34288/56600 Training loss: 1.0097 0.4788 sec/batch
Epoch 61/100  Iteration 3

Epoch 61/100  Iteration 34386/56600 Training loss: 1.0072 0.4780 sec/batch
Epoch 61/100  Iteration 34387/56600 Training loss: 1.0071 0.4786 sec/batch
Epoch 61/100  Iteration 34388/56600 Training loss: 1.0070 0.4791 sec/batch
Epoch 61/100  Iteration 34389/56600 Training loss: 1.0069 0.4844 sec/batch
Epoch 61/100  Iteration 34390/56600 Training loss: 1.0069 0.4638 sec/batch
Epoch 61/100  Iteration 34391/56600 Training loss: 1.0068 0.4774 sec/batch
Epoch 61/100  Iteration 34392/56600 Training loss: 1.0067 0.4824 sec/batch
Epoch 61/100  Iteration 34393/56600 Training loss: 1.0067 0.4807 sec/batch
Epoch 61/100  Iteration 34394/56600 Training loss: 1.0066 0.4755 sec/batch
Epoch 61/100  Iteration 34395/56600 Training loss: 1.0066 0.4780 sec/batch
Epoch 61/100  Iteration 34396/56600 Training loss: 1.0066 0.4741 sec/batch
Epoch 61/100  Iteration 34397/56600 Training loss: 1.0065 0.4791 sec/batch
Epoch 61/100  Iteration 34398/56600 Training loss: 1.0065 0.4785 sec/batch
Epoch 61/100  Iteration 3

Epoch 61/100  Iteration 34496/56600 Training loss: 1.0053 0.4788 sec/batch
Epoch 61/100  Iteration 34497/56600 Training loss: 1.0053 0.4945 sec/batch
Epoch 61/100  Iteration 34498/56600 Training loss: 1.0053 0.4806 sec/batch
Epoch 61/100  Iteration 34499/56600 Training loss: 1.0053 0.4775 sec/batch
Epoch 61/100  Iteration 34500/56600 Training loss: 1.0053 0.4810 sec/batch
Epoch 61/100  Iteration 34501/56600 Training loss: 1.0053 0.4734 sec/batch
Epoch 61/100  Iteration 34502/56600 Training loss: 1.0052 0.4936 sec/batch
Epoch 61/100  Iteration 34503/56600 Training loss: 1.0053 0.4844 sec/batch
Epoch 61/100  Iteration 34504/56600 Training loss: 1.0052 0.4790 sec/batch
Epoch 61/100  Iteration 34505/56600 Training loss: 1.0052 0.4742 sec/batch
Epoch 61/100  Iteration 34506/56600 Training loss: 1.0052 0.4690 sec/batch
Epoch 61/100  Iteration 34507/56600 Training loss: 1.0052 0.4857 sec/batch
Epoch 61/100  Iteration 34508/56600 Training loss: 1.0052 0.4827 sec/batch
Epoch 61/100  Iteration 3

Epoch 62/100  Iteration 34606/56600 Training loss: 0.9985 0.4791 sec/batch
Epoch 62/100  Iteration 34607/56600 Training loss: 0.9984 0.4790 sec/batch
Epoch 62/100  Iteration 34608/56600 Training loss: 0.9984 0.4687 sec/batch
Epoch 62/100  Iteration 34609/56600 Training loss: 0.9987 0.4772 sec/batch
Epoch 62/100  Iteration 34610/56600 Training loss: 0.9989 0.4799 sec/batch
Epoch 62/100  Iteration 34611/56600 Training loss: 0.9987 0.4785 sec/batch
Epoch 62/100  Iteration 34612/56600 Training loss: 0.9984 0.4640 sec/batch
Epoch 62/100  Iteration 34613/56600 Training loss: 0.9980 0.4636 sec/batch
Epoch 62/100  Iteration 34614/56600 Training loss: 0.9975 0.4631 sec/batch
Epoch 62/100  Iteration 34615/56600 Training loss: 0.9978 0.4755 sec/batch
Epoch 62/100  Iteration 34616/56600 Training loss: 0.9980 0.4779 sec/batch
Epoch 62/100  Iteration 34617/56600 Training loss: 0.9979 0.4824 sec/batch
Epoch 62/100  Iteration 34618/56600 Training loss: 0.9977 0.4607 sec/batch
Epoch 62/100  Iteration 3

Epoch 62/100  Iteration 34716/56600 Training loss: 1.0063 0.4794 sec/batch
Epoch 62/100  Iteration 34717/56600 Training loss: 1.0063 0.4908 sec/batch
Epoch 62/100  Iteration 34718/56600 Training loss: 1.0063 0.4724 sec/batch
Epoch 62/100  Iteration 34719/56600 Training loss: 1.0063 0.4875 sec/batch
Epoch 62/100  Iteration 34720/56600 Training loss: 1.0065 0.4795 sec/batch
Epoch 62/100  Iteration 34721/56600 Training loss: 1.0067 0.4789 sec/batch
Epoch 62/100  Iteration 34722/56600 Training loss: 1.0067 0.4867 sec/batch
Epoch 62/100  Iteration 34723/56600 Training loss: 1.0069 0.4823 sec/batch
Epoch 62/100  Iteration 34724/56600 Training loss: 1.0070 0.4840 sec/batch
Epoch 62/100  Iteration 34725/56600 Training loss: 1.0071 0.4911 sec/batch
Epoch 62/100  Iteration 34726/56600 Training loss: 1.0071 0.4732 sec/batch
Epoch 62/100  Iteration 34727/56600 Training loss: 1.0071 0.4784 sec/batch
Epoch 62/100  Iteration 34728/56600 Training loss: 1.0073 0.4713 sec/batch
Epoch 62/100  Iteration 3

Epoch 62/100  Iteration 34826/56600 Training loss: 1.0097 0.4788 sec/batch
Epoch 62/100  Iteration 34827/56600 Training loss: 1.0096 0.4736 sec/batch
Epoch 62/100  Iteration 34828/56600 Training loss: 1.0096 0.4794 sec/batch
Epoch 62/100  Iteration 34829/56600 Training loss: 1.0096 0.4633 sec/batch
Epoch 62/100  Iteration 34830/56600 Training loss: 1.0096 0.4794 sec/batch
Epoch 62/100  Iteration 34831/56600 Training loss: 1.0096 0.4717 sec/batch
Epoch 62/100  Iteration 34832/56600 Training loss: 1.0095 0.4860 sec/batch
Epoch 62/100  Iteration 34833/56600 Training loss: 1.0094 0.4882 sec/batch
Epoch 62/100  Iteration 34834/56600 Training loss: 1.0092 0.4853 sec/batch
Epoch 62/100  Iteration 34835/56600 Training loss: 1.0091 0.4786 sec/batch
Epoch 62/100  Iteration 34836/56600 Training loss: 1.0091 0.4762 sec/batch
Epoch 62/100  Iteration 34837/56600 Training loss: 1.0090 0.4832 sec/batch
Epoch 62/100  Iteration 34838/56600 Training loss: 1.0090 0.4842 sec/batch
Epoch 62/100  Iteration 3

Epoch 62/100  Iteration 34936/56600 Training loss: 1.0060 0.4757 sec/batch
Epoch 62/100  Iteration 34937/56600 Training loss: 1.0059 0.4945 sec/batch
Epoch 62/100  Iteration 34938/56600 Training loss: 1.0059 0.4779 sec/batch
Epoch 62/100  Iteration 34939/56600 Training loss: 1.0059 0.4644 sec/batch
Epoch 62/100  Iteration 34940/56600 Training loss: 1.0059 0.4795 sec/batch
Epoch 62/100  Iteration 34941/56600 Training loss: 1.0058 0.4791 sec/batch
Epoch 62/100  Iteration 34942/56600 Training loss: 1.0058 0.4742 sec/batch
Epoch 62/100  Iteration 34943/56600 Training loss: 1.0058 0.4946 sec/batch
Epoch 62/100  Iteration 34944/56600 Training loss: 1.0058 0.4794 sec/batch
Epoch 62/100  Iteration 34945/56600 Training loss: 1.0058 0.4923 sec/batch
Epoch 62/100  Iteration 34946/56600 Training loss: 1.0058 0.4765 sec/batch
Epoch 62/100  Iteration 34947/56600 Training loss: 1.0058 0.4838 sec/batch
Epoch 62/100  Iteration 34948/56600 Training loss: 1.0058 0.4877 sec/batch
Epoch 62/100  Iteration 3

Epoch 62/100  Iteration 35046/56600 Training loss: 1.0037 0.4949 sec/batch
Epoch 62/100  Iteration 35047/56600 Training loss: 1.0038 0.4788 sec/batch
Epoch 62/100  Iteration 35048/56600 Training loss: 1.0038 0.4904 sec/batch
Epoch 62/100  Iteration 35049/56600 Training loss: 1.0038 0.4844 sec/batch
Epoch 62/100  Iteration 35050/56600 Training loss: 1.0038 0.4844 sec/batch
Epoch 62/100  Iteration 35051/56600 Training loss: 1.0039 0.4985 sec/batch
Epoch 62/100  Iteration 35052/56600 Training loss: 1.0039 0.4949 sec/batch
Epoch 62/100  Iteration 35053/56600 Training loss: 1.0039 0.4944 sec/batch
Epoch 62/100  Iteration 35054/56600 Training loss: 1.0039 0.4792 sec/batch
Epoch 62/100  Iteration 35055/56600 Training loss: 1.0039 0.4790 sec/batch
Epoch 62/100  Iteration 35056/56600 Training loss: 1.0039 0.4921 sec/batch
Epoch 62/100  Iteration 35057/56600 Training loss: 1.0039 0.4831 sec/batch
Epoch 62/100  Iteration 35058/56600 Training loss: 1.0039 0.4946 sec/batch
Epoch 62/100  Iteration 3

Epoch 63/100  Iteration 35156/56600 Training loss: 0.9990 0.4785 sec/batch
Epoch 63/100  Iteration 35157/56600 Training loss: 0.9986 0.4803 sec/batch
Epoch 63/100  Iteration 35158/56600 Training loss: 0.9982 0.4726 sec/batch
Epoch 63/100  Iteration 35159/56600 Training loss: 0.9979 0.4805 sec/batch
Epoch 63/100  Iteration 35160/56600 Training loss: 0.9983 0.4794 sec/batch
Epoch 63/100  Iteration 35161/56600 Training loss: 0.9984 0.4790 sec/batch
Epoch 63/100  Iteration 35162/56600 Training loss: 0.9979 0.4883 sec/batch
Epoch 63/100  Iteration 35163/56600 Training loss: 0.9978 0.4855 sec/batch
Epoch 63/100  Iteration 35164/56600 Training loss: 0.9972 0.4806 sec/batch
Epoch 63/100  Iteration 35165/56600 Training loss: 0.9966 0.4774 sec/batch
Epoch 63/100  Iteration 35166/56600 Training loss: 0.9971 0.4828 sec/batch
Epoch 63/100  Iteration 35167/56600 Training loss: 0.9973 0.4780 sec/batch
Epoch 63/100  Iteration 35168/56600 Training loss: 0.9969 0.4898 sec/batch
Epoch 63/100  Iteration 3

Epoch 63/100  Iteration 35266/56600 Training loss: 1.0040 0.4868 sec/batch
Epoch 63/100  Iteration 35267/56600 Training loss: 1.0038 0.4740 sec/batch
Epoch 63/100  Iteration 35268/56600 Training loss: 1.0037 0.4942 sec/batch
Epoch 63/100  Iteration 35269/56600 Training loss: 1.0037 0.4947 sec/batch
Epoch 63/100  Iteration 35270/56600 Training loss: 1.0037 0.4791 sec/batch
Epoch 63/100  Iteration 35271/56600 Training loss: 1.0036 0.4694 sec/batch
Epoch 63/100  Iteration 35272/56600 Training loss: 1.0036 0.4777 sec/batch
Epoch 63/100  Iteration 35273/56600 Training loss: 1.0036 0.4687 sec/batch
Epoch 63/100  Iteration 35274/56600 Training loss: 1.0037 0.4790 sec/batch
Epoch 63/100  Iteration 35275/56600 Training loss: 1.0038 0.4741 sec/batch
Epoch 63/100  Iteration 35276/56600 Training loss: 1.0040 0.4847 sec/batch
Epoch 63/100  Iteration 35277/56600 Training loss: 1.0040 0.4913 sec/batch
Epoch 63/100  Iteration 35278/56600 Training loss: 1.0041 0.4905 sec/batch
Epoch 63/100  Iteration 3

Epoch 63/100  Iteration 35376/56600 Training loss: 1.0071 0.4748 sec/batch
Epoch 63/100  Iteration 35377/56600 Training loss: 1.0071 0.4667 sec/batch
Epoch 63/100  Iteration 35378/56600 Training loss: 1.0072 0.4707 sec/batch
Epoch 63/100  Iteration 35379/56600 Training loss: 1.0073 0.4726 sec/batch
Epoch 63/100  Iteration 35380/56600 Training loss: 1.0073 0.4844 sec/batch
Epoch 63/100  Iteration 35381/56600 Training loss: 1.0073 0.4791 sec/batch
Epoch 63/100  Iteration 35382/56600 Training loss: 1.0073 0.4627 sec/batch
Epoch 63/100  Iteration 35383/56600 Training loss: 1.0074 0.4745 sec/batch
Epoch 63/100  Iteration 35384/56600 Training loss: 1.0076 0.4793 sec/batch
Epoch 63/100  Iteration 35385/56600 Training loss: 1.0077 0.4803 sec/batch
Epoch 63/100  Iteration 35386/56600 Training loss: 1.0078 0.4779 sec/batch
Epoch 63/100  Iteration 35387/56600 Training loss: 1.0079 0.4794 sec/batch
Epoch 63/100  Iteration 35388/56600 Training loss: 1.0080 0.4791 sec/batch
Epoch 63/100  Iteration 3

Epoch 63/100  Iteration 35486/56600 Training loss: 1.0044 0.4690 sec/batch
Epoch 63/100  Iteration 35487/56600 Training loss: 1.0044 0.4696 sec/batch
Epoch 63/100  Iteration 35488/56600 Training loss: 1.0044 0.4836 sec/batch
Epoch 63/100  Iteration 35489/56600 Training loss: 1.0045 0.4593 sec/batch
Epoch 63/100  Iteration 35490/56600 Training loss: 1.0044 0.4788 sec/batch
Epoch 63/100  Iteration 35491/56600 Training loss: 1.0046 0.4794 sec/batch
Epoch 63/100  Iteration 35492/56600 Training loss: 1.0046 0.4798 sec/batch
Epoch 63/100  Iteration 35493/56600 Training loss: 1.0045 0.4734 sec/batch
Epoch 63/100  Iteration 35494/56600 Training loss: 1.0044 0.4844 sec/batch
Epoch 63/100  Iteration 35495/56600 Training loss: 1.0044 0.4650 sec/batch
Epoch 63/100  Iteration 35496/56600 Training loss: 1.0044 0.4706 sec/batch
Epoch 63/100  Iteration 35497/56600 Training loss: 1.0043 0.4807 sec/batch
Epoch 63/100  Iteration 35498/56600 Training loss: 1.0044 0.4777 sec/batch
Epoch 63/100  Iteration 3

Epoch 63/100  Iteration 35596/56600 Training loss: 1.0021 0.4770 sec/batch
Epoch 63/100  Iteration 35597/56600 Training loss: 1.0020 0.4653 sec/batch
Epoch 63/100  Iteration 35598/56600 Training loss: 1.0020 0.4800 sec/batch
Epoch 63/100  Iteration 35599/56600 Training loss: 1.0019 0.4844 sec/batch
Epoch 63/100  Iteration 35600/56600 Training loss: 1.0018 0.4791 sec/batch
Epoch 63/100  Iteration 35601/56600 Training loss: 1.0018 0.4940 sec/batch
Epoch 63/100  Iteration 35602/56600 Training loss: 1.0018 0.4799 sec/batch
Epoch 63/100  Iteration 35603/56600 Training loss: 1.0018 0.4793 sec/batch
Epoch 63/100  Iteration 35604/56600 Training loss: 1.0017 0.4787 sec/batch
Epoch 63/100  Iteration 35605/56600 Training loss: 1.0018 0.4890 sec/batch
Epoch 63/100  Iteration 35606/56600 Training loss: 1.0018 0.4795 sec/batch
Epoch 63/100  Iteration 35607/56600 Training loss: 1.0018 0.4793 sec/batch
Epoch 63/100  Iteration 35608/56600 Training loss: 1.0019 0.4790 sec/batch
Epoch 63/100  Iteration 3

Epoch 64/100  Iteration 35706/56600 Training loss: 1.0083 0.4815 sec/batch
Epoch 64/100  Iteration 35707/56600 Training loss: 1.0079 0.4790 sec/batch
Epoch 64/100  Iteration 35708/56600 Training loss: 1.0070 0.4788 sec/batch
Epoch 64/100  Iteration 35709/56600 Training loss: 1.0062 0.4789 sec/batch
Epoch 64/100  Iteration 35710/56600 Training loss: 1.0052 0.4791 sec/batch
Epoch 64/100  Iteration 35711/56600 Training loss: 1.0047 0.4800 sec/batch
Epoch 64/100  Iteration 35712/56600 Training loss: 1.0041 0.4815 sec/batch
Epoch 64/100  Iteration 35713/56600 Training loss: 1.0034 0.4753 sec/batch
Epoch 64/100  Iteration 35714/56600 Training loss: 1.0028 0.4780 sec/batch
Epoch 64/100  Iteration 35715/56600 Training loss: 1.0023 0.4695 sec/batch
Epoch 64/100  Iteration 35716/56600 Training loss: 1.0015 0.4734 sec/batch
Epoch 64/100  Iteration 35717/56600 Training loss: 1.0009 0.4902 sec/batch
Epoch 64/100  Iteration 35718/56600 Training loss: 1.0005 0.4719 sec/batch
Epoch 64/100  Iteration 3

Epoch 64/100  Iteration 35816/56600 Training loss: 1.0018 0.4799 sec/batch
Epoch 64/100  Iteration 35817/56600 Training loss: 1.0018 0.4819 sec/batch
Epoch 64/100  Iteration 35818/56600 Training loss: 1.0018 0.4718 sec/batch
Epoch 64/100  Iteration 35819/56600 Training loss: 1.0018 0.4858 sec/batch
Epoch 64/100  Iteration 35820/56600 Training loss: 1.0018 0.4792 sec/batch
Epoch 64/100  Iteration 35821/56600 Training loss: 1.0018 0.4630 sec/batch
Epoch 64/100  Iteration 35822/56600 Training loss: 1.0017 0.4637 sec/batch
Epoch 64/100  Iteration 35823/56600 Training loss: 1.0018 0.4793 sec/batch
Epoch 64/100  Iteration 35824/56600 Training loss: 1.0018 0.4818 sec/batch
Epoch 64/100  Iteration 35825/56600 Training loss: 1.0019 0.4688 sec/batch
Epoch 64/100  Iteration 35826/56600 Training loss: 1.0021 0.4886 sec/batch
Epoch 64/100  Iteration 35827/56600 Training loss: 1.0022 0.4755 sec/batch
Epoch 64/100  Iteration 35828/56600 Training loss: 1.0022 0.4734 sec/batch
Epoch 64/100  Iteration 3

Epoch 64/100  Iteration 35926/56600 Training loss: 1.0059 0.4816 sec/batch
Epoch 64/100  Iteration 35927/56600 Training loss: 1.0057 0.4688 sec/batch
Epoch 64/100  Iteration 35928/56600 Training loss: 1.0057 0.4734 sec/batch
Epoch 64/100  Iteration 35929/56600 Training loss: 1.0057 0.4794 sec/batch
Epoch 64/100  Iteration 35930/56600 Training loss: 1.0058 0.4744 sec/batch
Epoch 64/100  Iteration 35931/56600 Training loss: 1.0057 0.4795 sec/batch
Epoch 64/100  Iteration 35932/56600 Training loss: 1.0057 0.4791 sec/batch
Epoch 64/100  Iteration 35933/56600 Training loss: 1.0057 0.4790 sec/batch
Epoch 64/100  Iteration 35934/56600 Training loss: 1.0057 0.4807 sec/batch
Epoch 64/100  Iteration 35935/56600 Training loss: 1.0056 0.4933 sec/batch
Epoch 64/100  Iteration 35936/56600 Training loss: 1.0056 0.4894 sec/batch
Epoch 64/100  Iteration 35937/56600 Training loss: 1.0054 0.4916 sec/batch
Epoch 64/100  Iteration 35938/56600 Training loss: 1.0053 0.4808 sec/batch
Epoch 64/100  Iteration 3

Epoch 64/100  Iteration 36035/56600 Training loss: 1.0037 0.4756 sec/batch
Epoch 64/100  Iteration 36036/56600 Training loss: 1.0037 0.4774 sec/batch
Epoch 64/100  Iteration 36037/56600 Training loss: 1.0037 0.4576 sec/batch
Epoch 64/100  Iteration 36038/56600 Training loss: 1.0037 0.4750 sec/batch
Epoch 64/100  Iteration 36039/56600 Training loss: 1.0037 0.4784 sec/batch
Epoch 64/100  Iteration 36040/56600 Training loss: 1.0036 0.4635 sec/batch
Epoch 64/100  Iteration 36041/56600 Training loss: 1.0036 0.4746 sec/batch
Epoch 64/100  Iteration 36042/56600 Training loss: 1.0036 0.4730 sec/batch
Epoch 64/100  Iteration 36043/56600 Training loss: 1.0036 0.4696 sec/batch
Epoch 64/100  Iteration 36044/56600 Training loss: 1.0037 0.4736 sec/batch
Epoch 64/100  Iteration 36045/56600 Training loss: 1.0037 0.4793 sec/batch
Epoch 64/100  Iteration 36046/56600 Training loss: 1.0037 0.4771 sec/batch
Epoch 64/100  Iteration 36047/56600 Training loss: 1.0036 0.4807 sec/batch
Epoch 64/100  Iteration 3

Epoch 64/100  Iteration 36145/56600 Training loss: 1.0014 0.4945 sec/batch
Epoch 64/100  Iteration 36146/56600 Training loss: 1.0014 0.4833 sec/batch
Epoch 64/100  Iteration 36147/56600 Training loss: 1.0014 0.4793 sec/batch
Epoch 64/100  Iteration 36148/56600 Training loss: 1.0014 0.4940 sec/batch
Epoch 64/100  Iteration 36149/56600 Training loss: 1.0013 0.4772 sec/batch
Epoch 64/100  Iteration 36150/56600 Training loss: 1.0013 0.4769 sec/batch
Epoch 64/100  Iteration 36151/56600 Training loss: 1.0014 0.4941 sec/batch
Epoch 64/100  Iteration 36152/56600 Training loss: 1.0014 0.4791 sec/batch
Epoch 64/100  Iteration 36153/56600 Training loss: 1.0013 0.4947 sec/batch
Epoch 64/100  Iteration 36154/56600 Training loss: 1.0013 0.4791 sec/batch
Epoch 64/100  Iteration 36155/56600 Training loss: 1.0013 0.4904 sec/batch
Epoch 64/100  Iteration 36156/56600 Training loss: 1.0013 0.4811 sec/batch
Epoch 64/100  Iteration 36157/56600 Training loss: 1.0012 0.4737 sec/batch
Epoch 64/100  Iteration 3

Epoch 65/100  Iteration 36255/56600 Training loss: 1.0001 0.4792 sec/batch
Epoch 65/100  Iteration 36256/56600 Training loss: 0.9998 0.4798 sec/batch
Epoch 65/100  Iteration 36257/56600 Training loss: 1.0000 0.4797 sec/batch
Epoch 65/100  Iteration 36258/56600 Training loss: 1.0013 0.4782 sec/batch
Epoch 65/100  Iteration 36259/56600 Training loss: 1.0033 0.4646 sec/batch
Epoch 65/100  Iteration 36260/56600 Training loss: 1.0040 0.4789 sec/batch
Epoch 65/100  Iteration 36261/56600 Training loss: 1.0046 0.4709 sec/batch
Epoch 65/100  Iteration 36262/56600 Training loss: 1.0053 0.4712 sec/batch
Epoch 65/100  Iteration 36263/56600 Training loss: 1.0054 0.4707 sec/batch
Epoch 65/100  Iteration 36264/56600 Training loss: 1.0060 0.4748 sec/batch
Epoch 65/100  Iteration 36265/56600 Training loss: 1.0059 0.4788 sec/batch
Epoch 65/100  Iteration 36266/56600 Training loss: 1.0064 0.4725 sec/batch
Epoch 65/100  Iteration 36267/56600 Training loss: 1.0060 0.4706 sec/batch
Epoch 65/100  Iteration 3

Epoch 65/100  Iteration 36365/56600 Training loss: 0.9971 0.4789 sec/batch
Epoch 65/100  Iteration 36366/56600 Training loss: 0.9973 0.4796 sec/batch
Epoch 65/100  Iteration 36367/56600 Training loss: 0.9975 0.4772 sec/batch
Epoch 65/100  Iteration 36368/56600 Training loss: 0.9974 0.4707 sec/batch
Epoch 65/100  Iteration 36369/56600 Training loss: 0.9975 0.4940 sec/batch
Epoch 65/100  Iteration 36370/56600 Training loss: 0.9975 0.4902 sec/batch
Epoch 65/100  Iteration 36371/56600 Training loss: 0.9974 0.4779 sec/batch
Epoch 65/100  Iteration 36372/56600 Training loss: 0.9976 0.4946 sec/batch
Epoch 65/100  Iteration 36373/56600 Training loss: 0.9978 0.4792 sec/batch
Epoch 65/100  Iteration 36374/56600 Training loss: 0.9980 0.4791 sec/batch
Epoch 65/100  Iteration 36375/56600 Training loss: 0.9984 0.4860 sec/batch
Epoch 65/100  Iteration 36376/56600 Training loss: 0.9988 0.4838 sec/batch
Epoch 65/100  Iteration 36377/56600 Training loss: 0.9990 0.4845 sec/batch
Epoch 65/100  Iteration 3

Epoch 65/100  Iteration 36475/56600 Training loss: 1.0051 0.4821 sec/batch
Epoch 65/100  Iteration 36476/56600 Training loss: 1.0049 0.4790 sec/batch
Epoch 65/100  Iteration 36477/56600 Training loss: 1.0049 0.4792 sec/batch
Epoch 65/100  Iteration 36478/56600 Training loss: 1.0049 0.4740 sec/batch
Epoch 65/100  Iteration 36479/56600 Training loss: 1.0049 0.4847 sec/batch
Epoch 65/100  Iteration 36480/56600 Training loss: 1.0049 0.4790 sec/batch
Epoch 65/100  Iteration 36481/56600 Training loss: 1.0048 0.4791 sec/batch
Epoch 65/100  Iteration 36482/56600 Training loss: 1.0047 0.4951 sec/batch
Epoch 65/100  Iteration 36483/56600 Training loss: 1.0046 0.4915 sec/batch
Epoch 65/100  Iteration 36484/56600 Training loss: 1.0044 0.4844 sec/batch
Epoch 65/100  Iteration 36485/56600 Training loss: 1.0043 0.4795 sec/batch
Epoch 65/100  Iteration 36486/56600 Training loss: 1.0043 0.4741 sec/batch
Epoch 65/100  Iteration 36487/56600 Training loss: 1.0042 0.4787 sec/batch
Epoch 65/100  Iteration 3

Epoch 65/100  Iteration 36585/56600 Training loss: 1.0018 0.4630 sec/batch
Epoch 65/100  Iteration 36586/56600 Training loss: 1.0017 0.4741 sec/batch
Epoch 65/100  Iteration 36587/56600 Training loss: 1.0017 0.4785 sec/batch
Epoch 65/100  Iteration 36588/56600 Training loss: 1.0017 0.4793 sec/batch
Epoch 65/100  Iteration 36589/56600 Training loss: 1.0016 0.4792 sec/batch
Epoch 65/100  Iteration 36590/56600 Training loss: 1.0016 0.4790 sec/batch
Epoch 65/100  Iteration 36591/56600 Training loss: 1.0015 0.4688 sec/batch
Epoch 65/100  Iteration 36592/56600 Training loss: 1.0015 0.4724 sec/batch
Epoch 65/100  Iteration 36593/56600 Training loss: 1.0014 0.4712 sec/batch
Epoch 65/100  Iteration 36594/56600 Training loss: 1.0013 0.4856 sec/batch
Epoch 65/100  Iteration 36595/56600 Training loss: 1.0012 0.4826 sec/batch
Epoch 65/100  Iteration 36596/56600 Training loss: 1.0011 0.4758 sec/batch
Epoch 65/100  Iteration 36597/56600 Training loss: 1.0010 0.4823 sec/batch
Epoch 65/100  Iteration 3

Epoch 65/100  Iteration 36695/56600 Training loss: 0.9989 0.4767 sec/batch
Epoch 65/100  Iteration 36696/56600 Training loss: 0.9991 0.4814 sec/batch
Epoch 65/100  Iteration 36697/56600 Training loss: 0.9991 0.4885 sec/batch
Epoch 65/100  Iteration 36698/56600 Training loss: 0.9991 0.4693 sec/batch
Epoch 65/100  Iteration 36699/56600 Training loss: 0.9991 0.4787 sec/batch
Epoch 65/100  Iteration 36700/56600 Training loss: 0.9990 0.4746 sec/batch
Epoch 65/100  Iteration 36701/56600 Training loss: 0.9990 0.4784 sec/batch
Epoch 65/100  Iteration 36702/56600 Training loss: 0.9990 0.4791 sec/batch
Epoch 65/100  Iteration 36703/56600 Training loss: 0.9991 0.4793 sec/batch
Epoch 65/100  Iteration 36704/56600 Training loss: 0.9990 0.4643 sec/batch
Epoch 65/100  Iteration 36705/56600 Training loss: 0.9990 0.4789 sec/batch
Epoch 65/100  Iteration 36706/56600 Training loss: 0.9990 0.4787 sec/batch
Epoch 65/100  Iteration 36707/56600 Training loss: 0.9990 0.4785 sec/batch
Epoch 65/100  Iteration 3

Epoch 66/100  Iteration 36805/56600 Training loss: 1.0209 0.4843 sec/batch
Epoch 66/100  Iteration 36806/56600 Training loss: 1.0194 0.4737 sec/batch
Epoch 66/100  Iteration 36807/56600 Training loss: 1.0175 0.4792 sec/batch
Epoch 66/100  Iteration 36808/56600 Training loss: 1.0156 0.4743 sec/batch
Epoch 66/100  Iteration 36809/56600 Training loss: 1.0128 0.4805 sec/batch
Epoch 66/100  Iteration 36810/56600 Training loss: 1.0115 0.4791 sec/batch
Epoch 66/100  Iteration 36811/56600 Training loss: 1.0096 0.4628 sec/batch
Epoch 66/100  Iteration 36812/56600 Training loss: 1.0075 0.4730 sec/batch
Epoch 66/100  Iteration 36813/56600 Training loss: 1.0061 0.4688 sec/batch
Epoch 66/100  Iteration 36814/56600 Training loss: 1.0054 0.4797 sec/batch
Epoch 66/100  Iteration 36815/56600 Training loss: 1.0045 0.4709 sec/batch
Epoch 66/100  Iteration 36816/56600 Training loss: 1.0027 0.4714 sec/batch
Epoch 66/100  Iteration 36817/56600 Training loss: 1.0023 0.4641 sec/batch
Epoch 66/100  Iteration 3

Epoch 66/100  Iteration 36915/56600 Training loss: 0.9957 0.4792 sec/batch
Epoch 66/100  Iteration 36916/56600 Training loss: 0.9959 0.4789 sec/batch
Epoch 66/100  Iteration 36917/56600 Training loss: 0.9959 0.4838 sec/batch
Epoch 66/100  Iteration 36918/56600 Training loss: 0.9960 0.4720 sec/batch
Epoch 66/100  Iteration 36919/56600 Training loss: 0.9961 0.4783 sec/batch
Epoch 66/100  Iteration 36920/56600 Training loss: 0.9961 0.4790 sec/batch
Epoch 66/100  Iteration 36921/56600 Training loss: 0.9964 0.4829 sec/batch
Epoch 66/100  Iteration 36922/56600 Training loss: 0.9963 0.4916 sec/batch
Epoch 66/100  Iteration 36923/56600 Training loss: 0.9962 0.4790 sec/batch
Epoch 66/100  Iteration 36924/56600 Training loss: 0.9961 0.4892 sec/batch
Epoch 66/100  Iteration 36925/56600 Training loss: 0.9958 0.4794 sec/batch
Epoch 66/100  Iteration 36926/56600 Training loss: 0.9957 0.4786 sec/batch
Epoch 66/100  Iteration 36927/56600 Training loss: 0.9958 0.4951 sec/batch
Epoch 66/100  Iteration 3

Epoch 66/100  Iteration 37025/56600 Training loss: 1.0022 0.4642 sec/batch
Epoch 66/100  Iteration 37026/56600 Training loss: 1.0023 0.4592 sec/batch
Epoch 66/100  Iteration 37027/56600 Training loss: 1.0023 0.4626 sec/batch
Epoch 66/100  Iteration 37028/56600 Training loss: 1.0024 0.4627 sec/batch
Epoch 66/100  Iteration 37029/56600 Training loss: 1.0025 0.4636 sec/batch
Epoch 66/100  Iteration 37030/56600 Training loss: 1.0027 0.4796 sec/batch
Epoch 66/100  Iteration 37031/56600 Training loss: 1.0029 0.4688 sec/batch
Epoch 66/100  Iteration 37032/56600 Training loss: 1.0032 0.4797 sec/batch
Epoch 66/100  Iteration 37033/56600 Training loss: 1.0034 0.4684 sec/batch
Epoch 66/100  Iteration 37034/56600 Training loss: 1.0036 0.4740 sec/batch
Epoch 66/100  Iteration 37035/56600 Training loss: 1.0037 0.4787 sec/batch
Epoch 66/100  Iteration 37036/56600 Training loss: 1.0039 0.4791 sec/batch
Epoch 66/100  Iteration 37037/56600 Training loss: 1.0040 0.4742 sec/batch
Epoch 66/100  Iteration 3

Epoch 66/100  Iteration 37135/56600 Training loss: 1.0011 0.4783 sec/batch
Epoch 66/100  Iteration 37136/56600 Training loss: 1.0011 0.4889 sec/batch
Epoch 66/100  Iteration 37137/56600 Training loss: 1.0010 0.4844 sec/batch
Epoch 66/100  Iteration 37138/56600 Training loss: 1.0009 0.4798 sec/batch
Epoch 66/100  Iteration 37139/56600 Training loss: 1.0008 0.4784 sec/batch
Epoch 66/100  Iteration 37140/56600 Training loss: 1.0008 0.4736 sec/batch
Epoch 66/100  Iteration 37141/56600 Training loss: 1.0006 0.5007 sec/batch
Epoch 66/100  Iteration 37142/56600 Training loss: 1.0004 0.4868 sec/batch
Epoch 66/100  Iteration 37143/56600 Training loss: 1.0005 0.4869 sec/batch
Epoch 66/100  Iteration 37144/56600 Training loss: 1.0004 0.4716 sec/batch
Epoch 66/100  Iteration 37145/56600 Training loss: 1.0005 0.4768 sec/batch
Epoch 66/100  Iteration 37146/56600 Training loss: 1.0006 0.4951 sec/batch
Epoch 66/100  Iteration 37147/56600 Training loss: 1.0007 0.4775 sec/batch
Epoch 66/100  Iteration 3

Epoch 66/100  Iteration 37245/56600 Training loss: 0.9977 0.4668 sec/batch
Epoch 66/100  Iteration 37246/56600 Training loss: 0.9976 0.4818 sec/batch
Epoch 66/100  Iteration 37247/56600 Training loss: 0.9976 0.4785 sec/batch
Epoch 66/100  Iteration 37248/56600 Training loss: 0.9976 0.4799 sec/batch
Epoch 66/100  Iteration 37249/56600 Training loss: 0.9975 0.4789 sec/batch
Epoch 66/100  Iteration 37250/56600 Training loss: 0.9976 0.4871 sec/batch
Epoch 66/100  Iteration 37251/56600 Training loss: 0.9976 0.4658 sec/batch
Epoch 66/100  Iteration 37252/56600 Training loss: 0.9976 0.4793 sec/batch
Epoch 66/100  Iteration 37253/56600 Training loss: 0.9975 0.4792 sec/batch
Epoch 66/100  Iteration 37254/56600 Training loss: 0.9975 0.4918 sec/batch
Epoch 66/100  Iteration 37255/56600 Training loss: 0.9975 0.4735 sec/batch
Epoch 66/100  Iteration 37256/56600 Training loss: 0.9975 0.4832 sec/batch
Epoch 66/100  Iteration 37257/56600 Training loss: 0.9975 0.4778 sec/batch
Epoch 66/100  Iteration 3

Epoch 66/100  Iteration 37355/56600 Training loss: 0.9973 0.4794 sec/batch
Epoch 66/100  Iteration 37356/56600 Training loss: 0.9974 0.4860 sec/batch
Epoch 67/100  Iteration 37357/56600 Training loss: 1.1382 0.4763 sec/batch
Epoch 67/100  Iteration 37358/56600 Training loss: 1.0912 0.4767 sec/batch
Epoch 67/100  Iteration 37359/56600 Training loss: 1.0771 0.4813 sec/batch
Epoch 67/100  Iteration 37360/56600 Training loss: 1.0626 0.4874 sec/batch
Epoch 67/100  Iteration 37361/56600 Training loss: 1.0477 0.4865 sec/batch
Epoch 67/100  Iteration 37362/56600 Training loss: 1.0425 0.4800 sec/batch
Epoch 67/100  Iteration 37363/56600 Training loss: 1.0359 0.4799 sec/batch
Epoch 67/100  Iteration 37364/56600 Training loss: 1.0289 0.4827 sec/batch
Epoch 67/100  Iteration 37365/56600 Training loss: 1.0265 0.4743 sec/batch
Epoch 67/100  Iteration 37366/56600 Training loss: 1.0245 0.4835 sec/batch
Epoch 67/100  Iteration 37367/56600 Training loss: 1.0233 0.4723 sec/batch
Epoch 67/100  Iteration 3

Epoch 67/100  Iteration 37465/56600 Training loss: 0.9890 0.4834 sec/batch
Epoch 67/100  Iteration 37466/56600 Training loss: 0.9892 0.4849 sec/batch
Epoch 67/100  Iteration 37467/56600 Training loss: 0.9893 0.4787 sec/batch
Epoch 67/100  Iteration 37468/56600 Training loss: 0.9892 0.4800 sec/batch
Epoch 67/100  Iteration 37469/56600 Training loss: 0.9897 0.4784 sec/batch
Epoch 67/100  Iteration 37470/56600 Training loss: 0.9900 0.4790 sec/batch
Epoch 67/100  Iteration 37471/56600 Training loss: 0.9903 0.4901 sec/batch
Epoch 67/100  Iteration 37472/56600 Training loss: 0.9906 0.4942 sec/batch
Epoch 67/100  Iteration 37473/56600 Training loss: 0.9912 0.4799 sec/batch
Epoch 67/100  Iteration 37474/56600 Training loss: 0.9915 0.4789 sec/batch
Epoch 67/100  Iteration 37475/56600 Training loss: 0.9918 0.4962 sec/batch
Epoch 67/100  Iteration 37476/56600 Training loss: 0.9923 0.4693 sec/batch
Epoch 67/100  Iteration 37477/56600 Training loss: 0.9930 0.4797 sec/batch
Epoch 67/100  Iteration 3

Epoch 67/100  Iteration 37575/56600 Training loss: 0.9989 0.4781 sec/batch
Epoch 67/100  Iteration 37576/56600 Training loss: 0.9990 0.4802 sec/batch
Epoch 67/100  Iteration 37577/56600 Training loss: 0.9991 0.4774 sec/batch
Epoch 67/100  Iteration 37578/56600 Training loss: 0.9991 0.4974 sec/batch
Epoch 67/100  Iteration 37579/56600 Training loss: 0.9991 0.4753 sec/batch
Epoch 67/100  Iteration 37580/56600 Training loss: 0.9993 0.4836 sec/batch
Epoch 67/100  Iteration 37581/56600 Training loss: 0.9994 0.4950 sec/batch
Epoch 67/100  Iteration 37582/56600 Training loss: 0.9995 0.4902 sec/batch
Epoch 67/100  Iteration 37583/56600 Training loss: 0.9996 0.4791 sec/batch
Epoch 67/100  Iteration 37584/56600 Training loss: 0.9996 0.4785 sec/batch
Epoch 67/100  Iteration 37585/56600 Training loss: 0.9997 0.4790 sec/batch
Epoch 67/100  Iteration 37586/56600 Training loss: 0.9999 0.4904 sec/batch
Epoch 67/100  Iteration 37587/56600 Training loss: 1.0000 0.4789 sec/batch
Epoch 67/100  Iteration 3

Epoch 67/100  Iteration 37685/56600 Training loss: 0.9996 0.4793 sec/batch
Epoch 67/100  Iteration 37686/56600 Training loss: 0.9996 0.4895 sec/batch
Epoch 67/100  Iteration 37687/56600 Training loss: 0.9996 0.4870 sec/batch
Epoch 67/100  Iteration 37688/56600 Training loss: 0.9997 0.4785 sec/batch
Epoch 67/100  Iteration 37689/56600 Training loss: 0.9998 0.4867 sec/batch
Epoch 67/100  Iteration 37690/56600 Training loss: 0.9997 0.4866 sec/batch
Epoch 67/100  Iteration 37691/56600 Training loss: 0.9996 0.4863 sec/batch
Epoch 67/100  Iteration 37692/56600 Training loss: 0.9997 0.4915 sec/batch
Epoch 67/100  Iteration 37693/56600 Training loss: 0.9996 0.4773 sec/batch
Epoch 67/100  Iteration 37694/56600 Training loss: 0.9997 0.4795 sec/batch
Epoch 67/100  Iteration 37695/56600 Training loss: 0.9996 0.4799 sec/batch
Epoch 67/100  Iteration 37696/56600 Training loss: 0.9996 0.4782 sec/batch
Epoch 67/100  Iteration 37697/56600 Training loss: 0.9995 0.4926 sec/batch
Epoch 67/100  Iteration 3

Epoch 67/100  Iteration 37795/56600 Training loss: 0.9963 0.4749 sec/batch
Epoch 67/100  Iteration 37796/56600 Training loss: 0.9962 0.4768 sec/batch
Epoch 67/100  Iteration 37797/56600 Training loss: 0.9962 0.4805 sec/batch
Epoch 67/100  Iteration 37798/56600 Training loss: 0.9961 0.4876 sec/batch
Epoch 67/100  Iteration 37799/56600 Training loss: 0.9962 0.4864 sec/batch
Epoch 67/100  Iteration 37800/56600 Training loss: 0.9962 0.4952 sec/batch
Epoch 67/100  Iteration 37801/56600 Training loss: 0.9961 0.4785 sec/batch
Epoch 67/100  Iteration 37802/56600 Training loss: 0.9960 0.4724 sec/batch
Epoch 67/100  Iteration 37803/56600 Training loss: 0.9960 0.4704 sec/batch
Epoch 67/100  Iteration 37804/56600 Training loss: 0.9959 0.4874 sec/batch
Epoch 67/100  Iteration 37805/56600 Training loss: 0.9959 0.4735 sec/batch
Epoch 67/100  Iteration 37806/56600 Training loss: 0.9959 0.4911 sec/batch
Epoch 67/100  Iteration 37807/56600 Training loss: 0.9959 0.4817 sec/batch
Epoch 67/100  Iteration 3

Epoch 67/100  Iteration 37905/56600 Training loss: 0.9952 0.4795 sec/batch
Epoch 67/100  Iteration 37906/56600 Training loss: 0.9951 0.4790 sec/batch
Epoch 67/100  Iteration 37907/56600 Training loss: 0.9951 0.4816 sec/batch
Epoch 67/100  Iteration 37908/56600 Training loss: 0.9951 0.4904 sec/batch
Epoch 67/100  Iteration 37909/56600 Training loss: 0.9951 0.4781 sec/batch
Epoch 67/100  Iteration 37910/56600 Training loss: 0.9951 0.4882 sec/batch
Epoch 67/100  Iteration 37911/56600 Training loss: 0.9951 0.4800 sec/batch
Epoch 67/100  Iteration 37912/56600 Training loss: 0.9950 0.4799 sec/batch
Epoch 67/100  Iteration 37913/56600 Training loss: 0.9950 0.4785 sec/batch
Epoch 67/100  Iteration 37914/56600 Training loss: 0.9951 0.5047 sec/batch
Epoch 67/100  Iteration 37915/56600 Training loss: 0.9951 0.4851 sec/batch
Epoch 67/100  Iteration 37916/56600 Training loss: 0.9951 0.4946 sec/batch
Epoch 67/100  Iteration 37917/56600 Training loss: 0.9952 0.4794 sec/batch
Epoch 67/100  Iteration 3

Epoch 68/100  Iteration 38014/56600 Training loss: 0.9908 0.4793 sec/batch
Epoch 68/100  Iteration 38015/56600 Training loss: 0.9905 0.4739 sec/batch
Epoch 68/100  Iteration 38016/56600 Training loss: 0.9904 0.4791 sec/batch
Epoch 68/100  Iteration 38017/56600 Training loss: 0.9903 0.4787 sec/batch
Epoch 68/100  Iteration 38018/56600 Training loss: 0.9900 0.4649 sec/batch
Epoch 68/100  Iteration 38019/56600 Training loss: 0.9896 0.4731 sec/batch
Epoch 68/100  Iteration 38020/56600 Training loss: 0.9893 0.4794 sec/batch
Epoch 68/100  Iteration 38021/56600 Training loss: 0.9894 0.4643 sec/batch
Epoch 68/100  Iteration 38022/56600 Training loss: 0.9900 0.4789 sec/batch
Epoch 68/100  Iteration 38023/56600 Training loss: 0.9900 0.4874 sec/batch
Epoch 68/100  Iteration 38024/56600 Training loss: 0.9905 0.4708 sec/batch
Epoch 68/100  Iteration 38025/56600 Training loss: 0.9904 0.4634 sec/batch
Epoch 68/100  Iteration 38026/56600 Training loss: 0.9904 0.4648 sec/batch
Epoch 68/100  Iteration 3

Epoch 68/100  Iteration 38124/56600 Training loss: 0.9988 0.4800 sec/batch
Epoch 68/100  Iteration 38125/56600 Training loss: 0.9989 0.4852 sec/batch
Epoch 68/100  Iteration 38126/56600 Training loss: 0.9989 0.4724 sec/batch
Epoch 68/100  Iteration 38127/56600 Training loss: 0.9991 0.4687 sec/batch
Epoch 68/100  Iteration 38128/56600 Training loss: 0.9992 0.4787 sec/batch
Epoch 68/100  Iteration 38129/56600 Training loss: 0.9993 0.4797 sec/batch
Epoch 68/100  Iteration 38130/56600 Training loss: 0.9992 0.4795 sec/batch
Epoch 68/100  Iteration 38131/56600 Training loss: 0.9992 0.4737 sec/batch
Epoch 68/100  Iteration 38132/56600 Training loss: 0.9992 0.4760 sec/batch
Epoch 68/100  Iteration 38133/56600 Training loss: 0.9992 0.4619 sec/batch
Epoch 68/100  Iteration 38134/56600 Training loss: 0.9990 0.4755 sec/batch
Epoch 68/100  Iteration 38135/56600 Training loss: 0.9989 0.4599 sec/batch
Epoch 68/100  Iteration 38136/56600 Training loss: 0.9987 0.4651 sec/batch
Epoch 68/100  Iteration 3

Epoch 68/100  Iteration 38234/56600 Training loss: 1.0000 0.4895 sec/batch
Epoch 68/100  Iteration 38235/56600 Training loss: 0.9999 0.4688 sec/batch
Epoch 68/100  Iteration 38236/56600 Training loss: 0.9998 0.4866 sec/batch
Epoch 68/100  Iteration 38237/56600 Training loss: 0.9998 0.4814 sec/batch
Epoch 68/100  Iteration 38238/56600 Training loss: 0.9997 0.4751 sec/batch
Epoch 68/100  Iteration 38239/56600 Training loss: 0.9998 0.4836 sec/batch
Epoch 68/100  Iteration 38240/56600 Training loss: 0.9998 0.4716 sec/batch
Epoch 68/100  Iteration 38241/56600 Training loss: 0.9998 0.4814 sec/batch
Epoch 68/100  Iteration 38242/56600 Training loss: 0.9996 0.4724 sec/batch
Epoch 68/100  Iteration 38243/56600 Training loss: 0.9996 0.4708 sec/batch
Epoch 68/100  Iteration 38244/56600 Training loss: 0.9995 0.4940 sec/batch
Epoch 68/100  Iteration 38245/56600 Training loss: 0.9993 0.4844 sec/batch
Epoch 68/100  Iteration 38246/56600 Training loss: 0.9993 0.4763 sec/batch
Epoch 68/100  Iteration 3

Epoch 68/100  Iteration 38344/56600 Training loss: 0.9968 0.4794 sec/batch
Epoch 68/100  Iteration 38345/56600 Training loss: 0.9968 0.4818 sec/batch
Epoch 68/100  Iteration 38346/56600 Training loss: 0.9968 0.4674 sec/batch
Epoch 68/100  Iteration 38347/56600 Training loss: 0.9968 0.4866 sec/batch
Epoch 68/100  Iteration 38348/56600 Training loss: 0.9967 0.4779 sec/batch
Epoch 68/100  Iteration 38349/56600 Training loss: 0.9967 0.4785 sec/batch
Epoch 68/100  Iteration 38350/56600 Training loss: 0.9966 0.4709 sec/batch
Epoch 68/100  Iteration 38351/56600 Training loss: 0.9965 0.4804 sec/batch
Epoch 68/100  Iteration 38352/56600 Training loss: 0.9965 0.4738 sec/batch
Epoch 68/100  Iteration 38353/56600 Training loss: 0.9964 0.4787 sec/batch
Epoch 68/100  Iteration 38354/56600 Training loss: 0.9963 0.4943 sec/batch
Epoch 68/100  Iteration 38355/56600 Training loss: 0.9962 0.4799 sec/batch
Epoch 68/100  Iteration 38356/56600 Training loss: 0.9962 0.5027 sec/batch
Epoch 68/100  Iteration 3

Epoch 68/100  Iteration 38454/56600 Training loss: 0.9951 0.4811 sec/batch
Epoch 68/100  Iteration 38455/56600 Training loss: 0.9951 0.4927 sec/batch
Epoch 68/100  Iteration 38456/56600 Training loss: 0.9950 0.4888 sec/batch
Epoch 68/100  Iteration 38457/56600 Training loss: 0.9950 0.4802 sec/batch
Epoch 68/100  Iteration 38458/56600 Training loss: 0.9950 0.4777 sec/batch
Epoch 68/100  Iteration 38459/56600 Training loss: 0.9950 0.4963 sec/batch
Epoch 68/100  Iteration 38460/56600 Training loss: 0.9950 0.4785 sec/batch
Epoch 68/100  Iteration 38461/56600 Training loss: 0.9949 0.4792 sec/batch
Epoch 68/100  Iteration 38462/56600 Training loss: 0.9950 0.4788 sec/batch
Epoch 68/100  Iteration 38463/56600 Training loss: 0.9949 0.4688 sec/batch
Epoch 68/100  Iteration 38464/56600 Training loss: 0.9949 0.4727 sec/batch
Epoch 68/100  Iteration 38465/56600 Training loss: 0.9949 0.4783 sec/batch
Epoch 68/100  Iteration 38466/56600 Training loss: 0.9949 0.4898 sec/batch
Epoch 68/100  Iteration 3

Epoch 69/100  Iteration 38564/56600 Training loss: 0.9886 0.4823 sec/batch
Epoch 69/100  Iteration 38565/56600 Training loss: 0.9884 0.4764 sec/batch
Epoch 69/100  Iteration 38566/56600 Training loss: 0.9883 0.4763 sec/batch
Epoch 69/100  Iteration 38567/56600 Training loss: 0.9880 0.4633 sec/batch
Epoch 69/100  Iteration 38568/56600 Training loss: 0.9879 0.4641 sec/batch
Epoch 69/100  Iteration 38569/56600 Training loss: 0.9877 0.4791 sec/batch
Epoch 69/100  Iteration 38570/56600 Training loss: 0.9877 0.4790 sec/batch
Epoch 69/100  Iteration 38571/56600 Training loss: 0.9881 0.4688 sec/batch
Epoch 69/100  Iteration 38572/56600 Training loss: 0.9884 0.4742 sec/batch
Epoch 69/100  Iteration 38573/56600 Training loss: 0.9882 0.4784 sec/batch
Epoch 69/100  Iteration 38574/56600 Training loss: 0.9879 0.4791 sec/batch
Epoch 69/100  Iteration 38575/56600 Training loss: 0.9875 0.4630 sec/batch
Epoch 69/100  Iteration 38576/56600 Training loss: 0.9871 0.4792 sec/batch
Epoch 69/100  Iteration 3

Epoch 69/100  Iteration 38674/56600 Training loss: 0.9951 0.4774 sec/batch
Epoch 69/100  Iteration 38675/56600 Training loss: 0.9952 0.4960 sec/batch
Epoch 69/100  Iteration 38676/56600 Training loss: 0.9954 0.4793 sec/batch
Epoch 69/100  Iteration 38677/56600 Training loss: 0.9952 0.4790 sec/batch
Epoch 69/100  Iteration 38678/56600 Training loss: 0.9953 0.4737 sec/batch
Epoch 69/100  Iteration 38679/56600 Training loss: 0.9952 0.4790 sec/batch
Epoch 69/100  Iteration 38680/56600 Training loss: 0.9952 0.4952 sec/batch
Epoch 69/100  Iteration 38681/56600 Training loss: 0.9953 0.4844 sec/batch
Epoch 69/100  Iteration 38682/56600 Training loss: 0.9954 0.4871 sec/batch
Epoch 69/100  Iteration 38683/56600 Training loss: 0.9956 0.4764 sec/batch
Epoch 69/100  Iteration 38684/56600 Training loss: 0.9957 0.4843 sec/batch
Epoch 69/100  Iteration 38685/56600 Training loss: 0.9958 0.4785 sec/batch
Epoch 69/100  Iteration 38686/56600 Training loss: 0.9960 0.4772 sec/batch
Epoch 69/100  Iteration 3

Epoch 69/100  Iteration 38784/56600 Training loss: 0.9986 0.4927 sec/batch
Epoch 69/100  Iteration 38785/56600 Training loss: 0.9987 0.4760 sec/batch
Epoch 69/100  Iteration 38786/56600 Training loss: 0.9987 0.4796 sec/batch
Epoch 69/100  Iteration 38787/56600 Training loss: 0.9987 0.4795 sec/batch
Epoch 69/100  Iteration 38788/56600 Training loss: 0.9986 0.4947 sec/batch
Epoch 69/100  Iteration 38789/56600 Training loss: 0.9985 0.4785 sec/batch
Epoch 69/100  Iteration 38790/56600 Training loss: 0.9985 0.4868 sec/batch
Epoch 69/100  Iteration 38791/56600 Training loss: 0.9985 0.4809 sec/batch
Epoch 69/100  Iteration 38792/56600 Training loss: 0.9985 0.4785 sec/batch
Epoch 69/100  Iteration 38793/56600 Training loss: 0.9984 0.4795 sec/batch
Epoch 69/100  Iteration 38794/56600 Training loss: 0.9983 0.4893 sec/batch
Epoch 69/100  Iteration 38795/56600 Training loss: 0.9982 0.4687 sec/batch
Epoch 69/100  Iteration 38796/56600 Training loss: 0.9980 0.4800 sec/batch
Epoch 69/100  Iteration 3

Epoch 69/100  Iteration 38894/56600 Training loss: 0.9952 0.4786 sec/batch
Epoch 69/100  Iteration 38895/56600 Training loss: 0.9952 0.4776 sec/batch
Epoch 69/100  Iteration 38896/56600 Training loss: 0.9951 0.4958 sec/batch
Epoch 69/100  Iteration 38897/56600 Training loss: 0.9950 0.4906 sec/batch
Epoch 69/100  Iteration 38898/56600 Training loss: 0.9949 0.4918 sec/batch
Epoch 69/100  Iteration 38899/56600 Training loss: 0.9948 0.4895 sec/batch
Epoch 69/100  Iteration 38900/56600 Training loss: 0.9948 0.4752 sec/batch
Epoch 69/100  Iteration 38901/56600 Training loss: 0.9948 0.4944 sec/batch
Epoch 69/100  Iteration 38902/56600 Training loss: 0.9948 0.4861 sec/batch
Epoch 69/100  Iteration 38903/56600 Training loss: 0.9947 0.4809 sec/batch
Epoch 69/100  Iteration 38904/56600 Training loss: 0.9947 0.4794 sec/batch
Epoch 69/100  Iteration 38905/56600 Training loss: 0.9947 0.4779 sec/batch
Epoch 69/100  Iteration 38906/56600 Training loss: 0.9947 0.4789 sec/batch
Epoch 69/100  Iteration 3

Epoch 69/100  Iteration 39004/56600 Training loss: 0.9928 0.4789 sec/batch
Epoch 69/100  Iteration 39005/56600 Training loss: 0.9928 0.4795 sec/batch
Epoch 69/100  Iteration 39006/56600 Training loss: 0.9929 0.4735 sec/batch
Epoch 69/100  Iteration 39007/56600 Training loss: 0.9929 0.4797 sec/batch
Epoch 69/100  Iteration 39008/56600 Training loss: 0.9928 0.4943 sec/batch
Epoch 69/100  Iteration 39009/56600 Training loss: 0.9929 0.4792 sec/batch
Epoch 69/100  Iteration 39010/56600 Training loss: 0.9929 0.4809 sec/batch
Epoch 69/100  Iteration 39011/56600 Training loss: 0.9929 0.4934 sec/batch
Epoch 69/100  Iteration 39012/56600 Training loss: 0.9930 0.4884 sec/batch
Epoch 69/100  Iteration 39013/56600 Training loss: 0.9930 0.4673 sec/batch
Epoch 69/100  Iteration 39014/56600 Training loss: 0.9930 0.4878 sec/batch
Epoch 69/100  Iteration 39015/56600 Training loss: 0.9931 0.4810 sec/batch
Epoch 69/100  Iteration 39016/56600 Training loss: 0.9931 0.4785 sec/batch
Epoch 69/100  Iteration 3

Epoch 70/100  Iteration 39114/56600 Training loss: 0.9913 0.4787 sec/batch
Epoch 70/100  Iteration 39115/56600 Training loss: 0.9906 0.4816 sec/batch
Epoch 70/100  Iteration 39116/56600 Training loss: 0.9901 0.4777 sec/batch
Epoch 70/100  Iteration 39117/56600 Training loss: 0.9899 0.4789 sec/batch
Epoch 70/100  Iteration 39118/56600 Training loss: 0.9891 0.4788 sec/batch
Epoch 70/100  Iteration 39119/56600 Training loss: 0.9886 0.4744 sec/batch
Epoch 70/100  Iteration 39120/56600 Training loss: 0.9882 0.4786 sec/batch
Epoch 70/100  Iteration 39121/56600 Training loss: 0.9878 0.4688 sec/batch
Epoch 70/100  Iteration 39122/56600 Training loss: 0.9882 0.4633 sec/batch
Epoch 70/100  Iteration 39123/56600 Training loss: 0.9884 0.4722 sec/batch
Epoch 70/100  Iteration 39124/56600 Training loss: 0.9878 0.4715 sec/batch
Epoch 70/100  Iteration 39125/56600 Training loss: 0.9876 0.4783 sec/batch
Epoch 70/100  Iteration 39126/56600 Training loss: 0.9869 0.4636 sec/batch
Epoch 70/100  Iteration 3

Epoch 70/100  Iteration 39224/56600 Training loss: 0.9933 0.4785 sec/batch
Epoch 70/100  Iteration 39225/56600 Training loss: 0.9932 0.4841 sec/batch
Epoch 70/100  Iteration 39226/56600 Training loss: 0.9931 0.4733 sec/batch
Epoch 70/100  Iteration 39227/56600 Training loss: 0.9930 0.4809 sec/batch
Epoch 70/100  Iteration 39228/56600 Training loss: 0.9931 0.4768 sec/batch
Epoch 70/100  Iteration 39229/56600 Training loss: 0.9929 0.4712 sec/batch
Epoch 70/100  Iteration 39230/56600 Training loss: 0.9928 0.4720 sec/batch
Epoch 70/100  Iteration 39231/56600 Training loss: 0.9928 0.4641 sec/batch
Epoch 70/100  Iteration 39232/56600 Training loss: 0.9927 0.4780 sec/batch
Epoch 70/100  Iteration 39233/56600 Training loss: 0.9926 0.5050 sec/batch
Epoch 70/100  Iteration 39234/56600 Training loss: 0.9926 0.4848 sec/batch
Epoch 70/100  Iteration 39235/56600 Training loss: 0.9926 0.4869 sec/batch
Epoch 70/100  Iteration 39236/56600 Training loss: 0.9927 0.4653 sec/batch
Epoch 70/100  Iteration 3

Epoch 70/100  Iteration 39334/56600 Training loss: 0.9965 0.4785 sec/batch
Epoch 70/100  Iteration 39335/56600 Training loss: 0.9965 0.4798 sec/batch
Epoch 70/100  Iteration 39336/56600 Training loss: 0.9964 0.4788 sec/batch
Epoch 70/100  Iteration 39337/56600 Training loss: 0.9964 0.4745 sec/batch
Epoch 70/100  Iteration 39338/56600 Training loss: 0.9963 0.4789 sec/batch
Epoch 70/100  Iteration 39339/56600 Training loss: 0.9963 0.4790 sec/batch
Epoch 70/100  Iteration 39340/56600 Training loss: 0.9964 0.4757 sec/batch
Epoch 70/100  Iteration 39341/56600 Training loss: 0.9965 0.4915 sec/batch
Epoch 70/100  Iteration 39342/56600 Training loss: 0.9966 0.4789 sec/batch
Epoch 70/100  Iteration 39343/56600 Training loss: 0.9965 0.4795 sec/batch
Epoch 70/100  Iteration 39344/56600 Training loss: 0.9966 0.4893 sec/batch
Epoch 70/100  Iteration 39345/56600 Training loss: 0.9967 0.4847 sec/batch
Epoch 70/100  Iteration 39346/56600 Training loss: 0.9969 0.4802 sec/batch
Epoch 70/100  Iteration 3

Epoch 70/100  Iteration 39444/56600 Training loss: 0.9939 0.4784 sec/batch
Epoch 70/100  Iteration 39445/56600 Training loss: 0.9938 0.4733 sec/batch
Epoch 70/100  Iteration 39446/56600 Training loss: 0.9938 0.4806 sec/batch
Epoch 70/100  Iteration 39447/56600 Training loss: 0.9939 0.4937 sec/batch
Epoch 70/100  Iteration 39448/56600 Training loss: 0.9939 0.4850 sec/batch
Epoch 70/100  Iteration 39449/56600 Training loss: 0.9939 0.4867 sec/batch
Epoch 70/100  Iteration 39450/56600 Training loss: 0.9938 0.4817 sec/batch
Epoch 70/100  Iteration 39451/56600 Training loss: 0.9939 0.4791 sec/batch
Epoch 70/100  Iteration 39452/56600 Training loss: 0.9939 0.4763 sec/batch
Epoch 70/100  Iteration 39453/56600 Training loss: 0.9940 0.4744 sec/batch
Epoch 70/100  Iteration 39454/56600 Training loss: 0.9940 0.4800 sec/batch
Epoch 70/100  Iteration 39455/56600 Training loss: 0.9939 0.4805 sec/batch
Epoch 70/100  Iteration 39456/56600 Training loss: 0.9939 0.4776 sec/batch
Epoch 70/100  Iteration 3

Epoch 70/100  Iteration 39554/56600 Training loss: 0.9922 0.4797 sec/batch
Epoch 70/100  Iteration 39555/56600 Training loss: 0.9922 0.4799 sec/batch
Epoch 70/100  Iteration 39556/56600 Training loss: 0.9922 0.4769 sec/batch
Epoch 70/100  Iteration 39557/56600 Training loss: 0.9921 0.4798 sec/batch
Epoch 70/100  Iteration 39558/56600 Training loss: 0.9921 0.4874 sec/batch
Epoch 70/100  Iteration 39559/56600 Training loss: 0.9920 0.4804 sec/batch
Epoch 70/100  Iteration 39560/56600 Training loss: 0.9920 0.4763 sec/batch
Epoch 70/100  Iteration 39561/56600 Training loss: 0.9919 0.4814 sec/batch
Epoch 70/100  Iteration 39562/56600 Training loss: 0.9919 0.4867 sec/batch
Epoch 70/100  Iteration 39563/56600 Training loss: 0.9918 0.4821 sec/batch
Epoch 70/100  Iteration 39564/56600 Training loss: 0.9918 0.4794 sec/batch
Epoch 70/100  Iteration 39565/56600 Training loss: 0.9918 0.4669 sec/batch
Epoch 70/100  Iteration 39566/56600 Training loss: 0.9917 0.4688 sec/batch
Epoch 70/100  Iteration 3

Epoch 71/100  Iteration 39664/56600 Training loss: 0.9982 0.4732 sec/batch
Epoch 71/100  Iteration 39665/56600 Training loss: 0.9985 0.4760 sec/batch
Epoch 71/100  Iteration 39666/56600 Training loss: 0.9988 0.4795 sec/batch
Epoch 71/100  Iteration 39667/56600 Training loss: 0.9979 0.4637 sec/batch
Epoch 71/100  Iteration 39668/56600 Training loss: 0.9980 0.4787 sec/batch
Epoch 71/100  Iteration 39669/56600 Training loss: 0.9977 0.4736 sec/batch
Epoch 71/100  Iteration 39670/56600 Training loss: 0.9968 0.4777 sec/batch
Epoch 71/100  Iteration 39671/56600 Training loss: 0.9962 0.4606 sec/batch
Epoch 71/100  Iteration 39672/56600 Training loss: 0.9952 0.4667 sec/batch
Epoch 71/100  Iteration 39673/56600 Training loss: 0.9947 0.4867 sec/batch
Epoch 71/100  Iteration 39674/56600 Training loss: 0.9942 0.4780 sec/batch
Epoch 71/100  Iteration 39675/56600 Training loss: 0.9933 0.4886 sec/batch
Epoch 71/100  Iteration 39676/56600 Training loss: 0.9926 0.4787 sec/batch
Epoch 71/100  Iteration 3

Epoch 71/100  Iteration 39774/56600 Training loss: 0.9911 0.4840 sec/batch
Epoch 71/100  Iteration 39775/56600 Training loss: 0.9912 0.4729 sec/batch
Epoch 71/100  Iteration 39776/56600 Training loss: 0.9913 0.4767 sec/batch
Epoch 71/100  Iteration 39777/56600 Training loss: 0.9915 0.4766 sec/batch
Epoch 71/100  Iteration 39778/56600 Training loss: 0.9916 0.4840 sec/batch
Epoch 71/100  Iteration 39779/56600 Training loss: 0.9916 0.4798 sec/batch
Epoch 71/100  Iteration 39780/56600 Training loss: 0.9915 0.4738 sec/batch
Epoch 71/100  Iteration 39781/56600 Training loss: 0.9916 0.4865 sec/batch
Epoch 71/100  Iteration 39782/56600 Training loss: 0.9916 0.4779 sec/batch
Epoch 71/100  Iteration 39783/56600 Training loss: 0.9916 0.4832 sec/batch
Epoch 71/100  Iteration 39784/56600 Training loss: 0.9915 0.4978 sec/batch
Epoch 71/100  Iteration 39785/56600 Training loss: 0.9915 0.4682 sec/batch
Epoch 71/100  Iteration 39786/56600 Training loss: 0.9916 0.4790 sec/batch
Epoch 71/100  Iteration 3

Epoch 71/100  Iteration 39884/56600 Training loss: 0.9958 0.4849 sec/batch
Epoch 71/100  Iteration 39885/56600 Training loss: 0.9957 0.4723 sec/batch
Epoch 71/100  Iteration 39886/56600 Training loss: 0.9957 0.4788 sec/batch
Epoch 71/100  Iteration 39887/56600 Training loss: 0.9956 0.4759 sec/batch
Epoch 71/100  Iteration 39888/56600 Training loss: 0.9954 0.4628 sec/batch
Epoch 71/100  Iteration 39889/56600 Training loss: 0.9953 0.4777 sec/batch
Epoch 71/100  Iteration 39890/56600 Training loss: 0.9953 0.4794 sec/batch
Epoch 71/100  Iteration 39891/56600 Training loss: 0.9953 0.4870 sec/batch
Epoch 71/100  Iteration 39892/56600 Training loss: 0.9954 0.4713 sec/batch
Epoch 71/100  Iteration 39893/56600 Training loss: 0.9954 0.4783 sec/batch
Epoch 71/100  Iteration 39894/56600 Training loss: 0.9954 0.4688 sec/batch
Epoch 71/100  Iteration 39895/56600 Training loss: 0.9954 0.4736 sec/batch
Epoch 71/100  Iteration 39896/56600 Training loss: 0.9954 0.4734 sec/batch
Epoch 71/100  Iteration 3

Epoch 71/100  Iteration 39994/56600 Training loss: 0.9928 0.4791 sec/batch
Epoch 71/100  Iteration 39995/56600 Training loss: 0.9927 0.4933 sec/batch
Epoch 71/100  Iteration 39996/56600 Training loss: 0.9927 0.4790 sec/batch
Epoch 71/100  Iteration 39997/56600 Training loss: 0.9927 0.4815 sec/batch
Epoch 71/100  Iteration 39998/56600 Training loss: 0.9927 0.4764 sec/batch
Epoch 71/100  Iteration 39999/56600 Training loss: 0.9926 0.4853 sec/batch
Epoch 71/100  Iteration 40000/56600 Training loss: 0.9926 0.4734 sec/batch
Validation loss: 1.20652 Saving checkpoint!
Epoch 71/100  Iteration 40001/56600 Training loss: 0.9933 0.4789 sec/batch
Epoch 71/100  Iteration 40002/56600 Training loss: 0.9933 0.4844 sec/batch
Epoch 71/100  Iteration 40003/56600 Training loss: 0.9932 0.4586 sec/batch
Epoch 71/100  Iteration 40004/56600 Training loss: 0.9932 0.4762 sec/batch
Epoch 71/100  Iteration 40005/56600 Training loss: 0.9932 0.4828 sec/batch
Epoch 71/100  Iteration 40006/56600 Training loss: 0.993

Epoch 71/100  Iteration 40103/56600 Training loss: 0.9913 0.4751 sec/batch
Epoch 71/100  Iteration 40104/56600 Training loss: 0.9913 0.4783 sec/batch
Epoch 71/100  Iteration 40105/56600 Training loss: 0.9913 0.4854 sec/batch
Epoch 71/100  Iteration 40106/56600 Training loss: 0.9912 0.4688 sec/batch
Epoch 71/100  Iteration 40107/56600 Training loss: 0.9912 0.4981 sec/batch
Epoch 71/100  Iteration 40108/56600 Training loss: 0.9912 0.4794 sec/batch
Epoch 71/100  Iteration 40109/56600 Training loss: 0.9912 0.4784 sec/batch
Epoch 71/100  Iteration 40110/56600 Training loss: 0.9912 0.4791 sec/batch
Epoch 71/100  Iteration 40111/56600 Training loss: 0.9912 0.4797 sec/batch
Epoch 71/100  Iteration 40112/56600 Training loss: 0.9912 0.4736 sec/batch
Epoch 71/100  Iteration 40113/56600 Training loss: 0.9912 0.4797 sec/batch
Epoch 71/100  Iteration 40114/56600 Training loss: 0.9913 0.4784 sec/batch
Epoch 71/100  Iteration 40115/56600 Training loss: 0.9912 0.4844 sec/batch
Epoch 71/100  Iteration 4

Epoch 72/100  Iteration 40213/56600 Training loss: 0.9918 0.4807 sec/batch
Epoch 72/100  Iteration 40214/56600 Training loss: 0.9919 0.4731 sec/batch
Epoch 72/100  Iteration 40215/56600 Training loss: 0.9911 0.4788 sec/batch
Epoch 72/100  Iteration 40216/56600 Training loss: 0.9908 0.4839 sec/batch
Epoch 72/100  Iteration 40217/56600 Training loss: 0.9898 0.4743 sec/batch
Epoch 72/100  Iteration 40218/56600 Training loss: 0.9896 0.4795 sec/batch
Epoch 72/100  Iteration 40219/56600 Training loss: 0.9898 0.4765 sec/batch
Epoch 72/100  Iteration 40220/56600 Training loss: 0.9913 0.4724 sec/batch
Epoch 72/100  Iteration 40221/56600 Training loss: 0.9932 0.4828 sec/batch
Epoch 72/100  Iteration 40222/56600 Training loss: 0.9941 0.4785 sec/batch
Epoch 72/100  Iteration 40223/56600 Training loss: 0.9951 0.4679 sec/batch
Epoch 72/100  Iteration 40224/56600 Training loss: 0.9956 0.4753 sec/batch
Epoch 72/100  Iteration 40225/56600 Training loss: 0.9956 0.4788 sec/batch
Epoch 72/100  Iteration 4

Epoch 72/100  Iteration 40323/56600 Training loss: 0.9868 0.4831 sec/batch
Epoch 72/100  Iteration 40324/56600 Training loss: 0.9872 0.4886 sec/batch
Epoch 72/100  Iteration 40325/56600 Training loss: 0.9874 0.4774 sec/batch
Epoch 72/100  Iteration 40326/56600 Training loss: 0.9873 0.4790 sec/batch
Epoch 72/100  Iteration 40327/56600 Training loss: 0.9876 0.4805 sec/batch
Epoch 72/100  Iteration 40328/56600 Training loss: 0.9877 0.4780 sec/batch
Epoch 72/100  Iteration 40329/56600 Training loss: 0.9880 0.4900 sec/batch
Epoch 72/100  Iteration 40330/56600 Training loss: 0.9879 0.4794 sec/batch
Epoch 72/100  Iteration 40331/56600 Training loss: 0.9880 0.4785 sec/batch
Epoch 72/100  Iteration 40332/56600 Training loss: 0.9879 0.4795 sec/batch
Epoch 72/100  Iteration 40333/56600 Training loss: 0.9879 0.4805 sec/batch
Epoch 72/100  Iteration 40334/56600 Training loss: 0.9881 0.4793 sec/batch
Epoch 72/100  Iteration 40335/56600 Training loss: 0.9882 0.4705 sec/batch
Epoch 72/100  Iteration 4

Epoch 72/100  Iteration 40433/56600 Training loss: 0.9953 0.4783 sec/batch
Epoch 72/100  Iteration 40434/56600 Training loss: 0.9953 0.4790 sec/batch
Epoch 72/100  Iteration 40435/56600 Training loss: 0.9954 0.4844 sec/batch
Epoch 72/100  Iteration 40436/56600 Training loss: 0.9955 0.4832 sec/batch
Epoch 72/100  Iteration 40437/56600 Training loss: 0.9952 0.4768 sec/batch
Epoch 72/100  Iteration 40438/56600 Training loss: 0.9951 0.4874 sec/batch
Epoch 72/100  Iteration 40439/56600 Training loss: 0.9951 0.4852 sec/batch
Epoch 72/100  Iteration 40440/56600 Training loss: 0.9950 0.4793 sec/batch
Epoch 72/100  Iteration 40441/56600 Training loss: 0.9950 0.4789 sec/batch
Epoch 72/100  Iteration 40442/56600 Training loss: 0.9950 0.4893 sec/batch
Epoch 72/100  Iteration 40443/56600 Training loss: 0.9948 0.4796 sec/batch
Epoch 72/100  Iteration 40444/56600 Training loss: 0.9948 0.4789 sec/batch
Epoch 72/100  Iteration 40445/56600 Training loss: 0.9946 0.4844 sec/batch
Epoch 72/100  Iteration 4

Epoch 72/100  Iteration 40543/56600 Training loss: 0.9920 0.4795 sec/batch
Epoch 72/100  Iteration 40544/56600 Training loss: 0.9920 0.4755 sec/batch
Epoch 72/100  Iteration 40545/56600 Training loss: 0.9920 0.4674 sec/batch
Epoch 72/100  Iteration 40546/56600 Training loss: 0.9920 0.4862 sec/batch
Epoch 72/100  Iteration 40547/56600 Training loss: 0.9920 0.4710 sec/batch
Epoch 72/100  Iteration 40548/56600 Training loss: 0.9920 0.4789 sec/batch
Epoch 72/100  Iteration 40549/56600 Training loss: 0.9919 0.4892 sec/batch
Epoch 72/100  Iteration 40550/56600 Training loss: 0.9919 0.4926 sec/batch
Epoch 72/100  Iteration 40551/56600 Training loss: 0.9919 0.4844 sec/batch
Epoch 72/100  Iteration 40552/56600 Training loss: 0.9918 0.4789 sec/batch
Epoch 72/100  Iteration 40553/56600 Training loss: 0.9917 0.4794 sec/batch
Epoch 72/100  Iteration 40554/56600 Training loss: 0.9917 0.4794 sec/batch
Epoch 72/100  Iteration 40555/56600 Training loss: 0.9916 0.4895 sec/batch
Epoch 72/100  Iteration 4

Epoch 72/100  Iteration 40653/56600 Training loss: 0.9890 0.4803 sec/batch
Epoch 72/100  Iteration 40654/56600 Training loss: 0.9890 0.4774 sec/batch
Epoch 72/100  Iteration 40655/56600 Training loss: 0.9891 0.4643 sec/batch
Epoch 72/100  Iteration 40656/56600 Training loss: 0.9891 0.4793 sec/batch
Epoch 72/100  Iteration 40657/56600 Training loss: 0.9893 0.4789 sec/batch
Epoch 72/100  Iteration 40658/56600 Training loss: 0.9894 0.4749 sec/batch
Epoch 72/100  Iteration 40659/56600 Training loss: 0.9894 0.4674 sec/batch
Epoch 72/100  Iteration 40660/56600 Training loss: 0.9894 0.4809 sec/batch
Epoch 72/100  Iteration 40661/56600 Training loss: 0.9893 0.4626 sec/batch
Epoch 72/100  Iteration 40662/56600 Training loss: 0.9893 0.4875 sec/batch
Epoch 72/100  Iteration 40663/56600 Training loss: 0.9893 0.4758 sec/batch
Epoch 72/100  Iteration 40664/56600 Training loss: 0.9893 0.4719 sec/batch
Epoch 72/100  Iteration 40665/56600 Training loss: 0.9893 0.4709 sec/batch
Epoch 72/100  Iteration 4

Epoch 73/100  Iteration 40763/56600 Training loss: 1.0153 0.4833 sec/batch
Epoch 73/100  Iteration 40764/56600 Training loss: 1.0150 0.4856 sec/batch
Epoch 73/100  Iteration 40765/56600 Training loss: 1.0144 0.4900 sec/batch
Epoch 73/100  Iteration 40766/56600 Training loss: 1.0116 0.4775 sec/batch
Epoch 73/100  Iteration 40767/56600 Training loss: 1.0098 0.4896 sec/batch
Epoch 73/100  Iteration 40768/56600 Training loss: 1.0087 0.4757 sec/batch
Epoch 73/100  Iteration 40769/56600 Training loss: 1.0069 0.4656 sec/batch
Epoch 73/100  Iteration 40770/56600 Training loss: 1.0050 0.4834 sec/batch
Epoch 73/100  Iteration 40771/56600 Training loss: 1.0016 0.4836 sec/batch
Epoch 73/100  Iteration 40772/56600 Training loss: 0.9996 0.4791 sec/batch
Epoch 73/100  Iteration 40773/56600 Training loss: 0.9976 0.4793 sec/batch
Epoch 73/100  Iteration 40774/56600 Training loss: 0.9954 0.4893 sec/batch
Epoch 73/100  Iteration 40775/56600 Training loss: 0.9944 0.4796 sec/batch
Epoch 73/100  Iteration 4

Epoch 73/100  Iteration 40873/56600 Training loss: 0.9850 0.4753 sec/batch
Epoch 73/100  Iteration 40874/56600 Training loss: 0.9855 0.4669 sec/batch
Epoch 73/100  Iteration 40875/56600 Training loss: 0.9854 0.4637 sec/batch
Epoch 73/100  Iteration 40876/56600 Training loss: 0.9855 0.4638 sec/batch
Epoch 73/100  Iteration 40877/56600 Training loss: 0.9857 0.4789 sec/batch
Epoch 73/100  Iteration 40878/56600 Training loss: 0.9859 0.4788 sec/batch
Epoch 73/100  Iteration 40879/56600 Training loss: 0.9859 0.4795 sec/batch
Epoch 73/100  Iteration 40880/56600 Training loss: 0.9859 0.4688 sec/batch
Epoch 73/100  Iteration 40881/56600 Training loss: 0.9861 0.4794 sec/batch
Epoch 73/100  Iteration 40882/56600 Training loss: 0.9861 0.4843 sec/batch
Epoch 73/100  Iteration 40883/56600 Training loss: 0.9863 0.4694 sec/batch
Epoch 73/100  Iteration 40884/56600 Training loss: 0.9862 0.4630 sec/batch
Epoch 73/100  Iteration 40885/56600 Training loss: 0.9861 0.4764 sec/batch
Epoch 73/100  Iteration 4

Epoch 73/100  Iteration 40983/56600 Training loss: 0.9923 0.4814 sec/batch
Epoch 73/100  Iteration 40984/56600 Training loss: 0.9924 0.4785 sec/batch
Epoch 73/100  Iteration 40985/56600 Training loss: 0.9926 0.4792 sec/batch
Epoch 73/100  Iteration 40986/56600 Training loss: 0.9927 0.4803 sec/batch
Epoch 73/100  Iteration 40987/56600 Training loss: 0.9928 0.4781 sec/batch
Epoch 73/100  Iteration 40988/56600 Training loss: 0.9928 0.4688 sec/batch
Epoch 73/100  Iteration 40989/56600 Training loss: 0.9928 0.4790 sec/batch
Epoch 73/100  Iteration 40990/56600 Training loss: 0.9929 0.4717 sec/batch
Epoch 73/100  Iteration 40991/56600 Training loss: 0.9930 0.4865 sec/batch
Epoch 73/100  Iteration 40992/56600 Training loss: 0.9933 0.4885 sec/batch
Epoch 73/100  Iteration 40993/56600 Training loss: 0.9935 0.4807 sec/batch
Epoch 73/100  Iteration 40994/56600 Training loss: 0.9937 0.4788 sec/batch
Epoch 73/100  Iteration 40995/56600 Training loss: 0.9939 0.4800 sec/batch
Epoch 73/100  Iteration 4

Epoch 73/100  Iteration 41093/56600 Training loss: 0.9920 0.4811 sec/batch
Epoch 73/100  Iteration 41094/56600 Training loss: 0.9919 0.4833 sec/batch
Epoch 73/100  Iteration 41095/56600 Training loss: 0.9918 0.4751 sec/batch
Epoch 73/100  Iteration 41096/56600 Training loss: 0.9918 0.4851 sec/batch
Epoch 73/100  Iteration 41097/56600 Training loss: 0.9919 0.4727 sec/batch
Epoch 73/100  Iteration 41098/56600 Training loss: 0.9918 0.4697 sec/batch
Epoch 73/100  Iteration 41099/56600 Training loss: 0.9917 0.4681 sec/batch
Epoch 73/100  Iteration 41100/56600 Training loss: 0.9916 0.4755 sec/batch
Epoch 73/100  Iteration 41101/56600 Training loss: 0.9916 0.4687 sec/batch
Epoch 73/100  Iteration 41102/56600 Training loss: 0.9915 0.4832 sec/batch
Epoch 73/100  Iteration 41103/56600 Training loss: 0.9914 0.4786 sec/batch
Epoch 73/100  Iteration 41104/56600 Training loss: 0.9912 0.4642 sec/batch
Epoch 73/100  Iteration 41105/56600 Training loss: 0.9913 0.4739 sec/batch
Epoch 73/100  Iteration 4

Epoch 73/100  Iteration 41203/56600 Training loss: 0.9885 0.4844 sec/batch
Epoch 73/100  Iteration 41204/56600 Training loss: 0.9885 0.4803 sec/batch
Epoch 73/100  Iteration 41205/56600 Training loss: 0.9885 0.4773 sec/batch
Epoch 73/100  Iteration 41206/56600 Training loss: 0.9885 0.4840 sec/batch
Epoch 73/100  Iteration 41207/56600 Training loss: 0.9885 0.4752 sec/batch
Epoch 73/100  Iteration 41208/56600 Training loss: 0.9884 0.4782 sec/batch
Epoch 73/100  Iteration 41209/56600 Training loss: 0.9883 0.4802 sec/batch
Epoch 73/100  Iteration 41210/56600 Training loss: 0.9883 0.4797 sec/batch
Epoch 73/100  Iteration 41211/56600 Training loss: 0.9882 0.4778 sec/batch
Epoch 73/100  Iteration 41212/56600 Training loss: 0.9883 0.4879 sec/batch
Epoch 73/100  Iteration 41213/56600 Training loss: 0.9883 0.4781 sec/batch
Epoch 73/100  Iteration 41214/56600 Training loss: 0.9883 0.4813 sec/batch
Epoch 73/100  Iteration 41215/56600 Training loss: 0.9882 0.4798 sec/batch
Epoch 73/100  Iteration 4

Epoch 73/100  Iteration 41313/56600 Training loss: 0.9878 0.4815 sec/batch
Epoch 73/100  Iteration 41314/56600 Training loss: 0.9878 0.4796 sec/batch
Epoch 73/100  Iteration 41315/56600 Training loss: 0.9878 0.4788 sec/batch
Epoch 73/100  Iteration 41316/56600 Training loss: 0.9879 0.4702 sec/batch
Epoch 73/100  Iteration 41317/56600 Training loss: 0.9880 0.4725 sec/batch
Epoch 73/100  Iteration 41318/56600 Training loss: 0.9880 0.4793 sec/batch
Epoch 74/100  Iteration 41319/56600 Training loss: 1.1209 0.4792 sec/batch
Epoch 74/100  Iteration 41320/56600 Training loss: 1.0750 0.4889 sec/batch
Epoch 74/100  Iteration 41321/56600 Training loss: 1.0661 0.4673 sec/batch
Epoch 74/100  Iteration 41322/56600 Training loss: 1.0515 0.4727 sec/batch
Epoch 74/100  Iteration 41323/56600 Training loss: 1.0373 0.4805 sec/batch
Epoch 74/100  Iteration 41324/56600 Training loss: 1.0321 0.4789 sec/batch
Epoch 74/100  Iteration 41325/56600 Training loss: 1.0258 0.4951 sec/batch
Epoch 74/100  Iteration 4

Epoch 74/100  Iteration 41423/56600 Training loss: 0.9792 0.4844 sec/batch
Epoch 74/100  Iteration 41424/56600 Training loss: 0.9790 0.4738 sec/batch
Epoch 74/100  Iteration 41425/56600 Training loss: 0.9787 0.4687 sec/batch
Epoch 74/100  Iteration 41426/56600 Training loss: 0.9787 0.4835 sec/batch
Epoch 74/100  Iteration 41427/56600 Training loss: 0.9787 0.4697 sec/batch
Epoch 74/100  Iteration 41428/56600 Training loss: 0.9788 0.4749 sec/batch
Epoch 74/100  Iteration 41429/56600 Training loss: 0.9788 0.4781 sec/batch
Epoch 74/100  Iteration 41430/56600 Training loss: 0.9786 0.4794 sec/batch
Epoch 74/100  Iteration 41431/56600 Training loss: 0.9791 0.4790 sec/batch
Epoch 74/100  Iteration 41432/56600 Training loss: 0.9794 0.4852 sec/batch
Epoch 74/100  Iteration 41433/56600 Training loss: 0.9796 0.4733 sec/batch
Epoch 74/100  Iteration 41434/56600 Training loss: 0.9800 0.4726 sec/batch
Epoch 74/100  Iteration 41435/56600 Training loss: 0.9806 0.4802 sec/batch
Epoch 74/100  Iteration 4

Epoch 74/100  Iteration 41533/56600 Training loss: 0.9882 0.4778 sec/batch
Epoch 74/100  Iteration 41534/56600 Training loss: 0.9882 0.4787 sec/batch
Epoch 74/100  Iteration 41535/56600 Training loss: 0.9881 0.4852 sec/batch
Epoch 74/100  Iteration 41536/56600 Training loss: 0.9881 0.4834 sec/batch
Epoch 74/100  Iteration 41537/56600 Training loss: 0.9883 0.4799 sec/batch
Epoch 74/100  Iteration 41538/56600 Training loss: 0.9884 0.4896 sec/batch
Epoch 74/100  Iteration 41539/56600 Training loss: 0.9884 0.4887 sec/batch
Epoch 74/100  Iteration 41540/56600 Training loss: 0.9884 0.4763 sec/batch
Epoch 74/100  Iteration 41541/56600 Training loss: 0.9884 0.4742 sec/batch
Epoch 74/100  Iteration 41542/56600 Training loss: 0.9886 0.4738 sec/batch
Epoch 74/100  Iteration 41543/56600 Training loss: 0.9888 0.4636 sec/batch
Epoch 74/100  Iteration 41544/56600 Training loss: 0.9890 0.4730 sec/batch
Epoch 74/100  Iteration 41545/56600 Training loss: 0.9891 0.4648 sec/batch
Epoch 74/100  Iteration 4

Epoch 74/100  Iteration 41643/56600 Training loss: 0.9894 0.4931 sec/batch
Epoch 74/100  Iteration 41644/56600 Training loss: 0.9895 0.4761 sec/batch
Epoch 74/100  Iteration 41645/56600 Training loss: 0.9894 0.4901 sec/batch
Epoch 74/100  Iteration 41646/56600 Training loss: 0.9894 0.4836 sec/batch
Epoch 74/100  Iteration 41647/56600 Training loss: 0.9894 0.4798 sec/batch
Epoch 74/100  Iteration 41648/56600 Training loss: 0.9894 0.4800 sec/batch
Epoch 74/100  Iteration 41649/56600 Training loss: 0.9894 0.4888 sec/batch
Epoch 74/100  Iteration 41650/56600 Training loss: 0.9895 0.4796 sec/batch
Epoch 74/100  Iteration 41651/56600 Training loss: 0.9896 0.4926 sec/batch
Epoch 74/100  Iteration 41652/56600 Training loss: 0.9895 0.4792 sec/batch
Epoch 74/100  Iteration 41653/56600 Training loss: 0.9894 0.4874 sec/batch
Epoch 74/100  Iteration 41654/56600 Training loss: 0.9894 0.4846 sec/batch
Epoch 74/100  Iteration 41655/56600 Training loss: 0.9894 0.4955 sec/batch
Epoch 74/100  Iteration 4

Epoch 74/100  Iteration 41753/56600 Training loss: 0.9867 0.4795 sec/batch
Epoch 74/100  Iteration 41754/56600 Training loss: 0.9866 0.4796 sec/batch
Epoch 74/100  Iteration 41755/56600 Training loss: 0.9866 0.4716 sec/batch
Epoch 74/100  Iteration 41756/56600 Training loss: 0.9865 0.4628 sec/batch
Epoch 74/100  Iteration 41757/56600 Training loss: 0.9864 0.4803 sec/batch
Epoch 74/100  Iteration 41758/56600 Training loss: 0.9863 0.4875 sec/batch
Epoch 74/100  Iteration 41759/56600 Training loss: 0.9862 0.4852 sec/batch
Epoch 74/100  Iteration 41760/56600 Training loss: 0.9862 0.4791 sec/batch
Epoch 74/100  Iteration 41761/56600 Training loss: 0.9862 0.4774 sec/batch
Epoch 74/100  Iteration 41762/56600 Training loss: 0.9863 0.4733 sec/batch
Epoch 74/100  Iteration 41763/56600 Training loss: 0.9861 0.4780 sec/batch
Epoch 74/100  Iteration 41764/56600 Training loss: 0.9860 0.4793 sec/batch
Epoch 74/100  Iteration 41765/56600 Training loss: 0.9860 0.4790 sec/batch
Epoch 74/100  Iteration 4

Epoch 74/100  Iteration 41863/56600 Training loss: 0.9856 0.4986 sec/batch
Epoch 74/100  Iteration 41864/56600 Training loss: 0.9856 0.4952 sec/batch
Epoch 74/100  Iteration 41865/56600 Training loss: 0.9856 0.4953 sec/batch
Epoch 74/100  Iteration 41866/56600 Training loss: 0.9856 0.4789 sec/batch
Epoch 74/100  Iteration 41867/56600 Training loss: 0.9856 0.4785 sec/batch
Epoch 74/100  Iteration 41868/56600 Training loss: 0.9855 0.4799 sec/batch
Epoch 74/100  Iteration 41869/56600 Training loss: 0.9854 0.4890 sec/batch
Epoch 74/100  Iteration 41870/56600 Training loss: 0.9855 0.4798 sec/batch
Epoch 74/100  Iteration 41871/56600 Training loss: 0.9855 0.4787 sec/batch
Epoch 74/100  Iteration 41872/56600 Training loss: 0.9855 0.4844 sec/batch
Epoch 74/100  Iteration 41873/56600 Training loss: 0.9854 0.4900 sec/batch
Epoch 74/100  Iteration 41874/56600 Training loss: 0.9854 0.4992 sec/batch
Epoch 74/100  Iteration 41875/56600 Training loss: 0.9854 0.4947 sec/batch
Epoch 74/100  Iteration 4

Epoch 75/100  Iteration 41973/56600 Training loss: 0.9788 0.4691 sec/batch
Epoch 75/100  Iteration 41974/56600 Training loss: 0.9790 0.4734 sec/batch
Epoch 75/100  Iteration 41975/56600 Training loss: 0.9790 0.4739 sec/batch
Epoch 75/100  Iteration 41976/56600 Training loss: 0.9788 0.4796 sec/batch
Epoch 75/100  Iteration 41977/56600 Training loss: 0.9786 0.4790 sec/batch
Epoch 75/100  Iteration 41978/56600 Training loss: 0.9785 0.4794 sec/batch
Epoch 75/100  Iteration 41979/56600 Training loss: 0.9783 0.4952 sec/batch
Epoch 75/100  Iteration 41980/56600 Training loss: 0.9779 0.4785 sec/batch
Epoch 75/100  Iteration 41981/56600 Training loss: 0.9776 0.4793 sec/batch
Epoch 75/100  Iteration 41982/56600 Training loss: 0.9774 0.4844 sec/batch
Epoch 75/100  Iteration 41983/56600 Training loss: 0.9776 0.4901 sec/batch
Epoch 75/100  Iteration 41984/56600 Training loss: 0.9783 0.4787 sec/batch
Epoch 75/100  Iteration 41985/56600 Training loss: 0.9783 0.4794 sec/batch
Epoch 75/100  Iteration 4

Epoch 75/100  Iteration 42082/56600 Training loss: 0.9892 0.4677 sec/batch
Epoch 75/100  Iteration 42083/56600 Training loss: 0.9894 0.4641 sec/batch
Epoch 75/100  Iteration 42084/56600 Training loss: 0.9894 0.4790 sec/batch
Epoch 75/100  Iteration 42085/56600 Training loss: 0.9894 0.4743 sec/batch
Epoch 75/100  Iteration 42086/56600 Training loss: 0.9896 0.4676 sec/batch
Epoch 75/100  Iteration 42087/56600 Training loss: 0.9897 0.4699 sec/batch
Epoch 75/100  Iteration 42088/56600 Training loss: 0.9898 0.4797 sec/batch
Epoch 75/100  Iteration 42089/56600 Training loss: 0.9900 0.4723 sec/batch
Epoch 75/100  Iteration 42090/56600 Training loss: 0.9901 0.4802 sec/batch
Epoch 75/100  Iteration 42091/56600 Training loss: 0.9901 0.4766 sec/batch
Epoch 75/100  Iteration 42092/56600 Training loss: 0.9900 0.4709 sec/batch
Epoch 75/100  Iteration 42093/56600 Training loss: 0.9901 0.4793 sec/batch
Epoch 75/100  Iteration 42094/56600 Training loss: 0.9901 0.4786 sec/batch
Epoch 75/100  Iteration 4

Epoch 75/100  Iteration 42192/56600 Training loss: 0.9911 0.4784 sec/batch
Epoch 75/100  Iteration 42193/56600 Training loss: 0.9910 0.4740 sec/batch
Epoch 75/100  Iteration 42194/56600 Training loss: 0.9910 0.4636 sec/batch
Epoch 75/100  Iteration 42195/56600 Training loss: 0.9909 0.4688 sec/batch
Epoch 75/100  Iteration 42196/56600 Training loss: 0.9909 0.4625 sec/batch
Epoch 75/100  Iteration 42197/56600 Training loss: 0.9908 0.4717 sec/batch
Epoch 75/100  Iteration 42198/56600 Training loss: 0.9907 0.4703 sec/batch
Epoch 75/100  Iteration 42199/56600 Training loss: 0.9907 0.4797 sec/batch
Epoch 75/100  Iteration 42200/56600 Training loss: 0.9907 0.4784 sec/batch
Epoch 75/100  Iteration 42201/56600 Training loss: 0.9908 0.4740 sec/batch
Epoch 75/100  Iteration 42202/56600 Training loss: 0.9908 0.4652 sec/batch
Epoch 75/100  Iteration 42203/56600 Training loss: 0.9907 0.4739 sec/batch
Epoch 75/100  Iteration 42204/56600 Training loss: 0.9906 0.4753 sec/batch
Epoch 75/100  Iteration 4

Epoch 75/100  Iteration 42302/56600 Training loss: 0.9875 0.4741 sec/batch
Epoch 75/100  Iteration 42303/56600 Training loss: 0.9875 0.4792 sec/batch
Epoch 75/100  Iteration 42304/56600 Training loss: 0.9875 0.4790 sec/batch
Epoch 75/100  Iteration 42305/56600 Training loss: 0.9875 0.4790 sec/batch
Epoch 75/100  Iteration 42306/56600 Training loss: 0.9875 0.4739 sec/batch
Epoch 75/100  Iteration 42307/56600 Training loss: 0.9875 0.4793 sec/batch
Epoch 75/100  Iteration 42308/56600 Training loss: 0.9875 0.4792 sec/batch
Epoch 75/100  Iteration 42309/56600 Training loss: 0.9875 0.4944 sec/batch
Epoch 75/100  Iteration 42310/56600 Training loss: 0.9874 0.4953 sec/batch
Epoch 75/100  Iteration 42311/56600 Training loss: 0.9873 0.4844 sec/batch
Epoch 75/100  Iteration 42312/56600 Training loss: 0.9873 0.4784 sec/batch
Epoch 75/100  Iteration 42313/56600 Training loss: 0.9872 0.4799 sec/batch
Epoch 75/100  Iteration 42314/56600 Training loss: 0.9871 0.4820 sec/batch
Epoch 75/100  Iteration 4

Epoch 75/100  Iteration 42412/56600 Training loss: 0.9859 0.4798 sec/batch
Epoch 75/100  Iteration 42413/56600 Training loss: 0.9859 0.4785 sec/batch
Epoch 75/100  Iteration 42414/56600 Training loss: 0.9859 0.4828 sec/batch
Epoch 75/100  Iteration 42415/56600 Training loss: 0.9859 0.4609 sec/batch
Epoch 75/100  Iteration 42416/56600 Training loss: 0.9859 0.4795 sec/batch
Epoch 75/100  Iteration 42417/56600 Training loss: 0.9858 0.4779 sec/batch
Epoch 75/100  Iteration 42418/56600 Training loss: 0.9858 0.4816 sec/batch
Epoch 75/100  Iteration 42419/56600 Training loss: 0.9857 0.4703 sec/batch
Epoch 75/100  Iteration 42420/56600 Training loss: 0.9857 0.4693 sec/batch
Epoch 75/100  Iteration 42421/56600 Training loss: 0.9858 0.4895 sec/batch
Epoch 75/100  Iteration 42422/56600 Training loss: 0.9857 0.4734 sec/batch
Epoch 75/100  Iteration 42423/56600 Training loss: 0.9857 0.4796 sec/batch
Epoch 75/100  Iteration 42424/56600 Training loss: 0.9857 0.4793 sec/batch
Epoch 75/100  Iteration 4

Epoch 76/100  Iteration 42522/56600 Training loss: 0.9785 0.4947 sec/batch
Epoch 76/100  Iteration 42523/56600 Training loss: 0.9779 0.4635 sec/batch
Epoch 76/100  Iteration 42524/56600 Training loss: 0.9783 0.4792 sec/batch
Epoch 76/100  Iteration 42525/56600 Training loss: 0.9785 0.4844 sec/batch
Epoch 76/100  Iteration 42526/56600 Training loss: 0.9781 0.4735 sec/batch
Epoch 76/100  Iteration 42527/56600 Training loss: 0.9779 0.4847 sec/batch
Epoch 76/100  Iteration 42528/56600 Training loss: 0.9778 0.4787 sec/batch
Epoch 76/100  Iteration 42529/56600 Training loss: 0.9776 0.4790 sec/batch
Epoch 76/100  Iteration 42530/56600 Training loss: 0.9776 0.4896 sec/batch
Epoch 76/100  Iteration 42531/56600 Training loss: 0.9776 0.4801 sec/batch
Epoch 76/100  Iteration 42532/56600 Training loss: 0.9776 0.4783 sec/batch
Epoch 76/100  Iteration 42533/56600 Training loss: 0.9780 0.4798 sec/batch
Epoch 76/100  Iteration 42534/56600 Training loss: 0.9783 0.4784 sec/batch
Epoch 76/100  Iteration 4

Epoch 76/100  Iteration 42632/56600 Training loss: 0.9850 0.4789 sec/batch
Epoch 76/100  Iteration 42633/56600 Training loss: 0.9851 0.4893 sec/batch
Epoch 76/100  Iteration 42634/56600 Training loss: 0.9853 0.4796 sec/batch
Epoch 76/100  Iteration 42635/56600 Training loss: 0.9853 0.4636 sec/batch
Epoch 76/100  Iteration 42636/56600 Training loss: 0.9855 0.4789 sec/batch
Epoch 76/100  Iteration 42637/56600 Training loss: 0.9856 0.4755 sec/batch
Epoch 76/100  Iteration 42638/56600 Training loss: 0.9858 0.4855 sec/batch
Epoch 76/100  Iteration 42639/56600 Training loss: 0.9857 0.4703 sec/batch
Epoch 76/100  Iteration 42640/56600 Training loss: 0.9858 0.4947 sec/batch
Epoch 76/100  Iteration 42641/56600 Training loss: 0.9857 0.4878 sec/batch
Epoch 76/100  Iteration 42642/56600 Training loss: 0.9856 0.4698 sec/batch
Epoch 76/100  Iteration 42643/56600 Training loss: 0.9858 0.4752 sec/batch
Epoch 76/100  Iteration 42644/56600 Training loss: 0.9859 0.4785 sec/batch
Epoch 76/100  Iteration 4

Epoch 76/100  Iteration 42742/56600 Training loss: 0.9889 0.4784 sec/batch
Epoch 76/100  Iteration 42743/56600 Training loss: 0.9890 0.4799 sec/batch
Epoch 76/100  Iteration 42744/56600 Training loss: 0.9890 0.4819 sec/batch
Epoch 76/100  Iteration 42745/56600 Training loss: 0.9892 0.4760 sec/batch
Epoch 76/100  Iteration 42746/56600 Training loss: 0.9892 0.4797 sec/batch
Epoch 76/100  Iteration 42747/56600 Training loss: 0.9893 0.4790 sec/batch
Epoch 76/100  Iteration 42748/56600 Training loss: 0.9894 0.4737 sec/batch
Epoch 76/100  Iteration 42749/56600 Training loss: 0.9894 0.4844 sec/batch
Epoch 76/100  Iteration 42750/56600 Training loss: 0.9894 0.4881 sec/batch
Epoch 76/100  Iteration 42751/56600 Training loss: 0.9893 0.4701 sec/batch
Epoch 76/100  Iteration 42752/56600 Training loss: 0.9893 0.4727 sec/batch
Epoch 76/100  Iteration 42753/56600 Training loss: 0.9892 0.4778 sec/batch
Epoch 76/100  Iteration 42754/56600 Training loss: 0.9892 0.4796 sec/batch
Epoch 76/100  Iteration 4

Epoch 76/100  Iteration 42852/56600 Training loss: 0.9861 0.4823 sec/batch
Epoch 76/100  Iteration 42853/56600 Training loss: 0.9860 0.4790 sec/batch
Epoch 76/100  Iteration 42854/56600 Training loss: 0.9859 0.4825 sec/batch
Epoch 76/100  Iteration 42855/56600 Training loss: 0.9859 0.4702 sec/batch
Epoch 76/100  Iteration 42856/56600 Training loss: 0.9860 0.4782 sec/batch
Epoch 76/100  Iteration 42857/56600 Training loss: 0.9860 0.4755 sec/batch
Epoch 76/100  Iteration 42858/56600 Training loss: 0.9859 0.4824 sec/batch
Epoch 76/100  Iteration 42859/56600 Training loss: 0.9858 0.4796 sec/batch
Epoch 76/100  Iteration 42860/56600 Training loss: 0.9857 0.4793 sec/batch
Epoch 76/100  Iteration 42861/56600 Training loss: 0.9856 0.4737 sec/batch
Epoch 76/100  Iteration 42862/56600 Training loss: 0.9856 0.4797 sec/batch
Epoch 76/100  Iteration 42863/56600 Training loss: 0.9856 0.4781 sec/batch
Epoch 76/100  Iteration 42864/56600 Training loss: 0.9855 0.4885 sec/batch
Epoch 76/100  Iteration 4

Epoch 76/100  Iteration 42962/56600 Training loss: 0.9835 0.4680 sec/batch
Epoch 76/100  Iteration 42963/56600 Training loss: 0.9835 0.4753 sec/batch
Epoch 76/100  Iteration 42964/56600 Training loss: 0.9835 0.4780 sec/batch
Epoch 76/100  Iteration 42965/56600 Training loss: 0.9835 0.4799 sec/batch
Epoch 76/100  Iteration 42966/56600 Training loss: 0.9835 0.4734 sec/batch
Epoch 76/100  Iteration 42967/56600 Training loss: 0.9836 0.4860 sec/batch
Epoch 76/100  Iteration 42968/56600 Training loss: 0.9836 0.4722 sec/batch
Epoch 76/100  Iteration 42969/56600 Training loss: 0.9836 0.4716 sec/batch
Epoch 76/100  Iteration 42970/56600 Training loss: 0.9836 0.4809 sec/batch
Epoch 76/100  Iteration 42971/56600 Training loss: 0.9836 0.4881 sec/batch
Epoch 76/100  Iteration 42972/56600 Training loss: 0.9836 0.4863 sec/batch
Epoch 76/100  Iteration 42973/56600 Training loss: 0.9836 0.4714 sec/batch
Epoch 76/100  Iteration 42974/56600 Training loss: 0.9837 0.4659 sec/batch
Epoch 76/100  Iteration 4

Epoch 77/100  Iteration 43072/56600 Training loss: 0.9852 0.4709 sec/batch
Epoch 77/100  Iteration 43073/56600 Training loss: 0.9845 0.4844 sec/batch
Epoch 77/100  Iteration 43074/56600 Training loss: 0.9835 0.4904 sec/batch
Epoch 77/100  Iteration 43075/56600 Training loss: 0.9830 0.4765 sec/batch
Epoch 77/100  Iteration 43076/56600 Training loss: 0.9823 0.4809 sec/batch
Epoch 77/100  Iteration 43077/56600 Training loss: 0.9817 0.4732 sec/batch
Epoch 77/100  Iteration 43078/56600 Training loss: 0.9813 0.4685 sec/batch
Epoch 77/100  Iteration 43079/56600 Training loss: 0.9812 0.4747 sec/batch
Epoch 77/100  Iteration 43080/56600 Training loss: 0.9803 0.4788 sec/batch
Epoch 77/100  Iteration 43081/56600 Training loss: 0.9797 0.4870 sec/batch
Epoch 77/100  Iteration 43082/56600 Training loss: 0.9795 0.4752 sec/batch
Epoch 77/100  Iteration 43083/56600 Training loss: 0.9792 0.4841 sec/batch
Epoch 77/100  Iteration 43084/56600 Training loss: 0.9794 0.4793 sec/batch
Epoch 77/100  Iteration 4

Epoch 77/100  Iteration 43182/56600 Training loss: 0.9832 0.4806 sec/batch
Epoch 77/100  Iteration 43183/56600 Training loss: 0.9833 0.4856 sec/batch
Epoch 77/100  Iteration 43184/56600 Training loss: 0.9835 0.4695 sec/batch
Epoch 77/100  Iteration 43185/56600 Training loss: 0.9837 0.4791 sec/batch
Epoch 77/100  Iteration 43186/56600 Training loss: 0.9837 0.4765 sec/batch
Epoch 77/100  Iteration 43187/56600 Training loss: 0.9836 0.4816 sec/batch
Epoch 77/100  Iteration 43188/56600 Training loss: 0.9836 0.4790 sec/batch
Epoch 77/100  Iteration 43189/56600 Training loss: 0.9836 0.4705 sec/batch
Epoch 77/100  Iteration 43190/56600 Training loss: 0.9837 0.4807 sec/batch
Epoch 77/100  Iteration 43191/56600 Training loss: 0.9835 0.4658 sec/batch
Epoch 77/100  Iteration 43192/56600 Training loss: 0.9834 0.4799 sec/batch
Epoch 77/100  Iteration 43193/56600 Training loss: 0.9834 0.4874 sec/batch
Epoch 77/100  Iteration 43194/56600 Training loss: 0.9834 0.4615 sec/batch
Epoch 77/100  Iteration 4

Epoch 77/100  Iteration 43292/56600 Training loss: 0.9874 0.4888 sec/batch
Epoch 77/100  Iteration 43293/56600 Training loss: 0.9873 0.4688 sec/batch
Epoch 77/100  Iteration 43294/56600 Training loss: 0.9873 0.4773 sec/batch
Epoch 77/100  Iteration 43295/56600 Training loss: 0.9872 0.4814 sec/batch
Epoch 77/100  Iteration 43296/56600 Training loss: 0.9871 0.4649 sec/batch
Epoch 77/100  Iteration 43297/56600 Training loss: 0.9871 0.4778 sec/batch
Epoch 77/100  Iteration 43298/56600 Training loss: 0.9871 0.4643 sec/batch
Epoch 77/100  Iteration 43299/56600 Training loss: 0.9871 0.4842 sec/batch
Epoch 77/100  Iteration 43300/56600 Training loss: 0.9869 0.4838 sec/batch
Epoch 77/100  Iteration 43301/56600 Training loss: 0.9869 0.4802 sec/batch
Epoch 77/100  Iteration 43302/56600 Training loss: 0.9870 0.4623 sec/batch
Epoch 77/100  Iteration 43303/56600 Training loss: 0.9872 0.4844 sec/batch
Epoch 77/100  Iteration 43304/56600 Training loss: 0.9872 0.4732 sec/batch
Epoch 77/100  Iteration 4

Epoch 77/100  Iteration 43402/56600 Training loss: 0.9846 0.4770 sec/batch
Epoch 77/100  Iteration 43403/56600 Training loss: 0.9847 0.4817 sec/batch
Epoch 77/100  Iteration 43404/56600 Training loss: 0.9846 0.4879 sec/batch
Epoch 77/100  Iteration 43405/56600 Training loss: 0.9846 0.4856 sec/batch
Epoch 77/100  Iteration 43406/56600 Training loss: 0.9846 0.4944 sec/batch
Epoch 77/100  Iteration 43407/56600 Training loss: 0.9845 0.4791 sec/batch
Epoch 77/100  Iteration 43408/56600 Training loss: 0.9845 0.4864 sec/batch
Epoch 77/100  Iteration 43409/56600 Training loss: 0.9845 0.4717 sec/batch
Epoch 77/100  Iteration 43410/56600 Training loss: 0.9845 0.4786 sec/batch
Epoch 77/100  Iteration 43411/56600 Training loss: 0.9846 0.4741 sec/batch
Epoch 77/100  Iteration 43412/56600 Training loss: 0.9845 0.4845 sec/batch
Epoch 77/100  Iteration 43413/56600 Training loss: 0.9846 0.4742 sec/batch
Epoch 77/100  Iteration 43414/56600 Training loss: 0.9846 0.4794 sec/batch
Epoch 77/100  Iteration 4

Epoch 77/100  Iteration 43512/56600 Training loss: 0.9828 0.4819 sec/batch
Epoch 77/100  Iteration 43513/56600 Training loss: 0.9828 0.4796 sec/batch
Epoch 77/100  Iteration 43514/56600 Training loss: 0.9828 0.4804 sec/batch
Epoch 77/100  Iteration 43515/56600 Training loss: 0.9828 0.4635 sec/batch
Epoch 77/100  Iteration 43516/56600 Training loss: 0.9828 0.4757 sec/batch
Epoch 77/100  Iteration 43517/56600 Training loss: 0.9827 0.4613 sec/batch
Epoch 77/100  Iteration 43518/56600 Training loss: 0.9826 0.4632 sec/batch
Epoch 77/100  Iteration 43519/56600 Training loss: 0.9826 0.4794 sec/batch
Epoch 77/100  Iteration 43520/56600 Training loss: 0.9825 0.4849 sec/batch
Epoch 77/100  Iteration 43521/56600 Training loss: 0.9825 0.4663 sec/batch
Epoch 77/100  Iteration 43522/56600 Training loss: 0.9825 0.4712 sec/batch
Epoch 77/100  Iteration 43523/56600 Training loss: 0.9824 0.4630 sec/batch
Epoch 77/100  Iteration 43524/56600 Training loss: 0.9823 0.4638 sec/batch
Epoch 77/100  Iteration 4

Epoch 78/100  Iteration 43622/56600 Training loss: 0.9895 0.4837 sec/batch
Epoch 78/100  Iteration 43623/56600 Training loss: 0.9897 0.4694 sec/batch
Epoch 78/100  Iteration 43624/56600 Training loss: 0.9901 0.4839 sec/batch
Epoch 78/100  Iteration 43625/56600 Training loss: 0.9899 0.4820 sec/batch
Epoch 78/100  Iteration 43626/56600 Training loss: 0.9896 0.4718 sec/batch
Epoch 78/100  Iteration 43627/56600 Training loss: 0.9902 0.4784 sec/batch
Epoch 78/100  Iteration 43628/56600 Training loss: 0.9904 0.4736 sec/batch
Epoch 78/100  Iteration 43629/56600 Training loss: 0.9893 0.4707 sec/batch
Epoch 78/100  Iteration 43630/56600 Training loss: 0.9893 0.4787 sec/batch
Epoch 78/100  Iteration 43631/56600 Training loss: 0.9888 0.4793 sec/batch
Epoch 78/100  Iteration 43632/56600 Training loss: 0.9880 0.4895 sec/batch
Epoch 78/100  Iteration 43633/56600 Training loss: 0.9872 0.4796 sec/batch
Epoch 78/100  Iteration 43634/56600 Training loss: 0.9861 0.4787 sec/batch
Epoch 78/100  Iteration 4

Epoch 78/100  Iteration 43732/56600 Training loss: 0.9813 0.4787 sec/batch
Epoch 78/100  Iteration 43733/56600 Training loss: 0.9817 0.4643 sec/batch
Epoch 78/100  Iteration 43734/56600 Training loss: 0.9821 0.4598 sec/batch
Epoch 78/100  Iteration 43735/56600 Training loss: 0.9823 0.4708 sec/batch
Epoch 78/100  Iteration 43736/56600 Training loss: 0.9824 0.4716 sec/batch
Epoch 78/100  Iteration 43737/56600 Training loss: 0.9825 0.4841 sec/batch
Epoch 78/100  Iteration 43738/56600 Training loss: 0.9826 0.4690 sec/batch
Epoch 78/100  Iteration 43739/56600 Training loss: 0.9828 0.4632 sec/batch
Epoch 78/100  Iteration 43740/56600 Training loss: 0.9829 0.4691 sec/batch
Epoch 78/100  Iteration 43741/56600 Training loss: 0.9829 0.4790 sec/batch
Epoch 78/100  Iteration 43742/56600 Training loss: 0.9829 0.4636 sec/batch
Epoch 78/100  Iteration 43743/56600 Training loss: 0.9829 0.4688 sec/batch
Epoch 78/100  Iteration 43744/56600 Training loss: 0.9829 0.4721 sec/batch
Epoch 78/100  Iteration 4

Epoch 78/100  Iteration 43842/56600 Training loss: 0.9874 0.4743 sec/batch
Epoch 78/100  Iteration 43843/56600 Training loss: 0.9873 0.4786 sec/batch
Epoch 78/100  Iteration 43844/56600 Training loss: 0.9872 0.4792 sec/batch
Epoch 78/100  Iteration 43845/56600 Training loss: 0.9872 0.4720 sec/batch
Epoch 78/100  Iteration 43846/56600 Training loss: 0.9871 0.4875 sec/batch
Epoch 78/100  Iteration 43847/56600 Training loss: 0.9870 0.4887 sec/batch
Epoch 78/100  Iteration 43848/56600 Training loss: 0.9870 0.4785 sec/batch
Epoch 78/100  Iteration 43849/56600 Training loss: 0.9869 0.4901 sec/batch
Epoch 78/100  Iteration 43850/56600 Training loss: 0.9868 0.4897 sec/batch
Epoch 78/100  Iteration 43851/56600 Training loss: 0.9867 0.4658 sec/batch
Epoch 78/100  Iteration 43852/56600 Training loss: 0.9866 0.4814 sec/batch
Epoch 78/100  Iteration 43853/56600 Training loss: 0.9867 0.4750 sec/batch
Epoch 78/100  Iteration 43854/56600 Training loss: 0.9868 0.4821 sec/batch
Epoch 78/100  Iteration 4

Epoch 78/100  Iteration 43952/56600 Training loss: 0.9840 0.4703 sec/batch
Epoch 78/100  Iteration 43953/56600 Training loss: 0.9839 0.4792 sec/batch
Epoch 78/100  Iteration 43954/56600 Training loss: 0.9838 0.4770 sec/batch
Epoch 78/100  Iteration 43955/56600 Training loss: 0.9838 0.4813 sec/batch
Epoch 78/100  Iteration 43956/56600 Training loss: 0.9837 0.4797 sec/batch
Epoch 78/100  Iteration 43957/56600 Training loss: 0.9836 0.4794 sec/batch
Epoch 78/100  Iteration 43958/56600 Training loss: 0.9836 0.4723 sec/batch
Epoch 78/100  Iteration 43959/56600 Training loss: 0.9836 0.4852 sec/batch
Epoch 78/100  Iteration 43960/56600 Training loss: 0.9836 0.4688 sec/batch
Epoch 78/100  Iteration 43961/56600 Training loss: 0.9835 0.4580 sec/batch
Epoch 78/100  Iteration 43962/56600 Training loss: 0.9835 0.4644 sec/batch
Epoch 78/100  Iteration 43963/56600 Training loss: 0.9835 0.4788 sec/batch
Epoch 78/100  Iteration 43964/56600 Training loss: 0.9835 0.4801 sec/batch
Epoch 78/100  Iteration 4

Epoch 78/100  Iteration 44061/56600 Training loss: 0.9825 0.4629 sec/batch
Epoch 78/100  Iteration 44062/56600 Training loss: 0.9825 0.4882 sec/batch
Epoch 78/100  Iteration 44063/56600 Training loss: 0.9825 0.4596 sec/batch
Epoch 78/100  Iteration 44064/56600 Training loss: 0.9825 0.4687 sec/batch
Epoch 78/100  Iteration 44065/56600 Training loss: 0.9825 0.4870 sec/batch
Epoch 78/100  Iteration 44066/56600 Training loss: 0.9825 0.4660 sec/batch
Epoch 78/100  Iteration 44067/56600 Training loss: 0.9825 0.4636 sec/batch
Epoch 78/100  Iteration 44068/56600 Training loss: 0.9824 0.4626 sec/batch
Epoch 78/100  Iteration 44069/56600 Training loss: 0.9824 0.4768 sec/batch
Epoch 78/100  Iteration 44070/56600 Training loss: 0.9824 0.4661 sec/batch
Epoch 78/100  Iteration 44071/56600 Training loss: 0.9824 0.4636 sec/batch
Epoch 78/100  Iteration 44072/56600 Training loss: 0.9824 0.4641 sec/batch
Epoch 78/100  Iteration 44073/56600 Training loss: 0.9824 0.4688 sec/batch
Epoch 78/100  Iteration 4

Epoch 79/100  Iteration 44171/56600 Training loss: 0.9884 0.4710 sec/batch
Epoch 79/100  Iteration 44172/56600 Training loss: 0.9875 0.4564 sec/batch
Epoch 79/100  Iteration 44173/56600 Training loss: 0.9866 0.4846 sec/batch
Epoch 79/100  Iteration 44174/56600 Training loss: 0.9847 0.4795 sec/batch
Epoch 79/100  Iteration 44175/56600 Training loss: 0.9840 0.4817 sec/batch
Epoch 79/100  Iteration 44176/56600 Training loss: 0.9839 0.4626 sec/batch
Epoch 79/100  Iteration 44177/56600 Training loss: 0.9837 0.4626 sec/batch
Epoch 79/100  Iteration 44178/56600 Training loss: 0.9828 0.4763 sec/batch
Epoch 79/100  Iteration 44179/56600 Training loss: 0.9819 0.4765 sec/batch
Epoch 79/100  Iteration 44180/56600 Training loss: 0.9815 0.4801 sec/batch
Epoch 79/100  Iteration 44181/56600 Training loss: 0.9817 0.4787 sec/batch
Epoch 79/100  Iteration 44182/56600 Training loss: 0.9834 0.4736 sec/batch
Epoch 79/100  Iteration 44183/56600 Training loss: 0.9853 0.4790 sec/batch
Epoch 79/100  Iteration 4

Epoch 79/100  Iteration 44281/56600 Training loss: 0.9780 0.4793 sec/batch
Epoch 79/100  Iteration 44282/56600 Training loss: 0.9779 0.4950 sec/batch
Epoch 79/100  Iteration 44283/56600 Training loss: 0.9776 0.4815 sec/batch
Epoch 79/100  Iteration 44284/56600 Training loss: 0.9775 0.4765 sec/batch
Epoch 79/100  Iteration 44285/56600 Training loss: 0.9776 0.4812 sec/batch
Epoch 79/100  Iteration 44286/56600 Training loss: 0.9780 0.4867 sec/batch
Epoch 79/100  Iteration 44287/56600 Training loss: 0.9783 0.4797 sec/batch
Epoch 79/100  Iteration 44288/56600 Training loss: 0.9783 0.4880 sec/batch
Epoch 79/100  Iteration 44289/56600 Training loss: 0.9785 0.4683 sec/batch
Epoch 79/100  Iteration 44290/56600 Training loss: 0.9786 0.4805 sec/batch
Epoch 79/100  Iteration 44291/56600 Training loss: 0.9788 0.4787 sec/batch
Epoch 79/100  Iteration 44292/56600 Training loss: 0.9788 0.4794 sec/batch
Epoch 79/100  Iteration 44293/56600 Training loss: 0.9790 0.4886 sec/batch
Epoch 79/100  Iteration 4

Epoch 79/100  Iteration 44391/56600 Training loss: 0.9856 0.4794 sec/batch
Epoch 79/100  Iteration 44392/56600 Training loss: 0.9858 0.4783 sec/batch
Epoch 79/100  Iteration 44393/56600 Training loss: 0.9860 0.4751 sec/batch
Epoch 79/100  Iteration 44394/56600 Training loss: 0.9862 0.4785 sec/batch
Epoch 79/100  Iteration 44395/56600 Training loss: 0.9863 0.4783 sec/batch
Epoch 79/100  Iteration 44396/56600 Training loss: 0.9863 0.4797 sec/batch
Epoch 79/100  Iteration 44397/56600 Training loss: 0.9864 0.4862 sec/batch
Epoch 79/100  Iteration 44398/56600 Training loss: 0.9864 0.4642 sec/batch
Epoch 79/100  Iteration 44399/56600 Training loss: 0.9862 0.4795 sec/batch
Epoch 79/100  Iteration 44400/56600 Training loss: 0.9861 0.4745 sec/batch
Epoch 79/100  Iteration 44401/56600 Training loss: 0.9861 0.4791 sec/batch
Epoch 79/100  Iteration 44402/56600 Training loss: 0.9861 0.4792 sec/batch
Epoch 79/100  Iteration 44403/56600 Training loss: 0.9860 0.4688 sec/batch
Epoch 79/100  Iteration 4

Epoch 79/100  Iteration 44501/56600 Training loss: 0.9829 0.4775 sec/batch
Epoch 79/100  Iteration 44502/56600 Training loss: 0.9828 0.4813 sec/batch
Epoch 79/100  Iteration 44503/56600 Training loss: 0.9828 0.4770 sec/batch
Epoch 79/100  Iteration 44504/56600 Training loss: 0.9829 0.4814 sec/batch
Epoch 79/100  Iteration 44505/56600 Training loss: 0.9831 0.4798 sec/batch
Epoch 79/100  Iteration 44506/56600 Training loss: 0.9831 0.4789 sec/batch
Epoch 79/100  Iteration 44507/56600 Training loss: 0.9831 0.4731 sec/batch
Epoch 79/100  Iteration 44508/56600 Training loss: 0.9831 0.4696 sec/batch
Epoch 79/100  Iteration 44509/56600 Training loss: 0.9831 0.4782 sec/batch
Epoch 79/100  Iteration 44510/56600 Training loss: 0.9830 0.4790 sec/batch
Epoch 79/100  Iteration 44511/56600 Training loss: 0.9830 0.4786 sec/batch
Epoch 79/100  Iteration 44512/56600 Training loss: 0.9831 0.4793 sec/batch
Epoch 79/100  Iteration 44513/56600 Training loss: 0.9830 0.4748 sec/batch
Epoch 79/100  Iteration 4

Epoch 79/100  Iteration 44611/56600 Training loss: 0.9801 0.4803 sec/batch
Epoch 79/100  Iteration 44612/56600 Training loss: 0.9801 0.4787 sec/batch
Epoch 79/100  Iteration 44613/56600 Training loss: 0.9801 0.4870 sec/batch
Epoch 79/100  Iteration 44614/56600 Training loss: 0.9801 0.4863 sec/batch
Epoch 79/100  Iteration 44615/56600 Training loss: 0.9801 0.4873 sec/batch
Epoch 79/100  Iteration 44616/56600 Training loss: 0.9802 0.4810 sec/batch
Epoch 79/100  Iteration 44617/56600 Training loss: 0.9802 0.4796 sec/batch
Epoch 79/100  Iteration 44618/56600 Training loss: 0.9802 0.4946 sec/batch
Epoch 79/100  Iteration 44619/56600 Training loss: 0.9804 0.4781 sec/batch
Epoch 79/100  Iteration 44620/56600 Training loss: 0.9806 0.4791 sec/batch
Epoch 79/100  Iteration 44621/56600 Training loss: 0.9806 0.4844 sec/batch
Epoch 79/100  Iteration 44622/56600 Training loss: 0.9806 0.4808 sec/batch
Epoch 79/100  Iteration 44623/56600 Training loss: 0.9805 0.4783 sec/batch
Epoch 79/100  Iteration 4

Epoch 80/100  Iteration 44721/56600 Training loss: 1.0224 0.4780 sec/batch
Epoch 80/100  Iteration 44722/56600 Training loss: 1.0146 0.4759 sec/batch
Epoch 80/100  Iteration 44723/56600 Training loss: 1.0122 0.4691 sec/batch
Epoch 80/100  Iteration 44724/56600 Training loss: 1.0089 0.4716 sec/batch
Epoch 80/100  Iteration 44725/56600 Training loss: 1.0077 0.4640 sec/batch
Epoch 80/100  Iteration 44726/56600 Training loss: 1.0081 0.4737 sec/batch
Epoch 80/100  Iteration 44727/56600 Training loss: 1.0074 0.4803 sec/batch
Epoch 80/100  Iteration 44728/56600 Training loss: 1.0042 0.4777 sec/batch
Epoch 80/100  Iteration 44729/56600 Training loss: 1.0024 0.4688 sec/batch
Epoch 80/100  Iteration 44730/56600 Training loss: 1.0008 0.4697 sec/batch
Epoch 80/100  Iteration 44731/56600 Training loss: 0.9987 0.4702 sec/batch
Epoch 80/100  Iteration 44732/56600 Training loss: 0.9966 0.4725 sec/batch
Epoch 80/100  Iteration 44733/56600 Training loss: 0.9940 0.4805 sec/batch
Epoch 80/100  Iteration 4

Epoch 80/100  Iteration 44831/56600 Training loss: 0.9740 0.4696 sec/batch
Epoch 80/100  Iteration 44832/56600 Training loss: 0.9743 0.4689 sec/batch
Epoch 80/100  Iteration 44833/56600 Training loss: 0.9745 0.4873 sec/batch
Epoch 80/100  Iteration 44834/56600 Training loss: 0.9751 0.4743 sec/batch
Epoch 80/100  Iteration 44835/56600 Training loss: 0.9757 0.4809 sec/batch
Epoch 80/100  Iteration 44836/56600 Training loss: 0.9763 0.4790 sec/batch
Epoch 80/100  Iteration 44837/56600 Training loss: 0.9764 0.4794 sec/batch
Epoch 80/100  Iteration 44838/56600 Training loss: 0.9765 0.4726 sec/batch
Epoch 80/100  Iteration 44839/56600 Training loss: 0.9768 0.4851 sec/batch
Epoch 80/100  Iteration 44840/56600 Training loss: 0.9769 0.4791 sec/batch
Epoch 80/100  Iteration 44841/56600 Training loss: 0.9770 0.4635 sec/batch
Epoch 80/100  Iteration 44842/56600 Training loss: 0.9770 0.4753 sec/batch
Epoch 80/100  Iteration 44843/56600 Training loss: 0.9771 0.4844 sec/batch
Epoch 80/100  Iteration 4

Epoch 80/100  Iteration 44941/56600 Training loss: 0.9827 0.4793 sec/batch
Epoch 80/100  Iteration 44942/56600 Training loss: 0.9826 0.4794 sec/batch
Epoch 80/100  Iteration 44943/56600 Training loss: 0.9827 0.4788 sec/batch
Epoch 80/100  Iteration 44944/56600 Training loss: 0.9829 0.4691 sec/batch
Epoch 80/100  Iteration 44945/56600 Training loss: 0.9831 0.4738 sec/batch
Epoch 80/100  Iteration 44946/56600 Training loss: 0.9832 0.4788 sec/batch
Epoch 80/100  Iteration 44947/56600 Training loss: 0.9834 0.4900 sec/batch
Epoch 80/100  Iteration 44948/56600 Training loss: 0.9835 0.4762 sec/batch
Epoch 80/100  Iteration 44949/56600 Training loss: 0.9837 0.4857 sec/batch
Epoch 80/100  Iteration 44950/56600 Training loss: 0.9838 0.4789 sec/batch
Epoch 80/100  Iteration 44951/56600 Training loss: 0.9838 0.4739 sec/batch
Epoch 80/100  Iteration 44952/56600 Training loss: 0.9839 0.4799 sec/batch
Epoch 80/100  Iteration 44953/56600 Training loss: 0.9839 0.4785 sec/batch
Epoch 80/100  Iteration 4

Epoch 80/100  Iteration 45051/56600 Training loss: 0.9828 0.4922 sec/batch
Epoch 80/100  Iteration 45052/56600 Training loss: 0.9829 0.4795 sec/batch
Epoch 80/100  Iteration 45053/56600 Training loss: 0.9828 0.4636 sec/batch
Epoch 80/100  Iteration 45054/56600 Training loss: 0.9828 0.4794 sec/batch
Epoch 80/100  Iteration 45055/56600 Training loss: 0.9826 0.4875 sec/batch
Epoch 80/100  Iteration 45056/56600 Training loss: 0.9826 0.4778 sec/batch
Epoch 80/100  Iteration 45057/56600 Training loss: 0.9824 0.4807 sec/batch
Epoch 80/100  Iteration 45058/56600 Training loss: 0.9824 0.4871 sec/batch
Epoch 80/100  Iteration 45059/56600 Training loss: 0.9824 0.4868 sec/batch
Epoch 80/100  Iteration 45060/56600 Training loss: 0.9824 0.4977 sec/batch
Epoch 80/100  Iteration 45061/56600 Training loss: 0.9823 0.4800 sec/batch
Epoch 80/100  Iteration 45062/56600 Training loss: 0.9823 0.4810 sec/batch
Epoch 80/100  Iteration 45063/56600 Training loss: 0.9822 0.4779 sec/batch
Epoch 80/100  Iteration 4

Epoch 80/100  Iteration 45161/56600 Training loss: 0.9793 0.4789 sec/batch
Epoch 80/100  Iteration 45162/56600 Training loss: 0.9792 0.4796 sec/batch
Epoch 80/100  Iteration 45163/56600 Training loss: 0.9793 0.4998 sec/batch
Epoch 80/100  Iteration 45164/56600 Training loss: 0.9793 0.4790 sec/batch
Epoch 80/100  Iteration 45165/56600 Training loss: 0.9793 0.4738 sec/batch
Epoch 80/100  Iteration 45166/56600 Training loss: 0.9793 0.4851 sec/batch
Epoch 80/100  Iteration 45167/56600 Training loss: 0.9792 0.4791 sec/batch
Epoch 80/100  Iteration 45168/56600 Training loss: 0.9792 0.4790 sec/batch
Epoch 80/100  Iteration 45169/56600 Training loss: 0.9792 0.4736 sec/batch
Epoch 80/100  Iteration 45170/56600 Training loss: 0.9792 0.4796 sec/batch
Epoch 80/100  Iteration 45171/56600 Training loss: 0.9791 0.4947 sec/batch
Epoch 80/100  Iteration 45172/56600 Training loss: 0.9791 0.4802 sec/batch
Epoch 80/100  Iteration 45173/56600 Training loss: 0.9790 0.4883 sec/batch
Epoch 80/100  Iteration 4

Epoch 80/100  Iteration 45271/56600 Training loss: 0.9786 0.4774 sec/batch
Epoch 80/100  Iteration 45272/56600 Training loss: 0.9786 0.4765 sec/batch
Epoch 80/100  Iteration 45273/56600 Training loss: 0.9787 0.4940 sec/batch
Epoch 80/100  Iteration 45274/56600 Training loss: 0.9787 0.4799 sec/batch
Epoch 80/100  Iteration 45275/56600 Training loss: 0.9787 0.4940 sec/batch
Epoch 80/100  Iteration 45276/56600 Training loss: 0.9788 0.4718 sec/batch
Epoch 80/100  Iteration 45277/56600 Training loss: 0.9787 0.4789 sec/batch
Epoch 80/100  Iteration 45278/56600 Training loss: 0.9788 0.4805 sec/batch
Epoch 80/100  Iteration 45279/56600 Training loss: 0.9789 0.4879 sec/batch
Epoch 80/100  Iteration 45280/56600 Training loss: 0.9790 0.4928 sec/batch
Epoch 81/100  Iteration 45281/56600 Training loss: 1.1288 0.4844 sec/batch
Epoch 81/100  Iteration 45282/56600 Training loss: 1.0688 0.4793 sec/batch
Epoch 81/100  Iteration 45283/56600 Training loss: 1.0555 0.4790 sec/batch
Epoch 81/100  Iteration 4

Epoch 81/100  Iteration 45381/56600 Training loss: 0.9712 0.4796 sec/batch
Epoch 81/100  Iteration 45382/56600 Training loss: 0.9717 0.4791 sec/batch
Epoch 81/100  Iteration 45383/56600 Training loss: 0.9716 0.4897 sec/batch
Epoch 81/100  Iteration 45384/56600 Training loss: 0.9718 0.4787 sec/batch
Epoch 81/100  Iteration 45385/56600 Training loss: 0.9718 0.4797 sec/batch
Epoch 81/100  Iteration 45386/56600 Training loss: 0.9717 0.4939 sec/batch
Epoch 81/100  Iteration 45387/56600 Training loss: 0.9713 0.4779 sec/batch
Epoch 81/100  Iteration 45388/56600 Training loss: 0.9713 0.4785 sec/batch
Epoch 81/100  Iteration 45389/56600 Training loss: 0.9715 0.4869 sec/batch
Epoch 81/100  Iteration 45390/56600 Training loss: 0.9716 0.4874 sec/batch
Epoch 81/100  Iteration 45391/56600 Training loss: 0.9714 0.4941 sec/batch
Epoch 81/100  Iteration 45392/56600 Training loss: 0.9713 0.4758 sec/batch
Epoch 81/100  Iteration 45393/56600 Training loss: 0.9718 0.4780 sec/batch
Epoch 81/100  Iteration 4

Epoch 81/100  Iteration 45491/56600 Training loss: 0.9816 0.4798 sec/batch
Epoch 81/100  Iteration 45492/56600 Training loss: 0.9815 0.4778 sec/batch
Epoch 81/100  Iteration 45493/56600 Training loss: 0.9813 0.4634 sec/batch
Epoch 81/100  Iteration 45494/56600 Training loss: 0.9811 0.4799 sec/batch
Epoch 81/100  Iteration 45495/56600 Training loss: 0.9811 0.4692 sec/batch
Epoch 81/100  Iteration 45496/56600 Training loss: 0.9811 0.4819 sec/batch
Epoch 81/100  Iteration 45497/56600 Training loss: 0.9810 0.4794 sec/batch
Epoch 81/100  Iteration 45498/56600 Training loss: 0.9810 0.4784 sec/batch
Epoch 81/100  Iteration 45499/56600 Training loss: 0.9812 0.4795 sec/batch
Epoch 81/100  Iteration 45500/56600 Training loss: 0.9814 0.4722 sec/batch
Epoch 81/100  Iteration 45501/56600 Training loss: 0.9814 0.4648 sec/batch
Epoch 81/100  Iteration 45502/56600 Training loss: 0.9814 0.4638 sec/batch
Epoch 81/100  Iteration 45503/56600 Training loss: 0.9813 0.4642 sec/batch
Epoch 81/100  Iteration 4

Epoch 81/100  Iteration 45601/56600 Training loss: 0.9824 0.4775 sec/batch
Epoch 81/100  Iteration 45602/56600 Training loss: 0.9822 0.4796 sec/batch
Epoch 81/100  Iteration 45603/56600 Training loss: 0.9821 0.4796 sec/batch
Epoch 81/100  Iteration 45604/56600 Training loss: 0.9820 0.4795 sec/batch
Epoch 81/100  Iteration 45605/56600 Training loss: 0.9821 0.4787 sec/batch
Epoch 81/100  Iteration 45606/56600 Training loss: 0.9821 0.4849 sec/batch
Epoch 81/100  Iteration 45607/56600 Training loss: 0.9820 0.4829 sec/batch
Epoch 81/100  Iteration 45608/56600 Training loss: 0.9819 0.4794 sec/batch
Epoch 81/100  Iteration 45609/56600 Training loss: 0.9820 0.4762 sec/batch
Epoch 81/100  Iteration 45610/56600 Training loss: 0.9819 0.4744 sec/batch
Epoch 81/100  Iteration 45611/56600 Training loss: 0.9819 0.4710 sec/batch
Epoch 81/100  Iteration 45612/56600 Training loss: 0.9820 0.4870 sec/batch
Epoch 81/100  Iteration 45613/56600 Training loss: 0.9821 0.4814 sec/batch
Epoch 81/100  Iteration 4

Epoch 81/100  Iteration 45711/56600 Training loss: 0.9791 0.4674 sec/batch
Epoch 81/100  Iteration 45712/56600 Training loss: 0.9790 0.4748 sec/batch
Epoch 81/100  Iteration 45713/56600 Training loss: 0.9789 0.4743 sec/batch
Epoch 81/100  Iteration 45714/56600 Training loss: 0.9788 0.4669 sec/batch
Epoch 81/100  Iteration 45715/56600 Training loss: 0.9788 0.4691 sec/batch
Epoch 81/100  Iteration 45716/56600 Training loss: 0.9788 0.4738 sec/batch
Epoch 81/100  Iteration 45717/56600 Training loss: 0.9788 0.4894 sec/batch
Epoch 81/100  Iteration 45718/56600 Training loss: 0.9787 0.4808 sec/batch
Epoch 81/100  Iteration 45719/56600 Training loss: 0.9786 0.4773 sec/batch
Epoch 81/100  Iteration 45720/56600 Training loss: 0.9785 0.4799 sec/batch
Epoch 81/100  Iteration 45721/56600 Training loss: 0.9784 0.4773 sec/batch
Epoch 81/100  Iteration 45722/56600 Training loss: 0.9784 0.4699 sec/batch
Epoch 81/100  Iteration 45723/56600 Training loss: 0.9785 0.4886 sec/batch
Epoch 81/100  Iteration 4

Epoch 81/100  Iteration 45821/56600 Training loss: 0.9779 0.4789 sec/batch
Epoch 81/100  Iteration 45822/56600 Training loss: 0.9779 0.4743 sec/batch
Epoch 81/100  Iteration 45823/56600 Training loss: 0.9779 0.4942 sec/batch
Epoch 81/100  Iteration 45824/56600 Training loss: 0.9779 0.5022 sec/batch
Epoch 81/100  Iteration 45825/56600 Training loss: 0.9779 0.4719 sec/batch
Epoch 81/100  Iteration 45826/56600 Training loss: 0.9779 0.4892 sec/batch
Epoch 81/100  Iteration 45827/56600 Training loss: 0.9779 0.4834 sec/batch
Epoch 81/100  Iteration 45828/56600 Training loss: 0.9779 0.4742 sec/batch
Epoch 81/100  Iteration 45829/56600 Training loss: 0.9778 0.4941 sec/batch
Epoch 81/100  Iteration 45830/56600 Training loss: 0.9778 0.4805 sec/batch
Epoch 81/100  Iteration 45831/56600 Training loss: 0.9777 0.4942 sec/batch
Epoch 81/100  Iteration 45832/56600 Training loss: 0.9777 0.4787 sec/batch
Epoch 81/100  Iteration 45833/56600 Training loss: 0.9778 0.4793 sec/batch
Epoch 81/100  Iteration 4

Epoch 82/100  Iteration 45931/56600 Training loss: 0.9702 0.4842 sec/batch
Epoch 82/100  Iteration 45932/56600 Training loss: 0.9699 0.4951 sec/batch
Epoch 82/100  Iteration 45933/56600 Training loss: 0.9695 0.4654 sec/batch
Epoch 82/100  Iteration 45934/56600 Training loss: 0.9691 0.4804 sec/batch
Epoch 82/100  Iteration 45935/56600 Training loss: 0.9692 0.4881 sec/batch
Epoch 82/100  Iteration 45936/56600 Training loss: 0.9694 0.4790 sec/batch
Epoch 82/100  Iteration 45937/56600 Training loss: 0.9694 0.4797 sec/batch
Epoch 82/100  Iteration 45938/56600 Training loss: 0.9693 0.4793 sec/batch
Epoch 82/100  Iteration 45939/56600 Training loss: 0.9691 0.4784 sec/batch
Epoch 82/100  Iteration 45940/56600 Training loss: 0.9690 0.4811 sec/batch
Epoch 82/100  Iteration 45941/56600 Training loss: 0.9689 0.4759 sec/batch
Epoch 82/100  Iteration 45942/56600 Training loss: 0.9686 0.4786 sec/batch
Epoch 82/100  Iteration 45943/56600 Training loss: 0.9682 0.4788 sec/batch
Epoch 82/100  Iteration 4

Epoch 82/100  Iteration 46040/56600 Training loss: 0.9796 0.4673 sec/batch
Epoch 82/100  Iteration 46041/56600 Training loss: 0.9799 0.4806 sec/batch
Epoch 82/100  Iteration 46042/56600 Training loss: 0.9799 0.4686 sec/batch
Epoch 82/100  Iteration 46043/56600 Training loss: 0.9800 0.4743 sec/batch
Epoch 82/100  Iteration 46044/56600 Training loss: 0.9802 0.4691 sec/batch
Epoch 82/100  Iteration 46045/56600 Training loss: 0.9804 0.4839 sec/batch
Epoch 82/100  Iteration 46046/56600 Training loss: 0.9804 0.4767 sec/batch
Epoch 82/100  Iteration 46047/56600 Training loss: 0.9803 0.4815 sec/batch
Epoch 82/100  Iteration 46048/56600 Training loss: 0.9805 0.4793 sec/batch
Epoch 82/100  Iteration 46049/56600 Training loss: 0.9806 0.4851 sec/batch
Epoch 82/100  Iteration 46050/56600 Training loss: 0.9807 0.4663 sec/batch
Epoch 82/100  Iteration 46051/56600 Training loss: 0.9809 0.4800 sec/batch
Epoch 82/100  Iteration 46052/56600 Training loss: 0.9811 0.4828 sec/batch
Epoch 82/100  Iteration 4

Epoch 82/100  Iteration 46150/56600 Training loss: 0.9827 0.4797 sec/batch
Epoch 82/100  Iteration 46151/56600 Training loss: 0.9826 0.4790 sec/batch
Epoch 82/100  Iteration 46152/56600 Training loss: 0.9826 0.4635 sec/batch
Epoch 82/100  Iteration 46153/56600 Training loss: 0.9824 0.4632 sec/batch
Epoch 82/100  Iteration 46154/56600 Training loss: 0.9822 0.4745 sec/batch
Epoch 82/100  Iteration 46155/56600 Training loss: 0.9822 0.4635 sec/batch
Epoch 82/100  Iteration 46156/56600 Training loss: 0.9822 0.4787 sec/batch
Epoch 82/100  Iteration 46157/56600 Training loss: 0.9821 0.4879 sec/batch
Epoch 82/100  Iteration 46158/56600 Training loss: 0.9821 0.4626 sec/batch
Epoch 82/100  Iteration 46159/56600 Training loss: 0.9820 0.4663 sec/batch
Epoch 82/100  Iteration 46160/56600 Training loss: 0.9819 0.4760 sec/batch
Epoch 82/100  Iteration 46161/56600 Training loss: 0.9819 0.4819 sec/batch
Epoch 82/100  Iteration 46162/56600 Training loss: 0.9819 0.4785 sec/batch
Epoch 82/100  Iteration 4

Epoch 82/100  Iteration 46260/56600 Training loss: 0.9787 0.4824 sec/batch
Epoch 82/100  Iteration 46261/56600 Training loss: 0.9787 0.4768 sec/batch
Epoch 82/100  Iteration 46262/56600 Training loss: 0.9786 0.4765 sec/batch
Epoch 82/100  Iteration 46263/56600 Training loss: 0.9786 0.4871 sec/batch
Epoch 82/100  Iteration 46264/56600 Training loss: 0.9786 0.4868 sec/batch
Epoch 82/100  Iteration 46265/56600 Training loss: 0.9787 0.4800 sec/batch
Epoch 82/100  Iteration 46266/56600 Training loss: 0.9787 0.4868 sec/batch
Epoch 82/100  Iteration 46267/56600 Training loss: 0.9786 0.4774 sec/batch
Epoch 82/100  Iteration 46268/56600 Training loss: 0.9786 0.4807 sec/batch
Epoch 82/100  Iteration 46269/56600 Training loss: 0.9786 0.4794 sec/batch
Epoch 82/100  Iteration 46270/56600 Training loss: 0.9787 0.4642 sec/batch
Epoch 82/100  Iteration 46271/56600 Training loss: 0.9786 0.4784 sec/batch
Epoch 82/100  Iteration 46272/56600 Training loss: 0.9786 0.4794 sec/batch
Epoch 82/100  Iteration 4

Epoch 82/100  Iteration 46370/56600 Training loss: 0.9772 0.4896 sec/batch
Epoch 82/100  Iteration 46371/56600 Training loss: 0.9772 0.4795 sec/batch
Epoch 82/100  Iteration 46372/56600 Training loss: 0.9772 0.4784 sec/batch
Epoch 82/100  Iteration 46373/56600 Training loss: 0.9773 0.4801 sec/batch
Epoch 82/100  Iteration 46374/56600 Training loss: 0.9773 0.4743 sec/batch
Epoch 82/100  Iteration 46375/56600 Training loss: 0.9773 0.4844 sec/batch
Epoch 82/100  Iteration 46376/56600 Training loss: 0.9773 0.4741 sec/batch
Epoch 82/100  Iteration 46377/56600 Training loss: 0.9773 0.4943 sec/batch
Epoch 82/100  Iteration 46378/56600 Training loss: 0.9773 0.4733 sec/batch
Epoch 82/100  Iteration 46379/56600 Training loss: 0.9772 0.4850 sec/batch
Epoch 82/100  Iteration 46380/56600 Training loss: 0.9771 0.4940 sec/batch
Epoch 82/100  Iteration 46381/56600 Training loss: 0.9771 0.4805 sec/batch
Epoch 82/100  Iteration 46382/56600 Training loss: 0.9771 0.4783 sec/batch
Epoch 82/100  Iteration 4

Epoch 83/100  Iteration 46480/56600 Training loss: 0.9718 0.4844 sec/batch
Epoch 83/100  Iteration 46481/56600 Training loss: 0.9719 0.4802 sec/batch
Epoch 83/100  Iteration 46482/56600 Training loss: 0.9713 0.4739 sec/batch
Epoch 83/100  Iteration 46483/56600 Training loss: 0.9712 0.4915 sec/batch
Epoch 83/100  Iteration 46484/56600 Training loss: 0.9705 0.4760 sec/batch
Epoch 83/100  Iteration 46485/56600 Training loss: 0.9699 0.4724 sec/batch
Epoch 83/100  Iteration 46486/56600 Training loss: 0.9706 0.4806 sec/batch
Epoch 83/100  Iteration 46487/56600 Training loss: 0.9708 0.4752 sec/batch
Epoch 83/100  Iteration 46488/56600 Training loss: 0.9704 0.4780 sec/batch
Epoch 83/100  Iteration 46489/56600 Training loss: 0.9701 0.4747 sec/batch
Epoch 83/100  Iteration 46490/56600 Training loss: 0.9701 0.4813 sec/batch
Epoch 83/100  Iteration 46491/56600 Training loss: 0.9697 0.4884 sec/batch
Epoch 83/100  Iteration 46492/56600 Training loss: 0.9697 0.4778 sec/batch
Epoch 83/100  Iteration 4

Epoch 83/100  Iteration 46590/56600 Training loss: 0.9762 0.4805 sec/batch
Epoch 83/100  Iteration 46591/56600 Training loss: 0.9761 0.4933 sec/batch
Epoch 83/100  Iteration 46592/56600 Training loss: 0.9761 0.4967 sec/batch
Epoch 83/100  Iteration 46593/56600 Training loss: 0.9761 0.4790 sec/batch
Epoch 83/100  Iteration 46594/56600 Training loss: 0.9762 0.4787 sec/batch
Epoch 83/100  Iteration 46595/56600 Training loss: 0.9763 0.4781 sec/batch
Epoch 83/100  Iteration 46596/56600 Training loss: 0.9764 0.4753 sec/batch
Epoch 83/100  Iteration 46597/56600 Training loss: 0.9764 0.4934 sec/batch
Epoch 83/100  Iteration 46598/56600 Training loss: 0.9766 0.4954 sec/batch
Epoch 83/100  Iteration 46599/56600 Training loss: 0.9767 0.4792 sec/batch
Epoch 83/100  Iteration 46600/56600 Training loss: 0.9770 0.4781 sec/batch
Epoch 83/100  Iteration 46601/56600 Training loss: 0.9769 0.4798 sec/batch
Epoch 83/100  Iteration 46602/56600 Training loss: 0.9770 0.4907 sec/batch
Epoch 83/100  Iteration 4

Epoch 83/100  Iteration 46700/56600 Training loss: 0.9799 0.4840 sec/batch
Epoch 83/100  Iteration 46701/56600 Training loss: 0.9799 0.4716 sec/batch
Epoch 83/100  Iteration 46702/56600 Training loss: 0.9800 0.4794 sec/batch
Epoch 83/100  Iteration 46703/56600 Training loss: 0.9800 0.4956 sec/batch
Epoch 83/100  Iteration 46704/56600 Training loss: 0.9802 0.4790 sec/batch
Epoch 83/100  Iteration 46705/56600 Training loss: 0.9804 0.4853 sec/batch
Epoch 83/100  Iteration 46706/56600 Training loss: 0.9804 0.4796 sec/batch
Epoch 83/100  Iteration 46707/56600 Training loss: 0.9806 0.4956 sec/batch
Epoch 83/100  Iteration 46708/56600 Training loss: 0.9806 0.4890 sec/batch
Epoch 83/100  Iteration 46709/56600 Training loss: 0.9807 0.4799 sec/batch
Epoch 83/100  Iteration 46710/56600 Training loss: 0.9808 0.4790 sec/batch
Epoch 83/100  Iteration 46711/56600 Training loss: 0.9808 0.4936 sec/batch
Epoch 83/100  Iteration 46712/56600 Training loss: 0.9807 0.4844 sec/batch
Epoch 83/100  Iteration 4

Epoch 83/100  Iteration 46810/56600 Training loss: 0.9776 0.4705 sec/batch
Epoch 83/100  Iteration 46811/56600 Training loss: 0.9778 0.4626 sec/batch
Epoch 83/100  Iteration 46812/56600 Training loss: 0.9778 0.4758 sec/batch
Epoch 83/100  Iteration 46813/56600 Training loss: 0.9777 0.4980 sec/batch
Epoch 83/100  Iteration 46814/56600 Training loss: 0.9776 0.4793 sec/batch
Epoch 83/100  Iteration 46815/56600 Training loss: 0.9776 0.4688 sec/batch
Epoch 83/100  Iteration 46816/56600 Training loss: 0.9775 0.4725 sec/batch
Epoch 83/100  Iteration 46817/56600 Training loss: 0.9775 0.4650 sec/batch
Epoch 83/100  Iteration 46818/56600 Training loss: 0.9776 0.4822 sec/batch
Epoch 83/100  Iteration 46819/56600 Training loss: 0.9776 0.4727 sec/batch
Epoch 83/100  Iteration 46820/56600 Training loss: 0.9775 0.4666 sec/batch
Epoch 83/100  Iteration 46821/56600 Training loss: 0.9774 0.4795 sec/batch
Epoch 83/100  Iteration 46822/56600 Training loss: 0.9773 0.4786 sec/batch
Epoch 83/100  Iteration 4

Epoch 83/100  Iteration 46920/56600 Training loss: 0.9754 0.4746 sec/batch
Epoch 83/100  Iteration 46921/56600 Training loss: 0.9753 0.4833 sec/batch
Epoch 83/100  Iteration 46922/56600 Training loss: 0.9754 0.4793 sec/batch
Epoch 83/100  Iteration 46923/56600 Training loss: 0.9753 0.4790 sec/batch
Epoch 83/100  Iteration 46924/56600 Training loss: 0.9753 0.4885 sec/batch
Epoch 83/100  Iteration 46925/56600 Training loss: 0.9753 0.4805 sec/batch
Epoch 83/100  Iteration 46926/56600 Training loss: 0.9754 0.4875 sec/batch
Epoch 83/100  Iteration 46927/56600 Training loss: 0.9754 0.4717 sec/batch
Epoch 83/100  Iteration 46928/56600 Training loss: 0.9754 0.4764 sec/batch
Epoch 83/100  Iteration 46929/56600 Training loss: 0.9754 0.4688 sec/batch
Epoch 83/100  Iteration 46930/56600 Training loss: 0.9755 0.4636 sec/batch
Epoch 83/100  Iteration 46931/56600 Training loss: 0.9755 0.4782 sec/batch
Epoch 83/100  Iteration 46932/56600 Training loss: 0.9754 0.4798 sec/batch
Epoch 83/100  Iteration 4

Epoch 84/100  Iteration 47030/56600 Training loss: 0.9793 0.4777 sec/batch
Epoch 84/100  Iteration 47031/56600 Training loss: 0.9788 0.4800 sec/batch
Epoch 84/100  Iteration 47032/56600 Training loss: 0.9783 0.4730 sec/batch
Epoch 84/100  Iteration 47033/56600 Training loss: 0.9773 0.4793 sec/batch
Epoch 84/100  Iteration 47034/56600 Training loss: 0.9765 0.4793 sec/batch
Epoch 84/100  Iteration 47035/56600 Training loss: 0.9757 0.4894 sec/batch
Epoch 84/100  Iteration 47036/56600 Training loss: 0.9748 0.4791 sec/batch
Epoch 84/100  Iteration 47037/56600 Training loss: 0.9743 0.4847 sec/batch
Epoch 84/100  Iteration 47038/56600 Training loss: 0.9737 0.4798 sec/batch
Epoch 84/100  Iteration 47039/56600 Training loss: 0.9730 0.4784 sec/batch
Epoch 84/100  Iteration 47040/56600 Training loss: 0.9725 0.4877 sec/batch
Epoch 84/100  Iteration 47041/56600 Training loss: 0.9723 0.4815 sec/batch
Epoch 84/100  Iteration 47042/56600 Training loss: 0.9715 0.4790 sec/batch
Epoch 84/100  Iteration 4

Epoch 84/100  Iteration 47140/56600 Training loss: 0.9755 0.4737 sec/batch
Epoch 84/100  Iteration 47141/56600 Training loss: 0.9753 0.4641 sec/batch
Epoch 84/100  Iteration 47142/56600 Training loss: 0.9753 0.4792 sec/batch
Epoch 84/100  Iteration 47143/56600 Training loss: 0.9753 0.4790 sec/batch
Epoch 84/100  Iteration 47144/56600 Training loss: 0.9754 0.4794 sec/batch
Epoch 84/100  Iteration 47145/56600 Training loss: 0.9755 0.4788 sec/batch
Epoch 84/100  Iteration 47146/56600 Training loss: 0.9757 0.4636 sec/batch
Epoch 84/100  Iteration 47147/56600 Training loss: 0.9758 0.4788 sec/batch
Epoch 84/100  Iteration 47148/56600 Training loss: 0.9758 0.4807 sec/batch
Epoch 84/100  Iteration 47149/56600 Training loss: 0.9757 0.4687 sec/batch
Epoch 84/100  Iteration 47150/56600 Training loss: 0.9757 0.4610 sec/batch
Epoch 84/100  Iteration 47151/56600 Training loss: 0.9757 0.4718 sec/batch
Epoch 84/100  Iteration 47152/56600 Training loss: 0.9758 0.4656 sec/batch
Epoch 84/100  Iteration 4

Epoch 84/100  Iteration 47250/56600 Training loss: 0.9797 0.4780 sec/batch
Epoch 84/100  Iteration 47251/56600 Training loss: 0.9797 0.4784 sec/batch
Epoch 84/100  Iteration 47252/56600 Training loss: 0.9797 0.4794 sec/batch
Epoch 84/100  Iteration 47253/56600 Training loss: 0.9797 0.4905 sec/batch
Epoch 84/100  Iteration 47254/56600 Training loss: 0.9797 0.4686 sec/batch
Epoch 84/100  Iteration 47255/56600 Training loss: 0.9797 0.4727 sec/batch
Epoch 84/100  Iteration 47256/56600 Training loss: 0.9796 0.4733 sec/batch
Epoch 84/100  Iteration 47257/56600 Training loss: 0.9794 0.4708 sec/batch
Epoch 84/100  Iteration 47258/56600 Training loss: 0.9794 0.4792 sec/batch
Epoch 84/100  Iteration 47259/56600 Training loss: 0.9794 0.4703 sec/batch
Epoch 84/100  Iteration 47260/56600 Training loss: 0.9793 0.4815 sec/batch
Epoch 84/100  Iteration 47261/56600 Training loss: 0.9793 0.4800 sec/batch
Epoch 84/100  Iteration 47262/56600 Training loss: 0.9792 0.4834 sec/batch
Epoch 84/100  Iteration 4

Epoch 84/100  Iteration 47360/56600 Training loss: 0.9770 0.4670 sec/batch
Epoch 84/100  Iteration 47361/56600 Training loss: 0.9769 0.4819 sec/batch
Epoch 84/100  Iteration 47362/56600 Training loss: 0.9769 0.4743 sec/batch
Epoch 84/100  Iteration 47363/56600 Training loss: 0.9769 0.4790 sec/batch
Epoch 84/100  Iteration 47364/56600 Training loss: 0.9770 0.4789 sec/batch
Epoch 84/100  Iteration 47365/56600 Training loss: 0.9770 0.4794 sec/batch
Epoch 84/100  Iteration 47366/56600 Training loss: 0.9770 0.4895 sec/batch
Epoch 84/100  Iteration 47367/56600 Training loss: 0.9769 0.4796 sec/batch
Epoch 84/100  Iteration 47368/56600 Training loss: 0.9769 0.4793 sec/batch
Epoch 84/100  Iteration 47369/56600 Training loss: 0.9768 0.4790 sec/batch
Epoch 84/100  Iteration 47370/56600 Training loss: 0.9768 0.4810 sec/batch
Epoch 84/100  Iteration 47371/56600 Training loss: 0.9769 0.4733 sec/batch
Epoch 84/100  Iteration 47372/56600 Training loss: 0.9769 0.4907 sec/batch
Epoch 84/100  Iteration 4

Epoch 84/100  Iteration 47470/56600 Training loss: 0.9752 0.4776 sec/batch
Epoch 84/100  Iteration 47471/56600 Training loss: 0.9753 0.4839 sec/batch
Epoch 84/100  Iteration 47472/56600 Training loss: 0.9753 0.4798 sec/batch
Epoch 84/100  Iteration 47473/56600 Training loss: 0.9753 0.4877 sec/batch
Epoch 84/100  Iteration 47474/56600 Training loss: 0.9753 0.4656 sec/batch
Epoch 84/100  Iteration 47475/56600 Training loss: 0.9752 0.4807 sec/batch
Epoch 84/100  Iteration 47476/56600 Training loss: 0.9752 0.4705 sec/batch
Epoch 84/100  Iteration 47477/56600 Training loss: 0.9752 0.4797 sec/batch
Epoch 84/100  Iteration 47478/56600 Training loss: 0.9752 0.4634 sec/batch
Epoch 84/100  Iteration 47479/56600 Training loss: 0.9752 0.4794 sec/batch
Epoch 84/100  Iteration 47480/56600 Training loss: 0.9751 0.4787 sec/batch
Epoch 84/100  Iteration 47481/56600 Training loss: 0.9750 0.4798 sec/batch
Epoch 84/100  Iteration 47482/56600 Training loss: 0.9750 0.4736 sec/batch
Epoch 84/100  Iteration 4

Epoch 85/100  Iteration 47580/56600 Training loss: 0.9793 0.4793 sec/batch
Epoch 85/100  Iteration 47581/56600 Training loss: 0.9800 0.4785 sec/batch
Epoch 85/100  Iteration 47582/56600 Training loss: 0.9808 0.4799 sec/batch
Epoch 85/100  Iteration 47583/56600 Training loss: 0.9806 0.4784 sec/batch
Epoch 85/100  Iteration 47584/56600 Training loss: 0.9811 0.4949 sec/batch
Epoch 85/100  Iteration 47585/56600 Training loss: 0.9812 0.4792 sec/batch
Epoch 85/100  Iteration 47586/56600 Training loss: 0.9819 0.4792 sec/batch
Epoch 85/100  Iteration 47587/56600 Training loss: 0.9816 0.4695 sec/batch
Epoch 85/100  Iteration 47588/56600 Training loss: 0.9816 0.4815 sec/batch
Epoch 85/100  Iteration 47589/56600 Training loss: 0.9821 0.4874 sec/batch
Epoch 85/100  Iteration 47590/56600 Training loss: 0.9824 0.4875 sec/batch
Epoch 85/100  Iteration 47591/56600 Training loss: 0.9813 0.4778 sec/batch
Epoch 85/100  Iteration 47592/56600 Training loss: 0.9812 0.4788 sec/batch
Epoch 85/100  Iteration 4

Epoch 85/100  Iteration 47690/56600 Training loss: 0.9720 0.4641 sec/batch
Epoch 85/100  Iteration 47691/56600 Training loss: 0.9719 0.4788 sec/batch
Epoch 85/100  Iteration 47692/56600 Training loss: 0.9721 0.4790 sec/batch
Epoch 85/100  Iteration 47693/56600 Training loss: 0.9723 0.4645 sec/batch
Epoch 85/100  Iteration 47694/56600 Training loss: 0.9725 0.4778 sec/batch
Epoch 85/100  Iteration 47695/56600 Training loss: 0.9729 0.4682 sec/batch
Epoch 85/100  Iteration 47696/56600 Training loss: 0.9733 0.4782 sec/batch
Epoch 85/100  Iteration 47697/56600 Training loss: 0.9735 0.4697 sec/batch
Epoch 85/100  Iteration 47698/56600 Training loss: 0.9736 0.4879 sec/batch
Epoch 85/100  Iteration 47699/56600 Training loss: 0.9738 0.4823 sec/batch
Epoch 85/100  Iteration 47700/56600 Training loss: 0.9739 0.4780 sec/batch
Epoch 85/100  Iteration 47701/56600 Training loss: 0.9742 0.4790 sec/batch
Epoch 85/100  Iteration 47702/56600 Training loss: 0.9743 0.4850 sec/batch
Epoch 85/100  Iteration 4

Epoch 85/100  Iteration 47800/56600 Training loss: 0.9795 0.4985 sec/batch
Epoch 85/100  Iteration 47801/56600 Training loss: 0.9794 0.4778 sec/batch
Epoch 85/100  Iteration 47802/56600 Training loss: 0.9793 0.4812 sec/batch
Epoch 85/100  Iteration 47803/56600 Training loss: 0.9791 0.4789 sec/batch
Epoch 85/100  Iteration 47804/56600 Training loss: 0.9789 0.4796 sec/batch
Epoch 85/100  Iteration 47805/56600 Training loss: 0.9788 0.4814 sec/batch
Epoch 85/100  Iteration 47806/56600 Training loss: 0.9788 0.4923 sec/batch
Epoch 85/100  Iteration 47807/56600 Training loss: 0.9787 0.4906 sec/batch
Epoch 85/100  Iteration 47808/56600 Training loss: 0.9787 0.4929 sec/batch
Epoch 85/100  Iteration 47809/56600 Training loss: 0.9787 0.5087 sec/batch
Epoch 85/100  Iteration 47810/56600 Training loss: 0.9786 0.4789 sec/batch
Epoch 85/100  Iteration 47811/56600 Training loss: 0.9785 0.4870 sec/batch
Epoch 85/100  Iteration 47812/56600 Training loss: 0.9784 0.4819 sec/batch
Epoch 85/100  Iteration 4

Epoch 85/100  Iteration 47910/56600 Training loss: 0.9767 0.4851 sec/batch
Epoch 85/100  Iteration 47911/56600 Training loss: 0.9766 0.4708 sec/batch
Epoch 85/100  Iteration 47912/56600 Training loss: 0.9766 0.4792 sec/batch
Epoch 85/100  Iteration 47913/56600 Training loss: 0.9765 0.4807 sec/batch
Epoch 85/100  Iteration 47914/56600 Training loss: 0.9764 0.4782 sec/batch
Epoch 85/100  Iteration 47915/56600 Training loss: 0.9763 0.4783 sec/batch
Epoch 85/100  Iteration 47916/56600 Training loss: 0.9762 0.4827 sec/batch
Epoch 85/100  Iteration 47917/56600 Training loss: 0.9761 0.4696 sec/batch
Epoch 85/100  Iteration 47918/56600 Training loss: 0.9761 0.4689 sec/batch
Epoch 85/100  Iteration 47919/56600 Training loss: 0.9760 0.4893 sec/batch
Epoch 85/100  Iteration 47920/56600 Training loss: 0.9760 0.4894 sec/batch
Epoch 85/100  Iteration 47921/56600 Training loss: 0.9760 0.4789 sec/batch
Epoch 85/100  Iteration 47922/56600 Training loss: 0.9760 0.4636 sec/batch
Epoch 85/100  Iteration 4

Epoch 85/100  Iteration 48019/56600 Training loss: 0.9750 0.4756 sec/batch
Epoch 85/100  Iteration 48020/56600 Training loss: 0.9750 0.4670 sec/batch
Epoch 85/100  Iteration 48021/56600 Training loss: 0.9750 0.4872 sec/batch
Epoch 85/100  Iteration 48022/56600 Training loss: 0.9750 0.4761 sec/batch
Epoch 85/100  Iteration 48023/56600 Training loss: 0.9750 0.4754 sec/batch
Epoch 85/100  Iteration 48024/56600 Training loss: 0.9750 0.4773 sec/batch
Epoch 85/100  Iteration 48025/56600 Training loss: 0.9750 0.4740 sec/batch
Epoch 85/100  Iteration 48026/56600 Training loss: 0.9750 0.4756 sec/batch
Epoch 85/100  Iteration 48027/56600 Training loss: 0.9750 0.4669 sec/batch
Epoch 85/100  Iteration 48028/56600 Training loss: 0.9750 0.4786 sec/batch
Epoch 85/100  Iteration 48029/56600 Training loss: 0.9750 0.4640 sec/batch
Epoch 85/100  Iteration 48030/56600 Training loss: 0.9749 0.4636 sec/batch
Epoch 85/100  Iteration 48031/56600 Training loss: 0.9749 0.4681 sec/batch
Epoch 85/100  Iteration 4

Epoch 86/100  Iteration 48129/56600 Training loss: 0.9855 0.4632 sec/batch
Epoch 86/100  Iteration 48130/56600 Training loss: 0.9836 0.4771 sec/batch
Epoch 86/100  Iteration 48131/56600 Training loss: 0.9822 0.4719 sec/batch
Epoch 86/100  Iteration 48132/56600 Training loss: 0.9798 0.4560 sec/batch
Epoch 86/100  Iteration 48133/56600 Training loss: 0.9787 0.4734 sec/batch
Epoch 86/100  Iteration 48134/56600 Training loss: 0.9777 0.4637 sec/batch
Epoch 86/100  Iteration 48135/56600 Training loss: 0.9764 0.4773 sec/batch
Epoch 86/100  Iteration 48136/56600 Training loss: 0.9748 0.4815 sec/batch
Epoch 86/100  Iteration 48137/56600 Training loss: 0.9745 0.4792 sec/batch
Epoch 86/100  Iteration 48138/56600 Training loss: 0.9747 0.4681 sec/batch
Epoch 86/100  Iteration 48139/56600 Training loss: 0.9741 0.4858 sec/batch
Epoch 86/100  Iteration 48140/56600 Training loss: 0.9740 0.4662 sec/batch
Epoch 86/100  Iteration 48141/56600 Training loss: 0.9730 0.4829 sec/batch
Epoch 86/100  Iteration 4

Epoch 86/100  Iteration 48239/56600 Training loss: 0.9701 0.4762 sec/batch
Epoch 86/100  Iteration 48240/56600 Training loss: 0.9700 0.4821 sec/batch
Epoch 86/100  Iteration 48241/56600 Training loss: 0.9702 0.4795 sec/batch
Epoch 86/100  Iteration 48242/56600 Training loss: 0.9700 0.4800 sec/batch
Epoch 86/100  Iteration 48243/56600 Training loss: 0.9699 0.4778 sec/batch
Epoch 86/100  Iteration 48244/56600 Training loss: 0.9698 0.4841 sec/batch
Epoch 86/100  Iteration 48245/56600 Training loss: 0.9696 0.4887 sec/batch
Epoch 86/100  Iteration 48246/56600 Training loss: 0.9694 0.4789 sec/batch
Epoch 86/100  Iteration 48247/56600 Training loss: 0.9696 0.4878 sec/batch
Epoch 86/100  Iteration 48248/56600 Training loss: 0.9699 0.4858 sec/batch
Epoch 86/100  Iteration 48249/56600 Training loss: 0.9702 0.4910 sec/batch
Epoch 86/100  Iteration 48250/56600 Training loss: 0.9701 0.4717 sec/batch
Epoch 86/100  Iteration 48251/56600 Training loss: 0.9703 0.4739 sec/batch
Epoch 86/100  Iteration 4

Epoch 86/100  Iteration 48349/56600 Training loss: 0.9772 0.4794 sec/batch
Epoch 86/100  Iteration 48350/56600 Training loss: 0.9774 0.4793 sec/batch
Epoch 86/100  Iteration 48351/56600 Training loss: 0.9776 0.4794 sec/batch
Epoch 86/100  Iteration 48352/56600 Training loss: 0.9778 0.4879 sec/batch
Epoch 86/100  Iteration 48353/56600 Training loss: 0.9779 0.4693 sec/batch
Epoch 86/100  Iteration 48354/56600 Training loss: 0.9782 0.4823 sec/batch
Epoch 86/100  Iteration 48355/56600 Training loss: 0.9783 0.4764 sec/batch
Epoch 86/100  Iteration 48356/56600 Training loss: 0.9785 0.4785 sec/batch
Epoch 86/100  Iteration 48357/56600 Training loss: 0.9787 0.4638 sec/batch
Epoch 86/100  Iteration 48358/56600 Training loss: 0.9787 0.4801 sec/batch
Epoch 86/100  Iteration 48359/56600 Training loss: 0.9787 0.4788 sec/batch
Epoch 86/100  Iteration 48360/56600 Training loss: 0.9788 0.4892 sec/batch
Epoch 86/100  Iteration 48361/56600 Training loss: 0.9786 0.4802 sec/batch
Epoch 86/100  Iteration 4

Epoch 86/100  Iteration 48459/56600 Training loss: 0.9760 0.4784 sec/batch
Epoch 86/100  Iteration 48460/56600 Training loss: 0.9759 0.4793 sec/batch
Epoch 86/100  Iteration 48461/56600 Training loss: 0.9757 0.4794 sec/batch
Epoch 86/100  Iteration 48462/56600 Training loss: 0.9756 0.4788 sec/batch
Epoch 86/100  Iteration 48463/56600 Training loss: 0.9756 0.4901 sec/batch
Epoch 86/100  Iteration 48464/56600 Training loss: 0.9756 0.4657 sec/batch
Epoch 86/100  Iteration 48465/56600 Training loss: 0.9756 0.4761 sec/batch
Epoch 86/100  Iteration 48466/56600 Training loss: 0.9757 0.4774 sec/batch
Epoch 86/100  Iteration 48467/56600 Training loss: 0.9758 0.4674 sec/batch
Epoch 86/100  Iteration 48468/56600 Training loss: 0.9757 0.4799 sec/batch
Epoch 86/100  Iteration 48469/56600 Training loss: 0.9758 0.4904 sec/batch
Epoch 86/100  Iteration 48470/56600 Training loss: 0.9758 0.4793 sec/batch
Epoch 86/100  Iteration 48471/56600 Training loss: 0.9758 0.4733 sec/batch
Epoch 86/100  Iteration 4

Epoch 86/100  Iteration 48569/56600 Training loss: 0.9727 0.4649 sec/batch
Epoch 86/100  Iteration 48570/56600 Training loss: 0.9727 0.4790 sec/batch
Epoch 86/100  Iteration 48571/56600 Training loss: 0.9727 0.4790 sec/batch
Epoch 86/100  Iteration 48572/56600 Training loss: 0.9727 0.4898 sec/batch
Epoch 86/100  Iteration 48573/56600 Training loss: 0.9727 0.4757 sec/batch
Epoch 86/100  Iteration 48574/56600 Training loss: 0.9726 0.4732 sec/batch
Epoch 86/100  Iteration 48575/56600 Training loss: 0.9727 0.4684 sec/batch
Epoch 86/100  Iteration 48576/56600 Training loss: 0.9727 0.4848 sec/batch
Epoch 86/100  Iteration 48577/56600 Training loss: 0.9728 0.4774 sec/batch
Epoch 86/100  Iteration 48578/56600 Training loss: 0.9728 0.4710 sec/batch
Epoch 86/100  Iteration 48579/56600 Training loss: 0.9728 0.4863 sec/batch
Epoch 86/100  Iteration 48580/56600 Training loss: 0.9729 0.4799 sec/batch
Epoch 86/100  Iteration 48581/56600 Training loss: 0.9730 0.4685 sec/batch
Epoch 86/100  Iteration 4

Epoch 87/100  Iteration 48679/56600 Training loss: 1.0469 0.4639 sec/batch
Epoch 87/100  Iteration 48680/56600 Training loss: 1.0364 0.4785 sec/batch
Epoch 87/100  Iteration 48681/56600 Training loss: 1.0199 0.4861 sec/batch
Epoch 87/100  Iteration 48682/56600 Training loss: 1.0142 0.4616 sec/batch
Epoch 87/100  Iteration 48683/56600 Training loss: 1.0086 0.4684 sec/batch
Epoch 87/100  Iteration 48684/56600 Training loss: 1.0024 0.4789 sec/batch
Epoch 87/100  Iteration 48685/56600 Training loss: 0.9991 0.4793 sec/batch
Epoch 87/100  Iteration 48686/56600 Training loss: 0.9968 0.4583 sec/batch
Epoch 87/100  Iteration 48687/56600 Training loss: 0.9967 0.4793 sec/batch
Epoch 87/100  Iteration 48688/56600 Training loss: 0.9968 0.4794 sec/batch
Epoch 87/100  Iteration 48689/56600 Training loss: 0.9959 0.4794 sec/batch
Epoch 87/100  Iteration 48690/56600 Training loss: 0.9942 0.4784 sec/batch
Epoch 87/100  Iteration 48691/56600 Training loss: 0.9920 0.4855 sec/batch
Epoch 87/100  Iteration 4

Epoch 87/100  Iteration 48789/56600 Training loss: 0.9649 0.4898 sec/batch
Epoch 87/100  Iteration 48790/56600 Training loss: 0.9653 0.4683 sec/batch
Epoch 87/100  Iteration 48791/56600 Training loss: 0.9655 0.4662 sec/batch
Epoch 87/100  Iteration 48792/56600 Training loss: 0.9659 0.4664 sec/batch
Epoch 87/100  Iteration 48793/56600 Training loss: 0.9665 0.4705 sec/batch
Epoch 87/100  Iteration 48794/56600 Training loss: 0.9669 0.4819 sec/batch
Epoch 87/100  Iteration 48795/56600 Training loss: 0.9672 0.4790 sec/batch
Epoch 87/100  Iteration 48796/56600 Training loss: 0.9677 0.4795 sec/batch
Epoch 87/100  Iteration 48797/56600 Training loss: 0.9684 0.4833 sec/batch
Epoch 87/100  Iteration 48798/56600 Training loss: 0.9689 0.4843 sec/batch
Epoch 87/100  Iteration 48799/56600 Training loss: 0.9688 0.4780 sec/batch
Epoch 87/100  Iteration 48800/56600 Training loss: 0.9688 0.4797 sec/batch
Epoch 87/100  Iteration 48801/56600 Training loss: 0.9691 0.4841 sec/batch
Epoch 87/100  Iteration 4

Epoch 87/100  Iteration 48899/56600 Training loss: 0.9747 0.4630 sec/batch
Epoch 87/100  Iteration 48900/56600 Training loss: 0.9748 0.4683 sec/batch
Epoch 87/100  Iteration 48901/56600 Training loss: 0.9750 0.4738 sec/batch
Epoch 87/100  Iteration 48902/56600 Training loss: 0.9751 0.4800 sec/batch
Epoch 87/100  Iteration 48903/56600 Training loss: 0.9753 0.4710 sec/batch
Epoch 87/100  Iteration 48904/56600 Training loss: 0.9752 0.4912 sec/batch
Epoch 87/100  Iteration 48905/56600 Training loss: 0.9752 0.4788 sec/batch
Epoch 87/100  Iteration 48906/56600 Training loss: 0.9754 0.4788 sec/batch
Epoch 87/100  Iteration 48907/56600 Training loss: 0.9756 0.4905 sec/batch
Epoch 87/100  Iteration 48908/56600 Training loss: 0.9757 0.4844 sec/batch
Epoch 87/100  Iteration 48909/56600 Training loss: 0.9759 0.4716 sec/batch
Epoch 87/100  Iteration 48910/56600 Training loss: 0.9760 0.4709 sec/batch
Epoch 87/100  Iteration 48911/56600 Training loss: 0.9761 0.4790 sec/batch
Epoch 87/100  Iteration 4

Epoch 87/100  Iteration 49009/56600 Training loss: 0.9757 0.4787 sec/batch
Epoch 87/100  Iteration 49010/56600 Training loss: 0.9756 0.4796 sec/batch
Epoch 87/100  Iteration 49011/56600 Training loss: 0.9755 0.4783 sec/batch
Epoch 87/100  Iteration 49012/56600 Training loss: 0.9755 0.4687 sec/batch
Epoch 87/100  Iteration 49013/56600 Training loss: 0.9755 0.4790 sec/batch
Epoch 87/100  Iteration 49014/56600 Training loss: 0.9756 0.4786 sec/batch
Epoch 87/100  Iteration 49015/56600 Training loss: 0.9755 0.4869 sec/batch
Epoch 87/100  Iteration 49016/56600 Training loss: 0.9755 0.4837 sec/batch
Epoch 87/100  Iteration 49017/56600 Training loss: 0.9754 0.4907 sec/batch
Epoch 87/100  Iteration 49018/56600 Training loss: 0.9753 0.4972 sec/batch
Epoch 87/100  Iteration 49019/56600 Training loss: 0.9751 0.4786 sec/batch
Epoch 87/100  Iteration 49020/56600 Training loss: 0.9751 0.4803 sec/batch
Epoch 87/100  Iteration 49021/56600 Training loss: 0.9752 0.4938 sec/batch
Epoch 87/100  Iteration 4

Epoch 87/100  Iteration 49119/56600 Training loss: 0.9721 0.4739 sec/batch
Epoch 87/100  Iteration 49120/56600 Training loss: 0.9721 0.4795 sec/batch
Epoch 87/100  Iteration 49121/56600 Training loss: 0.9719 0.4789 sec/batch
Epoch 87/100  Iteration 49122/56600 Training loss: 0.9718 0.4819 sec/batch
Epoch 87/100  Iteration 49123/56600 Training loss: 0.9718 0.4712 sec/batch
Epoch 87/100  Iteration 49124/56600 Training loss: 0.9717 0.4798 sec/batch
Epoch 87/100  Iteration 49125/56600 Training loss: 0.9718 0.4789 sec/batch
Epoch 87/100  Iteration 49126/56600 Training loss: 0.9717 0.4789 sec/batch
Epoch 87/100  Iteration 49127/56600 Training loss: 0.9718 0.4789 sec/batch
Epoch 87/100  Iteration 49128/56600 Training loss: 0.9718 0.4742 sec/batch
Epoch 87/100  Iteration 49129/56600 Training loss: 0.9717 0.4868 sec/batch
Epoch 87/100  Iteration 49130/56600 Training loss: 0.9717 0.4683 sec/batch
Epoch 87/100  Iteration 49131/56600 Training loss: 0.9718 0.4795 sec/batch
Epoch 87/100  Iteration 4

Epoch 87/100  Iteration 49229/56600 Training loss: 0.9711 0.4791 sec/batch
Epoch 87/100  Iteration 49230/56600 Training loss: 0.9711 0.4889 sec/batch
Epoch 87/100  Iteration 49231/56600 Training loss: 0.9711 0.4672 sec/batch
Epoch 87/100  Iteration 49232/56600 Training loss: 0.9711 0.4804 sec/batch
Epoch 87/100  Iteration 49233/56600 Training loss: 0.9711 0.4724 sec/batch
Epoch 87/100  Iteration 49234/56600 Training loss: 0.9711 0.4850 sec/batch
Epoch 87/100  Iteration 49235/56600 Training loss: 0.9712 0.4726 sec/batch
Epoch 87/100  Iteration 49236/56600 Training loss: 0.9712 0.4807 sec/batch
Epoch 87/100  Iteration 49237/56600 Training loss: 0.9712 0.4868 sec/batch
Epoch 87/100  Iteration 49238/56600 Training loss: 0.9712 0.4864 sec/batch
Epoch 87/100  Iteration 49239/56600 Training loss: 0.9712 0.4790 sec/batch
Epoch 87/100  Iteration 49240/56600 Training loss: 0.9713 0.4779 sec/batch
Epoch 87/100  Iteration 49241/56600 Training loss: 0.9714 0.4723 sec/batch
Epoch 87/100  Iteration 4

Epoch 88/100  Iteration 49339/56600 Training loss: 0.9619 0.4791 sec/batch
Epoch 88/100  Iteration 49340/56600 Training loss: 0.9617 0.4799 sec/batch
Epoch 88/100  Iteration 49341/56600 Training loss: 0.9620 0.4786 sec/batch
Epoch 88/100  Iteration 49342/56600 Training loss: 0.9625 0.4793 sec/batch
Epoch 88/100  Iteration 49343/56600 Training loss: 0.9626 0.4788 sec/batch
Epoch 88/100  Iteration 49344/56600 Training loss: 0.9630 0.4833 sec/batch
Epoch 88/100  Iteration 49345/56600 Training loss: 0.9629 0.4739 sec/batch
Epoch 88/100  Iteration 49346/56600 Training loss: 0.9630 0.4886 sec/batch
Epoch 88/100  Iteration 49347/56600 Training loss: 0.9630 0.4800 sec/batch
Epoch 88/100  Iteration 49348/56600 Training loss: 0.9629 0.4762 sec/batch
Epoch 88/100  Iteration 49349/56600 Training loss: 0.9626 0.4663 sec/batch
Epoch 88/100  Iteration 49350/56600 Training loss: 0.9627 0.4795 sec/batch
Epoch 88/100  Iteration 49351/56600 Training loss: 0.9628 0.4790 sec/batch
Epoch 88/100  Iteration 4

Epoch 88/100  Iteration 49449/56600 Training loss: 0.9733 0.4892 sec/batch
Epoch 88/100  Iteration 49450/56600 Training loss: 0.9732 0.4784 sec/batch
Epoch 88/100  Iteration 49451/56600 Training loss: 0.9732 0.4787 sec/batch
Epoch 88/100  Iteration 49452/56600 Training loss: 0.9732 0.4798 sec/batch
Epoch 88/100  Iteration 49453/56600 Training loss: 0.9733 0.4785 sec/batch
Epoch 88/100  Iteration 49454/56600 Training loss: 0.9732 0.4794 sec/batch
Epoch 88/100  Iteration 49455/56600 Training loss: 0.9730 0.4772 sec/batch
Epoch 88/100  Iteration 49456/56600 Training loss: 0.9728 0.4636 sec/batch
Epoch 88/100  Iteration 49457/56600 Training loss: 0.9729 0.4636 sec/batch
Epoch 88/100  Iteration 49458/56600 Training loss: 0.9729 0.4637 sec/batch
Epoch 88/100  Iteration 49459/56600 Training loss: 0.9727 0.4724 sec/batch
Epoch 88/100  Iteration 49460/56600 Training loss: 0.9727 0.4858 sec/batch
Epoch 88/100  Iteration 49461/56600 Training loss: 0.9730 0.4837 sec/batch
Epoch 88/100  Iteration 4

Epoch 88/100  Iteration 49559/56600 Training loss: 0.9750 0.4789 sec/batch
Epoch 88/100  Iteration 49560/56600 Training loss: 0.9750 0.4778 sec/batch
Epoch 88/100  Iteration 49561/56600 Training loss: 0.9750 0.4853 sec/batch
Epoch 88/100  Iteration 49562/56600 Training loss: 0.9748 0.4740 sec/batch
Epoch 88/100  Iteration 49563/56600 Training loss: 0.9749 0.4944 sec/batch
Epoch 88/100  Iteration 49564/56600 Training loss: 0.9747 0.4746 sec/batch
Epoch 88/100  Iteration 49565/56600 Training loss: 0.9745 0.4947 sec/batch
Epoch 88/100  Iteration 49566/56600 Training loss: 0.9745 0.4791 sec/batch
Epoch 88/100  Iteration 49567/56600 Training loss: 0.9745 0.4785 sec/batch
Epoch 88/100  Iteration 49568/56600 Training loss: 0.9745 0.4892 sec/batch
Epoch 88/100  Iteration 49569/56600 Training loss: 0.9743 0.4667 sec/batch
Epoch 88/100  Iteration 49570/56600 Training loss: 0.9743 0.4765 sec/batch
Epoch 88/100  Iteration 49571/56600 Training loss: 0.9744 0.4768 sec/batch
Epoch 88/100  Iteration 4

Epoch 88/100  Iteration 49669/56600 Training loss: 0.9720 0.4786 sec/batch
Epoch 88/100  Iteration 49670/56600 Training loss: 0.9720 0.4793 sec/batch
Epoch 88/100  Iteration 49671/56600 Training loss: 0.9719 0.4844 sec/batch
Epoch 88/100  Iteration 49672/56600 Training loss: 0.9719 0.4797 sec/batch
Epoch 88/100  Iteration 49673/56600 Training loss: 0.9718 0.4858 sec/batch
Epoch 88/100  Iteration 49674/56600 Training loss: 0.9717 0.4878 sec/batch
Epoch 88/100  Iteration 49675/56600 Training loss: 0.9717 0.4851 sec/batch
Epoch 88/100  Iteration 49676/56600 Training loss: 0.9716 0.4760 sec/batch
Epoch 88/100  Iteration 49677/56600 Training loss: 0.9716 0.4682 sec/batch
Epoch 88/100  Iteration 49678/56600 Training loss: 0.9716 0.4900 sec/batch
Epoch 88/100  Iteration 49679/56600 Training loss: 0.9715 0.4794 sec/batch
Epoch 88/100  Iteration 49680/56600 Training loss: 0.9715 0.4839 sec/batch
Epoch 88/100  Iteration 49681/56600 Training loss: 0.9713 0.4658 sec/batch
Epoch 88/100  Iteration 4

Epoch 88/100  Iteration 49779/56600 Training loss: 0.9705 0.4787 sec/batch
Epoch 88/100  Iteration 49780/56600 Training loss: 0.9704 0.4827 sec/batch
Epoch 88/100  Iteration 49781/56600 Training loss: 0.9704 0.4811 sec/batch
Epoch 88/100  Iteration 49782/56600 Training loss: 0.9705 0.4683 sec/batch
Epoch 88/100  Iteration 49783/56600 Training loss: 0.9704 0.4798 sec/batch
Epoch 88/100  Iteration 49784/56600 Training loss: 0.9704 0.4788 sec/batch
Epoch 88/100  Iteration 49785/56600 Training loss: 0.9704 0.4739 sec/batch
Epoch 88/100  Iteration 49786/56600 Training loss: 0.9704 0.4794 sec/batch
Epoch 88/100  Iteration 49787/56600 Training loss: 0.9704 0.4761 sec/batch
Epoch 88/100  Iteration 49788/56600 Training loss: 0.9704 0.4656 sec/batch
Epoch 88/100  Iteration 49789/56600 Training loss: 0.9704 0.4666 sec/batch
Epoch 88/100  Iteration 49790/56600 Training loss: 0.9704 0.4754 sec/batch
Epoch 88/100  Iteration 49791/56600 Training loss: 0.9704 0.4902 sec/batch
Epoch 88/100  Iteration 4

Epoch 89/100  Iteration 49889/56600 Training loss: 0.9636 0.4784 sec/batch
Epoch 89/100  Iteration 49890/56600 Training loss: 0.9636 0.4843 sec/batch
Epoch 89/100  Iteration 49891/56600 Training loss: 0.9640 0.4895 sec/batch
Epoch 89/100  Iteration 49892/56600 Training loss: 0.9642 0.4759 sec/batch
Epoch 89/100  Iteration 49893/56600 Training loss: 0.9638 0.4781 sec/batch
Epoch 89/100  Iteration 49894/56600 Training loss: 0.9635 0.4827 sec/batch
Epoch 89/100  Iteration 49895/56600 Training loss: 0.9631 0.4733 sec/batch
Epoch 89/100  Iteration 49896/56600 Training loss: 0.9627 0.4794 sec/batch
Epoch 89/100  Iteration 49897/56600 Training loss: 0.9629 0.4789 sec/batch
Epoch 89/100  Iteration 49898/56600 Training loss: 0.9630 0.4795 sec/batch
Epoch 89/100  Iteration 49899/56600 Training loss: 0.9631 0.4892 sec/batch
Epoch 89/100  Iteration 49900/56600 Training loss: 0.9629 0.4795 sec/batch
Epoch 89/100  Iteration 49901/56600 Training loss: 0.9627 0.4940 sec/batch
Epoch 89/100  Iteration 4

Epoch 89/100  Iteration 49999/56600 Training loss: 0.9708 0.4701 sec/batch
Epoch 89/100  Iteration 50000/56600 Training loss: 0.9707 0.4746 sec/batch
Validation loss: 1.19871 Saving checkpoint!
Epoch 89/100  Iteration 50001/56600 Training loss: 0.9722 0.4639 sec/batch
Epoch 89/100  Iteration 50002/56600 Training loss: 0.9724 0.4719 sec/batch
Epoch 89/100  Iteration 50003/56600 Training loss: 0.9726 0.4706 sec/batch
Epoch 89/100  Iteration 50004/56600 Training loss: 0.9726 0.4817 sec/batch
Epoch 89/100  Iteration 50005/56600 Training loss: 0.9728 0.4626 sec/batch
Epoch 89/100  Iteration 50006/56600 Training loss: 0.9729 0.4629 sec/batch
Epoch 89/100  Iteration 50007/56600 Training loss: 0.9730 0.4745 sec/batch
Epoch 89/100  Iteration 50008/56600 Training loss: 0.9730 0.4629 sec/batch
Epoch 89/100  Iteration 50009/56600 Training loss: 0.9730 0.4748 sec/batch
Epoch 89/100  Iteration 50010/56600 Training loss: 0.9732 0.4679 sec/batch
Epoch 89/100  Iteration 50011/56600 Training loss: 0.973

Epoch 89/100  Iteration 50108/56600 Training loss: 0.9752 0.4799 sec/batch
Epoch 89/100  Iteration 50109/56600 Training loss: 0.9751 0.4844 sec/batch
Epoch 89/100  Iteration 50110/56600 Training loss: 0.9751 0.4797 sec/batch
Epoch 89/100  Iteration 50111/56600 Training loss: 0.9751 0.4782 sec/batch
Epoch 89/100  Iteration 50112/56600 Training loss: 0.9751 0.4882 sec/batch
Epoch 89/100  Iteration 50113/56600 Training loss: 0.9751 0.4650 sec/batch
Epoch 89/100  Iteration 50114/56600 Training loss: 0.9751 0.4782 sec/batch
Epoch 89/100  Iteration 50115/56600 Training loss: 0.9749 0.4745 sec/batch
Epoch 89/100  Iteration 50116/56600 Training loss: 0.9747 0.4789 sec/batch
Epoch 89/100  Iteration 50117/56600 Training loss: 0.9747 0.4736 sec/batch
Epoch 89/100  Iteration 50118/56600 Training loss: 0.9746 0.4741 sec/batch
Epoch 89/100  Iteration 50119/56600 Training loss: 0.9745 0.4608 sec/batch
Epoch 89/100  Iteration 50120/56600 Training loss: 0.9745 0.4713 sec/batch
Epoch 89/100  Iteration 5

Epoch 89/100  Iteration 50218/56600 Training loss: 0.9714 0.4744 sec/batch
Epoch 89/100  Iteration 50219/56600 Training loss: 0.9713 0.4793 sec/batch
Epoch 89/100  Iteration 50220/56600 Training loss: 0.9713 0.4774 sec/batch
Epoch 89/100  Iteration 50221/56600 Training loss: 0.9713 0.4651 sec/batch
Epoch 89/100  Iteration 50222/56600 Training loss: 0.9712 0.4632 sec/batch
Epoch 89/100  Iteration 50223/56600 Training loss: 0.9712 0.4790 sec/batch
Epoch 89/100  Iteration 50224/56600 Training loss: 0.9712 0.4774 sec/batch
Epoch 89/100  Iteration 50225/56600 Training loss: 0.9712 0.4655 sec/batch
Epoch 89/100  Iteration 50226/56600 Training loss: 0.9712 0.4657 sec/batch
Epoch 89/100  Iteration 50227/56600 Training loss: 0.9713 0.4803 sec/batch
Epoch 89/100  Iteration 50228/56600 Training loss: 0.9713 0.4782 sec/batch
Epoch 89/100  Iteration 50229/56600 Training loss: 0.9712 0.4806 sec/batch
Epoch 89/100  Iteration 50230/56600 Training loss: 0.9712 0.4775 sec/batch
Epoch 89/100  Iteration 5

Epoch 89/100  Iteration 50328/56600 Training loss: 0.9695 0.4745 sec/batch
Epoch 89/100  Iteration 50329/56600 Training loss: 0.9695 0.4773 sec/batch
Epoch 89/100  Iteration 50330/56600 Training loss: 0.9695 0.4791 sec/batch
Epoch 89/100  Iteration 50331/56600 Training loss: 0.9695 0.4852 sec/batch
Epoch 89/100  Iteration 50332/56600 Training loss: 0.9696 0.4847 sec/batch
Epoch 89/100  Iteration 50333/56600 Training loss: 0.9696 0.4885 sec/batch
Epoch 89/100  Iteration 50334/56600 Training loss: 0.9696 0.4793 sec/batch
Epoch 89/100  Iteration 50335/56600 Training loss: 0.9696 0.4754 sec/batch
Epoch 89/100  Iteration 50336/56600 Training loss: 0.9697 0.4827 sec/batch
Epoch 89/100  Iteration 50337/56600 Training loss: 0.9696 0.4808 sec/batch
Epoch 89/100  Iteration 50338/56600 Training loss: 0.9696 0.4772 sec/batch
Epoch 89/100  Iteration 50339/56600 Training loss: 0.9697 0.4901 sec/batch
Epoch 89/100  Iteration 50340/56600 Training loss: 0.9696 0.4973 sec/batch
Epoch 89/100  Iteration 5

Epoch 90/100  Iteration 50438/56600 Training loss: 0.9654 0.4770 sec/batch
Epoch 90/100  Iteration 50439/56600 Training loss: 0.9649 0.4709 sec/batch
Epoch 90/100  Iteration 50440/56600 Training loss: 0.9646 0.4811 sec/batch
Epoch 90/100  Iteration 50441/56600 Training loss: 0.9641 0.4767 sec/batch
Epoch 90/100  Iteration 50442/56600 Training loss: 0.9645 0.4798 sec/batch
Epoch 90/100  Iteration 50443/56600 Training loss: 0.9644 0.4791 sec/batch
Epoch 90/100  Iteration 50444/56600 Training loss: 0.9638 0.4748 sec/batch
Epoch 90/100  Iteration 50445/56600 Training loss: 0.9636 0.4704 sec/batch
Epoch 90/100  Iteration 50446/56600 Training loss: 0.9630 0.4720 sec/batch
Epoch 90/100  Iteration 50447/56600 Training loss: 0.9624 0.4778 sec/batch
Epoch 90/100  Iteration 50448/56600 Training loss: 0.9629 0.4881 sec/batch
Epoch 90/100  Iteration 50449/56600 Training loss: 0.9632 0.4807 sec/batch
Epoch 90/100  Iteration 50450/56600 Training loss: 0.9627 0.4724 sec/batch
Epoch 90/100  Iteration 5

Epoch 90/100  Iteration 50548/56600 Training loss: 0.9695 0.4844 sec/batch
Epoch 90/100  Iteration 50549/56600 Training loss: 0.9693 0.4732 sec/batch
Epoch 90/100  Iteration 50550/56600 Training loss: 0.9691 0.4814 sec/batch
Epoch 90/100  Iteration 50551/56600 Training loss: 0.9691 0.4795 sec/batch
Epoch 90/100  Iteration 50552/56600 Training loss: 0.9691 0.4782 sec/batch
Epoch 90/100  Iteration 50553/56600 Training loss: 0.9690 0.4801 sec/batch
Epoch 90/100  Iteration 50554/56600 Training loss: 0.9690 0.4793 sec/batch
Epoch 90/100  Iteration 50555/56600 Training loss: 0.9689 0.4790 sec/batch
Epoch 90/100  Iteration 50556/56600 Training loss: 0.9690 0.4791 sec/batch
Epoch 90/100  Iteration 50557/56600 Training loss: 0.9691 0.4792 sec/batch
Epoch 90/100  Iteration 50558/56600 Training loss: 0.9693 0.4865 sec/batch
Epoch 90/100  Iteration 50559/56600 Training loss: 0.9693 0.4703 sec/batch
Epoch 90/100  Iteration 50560/56600 Training loss: 0.9696 0.4784 sec/batch
Epoch 90/100  Iteration 5

Epoch 90/100  Iteration 50658/56600 Training loss: 0.9727 0.4789 sec/batch
Epoch 90/100  Iteration 50659/56600 Training loss: 0.9727 0.4839 sec/batch
Epoch 90/100  Iteration 50660/56600 Training loss: 0.9728 0.4742 sec/batch
Epoch 90/100  Iteration 50661/56600 Training loss: 0.9730 0.4808 sec/batch
Epoch 90/100  Iteration 50662/56600 Training loss: 0.9730 0.4779 sec/batch
Epoch 90/100  Iteration 50663/56600 Training loss: 0.9730 0.4771 sec/batch
Epoch 90/100  Iteration 50664/56600 Training loss: 0.9730 0.4688 sec/batch
Epoch 90/100  Iteration 50665/56600 Training loss: 0.9731 0.4717 sec/batch
Epoch 90/100  Iteration 50666/56600 Training loss: 0.9733 0.4741 sec/batch
Epoch 90/100  Iteration 50667/56600 Training loss: 0.9734 0.4796 sec/batch
Epoch 90/100  Iteration 50668/56600 Training loss: 0.9734 0.4789 sec/batch
Epoch 90/100  Iteration 50669/56600 Training loss: 0.9736 0.4812 sec/batch
Epoch 90/100  Iteration 50670/56600 Training loss: 0.9736 0.4837 sec/batch
Epoch 90/100  Iteration 5

Epoch 90/100  Iteration 50768/56600 Training loss: 0.9705 0.4870 sec/batch
Epoch 90/100  Iteration 50769/56600 Training loss: 0.9706 0.4687 sec/batch
Epoch 90/100  Iteration 50770/56600 Training loss: 0.9706 0.4784 sec/batch
Epoch 90/100  Iteration 50771/56600 Training loss: 0.9706 0.4800 sec/batch
Epoch 90/100  Iteration 50772/56600 Training loss: 0.9706 0.4895 sec/batch
Epoch 90/100  Iteration 50773/56600 Training loss: 0.9707 0.4792 sec/batch
Epoch 90/100  Iteration 50774/56600 Training loss: 0.9708 0.4885 sec/batch
Epoch 90/100  Iteration 50775/56600 Training loss: 0.9707 0.4853 sec/batch
Epoch 90/100  Iteration 50776/56600 Training loss: 0.9706 0.4949 sec/batch
Epoch 90/100  Iteration 50777/56600 Training loss: 0.9706 0.4802 sec/batch
Epoch 90/100  Iteration 50778/56600 Training loss: 0.9705 0.4732 sec/batch
Epoch 90/100  Iteration 50779/56600 Training loss: 0.9705 0.4897 sec/batch
Epoch 90/100  Iteration 50780/56600 Training loss: 0.9705 0.4688 sec/batch
Epoch 90/100  Iteration 5

Epoch 90/100  Iteration 50878/56600 Training loss: 0.9686 0.4822 sec/batch
Epoch 90/100  Iteration 50879/56600 Training loss: 0.9686 0.4947 sec/batch
Epoch 90/100  Iteration 50880/56600 Training loss: 0.9685 0.4940 sec/batch
Epoch 90/100  Iteration 50881/56600 Training loss: 0.9685 0.5053 sec/batch
Epoch 90/100  Iteration 50882/56600 Training loss: 0.9684 0.4828 sec/batch
Epoch 90/100  Iteration 50883/56600 Training loss: 0.9684 0.4890 sec/batch
Epoch 90/100  Iteration 50884/56600 Training loss: 0.9684 0.4776 sec/batch
Epoch 90/100  Iteration 50885/56600 Training loss: 0.9684 0.4794 sec/batch
Epoch 90/100  Iteration 50886/56600 Training loss: 0.9683 0.4738 sec/batch
Epoch 90/100  Iteration 50887/56600 Training loss: 0.9684 0.4786 sec/batch
Epoch 90/100  Iteration 50888/56600 Training loss: 0.9684 0.4825 sec/batch
Epoch 90/100  Iteration 50889/56600 Training loss: 0.9684 0.4843 sec/batch
Epoch 90/100  Iteration 50890/56600 Training loss: 0.9685 0.4926 sec/batch
Epoch 90/100  Iteration 5

Epoch 91/100  Iteration 50988/56600 Training loss: 0.9753 0.4653 sec/batch
Epoch 91/100  Iteration 50989/56600 Training loss: 0.9747 0.4691 sec/batch
Epoch 91/100  Iteration 50990/56600 Training loss: 0.9737 0.4738 sec/batch
Epoch 91/100  Iteration 50991/56600 Training loss: 0.9729 0.4667 sec/batch
Epoch 91/100  Iteration 50992/56600 Training loss: 0.9720 0.4708 sec/batch
Epoch 91/100  Iteration 50993/56600 Training loss: 0.9714 0.4784 sec/batch
Epoch 91/100  Iteration 50994/56600 Training loss: 0.9707 0.4792 sec/batch
Epoch 91/100  Iteration 50995/56600 Training loss: 0.9699 0.4741 sec/batch
Epoch 91/100  Iteration 50996/56600 Training loss: 0.9693 0.4792 sec/batch
Epoch 91/100  Iteration 50997/56600 Training loss: 0.9685 0.4790 sec/batch
Epoch 91/100  Iteration 50998/56600 Training loss: 0.9677 0.4844 sec/batch
Epoch 91/100  Iteration 50999/56600 Training loss: 0.9669 0.4877 sec/batch
Epoch 91/100  Iteration 51000/56600 Training loss: 0.9662 0.4853 sec/batch
Epoch 91/100  Iteration 5

Epoch 91/100  Iteration 51098/56600 Training loss: 0.9686 0.4803 sec/batch
Epoch 91/100  Iteration 51099/56600 Training loss: 0.9686 0.4934 sec/batch
Epoch 91/100  Iteration 51100/56600 Training loss: 0.9685 0.4948 sec/batch
Epoch 91/100  Iteration 51101/56600 Training loss: 0.9685 0.4929 sec/batch
Epoch 91/100  Iteration 51102/56600 Training loss: 0.9686 0.4833 sec/batch
Epoch 91/100  Iteration 51103/56600 Training loss: 0.9685 0.4942 sec/batch
Epoch 91/100  Iteration 51104/56600 Training loss: 0.9684 0.4795 sec/batch
Epoch 91/100  Iteration 51105/56600 Training loss: 0.9685 0.4743 sec/batch
Epoch 91/100  Iteration 51106/56600 Training loss: 0.9685 0.4790 sec/batch
Epoch 91/100  Iteration 51107/56600 Training loss: 0.9686 0.4753 sec/batch
Epoch 91/100  Iteration 51108/56600 Training loss: 0.9687 0.4774 sec/batch
Epoch 91/100  Iteration 51109/56600 Training loss: 0.9688 0.4898 sec/batch
Epoch 91/100  Iteration 51110/56600 Training loss: 0.9687 0.4795 sec/batch
Epoch 91/100  Iteration 5

Epoch 91/100  Iteration 51208/56600 Training loss: 0.9723 0.4728 sec/batch
Epoch 91/100  Iteration 51209/56600 Training loss: 0.9722 0.4786 sec/batch
Epoch 91/100  Iteration 51210/56600 Training loss: 0.9722 0.4793 sec/batch
Epoch 91/100  Iteration 51211/56600 Training loss: 0.9722 0.4787 sec/batch
Epoch 91/100  Iteration 51212/56600 Training loss: 0.9723 0.4791 sec/batch
Epoch 91/100  Iteration 51213/56600 Training loss: 0.9723 0.4797 sec/batch
Epoch 91/100  Iteration 51214/56600 Training loss: 0.9723 0.4808 sec/batch
Epoch 91/100  Iteration 51215/56600 Training loss: 0.9722 0.4613 sec/batch
Epoch 91/100  Iteration 51216/56600 Training loss: 0.9722 0.4721 sec/batch
Epoch 91/100  Iteration 51217/56600 Training loss: 0.9722 0.4700 sec/batch
Epoch 91/100  Iteration 51218/56600 Training loss: 0.9721 0.4651 sec/batch
Epoch 91/100  Iteration 51219/56600 Training loss: 0.9720 0.4777 sec/batch
Epoch 91/100  Iteration 51220/56600 Training loss: 0.9719 0.4897 sec/batch
Epoch 91/100  Iteration 5

Epoch 91/100  Iteration 51318/56600 Training loss: 0.9697 0.4844 sec/batch
Epoch 91/100  Iteration 51319/56600 Training loss: 0.9697 0.4795 sec/batch
Epoch 91/100  Iteration 51320/56600 Training loss: 0.9697 0.4771 sec/batch
Epoch 91/100  Iteration 51321/56600 Training loss: 0.9696 0.4812 sec/batch
Epoch 91/100  Iteration 51322/56600 Training loss: 0.9696 0.4793 sec/batch
Epoch 91/100  Iteration 51323/56600 Training loss: 0.9696 0.4632 sec/batch
Epoch 91/100  Iteration 51324/56600 Training loss: 0.9696 0.4649 sec/batch
Epoch 91/100  Iteration 51325/56600 Training loss: 0.9696 0.4623 sec/batch
Epoch 91/100  Iteration 51326/56600 Training loss: 0.9696 0.4855 sec/batch
Epoch 91/100  Iteration 51327/56600 Training loss: 0.9697 0.4573 sec/batch
Epoch 91/100  Iteration 51328/56600 Training loss: 0.9696 0.4584 sec/batch
Epoch 91/100  Iteration 51329/56600 Training loss: 0.9695 0.4636 sec/batch
Epoch 91/100  Iteration 51330/56600 Training loss: 0.9696 0.4639 sec/batch
Epoch 91/100  Iteration 5

Epoch 91/100  Iteration 51428/56600 Training loss: 0.9679 0.4793 sec/batch
Epoch 91/100  Iteration 51429/56600 Training loss: 0.9679 0.4567 sec/batch
Epoch 91/100  Iteration 51430/56600 Training loss: 0.9679 0.4630 sec/batch
Epoch 91/100  Iteration 51431/56600 Training loss: 0.9679 0.4741 sec/batch
Epoch 91/100  Iteration 51432/56600 Training loss: 0.9678 0.4791 sec/batch
Epoch 91/100  Iteration 51433/56600 Training loss: 0.9679 0.4795 sec/batch
Epoch 91/100  Iteration 51434/56600 Training loss: 0.9679 0.4855 sec/batch
Epoch 91/100  Iteration 51435/56600 Training loss: 0.9679 0.4670 sec/batch
Epoch 91/100  Iteration 51436/56600 Training loss: 0.9679 0.4629 sec/batch
Epoch 91/100  Iteration 51437/56600 Training loss: 0.9678 0.4635 sec/batch
Epoch 91/100  Iteration 51438/56600 Training loss: 0.9679 0.4677 sec/batch
Epoch 91/100  Iteration 51439/56600 Training loss: 0.9678 0.4748 sec/batch
Epoch 91/100  Iteration 51440/56600 Training loss: 0.9678 0.4807 sec/batch
Epoch 91/100  Iteration 5

Epoch 92/100  Iteration 51538/56600 Training loss: 0.9662 0.4637 sec/batch
Epoch 92/100  Iteration 51539/56600 Training loss: 0.9665 0.4700 sec/batch
Epoch 92/100  Iteration 51540/56600 Training loss: 0.9682 0.4738 sec/batch
Epoch 92/100  Iteration 51541/56600 Training loss: 0.9706 0.4629 sec/batch
Epoch 92/100  Iteration 51542/56600 Training loss: 0.9717 0.4787 sec/batch
Epoch 92/100  Iteration 51543/56600 Training loss: 0.9728 0.4626 sec/batch
Epoch 92/100  Iteration 51544/56600 Training loss: 0.9735 0.4740 sec/batch
Epoch 92/100  Iteration 51545/56600 Training loss: 0.9734 0.4642 sec/batch
Epoch 92/100  Iteration 51546/56600 Training loss: 0.9738 0.4635 sec/batch
Epoch 92/100  Iteration 51547/56600 Training loss: 0.9741 0.4631 sec/batch
Epoch 92/100  Iteration 51548/56600 Training loss: 0.9749 0.4632 sec/batch
Epoch 92/100  Iteration 51549/56600 Training loss: 0.9745 0.4638 sec/batch
Epoch 92/100  Iteration 51550/56600 Training loss: 0.9745 0.4749 sec/batch
Epoch 92/100  Iteration 5

Epoch 92/100  Iteration 51648/56600 Training loss: 0.9643 0.4640 sec/batch
Epoch 92/100  Iteration 51649/56600 Training loss: 0.9645 0.4634 sec/batch
Epoch 92/100  Iteration 51650/56600 Training loss: 0.9644 0.4699 sec/batch
Epoch 92/100  Iteration 51651/56600 Training loss: 0.9647 0.4910 sec/batch
Epoch 92/100  Iteration 51652/56600 Training loss: 0.9647 0.4606 sec/batch
Epoch 92/100  Iteration 51653/56600 Training loss: 0.9645 0.4630 sec/batch
Epoch 92/100  Iteration 51654/56600 Training loss: 0.9647 0.4728 sec/batch
Epoch 92/100  Iteration 51655/56600 Training loss: 0.9649 0.4804 sec/batch
Epoch 92/100  Iteration 51656/56600 Training loss: 0.9650 0.4797 sec/batch
Epoch 92/100  Iteration 51657/56600 Training loss: 0.9654 0.4789 sec/batch
Epoch 92/100  Iteration 51658/56600 Training loss: 0.9658 0.4713 sec/batch
Epoch 92/100  Iteration 51659/56600 Training loss: 0.9660 0.4713 sec/batch
Epoch 92/100  Iteration 51660/56600 Training loss: 0.9661 0.4751 sec/batch
Epoch 92/100  Iteration 5

Epoch 92/100  Iteration 51758/56600 Training loss: 0.9720 0.4742 sec/batch
Epoch 92/100  Iteration 51759/56600 Training loss: 0.9720 0.4844 sec/batch
Epoch 92/100  Iteration 51760/56600 Training loss: 0.9720 0.4583 sec/batch
Epoch 92/100  Iteration 51761/56600 Training loss: 0.9719 0.4634 sec/batch
Epoch 92/100  Iteration 51762/56600 Training loss: 0.9719 0.4791 sec/batch
Epoch 92/100  Iteration 51763/56600 Training loss: 0.9718 0.4746 sec/batch
Epoch 92/100  Iteration 51764/56600 Training loss: 0.9717 0.4632 sec/batch
Epoch 92/100  Iteration 51765/56600 Training loss: 0.9715 0.4792 sec/batch
Epoch 92/100  Iteration 51766/56600 Training loss: 0.9713 0.4797 sec/batch
Epoch 92/100  Iteration 51767/56600 Training loss: 0.9712 0.4734 sec/batch
Epoch 92/100  Iteration 51768/56600 Training loss: 0.9711 0.4880 sec/batch
Epoch 92/100  Iteration 51769/56600 Training loss: 0.9711 0.4703 sec/batch
Epoch 92/100  Iteration 51770/56600 Training loss: 0.9711 0.4702 sec/batch
Epoch 92/100  Iteration 5

Epoch 92/100  Iteration 51868/56600 Training loss: 0.9690 0.4795 sec/batch
Epoch 92/100  Iteration 51869/56600 Training loss: 0.9690 0.4634 sec/batch
Epoch 92/100  Iteration 51870/56600 Training loss: 0.9690 0.4734 sec/batch
Epoch 92/100  Iteration 51871/56600 Training loss: 0.9690 0.5019 sec/batch
Epoch 92/100  Iteration 51872/56600 Training loss: 0.9689 0.4688 sec/batch
Epoch 92/100  Iteration 51873/56600 Training loss: 0.9689 0.4710 sec/batch
Epoch 92/100  Iteration 51874/56600 Training loss: 0.9688 0.4722 sec/batch
Epoch 92/100  Iteration 51875/56600 Training loss: 0.9687 0.4864 sec/batch
Epoch 92/100  Iteration 51876/56600 Training loss: 0.9686 0.4817 sec/batch
Epoch 92/100  Iteration 51877/56600 Training loss: 0.9685 0.4784 sec/batch
Epoch 92/100  Iteration 51878/56600 Training loss: 0.9684 0.4794 sec/batch
Epoch 92/100  Iteration 51879/56600 Training loss: 0.9684 0.4794 sec/batch
Epoch 92/100  Iteration 51880/56600 Training loss: 0.9683 0.4788 sec/batch
Epoch 92/100  Iteration 5

Epoch 92/100  Iteration 51978/56600 Training loss: 0.9667 0.4798 sec/batch
Epoch 92/100  Iteration 51979/56600 Training loss: 0.9667 0.4784 sec/batch
Epoch 92/100  Iteration 51980/56600 Training loss: 0.9667 0.4797 sec/batch
Epoch 92/100  Iteration 51981/56600 Training loss: 0.9666 0.4950 sec/batch
Epoch 92/100  Iteration 51982/56600 Training loss: 0.9666 0.4785 sec/batch
Epoch 92/100  Iteration 51983/56600 Training loss: 0.9666 0.4798 sec/batch
Epoch 92/100  Iteration 51984/56600 Training loss: 0.9666 0.4853 sec/batch
Epoch 92/100  Iteration 51985/56600 Training loss: 0.9666 0.4706 sec/batch
Epoch 92/100  Iteration 51986/56600 Training loss: 0.9666 0.4897 sec/batch
Epoch 92/100  Iteration 51987/56600 Training loss: 0.9666 0.4794 sec/batch
Epoch 92/100  Iteration 51988/56600 Training loss: 0.9666 0.4788 sec/batch
Epoch 92/100  Iteration 51989/56600 Training loss: 0.9666 0.4638 sec/batch
Epoch 92/100  Iteration 51990/56600 Training loss: 0.9666 0.4730 sec/batch
Epoch 92/100  Iteration 5

Epoch 93/100  Iteration 52087/56600 Training loss: 0.9882 0.4627 sec/batch
Epoch 93/100  Iteration 52088/56600 Training loss: 0.9872 0.4764 sec/batch
Epoch 93/100  Iteration 52089/56600 Training loss: 0.9852 0.4655 sec/batch
Epoch 93/100  Iteration 52090/56600 Training loss: 0.9828 0.4796 sec/batch
Epoch 93/100  Iteration 52091/56600 Training loss: 0.9800 0.4812 sec/batch
Epoch 93/100  Iteration 52092/56600 Training loss: 0.9780 0.4615 sec/batch
Epoch 93/100  Iteration 52093/56600 Training loss: 0.9765 0.4636 sec/batch
Epoch 93/100  Iteration 52094/56600 Training loss: 0.9748 0.4788 sec/batch
Epoch 93/100  Iteration 52095/56600 Training loss: 0.9735 0.4810 sec/batch
Epoch 93/100  Iteration 52096/56600 Training loss: 0.9727 0.4735 sec/batch
Epoch 93/100  Iteration 52097/56600 Training loss: 0.9714 0.4622 sec/batch
Epoch 93/100  Iteration 52098/56600 Training loss: 0.9699 0.4728 sec/batch
Epoch 93/100  Iteration 52099/56600 Training loss: 0.9698 0.4834 sec/batch
Epoch 93/100  Iteration 5

Epoch 93/100  Iteration 52197/56600 Training loss: 0.9633 0.4824 sec/batch
Epoch 93/100  Iteration 52198/56600 Training loss: 0.9635 0.4829 sec/batch
Epoch 93/100  Iteration 52199/56600 Training loss: 0.9636 0.4755 sec/batch
Epoch 93/100  Iteration 52200/56600 Training loss: 0.9637 0.4897 sec/batch
Epoch 93/100  Iteration 52201/56600 Training loss: 0.9639 0.4844 sec/batch
Epoch 93/100  Iteration 52202/56600 Training loss: 0.9639 0.4793 sec/batch
Epoch 93/100  Iteration 52203/56600 Training loss: 0.9640 0.4872 sec/batch
Epoch 93/100  Iteration 52204/56600 Training loss: 0.9639 0.4688 sec/batch
Epoch 93/100  Iteration 52205/56600 Training loss: 0.9638 0.4792 sec/batch
Epoch 93/100  Iteration 52206/56600 Training loss: 0.9638 0.4798 sec/batch
Epoch 93/100  Iteration 52207/56600 Training loss: 0.9635 0.4788 sec/batch
Epoch 93/100  Iteration 52208/56600 Training loss: 0.9633 0.4788 sec/batch
Epoch 93/100  Iteration 52209/56600 Training loss: 0.9633 0.4744 sec/batch
Epoch 93/100  Iteration 5

Epoch 93/100  Iteration 52307/56600 Training loss: 0.9703 0.4795 sec/batch
Epoch 93/100  Iteration 52308/56600 Training loss: 0.9704 0.4692 sec/batch
Epoch 93/100  Iteration 52309/56600 Training loss: 0.9704 0.4776 sec/batch
Epoch 93/100  Iteration 52310/56600 Training loss: 0.9705 0.4776 sec/batch
Epoch 93/100  Iteration 52311/56600 Training loss: 0.9706 0.4729 sec/batch
Epoch 93/100  Iteration 52312/56600 Training loss: 0.9708 0.4800 sec/batch
Epoch 93/100  Iteration 52313/56600 Training loss: 0.9710 0.4748 sec/batch
Epoch 93/100  Iteration 52314/56600 Training loss: 0.9712 0.4783 sec/batch
Epoch 93/100  Iteration 52315/56600 Training loss: 0.9714 0.4907 sec/batch
Epoch 93/100  Iteration 52316/56600 Training loss: 0.9716 0.4788 sec/batch
Epoch 93/100  Iteration 52317/56600 Training loss: 0.9719 0.4790 sec/batch
Epoch 93/100  Iteration 52318/56600 Training loss: 0.9720 0.4687 sec/batch
Epoch 93/100  Iteration 52319/56600 Training loss: 0.9721 0.4877 sec/batch
Epoch 93/100  Iteration 5

Epoch 93/100  Iteration 52417/56600 Training loss: 0.9694 0.4769 sec/batch
Epoch 93/100  Iteration 52418/56600 Training loss: 0.9694 0.4815 sec/batch
Epoch 93/100  Iteration 52419/56600 Training loss: 0.9693 0.4785 sec/batch
Epoch 93/100  Iteration 52420/56600 Training loss: 0.9693 0.4792 sec/batch
Epoch 93/100  Iteration 52421/56600 Training loss: 0.9692 0.4792 sec/batch
Epoch 93/100  Iteration 52422/56600 Training loss: 0.9691 0.4864 sec/batch
Epoch 93/100  Iteration 52423/56600 Training loss: 0.9690 0.4769 sec/batch
Epoch 93/100  Iteration 52424/56600 Training loss: 0.9688 0.4819 sec/batch
Epoch 93/100  Iteration 52425/56600 Training loss: 0.9689 0.4873 sec/batch
Epoch 93/100  Iteration 52426/56600 Training loss: 0.9688 0.4866 sec/batch
Epoch 93/100  Iteration 52427/56600 Training loss: 0.9689 0.4747 sec/batch
Epoch 93/100  Iteration 52428/56600 Training loss: 0.9689 0.4780 sec/batch
Epoch 93/100  Iteration 52429/56600 Training loss: 0.9690 0.4888 sec/batch
Epoch 93/100  Iteration 5

Epoch 93/100  Iteration 52527/56600 Training loss: 0.9661 0.4797 sec/batch
Epoch 93/100  Iteration 52528/56600 Training loss: 0.9661 0.4921 sec/batch
Epoch 93/100  Iteration 52529/56600 Training loss: 0.9660 0.4627 sec/batch
Epoch 93/100  Iteration 52530/56600 Training loss: 0.9660 0.4769 sec/batch
Epoch 93/100  Iteration 52531/56600 Training loss: 0.9659 0.4783 sec/batch
Epoch 93/100  Iteration 52532/56600 Training loss: 0.9660 0.4793 sec/batch
Epoch 93/100  Iteration 52533/56600 Training loss: 0.9660 0.4730 sec/batch
Epoch 93/100  Iteration 52534/56600 Training loss: 0.9660 0.4864 sec/batch
Epoch 93/100  Iteration 52535/56600 Training loss: 0.9660 0.4784 sec/batch
Epoch 93/100  Iteration 52536/56600 Training loss: 0.9659 0.4786 sec/batch
Epoch 93/100  Iteration 52537/56600 Training loss: 0.9659 0.4739 sec/batch
Epoch 93/100  Iteration 52538/56600 Training loss: 0.9659 0.4844 sec/batch
Epoch 93/100  Iteration 52539/56600 Training loss: 0.9660 0.4797 sec/batch
Epoch 93/100  Iteration 5

Epoch 93/100  Iteration 52637/56600 Training loss: 0.9659 0.4795 sec/batch
Epoch 93/100  Iteration 52638/56600 Training loss: 0.9659 0.4799 sec/batch
Epoch 94/100  Iteration 52639/56600 Training loss: 1.1058 0.4779 sec/batch
Epoch 94/100  Iteration 52640/56600 Training loss: 1.0558 0.4877 sec/batch
Epoch 94/100  Iteration 52641/56600 Training loss: 1.0420 0.4643 sec/batch
Epoch 94/100  Iteration 52642/56600 Training loss: 1.0299 0.4653 sec/batch
Epoch 94/100  Iteration 52643/56600 Training loss: 1.0141 0.4714 sec/batch
Epoch 94/100  Iteration 52644/56600 Training loss: 1.0079 0.4724 sec/batch
Epoch 94/100  Iteration 52645/56600 Training loss: 1.0034 0.4861 sec/batch
Epoch 94/100  Iteration 52646/56600 Training loss: 0.9963 0.4811 sec/batch
Epoch 94/100  Iteration 52647/56600 Training loss: 0.9943 0.4887 sec/batch
Epoch 94/100  Iteration 52648/56600 Training loss: 0.9925 0.4698 sec/batch
Epoch 94/100  Iteration 52649/56600 Training loss: 0.9919 0.4797 sec/batch
Epoch 94/100  Iteration 5

Epoch 94/100  Iteration 52747/56600 Training loss: 0.9573 0.4779 sec/batch
Epoch 94/100  Iteration 52748/56600 Training loss: 0.9574 0.4782 sec/batch
Epoch 94/100  Iteration 52749/56600 Training loss: 0.9575 0.4754 sec/batch
Epoch 94/100  Iteration 52750/56600 Training loss: 0.9574 0.4793 sec/batch
Epoch 94/100  Iteration 52751/56600 Training loss: 0.9579 0.4689 sec/batch
Epoch 94/100  Iteration 52752/56600 Training loss: 0.9583 0.4787 sec/batch
Epoch 94/100  Iteration 52753/56600 Training loss: 0.9586 0.4842 sec/batch
Epoch 94/100  Iteration 52754/56600 Training loss: 0.9589 0.4798 sec/batch
Epoch 94/100  Iteration 52755/56600 Training loss: 0.9596 0.4736 sec/batch
Epoch 94/100  Iteration 52756/56600 Training loss: 0.9599 0.4790 sec/batch
Epoch 94/100  Iteration 52757/56600 Training loss: 0.9601 0.4631 sec/batch
Epoch 94/100  Iteration 52758/56600 Training loss: 0.9607 0.4643 sec/batch
Epoch 94/100  Iteration 52759/56600 Training loss: 0.9614 0.4787 sec/batch
Epoch 94/100  Iteration 5

Epoch 94/100  Iteration 52857/56600 Training loss: 0.9674 0.4760 sec/batch
Epoch 94/100  Iteration 52858/56600 Training loss: 0.9676 0.4810 sec/batch
Epoch 94/100  Iteration 52859/56600 Training loss: 0.9676 0.4788 sec/batch
Epoch 94/100  Iteration 52860/56600 Training loss: 0.9676 0.4802 sec/batch
Epoch 94/100  Iteration 52861/56600 Training loss: 0.9676 0.4873 sec/batch
Epoch 94/100  Iteration 52862/56600 Training loss: 0.9678 0.5021 sec/batch
Epoch 94/100  Iteration 52863/56600 Training loss: 0.9680 0.4739 sec/batch
Epoch 94/100  Iteration 52864/56600 Training loss: 0.9681 0.4787 sec/batch
Epoch 94/100  Iteration 52865/56600 Training loss: 0.9682 0.4796 sec/batch
Epoch 94/100  Iteration 52866/56600 Training loss: 0.9681 0.4874 sec/batch
Epoch 94/100  Iteration 52867/56600 Training loss: 0.9682 0.4800 sec/batch
Epoch 94/100  Iteration 52868/56600 Training loss: 0.9684 0.4835 sec/batch
Epoch 94/100  Iteration 52869/56600 Training loss: 0.9686 0.4844 sec/batch
Epoch 94/100  Iteration 5

Epoch 94/100  Iteration 52967/56600 Training loss: 0.9683 0.4787 sec/batch
Epoch 94/100  Iteration 52968/56600 Training loss: 0.9683 0.4684 sec/batch
Epoch 94/100  Iteration 52969/56600 Training loss: 0.9683 0.4749 sec/batch
Epoch 94/100  Iteration 52970/56600 Training loss: 0.9683 0.4947 sec/batch
Epoch 94/100  Iteration 52971/56600 Training loss: 0.9684 0.4788 sec/batch
Epoch 94/100  Iteration 52972/56600 Training loss: 0.9683 0.4830 sec/batch
Epoch 94/100  Iteration 52973/56600 Training loss: 0.9683 0.4687 sec/batch
Epoch 94/100  Iteration 52974/56600 Training loss: 0.9683 0.4791 sec/batch
Epoch 94/100  Iteration 52975/56600 Training loss: 0.9683 0.4790 sec/batch
Epoch 94/100  Iteration 52976/56600 Training loss: 0.9683 0.4716 sec/batch
Epoch 94/100  Iteration 52977/56600 Training loss: 0.9683 0.4866 sec/batch
Epoch 94/100  Iteration 52978/56600 Training loss: 0.9683 0.4894 sec/batch
Epoch 94/100  Iteration 52979/56600 Training loss: 0.9681 0.4638 sec/batch
Epoch 94/100  Iteration 5

Epoch 94/100  Iteration 53077/56600 Training loss: 0.9652 0.4867 sec/batch
Epoch 94/100  Iteration 53078/56600 Training loss: 0.9652 0.4874 sec/batch
Epoch 94/100  Iteration 53079/56600 Training loss: 0.9651 0.4899 sec/batch
Epoch 94/100  Iteration 53080/56600 Training loss: 0.9650 0.4673 sec/batch
Epoch 94/100  Iteration 53081/56600 Training loss: 0.9651 0.4712 sec/batch
Epoch 94/100  Iteration 53082/56600 Training loss: 0.9651 0.4804 sec/batch
Epoch 94/100  Iteration 53083/56600 Training loss: 0.9650 0.4895 sec/batch
Epoch 94/100  Iteration 53084/56600 Training loss: 0.9649 0.4825 sec/batch
Epoch 94/100  Iteration 53085/56600 Training loss: 0.9648 0.4800 sec/batch
Epoch 94/100  Iteration 53086/56600 Training loss: 0.9648 0.4944 sec/batch
Epoch 94/100  Iteration 53087/56600 Training loss: 0.9648 0.4805 sec/batch
Epoch 94/100  Iteration 53088/56600 Training loss: 0.9648 0.4937 sec/batch
Epoch 94/100  Iteration 53089/56600 Training loss: 0.9648 0.4777 sec/batch
Epoch 94/100  Iteration 5

Epoch 94/100  Iteration 53187/56600 Training loss: 0.9643 0.4815 sec/batch
Epoch 94/100  Iteration 53188/56600 Training loss: 0.9642 0.4784 sec/batch
Epoch 94/100  Iteration 53189/56600 Training loss: 0.9642 0.4804 sec/batch
Epoch 94/100  Iteration 53190/56600 Training loss: 0.9642 0.4801 sec/batch
Epoch 94/100  Iteration 53191/56600 Training loss: 0.9642 0.4925 sec/batch
Epoch 94/100  Iteration 53192/56600 Training loss: 0.9642 0.4771 sec/batch
Epoch 94/100  Iteration 53193/56600 Training loss: 0.9641 0.4703 sec/batch
Epoch 94/100  Iteration 53194/56600 Training loss: 0.9641 0.4930 sec/batch
Epoch 94/100  Iteration 53195/56600 Training loss: 0.9641 0.4654 sec/batch
Epoch 94/100  Iteration 53196/56600 Training loss: 0.9642 0.4819 sec/batch
Epoch 94/100  Iteration 53197/56600 Training loss: 0.9642 0.4787 sec/batch
Epoch 94/100  Iteration 53198/56600 Training loss: 0.9642 0.4794 sec/batch
Epoch 94/100  Iteration 53199/56600 Training loss: 0.9643 0.4790 sec/batch
Epoch 94/100  Iteration 5

Epoch 95/100  Iteration 53297/56600 Training loss: 0.9578 0.4788 sec/batch
Epoch 95/100  Iteration 53298/56600 Training loss: 0.9578 0.4803 sec/batch
Epoch 95/100  Iteration 53299/56600 Training loss: 0.9576 0.4711 sec/batch
Epoch 95/100  Iteration 53300/56600 Training loss: 0.9573 0.4706 sec/batch
Epoch 95/100  Iteration 53301/56600 Training loss: 0.9570 0.4629 sec/batch
Epoch 95/100  Iteration 53302/56600 Training loss: 0.9568 0.4640 sec/batch
Epoch 95/100  Iteration 53303/56600 Training loss: 0.9570 0.4647 sec/batch
Epoch 95/100  Iteration 53304/56600 Training loss: 0.9575 0.4776 sec/batch
Epoch 95/100  Iteration 53305/56600 Training loss: 0.9575 0.4813 sec/batch
Epoch 95/100  Iteration 53306/56600 Training loss: 0.9579 0.4619 sec/batch
Epoch 95/100  Iteration 53307/56600 Training loss: 0.9578 0.4790 sec/batch
Epoch 95/100  Iteration 53308/56600 Training loss: 0.9579 0.4720 sec/batch
Epoch 95/100  Iteration 53309/56600 Training loss: 0.9579 0.4823 sec/batch
Epoch 95/100  Iteration 5

Epoch 95/100  Iteration 53407/56600 Training loss: 0.9669 0.4919 sec/batch
Epoch 95/100  Iteration 53408/56600 Training loss: 0.9669 0.4862 sec/batch
Epoch 95/100  Iteration 53409/56600 Training loss: 0.9671 0.4803 sec/batch
Epoch 95/100  Iteration 53410/56600 Training loss: 0.9673 0.4881 sec/batch
Epoch 95/100  Iteration 53411/56600 Training loss: 0.9673 0.4677 sec/batch
Epoch 95/100  Iteration 53412/56600 Training loss: 0.9672 0.4778 sec/batch
Epoch 95/100  Iteration 53413/56600 Training loss: 0.9672 0.4754 sec/batch
Epoch 95/100  Iteration 53414/56600 Training loss: 0.9672 0.4788 sec/batch
Epoch 95/100  Iteration 53415/56600 Training loss: 0.9672 0.4880 sec/batch
Epoch 95/100  Iteration 53416/56600 Training loss: 0.9671 0.4808 sec/batch
Epoch 95/100  Iteration 53417/56600 Training loss: 0.9669 0.4760 sec/batch
Epoch 95/100  Iteration 53418/56600 Training loss: 0.9668 0.4826 sec/batch
Epoch 95/100  Iteration 53419/56600 Training loss: 0.9668 0.4843 sec/batch
Epoch 95/100  Iteration 5

Epoch 95/100  Iteration 53517/56600 Training loss: 0.9682 0.4764 sec/batch
Epoch 95/100  Iteration 53518/56600 Training loss: 0.9681 0.4691 sec/batch
Epoch 95/100  Iteration 53519/56600 Training loss: 0.9681 0.4735 sec/batch
Epoch 95/100  Iteration 53520/56600 Training loss: 0.9681 0.4739 sec/batch
Epoch 95/100  Iteration 53521/56600 Training loss: 0.9682 0.4791 sec/batch
Epoch 95/100  Iteration 53522/56600 Training loss: 0.9683 0.4819 sec/batch
Epoch 95/100  Iteration 53523/56600 Training loss: 0.9682 0.4769 sec/batch
Epoch 95/100  Iteration 53524/56600 Training loss: 0.9681 0.4951 sec/batch
Epoch 95/100  Iteration 53525/56600 Training loss: 0.9681 0.4883 sec/batch
Epoch 95/100  Iteration 53526/56600 Training loss: 0.9680 0.4771 sec/batch
Epoch 95/100  Iteration 53527/56600 Training loss: 0.9678 0.4660 sec/batch
Epoch 95/100  Iteration 53528/56600 Training loss: 0.9677 0.4609 sec/batch
Epoch 95/100  Iteration 53529/56600 Training loss: 0.9677 0.4657 sec/batch
Epoch 95/100  Iteration 5

Epoch 95/100  Iteration 53627/56600 Training loss: 0.9653 0.4632 sec/batch
Epoch 95/100  Iteration 53628/56600 Training loss: 0.9654 0.4631 sec/batch
Epoch 95/100  Iteration 53629/56600 Training loss: 0.9653 0.4636 sec/batch
Epoch 95/100  Iteration 53630/56600 Training loss: 0.9653 0.4725 sec/batch
Epoch 95/100  Iteration 53631/56600 Training loss: 0.9652 0.4792 sec/batch
Epoch 95/100  Iteration 53632/56600 Training loss: 0.9651 0.4758 sec/batch
Epoch 95/100  Iteration 53633/56600 Training loss: 0.9651 0.4736 sec/batch
Epoch 95/100  Iteration 53634/56600 Training loss: 0.9650 0.4857 sec/batch
Epoch 95/100  Iteration 53635/56600 Training loss: 0.9650 0.4836 sec/batch
Epoch 95/100  Iteration 53636/56600 Training loss: 0.9649 0.4688 sec/batch
Epoch 95/100  Iteration 53637/56600 Training loss: 0.9648 0.4631 sec/batch
Epoch 95/100  Iteration 53638/56600 Training loss: 0.9647 0.4640 sec/batch
Epoch 95/100  Iteration 53639/56600 Training loss: 0.9647 0.4737 sec/batch
Epoch 95/100  Iteration 5

Epoch 95/100  Iteration 53737/56600 Training loss: 0.9639 0.4848 sec/batch
Epoch 95/100  Iteration 53738/56600 Training loss: 0.9638 0.4853 sec/batch
Epoch 95/100  Iteration 53739/56600 Training loss: 0.9637 0.4688 sec/batch
Epoch 95/100  Iteration 53740/56600 Training loss: 0.9638 0.4762 sec/batch
Epoch 95/100  Iteration 53741/56600 Training loss: 0.9638 0.4770 sec/batch
Epoch 95/100  Iteration 53742/56600 Training loss: 0.9637 0.4876 sec/batch
Epoch 95/100  Iteration 53743/56600 Training loss: 0.9637 0.4864 sec/batch
Epoch 95/100  Iteration 53744/56600 Training loss: 0.9637 0.4802 sec/batch
Epoch 95/100  Iteration 53745/56600 Training loss: 0.9637 0.4783 sec/batch
Epoch 95/100  Iteration 53746/56600 Training loss: 0.9636 0.4788 sec/batch
Epoch 95/100  Iteration 53747/56600 Training loss: 0.9636 0.4905 sec/batch
Epoch 95/100  Iteration 53748/56600 Training loss: 0.9636 0.4780 sec/batch
Epoch 95/100  Iteration 53749/56600 Training loss: 0.9636 0.4844 sec/batch
Epoch 95/100  Iteration 5

Epoch 96/100  Iteration 53847/56600 Training loss: 0.9564 0.4952 sec/batch
Epoch 96/100  Iteration 53848/56600 Training loss: 0.9564 0.4893 sec/batch
Epoch 96/100  Iteration 53849/56600 Training loss: 0.9562 0.4778 sec/batch
Epoch 96/100  Iteration 53850/56600 Training loss: 0.9560 0.4866 sec/batch
Epoch 96/100  Iteration 53851/56600 Training loss: 0.9559 0.4789 sec/batch
Epoch 96/100  Iteration 53852/56600 Training loss: 0.9559 0.4789 sec/batch
Epoch 96/100  Iteration 53853/56600 Training loss: 0.9562 0.4909 sec/batch
Epoch 96/100  Iteration 53854/56600 Training loss: 0.9564 0.4789 sec/batch
Epoch 96/100  Iteration 53855/56600 Training loss: 0.9559 0.4807 sec/batch
Epoch 96/100  Iteration 53856/56600 Training loss: 0.9556 0.4597 sec/batch
Epoch 96/100  Iteration 53857/56600 Training loss: 0.9551 0.4886 sec/batch
Epoch 96/100  Iteration 53858/56600 Training loss: 0.9548 0.4646 sec/batch
Epoch 96/100  Iteration 53859/56600 Training loss: 0.9550 0.4792 sec/batch
Epoch 96/100  Iteration 5

Epoch 96/100  Iteration 53957/56600 Training loss: 0.9637 0.4796 sec/batch
Epoch 96/100  Iteration 53958/56600 Training loss: 0.9639 0.4997 sec/batch
Epoch 96/100  Iteration 53959/56600 Training loss: 0.9638 0.4892 sec/batch
Epoch 96/100  Iteration 53960/56600 Training loss: 0.9639 0.4962 sec/batch
Epoch 96/100  Iteration 53961/56600 Training loss: 0.9638 0.4784 sec/batch
Epoch 96/100  Iteration 53962/56600 Training loss: 0.9638 0.4844 sec/batch
Epoch 96/100  Iteration 53963/56600 Training loss: 0.9639 0.4787 sec/batch
Epoch 96/100  Iteration 53964/56600 Training loss: 0.9641 0.4970 sec/batch
Epoch 96/100  Iteration 53965/56600 Training loss: 0.9643 0.4786 sec/batch
Epoch 96/100  Iteration 53966/56600 Training loss: 0.9643 0.4797 sec/batch
Epoch 96/100  Iteration 53967/56600 Training loss: 0.9645 0.4788 sec/batch
Epoch 96/100  Iteration 53968/56600 Training loss: 0.9646 0.4789 sec/batch
Epoch 96/100  Iteration 53969/56600 Training loss: 0.9647 0.4741 sec/batch
Epoch 96/100  Iteration 5

Epoch 96/100  Iteration 54066/56600 Training loss: 0.9687 0.4790 sec/batch
Epoch 96/100  Iteration 54067/56600 Training loss: 0.9687 0.4744 sec/batch
Epoch 96/100  Iteration 54068/56600 Training loss: 0.9688 0.4783 sec/batch
Epoch 96/100  Iteration 54069/56600 Training loss: 0.9688 0.4637 sec/batch
Epoch 96/100  Iteration 54070/56600 Training loss: 0.9687 0.4636 sec/batch
Epoch 96/100  Iteration 54071/56600 Training loss: 0.9686 0.4628 sec/batch
Epoch 96/100  Iteration 54072/56600 Training loss: 0.9687 0.4700 sec/batch
Epoch 96/100  Iteration 54073/56600 Training loss: 0.9687 0.4788 sec/batch
Epoch 96/100  Iteration 54074/56600 Training loss: 0.9686 0.4806 sec/batch
Epoch 96/100  Iteration 54075/56600 Training loss: 0.9686 0.4779 sec/batch
Epoch 96/100  Iteration 54076/56600 Training loss: 0.9685 0.4634 sec/batch
Epoch 96/100  Iteration 54077/56600 Training loss: 0.9684 0.4739 sec/batch
Epoch 96/100  Iteration 54078/56600 Training loss: 0.9682 0.4736 sec/batch
Epoch 96/100  Iteration 5

Epoch 96/100  Iteration 54176/56600 Training loss: 0.9655 0.4768 sec/batch
Epoch 96/100  Iteration 54177/56600 Training loss: 0.9655 0.4643 sec/batch
Epoch 96/100  Iteration 54178/56600 Training loss: 0.9654 0.4722 sec/batch
Epoch 96/100  Iteration 54179/56600 Training loss: 0.9653 0.4737 sec/batch
Epoch 96/100  Iteration 54180/56600 Training loss: 0.9652 0.4746 sec/batch
Epoch 96/100  Iteration 54181/56600 Training loss: 0.9651 0.4699 sec/batch
Epoch 96/100  Iteration 54182/56600 Training loss: 0.9651 0.4881 sec/batch
Epoch 96/100  Iteration 54183/56600 Training loss: 0.9650 0.4834 sec/batch
Epoch 96/100  Iteration 54184/56600 Training loss: 0.9650 0.4586 sec/batch
Epoch 96/100  Iteration 54185/56600 Training loss: 0.9650 0.4731 sec/batch
Epoch 96/100  Iteration 54186/56600 Training loss: 0.9650 0.4653 sec/batch
Epoch 96/100  Iteration 54187/56600 Training loss: 0.9650 0.4626 sec/batch
Epoch 96/100  Iteration 54188/56600 Training loss: 0.9650 0.4643 sec/batch
Epoch 96/100  Iteration 5

Epoch 96/100  Iteration 54286/56600 Training loss: 0.9631 0.4716 sec/batch
Epoch 96/100  Iteration 54287/56600 Training loss: 0.9632 0.4760 sec/batch
Epoch 96/100  Iteration 54288/56600 Training loss: 0.9632 0.4801 sec/batch
Epoch 96/100  Iteration 54289/56600 Training loss: 0.9632 0.4791 sec/batch
Epoch 96/100  Iteration 54290/56600 Training loss: 0.9631 0.4780 sec/batch
Epoch 96/100  Iteration 54291/56600 Training loss: 0.9632 0.4655 sec/batch
Epoch 96/100  Iteration 54292/56600 Training loss: 0.9632 0.4626 sec/batch
Epoch 96/100  Iteration 54293/56600 Training loss: 0.9632 0.4803 sec/batch
Epoch 96/100  Iteration 54294/56600 Training loss: 0.9633 0.4775 sec/batch
Epoch 96/100  Iteration 54295/56600 Training loss: 0.9633 0.4794 sec/batch
Epoch 96/100  Iteration 54296/56600 Training loss: 0.9633 0.4790 sec/batch
Epoch 96/100  Iteration 54297/56600 Training loss: 0.9634 0.4789 sec/batch
Epoch 96/100  Iteration 54298/56600 Training loss: 0.9634 0.4800 sec/batch
Epoch 96/100  Iteration 5

Epoch 97/100  Iteration 54396/56600 Training loss: 0.9614 0.4680 sec/batch
Epoch 97/100  Iteration 54397/56600 Training loss: 0.9608 0.4892 sec/batch
Epoch 97/100  Iteration 54398/56600 Training loss: 0.9602 0.4673 sec/batch
Epoch 97/100  Iteration 54399/56600 Training loss: 0.9601 0.4819 sec/batch
Epoch 97/100  Iteration 54400/56600 Training loss: 0.9594 0.4717 sec/batch
Epoch 97/100  Iteration 54401/56600 Training loss: 0.9589 0.4703 sec/batch
Epoch 97/100  Iteration 54402/56600 Training loss: 0.9585 0.4759 sec/batch
Epoch 97/100  Iteration 54403/56600 Training loss: 0.9581 0.4627 sec/batch
Epoch 97/100  Iteration 54404/56600 Training loss: 0.9583 0.4640 sec/batch
Epoch 97/100  Iteration 54405/56600 Training loss: 0.9583 0.4774 sec/batch
Epoch 97/100  Iteration 54406/56600 Training loss: 0.9577 0.4756 sec/batch
Epoch 97/100  Iteration 54407/56600 Training loss: 0.9575 0.4630 sec/batch
Epoch 97/100  Iteration 54408/56600 Training loss: 0.9569 0.4690 sec/batch
Epoch 97/100  Iteration 5

Epoch 97/100  Iteration 54506/56600 Training loss: 0.9628 0.4794 sec/batch
Epoch 97/100  Iteration 54507/56600 Training loss: 0.9627 0.4800 sec/batch
Epoch 97/100  Iteration 54508/56600 Training loss: 0.9626 0.4785 sec/batch
Epoch 97/100  Iteration 54509/56600 Training loss: 0.9626 0.4792 sec/batch
Epoch 97/100  Iteration 54510/56600 Training loss: 0.9627 0.4788 sec/batch
Epoch 97/100  Iteration 54511/56600 Training loss: 0.9625 0.4875 sec/batch
Epoch 97/100  Iteration 54512/56600 Training loss: 0.9623 0.4697 sec/batch
Epoch 97/100  Iteration 54513/56600 Training loss: 0.9623 0.4823 sec/batch
Epoch 97/100  Iteration 54514/56600 Training loss: 0.9623 0.4775 sec/batch
Epoch 97/100  Iteration 54515/56600 Training loss: 0.9622 0.4790 sec/batch
Epoch 97/100  Iteration 54516/56600 Training loss: 0.9623 0.4788 sec/batch
Epoch 97/100  Iteration 54517/56600 Training loss: 0.9623 0.4775 sec/batch
Epoch 97/100  Iteration 54518/56600 Training loss: 0.9623 0.4808 sec/batch
Epoch 97/100  Iteration 5

Epoch 97/100  Iteration 54616/56600 Training loss: 0.9658 0.4902 sec/batch
Epoch 97/100  Iteration 54617/56600 Training loss: 0.9658 0.4943 sec/batch
Epoch 97/100  Iteration 54618/56600 Training loss: 0.9657 0.4793 sec/batch
Epoch 97/100  Iteration 54619/56600 Training loss: 0.9657 0.5007 sec/batch
Epoch 97/100  Iteration 54620/56600 Training loss: 0.9656 0.4865 sec/batch
Epoch 97/100  Iteration 54621/56600 Training loss: 0.9656 0.4874 sec/batch
Epoch 97/100  Iteration 54622/56600 Training loss: 0.9657 0.4876 sec/batch
Epoch 97/100  Iteration 54623/56600 Training loss: 0.9659 0.4907 sec/batch
Epoch 97/100  Iteration 54624/56600 Training loss: 0.9659 0.4708 sec/batch
Epoch 97/100  Iteration 54625/56600 Training loss: 0.9658 0.4782 sec/batch
Epoch 97/100  Iteration 54626/56600 Training loss: 0.9659 0.4786 sec/batch
Epoch 97/100  Iteration 54627/56600 Training loss: 0.9660 0.4789 sec/batch
Epoch 97/100  Iteration 54628/56600 Training loss: 0.9662 0.4798 sec/batch
Epoch 97/100  Iteration 5

Epoch 97/100  Iteration 54726/56600 Training loss: 0.9633 0.4759 sec/batch
Epoch 97/100  Iteration 54727/56600 Training loss: 0.9632 0.4839 sec/batch
Epoch 97/100  Iteration 54728/56600 Training loss: 0.9632 0.4800 sec/batch
Epoch 97/100  Iteration 54729/56600 Training loss: 0.9633 0.4789 sec/batch
Epoch 97/100  Iteration 54730/56600 Training loss: 0.9633 0.4882 sec/batch
Epoch 97/100  Iteration 54731/56600 Training loss: 0.9633 0.4800 sec/batch
Epoch 97/100  Iteration 54732/56600 Training loss: 0.9633 0.4801 sec/batch
Epoch 97/100  Iteration 54733/56600 Training loss: 0.9633 0.4783 sec/batch
Epoch 97/100  Iteration 54734/56600 Training loss: 0.9633 0.4792 sec/batch
Epoch 97/100  Iteration 54735/56600 Training loss: 0.9634 0.4789 sec/batch
Epoch 97/100  Iteration 54736/56600 Training loss: 0.9635 0.4861 sec/batch
Epoch 97/100  Iteration 54737/56600 Training loss: 0.9633 0.4713 sec/batch
Epoch 97/100  Iteration 54738/56600 Training loss: 0.9633 0.5020 sec/batch
Epoch 97/100  Iteration 5

Epoch 97/100  Iteration 54836/56600 Training loss: 0.9615 0.4794 sec/batch
Epoch 97/100  Iteration 54837/56600 Training loss: 0.9615 0.4779 sec/batch
Epoch 97/100  Iteration 54838/56600 Training loss: 0.9614 0.4889 sec/batch
Epoch 97/100  Iteration 54839/56600 Training loss: 0.9614 0.4796 sec/batch
Epoch 97/100  Iteration 54840/56600 Training loss: 0.9613 0.4852 sec/batch
Epoch 97/100  Iteration 54841/56600 Training loss: 0.9613 0.4696 sec/batch
Epoch 97/100  Iteration 54842/56600 Training loss: 0.9613 0.4790 sec/batch
Epoch 97/100  Iteration 54843/56600 Training loss: 0.9612 0.4896 sec/batch
Epoch 97/100  Iteration 54844/56600 Training loss: 0.9611 0.4954 sec/batch
Epoch 97/100  Iteration 54845/56600 Training loss: 0.9611 0.4941 sec/batch
Epoch 97/100  Iteration 54846/56600 Training loss: 0.9611 0.4907 sec/batch
Epoch 97/100  Iteration 54847/56600 Training loss: 0.9611 0.4759 sec/batch
Epoch 97/100  Iteration 54848/56600 Training loss: 0.9611 0.4793 sec/batch
Epoch 97/100  Iteration 5

Epoch 98/100  Iteration 54946/56600 Training loss: 0.9686 0.4943 sec/batch
Epoch 98/100  Iteration 54947/56600 Training loss: 0.9692 0.4930 sec/batch
Epoch 98/100  Iteration 54948/56600 Training loss: 0.9694 0.4789 sec/batch
Epoch 98/100  Iteration 54949/56600 Training loss: 0.9685 0.4786 sec/batch
Epoch 98/100  Iteration 54950/56600 Training loss: 0.9685 0.4739 sec/batch
Epoch 98/100  Iteration 54951/56600 Training loss: 0.9680 0.4800 sec/batch
Epoch 98/100  Iteration 54952/56600 Training loss: 0.9670 0.4743 sec/batch
Epoch 98/100  Iteration 54953/56600 Training loss: 0.9663 0.4781 sec/batch
Epoch 98/100  Iteration 54954/56600 Training loss: 0.9652 0.4878 sec/batch
Epoch 98/100  Iteration 54955/56600 Training loss: 0.9646 0.4860 sec/batch
Epoch 98/100  Iteration 54956/56600 Training loss: 0.9641 0.4792 sec/batch
Epoch 98/100  Iteration 54957/56600 Training loss: 0.9632 0.4797 sec/batch
Epoch 98/100  Iteration 54958/56600 Training loss: 0.9625 0.4739 sec/batch
Epoch 98/100  Iteration 5

Epoch 98/100  Iteration 55056/56600 Training loss: 0.9614 0.4858 sec/batch
Epoch 98/100  Iteration 55057/56600 Training loss: 0.9616 0.4880 sec/batch
Epoch 98/100  Iteration 55058/56600 Training loss: 0.9618 0.4770 sec/batch
Epoch 98/100  Iteration 55059/56600 Training loss: 0.9619 0.4817 sec/batch
Epoch 98/100  Iteration 55060/56600 Training loss: 0.9620 0.4874 sec/batch
Epoch 98/100  Iteration 55061/56600 Training loss: 0.9621 0.4814 sec/batch
Epoch 98/100  Iteration 55062/56600 Training loss: 0.9621 0.4784 sec/batch
Epoch 98/100  Iteration 55063/56600 Training loss: 0.9620 0.4844 sec/batch
Epoch 98/100  Iteration 55064/56600 Training loss: 0.9621 0.4798 sec/batch
Epoch 98/100  Iteration 55065/56600 Training loss: 0.9621 0.4752 sec/batch
Epoch 98/100  Iteration 55066/56600 Training loss: 0.9620 0.4790 sec/batch
Epoch 98/100  Iteration 55067/56600 Training loss: 0.9620 0.4742 sec/batch
Epoch 98/100  Iteration 55068/56600 Training loss: 0.9621 0.4797 sec/batch
Epoch 98/100  Iteration 5

Epoch 98/100  Iteration 55166/56600 Training loss: 0.9661 0.4799 sec/batch
Epoch 98/100  Iteration 55167/56600 Training loss: 0.9659 0.4786 sec/batch
Epoch 98/100  Iteration 55168/56600 Training loss: 0.9659 0.4783 sec/batch
Epoch 98/100  Iteration 55169/56600 Training loss: 0.9658 0.4797 sec/batch
Epoch 98/100  Iteration 55170/56600 Training loss: 0.9657 0.4739 sec/batch
Epoch 98/100  Iteration 55171/56600 Training loss: 0.9656 0.4792 sec/batch
Epoch 98/100  Iteration 55172/56600 Training loss: 0.9655 0.4754 sec/batch
Epoch 98/100  Iteration 55173/56600 Training loss: 0.9656 0.4652 sec/batch
Epoch 98/100  Iteration 55174/56600 Training loss: 0.9657 0.4784 sec/batch
Epoch 98/100  Iteration 55175/56600 Training loss: 0.9657 0.4799 sec/batch
Epoch 98/100  Iteration 55176/56600 Training loss: 0.9656 0.4637 sec/batch
Epoch 98/100  Iteration 55177/56600 Training loss: 0.9656 0.4637 sec/batch
Epoch 98/100  Iteration 55178/56600 Training loss: 0.9656 0.4793 sec/batch
Epoch 98/100  Iteration 5

Epoch 98/100  Iteration 55276/56600 Training loss: 0.9630 0.4667 sec/batch
Epoch 98/100  Iteration 55277/56600 Training loss: 0.9629 0.4761 sec/batch
Epoch 98/100  Iteration 55278/56600 Training loss: 0.9629 0.4834 sec/batch
Epoch 98/100  Iteration 55279/56600 Training loss: 0.9630 0.4895 sec/batch
Epoch 98/100  Iteration 55280/56600 Training loss: 0.9629 0.4663 sec/batch
Epoch 98/100  Iteration 55281/56600 Training loss: 0.9629 0.4788 sec/batch
Epoch 98/100  Iteration 55282/56600 Training loss: 0.9629 0.4900 sec/batch
Epoch 98/100  Iteration 55283/56600 Training loss: 0.9629 0.4779 sec/batch
Epoch 98/100  Iteration 55284/56600 Training loss: 0.9629 0.4795 sec/batch
Epoch 98/100  Iteration 55285/56600 Training loss: 0.9628 0.4794 sec/batch
Epoch 98/100  Iteration 55286/56600 Training loss: 0.9628 0.4840 sec/batch
Epoch 98/100  Iteration 55287/56600 Training loss: 0.9628 0.4946 sec/batch
Epoch 98/100  Iteration 55288/56600 Training loss: 0.9629 0.4758 sec/batch
Epoch 98/100  Iteration 5

Epoch 98/100  Iteration 55386/56600 Training loss: 0.9614 0.4794 sec/batch
Epoch 98/100  Iteration 55387/56600 Training loss: 0.9614 0.4793 sec/batch
Epoch 98/100  Iteration 55388/56600 Training loss: 0.9614 0.4878 sec/batch
Epoch 98/100  Iteration 55389/56600 Training loss: 0.9613 0.4668 sec/batch
Epoch 98/100  Iteration 55390/56600 Training loss: 0.9614 0.4643 sec/batch
Epoch 98/100  Iteration 55391/56600 Training loss: 0.9613 0.4680 sec/batch
Epoch 98/100  Iteration 55392/56600 Training loss: 0.9613 0.4769 sec/batch
Epoch 98/100  Iteration 55393/56600 Training loss: 0.9613 0.4753 sec/batch
Epoch 98/100  Iteration 55394/56600 Training loss: 0.9613 0.4794 sec/batch
Epoch 98/100  Iteration 55395/56600 Training loss: 0.9613 0.4747 sec/batch
Epoch 98/100  Iteration 55396/56600 Training loss: 0.9614 0.4785 sec/batch
Epoch 98/100  Iteration 55397/56600 Training loss: 0.9614 0.4749 sec/batch
Epoch 98/100  Iteration 55398/56600 Training loss: 0.9613 0.4627 sec/batch
Epoch 98/100  Iteration 5

Epoch 99/100  Iteration 55496/56600 Training loss: 0.9636 0.4794 sec/batch
Epoch 99/100  Iteration 55497/56600 Training loss: 0.9635 0.4951 sec/batch
Epoch 99/100  Iteration 55498/56600 Training loss: 0.9630 0.4783 sec/batch
Epoch 99/100  Iteration 55499/56600 Training loss: 0.9623 0.4796 sec/batch
Epoch 99/100  Iteration 55500/56600 Training loss: 0.9620 0.4982 sec/batch
Epoch 99/100  Iteration 55501/56600 Training loss: 0.9625 0.4873 sec/batch
Epoch 99/100  Iteration 55502/56600 Training loss: 0.9640 0.4641 sec/batch
Epoch 99/100  Iteration 55503/56600 Training loss: 0.9660 0.4789 sec/batch
Epoch 99/100  Iteration 55504/56600 Training loss: 0.9668 0.4850 sec/batch
Epoch 99/100  Iteration 55505/56600 Training loss: 0.9673 0.4885 sec/batch
Epoch 99/100  Iteration 55506/56600 Training loss: 0.9679 0.4719 sec/batch
Epoch 99/100  Iteration 55507/56600 Training loss: 0.9678 0.4639 sec/batch
Epoch 99/100  Iteration 55508/56600 Training loss: 0.9679 0.4733 sec/batch
Epoch 99/100  Iteration 5

Epoch 99/100  Iteration 55606/56600 Training loss: 0.9577 0.4807 sec/batch
Epoch 99/100  Iteration 55607/56600 Training loss: 0.9579 0.4728 sec/batch
Epoch 99/100  Iteration 55608/56600 Training loss: 0.9579 0.4939 sec/batch
Epoch 99/100  Iteration 55609/56600 Training loss: 0.9580 0.4688 sec/batch
Epoch 99/100  Iteration 55610/56600 Training loss: 0.9582 0.4784 sec/batch
Epoch 99/100  Iteration 55611/56600 Training loss: 0.9585 0.4794 sec/batch
Epoch 99/100  Iteration 55612/56600 Training loss: 0.9585 0.4798 sec/batch
Epoch 99/100  Iteration 55613/56600 Training loss: 0.9586 0.4783 sec/batch
Epoch 99/100  Iteration 55614/56600 Training loss: 0.9585 0.4642 sec/batch
Epoch 99/100  Iteration 55615/56600 Training loss: 0.9584 0.4744 sec/batch
Epoch 99/100  Iteration 55616/56600 Training loss: 0.9586 0.4787 sec/batch
Epoch 99/100  Iteration 55617/56600 Training loss: 0.9588 0.4801 sec/batch
Epoch 99/100  Iteration 55618/56600 Training loss: 0.9590 0.4941 sec/batch
Epoch 99/100  Iteration 5

Epoch 99/100  Iteration 55716/56600 Training loss: 0.9664 0.4785 sec/batch
Epoch 99/100  Iteration 55717/56600 Training loss: 0.9665 0.4832 sec/batch
Epoch 99/100  Iteration 55718/56600 Training loss: 0.9666 0.4683 sec/batch
Epoch 99/100  Iteration 55719/56600 Training loss: 0.9664 0.4815 sec/batch
Epoch 99/100  Iteration 55720/56600 Training loss: 0.9662 0.4722 sec/batch
Epoch 99/100  Iteration 55721/56600 Training loss: 0.9662 0.4859 sec/batch
Epoch 99/100  Iteration 55722/56600 Training loss: 0.9662 0.4742 sec/batch
Epoch 99/100  Iteration 55723/56600 Training loss: 0.9661 0.4790 sec/batch
Epoch 99/100  Iteration 55724/56600 Training loss: 0.9661 0.4715 sec/batch
Epoch 99/100  Iteration 55725/56600 Training loss: 0.9660 0.4715 sec/batch
Epoch 99/100  Iteration 55726/56600 Training loss: 0.9659 0.4892 sec/batch
Epoch 99/100  Iteration 55727/56600 Training loss: 0.9657 0.4849 sec/batch
Epoch 99/100  Iteration 55728/56600 Training loss: 0.9656 0.4723 sec/batch
Epoch 99/100  Iteration 5

Epoch 99/100  Iteration 55826/56600 Training loss: 0.9635 0.4785 sec/batch
Epoch 99/100  Iteration 55827/56600 Training loss: 0.9635 0.4796 sec/batch
Epoch 99/100  Iteration 55828/56600 Training loss: 0.9635 0.4786 sec/batch
Epoch 99/100  Iteration 55829/56600 Training loss: 0.9635 0.4844 sec/batch
Epoch 99/100  Iteration 55830/56600 Training loss: 0.9634 0.4722 sec/batch
Epoch 99/100  Iteration 55831/56600 Training loss: 0.9634 0.4868 sec/batch
Epoch 99/100  Iteration 55832/56600 Training loss: 0.9634 0.4814 sec/batch
Epoch 99/100  Iteration 55833/56600 Training loss: 0.9634 0.4797 sec/batch
Epoch 99/100  Iteration 55834/56600 Training loss: 0.9633 0.4788 sec/batch
Epoch 99/100  Iteration 55835/56600 Training loss: 0.9633 0.4735 sec/batch
Epoch 99/100  Iteration 55836/56600 Training loss: 0.9632 0.4692 sec/batch
Epoch 99/100  Iteration 55837/56600 Training loss: 0.9631 0.4808 sec/batch
Epoch 99/100  Iteration 55838/56600 Training loss: 0.9630 0.4654 sec/batch
Epoch 99/100  Iteration 5

Epoch 99/100  Iteration 55936/56600 Training loss: 0.9607 0.4760 sec/batch
Epoch 99/100  Iteration 55937/56600 Training loss: 0.9607 0.4823 sec/batch
Epoch 99/100  Iteration 55938/56600 Training loss: 0.9608 0.4806 sec/batch
Epoch 99/100  Iteration 55939/56600 Training loss: 0.9609 0.4718 sec/batch
Epoch 99/100  Iteration 55940/56600 Training loss: 0.9610 0.4808 sec/batch
Epoch 99/100  Iteration 55941/56600 Training loss: 0.9611 0.4603 sec/batch
Epoch 99/100  Iteration 55942/56600 Training loss: 0.9611 0.4631 sec/batch
Epoch 99/100  Iteration 55943/56600 Training loss: 0.9610 0.4752 sec/batch
Epoch 99/100  Iteration 55944/56600 Training loss: 0.9610 0.4778 sec/batch
Epoch 99/100  Iteration 55945/56600 Training loss: 0.9610 0.4792 sec/batch
Epoch 99/100  Iteration 55946/56600 Training loss: 0.9610 0.4840 sec/batch
Epoch 99/100  Iteration 55947/56600 Training loss: 0.9610 0.4735 sec/batch
Epoch 99/100  Iteration 55948/56600 Training loss: 0.9610 0.4796 sec/batch
Epoch 99/100  Iteration 5

Epoch 100/100  Iteration 56045/56600 Training loss: 0.9860 0.4786 sec/batch
Epoch 100/100  Iteration 56046/56600 Training loss: 0.9864 0.4794 sec/batch
Epoch 100/100  Iteration 56047/56600 Training loss: 0.9857 0.4729 sec/batch
Epoch 100/100  Iteration 56048/56600 Training loss: 0.9827 0.4802 sec/batch
Epoch 100/100  Iteration 56049/56600 Training loss: 0.9801 0.4754 sec/batch
Epoch 100/100  Iteration 56050/56600 Training loss: 0.9790 0.4672 sec/batch
Epoch 100/100  Iteration 56051/56600 Training loss: 0.9776 0.4731 sec/batch
Epoch 100/100  Iteration 56052/56600 Training loss: 0.9758 0.4641 sec/batch
Epoch 100/100  Iteration 56053/56600 Training loss: 0.9728 0.4741 sec/batch
Epoch 100/100  Iteration 56054/56600 Training loss: 0.9707 0.4789 sec/batch
Epoch 100/100  Iteration 56055/56600 Training loss: 0.9689 0.4792 sec/batch
Epoch 100/100  Iteration 56056/56600 Training loss: 0.9668 0.4845 sec/batch
Epoch 100/100  Iteration 56057/56600 Training loss: 0.9654 0.4736 sec/batch
Epoch 100/10

Epoch 100/100  Iteration 56153/56600 Training loss: 0.9543 0.4799 sec/batch
Epoch 100/100  Iteration 56154/56600 Training loss: 0.9549 0.4770 sec/batch
Epoch 100/100  Iteration 56155/56600 Training loss: 0.9557 0.4647 sec/batch
Epoch 100/100  Iteration 56156/56600 Training loss: 0.9562 0.4764 sec/batch
Epoch 100/100  Iteration 56157/56600 Training loss: 0.9561 0.4771 sec/batch
Epoch 100/100  Iteration 56158/56600 Training loss: 0.9561 0.4795 sec/batch
Epoch 100/100  Iteration 56159/56600 Training loss: 0.9565 0.4612 sec/batch
Epoch 100/100  Iteration 56160/56600 Training loss: 0.9566 0.4799 sec/batch
Epoch 100/100  Iteration 56161/56600 Training loss: 0.9567 0.4788 sec/batch
Epoch 100/100  Iteration 56162/56600 Training loss: 0.9567 0.4899 sec/batch
Epoch 100/100  Iteration 56163/56600 Training loss: 0.9569 0.4878 sec/batch
Epoch 100/100  Iteration 56164/56600 Training loss: 0.9569 0.4853 sec/batch
Epoch 100/100  Iteration 56165/56600 Training loss: 0.9570 0.4709 sec/batch
Epoch 100/10

Epoch 100/100  Iteration 56261/56600 Training loss: 0.9626 0.4663 sec/batch
Epoch 100/100  Iteration 56262/56600 Training loss: 0.9624 0.4799 sec/batch
Epoch 100/100  Iteration 56263/56600 Training loss: 0.9625 0.4838 sec/batch
Epoch 100/100  Iteration 56264/56600 Training loss: 0.9627 0.4720 sec/batch
Epoch 100/100  Iteration 56265/56600 Training loss: 0.9629 0.4607 sec/batch
Epoch 100/100  Iteration 56266/56600 Training loss: 0.9630 0.4633 sec/batch
Epoch 100/100  Iteration 56267/56600 Training loss: 0.9632 0.4791 sec/batch
Epoch 100/100  Iteration 56268/56600 Training loss: 0.9632 0.4862 sec/batch
Epoch 100/100  Iteration 56269/56600 Training loss: 0.9633 0.4838 sec/batch
Epoch 100/100  Iteration 56270/56600 Training loss: 0.9634 0.4688 sec/batch
Epoch 100/100  Iteration 56271/56600 Training loss: 0.9633 0.4800 sec/batch
Epoch 100/100  Iteration 56272/56600 Training loss: 0.9634 0.4783 sec/batch
Epoch 100/100  Iteration 56273/56600 Training loss: 0.9635 0.4747 sec/batch
Epoch 100/10

Epoch 100/100  Iteration 56369/56600 Training loss: 0.9625 0.4927 sec/batch
Epoch 100/100  Iteration 56370/56600 Training loss: 0.9625 0.4808 sec/batch
Epoch 100/100  Iteration 56371/56600 Training loss: 0.9624 0.4851 sec/batch
Epoch 100/100  Iteration 56372/56600 Training loss: 0.9625 0.4888 sec/batch
Epoch 100/100  Iteration 56373/56600 Training loss: 0.9625 0.4782 sec/batch
Epoch 100/100  Iteration 56374/56600 Training loss: 0.9624 0.4793 sec/batch
Epoch 100/100  Iteration 56375/56600 Training loss: 0.9623 0.4793 sec/batch
Epoch 100/100  Iteration 56376/56600 Training loss: 0.9623 0.4789 sec/batch
Epoch 100/100  Iteration 56377/56600 Training loss: 0.9621 0.4723 sec/batch
Epoch 100/100  Iteration 56378/56600 Training loss: 0.9621 0.4810 sec/batch
Epoch 100/100  Iteration 56379/56600 Training loss: 0.9622 0.4763 sec/batch
Epoch 100/100  Iteration 56380/56600 Training loss: 0.9622 0.4885 sec/batch
Epoch 100/100  Iteration 56381/56600 Training loss: 0.9621 0.4986 sec/batch
Epoch 100/10

Epoch 100/100  Iteration 56477/56600 Training loss: 0.9595 0.4844 sec/batch
Epoch 100/100  Iteration 56478/56600 Training loss: 0.9595 0.4746 sec/batch
Epoch 100/100  Iteration 56479/56600 Training loss: 0.9594 0.4834 sec/batch
Epoch 100/100  Iteration 56480/56600 Training loss: 0.9593 0.4793 sec/batch
Epoch 100/100  Iteration 56481/56600 Training loss: 0.9593 0.4636 sec/batch
Epoch 100/100  Iteration 56482/56600 Training loss: 0.9592 0.4716 sec/batch
Epoch 100/100  Iteration 56483/56600 Training loss: 0.9593 0.4657 sec/batch
Epoch 100/100  Iteration 56484/56600 Training loss: 0.9593 0.4785 sec/batch
Epoch 100/100  Iteration 56485/56600 Training loss: 0.9592 0.4793 sec/batch
Epoch 100/100  Iteration 56486/56600 Training loss: 0.9593 0.4857 sec/batch
Epoch 100/100  Iteration 56487/56600 Training loss: 0.9592 0.4709 sec/batch
Epoch 100/100  Iteration 56488/56600 Training loss: 0.9592 0.4728 sec/batch
Epoch 100/100  Iteration 56489/56600 Training loss: 0.9592 0.4805 sec/batch
Epoch 100/10

Epoch 100/100  Iteration 56585/56600 Training loss: 0.9589 0.4788 sec/batch
Epoch 100/100  Iteration 56586/56600 Training loss: 0.9589 0.4794 sec/batch
Epoch 100/100  Iteration 56587/56600 Training loss: 0.9589 0.4902 sec/batch
Epoch 100/100  Iteration 56588/56600 Training loss: 0.9589 0.4791 sec/batch
Epoch 100/100  Iteration 56589/56600 Training loss: 0.9588 0.4787 sec/batch
Epoch 100/100  Iteration 56590/56600 Training loss: 0.9588 0.4844 sec/batch
Epoch 100/100  Iteration 56591/56600 Training loss: 0.9588 0.4716 sec/batch
Epoch 100/100  Iteration 56592/56600 Training loss: 0.9589 0.4811 sec/batch
Epoch 100/100  Iteration 56593/56600 Training loss: 0.9589 0.4704 sec/batch
Epoch 100/100  Iteration 56594/56600 Training loss: 0.9589 0.4692 sec/batch
Epoch 100/100  Iteration 56595/56600 Training loss: 0.9590 0.4716 sec/batch
Epoch 100/100  Iteration 56596/56600 Training loss: 0.9590 0.4817 sec/batch
Epoch 100/100  Iteration 56597/56600 Training loss: 0.9590 0.4730 sec/batch
Epoch 100/10

Here is the loss graph:
<img src="assets/loss_graph2.JPG" width="800">

### Test
There is no test set here, because I'm not predicting house prices: the model is meant to generate text, not to predict. However, the validation loss is a good indicator of how well the model behaves. It looks like I had already reached the best model almost halfway through training; I waited for ~200 minutes just to learn that there was no need to train any longer.

In [7]:
# Show the checkpoint state recorded for this run's checkpoint directory:
# the most recent checkpoint path plus every checkpoint path still tracked.
checkpoint_dir = 'checkpoints/{}'.format(folder_name)
tf.train.get_checkpoint_state(checkpoint_dir)

model_checkpoint_path: "checkpoints/aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw\\i56600_l512_1.206.ckpt"
all_model_checkpoint_paths: "checkpoints/aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw\\i2000_l512_1.425.ckpt"
all_model_checkpoint_paths: "checkpoints/aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw\\i4000_l512_1.303.ckpt"
all_model_checkpoint_paths: "checkpoints/aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw\\i6000_l512_1.260.ckpt"
all_model_checkpoint_paths: "checkpoints/aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw\\i8000_l512_1.236.ckpt"
all_model_checkpoint_paths: "checkpoints/aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw\\i10000_l512_1.227.ckpt"
all_model_checkpoint_paths: "checkpoints/aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw\\i12000_l512_1.218.ckpt"
all_model_checkpoint_paths: "checkpoints/aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw\\i14000_l512_1.213.ckpt"
all_model_checkpoint_paths: "checkpoints/aaaaabbcdddeeefhiiijkkmmmmnnoopssssssttw\\i16000_l512_1.211.ckpt"
all_model_checkpoint_paths: "checkpoints/aaaaa

In [8]:
# Generate text from a hand-picked checkpoint rather than the final one.
# The filename appears to encode the iteration (i34000), the LSTM size (l512)
# and a loss value (1.195) -- presumably the validation loss; confirm against
# the training cell that writes these files.
checkpoint = "checkpoints/{}/i34000_l512_1.195.ckpt".format(folder_name)

vocab_size = len(vocab)

# NOTE(review): 700 is presumably the number of characters to generate and
# top_n presumably limits sampling to the 4 most likely next characters --
# verify against `sample` in functions_model.
samp = sample(
    checkpoint,
    700,
    lstm_size,
    vocab_size,
    vocab_to_int,
    vocab,
    int_to_vocab,
    prime="The ",
    top_n=4,
)
print(samp)

The Stench Of With Hell

I am your land
I am the path to save me
The sorrow, I hear your fate
I will never know,
I cannot hope you I will die again
The one to the other side

As I still defend the writings
On my soul to search for me
And in a dream I'm the masquerade
This is the final scene

Too many stories of the lies
They're too many shouts to see
They set in our holy lead
And the checks of summer waits

The chosen cast in soul of an our own divide
The stench of far to stark on far away
And who's a thousand to be free again

When all the thoughts won't haunt me
And I should be trying to give up
And then this is my last
I can never see
And now I feel so far
They come along the way
They will sur


### Source
- [Udacity Deep Learning](https://github.com/udacity/deep-learning)  
- [Mat Leonard](https://github.com/mcleonard)