# Character-wise Recurrent Neural Network

In [1]:
import os
import time

import numpy as np
import tensorflow as tf

# Getting and Preprocessing Data

## Data

In [2]:
class Data:
    """
    Container for the raw text corpus and its character-level encoding.

    Attributes
    ----------
    : text: Raw text read from the input file
    : chars: Sorted list of the unique characters found in text
    : chars_to_ints: Dict mapping each character to its integer id
    : ints_to_chars: Dict mapping each integer id back to its character
    : encoded: Array of integer ids, one per character of text
    """

    def __init__(self):
        # Bind every field to the instance so a freshly created Data object
        # exposes all attributes (as None) before it has been populated.
        self.text = None
        self.chars = None
        self.chars_to_ints = None
        self.ints_to_chars = None
        self.encoded = None

## DataPreprocessor

In [3]:
class DataPreprocessor:
    """
    Loads a text file and builds its character-level integer encoding.
    """

    def load_and_preprocess_data(self, input_file):
        """
        Load and preprocess data.

        Arguments
        ---------
        : input_file: Input file name

        Returns
        -------
        : data: Data object with text, chars, chars_to_ints, ints_to_chars
                and encoded populated
        """

        print("\nLoading and preprocesing data ...\n")

        data = Data()

        with open(input_file, 'r') as f:
            data.text = f.read()

        # Vocabulary is the sorted set of distinct characters; a character's
        # position in that list is its integer id.
        data.chars = sorted(set(data.text))
        data.chars_to_ints = {c: i for i, c in enumerate(data.chars)}
        data.ints_to_chars = dict(enumerate(data.chars))
        data.encoded = np.array([data.chars_to_ints[c] for c in data.text], dtype=np.int32)
        self.log_data(data)

        print("Loaded and preprocessed data\n")

        return data

    @staticmethod
    def _label(ch):
        """Return a readable name for newline and space, any other character as is."""
        if ch == '\n':
            return 'NEWLINE'
        if ch == ' ':
            return 'SPACE'
        return ch

    def log_data(self, data):
        """
        Print a short summary of the loaded data.

        Slices are used instead of fixed-range indexing, so a text shorter
        than 100 characters or a vocabulary smaller than 10 characters is
        summarised rather than raising IndexError.

        Arguments
        ---------
        : data: Object exposing text, chars, chars_to_ints, ints_to_chars
                and encoded
        """

        preview = "".join('(NEWLINE)' if ch == '\n' else ch for ch in data.text[:100])
        print("text[:100]:\n{}\n".format(preview))

        print("len(chars):\n{}\n".format(len(data.chars)))
        print("chars[:50]:\n{}\n".format(data.chars[:50]))
        print("chars_to_ints:\n")
        # A character's id equals its index in the sorted chars list.
        for ii, ch in enumerate(data.chars[:10]):
            print("chars_to_ints[{}]: {}".format(self._label(ch), ii))
        print("")
        print("ints_to_chars:\n")
        for ii in range(min(10, len(data.ints_to_chars))):
            print("ints_to_chars[{}]: {}".format(ii, self._label(data.ints_to_chars[ii])))
        print("")
        print("encoded.shape:\n{}\n".format(data.encoded.shape))
        print("encoded[:100]:\n{}\n".format(data.encoded[:100]))

In [4]:
# Plain-text corpus that is read, encoded and used for training
input_file = 'anna.txt'

In [5]:
# Read the corpus and build the character <-> integer encoding;
# load_and_preprocess_data also prints a summary of what it loaded.
dataPreprocessor = DataPreprocessor()
data = dataPreprocessor.load_and_preprocess_data(input_file)


Loading and preprocesing data ...

text[:100]:
Chapter 1(NEWLINE)(NEWLINE)(NEWLINE)Happy families are all alike; every unhappy family is unhappy in its own(NEWLINE)way.(NEWLINE)(NEWLINE)Everythin

len(chars):
83

chars[:50]:
['\n', ' ', '!', '"', '$', '%', '&', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U']

chars_to_ints:

chars_to_ints[NEWLINE]: 0
chars_to_ints[SPACE]: 1
chars_to_ints[!]: 2
chars_to_ints["]: 3
chars_to_ints[$]: 4
chars_to_ints[%]: 5
chars_to_ints[&]: 6
chars_to_ints[']: 7
chars_to_ints[(]: 8
chars_to_ints[)]: 9

ints_to_chars:

ints_to_chars[0]: NEWLINE
ints_to_chars[1]: SPACE
ints_to_chars[2]: !
ints_to_chars[3]: "
ints_to_chars[4]: $
ints_to_chars[5]: %
ints_to_chars[6]: &
ints_to_chars[7]: '
ints_to_chars[8]: (
ints_to_chars[9]: )

encoded.shape:
(1985223,)

encoded[:100]:
[31 64 57 72 76 61 74  1 

# Building Character-wise RNN Model

## RNNetwork

In [6]:
class RNNetwork:
    """
    Builders for the individual pieces of the character-level LSTM graph.
    """

    def create_placeholders(self, batch_size, num_steps):
        """
        Create the placeholders for inputs, targets and dropout.

        Arguments
        ---------
        : batch_size: Batch size, number of sequences per batch
        : num_steps: Number of sequence steps in a batch
        """

        # Inputs and targets share the same [batch_size, num_steps] shape
        batch_shape = [batch_size, num_steps]
        inputs = tf.placeholder(tf.int32, batch_shape, name='inputs')
        targets = tf.placeholder(tf.int32, batch_shape, name='targets')
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        print("Created placeholders\n")

        return inputs, targets, keep_prob

    def build_lstm_layers(self, keep_prob, lstm_size, num_layers, batch_size):
        """
        Stack num_layers LSTM cells and create their zero initial state.

        Arguments
        ---------
        : keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability
        : lstm_size: Size of the hidden layers in the LSTM cells
        : num_layers: Number of LSTM layers
        : batch_size: Batch size
        """

        layers = []
        for _ in range(num_layers):
            layers.append(self.build_lstm_cell(lstm_size, keep_prob))

        cell = tf.contrib.rnn.MultiRNNCell(layers)
        initial_state = cell.zero_state(batch_size, tf.float32)

        print("Built LSTM layers\n")

        return cell, initial_state

    def build_lstm_cell(self, lstm_size, keep_prob):
        """
        Build a single LSTM cell with dropout applied to its output.

        Arguments
        ---------
        : lstm_size: Size of the hidden layers in the LSTM cells
        : keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability
        """

        wrapped_cell = tf.contrib.rnn.DropoutWrapper(
            tf.contrib.rnn.BasicLSTMCell(lstm_size),
            output_keep_prob=keep_prob)

        print("Built LSTM cell")

        return wrapped_cell

    def build_output_layer(self, lstm_output, in_size, out_size):
        """
        Build the softmax layer and return (softmax output, logits).

        Arguments
        ---------
        : lstm_output: List of output tensors from the LSTM layer
        : in_size: Size of the input tensor, for example, size of the LSTM cells
        : out_size: Size of this softmax layer
        """

        # Concatenate lstm_output over axis 1, then flatten to a 2D tensor
        # with in_size columns: one row for each step of each sequence
        flat_output = tf.reshape(tf.concat(lstm_output, axis=1), [-1, in_size])

        # Weights and bias of the softmax layer
        with tf.variable_scope('softmax'):
            weights = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
            biases = tf.Variable(tf.zeros(out_size))

        # One row of logits per row of flattened RNN output
        projected = tf.matmul(flat_output, weights)
        logits = tf.nn.bias_add(projected, biases)

        # Probabilities for the predicted characters
        probabilities = tf.nn.softmax(logits, name='predictions')

        print("Built output layer\n")

        return probabilities, logits

    def add_training_loss_computation(self, logits, targets, lstm_size, num_classes):
        """
        Compute the mean softmax cross-entropy between logits and targets.

        Arguments
        ---------
        : logits: Logits from final fully connected layer
        : targets: Targets for supervised learning
        : lstm_size: Number of LSTM hidden units (not used by this method)
        : num_classes: Number of classes in targets
        """

        # One-hot encode the targets and give them the same shape as logits
        one_hot_targets = tf.reshape(tf.one_hot(targets, num_classes), logits.get_shape())

        # Cross entropy per row, averaged into a single scalar
        per_row_loss = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_targets,
                                                               logits=logits)
        loss = tf.reduce_mean(per_row_loss)

        print("Added training loss computation\n")

        return loss

    def build_optimizer(self, loss, learning_rate, grad_clip):
        """
        Build the Adam training op with gradients clipped by global norm.

        Arguments
        ---------
        : loss: Network loss
        : learning_rate: Learning rate for optimizer
        : grad_clip: For gradient clipping
        """

        trainable = tf.trainable_variables()

        # Clip by global norm to control exploding gradients
        raw_grads = tf.gradients(loss, trainable)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)

        adam = tf.train.AdamOptimizer(learning_rate)
        train_step = adam.apply_gradients(zip(clipped_grads, trainable))

        print("Built optimizer\n")

        return train_step

## CharRNNModel

In [7]:
class CharRNNModel:
    """
    Character-level LSTM model: placeholders, stacked LSTM, softmax output,
    loss and optimizer, all built in a freshly reset default graph.

    Attributes
    ----------
    : lstm_size: Number of LSTM hidden units
    : batch_size: Effective batch size (1 when sampling)
    : num_steps: Effective number of steps (1 when sampling)
    : inputs, targets, keep_prob: Placeholders to feed
    : initial_state: Zero state of the stacked LSTM
    : final_state: LSTM state returned by tf.nn.dynamic_rnn
    : prediction, logits: Softmax output and logits of the output layer
    : loss: Training loss
    : optimizer: Training op
    """

    def __init__(self, 
                 num_classes, 
                 batch_size, 
                 num_steps,
                 lstm_size, 
                 num_layers, 
                 learning_rate,
                 grad_clip, 
                 sampling=False):

        """
        Build CharRNN model.

        Note: this calls tf.reset_default_graph(), so constructing a model
        replaces whatever was previously in the default graph.
        
        Arguments
        ---------
        : num_classes: Number of classes in targets
        : batch_size: Batch size, number of sequences per batch
        : num_steps: Number of sequence steps in a batch
        : lstm_size: Number of LSTM hidden units
        : num_layers: Number of LSTM layers
        : learning_rate: Learning rate
        : grad_clip: For gradient clipping
        : sampling: Whether or not the model is used for sampling
        """
        
        print("\nBuilding CharRNN model ...\n")
        
        # When sampling, characters are passed in one at a time, so the
        # configured batch shape is overridden with a 1 x 1 input.
        if sampling:
            batch_size, num_steps = 1, 1
        
        self.lstm_size = lstm_size
        self.batch_size, self.num_steps = batch_size, num_steps
        
        tf.reset_default_graph()
        
        # Create RNNetwork object
        network = RNNetwork()
        
        # Build the placeholder tensors
        self.inputs, self.targets, self.keep_prob = network.create_placeholders(self.batch_size, 
                                                                                self.num_steps)
        
        # Build the LSTM layers
        cell, self.initial_state = network.build_lstm_layers(self.keep_prob, 
                                                             self.lstm_size, 
                                                             num_layers,
                                                             self.batch_size)
        
        ### Run the data through the RNN layers
        # First, one-hot encode the input tokens
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        
        # Run each sequence step through the RNN with tf.nn.dynamic_rnn
        outputs, self.final_state = tf.nn.dynamic_rnn(cell, 
                                                      x_one_hot, 
                                                      initial_state=self.initial_state)
        
        # Get softmax predictions and logits
        self.prediction, self.logits = network.build_output_layer(outputs, 
                                                                  self.lstm_size,
                                                                  num_classes)
        
        # Loss and optimizer (with gradient clipping)
        self.loss = network.add_training_loss_computation(self.logits, 
                                                          self.targets, 
                                                          self.lstm_size, 
                                                          num_classes)
        
        self.optimizer = network.build_optimizer(self.loss, learning_rate, grad_clip)
        
        print("Built CharRNN model\n")

In [8]:
# Model hyperparameters passed to CharRNNModel
num_classes = len(data.chars)  # one class per distinct character in the corpus
batch_size = 64                # number of sequences per batch
num_steps = 50                 # number of sequence steps in a batch
lstm_size = 128                # number of LSTM hidden units
num_layers = 2                 # number of stacked LSTM layers
learning_rate = 0.001          # learning rate given to the Adam optimizer
grad_clip = 5                  # clip norm passed to tf.clip_by_global_norm

In [9]:
# Build the training graph; sampling=False keeps the configured
# batch_size x num_steps input shape instead of the 1 x 1 sampling shape.
model = CharRNNModel(num_classes,
                     batch_size,
                     num_steps,
                     lstm_size,
                     num_layers,
                     learning_rate,
                     grad_clip,
                     False)


Building CharRNN model ...

Created placeholders

Built LSTM cell
Built LSTM cell
Built LSTM layers

Built output layer

Added training loss computation

Built optimizer

Built CharRNN model



# Training Character-wise RNN Model

## DataBatchGenerator

In [10]:
class DataBatchGenerator:
    """
    Splits an encoded sequence into (inputs, targets) mini-batches.
    """

    def get_batches(self, arr, n_seqs, n_steps):
        """
        Create a generator that returns batches of size n_seqs x n_steps from arr.

        Every target is the element that follows the corresponding input in
        arr, including the last column of each batch (which previously wrapped
        around to the first input of the same batch). Only the very last
        kept element may wrap: if arr has no element after it, its target is
        arr[0].

        Arguments
        ---------
        : arr: Array you want to make batches from
        : n_seqs: Number of sequences per batch
        : n_steps: Number of sequence steps per batch

        Yields
        ------
        : x, y: Inputs and targets, each of shape (n_seqs, n_steps)

        Raises
        ------
        : ValueError: If arr is too short to fill a single batch
        """

        # Get the number of characters per batch and number of batches we can make
        chars_per_batch = n_seqs * n_steps
        n_batches = len(arr) // chars_per_batch
        if n_batches == 0:
            raise ValueError(
                "arr has {} elements, fewer than one batch of {} x {}".format(
                    len(arr), n_seqs, n_steps))

        # Keep only enough characters to make full batches
        n_used = n_batches * chars_per_batch
        x_flat = arr[:n_used]

        # Targets are the inputs shifted by one position in the flat sequence
        y_flat = np.empty_like(x_flat)
        y_flat[:-1] = x_flat[1:]
        # Use the first dropped element when there is one, otherwise wrap around
        y_flat[-1] = arr[n_used] if n_used < len(arr) else arr[0]

        # Reshape into n_seqs rows; row r continues where row r-1 ends
        x_rows = x_flat.reshape((n_seqs, -1))
        y_rows = y_flat.reshape((n_seqs, -1))

        # Generate each batch as a window of n_steps columns
        for n in range(0, x_rows.shape[1], n_steps):
            yield x_rows[:, n:n+n_steps], y_rows[:, n:n+n_steps]

## RNNModelTrainer

In [11]:
class RNNModelTrainer:
    """
    Runs the training loop for a CharRNN model and writes checkpoints.
    """
        
    def train_model(self, 
                    model, 
                    data, 
                    epochs, 
                    keep_prob, 
                    save_every_n,
                    max_to_keep,
                    checkpoint_dir="checkpoints"):
        """
        Train RNN model.

        The LSTM state is reset to model.initial_state at the start of every
        epoch and carried from batch to batch within an epoch. A checkpoint
        is written every save_every_n training steps and once more after the
        last epoch.
        
        Arguments
        ---------
        : model: Model to train
        : data: Data to train model on
        : epochs: Number of epochs to train
        : keep_prob: Keep probability to pass to model
        : save_every_n: Interval to save session
        : max_to_keep: Param to pass to session saver
        : checkpoint_dir: Directory checkpoints are written to; created if missing
        """
        
        print("\nTraining CharRNN model ...\n")

        # Make sure the checkpoint directory exists so training does not
        # depend on a separate cell having created it beforehand.
        os.makedirs(checkpoint_dir, exist_ok=True)

        def checkpoint_path(step):
            # File name encodes the training step and the LSTM size
            return os.path.join(checkpoint_dir,
                                "i{}_l{}.ckpt".format(step, model.lstm_size))

        dataBatchGenerator = DataBatchGenerator()
        
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            
            saver = tf.train.Saver(max_to_keep=max_to_keep)
            
            # To resume training from a checkpoint, call
            # saver.restore(sess, <checkpoint path>) here.
            
            counter = 0
            
            for e in range(epochs):
                new_state = sess.run(model.initial_state)
                
                batches = dataBatchGenerator.get_batches(data.encoded, 
                                                         model.batch_size, 
                                                         model.num_steps)

                for x, y in batches:
                    counter += 1
                    start = time.time()
                    
                    # Feed the previous batch's final state back in as the
                    # initial state of this batch
                    feed = {model.inputs: x,
                            model.targets: y,
                            model.keep_prob: keep_prob,
                            model.initial_state: new_state}
                    
                    batch_loss, new_state, _ = sess.run([model.loss,
                                                         model.final_state,
                                                         model.optimizer],
                                                         feed_dict=feed)
                    
                    end = time.time()
                    
                    print('Epoch: {}/{}... '.format(e+1, epochs),
                          'Training Step: {}... '.format(counter),
                          'Training loss: {:.4f}... '.format(batch_loss),
                          '{:.4f} sec/batch'.format((end-start)))
                    
                    if (counter % save_every_n == 0):
                        saver.save(sess, checkpoint_path(counter))
        
            # Always save the final state of the model
            saver.save(sess, checkpoint_path(counter))
            
        print("\nTraining complete\n")

In [12]:
# Training settings passed to RNNModelTrainer.train_model
epochs = 20          # number of passes over the encoded corpus
keep_prob = 0.5      # dropout keep probability fed to the model while training
save_every_n = 200   # write a checkpoint every n training steps
max_to_keep = 100    # max_to_keep argument of tf.train.Saver

In [13]:
# Create the checkpoint directory; exist_ok makes this cell safe to re-run
os.makedirs("./checkpoints", exist_ok=True)

In [14]:
# Train the model; checkpoints are written under checkpoints/ during training
modelTrainer = RNNModelTrainer()

modelTrainer.train_model(model,
                         data,
                         epochs,
                         keep_prob,
                         save_every_n,
                         max_to_keep)


Training CharRNN model ...

Epoch: 1/20...  Training Step: 1...  Training loss: 4.4212...  0.2310 sec/batch
Epoch: 1/20...  Training Step: 2...  Training loss: 4.4082...  0.0523 sec/batch
Epoch: 1/20...  Training Step: 3...  Training loss: 4.3935...  0.0526 sec/batch
Epoch: 1/20...  Training Step: 4...  Training loss: 4.3727...  0.0578 sec/batch
Epoch: 1/20...  Training Step: 5...  Training loss: 4.3413...  0.0524 sec/batch
Epoch: 1/20...  Training Step: 6...  Training loss: 4.2838...  0.0554 sec/batch
Epoch: 1/20...  Training Step: 7...  Training loss: 4.1652...  0.0524 sec/batch
Epoch: 1/20...  Training Step: 8...  Training loss: 3.9284...  0.0524 sec/batch
Epoch: 1/20...  Training Step: 9...  Training loss: 3.7249...  0.0527 sec/batch
Epoch: 1/20...  Training Step: 10...  Training loss: 3.6379...  0.0577 sec/batch
Epoch: 1/20...  Training Step: 11...  Training loss: 3.6006...  0.0524 sec/batch
Epoch: 1/20...  Training Step: 12...  Training loss: 3.5656...  0.0591 sec/batch
Epoch: 1

Epoch: 1/20...  Training Step: 105...  Training loss: 3.1503...  0.0544 sec/batch
Epoch: 1/20...  Training Step: 106...  Training loss: 3.1465...  0.0554 sec/batch
Epoch: 1/20...  Training Step: 107...  Training loss: 3.1825...  0.0581 sec/batch
Epoch: 1/20...  Training Step: 108...  Training loss: 3.1358...  0.0555 sec/batch
Epoch: 1/20...  Training Step: 109...  Training loss: 3.1054...  0.0530 sec/batch
Epoch: 1/20...  Training Step: 110...  Training loss: 3.1176...  0.0547 sec/batch
Epoch: 1/20...  Training Step: 111...  Training loss: 3.1491...  0.0582 sec/batch
Epoch: 1/20...  Training Step: 112...  Training loss: 3.1678...  0.0527 sec/batch
Epoch: 1/20...  Training Step: 113...  Training loss: 3.1867...  0.0530 sec/batch
Epoch: 1/20...  Training Step: 114...  Training loss: 3.1324...  0.0551 sec/batch
Epoch: 1/20...  Training Step: 115...  Training loss: 3.1420...  0.0526 sec/batch
Epoch: 1/20...  Training Step: 116...  Training loss: 3.1285...  0.0555 sec/batch
Epoch: 1/20...  

Epoch: 1/20...  Training Step: 205...  Training loss: 3.0024...  0.0531 sec/batch
Epoch: 1/20...  Training Step: 206...  Training loss: 3.0014...  0.0533 sec/batch
Epoch: 1/20...  Training Step: 207...  Training loss: 2.9861...  0.0524 sec/batch
Epoch: 1/20...  Training Step: 208...  Training loss: 3.0394...  0.0604 sec/batch
Epoch: 1/20...  Training Step: 209...  Training loss: 3.0042...  0.0553 sec/batch
Epoch: 1/20...  Training Step: 210...  Training loss: 2.9627...  0.0522 sec/batch
Epoch: 1/20...  Training Step: 211...  Training loss: 2.9870...  0.0554 sec/batch
Epoch: 1/20...  Training Step: 212...  Training loss: 3.0226...  0.0531 sec/batch
Epoch: 1/20...  Training Step: 213...  Training loss: 2.9869...  0.0561 sec/batch
Epoch: 1/20...  Training Step: 214...  Training loss: 3.0003...  0.0557 sec/batch
Epoch: 1/20...  Training Step: 215...  Training loss: 2.9986...  0.0561 sec/batch
Epoch: 1/20...  Training Step: 216...  Training loss: 2.9795...  0.0594 sec/batch
Epoch: 1/20...  

Epoch: 1/20...  Training Step: 305...  Training loss: 2.7841...  0.0528 sec/batch
Epoch: 1/20...  Training Step: 306...  Training loss: 2.7509...  0.0549 sec/batch
Epoch: 1/20...  Training Step: 307...  Training loss: 2.7608...  0.0554 sec/batch
Epoch: 1/20...  Training Step: 308...  Training loss: 2.7182...  0.0530 sec/batch
Epoch: 1/20...  Training Step: 309...  Training loss: 2.7439...  0.0532 sec/batch
Epoch: 1/20...  Training Step: 310...  Training loss: 2.6692...  0.0594 sec/batch
Epoch: 1/20...  Training Step: 311...  Training loss: 2.7383...  0.0548 sec/batch
Epoch: 1/20...  Training Step: 312...  Training loss: 2.7406...  0.0537 sec/batch
Epoch: 1/20...  Training Step: 313...  Training loss: 2.7505...  0.0564 sec/batch
Epoch: 1/20...  Training Step: 314...  Training loss: 2.7238...  0.0543 sec/batch
Epoch: 1/20...  Training Step: 315...  Training loss: 2.7519...  0.0574 sec/batch
Epoch: 1/20...  Training Step: 316...  Training loss: 2.7156...  0.0556 sec/batch
Epoch: 1/20...  

Epoch: 1/20...  Training Step: 405...  Training loss: 2.6703...  0.0566 sec/batch
Epoch: 1/20...  Training Step: 406...  Training loss: 2.6186...  0.0528 sec/batch
Epoch: 1/20...  Training Step: 407...  Training loss: 2.6175...  0.0557 sec/batch
Epoch: 1/20...  Training Step: 408...  Training loss: 2.5954...  0.0520 sec/batch
Epoch: 1/20...  Training Step: 409...  Training loss: 2.6139...  0.0593 sec/batch
Epoch: 1/20...  Training Step: 410...  Training loss: 2.5897...  0.0558 sec/batch
Epoch: 1/20...  Training Step: 411...  Training loss: 2.5770...  0.0534 sec/batch
Epoch: 1/20...  Training Step: 412...  Training loss: 2.6020...  0.0530 sec/batch
Epoch: 1/20...  Training Step: 413...  Training loss: 2.5677...  0.0555 sec/batch
Epoch: 1/20...  Training Step: 414...  Training loss: 2.5891...  0.0589 sec/batch
Epoch: 1/20...  Training Step: 415...  Training loss: 2.5513...  0.0532 sec/batch
Epoch: 1/20...  Training Step: 416...  Training loss: 2.5477...  0.0612 sec/batch
Epoch: 1/20...  

Epoch: 1/20...  Training Step: 505...  Training loss: 2.5432...  0.0577 sec/batch
Epoch: 1/20...  Training Step: 506...  Training loss: 2.4832...  0.0527 sec/batch
Epoch: 1/20...  Training Step: 507...  Training loss: 2.5578...  0.0574 sec/batch
Epoch: 1/20...  Training Step: 508...  Training loss: 2.5307...  0.0602 sec/batch
Epoch: 1/20...  Training Step: 509...  Training loss: 2.5446...  0.0553 sec/batch
Epoch: 1/20...  Training Step: 510...  Training loss: 2.5604...  0.0583 sec/batch
Epoch: 1/20...  Training Step: 511...  Training loss: 2.5563...  0.0547 sec/batch
Epoch: 1/20...  Training Step: 512...  Training loss: 2.4947...  0.0526 sec/batch
Epoch: 1/20...  Training Step: 513...  Training loss: 2.6403...  0.0587 sec/batch
Epoch: 1/20...  Training Step: 514...  Training loss: 2.6023...  0.0555 sec/batch
Epoch: 1/20...  Training Step: 515...  Training loss: 2.5628...  0.0587 sec/batch
Epoch: 1/20...  Training Step: 516...  Training loss: 2.5322...  0.0593 sec/batch
Epoch: 1/20...  

Epoch: 1/20...  Training Step: 605...  Training loss: 2.4762...  0.0553 sec/batch
Epoch: 1/20...  Training Step: 606...  Training loss: 2.4682...  0.0529 sec/batch
Epoch: 1/20...  Training Step: 607...  Training loss: 2.4414...  0.0530 sec/batch
Epoch: 1/20...  Training Step: 608...  Training loss: 2.4718...  0.0548 sec/batch
Epoch: 1/20...  Training Step: 609...  Training loss: 2.4387...  0.0553 sec/batch
Epoch: 1/20...  Training Step: 610...  Training loss: 2.4874...  0.0555 sec/batch
Epoch: 1/20...  Training Step: 611...  Training loss: 2.4799...  0.0534 sec/batch
Epoch: 1/20...  Training Step: 612...  Training loss: 2.4928...  0.0550 sec/batch
Epoch: 1/20...  Training Step: 613...  Training loss: 2.4592...  0.0596 sec/batch
Epoch: 1/20...  Training Step: 614...  Training loss: 2.4616...  0.0550 sec/batch
Epoch: 1/20...  Training Step: 615...  Training loss: 2.4409...  0.0552 sec/batch
Epoch: 1/20...  Training Step: 616...  Training loss: 2.4979...  0.0530 sec/batch
Epoch: 1/20...  

Epoch: 2/20...  Training Step: 705...  Training loss: 2.4474...  0.0587 sec/batch
Epoch: 2/20...  Training Step: 706...  Training loss: 2.4332...  0.0558 sec/batch
Epoch: 2/20...  Training Step: 707...  Training loss: 2.4310...  0.0522 sec/batch
Epoch: 2/20...  Training Step: 708...  Training loss: 2.5048...  0.0581 sec/batch
Epoch: 2/20...  Training Step: 709...  Training loss: 2.4331...  0.0576 sec/batch
Epoch: 2/20...  Training Step: 710...  Training loss: 2.4370...  0.0590 sec/batch
Epoch: 2/20...  Training Step: 711...  Training loss: 2.4099...  0.0540 sec/batch
Epoch: 2/20...  Training Step: 712...  Training loss: 2.4816...  0.0612 sec/batch
Epoch: 2/20...  Training Step: 713...  Training loss: 2.4454...  0.0533 sec/batch
Epoch: 2/20...  Training Step: 714...  Training loss: 2.4161...  0.0523 sec/batch
Epoch: 2/20...  Training Step: 715...  Training loss: 2.4583...  0.0526 sec/batch
Epoch: 2/20...  Training Step: 716...  Training loss: 2.4312...  0.0522 sec/batch
Epoch: 2/20...  

Epoch: 2/20...  Training Step: 805...  Training loss: 2.3811...  0.0578 sec/batch
Epoch: 2/20...  Training Step: 806...  Training loss: 2.4054...  0.0536 sec/batch
Epoch: 2/20...  Training Step: 807...  Training loss: 2.3657...  0.0595 sec/batch
Epoch: 2/20...  Training Step: 808...  Training loss: 2.3687...  0.0536 sec/batch
Epoch: 2/20...  Training Step: 809...  Training loss: 2.3852...  0.0566 sec/batch
Epoch: 2/20...  Training Step: 810...  Training loss: 2.4511...  0.0533 sec/batch
Epoch: 2/20...  Training Step: 811...  Training loss: 2.4869...  0.0537 sec/batch
Epoch: 2/20...  Training Step: 812...  Training loss: 2.4617...  0.0563 sec/batch
Epoch: 2/20...  Training Step: 813...  Training loss: 2.4216...  0.0526 sec/batch
Epoch: 2/20...  Training Step: 814...  Training loss: 2.3869...  0.0585 sec/batch
Epoch: 2/20...  Training Step: 815...  Training loss: 2.4299...  0.0528 sec/batch
Epoch: 2/20...  Training Step: 816...  Training loss: 2.4498...  0.0554 sec/batch
Epoch: 2/20...  

Epoch: 2/20...  Training Step: 905...  Training loss: 2.3243...  0.0534 sec/batch
Epoch: 2/20...  Training Step: 906...  Training loss: 2.3438...  0.0533 sec/batch
Epoch: 2/20...  Training Step: 907...  Training loss: 2.3414...  0.0532 sec/batch
Epoch: 2/20...  Training Step: 908...  Training loss: 2.3627...  0.0533 sec/batch
Epoch: 2/20...  Training Step: 909...  Training loss: 2.3803...  0.0551 sec/batch
Epoch: 2/20...  Training Step: 910...  Training loss: 2.3880...  0.0575 sec/batch
Epoch: 2/20...  Training Step: 911...  Training loss: 2.3792...  0.0531 sec/batch
Epoch: 2/20...  Training Step: 912...  Training loss: 2.3861...  0.0565 sec/batch
Epoch: 2/20...  Training Step: 913...  Training loss: 2.4007...  0.0547 sec/batch
Epoch: 2/20...  Training Step: 914...  Training loss: 2.3498...  0.0602 sec/batch
Epoch: 2/20...  Training Step: 915...  Training loss: 2.3576...  0.0538 sec/batch
Epoch: 2/20...  Training Step: 916...  Training loss: 2.3551...  0.0561 sec/batch
Epoch: 2/20...  

Epoch: 2/20...  Training Step: 1005...  Training loss: 2.3561...  0.0560 sec/batch
Epoch: 2/20...  Training Step: 1006...  Training loss: 2.3090...  0.0527 sec/batch
Epoch: 2/20...  Training Step: 1007...  Training loss: 2.3014...  0.0596 sec/batch
Epoch: 2/20...  Training Step: 1008...  Training loss: 2.3603...  0.0552 sec/batch
Epoch: 2/20...  Training Step: 1009...  Training loss: 2.2715...  0.0569 sec/batch
Epoch: 2/20...  Training Step: 1010...  Training loss: 2.3030...  0.0553 sec/batch
Epoch: 2/20...  Training Step: 1011...  Training loss: 2.3737...  0.0525 sec/batch
Epoch: 2/20...  Training Step: 1012...  Training loss: 2.2827...  0.0583 sec/batch
Epoch: 2/20...  Training Step: 1013...  Training loss: 2.3358...  0.0592 sec/batch
Epoch: 2/20...  Training Step: 1014...  Training loss: 2.3317...  0.0583 sec/batch
Epoch: 2/20...  Training Step: 1015...  Training loss: 2.3329...  0.0560 sec/batch
Epoch: 2/20...  Training Step: 1016...  Training loss: 2.3214...  0.0526 sec/batch
Epoc

Epoch: 2/20...  Training Step: 1105...  Training loss: 2.3946...  0.0562 sec/batch
Epoch: 2/20...  Training Step: 1106...  Training loss: 2.3364...  0.0531 sec/batch
Epoch: 2/20...  Training Step: 1107...  Training loss: 2.3851...  0.0524 sec/batch
Epoch: 2/20...  Training Step: 1108...  Training loss: 2.3523...  0.0562 sec/batch
Epoch: 2/20...  Training Step: 1109...  Training loss: 2.3721...  0.0530 sec/batch
Epoch: 2/20...  Training Step: 1110...  Training loss: 2.3509...  0.0530 sec/batch
Epoch: 2/20...  Training Step: 1111...  Training loss: 2.3638...  0.0576 sec/batch
Epoch: 2/20...  Training Step: 1112...  Training loss: 2.3992...  0.0554 sec/batch
Epoch: 2/20...  Training Step: 1113...  Training loss: 2.3431...  0.0559 sec/batch
Epoch: 2/20...  Training Step: 1114...  Training loss: 2.3694...  0.0547 sec/batch
Epoch: 2/20...  Training Step: 1115...  Training loss: 2.3637...  0.0528 sec/batch
Epoch: 2/20...  Training Step: 1116...  Training loss: 2.3633...  0.0552 sec/batch
Epoc

Epoch: 2/20...  Training Step: 1205...  Training loss: 2.3165...  0.0585 sec/batch
Epoch: 2/20...  Training Step: 1206...  Training loss: 2.3093...  0.0528 sec/batch
Epoch: 2/20...  Training Step: 1207...  Training loss: 2.2724...  0.0556 sec/batch
Epoch: 2/20...  Training Step: 1208...  Training loss: 2.3202...  0.0577 sec/batch
Epoch: 2/20...  Training Step: 1209...  Training loss: 2.2639...  0.0555 sec/batch
Epoch: 2/20...  Training Step: 1210...  Training loss: 2.3686...  0.0567 sec/batch
Epoch: 2/20...  Training Step: 1211...  Training loss: 2.2592...  0.0578 sec/batch
Epoch: 2/20...  Training Step: 1212...  Training loss: 2.3463...  0.0527 sec/batch
Epoch: 2/20...  Training Step: 1213...  Training loss: 2.2960...  0.0602 sec/batch
Epoch: 2/20...  Training Step: 1214...  Training loss: 2.2749...  0.0530 sec/batch
Epoch: 2/20...  Training Step: 1215...  Training loss: 2.2871...  0.0547 sec/batch
Epoch: 2/20...  Training Step: 1216...  Training loss: 2.2868...  0.0574 sec/batch
Epoc

Epoch: 3/20...  Training Step: 1305...  Training loss: 2.2351...  0.0562 sec/batch
Epoch: 3/20...  Training Step: 1306...  Training loss: 2.2244...  0.0526 sec/batch
Epoch: 3/20...  Training Step: 1307...  Training loss: 2.2176...  0.0573 sec/batch
Epoch: 3/20...  Training Step: 1308...  Training loss: 2.2162...  0.0556 sec/batch
Epoch: 3/20...  Training Step: 1309...  Training loss: 2.2811...  0.0562 sec/batch
Epoch: 3/20...  Training Step: 1310...  Training loss: 2.2348...  0.0548 sec/batch
Epoch: 3/20...  Training Step: 1311...  Training loss: 2.2503...  0.0558 sec/batch
Epoch: 3/20...  Training Step: 1312...  Training loss: 2.2646...  0.0527 sec/batch
Epoch: 3/20...  Training Step: 1313...  Training loss: 2.2595...  0.0524 sec/batch
Epoch: 3/20...  Training Step: 1314...  Training loss: 2.2503...  0.0543 sec/batch
Epoch: 3/20...  Training Step: 1315...  Training loss: 2.2836...  0.0555 sec/batch
Epoch: 3/20...  Training Step: 1316...  Training loss: 2.2602...  0.0555 sec/batch
Epoc

Epoch: 3/20...  Training Step: 1405...  Training loss: 2.2481...  0.0589 sec/batch
Epoch: 3/20...  Training Step: 1406...  Training loss: 2.1857...  0.0591 sec/batch
Epoch: 3/20...  Training Step: 1407...  Training loss: 2.2710...  0.0528 sec/batch
Epoch: 3/20...  Training Step: 1408...  Training loss: 2.2268...  0.0521 sec/batch
Epoch: 3/20...  Training Step: 1409...  Training loss: 2.2238...  0.0541 sec/batch
Epoch: 3/20...  Training Step: 1410...  Training loss: 2.2366...  0.0531 sec/batch
Epoch: 3/20...  Training Step: 1411...  Training loss: 2.2212...  0.0535 sec/batch
Epoch: 3/20...  Training Step: 1412...  Training loss: 2.2367...  0.0568 sec/batch
Epoch: 3/20...  Training Step: 1413...  Training loss: 2.2591...  0.0569 sec/batch
Epoch: 3/20...  Training Step: 1414...  Training loss: 2.2565...  0.0568 sec/batch
Epoch: 3/20...  Training Step: 1415...  Training loss: 2.2317...  0.0580 sec/batch
Epoch: 3/20...  Training Step: 1416...  Training loss: 2.2431...  0.0535 sec/batch
Epoc

Epoch: 3/20...  Training Step: 1505...  Training loss: 2.2477...  0.0585 sec/batch
Epoch: 3/20...  Training Step: 1506...  Training loss: 2.1696...  0.0538 sec/batch
Epoch: 3/20...  Training Step: 1507...  Training loss: 2.2490...  0.0535 sec/batch
Epoch: 3/20...  Training Step: 1508...  Training loss: 2.2323...  0.0591 sec/batch
Epoch: 3/20...  Training Step: 1509...  Training loss: 2.2201...  0.0619 sec/batch
Epoch: 3/20...  Training Step: 1510...  Training loss: 2.2051...  0.0536 sec/batch
Epoch: 3/20...  Training Step: 1511...  Training loss: 2.2115...  0.0551 sec/batch
Epoch: 3/20...  Training Step: 1512...  Training loss: 2.2274...  0.0554 sec/batch
Epoch: 3/20...  Training Step: 1513...  Training loss: 2.2287...  0.0610 sec/batch
Epoch: 3/20...  Training Step: 1514...  Training loss: 2.1949...  0.0576 sec/batch
Epoch: 3/20...  Training Step: 1515...  Training loss: 2.2181...  0.0554 sec/batch
Epoch: 3/20...  Training Step: 1516...  Training loss: 2.2936...  0.0532 sec/batch
Epoc

Epoch: 3/20...  Training Step: 1605...  Training loss: 2.1865...  0.0562 sec/batch
Epoch: 3/20...  Training Step: 1606...  Training loss: 2.2115...  0.0554 sec/batch
Epoch: 3/20...  Training Step: 1607...  Training loss: 2.1901...  0.0553 sec/batch
Epoch: 3/20...  Training Step: 1608...  Training loss: 2.2573...  0.0539 sec/batch
Epoch: 3/20...  Training Step: 1609...  Training loss: 2.2002...  0.0537 sec/batch
Epoch: 3/20...  Training Step: 1610...  Training loss: 2.1800...  0.0594 sec/batch
Epoch: 3/20...  Training Step: 1611...  Training loss: 2.2399...  0.0539 sec/batch
Epoch: 3/20...  Training Step: 1612...  Training loss: 2.2596...  0.0561 sec/batch
Epoch: 3/20...  Training Step: 1613...  Training loss: 2.2314...  0.0597 sec/batch
Epoch: 3/20...  Training Step: 1614...  Training loss: 2.2189...  0.0561 sec/batch
Epoch: 3/20...  Training Step: 1615...  Training loss: 2.1705...  0.0540 sec/batch
Epoch: 3/20...  Training Step: 1616...  Training loss: 2.2313...  0.0533 sec/batch
Epoc

Epoch: 3/20...  Training Step: 1705...  Training loss: 2.2131...  0.0537 sec/batch
Epoch: 3/20...  Training Step: 1706...  Training loss: 2.1807...  0.0557 sec/batch
Epoch: 3/20...  Training Step: 1707...  Training loss: 2.1761...  0.0555 sec/batch
Epoch: 3/20...  Training Step: 1708...  Training loss: 2.1527...  0.0552 sec/batch
Epoch: 3/20...  Training Step: 1709...  Training loss: 2.2178...  0.0537 sec/batch
Epoch: 3/20...  Training Step: 1710...  Training loss: 2.1851...  0.0533 sec/batch
Epoch: 3/20...  Training Step: 1711...  Training loss: 2.1915...  0.0557 sec/batch
Epoch: 3/20...  Training Step: 1712...  Training loss: 2.1530...  0.0537 sec/batch
Epoch: 3/20...  Training Step: 1713...  Training loss: 2.1556...  0.0597 sec/batch
Epoch: 3/20...  Training Step: 1714...  Training loss: 2.1219...  0.0556 sec/batch
Epoch: 3/20...  Training Step: 1715...  Training loss: 2.2280...  0.0579 sec/batch
Epoch: 3/20...  Training Step: 1716...  Training loss: 2.2135...  0.0546 sec/batch
Epoc

Epoch: 3/20...  Training Step: 1805...  Training loss: 2.2626...  0.0551 sec/batch
Epoch: 3/20...  Training Step: 1806...  Training loss: 2.1983...  0.0536 sec/batch
Epoch: 3/20...  Training Step: 1807...  Training loss: 2.1795...  0.0558 sec/batch
Epoch: 3/20...  Training Step: 1808...  Training loss: 2.2219...  0.0571 sec/batch
Epoch: 3/20...  Training Step: 1809...  Training loss: 2.1824...  0.0577 sec/batch
Epoch: 3/20...  Training Step: 1810...  Training loss: 2.1767...  0.0575 sec/batch
Epoch: 3/20...  Training Step: 1811...  Training loss: 2.1707...  0.0616 sec/batch
Epoch: 3/20...  Training Step: 1812...  Training loss: 2.2412...  0.0596 sec/batch
Epoch: 3/20...  Training Step: 1813...  Training loss: 2.1619...  0.0577 sec/batch
Epoch: 3/20...  Training Step: 1814...  Training loss: 2.1717...  0.0586 sec/batch
Epoch: 3/20...  Training Step: 1815...  Training loss: 2.1734...  0.0568 sec/batch
Epoch: 3/20...  Training Step: 1816...  Training loss: 2.2447...  0.0577 sec/batch
Epoc

Epoch: 4/20...  Training Step: 1905...  Training loss: 2.1487...  0.0600 sec/batch
Epoch: 4/20...  Training Step: 1906...  Training loss: 2.1255...  0.0554 sec/batch
Epoch: 4/20...  Training Step: 1907...  Training loss: 2.0526...  0.0535 sec/batch
Epoch: 4/20...  Training Step: 1908...  Training loss: 2.1674...  0.0598 sec/batch
Epoch: 4/20...  Training Step: 1909...  Training loss: 2.1525...  0.0569 sec/batch
Epoch: 4/20...  Training Step: 1910...  Training loss: 2.1548...  0.0562 sec/batch
Epoch: 4/20...  Training Step: 1911...  Training loss: 2.1114...  0.0578 sec/batch
Epoch: 4/20...  Training Step: 1912...  Training loss: 2.1231...  0.0547 sec/batch
Epoch: 4/20...  Training Step: 1913...  Training loss: 2.1397...  0.0568 sec/batch
Epoch: 4/20...  Training Step: 1914...  Training loss: 2.2108...  0.0572 sec/batch
Epoch: 4/20...  Training Step: 1915...  Training loss: 2.1870...  0.0569 sec/batch
Epoch: 4/20...  Training Step: 1916...  Training loss: 2.1301...  0.0561 sec/batch
Epoc

Epoch: 4/20...  Training Step: 2005...  Training loss: 2.0927...  0.0559 sec/batch
Epoch: 4/20...  Training Step: 2006...  Training loss: 2.1324...  0.0554 sec/batch
Epoch: 4/20...  Training Step: 2007...  Training loss: 2.1199...  0.0555 sec/batch
Epoch: 4/20...  Training Step: 2008...  Training loss: 2.1535...  0.0526 sec/batch
Epoch: 4/20...  Training Step: 2009...  Training loss: 2.1072...  0.0574 sec/batch
Epoch: 4/20...  Training Step: 2010...  Training loss: 2.1246...  0.0529 sec/batch
Epoch: 4/20...  Training Step: 2011...  Training loss: 2.1437...  0.0568 sec/batch
Epoch: 4/20...  Training Step: 2012...  Training loss: 2.1317...  0.0528 sec/batch
Epoch: 4/20...  Training Step: 2013...  Training loss: 2.1252...  0.0549 sec/batch
Epoch: 4/20...  Training Step: 2014...  Training loss: 2.1655...  0.0523 sec/batch
Epoch: 4/20...  Training Step: 2015...  Training loss: 2.1009...  0.0554 sec/batch
Epoch: 4/20...  Training Step: 2016...  Training loss: 2.1371...  0.0525 sec/batch
Epoc

Epoch: 4/20...  Training Step: 2105...  Training loss: 2.0944...  0.0555 sec/batch
Epoch: 4/20...  Training Step: 2106...  Training loss: 2.1404...  0.0589 sec/batch
Epoch: 4/20...  Training Step: 2107...  Training loss: 2.0982...  0.0550 sec/batch
Epoch: 4/20...  Training Step: 2108...  Training loss: 2.1115...  0.0533 sec/batch
Epoch: 4/20...  Training Step: 2109...  Training loss: 2.0715...  0.0524 sec/batch
Epoch: 4/20...  Training Step: 2110...  Training loss: 2.1187...  0.0528 sec/batch
Epoch: 4/20...  Training Step: 2111...  Training loss: 2.1075...  0.0540 sec/batch
Epoch: 4/20...  Training Step: 2112...  Training loss: 2.0683...  0.0541 sec/batch
Epoch: 4/20...  Training Step: 2113...  Training loss: 2.1309...  0.0555 sec/batch
Epoch: 4/20...  Training Step: 2114...  Training loss: 2.1293...  0.0586 sec/batch
Epoch: 4/20...  Training Step: 2115...  Training loss: 2.1459...  0.0526 sec/batch
Epoch: 4/20...  Training Step: 2116...  Training loss: 2.1131...  0.0527 sec/batch
Epoc

Epoch: 4/20...  Training Step: 2205...  Training loss: 2.0872...  0.0559 sec/batch
Epoch: 4/20...  Training Step: 2206...  Training loss: 2.1185...  0.0604 sec/batch
Epoch: 4/20...  Training Step: 2207...  Training loss: 2.1117...  0.0561 sec/batch
Epoch: 4/20...  Training Step: 2208...  Training loss: 2.0837...  0.0572 sec/batch
Epoch: 4/20...  Training Step: 2209...  Training loss: 2.0823...  0.0555 sec/batch
Epoch: 4/20...  Training Step: 2210...  Training loss: 2.0826...  0.0531 sec/batch
Epoch: 4/20...  Training Step: 2211...  Training loss: 2.1009...  0.0533 sec/batch
Epoch: 4/20...  Training Step: 2212...  Training loss: 2.0947...  0.0549 sec/batch
Epoch: 4/20...  Training Step: 2213...  Training loss: 2.1154...  0.0610 sec/batch
Epoch: 4/20...  Training Step: 2214...  Training loss: 2.0836...  0.0640 sec/batch
Epoch: 4/20...  Training Step: 2215...  Training loss: 2.0956...  0.0525 sec/batch
Epoch: 4/20...  Training Step: 2216...  Training loss: 2.1478...  0.0551 sec/batch
Epoc

Epoch: 4/20...  Training Step: 2305...  Training loss: 2.0447...  0.0528 sec/batch
Epoch: 4/20...  Training Step: 2306...  Training loss: 2.0592...  0.0572 sec/batch
Epoch: 4/20...  Training Step: 2307...  Training loss: 2.0633...  0.0534 sec/batch
Epoch: 4/20...  Training Step: 2308...  Training loss: 2.0486...  0.0525 sec/batch
Epoch: 4/20...  Training Step: 2309...  Training loss: 2.1130...  0.0537 sec/batch
Epoch: 4/20...  Training Step: 2310...  Training loss: 2.1351...  0.0536 sec/batch
Epoch: 4/20...  Training Step: 2311...  Training loss: 2.1446...  0.0534 sec/batch
Epoch: 4/20...  Training Step: 2312...  Training loss: 2.0973...  0.0548 sec/batch
Epoch: 4/20...  Training Step: 2313...  Training loss: 2.0543...  0.0527 sec/batch
Epoch: 4/20...  Training Step: 2314...  Training loss: 2.0569...  0.0558 sec/batch
Epoch: 4/20...  Training Step: 2315...  Training loss: 2.0697...  0.0561 sec/batch
Epoch: 4/20...  Training Step: 2316...  Training loss: 2.0987...  0.0585 sec/batch
Epoc

Epoch: 4/20...  Training Step: 2405...  Training loss: 2.1047...  0.0604 sec/batch
Epoch: 4/20...  Training Step: 2406...  Training loss: 2.0704...  0.0599 sec/batch
Epoch: 4/20...  Training Step: 2407...  Training loss: 2.0929...  0.0528 sec/batch
Epoch: 4/20...  Training Step: 2408...  Training loss: 2.1169...  0.0554 sec/batch
Epoch: 4/20...  Training Step: 2409...  Training loss: 2.1337...  0.0564 sec/batch
Epoch: 4/20...  Training Step: 2410...  Training loss: 2.0911...  0.0533 sec/batch
Epoch: 4/20...  Training Step: 2411...  Training loss: 2.0647...  0.0529 sec/batch
Epoch: 4/20...  Training Step: 2412...  Training loss: 2.0451...  0.0594 sec/batch
Epoch: 4/20...  Training Step: 2413...  Training loss: 2.0760...  0.0528 sec/batch
Epoch: 4/20...  Training Step: 2414...  Training loss: 2.0993...  0.0551 sec/batch
Epoch: 4/20...  Training Step: 2415...  Training loss: 2.0612...  0.0527 sec/batch
Epoch: 4/20...  Training Step: 2416...  Training loss: 2.0457...  0.0522 sec/batch
Epoc

Epoch: 5/20...  Training Step: 2505...  Training loss: 2.0591...  0.0545 sec/batch
Epoch: 5/20...  Training Step: 2506...  Training loss: 2.0381...  0.0534 sec/batch
Epoch: 5/20...  Training Step: 2507...  Training loss: 2.0644...  0.0573 sec/batch
Epoch: 5/20...  Training Step: 2508...  Training loss: 2.0459...  0.0574 sec/batch
Epoch: 5/20...  Training Step: 2509...  Training loss: 2.0927...  0.0573 sec/batch
Epoch: 5/20...  Training Step: 2510...  Training loss: 2.0235...  0.0530 sec/batch
Epoch: 5/20...  Training Step: 2511...  Training loss: 2.0283...  0.0553 sec/batch
Epoch: 5/20...  Training Step: 2512...  Training loss: 2.0799...  0.0528 sec/batch
Epoch: 5/20...  Training Step: 2513...  Training loss: 2.0468...  0.0611 sec/batch
Epoch: 5/20...  Training Step: 2514...  Training loss: 2.0300...  0.0553 sec/batch
Epoch: 5/20...  Training Step: 2515...  Training loss: 2.0495...  0.0533 sec/batch
Epoch: 5/20...  Training Step: 2516...  Training loss: 2.0521...  0.0529 sec/batch
Epoc

Epoch: 5/20...  Training Step: 2605...  Training loss: 2.0768...  0.0587 sec/batch
Epoch: 5/20...  Training Step: 2606...  Training loss: 2.0953...  0.0547 sec/batch
Epoch: 5/20...  Training Step: 2607...  Training loss: 2.0763...  0.0526 sec/batch
Epoch: 5/20...  Training Step: 2608...  Training loss: 2.0058...  0.0526 sec/batch
Epoch: 5/20...  Training Step: 2609...  Training loss: 2.0252...  0.0597 sec/batch
Epoch: 5/20...  Training Step: 2610...  Training loss: 2.0813...  0.0582 sec/batch
Epoch: 5/20...  Training Step: 2611...  Training loss: 2.0306...  0.0531 sec/batch
Epoch: 5/20...  Training Step: 2612...  Training loss: 2.0973...  0.0521 sec/batch
Epoch: 5/20...  Training Step: 2613...  Training loss: 2.0862...  0.0543 sec/batch
Epoch: 5/20...  Training Step: 2614...  Training loss: 2.0393...  0.0532 sec/batch
Epoch: 5/20...  Training Step: 2615...  Training loss: 1.9946...  0.0525 sec/batch
Epoch: 5/20...  Training Step: 2616...  Training loss: 2.0381...  0.0601 sec/batch
Epoc

Epoch: 5/20...  Training Step: 2705...  Training loss: 2.0306...  0.0606 sec/batch
Epoch: 5/20...  Training Step: 2706...  Training loss: 2.0722...  0.0548 sec/batch
Epoch: 5/20...  Training Step: 2707...  Training loss: 2.0847...  0.0557 sec/batch
Epoch: 5/20...  Training Step: 2708...  Training loss: 2.0117...  0.0545 sec/batch
Epoch: 5/20...  Training Step: 2709...  Training loss: 2.0587...  0.0528 sec/batch
Epoch: 5/20...  Training Step: 2710...  Training loss: 2.0375...  0.0528 sec/batch
Epoch: 5/20...  Training Step: 2711...  Training loss: 2.0865...  0.0580 sec/batch
Epoch: 5/20...  Training Step: 2712...  Training loss: 2.0198...  0.0556 sec/batch
Epoch: 5/20...  Training Step: 2713...  Training loss: 2.0220...  0.0557 sec/batch
Epoch: 5/20...  Training Step: 2714...  Training loss: 2.0048...  0.0527 sec/batch
Epoch: 5/20...  Training Step: 2715...  Training loss: 2.0250...  0.0531 sec/batch
Epoch: 5/20...  Training Step: 2716...  Training loss: 2.0606...  0.0562 sec/batch
Epoc

Epoch: 5/20...  Training Step: 2805...  Training loss: 1.9803...  0.0561 sec/batch
Epoch: 5/20...  Training Step: 2806...  Training loss: 2.0047...  0.0579 sec/batch
Epoch: 5/20...  Training Step: 2807...  Training loss: 1.9958...  0.0585 sec/batch
Epoch: 5/20...  Training Step: 2808...  Training loss: 1.9718...  0.0556 sec/batch
Epoch: 5/20...  Training Step: 2809...  Training loss: 2.0269...  0.0574 sec/batch
Epoch: 5/20...  Training Step: 2810...  Training loss: 2.0017...  0.0555 sec/batch
Epoch: 5/20...  Training Step: 2811...  Training loss: 2.0310...  0.0553 sec/batch
Epoch: 5/20...  Training Step: 2812...  Training loss: 2.0110...  0.0589 sec/batch
Epoch: 5/20...  Training Step: 2813...  Training loss: 1.9760...  0.0562 sec/batch
Epoch: 5/20...  Training Step: 2814...  Training loss: 2.0153...  0.0549 sec/batch
Epoch: 5/20...  Training Step: 2815...  Training loss: 2.0290...  0.0523 sec/batch
Epoch: 5/20...  Training Step: 2816...  Training loss: 2.0425...  0.0531 sec/batch
Epoc

Epoch: 5/20...  Training Step: 2905...  Training loss: 2.0568...  0.0554 sec/batch
Epoch: 5/20...  Training Step: 2906...  Training loss: 2.0454...  0.0523 sec/batch
Epoch: 5/20...  Training Step: 2907...  Training loss: 1.9957...  0.0592 sec/batch
Epoch: 5/20...  Training Step: 2908...  Training loss: 2.0217...  0.0551 sec/batch
Epoch: 5/20...  Training Step: 2909...  Training loss: 2.0291...  0.0538 sec/batch
Epoch: 5/20...  Training Step: 2910...  Training loss: 1.9733...  0.0534 sec/batch
Epoch: 5/20...  Training Step: 2911...  Training loss: 2.0149...  0.0553 sec/batch
Epoch: 5/20...  Training Step: 2912...  Training loss: 2.0706...  0.0529 sec/batch
Epoch: 5/20...  Training Step: 2913...  Training loss: 2.0253...  0.0561 sec/batch
Epoch: 5/20...  Training Step: 2914...  Training loss: 1.9967...  0.0533 sec/batch
Epoch: 5/20...  Training Step: 2915...  Training loss: 1.9910...  0.0546 sec/batch
Epoch: 5/20...  Training Step: 2916...  Training loss: 2.0317...  0.0536 sec/batch
Epoc

Epoch: 5/20...  Training Step: 3005...  Training loss: 2.0033...  0.0561 sec/batch
Epoch: 5/20...  Training Step: 3006...  Training loss: 2.0532...  0.0531 sec/batch
Epoch: 5/20...  Training Step: 3007...  Training loss: 2.0406...  0.0533 sec/batch
Epoch: 5/20...  Training Step: 3008...  Training loss: 2.0278...  0.0530 sec/batch
Epoch: 5/20...  Training Step: 3009...  Training loss: 2.0059...  0.0529 sec/batch
Epoch: 5/20...  Training Step: 3010...  Training loss: 1.9803...  0.0529 sec/batch
Epoch: 5/20...  Training Step: 3011...  Training loss: 2.0164...  0.0532 sec/batch
Epoch: 5/20...  Training Step: 3012...  Training loss: 1.9805...  0.0535 sec/batch
Epoch: 5/20...  Training Step: 3013...  Training loss: 1.9973...  0.0552 sec/batch
Epoch: 5/20...  Training Step: 3014...  Training loss: 1.9794...  0.0534 sec/batch
Epoch: 5/20...  Training Step: 3015...  Training loss: 2.0066...  0.0537 sec/batch
Epoch: 5/20...  Training Step: 3016...  Training loss: 2.0143...  0.0581 sec/batch
Epoc

Epoch: 6/20...  Training Step: 3105...  Training loss: 2.0061...  0.0549 sec/batch
Epoch: 6/20...  Training Step: 3106...  Training loss: 2.0171...  0.0602 sec/batch
Epoch: 6/20...  Training Step: 3107...  Training loss: 1.9765...  0.0554 sec/batch
Epoch: 6/20...  Training Step: 3108...  Training loss: 1.9670...  0.0585 sec/batch
Epoch: 6/20...  Training Step: 3109...  Training loss: 1.9571...  0.0633 sec/batch
Epoch: 6/20...  Training Step: 3110...  Training loss: 1.9838...  0.0533 sec/batch
Epoch: 6/20...  Training Step: 3111...  Training loss: 2.0116...  0.0530 sec/batch
Epoch: 6/20...  Training Step: 3112...  Training loss: 1.9510...  0.0579 sec/batch
Epoch: 6/20...  Training Step: 3113...  Training loss: 2.0327...  0.0566 sec/batch
Epoch: 6/20...  Training Step: 3114...  Training loss: 1.9830...  0.0527 sec/batch
Epoch: 6/20...  Training Step: 3115...  Training loss: 2.0377...  0.0589 sec/batch
Epoch: 6/20...  Training Step: 3116...  Training loss: 2.0528...  0.0527 sec/batch
Epoc

Epoch: 6/20...  Training Step: 3205...  Training loss: 1.9608...  0.0555 sec/batch
Epoch: 6/20...  Training Step: 3206...  Training loss: 1.9414...  0.0562 sec/batch
Epoch: 6/20...  Training Step: 3207...  Training loss: 2.0121...  0.0574 sec/batch
Epoch: 6/20...  Training Step: 3208...  Training loss: 1.9760...  0.0562 sec/batch
Epoch: 6/20...  Training Step: 3209...  Training loss: 1.9690...  0.0533 sec/batch
Epoch: 6/20...  Training Step: 3210...  Training loss: 1.9673...  0.0529 sec/batch
Epoch: 6/20...  Training Step: 3211...  Training loss: 1.9515...  0.0553 sec/batch
Epoch: 6/20...  Training Step: 3212...  Training loss: 1.9851...  0.0524 sec/batch
Epoch: 6/20...  Training Step: 3213...  Training loss: 1.9911...  0.0569 sec/batch
Epoch: 6/20...  Training Step: 3214...  Training loss: 1.9560...  0.0522 sec/batch
Epoch: 6/20...  Training Step: 3215...  Training loss: 2.0090...  0.0552 sec/batch
Epoch: 6/20...  Training Step: 3216...  Training loss: 2.0090...  0.0551 sec/batch
Epoc

Epoch: 6/20...  Training Step: 3305...  Training loss: 1.9875...  0.0531 sec/batch
Epoch: 6/20...  Training Step: 3306...  Training loss: 1.9304...  0.0594 sec/batch
Epoch: 6/20...  Training Step: 3307...  Training loss: 1.9920...  0.0526 sec/batch
Epoch: 6/20...  Training Step: 3308...  Training loss: 1.9792...  0.0560 sec/batch
Epoch: 6/20...  Training Step: 3309...  Training loss: 1.9816...  0.0564 sec/batch
Epoch: 6/20...  Training Step: 3310...  Training loss: 1.9689...  0.0532 sec/batch
Epoch: 6/20...  Training Step: 3311...  Training loss: 1.9778...  0.0535 sec/batch
Epoch: 6/20...  Training Step: 3312...  Training loss: 1.9818...  0.0571 sec/batch
Epoch: 6/20...  Training Step: 3313...  Training loss: 2.0187...  0.0574 sec/batch
Epoch: 6/20...  Training Step: 3314...  Training loss: 2.0197...  0.0553 sec/batch
Epoch: 6/20...  Training Step: 3315...  Training loss: 1.9909...  0.0579 sec/batch
Epoch: 6/20...  Training Step: 3316...  Training loss: 2.0191...  0.0580 sec/batch
Epoc

Epoch: 6/20...  Training Step: 3405...  Training loss: 1.9800...  0.0594 sec/batch
Epoch: 6/20...  Training Step: 3406...  Training loss: 1.9934...  0.0529 sec/batch
Epoch: 6/20...  Training Step: 3407...  Training loss: 1.9701...  0.0529 sec/batch
Epoch: 6/20...  Training Step: 3408...  Training loss: 1.9606...  0.0567 sec/batch
Epoch: 6/20...  Training Step: 3409...  Training loss: 1.9858...  0.0523 sec/batch
Epoch: 6/20...  Training Step: 3410...  Training loss: 1.9416...  0.0570 sec/batch
Epoch: 6/20...  Training Step: 3411...  Training loss: 1.9347...  0.0553 sec/batch
Epoch: 6/20...  Training Step: 3412...  Training loss: 1.9631...  0.0522 sec/batch
Epoch: 6/20...  Training Step: 3413...  Training loss: 1.9410...  0.0571 sec/batch
Epoch: 6/20...  Training Step: 3414...  Training loss: 1.9355...  0.0543 sec/batch
Epoch: 6/20...  Training Step: 3415...  Training loss: 1.9655...  0.0531 sec/batch
Epoch: 6/20...  Training Step: 3416...  Training loss: 1.9984...  0.0534 sec/batch
Epoc

Epoch: 6/20...  Training Step: 3505...  Training loss: 1.9954...  0.0579 sec/batch
Epoch: 6/20...  Training Step: 3506...  Training loss: 2.0022...  0.0567 sec/batch
Epoch: 6/20...  Training Step: 3507...  Training loss: 2.0255...  0.0536 sec/batch
Epoch: 6/20...  Training Step: 3508...  Training loss: 1.9680...  0.0533 sec/batch
Epoch: 6/20...  Training Step: 3509...  Training loss: 2.0289...  0.0525 sec/batch
Epoch: 6/20...  Training Step: 3510...  Training loss: 1.9927...  0.0531 sec/batch
Epoch: 6/20...  Training Step: 3511...  Training loss: 1.9526...  0.0526 sec/batch
Epoch: 6/20...  Training Step: 3512...  Training loss: 1.9846...  0.0560 sec/batch
Epoch: 6/20...  Training Step: 3513...  Training loss: 1.9684...  0.0612 sec/batch
Epoch: 6/20...  Training Step: 3514...  Training loss: 1.9504...  0.0557 sec/batch
Epoch: 6/20...  Training Step: 3515...  Training loss: 1.9201...  0.0558 sec/batch
Epoch: 6/20...  Training Step: 3516...  Training loss: 1.9101...  0.0555 sec/batch
Epoc

Epoch: 6/20...  Training Step: 3605...  Training loss: 1.9609...  0.0539 sec/batch
Epoch: 6/20...  Training Step: 3606...  Training loss: 1.9257...  0.0545 sec/batch
Epoch: 6/20...  Training Step: 3607...  Training loss: 1.9763...  0.0537 sec/batch
Epoch: 6/20...  Training Step: 3608...  Training loss: 1.9355...  0.0617 sec/batch
Epoch: 6/20...  Training Step: 3609...  Training loss: 1.9729...  0.0550 sec/batch
Epoch: 6/20...  Training Step: 3610...  Training loss: 1.9913...  0.0614 sec/batch
Epoch: 6/20...  Training Step: 3611...  Training loss: 1.9919...  0.0558 sec/batch
Epoch: 6/20...  Training Step: 3612...  Training loss: 1.9685...  0.0609 sec/batch
Epoch: 6/20...  Training Step: 3613...  Training loss: 2.0127...  0.0573 sec/batch
Epoch: 6/20...  Training Step: 3614...  Training loss: 1.9897...  0.0541 sec/batch
Epoch: 6/20...  Training Step: 3615...  Training loss: 1.9864...  0.0543 sec/batch
Epoch: 6/20...  Training Step: 3616...  Training loss: 1.9472...  0.0568 sec/batch
Epoc

Epoch: 6/20...  Training Step: 3705...  Training loss: 1.9608...  0.0530 sec/batch
Epoch: 6/20...  Training Step: 3706...  Training loss: 1.9411...  0.0567 sec/batch
Epoch: 6/20...  Training Step: 3707...  Training loss: 1.9102...  0.0542 sec/batch
Epoch: 6/20...  Training Step: 3708...  Training loss: 1.9358...  0.0555 sec/batch
Epoch: 6/20...  Training Step: 3709...  Training loss: 1.9367...  0.0576 sec/batch
Epoch: 6/20...  Training Step: 3710...  Training loss: 1.9849...  0.0560 sec/batch
Epoch: 6/20...  Training Step: 3711...  Training loss: 1.9951...  0.0556 sec/batch
Epoch: 6/20...  Training Step: 3712...  Training loss: 1.9752...  0.0532 sec/batch
Epoch: 6/20...  Training Step: 3713...  Training loss: 1.9099...  0.0531 sec/batch
Epoch: 6/20...  Training Step: 3714...  Training loss: 1.9803...  0.0552 sec/batch
Epoch: 6/20...  Training Step: 3715...  Training loss: 1.9248...  0.0566 sec/batch
Epoch: 6/20...  Training Step: 3716...  Training loss: 1.9783...  0.0551 sec/batch
Epoc

Epoch: 7/20...  Training Step: 3805...  Training loss: 1.9291...  0.0573 sec/batch
Epoch: 7/20...  Training Step: 3806...  Training loss: 1.9763...  0.0556 sec/batch
Epoch: 7/20...  Training Step: 3807...  Training loss: 1.8999...  0.0559 sec/batch
Epoch: 7/20...  Training Step: 3808...  Training loss: 1.9807...  0.0553 sec/batch
Epoch: 7/20...  Training Step: 3809...  Training loss: 1.9626...  0.0545 sec/batch
Epoch: 7/20...  Training Step: 3810...  Training loss: 1.9556...  0.0532 sec/batch
Epoch: 7/20...  Training Step: 3811...  Training loss: 1.9387...  0.0532 sec/batch
Epoch: 7/20...  Training Step: 3812...  Training loss: 1.9844...  0.0551 sec/batch
Epoch: 7/20...  Training Step: 3813...  Training loss: 1.9573...  0.0607 sec/batch
Epoch: 7/20...  Training Step: 3814...  Training loss: 1.9349...  0.0531 sec/batch
Epoch: 7/20...  Training Step: 3815...  Training loss: 1.9532...  0.0528 sec/batch
Epoch: 7/20...  Training Step: 3816...  Training loss: 1.9652...  0.0536 sec/batch
Epoc

Epoch: 7/20...  Training Step: 3905...  Training loss: 1.8843...  0.0528 sec/batch
Epoch: 7/20...  Training Step: 3906...  Training loss: 1.9265...  0.0542 sec/batch
Epoch: 7/20...  Training Step: 3907...  Training loss: 1.9227...  0.0591 sec/batch
Epoch: 7/20...  Training Step: 3908...  Training loss: 1.9197...  0.0600 sec/batch
Epoch: 7/20...  Training Step: 3909...  Training loss: 1.9266...  0.0603 sec/batch
Epoch: 7/20...  Training Step: 3910...  Training loss: 2.0080...  0.0550 sec/batch
Epoch: 7/20...  Training Step: 3911...  Training loss: 1.9616...  0.0603 sec/batch
Epoch: 7/20...  Training Step: 3912...  Training loss: 1.9719...  0.0584 sec/batch
Epoch: 7/20...  Training Step: 3913...  Training loss: 1.9604...  0.0558 sec/batch
Epoch: 7/20...  Training Step: 3914...  Training loss: 1.9139...  0.0591 sec/batch
Epoch: 7/20...  Training Step: 3915...  Training loss: 1.9320...  0.0566 sec/batch
Epoch: 7/20...  Training Step: 3916...  Training loss: 2.0060...  0.0557 sec/batch
Epoc

Epoch: 7/20...  Training Step: 4005...  Training loss: 1.9221...  0.0574 sec/batch
Epoch: 7/20...  Training Step: 4006...  Training loss: 1.9557...  0.0559 sec/batch
Epoch: 7/20...  Training Step: 4007...  Training loss: 1.9204...  0.0584 sec/batch
Epoch: 7/20...  Training Step: 4008...  Training loss: 1.9238...  0.0576 sec/batch
Epoch: 7/20...  Training Step: 4009...  Training loss: 1.9436...  0.0533 sec/batch
Epoch: 7/20...  Training Step: 4010...  Training loss: 1.9730...  0.0589 sec/batch
Epoch: 7/20...  Training Step: 4011...  Training loss: 1.9234...  0.0553 sec/batch
Epoch: 7/20...  Training Step: 4012...  Training loss: 1.9406...  0.0522 sec/batch
Epoch: 7/20...  Training Step: 4013...  Training loss: 1.9437...  0.0555 sec/batch
Epoch: 7/20...  Training Step: 4014...  Training loss: 1.9469...  0.0570 sec/batch
Epoch: 7/20...  Training Step: 4015...  Training loss: 1.9084...  0.0555 sec/batch
Epoch: 7/20...  Training Step: 4016...  Training loss: 1.8885...  0.0529 sec/batch
Epoc

Epoch: 7/20...  Training Step: 4105...  Training loss: 1.9260...  0.0562 sec/batch
Epoch: 7/20...  Training Step: 4106...  Training loss: 1.8514...  0.0531 sec/batch
Epoch: 7/20...  Training Step: 4107...  Training loss: 1.8635...  0.0566 sec/batch
Epoch: 7/20...  Training Step: 4108...  Training loss: 1.9241...  0.0553 sec/batch
Epoch: 7/20...  Training Step: 4109...  Training loss: 1.8634...  0.0537 sec/batch
Epoch: 7/20...  Training Step: 4110...  Training loss: 1.8984...  0.0586 sec/batch
Epoch: 7/20...  Training Step: 4111...  Training loss: 1.9464...  0.0565 sec/batch
Epoch: 7/20...  Training Step: 4112...  Training loss: 1.8847...  0.0528 sec/batch
Epoch: 7/20...  Training Step: 4113...  Training loss: 1.9233...  0.0590 sec/batch
Epoch: 7/20...  Training Step: 4114...  Training loss: 1.9368...  0.0526 sec/batch
Epoch: 7/20...  Training Step: 4115...  Training loss: 1.8916...  0.0555 sec/batch
Epoch: 7/20...  Training Step: 4116...  Training loss: 1.9291...  0.0531 sec/batch
Epoc

Epoch: 7/20...  Training Step: 4205...  Training loss: 1.9778...  0.0539 sec/batch
Epoch: 7/20...  Training Step: 4206...  Training loss: 1.9174...  0.0532 sec/batch
Epoch: 7/20...  Training Step: 4207...  Training loss: 1.9502...  0.0554 sec/batch
Epoch: 7/20...  Training Step: 4208...  Training loss: 1.8909...  0.0573 sec/batch
Epoch: 7/20...  Training Step: 4209...  Training loss: 1.9269...  0.0565 sec/batch
Epoch: 7/20...  Training Step: 4210...  Training loss: 1.9294...  0.0522 sec/batch
Epoch: 7/20...  Training Step: 4211...  Training loss: 1.8948...  0.0528 sec/batch
Epoch: 7/20...  Training Step: 4212...  Training loss: 1.9329...  0.0524 sec/batch
Epoch: 7/20...  Training Step: 4213...  Training loss: 1.9229...  0.0533 sec/batch
Epoch: 7/20...  Training Step: 4214...  Training loss: 1.9027...  0.0531 sec/batch
Epoch: 7/20...  Training Step: 4215...  Training loss: 1.8971...  0.0528 sec/batch
Epoch: 7/20...  Training Step: 4216...  Training loss: 1.9173...  0.0585 sec/batch
Epoc

Epoch: 7/20...  Training Step: 4305...  Training loss: 1.9402...  0.0581 sec/batch
Epoch: 7/20...  Training Step: 4306...  Training loss: 1.9060...  0.0523 sec/batch
Epoch: 7/20...  Training Step: 4307...  Training loss: 1.8700...  0.0568 sec/batch
Epoch: 7/20...  Training Step: 4308...  Training loss: 1.9274...  0.0588 sec/batch
Epoch: 7/20...  Training Step: 4309...  Training loss: 1.8980...  0.0568 sec/batch
Epoch: 7/20...  Training Step: 4310...  Training loss: 1.9524...  0.0527 sec/batch
Epoch: 7/20...  Training Step: 4311...  Training loss: 1.8731...  0.0549 sec/batch
Epoch: 7/20...  Training Step: 4312...  Training loss: 1.9576...  0.0533 sec/batch
Epoch: 7/20...  Training Step: 4313...  Training loss: 1.8981...  0.0583 sec/batch
Epoch: 7/20...  Training Step: 4314...  Training loss: 1.9031...  0.0548 sec/batch
Epoch: 7/20...  Training Step: 4315...  Training loss: 1.9003...  0.0588 sec/batch
Epoch: 7/20...  Training Step: 4316...  Training loss: 1.8917...  0.0549 sec/batch
Epoc

Epoch: 8/20...  Training Step: 4405...  Training loss: 1.8751...  0.0554 sec/batch
Epoch: 8/20...  Training Step: 4406...  Training loss: 1.8489...  0.0530 sec/batch
Epoch: 8/20...  Training Step: 4407...  Training loss: 1.8685...  0.0544 sec/batch
Epoch: 8/20...  Training Step: 4408...  Training loss: 1.8759...  0.0560 sec/batch
Epoch: 8/20...  Training Step: 4409...  Training loss: 1.9266...  0.0565 sec/batch
Epoch: 8/20...  Training Step: 4410...  Training loss: 1.9049...  0.0581 sec/batch
Epoch: 8/20...  Training Step: 4411...  Training loss: 1.9309...  0.0599 sec/batch
Epoch: 8/20...  Training Step: 4412...  Training loss: 1.9115...  0.0548 sec/batch
Epoch: 8/20...  Training Step: 4413...  Training loss: 1.8474...  0.0562 sec/batch
Epoch: 8/20...  Training Step: 4414...  Training loss: 1.8941...  0.0553 sec/batch
Epoch: 8/20...  Training Step: 4415...  Training loss: 1.9399...  0.0556 sec/batch
Epoch: 8/20...  Training Step: 4416...  Training loss: 1.9118...  0.0550 sec/batch
Epoc

Epoch: 8/20...  Training Step: 4505...  Training loss: 1.8992...  0.0543 sec/batch
Epoch: 8/20...  Training Step: 4506...  Training loss: 1.9119...  0.0543 sec/batch
Epoch: 8/20...  Training Step: 4507...  Training loss: 1.9349...  0.0568 sec/batch
Epoch: 8/20...  Training Step: 4508...  Training loss: 1.8990...  0.0591 sec/batch
Epoch: 8/20...  Training Step: 4509...  Training loss: 1.8811...  0.0544 sec/batch
Epoch: 8/20...  Training Step: 4510...  Training loss: 1.8464...  0.0598 sec/batch
Epoch: 8/20...  Training Step: 4511...  Training loss: 1.8709...  0.0549 sec/batch
Epoch: 8/20...  Training Step: 4512...  Training loss: 1.9029...  0.0583 sec/batch
Epoch: 8/20...  Training Step: 4513...  Training loss: 1.8877...  0.0533 sec/batch
Epoch: 8/20...  Training Step: 4514...  Training loss: 1.9006...  0.0559 sec/batch
Epoch: 8/20...  Training Step: 4515...  Training loss: 1.8670...  0.0564 sec/batch
Epoch: 8/20...  Training Step: 4516...  Training loss: 1.8947...  0.0561 sec/batch
Epoc

Epoch: 8/20...  Training Step: 4605...  Training loss: 1.9467...  0.0542 sec/batch
Epoch: 8/20...  Training Step: 4606...  Training loss: 1.8207...  0.0596 sec/batch
Epoch: 8/20...  Training Step: 4607...  Training loss: 1.9034...  0.0551 sec/batch
Epoch: 8/20...  Training Step: 4608...  Training loss: 1.9009...  0.0533 sec/batch
Epoch: 8/20...  Training Step: 4609...  Training loss: 1.8750...  0.0543 sec/batch
Epoch: 8/20...  Training Step: 4610...  Training loss: 1.8729...  0.0536 sec/batch
Epoch: 8/20...  Training Step: 4611...  Training loss: 1.8489...  0.0536 sec/batch
Epoch: 8/20...  Training Step: 4612...  Training loss: 1.8989...  0.0551 sec/batch
Epoch: 8/20...  Training Step: 4613...  Training loss: 1.8588...  0.0608 sec/batch
Epoch: 8/20...  Training Step: 4614...  Training loss: 1.8819...  0.0534 sec/batch
Epoch: 8/20...  Training Step: 4615...  Training loss: 1.9144...  0.0605 sec/batch
Epoch: 8/20...  Training Step: 4616...  Training loss: 1.9442...  0.0569 sec/batch
Epoc

Epoch: 8/20...  Training Step: 4705...  Training loss: 1.8761...  0.0559 sec/batch
Epoch: 8/20...  Training Step: 4706...  Training loss: 1.8932...  0.0532 sec/batch
Epoch: 8/20...  Training Step: 4707...  Training loss: 1.8693...  0.0549 sec/batch
Epoch: 8/20...  Training Step: 4708...  Training loss: 1.9434...  0.0551 sec/batch
Epoch: 8/20...  Training Step: 4709...  Training loss: 1.9200...  0.0559 sec/batch
Epoch: 8/20...  Training Step: 4710...  Training loss: 1.8765...  0.0553 sec/batch
Epoch: 8/20...  Training Step: 4711...  Training loss: 1.9110...  0.0526 sec/batch
Epoch: 8/20...  Training Step: 4712...  Training loss: 1.9632...  0.0584 sec/batch
Epoch: 8/20...  Training Step: 4713...  Training loss: 1.8851...  0.0529 sec/batch
Epoch: 8/20...  Training Step: 4714...  Training loss: 1.8821...  0.0528 sec/batch
Epoch: 8/20...  Training Step: 4715...  Training loss: 1.8683...  0.0566 sec/batch
Epoch: 8/20...  Training Step: 4716...  Training loss: 1.8857...  0.0525 sec/batch
Epoc

Epoch: 8/20...  Training Step: 4805...  Training loss: 1.9261...  0.0583 sec/batch
Epoch: 8/20...  Training Step: 4806...  Training loss: 1.9062...  0.0532 sec/batch
Epoch: 8/20...  Training Step: 4807...  Training loss: 1.8741...  0.0578 sec/batch
Epoch: 8/20...  Training Step: 4808...  Training loss: 1.8615...  0.0550 sec/batch
Epoch: 8/20...  Training Step: 4809...  Training loss: 1.9151...  0.0525 sec/batch
Epoch: 8/20...  Training Step: 4810...  Training loss: 1.8796...  0.0532 sec/batch
Epoch: 8/20...  Training Step: 4811...  Training loss: 1.8659...  0.0589 sec/batch
Epoch: 8/20...  Training Step: 4812...  Training loss: 1.8790...  0.0558 sec/batch
Epoch: 8/20...  Training Step: 4813...  Training loss: 1.8553...  0.0528 sec/batch
Epoch: 8/20...  Training Step: 4814...  Training loss: 1.8236...  0.0545 sec/batch
Epoch: 8/20...  Training Step: 4815...  Training loss: 1.9249...  0.0550 sec/batch
Epoch: 8/20...  Training Step: 4816...  Training loss: 1.9193...  0.0547 sec/batch
Epoc

Epoch: 8/20...  Training Step: 4905...  Training loss: 1.9921...  0.0561 sec/batch
Epoch: 8/20...  Training Step: 4906...  Training loss: 1.9376...  0.0581 sec/batch
Epoch: 8/20...  Training Step: 4907...  Training loss: 1.8847...  0.0542 sec/batch
Epoch: 8/20...  Training Step: 4908...  Training loss: 1.9586...  0.0526 sec/batch
Epoch: 8/20...  Training Step: 4909...  Training loss: 1.8603...  0.0531 sec/batch
Epoch: 8/20...  Training Step: 4910...  Training loss: 1.9060...  0.0526 sec/batch
Epoch: 8/20...  Training Step: 4911...  Training loss: 1.8978...  0.0549 sec/batch
Epoch: 8/20...  Training Step: 4912...  Training loss: 1.9470...  0.0569 sec/batch
Epoch: 8/20...  Training Step: 4913...  Training loss: 1.8869...  0.0525 sec/batch
Epoch: 8/20...  Training Step: 4914...  Training loss: 1.8620...  0.0532 sec/batch
Epoch: 8/20...  Training Step: 4915...  Training loss: 1.8826...  0.0528 sec/batch
Epoch: 8/20...  Training Step: 4916...  Training loss: 1.9337...  0.0529 sec/batch
Epoc

Epoch: 9/20...  Training Step: 5005...  Training loss: 1.8427...  0.0532 sec/batch
Epoch: 9/20...  Training Step: 5006...  Training loss: 1.8611...  0.0522 sec/batch
Epoch: 9/20...  Training Step: 5007...  Training loss: 1.7546...  0.0553 sec/batch
Epoch: 9/20...  Training Step: 5008...  Training loss: 1.8860...  0.0526 sec/batch
Epoch: 9/20...  Training Step: 5009...  Training loss: 1.8384...  0.0544 sec/batch
Epoch: 9/20...  Training Step: 5010...  Training loss: 1.8970...  0.0526 sec/batch
Epoch: 9/20...  Training Step: 5011...  Training loss: 1.8418...  0.0537 sec/batch
Epoch: 9/20...  Training Step: 5012...  Training loss: 1.8456...  0.0585 sec/batch
Epoch: 9/20...  Training Step: 5013...  Training loss: 1.8582...  0.0568 sec/batch
Epoch: 9/20...  Training Step: 5014...  Training loss: 1.8831...  0.0543 sec/batch
Epoch: 9/20...  Training Step: 5015...  Training loss: 1.8834...  0.0525 sec/batch
Epoch: 9/20...  Training Step: 5016...  Training loss: 1.8652...  0.0529 sec/batch
Epoc

Epoch: 9/20...  Training Step: 5105...  Training loss: 1.8100...  0.0547 sec/batch
Epoch: 9/20...  Training Step: 5106...  Training loss: 1.8903...  0.0587 sec/batch
Epoch: 9/20...  Training Step: 5107...  Training loss: 1.8885...  0.0595 sec/batch
Epoch: 9/20...  Training Step: 5108...  Training loss: 1.8912...  0.0527 sec/batch
Epoch: 9/20...  Training Step: 5109...  Training loss: 1.8630...  0.0578 sec/batch
Epoch: 9/20...  Training Step: 5110...  Training loss: 1.8973...  0.0545 sec/batch
Epoch: 9/20...  Training Step: 5111...  Training loss: 1.8668...  0.0581 sec/batch
Epoch: 9/20...  Training Step: 5112...  Training loss: 1.8510...  0.0591 sec/batch
Epoch: 9/20...  Training Step: 5113...  Training loss: 1.8562...  0.0558 sec/batch
Epoch: 9/20...  Training Step: 5114...  Training loss: 1.9033...  0.0552 sec/batch
Epoch: 9/20...  Training Step: 5115...  Training loss: 1.8616...  0.0530 sec/batch
Epoch: 9/20...  Training Step: 5116...  Training loss: 1.8818...  0.0580 sec/batch
Epoc

Epoch: 9/20...  Training Step: 5205...  Training loss: 1.8773...  0.0537 sec/batch
Epoch: 9/20...  Training Step: 5206...  Training loss: 1.8534...  0.0562 sec/batch
Epoch: 9/20...  Training Step: 5207...  Training loss: 1.8825...  0.0550 sec/batch
Epoch: 9/20...  Training Step: 5208...  Training loss: 1.8868...  0.0579 sec/batch
Epoch: 9/20...  Training Step: 5209...  Training loss: 1.8130...  0.0558 sec/batch
Epoch: 9/20...  Training Step: 5210...  Training loss: 1.8453...  0.0577 sec/batch
Epoch: 9/20...  Training Step: 5211...  Training loss: 1.8689...  0.0571 sec/batch
Epoch: 9/20...  Training Step: 5212...  Training loss: 1.8492...  0.0550 sec/batch
Epoch: 9/20...  Training Step: 5213...  Training loss: 1.8951...  0.0554 sec/batch
Epoch: 9/20...  Training Step: 5214...  Training loss: 1.8695...  0.0591 sec/batch
Epoch: 9/20...  Training Step: 5215...  Training loss: 1.9044...  0.0550 sec/batch
Epoch: 9/20...  Training Step: 5216...  Training loss: 1.8647...  0.0529 sec/batch
Epoc

Epoch: 9/20...  Training Step: 5305...  Training loss: 1.8615...  0.0524 sec/batch
Epoch: 9/20...  Training Step: 5306...  Training loss: 1.8579...  0.0554 sec/batch
Epoch: 9/20...  Training Step: 5307...  Training loss: 1.8665...  0.0550 sec/batch
Epoch: 9/20...  Training Step: 5308...  Training loss: 1.8723...  0.0531 sec/batch
Epoch: 9/20...  Training Step: 5309...  Training loss: 1.8482...  0.0531 sec/batch
Epoch: 9/20...  Training Step: 5310...  Training loss: 1.8563...  0.0528 sec/batch
Epoch: 9/20...  Training Step: 5311...  Training loss: 1.8901...  0.0524 sec/batch
Epoch: 9/20...  Training Step: 5312...  Training loss: 1.8486...  0.0529 sec/batch
Epoch: 9/20...  Training Step: 5313...  Training loss: 1.8742...  0.0551 sec/batch
Epoch: 9/20...  Training Step: 5314...  Training loss: 1.8439...  0.0530 sec/batch
Epoch: 9/20...  Training Step: 5315...  Training loss: 1.8244...  0.0552 sec/batch
Epoch: 9/20...  Training Step: 5316...  Training loss: 1.9205...  0.0543 sec/batch
Epoc

Epoch: 9/20...  Training Step: 5405...  Training loss: 1.7536...  0.0558 sec/batch
Epoch: 9/20...  Training Step: 5406...  Training loss: 1.8564...  0.0584 sec/batch
Epoch: 9/20...  Training Step: 5407...  Training loss: 1.8254...  0.0547 sec/batch
Epoch: 9/20...  Training Step: 5408...  Training loss: 1.8206...  0.0554 sec/batch
Epoch: 9/20...  Training Step: 5409...  Training loss: 1.8497...  0.0531 sec/batch
Epoch: 9/20...  Training Step: 5410...  Training loss: 1.9069...  0.0527 sec/batch
Epoch: 9/20...  Training Step: 5411...  Training loss: 1.9063...  0.0528 sec/batch
Epoch: 9/20...  Training Step: 5412...  Training loss: 1.8833...  0.0575 sec/batch
Epoch: 9/20...  Training Step: 5413...  Training loss: 1.8300...  0.0529 sec/batch
Epoch: 9/20...  Training Step: 5414...  Training loss: 1.8670...  0.0566 sec/batch
Epoch: 9/20...  Training Step: 5415...  Training loss: 1.8327...  0.0530 sec/batch
Epoch: 9/20...  Training Step: 5416...  Training loss: 1.8394...  0.0544 sec/batch
Epoc

Epoch: 9/20...  Training Step: 5505...  Training loss: 1.8974...  0.0532 sec/batch
Epoch: 9/20...  Training Step: 5506...  Training loss: 1.8515...  0.0540 sec/batch
Epoch: 9/20...  Training Step: 5507...  Training loss: 1.8863...  0.0544 sec/batch
Epoch: 9/20...  Training Step: 5508...  Training loss: 1.9061...  0.0526 sec/batch
Epoch: 9/20...  Training Step: 5509...  Training loss: 1.8974...  0.0572 sec/batch
Epoch: 9/20...  Training Step: 5510...  Training loss: 1.8420...  0.0541 sec/batch
Epoch: 9/20...  Training Step: 5511...  Training loss: 1.8726...  0.0587 sec/batch
Epoch: 9/20...  Training Step: 5512...  Training loss: 1.8056...  0.0544 sec/batch
Epoch: 9/20...  Training Step: 5513...  Training loss: 1.8609...  0.0568 sec/batch
Epoch: 9/20...  Training Step: 5514...  Training loss: 1.8447...  0.0550 sec/batch
Epoch: 9/20...  Training Step: 5515...  Training loss: 1.8263...  0.0601 sec/batch
Epoch: 9/20...  Training Step: 5516...  Training loss: 1.8285...  0.0552 sec/batch
Epoc

Epoch: 10/20...  Training Step: 5605...  Training loss: 1.8333...  0.0544 sec/batch
Epoch: 10/20...  Training Step: 5606...  Training loss: 1.8178...  0.0526 sec/batch
Epoch: 10/20...  Training Step: 5607...  Training loss: 1.8401...  0.0529 sec/batch
Epoch: 10/20...  Training Step: 5608...  Training loss: 1.8399...  0.0561 sec/batch
Epoch: 10/20...  Training Step: 5609...  Training loss: 1.8433...  0.0532 sec/batch
Epoch: 10/20...  Training Step: 5610...  Training loss: 1.8217...  0.0585 sec/batch
Epoch: 10/20...  Training Step: 5611...  Training loss: 1.8075...  0.0545 sec/batch
Epoch: 10/20...  Training Step: 5612...  Training loss: 1.8742...  0.0527 sec/batch
Epoch: 10/20...  Training Step: 5613...  Training loss: 1.8310...  0.0526 sec/batch
Epoch: 10/20...  Training Step: 5614...  Training loss: 1.8316...  0.0584 sec/batch
Epoch: 10/20...  Training Step: 5615...  Training loss: 1.8369...  0.0552 sec/batch
Epoch: 10/20...  Training Step: 5616...  Training loss: 1.8464...  0.0544 se

Epoch: 10/20...  Training Step: 5705...  Training loss: 1.8648...  0.0534 sec/batch
Epoch: 10/20...  Training Step: 5706...  Training loss: 1.8778...  0.0520 sec/batch
Epoch: 10/20...  Training Step: 5707...  Training loss: 1.8908...  0.0528 sec/batch
Epoch: 10/20...  Training Step: 5708...  Training loss: 1.8102...  0.0582 sec/batch
Epoch: 10/20...  Training Step: 5709...  Training loss: 1.8372...  0.0588 sec/batch
Epoch: 10/20...  Training Step: 5710...  Training loss: 1.8898...  0.0527 sec/batch
Epoch: 10/20...  Training Step: 5711...  Training loss: 1.8177...  0.0523 sec/batch
Epoch: 10/20...  Training Step: 5712...  Training loss: 1.8689...  0.0545 sec/batch
Epoch: 10/20...  Training Step: 5713...  Training loss: 1.8774...  0.0525 sec/batch
Epoch: 10/20...  Training Step: 5714...  Training loss: 1.8275...  0.0549 sec/batch
Epoch: 10/20...  Training Step: 5715...  Training loss: 1.8088...  0.0530 sec/batch
Epoch: 10/20...  Training Step: 5716...  Training loss: 1.8558...  0.0529 se

Epoch: 10/20...  Training Step: 5805...  Training loss: 1.8052...  0.0531 sec/batch
Epoch: 10/20...  Training Step: 5806...  Training loss: 1.8789...  0.0524 sec/batch
Epoch: 10/20...  Training Step: 5807...  Training loss: 1.8807...  0.0530 sec/batch
Epoch: 10/20...  Training Step: 5808...  Training loss: 1.8085...  0.0542 sec/batch
Epoch: 10/20...  Training Step: 5809...  Training loss: 1.8817...  0.0555 sec/batch
Epoch: 10/20...  Training Step: 5810...  Training loss: 1.8286...  0.0567 sec/batch
Epoch: 10/20...  Training Step: 5811...  Training loss: 1.8992...  0.0554 sec/batch
Epoch: 10/20...  Training Step: 5812...  Training loss: 1.8025...  0.0561 sec/batch
Epoch: 10/20...  Training Step: 5813...  Training loss: 1.8059...  0.0537 sec/batch
Epoch: 10/20...  Training Step: 5814...  Training loss: 1.8181...  0.0529 sec/batch
Epoch: 10/20...  Training Step: 5815...  Training loss: 1.8219...  0.0559 sec/batch
Epoch: 10/20...  Training Step: 5816...  Training loss: 1.8460...  0.0580 se

Epoch: 10/20...  Training Step: 5905...  Training loss: 1.8183...  0.0556 sec/batch
Epoch: 10/20...  Training Step: 5906...  Training loss: 1.8322...  0.0521 sec/batch
Epoch: 10/20...  Training Step: 5907...  Training loss: 1.8047...  0.0550 sec/batch
Epoch: 10/20...  Training Step: 5908...  Training loss: 1.7811...  0.0528 sec/batch
Epoch: 10/20...  Training Step: 5909...  Training loss: 1.8359...  0.0549 sec/batch
Epoch: 10/20...  Training Step: 5910...  Training loss: 1.8124...  0.0531 sec/batch
Epoch: 10/20...  Training Step: 5911...  Training loss: 1.7969...  0.0547 sec/batch
Epoch: 10/20...  Training Step: 5912...  Training loss: 1.8196...  0.0544 sec/batch
Epoch: 10/20...  Training Step: 5913...  Training loss: 1.8100...  0.0548 sec/batch
Epoch: 10/20...  Training Step: 5914...  Training loss: 1.8165...  0.0530 sec/batch
Epoch: 10/20...  Training Step: 5915...  Training loss: 1.8349...  0.0533 sec/batch
Epoch: 10/20...  Training Step: 5916...  Training loss: 1.8197...  0.0529 se

Epoch: 10/20...  Training Step: 6005...  Training loss: 1.8555...  0.0548 sec/batch
Epoch: 10/20...  Training Step: 6006...  Training loss: 1.8499...  0.0570 sec/batch
Epoch: 10/20...  Training Step: 6007...  Training loss: 1.8123...  0.0539 sec/batch
Epoch: 10/20...  Training Step: 6008...  Training loss: 1.8319...  0.0544 sec/batch
Epoch: 10/20...  Training Step: 6009...  Training loss: 1.8209...  0.0548 sec/batch
Epoch: 10/20...  Training Step: 6010...  Training loss: 1.7947...  0.0539 sec/batch
Epoch: 10/20...  Training Step: 6011...  Training loss: 1.8232...  0.0542 sec/batch
Epoch: 10/20...  Training Step: 6012...  Training loss: 1.8882...  0.0525 sec/batch
Epoch: 10/20...  Training Step: 6013...  Training loss: 1.8459...  0.0527 sec/batch
Epoch: 10/20...  Training Step: 6014...  Training loss: 1.8027...  0.0526 sec/batch
Epoch: 10/20...  Training Step: 6015...  Training loss: 1.7850...  0.0528 sec/batch
Epoch: 10/20...  Training Step: 6016...  Training loss: 1.8258...  0.0550 se

Epoch: 10/20...  Training Step: 6105...  Training loss: 1.8436...  0.0553 sec/batch
Epoch: 10/20...  Training Step: 6106...  Training loss: 1.8799...  0.0543 sec/batch
Epoch: 10/20...  Training Step: 6107...  Training loss: 1.8567...  0.0553 sec/batch
Epoch: 10/20...  Training Step: 6108...  Training loss: 1.8430...  0.0549 sec/batch
Epoch: 10/20...  Training Step: 6109...  Training loss: 1.8288...  0.0528 sec/batch
Epoch: 10/20...  Training Step: 6110...  Training loss: 1.7874...  0.0581 sec/batch
Epoch: 10/20...  Training Step: 6111...  Training loss: 1.8215...  0.0576 sec/batch
Epoch: 10/20...  Training Step: 6112...  Training loss: 1.8064...  0.0551 sec/batch
Epoch: 10/20...  Training Step: 6113...  Training loss: 1.8149...  0.0523 sec/batch
Epoch: 10/20...  Training Step: 6114...  Training loss: 1.8214...  0.0553 sec/batch
Epoch: 10/20...  Training Step: 6115...  Training loss: 1.8262...  0.0549 sec/batch
Epoch: 10/20...  Training Step: 6116...  Training loss: 1.8342...  0.0545 se

Epoch: 11/20...  Training Step: 6205...  Training loss: 1.8142...  0.0553 sec/batch
Epoch: 11/20...  Training Step: 6206...  Training loss: 1.8305...  0.0524 sec/batch
Epoch: 11/20...  Training Step: 6207...  Training loss: 1.7932...  0.0548 sec/batch
Epoch: 11/20...  Training Step: 6208...  Training loss: 1.7682...  0.0566 sec/batch
Epoch: 11/20...  Training Step: 6209...  Training loss: 1.7661...  0.0589 sec/batch
Epoch: 11/20...  Training Step: 6210...  Training loss: 1.7992...  0.0561 sec/batch
Epoch: 11/20...  Training Step: 6211...  Training loss: 1.8154...  0.0532 sec/batch
Epoch: 11/20...  Training Step: 6212...  Training loss: 1.7950...  0.0531 sec/batch
Epoch: 11/20...  Training Step: 6213...  Training loss: 1.8268...  0.0589 sec/batch
Epoch: 11/20...  Training Step: 6214...  Training loss: 1.8036...  0.0547 sec/batch
Epoch: 11/20...  Training Step: 6215...  Training loss: 1.8447...  0.0586 sec/batch
Epoch: 11/20...  Training Step: 6216...  Training loss: 1.8532...  0.0529 se

Epoch: 11/20...  Training Step: 6305...  Training loss: 1.8235...  0.0582 sec/batch
Epoch: 11/20...  Training Step: 6306...  Training loss: 1.7934...  0.0548 sec/batch
Epoch: 11/20...  Training Step: 6307...  Training loss: 1.8425...  0.0527 sec/batch
Epoch: 11/20...  Training Step: 6308...  Training loss: 1.7969...  0.0554 sec/batch
Epoch: 11/20...  Training Step: 6309...  Training loss: 1.8164...  0.0524 sec/batch
Epoch: 11/20...  Training Step: 6310...  Training loss: 1.7778...  0.0579 sec/batch
Epoch: 11/20...  Training Step: 6311...  Training loss: 1.7813...  0.0528 sec/batch
Epoch: 11/20...  Training Step: 6312...  Training loss: 1.8113...  0.0565 sec/batch
Epoch: 11/20...  Training Step: 6313...  Training loss: 1.8079...  0.0578 sec/batch
Epoch: 11/20...  Training Step: 6314...  Training loss: 1.7963...  0.0525 sec/batch
Epoch: 11/20...  Training Step: 6315...  Training loss: 1.8435...  0.0536 sec/batch
Epoch: 11/20...  Training Step: 6316...  Training loss: 1.8438...  0.0527 se

Epoch: 11/20...  Training Step: 6405...  Training loss: 1.8143...  0.0571 sec/batch
Epoch: 11/20...  Training Step: 6406...  Training loss: 1.7818...  0.0568 sec/batch
Epoch: 11/20...  Training Step: 6407...  Training loss: 1.8308...  0.0554 sec/batch
Epoch: 11/20...  Training Step: 6408...  Training loss: 1.8067...  0.0529 sec/batch
Epoch: 11/20...  Training Step: 6409...  Training loss: 1.8318...  0.0526 sec/batch
Epoch: 11/20...  Training Step: 6410...  Training loss: 1.7938...  0.0547 sec/batch
Epoch: 11/20...  Training Step: 6411...  Training loss: 1.8195...  0.0583 sec/batch
Epoch: 11/20...  Training Step: 6412...  Training loss: 1.8276...  0.0577 sec/batch
Epoch: 11/20...  Training Step: 6413...  Training loss: 1.8313...  0.0564 sec/batch
Epoch: 11/20...  Training Step: 6414...  Training loss: 1.8478...  0.0568 sec/batch
Epoch: 11/20...  Training Step: 6415...  Training loss: 1.8456...  0.0523 sec/batch
Epoch: 11/20...  Training Step: 6416...  Training loss: 1.8263...  0.0575 se

Epoch: 11/20...  Training Step: 6505...  Training loss: 1.8106...  0.0554 sec/batch
Epoch: 11/20...  Training Step: 6506...  Training loss: 1.8068...  0.0587 sec/batch
Epoch: 11/20...  Training Step: 6507...  Training loss: 1.8116...  0.0569 sec/batch
Epoch: 11/20...  Training Step: 6508...  Training loss: 1.8019...  0.0546 sec/batch
Epoch: 11/20...  Training Step: 6509...  Training loss: 1.8051...  0.0533 sec/batch
Epoch: 11/20...  Training Step: 6510...  Training loss: 1.7868...  0.0527 sec/batch
Epoch: 11/20...  Training Step: 6511...  Training loss: 1.7726...  0.0528 sec/batch
Epoch: 11/20...  Training Step: 6512...  Training loss: 1.7742...  0.0543 sec/batch
Epoch: 11/20...  Training Step: 6513...  Training loss: 1.7780...  0.0529 sec/batch
Epoch: 11/20...  Training Step: 6514...  Training loss: 1.7719...  0.0545 sec/batch
Epoch: 11/20...  Training Step: 6515...  Training loss: 1.8073...  0.0549 sec/batch
Epoch: 11/20...  Training Step: 6516...  Training loss: 1.8353...  0.0546 se

Epoch: 11/20...  Training Step: 6605...  Training loss: 1.8488...  0.0628 sec/batch
Epoch: 11/20...  Training Step: 6606...  Training loss: 1.8503...  0.0569 sec/batch
Epoch: 11/20...  Training Step: 6607...  Training loss: 1.8669...  0.0527 sec/batch
Epoch: 11/20...  Training Step: 6608...  Training loss: 1.8267...  0.0544 sec/batch
Epoch: 11/20...  Training Step: 6609...  Training loss: 1.8845...  0.0528 sec/batch
Epoch: 11/20...  Training Step: 6610...  Training loss: 1.8563...  0.0531 sec/batch
Epoch: 11/20...  Training Step: 6611...  Training loss: 1.7954...  0.0544 sec/batch
Epoch: 11/20...  Training Step: 6612...  Training loss: 1.8397...  0.0533 sec/batch
Epoch: 11/20...  Training Step: 6613...  Training loss: 1.8386...  0.0560 sec/batch
Epoch: 11/20...  Training Step: 6614...  Training loss: 1.8065...  0.0560 sec/batch
Epoch: 11/20...  Training Step: 6615...  Training loss: 1.7628...  0.0558 sec/batch
Epoch: 11/20...  Training Step: 6616...  Training loss: 1.7856...  0.0583 se

Epoch: 11/20...  Training Step: 6705...  Training loss: 1.7896...  0.0526 sec/batch
Epoch: 11/20...  Training Step: 6706...  Training loss: 1.7750...  0.0532 sec/batch
Epoch: 11/20...  Training Step: 6707...  Training loss: 1.8305...  0.0549 sec/batch
Epoch: 11/20...  Training Step: 6708...  Training loss: 1.7856...  0.0574 sec/batch
Epoch: 11/20...  Training Step: 6709...  Training loss: 1.8351...  0.0529 sec/batch
Epoch: 11/20...  Training Step: 6710...  Training loss: 1.8152...  0.0552 sec/batch
Epoch: 11/20...  Training Step: 6711...  Training loss: 1.8692...  0.0551 sec/batch
Epoch: 11/20...  Training Step: 6712...  Training loss: 1.8407...  0.0566 sec/batch
Epoch: 11/20...  Training Step: 6713...  Training loss: 1.8651...  0.0618 sec/batch
Epoch: 11/20...  Training Step: 6714...  Training loss: 1.8503...  0.0563 sec/batch
Epoch: 11/20...  Training Step: 6715...  Training loss: 1.8200...  0.0538 sec/batch
Epoch: 11/20...  Training Step: 6716...  Training loss: 1.8048...  0.0527 se

Epoch: 11/20...  Training Step: 6805...  Training loss: 1.8201...  0.0554 sec/batch
Epoch: 11/20...  Training Step: 6806...  Training loss: 1.8255...  0.0578 sec/batch
Epoch: 11/20...  Training Step: 6807...  Training loss: 1.7650...  0.0564 sec/batch
Epoch: 11/20...  Training Step: 6808...  Training loss: 1.7797...  0.0548 sec/batch
Epoch: 11/20...  Training Step: 6809...  Training loss: 1.7941...  0.0541 sec/batch
Epoch: 11/20...  Training Step: 6810...  Training loss: 1.8663...  0.0527 sec/batch
Epoch: 11/20...  Training Step: 6811...  Training loss: 1.8715...  0.0534 sec/batch
Epoch: 11/20...  Training Step: 6812...  Training loss: 1.8300...  0.0555 sec/batch
Epoch: 11/20...  Training Step: 6813...  Training loss: 1.7823...  0.0557 sec/batch
Epoch: 11/20...  Training Step: 6814...  Training loss: 1.8179...  0.0529 sec/batch
Epoch: 11/20...  Training Step: 6815...  Training loss: 1.7750...  0.0530 sec/batch
Epoch: 11/20...  Training Step: 6816...  Training loss: 1.8372...  0.0591 se

Epoch: 12/20...  Training Step: 6905...  Training loss: 1.7949...  0.0537 sec/batch
Epoch: 12/20...  Training Step: 6906...  Training loss: 1.8460...  0.0586 sec/batch
Epoch: 12/20...  Training Step: 6907...  Training loss: 1.7716...  0.0534 sec/batch
Epoch: 12/20...  Training Step: 6908...  Training loss: 1.8565...  0.0578 sec/batch
Epoch: 12/20...  Training Step: 6909...  Training loss: 1.7900...  0.0583 sec/batch
Epoch: 12/20...  Training Step: 6910...  Training loss: 1.7956...  0.0553 sec/batch
Epoch: 12/20...  Training Step: 6911...  Training loss: 1.7593...  0.0544 sec/batch
Epoch: 12/20...  Training Step: 6912...  Training loss: 1.8383...  0.0552 sec/batch
Epoch: 12/20...  Training Step: 6913...  Training loss: 1.8090...  0.0555 sec/batch
Epoch: 12/20...  Training Step: 6914...  Training loss: 1.7981...  0.0533 sec/batch
Epoch: 12/20...  Training Step: 6915...  Training loss: 1.8049...  0.0528 sec/batch
Epoch: 12/20...  Training Step: 6916...  Training loss: 1.8355...  0.0529 se

Epoch: 12/20...  Training Step: 7005...  Training loss: 1.7623...  0.0588 sec/batch
Epoch: 12/20...  Training Step: 7006...  Training loss: 1.7860...  0.0530 sec/batch
Epoch: 12/20...  Training Step: 7007...  Training loss: 1.7840...  0.0553 sec/batch
Epoch: 12/20...  Training Step: 7008...  Training loss: 1.7974...  0.0570 sec/batch
Epoch: 12/20...  Training Step: 7009...  Training loss: 1.7916...  0.0590 sec/batch
Epoch: 12/20...  Training Step: 7010...  Training loss: 1.8527...  0.0547 sec/batch
Epoch: 12/20...  Training Step: 7011...  Training loss: 1.8098...  0.0528 sec/batch
Epoch: 12/20...  Training Step: 7012...  Training loss: 1.8413...  0.0525 sec/batch
Epoch: 12/20...  Training Step: 7013...  Training loss: 1.8191...  0.0565 sec/batch
Epoch: 12/20...  Training Step: 7014...  Training loss: 1.7799...  0.0537 sec/batch
Epoch: 12/20...  Training Step: 7015...  Training loss: 1.7633...  0.0531 sec/batch
Epoch: 12/20...  Training Step: 7016...  Training loss: 1.8620...  0.0541 se

Epoch: 12/20...  Training Step: 7105...  Training loss: 1.7743...  0.0551 sec/batch
Epoch: 12/20...  Training Step: 7106...  Training loss: 1.8169...  0.0546 sec/batch
Epoch: 12/20...  Training Step: 7107...  Training loss: 1.7873...  0.0593 sec/batch
Epoch: 12/20...  Training Step: 7108...  Training loss: 1.7953...  0.0554 sec/batch
Epoch: 12/20...  Training Step: 7109...  Training loss: 1.8007...  0.0596 sec/batch
Epoch: 12/20...  Training Step: 7110...  Training loss: 1.8243...  0.0531 sec/batch
Epoch: 12/20...  Training Step: 7111...  Training loss: 1.7871...  0.0545 sec/batch
Epoch: 12/20...  Training Step: 7112...  Training loss: 1.8032...  0.0563 sec/batch
Epoch: 12/20...  Training Step: 7113...  Training loss: 1.7686...  0.0561 sec/batch
Epoch: 12/20...  Training Step: 7114...  Training loss: 1.8184...  0.0567 sec/batch
Epoch: 12/20...  Training Step: 7115...  Training loss: 1.7797...  0.0534 sec/batch
Epoch: 12/20...  Training Step: 7116...  Training loss: 1.7654...  0.0556 se

Epoch: 12/20...  Training Step: 7205...  Training loss: 1.8048...  0.0527 sec/batch
Epoch: 12/20...  Training Step: 7206...  Training loss: 1.7114...  0.0527 sec/batch
Epoch: 12/20...  Training Step: 7207...  Training loss: 1.7254...  0.0549 sec/batch
Epoch: 12/20...  Training Step: 7208...  Training loss: 1.8096...  0.0545 sec/batch
Epoch: 12/20...  Training Step: 7209...  Training loss: 1.7483...  0.0556 sec/batch
Epoch: 12/20...  Training Step: 7210...  Training loss: 1.7839...  0.0528 sec/batch
Epoch: 12/20...  Training Step: 7211...  Training loss: 1.8101...  0.0586 sec/batch
Epoch: 12/20...  Training Step: 7212...  Training loss: 1.7466...  0.0543 sec/batch
Epoch: 12/20...  Training Step: 7213...  Training loss: 1.7789...  0.0553 sec/batch
Epoch: 12/20...  Training Step: 7214...  Training loss: 1.8126...  0.0577 sec/batch
Epoch: 12/20...  Training Step: 7215...  Training loss: 1.7496...  0.0535 sec/batch
Epoch: 12/20...  Training Step: 7216...  Training loss: 1.8125...  0.0533 se

Epoch: 12/20...  Training Step: 7305...  Training loss: 1.8741...  0.0535 sec/batch
Epoch: 12/20...  Training Step: 7306...  Training loss: 1.7760...  0.0539 sec/batch
Epoch: 12/20...  Training Step: 7307...  Training loss: 1.8233...  0.0547 sec/batch
Epoch: 12/20...  Training Step: 7308...  Training loss: 1.7867...  0.0546 sec/batch
Epoch: 12/20...  Training Step: 7309...  Training loss: 1.8147...  0.0572 sec/batch
Epoch: 12/20...  Training Step: 7310...  Training loss: 1.7754...  0.0550 sec/batch
Epoch: 12/20...  Training Step: 7311...  Training loss: 1.7778...  0.0537 sec/batch
Epoch: 12/20...  Training Step: 7312...  Training loss: 1.8036...  0.0526 sec/batch
Epoch: 12/20...  Training Step: 7313...  Training loss: 1.7877...  0.0583 sec/batch
Epoch: 12/20...  Training Step: 7314...  Training loss: 1.7556...  0.0550 sec/batch
Epoch: 12/20...  Training Step: 7315...  Training loss: 1.7597...  0.0558 sec/batch
Epoch: 12/20...  Training Step: 7316...  Training loss: 1.7816...  0.0551 se

Epoch: 12/20...  Training Step: 7405...  Training loss: 1.8045...  0.0523 sec/batch
Epoch: 12/20...  Training Step: 7406...  Training loss: 1.8067...  0.0536 sec/batch
Epoch: 12/20...  Training Step: 7407...  Training loss: 1.7619...  0.0576 sec/batch
Epoch: 12/20...  Training Step: 7408...  Training loss: 1.7933...  0.0572 sec/batch
Epoch: 12/20...  Training Step: 7409...  Training loss: 1.7897...  0.0532 sec/batch
Epoch: 12/20...  Training Step: 7410...  Training loss: 1.8375...  0.0546 sec/batch
Epoch: 12/20...  Training Step: 7411...  Training loss: 1.7671...  0.0581 sec/batch
Epoch: 12/20...  Training Step: 7412...  Training loss: 1.8369...  0.0582 sec/batch
Epoch: 12/20...  Training Step: 7413...  Training loss: 1.7872...  0.0566 sec/batch
Epoch: 12/20...  Training Step: 7414...  Training loss: 1.7646...  0.0545 sec/batch
Epoch: 12/20...  Training Step: 7415...  Training loss: 1.7617...  0.0536 sec/batch
Epoch: 12/20...  Training Step: 7416...  Training loss: 1.7663...  0.0528 se

Epoch: 13/20...  Training Step: 7505...  Training loss: 1.7435...  0.0529 sec/batch
Epoch: 13/20...  Training Step: 7506...  Training loss: 1.7120...  0.0588 sec/batch
Epoch: 13/20...  Training Step: 7507...  Training loss: 1.7562...  0.0587 sec/batch
Epoch: 13/20...  Training Step: 7508...  Training loss: 1.7515...  0.0562 sec/batch
Epoch: 13/20...  Training Step: 7509...  Training loss: 1.7955...  0.0555 sec/batch
Epoch: 13/20...  Training Step: 7510...  Training loss: 1.7807...  0.0569 sec/batch
Epoch: 13/20...  Training Step: 7511...  Training loss: 1.8251...  0.0532 sec/batch
Epoch: 13/20...  Training Step: 7512...  Training loss: 1.7922...  0.0579 sec/batch
Epoch: 13/20...  Training Step: 7513...  Training loss: 1.7089...  0.0557 sec/batch
Epoch: 13/20...  Training Step: 7514...  Training loss: 1.7493...  0.0531 sec/batch
Epoch: 13/20...  Training Step: 7515...  Training loss: 1.8325...  0.0528 sec/batch
Epoch: 13/20...  Training Step: 7516...  Training loss: 1.8036...  0.0524 se

Epoch: 13/20...  Training Step: 7605...  Training loss: 1.7919...  0.0538 sec/batch
Epoch: 13/20...  Training Step: 7606...  Training loss: 1.8021...  0.0555 sec/batch
Epoch: 13/20...  Training Step: 7607...  Training loss: 1.7856...  0.0583 sec/batch
Epoch: 13/20...  Training Step: 7608...  Training loss: 1.7846...  0.0553 sec/batch
Epoch: 13/20...  Training Step: 7609...  Training loss: 1.7778...  0.0536 sec/batch
Epoch: 13/20...  Training Step: 7610...  Training loss: 1.7440...  0.0573 sec/batch
Epoch: 13/20...  Training Step: 7611...  Training loss: 1.7656...  0.0553 sec/batch
Epoch: 13/20...  Training Step: 7612...  Training loss: 1.7877...  0.0526 sec/batch
Epoch: 13/20...  Training Step: 7613...  Training loss: 1.7784...  0.0565 sec/batch
Epoch: 13/20...  Training Step: 7614...  Training loss: 1.7528...  0.0536 sec/batch
Epoch: 13/20...  Training Step: 7615...  Training loss: 1.7420...  0.0574 sec/batch
Epoch: 13/20...  Training Step: 7616...  Training loss: 1.7816...  0.0550 se

Epoch: 13/20...  Training Step: 7705...  Training loss: 1.7845...  0.0558 sec/batch
Epoch: 13/20...  Training Step: 7706...  Training loss: 1.7209...  0.0548 sec/batch
Epoch: 13/20...  Training Step: 7707...  Training loss: 1.7649...  0.0548 sec/batch
Epoch: 13/20...  Training Step: 7708...  Training loss: 1.7982...  0.0570 sec/batch
Epoch: 13/20...  Training Step: 7709...  Training loss: 1.7684...  0.0538 sec/batch
Epoch: 13/20...  Training Step: 7710...  Training loss: 1.7319...  0.0527 sec/batch
Epoch: 13/20...  Training Step: 7711...  Training loss: 1.7392...  0.0562 sec/batch
Epoch: 13/20...  Training Step: 7712...  Training loss: 1.7773...  0.0572 sec/batch
Epoch: 13/20...  Training Step: 7713...  Training loss: 1.7643...  0.0539 sec/batch
Epoch: 13/20...  Training Step: 7714...  Training loss: 1.7557...  0.0546 sec/batch
Epoch: 13/20...  Training Step: 7715...  Training loss: 1.7992...  0.0537 sec/batch
Epoch: 13/20...  Training Step: 7716...  Training loss: 1.8127...  0.0573 se

Epoch: 13/20...  Training Step: 7805...  Training loss: 1.7683...  0.0535 sec/batch
Epoch: 13/20...  Training Step: 7806...  Training loss: 1.7870...  0.0545 sec/batch
Epoch: 13/20...  Training Step: 7807...  Training loss: 1.7602...  0.0546 sec/batch
Epoch: 13/20...  Training Step: 7808...  Training loss: 1.8094...  0.0569 sec/batch
Epoch: 13/20...  Training Step: 7809...  Training loss: 1.7921...  0.0526 sec/batch
Epoch: 13/20...  Training Step: 7810...  Training loss: 1.7491...  0.0549 sec/batch
Epoch: 13/20...  Training Step: 7811...  Training loss: 1.7738...  0.0553 sec/batch
Epoch: 13/20...  Training Step: 7812...  Training loss: 1.8431...  0.0584 sec/batch
Epoch: 13/20...  Training Step: 7813...  Training loss: 1.7886...  0.0560 sec/batch
Epoch: 13/20...  Training Step: 7814...  Training loss: 1.7788...  0.0535 sec/batch
Epoch: 13/20...  Training Step: 7815...  Training loss: 1.7717...  0.0588 sec/batch
Epoch: 13/20...  Training Step: 7816...  Training loss: 1.7610...  0.0569 se

Epoch: 13/20...  Training Step: 7905...  Training loss: 1.8192...  0.0557 sec/batch
Epoch: 13/20...  Training Step: 7906...  Training loss: 1.8017...  0.0546 sec/batch
Epoch: 13/20...  Training Step: 7907...  Training loss: 1.7563...  0.0549 sec/batch
Epoch: 13/20...  Training Step: 7908...  Training loss: 1.7619...  0.0548 sec/batch
Epoch: 13/20...  Training Step: 7909...  Training loss: 1.8002...  0.0545 sec/batch
Epoch: 13/20...  Training Step: 7910...  Training loss: 1.7530...  0.0533 sec/batch
Epoch: 13/20...  Training Step: 7911...  Training loss: 1.7838...  0.0558 sec/batch
Epoch: 13/20...  Training Step: 7912...  Training loss: 1.7887...  0.0557 sec/batch
Epoch: 13/20...  Training Step: 7913...  Training loss: 1.7478...  0.0574 sec/batch
Epoch: 13/20...  Training Step: 7914...  Training loss: 1.7347...  0.0536 sec/batch
Epoch: 13/20...  Training Step: 7915...  Training loss: 1.8320...  0.0548 sec/batch
Epoch: 13/20...  Training Step: 7916...  Training loss: 1.8162...  0.0533 se

Epoch: 13/20...  Training Step: 8005...  Training loss: 1.8705...  0.0529 sec/batch
Epoch: 13/20...  Training Step: 8006...  Training loss: 1.8437...  0.0528 sec/batch
Epoch: 13/20...  Training Step: 8007...  Training loss: 1.7743...  0.0556 sec/batch
Epoch: 13/20...  Training Step: 8008...  Training loss: 1.8396...  0.0552 sec/batch
Epoch: 13/20...  Training Step: 8009...  Training loss: 1.7670...  0.0575 sec/batch
Epoch: 13/20...  Training Step: 8010...  Training loss: 1.7866...  0.0528 sec/batch
Epoch: 13/20...  Training Step: 8011...  Training loss: 1.7980...  0.0547 sec/batch
Epoch: 13/20...  Training Step: 8012...  Training loss: 1.8189...  0.0532 sec/batch
Epoch: 13/20...  Training Step: 8013...  Training loss: 1.7641...  0.0562 sec/batch
Epoch: 13/20...  Training Step: 8014...  Training loss: 1.7577...  0.0555 sec/batch
Epoch: 13/20...  Training Step: 8015...  Training loss: 1.7785...  0.0570 sec/batch
Epoch: 13/20...  Training Step: 8016...  Training loss: 1.8109...  0.0533 se

Epoch: 14/20...  Training Step: 8105...  Training loss: 1.7538...  0.0545 sec/batch
Epoch: 14/20...  Training Step: 8106...  Training loss: 1.7502...  0.0527 sec/batch
Epoch: 14/20...  Training Step: 8107...  Training loss: 1.6469...  0.0578 sec/batch
Epoch: 14/20...  Training Step: 8108...  Training loss: 1.7613...  0.0573 sec/batch
Epoch: 14/20...  Training Step: 8109...  Training loss: 1.7268...  0.0566 sec/batch
Epoch: 14/20...  Training Step: 8110...  Training loss: 1.7880...  0.0548 sec/batch
Epoch: 14/20...  Training Step: 8111...  Training loss: 1.7607...  0.0549 sec/batch
Epoch: 14/20...  Training Step: 8112...  Training loss: 1.7364...  0.0524 sec/batch
Epoch: 14/20...  Training Step: 8113...  Training loss: 1.7599...  0.0595 sec/batch
Epoch: 14/20...  Training Step: 8114...  Training loss: 1.7937...  0.0552 sec/batch
Epoch: 14/20...  Training Step: 8115...  Training loss: 1.7789...  0.0565 sec/batch
Epoch: 14/20...  Training Step: 8116...  Training loss: 1.7593...  0.0526 se

Epoch: 14/20...  Training Step: 8205...  Training loss: 1.7113...  0.0618 sec/batch
Epoch: 14/20...  Training Step: 8206...  Training loss: 1.7953...  0.0529 sec/batch
Epoch: 14/20...  Training Step: 8207...  Training loss: 1.7846...  0.0545 sec/batch
Epoch: 14/20...  Training Step: 8208...  Training loss: 1.7939...  0.0535 sec/batch
Epoch: 14/20...  Training Step: 8209...  Training loss: 1.7735...  0.0590 sec/batch
Epoch: 14/20...  Training Step: 8210...  Training loss: 1.7890...  0.0521 sec/batch
Epoch: 14/20...  Training Step: 8211...  Training loss: 1.7892...  0.0570 sec/batch
Epoch: 14/20...  Training Step: 8212...  Training loss: 1.7465...  0.0548 sec/batch
Epoch: 14/20...  Training Step: 8213...  Training loss: 1.7504...  0.0582 sec/batch
Epoch: 14/20...  Training Step: 8214...  Training loss: 1.8038...  0.0528 sec/batch
Epoch: 14/20...  Training Step: 8215...  Training loss: 1.7510...  0.0524 sec/batch
Epoch: 14/20...  Training Step: 8216...  Training loss: 1.7655...  0.0527 se

Epoch: 14/20...  Training Step: 8305...  Training loss: 1.7761...  0.0532 sec/batch
Epoch: 14/20...  Training Step: 8306...  Training loss: 1.7693...  0.0565 sec/batch
Epoch: 14/20...  Training Step: 8307...  Training loss: 1.7702...  0.0543 sec/batch
Epoch: 14/20...  Training Step: 8308...  Training loss: 1.7996...  0.0563 sec/batch
Epoch: 14/20...  Training Step: 8309...  Training loss: 1.7317...  0.0574 sec/batch
Epoch: 14/20...  Training Step: 8310...  Training loss: 1.7427...  0.0528 sec/batch
Epoch: 14/20...  Training Step: 8311...  Training loss: 1.7729...  0.0531 sec/batch
Epoch: 14/20...  Training Step: 8312...  Training loss: 1.7534...  0.0565 sec/batch
Epoch: 14/20...  Training Step: 8313...  Training loss: 1.7909...  0.0567 sec/batch
Epoch: 14/20...  Training Step: 8314...  Training loss: 1.7839...  0.0590 sec/batch
Epoch: 14/20...  Training Step: 8315...  Training loss: 1.8134...  0.0526 sec/batch
Epoch: 14/20...  Training Step: 8316...  Training loss: 1.7638...  0.0574 se

Epoch: 14/20...  Training Step: 8405...  Training loss: 1.7396...  0.0536 sec/batch
Epoch: 14/20...  Training Step: 8406...  Training loss: 1.7710...  0.0531 sec/batch
Epoch: 14/20...  Training Step: 8407...  Training loss: 1.7610...  0.0587 sec/batch
Epoch: 14/20...  Training Step: 8408...  Training loss: 1.7790...  0.0551 sec/batch
Epoch: 14/20...  Training Step: 8409...  Training loss: 1.7558...  0.0536 sec/batch
Epoch: 14/20...  Training Step: 8410...  Training loss: 1.7530...  0.0528 sec/batch
Epoch: 14/20...  Training Step: 8411...  Training loss: 1.8137...  0.0544 sec/batch
Epoch: 14/20...  Training Step: 8412...  Training loss: 1.7776...  0.0552 sec/batch
Epoch: 14/20...  Training Step: 8413...  Training loss: 1.7611...  0.0526 sec/batch
Epoch: 14/20...  Training Step: 8414...  Training loss: 1.7321...  0.0531 sec/batch
Epoch: 14/20...  Training Step: 8415...  Training loss: 1.7270...  0.0525 sec/batch
Epoch: 14/20...  Training Step: 8416...  Training loss: 1.8084...  0.0584 se

Epoch: 14/20...  Training Step: 8505...  Training loss: 1.6837...  0.0529 sec/batch
Epoch: 14/20...  Training Step: 8506...  Training loss: 1.7541...  0.0529 sec/batch
Epoch: 14/20...  Training Step: 8507...  Training loss: 1.7146...  0.0525 sec/batch
Epoch: 14/20...  Training Step: 8508...  Training loss: 1.7098...  0.0548 sec/batch
Epoch: 14/20...  Training Step: 8509...  Training loss: 1.7368...  0.0568 sec/batch
Epoch: 14/20...  Training Step: 8510...  Training loss: 1.8184...  0.0525 sec/batch
Epoch: 14/20...  Training Step: 8511...  Training loss: 1.7957...  0.0522 sec/batch
Epoch: 14/20...  Training Step: 8512...  Training loss: 1.7793...  0.0528 sec/batch
Epoch: 14/20...  Training Step: 8513...  Training loss: 1.7262...  0.0544 sec/batch
Epoch: 14/20...  Training Step: 8514...  Training loss: 1.7677...  0.0578 sec/batch
Epoch: 14/20...  Training Step: 8515...  Training loss: 1.7198...  0.0528 sec/batch
Epoch: 14/20...  Training Step: 8516...  Training loss: 1.7533...  0.0577 se

Epoch: 14/20...  Training Step: 8605...  Training loss: 1.7777...  0.0550 sec/batch
Epoch: 14/20...  Training Step: 8606...  Training loss: 1.7565...  0.0558 sec/batch
Epoch: 14/20...  Training Step: 8607...  Training loss: 1.7947...  0.0527 sec/batch
Epoch: 14/20...  Training Step: 8608...  Training loss: 1.7907...  0.0541 sec/batch
Epoch: 14/20...  Training Step: 8609...  Training loss: 1.7965...  0.0541 sec/batch
Epoch: 14/20...  Training Step: 8610...  Training loss: 1.7309...  0.0530 sec/batch
Epoch: 14/20...  Training Step: 8611...  Training loss: 1.7816...  0.0540 sec/batch
Epoch: 14/20...  Training Step: 8612...  Training loss: 1.7077...  0.0548 sec/batch
Epoch: 14/20...  Training Step: 8613...  Training loss: 1.7719...  0.0585 sec/batch
Epoch: 14/20...  Training Step: 8614...  Training loss: 1.7262...  0.0584 sec/batch
Epoch: 14/20...  Training Step: 8615...  Training loss: 1.7633...  0.0528 sec/batch
Epoch: 14/20...  Training Step: 8616...  Training loss: 1.7525...  0.0563 se

Epoch: 15/20...  Training Step: 8705...  Training loss: 1.7472...  0.0564 sec/batch
Epoch: 15/20...  Training Step: 8706...  Training loss: 1.7265...  0.0546 sec/batch
Epoch: 15/20...  Training Step: 8707...  Training loss: 1.7479...  0.0592 sec/batch
Epoch: 15/20...  Training Step: 8708...  Training loss: 1.7645...  0.0550 sec/batch
Epoch: 15/20...  Training Step: 8709...  Training loss: 1.7599...  0.0581 sec/batch
Epoch: 15/20...  Training Step: 8710...  Training loss: 1.7066...  0.0522 sec/batch
Epoch: 15/20...  Training Step: 8711...  Training loss: 1.7292...  0.0547 sec/batch
Epoch: 15/20...  Training Step: 8712...  Training loss: 1.7625...  0.0563 sec/batch
Epoch: 15/20...  Training Step: 8713...  Training loss: 1.7461...  0.0548 sec/batch
Epoch: 15/20...  Training Step: 8714...  Training loss: 1.7382...  0.0577 sec/batch
Epoch: 15/20...  Training Step: 8715...  Training loss: 1.7331...  0.0528 sec/batch
Epoch: 15/20...  Training Step: 8716...  Training loss: 1.7383...  0.0548 se

Epoch: 15/20...  Training Step: 8805...  Training loss: 1.7757...  0.0564 sec/batch
Epoch: 15/20...  Training Step: 8806...  Training loss: 1.7691...  0.0528 sec/batch
Epoch: 15/20...  Training Step: 8807...  Training loss: 1.8238...  0.0552 sec/batch
Epoch: 15/20...  Training Step: 8808...  Training loss: 1.7219...  0.0530 sec/batch
Epoch: 15/20...  Training Step: 8809...  Training loss: 1.7707...  0.0521 sec/batch
Epoch: 15/20...  Training Step: 8810...  Training loss: 1.7860...  0.0578 sec/batch
Epoch: 15/20...  Training Step: 8811...  Training loss: 1.7561...  0.0545 sec/batch
Epoch: 15/20...  Training Step: 8812...  Training loss: 1.8025...  0.0524 sec/batch
Epoch: 15/20...  Training Step: 8813...  Training loss: 1.7853...  0.0556 sec/batch
Epoch: 15/20...  Training Step: 8814...  Training loss: 1.7686...  0.0522 sec/batch
Epoch: 15/20...  Training Step: 8815...  Training loss: 1.7459...  0.0530 sec/batch
Epoch: 15/20...  Training Step: 8816...  Training loss: 1.7404...  0.0524 se

Epoch: 15/20...  Training Step: 8905...  Training loss: 1.7487...  0.0550 sec/batch
Epoch: 15/20...  Training Step: 8906...  Training loss: 1.7866...  0.0547 sec/batch
Epoch: 15/20...  Training Step: 8907...  Training loss: 1.7771...  0.0551 sec/batch
Epoch: 15/20...  Training Step: 8908...  Training loss: 1.7264...  0.0548 sec/batch
Epoch: 15/20...  Training Step: 8909...  Training loss: 1.7790...  0.0529 sec/batch
Epoch: 15/20...  Training Step: 8910...  Training loss: 1.7385...  0.0526 sec/batch
Epoch: 15/20...  Training Step: 8911...  Training loss: 1.8008...  0.0529 sec/batch
Epoch: 15/20...  Training Step: 8912...  Training loss: 1.7225...  0.0552 sec/batch
Epoch: 15/20...  Training Step: 8913...  Training loss: 1.7148...  0.0542 sec/batch
Epoch: 15/20...  Training Step: 8914...  Training loss: 1.7409...  0.0565 sec/batch
Epoch: 15/20...  Training Step: 8915...  Training loss: 1.7222...  0.0522 sec/batch
Epoch: 15/20...  Training Step: 8916...  Training loss: 1.7754...  0.0547 se

Epoch: 15/20...  Training Step: 9005...  Training loss: 1.7102...  0.0531 sec/batch
Epoch: 15/20...  Training Step: 9006...  Training loss: 1.7235...  0.0525 sec/batch
Epoch: 15/20...  Training Step: 9007...  Training loss: 1.7098...  0.0544 sec/batch
Epoch: 15/20...  Training Step: 9008...  Training loss: 1.7076...  0.0560 sec/batch
Epoch: 15/20...  Training Step: 9009...  Training loss: 1.7486...  0.0549 sec/batch
Epoch: 15/20...  Training Step: 9010...  Training loss: 1.7357...  0.0545 sec/batch
Epoch: 15/20...  Training Step: 9011...  Training loss: 1.7098...  0.0548 sec/batch
Epoch: 15/20...  Training Step: 9012...  Training loss: 1.7367...  0.0546 sec/batch
Epoch: 15/20...  Training Step: 9013...  Training loss: 1.7297...  0.0575 sec/batch
Epoch: 15/20...  Training Step: 9014...  Training loss: 1.7369...  0.0582 sec/batch
Epoch: 15/20...  Training Step: 9015...  Training loss: 1.7421...  0.0547 sec/batch
Epoch: 15/20...  Training Step: 9016...  Training loss: 1.7422...  0.0544 se

Epoch: 15/20...  Training Step: 9105...  Training loss: 1.7797...  0.0557 sec/batch
Epoch: 15/20...  Training Step: 9106...  Training loss: 1.7510...  0.0558 sec/batch
Epoch: 15/20...  Training Step: 9107...  Training loss: 1.7071...  0.0548 sec/batch
Epoch: 15/20...  Training Step: 9108...  Training loss: 1.7762...  0.0526 sec/batch
Epoch: 15/20...  Training Step: 9109...  Training loss: 1.7335...  0.0545 sec/batch
Epoch: 15/20...  Training Step: 9110...  Training loss: 1.6881...  0.0529 sec/batch
Epoch: 15/20...  Training Step: 9111...  Training loss: 1.7428...  0.0547 sec/batch
Epoch: 15/20...  Training Step: 9112...  Training loss: 1.7866...  0.0525 sec/batch
Epoch: 15/20...  Training Step: 9113...  Training loss: 1.7547...  0.0546 sec/batch
Epoch: 15/20...  Training Step: 9114...  Training loss: 1.7366...  0.0525 sec/batch
Epoch: 15/20...  Training Step: 9115...  Training loss: 1.7198...  0.0548 sec/batch
Epoch: 15/20...  Training Step: 9116...  Training loss: 1.7263...  0.0567 se

Epoch: 15/20...  Training Step: 9205...  Training loss: 1.7783...  0.0559 sec/batch
Epoch: 15/20...  Training Step: 9206...  Training loss: 1.7940...  0.0549 sec/batch
Epoch: 15/20...  Training Step: 9207...  Training loss: 1.7675...  0.0578 sec/batch
Epoch: 15/20...  Training Step: 9208...  Training loss: 1.7744...  0.0527 sec/batch
Epoch: 15/20...  Training Step: 9209...  Training loss: 1.7393...  0.0529 sec/batch
Epoch: 15/20...  Training Step: 9210...  Training loss: 1.7081...  0.0541 sec/batch
Epoch: 15/20...  Training Step: 9211...  Training loss: 1.7272...  0.0578 sec/batch
Epoch: 15/20...  Training Step: 9212...  Training loss: 1.7344...  0.0521 sec/batch
Epoch: 15/20...  Training Step: 9213...  Training loss: 1.7429...  0.0554 sec/batch
Epoch: 15/20...  Training Step: 9214...  Training loss: 1.7304...  0.0526 sec/batch
Epoch: 15/20...  Training Step: 9215...  Training loss: 1.7504...  0.0528 sec/batch
Epoch: 15/20...  Training Step: 9216...  Training loss: 1.7399...  0.0547 se

Epoch: 16/20...  Training Step: 9305...  Training loss: 1.7394...  0.0564 sec/batch
Epoch: 16/20...  Training Step: 9306...  Training loss: 1.7668...  0.0564 sec/batch
Epoch: 16/20...  Training Step: 9307...  Training loss: 1.7074...  0.0546 sec/batch
Epoch: 16/20...  Training Step: 9308...  Training loss: 1.6909...  0.0571 sec/batch
Epoch: 16/20...  Training Step: 9309...  Training loss: 1.6933...  0.0532 sec/batch
Epoch: 16/20...  Training Step: 9310...  Training loss: 1.7136...  0.0551 sec/batch
Epoch: 16/20...  Training Step: 9311...  Training loss: 1.7253...  0.0573 sec/batch
Epoch: 16/20...  Training Step: 9312...  Training loss: 1.7020...  0.0546 sec/batch
Epoch: 16/20...  Training Step: 9313...  Training loss: 1.7400...  0.0576 sec/batch
Epoch: 16/20...  Training Step: 9314...  Training loss: 1.7065...  0.0528 sec/batch
Epoch: 16/20...  Training Step: 9315...  Training loss: 1.7732...  0.0544 sec/batch
Epoch: 16/20...  Training Step: 9316...  Training loss: 1.7807...  0.0577 se

Epoch: 16/20...  Training Step: 9405...  Training loss: 1.7367...  0.0527 sec/batch
Epoch: 16/20...  Training Step: 9406...  Training loss: 1.7097...  0.0523 sec/batch
Epoch: 16/20...  Training Step: 9407...  Training loss: 1.7741...  0.0555 sec/batch
Epoch: 16/20...  Training Step: 9408...  Training loss: 1.7331...  0.0542 sec/batch
Epoch: 16/20...  Training Step: 9409...  Training loss: 1.7473...  0.0560 sec/batch
Epoch: 16/20...  Training Step: 9410...  Training loss: 1.6948...  0.0586 sec/batch
Epoch: 16/20...  Training Step: 9411...  Training loss: 1.6941...  0.0529 sec/batch
Epoch: 16/20...  Training Step: 9412...  Training loss: 1.7285...  0.0526 sec/batch
Epoch: 16/20...  Training Step: 9413...  Training loss: 1.7312...  0.0583 sec/batch
Epoch: 16/20...  Training Step: 9414...  Training loss: 1.7103...  0.0609 sec/batch
Epoch: 16/20...  Training Step: 9415...  Training loss: 1.7277...  0.0529 sec/batch
Epoch: 16/20...  Training Step: 9416...  Training loss: 1.7984...  0.0534 se

Epoch: 16/20...  Training Step: 9505...  Training loss: 1.7496...  0.0585 sec/batch
Epoch: 16/20...  Training Step: 9506...  Training loss: 1.7192...  0.0520 sec/batch
Epoch: 16/20...  Training Step: 9507...  Training loss: 1.7719...  0.0546 sec/batch
Epoch: 16/20...  Training Step: 9508...  Training loss: 1.7348...  0.0539 sec/batch
Epoch: 16/20...  Training Step: 9509...  Training loss: 1.7270...  0.0527 sec/batch
Epoch: 16/20...  Training Step: 9510...  Training loss: 1.7210...  0.0527 sec/batch
Epoch: 16/20...  Training Step: 9511...  Training loss: 1.7283...  0.0562 sec/batch
Epoch: 16/20...  Training Step: 9512...  Training loss: 1.7747...  0.0547 sec/batch
Epoch: 16/20...  Training Step: 9513...  Training loss: 1.7522...  0.0527 sec/batch
Epoch: 16/20...  Training Step: 9514...  Training loss: 1.7625...  0.0531 sec/batch
Epoch: 16/20...  Training Step: 9515...  Training loss: 1.7533...  0.0526 sec/batch
Epoch: 16/20...  Training Step: 9516...  Training loss: 1.7642...  0.0528 se

Epoch: 16/20...  Training Step: 9605...  Training loss: 1.7124...  0.0544 sec/batch
Epoch: 16/20...  Training Step: 9606...  Training loss: 1.7376...  0.0550 sec/batch
Epoch: 16/20...  Training Step: 9607...  Training loss: 1.7606...  0.0551 sec/batch
Epoch: 16/20...  Training Step: 9608...  Training loss: 1.7261...  0.0565 sec/batch
Epoch: 16/20...  Training Step: 9609...  Training loss: 1.7215...  0.0562 sec/batch
Epoch: 16/20...  Training Step: 9610...  Training loss: 1.7167...  0.0530 sec/batch
Epoch: 16/20...  Training Step: 9611...  Training loss: 1.7046...  0.0532 sec/batch
Epoch: 16/20...  Training Step: 9612...  Training loss: 1.6830...  0.0534 sec/batch
Epoch: 16/20...  Training Step: 9613...  Training loss: 1.7139...  0.0528 sec/batch
Epoch: 16/20...  Training Step: 9614...  Training loss: 1.6861...  0.0535 sec/batch
Epoch: 16/20...  Training Step: 9615...  Training loss: 1.7321...  0.0533 sec/batch
Epoch: 16/20...  Training Step: 9616...  Training loss: 1.7685...  0.0535 se

Epoch: 16/20...  Training Step: 9705...  Training loss: 1.7974...  0.0536 sec/batch
Epoch: 16/20...  Training Step: 9706...  Training loss: 1.7598...  0.0554 sec/batch
Epoch: 16/20...  Training Step: 9707...  Training loss: 1.7891...  0.0528 sec/batch
Epoch: 16/20...  Training Step: 9708...  Training loss: 1.7519...  0.0545 sec/batch
Epoch: 16/20...  Training Step: 9709...  Training loss: 1.7837...  0.0525 sec/batch
Epoch: 16/20...  Training Step: 9710...  Training loss: 1.7761...  0.0553 sec/batch
Epoch: 16/20...  Training Step: 9711...  Training loss: 1.7021...  0.0529 sec/batch
Epoch: 16/20...  Training Step: 9712...  Training loss: 1.7731...  0.0545 sec/batch
Epoch: 16/20...  Training Step: 9713...  Training loss: 1.7722...  0.0576 sec/batch
Epoch: 16/20...  Training Step: 9714...  Training loss: 1.7273...  0.0550 sec/batch
Epoch: 16/20...  Training Step: 9715...  Training loss: 1.7093...  0.0587 sec/batch
Epoch: 16/20...  Training Step: 9716...  Training loss: 1.7126...  0.0586 se

Epoch: 16/20...  Training Step: 9805...  Training loss: 1.6798...  0.0670 sec/batch
Epoch: 16/20...  Training Step: 9806...  Training loss: 1.7023...  0.0537 sec/batch
Epoch: 16/20...  Training Step: 9807...  Training loss: 1.7703...  0.0528 sec/batch
Epoch: 16/20...  Training Step: 9808...  Training loss: 1.7184...  0.0578 sec/batch
Epoch: 16/20...  Training Step: 9809...  Training loss: 1.7722...  0.0552 sec/batch
Epoch: 16/20...  Training Step: 9810...  Training loss: 1.7431...  0.0549 sec/batch
Epoch: 16/20...  Training Step: 9811...  Training loss: 1.7913...  0.0570 sec/batch
Epoch: 16/20...  Training Step: 9812...  Training loss: 1.7652...  0.0537 sec/batch
Epoch: 16/20...  Training Step: 9813...  Training loss: 1.7719...  0.0526 sec/batch
Epoch: 16/20...  Training Step: 9814...  Training loss: 1.7549...  0.0528 sec/batch
Epoch: 16/20...  Training Step: 9815...  Training loss: 1.7380...  0.0524 sec/batch
Epoch: 16/20...  Training Step: 9816...  Training loss: 1.7210...  0.0526 se

Epoch: 16/20...  Training Step: 9905...  Training loss: 1.7328...  0.0549 sec/batch
Epoch: 16/20...  Training Step: 9906...  Training loss: 1.7387...  0.0534 sec/batch
Epoch: 16/20...  Training Step: 9907...  Training loss: 1.6880...  0.0529 sec/batch
Epoch: 16/20...  Training Step: 9908...  Training loss: 1.6937...  0.0556 sec/batch
Epoch: 16/20...  Training Step: 9909...  Training loss: 1.7456...  0.0595 sec/batch
Epoch: 16/20...  Training Step: 9910...  Training loss: 1.7827...  0.0529 sec/batch
Epoch: 16/20...  Training Step: 9911...  Training loss: 1.7938...  0.0531 sec/batch
Epoch: 16/20...  Training Step: 9912...  Training loss: 1.7373...  0.0573 sec/batch
Epoch: 16/20...  Training Step: 9913...  Training loss: 1.7007...  0.0556 sec/batch
Epoch: 16/20...  Training Step: 9914...  Training loss: 1.7547...  0.0581 sec/batch
Epoch: 16/20...  Training Step: 9915...  Training loss: 1.6924...  0.0560 sec/batch
Epoch: 16/20...  Training Step: 9916...  Training loss: 1.7662...  0.0590 se

Epoch: 17/20...  Training Step: 10005...  Training loss: 1.7016...  0.0534 sec/batch
Epoch: 17/20...  Training Step: 10006...  Training loss: 1.7677...  0.0521 sec/batch
Epoch: 17/20...  Training Step: 10007...  Training loss: 1.6908...  0.0535 sec/batch
Epoch: 17/20...  Training Step: 10008...  Training loss: 1.7776...  0.0573 sec/batch
Epoch: 17/20...  Training Step: 10009...  Training loss: 1.7222...  0.0573 sec/batch
Epoch: 17/20...  Training Step: 10010...  Training loss: 1.7410...  0.0576 sec/batch
Epoch: 17/20...  Training Step: 10011...  Training loss: 1.7178...  0.0535 sec/batch
Epoch: 17/20...  Training Step: 10012...  Training loss: 1.7785...  0.0562 sec/batch
Epoch: 17/20...  Training Step: 10013...  Training loss: 1.7498...  0.0533 sec/batch
Epoch: 17/20...  Training Step: 10014...  Training loss: 1.7291...  0.0562 sec/batch
Epoch: 17/20...  Training Step: 10015...  Training loss: 1.7036...  0.0527 sec/batch
Epoch: 17/20...  Training Step: 10016...  Training loss: 1.7633..

Epoch: 17/20...  Training Step: 10105...  Training loss: 1.6876...  0.0585 sec/batch
Epoch: 17/20...  Training Step: 10106...  Training loss: 1.7311...  0.0529 sec/batch
Epoch: 17/20...  Training Step: 10107...  Training loss: 1.7190...  0.0532 sec/batch
Epoch: 17/20...  Training Step: 10108...  Training loss: 1.7209...  0.0554 sec/batch
Epoch: 17/20...  Training Step: 10109...  Training loss: 1.7274...  0.0578 sec/batch
Epoch: 17/20...  Training Step: 10110...  Training loss: 1.7899...  0.0537 sec/batch
Epoch: 17/20...  Training Step: 10111...  Training loss: 1.7448...  0.0533 sec/batch
Epoch: 17/20...  Training Step: 10112...  Training loss: 1.7718...  0.0545 sec/batch
Epoch: 17/20...  Training Step: 10113...  Training loss: 1.7441...  0.0598 sec/batch
Epoch: 17/20...  Training Step: 10114...  Training loss: 1.7055...  0.0531 sec/batch
Epoch: 17/20...  Training Step: 10115...  Training loss: 1.7159...  0.0565 sec/batch
Epoch: 17/20...  Training Step: 10116...  Training loss: 1.7848..

Epoch: 17/20...  Training Step: 10205...  Training loss: 1.7120...  0.0570 sec/batch
Epoch: 17/20...  Training Step: 10206...  Training loss: 1.7415...  0.0529 sec/batch
Epoch: 17/20...  Training Step: 10207...  Training loss: 1.7120...  0.0551 sec/batch
Epoch: 17/20...  Training Step: 10208...  Training loss: 1.7471...  0.0544 sec/batch
Epoch: 17/20...  Training Step: 10209...  Training loss: 1.7266...  0.0546 sec/batch
Epoch: 17/20...  Training Step: 10210...  Training loss: 1.7672...  0.0576 sec/batch
Epoch: 17/20...  Training Step: 10211...  Training loss: 1.7136...  0.0581 sec/batch
Epoch: 17/20...  Training Step: 10212...  Training loss: 1.7030...  0.0543 sec/batch
Epoch: 17/20...  Training Step: 10213...  Training loss: 1.6861...  0.0569 sec/batch
Epoch: 17/20...  Training Step: 10214...  Training loss: 1.7417...  0.0524 sec/batch
Epoch: 17/20...  Training Step: 10215...  Training loss: 1.7024...  0.0585 sec/batch
Epoch: 17/20...  Training Step: 10216...  Training loss: 1.6571..

Epoch: 17/20...  Training Step: 10305...  Training loss: 1.7528...  0.0532 sec/batch
Epoch: 17/20...  Training Step: 10306...  Training loss: 1.6434...  0.0539 sec/batch
Epoch: 17/20...  Training Step: 10307...  Training loss: 1.6493...  0.0524 sec/batch
Epoch: 17/20...  Training Step: 10308...  Training loss: 1.7317...  0.0527 sec/batch
Epoch: 17/20...  Training Step: 10309...  Training loss: 1.6822...  0.0549 sec/batch
Epoch: 17/20...  Training Step: 10310...  Training loss: 1.7070...  0.0549 sec/batch
Epoch: 17/20...  Training Step: 10311...  Training loss: 1.7558...  0.0551 sec/batch
Epoch: 17/20...  Training Step: 10312...  Training loss: 1.6693...  0.0547 sec/batch
Epoch: 17/20...  Training Step: 10313...  Training loss: 1.7061...  0.0546 sec/batch
Epoch: 17/20...  Training Step: 10314...  Training loss: 1.7506...  0.0541 sec/batch
Epoch: 17/20...  Training Step: 10315...  Training loss: 1.6871...  0.0550 sec/batch
Epoch: 17/20...  Training Step: 10316...  Training loss: 1.7492..

Epoch: 17/20...  Training Step: 10405...  Training loss: 1.8127...  0.0527 sec/batch
Epoch: 17/20...  Training Step: 10406...  Training loss: 1.7137...  0.0551 sec/batch
Epoch: 17/20...  Training Step: 10407...  Training loss: 1.7266...  0.0609 sec/batch
Epoch: 17/20...  Training Step: 10408...  Training loss: 1.7369...  0.0535 sec/batch
Epoch: 17/20...  Training Step: 10409...  Training loss: 1.7517...  0.0534 sec/batch
Epoch: 17/20...  Training Step: 10410...  Training loss: 1.7221...  0.0536 sec/batch
Epoch: 17/20...  Training Step: 10411...  Training loss: 1.7148...  0.0566 sec/batch
Epoch: 17/20...  Training Step: 10412...  Training loss: 1.7330...  0.0569 sec/batch
Epoch: 17/20...  Training Step: 10413...  Training loss: 1.7441...  0.0578 sec/batch
Epoch: 17/20...  Training Step: 10414...  Training loss: 1.7111...  0.0581 sec/batch
Epoch: 17/20...  Training Step: 10415...  Training loss: 1.6931...  0.0565 sec/batch
Epoch: 17/20...  Training Step: 10416...  Training loss: 1.7181..

Epoch: 17/20...  Training Step: 10505...  Training loss: 1.7587...  0.0544 sec/batch
Epoch: 17/20...  Training Step: 10506...  Training loss: 1.7235...  0.0551 sec/batch
Epoch: 17/20...  Training Step: 10507...  Training loss: 1.6964...  0.0595 sec/batch
Epoch: 17/20...  Training Step: 10508...  Training loss: 1.7290...  0.0589 sec/batch
Epoch: 17/20...  Training Step: 10509...  Training loss: 1.7104...  0.0585 sec/batch
Epoch: 17/20...  Training Step: 10510...  Training loss: 1.7624...  0.0533 sec/batch
Epoch: 17/20...  Training Step: 10511...  Training loss: 1.6898...  0.0554 sec/batch
Epoch: 17/20...  Training Step: 10512...  Training loss: 1.7805...  0.0589 sec/batch
Epoch: 17/20...  Training Step: 10513...  Training loss: 1.7302...  0.0529 sec/batch
Epoch: 17/20...  Training Step: 10514...  Training loss: 1.6689...  0.0533 sec/batch
Epoch: 17/20...  Training Step: 10515...  Training loss: 1.7038...  0.0526 sec/batch
Epoch: 17/20...  Training Step: 10516...  Training loss: 1.6926..

Epoch: 18/20...  Training Step: 10605...  Training loss: 1.6706...  0.0527 sec/batch
Epoch: 18/20...  Training Step: 10606...  Training loss: 1.6594...  0.0528 sec/batch
Epoch: 18/20...  Training Step: 10607...  Training loss: 1.6952...  0.0618 sec/batch
Epoch: 18/20...  Training Step: 10608...  Training loss: 1.7120...  0.0581 sec/batch
Epoch: 18/20...  Training Step: 10609...  Training loss: 1.7071...  0.0552 sec/batch
Epoch: 18/20...  Training Step: 10610...  Training loss: 1.7314...  0.0525 sec/batch
Epoch: 18/20...  Training Step: 10611...  Training loss: 1.7858...  0.0531 sec/batch
Epoch: 18/20...  Training Step: 10612...  Training loss: 1.7418...  0.0527 sec/batch
Epoch: 18/20...  Training Step: 10613...  Training loss: 1.6439...  0.0530 sec/batch
Epoch: 18/20...  Training Step: 10614...  Training loss: 1.7075...  0.0559 sec/batch
Epoch: 18/20...  Training Step: 10615...  Training loss: 1.7519...  0.0528 sec/batch
Epoch: 18/20...  Training Step: 10616...  Training loss: 1.7478..

Epoch: 18/20...  Training Step: 10705...  Training loss: 1.7358...  0.0520 sec/batch
Epoch: 18/20...  Training Step: 10706...  Training loss: 1.7202...  0.0580 sec/batch
Epoch: 18/20...  Training Step: 10707...  Training loss: 1.7131...  0.0570 sec/batch
Epoch: 18/20...  Training Step: 10708...  Training loss: 1.7156...  0.0526 sec/batch
Epoch: 18/20...  Training Step: 10709...  Training loss: 1.7144...  0.0583 sec/batch
Epoch: 18/20...  Training Step: 10710...  Training loss: 1.7042...  0.0522 sec/batch
Epoch: 18/20...  Training Step: 10711...  Training loss: 1.6984...  0.0530 sec/batch
Epoch: 18/20...  Training Step: 10712...  Training loss: 1.7298...  0.0523 sec/batch
Epoch: 18/20...  Training Step: 10713...  Training loss: 1.6901...  0.0553 sec/batch
Epoch: 18/20...  Training Step: 10714...  Training loss: 1.6951...  0.0584 sec/batch
Epoch: 18/20...  Training Step: 10715...  Training loss: 1.6742...  0.0532 sec/batch
Epoch: 18/20...  Training Step: 10716...  Training loss: 1.7102..

Epoch: 18/20...  Training Step: 10805...  Training loss: 1.7280...  0.0530 sec/batch
Epoch: 18/20...  Training Step: 10806...  Training loss: 1.6680...  0.0576 sec/batch
Epoch: 18/20...  Training Step: 10807...  Training loss: 1.6972...  0.0548 sec/batch
Epoch: 18/20...  Training Step: 10808...  Training loss: 1.7202...  0.0527 sec/batch
Epoch: 18/20...  Training Step: 10809...  Training loss: 1.7004...  0.0558 sec/batch
Epoch: 18/20...  Training Step: 10810...  Training loss: 1.6618...  0.0555 sec/batch
Epoch: 18/20...  Training Step: 10811...  Training loss: 1.6847...  0.0522 sec/batch
Epoch: 18/20...  Training Step: 10812...  Training loss: 1.7270...  0.0573 sec/batch
Epoch: 18/20...  Training Step: 10813...  Training loss: 1.6990...  0.0572 sec/batch
Epoch: 18/20...  Training Step: 10814...  Training loss: 1.6806...  0.0550 sec/batch
Epoch: 18/20...  Training Step: 10815...  Training loss: 1.7465...  0.0549 sec/batch
Epoch: 18/20...  Training Step: 10816...  Training loss: 1.7532..

Epoch: 18/20...  Training Step: 10905...  Training loss: 1.7006...  0.0556 sec/batch
Epoch: 18/20...  Training Step: 10906...  Training loss: 1.7369...  0.0572 sec/batch
Epoch: 18/20...  Training Step: 10907...  Training loss: 1.7029...  0.0530 sec/batch
Epoch: 18/20...  Training Step: 10908...  Training loss: 1.7235...  0.0550 sec/batch
Epoch: 18/20...  Training Step: 10909...  Training loss: 1.7350...  0.0571 sec/batch
Epoch: 18/20...  Training Step: 10910...  Training loss: 1.7165...  0.0521 sec/batch
Epoch: 18/20...  Training Step: 10911...  Training loss: 1.6905...  0.0569 sec/batch
Epoch: 18/20...  Training Step: 10912...  Training loss: 1.7798...  0.0594 sec/batch
Epoch: 18/20...  Training Step: 10913...  Training loss: 1.7153...  0.0597 sec/batch
Epoch: 18/20...  Training Step: 10914...  Training loss: 1.7158...  0.0543 sec/batch
Epoch: 18/20...  Training Step: 10915...  Training loss: 1.6984...  0.0551 sec/batch
Epoch: 18/20...  Training Step: 10916...  Training loss: 1.7055..

Epoch: 18/20...  Training Step: 11005...  Training loss: 1.7479...  0.0595 sec/batch
Epoch: 18/20...  Training Step: 11006...  Training loss: 1.7453...  0.0542 sec/batch
Epoch: 18/20...  Training Step: 11007...  Training loss: 1.6910...  0.0549 sec/batch
Epoch: 18/20...  Training Step: 11008...  Training loss: 1.7133...  0.0523 sec/batch
Epoch: 18/20...  Training Step: 11009...  Training loss: 1.7010...  0.0572 sec/batch
Epoch: 18/20...  Training Step: 11010...  Training loss: 1.6823...  0.0565 sec/batch
Epoch: 18/20...  Training Step: 11011...  Training loss: 1.7001...  0.0531 sec/batch
Epoch: 18/20...  Training Step: 11012...  Training loss: 1.7270...  0.0563 sec/batch
Epoch: 18/20...  Training Step: 11013...  Training loss: 1.7185...  0.0577 sec/batch
Epoch: 18/20...  Training Step: 11014...  Training loss: 1.6766...  0.0533 sec/batch
Epoch: 18/20...  Training Step: 11015...  Training loss: 1.7623...  0.0528 sec/batch
Epoch: 18/20...  Training Step: 11016...  Training loss: 1.7598..

Epoch: 18/20...  Training Step: 11105...  Training loss: 1.7999...  0.0528 sec/batch
Epoch: 18/20...  Training Step: 11106...  Training loss: 1.7678...  0.0585 sec/batch
Epoch: 18/20...  Training Step: 11107...  Training loss: 1.7264...  0.0527 sec/batch
Epoch: 18/20...  Training Step: 11108...  Training loss: 1.7784...  0.0532 sec/batch
Epoch: 18/20...  Training Step: 11109...  Training loss: 1.7116...  0.0560 sec/batch
Epoch: 18/20...  Training Step: 11110...  Training loss: 1.7326...  0.0529 sec/batch
Epoch: 18/20...  Training Step: 11111...  Training loss: 1.7408...  0.0525 sec/batch
Epoch: 18/20...  Training Step: 11112...  Training loss: 1.7696...  0.0527 sec/batch
Epoch: 18/20...  Training Step: 11113...  Training loss: 1.7032...  0.0545 sec/batch
Epoch: 18/20...  Training Step: 11114...  Training loss: 1.7204...  0.0528 sec/batch
Epoch: 18/20...  Training Step: 11115...  Training loss: 1.7376...  0.0549 sec/batch
Epoch: 18/20...  Training Step: 11116...  Training loss: 1.7464..

Epoch: 19/20...  Training Step: 11205...  Training loss: 1.6926...  0.0591 sec/batch
Epoch: 19/20...  Training Step: 11206...  Training loss: 1.6981...  0.0538 sec/batch
Epoch: 19/20...  Training Step: 11207...  Training loss: 1.5846...  0.0528 sec/batch
Epoch: 19/20...  Training Step: 11208...  Training loss: 1.6924...  0.0543 sec/batch
Epoch: 19/20...  Training Step: 11209...  Training loss: 1.6621...  0.0524 sec/batch
Epoch: 19/20...  Training Step: 11210...  Training loss: 1.7202...  0.0576 sec/batch
Epoch: 19/20...  Training Step: 11211...  Training loss: 1.6920...  0.0552 sec/batch
Epoch: 19/20...  Training Step: 11212...  Training loss: 1.6715...  0.0547 sec/batch
Epoch: 19/20...  Training Step: 11213...  Training loss: 1.6975...  0.0549 sec/batch
Epoch: 19/20...  Training Step: 11214...  Training loss: 1.7190...  0.0531 sec/batch
Epoch: 19/20...  Training Step: 11215...  Training loss: 1.7276...  0.0523 sec/batch
Epoch: 19/20...  Training Step: 11216...  Training loss: 1.7313..

Epoch: 19/20...  Training Step: 11305...  Training loss: 1.6593...  0.0522 sec/batch
Epoch: 19/20...  Training Step: 11306...  Training loss: 1.7233...  0.0549 sec/batch
Epoch: 19/20...  Training Step: 11307...  Training loss: 1.7419...  0.0559 sec/batch
Epoch: 19/20...  Training Step: 11308...  Training loss: 1.6856...  0.0528 sec/batch
Epoch: 19/20...  Training Step: 11309...  Training loss: 1.7360...  0.0542 sec/batch
Epoch: 19/20...  Training Step: 11310...  Training loss: 1.7373...  0.0549 sec/batch
Epoch: 19/20...  Training Step: 11311...  Training loss: 1.7303...  0.0568 sec/batch
Epoch: 19/20...  Training Step: 11312...  Training loss: 1.6911...  0.0549 sec/batch
Epoch: 19/20...  Training Step: 11313...  Training loss: 1.7152...  0.0583 sec/batch
Epoch: 19/20...  Training Step: 11314...  Training loss: 1.7475...  0.0586 sec/batch
Epoch: 19/20...  Training Step: 11315...  Training loss: 1.7144...  0.0556 sec/batch
Epoch: 19/20...  Training Step: 11316...  Training loss: 1.7179..

Epoch: 19/20...  Training Step: 11405...  Training loss: 1.7026...  0.0531 sec/batch
Epoch: 19/20...  Training Step: 11406...  Training loss: 1.7090...  0.0521 sec/batch
Epoch: 19/20...  Training Step: 11407...  Training loss: 1.7323...  0.0551 sec/batch
Epoch: 19/20...  Training Step: 11408...  Training loss: 1.7253...  0.0524 sec/batch
Epoch: 19/20...  Training Step: 11409...  Training loss: 1.6566...  0.0530 sec/batch
Epoch: 19/20...  Training Step: 11410...  Training loss: 1.6850...  0.0522 sec/batch
Epoch: 19/20...  Training Step: 11411...  Training loss: 1.7119...  0.0544 sec/batch
Epoch: 19/20...  Training Step: 11412...  Training loss: 1.6754...  0.0522 sec/batch
Epoch: 19/20...  Training Step: 11413...  Training loss: 1.7180...  0.0530 sec/batch
Epoch: 19/20...  Training Step: 11414...  Training loss: 1.7077...  0.0573 sec/batch
Epoch: 19/20...  Training Step: 11415...  Training loss: 1.7474...  0.0544 sec/batch
Epoch: 19/20...  Training Step: 11416...  Training loss: 1.6854..

Epoch: 19/20...  Training Step: 11505...  Training loss: 1.6839...  0.0552 sec/batch
Epoch: 19/20...  Training Step: 11506...  Training loss: 1.6995...  0.0545 sec/batch
Epoch: 19/20...  Training Step: 11507...  Training loss: 1.7332...  0.0528 sec/batch
Epoch: 19/20...  Training Step: 11508...  Training loss: 1.7318...  0.0577 sec/batch
Epoch: 19/20...  Training Step: 11509...  Training loss: 1.7007...  0.0593 sec/batch
Epoch: 19/20...  Training Step: 11510...  Training loss: 1.6871...  0.0527 sec/batch
Epoch: 19/20...  Training Step: 11511...  Training loss: 1.7459...  0.0530 sec/batch
Epoch: 19/20...  Training Step: 11512...  Training loss: 1.7036...  0.0574 sec/batch
Epoch: 19/20...  Training Step: 11513...  Training loss: 1.7014...  0.0548 sec/batch
Epoch: 19/20...  Training Step: 11514...  Training loss: 1.7016...  0.0544 sec/batch
Epoch: 19/20...  Training Step: 11515...  Training loss: 1.6718...  0.0558 sec/batch
Epoch: 19/20...  Training Step: 11516...  Training loss: 1.7745..

Epoch: 19/20...  Training Step: 11605...  Training loss: 1.6041...  0.0544 sec/batch
Epoch: 19/20...  Training Step: 11606...  Training loss: 1.6996...  0.0530 sec/batch
Epoch: 19/20...  Training Step: 11607...  Training loss: 1.6616...  0.0569 sec/batch
Epoch: 19/20...  Training Step: 11608...  Training loss: 1.6541...  0.0547 sec/batch
Epoch: 19/20...  Training Step: 11609...  Training loss: 1.7148...  0.0539 sec/batch
Epoch: 19/20...  Training Step: 11610...  Training loss: 1.7419...  0.0521 sec/batch
Epoch: 19/20...  Training Step: 11611...  Training loss: 1.7303...  0.0572 sec/batch
Epoch: 19/20...  Training Step: 11612...  Training loss: 1.7414...  0.0544 sec/batch
Epoch: 19/20...  Training Step: 11613...  Training loss: 1.6569...  0.0536 sec/batch
Epoch: 19/20...  Training Step: 11614...  Training loss: 1.7108...  0.0547 sec/batch
Epoch: 19/20...  Training Step: 11615...  Training loss: 1.6784...  0.0587 sec/batch
Epoch: 19/20...  Training Step: 11616...  Training loss: 1.7032..

Epoch: 19/20...  Training Step: 11705...  Training loss: 1.7203...  0.0533 sec/batch
Epoch: 19/20...  Training Step: 11706...  Training loss: 1.7075...  0.0567 sec/batch
Epoch: 19/20...  Training Step: 11707...  Training loss: 1.7413...  0.0546 sec/batch
Epoch: 19/20...  Training Step: 11708...  Training loss: 1.7554...  0.0577 sec/batch
Epoch: 19/20...  Training Step: 11709...  Training loss: 1.7582...  0.0546 sec/batch
Epoch: 19/20...  Training Step: 11710...  Training loss: 1.6738...  0.0581 sec/batch
Epoch: 19/20...  Training Step: 11711...  Training loss: 1.7217...  0.0565 sec/batch
Epoch: 19/20...  Training Step: 11712...  Training loss: 1.6589...  0.0576 sec/batch
Epoch: 19/20...  Training Step: 11713...  Training loss: 1.7187...  0.0564 sec/batch
Epoch: 19/20...  Training Step: 11714...  Training loss: 1.7187...  0.0580 sec/batch
Epoch: 19/20...  Training Step: 11715...  Training loss: 1.6904...  0.0521 sec/batch
Epoch: 19/20...  Training Step: 11716...  Training loss: 1.6998..

Epoch: 20/20...  Training Step: 11805...  Training loss: 1.6725...  0.0531 sec/batch
Epoch: 20/20...  Training Step: 11806...  Training loss: 1.6607...  0.0570 sec/batch
Epoch: 20/20...  Training Step: 11807...  Training loss: 1.6728...  0.0529 sec/batch
Epoch: 20/20...  Training Step: 11808...  Training loss: 1.7285...  0.0554 sec/batch
Epoch: 20/20...  Training Step: 11809...  Training loss: 1.7119...  0.0565 sec/batch
Epoch: 20/20...  Training Step: 11810...  Training loss: 1.6665...  0.0558 sec/batch
Epoch: 20/20...  Training Step: 11811...  Training loss: 1.6702...  0.0528 sec/batch
Epoch: 20/20...  Training Step: 11812...  Training loss: 1.7139...  0.0549 sec/batch
Epoch: 20/20...  Training Step: 11813...  Training loss: 1.6958...  0.0527 sec/batch
Epoch: 20/20...  Training Step: 11814...  Training loss: 1.6761...  0.0584 sec/batch
Epoch: 20/20...  Training Step: 11815...  Training loss: 1.6808...  0.0560 sec/batch
Epoch: 20/20...  Training Step: 11816...  Training loss: 1.7087..

Epoch: 20/20...  Training Step: 11905...  Training loss: 1.7250...  0.0565 sec/batch
Epoch: 20/20...  Training Step: 11906...  Training loss: 1.7386...  0.0570 sec/batch
Epoch: 20/20...  Training Step: 11907...  Training loss: 1.7410...  0.0585 sec/batch
Epoch: 20/20...  Training Step: 11908...  Training loss: 1.6836...  0.0544 sec/batch
Epoch: 20/20...  Training Step: 11909...  Training loss: 1.7034...  0.0580 sec/batch
Epoch: 20/20...  Training Step: 11910...  Training loss: 1.7405...  0.0531 sec/batch
Epoch: 20/20...  Training Step: 11911...  Training loss: 1.7159...  0.0546 sec/batch
Epoch: 20/20...  Training Step: 11912...  Training loss: 1.7367...  0.0544 sec/batch
Epoch: 20/20...  Training Step: 11913...  Training loss: 1.7584...  0.0549 sec/batch
Epoch: 20/20...  Training Step: 11914...  Training loss: 1.7043...  0.0578 sec/batch
Epoch: 20/20...  Training Step: 11915...  Training loss: 1.6647...  0.0543 sec/batch
Epoch: 20/20...  Training Step: 11916...  Training loss: 1.6928..

Epoch: 20/20...  Training Step: 12005...  Training loss: 1.6822...  0.0559 sec/batch
Epoch: 20/20...  Training Step: 12006...  Training loss: 1.7326...  0.0581 sec/batch
Epoch: 20/20...  Training Step: 12007...  Training loss: 1.7421...  0.0522 sec/batch
Epoch: 20/20...  Training Step: 12008...  Training loss: 1.6749...  0.0585 sec/batch
Epoch: 20/20...  Training Step: 12009...  Training loss: 1.7202...  0.0588 sec/batch
Epoch: 20/20...  Training Step: 12010...  Training loss: 1.6909...  0.0530 sec/batch
Epoch: 20/20...  Training Step: 12011...  Training loss: 1.7724...  0.0549 sec/batch
Epoch: 20/20...  Training Step: 12012...  Training loss: 1.6724...  0.0568 sec/batch
Epoch: 20/20...  Training Step: 12013...  Training loss: 1.6679...  0.0593 sec/batch
Epoch: 20/20...  Training Step: 12014...  Training loss: 1.7043...  0.0525 sec/batch
Epoch: 20/20...  Training Step: 12015...  Training loss: 1.6443...  0.0543 sec/batch
Epoch: 20/20...  Training Step: 12016...  Training loss: 1.7286..

Epoch: 20/20...  Training Step: 12105...  Training loss: 1.6790...  0.0543 sec/batch
Epoch: 20/20...  Training Step: 12106...  Training loss: 1.6916...  0.0530 sec/batch
Epoch: 20/20...  Training Step: 12107...  Training loss: 1.6654...  0.0530 sec/batch
Epoch: 20/20...  Training Step: 12108...  Training loss: 1.6700...  0.0535 sec/batch
Epoch: 20/20...  Training Step: 12109...  Training loss: 1.6985...  0.0548 sec/batch
Epoch: 20/20...  Training Step: 12110...  Training loss: 1.6914...  0.0554 sec/batch
Epoch: 20/20...  Training Step: 12111...  Training loss: 1.6636...  0.0583 sec/batch
Epoch: 20/20...  Training Step: 12112...  Training loss: 1.6842...  0.0525 sec/batch
Epoch: 20/20...  Training Step: 12113...  Training loss: 1.6750...  0.0578 sec/batch
Epoch: 20/20...  Training Step: 12114...  Training loss: 1.6938...  0.0549 sec/batch
Epoch: 20/20...  Training Step: 12115...  Training loss: 1.6806...  0.0572 sec/batch
Epoch: 20/20...  Training Step: 12116...  Training loss: 1.6977..

Epoch: 20/20...  Training Step: 12205...  Training loss: 1.7188...  0.0527 sec/batch
Epoch: 20/20...  Training Step: 12206...  Training loss: 1.7313...  0.0530 sec/batch
Epoch: 20/20...  Training Step: 12207...  Training loss: 1.6811...  0.0522 sec/batch
Epoch: 20/20...  Training Step: 12208...  Training loss: 1.7020...  0.0563 sec/batch
Epoch: 20/20...  Training Step: 12209...  Training loss: 1.6813...  0.0561 sec/batch
Epoch: 20/20...  Training Step: 12210...  Training loss: 1.6398...  0.0549 sec/batch
Epoch: 20/20...  Training Step: 12211...  Training loss: 1.6849...  0.0554 sec/batch
Epoch: 20/20...  Training Step: 12212...  Training loss: 1.7692...  0.0554 sec/batch
Epoch: 20/20...  Training Step: 12213...  Training loss: 1.6929...  0.0576 sec/batch
Epoch: 20/20...  Training Step: 12214...  Training loss: 1.6791...  0.0545 sec/batch
Epoch: 20/20...  Training Step: 12215...  Training loss: 1.6541...  0.0527 sec/batch
Epoch: 20/20...  Training Step: 12216...  Training loss: 1.7045..

Epoch: 20/20...  Training Step: 12305...  Training loss: 1.7215...  0.0573 sec/batch
Epoch: 20/20...  Training Step: 12306...  Training loss: 1.7552...  0.0528 sec/batch
Epoch: 20/20...  Training Step: 12307...  Training loss: 1.7166...  0.0555 sec/batch
Epoch: 20/20...  Training Step: 12308...  Training loss: 1.7409...  0.0543 sec/batch
Epoch: 20/20...  Training Step: 12309...  Training loss: 1.7003...  0.0551 sec/batch
Epoch: 20/20...  Training Step: 12310...  Training loss: 1.6953...  0.0573 sec/batch
Epoch: 20/20...  Training Step: 12311...  Training loss: 1.6668...  0.0547 sec/batch
Epoch: 20/20...  Training Step: 12312...  Training loss: 1.6954...  0.0571 sec/batch
Epoch: 20/20...  Training Step: 12313...  Training loss: 1.6740...  0.0529 sec/batch
Epoch: 20/20...  Training Step: 12314...  Training loss: 1.6803...  0.0548 sec/batch
Epoch: 20/20...  Training Step: 12315...  Training loss: 1.6988...  0.0557 sec/batch
Epoch: 20/20...  Training Step: 12316...  Training loss: 1.6863..

In [15]:
# Display the checkpoint state recorded for the 'checkpoints' directory:
# the most recent checkpoint path followed by every saved checkpoint path.
tf.train.get_checkpoint_state('checkpoints')

model_checkpoint_path: "checkpoints/i12400_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i200_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i400_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i600_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i800_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i1000_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i1200_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i1400_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i1600_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i1800_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i2000_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i2200_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i2400_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i2600_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i2800_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i3000_l128.ckpt"
all_model_checkpoint_paths: "checkpoints/i3200_l128.ckpt"
all_model_checkpoint_p

# Sampling

## Sampler

In [16]:
class Sampler:
    """
    Generate text from a trained character-level model restored from a checkpoint.
    """

    def sample(self, model, data, checkpoint, n_samples, prime="The "):
        """
        Get sample model outputs from checkpoint.

        The model is first run over every character of `prime` to warm up its
        recurrent state, then one character is drawn from the prediction that
        follows the prime, and finally `n_samples` further characters are drawn
        by feeding each drawn character back in.

        Arguments
        ---------
        : model: CharRNNModel object; its `inputs`, `keep_prob`, `initial_state`,
                 `final_state` and `prediction` attributes are used
        : data: Data object; its `chars`, `chars_to_ints` and `ints_to_chars`
                attributes are used
        : checkpoint: Checkpoint from which to get samples
        : n_samples: Number of sampling-loop iterations
        : prime: Non-empty text used to prime sampling

        Returns
        -------
        : String made of `prime` followed by `n_samples + 1` generated characters

        Raises
        ------
        : ValueError: If `prime` is empty (there would be no prediction to sample from)
        : KeyError: If `prime` contains a character missing from `data.chars_to_ints`
        """

        if not prime:
            raise ValueError("prime must contain at least one character")

        # Encode the prime up front so an unknown character fails before the
        # (comparatively slow) checkpoint restore instead of after it.
        prime_ids = [data.chars_to_ints[ch] for ch in prime]

        samples = list(prime)
        num_chars = len(data.chars)

        saver = tf.train.Saver()

        with tf.Session() as sess:
            saver.restore(sess, checkpoint)
            state = sess.run(model.initial_state)

            # Warm up the recurrent state on the prime; only the prediction
            # produced by the last prime character is sampled from.
            for char_id in prime_ids:
                preds, state = self._step(sess, model, char_id, state)

            c = self.pick_top_n(preds, num_chars)
            samples.append(data.ints_to_chars[c])

            # Feed every drawn character back in to obtain the next one.
            for _ in range(n_samples):
                preds, state = self._step(sess, model, c, state)
                c = self.pick_top_n(preds, num_chars)
                samples.append(data.ints_to_chars[c])

        return ''.join(samples)


    def _step(self, sess, model, char_id, state):
        """
        Run the model on a single encoded character with dropout disabled.

        Returns the `[prediction, final_state]` results of `sess.run`.
        """
        # The model is fed one character at a time as a (1, 1) array.
        x = np.zeros((1, 1))
        x[0, 0] = char_id
        feed = {model.inputs: x,
                model.keep_prob: 1.,
                model.initial_state: state}
        return sess.run([model.prediction, model.final_state], feed_dict=feed)


    def pick_top_n(self, preds, num_chars, top_n=5):
        """
        Pick random char among top_n chars.

        Arguments
        ---------
        : preds: Array of prediction scores holding `num_chars` values in total;
                 it is left unmodified
        : num_chars: Number of characters to choose among
        : top_n: Number of highest-scoring characters kept as candidates (>= 1)

        Returns
        -------
        : Index of the chosen character

        Raises
        ------
        : ValueError: If `top_n` is smaller than 1
        """
        if top_n < 1:
            # A slice ending at -0 is empty, so top_n == 0 would silently keep
            # every character instead of restricting the candidates.
            raise ValueError("top_n must be at least 1")

        # Work on a flattened private copy: np.squeeze returns a view, so
        # zeroing entries through it would overwrite the caller's array.
        p = np.array(preds, dtype=np.float64).ravel()
        # argsort is ascending, so everything before the last top_n indices is dropped.
        p[np.argsort(p)[:-top_n]] = 0
        p = p / np.sum(p)
        c = np.random.choice(num_chars, 1, p=p)[0]
        return c

In [17]:
# Build the CharRNNModel instance that the sampling cells pass to Sampler.sample.
# The hyperparameter variables are defined outside this cell.
# NOTE(review): the trailing positional True presumably enables the model's
# sampling mode — confirm against CharRNNModel's constructor.
model = CharRNNModel(num_classes,
                     batch_size,
                     num_steps,
                     lstm_size,
                     num_layers,
                     learning_rate,
                     grad_clip,
                     True)


Building CharRNN model ...

Created placeholders

Built LSTM cell
Built LSTM cell
Built LSTM layers

Built output layer

Added training loss computation

Built optimizer

Built CharRNN model



In [18]:
# Sampler instance used by the sampling cells that follow.
sampler = Sampler()

In [19]:
# Settings shared by the sampling cells that follow.
# Passed to Sampler.sample as n_samples.
n_samples = 1000
# Text the model is primed with before sampling starts.
prime = "Far"

In [20]:
# Sample from an early checkpoint
# (presumably saved around training step 1000, going by the file name).
checkpoint = "checkpoints/i1000_l128.ckpt"

samp = sampler.sample(model, data, checkpoint, n_samples, prime)
print(samp)

INFO:tensorflow:Restoring parameters from checkpoints/i1000_l128.ckpt
Farsne the her thom, af has sore the wer on anlend ther the tham whathe wet and
of that we the sar hor ald on a wers to are wathed wo sansind tore anderitt him ansist as thited and.

"Te the sere sin withe alled and ald, withing to he
mase woult and this thor witing as to to and had and he thare the he with ot
the wart ot
thitt hith thite at houss on
hit anden hid, the sead to her woud, the thang, hin womt ware we wans..


I thore sore the to we he wers, wan her ald hat seed was he the har sand this, and on he sonsing of of ald ond anl hor, shere to she her and the the sans, alt her. An he tint he sons, thor has wis thor ho hat so couthing thould hererisgiliding and ant and hit heard, thar wis him," and
and ard on has was and alerer thoung wam an had sind on winh her some he and tarer of woun tho han her shat he wert thar antithing ande theull.

"The that
was the this her to his here wo lese he andented he
thimhing t

In [21]:
# Sample from a later checkpoint
# (presumably saved around training step 10000, going by the file name).
checkpoint = "checkpoints/i10000_l128.ckpt"

samp = sampler.sample(model, data, checkpoint, n_samples, prime)
print(samp)

INFO:tensorflow:Restoring parameters from checkpoints/i10000_l128.ckpt
Farr to
the sould of
her
and shillent, at her houre to the persor and has been was to thing and this take her home and a minner.

"Tant the mernate, as you and would so sere and a lart of that all the more is a passe that's not to make."

"They's be he would not stall to see, as you husd all her to see."

She was that the propers hears the ware wonesting of this, and stand that why say and thing, and
stor the peare which withohe that so what see his feeting of an and the cauner of time in the compines of
the carriathel of him of the some tried of him.

"Yes, he can there and and well. I have some and the thaps on the the peeper," she was a sease,
though
they stond where had
house to how with the standing him, when the where her with a pact he said out his forte the stinging that he had tried
intorethed
take her
hid as to him
that which she was she children had
that the
servine with his forgetter to him one to her, b

In [22]:
# Sample from the most recent checkpoint found in the 'checkpoints' directory.
checkpoint = tf.train.latest_checkpoint('checkpoints')

samp = sampler.sample(model, data, checkpoint, n_samples, prime)
print(samp)

INFO:tensorflow:Restoring parameters from checkpoints/i12400_l128.ckpt
Farnane to ast was to the were,"
shad he so and here, and which.

"Tell, and she he saw," she was and the taking the corner of their heer tarking out of
the man han at the
still with at the solestly highing
thoughtself-to sat, the sairs and ansuneds of the past, and his thought of the mather of the princess of the to attart to herself. "Whos the sens of meaning to at the conversation would be sented how," and that would now so her and to step of an the hompering
of the most son of his sent, and her stalls, she without his cancest that she had been said, stronged, he were bettingt and short of the pair. And then the consress off and something
that was
strought on, what she cannot his collecting of at a corricons, so said, andry. "You don't be as staying out at the stranger windors in the count oversore women that," she said, but the warts, and was to see. Though whith he had
taken were aledery that they was not went 