In [5]:
#imports 

import tensorflow as tf
import numpy as np
import matplotlib as matplotlib
import matplotlib.ticker as ticker
from urllib.request import urlretrieve
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import spacy
import sys
import os
import csv

In [9]:
#load GloVe vectors

# Path of the pre-trained GloVe text file, resolved against the working directory.
GloVe_vectors_file = "glove.6B.50d.txt"

# Maps each token string to its embedding, stored as a 1-D float array.
glove_wordmap = {}
with open(GloVe_vectors_file, mode="r", encoding="utf8") as vector_lines:
    for entry in vector_lines:
        # A line is treated as "<token> <numbers...>": cut at the first space
        # only, so the whole numeric tail can be parsed in a single call.
        fields = entry.split(" ", 1)
        token, raw_values = fields
        glove_wordmap[token] = np.fromstring(raw_values, sep=" ")


OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a shortcut link, a Python package or a valid path to a data directory.

In [11]:
#load spacy & tqdm

# tqdm is imported unconditionally in the imports cell, so the progress-bar
# flag consulted by the training loop is simply switched on here.
tqdm_installed = True

# The 'en' shortcut link only exists on older spaCy installs; newer releases
# require the full package name. Try the shortcut first (original behaviour)
# and fall back to the package name so both setups work. If neither model is
# installed the second load still raises OSError, as before.
try:
    nlp = spacy.load('en')
except OSError:
    nlp = spacy.load('en_core_web_sm')

In [12]:
#turns a sentence into the rows of an (n,d) matrix
#n - no. of tokens that have a GloVe vector
#d - no. of dimensions each word vector has
#the sentence is lower-cased and lemmatized before the lookup

def sentence2sequence(sentence):
    """Look up the GloVe vector of every known lemma in ``sentence``.

    The sentence is lower-cased, run through the module-level spaCy pipeline
    ``nlp``, and each token's lemma is looked up in ``glove_wordmap``. Lemmas
    that are not in the map are dropped silently.

    Returns:
        (rows, words): two parallel lists - the vectors found, in sentence
        order, and the lemma strings they belong to. Both are empty when no
        lemma is in the vocabulary.
    """
    lemmas = (tok.lemma_ for tok in nlp(sentence.lower()))
    # Keep only lemmas with an embedding, preserving their original order.
    known_words = [lemma for lemma in lemmas if lemma in glove_wordmap]
    vectors = [glove_wordmap[lemma] for lemma in known_words]
    return vectors, known_words

In [13]:
#turn an entailment label into a one-hot score that can be fed into a network
#ENTAILMENT = [1.0 0.0 0.0]
#NEUTRAL = [0.0 1.0 0.0]
#CONTRADICTION = [0.0 0.0 1.0]

def score_setup(row):
    """Convert an entailment label string into a length-3 one-hot vector.

    Args:
        row: the label text, one of 'ENTAILMENT', 'NEUTRAL' or
            'CONTRADICTION' (matched exactly, case-sensitive).

    Returns:
        A float numpy array of shape (3,) with a single 1.0 at the label's
        index. An unrecognised label yields an all-zero vector; the previous
        implementation divided by the vector sum and therefore produced NaNs
        for such labels.
    """
    convert_dict = {
      'ENTAILMENT': 0,
      'NEUTRAL': 1,
      'CONTRADICTION': 2
    }
    score = np.zeros((3,))
    if row in convert_dict:
        # A one-hot vector already sums to 1, so no normalisation is needed.
        score[convert_dict[row]] = 1.0
    return score

In [14]:
#fits the different sentence shapes

def fit_to_size(matrix, shape):
    """Copy ``matrix`` into a zero-filled array of exactly ``shape``.

    Along every axis the data is truncated when ``matrix`` is longer than the
    target and zero-padded at the end when it is shorter.

    Args:
        matrix: numpy array to fit; ``shape`` must have an entry for each of
            its axes.
        shape: tuple giving the desired output shape.

    Returns:
        A new float array of shape ``shape``.
    """
    res = np.zeros(shape)
    # The overlapping region along each axis. This must be a tuple: indexing
    # with a *list* of slices is rejected by current NumPy releases.
    overlap = tuple(slice(0, min(dim, shape[axis]))
                    for axis, dim in enumerate(matrix.shape))
    res[overlap] = matrix[overlap]
    return res

In [15]:
#loads training data

def process_training_data(path="SICK_train.txt"):
    """Read a tab-separated SICK file and turn it into padded network inputs.

    Relies on the module-level ``max_hypothesis_length``,
    ``max_evidence_length`` and ``vector_size`` being defined before the call.

    Args:
        path: tab-separated file with a header row containing the columns
            ``sentence_A``, ``sentence_B`` and ``entailment_judgment``.
            Defaults to the training file used by this notebook.

    Returns:
        ((hyp_sentences, evi_sentences), scores) where
        hyp_sentences has shape (rows, max_hypothesis_length, vector_size),
        evi_sentences has shape (rows, max_evidence_length, vector_size) and
        scores has shape (rows, 3), one ``score_setup`` vector per row.
    """
    def _sentence_matrix(sentence):
        # Stack the word vectors of one sentence into an (n, vector_size)
        # matrix. np.vstack raises on an empty list, so a sentence with no
        # in-vocabulary token becomes an empty matrix that fit_to_size then
        # pads with zeros.
        vectors = sentence2sequence(sentence)[0]
        if not vectors:
            return np.zeros((0, vector_size))
        return np.vstack(vectors)

    hyp_sentences = []
    evi_sentences = []
    scores = []
    with open(path, "r") as data:
        for row in csv.DictReader(data, delimiter='\t'):
            hyp_sentences.append(_sentence_matrix(row["sentence_A"]))
            evi_sentences.append(_sentence_matrix(row["sentence_B"]))
            scores.append(score_setup(row["entailment_judgment"]))

    # Pad/truncate every sentence to a common length so they stack into one
    # dense array per side.
    hyp_sentences = np.stack([fit_to_size(x, (max_hypothesis_length, vector_size))
                      for x in hyp_sentences])
    evi_sentences = np.stack([fit_to_size(x, (max_evidence_length, vector_size))
                      for x in evi_sentences])
    return (hyp_sentences, evi_sentences), np.array(scores)

In [16]:
# RNN vars

# Every sentence is padded or truncated to this many word vectors.
max_hypothesis_length = 30
max_evidence_length = 30

# Minibatch size, word-vector width and LSTM state width.
batch_size = 128
vector_size = 50
hidden_size = 64
lstm_size = hidden_size

# Number of examples drawn per epoch; the training loop steps through this
# range in increments of batch_size.
training_iterations_count = 100000

# Report loss/accuracy every display_step minibatches.
display_step = 10

# Loads and vectorises the whole training set (reads the globals above).
data_feature_list, correct_scores = process_training_data()

# Short aliases used when building the graph.
l_h = max_hypothesis_length
l_e = max_evidence_length
N = batch_size
D = vector_size
H = hidden_size
l_seq = l_h + l_e

  


In [17]:
# Optimisation hyper-parameters.
weight_decay = 0.0001    # multiplier on the L2 regularisation term
learning_rate = 0.0005   # step size handed to the optimizer

# Keep probabilities for the dropout wrappers (inputs / outputs of a cell).
input_p = 0.5
output_p = 0.5

# Start from an empty graph so re-running this cell does not pile up ops.
tf.reset_default_graph()

# Forward-direction LSTM cell and a dropout-wrapped version of it.
lstm = tf.contrib.rnn.BasicLSTMCell(num_units=lstm_size)
lstm_drop = tf.contrib.rnn.DropoutWrapper(
    lstm, input_keep_prob=input_p, output_keep_prob=output_p)

Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').


In [18]:
#N   - number of elements in each batch
#l_h - maximum length of a hypothesis sentence
#l_e - maximum length of an evidence sentence
#D   - size of the GloVe vectors in use

# Graph inputs: one batch of padded hypothesis sentences, one batch of padded
# evidence sentences, and the matching one-hot labels (3 classes).
hyp = tf.placeholder(tf.float32, shape=[N, l_h, D], name='hypothesis')
evi = tf.placeholder(tf.float32, shape=[N, l_e, D], name='evidence')
y = tf.placeholder(tf.float32, shape=[N, 3], name='label')

In [19]:
#LSTM used to read the sentences backwards
lstm_back = tf.contrib.rnn.BasicLSTMCell(num_units=lstm_size)

#dropout wrapper around lstm_back, same keep probabilities as the forward one
lstm_drop_back = tf.contrib.rnn.DropoutWrapper(
    lstm_back, input_keep_prob=input_p, output_keep_prob=output_p)


In [20]:
# Weights of the final classifier start as small Gaussian noise.
fc_initializer = tf.random_normal_initializer(stddev=0.1)

# Classifier weight matrix. Its input width is 2*hidden_size because the
# forward and the backward LSTM outputs are concatenated; it has 3 outputs,
# one per entailment class.
fc_weight = tf.get_variable(
    'fc_weight',
    shape=[2*hidden_size, 3],
    initializer=fc_initializer,
)

# Classifier bias, one entry per class (no explicit initializer given).
fc_bias = tf.get_variable('bias', shape=[3])

In [21]:
# Register an L2 penalty on the classifier weights; the loss cell sums this
# collection into the total loss.
fc_weight_penalty = tf.nn.l2_loss(fc_weight)
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, fc_weight_penalty)

# Reshape the inputs into the per-timestep list that the static RNN expects.

# Hypothesis followed by evidence along the time axis: (N, l_h + l_e, D)
joined = tf.concat([hyp, evi], axis=1)
# Time-major layout: (l_h + l_e, N, D)
time_major = tf.transpose(joined, perm=[1, 0, 2])
# Flatten time and batch together: ((l_h + l_e) * N, D)
flattened = tf.reshape(time_major, [-1, vector_size])
# Cut into l_seq equal pieces along axis 0 -> list of l_seq tensors of (N, D)
x = tf.split(flattened, l_seq)

In [22]:
# Feed the timestep list through a forward and a backward LSTM. rnn_outputs
# holds one tensor per timestep, combining both directions; the two final
# states returned alongside it are discarded.
# NOTE(review): the plain cells `lstm` / `lstm_back` are passed here, so the
# dropout-wrapped `lstm_drop` / `lstm_drop_back` built earlier are not part of
# this call - confirm whether dropout was meant to be active.
rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
    cell_fw=lstm,
    cell_bw=lstm_back,
    inputs=x,
    dtype=tf.float32,
)

In [23]:
#unnormalised class scores (logits): relative certainty for each entailment value

# Only the output at the last timestep is classified.
final_output = rnn_outputs[-1]
classification_scores = tf.matmul(final_output, fc_weight) + fc_bias


In [24]:
# Accuracy: fraction of the batch whose highest-scoring class matches the label.
with tf.variable_scope('Accuracy'):
    predicts = tf.cast(tf.argmax(classification_scores, axis=1), tf.int32)
    y_label = tf.cast(tf.argmax(y, axis=1), tf.int32)
    corrects = tf.equal(predicts, y_label)
    corrects_as_float = tf.cast(corrects, tf.float32)
    num_corrects = tf.reduce_sum(corrects_as_float)
    accuracy = tf.reduce_mean(corrects_as_float)

# Loss: mean softmax cross-entropy, plus the weighted L2 penalties that were
# registered in the REGULARIZATION_LOSSES collection.
with tf.variable_scope("loss"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=y, logits=classification_scores)
    loss = tf.reduce_mean(cross_entropy)
    regularization_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = loss + weight_decay * tf.add_n(regularization_terms)

# Adam minimises the regularised loss; `loss` itself stays available for reporting.
optimizer = tf.train.AdamOptimizer(learning_rate)
opt_op = optimizer.minimize(total_loss)

init = tf.global_variables_initializer()

# Open the TensorFlow session and initialise every variable.
sess = tf.Session()
sess.run(init)

In [25]:
training_epochs = 7

for epoch in range(training_epochs):
    print("EPOCH ", epoch)

    # One entry per minibatch: example offsets 0, batch_size, 2*batch_size, ...
    training_iterations = range(0, training_iterations_count, batch_size)
    if tqdm_installed:
        # Add a progress bar if TQDM is installed
        training_iterations = tqdm(training_iterations)

    # A separate loop variable keeps the epoch counter from being overwritten.
    for offset in training_iterations:
        # Integer minibatch index, so it prints as "10" rather than "10.0".
        step = offset // batch_size

        # Draw batch_size random row indices (with replacement) from the
        # training set and slice out the matching inputs and labels.
        batch = np.random.randint(data_feature_list[0].shape[0], size=batch_size)
        hyps, evis, ys = (data_feature_list[0][batch,:],
                          data_feature_list[1][batch,:],
                          correct_scores[batch])
        feed = {hyp: hyps, evi: evis, y: ys}

        # Run one optimisation step on this minibatch.
        sess.run([opt_op], feed_dict=feed)

        # display_step: how often the accuracy and loss should
        #   be tested and displayed.
        if step % display_step == 0:
            # Evaluate both metrics on the same batch in a single session
            # call, after the weight update has been applied.
            acc, tmp_loss = sess.run([accuracy, loss], feed_dict=feed)
            # Display results
            print("Iter " + str(step) + ", Minibatch Loss= " + \
                  "{:.6f}".format(tmp_loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))


EPOCH  0


  0%|                                                                                          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 1.028856, Training Accuracy= 0.55469


  1%|▊                                                                                 | 8/782 [00:03<23:06,  1.79s/it]

Iter 10.0, Minibatch Loss= 0.977827, Training Accuracy= 0.55469


  2%|█▊                                                                               | 18/782 [00:04<08:07,  1.57it/s]

Iter 20.0, Minibatch Loss= 0.977146, Training Accuracy= 0.56250


  4%|██▉                                                                              | 28/782 [00:04<03:03,  4.12it/s]

Iter 30.0, Minibatch Loss= 0.961730, Training Accuracy= 0.57812


  5%|███▊                                                                             | 37/782 [00:04<01:19,  9.38it/s]

Iter 40.0, Minibatch Loss= 0.930429, Training Accuracy= 0.60938


  6%|█████                                                                            | 49/782 [00:05<00:42, 17.17it/s]

Iter 50.0, Minibatch Loss= 1.054563, Training Accuracy= 0.48438


  7%|██████                                                                           | 58/782 [00:05<00:35, 20.63it/s]

Iter 60.0, Minibatch Loss= 0.978099, Training Accuracy= 0.53125


  9%|███████                                                                          | 68/782 [00:06<00:29, 24.50it/s]

Iter 70.0, Minibatch Loss= 0.936756, Training Accuracy= 0.57812


 10%|████████                                                                         | 78/782 [00:06<00:26, 26.76it/s]

Iter 80.0, Minibatch Loss= 0.968451, Training Accuracy= 0.53906


 11%|█████████                                                                        | 88/782 [00:06<00:25, 27.29it/s]

Iter 90.0, Minibatch Loss= 0.940893, Training Accuracy= 0.57031


 13%|██████████▎                                                                      | 99/782 [00:07<00:25, 26.37it/s]

Iter 100.0, Minibatch Loss= 0.942848, Training Accuracy= 0.60156


 14%|███████████▏                                                                    | 109/782 [00:07<00:24, 26.97it/s]

Iter 110.0, Minibatch Loss= 0.970205, Training Accuracy= 0.55469


 15%|████████████▏                                                                   | 119/782 [00:07<00:23, 27.64it/s]

Iter 120.0, Minibatch Loss= 0.910868, Training Accuracy= 0.60156


 16%|█████████████▏                                                                  | 129/782 [00:08<00:23, 27.75it/s]

Iter 130.0, Minibatch Loss= 0.966690, Training Accuracy= 0.59375


 18%|██████████████▏                                                                 | 139/782 [00:08<00:22, 28.23it/s]

Iter 140.0, Minibatch Loss= 0.984638, Training Accuracy= 0.51562


 19%|███████████████▏                                                                | 148/782 [00:09<00:25, 25.32it/s]

Iter 150.0, Minibatch Loss= 0.915319, Training Accuracy= 0.58594


 20%|████████████████▏                                                               | 158/782 [00:09<00:23, 26.62it/s]

Iter 160.0, Minibatch Loss= 0.985750, Training Accuracy= 0.53125


 21%|█████████████████▏                                                              | 168/782 [00:09<00:22, 27.57it/s]

Iter 170.0, Minibatch Loss= 0.972048, Training Accuracy= 0.57031


 23%|██████████████████▏                                                             | 178/782 [00:10<00:21, 28.56it/s]

Iter 180.0, Minibatch Loss= 0.926547, Training Accuracy= 0.58594


 24%|███████████████████▎                                                            | 189/782 [00:10<00:20, 29.00it/s]

Iter 190.0, Minibatch Loss= 0.955136, Training Accuracy= 0.60156


 25%|████████████████████▎                                                           | 198/782 [00:10<00:20, 27.89it/s]

Iter 200.0, Minibatch Loss= 0.926804, Training Accuracy= 0.57812


 27%|█████████████████████▍                                                          | 210/782 [00:11<00:22, 24.87it/s]

Iter 210.0, Minibatch Loss= 0.909523, Training Accuracy= 0.56250


 28%|██████████████████████▌                                                         | 220/782 [00:11<00:21, 25.71it/s]

Iter 220.0, Minibatch Loss= 0.989335, Training Accuracy= 0.53906


 29%|███████████████████████▌                                                        | 230/782 [00:12<00:20, 27.14it/s]

Iter 230.0, Minibatch Loss= 0.868210, Training Accuracy= 0.61719


 31%|████████████████████████▌                                                       | 240/782 [00:12<00:19, 27.31it/s]

Iter 240.0, Minibatch Loss= 0.964677, Training Accuracy= 0.53125


 32%|█████████████████████████▌                                                      | 250/782 [00:12<00:19, 27.69it/s]

Iter 250.0, Minibatch Loss= 0.930215, Training Accuracy= 0.62500


 33%|██████████████████████████▍                                                     | 259/782 [00:13<00:20, 25.27it/s]

Iter 260.0, Minibatch Loss= 0.915158, Training Accuracy= 0.59375


 34%|███████████████████████████▌                                                    | 269/782 [00:13<00:18, 27.18it/s]

Iter 270.0, Minibatch Loss= 0.897308, Training Accuracy= 0.62500


 36%|████████████████████████████▌                                                   | 279/782 [00:14<00:17, 28.06it/s]

Iter 280.0, Minibatch Loss= 0.963067, Training Accuracy= 0.54688


 37%|█████████████████████████████▍                                                  | 288/782 [00:14<00:18, 26.52it/s]

Iter 290.0, Minibatch Loss= 0.942274, Training Accuracy= 0.55469


 38%|██████████████████████████████▌                                                 | 299/782 [00:14<00:16, 28.65it/s]

Iter 300.0, Minibatch Loss= 1.046301, Training Accuracy= 0.47656


 40%|███████████████████████████████▌                                                | 309/782 [00:15<00:16, 28.17it/s]

Iter 310.0, Minibatch Loss= 0.874920, Training Accuracy= 0.62500


 41%|████████████████████████████████▌                                               | 318/782 [00:15<00:23, 19.96it/s]

Iter 320.0, Minibatch Loss= 0.997016, Training Accuracy= 0.49219


 42%|█████████████████████████████████▍                                              | 327/782 [00:15<00:19, 23.43it/s]

Iter 330.0, Minibatch Loss= 1.015883, Training Accuracy= 0.50000


 43%|██████████████████████████████████▌                                             | 338/782 [00:16<00:17, 25.01it/s]

Iter 340.0, Minibatch Loss= 0.947055, Training Accuracy= 0.53906


 45%|███████████████████████████████████▌                                            | 348/782 [00:16<00:17, 24.97it/s]

Iter 350.0, Minibatch Loss= 0.895678, Training Accuracy= 0.54688


 46%|████████████████████████████████████▌                                           | 358/782 [00:17<00:15, 27.22it/s]

Iter 360.0, Minibatch Loss= 0.874793, Training Accuracy= 0.56250


 47%|█████████████████████████████████████▋                                          | 369/782 [00:17<00:14, 28.59it/s]

Iter 370.0, Minibatch Loss= 0.773535, Training Accuracy= 0.66406


 49%|██████████████████████████████████████▊                                         | 380/782 [00:17<00:13, 29.17it/s]

Iter 380.0, Minibatch Loss= 0.895330, Training Accuracy= 0.53125


 50%|███████████████████████████████████████▉                                        | 390/782 [00:18<00:13, 28.87it/s]

Iter 390.0, Minibatch Loss= 0.808700, Training Accuracy= 0.64062


 51%|████████████████████████████████████████▉                                       | 400/782 [00:18<00:13, 28.37it/s]

Iter 400.0, Minibatch Loss= 0.808482, Training Accuracy= 0.62500


 52%|█████████████████████████████████████████▋                                      | 407/782 [00:18<00:13, 27.89it/s]

Iter 410.0, Minibatch Loss= 0.931143, Training Accuracy= 0.54688


 54%|██████████████████████████████████████████▊                                     | 419/782 [00:19<00:12, 29.38it/s]

Iter 420.0, Minibatch Loss= 0.834217, Training Accuracy= 0.57031


 55%|███████████████████████████████████████████▉                                    | 429/782 [00:19<00:12, 28.87it/s]

Iter 430.0, Minibatch Loss= 0.896156, Training Accuracy= 0.51562


 56%|████████████████████████████████████████████▉                                   | 439/782 [00:20<00:12, 28.15it/s]

Iter 440.0, Minibatch Loss= 0.970512, Training Accuracy= 0.60938


 57%|█████████████████████████████████████████████▉                                  | 449/782 [00:20<00:11, 28.20it/s]

Iter 450.0, Minibatch Loss= 0.935097, Training Accuracy= 0.57812


 59%|███████████████████████████████████████████████                                 | 460/782 [00:20<00:10, 29.28it/s]

Iter 460.0, Minibatch Loss= 0.949356, Training Accuracy= 0.53906


 60%|███████████████████████████████████████████████▊                                | 467/782 [00:21<00:11, 28.48it/s]

Iter 470.0, Minibatch Loss= 0.856282, Training Accuracy= 0.60938


 61%|████████████████████████████████████████████████▉                               | 478/782 [00:21<00:10, 29.34it/s]

Iter 480.0, Minibatch Loss= 0.818846, Training Accuracy= 0.62500


 63%|██████████████████████████████████████████████████                              | 489/782 [00:21<00:09, 29.58it/s]

Iter 490.0, Minibatch Loss= 0.911318, Training Accuracy= 0.55469


 64%|███████████████████████████████████████████████████                             | 499/782 [00:22<00:09, 29.05it/s]

Iter 500.0, Minibatch Loss= 0.871933, Training Accuracy= 0.57031


 65%|████████████████████████████████████████████████████                            | 509/782 [00:22<00:09, 29.08it/s]

Iter 510.0, Minibatch Loss= 0.887130, Training Accuracy= 0.61719


 66%|█████████████████████████████████████████████████████                           | 519/782 [00:22<00:09, 28.14it/s]

Iter 520.0, Minibatch Loss= 0.922062, Training Accuracy= 0.57812


 68%|██████████████████████████████████████████████████████                          | 529/782 [00:23<00:08, 28.60it/s]

Iter 530.0, Minibatch Loss= 0.920631, Training Accuracy= 0.56250


 69%|███████████████████████████████████████████████████████▏                        | 540/782 [00:23<00:08, 29.28it/s]

Iter 540.0, Minibatch Loss= 0.852916, Training Accuracy= 0.57812


 70%|████████████████████████████████████████████████████████▎                       | 550/782 [00:23<00:08, 28.29it/s]

Iter 550.0, Minibatch Loss= 0.877463, Training Accuracy= 0.60156


 72%|█████████████████████████████████████████████████████████▎                      | 560/782 [00:24<00:07, 28.48it/s]

Iter 560.0, Minibatch Loss= 0.784811, Training Accuracy= 0.57812


 73%|██████████████████████████████████████████████████████████▎                     | 570/782 [00:24<00:07, 28.65it/s]

Iter 570.0, Minibatch Loss= 0.852329, Training Accuracy= 0.56250


 74%|███████████████████████████████████████████████████████████                     | 577/782 [00:24<00:07, 28.03it/s]

Iter 580.0, Minibatch Loss= 0.736514, Training Accuracy= 0.64844


 75%|████████████████████████████████████████████████████████████▎                   | 589/782 [00:25<00:06, 29.43it/s]

Iter 590.0, Minibatch Loss= 0.807767, Training Accuracy= 0.58594


 77%|█████████████████████████████████████████████████████████████▍                  | 600/782 [00:25<00:06, 29.38it/s]

Iter 600.0, Minibatch Loss= 0.867851, Training Accuracy= 0.55469


 78%|██████████████████████████████████████████████████████████████                  | 607/782 [00:25<00:06, 28.42it/s]

Iter 610.0, Minibatch Loss= 0.960786, Training Accuracy= 0.53906


 79%|███████████████████████████████████████████████████████████████▎                | 619/782 [00:26<00:05, 29.45it/s]

Iter 620.0, Minibatch Loss= 1.073488, Training Accuracy= 0.46094


 80%|████████████████████████████████████████████████████████████████▎               | 629/782 [00:26<00:05, 28.76it/s]

Iter 630.0, Minibatch Loss= 0.920259, Training Accuracy= 0.54688


 82%|█████████████████████████████████████████████████████████████████▎              | 638/782 [00:27<00:05, 24.43it/s]

Iter 640.0, Minibatch Loss= 0.991120, Training Accuracy= 0.55469


 83%|██████████████████████████████████████████████████████████████████▏             | 647/782 [00:27<00:05, 23.50it/s]

Iter 650.0, Minibatch Loss= 0.900803, Training Accuracy= 0.59375


 84%|███████████████████████████████████████████████████████████████████▎            | 658/782 [00:27<00:04, 26.94it/s]

Iter 660.0, Minibatch Loss= 0.945177, Training Accuracy= 0.53906


 85%|████████████████████████████████████████████████████████████████████▏           | 667/782 [00:28<00:04, 27.67it/s]

Iter 670.0, Minibatch Loss= 0.926545, Training Accuracy= 0.56250


 87%|█████████████████████████████████████████████████████████████████████▎          | 678/782 [00:28<00:03, 29.22it/s]

Iter 680.0, Minibatch Loss= 0.854201, Training Accuracy= 0.63281


 88%|██████████████████████████████████████████████████████████████████████▌         | 690/782 [00:29<00:03, 25.56it/s]

Iter 690.0, Minibatch Loss= 0.903546, Training Accuracy= 0.56250


 90%|███████████████████████████████████████████████████████████████████████▌        | 700/782 [00:29<00:03, 27.06it/s]

Iter 700.0, Minibatch Loss= 0.947597, Training Accuracy= 0.56250


 90%|████████████████████████████████████████████████████████████████████████▎       | 707/782 [00:29<00:02, 27.30it/s]

Iter 710.0, Minibatch Loss= 0.874145, Training Accuracy= 0.59375


 92%|█████████████████████████████████████████████████████████████████████████▌      | 719/782 [00:30<00:02, 29.56it/s]

Iter 720.0, Minibatch Loss= 0.895963, Training Accuracy= 0.64844


 93%|██████████████████████████████████████████████████████████████████████████▋     | 730/782 [00:30<00:01, 28.91it/s]

Iter 730.0, Minibatch Loss= 0.951711, Training Accuracy= 0.53125


 95%|███████████████████████████████████████████████████████████████████████████▋    | 740/782 [00:30<00:01, 28.22it/s]

Iter 740.0, Minibatch Loss= 0.873287, Training Accuracy= 0.63281


 96%|████████████████████████████████████████████████████████████████████████████▋   | 750/782 [00:31<00:01, 28.38it/s]

Iter 750.0, Minibatch Loss= 0.833064, Training Accuracy= 0.63281


 97%|█████████████████████████████████████████████████████████████████████████████▋  | 760/782 [00:31<00:00, 28.36it/s]

Iter 760.0, Minibatch Loss= 0.847396, Training Accuracy= 0.54688


 98%|██████████████████████████████████████████████████████████████████████████████▍ | 767/782 [00:31<00:00, 27.67it/s]

Iter 770.0, Minibatch Loss= 0.816533, Training Accuracy= 0.62500


100%|███████████████████████████████████████████████████████████████████████████████▋| 779/782 [00:32<00:00, 29.42it/s]

Iter 780.0, Minibatch Loss= 0.845046, Training Accuracy= 0.58594


100%|████████████████████████████████████████████████████████████████████████████████| 782/782 [00:32<00:00, 27.81it/s]


EPOCH  1


  0%|                                                                                          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.890074, Training Accuracy= 0.50781


  1%|▋                                                                                 | 7/782 [00:00<00:30, 25.35it/s]

Iter 10.0, Minibatch Loss= 0.894365, Training Accuracy= 0.56250


  2%|█▉                                                                               | 19/782 [00:00<00:26, 28.80it/s]

Iter 20.0, Minibatch Loss= 0.889792, Training Accuracy= 0.57031


  4%|███                                                                              | 30/782 [00:01<00:25, 29.33it/s]

Iter 30.0, Minibatch Loss= 0.764310, Training Accuracy= 0.63281


  5%|███▊                                                                             | 37/782 [00:01<00:26, 28.28it/s]

Iter 40.0, Minibatch Loss= 0.884682, Training Accuracy= 0.53906


  6%|████▉                                                                            | 48/782 [00:01<00:25, 29.21it/s]

Iter 50.0, Minibatch Loss= 0.840230, Training Accuracy= 0.55469


  7%|██████                                                                           | 58/782 [00:02<00:24, 29.05it/s]

Iter 60.0, Minibatch Loss= 0.783279, Training Accuracy= 0.68750


  9%|███████                                                                          | 68/782 [00:02<00:24, 28.84it/s]

Iter 70.0, Minibatch Loss= 0.852741, Training Accuracy= 0.57031


 10%|████████                                                                         | 78/782 [00:02<00:24, 28.34it/s]

Iter 80.0, Minibatch Loss= 0.955776, Training Accuracy= 0.54688


 11%|█████████                                                                        | 88/782 [00:03<00:24, 28.56it/s]

Iter 90.0, Minibatch Loss= 0.842067, Training Accuracy= 0.56250


 13%|██████████▏                                                                      | 98/782 [00:03<00:23, 28.70it/s]

Iter 100.0, Minibatch Loss= 0.812655, Training Accuracy= 0.60156


 14%|███████████▏                                                                    | 109/782 [00:03<00:22, 29.26it/s]

Iter 110.0, Minibatch Loss= 0.862041, Training Accuracy= 0.65625


 15%|████████████▎                                                                   | 120/782 [00:04<00:22, 29.29it/s]

Iter 120.0, Minibatch Loss= 0.838052, Training Accuracy= 0.62500


 16%|████████████▉                                                                   | 127/782 [00:04<00:23, 28.44it/s]

Iter 130.0, Minibatch Loss= 0.890226, Training Accuracy= 0.57812


 18%|██████████████                                                                  | 138/782 [00:04<00:22, 28.98it/s]

Iter 140.0, Minibatch Loss= 0.915281, Training Accuracy= 0.53906


 19%|███████████████                                                                 | 147/782 [00:05<00:22, 28.16it/s]

Iter 150.0, Minibatch Loss= 0.845478, Training Accuracy= 0.63281


 20%|████████████████▎                                                               | 159/782 [00:05<00:21, 29.31it/s]

Iter 160.0, Minibatch Loss= 0.831042, Training Accuracy= 0.56250


 22%|█████████████████▎                                                              | 169/782 [00:05<00:21, 28.95it/s]

Iter 170.0, Minibatch Loss= 0.837978, Training Accuracy= 0.58594


 23%|██████████████████▍                                                             | 180/782 [00:06<00:20, 29.03it/s]

Iter 180.0, Minibatch Loss= 0.744098, Training Accuracy= 0.62500


 24%|███████████████████▏                                                            | 187/782 [00:06<00:21, 28.14it/s]

Iter 190.0, Minibatch Loss= 0.852094, Training Accuracy= 0.53906


 25%|████████████████████▎                                                           | 198/782 [00:06<00:20, 28.63it/s]

Iter 200.0, Minibatch Loss= 0.842957, Training Accuracy= 0.59375


 27%|█████████████████████▎                                                          | 208/782 [00:07<00:19, 28.74it/s]

Iter 210.0, Minibatch Loss= 0.825588, Training Accuracy= 0.62500


 28%|██████████████████████▏                                                         | 217/782 [00:07<00:20, 28.22it/s]

Iter 220.0, Minibatch Loss= 0.870565, Training Accuracy= 0.55469


 29%|███████████████████████▌                                                        | 230/782 [00:08<00:19, 28.61it/s]

Iter 230.0, Minibatch Loss= 0.759593, Training Accuracy= 0.62500


 31%|████████████████████████▌                                                       | 240/782 [00:08<00:19, 28.31it/s]

Iter 240.0, Minibatch Loss= 0.972362, Training Accuracy= 0.52344


 32%|█████████████████████████▌                                                      | 250/782 [00:08<00:19, 27.93it/s]

Iter 250.0, Minibatch Loss= 0.834374, Training Accuracy= 0.59375


 33%|██████████████████████████▌                                                     | 260/782 [00:09<00:18, 27.96it/s]

Iter 260.0, Minibatch Loss= 0.764876, Training Accuracy= 0.61719


 34%|███████████████████████████▎                                                    | 267/782 [00:09<00:18, 27.43it/s]

Iter 270.0, Minibatch Loss= 0.766186, Training Accuracy= 0.59375


 36%|████████████████████████████▍                                                   | 278/782 [00:09<00:17, 28.42it/s]

Iter 280.0, Minibatch Loss= 0.702348, Training Accuracy= 0.64844


 37%|█████████████████████████████▎                                                  | 287/782 [00:10<00:17, 27.72it/s]

Iter 290.0, Minibatch Loss= 0.659563, Training Accuracy= 0.64844


 38%|██████████████████████████████▌                                                 | 299/782 [00:10<00:16, 28.82it/s]

Iter 300.0, Minibatch Loss= 0.843841, Training Accuracy= 0.52344


 39%|███████████████████████████████▌                                                | 308/782 [00:10<00:16, 28.12it/s]

Iter 310.0, Minibatch Loss= 0.826100, Training Accuracy= 0.61719


 41%|████████████████████████████████▌                                               | 318/782 [00:11<00:16, 28.13it/s]

Iter 320.0, Minibatch Loss= 0.796986, Training Accuracy= 0.60156


 42%|█████████████████████████████████▌                                              | 328/782 [00:11<00:16, 28.31it/s]

Iter 330.0, Minibatch Loss= 0.799595, Training Accuracy= 0.54688


 43%|██████████████████████████████████▌                                             | 338/782 [00:12<00:15, 28.46it/s]

Iter 340.0, Minibatch Loss= 0.711098, Training Accuracy= 0.57812


 45%|███████████████████████████████████▌                                            | 348/782 [00:12<00:15, 28.67it/s]

Iter 350.0, Minibatch Loss= 0.754884, Training Accuracy= 0.60938


 46%|████████████████████████████████████▌                                           | 357/782 [00:12<00:15, 28.09it/s]

Iter 360.0, Minibatch Loss= 0.678625, Training Accuracy= 0.69531


 47%|█████████████████████████████████████▋                                          | 368/782 [00:13<00:14, 28.15it/s]

Iter 370.0, Minibatch Loss= 0.694443, Training Accuracy= 0.63281


 49%|██████████████████████████████████████▊                                         | 380/782 [00:13<00:14, 27.79it/s]

Iter 380.0, Minibatch Loss= 0.834812, Training Accuracy= 0.56250


 50%|███████████████████████████████████████▊                                        | 389/782 [00:13<00:13, 28.11it/s]

Iter 390.0, Minibatch Loss= 0.701958, Training Accuracy= 0.69531


 51%|████████████████████████████████████████▉                                       | 400/782 [00:14<00:13, 28.37it/s]

Iter 400.0, Minibatch Loss= 0.723205, Training Accuracy= 0.60156


 52%|█████████████████████████████████████████▉                                      | 410/782 [00:14<00:12, 28.74it/s]

Iter 410.0, Minibatch Loss= 0.764072, Training Accuracy= 0.61719


 53%|██████████████████████████████████████████▋                                     | 417/782 [00:14<00:13, 28.06it/s]

Iter 420.0, Minibatch Loss= 0.780442, Training Accuracy= 0.53906


 55%|███████████████████████████████████████████▊                                    | 428/782 [00:15<00:12, 28.29it/s]

Iter 430.0, Minibatch Loss= 0.778343, Training Accuracy= 0.57812


 56%|████████████████████████████████████████████▊                                   | 438/782 [00:15<00:12, 28.36it/s]

Iter 440.0, Minibatch Loss= 0.790354, Training Accuracy= 0.61719


 57%|█████████████████████████████████████████████▊                                  | 448/782 [00:15<00:11, 28.45it/s]

Iter 450.0, Minibatch Loss= 0.827736, Training Accuracy= 0.50781


 59%|███████████████████████████████████████████████                                 | 460/782 [00:16<00:11, 28.57it/s]

Iter 460.0, Minibatch Loss= 0.711435, Training Accuracy= 0.63281


 60%|████████████████████████████████████████████████                                | 470/782 [00:16<00:10, 28.43it/s]

Iter 470.0, Minibatch Loss= 0.782631, Training Accuracy= 0.57031


 61%|█████████████████████████████████████████████████                               | 479/782 [00:17<00:10, 27.65it/s]

Iter 480.0, Minibatch Loss= 0.784244, Training Accuracy= 0.57812


 63%|██████████████████████████████████████████████████                              | 489/782 [00:17<00:10, 28.30it/s]

Iter 490.0, Minibatch Loss= 0.726464, Training Accuracy= 0.63281


 64%|███████████████████████████████████████████████████▏                            | 500/782 [00:17<00:09, 28.66it/s]

Iter 500.0, Minibatch Loss= 0.714882, Training Accuracy= 0.61719


 65%|███████████████████████████████████████████████████▊                            | 507/782 [00:18<00:09, 28.14it/s]

Iter 510.0, Minibatch Loss= 0.688685, Training Accuracy= 0.60938


 66%|████████████████████████████████████████████████████▉                           | 517/782 [00:18<00:09, 27.93it/s]

Iter 520.0, Minibatch Loss= 0.663965, Training Accuracy= 0.60938


 68%|██████████████████████████████████████████████████████                          | 528/782 [00:18<00:08, 28.49it/s]

Iter 530.0, Minibatch Loss= 0.720231, Training Accuracy= 0.67969


 69%|███████████████████████████████████████████████████████                         | 538/782 [00:19<00:08, 27.90it/s]

Iter 540.0, Minibatch Loss= 0.749136, Training Accuracy= 0.59375


 70%|████████████████████████████████████████████████████████                        | 548/782 [00:19<00:08, 28.28it/s]

Iter 550.0, Minibatch Loss= 0.724256, Training Accuracy= 0.58594


 71%|█████████████████████████████████████████████████████████                       | 558/782 [00:19<00:07, 28.53it/s]

Iter 560.0, Minibatch Loss= 0.725596, Training Accuracy= 0.61719


 73%|██████████████████████████████████████████████████████████                      | 567/782 [00:20<00:07, 28.01it/s]

Iter 570.0, Minibatch Loss= 0.572037, Training Accuracy= 0.70312


 74%|███████████████████████████████████████████████████████████▏                    | 578/782 [00:20<00:07, 28.50it/s]

Iter 580.0, Minibatch Loss= 0.705968, Training Accuracy= 0.67188


 75%|████████████████████████████████████████████████████████████▏                   | 588/782 [00:21<00:06, 28.36it/s]

Iter 590.0, Minibatch Loss= 0.741951, Training Accuracy= 0.61719


 76%|█████████████████████████████████████████████████████████████                   | 597/782 [00:21<00:06, 27.92it/s]

Iter 600.0, Minibatch Loss= 0.762391, Training Accuracy= 0.60938


 78%|██████████████████████████████████████████████████████████████▏                 | 608/782 [00:21<00:06, 28.29it/s]

Iter 610.0, Minibatch Loss= 0.736693, Training Accuracy= 0.56250


 79%|███████████████████████████████████████████████████████████████▏                | 618/782 [00:22<00:05, 28.50it/s]

Iter 620.0, Minibatch Loss= 0.690997, Training Accuracy= 0.66406


 80%|████████████████████████████████████████████████████████████████▏               | 628/782 [00:22<00:05, 28.11it/s]

Iter 630.0, Minibatch Loss= 0.632014, Training Accuracy= 0.65625


 82%|█████████████████████████████████████████████████████████████████▎              | 638/782 [00:22<00:05, 27.95it/s]

Iter 640.0, Minibatch Loss= 0.590927, Training Accuracy= 0.69531


 83%|██████████████████████████████████████████████████████████████████▍             | 649/782 [00:23<00:04, 28.54it/s]

Iter 650.0, Minibatch Loss= 0.606949, Training Accuracy= 0.71875


 84%|███████████████████████████████████████████████████████████████████▍            | 659/782 [00:23<00:04, 28.22it/s]

Iter 660.0, Minibatch Loss= 0.645567, Training Accuracy= 0.68750


 86%|████████████████████████████████████████████████████████████████████▍           | 669/782 [00:23<00:03, 28.33it/s]

Iter 670.0, Minibatch Loss= 0.596402, Training Accuracy= 0.72656


 87%|█████████████████████████████████████████████████████████████████████▌          | 680/782 [00:24<00:03, 28.44it/s]

Iter 680.0, Minibatch Loss= 0.630388, Training Accuracy= 0.67969


 88%|██████████████████████████████████████████████████████████████████████▌         | 690/782 [00:24<00:03, 28.14it/s]

Iter 690.0, Minibatch Loss= 0.634138, Training Accuracy= 0.67188


 90%|███████████████████████████████████████████████████████████████████████▌        | 700/782 [00:25<00:02, 28.00it/s]

Iter 700.0, Minibatch Loss= 0.713154, Training Accuracy= 0.60938


 91%|████████████████████████████████████████████████████████████████████████▋       | 710/782 [00:25<00:02, 28.26it/s]

Iter 710.0, Minibatch Loss= 0.495922, Training Accuracy= 0.76562


 92%|█████████████████████████████████████████████████████████████████████████▋      | 720/782 [00:25<00:02, 27.50it/s]

Iter 720.0, Minibatch Loss= 0.633520, Training Accuracy= 0.70312


 93%|██████████████████████████████████████████████████████████████████████████▋     | 730/782 [00:26<00:01, 27.90it/s]

Iter 730.0, Minibatch Loss= 0.551691, Training Accuracy= 0.74219


 94%|███████████████████████████████████████████████████████████████████████████▍    | 737/782 [00:26<00:01, 27.49it/s]

Iter 740.0, Minibatch Loss= 0.581075, Training Accuracy= 0.69531


 96%|████████████████████████████████████████████████████████████████████████████▌   | 748/782 [00:26<00:01, 27.92it/s]

Iter 750.0, Minibatch Loss= 0.571298, Training Accuracy= 0.71094


 97%|█████████████████████████████████████████████████████████████████████████████▌  | 758/782 [00:27<00:00, 27.96it/s]

Iter 760.0, Minibatch Loss= 0.616051, Training Accuracy= 0.69531


 98%|██████████████████████████████████████████████████████████████████████████████▍ | 767/782 [00:27<00:00, 27.79it/s]

Iter 770.0, Minibatch Loss= 0.548451, Training Accuracy= 0.72656


 99%|███████████████████████████████████████████████████████████████████████████████▌| 778/782 [00:27<00:00, 28.56it/s]

Iter 780.0, Minibatch Loss= 0.623552, Training Accuracy= 0.69531


100%|████████████████████████████████████████████████████████████████████████████████| 782/782 [00:28<00:00, 27.84it/s]


EPOCH  2


  0%|                                                                                          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.690553, Training Accuracy= 0.65625


  1%|▋                                                                                 | 7/782 [00:00<00:31, 24.50it/s]

Iter 10.0, Minibatch Loss= 0.688524, Training Accuracy= 0.67969


  2%|█▊                                                                               | 18/782 [00:00<00:27, 27.43it/s]

Iter 20.0, Minibatch Loss= 0.713786, Training Accuracy= 0.64844


  4%|██▉                                                                              | 28/782 [00:01<00:26, 28.21it/s]

Iter 30.0, Minibatch Loss= 0.490991, Training Accuracy= 0.74219


  5%|███▉                                                                             | 38/782 [00:01<00:26, 28.12it/s]

Iter 40.0, Minibatch Loss= 0.589527, Training Accuracy= 0.75781


  6%|████▊                                                                            | 47/782 [00:01<00:26, 27.58it/s]

Iter 50.0, Minibatch Loss= 0.642641, Training Accuracy= 0.66406


  7%|█████▉                                                                           | 57/782 [00:02<00:25, 28.10it/s]

Iter 60.0, Minibatch Loss= 0.661249, Training Accuracy= 0.66406


  9%|███████                                                                          | 68/782 [00:02<00:24, 28.86it/s]

Iter 70.0, Minibatch Loss= 0.614098, Training Accuracy= 0.75000


 10%|████████                                                                         | 78/782 [00:02<00:24, 28.21it/s]

Iter 80.0, Minibatch Loss= 0.693334, Training Accuracy= 0.64062


 11%|█████████                                                                        | 88/782 [00:03<00:24, 28.33it/s]

Iter 90.0, Minibatch Loss= 0.499464, Training Accuracy= 0.75781


 12%|██████████                                                                       | 97/782 [00:03<00:24, 28.10it/s]

Iter 100.0, Minibatch Loss= 0.581287, Training Accuracy= 0.72656


 14%|███████████                                                                     | 108/782 [00:03<00:23, 28.53it/s]

Iter 110.0, Minibatch Loss= 0.656910, Training Accuracy= 0.74219


 15%|████████████                                                                    | 118/782 [00:04<00:23, 28.63it/s]

Iter 120.0, Minibatch Loss= 0.569303, Training Accuracy= 0.71875


 16%|█████████████                                                                   | 128/782 [00:04<00:23, 28.22it/s]

Iter 130.0, Minibatch Loss= 0.647685, Training Accuracy= 0.71094


 18%|██████████████                                                                  | 138/782 [00:04<00:22, 28.34it/s]

Iter 140.0, Minibatch Loss= 0.608521, Training Accuracy= 0.74219


 19%|███████████████                                                                 | 147/782 [00:05<00:23, 27.55it/s]

Iter 150.0, Minibatch Loss= 0.649826, Training Accuracy= 0.64062


 20%|████████████████▏                                                               | 158/782 [00:05<00:22, 27.74it/s]

Iter 160.0, Minibatch Loss= 0.628962, Training Accuracy= 0.74219


 21%|█████████████████▏                                                              | 168/782 [00:06<00:22, 27.79it/s]

Iter 170.0, Minibatch Loss= 0.607836, Training Accuracy= 0.67969


 23%|██████████████████▏                                                             | 178/782 [00:06<00:21, 28.28it/s]

Iter 180.0, Minibatch Loss= 0.545673, Training Accuracy= 0.79688


 24%|███████████████████▏                                                            | 187/782 [00:06<00:21, 27.56it/s]

Iter 190.0, Minibatch Loss= 0.483434, Training Accuracy= 0.78125


 25%|████████████████████▎                                                           | 198/782 [00:07<00:20, 28.20it/s]

Iter 200.0, Minibatch Loss= 0.536017, Training Accuracy= 0.69531


 26%|█████████████████████▏                                                          | 207/782 [00:07<00:21, 27.17it/s]

Iter 210.0, Minibatch Loss= 0.554788, Training Accuracy= 0.78906


 28%|██████████████████████▎                                                         | 218/782 [00:07<00:19, 28.25it/s]

Iter 220.0, Minibatch Loss= 0.628155, Training Accuracy= 0.72656


 29%|███████████████████████▎                                                        | 228/782 [00:08<00:19, 28.57it/s]

Iter 230.0, Minibatch Loss= 0.580874, Training Accuracy= 0.74219


 30%|████████████████████████▎                                                       | 238/782 [00:08<00:19, 28.41it/s]

Iter 240.0, Minibatch Loss= 0.617472, Training Accuracy= 0.71094


 32%|█████████████████████████▍                                                      | 249/782 [00:08<00:18, 28.81it/s]

Iter 250.0, Minibatch Loss= 0.424419, Training Accuracy= 0.83594


 33%|██████████████████████████▍                                                     | 259/782 [00:09<00:18, 28.65it/s]

Iter 260.0, Minibatch Loss= 0.475197, Training Accuracy= 0.76562


 34%|███████████████████████████▌                                                    | 269/782 [00:09<00:18, 28.35it/s]

Iter 270.0, Minibatch Loss= 0.516043, Training Accuracy= 0.80469


 36%|████████████████████████████▌                                                   | 279/782 [00:10<00:17, 28.51it/s]

Iter 280.0, Minibatch Loss= 0.494262, Training Accuracy= 0.80469


 37%|█████████████████████████████▌                                                  | 289/782 [00:10<00:17, 28.64it/s]

Iter 290.0, Minibatch Loss= 0.603957, Training Accuracy= 0.73438


 38%|██████████████████████████████▍                                                 | 298/782 [00:10<00:17, 27.91it/s]

Iter 300.0, Minibatch Loss= 0.530082, Training Accuracy= 0.75000


 39%|███████████████████████████████▌                                                | 308/782 [00:11<00:16, 27.95it/s]

Iter 310.0, Minibatch Loss= 0.526573, Training Accuracy= 0.80469


 41%|████████████████████████████████▍                                               | 317/782 [00:11<00:16, 27.61it/s]

Iter 320.0, Minibatch Loss= 0.539948, Training Accuracy= 0.77344


 42%|█████████████████████████████████▌                                              | 328/782 [00:11<00:16, 28.24it/s]

Iter 330.0, Minibatch Loss= 0.514473, Training Accuracy= 0.75781


 43%|██████████████████████████████████▍                                             | 337/782 [00:12<00:16, 27.16it/s]

Iter 340.0, Minibatch Loss= 0.575603, Training Accuracy= 0.78125


 45%|███████████████████████████████████▌                                            | 348/782 [00:12<00:15, 28.16it/s]

Iter 350.0, Minibatch Loss= 0.572480, Training Accuracy= 0.78125


 46%|████████████████████████████████████▌                                           | 357/782 [00:12<00:15, 27.68it/s]

Iter 360.0, Minibatch Loss= 0.596434, Training Accuracy= 0.76562


 47%|█████████████████████████████████████▋                                          | 368/782 [00:13<00:14, 28.19it/s]

Iter 370.0, Minibatch Loss= 0.472432, Training Accuracy= 0.78906


 48%|██████████████████████████████████████▋                                         | 378/782 [00:13<00:14, 27.92it/s]

Iter 380.0, Minibatch Loss= 0.433746, Training Accuracy= 0.81250


 50%|███████████████████████████████████████▊                                        | 389/782 [00:14<00:13, 28.57it/s]

Iter 390.0, Minibatch Loss= 0.537531, Training Accuracy= 0.75000


 51%|████████████████████████████████████████▊                                       | 399/782 [00:14<00:13, 28.55it/s]

Iter 400.0, Minibatch Loss= 0.520076, Training Accuracy= 0.78906


 52%|█████████████████████████████████████████▊                                      | 409/782 [00:14<00:13, 28.63it/s]

Iter 410.0, Minibatch Loss= 0.559134, Training Accuracy= 0.78125


 53%|██████████████████████████████████████████▊                                     | 418/782 [00:15<00:12, 28.17it/s]

Iter 420.0, Minibatch Loss= 0.526240, Training Accuracy= 0.80469


 55%|███████████████████████████████████████████▊                                    | 428/782 [00:15<00:12, 28.13it/s]

Iter 430.0, Minibatch Loss= 0.476201, Training Accuracy= 0.82031


 56%|████████████████████████████████████████████▊                                   | 438/782 [00:15<00:12, 28.20it/s]

Iter 440.0, Minibatch Loss= 0.468172, Training Accuracy= 0.84375


 57%|█████████████████████████████████████████████▊                                  | 448/782 [00:16<00:11, 28.47it/s]

Iter 450.0, Minibatch Loss= 0.540612, Training Accuracy= 0.77344


 59%|██████████████████████████████████████████████▊                                 | 458/782 [00:16<00:11, 28.46it/s]

Iter 460.0, Minibatch Loss= 0.457109, Training Accuracy= 0.82812


 60%|███████████████████████████████████████████████▉                                | 468/782 [00:16<00:11, 28.09it/s]

Iter 470.0, Minibatch Loss= 0.417790, Training Accuracy= 0.80469


 61%|████████████████████████████████████████████████▉                               | 478/782 [00:17<00:10, 28.62it/s]

Iter 480.0, Minibatch Loss= 0.498647, Training Accuracy= 0.77344


 62%|█████████████████████████████████████████████████▊                              | 487/782 [00:17<00:10, 27.45it/s]

Iter 490.0, Minibatch Loss= 0.450160, Training Accuracy= 0.81250


 64%|██████████████████████████████████████████████████▉                             | 498/782 [00:18<00:10, 28.05it/s]

Iter 500.0, Minibatch Loss= 0.500993, Training Accuracy= 0.78906


 65%|███████████████████████████████████████████████████▊                            | 507/782 [00:18<00:10, 27.26it/s]

Iter 510.0, Minibatch Loss= 0.504530, Training Accuracy= 0.78906


 66%|████████████████████████████████████████████████████▉                           | 518/782 [00:18<00:09, 28.15it/s]

Iter 520.0, Minibatch Loss= 0.414964, Training Accuracy= 0.85156


 67%|█████████████████████████████████████████████████████▉                          | 527/782 [00:19<00:09, 27.70it/s]

Iter 530.0, Minibatch Loss= 0.454884, Training Accuracy= 0.82031


 69%|███████████████████████████████████████████████████████                         | 538/782 [00:19<00:08, 28.44it/s]

Iter 540.0, Minibatch Loss= 0.554622, Training Accuracy= 0.75781


 70%|████████████████████████████████████████████████████████                        | 548/782 [00:19<00:08, 28.28it/s]

Iter 550.0, Minibatch Loss= 0.472838, Training Accuracy= 0.82031


 71%|█████████████████████████████████████████████████████████                       | 558/782 [00:20<00:07, 28.09it/s]

Iter 560.0, Minibatch Loss= 0.571621, Training Accuracy= 0.72656


 73%|██████████████████████████████████████████████████████████                      | 568/782 [00:20<00:07, 28.01it/s]

Iter 570.0, Minibatch Loss= 0.579311, Training Accuracy= 0.74219


 74%|███████████████████████████████████████████████████████████▏                    | 578/782 [00:20<00:07, 28.07it/s]

Iter 580.0, Minibatch Loss= 0.391586, Training Accuracy= 0.86719


 75%|████████████████████████████████████████████████████████████                    | 587/782 [00:21<00:06, 28.13it/s]

Iter 590.0, Minibatch Loss= 0.438732, Training Accuracy= 0.81250


 76%|█████████████████████████████████████████████████████████████▏                  | 598/782 [00:21<00:06, 28.37it/s]

Iter 600.0, Minibatch Loss= 0.448940, Training Accuracy= 0.80469


 78%|██████████████████████████████████████████████████████████████                  | 607/782 [00:21<00:06, 28.22it/s]

Iter 610.0, Minibatch Loss= 0.491402, Training Accuracy= 0.82812


 79%|███████████████████████████████████████████████████████████████▏                | 618/782 [00:22<00:05, 28.82it/s]

Iter 620.0, Minibatch Loss= 0.515872, Training Accuracy= 0.80469


 80%|████████████████████████████████████████████████████████████████▏               | 627/782 [00:22<00:05, 28.00it/s]

Iter 630.0, Minibatch Loss= 0.539763, Training Accuracy= 0.75781


 82%|█████████████████████████████████████████████████████████████████▎              | 638/782 [00:23<00:05, 28.35it/s]

Iter 640.0, Minibatch Loss= 0.427890, Training Accuracy= 0.85938


 83%|██████████████████████████████████████████████████████████████████▎             | 648/782 [00:23<00:04, 28.14it/s]

Iter 650.0, Minibatch Loss= 0.379537, Training Accuracy= 0.85938


 84%|███████████████████████████████████████████████████████████████████▎            | 658/782 [00:23<00:04, 28.16it/s]

Iter 660.0, Minibatch Loss= 0.473124, Training Accuracy= 0.82031


 85%|████████████████████████████████████████████████████████████████████▎           | 668/782 [00:24<00:04, 28.30it/s]

Iter 670.0, Minibatch Loss= 0.477042, Training Accuracy= 0.80469


 87%|█████████████████████████████████████████████████████████████████████▎          | 677/782 [00:24<00:03, 27.22it/s]

Iter 680.0, Minibatch Loss= 0.502853, Training Accuracy= 0.76562


 88%|██████████████████████████████████████████████████████████████████████▍         | 688/782 [00:24<00:03, 28.45it/s]

Iter 690.0, Minibatch Loss= 0.407934, Training Accuracy= 0.85156


 89%|███████████████████████████████████████████████████████████████████████▍        | 698/782 [00:25<00:02, 28.09it/s]

Iter 700.0, Minibatch Loss= 0.519050, Training Accuracy= 0.75000


 91%|████████████████████████████████████████████████████████████████████████▍       | 708/782 [00:25<00:02, 27.97it/s]

Iter 710.0, Minibatch Loss= 0.493202, Training Accuracy= 0.79688


 92%|█████████████████████████████████████████████████████████████████████████▍      | 718/782 [00:25<00:02, 27.82it/s]

Iter 720.0, Minibatch Loss= 0.304671, Training Accuracy= 0.85156


 93%|██████████████████████████████████████████████████████████████████████████▍     | 728/782 [00:26<00:01, 28.40it/s]

Iter 730.0, Minibatch Loss= 0.362091, Training Accuracy= 0.88281


 94%|███████████████████████████████████████████████████████████████████████████▍    | 738/782 [00:26<00:01, 28.19it/s]

Iter 740.0, Minibatch Loss= 0.444539, Training Accuracy= 0.82812


 96%|████████████████████████████████████████████████████████████████████████████▌   | 748/782 [00:27<00:01, 28.42it/s]

Iter 750.0, Minibatch Loss= 0.522703, Training Accuracy= 0.77344


 97%|█████████████████████████████████████████████████████████████████████████████▌  | 758/782 [00:27<00:00, 28.08it/s]

Iter 760.0, Minibatch Loss= 0.273935, Training Accuracy= 0.90625


 98%|██████████████████████████████████████████████████████████████████████████████▌ | 768/782 [00:27<00:00, 28.35it/s]

Iter 770.0, Minibatch Loss= 0.363968, Training Accuracy= 0.82031


 99%|███████████████████████████████████████████████████████████████████████████████▌| 778/782 [00:28<00:00, 28.35it/s]

Iter 780.0, Minibatch Loss= 0.436539, Training Accuracy= 0.80469


100%|████████████████████████████████████████████████████████████████████████████████| 782/782 [00:28<00:00, 27.64it/s]


EPOCH  3


  0%|                                                                                          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.463921, Training Accuracy= 0.78906


  1%|▉                                                                                 | 9/782 [00:00<00:33, 23.15it/s]

Iter 10.0, Minibatch Loss= 0.502490, Training Accuracy= 0.77344


  2%|█▉                                                                               | 19/782 [00:00<00:29, 26.16it/s]

Iter 20.0, Minibatch Loss= 0.384749, Training Accuracy= 0.79688


  4%|███                                                                              | 29/782 [00:01<00:27, 27.32it/s]

Iter 30.0, Minibatch Loss= 0.373054, Training Accuracy= 0.85156


  5%|████                                                                             | 39/782 [00:01<00:26, 27.84it/s]

Iter 40.0, Minibatch Loss= 0.511701, Training Accuracy= 0.78906


  6%|█████▏                                                                           | 50/782 [00:01<00:25, 28.28it/s]

Iter 50.0, Minibatch Loss= 0.417314, Training Accuracy= 0.82031


  8%|██████▏                                                                          | 60/782 [00:02<00:25, 27.93it/s]

Iter 60.0, Minibatch Loss= 0.393850, Training Accuracy= 0.82812


  9%|███████▎                                                                         | 70/782 [00:02<00:25, 27.39it/s]

Iter 70.0, Minibatch Loss= 0.388105, Training Accuracy= 0.89062


 10%|████████▎                                                                        | 80/782 [00:02<00:24, 28.21it/s]

Iter 80.0, Minibatch Loss= 0.384888, Training Accuracy= 0.88281


 12%|█████████▎                                                                       | 90/782 [00:03<00:24, 28.09it/s]

Iter 90.0, Minibatch Loss= 0.466599, Training Accuracy= 0.83594


 12%|██████████                                                                       | 97/782 [00:03<00:25, 26.87it/s]

Iter 100.0, Minibatch Loss= 0.305371, Training Accuracy= 0.91406


 14%|███████████▎                                                                    | 110/782 [00:04<00:27, 24.38it/s]

Iter 110.0, Minibatch Loss= 0.382040, Training Accuracy= 0.81250


 15%|████████████▏                                                                   | 119/782 [00:04<00:29, 22.34it/s]

Iter 120.0, Minibatch Loss= 0.398253, Training Accuracy= 0.84375


 16%|█████████████                                                                   | 128/782 [00:04<00:30, 21.71it/s]

Iter 130.0, Minibatch Loss= 0.411435, Training Accuracy= 0.85938


 18%|██████████████▎                                                                 | 140/782 [00:05<00:28, 22.17it/s]

Iter 140.0, Minibatch Loss= 0.335535, Training Accuracy= 0.88281


 19%|███████████████▏                                                                | 149/782 [00:05<00:28, 21.98it/s]

Iter 150.0, Minibatch Loss= 0.319552, Training Accuracy= 0.86719


 20%|████████████████▏                                                               | 158/782 [00:06<00:27, 22.31it/s]

Iter 160.0, Minibatch Loss= 0.314445, Training Accuracy= 0.88281


 21%|█████████████████                                                               | 167/782 [00:06<00:28, 21.35it/s]

Iter 170.0, Minibatch Loss= 0.380299, Training Accuracy= 0.85938


 23%|██████████████████▏                                                             | 178/782 [00:07<00:23, 26.09it/s]

Iter 180.0, Minibatch Loss= 0.424604, Training Accuracy= 0.84375


 24%|███████████████████▎                                                            | 189/782 [00:07<00:21, 27.61it/s]

Iter 190.0, Minibatch Loss= 0.422461, Training Accuracy= 0.85938


 25%|████████████████████▎                                                           | 199/782 [00:07<00:20, 28.21it/s]

Iter 200.0, Minibatch Loss= 0.456816, Training Accuracy= 0.80469


 27%|█████████████████████▍                                                          | 209/782 [00:08<00:20, 28.41it/s]

Iter 210.0, Minibatch Loss= 0.429455, Training Accuracy= 0.82812


 28%|██████████████████████▍                                                         | 219/782 [00:08<00:20, 27.72it/s]

Iter 220.0, Minibatch Loss= 0.326445, Training Accuracy= 0.89844


 29%|███████████████████████▍                                                        | 229/782 [00:09<00:19, 28.13it/s]

Iter 230.0, Minibatch Loss= 0.414168, Training Accuracy= 0.82031


 31%|████████████████████████▍                                                       | 239/782 [00:09<00:19, 27.86it/s]

Iter 240.0, Minibatch Loss= 0.243247, Training Accuracy= 0.92969


 32%|█████████████████████████▍                                                      | 249/782 [00:09<00:18, 28.21it/s]

Iter 250.0, Minibatch Loss= 0.502166, Training Accuracy= 0.75781


 33%|██████████████████████████▍                                                     | 259/782 [00:10<00:18, 28.32it/s]

Iter 260.0, Minibatch Loss= 0.489209, Training Accuracy= 0.80469


 34%|███████████████████████████▌                                                    | 269/782 [00:10<00:18, 28.47it/s]

Iter 270.0, Minibatch Loss= 0.283411, Training Accuracy= 0.89062


 36%|████████████████████████████▍                                                   | 278/782 [00:10<00:17, 28.05it/s]

Iter 280.0, Minibatch Loss= 0.467830, Training Accuracy= 0.81250


 37%|█████████████████████████████▍                                                  | 288/782 [00:11<00:17, 28.07it/s]

Iter 290.0, Minibatch Loss= 0.369215, Training Accuracy= 0.89062


 38%|██████████████████████████████▍                                                 | 298/782 [00:11<00:17, 28.06it/s]

Iter 300.0, Minibatch Loss= 0.346386, Training Accuracy= 0.89062


 39%|███████████████████████████████▍                                                | 307/782 [00:11<00:17, 27.80it/s]

Iter 310.0, Minibatch Loss= 0.293902, Training Accuracy= 0.87500


 41%|████████████████████████████████▌                                               | 318/782 [00:12<00:16, 28.02it/s]

Iter 320.0, Minibatch Loss= 0.335186, Training Accuracy= 0.88281


 42%|█████████████████████████████████▌                                              | 328/782 [00:12<00:16, 27.85it/s]

Iter 330.0, Minibatch Loss= 0.211365, Training Accuracy= 0.95312


 43%|██████████████████████████████████▌                                             | 338/782 [00:12<00:15, 27.80it/s]

Iter 340.0, Minibatch Loss= 0.340296, Training Accuracy= 0.86719


 45%|███████████████████████████████████▌                                            | 348/782 [00:13<00:15, 28.17it/s]

Iter 350.0, Minibatch Loss= 0.324736, Training Accuracy= 0.86719


 46%|████████████████████████████████████▌                                           | 357/782 [00:13<00:14, 28.36it/s]

Iter 360.0, Minibatch Loss= 0.433206, Training Accuracy= 0.83594


 47%|█████████████████████████████████████▋                                          | 369/782 [00:14<00:14, 28.90it/s]

Iter 370.0, Minibatch Loss= 0.332373, Training Accuracy= 0.89062


 48%|██████████████████████████████████████▊                                         | 379/782 [00:14<00:14, 28.46it/s]

Iter 380.0, Minibatch Loss= 0.280323, Training Accuracy= 0.91406


 50%|███████████████████████████████████████▊                                        | 389/782 [00:14<00:13, 28.09it/s]

Iter 390.0, Minibatch Loss= 0.260938, Training Accuracy= 0.92188


 51%|████████████████████████████████████████▊                                       | 399/782 [00:15<00:13, 27.37it/s]

Iter 400.0, Minibatch Loss= 0.396923, Training Accuracy= 0.85938


 52%|█████████████████████████████████████████▋                                      | 408/782 [00:15<00:13, 27.67it/s]

Iter 410.0, Minibatch Loss= 0.313136, Training Accuracy= 0.86719


 53%|██████████████████████████████████████████▊                                     | 418/782 [00:15<00:13, 27.89it/s]

Iter 420.0, Minibatch Loss= 0.368874, Training Accuracy= 0.82812


 55%|███████████████████████████████████████████▋                                    | 427/782 [00:16<00:12, 27.92it/s]

Iter 430.0, Minibatch Loss= 0.392236, Training Accuracy= 0.84375


 56%|████████████████████████████████████████████▊                                   | 438/782 [00:16<00:11, 28.71it/s]

Iter 440.0, Minibatch Loss= 0.193478, Training Accuracy= 0.94531


 57%|█████████████████████████████████████████████▋                                  | 447/782 [00:16<00:11, 28.33it/s]

Iter 450.0, Minibatch Loss= 0.338200, Training Accuracy= 0.90625


 59%|██████████████████████████████████████████████▊                                 | 458/782 [00:17<00:11, 28.73it/s]

Iter 460.0, Minibatch Loss= 0.322195, Training Accuracy= 0.85156


 60%|███████████████████████████████████████████████▉                                | 468/782 [00:17<00:10, 28.66it/s]

Iter 470.0, Minibatch Loss= 0.291353, Training Accuracy= 0.89844


 61%|████████████████████████████████████████████████▉                               | 478/782 [00:18<00:10, 28.20it/s]

Iter 480.0, Minibatch Loss= 0.254864, Training Accuracy= 0.91406


 62%|█████████████████████████████████████████████████▉                              | 488/782 [00:18<00:10, 28.43it/s]

Iter 490.0, Minibatch Loss= 0.261756, Training Accuracy= 0.92188


 64%|██████████████████████████████████████████████████▉                             | 498/782 [00:18<00:10, 28.32it/s]

Iter 500.0, Minibatch Loss= 0.305631, Training Accuracy= 0.87500


 65%|███████████████████████████████████████████████████▉                            | 508/782 [00:19<00:09, 28.21it/s]

Iter 510.0, Minibatch Loss= 0.285466, Training Accuracy= 0.91406


 66%|████████████████████████████████████████████████████▉                           | 517/782 [00:19<00:09, 27.91it/s]

Iter 520.0, Minibatch Loss= 0.295230, Training Accuracy= 0.87500


 68%|██████████████████████████████████████████████████████                          | 528/782 [00:19<00:08, 28.28it/s]

Iter 530.0, Minibatch Loss= 0.400322, Training Accuracy= 0.82812


 69%|███████████████████████████████████████████████████████                         | 538/782 [00:20<00:08, 28.09it/s]

Iter 540.0, Minibatch Loss= 0.328100, Training Accuracy= 0.88281


 70%|████████████████████████████████████████████████████████                        | 548/782 [00:20<00:08, 28.39it/s]

Iter 550.0, Minibatch Loss= 0.264141, Training Accuracy= 0.90625


 71%|█████████████████████████████████████████████████████████                       | 558/782 [00:20<00:07, 28.53it/s]

Iter 560.0, Minibatch Loss= 0.346497, Training Accuracy= 0.86719


 73%|██████████████████████████████████████████████████████████                      | 568/782 [00:21<00:07, 28.38it/s]

Iter 570.0, Minibatch Loss= 0.355277, Training Accuracy= 0.87500


 74%|███████████████████████████████████████████████████████████▏                    | 578/782 [00:21<00:07, 28.62it/s]

Iter 580.0, Minibatch Loss= 0.270827, Training Accuracy= 0.88281


 75%|████████████████████████████████████████████████████████████▏                   | 588/782 [00:21<00:06, 28.87it/s]

Iter 590.0, Minibatch Loss= 0.265219, Training Accuracy= 0.91406


 77%|█████████████████████████████████████████████████████████████▍                  | 600/782 [00:22<00:06, 28.19it/s]

Iter 600.0, Minibatch Loss= 0.314260, Training Accuracy= 0.88281


 78%|██████████████████████████████████████████████████████████████                  | 607/782 [00:22<00:06, 27.05it/s]

Iter 610.0, Minibatch Loss= 0.229237, Training Accuracy= 0.89062


 79%|███████████████████████████████████████████████████████████████                 | 617/782 [00:23<00:05, 27.96it/s]

Iter 620.0, Minibatch Loss= 0.361877, Training Accuracy= 0.89062


 80%|████████████████████████████████████████████████████████████████▏               | 628/782 [00:23<00:05, 28.71it/s]

Iter 630.0, Minibatch Loss= 0.254518, Training Accuracy= 0.89844


 82%|█████████████████████████████████████████████████████████████████▎              | 638/782 [00:23<00:05, 28.39it/s]

Iter 640.0, Minibatch Loss= 0.374045, Training Accuracy= 0.82031


 83%|██████████████████████████████████████████████████████████████████▎             | 648/782 [00:24<00:04, 28.47it/s]

Iter 650.0, Minibatch Loss= 0.241609, Training Accuracy= 0.89062


 84%|███████████████████████████████████████████████████████████████████▎            | 658/782 [00:24<00:04, 28.27it/s]

Iter 660.0, Minibatch Loss= 0.227740, Training Accuracy= 0.92969


 85%|████████████████████████████████████████████████████████████████████▏           | 667/782 [00:24<00:04, 26.78it/s]

Iter 670.0, Minibatch Loss= 0.364157, Training Accuracy= 0.84375


 87%|█████████████████████████████████████████████████████████████████████▎          | 678/782 [00:25<00:03, 28.04it/s]

Iter 680.0, Minibatch Loss= 0.210307, Training Accuracy= 0.92969


 88%|██████████████████████████████████████████████████████████████████████▍         | 688/782 [00:25<00:03, 28.21it/s]

Iter 690.0, Minibatch Loss= 0.250871, Training Accuracy= 0.91406


 89%|███████████████████████████████████████████████████████████████████████▍        | 698/782 [00:25<00:02, 28.21it/s]

Iter 700.0, Minibatch Loss= 0.181132, Training Accuracy= 0.93750


 91%|████████████████████████████████████████████████████████████████████████▍       | 708/782 [00:26<00:02, 28.38it/s]

Iter 710.0, Minibatch Loss= 0.329116, Training Accuracy= 0.87500


 92%|█████████████████████████████████████████████████████████████████████████▍      | 718/782 [00:26<00:02, 28.33it/s]

Iter 720.0, Minibatch Loss= 0.516553, Training Accuracy= 0.82031


 93%|██████████████████████████████████████████████████████████████████████████▍     | 728/782 [00:27<00:01, 28.41it/s]

Iter 730.0, Minibatch Loss= 0.278037, Training Accuracy= 0.92188


 94%|███████████████████████████████████████████████████████████████████████████▍    | 738/782 [00:27<00:01, 28.59it/s]

Iter 740.0, Minibatch Loss= 0.269736, Training Accuracy= 0.93750


 96%|████████████████████████████████████████████████████████████████████████████▌   | 748/782 [00:27<00:01, 28.19it/s]

Iter 750.0, Minibatch Loss= 0.225746, Training Accuracy= 0.92188


 97%|█████████████████████████████████████████████████████████████████████████████▌  | 758/782 [00:28<00:00, 27.97it/s]

Iter 760.0, Minibatch Loss= 0.353128, Training Accuracy= 0.89844


 98%|██████████████████████████████████████████████████████████████████████████████▌ | 768/782 [00:28<00:00, 28.35it/s]

Iter 770.0, Minibatch Loss= 0.292823, Training Accuracy= 0.86719


 99%|███████████████████████████████████████████████████████████████████████████████▍| 777/782 [00:28<00:00, 27.97it/s]

Iter 780.0, Minibatch Loss= 0.227172, Training Accuracy= 0.92188


100%|████████████████████████████████████████████████████████████████████████████████| 782/782 [00:29<00:00, 26.95it/s]


EPOCH  4


  0%|                                                                                          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.152366, Training Accuracy= 0.96094


  1%|▋                                                                                 | 7/782 [00:00<00:30, 25.16it/s]

Iter 10.0, Minibatch Loss= 0.144049, Training Accuracy= 0.95312


  2%|█▊                                                                               | 18/782 [00:00<00:28, 27.10it/s]

Iter 20.0, Minibatch Loss= 0.264778, Training Accuracy= 0.90625


  4%|██▉                                                                              | 28/782 [00:01<00:27, 27.54it/s]

Iter 30.0, Minibatch Loss= 0.235094, Training Accuracy= 0.91406


  5%|████                                                                             | 39/782 [00:01<00:26, 28.33it/s]

Iter 40.0, Minibatch Loss= 0.221677, Training Accuracy= 0.91406


  6%|████▉                                                                            | 48/782 [00:01<00:26, 27.20it/s]

Iter 50.0, Minibatch Loss= 0.300090, Training Accuracy= 0.89844


  7%|█████▉                                                                           | 57/782 [00:02<00:26, 27.41it/s]

Iter 60.0, Minibatch Loss= 0.203211, Training Accuracy= 0.95312


  9%|███████                                                                          | 68/782 [00:02<00:24, 28.57it/s]

Iter 70.0, Minibatch Loss= 0.185142, Training Accuracy= 0.92188


 10%|████████                                                                         | 78/782 [00:02<00:24, 28.27it/s]

Iter 80.0, Minibatch Loss= 0.197574, Training Accuracy= 0.92188


 11%|█████████                                                                        | 88/782 [00:03<00:24, 28.23it/s]

Iter 90.0, Minibatch Loss= 0.217029, Training Accuracy= 0.91406


 13%|██████████▏                                                                      | 98/782 [00:03<00:24, 28.22it/s]

Iter 100.0, Minibatch Loss= 0.254621, Training Accuracy= 0.92969


 14%|███████████                                                                     | 108/782 [00:03<00:23, 28.24it/s]

Iter 110.0, Minibatch Loss= 0.189912, Training Accuracy= 0.92188


 15%|████████████                                                                    | 118/782 [00:04<00:23, 28.12it/s]

Iter 120.0, Minibatch Loss= 0.261704, Training Accuracy= 0.90625


 16%|█████████████                                                                   | 128/782 [00:04<00:23, 28.13it/s]

Iter 130.0, Minibatch Loss= 0.280165, Training Accuracy= 0.89062


 18%|██████████████                                                                  | 138/782 [00:05<00:22, 28.08it/s]

Iter 140.0, Minibatch Loss= 0.158629, Training Accuracy= 0.93750


 19%|███████████████▏                                                                | 148/782 [00:05<00:22, 28.08it/s]

Iter 150.0, Minibatch Loss= 0.187976, Training Accuracy= 0.92188


 20%|████████████████▏                                                               | 158/782 [00:05<00:22, 28.08it/s]

Iter 160.0, Minibatch Loss= 0.254079, Training Accuracy= 0.92188


 21%|█████████████████▏                                                              | 168/782 [00:06<00:21, 28.37it/s]

Iter 170.0, Minibatch Loss= 0.229851, Training Accuracy= 0.91406


 23%|██████████████████▏                                                             | 178/782 [00:06<00:21, 28.08it/s]

Iter 180.0, Minibatch Loss= 0.246377, Training Accuracy= 0.89844


 24%|███████████████████▍                                                            | 190/782 [00:06<00:20, 28.37it/s]

Iter 190.0, Minibatch Loss= 0.166037, Training Accuracy= 0.95312


 25%|████████████████████▎                                                           | 199/782 [00:07<00:20, 27.95it/s]

Iter 200.0, Minibatch Loss= 0.185767, Training Accuracy= 0.93750


 27%|█████████████████████▎                                                          | 208/782 [00:07<00:21, 27.26it/s]

Iter 210.0, Minibatch Loss= 0.309501, Training Accuracy= 0.87500


 28%|██████████████████████▎                                                         | 218/782 [00:07<00:20, 28.02it/s]

Iter 220.0, Minibatch Loss= 0.338439, Training Accuracy= 0.89844


 29%|███████████████████████▏                                                        | 227/782 [00:08<00:20, 27.68it/s]

Iter 230.0, Minibatch Loss= 0.248225, Training Accuracy= 0.89844


 31%|████████████████████████▍                                                       | 239/782 [00:08<00:18, 28.62it/s]

Iter 240.0, Minibatch Loss= 0.263221, Training Accuracy= 0.92969


 32%|█████████████████████████▎                                                      | 248/782 [00:09<00:19, 27.59it/s]

Iter 250.0, Minibatch Loss= 0.270671, Training Accuracy= 0.92188


 33%|██████████████████████████▍                                                     | 259/782 [00:09<00:18, 28.41it/s]

Iter 260.0, Minibatch Loss= 0.156172, Training Accuracy= 0.94531


 35%|███████████████████████████▌                                                    | 270/782 [00:09<00:17, 28.81it/s]

Iter 270.0, Minibatch Loss= 0.182454, Training Accuracy= 0.92188


 36%|████████████████████████████▌                                                   | 279/782 [00:10<00:18, 27.48it/s]

Iter 280.0, Minibatch Loss= 0.140760, Training Accuracy= 0.94531


 37%|█████████████████████████████▋                                                  | 290/782 [00:10<00:17, 28.22it/s]

Iter 290.0, Minibatch Loss= 0.280653, Training Accuracy= 0.91406


 38%|██████████████████████████████▋                                                 | 300/782 [00:10<00:17, 28.22it/s]

Iter 300.0, Minibatch Loss= 0.246336, Training Accuracy= 0.89062


 40%|███████████████████████████████▋                                                | 310/782 [00:11<00:17, 27.52it/s]

Iter 310.0, Minibatch Loss= 0.147222, Training Accuracy= 0.94531


 41%|████████████████████████████████▋                                               | 320/782 [00:11<00:16, 27.90it/s]

Iter 320.0, Minibatch Loss= 0.176800, Training Accuracy= 0.94531


 42%|█████████████████████████████████▋                                              | 329/782 [00:11<00:16, 27.84it/s]

Iter 330.0, Minibatch Loss= 0.210049, Training Accuracy= 0.93750


 43%|██████████████████████████████████▊                                             | 340/782 [00:12<00:15, 27.67it/s]

Iter 340.0, Minibatch Loss= 0.242559, Training Accuracy= 0.90625


 44%|███████████████████████████████████▍                                            | 347/782 [00:12<00:15, 27.66it/s]

Iter 350.0, Minibatch Loss= 0.148548, Training Accuracy= 0.94531


 46%|████████████████████████████████████▌                                           | 358/782 [00:13<00:15, 28.11it/s]

Iter 360.0, Minibatch Loss= 0.250695, Training Accuracy= 0.91406


 47%|█████████████████████████████████████▋                                          | 368/782 [00:13<00:14, 28.12it/s]

Iter 370.0, Minibatch Loss= 0.194278, Training Accuracy= 0.92969


 48%|██████████████████████████████████████▋                                         | 378/782 [00:13<00:14, 27.93it/s]

Iter 380.0, Minibatch Loss= 0.128598, Training Accuracy= 0.97656


 49%|███████████████████████████████████████▌                                        | 387/782 [00:14<00:14, 27.52it/s]

Iter 390.0, Minibatch Loss= 0.147201, Training Accuracy= 0.96094


 51%|████████████████████████████████████████▋                                       | 398/782 [00:14<00:13, 28.21it/s]

Iter 400.0, Minibatch Loss= 0.135507, Training Accuracy= 0.95312


 52%|█████████████████████████████████████████▋                                      | 408/782 [00:14<00:13, 28.54it/s]

Iter 410.0, Minibatch Loss= 0.125630, Training Accuracy= 0.95312


 54%|██████████████████████████████████████████▊                                     | 419/782 [00:15<00:12, 28.69it/s]

Iter 420.0, Minibatch Loss= 0.129169, Training Accuracy= 0.95312


 55%|███████████████████████████████████████████▊                                    | 428/782 [00:15<00:12, 28.34it/s]

Iter 430.0, Minibatch Loss= 0.104198, Training Accuracy= 0.96875


 56%|████████████████████████████████████████████▋                                   | 437/782 [00:15<00:12, 27.76it/s]

Iter 440.0, Minibatch Loss= 0.211228, Training Accuracy= 0.90625


 57%|█████████████████████████████████████████████▊                                  | 448/782 [00:16<00:11, 28.21it/s]

Iter 450.0, Minibatch Loss= 0.158759, Training Accuracy= 0.94531


 59%|██████████████████████████████████████████████▊                                 | 458/782 [00:16<00:11, 28.42it/s]

Iter 460.0, Minibatch Loss= 0.274919, Training Accuracy= 0.89844


 60%|███████████████████████████████████████████████▉                                | 468/782 [00:16<00:11, 28.07it/s]

Iter 470.0, Minibatch Loss= 0.147880, Training Accuracy= 0.96875


 61%|████████████████████████████████████████████████▉                               | 478/782 [00:17<00:10, 27.74it/s]

Iter 480.0, Minibatch Loss= 0.164305, Training Accuracy= 0.96094


 62%|█████████████████████████████████████████████████▊                              | 487/782 [00:17<00:10, 27.96it/s]

Iter 490.0, Minibatch Loss= 0.195743, Training Accuracy= 0.92188


 64%|██████████████████████████████████████████████████▊                             | 497/782 [00:18<00:10, 28.07it/s]

Iter 500.0, Minibatch Loss= 0.259161, Training Accuracy= 0.92969


 65%|███████████████████████████████████████████████████▊                            | 507/782 [00:18<00:09, 28.00it/s]

Iter 510.0, Minibatch Loss= 0.158376, Training Accuracy= 0.94531


 66%|████████████████████████████████████████████████████▉                           | 518/782 [00:18<00:09, 28.15it/s]

Iter 520.0, Minibatch Loss= 0.205704, Training Accuracy= 0.92969


 68%|██████████████████████████████████████████████████████                          | 528/782 [00:19<00:08, 28.25it/s]

Iter 530.0, Minibatch Loss= 0.190610, Training Accuracy= 0.96094


 69%|███████████████████████████████████████████████████████                         | 538/782 [00:19<00:08, 28.00it/s]

Iter 540.0, Minibatch Loss= 0.163844, Training Accuracy= 0.92969


 70%|████████████████████████████████████████████████████████                        | 548/782 [00:19<00:08, 28.41it/s]

Iter 550.0, Minibatch Loss= 0.153357, Training Accuracy= 0.93750


 71%|█████████████████████████████████████████████████████████▏                      | 559/782 [00:20<00:07, 28.51it/s]

Iter 560.0, Minibatch Loss= 0.180419, Training Accuracy= 0.93750


 73%|██████████████████████████████████████████████████████████▏                     | 569/782 [00:20<00:07, 28.45it/s]

Iter 570.0, Minibatch Loss= 0.128749, Training Accuracy= 0.96875


 74%|███████████████████████████████████████████████████████████▏                    | 578/782 [00:20<00:07, 28.11it/s]

Iter 580.0, Minibatch Loss= 0.184611, Training Accuracy= 0.92188


 75%|████████████████████████████████████████████████████████████▏                   | 588/782 [00:21<00:06, 28.17it/s]

Iter 590.0, Minibatch Loss= 0.121153, Training Accuracy= 0.95312


 76%|█████████████████████████████████████████████████████████████▏                  | 598/782 [00:21<00:06, 28.45it/s]

Iter 600.0, Minibatch Loss= 0.107053, Training Accuracy= 0.97656


 78%|██████████████████████████████████████████████████████████████                  | 607/782 [00:22<00:06, 27.78it/s]

Iter 610.0, Minibatch Loss= 0.147279, Training Accuracy= 0.94531


 79%|███████████████████████████████████████████████████████████████▏                | 618/782 [00:22<00:05, 28.22it/s]

Iter 620.0, Minibatch Loss= 0.125374, Training Accuracy= 0.96094


 80%|████████████████████████████████████████████████████████████████▏               | 628/782 [00:22<00:05, 28.49it/s]

Iter 630.0, Minibatch Loss= 0.123127, Training Accuracy= 0.95312


 82%|█████████████████████████████████████████████████████████████████▎              | 638/782 [00:23<00:05, 28.41it/s]

Iter 640.0, Minibatch Loss= 0.171474, Training Accuracy= 0.93750


 83%|██████████████████████████████████████████████████████████████████▎             | 648/782 [00:23<00:04, 28.06it/s]

Iter 650.0, Minibatch Loss= 0.148912, Training Accuracy= 0.94531


 84%|███████████████████████████████████████████████████████████████████▏            | 657/782 [00:23<00:04, 27.52it/s]

Iter 660.0, Minibatch Loss= 0.184691, Training Accuracy= 0.92969


 85%|████████████████████████████████████████████████████████████████████▏           | 667/782 [00:24<00:04, 27.78it/s]

Iter 670.0, Minibatch Loss= 0.143522, Training Accuracy= 0.94531


 87%|█████████████████████████████████████████████████████████████████████▎          | 678/782 [00:24<00:03, 28.13it/s]

Iter 680.0, Minibatch Loss= 0.203292, Training Accuracy= 0.91406


 88%|██████████████████████████████████████████████████████████████████████▎         | 687/782 [00:24<00:03, 27.75it/s]

Iter 690.0, Minibatch Loss= 0.222428, Training Accuracy= 0.90625


 89%|███████████████████████████████████████████████████████████████████████▍        | 698/782 [00:25<00:02, 28.56it/s]

Iter 700.0, Minibatch Loss= 0.262550, Training Accuracy= 0.92188


 91%|████████████████████████████████████████████████████████████████████████▍       | 708/782 [00:25<00:02, 28.32it/s]

Iter 710.0, Minibatch Loss= 0.103926, Training Accuracy= 0.95312


 92%|█████████████████████████████████████████████████████████████████████████▍      | 718/782 [00:26<00:02, 27.96it/s]

Iter 720.0, Minibatch Loss= 0.109015, Training Accuracy= 0.97656


 93%|██████████████████████████████████████████████████████████████████████████▍     | 728/782 [00:26<00:01, 28.00it/s]

Iter 730.0, Minibatch Loss= 0.175511, Training Accuracy= 0.95312


 94%|███████████████████████████████████████████████████████████████████████████▍    | 738/782 [00:26<00:01, 28.14it/s]

Iter 740.0, Minibatch Loss= 0.197749, Training Accuracy= 0.92188


 96%|████████████████████████████████████████████████████████████████████████████▍   | 747/782 [00:27<00:01, 27.68it/s]

Iter 750.0, Minibatch Loss= 0.204661, Training Accuracy= 0.92188


 97%|█████████████████████████████████████████████████████████████████████████████▌  | 758/782 [00:27<00:00, 28.03it/s]

Iter 760.0, Minibatch Loss= 0.168337, Training Accuracy= 0.95312


 98%|██████████████████████████████████████████████████████████████████████████████▌ | 768/782 [00:27<00:00, 28.20it/s]

Iter 770.0, Minibatch Loss= 0.109952, Training Accuracy= 0.95312


 99%|███████████████████████████████████████████████████████████████████████████████▌| 778/782 [00:28<00:00, 27.98it/s]

Iter 780.0, Minibatch Loss= 0.269811, Training Accuracy= 0.90625


100%|████████████████████████████████████████████████████████████████████████████████| 782/782 [00:28<00:00, 27.53it/s]


EPOCH  5


  0%|                                                                                          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.164734, Training Accuracy= 0.93750


  1%|▉                                                                                 | 9/782 [00:00<00:32, 23.84it/s]

Iter 10.0, Minibatch Loss= 0.179010, Training Accuracy= 0.96094


  2%|█▊                                                                               | 18/782 [00:00<00:29, 25.54it/s]

Iter 20.0, Minibatch Loss= 0.130374, Training Accuracy= 0.96875


  3%|██▊                                                                              | 27/782 [00:01<00:28, 26.67it/s]

Iter 30.0, Minibatch Loss= 0.088782, Training Accuracy= 0.96094


  5%|███▉                                                                             | 38/782 [00:01<00:26, 27.89it/s]

Iter 40.0, Minibatch Loss= 0.125921, Training Accuracy= 0.95312


  6%|████▉                                                                            | 48/782 [00:01<00:26, 27.76it/s]

Iter 50.0, Minibatch Loss= 0.088737, Training Accuracy= 0.97656


  7%|█████▉                                                                           | 57/782 [00:02<00:25, 28.05it/s]

Iter 60.0, Minibatch Loss= 0.252105, Training Accuracy= 0.94531


  9%|███████                                                                          | 68/782 [00:02<00:25, 27.94it/s]

Iter 70.0, Minibatch Loss= 0.272083, Training Accuracy= 0.93750


 10%|████████                                                                         | 78/782 [00:02<00:24, 28.36it/s]

Iter 80.0, Minibatch Loss= 0.180400, Training Accuracy= 0.96094


 11%|█████████                                                                        | 88/782 [00:03<00:24, 28.48it/s]

Iter 90.0, Minibatch Loss= 0.126038, Training Accuracy= 0.96094


 13%|██████████▏                                                                      | 98/782 [00:03<00:24, 27.93it/s]

Iter 100.0, Minibatch Loss= 0.089076, Training Accuracy= 0.96875


 14%|██████████▉                                                                     | 107/782 [00:03<00:24, 27.65it/s]

Iter 110.0, Minibatch Loss= 0.135657, Training Accuracy= 0.95312


 15%|███████████▉                                                                    | 117/782 [00:04<00:24, 27.68it/s]

Iter 120.0, Minibatch Loss= 0.137088, Training Accuracy= 0.94531


 16%|████████████▉                                                                   | 127/782 [00:04<00:23, 27.35it/s]

Iter 130.0, Minibatch Loss= 0.052248, Training Accuracy= 0.99219


 18%|██████████████                                                                  | 138/782 [00:05<00:22, 28.34it/s]

Iter 140.0, Minibatch Loss= 0.178655, Training Accuracy= 0.92969


 19%|███████████████▏                                                                | 148/782 [00:05<00:22, 28.26it/s]

Iter 150.0, Minibatch Loss= 0.091546, Training Accuracy= 0.97656


 20%|████████████████▏                                                               | 158/782 [00:05<00:22, 27.80it/s]

Iter 160.0, Minibatch Loss= 0.142927, Training Accuracy= 0.96875


 21%|█████████████████▏                                                              | 168/782 [00:06<00:22, 27.16it/s]

Iter 170.0, Minibatch Loss= 0.203242, Training Accuracy= 0.91406


 23%|██████████████████▏                                                             | 178/782 [00:06<00:21, 27.51it/s]

Iter 180.0, Minibatch Loss= 0.120723, Training Accuracy= 0.96094


 24%|███████████████████▏                                                            | 188/782 [00:06<00:21, 27.98it/s]

Iter 190.0, Minibatch Loss= 0.160721, Training Accuracy= 0.95312


 25%|████████████████████▏                                                           | 197/782 [00:07<00:21, 27.47it/s]

Iter 200.0, Minibatch Loss= 0.145455, Training Accuracy= 0.94531


 26%|█████████████████████▏                                                          | 207/782 [00:07<00:20, 27.76it/s]

Iter 210.0, Minibatch Loss= 0.118083, Training Accuracy= 0.96094


 28%|██████████████████████▎                                                         | 218/782 [00:07<00:20, 28.18it/s]

Iter 220.0, Minibatch Loss= 0.182723, Training Accuracy= 0.93750


 29%|███████████████████████▏                                                        | 227/782 [00:08<00:20, 27.38it/s]

Iter 230.0, Minibatch Loss= 0.173019, Training Accuracy= 0.95312


 30%|████████████████████████▏                                                       | 237/782 [00:08<00:20, 27.23it/s]

Iter 240.0, Minibatch Loss= 0.143683, Training Accuracy= 0.93750


 32%|█████████████████████████▎                                                      | 247/782 [00:09<00:19, 27.20it/s]

Iter 250.0, Minibatch Loss= 0.213441, Training Accuracy= 0.91406


 33%|██████████████████████████▍                                                     | 258/782 [00:09<00:18, 27.86it/s]

Iter 260.0, Minibatch Loss= 0.098993, Training Accuracy= 0.96094


 35%|███████████████████████████▌                                                    | 270/782 [00:09<00:18, 28.25it/s]

Iter 270.0, Minibatch Loss= 0.147784, Training Accuracy= 0.95312


 36%|████████████████████████████▌                                                   | 279/782 [00:10<00:18, 27.91it/s]

Iter 280.0, Minibatch Loss= 0.241251, Training Accuracy= 0.93750


 37%|█████████████████████████████▌                                                  | 289/782 [00:10<00:17, 27.93it/s]

Iter 290.0, Minibatch Loss= 0.168983, Training Accuracy= 0.93750


 38%|██████████████████████████████▌                                                 | 299/782 [00:10<00:17, 27.51it/s]

Iter 300.0, Minibatch Loss= 0.151408, Training Accuracy= 0.96094


 40%|███████████████████████████████▌                                                | 309/782 [00:11<00:16, 27.98it/s]

Iter 310.0, Minibatch Loss= 0.178480, Training Accuracy= 0.94531


 41%|████████████████████████████████▋                                               | 319/782 [00:11<00:16, 28.01it/s]

Iter 320.0, Minibatch Loss= 0.133403, Training Accuracy= 0.96875


 42%|█████████████████████████████████▋                                              | 329/782 [00:12<00:16, 27.47it/s]

Iter 330.0, Minibatch Loss= 0.128398, Training Accuracy= 0.94531


 43%|██████████████████████████████████▌                                             | 338/782 [00:12<00:16, 27.66it/s]

Iter 340.0, Minibatch Loss= 0.237172, Training Accuracy= 0.93750


 45%|███████████████████████████████████▌                                            | 348/782 [00:12<00:15, 27.80it/s]

Iter 350.0, Minibatch Loss= 0.153294, Training Accuracy= 0.94531


 46%|████████████████████████████████████▌                                           | 357/782 [00:13<00:15, 27.59it/s]

Iter 360.0, Minibatch Loss= 0.163832, Training Accuracy= 0.96094


 47%|█████████████████████████████████████▋                                          | 368/782 [00:13<00:14, 28.41it/s]

Iter 370.0, Minibatch Loss= 0.191704, Training Accuracy= 0.94531


 48%|██████████████████████████████████████▋                                         | 378/782 [00:13<00:14, 28.09it/s]

Iter 380.0, Minibatch Loss= 0.235662, Training Accuracy= 0.93750


 50%|███████████████████████████████████████▋                                        | 388/782 [00:14<00:14, 28.10it/s]

Iter 390.0, Minibatch Loss= 0.169040, Training Accuracy= 0.93750


 51%|████████████████████████████████████████▌                                       | 397/782 [00:14<00:13, 27.61it/s]

Iter 400.0, Minibatch Loss= 0.136219, Training Accuracy= 0.95312


 52%|█████████████████████████████████████████▋                                      | 407/782 [00:14<00:13, 27.33it/s]

Iter 410.0, Minibatch Loss= 0.132727, Training Accuracy= 0.96094


 53%|██████████████████████████████████████████▋                                     | 417/782 [00:15<00:13, 27.81it/s]

Iter 420.0, Minibatch Loss= 0.106129, Training Accuracy= 0.95312


 55%|███████████████████████████████████████████▊                                    | 428/782 [00:15<00:12, 28.06it/s]

Iter 430.0, Minibatch Loss= 0.126348, Training Accuracy= 0.96094


 56%|████████████████████████████████████████████▊                                   | 438/782 [00:16<00:12, 28.12it/s]

Iter 440.0, Minibatch Loss= 0.121810, Training Accuracy= 0.95312


 57%|█████████████████████████████████████████████▊                                  | 448/782 [00:16<00:11, 28.44it/s]

Iter 450.0, Minibatch Loss= 0.153901, Training Accuracy= 0.96875


 59%|██████████████████████████████████████████████▊                                 | 458/782 [00:16<00:11, 28.23it/s]

Iter 460.0, Minibatch Loss= 0.121236, Training Accuracy= 0.96094


 60%|███████████████████████████████████████████████▊                                | 467/782 [00:17<00:11, 27.61it/s]

Iter 470.0, Minibatch Loss= 0.093831, Training Accuracy= 0.96094


 61%|████████████████████████████████████████████████▊                               | 477/782 [00:17<00:10, 27.99it/s]

Iter 480.0, Minibatch Loss= 0.096324, Training Accuracy= 0.96875


 62%|█████████████████████████████████████████████████▉                              | 488/782 [00:17<00:10, 28.38it/s]

Iter 490.0, Minibatch Loss= 0.101955, Training Accuracy= 0.97656


 64%|██████████████████████████████████████████████████▉                             | 498/782 [00:18<00:10, 28.07it/s]

Iter 500.0, Minibatch Loss= 0.143208, Training Accuracy= 0.96094


 65%|███████████████████████████████████████████████████▉                            | 508/782 [00:18<00:09, 28.18it/s]

Iter 510.0, Minibatch Loss= 0.091501, Training Accuracy= 0.96875


 66%|████████████████████████████████████████████████████▉                           | 518/782 [00:18<00:09, 27.96it/s]

Iter 520.0, Minibatch Loss= 0.098981, Training Accuracy= 0.96875


 68%|██████████████████████████████████████████████████████                          | 528/782 [00:19<00:09, 27.87it/s]

Iter 530.0, Minibatch Loss= 0.142295, Training Accuracy= 0.94531


 69%|███████████████████████████████████████████████████████                         | 538/782 [00:19<00:08, 28.44it/s]

Iter 540.0, Minibatch Loss= 0.090378, Training Accuracy= 0.96875


 70%|████████████████████████████████████████████████████████                        | 548/782 [00:20<00:08, 28.25it/s]

Iter 550.0, Minibatch Loss= 0.223711, Training Accuracy= 0.93750


 71%|█████████████████████████████████████████████████████████                       | 558/782 [00:20<00:07, 28.03it/s]

Iter 560.0, Minibatch Loss= 0.122071, Training Accuracy= 0.94531


 73%|██████████████████████████████████████████████████████████                      | 567/782 [00:20<00:07, 27.61it/s]

Iter 570.0, Minibatch Loss= 0.122561, Training Accuracy= 0.95312


 74%|███████████████████████████████████████████████████████████▏                    | 578/782 [00:21<00:07, 28.04it/s]

Iter 580.0, Minibatch Loss= 0.158404, Training Accuracy= 0.94531


 75%|████████████████████████████████████████████████████████████                    | 587/782 [00:21<00:07, 27.75it/s]

Iter 590.0, Minibatch Loss= 0.091316, Training Accuracy= 0.97656


 76%|█████████████████████████████████████████████████████████████▏                  | 598/782 [00:21<00:06, 28.13it/s]

Iter 600.0, Minibatch Loss= 0.127820, Training Accuracy= 0.94531


 78%|██████████████████████████████████████████████████████████████                  | 607/782 [00:22<00:06, 27.79it/s]

Iter 610.0, Minibatch Loss= 0.152387, Training Accuracy= 0.96094


 79%|███████████████████████████████████████████████████████████████▏                | 618/782 [00:22<00:05, 28.57it/s]

Iter 620.0, Minibatch Loss= 0.109959, Training Accuracy= 0.95312


 80%|████████████████████████████████████████████████████████████████▏               | 628/782 [00:22<00:05, 28.32it/s]

Iter 630.0, Minibatch Loss= 0.144572, Training Accuracy= 0.92969


 82%|█████████████████████████████████████████████████████████████████▎              | 638/782 [00:23<00:05, 28.33it/s]

Iter 640.0, Minibatch Loss= 0.064375, Training Accuracy= 0.97656


 83%|██████████████████████████████████████████████████████████████████▎             | 648/782 [00:23<00:04, 28.19it/s]

Iter 650.0, Minibatch Loss= 0.136804, Training Accuracy= 0.96094


 84%|███████████████████████████████████████████████████████████████████▎            | 658/782 [00:24<00:04, 28.32it/s]

Iter 660.0, Minibatch Loss= 0.085824, Training Accuracy= 0.97656


 85%|████████████████████████████████████████████████████████████████████▎           | 668/782 [00:24<00:04, 27.95it/s]

Iter 670.0, Minibatch Loss= 0.097857, Training Accuracy= 0.96094


 87%|█████████████████████████████████████████████████████████████████████▎          | 677/782 [00:24<00:03, 27.84it/s]

Iter 680.0, Minibatch Loss= 0.190326, Training Accuracy= 0.94531


 88%|██████████████████████████████████████████████████████████████████████▍         | 688/782 [00:25<00:03, 28.61it/s]

Iter 690.0, Minibatch Loss= 0.182765, Training Accuracy= 0.92969


 89%|███████████████████████████████████████████████████████████████████████▍        | 698/782 [00:25<00:02, 28.25it/s]

Iter 700.0, Minibatch Loss= 0.149086, Training Accuracy= 0.96094


 91%|████████████████████████████████████████████████████████████████████████▍       | 708/782 [00:25<00:02, 28.01it/s]

Iter 710.0, Minibatch Loss= 0.059656, Training Accuracy= 0.98438


 92%|█████████████████████████████████████████████████████████████████████████▎      | 717/782 [00:26<00:02, 27.76it/s]

Iter 720.0, Minibatch Loss= 0.165367, Training Accuracy= 0.94531


 93%|██████████████████████████████████████████████████████████████████████████▍     | 728/782 [00:26<00:01, 27.98it/s]

Iter 730.0, Minibatch Loss= 0.089388, Training Accuracy= 0.97656


 94%|███████████████████████████████████████████████████████████████████████████▍    | 738/782 [00:26<00:01, 28.28it/s]

Iter 740.0, Minibatch Loss= 0.092548, Training Accuracy= 0.96094


 96%|████████████████████████████████████████████████████████████████████████████▌   | 748/782 [00:27<00:01, 28.53it/s]

Iter 750.0, Minibatch Loss= 0.108890, Training Accuracy= 0.96875


 97%|█████████████████████████████████████████████████████████████████████████████▌  | 758/782 [00:27<00:00, 28.22it/s]

Iter 760.0, Minibatch Loss= 0.085533, Training Accuracy= 0.97656


 98%|██████████████████████████████████████████████████████████████████████████████▌ | 768/782 [00:28<00:00, 28.33it/s]

Iter 770.0, Minibatch Loss= 0.077195, Training Accuracy= 0.96875


 99%|███████████████████████████████████████████████████████████████████████████████▌| 778/782 [00:28<00:00, 28.35it/s]

Iter 780.0, Minibatch Loss= 0.062690, Training Accuracy= 0.98438


100%|████████████████████████████████████████████████████████████████████████████████| 782/782 [00:28<00:00, 27.37it/s]


EPOCH  6


  0%|                                                                                          | 0/782 [00:00<?, ?it/s]

Iter 0.0, Minibatch Loss= 0.148197, Training Accuracy= 0.94531


  1%|▉                                                                                 | 9/782 [00:00<00:33, 23.25it/s]

Iter 10.0, Minibatch Loss= 0.040682, Training Accuracy= 0.99219


  2%|█▉                                                                               | 19/782 [00:00<00:29, 25.99it/s]

Iter 20.0, Minibatch Loss= 0.108493, Training Accuracy= 0.95312


  4%|███                                                                              | 29/782 [00:01<00:27, 26.92it/s]

Iter 30.0, Minibatch Loss= 0.066361, Training Accuracy= 0.97656


  5%|███▉                                                                             | 38/782 [00:01<00:26, 27.67it/s]

Iter 40.0, Minibatch Loss= 0.094949, Training Accuracy= 0.97656


  6%|█████▏                                                                           | 50/782 [00:01<00:25, 28.39it/s]

Iter 50.0, Minibatch Loss= 0.125903, Training Accuracy= 0.97656


  8%|██████▏                                                                          | 60/782 [00:02<00:25, 28.04it/s]

Iter 60.0, Minibatch Loss= 0.072621, Training Accuracy= 0.98438


  9%|██████▉                                                                          | 67/782 [00:02<00:26, 27.46it/s]

Iter 70.0, Minibatch Loss= 0.073426, Training Accuracy= 0.97656


 10%|████████                                                                         | 78/782 [00:02<00:24, 28.43it/s]

Iter 80.0, Minibatch Loss= 0.070194, Training Accuracy= 0.98438


 11%|█████████                                                                        | 88/782 [00:03<00:24, 28.00it/s]

Iter 90.0, Minibatch Loss= 0.090071, Training Accuracy= 0.96875


 12%|██████████                                                                       | 97/782 [00:03<00:24, 28.10it/s]

Iter 100.0, Minibatch Loss= 0.112079, Training Accuracy= 0.95312


 14%|███████████                                                                     | 108/782 [00:03<00:23, 28.53it/s]

Iter 110.0, Minibatch Loss= 0.043264, Training Accuracy= 0.99219


 15%|████████████                                                                    | 118/782 [00:04<00:24, 27.60it/s]

Iter 120.0, Minibatch Loss= 0.151265, Training Accuracy= 0.95312


 17%|█████████████▎                                                                  | 130/782 [00:04<00:23, 28.21it/s]

Iter 130.0, Minibatch Loss= 0.148675, Training Accuracy= 0.93750


 18%|██████████████▎                                                                 | 140/782 [00:05<00:22, 28.06it/s]

Iter 140.0, Minibatch Loss= 0.071812, Training Accuracy= 0.98438


 19%|███████████████▎                                                                | 150/782 [00:05<00:22, 28.19it/s]

Iter 150.0, Minibatch Loss= 0.076807, Training Accuracy= 0.96875


 20%|████████████████                                                                | 157/782 [00:05<00:22, 27.58it/s]

Iter 160.0, Minibatch Loss= 0.105998, Training Accuracy= 0.95312


 21%|█████████████████▏                                                              | 168/782 [00:06<00:21, 28.14it/s]

Iter 170.0, Minibatch Loss= 0.146027, Training Accuracy= 0.94531


 23%|██████████████████                                                              | 177/782 [00:06<00:21, 27.83it/s]

Iter 180.0, Minibatch Loss= 0.080770, Training Accuracy= 0.96875


 24%|███████████████████▏                                                            | 188/782 [00:06<00:20, 28.51it/s]

Iter 190.0, Minibatch Loss= 0.135797, Training Accuracy= 0.97656


 25%|████████████████████▎                                                           | 198/782 [00:07<00:20, 28.37it/s]

Iter 200.0, Minibatch Loss= 0.138992, Training Accuracy= 0.96094


 27%|█████████████████████▎                                                          | 208/782 [00:07<00:20, 28.20it/s]

Iter 210.0, Minibatch Loss= 0.067422, Training Accuracy= 0.98438


 28%|██████████████████████▏                                                         | 217/782 [00:07<00:20, 27.83it/s]

Iter 220.0, Minibatch Loss= 0.083605, Training Accuracy= 0.99219


 29%|███████████████████████▎                                                        | 228/782 [00:08<00:19, 28.46it/s]

Iter 230.0, Minibatch Loss= 0.124432, Training Accuracy= 0.95312


 30%|████████████████████████▎                                                       | 238/782 [00:08<00:19, 28.15it/s]

Iter 240.0, Minibatch Loss= 0.111148, Training Accuracy= 0.97656


 32%|█████████████████████████▎                                                      | 248/782 [00:09<00:19, 27.91it/s]

Iter 250.0, Minibatch Loss= 0.103000, Training Accuracy= 0.96875


 33%|██████████████████████████▍                                                     | 258/782 [00:09<00:18, 27.96it/s]

Iter 260.0, Minibatch Loss= 0.117126, Training Accuracy= 0.93750


 34%|███████████████████████████▎                                                    | 267/782 [00:09<00:18, 27.72it/s]

Iter 270.0, Minibatch Loss= 0.099782, Training Accuracy= 0.96094


 35%|████████████████████████████▎                                                   | 277/782 [00:10<00:18, 27.95it/s]

Iter 280.0, Minibatch Loss= 0.043905, Training Accuracy= 0.99219


 37%|█████████████████████████████▍                                                  | 288/782 [00:10<00:17, 28.51it/s]

Iter 290.0, Minibatch Loss= 0.056547, Training Accuracy= 0.98438


 38%|██████████████████████████████▍                                                 | 297/782 [00:10<00:17, 28.19it/s]

Iter 300.0, Minibatch Loss= 0.032833, Training Accuracy= 1.00000


 39%|███████████████████████████████▌                                                | 308/782 [00:11<00:16, 28.45it/s]

Iter 310.0, Minibatch Loss= 0.067763, Training Accuracy= 0.97656


 41%|████████████████████████████████▌                                               | 318/782 [00:11<00:16, 28.32it/s]

Iter 320.0, Minibatch Loss= 0.120424, Training Accuracy= 0.95312


 42%|█████████████████████████████████▌                                              | 328/782 [00:11<00:16, 28.10it/s]

Iter 330.0, Minibatch Loss= 0.112510, Training Accuracy= 0.97656


 43%|██████████████████████████████████▌                                             | 338/782 [00:12<00:15, 28.36it/s]

Iter 340.0, Minibatch Loss= 0.096497, Training Accuracy= 0.96094


 45%|███████████████████████████████████▌                                            | 348/782 [00:12<00:15, 28.33it/s]

Iter 350.0, Minibatch Loss= 0.109611, Training Accuracy= 0.96094


 46%|████████████████████████████████████▌                                           | 358/782 [00:13<00:14, 28.36it/s]

Iter 360.0, Minibatch Loss= 0.102044, Training Accuracy= 0.97656


 47%|█████████████████████████████████████▋                                          | 368/782 [00:13<00:14, 28.58it/s]

Iter 370.0, Minibatch Loss= 0.029453, Training Accuracy= 0.99219


 48%|██████████████████████████████████████▋                                         | 378/782 [00:13<00:14, 28.59it/s]

Iter 380.0, Minibatch Loss= 0.063182, Training Accuracy= 0.98438


 50%|███████████████████████████████████████▋                                        | 388/782 [00:14<00:14, 27.86it/s]

Iter 390.0, Minibatch Loss= 0.131112, Training Accuracy= 0.96875


 51%|████████████████████████████████████████▋                                       | 398/782 [00:14<00:13, 28.29it/s]

Iter 400.0, Minibatch Loss= 0.104210, Training Accuracy= 0.96094


 52%|█████████████████████████████████████████▋                                      | 407/782 [00:14<00:13, 27.92it/s]

Iter 410.0, Minibatch Loss= 0.103662, Training Accuracy= 0.96094


 53%|██████████████████████████████████████████▋                                     | 417/782 [00:15<00:12, 28.11it/s]

Iter 420.0, Minibatch Loss= 0.084903, Training Accuracy= 0.96875


 55%|███████████████████████████████████████████▉                                    | 429/782 [00:15<00:12, 28.99it/s]

Iter 430.0, Minibatch Loss= 0.234315, Training Accuracy= 0.91406


 56%|████████████████████████████████████████████▉                                   | 439/782 [00:15<00:12, 28.22it/s]

Iter 440.0, Minibatch Loss= 0.128037, Training Accuracy= 0.95312


 57%|█████████████████████████████████████████████▊                                  | 448/782 [00:16<00:11, 27.97it/s]

Iter 450.0, Minibatch Loss= 0.101273, Training Accuracy= 0.96875


 59%|██████████████████████████████████████████████▊                                 | 458/782 [00:16<00:11, 27.85it/s]

Iter 460.0, Minibatch Loss= 0.057630, Training Accuracy= 0.96875


 60%|███████████████████████████████████████████████▊                                | 467/782 [00:16<00:11, 27.77it/s]

Iter 470.0, Minibatch Loss= 0.120291, Training Accuracy= 0.96094


 61%|████████████████████████████████████████████████▉                               | 478/782 [00:17<00:10, 28.44it/s]

Iter 480.0, Minibatch Loss= 0.029928, Training Accuracy= 1.00000


 62%|█████████████████████████████████████████████████▉                              | 488/782 [00:17<00:10, 28.28it/s]

Iter 490.0, Minibatch Loss= 0.070283, Training Accuracy= 0.97656


 64%|██████████████████████████████████████████████████▊                             | 497/782 [00:18<00:10, 27.66it/s]

Iter 500.0, Minibatch Loss= 0.033757, Training Accuracy= 0.98438


 65%|███████████████████████████████████████████████████▉                            | 508/782 [00:18<00:09, 28.06it/s]

Iter 510.0, Minibatch Loss= 0.098988, Training Accuracy= 0.96094


 66%|████████████████████████████████████████████████████▉                           | 518/782 [00:18<00:09, 28.29it/s]

Iter 520.0, Minibatch Loss= 0.087416, Training Accuracy= 0.94531


 68%|██████████████████████████████████████████████████████                          | 528/782 [00:19<00:09, 27.90it/s]

Iter 530.0, Minibatch Loss= 0.041838, Training Accuracy= 0.99219


 69%|███████████████████████████████████████████████████████                         | 538/782 [00:19<00:08, 27.81it/s]

Iter 540.0, Minibatch Loss= 0.136873, Training Accuracy= 0.96094


 70%|████████████████████████████████████████████████████████                        | 548/782 [00:19<00:08, 28.20it/s]

Iter 550.0, Minibatch Loss= 0.103782, Training Accuracy= 0.97656


 71%|█████████████████████████████████████████████████████████                       | 558/782 [00:20<00:07, 28.68it/s]

Iter 560.0, Minibatch Loss= 0.057381, Training Accuracy= 0.97656


 73%|██████████████████████████████████████████████████████████                      | 568/782 [00:20<00:07, 28.45it/s]

Iter 570.0, Minibatch Loss= 0.093064, Training Accuracy= 0.98438


 74%|███████████████████████████████████████████████████████████▏                    | 578/782 [00:20<00:07, 28.14it/s]

Iter 580.0, Minibatch Loss= 0.164421, Training Accuracy= 0.95312


 75%|████████████████████████████████████████████████████████████▎                   | 590/782 [00:21<00:06, 28.40it/s]

Iter 590.0, Minibatch Loss= 0.056884, Training Accuracy= 0.98438


 77%|█████████████████████████████████████████████████████████████▍                  | 600/782 [00:21<00:06, 28.55it/s]

Iter 600.0, Minibatch Loss= 0.171218, Training Accuracy= 0.94531


 78%|██████████████████████████████████████████████████████████████▎                 | 609/782 [00:22<00:06, 27.48it/s]

Iter 610.0, Minibatch Loss= 0.105378, Training Accuracy= 0.96094


 79%|███████████████████████████████████████████████████████████████▏                | 618/782 [00:22<00:05, 27.44it/s]

Iter 620.0, Minibatch Loss= 0.050571, Training Accuracy= 0.99219


 80%|████████████████████████████████████████████████████████████████▏               | 628/782 [00:22<00:05, 28.01it/s]

Iter 630.0, Minibatch Loss= 0.107571, Training Accuracy= 0.97656


 82%|█████████████████████████████████████████████████████████████████▎              | 638/782 [00:23<00:05, 28.21it/s]

Iter 640.0, Minibatch Loss= 0.045908, Training Accuracy= 0.99219


 83%|██████████████████████████████████████████████████████████████████▎             | 648/782 [00:23<00:04, 28.12it/s]

Iter 650.0, Minibatch Loss= 0.048303, Training Accuracy= 0.99219


 84%|███████████████████████████████████████████████████████████████████▍            | 659/782 [00:23<00:04, 28.42it/s]

Iter 660.0, Minibatch Loss= 0.161266, Training Accuracy= 0.95312


 86%|████████████████████████████████████████████████████████████████████▍           | 669/782 [00:24<00:03, 28.34it/s]

Iter 670.0, Minibatch Loss= 0.066167, Training Accuracy= 0.97656


 87%|█████████████████████████████████████████████████████████████████████▍          | 679/782 [00:24<00:03, 28.24it/s]

Iter 680.0, Minibatch Loss= 0.110786, Training Accuracy= 0.98438


 88%|██████████████████████████████████████████████████████████████████████▍         | 688/782 [00:24<00:03, 27.17it/s]

Iter 690.0, Minibatch Loss= 0.087423, Training Accuracy= 0.96094


 89%|███████████████████████████████████████████████████████████████████████▍        | 698/782 [00:25<00:02, 28.04it/s]

Iter 700.0, Minibatch Loss= 0.210317, Training Accuracy= 0.92188


 91%|████████████████████████████████████████████████████████████████████████▍       | 708/782 [00:25<00:02, 28.10it/s]

Iter 710.0, Minibatch Loss= 0.344357, Training Accuracy= 0.88281


 92%|█████████████████████████████████████████████████████████████████████████▎      | 717/782 [00:26<00:02, 28.16it/s]

Iter 720.0, Minibatch Loss= 0.197067, Training Accuracy= 0.91406


 93%|██████████████████████████████████████████████████████████████████████████▍     | 728/782 [00:26<00:01, 28.66it/s]

Iter 730.0, Minibatch Loss= 0.162904, Training Accuracy= 0.96094


 94%|███████████████████████████████████████████████████████████████████████████▍    | 738/782 [00:26<00:01, 28.46it/s]

Iter 740.0, Minibatch Loss= 0.123802, Training Accuracy= 0.97656


 96%|████████████████████████████████████████████████████████████████████████████▍   | 747/782 [00:27<00:01, 27.22it/s]

Iter 750.0, Minibatch Loss= 0.069127, Training Accuracy= 0.98438


 97%|█████████████████████████████████████████████████████████████████████████████▌  | 758/782 [00:27<00:00, 28.11it/s]

Iter 760.0, Minibatch Loss= 0.073016, Training Accuracy= 0.96875


 98%|██████████████████████████████████████████████████████████████████████████████▌ | 768/782 [00:27<00:00, 27.91it/s]

Iter 770.0, Minibatch Loss= 0.093306, Training Accuracy= 0.96875


 99%|███████████████████████████████████████████████████████████████████████████████▌| 778/782 [00:28<00:00, 27.30it/s]

Iter 780.0, Minibatch Loss= 0.042524, Training Accuracy= 0.99219


100%|████████████████████████████████████████████████████████████████████████████████| 782/782 [00:28<00:00, 27.52it/s]


In [26]:
# Sanity check: classify one hand-written sentence pair with the trained model.
evidences = ["Two spectators are kickboxing and some people are watching"]
hypotheses = ["Two people are kickboxing and spectators are watching"]

# Stack the GloVe rows of every sentence and pad/crop the result to the
# (30, 50) matrix size handed to fit_to_size.
sentence1 = [
    fit_to_size(np.vstack(sentence2sequence(text)[0]), (30, 50))
    for text in evidences
]
sentence2 = [
    fit_to_size(np.vstack(sentence2sequence(text)[0]), (30, 50))
    for text in hypotheses
]

# Each one-element list is repeated N times before being fed, and `y` receives
# N all-zero rows as placeholder labels.
# NOTE(review): the evidence matrices go to `hyp` and the hypothesis matrices
# go to `evi` -- confirm this matches how the training cell fills them.
prediction = sess.run(
    classification_scores,
    feed_dict={
        hyp: sentence1 * N,
        evi: sentence2 * N,
        y: [[0, 0, 0]] * N,
    },
)

# Only the first output row is reported.
print("{} entailment".format(
    ["Positive", "Neutral", "Negative"][np.argmax(prediction[0])]))
print("prediction", prediction[0])

  


Neutral entailment
prediction [-0.04078504  0.34014255 -0.4341808 ]


In [27]:
def predict_entailment(evidence, hypothesis):
    """Score one (evidence, hypothesis) sentence pair with the trained model.

    Args:
        evidence: Raw evidence sentence (str).
        hypothesis: Raw hypothesis sentence (str).

    Returns:
        The first row of `classification_scores`: three raw scores, indexed
        in the order Positive, Neutral, Negative.
    """
    def embed(sentence):
        # sentence2sequence keeps only tokens found in glove_wordmap, so the
        # row list can be empty; np.vstack([]) would raise ValueError, hence
        # the all-padding fallback.
        rows = sentence2sequence(sentence)[0]
        if not rows:
            return np.zeros((30, 50))
        return fit_to_size(np.vstack(rows), (30, 50))

    # The single pair is repeated N times and `y` gets N all-zero rows as
    # placeholder labels.
    # NOTE(review): the evidence matrix is fed to `hyp` and the hypothesis
    # matrix to `evi`, exactly as in the original cell -- confirm this matches
    # how the training cell fills the two placeholders.
    prediction = sess.run(classification_scores,
                          feed_dict={hyp: [embed(evidence)] * N,
                                     evi: [embed(hypothesis)] * N,
                                     y: [[0, 0, 0]] * N})
    return prediction[0]


# Hand-written (evidence, hypothesis) pairs to probe the trained model.
sentence_pairs = [
    ("There is no boy playing outdoors and there is no man smiling",
     "A group of kids is playing in a yard and an old man is standing in the background"),
    ("A man is performing a trick on a green bicycle",
     "A man dressed in black is riding a bike"),
    # NOTE(review): "bicyle" looks like a typo for "bicycle". If that token is
    # not in glove_wordmap, sentence2sequence drops it silently, so the model
    # does not see the intended sentence. Left unchanged so the recorded
    # output below stays valid.
    ("A man is performing a trick on a green bicycle",
     "A man is not performing a trick on a green bicyle"),
]

for evidence, hypothesis in sentence_pairs:
    scores = predict_entailment(evidence, hypothesis)
    print(["Positive", "Neutral", "Negative"][np.argmax(scores)] +
          " entailment")
    print("prediction", scores)

  


Neutral entailment
prediction [-2.3046298  5.136812  -3.564967 ]
Neutral entailment
prediction [-0.41946328  0.98137367 -1.1349845 ]
Positive entailment
prediction [ 0.7671622  -0.7415304  -0.78941536]


In [28]:
def process_test_data(path="SICK_test.txt"):
    """Read the raw sentence pairs from a tab-separated SICK file.

    Args:
        path: File to read. Defaults to "SICK_test.txt", the file the
            original zero-argument version always opened.

    Returns:
        A tuple (hyp_sentences, evi_sentences) of two parallel lists of
        strings, taken from the "sentence_A" and "sentence_B" columns
        respectively, in file order.

    Raises:
        KeyError: if a row lacks the "sentence_A" or "sentence_B" column.
    """
    hyp_sentences = []
    evi_sentences = []
    # The csv module asks for files to be opened with newline="" so it can do
    # its own line-ending handling.
    with open(path, "r", newline="") as data:
        for row in csv.DictReader(data, delimiter='\t'):
            hyp_sentences.append(row["sentence_A"])
            evi_sentences.append(row["sentence_B"])

    return (hyp_sentences, evi_sentences)

# Raw test sentences as two parallel lists: hyp_test holds the "sentence_A"
# column and evi_test the "sentence_B" column of SICK_test.txt.
hyp_test, evi_test = process_test_data()

In [29]:
#generate predictions on the whole test set

# Label names indexed by the argmax of the model's three output scores;
# hoisted out of the loop because it never changes.
CLASS_NAMES = ["ENTAILMENT", "NEUTRAL", "CONTRADICTION"]


def sentence_to_matrix(sentence, shape=(30, 50)):
    """Embed `sentence` with sentence2sequence and pad/crop it to `shape`.

    sentence2sequence keeps only tokens found in glove_wordmap, so it can
    return no rows at all; np.vstack([]) would raise ValueError, so an
    all-zero (pure padding) matrix is returned in that case. This keeps
    `results` aligned with the test file.
    """
    rows = sentence2sequence(sentence)[0]
    if not rows:
        return np.zeros(shape)
    return fit_to_size(np.vstack(rows), shape)


results = []

for hyp_sentence, evi_sentence in zip(hyp_test, evi_test):
    # NOTE(review): exactly as in the original cell, the evi_test sentence is
    # fed to the `hyp` placeholder and the hyp_test sentence to `evi`. Confirm
    # this matches how the training cell fills the two placeholders; a
    # mismatch would depress test accuracy.
    # Each pair is repeated N times and `y` gets N all-zero placeholder rows.
    # Feeding N distinct pairs per run would need far fewer sess.run calls,
    # but only if the graph treats batch rows independently -- TODO confirm.
    prediction = sess.run(classification_scores,
                          feed_dict={hyp: [sentence_to_matrix(evi_sentence)] * N,
                                     evi: [sentence_to_matrix(hyp_sentence)] * N,
                                     y: [[0, 0, 0]] * N})

    # Only the first output row is used; store the predicted class in the same
    # vector encoding score_setup produces for the gold labels.
    results.append(score_setup(CLASS_NAMES[np.argmax(prediction[0])]))

    

  


In [30]:
#get test correct labels
# Every row's "entailment_judgment" string is passed through score_setup, in
# file order, so the vectors line up with the predictions made per test row.
with open("SICK_test_annotated.txt","r") as data:
    t_label = [score_setup(row["entailment_judgment"])
               for row in csv.DictReader(data, delimiter='\t')]

# Stack the per-row label vectors into a single array.
test_labels = np.array(t_label)

In [31]:
# calculate accuracy of predictions
results = np.array(results)

# Predictions and gold labels must be non-empty 2-D arrays of the same shape;
# otherwise the row-wise comparison below would silently score misaligned or
# truncated data.
assert results.ndim == 2 and results.shape == test_labels.shape, (
    "predictions %s and gold labels %s are not aligned"
    % (results.shape, test_labels.shape))

print(results)
print(test_labels.shape)
print(results[0])

# A prediction is correct only when its whole label vector equals the gold
# vector; compare all rows at once instead of looping in Python.
row_matches = np.all(results == test_labels, axis=1)
correct = int(np.count_nonzero(row_matches))
acc = correct / results.shape[0]

print("ACCURACY IS ", acc * 100, "%")


[[0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 ...
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]
(4927, 3)
[0. 1. 0.]
ACCURACY IS  60.604830525674856 %


In [None]:
# Close `sess` (the session used by the sess.run calls above) now that
# evaluation is finished.
sess.close()