In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import random
import json
import os
import time
import tensorflow as tf
from tensorflow.contrib import rnn


import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
#from sklearn.model_selection import train_test_split
import tensorflow.contrib.legacy_seq2seq as seq2seq
from utilities import show_graph
#from util import inv_sigmoid, linear_decay, dec_print_train, dec_print_val, dec_print_test

import unicodedata
import re
import numpy as np
import os
import io
import time
import collections
import json
import string

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
random.seed(0)
np.random.seed(0)
tf.set_random_seed(0)

n_inputs        = 4096
n_hidden        = 600
val_batch_size  = 100 #100
n_frames        = 80
max_caption_len = 50
forget_bias_red = 1.0
forget_bias_gre = 1.0
dropout_prob    = 0.5

special_tokens  = {'<PAD>': 0, '<BOS>': 1, '<EOS>': 2, '<UNK>': 3}
phases = {'train': 0, 'val': 1, 'test': 2}

In [3]:
class S2VT:
    def __init__(self, vocab_num = 0,lr = 1e-4):

        self.vocab_num = vocab_num
        self.learning_rate = lr

     
    def build_model(self, feat, captions=None, cap_len=None, sampling=None, phase=0):

        weights = {
            'W_feat': tf.Variable( tf.random_uniform([n_inputs, n_hidden], -0.1, 0.1), name='W_feat'), 
            'W_dec': tf.Variable(tf.random_uniform([n_hidden, self.vocab_num], -0.1, 0.1), name='W_dec')
        }
        biases = {
            'b_feat':  tf.Variable( tf.zeros([n_hidden]), name='b_feat'),
            'b_dec': tf.Variable(tf.zeros([self.vocab_num]), name='b_dec')
        }   
        embeddings = {
         'emb': tf.Variable(tf.random_uniform([self.vocab_num, n_hidden], -0.1, 0.1), name='emb')
        }

        batch_size = tf.shape(feat)[0]

        if phase != phases['test']:
            # cap_len: (250, 1) -> (250, 50)
            cap_mask = tf.sequence_mask(cap_len, max_caption_len, dtype=tf.float32)
     
        if phase == phases['train']: #  add noise
            noise = tf.random_uniform(tf.shape(feat), -0.1, 0.1, dtype=tf.float32)
            feat = feat + noise

        if phase == phases['train']:
            feat = tf.nn.dropout(feat, dropout_prob)

        feat = tf.reshape(feat, [-1, n_inputs])
        image_emb = tf.matmul(feat, weights['W_feat']) + biases['b_feat']
        image_emb = tf.reshape(image_emb, [-1, n_frames, n_hidden])
        image_emb = tf.transpose(image_emb, perm=[1, 0, 2])
        
        with tf.variable_scope('LSTM1'):
            lstm_red = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=forget_bias_red, state_is_tuple=True)
            if phase == phases['train']:
                lstm_red = tf.contrib.rnn.DropoutWrapper(lstm_red, output_keep_prob=dropout_prob)    
        with tf.variable_scope('LSTM2'):
            lstm_gre = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=forget_bias_gre, state_is_tuple=True)
            if phase == phases['train']:
                lstm_gre = tf.contrib.rnn.DropoutWrapper(lstm_gre, output_keep_prob=dropout_prob)    

        state_red = lstm_red.zero_state(batch_size, dtype=tf.float32)
        state_gre = lstm_gre.zero_state(batch_size, dtype=tf.float32)

        padding = tf.zeros([batch_size, n_hidden])

        h_src = []
        for i in range(0, n_frames):
            with tf.variable_scope("LSTM1"):
                output_red, state_red = lstm_red(image_emb[i,:,:], state_red)
            
            with tf.variable_scope("LSTM2"):
                output_gre, state_gre = lstm_gre(tf.concat([padding, output_red], axis=1), state_gre)
                h_src.append(output_gre) # even though padding is augmented, output_gre/state_gre's shape not change

        h_src = tf.stack(h_src, axis = 0)

        bos = tf.ones([batch_size, n_hidden])
        padding_in = tf.zeros([batch_size, n_hidden])

        logits = []
        max_prob_index = None

        

        cross_ent_list = []
        for i in range(0, max_caption_len):

            with tf.variable_scope("LSTM1"):
                output_red, state_red = lstm_red(padding_in, state_red)

            if i == 0:
                with tf.variable_scope("LSTM2"):
                    con = tf.concat([bos, output_red], axis=1)
                    output_gre, state_gre = lstm_gre(con, state_gre)
            else:
                if phase == phases['train']:
                    if sampling[i] == True:
                        feed_in = captions[:, i - 1]
                    else:
                        feed_in = tf.argmax(logit_words, 1)
                else:
                    feed_in = tf.argmax(logit_words, 1)
                with tf.device("/cpu:0"):
                    embed_result = tf.nn.embedding_lookup(embeddings['emb'], feed_in)
                with tf.variable_scope("LSTM2"):
                    con = tf.concat([embed_result, output_red], axis=1)
                    output_gre, state_gre = lstm_gre(con, state_gre)

            logit_words = tf.matmul(output_gre, weights['W_dec']) + biases['b_dec']
            logits.append(logit_words)

            if phase != phases['test']:
                labels = captions[:, i]
                one_hot_labels = tf.one_hot(labels, self.vocab_num, on_value = 1, off_value = None, axis = 1) 
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit_words, labels=one_hot_labels)
                cross_entropy = cross_entropy * cap_mask[:, i]
                cross_ent_list.append(cross_entropy)
        
        loss = 0.0
        if phase != phases['test']:
            cross_entropy_tensor = tf.stack(cross_ent_list, 1)
            loss = tf.reduce_sum(cross_entropy_tensor, axis=1)
            loss = tf.divide(loss, tf.cast(cap_len, tf.float32))
            loss = tf.reduce_mean(loss, axis=0)

        logits = tf.stack(logits, axis = 0)
        logits = tf.reshape(logits, (max_caption_len, batch_size, self.vocab_num))
        logits = tf.transpose(logits, [1, 0, 2])
        
        summary = None
        if phase == phases['train']:
            summary = tf.summary.scalar('training_loss', loss)
        elif phase == phases['val']:
            summary = tf.summary.scalar('validation_loss', loss)
            

        return logits, loss, summary

    def inference(self, logits):
        
        #print('using greedy search...')
        dec_pred = tf.argmax(logits, 2)
        return dec_pred

    def optimize(self, loss_op):

        params = tf.trainable_variables()
        optimizer = tf.train.AdamOptimizer(self.learning_rate)#.minimize(loss_op)
        gradients, variables = zip(*optimizer.compute_gradients(loss_op))
        gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        train_op = optimizer.apply_gradients(zip(gradients, params))

        return train_op

In [4]:
def tokenize(line,token='word'):
    if token == 'word':
        return [line.split(' ')]
    elif token == 'char':
        return [list(line)]
    else:
        print('ERROR: unknown token type '+token)

In [5]:
def count_tokens(tokanized_sentences):
    # Flatten a list of token lists into a list of tokens
    tokens = [tk for line in tokanized_sentences for tk in line]
    return collections.Counter(tokens)

In [6]:
def parse_data_into_lists(filename,batch_size,feat_filepath, index2token, tokens):
    
    with open(filename, 'r') as f:
        datastore = json.load(f)
           
    #sentence_set = extract_sentences(filename)
    
    mult_vids = []
    all_sents = []
    all_enc_sents = []
    all_cap_len = []
    all_ids = []
    
    for data in datastore:
        
        sentences = data["caption"]
        sentences = [word.lower() for word in sentences] #Normalize the case
        table = str.maketrans('', '', string.punctuation) #Normalize the punctuation
        sentences = [word.translate(table) for word in sentences]
        
        num_sent = len(sentences)
        
        all_sents.extend(sentences)
        
        enc_sents = []
        
        for sentence in sentences:
            
            #print(sentence)
            tokenized_sentence, encoded_sentence, cap_len = num_encode(sentence,index2token,tokens)
            #print(tokenized_sentence)
            #print(encoded_sentence)

            #print(encoded_sentence)
            encoded_sentence = list(encoded_sentence)

            enc_sents.append(encoded_sentence)
            all_cap_len.append(cap_len)
            
        all_enc_sents.extend(enc_sents)


#         print(all_sents[0])
#         print(all_enc_sents[0])
#         print(np.shape(all_enc_sents))
#         print(np.shape(all_sents))
#         print(np.shape(all_cap_len))

        #print(len(all_enc_sents))

        #sentence_set[i] = sentences 
        #### Extracting all feature vectors per video
        
        video_id = data["id"]
        features = np.load(feat_filepath.format(video_id))
        
        for n in range(0,num_sent):
            mult_vids.append(features)
            all_ids.append(video_id)
        
        print("id: " + str(data["id"]) + " processed")

            
    return mult_vids, all_sents, all_enc_sents, all_cap_len, all_ids

In [7]:

def parse_data_into_batches(filename,batch_size,feat_filepath, index2token, tokens, mult_vids, all_sents, all_enc_sents, all_cap_len, all_ids):
    print(all_enc_sents[0:10])

    with open(filename, 'r') as f:
        datastore = json.load(f)
           
    vid_batch = {}
    sent_batch = {}
    enc_sent_batch = {}
    cap_len_batch = {}
    id_batch = {}
    #sentence_set = extract_sentences(filename)
    
#     mult_vids = []
#     all_sents = []
#     all_enc_sents = []
#     all_cap_len = []
    
#     for data in datastore:
        
#         sentences = data["caption"]
#         sentences = [word.lower() for word in sentences] #Normalize the case
#         table = str.maketrans('', '', string.punctuation) #Normalize the punctuation
#         sentences = [word.translate(table) for word in sentences]
        
#         num_sent = len(sentences)
        
#         all_sents.extend(sentences)
        
#         for sentence in sentences:
#             tokenized_sentence, encoded_sentence, cap_len = num_encode(sentence,index2token,tokens)
            
#             #print(encoded_sentence)
#             all_enc_sents.append(encoded_sentence)
#             all_cap_len.append(cap_len)

#         #print(all_enc_sents[0])
#         #print(len(all_enc_sents))

#         #sentence_set[i] = sentences 
#         #### Extracting all feature vectors per video
        
#         video_id = data["id"]
#         features = np.load(feat_filepath.format(video_id))
        
#         for n in range(0,num_sent):
#             mult_vids.append(features)
            
#         #print("id: " + str(data["id"]) + " processed")


    random.Random(30).shuffle(mult_vids)
    random.Random(30).shuffle(all_sents)
    random.Random(30).shuffle(all_enc_sents)
    random.Random(30).shuffle(all_cap_len)
    random.Random(30).shuffle(all_ids)
    
    #print(all_enc_sents[0:10])
    
    batches = len(mult_vids)/batch_size
    batches = int(batches)
        
    i = 0
    j = 0
    
    for n in range(0,batches):
        if j not in vid_batch:
            vid_batch[j] = []
            sent_batch[j] = []
            enc_sent_batch[j]=[]
            cap_len_batch[j] = []

            
        vid_batch[j] = mult_vids[i:i+batch_size]
        sent_batch[j] = all_sents[i:i+batch_size]
        enc_sent_batch[j] = all_enc_sents[i:i+batch_size]
        cap_len_batch[j] = all_cap_len[i:i+batch_size]
        id_batch[j] = all_ids[i:i+batch_size]

        print('parsed batch %d' %j)
        i = i+batch_size
        j = j+1
    
#     if j not in vid_batch:
#         vid_batch[j] = []

#     vid_batch[j].append(features)
                
#     i = i+1

#     if i%batch_size == 0:
#         j = j+1 
    #print(sent_batch[0])
    #print(enc_sent_batch[0])
            
    return vid_batch, sent_batch, enc_sent_batch, cap_len_batch, batches, id_batch

In [8]:
def extract_sentences(filename):
    
    sentence_set = {}
    
    with open(filename, 'r') as f:
        datastore = json.load(f)
        
    i = 0
    for data in datastore:
        
        #### Extracting only a single sentence per video into a standalone dict

        sentences = data["caption"]
        sentences = [word.lower() for word in sentences] #Normalize the case
        table = str.maketrans('', '', string.punctuation) #Normalize the punctuation
        sentences = [word.translate(table) for word in sentences]

        sentence_set[i] = sentences #0 for only the first sentence\
        
        i = i+1
        
    return sentence_set

In [9]:
# Mapping string tokens to numertical indices.
def listVocab(sentence_set):
    
    PAD_token = 0
    BOS_token = 1
    EOS_token = 2
    UNK_token = 3
    
    all_tokens = []
    word_count = {}
    token2index = {"<PAD>": 0,"<BOS>":1,"<EOS>":2,"<UNK>":3}
    index2token = {PAD_token: "<PAD>", BOS_token: "<BOS>", EOS_token: "<EOS>", UNK_token: "<UNK>"}
    
    for set_i in sentence_set:
        sentence_set_i = sentence_set[set_i]
        for line in sentence_set_i:
#             line = sentence_set[n]
            tokenized_captions = tokenize(line) #Seperate the words
            all_tokens += tokenized_captions
    
    counter = count_tokens(all_tokens) #Count the word repeatitions in each set
    
    counter_dict = counter.items()
    counter_sort = sorted(counter_dict, key=lambda x:x[1],reverse=True) #sort by frequency of occurance 
    #print(counter_sort)

    i = len(index2token)
    values = [0,1,2,3]
    tokens = ["<PAD>","<BOS>","<EOS>","<UNK>"]
    
    for token, freq in counter_sort:
        word_count[token] = freq
        index2token[i] = token
        token2index[token] = i
        values += [i]
        tokens += [token]
        i+=1
        
    word_count['<PAD>'] = i
    word_count['<BOS>'] = i
    word_count['<EOS>'] = i
    word_count['<UNK>'] = i
    
    bias_init_vector = np.array([1.0 * word_count[ index2token[i] ] for i in index2token])
    bias_init_vector /= np.sum(bias_init_vector) # normalize to frequencies
    bias_init_vector = np.log(bias_init_vector)
    bias_init_vector -= np.max(bias_init_vector) # shift to nice numeric range
    
    return [word_count, tokens, values, token2index, index2token, len(index2token),bias_init_vector]

In [10]:
def flattenList(nestedList,output): 
    for i in nestedList: 
        if type(i) == list: 
            flattenList(i,output) 
        else: 
            output.append(i) 
            
    return output

def num_encode(test_sentence,index2token,tokens,tokenized_sentence=[],num_encoded_sentence=[]):
    
    tokenized_sentence.clear()
    num_encoded_sentence.clear()
    
    tokenized_sentence = ["<BOS>"] + tokenize(test_sentence) + ["<EOS>"]
    #print(tokenized_sentence)
    output=[]
    tokenized_sentence = flattenList(tokenized_sentence,output)
    
    cap_len = len(tokenized_sentence)
    
    while len(tokenized_sentence) < MAX_WORDS:
        tokenized_sentence.append("<PAD>")    
    
    #print(len(tokenized_sentence))
    
    for ind, token in enumerate(tokenized_sentence):
        if token in tokens:
            for i in range(0,len(index2token)):
                if token == index2token[i]: 
                    num_encoded_sentence.append(i) 
                    
            #print("token exists")
        else:
            num_encoded_sentence.append(3)
            tokenized_sentence[ind] = tokens[3]
            #print("token unknown")
            
            
                
    #print(len(num_encoded_sentence))

        
    return tokenized_sentence, num_encoded_sentence, cap_len

In [11]:

# def parse_sentence_data_into_batches(sentence_set, index2token,tokens,batch_size):

#     tokenizedsentence_batch = {}
#     intencode_batch = {}
#     cap_len_batch = {}

#     ii = 0
#     jj = 0  

#     for n in sentence_set:
#         sentence = sentence_set[n]

#         tokenized_sentence,encoded_sentence, cap_len = num_encode(sentence,index2token,tokens)
        
#         #print(np.shape(encoded_sentence))

#         tokenized_sentence = list(tokenized_sentence)
#         encoded_sentence = list(encoded_sentence)

#         if jj not in intencode_batch:
#             #onehot_batch[jj] = []
#             intencode_batch[jj] = []
#             tokenizedsentence_batch[jj] = []
#             cap_len_batch[jj] = []

#         #print(np.shape(onehot_encoded_sentence))    
#         #onehot_batch[jj].append(onehot_encoded_sentence)
#         intencode_batch[jj].append(encoded_sentence)
#         tokenizedsentence_batch[jj].append(tokenized_sentence)
#         cap_len_batch[jj].append(cap_len)

#         ii = ii+1

#         if ii%batch_size == 0:
#             jj = jj+1
            
        
#     return tokenizedsentence_batch, intencode_batch, cap_len_batch

In [12]:
# with open(filename_train, 'r') as f:
#     datastore = json.load(f)
    
# for data in datastore:
#     print(data["id"])
    

In [13]:
filename_train = 'MLDS_hw2_1_data/training_label.json'
feat_filepath_train = "MLDS_hw2_1_data/training_data/feat/{}.npy"

ckpt_path = 'saved_model/trained_model.ckpt'

# forget_bias_red = 1.0
# forget_bias_gre = 1.0
# dropout_prob    = 0.5

batch_size = 250

MAX_WORDS = max_caption_len #max number of words in a caption
n_features = n_inputs
no_of_frames = n_frames
sizeof_sentence= MAX_WORDS
learning_rate = 0.0001
n_hidden = n_hidden

#### PARSE TRAINING DATA #####

# Extracting captions for each video
sentence_set = extract_sentences(filename_train)

word_count, tokens, values, token2index, index2token, n_words, bias_init_vector = listVocab(sentence_set)
print("There are %d unique words in the captions dataset" % n_words)

mult_vids, all_sents, all_enc_sents, all_cap_len, all_ids = parse_data_into_lists(filename_train,\
                                                                         batch_size,feat_filepath_train,\
                                                                         index2token, tokens)

vid_batch, sent_batch, intencode_batch, cap_len_batch, n_batches, id_batch = parse_data_into_batches(filename_train,\
                                                                                           batch_size,\
                                                                                           feat_filepath_train,\
                                                                                           index2token, \
                                                                                           tokens, \
                                                                                           mult_vids, \
                                                                                           all_sents, \
                                                                                           all_enc_sents, \
                                                                                           all_cap_len,\
                                                                                          all_ids)

#Parse Training Data into batches
# vid_batch, sent_batch, intencode_batch, cap_len_batch, n_batches = parse_data_into_batches(filename_train,batch_size,feat_filepath_train,index2token, tokens)

print("The number of videos in the training set are %d and each video has 80 frames with 4096 features/units each" % (n_batches*batch_size))

#tokenizedsentence_batch, intencode_batch, cap_len_batch = parse_sentence_data_into_batches(sentence_set,index2token,tokens,batch_size)

# # integer encode
# label_encoder = LabelEncoder()
# integer_encoded = label_encoder.fit_transform(values)
# integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
# integer_encoded

There are 6061 unique words in the captions dataset
[[1, 4, 8, 726, 444, 4, 54, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 4, 8, 1167, 444, 4, 54, 13, 307, 4, 3726, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 4, 28, 5, 3727, 47, 45, 153, 37, 653, 6, 116, 456, 11, 4, 54, 536, 3728, 9, 45, 153, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 4, 54, 3729, 9, 4, 374, 153, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 4, 54, 3730, 9, 6, 153, 11, 4, 8, 134, 565, 307, 10, 653, 6, 1247, 3731, 1168, 456, 13, 798, 45, 153, 47, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 4, 54, 5, 3732, 9, 4, 374, 153, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [14]:
# #### PARSE TESTING DATA #####

filename_test = 'MLDS_hw2_1_data/testing_label.json'
feat_filepath_test = "MLDS_hw2_1_data/testing_data/feat/{}.npy"
batch_size_test = 100


# #Parse Testing Data into batches
# vid_batch_test, n_batches_test = parse_vid_data_into_batches(filename_test,batch_size,feat_filepath_test)
# print("The number of videos in the test set are %d and each video has 80 frames with 4096 features/units each" % (n_batches_test*batch_size))

# # Extracting captions for each video
# sentence_set_test = extract_sentences(filename_test,feat_filepath_test)
# tokenizedsentence_batch_test, intencode_batch_test, cap_len_batch_test = parse_sentence_data_into_batches(sentence_set_test,index2token,tokens,batch_size)

#### PARSE TRAINING DATA #####

# Extracting captions for each video
sentence_set = extract_sentences(filename_test)

mult_vids, all_sents, all_enc_sents, all_cap_len, all_ids = parse_data_into_lists(filename_test,\
                                                                                  batch_size_test,\
                                                                                  feat_filepath_test,\
                                                                                  index2token,\
                                                                                  tokens)

vid_batch_test, sent_batch_test, intencode_batch_test, cap_len_batch_test, n_batches_test, id_batch_test = parse_data_into_batches(filename_test,\
                                                                                           batch_size_test,\
                                                                                           feat_filepath_test,\
                                                                                           index2token, \
                                                                                           tokens, \
                                                                                           mult_vids, \
                                                                                           all_sents, \
                                                                                           all_enc_sents, \
                                                                                           all_cap_len)


In [15]:
def schedule_sampling(sampling_prob, cap_len_batch):

        sampling = np.ones(max_caption_len, dtype = bool)
        for l in range(max_caption_len):
            if np.random.uniform(0,1,1) < sampling_prob:
                sampling[l] = True
            else:
                sampling[l] = False
         
        sampling[0] = True
        return sampling

In [16]:
def inv_sigmoid(num_epo):

    # 0.88 to 0.12 (-2.0 to 2.0)
    x = np.arange(-2.0, 2.0, (4.0/num_epo))
    y = 1/(1 + np.e**x)
    #y = np.ones(num_epo)
    print(y)
    return y

In [17]:
def pred_print(pred, cap_len, label, idx2word, batch_size, id_batch):
    
    print_this = np.random.randint(0, batch_size)
    seq=[]
    for i in range(0,batch_size):
        eos_pred = max_caption_len - 1
        eos = cap_len[i] - 1
        for j in range(0, max_caption_len):
                if pred[i][j] == special_tokens['<EOS>']:
                    eos_pred = j
                    break
        myid = id_batch[i]
        pre = list( map (lambda x: idx2word[x] , pred[i][0:eos_pred])  )
        lab = list( map (lambda x: idx2word[x] , label[i][0:eos])  )
        
        pre_no_eos = list( map (lambda x: idx2word[x] , pred[i][0:(eos_pred)])  )
        sen = ' '.join([w for w in pre_no_eos])
        seq.append(sen)
        if i in print_me:      
            print('\nid: ' + str(myid) + '\nanswer: ' + str(lab) + '\nprediction: ' + str(pre))
            
    return seq

In [18]:
# samp_prob = inv_sigmoid(num_epochs)
# samp_prob

In [19]:
# samp = schedule_sampling(samp_prob[epo], caption_lens_batch)
# samp

In [None]:
tf.reset_default_graph()

from tqdm import tqdm
train_graph = tf.Graph()
gpu_config = tf.ConfigProto()
gpu_config.gpu_options.allow_growth = True

print('train_graph: start')

vocab_num = n_words
num_epochs = 10
num_display_steps = 15
num_saver_epochs = 3
output_filename = 'output.txt'

with train_graph.as_default():
    feat = tf.placeholder(tf.float32, [None, n_frames, n_inputs], name='video_features')
    captions = tf.placeholder(tf.int32, [None, max_caption_len], name='captions')
    sampling = tf.placeholder(tf.bool, [max_caption_len], name='sampling')
    cap_len = tf.placeholder(tf.int32, [None], name='cap_len')
    model = S2VT(vocab_num=vocab_num, lr=learning_rate)
    logits, loss_op, summary = model.build_model(feat, captions, cap_len, sampling, phases['train'])
    dec_pred = model.inference(logits)
    train_op = model.optimize(loss_op)
    saver = tf.train.Saver(max_to_keep=3)

    init = tf.global_variables_initializer()
train_sess = tf.Session(graph=train_graph, config=gpu_config)


with val_graph.as_default():
    feat_val = tf.placeholder(tf.float32, [None, n_frames, n_inputs], name='video_features')
    captions_val = tf.placeholder(tf.int32, [None, max_caption_len], name='captions')
    cap_len_val = tf.placeholder(tf.int32, [None], name='cap_len')

    model_val = S2VT(vocab_num=vocab_num, with_attention=FLAGS.with_attention, lr=FLAGS.learning_rate)
    logits_val, loss_op_val, summary_val = model_val.build_model(feat_val, 
                captions_val, cap_len_val, phase=phases['val'])
    dec_pred_val = model_val.inference(logits_val)

    val_saver = tf.train.Saver(max_to_keep=3)
    
val_sess = tf.Session(graph=val_graph, config=gpu_config)

In [20]:
 
train_sess.run(init)

samp_prob = inv_sigmoid(num_epochs)
pbar = tqdm(range(0, num_epochs))

loss_list = []

for epo in pbar:
    num_steps = n_batches
    epo_loss = 0
    for i in range(0, num_steps):
        data_batch = np.array(vid_batch[i])
        label_batch = np.array(intencode_batch[i])
        id_batch_train = id_batch[i]
        
        caption_lens_batch = np.array(cap_len_batch[i])
        
        #data_batch, label_batch, caption_lens_batch, id_batch = datasetTrain.next_batch()
        
        samp = schedule_sampling(samp_prob[epo], caption_lens_batch)
        
        if i % num_display_steps == 1:
            # training 
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            _, loss, p, summ = train_sess.run([train_op, loss_op, dec_pred, summary], 
                            feed_dict={feat: data_batch,
                                       captions: label_batch,
                                       cap_len: caption_lens_batch,
                                       sampling: samp},
                            options=run_options)
            
            #print(p)
            
            loss_list.append(loss)
            
            #summary_writer.add_summary(summ, global_step=(epo * num_steps) + i)
            print("\n[Train. Prediction] Epoch " + str(epo) + ", step " + str(i) + "/" + str(num_steps) + "......",)
            
            seq = pred_print(p, caption_lens_batch, label_batch, index2token, batch_size, id_batch_train)

        else:
            _, loss, p = train_sess.run([train_op, loss_op, dec_pred], 
                            feed_dict={feat: data_batch,
                                       captions: label_batch,
                                       cap_len: caption_lens_batch,
                                       sampling: samp})
            
            loss_list.append(loss)


        epo_loss += loss
    
        pbar.set_description("Epoch " + str(epo) + ", step " + str(i) + "/" + str(num_steps) + \
            ", (Training Loss: " + "{:.4f}".format(loss) + \
            ", samp_prob: " + "{:.4f}".format(samp_prob[epo]) + ")" )

    print("\n[FINISHED] Epoch " + str(epo) + ", (Training Loss (per epoch): " + "{:.4f}".format(epo_loss) + " samp_prob: " + "{:.4f}".format(samp_prob[epo]) + ")")

    if epo % num_saver_epoches == 1:
        ckpt_path = saver.save(train_sess,ckpt_path, global_step=(epo * num_steps) + num_steps - 1)
        val_saver.restore(val_sess, ckpt_path)

        num_steps_val = n_batches_test
        epo_loss_val = 0
        txt = open(output_filename, 'w')
        for j in range(0,n_batches_test):
            data_batch_val = np.array(vid_batch_test[i])
            label_batch_val = np.array(intencode_batch_test[i])
            id_batch_val = id_batch_test[i]
            caption_lens_batch_val = np.array(cap_len_batch_test[i])
            
            loss_val, p_val, summ = val_sess.run([loss_op_val, dec_pred_val, summary_val], 
                                        feed_dict={feat_val: data_batch_val,
                                                   captions_val: label_batch_val,
                                                   cap_len_val: caption_lens_batch_val})
            
            seq_val = pred_print(p_val, caption_lens_batch, label_batch_val, index2token, batch_size_test, id_batch_val)
            total_loss_val += loss_val
            
            for k in range(0, batch_size_test):
                    txt.write(id_batch_val[k] + "," + seq_val[k] + "\n")
                    
            print('\nSave file: ' + output_filename)
            txt.close()
            call(['python3', 'MLDS_hw2_1_data/bleu_eval.py', output_filename])
            
            print("Validation: " + str((j+1) * batch_size_test) + "/" + \
                    str(n_batches_test) + ", done..." \
                    + "Total Loss: " + "{:.4f}".format(total_loss_val))


train_graph: start
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


  0%|          | 0/10 [00:00<?, ?it/s]

[0.88079708 0.83201839 0.76852478 0.68997448 0.59868766 0.5
 0.40131234 0.31002552 0.23147522 0.16798161]


Epoch 0, step 0/96, (Training Loss: 8.7123, samp_prob: 0.8808):   0%|          | 0/10 [00:45<?, ?it/s]

0

[FINISHED] Epoch 0, (Training Loss (per epoch): 8.7123 samp_prob: 0.8808)


Epoch 0, step 1/96, (Training Loss: 8.5531, samp_prob: 0.8808):   0%|          | 0/10 [01:31<?, ?it/s]

[[2243 5428 1820 ... 2847 5628 2590]
 [   1  223  256 ... 1080 4948 1686]
 [ 247  329 1904 ... 1397 5851 2069]
 ...
 [ 160 3551 2378 ...  525 2919 3327]
 [1761 4336 4804 ... 2627 2659 1194]
 [3837 6004 1061 ...  557 4713  962]]

[Train. Prediction] Epoch 0, step 1/96......

id: 125
answer: ['<BOS>', 'a', 'woman', 'is', 'applying', 'cosmetics', 'to', 'her', 'eyelid']
prediction: ['occurring', 'compressions', 'boiled', 'cheesy', 'trainer', 'thing', 'cry', 'boiled', 'streaks', 'fours', 'sailboat', 'spinning', 'rolercoaster', 'flour', 'current', 'crackers', 'leopard', 'treatment', 'robot', 'frowns', 'breaddough', 'strumming', 'mixes', 'swimmers', 'floater', 'woke', 'point', 'wreaths', 'dolls', 'pulled', 'shaved', 'transfers', 'sautes', 'superman', 'caressing', 'swallows', 'slam', 'fifth', 'carved', 'persona', 'apart', 'noises', 'playground', 'smeone', 'kicked', 'boulders', 'winning', 'foxes', 'aside']
1

[FINISHED] Epoch 0, (Training Loss (per epoch): 17.2654 samp_prob: 0.8808)


Epoch 0, step 2/96, (Training Loss: 8.3850, samp_prob: 0.8808):   0%|          | 0/10 [01:39<?, ?it/s]

2

[FINISHED] Epoch 0, (Training Loss (per epoch): 25.6505 samp_prob: 0.8808)


Epoch 0, step 3/96, (Training Loss: 8.2120, samp_prob: 0.8808):   0%|          | 0/10 [01:47<?, ?it/s]

3

[FINISHED] Epoch 0, (Training Loss (per epoch): 33.8624 samp_prob: 0.8808)


Epoch 0, step 4/96, (Training Loss: 7.9930, samp_prob: 0.8808):   0%|          | 0/10 [01:54<?, ?it/s]

4

[FINISHED] Epoch 0, (Training Loss (per epoch): 41.8554 samp_prob: 0.8808)


Epoch 0, step 5/96, (Training Loss: 7.8038, samp_prob: 0.8808):   0%|          | 0/10 [02:02<?, ?it/s]

5

[FINISHED] Epoch 0, (Training Loss (per epoch): 49.6592 samp_prob: 0.8808)


Epoch 0, step 6/96, (Training Loss: 7.5463, samp_prob: 0.8808):   0%|          | 0/10 [02:10<?, ?it/s]

6

[FINISHED] Epoch 0, (Training Loss (per epoch): 57.2056 samp_prob: 0.8808)


Epoch 0, step 7/96, (Training Loss: 7.3709, samp_prob: 0.8808):   0%|          | 0/10 [02:17<?, ?it/s]

7

[FINISHED] Epoch 0, (Training Loss (per epoch): 64.5765 samp_prob: 0.8808)


Epoch 0, step 8/96, (Training Loss: 7.0762, samp_prob: 0.8808):   0%|          | 0/10 [02:25<?, ?it/s]

8

[FINISHED] Epoch 0, (Training Loss (per epoch): 71.6527 samp_prob: 0.8808)


Epoch 0, step 9/96, (Training Loss: 6.8165, samp_prob: 0.8808):   0%|          | 0/10 [02:33<?, ?it/s]

9

[FINISHED] Epoch 0, (Training Loss (per epoch): 78.4691 samp_prob: 0.8808)


Epoch 0, step 10/96, (Training Loss: 6.5085, samp_prob: 0.8808):   0%|          | 0/10 [02:40<?, ?it/s]

10

[FINISHED] Epoch 0, (Training Loss (per epoch): 84.9776 samp_prob: 0.8808)


Epoch 0, step 11/96, (Training Loss: 6.3459, samp_prob: 0.8808):   0%|          | 0/10 [02:48<?, ?it/s]

11

[FINISHED] Epoch 0, (Training Loss (per epoch): 91.3236 samp_prob: 0.8808)


Epoch 0, step 12/96, (Training Loss: 6.1301, samp_prob: 0.8808):   0%|          | 0/10 [02:56<?, ?it/s]

12

[FINISHED] Epoch 0, (Training Loss (per epoch): 97.4536 samp_prob: 0.8808)


Epoch 0, step 13/96, (Training Loss: 6.0999, samp_prob: 0.8808):   0%|          | 0/10 [03:03<?, ?it/s]

13

[FINISHED] Epoch 0, (Training Loss (per epoch): 103.5535 samp_prob: 0.8808)


Epoch 0, step 14/96, (Training Loss: 5.9854, samp_prob: 0.8808):   0%|          | 0/10 [03:11<?, ?it/s]

14

[FINISHED] Epoch 0, (Training Loss (per epoch): 109.5389 samp_prob: 0.8808)


Epoch 0, step 15/96, (Training Loss: 5.9059, samp_prob: 0.8808):   0%|          | 0/10 [03:19<?, ?it/s]

15

[FINISHED] Epoch 0, (Training Loss (per epoch): 115.4448 samp_prob: 0.8808)


Epoch 0, step 16/96, (Training Loss: 5.7722, samp_prob: 0.8808):   0%|          | 0/10 [03:27<?, ?it/s]

[[1 4 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]
 ...
 [1 4 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]]

[Train. Prediction] Epoch 0, step 16/96......

id: 124
answer: ['<BOS>', 'the', 'man', 'is', 'singing']
prediction: ['<BOS>', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
16

[FINISHED] Epoch 0, (Training Loss (per epoch): 121.2169 samp_prob: 0.8808)


Epoch 0, step 17/96, (Training Loss: 5.6653, samp_prob: 0.8808):   0%|          | 0/10 [03:34<?, ?it/s]

17

[FINISHED] Epoch 0, (Training Loss (per epoch): 126.8823 samp_prob: 0.8808)


Epoch 0, step 18/96, (Training Loss: 5.5787, samp_prob: 0.8808):   0%|          | 0/10 [03:42<?, ?it/s]

18

[FINISHED] Epoch 0, (Training Loss (per epoch): 132.4610 samp_prob: 0.8808)


Epoch 0, step 19/96, (Training Loss: 5.4477, samp_prob: 0.8808):   0%|          | 0/10 [03:50<?, ?it/s]

19

[FINISHED] Epoch 0, (Training Loss (per epoch): 137.9087 samp_prob: 0.8808)


Epoch 0, step 20/96, (Training Loss: 5.5848, samp_prob: 0.8808):   0%|          | 0/10 [03:58<?, ?it/s]

20

[FINISHED] Epoch 0, (Training Loss (per epoch): 143.4935 samp_prob: 0.8808)


Epoch 0, step 21/96, (Training Loss: 5.4953, samp_prob: 0.8808):   0%|          | 0/10 [04:05<?, ?it/s]

21

[FINISHED] Epoch 0, (Training Loss (per epoch): 148.9888 samp_prob: 0.8808)


Epoch 0, step 22/96, (Training Loss: 5.3883, samp_prob: 0.8808):   0%|          | 0/10 [04:13<?, ?it/s]

22

[FINISHED] Epoch 0, (Training Loss (per epoch): 154.3771 samp_prob: 0.8808)


Epoch 0, step 23/96, (Training Loss: 5.4415, samp_prob: 0.8808):   0%|          | 0/10 [04:20<?, ?it/s]

23

[FINISHED] Epoch 0, (Training Loss (per epoch): 159.8186 samp_prob: 0.8808)


Epoch 0, step 24/96, (Training Loss: 5.5368, samp_prob: 0.8808):   0%|          | 0/10 [04:28<?, ?it/s]

24

[FINISHED] Epoch 0, (Training Loss (per epoch): 165.3555 samp_prob: 0.8808)


Epoch 0, step 25/96, (Training Loss: 5.2621, samp_prob: 0.8808):   0%|          | 0/10 [04:36<?, ?it/s]

25

[FINISHED] Epoch 0, (Training Loss (per epoch): 170.6176 samp_prob: 0.8808)


Epoch 0, step 26/96, (Training Loss: 5.3219, samp_prob: 0.8808):   0%|          | 0/10 [04:44<?, ?it/s]

26

[FINISHED] Epoch 0, (Training Loss (per epoch): 175.9395 samp_prob: 0.8808)


Epoch 0, step 27/96, (Training Loss: 5.2532, samp_prob: 0.8808):   0%|          | 0/10 [04:51<?, ?it/s]

27

[FINISHED] Epoch 0, (Training Loss (per epoch): 181.1928 samp_prob: 0.8808)


Epoch 0, step 28/96, (Training Loss: 5.1096, samp_prob: 0.8808):   0%|          | 0/10 [04:59<?, ?it/s]

28

[FINISHED] Epoch 0, (Training Loss (per epoch): 186.3024 samp_prob: 0.8808)


Epoch 0, step 29/96, (Training Loss: 5.2470, samp_prob: 0.8808):   0%|          | 0/10 [05:06<?, ?it/s]

29

[FINISHED] Epoch 0, (Training Loss (per epoch): 191.5494 samp_prob: 0.8808)


Epoch 0, step 30/96, (Training Loss: 5.2409, samp_prob: 0.8808):   0%|          | 0/10 [05:14<?, ?it/s]

30

[FINISHED] Epoch 0, (Training Loss (per epoch): 196.7903 samp_prob: 0.8808)


Epoch 0, step 31/96, (Training Loss: 5.1575, samp_prob: 0.8808):   0%|          | 0/10 [05:22<?, ?it/s]

[[1 6 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]
 ...
 [1 4 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]]

[Train. Prediction] Epoch 0, step 31/96......

id: 0
answer: ['<BOS>', 'the', 'soccer', 'teams', 'kicked', 'the', 'ball']
prediction: ['<BOS>', 'the', 'a', 'the']
31

[FINISHED] Epoch 0, (Training Loss (per epoch): 201.9478 samp_prob: 0.8808)


Epoch 0, step 32/96, (Training Loss: 5.1944, samp_prob: 0.8808):   0%|          | 0/10 [05:30<?, ?it/s]

32

[FINISHED] Epoch 0, (Training Loss (per epoch): 207.1422 samp_prob: 0.8808)


Epoch 0, step 33/96, (Training Loss: 5.1365, samp_prob: 0.8808):   0%|          | 0/10 [05:37<?, ?it/s]

33

[FINISHED] Epoch 0, (Training Loss (per epoch): 212.2786 samp_prob: 0.8808)


Epoch 0, step 34/96, (Training Loss: 5.1325, samp_prob: 0.8808):   0%|          | 0/10 [05:45<?, ?it/s]

34

[FINISHED] Epoch 0, (Training Loss (per epoch): 217.4111 samp_prob: 0.8808)


Epoch 0, step 35/96, (Training Loss: 5.0868, samp_prob: 0.8808):   0%|          | 0/10 [05:53<?, ?it/s]

35

[FINISHED] Epoch 0, (Training Loss (per epoch): 222.4979 samp_prob: 0.8808)


Epoch 0, step 36/96, (Training Loss: 5.0772, samp_prob: 0.8808):   0%|          | 0/10 [06:00<?, ?it/s]

36

[FINISHED] Epoch 0, (Training Loss (per epoch): 227.5751 samp_prob: 0.8808)


Epoch 0, step 37/96, (Training Loss: 5.0435, samp_prob: 0.8808):   0%|          | 0/10 [06:08<?, ?it/s]

37

[FINISHED] Epoch 0, (Training Loss (per epoch): 232.6187 samp_prob: 0.8808)


Epoch 0, step 38/96, (Training Loss: 4.9314, samp_prob: 0.8808):   0%|          | 0/10 [06:16<?, ?it/s]

38

[FINISHED] Epoch 0, (Training Loss (per epoch): 237.5501 samp_prob: 0.8808)


Epoch 0, step 39/96, (Training Loss: 4.9560, samp_prob: 0.8808):   0%|          | 0/10 [06:23<?, ?it/s]

39

[FINISHED] Epoch 0, (Training Loss (per epoch): 242.5060 samp_prob: 0.8808)


Epoch 0, step 40/96, (Training Loss: 4.9066, samp_prob: 0.8808):   0%|          | 0/10 [06:31<?, ?it/s]

40

[FINISHED] Epoch 0, (Training Loss (per epoch): 247.4127 samp_prob: 0.8808)


Epoch 0, step 41/96, (Training Loss: 4.8883, samp_prob: 0.8808):   0%|          | 0/10 [06:38<?, ?it/s]

41

[FINISHED] Epoch 0, (Training Loss (per epoch): 252.3010 samp_prob: 0.8808)


Epoch 0, step 42/96, (Training Loss: 4.9522, samp_prob: 0.8808):   0%|          | 0/10 [06:46<?, ?it/s]

42

[FINISHED] Epoch 0, (Training Loss (per epoch): 257.2532 samp_prob: 0.8808)


Epoch 0, step 43/96, (Training Loss: 4.9226, samp_prob: 0.8808):   0%|          | 0/10 [06:54<?, ?it/s]

43

[FINISHED] Epoch 0, (Training Loss (per epoch): 262.1758 samp_prob: 0.8808)


Epoch 0, step 44/96, (Training Loss: 4.9375, samp_prob: 0.8808):   0%|          | 0/10 [07:01<?, ?it/s]

44

[FINISHED] Epoch 0, (Training Loss (per epoch): 267.1133 samp_prob: 0.8808)


Epoch 0, step 45/96, (Training Loss: 4.8767, samp_prob: 0.8808):   0%|          | 0/10 [07:09<?, ?it/s]

45

[FINISHED] Epoch 0, (Training Loss (per epoch): 271.9900 samp_prob: 0.8808)


Epoch 0, step 46/96, (Training Loss: 4.8817, samp_prob: 0.8808):   0%|          | 0/10 [07:17<?, ?it/s]

[[1 4 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 [1 4 4 ... 2 2 2]]

[Train. Prediction] Epoch 0, step 46/96......

id: 71
answer: ['<BOS>', 'three', 'boys', 'are', 'dancing']
prediction: ['<BOS>', 'a', 'a', 'is', 'is', 'a']
46

[FINISHED] Epoch 0, (Training Loss (per epoch): 276.8717 samp_prob: 0.8808)


Epoch 0, step 47/96, (Training Loss: 4.7824, samp_prob: 0.8808):   0%|          | 0/10 [07:25<?, ?it/s]

47

[FINISHED] Epoch 0, (Training Loss (per epoch): 281.6541 samp_prob: 0.8808)


Epoch 0, step 48/96, (Training Loss: 4.8315, samp_prob: 0.8808):   0%|          | 0/10 [07:32<?, ?it/s]

48

[FINISHED] Epoch 0, (Training Loss (per epoch): 286.4856 samp_prob: 0.8808)


Epoch 0, step 49/96, (Training Loss: 4.8193, samp_prob: 0.8808):   0%|          | 0/10 [07:40<?, ?it/s]

49

[FINISHED] Epoch 0, (Training Loss (per epoch): 291.3049 samp_prob: 0.8808)


Epoch 0, step 50/96, (Training Loss: 4.8415, samp_prob: 0.8808):   0%|          | 0/10 [07:48<?, ?it/s]

50

[FINISHED] Epoch 0, (Training Loss (per epoch): 296.1464 samp_prob: 0.8808)


Epoch 0, step 51/96, (Training Loss: 4.7325, samp_prob: 0.8808):   0%|          | 0/10 [07:55<?, ?it/s]

51

[FINISHED] Epoch 0, (Training Loss (per epoch): 300.8789 samp_prob: 0.8808)


Epoch 0, step 52/96, (Training Loss: 4.7595, samp_prob: 0.8808):   0%|          | 0/10 [08:03<?, ?it/s]

52

[FINISHED] Epoch 0, (Training Loss (per epoch): 305.6384 samp_prob: 0.8808)


Epoch 0, step 53/96, (Training Loss: 4.8120, samp_prob: 0.8808):   0%|          | 0/10 [08:10<?, ?it/s]

53

[FINISHED] Epoch 0, (Training Loss (per epoch): 310.4504 samp_prob: 0.8808)


Epoch 0, step 54/96, (Training Loss: 4.6444, samp_prob: 0.8808):   0%|          | 0/10 [08:18<?, ?it/s]

54

[FINISHED] Epoch 0, (Training Loss (per epoch): 315.0948 samp_prob: 0.8808)


Epoch 0, step 55/96, (Training Loss: 4.7365, samp_prob: 0.8808):   0%|          | 0/10 [08:26<?, ?it/s]

55

[FINISHED] Epoch 0, (Training Loss (per epoch): 319.8313 samp_prob: 0.8808)


Epoch 0, step 56/96, (Training Loss: 4.6250, samp_prob: 0.8808):   0%|          | 0/10 [08:33<?, ?it/s]

56

[FINISHED] Epoch 0, (Training Loss (per epoch): 324.4563 samp_prob: 0.8808)


Epoch 0, step 57/96, (Training Loss: 4.6197, samp_prob: 0.8808):   0%|          | 0/10 [08:41<?, ?it/s]

57

[FINISHED] Epoch 0, (Training Loss (per epoch): 329.0760 samp_prob: 0.8808)


Epoch 0, step 58/96, (Training Loss: 4.5808, samp_prob: 0.8808):   0%|          | 0/10 [08:49<?, ?it/s]

58

[FINISHED] Epoch 0, (Training Loss (per epoch): 333.6568 samp_prob: 0.8808)


Epoch 0, step 59/96, (Training Loss: 4.6141, samp_prob: 0.8808):   0%|          | 0/10 [08:56<?, ?it/s]

59

[FINISHED] Epoch 0, (Training Loss (per epoch): 338.2709 samp_prob: 0.8808)


Epoch 0, step 60/96, (Training Loss: 4.6241, samp_prob: 0.8808):   0%|          | 0/10 [09:04<?, ?it/s]

60

[FINISHED] Epoch 0, (Training Loss (per epoch): 342.8950 samp_prob: 0.8808)


Epoch 0, step 61/96, (Training Loss: 4.6771, samp_prob: 0.8808):   0%|          | 0/10 [09:12<?, ?it/s]

[[1 4 4 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 6 ... 2 2 2]
 ...
 [1 4 4 ... 2 2 2]
 [1 4 4 ... 2 2 2]
 [1 4 8 ... 2 2 2]]

[Train. Prediction] Epoch 0, step 61/96......

id: 66
answer: ['<BOS>', 'a', 'boy', 'looks', 'at', 'something', 'while', 'he', 'eats']
prediction: ['<BOS>', 'a', 'a', 'a', 'is', 'a']
61

[FINISHED] Epoch 0, (Training Loss (per epoch): 347.5720 samp_prob: 0.8808)


Epoch 0, step 62/96, (Training Loss: 4.6535, samp_prob: 0.8808):   0%|          | 0/10 [09:19<?, ?it/s]

62

[FINISHED] Epoch 0, (Training Loss (per epoch): 352.2256 samp_prob: 0.8808)


Epoch 0, step 63/96, (Training Loss: 4.6660, samp_prob: 0.8808):   0%|          | 0/10 [09:27<?, ?it/s]

63

[FINISHED] Epoch 0, (Training Loss (per epoch): 356.8915 samp_prob: 0.8808)


Epoch 0, step 64/96, (Training Loss: 4.6352, samp_prob: 0.8808):   0%|          | 0/10 [09:35<?, ?it/s]

64

[FINISHED] Epoch 0, (Training Loss (per epoch): 361.5268 samp_prob: 0.8808)


Epoch 0, step 65/96, (Training Loss: 4.5028, samp_prob: 0.8808):   0%|          | 0/10 [09:42<?, ?it/s]

65

[FINISHED] Epoch 0, (Training Loss (per epoch): 366.0295 samp_prob: 0.8808)


Epoch 0, step 66/96, (Training Loss: 4.6659, samp_prob: 0.8808):   0%|          | 0/10 [09:50<?, ?it/s]

66

[FINISHED] Epoch 0, (Training Loss (per epoch): 370.6954 samp_prob: 0.8808)


Epoch 0, step 67/96, (Training Loss: 4.5902, samp_prob: 0.8808):   0%|          | 0/10 [09:57<?, ?it/s]

67

[FINISHED] Epoch 0, (Training Loss (per epoch): 375.2856 samp_prob: 0.8808)


Epoch 0, step 68/96, (Training Loss: 4.5193, samp_prob: 0.8808):   0%|          | 0/10 [10:05<?, ?it/s]

68

[FINISHED] Epoch 0, (Training Loss (per epoch): 379.8049 samp_prob: 0.8808)


Epoch 0, step 69/96, (Training Loss: 4.5095, samp_prob: 0.8808):   0%|          | 0/10 [10:13<?, ?it/s]

69

[FINISHED] Epoch 0, (Training Loss (per epoch): 384.3144 samp_prob: 0.8808)


Epoch 0, step 70/96, (Training Loss: 4.6237, samp_prob: 0.8808):   0%|          | 0/10 [10:20<?, ?it/s]

70

[FINISHED] Epoch 0, (Training Loss (per epoch): 388.9381 samp_prob: 0.8808)


Epoch 0, step 71/96, (Training Loss: 4.5013, samp_prob: 0.8808):   0%|          | 0/10 [10:28<?, ?it/s]

71

[FINISHED] Epoch 0, (Training Loss (per epoch): 393.4394 samp_prob: 0.8808)


Epoch 0, step 72/96, (Training Loss: 4.5907, samp_prob: 0.8808):   0%|          | 0/10 [10:35<?, ?it/s]

72

[FINISHED] Epoch 0, (Training Loss (per epoch): 398.0301 samp_prob: 0.8808)


Epoch 0, step 73/96, (Training Loss: 4.4562, samp_prob: 0.8808):   0%|          | 0/10 [10:43<?, ?it/s]

73

[FINISHED] Epoch 0, (Training Loss (per epoch): 402.4862 samp_prob: 0.8808)


Epoch 0, step 74/96, (Training Loss: 4.5352, samp_prob: 0.8808):   0%|          | 0/10 [10:50<?, ?it/s]

74

[FINISHED] Epoch 0, (Training Loss (per epoch): 407.0214 samp_prob: 0.8808)


Epoch 0, step 75/96, (Training Loss: 4.5639, samp_prob: 0.8808):   0%|          | 0/10 [10:58<?, ?it/s]

75

[FINISHED] Epoch 0, (Training Loss (per epoch): 411.5854 samp_prob: 0.8808)


Epoch 0, step 76/96, (Training Loss: 4.5869, samp_prob: 0.8808):   0%|          | 0/10 [11:06<?, ?it/s]

[[1 4 7 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 [1 4 7 ... 2 2 2]]

[Train. Prediction] Epoch 0, step 76/96......

id: 37
answer: ['<BOS>', 'a', 'man', 'picks', 'a', 'car', 'up', 'off', 'of', 'its', 'hind', 'wheels']
prediction: ['<BOS>', 'a', 'a', 'a', 'the', 'a']
76

[FINISHED] Epoch 0, (Training Loss (per epoch): 416.1722 samp_prob: 0.8808)


Epoch 0, step 77/96, (Training Loss: 4.4364, samp_prob: 0.8808):   0%|          | 0/10 [11:14<?, ?it/s]

77

[FINISHED] Epoch 0, (Training Loss (per epoch): 420.6086 samp_prob: 0.8808)


Epoch 0, step 78/96, (Training Loss: 4.3846, samp_prob: 0.8808):   0%|          | 0/10 [11:21<?, ?it/s]

78

[FINISHED] Epoch 0, (Training Loss (per epoch): 424.9932 samp_prob: 0.8808)


Epoch 0, step 79/96, (Training Loss: 4.3867, samp_prob: 0.8808):   0%|          | 0/10 [11:29<?, ?it/s]

79

[FINISHED] Epoch 0, (Training Loss (per epoch): 429.3799 samp_prob: 0.8808)


Epoch 0, step 80/96, (Training Loss: 4.4601, samp_prob: 0.8808):   0%|          | 0/10 [11:37<?, ?it/s]

80

[FINISHED] Epoch 0, (Training Loss (per epoch): 433.8400 samp_prob: 0.8808)


Epoch 0, step 81/96, (Training Loss: 4.3550, samp_prob: 0.8808):   0%|          | 0/10 [11:44<?, ?it/s]

81

[FINISHED] Epoch 0, (Training Loss (per epoch): 438.1950 samp_prob: 0.8808)


Epoch 0, step 82/96, (Training Loss: 4.5722, samp_prob: 0.8808):   0%|          | 0/10 [11:52<?, ?it/s]

82

[FINISHED] Epoch 0, (Training Loss (per epoch): 442.7672 samp_prob: 0.8808)


Epoch 0, step 83/96, (Training Loss: 4.4723, samp_prob: 0.8808):   0%|          | 0/10 [11:59<?, ?it/s]

83

[FINISHED] Epoch 0, (Training Loss (per epoch): 447.2395 samp_prob: 0.8808)


Epoch 0, step 84/96, (Training Loss: 4.4309, samp_prob: 0.8808):   0%|          | 0/10 [12:07<?, ?it/s]

84

[FINISHED] Epoch 0, (Training Loss (per epoch): 451.6704 samp_prob: 0.8808)


Epoch 0, step 85/96, (Training Loss: 4.5116, samp_prob: 0.8808):   0%|          | 0/10 [12:14<?, ?it/s]

85

[FINISHED] Epoch 0, (Training Loss (per epoch): 456.1820 samp_prob: 0.8808)


Epoch 0, step 86/96, (Training Loss: 4.3758, samp_prob: 0.8808):   0%|          | 0/10 [12:22<?, ?it/s]

86

[FINISHED] Epoch 0, (Training Loss (per epoch): 460.5579 samp_prob: 0.8808)


Epoch 0, step 87/96, (Training Loss: 4.4602, samp_prob: 0.8808):   0%|          | 0/10 [12:30<?, ?it/s]

87

[FINISHED] Epoch 0, (Training Loss (per epoch): 465.0180 samp_prob: 0.8808)


Epoch 0, step 88/96, (Training Loss: 4.4072, samp_prob: 0.8808):   0%|          | 0/10 [12:37<?, ?it/s]

88

[FINISHED] Epoch 0, (Training Loss (per epoch): 469.4253 samp_prob: 0.8808)


Epoch 0, step 89/96, (Training Loss: 4.3489, samp_prob: 0.8808):   0%|          | 0/10 [12:45<?, ?it/s]

89

[FINISHED] Epoch 0, (Training Loss (per epoch): 473.7742 samp_prob: 0.8808)


Epoch 0, step 90/96, (Training Loss: 4.3345, samp_prob: 0.8808):   0%|          | 0/10 [12:53<?, ?it/s]

90

[FINISHED] Epoch 0, (Training Loss (per epoch): 478.1087 samp_prob: 0.8808)


Epoch 0, step 91/96, (Training Loss: 4.3736, samp_prob: 0.8808):   0%|          | 0/10 [13:01<?, ?it/s]

[[1 4 7 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 [1 4 7 ... 2 2 2]]

[Train. Prediction] Epoch 0, step 91/96......

id: 69
answer: ['<BOS>', 'a', 'cat', 'is', 'looking', 'out', 'a', 'window']
prediction: ['<BOS>', 'the', 'is', 'is', 'the', 'a']
91

[FINISHED] Epoch 0, (Training Loss (per epoch): 482.4823 samp_prob: 0.8808)


Epoch 0, step 92/96, (Training Loss: 4.2900, samp_prob: 0.8808):   0%|          | 0/10 [13:08<?, ?it/s]

92

[FINISHED] Epoch 0, (Training Loss (per epoch): 486.7723 samp_prob: 0.8808)


Epoch 0, step 93/96, (Training Loss: 4.3261, samp_prob: 0.8808):   0%|          | 0/10 [13:16<?, ?it/s]

93

[FINISHED] Epoch 0, (Training Loss (per epoch): 491.0983 samp_prob: 0.8808)


Epoch 0, step 94/96, (Training Loss: 4.4183, samp_prob: 0.8808):   0%|          | 0/10 [13:23<?, ?it/s]

94

[FINISHED] Epoch 0, (Training Loss (per epoch): 495.5166 samp_prob: 0.8808)


Epoch 0, step 95/96, (Training Loss: 4.4036, samp_prob: 0.8808):  10%|█         | 1/10 [13:31<2:01:42, 811.44s/it]

95

[FINISHED] Epoch 0, (Training Loss (per epoch): 499.9202 samp_prob: 0.8808)


Epoch 1, step 0/96, (Training Loss: 4.4099, samp_prob: 0.8320):  10%|█         | 1/10 [13:38<2:01:42, 811.44s/it] 

0

[FINISHED] Epoch 1, (Training Loss (per epoch): 4.4099 samp_prob: 0.8320)


Epoch 1, step 1/96, (Training Loss: 4.3766, samp_prob: 0.8320):  10%|█         | 1/10 [13:47<2:01:42, 811.44s/it]

[[1 4 5 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]]

[Train. Prediction] Epoch 1, step 1/96......

id: 16
answer: ['<BOS>', 'a', 'boy', 'puts', 'a', 'rock', 'in', 'his', 'mouth']
prediction: ['<BOS>', 'a', 'is', 'man', 'is', 'a']
1

[FINISHED] Epoch 1, (Training Loss (per epoch): 8.7865 samp_prob: 0.8320)


Epoch 1, step 2/96, (Training Loss: 4.3499, samp_prob: 0.8320):  10%|█         | 1/10 [13:54<2:01:42, 811.44s/it]

2

[FINISHED] Epoch 1, (Training Loss (per epoch): 13.1365 samp_prob: 0.8320)


Epoch 1, step 3/96, (Training Loss: 4.3683, samp_prob: 0.8320):  10%|█         | 1/10 [14:02<2:01:42, 811.44s/it]

3

[FINISHED] Epoch 1, (Training Loss (per epoch): 17.5048 samp_prob: 0.8320)


Epoch 1, step 4/96, (Training Loss: 4.2511, samp_prob: 0.8320):  10%|█         | 1/10 [14:09<2:01:42, 811.44s/it]

4

[FINISHED] Epoch 1, (Training Loss (per epoch): 21.7559 samp_prob: 0.8320)


Epoch 1, step 5/96, (Training Loss: 4.3148, samp_prob: 0.8320):  10%|█         | 1/10 [14:17<2:01:42, 811.44s/it]

5

[FINISHED] Epoch 1, (Training Loss (per epoch): 26.0707 samp_prob: 0.8320)


Epoch 1, step 6/96, (Training Loss: 4.2464, samp_prob: 0.8320):  10%|█         | 1/10 [14:25<2:01:42, 811.44s/it]

6

[FINISHED] Epoch 1, (Training Loss (per epoch): 30.3172 samp_prob: 0.8320)


Epoch 1, step 7/96, (Training Loss: 4.3591, samp_prob: 0.8320):  10%|█         | 1/10 [14:32<2:01:42, 811.44s/it]

7

[FINISHED] Epoch 1, (Training Loss (per epoch): 34.6763 samp_prob: 0.8320)


Epoch 1, step 8/96, (Training Loss: 4.3300, samp_prob: 0.8320):  10%|█         | 1/10 [14:40<2:01:42, 811.44s/it]

8

[FINISHED] Epoch 1, (Training Loss (per epoch): 39.0063 samp_prob: 0.8320)


Epoch 1, step 9/96, (Training Loss: 4.3108, samp_prob: 0.8320):  10%|█         | 1/10 [14:47<2:01:42, 811.44s/it]

9

[FINISHED] Epoch 1, (Training Loss (per epoch): 43.3171 samp_prob: 0.8320)


Epoch 1, step 10/96, (Training Loss: 4.2230, samp_prob: 0.8320):  10%|█         | 1/10 [14:55<2:01:42, 811.44s/it]

10

[FINISHED] Epoch 1, (Training Loss (per epoch): 47.5400 samp_prob: 0.8320)


Epoch 1, step 11/96, (Training Loss: 4.3154, samp_prob: 0.8320):  10%|█         | 1/10 [15:02<2:01:42, 811.44s/it]

11

[FINISHED] Epoch 1, (Training Loss (per epoch): 51.8554 samp_prob: 0.8320)


Epoch 1, step 12/96, (Training Loss: 4.2924, samp_prob: 0.8320):  10%|█         | 1/10 [15:10<2:01:42, 811.44s/it]

12

[FINISHED] Epoch 1, (Training Loss (per epoch): 56.1478 samp_prob: 0.8320)


Epoch 1, step 13/96, (Training Loss: 4.3986, samp_prob: 0.8320):  10%|█         | 1/10 [15:18<2:01:42, 811.44s/it]

13

[FINISHED] Epoch 1, (Training Loss (per epoch): 60.5464 samp_prob: 0.8320)


Epoch 1, step 14/96, (Training Loss: 4.3375, samp_prob: 0.8320):  10%|█         | 1/10 [15:25<2:01:42, 811.44s/it]

14

[FINISHED] Epoch 1, (Training Loss (per epoch): 64.8839 samp_prob: 0.8320)


Epoch 1, step 15/96, (Training Loss: 4.3018, samp_prob: 0.8320):  10%|█         | 1/10 [15:33<2:01:42, 811.44s/it]

15

[FINISHED] Epoch 1, (Training Loss (per epoch): 69.1857 samp_prob: 0.8320)


Epoch 1, step 16/96, (Training Loss: 4.2860, samp_prob: 0.8320):  10%|█         | 1/10 [15:41<2:01:42, 811.44s/it]

[[1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 ...
 [1 4 5 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 [1 4 7 ... 2 2 2]]

[Train. Prediction] Epoch 1, step 16/96......

id: 116
answer: ['<BOS>', 'a', 'man', 'is', 'playing', 'a', 'trumpet']
prediction: ['<BOS>', 'a', 'man', 'is', 'is']
16

[FINISHED] Epoch 1, (Training Loss (per epoch): 73.4717 samp_prob: 0.8320)


Epoch 1, step 17/96, (Training Loss: 4.2420, samp_prob: 0.8320):  10%|█         | 1/10 [15:49<2:01:42, 811.44s/it]

17

[FINISHED] Epoch 1, (Training Loss (per epoch): 77.7137 samp_prob: 0.8320)


Epoch 1, step 18/96, (Training Loss: 4.1436, samp_prob: 0.8320):  10%|█         | 1/10 [15:56<2:01:42, 811.44s/it]

18

[FINISHED] Epoch 1, (Training Loss (per epoch): 81.8573 samp_prob: 0.8320)


Epoch 1, step 19/96, (Training Loss: 4.0426, samp_prob: 0.8320):  10%|█         | 1/10 [16:04<2:01:42, 811.44s/it]

19

[FINISHED] Epoch 1, (Training Loss (per epoch): 85.8999 samp_prob: 0.8320)


Epoch 1, step 20/96, (Training Loss: 4.2951, samp_prob: 0.8320):  10%|█         | 1/10 [16:11<2:01:42, 811.44s/it]

20

[FINISHED] Epoch 1, (Training Loss (per epoch): 90.1951 samp_prob: 0.8320)


Epoch 1, step 21/96, (Training Loss: 4.2180, samp_prob: 0.8320):  10%|█         | 1/10 [16:19<2:01:42, 811.44s/it]

21

[FINISHED] Epoch 1, (Training Loss (per epoch): 94.4130 samp_prob: 0.8320)


Epoch 1, step 22/96, (Training Loss: 4.1981, samp_prob: 0.8320):  10%|█         | 1/10 [16:27<2:01:42, 811.44s/it]

22

[FINISHED] Epoch 1, (Training Loss (per epoch): 98.6111 samp_prob: 0.8320)


Epoch 1, step 23/96, (Training Loss: 4.2508, samp_prob: 0.8320):  10%|█         | 1/10 [16:34<2:01:42, 811.44s/it]

23

[FINISHED] Epoch 1, (Training Loss (per epoch): 102.8619 samp_prob: 0.8320)


Epoch 1, step 24/96, (Training Loss: 4.4164, samp_prob: 0.8320):  10%|█         | 1/10 [16:42<2:01:42, 811.44s/it]

24

[FINISHED] Epoch 1, (Training Loss (per epoch): 107.2782 samp_prob: 0.8320)


Epoch 1, step 25/96, (Training Loss: 4.1668, samp_prob: 0.8320):  10%|█         | 1/10 [16:49<2:01:42, 811.44s/it]

25

[FINISHED] Epoch 1, (Training Loss (per epoch): 111.4450 samp_prob: 0.8320)


Epoch 1, step 26/96, (Training Loss: 4.2313, samp_prob: 0.8320):  10%|█         | 1/10 [16:57<2:01:42, 811.44s/it]

26

[FINISHED] Epoch 1, (Training Loss (per epoch): 115.6763 samp_prob: 0.8320)


Epoch 1, step 27/96, (Training Loss: 4.1626, samp_prob: 0.8320):  10%|█         | 1/10 [17:04<2:01:42, 811.44s/it]

27

[FINISHED] Epoch 1, (Training Loss (per epoch): 119.8389 samp_prob: 0.8320)


Epoch 1, step 28/96, (Training Loss: 4.1188, samp_prob: 0.8320):  10%|█         | 1/10 [17:12<2:01:42, 811.44s/it]

28

[FINISHED] Epoch 1, (Training Loss (per epoch): 123.9577 samp_prob: 0.8320)


Epoch 1, step 29/96, (Training Loss: 4.2297, samp_prob: 0.8320):  10%|█         | 1/10 [17:20<2:01:42, 811.44s/it]

29

[FINISHED] Epoch 1, (Training Loss (per epoch): 128.1875 samp_prob: 0.8320)


Epoch 1, step 30/96, (Training Loss: 4.2507, samp_prob: 0.8320):  10%|█         | 1/10 [17:27<2:01:42, 811.44s/it]

30

[FINISHED] Epoch 1, (Training Loss (per epoch): 132.4381 samp_prob: 0.8320)


Epoch 1, step 31/96, (Training Loss: 4.2255, samp_prob: 0.8320):  10%|█         | 1/10 [17:35<2:01:42, 811.44s/it]

[[1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]]

[Train. Prediction] Epoch 1, step 31/96......

id: 136
answer: ['<BOS>', 'a', 'boy', 'is', 'playing', 'a', 'piano']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'a', 'a']
31

[FINISHED] Epoch 1, (Training Loss (per epoch): 136.6636 samp_prob: 0.8320)


Epoch 1, step 32/96, (Training Loss: 4.2443, samp_prob: 0.8320):  10%|█         | 1/10 [17:43<2:01:42, 811.44s/it]

32

[FINISHED] Epoch 1, (Training Loss (per epoch): 140.9079 samp_prob: 0.8320)


Epoch 1, step 33/96, (Training Loss: 4.2243, samp_prob: 0.8320):  10%|█         | 1/10 [17:51<2:01:42, 811.44s/it]

33

[FINISHED] Epoch 1, (Training Loss (per epoch): 145.1321 samp_prob: 0.8320)


Epoch 1, step 34/96, (Training Loss: 4.2107, samp_prob: 0.8320):  10%|█         | 1/10 [17:58<2:01:42, 811.44s/it]

34

[FINISHED] Epoch 1, (Training Loss (per epoch): 149.3428 samp_prob: 0.8320)


Epoch 1, step 35/96, (Training Loss: 4.2048, samp_prob: 0.8320):  10%|█         | 1/10 [18:06<2:01:42, 811.44s/it]

35

[FINISHED] Epoch 1, (Training Loss (per epoch): 153.5476 samp_prob: 0.8320)


Epoch 1, step 36/96, (Training Loss: 4.2545, samp_prob: 0.8320):  10%|█         | 1/10 [18:14<2:01:42, 811.44s/it]

36

[FINISHED] Epoch 1, (Training Loss (per epoch): 157.8020 samp_prob: 0.8320)


Epoch 1, step 37/96, (Training Loss: 4.2342, samp_prob: 0.8320):  10%|█         | 1/10 [18:21<2:01:42, 811.44s/it]

37

[FINISHED] Epoch 1, (Training Loss (per epoch): 162.0362 samp_prob: 0.8320)


Epoch 1, step 38/96, (Training Loss: 4.1629, samp_prob: 0.8320):  10%|█         | 1/10 [18:29<2:01:42, 811.44s/it]

38

[FINISHED] Epoch 1, (Training Loss (per epoch): 166.1991 samp_prob: 0.8320)


Epoch 1, step 39/96, (Training Loss: 4.1760, samp_prob: 0.8320):  10%|█         | 1/10 [18:36<2:01:42, 811.44s/it]

39

[FINISHED] Epoch 1, (Training Loss (per epoch): 170.3751 samp_prob: 0.8320)


Epoch 1, step 40/96, (Training Loss: 4.1190, samp_prob: 0.8320):  10%|█         | 1/10 [18:44<2:01:42, 811.44s/it]

40

[FINISHED] Epoch 1, (Training Loss (per epoch): 174.4941 samp_prob: 0.8320)


Epoch 1, step 41/96, (Training Loss: 4.1442, samp_prob: 0.8320):  10%|█         | 1/10 [18:52<2:01:42, 811.44s/it]

41

[FINISHED] Epoch 1, (Training Loss (per epoch): 178.6383 samp_prob: 0.8320)


Epoch 1, step 42/96, (Training Loss: 4.1888, samp_prob: 0.8320):  10%|█         | 1/10 [18:59<2:01:42, 811.44s/it]

42

[FINISHED] Epoch 1, (Training Loss (per epoch): 182.8270 samp_prob: 0.8320)


Epoch 1, step 43/96, (Training Loss: 4.1928, samp_prob: 0.8320):  10%|█         | 1/10 [19:07<2:01:42, 811.44s/it]

43

[FINISHED] Epoch 1, (Training Loss (per epoch): 187.0198 samp_prob: 0.8320)


Epoch 1, step 44/96, (Training Loss: 4.1679, samp_prob: 0.8320):  10%|█         | 1/10 [19:15<2:01:42, 811.44s/it]

44

[FINISHED] Epoch 1, (Training Loss (per epoch): 191.1878 samp_prob: 0.8320)


Epoch 1, step 45/96, (Training Loss: 4.1883, samp_prob: 0.8320):  10%|█         | 1/10 [19:22<2:01:42, 811.44s/it]

45

[FINISHED] Epoch 1, (Training Loss (per epoch): 195.3761 samp_prob: 0.8320)


Epoch 1, step 46/96, (Training Loss: 4.2090, samp_prob: 0.8320):  10%|█         | 1/10 [19:30<2:01:42, 811.44s/it]

[[1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]]

[Train. Prediction] Epoch 1, step 46/96......

id: 230
answer: ['<BOS>', 'a', 'little', 'school', 'girl', 'walking', 'dejectedly', 'on', 'the', 'road', 'is', 'approached', 'from', 'behind', 'by', 'a', 'man', 'in', 'a', 'jeep']
prediction: ['<BOS>', 'a', 'man', 'is', 'is', 'a']
46

[FINISHED] Epoch 1, (Training Loss (per epoch): 199.5851 samp_prob: 0.8320)


Epoch 1, step 47/96, (Training Loss: 4.1443, samp_prob: 0.8320):  10%|█         | 1/10 [19:38<2:01:42, 811.44s/it]

47

[FINISHED] Epoch 1, (Training Loss (per epoch): 203.7294 samp_prob: 0.8320)


Epoch 1, step 48/96, (Training Loss: 4.1579, samp_prob: 0.8320):  10%|█         | 1/10 [19:46<2:01:42, 811.44s/it]

48

[FINISHED] Epoch 1, (Training Loss (per epoch): 207.8873 samp_prob: 0.8320)


Epoch 1, step 49/96, (Training Loss: 4.2176, samp_prob: 0.8320):  10%|█         | 1/10 [19:53<2:01:42, 811.44s/it]

49

[FINISHED] Epoch 1, (Training Loss (per epoch): 212.1049 samp_prob: 0.8320)


Epoch 1, step 50/96, (Training Loss: 4.2330, samp_prob: 0.8320):  10%|█         | 1/10 [20:01<2:01:42, 811.44s/it]

50

[FINISHED] Epoch 1, (Training Loss (per epoch): 216.3379 samp_prob: 0.8320)


Epoch 1, step 51/96, (Training Loss: 4.1268, samp_prob: 0.8320):  10%|█         | 1/10 [20:08<2:01:42, 811.44s/it]

51

[FINISHED] Epoch 1, (Training Loss (per epoch): 220.4648 samp_prob: 0.8320)


Epoch 1, step 52/96, (Training Loss: 4.1404, samp_prob: 0.8320):  10%|█         | 1/10 [20:16<2:01:42, 811.44s/it]

52

[FINISHED] Epoch 1, (Training Loss (per epoch): 224.6052 samp_prob: 0.8320)


Epoch 1, step 53/96, (Training Loss: 4.2344, samp_prob: 0.8320):  10%|█         | 1/10 [20:24<2:01:42, 811.44s/it]

53

[FINISHED] Epoch 1, (Training Loss (per epoch): 228.8396 samp_prob: 0.8320)


Epoch 1, step 54/96, (Training Loss: 4.1261, samp_prob: 0.8320):  10%|█         | 1/10 [20:31<2:01:42, 811.44s/it]

54

[FINISHED] Epoch 1, (Training Loss (per epoch): 232.9657 samp_prob: 0.8320)


Epoch 1, step 55/96, (Training Loss: 4.1322, samp_prob: 0.8320):  10%|█         | 1/10 [20:39<2:01:42, 811.44s/it]

55

[FINISHED] Epoch 1, (Training Loss (per epoch): 237.0979 samp_prob: 0.8320)


Epoch 1, step 56/96, (Training Loss: 4.0762, samp_prob: 0.8320):  10%|█         | 1/10 [20:47<2:01:42, 811.44s/it]

56

[FINISHED] Epoch 1, (Training Loss (per epoch): 241.1741 samp_prob: 0.8320)


Epoch 1, step 57/96, (Training Loss: 4.0773, samp_prob: 0.8320):  10%|█         | 1/10 [20:54<2:01:42, 811.44s/it]

57

[FINISHED] Epoch 1, (Training Loss (per epoch): 245.2514 samp_prob: 0.8320)


Epoch 1, step 58/96, (Training Loss: 4.0720, samp_prob: 0.8320):  10%|█         | 1/10 [21:02<2:01:42, 811.44s/it]

58

[FINISHED] Epoch 1, (Training Loss (per epoch): 249.3234 samp_prob: 0.8320)


Epoch 1, step 59/96, (Training Loss: 4.1192, samp_prob: 0.8320):  10%|█         | 1/10 [21:10<2:01:42, 811.44s/it]

59

[FINISHED] Epoch 1, (Training Loss (per epoch): 253.4427 samp_prob: 0.8320)


Epoch 1, step 60/96, (Training Loss: 4.1305, samp_prob: 0.8320):  10%|█         | 1/10 [21:17<2:01:42, 811.44s/it]

60

[FINISHED] Epoch 1, (Training Loss (per epoch): 257.5732 samp_prob: 0.8320)


Epoch 1, step 61/96, (Training Loss: 4.1863, samp_prob: 0.8320):  10%|█         | 1/10 [21:25<2:01:42, 811.44s/it]

[[1 4 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 [1 4 5 ... 2 2 2]]

[Train. Prediction] Epoch 1, step 61/96......

id: 6
answer: ['<BOS>', 'a', 'man', 'moonwalks', 'with', 'a', 'grocery', 'cart', 'in', 'a', 'supermarket']
prediction: ['<BOS>', 'the', 'woman', 'is', 'a']
61

[FINISHED] Epoch 1, (Training Loss (per epoch): 261.7595 samp_prob: 0.8320)


Epoch 1, step 62/96, (Training Loss: 4.1799, samp_prob: 0.8320):  10%|█         | 1/10 [21:33<2:01:42, 811.44s/it]

62

[FINISHED] Epoch 1, (Training Loss (per epoch): 265.9394 samp_prob: 0.8320)


Epoch 1, step 63/96, (Training Loss: 4.1873, samp_prob: 0.8320):  10%|█         | 1/10 [21:40<2:01:42, 811.44s/it]

63

[FINISHED] Epoch 1, (Training Loss (per epoch): 270.1267 samp_prob: 0.8320)


Epoch 1, step 64/96, (Training Loss: 4.2226, samp_prob: 0.8320):  10%|█         | 1/10 [21:48<2:01:42, 811.44s/it]

64

[FINISHED] Epoch 1, (Training Loss (per epoch): 274.3493 samp_prob: 0.8320)


Epoch 1, step 65/96, (Training Loss: 4.0131, samp_prob: 0.8320):  10%|█         | 1/10 [21:56<2:01:42, 811.44s/it]

65

[FINISHED] Epoch 1, (Training Loss (per epoch): 278.3624 samp_prob: 0.8320)


Epoch 1, step 66/96, (Training Loss: 4.2526, samp_prob: 0.8320):  10%|█         | 1/10 [22:03<2:01:42, 811.44s/it]

66

[FINISHED] Epoch 1, (Training Loss (per epoch): 282.6150 samp_prob: 0.8320)


Epoch 1, step 67/96, (Training Loss: 4.1998, samp_prob: 0.8320):  10%|█         | 1/10 [22:11<2:01:42, 811.44s/it]

67

[FINISHED] Epoch 1, (Training Loss (per epoch): 286.8148 samp_prob: 0.8320)


Epoch 1, step 68/96, (Training Loss: 4.1107, samp_prob: 0.8320):  10%|█         | 1/10 [22:19<2:01:42, 811.44s/it]

68

[FINISHED] Epoch 1, (Training Loss (per epoch): 290.9255 samp_prob: 0.8320)


Epoch 1, step 69/96, (Training Loss: 4.0529, samp_prob: 0.8320):  10%|█         | 1/10 [22:26<2:01:42, 811.44s/it]

69

[FINISHED] Epoch 1, (Training Loss (per epoch): 294.9784 samp_prob: 0.8320)


Epoch 1, step 70/96, (Training Loss: 4.1884, samp_prob: 0.8320):  10%|█         | 1/10 [22:34<2:01:42, 811.44s/it]

70

[FINISHED] Epoch 1, (Training Loss (per epoch): 299.1668 samp_prob: 0.8320)


Epoch 1, step 71/96, (Training Loss: 4.0777, samp_prob: 0.8320):  10%|█         | 1/10 [22:41<2:01:42, 811.44s/it]

71

[FINISHED] Epoch 1, (Training Loss (per epoch): 303.2445 samp_prob: 0.8320)


Epoch 1, step 72/96, (Training Loss: 4.2088, samp_prob: 0.8320):  10%|█         | 1/10 [22:49<2:01:42, 811.44s/it]

72

[FINISHED] Epoch 1, (Training Loss (per epoch): 307.4533 samp_prob: 0.8320)


Epoch 1, step 73/96, (Training Loss: 4.0610, samp_prob: 0.8320):  10%|█         | 1/10 [22:57<2:01:42, 811.44s/it]

73

[FINISHED] Epoch 1, (Training Loss (per epoch): 311.5143 samp_prob: 0.8320)


Epoch 1, step 74/96, (Training Loss: 4.1501, samp_prob: 0.8320):  10%|█         | 1/10 [23:04<2:01:42, 811.44s/it]

74

[FINISHED] Epoch 1, (Training Loss (per epoch): 315.6644 samp_prob: 0.8320)


Epoch 1, step 75/96, (Training Loss: 4.1558, samp_prob: 0.8320):  10%|█         | 1/10 [23:12<2:01:42, 811.44s/it]

75

[FINISHED] Epoch 1, (Training Loss (per epoch): 319.8202 samp_prob: 0.8320)


Epoch 1, step 76/96, (Training Loss: 4.1766, samp_prob: 0.8320):  10%|█         | 1/10 [23:20<2:01:42, 811.44s/it]

[[1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 [1 4 7 ... 2 2 2]]

[Train. Prediction] Epoch 1, step 76/96......

id: 196
answer: ['<BOS>', 'the', 'man', 'is', 'walking', 'on', 'the', 'wall', 'and', 'the', 'ceiling']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'a']
76

[FINISHED] Epoch 1, (Training Loss (per epoch): 323.9967 samp_prob: 0.8320)


Epoch 1, step 77/96, (Training Loss: 4.0456, samp_prob: 0.8320):  10%|█         | 1/10 [23:27<2:01:42, 811.44s/it]

77

[FINISHED] Epoch 1, (Training Loss (per epoch): 328.0423 samp_prob: 0.8320)


Epoch 1, step 78/96, (Training Loss: 3.9845, samp_prob: 0.8320):  10%|█         | 1/10 [23:35<2:01:42, 811.44s/it]

78

[FINISHED] Epoch 1, (Training Loss (per epoch): 332.0268 samp_prob: 0.8320)


Epoch 1, step 79/96, (Training Loss: 4.0340, samp_prob: 0.8320):  10%|█         | 1/10 [23:43<2:01:42, 811.44s/it]

79

[FINISHED] Epoch 1, (Training Loss (per epoch): 336.0608 samp_prob: 0.8320)


Epoch 1, step 80/96, (Training Loss: 4.0816, samp_prob: 0.8320):  10%|█         | 1/10 [23:50<2:01:42, 811.44s/it]

80

[FINISHED] Epoch 1, (Training Loss (per epoch): 340.1425 samp_prob: 0.8320)


Epoch 1, step 81/96, (Training Loss: 3.9639, samp_prob: 0.8320):  10%|█         | 1/10 [23:58<2:01:42, 811.44s/it]

81

[FINISHED] Epoch 1, (Training Loss (per epoch): 344.1064 samp_prob: 0.8320)


Epoch 1, step 82/96, (Training Loss: 4.2067, samp_prob: 0.8320):  10%|█         | 1/10 [24:05<2:01:42, 811.44s/it]

82

[FINISHED] Epoch 1, (Training Loss (per epoch): 348.3131 samp_prob: 0.8320)


Epoch 1, step 83/96, (Training Loss: 4.1354, samp_prob: 0.8320):  10%|█         | 1/10 [24:13<2:01:42, 811.44s/it]

83

[FINISHED] Epoch 1, (Training Loss (per epoch): 352.4485 samp_prob: 0.8320)


Epoch 1, step 84/96, (Training Loss: 4.0302, samp_prob: 0.8320):  10%|█         | 1/10 [24:21<2:01:42, 811.44s/it]

84

[FINISHED] Epoch 1, (Training Loss (per epoch): 356.4787 samp_prob: 0.8320)


Epoch 1, step 85/96, (Training Loss: 4.1720, samp_prob: 0.8320):  10%|█         | 1/10 [24:28<2:01:42, 811.44s/it]

85

[FINISHED] Epoch 1, (Training Loss (per epoch): 360.6507 samp_prob: 0.8320)


Epoch 1, step 86/96, (Training Loss: 4.0429, samp_prob: 0.8320):  10%|█         | 1/10 [24:36<2:01:42, 811.44s/it]

86

[FINISHED] Epoch 1, (Training Loss (per epoch): 364.6936 samp_prob: 0.8320)


Epoch 1, step 87/96, (Training Loss: 4.1183, samp_prob: 0.8320):  10%|█         | 1/10 [24:43<2:01:42, 811.44s/it]

87

[FINISHED] Epoch 1, (Training Loss (per epoch): 368.8118 samp_prob: 0.8320)


Epoch 1, step 88/96, (Training Loss: 4.0828, samp_prob: 0.8320):  10%|█         | 1/10 [24:51<2:01:42, 811.44s/it]

88

[FINISHED] Epoch 1, (Training Loss (per epoch): 372.8947 samp_prob: 0.8320)


Epoch 1, step 89/96, (Training Loss: 4.0552, samp_prob: 0.8320):  10%|█         | 1/10 [24:59<2:01:42, 811.44s/it]

89

[FINISHED] Epoch 1, (Training Loss (per epoch): 376.9499 samp_prob: 0.8320)


Epoch 1, step 90/96, (Training Loss: 4.0208, samp_prob: 0.8320):  10%|█         | 1/10 [25:06<2:01:42, 811.44s/it]

90

[FINISHED] Epoch 1, (Training Loss (per epoch): 380.9707 samp_prob: 0.8320)


Epoch 1, step 91/96, (Training Loss: 4.0644, samp_prob: 0.8320):  10%|█         | 1/10 [25:14<2:01:42, 811.44s/it]

[[1 4 7 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 5 ... 2 2 2]
 [1 4 7 ... 2 2 2]]

[Train. Prediction] Epoch 1, step 91/96......

id: 179
answer: ['<BOS>', 'a', 'squirrel', 'is', 'spinning', 'around', 'in', 'circles']
prediction: ['<BOS>', 'a', 'is', 'is']
91

[FINISHED] Epoch 1, (Training Loss (per epoch): 385.0351 samp_prob: 0.8320)


Epoch 1, step 92/96, (Training Loss: 3.9281, samp_prob: 0.8320):  10%|█         | 1/10 [25:22<2:01:42, 811.44s/it]

92

[FINISHED] Epoch 1, (Training Loss (per epoch): 388.9632 samp_prob: 0.8320)


Epoch 1, step 93/96, (Training Loss: 3.9858, samp_prob: 0.8320):  10%|█         | 1/10 [25:30<2:01:42, 811.44s/it]

93

[FINISHED] Epoch 1, (Training Loss (per epoch): 392.9491 samp_prob: 0.8320)


Epoch 1, step 94/96, (Training Loss: 4.0859, samp_prob: 0.8320):  10%|█         | 1/10 [25:37<2:01:42, 811.44s/it]

94

[FINISHED] Epoch 1, (Training Loss (per epoch): 397.0350 samp_prob: 0.8320)


Epoch 1, step 95/96, (Training Loss: 4.0621, samp_prob: 0.8320):  20%|██        | 2/10 [25:45<1:45:05, 788.16s/it]

95

[FINISHED] Epoch 1, (Training Loss (per epoch): 401.0971 samp_prob: 0.8320)


Epoch 2, step 0/96, (Training Loss: 4.1140, samp_prob: 0.7685):  20%|██        | 2/10 [25:52<1:45:05, 788.16s/it] 

0

[FINISHED] Epoch 2, (Training Loss (per epoch): 4.1140 samp_prob: 0.7685)


Epoch 2, step 1/96, (Training Loss: 4.0615, samp_prob: 0.7685):  20%|██        | 2/10 [26:00<1:45:05, 788.16s/it]

[[ 1  4 24 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 2, step 1/96......

id: 172
answer: ['<BOS>', 'spring', 'onion', 'is', 'chopped', 'finely']
prediction: ['<BOS>', 'a', 'man', 'is', 'is', 'a']
1

[FINISHED] Epoch 2, (Training Loss (per epoch): 8.1755 samp_prob: 0.7685)


Epoch 2, step 2/96, (Training Loss: 4.0217, samp_prob: 0.7685):  20%|██        | 2/10 [26:08<1:45:05, 788.16s/it]

2

[FINISHED] Epoch 2, (Training Loss (per epoch): 12.1972 samp_prob: 0.7685)


Epoch 2, step 3/96, (Training Loss: 4.0880, samp_prob: 0.7685):  20%|██        | 2/10 [26:15<1:45:05, 788.16s/it]

3

[FINISHED] Epoch 2, (Training Loss (per epoch): 16.2852 samp_prob: 0.7685)


Epoch 2, step 4/96, (Training Loss: 3.9375, samp_prob: 0.7685):  20%|██        | 2/10 [26:23<1:45:05, 788.16s/it]

4

[FINISHED] Epoch 2, (Training Loss (per epoch): 20.2227 samp_prob: 0.7685)


Epoch 2, step 5/96, (Training Loss: 4.0059, samp_prob: 0.7685):  20%|██        | 2/10 [26:31<1:45:05, 788.16s/it]

5

[FINISHED] Epoch 2, (Training Loss (per epoch): 24.2287 samp_prob: 0.7685)


Epoch 2, step 6/96, (Training Loss: 3.9660, samp_prob: 0.7685):  20%|██        | 2/10 [26:38<1:45:05, 788.16s/it]

6

[FINISHED] Epoch 2, (Training Loss (per epoch): 28.1947 samp_prob: 0.7685)


Epoch 2, step 7/96, (Training Loss: 4.0331, samp_prob: 0.7685):  20%|██        | 2/10 [26:46<1:45:05, 788.16s/it]

7

[FINISHED] Epoch 2, (Training Loss (per epoch): 32.2278 samp_prob: 0.7685)


Epoch 2, step 8/96, (Training Loss: 3.9805, samp_prob: 0.7685):  20%|██        | 2/10 [26:54<1:45:05, 788.16s/it]

8

[FINISHED] Epoch 2, (Training Loss (per epoch): 36.2083 samp_prob: 0.7685)


Epoch 2, step 9/96, (Training Loss: 4.0137, samp_prob: 0.7685):  20%|██        | 2/10 [27:01<1:45:05, 788.16s/it]

9

[FINISHED] Epoch 2, (Training Loss (per epoch): 40.2220 samp_prob: 0.7685)


Epoch 2, step 10/96, (Training Loss: 3.9529, samp_prob: 0.7685):  20%|██        | 2/10 [27:09<1:45:05, 788.16s/it]

10

[FINISHED] Epoch 2, (Training Loss (per epoch): 44.1749 samp_prob: 0.7685)


Epoch 2, step 11/96, (Training Loss: 4.0201, samp_prob: 0.7685):  20%|██        | 2/10 [27:16<1:45:05, 788.16s/it]

11

[FINISHED] Epoch 2, (Training Loss (per epoch): 48.1950 samp_prob: 0.7685)


Epoch 2, step 12/96, (Training Loss: 4.0401, samp_prob: 0.7685):  20%|██        | 2/10 [27:24<1:45:05, 788.16s/it]

12

[FINISHED] Epoch 2, (Training Loss (per epoch): 52.2351 samp_prob: 0.7685)


Epoch 2, step 13/96, (Training Loss: 4.0942, samp_prob: 0.7685):  20%|██        | 2/10 [27:31<1:45:05, 788.16s/it]

13

[FINISHED] Epoch 2, (Training Loss (per epoch): 56.3293 samp_prob: 0.7685)


Epoch 2, step 14/96, (Training Loss: 4.0864, samp_prob: 0.7685):  20%|██        | 2/10 [27:39<1:45:05, 788.16s/it]

14

[FINISHED] Epoch 2, (Training Loss (per epoch): 60.4157 samp_prob: 0.7685)


Epoch 2, step 15/96, (Training Loss: 4.0637, samp_prob: 0.7685):  20%|██        | 2/10 [27:47<1:45:05, 788.16s/it]

15

[FINISHED] Epoch 2, (Training Loss (per epoch): 64.4794 samp_prob: 0.7685)


Epoch 2, step 16/96, (Training Loss: 4.0038, samp_prob: 0.7685):  20%|██        | 2/10 [27:55<1:45:05, 788.16s/it]

[[1 6 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 [1 4 8 ... 2 2 2]]

[Train. Prediction] Epoch 2, step 16/96......

id: 116
answer: ['<BOS>', 'a', 'man', 'is', 'playing', 'a', 'trumpet']
prediction: ['<BOS>', 'a', 'man', 'is', 'is', 'a', 'a']
16

[FINISHED] Epoch 2, (Training Loss (per epoch): 68.4832 samp_prob: 0.7685)


Epoch 2, step 17/96, (Training Loss: 3.9886, samp_prob: 0.7685):  20%|██        | 2/10 [28:02<1:45:05, 788.16s/it]

17

[FINISHED] Epoch 2, (Training Loss (per epoch): 72.4718 samp_prob: 0.7685)


Epoch 2, step 18/96, (Training Loss: 3.8681, samp_prob: 0.7685):  20%|██        | 2/10 [28:10<1:45:05, 788.16s/it]

18

[FINISHED] Epoch 2, (Training Loss (per epoch): 76.3399 samp_prob: 0.7685)


Epoch 2, step 19/96, (Training Loss: 3.7602, samp_prob: 0.7685):  20%|██        | 2/10 [28:18<1:45:05, 788.16s/it]

19

[FINISHED] Epoch 2, (Training Loss (per epoch): 80.1001 samp_prob: 0.7685)


Epoch 2, step 20/96, (Training Loss: 4.0833, samp_prob: 0.7685):  20%|██        | 2/10 [28:25<1:45:05, 788.16s/it]

20

[FINISHED] Epoch 2, (Training Loss (per epoch): 84.1834 samp_prob: 0.7685)


Epoch 2, step 21/96, (Training Loss: 3.9499, samp_prob: 0.7685):  20%|██        | 2/10 [28:33<1:45:05, 788.16s/it]

21

[FINISHED] Epoch 2, (Training Loss (per epoch): 88.1332 samp_prob: 0.7685)


Epoch 2, step 22/96, (Training Loss: 3.9271, samp_prob: 0.7685):  20%|██        | 2/10 [28:41<1:45:05, 788.16s/it]

22

[FINISHED] Epoch 2, (Training Loss (per epoch): 92.0604 samp_prob: 0.7685)


Epoch 2, step 23/96, (Training Loss: 4.0292, samp_prob: 0.7685):  20%|██        | 2/10 [28:48<1:45:05, 788.16s/it]

23

[FINISHED] Epoch 2, (Training Loss (per epoch): 96.0895 samp_prob: 0.7685)


Epoch 2, step 24/96, (Training Loss: 4.1344, samp_prob: 0.7685):  20%|██        | 2/10 [28:56<1:45:05, 788.16s/it]

24

[FINISHED] Epoch 2, (Training Loss (per epoch): 100.2239 samp_prob: 0.7685)


Epoch 2, step 25/96, (Training Loss: 3.9177, samp_prob: 0.7685):  20%|██        | 2/10 [29:03<1:45:05, 788.16s/it]

25

[FINISHED] Epoch 2, (Training Loss (per epoch): 104.1416 samp_prob: 0.7685)


Epoch 2, step 26/96, (Training Loss: 3.9678, samp_prob: 0.7685):  20%|██        | 2/10 [29:11<1:45:05, 788.16s/it]

26

[FINISHED] Epoch 2, (Training Loss (per epoch): 108.1095 samp_prob: 0.7685)


Epoch 2, step 27/96, (Training Loss: 3.9188, samp_prob: 0.7685):  20%|██        | 2/10 [29:19<1:45:05, 788.16s/it]

27

[FINISHED] Epoch 2, (Training Loss (per epoch): 112.0283 samp_prob: 0.7685)


Epoch 2, step 28/96, (Training Loss: 3.8570, samp_prob: 0.7685):  20%|██        | 2/10 [29:26<1:45:05, 788.16s/it]

28

[FINISHED] Epoch 2, (Training Loss (per epoch): 115.8852 samp_prob: 0.7685)


Epoch 2, step 29/96, (Training Loss: 3.9974, samp_prob: 0.7685):  20%|██        | 2/10 [29:34<1:45:05, 788.16s/it]

29

[FINISHED] Epoch 2, (Training Loss (per epoch): 119.8826 samp_prob: 0.7685)


Epoch 2, step 30/96, (Training Loss: 4.0497, samp_prob: 0.7685):  20%|██        | 2/10 [29:41<1:45:05, 788.16s/it]

30

[FINISHED] Epoch 2, (Training Loss (per epoch): 123.9323 samp_prob: 0.7685)


Epoch 2, step 31/96, (Training Loss: 3.9690, samp_prob: 0.7685):  20%|██        | 2/10 [29:49<1:45:05, 788.16s/it]

[[ 1  4 12 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 2, step 31/96......

id: 150
answer: ['<BOS>', 'a', 'person', 'is', 'boiling', 'soup']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'a', 'a']
31

[FINISHED] Epoch 2, (Training Loss (per epoch): 127.9014 samp_prob: 0.7685)


Epoch 2, step 32/96, (Training Loss: 3.9677, samp_prob: 0.7685):  20%|██        | 2/10 [29:57<1:45:05, 788.16s/it]

32

[FINISHED] Epoch 2, (Training Loss (per epoch): 131.8691 samp_prob: 0.7685)


Epoch 2, step 33/96, (Training Loss: 3.9797, samp_prob: 0.7685):  20%|██        | 2/10 [30:05<1:45:05, 788.16s/it]

33

[FINISHED] Epoch 2, (Training Loss (per epoch): 135.8487 samp_prob: 0.7685)


Epoch 2, step 34/96, (Training Loss: 3.9492, samp_prob: 0.7685):  20%|██        | 2/10 [30:12<1:45:05, 788.16s/it]

34

[FINISHED] Epoch 2, (Training Loss (per epoch): 139.7979 samp_prob: 0.7685)


Epoch 2, step 35/96, (Training Loss: 3.9455, samp_prob: 0.7685):  20%|██        | 2/10 [30:20<1:45:05, 788.16s/it]

35

[FINISHED] Epoch 2, (Training Loss (per epoch): 143.7434 samp_prob: 0.7685)


Epoch 2, step 36/96, (Training Loss: 4.0352, samp_prob: 0.7685):  20%|██        | 2/10 [30:28<1:45:05, 788.16s/it]

36

[FINISHED] Epoch 2, (Training Loss (per epoch): 147.7786 samp_prob: 0.7685)


Epoch 2, step 37/96, (Training Loss: 4.0023, samp_prob: 0.7685):  20%|██        | 2/10 [30:35<1:45:05, 788.16s/it]

37

[FINISHED] Epoch 2, (Training Loss (per epoch): 151.7809 samp_prob: 0.7685)


Epoch 2, step 38/96, (Training Loss: 3.9217, samp_prob: 0.7685):  20%|██        | 2/10 [30:43<1:45:05, 788.16s/it]

38

[FINISHED] Epoch 2, (Training Loss (per epoch): 155.7026 samp_prob: 0.7685)


Epoch 2, step 39/96, (Training Loss: 3.9335, samp_prob: 0.7685):  20%|██        | 2/10 [30:50<1:45:05, 788.16s/it]

39

[FINISHED] Epoch 2, (Training Loss (per epoch): 159.6362 samp_prob: 0.7685)


Epoch 2, step 40/96, (Training Loss: 3.8539, samp_prob: 0.7685):  20%|██        | 2/10 [30:58<1:45:05, 788.16s/it]

40

[FINISHED] Epoch 2, (Training Loss (per epoch): 163.4901 samp_prob: 0.7685)


Epoch 2, step 41/96, (Training Loss: 3.9150, samp_prob: 0.7685):  20%|██        | 2/10 [31:06<1:45:05, 788.16s/it]

41

[FINISHED] Epoch 2, (Training Loss (per epoch): 167.4050 samp_prob: 0.7685)


Epoch 2, step 42/96, (Training Loss: 3.9496, samp_prob: 0.7685):  20%|██        | 2/10 [31:13<1:45:05, 788.16s/it]

42

[FINISHED] Epoch 2, (Training Loss (per epoch): 171.3547 samp_prob: 0.7685)


Epoch 2, step 43/96, (Training Loss: 3.9707, samp_prob: 0.7685):  20%|██        | 2/10 [31:21<1:45:05, 788.16s/it]

43

[FINISHED] Epoch 2, (Training Loss (per epoch): 175.3254 samp_prob: 0.7685)


Epoch 2, step 44/96, (Training Loss: 3.9382, samp_prob: 0.7685):  20%|██        | 2/10 [31:29<1:45:05, 788.16s/it]

44

[FINISHED] Epoch 2, (Training Loss (per epoch): 179.2635 samp_prob: 0.7685)


Epoch 2, step 45/96, (Training Loss: 3.9273, samp_prob: 0.7685):  20%|██        | 2/10 [31:36<1:45:05, 788.16s/it]

45

[FINISHED] Epoch 2, (Training Loss (per epoch): 183.1908 samp_prob: 0.7685)


Epoch 2, step 46/96, (Training Loss: 3.9844, samp_prob: 0.7685):  20%|██        | 2/10 [31:44<1:45:05, 788.16s/it]

[[ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  6 27 ...  2  2  2]
 [ 1  6  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 2, step 46/96......

id: 20
answer: ['<BOS>', 'a', 'man', 'is', 'putting', 'cooked', 'chicken', 'wings', 'in', 'a', 'plastic', 'container']
prediction: ['<BOS>', 'a', 'is', 'is', 'a', 'a', 'a']
46

[FINISHED] Epoch 2, (Training Loss (per epoch): 187.1752 samp_prob: 0.7685)


Epoch 2, step 47/96, (Training Loss: 3.8979, samp_prob: 0.7685):  20%|██        | 2/10 [31:52<1:45:05, 788.16s/it]

47

[FINISHED] Epoch 2, (Training Loss (per epoch): 191.0731 samp_prob: 0.7685)


Epoch 2, step 48/96, (Training Loss: 3.9214, samp_prob: 0.7685):  20%|██        | 2/10 [32:00<1:45:05, 788.16s/it]

48

[FINISHED] Epoch 2, (Training Loss (per epoch): 194.9945 samp_prob: 0.7685)


Epoch 2, step 49/96, (Training Loss: 4.0224, samp_prob: 0.7685):  20%|██        | 2/10 [32:07<1:45:05, 788.16s/it]

49

[FINISHED] Epoch 2, (Training Loss (per epoch): 199.0169 samp_prob: 0.7685)


Epoch 2, step 50/96, (Training Loss: 4.0100, samp_prob: 0.7685):  20%|██        | 2/10 [32:15<1:45:05, 788.16s/it]

50

[FINISHED] Epoch 2, (Training Loss (per epoch): 203.0268 samp_prob: 0.7685)


Epoch 2, step 51/96, (Training Loss: 3.8909, samp_prob: 0.7685):  20%|██        | 2/10 [32:22<1:45:05, 788.16s/it]

51

[FINISHED] Epoch 2, (Training Loss (per epoch): 206.9177 samp_prob: 0.7685)


Epoch 2, step 52/96, (Training Loss: 3.8959, samp_prob: 0.7685):  20%|██        | 2/10 [32:30<1:45:05, 788.16s/it]

52

[FINISHED] Epoch 2, (Training Loss (per epoch): 210.8136 samp_prob: 0.7685)


Epoch 2, step 53/96, (Training Loss: 4.0287, samp_prob: 0.7685):  20%|██        | 2/10 [32:38<1:45:05, 788.16s/it]

53

[FINISHED] Epoch 2, (Training Loss (per epoch): 214.8423 samp_prob: 0.7685)


Epoch 2, step 54/96, (Training Loss: 3.8720, samp_prob: 0.7685):  20%|██        | 2/10 [32:45<1:45:05, 788.16s/it]

54

[FINISHED] Epoch 2, (Training Loss (per epoch): 218.7143 samp_prob: 0.7685)


Epoch 2, step 55/96, (Training Loss: 3.9535, samp_prob: 0.7685):  20%|██        | 2/10 [32:53<1:45:05, 788.16s/it]

55

[FINISHED] Epoch 2, (Training Loss (per epoch): 222.6678 samp_prob: 0.7685)


Epoch 2, step 56/96, (Training Loss: 3.8886, samp_prob: 0.7685):  20%|██        | 2/10 [33:01<1:45:05, 788.16s/it]

56

[FINISHED] Epoch 2, (Training Loss (per epoch): 226.5564 samp_prob: 0.7685)


Epoch 2, step 57/96, (Training Loss: 3.8307, samp_prob: 0.7685):  20%|██        | 2/10 [33:08<1:45:05, 788.16s/it]

57

[FINISHED] Epoch 2, (Training Loss (per epoch): 230.3871 samp_prob: 0.7685)


Epoch 2, step 58/96, (Training Loss: 3.8715, samp_prob: 0.7685):  20%|██        | 2/10 [33:16<1:45:05, 788.16s/it]

58

[FINISHED] Epoch 2, (Training Loss (per epoch): 234.2585 samp_prob: 0.7685)


Epoch 2, step 59/96, (Training Loss: 3.9204, samp_prob: 0.7685):  20%|██        | 2/10 [33:24<1:45:05, 788.16s/it]

59

[FINISHED] Epoch 2, (Training Loss (per epoch): 238.1789 samp_prob: 0.7685)


Epoch 2, step 60/96, (Training Loss: 3.9059, samp_prob: 0.7685):  20%|██        | 2/10 [33:31<1:45:05, 788.16s/it]

60

[FINISHED] Epoch 2, (Training Loss (per epoch): 242.0848 samp_prob: 0.7685)


Epoch 2, step 61/96, (Training Loss: 4.0250, samp_prob: 0.7685):  20%|██        | 2/10 [33:39<1:45:05, 788.16s/it]

[[1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 [1 6 8 ... 2 2 2]]

[Train. Prediction] Epoch 2, step 61/96......

id: 93
answer: ['<BOS>', 'a', 'woman', 'is', 'cutting', 'the', 'stems', 'and', 'leaves', 'of', 'an', 'herb', 'into', 'pieces']
prediction: ['<BOS>', 'a', 'woman', 'is', 'slicing']
61

[FINISHED] Epoch 2, (Training Loss (per epoch): 246.1098 samp_prob: 0.7685)


Epoch 2, step 62/96, (Training Loss: 3.9676, samp_prob: 0.7685):  20%|██        | 2/10 [33:47<1:45:05, 788.16s/it]

62

[FINISHED] Epoch 2, (Training Loss (per epoch): 250.0774 samp_prob: 0.7685)


Epoch 2, step 63/96, (Training Loss: 3.9738, samp_prob: 0.7685):  20%|██        | 2/10 [33:55<1:45:05, 788.16s/it]

63

[FINISHED] Epoch 2, (Training Loss (per epoch): 254.0512 samp_prob: 0.7685)


Epoch 2, step 64/96, (Training Loss: 4.0001, samp_prob: 0.7685):  20%|██        | 2/10 [34:02<1:45:05, 788.16s/it]

64

[FINISHED] Epoch 2, (Training Loss (per epoch): 258.0513 samp_prob: 0.7685)


Epoch 2, step 65/96, (Training Loss: 3.7604, samp_prob: 0.7685):  20%|██        | 2/10 [34:10<1:45:05, 788.16s/it]

65

[FINISHED] Epoch 2, (Training Loss (per epoch): 261.8117 samp_prob: 0.7685)


Epoch 2, step 66/96, (Training Loss: 4.0590, samp_prob: 0.7685):  20%|██        | 2/10 [34:18<1:45:05, 788.16s/it]

66

[FINISHED] Epoch 2, (Training Loss (per epoch): 265.8707 samp_prob: 0.7685)


Epoch 2, step 67/96, (Training Loss: 3.9788, samp_prob: 0.7685):  20%|██        | 2/10 [34:25<1:45:05, 788.16s/it]

67

[FINISHED] Epoch 2, (Training Loss (per epoch): 269.8495 samp_prob: 0.7685)


Epoch 2, step 68/96, (Training Loss: 3.8878, samp_prob: 0.7685):  20%|██        | 2/10 [34:33<1:45:05, 788.16s/it]

68

[FINISHED] Epoch 2, (Training Loss (per epoch): 273.7373 samp_prob: 0.7685)


Epoch 2, step 69/96, (Training Loss: 3.8023, samp_prob: 0.7685):  20%|██        | 2/10 [34:40<1:45:05, 788.16s/it]

69

[FINISHED] Epoch 2, (Training Loss (per epoch): 277.5395 samp_prob: 0.7685)


Epoch 2, step 70/96, (Training Loss: 4.0039, samp_prob: 0.7685):  20%|██        | 2/10 [34:48<1:45:05, 788.16s/it]

70

[FINISHED] Epoch 2, (Training Loss (per epoch): 281.5435 samp_prob: 0.7685)


Epoch 2, step 71/96, (Training Loss: 3.8483, samp_prob: 0.7685):  20%|██        | 2/10 [34:56<1:45:05, 788.16s/it]

71

[FINISHED] Epoch 2, (Training Loss (per epoch): 285.3918 samp_prob: 0.7685)


Epoch 2, step 72/96, (Training Loss: 3.9937, samp_prob: 0.7685):  20%|██        | 2/10 [35:03<1:45:05, 788.16s/it]

72

[FINISHED] Epoch 2, (Training Loss (per epoch): 289.3854 samp_prob: 0.7685)


Epoch 2, step 73/96, (Training Loss: 3.8701, samp_prob: 0.7685):  20%|██        | 2/10 [35:11<1:45:05, 788.16s/it]

73

[FINISHED] Epoch 2, (Training Loss (per epoch): 293.2555 samp_prob: 0.7685)


Epoch 2, step 74/96, (Training Loss: 3.9408, samp_prob: 0.7685):  20%|██        | 2/10 [35:19<1:45:05, 788.16s/it]

74

[FINISHED] Epoch 2, (Training Loss (per epoch): 297.1963 samp_prob: 0.7685)


Epoch 2, step 75/96, (Training Loss: 3.9399, samp_prob: 0.7685):  20%|██        | 2/10 [35:26<1:45:05, 788.16s/it]

75

[FINISHED] Epoch 2, (Training Loss (per epoch): 301.1363 samp_prob: 0.7685)


Epoch 2, step 76/96, (Training Loss: 3.9872, samp_prob: 0.7685):  20%|██        | 2/10 [35:35<1:45:05, 788.16s/it]

[[1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 ...
 [1 4 8 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 [1 4 7 ... 2 2 2]]

[Train. Prediction] Epoch 2, step 76/96......

id: 199
answer: ['<BOS>', 'a', 'man', 'is', 'gazing', 'intently', 'at', 'his', 'palm', 'outstretched', 'against', 'a', 'bright', 'yellow', 'and', 'brown', 'colored', 'triangular', 'patch']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'a', 'a']
76

[FINISHED] Epoch 2, (Training Loss (per epoch): 305.1235 samp_prob: 0.7685)


Epoch 2, step 77/96, (Training Loss: 3.8630, samp_prob: 0.7685):  20%|██        | 2/10 [35:42<1:45:05, 788.16s/it]

77

[FINISHED] Epoch 2, (Training Loss (per epoch): 308.9865 samp_prob: 0.7685)


Epoch 2, step 78/96, (Training Loss: 3.7773, samp_prob: 0.7685):  20%|██        | 2/10 [35:50<1:45:05, 788.16s/it]

78

[FINISHED] Epoch 2, (Training Loss (per epoch): 312.7638 samp_prob: 0.7685)


Epoch 2, step 79/96, (Training Loss: 3.8067, samp_prob: 0.7685):  20%|██        | 2/10 [35:57<1:45:05, 788.16s/it]

79

[FINISHED] Epoch 2, (Training Loss (per epoch): 316.5705 samp_prob: 0.7685)


Epoch 2, step 80/96, (Training Loss: 3.9112, samp_prob: 0.7685):  20%|██        | 2/10 [36:05<1:45:05, 788.16s/it]

80

[FINISHED] Epoch 2, (Training Loss (per epoch): 320.4817 samp_prob: 0.7685)


Epoch 2, step 81/96, (Training Loss: 3.7842, samp_prob: 0.7685):  20%|██        | 2/10 [36:13<1:45:05, 788.16s/it]

81

[FINISHED] Epoch 2, (Training Loss (per epoch): 324.2659 samp_prob: 0.7685)


Epoch 2, step 82/96, (Training Loss: 4.0083, samp_prob: 0.7685):  20%|██        | 2/10 [36:20<1:45:05, 788.16s/it]

82

[FINISHED] Epoch 2, (Training Loss (per epoch): 328.2742 samp_prob: 0.7685)


Epoch 2, step 83/96, (Training Loss: 3.9361, samp_prob: 0.7685):  20%|██        | 2/10 [36:28<1:45:05, 788.16s/it]

83

[FINISHED] Epoch 2, (Training Loss (per epoch): 332.2103 samp_prob: 0.7685)


Epoch 2, step 84/96, (Training Loss: 3.8374, samp_prob: 0.7685):  20%|██        | 2/10 [36:36<1:45:05, 788.16s/it]

84

[FINISHED] Epoch 2, (Training Loss (per epoch): 336.0477 samp_prob: 0.7685)


Epoch 2, step 85/96, (Training Loss: 3.9827, samp_prob: 0.7685):  20%|██        | 2/10 [36:43<1:45:05, 788.16s/it]

85

[FINISHED] Epoch 2, (Training Loss (per epoch): 340.0305 samp_prob: 0.7685)


Epoch 2, step 86/96, (Training Loss: 3.8454, samp_prob: 0.7685):  20%|██        | 2/10 [36:51<1:45:05, 788.16s/it]

86

[FINISHED] Epoch 2, (Training Loss (per epoch): 343.8758 samp_prob: 0.7685)


Epoch 2, step 87/96, (Training Loss: 3.9245, samp_prob: 0.7685):  20%|██        | 2/10 [36:59<1:45:05, 788.16s/it]

87

[FINISHED] Epoch 2, (Training Loss (per epoch): 347.8003 samp_prob: 0.7685)


Epoch 2, step 88/96, (Training Loss: 3.8990, samp_prob: 0.7685):  20%|██        | 2/10 [37:06<1:45:05, 788.16s/it]

88

[FINISHED] Epoch 2, (Training Loss (per epoch): 351.6993 samp_prob: 0.7685)


Epoch 2, step 89/96, (Training Loss: 3.8576, samp_prob: 0.7685):  20%|██        | 2/10 [37:14<1:45:05, 788.16s/it]

89

[FINISHED] Epoch 2, (Training Loss (per epoch): 355.5570 samp_prob: 0.7685)


Epoch 2, step 90/96, (Training Loss: 3.8310, samp_prob: 0.7685):  20%|██        | 2/10 [37:21<1:45:05, 788.16s/it]

90

[FINISHED] Epoch 2, (Training Loss (per epoch): 359.3880 samp_prob: 0.7685)


Epoch 2, step 91/96, (Training Loss: 3.8862, samp_prob: 0.7685):  20%|██        | 2/10 [37:30<1:45:05, 788.16s/it]

[[1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]]

[Train. Prediction] Epoch 2, step 91/96......

id: 225
answer: ['<BOS>', 'a', 'boy', 'runs', 'toward', 'a', 'tornado']
prediction: ['<BOS>', 'the', 'dog', 'is', 'a', 'a', 'a', 'a']
91

[FINISHED] Epoch 2, (Training Loss (per epoch): 363.2741 samp_prob: 0.7685)


Epoch 2, step 92/96, (Training Loss: 3.7297, samp_prob: 0.7685):  20%|██        | 2/10 [37:37<1:45:05, 788.16s/it]

92

[FINISHED] Epoch 2, (Training Loss (per epoch): 367.0038 samp_prob: 0.7685)


Epoch 2, step 93/96, (Training Loss: 3.8111, samp_prob: 0.7685):  20%|██        | 2/10 [37:45<1:45:05, 788.16s/it]

93

[FINISHED] Epoch 2, (Training Loss (per epoch): 370.8149 samp_prob: 0.7685)


Epoch 2, step 94/96, (Training Loss: 3.9285, samp_prob: 0.7685):  20%|██        | 2/10 [37:53<1:45:05, 788.16s/it]

94

[FINISHED] Epoch 2, (Training Loss (per epoch): 374.7434 samp_prob: 0.7685)


Epoch 2, step 95/96, (Training Loss: 3.8763, samp_prob: 0.7685):  30%|███       | 3/10 [38:00<1:30:06, 772.33s/it]

95

[FINISHED] Epoch 2, (Training Loss (per epoch): 378.6197 samp_prob: 0.7685)


Epoch 3, step 0/96, (Training Loss: 3.9421, samp_prob: 0.6900):  30%|███       | 3/10 [38:08<1:30:06, 772.33s/it] 

0

[FINISHED] Epoch 3, (Training Loss (per epoch): 3.9421 samp_prob: 0.6900)


Epoch 3, step 1/96, (Training Loss: 3.8998, samp_prob: 0.6900):  30%|███       | 3/10 [38:16<1:30:06, 772.33s/it]

[[ 1  4  8 ...  2  2  2]
 [ 1  4  5 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  6 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 3, step 1/96......

id: 212
answer: ['<BOS>', 'a', 'cat', 'is', 'looking', 'into', 'the', 'camera', 'and', 'opening', 'its', 'mouth']
prediction: ['<BOS>', 'a', 'cat', 'is', 'a', 'the']
1

[FINISHED] Epoch 3, (Training Loss (per epoch): 7.8418 samp_prob: 0.6900)


Epoch 3, step 2/96, (Training Loss: 3.8607, samp_prob: 0.6900):  30%|███       | 3/10 [38:24<1:30:06, 772.33s/it]

2

[FINISHED] Epoch 3, (Training Loss (per epoch): 11.7025 samp_prob: 0.6900)


Epoch 3, step 3/96, (Training Loss: 3.8886, samp_prob: 0.6900):  30%|███       | 3/10 [38:31<1:30:06, 772.33s/it]

3

[FINISHED] Epoch 3, (Training Loss (per epoch): 15.5912 samp_prob: 0.6900)


Epoch 3, step 4/96, (Training Loss: 3.7593, samp_prob: 0.6900):  30%|███       | 3/10 [38:39<1:30:06, 772.33s/it]

4

[FINISHED] Epoch 3, (Training Loss (per epoch): 19.3504 samp_prob: 0.6900)


Epoch 3, step 5/96, (Training Loss: 3.8133, samp_prob: 0.6900):  30%|███       | 3/10 [38:47<1:30:06, 772.33s/it]

5

[FINISHED] Epoch 3, (Training Loss (per epoch): 23.1637 samp_prob: 0.6900)


Epoch 3, step 6/96, (Training Loss: 3.7986, samp_prob: 0.6900):  30%|███       | 3/10 [38:54<1:30:06, 772.33s/it]

6

[FINISHED] Epoch 3, (Training Loss (per epoch): 26.9623 samp_prob: 0.6900)


Epoch 3, step 7/96, (Training Loss: 3.8552, samp_prob: 0.6900):  30%|███       | 3/10 [39:02<1:30:06, 772.33s/it]

7

[FINISHED] Epoch 3, (Training Loss (per epoch): 30.8176 samp_prob: 0.6900)


Epoch 3, step 8/96, (Training Loss: 3.8247, samp_prob: 0.6900):  30%|███       | 3/10 [39:10<1:30:06, 772.33s/it]

8

[FINISHED] Epoch 3, (Training Loss (per epoch): 34.6423 samp_prob: 0.6900)


Epoch 3, step 9/96, (Training Loss: 3.8541, samp_prob: 0.6900):  30%|███       | 3/10 [39:17<1:30:06, 772.33s/it]

9

[FINISHED] Epoch 3, (Training Loss (per epoch): 38.4963 samp_prob: 0.6900)


Epoch 3, step 10/96, (Training Loss: 3.7788, samp_prob: 0.6900):  30%|███       | 3/10 [39:25<1:30:06, 772.33s/it]

10

[FINISHED] Epoch 3, (Training Loss (per epoch): 42.2751 samp_prob: 0.6900)


Epoch 3, step 11/96, (Training Loss: 3.8325, samp_prob: 0.6900):  30%|███       | 3/10 [39:33<1:30:06, 772.33s/it]

11

[FINISHED] Epoch 3, (Training Loss (per epoch): 46.1076 samp_prob: 0.6900)


Epoch 3, step 12/96, (Training Loss: 3.8654, samp_prob: 0.6900):  30%|███       | 3/10 [39:40<1:30:06, 772.33s/it]

12

[FINISHED] Epoch 3, (Training Loss (per epoch): 49.9730 samp_prob: 0.6900)


Epoch 3, step 13/96, (Training Loss: 3.9356, samp_prob: 0.6900):  30%|███       | 3/10 [39:48<1:30:06, 772.33s/it]

13

[FINISHED] Epoch 3, (Training Loss (per epoch): 53.9086 samp_prob: 0.6900)


Epoch 3, step 14/96, (Training Loss: 3.9001, samp_prob: 0.6900):  30%|███       | 3/10 [39:55<1:30:06, 772.33s/it]

14

[FINISHED] Epoch 3, (Training Loss (per epoch): 57.8087 samp_prob: 0.6900)


Epoch 3, step 15/96, (Training Loss: 3.8657, samp_prob: 0.6900):  30%|███       | 3/10 [40:03<1:30:06, 772.33s/it]

15

[FINISHED] Epoch 3, (Training Loss (per epoch): 61.6744 samp_prob: 0.6900)


Epoch 3, step 16/96, (Training Loss: 3.8386, samp_prob: 0.6900):  30%|███       | 3/10 [40:11<1:30:06, 772.33s/it]

[[ 1  4 23 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4 12 ...  2  2  2]
 ...
 [ 1  4  5 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 3, step 16/96......

id: 171
answer: ['<BOS>', 'the', 'dog', 'is', 'running']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'a', 'a']
16

[FINISHED] Epoch 3, (Training Loss (per epoch): 65.5130 samp_prob: 0.6900)


Epoch 3, step 17/96, (Training Loss: 3.8222, samp_prob: 0.6900):  30%|███       | 3/10 [40:19<1:30:06, 772.33s/it]

17

[FINISHED] Epoch 3, (Training Loss (per epoch): 69.3352 samp_prob: 0.6900)


Epoch 3, step 18/96, (Training Loss: 3.6689, samp_prob: 0.6900):  30%|███       | 3/10 [40:27<1:30:06, 772.33s/it]

18

[FINISHED] Epoch 3, (Training Loss (per epoch): 73.0041 samp_prob: 0.6900)


Epoch 3, step 19/96, (Training Loss: 3.5782, samp_prob: 0.6900):  30%|███       | 3/10 [40:34<1:30:06, 772.33s/it]

19

[FINISHED] Epoch 3, (Training Loss (per epoch): 76.5823 samp_prob: 0.6900)


Epoch 3, step 20/96, (Training Loss: 3.8932, samp_prob: 0.6900):  30%|███       | 3/10 [40:42<1:30:06, 772.33s/it]

20

[FINISHED] Epoch 3, (Training Loss (per epoch): 80.4755 samp_prob: 0.6900)


Epoch 3, step 21/96, (Training Loss: 3.7490, samp_prob: 0.6900):  30%|███       | 3/10 [40:50<1:30:06, 772.33s/it]

21

[FINISHED] Epoch 3, (Training Loss (per epoch): 84.2245 samp_prob: 0.6900)


Epoch 3, step 22/96, (Training Loss: 3.7458, samp_prob: 0.6900):  30%|███       | 3/10 [40:57<1:30:06, 772.33s/it]

22

[FINISHED] Epoch 3, (Training Loss (per epoch): 87.9703 samp_prob: 0.6900)


Epoch 3, step 23/96, (Training Loss: 3.8382, samp_prob: 0.6900):  30%|███       | 3/10 [41:05<1:30:06, 772.33s/it]

23

[FINISHED] Epoch 3, (Training Loss (per epoch): 91.8085 samp_prob: 0.6900)


Epoch 3, step 24/96, (Training Loss: 4.0159, samp_prob: 0.6900):  30%|███       | 3/10 [41:12<1:30:06, 772.33s/it]

24

[FINISHED] Epoch 3, (Training Loss (per epoch): 95.8244 samp_prob: 0.6900)


Epoch 3, step 25/96, (Training Loss: 3.7412, samp_prob: 0.6900):  30%|███       | 3/10 [41:20<1:30:06, 772.33s/it]

25

[FINISHED] Epoch 3, (Training Loss (per epoch): 99.5656 samp_prob: 0.6900)


Epoch 3, step 26/96, (Training Loss: 3.7995, samp_prob: 0.6900):  30%|███       | 3/10 [41:28<1:30:06, 772.33s/it]

26

[FINISHED] Epoch 3, (Training Loss (per epoch): 103.3651 samp_prob: 0.6900)


Epoch 3, step 27/96, (Training Loss: 3.7355, samp_prob: 0.6900):  30%|███       | 3/10 [41:35<1:30:06, 772.33s/it]

27

[FINISHED] Epoch 3, (Training Loss (per epoch): 107.1006 samp_prob: 0.6900)


Epoch 3, step 28/96, (Training Loss: 3.7003, samp_prob: 0.6900):  30%|███       | 3/10 [41:43<1:30:06, 772.33s/it]

28

[FINISHED] Epoch 3, (Training Loss (per epoch): 110.8010 samp_prob: 0.6900)


Epoch 3, step 29/96, (Training Loss: 3.8035, samp_prob: 0.6900):  30%|███       | 3/10 [41:51<1:30:06, 772.33s/it]

29

[FINISHED] Epoch 3, (Training Loss (per epoch): 114.6045 samp_prob: 0.6900)


Epoch 3, step 30/96, (Training Loss: 3.8807, samp_prob: 0.6900):  30%|███       | 3/10 [41:58<1:30:06, 772.33s/it]

30

[FINISHED] Epoch 3, (Training Loss (per epoch): 118.4852 samp_prob: 0.6900)


Epoch 3, step 31/96, (Training Loss: 3.8016, samp_prob: 0.6900):  30%|███       | 3/10 [42:06<1:30:06, 772.33s/it]

[[ 1 20 12 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 3, step 31/96......

id: 247
answer: ['<BOS>', 'a', 'man', 'is', 'cutting', 'onions']
prediction: ['<BOS>', 'a', 'man', 'is', 'slicing', 'a']
31

[FINISHED] Epoch 3, (Training Loss (per epoch): 122.2868 samp_prob: 0.6900)


Epoch 3, step 32/96, (Training Loss: 3.7756, samp_prob: 0.6900):  30%|███       | 3/10 [42:14<1:30:06, 772.33s/it]

32

[FINISHED] Epoch 3, (Training Loss (per epoch): 126.0625 samp_prob: 0.6900)


Epoch 3, step 33/96, (Training Loss: 3.8154, samp_prob: 0.6900):  30%|███       | 3/10 [42:22<1:30:06, 772.33s/it]

33

[FINISHED] Epoch 3, (Training Loss (per epoch): 129.8778 samp_prob: 0.6900)


Epoch 3, step 34/96, (Training Loss: 3.8109, samp_prob: 0.6900):  30%|███       | 3/10 [42:29<1:30:06, 772.33s/it]

34

[FINISHED] Epoch 3, (Training Loss (per epoch): 133.6888 samp_prob: 0.6900)


Epoch 3, step 35/96, (Training Loss: 3.7614, samp_prob: 0.6900):  30%|███       | 3/10 [42:37<1:30:06, 772.33s/it]

35

[FINISHED] Epoch 3, (Training Loss (per epoch): 137.4502 samp_prob: 0.6900)


Epoch 3, step 36/96, (Training Loss: 3.8381, samp_prob: 0.6900):  30%|███       | 3/10 [42:45<1:30:06, 772.33s/it]

36

[FINISHED] Epoch 3, (Training Loss (per epoch): 141.2883 samp_prob: 0.6900)


Epoch 3, step 37/96, (Training Loss: 3.8296, samp_prob: 0.6900):  30%|███       | 3/10 [42:52<1:30:06, 772.33s/it]

37

[FINISHED] Epoch 3, (Training Loss (per epoch): 145.1180 samp_prob: 0.6900)


Epoch 3, step 38/96, (Training Loss: 3.7815, samp_prob: 0.6900):  30%|███       | 3/10 [43:00<1:30:06, 772.33s/it]

38

[FINISHED] Epoch 3, (Training Loss (per epoch): 148.8995 samp_prob: 0.6900)


Epoch 3, step 39/96, (Training Loss: 3.7543, samp_prob: 0.6900):  30%|███       | 3/10 [43:08<1:30:06, 772.33s/it]

39

[FINISHED] Epoch 3, (Training Loss (per epoch): 152.6538 samp_prob: 0.6900)


Epoch 3, step 40/96, (Training Loss: 3.6563, samp_prob: 0.6900):  30%|███       | 3/10 [43:15<1:30:06, 772.33s/it]

40

[FINISHED] Epoch 3, (Training Loss (per epoch): 156.3101 samp_prob: 0.6900)


Epoch 3, step 41/96, (Training Loss: 3.7057, samp_prob: 0.6900):  30%|███       | 3/10 [43:23<1:30:06, 772.33s/it]

41

[FINISHED] Epoch 3, (Training Loss (per epoch): 160.0157 samp_prob: 0.6900)


Epoch 3, step 42/96, (Training Loss: 3.7864, samp_prob: 0.6900):  30%|███       | 3/10 [43:31<1:30:06, 772.33s/it]

42

[FINISHED] Epoch 3, (Training Loss (per epoch): 163.8021 samp_prob: 0.6900)


Epoch 3, step 43/96, (Training Loss: 3.7974, samp_prob: 0.6900):  30%|███       | 3/10 [43:38<1:30:06, 772.33s/it]

43

[FINISHED] Epoch 3, (Training Loss (per epoch): 167.5995 samp_prob: 0.6900)


Epoch 3, step 44/96, (Training Loss: 3.7402, samp_prob: 0.6900):  30%|███       | 3/10 [43:46<1:30:06, 772.33s/it]

44

[FINISHED] Epoch 3, (Training Loss (per epoch): 171.3397 samp_prob: 0.6900)


Epoch 3, step 45/96, (Training Loss: 3.7555, samp_prob: 0.6900):  30%|███       | 3/10 [43:54<1:30:06, 772.33s/it]

45

[FINISHED] Epoch 3, (Training Loss (per epoch): 175.0952 samp_prob: 0.6900)


Epoch 3, step 46/96, (Training Loss: 3.8027, samp_prob: 0.6900):  30%|███       | 3/10 [44:02<1:30:06, 772.33s/it]

[[ 1  4  8 ...  2  2  2]
 [ 1  4 51 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 12 ...  2  2  2]
 [ 1  6 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 3, step 46/96......

id: 211
answer: ['<BOS>', 'the', 'lady', 'put', 'the', 'bean', 'pods', 'in', 'the', 'boiling', 'water']
prediction: ['<BOS>', 'a', 'woman', 'is', 'a', 'a']
46

[FINISHED] Epoch 3, (Training Loss (per epoch): 178.8979 samp_prob: 0.6900)


Epoch 3, step 47/96, (Training Loss: 3.7333, samp_prob: 0.6900):  30%|███       | 3/10 [44:09<1:30:06, 772.33s/it]

47

[FINISHED] Epoch 3, (Training Loss (per epoch): 182.6311 samp_prob: 0.6900)


Epoch 3, step 48/96, (Training Loss: 3.7328, samp_prob: 0.6900):  30%|███       | 3/10 [44:17<1:30:06, 772.33s/it]

48

[FINISHED] Epoch 3, (Training Loss (per epoch): 186.3639 samp_prob: 0.6900)


Epoch 3, step 49/96, (Training Loss: 3.8259, samp_prob: 0.6900):  30%|███       | 3/10 [44:24<1:30:06, 772.33s/it]

49

[FINISHED] Epoch 3, (Training Loss (per epoch): 190.1898 samp_prob: 0.6900)


Epoch 3, step 50/96, (Training Loss: 3.8641, samp_prob: 0.6900):  30%|███       | 3/10 [44:32<1:30:06, 772.33s/it]

50

[FINISHED] Epoch 3, (Training Loss (per epoch): 194.0540 samp_prob: 0.6900)


Epoch 3, step 51/96, (Training Loss: 3.6998, samp_prob: 0.6900):  30%|███       | 3/10 [44:40<1:30:06, 772.33s/it]

51

[FINISHED] Epoch 3, (Training Loss (per epoch): 197.7538 samp_prob: 0.6900)


Epoch 3, step 52/96, (Training Loss: 3.7429, samp_prob: 0.6900):  30%|███       | 3/10 [44:47<1:30:06, 772.33s/it]

52

[FINISHED] Epoch 3, (Training Loss (per epoch): 201.4967 samp_prob: 0.6900)


Epoch 3, step 53/96, (Training Loss: 3.8316, samp_prob: 0.6900):  30%|███       | 3/10 [44:55<1:30:06, 772.33s/it]

53

[FINISHED] Epoch 3, (Training Loss (per epoch): 205.3284 samp_prob: 0.6900)


Epoch 3, step 54/96, (Training Loss: 3.7300, samp_prob: 0.6900):  30%|███       | 3/10 [45:03<1:30:06, 772.33s/it]

54

[FINISHED] Epoch 3, (Training Loss (per epoch): 209.0584 samp_prob: 0.6900)


Epoch 3, step 55/96, (Training Loss: 3.7679, samp_prob: 0.6900):  30%|███       | 3/10 [45:10<1:30:06, 772.33s/it]

55

[FINISHED] Epoch 3, (Training Loss (per epoch): 212.8263 samp_prob: 0.6900)


Epoch 3, step 56/96, (Training Loss: 3.7082, samp_prob: 0.6900):  30%|███       | 3/10 [45:18<1:30:06, 772.33s/it]

56

[FINISHED] Epoch 3, (Training Loss (per epoch): 216.5345 samp_prob: 0.6900)


Epoch 3, step 57/96, (Training Loss: 3.6641, samp_prob: 0.6900):  30%|███       | 3/10 [45:25<1:30:06, 772.33s/it]

57

[FINISHED] Epoch 3, (Training Loss (per epoch): 220.1985 samp_prob: 0.6900)


Epoch 3, step 58/96, (Training Loss: 3.6837, samp_prob: 0.6900):  30%|███       | 3/10 [45:33<1:30:06, 772.33s/it]

58

[FINISHED] Epoch 3, (Training Loss (per epoch): 223.8822 samp_prob: 0.6900)


Epoch 3, step 59/96, (Training Loss: 3.7544, samp_prob: 0.6900):  30%|███       | 3/10 [45:41<1:30:06, 772.33s/it]

59

[FINISHED] Epoch 3, (Training Loss (per epoch): 227.6366 samp_prob: 0.6900)


Epoch 3, step 60/96, (Training Loss: 3.7379, samp_prob: 0.6900):  30%|███       | 3/10 [45:48<1:30:06, 772.33s/it]

60

[FINISHED] Epoch 3, (Training Loss (per epoch): 231.3744 samp_prob: 0.6900)


Epoch 3, step 61/96, (Training Loss: 3.8344, samp_prob: 0.6900):  30%|███       | 3/10 [45:56<1:30:06, 772.33s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 28 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4 23 ...  2  2  2]]

[Train. Prediction] Epoch 3, step 61/96......

id: 127
answer: ['<BOS>', 'a', 'man', 'slid', 'down', 'the', 'escalator', 'railing']
prediction: ['<BOS>', 'a', 'man', 'is', 'on', 'a']
61

[FINISHED] Epoch 3, (Training Loss (per epoch): 235.2089 samp_prob: 0.6900)


Epoch 3, step 62/96, (Training Loss: 3.7709, samp_prob: 0.6900):  30%|███       | 3/10 [46:04<1:30:06, 772.33s/it]

62

[FINISHED] Epoch 3, (Training Loss (per epoch): 238.9798 samp_prob: 0.6900)


Epoch 3, step 63/96, (Training Loss: 3.7943, samp_prob: 0.6900):  30%|███       | 3/10 [46:12<1:30:06, 772.33s/it]

63

[FINISHED] Epoch 3, (Training Loss (per epoch): 242.7740 samp_prob: 0.6900)


Epoch 3, step 64/96, (Training Loss: 3.8354, samp_prob: 0.6900):  30%|███       | 3/10 [46:19<1:30:06, 772.33s/it]

64

[FINISHED] Epoch 3, (Training Loss (per epoch): 246.6094 samp_prob: 0.6900)


Epoch 3, step 65/96, (Training Loss: 3.5562, samp_prob: 0.6900):  30%|███       | 3/10 [46:27<1:30:06, 772.33s/it]

65

[FINISHED] Epoch 3, (Training Loss (per epoch): 250.1656 samp_prob: 0.6900)


Epoch 3, step 66/96, (Training Loss: 3.8497, samp_prob: 0.6900):  30%|███       | 3/10 [46:34<1:30:06, 772.33s/it]

66

[FINISHED] Epoch 3, (Training Loss (per epoch): 254.0153 samp_prob: 0.6900)


Epoch 3, step 67/96, (Training Loss: 3.8203, samp_prob: 0.6900):  30%|███       | 3/10 [46:42<1:30:06, 772.33s/it]

67

[FINISHED] Epoch 3, (Training Loss (per epoch): 257.8357 samp_prob: 0.6900)


Epoch 3, step 68/96, (Training Loss: 3.7402, samp_prob: 0.6900):  30%|███       | 3/10 [46:50<1:30:06, 772.33s/it]

68

[FINISHED] Epoch 3, (Training Loss (per epoch): 261.5759 samp_prob: 0.6900)


Epoch 3, step 69/96, (Training Loss: 3.6339, samp_prob: 0.6900):  30%|███       | 3/10 [46:57<1:30:06, 772.33s/it]

69

[FINISHED] Epoch 3, (Training Loss (per epoch): 265.2098 samp_prob: 0.6900)


Epoch 3, step 70/96, (Training Loss: 3.8090, samp_prob: 0.6900):  30%|███       | 3/10 [47:05<1:30:06, 772.33s/it]

70

[FINISHED] Epoch 3, (Training Loss (per epoch): 269.0188 samp_prob: 0.6900)


Epoch 3, step 71/96, (Training Loss: 3.7051, samp_prob: 0.6900):  30%|███       | 3/10 [47:12<1:30:06, 772.33s/it]

71

[FINISHED] Epoch 3, (Training Loss (per epoch): 272.7239 samp_prob: 0.6900)


Epoch 3, step 72/96, (Training Loss: 3.8429, samp_prob: 0.6900):  30%|███       | 3/10 [47:20<1:30:06, 772.33s/it]

72

[FINISHED] Epoch 3, (Training Loss (per epoch): 276.5668 samp_prob: 0.6900)


Epoch 3, step 73/96, (Training Loss: 3.7052, samp_prob: 0.6900):  30%|███       | 3/10 [47:28<1:30:06, 772.33s/it]

73

[FINISHED] Epoch 3, (Training Loss (per epoch): 280.2720 samp_prob: 0.6900)


Epoch 3, step 74/96, (Training Loss: 3.7554, samp_prob: 0.6900):  30%|███       | 3/10 [47:35<1:30:06, 772.33s/it]

74

[FINISHED] Epoch 3, (Training Loss (per epoch): 284.0275 samp_prob: 0.6900)


Epoch 3, step 75/96, (Training Loss: 3.7772, samp_prob: 0.6900):  30%|███       | 3/10 [47:43<1:30:06, 772.33s/it]

75

[FINISHED] Epoch 3, (Training Loss (per epoch): 287.8047 samp_prob: 0.6900)


Epoch 3, step 76/96, (Training Loss: 3.8096, samp_prob: 0.6900):  30%|███       | 3/10 [47:51<1:30:06, 772.33s/it]

[[1 4 7 ... 2 2 2]
 [1 4 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 ...
 [1 4 7 ... 2 2 2]
 [1 4 8 ... 2 2 2]
 [1 4 7 ... 2 2 2]]

[Train. Prediction] Epoch 3, step 76/96......

id: 247
answer: ['<BOS>', 'a', 'person', 'is', 'holding', 'a', 'hamburger', 'bun']
prediction: ['<BOS>', 'a', 'man', 'is', 'slicing']
76

[FINISHED] Epoch 3, (Training Loss (per epoch): 291.6143 samp_prob: 0.6900)


Epoch 3, step 77/96, (Training Loss: 3.7010, samp_prob: 0.6900):  30%|███       | 3/10 [47:59<1:30:06, 772.33s/it]

77

[FINISHED] Epoch 3, (Training Loss (per epoch): 295.3153 samp_prob: 0.6900)


Epoch 3, step 78/96, (Training Loss: 3.6237, samp_prob: 0.6900):  30%|███       | 3/10 [48:06<1:30:06, 772.33s/it]

78

[FINISHED] Epoch 3, (Training Loss (per epoch): 298.9390 samp_prob: 0.6900)


Epoch 3, step 79/96, (Training Loss: 3.6320, samp_prob: 0.6900):  30%|███       | 3/10 [48:14<1:30:06, 772.33s/it]

79

[FINISHED] Epoch 3, (Training Loss (per epoch): 302.5710 samp_prob: 0.6900)


Epoch 3, step 80/96, (Training Loss: 3.7221, samp_prob: 0.6900):  30%|███       | 3/10 [48:21<1:30:06, 772.33s/it]

80

[FINISHED] Epoch 3, (Training Loss (per epoch): 306.2931 samp_prob: 0.6900)


Epoch 3, step 81/96, (Training Loss: 3.5853, samp_prob: 0.6900):  30%|███       | 3/10 [48:29<1:30:06, 772.33s/it]

81

[FINISHED] Epoch 3, (Training Loss (per epoch): 309.8784 samp_prob: 0.6900)


Epoch 3, step 82/96, (Training Loss: 3.8617, samp_prob: 0.6900):  30%|███       | 3/10 [48:37<1:30:06, 772.33s/it]

82

[FINISHED] Epoch 3, (Training Loss (per epoch): 313.7401 samp_prob: 0.6900)


Epoch 3, step 83/96, (Training Loss: 3.7729, samp_prob: 0.6900):  30%|███       | 3/10 [48:44<1:30:06, 772.33s/it]

83

[FINISHED] Epoch 3, (Training Loss (per epoch): 317.5130 samp_prob: 0.6900)


Epoch 3, step 84/96, (Training Loss: 3.6768, samp_prob: 0.6900):  30%|███       | 3/10 [48:52<1:30:06, 772.33s/it]

84

[FINISHED] Epoch 3, (Training Loss (per epoch): 321.1898 samp_prob: 0.6900)


Epoch 3, step 85/96, (Training Loss: 3.8107, samp_prob: 0.6900):  30%|███       | 3/10 [49:00<1:30:06, 772.33s/it]

85

[FINISHED] Epoch 3, (Training Loss (per epoch): 325.0006 samp_prob: 0.6900)


Epoch 3, step 86/96, (Training Loss: 3.6477, samp_prob: 0.6900):  30%|███       | 3/10 [49:07<1:30:06, 772.33s/it]

86

[FINISHED] Epoch 3, (Training Loss (per epoch): 328.6482 samp_prob: 0.6900)


Epoch 3, step 87/96, (Training Loss: 3.7545, samp_prob: 0.6900):  30%|███       | 3/10 [49:15<1:30:06, 772.33s/it]

87

[FINISHED] Epoch 3, (Training Loss (per epoch): 332.4027 samp_prob: 0.6900)


Epoch 3, step 88/96, (Training Loss: 3.7324, samp_prob: 0.6900):  30%|███       | 3/10 [49:22<1:30:06, 772.33s/it]

88

[FINISHED] Epoch 3, (Training Loss (per epoch): 336.1351 samp_prob: 0.6900)


Epoch 3, step 89/96, (Training Loss: 3.6947, samp_prob: 0.6900):  30%|███       | 3/10 [49:30<1:30:06, 772.33s/it]

89

[FINISHED] Epoch 3, (Training Loss (per epoch): 339.8299 samp_prob: 0.6900)


Epoch 3, step 90/96, (Training Loss: 3.6434, samp_prob: 0.6900):  30%|███       | 3/10 [49:38<1:30:06, 772.33s/it]

90

[FINISHED] Epoch 3, (Training Loss (per epoch): 343.4733 samp_prob: 0.6900)


Epoch 3, step 91/96, (Training Loss: 3.7190, samp_prob: 0.6900):  30%|███       | 3/10 [49:46<1:30:06, 772.33s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  6  5 ...  2  2  2]
 [ 1  4 24 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 3, step 91/96......

id: 187
answer: ['<BOS>', 'a', 'baby', 'panda', 'is', 'hugging', 'its', 'mother']
prediction: ['<BOS>', 'a', 'baby', 'is', 'is']
91

[FINISHED] Epoch 3, (Training Loss (per epoch): 347.1923 samp_prob: 0.6900)


Epoch 3, step 92/96, (Training Loss: 3.6006, samp_prob: 0.6900):  30%|███       | 3/10 [49:53<1:30:06, 772.33s/it]

92

[FINISHED] Epoch 3, (Training Loss (per epoch): 350.7929 samp_prob: 0.6900)


Epoch 3, step 93/96, (Training Loss: 3.6646, samp_prob: 0.6900):  30%|███       | 3/10 [50:01<1:30:06, 772.33s/it]

93

[FINISHED] Epoch 3, (Training Loss (per epoch): 354.4574 samp_prob: 0.6900)


Epoch 3, step 94/96, (Training Loss: 3.7747, samp_prob: 0.6900):  30%|███       | 3/10 [50:08<1:30:06, 772.33s/it]

94

[FINISHED] Epoch 3, (Training Loss (per epoch): 358.2321 samp_prob: 0.6900)


Epoch 3, step 95/96, (Training Loss: 3.7055, samp_prob: 0.6900):  40%|████      | 4/10 [50:16<1:16:08, 761.42s/it]

95

[FINISHED] Epoch 3, (Training Loss (per epoch): 361.9376 samp_prob: 0.6900)


Epoch 4, step 0/96, (Training Loss: 3.7959, samp_prob: 0.5987):  40%|████      | 4/10 [50:24<1:16:08, 761.42s/it] 

0

[FINISHED] Epoch 4, (Training Loss (per epoch): 3.7959 samp_prob: 0.5987)


Epoch 4, step 1/96, (Training Loss: 3.7570, samp_prob: 0.5987):  40%|████      | 4/10 [50:32<1:16:08, 761.42s/it]

[[ 1  4 24 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 4, step 1/96......

id: 143
answer: ['<BOS>', 'a', 'little', 'boy', 'is', 'walking', 'on', 'a', 'treadmill']
prediction: ['<BOS>', 'a', 'man', 'is', 'is', 'a', 'a']
1

[FINISHED] Epoch 4, (Training Loss (per epoch): 7.5529 samp_prob: 0.5987)


Epoch 4, step 2/96, (Training Loss: 3.7018, samp_prob: 0.5987):  40%|████      | 4/10 [50:39<1:16:08, 761.42s/it]

2

[FINISHED] Epoch 4, (Training Loss (per epoch): 11.2548 samp_prob: 0.5987)


Epoch 4, step 3/96, (Training Loss: 3.7355, samp_prob: 0.5987):  40%|████      | 4/10 [50:47<1:16:08, 761.42s/it]

3

[FINISHED] Epoch 4, (Training Loss (per epoch): 14.9903 samp_prob: 0.5987)


Epoch 4, step 4/96, (Training Loss: 3.6033, samp_prob: 0.5987):  40%|████      | 4/10 [50:55<1:16:08, 761.42s/it]

4

[FINISHED] Epoch 4, (Training Loss (per epoch): 18.5936 samp_prob: 0.5987)


Epoch 4, step 5/96, (Training Loss: 3.6411, samp_prob: 0.5987):  40%|████      | 4/10 [51:02<1:16:08, 761.42s/it]

5

[FINISHED] Epoch 4, (Training Loss (per epoch): 22.2347 samp_prob: 0.5987)


Epoch 4, step 6/96, (Training Loss: 3.6619, samp_prob: 0.5987):  40%|████      | 4/10 [51:10<1:16:08, 761.42s/it]

6

[FINISHED] Epoch 4, (Training Loss (per epoch): 25.8966 samp_prob: 0.5987)


Epoch 4, step 7/96, (Training Loss: 3.7038, samp_prob: 0.5987):  40%|████      | 4/10 [51:17<1:16:08, 761.42s/it]

7

[FINISHED] Epoch 4, (Training Loss (per epoch): 29.6004 samp_prob: 0.5987)


Epoch 4, step 8/96, (Training Loss: 3.6628, samp_prob: 0.5987):  40%|████      | 4/10 [51:25<1:16:08, 761.42s/it]

8

[FINISHED] Epoch 4, (Training Loss (per epoch): 33.2632 samp_prob: 0.5987)


Epoch 4, step 9/96, (Training Loss: 3.6800, samp_prob: 0.5987):  40%|████      | 4/10 [51:33<1:16:08, 761.42s/it]

9

[FINISHED] Epoch 4, (Training Loss (per epoch): 36.9433 samp_prob: 0.5987)


Epoch 4, step 10/96, (Training Loss: 3.6444, samp_prob: 0.5987):  40%|████      | 4/10 [51:40<1:16:08, 761.42s/it]

10

[FINISHED] Epoch 4, (Training Loss (per epoch): 40.5877 samp_prob: 0.5987)


Epoch 4, step 11/96, (Training Loss: 3.6747, samp_prob: 0.5987):  40%|████      | 4/10 [51:48<1:16:08, 761.42s/it]

11

[FINISHED] Epoch 4, (Training Loss (per epoch): 44.2624 samp_prob: 0.5987)


Epoch 4, step 12/96, (Training Loss: 3.7138, samp_prob: 0.5987):  40%|████      | 4/10 [51:56<1:16:08, 761.42s/it]

12

[FINISHED] Epoch 4, (Training Loss (per epoch): 47.9762 samp_prob: 0.5987)


Epoch 4, step 13/96, (Training Loss: 3.7806, samp_prob: 0.5987):  40%|████      | 4/10 [52:03<1:16:08, 761.42s/it]

13

[FINISHED] Epoch 4, (Training Loss (per epoch): 51.7568 samp_prob: 0.5987)


Epoch 4, step 14/96, (Training Loss: 3.7506, samp_prob: 0.5987):  40%|████      | 4/10 [52:11<1:16:08, 761.42s/it]

14

[FINISHED] Epoch 4, (Training Loss (per epoch): 55.5074 samp_prob: 0.5987)


Epoch 4, step 15/96, (Training Loss: 3.7119, samp_prob: 0.5987):  40%|████      | 4/10 [52:18<1:16:08, 761.42s/it]

15

[FINISHED] Epoch 4, (Training Loss (per epoch): 59.2193 samp_prob: 0.5987)


Epoch 4, step 16/96, (Training Loss: 3.6552, samp_prob: 0.5987):  40%|████      | 4/10 [52:26<1:16:08, 761.42s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1 20 12 ...  2  2  2]
 ...
 [ 1  4 24 ...  2  2  2]
 [ 1  4 17 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 4, step 16/96......

id: 30
answer: ['<BOS>', 'someone', 'is', 'trimming', 'fat', 'from', 'a', 'piece', 'of', 'meat']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'a']
16

[FINISHED] Epoch 4, (Training Loss (per epoch): 62.8744 samp_prob: 0.5987)


Epoch 4, step 17/96, (Training Loss: 3.6264, samp_prob: 0.5987):  40%|████      | 4/10 [52:34<1:16:08, 761.42s/it]

17

[FINISHED] Epoch 4, (Training Loss (per epoch): 66.5008 samp_prob: 0.5987)


Epoch 4, step 18/96, (Training Loss: 3.5103, samp_prob: 0.5987):  40%|████      | 4/10 [52:42<1:16:08, 761.42s/it]

18

[FINISHED] Epoch 4, (Training Loss (per epoch): 70.0111 samp_prob: 0.5987)


Epoch 4, step 19/96, (Training Loss: 3.3999, samp_prob: 0.5987):  40%|████      | 4/10 [52:49<1:16:08, 761.42s/it]

19

[FINISHED] Epoch 4, (Training Loss (per epoch): 73.4110 samp_prob: 0.5987)


Epoch 4, step 20/96, (Training Loss: 3.7256, samp_prob: 0.5987):  40%|████      | 4/10 [52:57<1:16:08, 761.42s/it]

20

[FINISHED] Epoch 4, (Training Loss (per epoch): 77.1366 samp_prob: 0.5987)


Epoch 4, step 21/96, (Training Loss: 3.6132, samp_prob: 0.5987):  40%|████      | 4/10 [53:05<1:16:08, 761.42s/it]

21

[FINISHED] Epoch 4, (Training Loss (per epoch): 80.7497 samp_prob: 0.5987)


Epoch 4, step 22/96, (Training Loss: 3.6245, samp_prob: 0.5987):  40%|████      | 4/10 [53:12<1:16:08, 761.42s/it]

22

[FINISHED] Epoch 4, (Training Loss (per epoch): 84.3742 samp_prob: 0.5987)


Epoch 4, step 23/96, (Training Loss: 3.6962, samp_prob: 0.5987):  40%|████      | 4/10 [53:20<1:16:08, 761.42s/it]

23

[FINISHED] Epoch 4, (Training Loss (per epoch): 88.0704 samp_prob: 0.5987)


Epoch 4, step 24/96, (Training Loss: 3.8748, samp_prob: 0.5987):  40%|████      | 4/10 [53:27<1:16:08, 761.42s/it]

24

[FINISHED] Epoch 4, (Training Loss (per epoch): 91.9452 samp_prob: 0.5987)


Epoch 4, step 25/96, (Training Loss: 3.5741, samp_prob: 0.5987):  40%|████      | 4/10 [53:35<1:16:08, 761.42s/it]

25

[FINISHED] Epoch 4, (Training Loss (per epoch): 95.5193 samp_prob: 0.5987)


Epoch 4, step 26/96, (Training Loss: 3.6441, samp_prob: 0.5987):  40%|████      | 4/10 [53:42<1:16:08, 761.42s/it]

26

[FINISHED] Epoch 4, (Training Loss (per epoch): 99.1634 samp_prob: 0.5987)


Epoch 4, step 27/96, (Training Loss: 3.5621, samp_prob: 0.5987):  40%|████      | 4/10 [53:50<1:16:08, 761.42s/it]

27

[FINISHED] Epoch 4, (Training Loss (per epoch): 102.7255 samp_prob: 0.5987)


Epoch 4, step 28/96, (Training Loss: 3.5130, samp_prob: 0.5987):  40%|████      | 4/10 [53:58<1:16:08, 761.42s/it]

28

[FINISHED] Epoch 4, (Training Loss (per epoch): 106.2385 samp_prob: 0.5987)


Epoch 4, step 29/96, (Training Loss: 3.6263, samp_prob: 0.5987):  40%|████      | 4/10 [54:05<1:16:08, 761.42s/it]

29

[FINISHED] Epoch 4, (Training Loss (per epoch): 109.8648 samp_prob: 0.5987)


Epoch 4, step 30/96, (Training Loss: 3.7441, samp_prob: 0.5987):  40%|████      | 4/10 [54:13<1:16:08, 761.42s/it]

30

[FINISHED] Epoch 4, (Training Loss (per epoch): 113.6089 samp_prob: 0.5987)


Epoch 4, step 31/96, (Training Loss: 3.6537, samp_prob: 0.5987):  40%|████      | 4/10 [54:21<1:16:08, 761.42s/it]

[[ 1  6 12 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  5 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 4, step 31/96......

id: 127
answer: ['<BOS>', 'a', 'couple', 'mount', 'a', 'motorcycle']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'a', 'a']
31

[FINISHED] Epoch 4, (Training Loss (per epoch): 117.2626 samp_prob: 0.5987)


Epoch 4, step 32/96, (Training Loss: 3.6272, samp_prob: 0.5987):  40%|████      | 4/10 [54:29<1:16:08, 761.42s/it]

32

[FINISHED] Epoch 4, (Training Loss (per epoch): 120.8898 samp_prob: 0.5987)


Epoch 4, step 33/96, (Training Loss: 3.6491, samp_prob: 0.5987):  40%|████      | 4/10 [54:36<1:16:08, 761.42s/it]

33

[FINISHED] Epoch 4, (Training Loss (per epoch): 124.5390 samp_prob: 0.5987)


Epoch 4, step 34/96, (Training Loss: 3.6645, samp_prob: 0.5987):  40%|████      | 4/10 [54:44<1:16:08, 761.42s/it]

34

[FINISHED] Epoch 4, (Training Loss (per epoch): 128.2035 samp_prob: 0.5987)


Epoch 4, step 35/96, (Training Loss: 3.6314, samp_prob: 0.5987):  40%|████      | 4/10 [54:51<1:16:08, 761.42s/it]

35

[FINISHED] Epoch 4, (Training Loss (per epoch): 131.8349 samp_prob: 0.5987)


Epoch 4, step 36/96, (Training Loss: 3.7025, samp_prob: 0.5987):  40%|████      | 4/10 [54:59<1:16:08, 761.42s/it]

36

[FINISHED] Epoch 4, (Training Loss (per epoch): 135.5374 samp_prob: 0.5987)


Epoch 4, step 37/96, (Training Loss: 3.6575, samp_prob: 0.5987):  40%|████      | 4/10 [55:06<1:16:08, 761.42s/it]

37

[FINISHED] Epoch 4, (Training Loss (per epoch): 139.1949 samp_prob: 0.5987)


Epoch 4, step 38/96, (Training Loss: 3.6149, samp_prob: 0.5987):  40%|████      | 4/10 [55:14<1:16:08, 761.42s/it]

38

[FINISHED] Epoch 4, (Training Loss (per epoch): 142.8098 samp_prob: 0.5987)


Epoch 4, step 39/96, (Training Loss: 3.6155, samp_prob: 0.5987):  40%|████      | 4/10 [55:22<1:16:08, 761.42s/it]

39

[FINISHED] Epoch 4, (Training Loss (per epoch): 146.4253 samp_prob: 0.5987)


Epoch 4, step 40/96, (Training Loss: 3.5382, samp_prob: 0.5987):  40%|████      | 4/10 [55:29<1:16:08, 761.42s/it]

40

[FINISHED] Epoch 4, (Training Loss (per epoch): 149.9635 samp_prob: 0.5987)


Epoch 4, step 41/96, (Training Loss: 3.5654, samp_prob: 0.5987):  40%|████      | 4/10 [55:37<1:16:08, 761.42s/it]

41

[FINISHED] Epoch 4, (Training Loss (per epoch): 153.5290 samp_prob: 0.5987)


Epoch 4, step 42/96, (Training Loss: 3.6493, samp_prob: 0.5987):  40%|████      | 4/10 [55:44<1:16:08, 761.42s/it]

42

[FINISHED] Epoch 4, (Training Loss (per epoch): 157.1783 samp_prob: 0.5987)


Epoch 4, step 43/96, (Training Loss: 3.6247, samp_prob: 0.5987):  40%|████      | 4/10 [55:52<1:16:08, 761.42s/it]

43

[FINISHED] Epoch 4, (Training Loss (per epoch): 160.8030 samp_prob: 0.5987)


Epoch 4, step 44/96, (Training Loss: 3.6000, samp_prob: 0.5987):  40%|████      | 4/10 [56:00<1:16:08, 761.42s/it]

44

[FINISHED] Epoch 4, (Training Loss (per epoch): 164.4030 samp_prob: 0.5987)


Epoch 4, step 45/96, (Training Loss: 3.6077, samp_prob: 0.5987):  40%|████      | 4/10 [56:07<1:16:08, 761.42s/it]

45

[FINISHED] Epoch 4, (Training Loss (per epoch): 168.0107 samp_prob: 0.5987)


Epoch 4, step 46/96, (Training Loss: 3.6682, samp_prob: 0.5987):  40%|████      | 4/10 [56:15<1:16:08, 761.42s/it]

[[ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1 20 12 ...  2  2  2]
 [ 1  4 56 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 4, step 46/96......

id: 12
answer: ['<BOS>', 'a', 'toad', 'is', 'tickled']
prediction: ['<BOS>', 'a', 'person', 'is', 'is', 'a']
46

[FINISHED] Epoch 4, (Training Loss (per epoch): 171.6789 samp_prob: 0.5987)


Epoch 4, step 47/96, (Training Loss: 3.6090, samp_prob: 0.5987):  40%|████      | 4/10 [56:23<1:16:08, 761.42s/it]

47

[FINISHED] Epoch 4, (Training Loss (per epoch): 175.2879 samp_prob: 0.5987)


Epoch 4, step 48/96, (Training Loss: 3.5872, samp_prob: 0.5987):  40%|████      | 4/10 [56:31<1:16:08, 761.42s/it]

48

[FINISHED] Epoch 4, (Training Loss (per epoch): 178.8751 samp_prob: 0.5987)


Epoch 4, step 49/96, (Training Loss: 3.6582, samp_prob: 0.5987):  40%|████      | 4/10 [56:38<1:16:08, 761.42s/it]

49

[FINISHED] Epoch 4, (Training Loss (per epoch): 182.5333 samp_prob: 0.5987)


Epoch 4, step 50/96, (Training Loss: 3.7279, samp_prob: 0.5987):  40%|████      | 4/10 [56:46<1:16:08, 761.42s/it]

50

[FINISHED] Epoch 4, (Training Loss (per epoch): 186.2612 samp_prob: 0.5987)


Epoch 4, step 51/96, (Training Loss: 3.5647, samp_prob: 0.5987):  40%|████      | 4/10 [56:53<1:16:08, 761.42s/it]

51

[FINISHED] Epoch 4, (Training Loss (per epoch): 189.8259 samp_prob: 0.5987)


Epoch 4, step 52/96, (Training Loss: 3.5577, samp_prob: 0.5987):  40%|████      | 4/10 [57:01<1:16:08, 761.42s/it]

52

[FINISHED] Epoch 4, (Training Loss (per epoch): 193.3836 samp_prob: 0.5987)


Epoch 4, step 53/96, (Training Loss: 3.7078, samp_prob: 0.5987):  40%|████      | 4/10 [57:08<1:16:08, 761.42s/it]

53

[FINISHED] Epoch 4, (Training Loss (per epoch): 197.0914 samp_prob: 0.5987)


Epoch 4, step 54/96, (Training Loss: 3.5558, samp_prob: 0.5987):  40%|████      | 4/10 [57:16<1:16:08, 761.42s/it]

54

[FINISHED] Epoch 4, (Training Loss (per epoch): 200.6473 samp_prob: 0.5987)


Epoch 4, step 55/96, (Training Loss: 3.6263, samp_prob: 0.5987):  40%|████      | 4/10 [57:24<1:16:08, 761.42s/it]

55

[FINISHED] Epoch 4, (Training Loss (per epoch): 204.2736 samp_prob: 0.5987)


Epoch 4, step 56/96, (Training Loss: 3.5394, samp_prob: 0.5987):  40%|████      | 4/10 [57:31<1:16:08, 761.42s/it]

56

[FINISHED] Epoch 4, (Training Loss (per epoch): 207.8130 samp_prob: 0.5987)


Epoch 4, step 57/96, (Training Loss: 3.5244, samp_prob: 0.5987):  40%|████      | 4/10 [57:39<1:16:08, 761.42s/it]

57

[FINISHED] Epoch 4, (Training Loss (per epoch): 211.3374 samp_prob: 0.5987)


Epoch 4, step 58/96, (Training Loss: 3.5333, samp_prob: 0.5987):  40%|████      | 4/10 [57:47<1:16:08, 761.42s/it]

58

[FINISHED] Epoch 4, (Training Loss (per epoch): 214.8707 samp_prob: 0.5987)


Epoch 4, step 59/96, (Training Loss: 3.6052, samp_prob: 0.5987):  40%|████      | 4/10 [57:54<1:16:08, 761.42s/it]

59

[FINISHED] Epoch 4, (Training Loss (per epoch): 218.4760 samp_prob: 0.5987)


Epoch 4, step 60/96, (Training Loss: 3.5677, samp_prob: 0.5987):  40%|████      | 4/10 [58:02<1:16:08, 761.42s/it]

60

[FINISHED] Epoch 4, (Training Loss (per epoch): 222.0436 samp_prob: 0.5987)


Epoch 4, step 61/96, (Training Loss: 3.7217, samp_prob: 0.5987):  40%|████      | 4/10 [58:10<1:16:08, 761.42s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4 12 ...  2  2  2]]

[Train. Prediction] Epoch 4, step 61/96......

id: 1
answer: ['<BOS>', 'a', 'man', 'is', 'playing', 'a', 'violin', 'on', 'a', 'stage']
prediction: ['<BOS>', 'a', 'man', 'is', 'playing']
61

[FINISHED] Epoch 4, (Training Loss (per epoch): 225.7654 samp_prob: 0.5987)


Epoch 4, step 62/96, (Training Loss: 3.6448, samp_prob: 0.5987):  40%|████      | 4/10 [58:17<1:16:08, 761.42s/it]

62

[FINISHED] Epoch 4, (Training Loss (per epoch): 229.4101 samp_prob: 0.5987)


Epoch 4, step 63/96, (Training Loss: 3.6609, samp_prob: 0.5987):  40%|████      | 4/10 [58:25<1:16:08, 761.42s/it]

63

[FINISHED] Epoch 4, (Training Loss (per epoch): 233.0710 samp_prob: 0.5987)


Epoch 4, step 64/96, (Training Loss: 3.6670, samp_prob: 0.5987):  40%|████      | 4/10 [58:33<1:16:08, 761.42s/it]

64

[FINISHED] Epoch 4, (Training Loss (per epoch): 236.7380 samp_prob: 0.5987)


Epoch 4, step 65/96, (Training Loss: 3.4474, samp_prob: 0.5987):  40%|████      | 4/10 [58:40<1:16:08, 761.42s/it]

65

[FINISHED] Epoch 4, (Training Loss (per epoch): 240.1854 samp_prob: 0.5987)


Epoch 4, step 66/96, (Training Loss: 3.7621, samp_prob: 0.5987):  40%|████      | 4/10 [58:48<1:16:08, 761.42s/it]

66

[FINISHED] Epoch 4, (Training Loss (per epoch): 243.9475 samp_prob: 0.5987)


Epoch 4, step 67/96, (Training Loss: 3.7059, samp_prob: 0.5987):  40%|████      | 4/10 [58:55<1:16:08, 761.42s/it]

67

[FINISHED] Epoch 4, (Training Loss (per epoch): 247.6534 samp_prob: 0.5987)


Epoch 4, step 68/96, (Training Loss: 3.6241, samp_prob: 0.5987):  40%|████      | 4/10 [59:03<1:16:08, 761.42s/it]

68

[FINISHED] Epoch 4, (Training Loss (per epoch): 251.2774 samp_prob: 0.5987)


Epoch 4, step 69/96, (Training Loss: 3.5277, samp_prob: 0.5987):  40%|████      | 4/10 [59:11<1:16:08, 761.42s/it]

69

[FINISHED] Epoch 4, (Training Loss (per epoch): 254.8051 samp_prob: 0.5987)


Epoch 4, step 70/96, (Training Loss: 3.6659, samp_prob: 0.5987):  40%|████      | 4/10 [59:18<1:16:08, 761.42s/it]

70

[FINISHED] Epoch 4, (Training Loss (per epoch): 258.4710 samp_prob: 0.5987)


Epoch 4, step 71/96, (Training Loss: 3.5720, samp_prob: 0.5987):  40%|████      | 4/10 [59:26<1:16:08, 761.42s/it]

71

[FINISHED] Epoch 4, (Training Loss (per epoch): 262.0430 samp_prob: 0.5987)


Epoch 4, step 72/96, (Training Loss: 3.6625, samp_prob: 0.5987):  40%|████      | 4/10 [59:34<1:16:08, 761.42s/it]

72

[FINISHED] Epoch 4, (Training Loss (per epoch): 265.7055 samp_prob: 0.5987)


Epoch 4, step 73/96, (Training Loss: 3.5869, samp_prob: 0.5987):  40%|████      | 4/10 [59:41<1:16:08, 761.42s/it]

73

[FINISHED] Epoch 4, (Training Loss (per epoch): 269.2924 samp_prob: 0.5987)


Epoch 4, step 74/96, (Training Loss: 3.6206, samp_prob: 0.5987):  40%|████      | 4/10 [59:49<1:16:08, 761.42s/it]

74

[FINISHED] Epoch 4, (Training Loss (per epoch): 272.9129 samp_prob: 0.5987)


Epoch 4, step 75/96, (Training Loss: 3.6252, samp_prob: 0.5987):  40%|████      | 4/10 [59:57<1:16:08, 761.42s/it]

75

[FINISHED] Epoch 4, (Training Loss (per epoch): 276.5381 samp_prob: 0.5987)


Epoch 4, step 76/96, (Training Loss: 3.6766, samp_prob: 0.5987):  40%|████      | 4/10 [1:00:05<1:16:08, 761.42s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  6 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 ...
 [ 1  4  5 ...  2  2  2]
 [ 1  6  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 4, step 76/96......

id: 90
answer: ['<BOS>', 'a', 'man', 'ia', 'peeling', 'a', 'potato']
prediction: ['<BOS>', 'a', 'man', 'is', 'slicing', 'a']
76

[FINISHED] Epoch 4, (Training Loss (per epoch): 280.2147 samp_prob: 0.5987)


Epoch 4, step 77/96, (Training Loss: 3.5602, samp_prob: 0.5987):  40%|████      | 4/10 [1:00:12<1:16:08, 761.42s/it]

77

[FINISHED] Epoch 4, (Training Loss (per epoch): 283.7749 samp_prob: 0.5987)


Epoch 4, step 78/96, (Training Loss: 3.4938, samp_prob: 0.5987):  40%|████      | 4/10 [1:00:20<1:16:08, 761.42s/it]

78

[FINISHED] Epoch 4, (Training Loss (per epoch): 287.2687 samp_prob: 0.5987)


Epoch 4, step 79/96, (Training Loss: 3.5090, samp_prob: 0.5987):  40%|████      | 4/10 [1:00:28<1:16:08, 761.42s/it]

79

[FINISHED] Epoch 4, (Training Loss (per epoch): 290.7777 samp_prob: 0.5987)


Epoch 4, step 80/96, (Training Loss: 3.6149, samp_prob: 0.5987):  40%|████      | 4/10 [1:00:35<1:16:08, 761.42s/it]

80

[FINISHED] Epoch 4, (Training Loss (per epoch): 294.3926 samp_prob: 0.5987)


Epoch 4, step 81/96, (Training Loss: 3.4540, samp_prob: 0.5987):  40%|████      | 4/10 [1:00:43<1:16:08, 761.42s/it]

81

[FINISHED] Epoch 4, (Training Loss (per epoch): 297.8466 samp_prob: 0.5987)


Epoch 4, step 82/96, (Training Loss: 3.7215, samp_prob: 0.5987):  40%|████      | 4/10 [1:00:50<1:16:08, 761.42s/it]

82

[FINISHED] Epoch 4, (Training Loss (per epoch): 301.5681 samp_prob: 0.5987)


Epoch 4, step 83/96, (Training Loss: 3.6209, samp_prob: 0.5987):  40%|████      | 4/10 [1:00:58<1:16:08, 761.42s/it]

83

[FINISHED] Epoch 4, (Training Loss (per epoch): 305.1890 samp_prob: 0.5987)


Epoch 4, step 84/96, (Training Loss: 3.5325, samp_prob: 0.5987):  40%|████      | 4/10 [1:01:06<1:16:08, 761.42s/it]

84

[FINISHED] Epoch 4, (Training Loss (per epoch): 308.7215 samp_prob: 0.5987)


Epoch 4, step 85/96, (Training Loss: 3.6634, samp_prob: 0.5987):  40%|████      | 4/10 [1:01:13<1:16:08, 761.42s/it]

85

[FINISHED] Epoch 4, (Training Loss (per epoch): 312.3849 samp_prob: 0.5987)


Epoch 4, step 86/96, (Training Loss: 3.4909, samp_prob: 0.5987):  40%|████      | 4/10 [1:01:21<1:16:08, 761.42s/it]

86

[FINISHED] Epoch 4, (Training Loss (per epoch): 315.8758 samp_prob: 0.5987)


Epoch 4, step 87/96, (Training Loss: 3.6228, samp_prob: 0.5987):  40%|████      | 4/10 [1:01:29<1:16:08, 761.42s/it]

87

[FINISHED] Epoch 4, (Training Loss (per epoch): 319.4986 samp_prob: 0.5987)


Epoch 4, step 88/96, (Training Loss: 3.5941, samp_prob: 0.5987):  40%|████      | 4/10 [1:01:36<1:16:08, 761.42s/it]

88

[FINISHED] Epoch 4, (Training Loss (per epoch): 323.0927 samp_prob: 0.5987)


Epoch 4, step 89/96, (Training Loss: 3.5677, samp_prob: 0.5987):  40%|████      | 4/10 [1:01:44<1:16:08, 761.42s/it]

89

[FINISHED] Epoch 4, (Training Loss (per epoch): 326.6604 samp_prob: 0.5987)


Epoch 4, step 90/96, (Training Loss: 3.5135, samp_prob: 0.5987):  40%|████      | 4/10 [1:01:52<1:16:08, 761.42s/it]

90

[FINISHED] Epoch 4, (Training Loss (per epoch): 330.1738 samp_prob: 0.5987)


Epoch 4, step 91/96, (Training Loss: 3.6039, samp_prob: 0.5987):  40%|████      | 4/10 [1:02:00<1:16:08, 761.42s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  6 23 ...  2  2  2]
 [ 1  4 23 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 4, step 91/96......

id: 21
answer: ['<BOS>', 'a', 'woman', 'wearing', 'a', 'headset', 'is', 'singing', 'into', 'a', 'large', 'microphone']
prediction: ['<BOS>', 'a', 'woman', 'is', 'a', 'a']
91

[FINISHED] Epoch 4, (Training Loss (per epoch): 333.7777 samp_prob: 0.5987)


Epoch 4, step 92/96, (Training Loss: 3.4368, samp_prob: 0.5987):  40%|████      | 4/10 [1:02:07<1:16:08, 761.42s/it]

92

[FINISHED] Epoch 4, (Training Loss (per epoch): 337.2145 samp_prob: 0.5987)


Epoch 4, step 93/96, (Training Loss: 3.5072, samp_prob: 0.5987):  40%|████      | 4/10 [1:02:15<1:16:08, 761.42s/it]

93

[FINISHED] Epoch 4, (Training Loss (per epoch): 340.7217 samp_prob: 0.5987)


Epoch 4, step 94/96, (Training Loss: 3.6245, samp_prob: 0.5987):  40%|████      | 4/10 [1:02:23<1:16:08, 761.42s/it]

94

[FINISHED] Epoch 4, (Training Loss (per epoch): 344.3462 samp_prob: 0.5987)


Epoch 4, step 95/96, (Training Loss: 3.5781, samp_prob: 0.5987):  50%|█████     | 5/10 [1:02:30<1:02:46, 753.22s/it]

95

[FINISHED] Epoch 4, (Training Loss (per epoch): 347.9243 samp_prob: 0.5987)


Epoch 5, step 0/96, (Training Loss: 3.6284, samp_prob: 0.5000):  50%|█████     | 5/10 [1:02:38<1:02:46, 753.22s/it] 

0

[FINISHED] Epoch 5, (Training Loss (per epoch): 3.6284 samp_prob: 0.5000)


Epoch 5, step 1/96, (Training Loss: 3.6218, samp_prob: 0.5000):  50%|█████     | 5/10 [1:02:46<1:02:46, 753.22s/it]

[[ 1  4 24 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  6 12 ...  2  2  2]
 [ 1 21  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 5, step 1/96......

id: 116
answer: ['<BOS>', 'a', 'man', 'attempts', 'to', 'poll', 'dance', 'and', 'falls', 'down']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'a', 'a']
1

[FINISHED] Epoch 5, (Training Loss (per epoch): 7.2501 samp_prob: 0.5000)


Epoch 5, step 2/96, (Training Loss: 3.5196, samp_prob: 0.5000):  50%|█████     | 5/10 [1:02:54<1:02:46, 753.22s/it]

2

[FINISHED] Epoch 5, (Training Loss (per epoch): 10.7698 samp_prob: 0.5000)


Epoch 5, step 3/96, (Training Loss: 3.6270, samp_prob: 0.5000):  50%|█████     | 5/10 [1:03:01<1:02:46, 753.22s/it]

3

[FINISHED] Epoch 5, (Training Loss (per epoch): 14.3968 samp_prob: 0.5000)


Epoch 5, step 4/96, (Training Loss: 3.4712, samp_prob: 0.5000):  50%|█████     | 5/10 [1:03:09<1:02:46, 753.22s/it]

4

[FINISHED] Epoch 5, (Training Loss (per epoch): 17.8680 samp_prob: 0.5000)


Epoch 5, step 5/96, (Training Loss: 3.5053, samp_prob: 0.5000):  50%|█████     | 5/10 [1:03:17<1:02:46, 753.22s/it]

5

[FINISHED] Epoch 5, (Training Loss (per epoch): 21.3733 samp_prob: 0.5000)


Epoch 5, step 6/96, (Training Loss: 3.5223, samp_prob: 0.5000):  50%|█████     | 5/10 [1:03:24<1:02:46, 753.22s/it]

6

[FINISHED] Epoch 5, (Training Loss (per epoch): 24.8956 samp_prob: 0.5000)


Epoch 5, step 7/96, (Training Loss: 3.5642, samp_prob: 0.5000):  50%|█████     | 5/10 [1:03:32<1:02:46, 753.22s/it]

7

[FINISHED] Epoch 5, (Training Loss (per epoch): 28.4598 samp_prob: 0.5000)


Epoch 5, step 8/96, (Training Loss: 3.5322, samp_prob: 0.5000):  50%|█████     | 5/10 [1:03:39<1:02:46, 753.22s/it]

8

[FINISHED] Epoch 5, (Training Loss (per epoch): 31.9920 samp_prob: 0.5000)


Epoch 5, step 9/96, (Training Loss: 3.5732, samp_prob: 0.5000):  50%|█████     | 5/10 [1:03:47<1:02:46, 753.22s/it]

9

[FINISHED] Epoch 5, (Training Loss (per epoch): 35.5652 samp_prob: 0.5000)


Epoch 5, step 10/96, (Training Loss: 3.4997, samp_prob: 0.5000):  50%|█████     | 5/10 [1:03:55<1:02:46, 753.22s/it]

10

[FINISHED] Epoch 5, (Training Loss (per epoch): 39.0649 samp_prob: 0.5000)


Epoch 5, step 11/96, (Training Loss: 3.5378, samp_prob: 0.5000):  50%|█████     | 5/10 [1:04:02<1:02:46, 753.22s/it]

11

[FINISHED] Epoch 5, (Training Loss (per epoch): 42.6026 samp_prob: 0.5000)


Epoch 5, step 12/96, (Training Loss: 3.5778, samp_prob: 0.5000):  50%|█████     | 5/10 [1:04:10<1:02:46, 753.22s/it]

12

[FINISHED] Epoch 5, (Training Loss (per epoch): 46.1804 samp_prob: 0.5000)


Epoch 5, step 13/96, (Training Loss: 3.6774, samp_prob: 0.5000):  50%|█████     | 5/10 [1:04:18<1:02:46, 753.22s/it]

13

[FINISHED] Epoch 5, (Training Loss (per epoch): 49.8578 samp_prob: 0.5000)


Epoch 5, step 14/96, (Training Loss: 3.5975, samp_prob: 0.5000):  50%|█████     | 5/10 [1:04:25<1:02:46, 753.22s/it]

14

[FINISHED] Epoch 5, (Training Loss (per epoch): 53.4553 samp_prob: 0.5000)


Epoch 5, step 15/96, (Training Loss: 3.5905, samp_prob: 0.5000):  50%|█████     | 5/10 [1:04:33<1:02:46, 753.22s/it]

15

[FINISHED] Epoch 5, (Training Loss (per epoch): 57.0457 samp_prob: 0.5000)


Epoch 5, step 16/96, (Training Loss: 3.5263, samp_prob: 0.5000):  50%|█████     | 5/10 [1:04:41<1:02:46, 753.22s/it]

[[  1   4  23 ...   2   2   2]
 [  1   4   8 ...   2   2   2]
 [  1 103  12 ...   2   2   2]
 ...
 [  1   4  24 ...   2   2   2]
 [  1   4   8 ...   2   2   2]
 [  1   4   8 ...   2   2   2]]

[Train. Prediction] Epoch 5, step 16/96......

id: 208
answer: ['<BOS>', 'someone', 'is', 'petting', 'a', 'monkey']
prediction: ['<BOS>', 'a', 'person', 'is', 'a', 'a']
16

[FINISHED] Epoch 5, (Training Loss (per epoch): 60.5721 samp_prob: 0.5000)


Epoch 5, step 17/96, (Training Loss: 3.4938, samp_prob: 0.5000):  50%|█████     | 5/10 [1:04:49<1:02:46, 753.22s/it]

17

[FINISHED] Epoch 5, (Training Loss (per epoch): 64.0659 samp_prob: 0.5000)


Epoch 5, step 18/96, (Training Loss: 3.3941, samp_prob: 0.5000):  50%|█████     | 5/10 [1:04:56<1:02:46, 753.22s/it]

18

[FINISHED] Epoch 5, (Training Loss (per epoch): 67.4600 samp_prob: 0.5000)


Epoch 5, step 19/96, (Training Loss: 3.2711, samp_prob: 0.5000):  50%|█████     | 5/10 [1:05:04<1:02:46, 753.22s/it]

19

[FINISHED] Epoch 5, (Training Loss (per epoch): 70.7310 samp_prob: 0.5000)


Epoch 5, step 20/96, (Training Loss: 3.6201, samp_prob: 0.5000):  50%|█████     | 5/10 [1:05:11<1:02:46, 753.22s/it]

20

[FINISHED] Epoch 5, (Training Loss (per epoch): 74.3511 samp_prob: 0.5000)


Epoch 5, step 21/96, (Training Loss: 3.4702, samp_prob: 0.5000):  50%|█████     | 5/10 [1:05:19<1:02:46, 753.22s/it]

21

[FINISHED] Epoch 5, (Training Loss (per epoch): 77.8214 samp_prob: 0.5000)


Epoch 5, step 22/96, (Training Loss: 3.4793, samp_prob: 0.5000):  50%|█████     | 5/10 [1:05:27<1:02:46, 753.22s/it]

22

[FINISHED] Epoch 5, (Training Loss (per epoch): 81.3007 samp_prob: 0.5000)


Epoch 5, step 23/96, (Training Loss: 3.5378, samp_prob: 0.5000):  50%|█████     | 5/10 [1:05:34<1:02:46, 753.22s/it]

23

[FINISHED] Epoch 5, (Training Loss (per epoch): 84.8385 samp_prob: 0.5000)


Epoch 5, step 24/96, (Training Loss: 3.7282, samp_prob: 0.5000):  50%|█████     | 5/10 [1:05:42<1:02:46, 753.22s/it]

24

[FINISHED] Epoch 5, (Training Loss (per epoch): 88.5667 samp_prob: 0.5000)


Epoch 5, step 25/96, (Training Loss: 3.4684, samp_prob: 0.5000):  50%|█████     | 5/10 [1:05:49<1:02:46, 753.22s/it]

25

[FINISHED] Epoch 5, (Training Loss (per epoch): 92.0351 samp_prob: 0.5000)


Epoch 5, step 26/96, (Training Loss: 3.5340, samp_prob: 0.5000):  50%|█████     | 5/10 [1:05:57<1:02:46, 753.22s/it]

26

[FINISHED] Epoch 5, (Training Loss (per epoch): 95.5691 samp_prob: 0.5000)


Epoch 5, step 27/96, (Training Loss: 3.4317, samp_prob: 0.5000):  50%|█████     | 5/10 [1:06:05<1:02:46, 753.22s/it]

27

[FINISHED] Epoch 5, (Training Loss (per epoch): 99.0009 samp_prob: 0.5000)


Epoch 5, step 28/96, (Training Loss: 3.3997, samp_prob: 0.5000):  50%|█████     | 5/10 [1:06:12<1:02:46, 753.22s/it]

28

[FINISHED] Epoch 5, (Training Loss (per epoch): 102.4006 samp_prob: 0.5000)


Epoch 5, step 29/96, (Training Loss: 3.5090, samp_prob: 0.5000):  50%|█████     | 5/10 [1:06:20<1:02:46, 753.22s/it]

29

[FINISHED] Epoch 5, (Training Loss (per epoch): 105.9096 samp_prob: 0.5000)


Epoch 5, step 30/96, (Training Loss: 3.6055, samp_prob: 0.5000):  50%|█████     | 5/10 [1:06:28<1:02:46, 753.22s/it]

30

[FINISHED] Epoch 5, (Training Loss (per epoch): 109.5151 samp_prob: 0.5000)


Epoch 5, step 31/96, (Training Loss: 3.4930, samp_prob: 0.5000):  50%|█████     | 5/10 [1:06:36<1:02:46, 753.22s/it]

[[ 1 20 12 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4 17 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 5, step 31/96......

id: 46
answer: ['<BOS>', 'a', 'black', 'colored', 'pet', 'dog', 'is', 'biting', 'a', 'stuffed', 'toy', 'fish']
prediction: ['<BOS>', 'a', 'man', 'is', 'playing', 'a', 'a']
31

[FINISHED] Epoch 5, (Training Loss (per epoch): 113.0081 samp_prob: 0.5000)


Epoch 5, step 32/96, (Training Loss: 3.4632, samp_prob: 0.5000):  50%|█████     | 5/10 [1:06:43<1:02:46, 753.22s/it]

32

[FINISHED] Epoch 5, (Training Loss (per epoch): 116.4712 samp_prob: 0.5000)


Epoch 5, step 33/96, (Training Loss: 3.5317, samp_prob: 0.5000):  50%|█████     | 5/10 [1:06:51<1:02:46, 753.22s/it]

33

[FINISHED] Epoch 5, (Training Loss (per epoch): 120.0029 samp_prob: 0.5000)


Epoch 5, step 34/96, (Training Loss: 3.5338, samp_prob: 0.5000):  50%|█████     | 5/10 [1:06:59<1:02:46, 753.22s/it]

34

[FINISHED] Epoch 5, (Training Loss (per epoch): 123.5367 samp_prob: 0.5000)


Epoch 5, step 35/96, (Training Loss: 3.5394, samp_prob: 0.5000):  50%|█████     | 5/10 [1:07:06<1:02:46, 753.22s/it]

35

[FINISHED] Epoch 5, (Training Loss (per epoch): 127.0761 samp_prob: 0.5000)


Epoch 5, step 36/96, (Training Loss: 3.5630, samp_prob: 0.5000):  50%|█████     | 5/10 [1:07:14<1:02:46, 753.22s/it]

36

[FINISHED] Epoch 5, (Training Loss (per epoch): 130.6391 samp_prob: 0.5000)


Epoch 5, step 37/96, (Training Loss: 3.5393, samp_prob: 0.5000):  50%|█████     | 5/10 [1:07:21<1:02:46, 753.22s/it]

37

[FINISHED] Epoch 5, (Training Loss (per epoch): 134.1784 samp_prob: 0.5000)


Epoch 5, step 38/96, (Training Loss: 3.4901, samp_prob: 0.5000):  50%|█████     | 5/10 [1:07:29<1:02:46, 753.22s/it]

38

[FINISHED] Epoch 5, (Training Loss (per epoch): 137.6684 samp_prob: 0.5000)


Epoch 5, step 39/96, (Training Loss: 3.4899, samp_prob: 0.5000):  50%|█████     | 5/10 [1:07:37<1:02:46, 753.22s/it]

39

[FINISHED] Epoch 5, (Training Loss (per epoch): 141.1584 samp_prob: 0.5000)


Epoch 5, step 40/96, (Training Loss: 3.4014, samp_prob: 0.5000):  50%|█████     | 5/10 [1:07:44<1:02:46, 753.22s/it]

40

[FINISHED] Epoch 5, (Training Loss (per epoch): 144.5598 samp_prob: 0.5000)


Epoch 5, step 41/96, (Training Loss: 3.4593, samp_prob: 0.5000):  50%|█████     | 5/10 [1:07:52<1:02:46, 753.22s/it]

41

[FINISHED] Epoch 5, (Training Loss (per epoch): 148.0191 samp_prob: 0.5000)


Epoch 5, step 42/96, (Training Loss: 3.5258, samp_prob: 0.5000):  50%|█████     | 5/10 [1:07:59<1:02:46, 753.22s/it]

42

[FINISHED] Epoch 5, (Training Loss (per epoch): 151.5448 samp_prob: 0.5000)


Epoch 5, step 43/96, (Training Loss: 3.5182, samp_prob: 0.5000):  50%|█████     | 5/10 [1:08:07<1:02:46, 753.22s/it]

43

[FINISHED] Epoch 5, (Training Loss (per epoch): 155.0630 samp_prob: 0.5000)


Epoch 5, step 44/96, (Training Loss: 3.4455, samp_prob: 0.5000):  50%|█████     | 5/10 [1:08:15<1:02:46, 753.22s/it]

44

[FINISHED] Epoch 5, (Training Loss (per epoch): 158.5086 samp_prob: 0.5000)


Epoch 5, step 45/96, (Training Loss: 3.4817, samp_prob: 0.5000):  50%|█████     | 5/10 [1:08:22<1:02:46, 753.22s/it]

45

[FINISHED] Epoch 5, (Training Loss (per epoch): 161.9902 samp_prob: 0.5000)


Epoch 5, step 46/96, (Training Loss: 3.5559, samp_prob: 0.5000):  50%|█████     | 5/10 [1:08:31<1:02:46, 753.22s/it]

[[ 1  4 12 ...  2  2  2]
 [ 1  4 23 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  6 39 ...  2  2  2]
 [ 1 20 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 5, step 46/96......

id: 249
answer: ['<BOS>', 'the', 'lady', 'cut', 'a', 'piece', 'of', 'lemon', 'rind']
prediction: ['<BOS>', 'a', 'woman', 'is', 'slicing', 'a']
46

[FINISHED] Epoch 5, (Training Loss (per epoch): 165.5461 samp_prob: 0.5000)


Epoch 5, step 47/96, (Training Loss: 3.4564, samp_prob: 0.5000):  50%|█████     | 5/10 [1:08:38<1:02:46, 753.22s/it]

47

[FINISHED] Epoch 5, (Training Loss (per epoch): 169.0025 samp_prob: 0.5000)


Epoch 5, step 48/96, (Training Loss: 3.4617, samp_prob: 0.5000):  50%|█████     | 5/10 [1:08:46<1:02:46, 753.22s/it]

48

[FINISHED] Epoch 5, (Training Loss (per epoch): 172.4642 samp_prob: 0.5000)


Epoch 5, step 49/96, (Training Loss: 3.5601, samp_prob: 0.5000):  50%|█████     | 5/10 [1:08:53<1:02:46, 753.22s/it]

49

[FINISHED] Epoch 5, (Training Loss (per epoch): 176.0244 samp_prob: 0.5000)


Epoch 5, step 50/96, (Training Loss: 3.6371, samp_prob: 0.5000):  50%|█████     | 5/10 [1:09:01<1:02:46, 753.22s/it]

50

[FINISHED] Epoch 5, (Training Loss (per epoch): 179.6615 samp_prob: 0.5000)


Epoch 5, step 51/96, (Training Loss: 3.4315, samp_prob: 0.5000):  50%|█████     | 5/10 [1:09:09<1:02:46, 753.22s/it]

51

[FINISHED] Epoch 5, (Training Loss (per epoch): 183.0930 samp_prob: 0.5000)


Epoch 5, step 52/96, (Training Loss: 3.4281, samp_prob: 0.5000):  50%|█████     | 5/10 [1:09:16<1:02:46, 753.22s/it]

52

[FINISHED] Epoch 5, (Training Loss (per epoch): 186.5211 samp_prob: 0.5000)


Epoch 5, step 53/96, (Training Loss: 3.5843, samp_prob: 0.5000):  50%|█████     | 5/10 [1:09:24<1:02:46, 753.22s/it]

53

[FINISHED] Epoch 5, (Training Loss (per epoch): 190.1054 samp_prob: 0.5000)


Epoch 5, step 54/96, (Training Loss: 3.4503, samp_prob: 0.5000):  50%|█████     | 5/10 [1:09:32<1:02:46, 753.22s/it]

54

[FINISHED] Epoch 5, (Training Loss (per epoch): 193.5557 samp_prob: 0.5000)


Epoch 5, step 55/96, (Training Loss: 3.4615, samp_prob: 0.5000):  50%|█████     | 5/10 [1:09:39<1:02:46, 753.22s/it]

55

[FINISHED] Epoch 5, (Training Loss (per epoch): 197.0172 samp_prob: 0.5000)


Epoch 5, step 56/96, (Training Loss: 3.4152, samp_prob: 0.5000):  50%|█████     | 5/10 [1:09:47<1:02:46, 753.22s/it]

56

[FINISHED] Epoch 5, (Training Loss (per epoch): 200.4324 samp_prob: 0.5000)


Epoch 5, step 57/96, (Training Loss: 3.3727, samp_prob: 0.5000):  50%|█████     | 5/10 [1:09:54<1:02:46, 753.22s/it]

57

[FINISHED] Epoch 5, (Training Loss (per epoch): 203.8051 samp_prob: 0.5000)


Epoch 5, step 58/96, (Training Loss: 3.3915, samp_prob: 0.5000):  50%|█████     | 5/10 [1:10:02<1:02:46, 753.22s/it]

58

[FINISHED] Epoch 5, (Training Loss (per epoch): 207.1966 samp_prob: 0.5000)


Epoch 5, step 59/96, (Training Loss: 3.5054, samp_prob: 0.5000):  50%|█████     | 5/10 [1:10:10<1:02:46, 753.22s/it]

59

[FINISHED] Epoch 5, (Training Loss (per epoch): 210.7020 samp_prob: 0.5000)


Epoch 5, step 60/96, (Training Loss: 3.4370, samp_prob: 0.5000):  50%|█████     | 5/10 [1:10:17<1:02:46, 753.22s/it]

60

[FINISHED] Epoch 5, (Training Loss (per epoch): 214.1390 samp_prob: 0.5000)


Epoch 5, step 61/96, (Training Loss: 3.5571, samp_prob: 0.5000):  50%|█████     | 5/10 [1:10:25<1:02:46, 753.22s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  6 12 ...  2  2  2]]

[Train. Prediction] Epoch 5, step 61/96......

id: 14
answer: ['<BOS>', 'a', 'person', 'with', 'a', 'knife', 'is', 'slicing', 'an', 'onion']
prediction: ['<BOS>', 'a', 'lady', 'is', 'slicing', 'an', 'a']
61

[FINISHED] Epoch 5, (Training Loss (per epoch): 217.6961 samp_prob: 0.5000)


Epoch 5, step 62/96, (Training Loss: 3.5166, samp_prob: 0.5000):  50%|█████     | 5/10 [1:10:33<1:02:46, 753.22s/it]

62

[FINISHED] Epoch 5, (Training Loss (per epoch): 221.2127 samp_prob: 0.5000)


Epoch 5, step 63/96, (Training Loss: 3.5294, samp_prob: 0.5000):  50%|█████     | 5/10 [1:10:40<1:02:46, 753.22s/it]

63

[FINISHED] Epoch 5, (Training Loss (per epoch): 224.7421 samp_prob: 0.5000)


Epoch 5, step 64/96, (Training Loss: 3.5453, samp_prob: 0.5000):  50%|█████     | 5/10 [1:10:48<1:02:46, 753.22s/it]

64

[FINISHED] Epoch 5, (Training Loss (per epoch): 228.2874 samp_prob: 0.5000)


Epoch 5, step 65/96, (Training Loss: 3.3073, samp_prob: 0.5000):  50%|█████     | 5/10 [1:10:56<1:02:46, 753.22s/it]

65

[FINISHED] Epoch 5, (Training Loss (per epoch): 231.5947 samp_prob: 0.5000)


Epoch 5, step 66/96, (Training Loss: 3.6199, samp_prob: 0.5000):  50%|█████     | 5/10 [1:11:03<1:02:46, 753.22s/it]

66

[FINISHED] Epoch 5, (Training Loss (per epoch): 235.2147 samp_prob: 0.5000)


Epoch 5, step 67/96, (Training Loss: 3.6234, samp_prob: 0.5000):  50%|█████     | 5/10 [1:11:11<1:02:46, 753.22s/it]

67

[FINISHED] Epoch 5, (Training Loss (per epoch): 238.8381 samp_prob: 0.5000)


Epoch 5, step 68/96, (Training Loss: 3.4725, samp_prob: 0.5000):  50%|█████     | 5/10 [1:11:19<1:02:46, 753.22s/it]

68

[FINISHED] Epoch 5, (Training Loss (per epoch): 242.3106 samp_prob: 0.5000)


Epoch 5, step 69/96, (Training Loss: 3.3706, samp_prob: 0.5000):  50%|█████     | 5/10 [1:11:26<1:02:46, 753.22s/it]

69

[FINISHED] Epoch 5, (Training Loss (per epoch): 245.6811 samp_prob: 0.5000)


Epoch 5, step 70/96, (Training Loss: 3.5581, samp_prob: 0.5000):  50%|█████     | 5/10 [1:11:34<1:02:46, 753.22s/it]

70

[FINISHED] Epoch 5, (Training Loss (per epoch): 249.2392 samp_prob: 0.5000)


Epoch 5, step 71/96, (Training Loss: 3.4381, samp_prob: 0.5000):  50%|█████     | 5/10 [1:11:42<1:02:46, 753.22s/it]

71

[FINISHED] Epoch 5, (Training Loss (per epoch): 252.6773 samp_prob: 0.5000)


Epoch 5, step 72/96, (Training Loss: 3.5720, samp_prob: 0.5000):  50%|█████     | 5/10 [1:11:49<1:02:46, 753.22s/it]

72

[FINISHED] Epoch 5, (Training Loss (per epoch): 256.2493 samp_prob: 0.5000)


Epoch 5, step 73/96, (Training Loss: 3.4463, samp_prob: 0.5000):  50%|█████     | 5/10 [1:11:57<1:02:46, 753.22s/it]

73

[FINISHED] Epoch 5, (Training Loss (per epoch): 259.6957 samp_prob: 0.5000)


Epoch 5, step 74/96, (Training Loss: 3.4886, samp_prob: 0.5000):  50%|█████     | 5/10 [1:12:04<1:02:46, 753.22s/it]

74

[FINISHED] Epoch 5, (Training Loss (per epoch): 263.1843 samp_prob: 0.5000)


Epoch 5, step 75/96, (Training Loss: 3.5038, samp_prob: 0.5000):  50%|█████     | 5/10 [1:12:12<1:02:46, 753.22s/it]

75

[FINISHED] Epoch 5, (Training Loss (per epoch): 266.6881 samp_prob: 0.5000)


Epoch 5, step 76/96, (Training Loss: 3.5358, samp_prob: 0.5000):  50%|█████     | 5/10 [1:12:20<1:02:46, 753.22s/it]

[[ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 ...
 [ 1  4  8 ...  2  2  2]
 [ 1  4 17 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 5, step 76/96......

id: 221
answer: ['<BOS>', 'a', 'dog', 'is', 'running']
prediction: ['<BOS>', 'a', 'dog', 'is', 'is', 'a', 'a']
76

[FINISHED] Epoch 5, (Training Loss (per epoch): 270.2239 samp_prob: 0.5000)


Epoch 5, step 77/96, (Training Loss: 3.4313, samp_prob: 0.5000):  50%|█████     | 5/10 [1:12:28<1:02:46, 753.22s/it]

77

[FINISHED] Epoch 5, (Training Loss (per epoch): 273.6552 samp_prob: 0.5000)


Epoch 5, step 78/96, (Training Loss: 3.3509, samp_prob: 0.5000):  50%|█████     | 5/10 [1:12:35<1:02:46, 753.22s/it]

78

[FINISHED] Epoch 5, (Training Loss (per epoch): 277.0061 samp_prob: 0.5000)


Epoch 5, step 79/96, (Training Loss: 3.3677, samp_prob: 0.5000):  50%|█████     | 5/10 [1:12:43<1:02:46, 753.22s/it]

79

[FINISHED] Epoch 5, (Training Loss (per epoch): 280.3739 samp_prob: 0.5000)


Epoch 5, step 80/96, (Training Loss: 3.4805, samp_prob: 0.5000):  50%|█████     | 5/10 [1:12:51<1:02:46, 753.22s/it]

80

[FINISHED] Epoch 5, (Training Loss (per epoch): 283.8544 samp_prob: 0.5000)


Epoch 5, step 81/96, (Training Loss: 3.3497, samp_prob: 0.5000):  50%|█████     | 5/10 [1:12:58<1:02:46, 753.22s/it]

81

[FINISHED] Epoch 5, (Training Loss (per epoch): 287.2041 samp_prob: 0.5000)


Epoch 5, step 82/96, (Training Loss: 3.6051, samp_prob: 0.5000):  50%|█████     | 5/10 [1:13:06<1:02:46, 753.22s/it]

82

[FINISHED] Epoch 5, (Training Loss (per epoch): 290.8091 samp_prob: 0.5000)


Epoch 5, step 83/96, (Training Loss: 3.5096, samp_prob: 0.5000):  50%|█████     | 5/10 [1:13:13<1:02:46, 753.22s/it]

83

[FINISHED] Epoch 5, (Training Loss (per epoch): 294.3187 samp_prob: 0.5000)


Epoch 5, step 84/96, (Training Loss: 3.4024, samp_prob: 0.5000):  50%|█████     | 5/10 [1:13:21<1:02:46, 753.22s/it]

84

[FINISHED] Epoch 5, (Training Loss (per epoch): 297.7212 samp_prob: 0.5000)


Epoch 5, step 85/96, (Training Loss: 3.5314, samp_prob: 0.5000):  50%|█████     | 5/10 [1:13:29<1:02:46, 753.22s/it]

85

[FINISHED] Epoch 5, (Training Loss (per epoch): 301.2525 samp_prob: 0.5000)


Epoch 5, step 86/96, (Training Loss: 3.3570, samp_prob: 0.5000):  50%|█████     | 5/10 [1:13:36<1:02:46, 753.22s/it]

86

[FINISHED] Epoch 5, (Training Loss (per epoch): 304.6095 samp_prob: 0.5000)


Epoch 5, step 87/96, (Training Loss: 3.5133, samp_prob: 0.5000):  50%|█████     | 5/10 [1:13:44<1:02:46, 753.22s/it]

87

[FINISHED] Epoch 5, (Training Loss (per epoch): 308.1228 samp_prob: 0.5000)


Epoch 5, step 88/96, (Training Loss: 3.4956, samp_prob: 0.5000):  50%|█████     | 5/10 [1:13:51<1:02:46, 753.22s/it]

88

[FINISHED] Epoch 5, (Training Loss (per epoch): 311.6184 samp_prob: 0.5000)


Epoch 5, step 89/96, (Training Loss: 3.4235, samp_prob: 0.5000):  50%|█████     | 5/10 [1:13:59<1:02:46, 753.22s/it]

89

[FINISHED] Epoch 5, (Training Loss (per epoch): 315.0419 samp_prob: 0.5000)


Epoch 5, step 90/96, (Training Loss: 3.3993, samp_prob: 0.5000):  50%|█████     | 5/10 [1:14:07<1:02:46, 753.22s/it]

90

[FINISHED] Epoch 5, (Training Loss (per epoch): 318.4412 samp_prob: 0.5000)


Epoch 5, step 91/96, (Training Loss: 3.4915, samp_prob: 0.5000):  50%|█████     | 5/10 [1:14:15<1:02:46, 753.22s/it]

[[  1   4   7 ...   2   2   2]
 [  1   4 105 ...   2   2   2]
 [  1   4  24 ...   2   2   2]
 ...
 [  1   4   7 ...   2   2   2]
 [  1   4   7 ...   2   2   2]
 [  1   4   7 ...   2   2   2]]

[Train. Prediction] Epoch 5, step 91/96......

id: 163
answer: ['<BOS>', 'someone', 'is', 'dicing', 'a', 'green', 'onion']
prediction: ['<BOS>', 'a', 'woman', 'is', 'cutting']
91

[FINISHED] Epoch 5, (Training Loss (per epoch): 321.9327 samp_prob: 0.5000)


Epoch 5, step 92/96, (Training Loss: 3.3335, samp_prob: 0.5000):  50%|█████     | 5/10 [1:14:22<1:02:46, 753.22s/it]

92

[FINISHED] Epoch 5, (Training Loss (per epoch): 325.2662 samp_prob: 0.5000)


Epoch 5, step 93/96, (Training Loss: 3.3687, samp_prob: 0.5000):  50%|█████     | 5/10 [1:14:30<1:02:46, 753.22s/it]

93

[FINISHED] Epoch 5, (Training Loss (per epoch): 328.6349 samp_prob: 0.5000)


Epoch 5, step 94/96, (Training Loss: 3.5314, samp_prob: 0.5000):  50%|█████     | 5/10 [1:14:37<1:02:46, 753.22s/it]

94

[FINISHED] Epoch 5, (Training Loss (per epoch): 332.1662 samp_prob: 0.5000)


Epoch 5, step 95/96, (Training Loss: 3.4790, samp_prob: 0.5000):  60%|██████    | 6/10 [1:14:45<49:50, 747.72s/it]  

95

[FINISHED] Epoch 5, (Training Loss (per epoch): 335.6452 samp_prob: 0.5000)


Epoch 6, step 0/96, (Training Loss: 3.5204, samp_prob: 0.4013):  60%|██████    | 6/10 [1:14:53<49:50, 747.72s/it] 

0

[FINISHED] Epoch 6, (Training Loss (per epoch): 3.5204 samp_prob: 0.4013)


Epoch 6, step 1/96, (Training Loss: 3.4857, samp_prob: 0.4013):  60%|██████    | 6/10 [1:15:01<49:50, 747.72s/it]

[[ 1  4 24 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  6 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 6, step 1/96......

id: 139
answer: ['<BOS>', 'someone', 'cut', 'up', 'the', 'orange', 'bell', 'pepper']
prediction: ['<BOS>', 'the', 'woman', 'is', 'slicing', 'cutting']
1

[FINISHED] Epoch 6, (Training Loss (per epoch): 7.0062 samp_prob: 0.4013)


Epoch 6, step 2/96, (Training Loss: 3.4296, samp_prob: 0.4013):  60%|██████    | 6/10 [1:15:08<49:50, 747.72s/it]

2

[FINISHED] Epoch 6, (Training Loss (per epoch): 10.4358 samp_prob: 0.4013)


Epoch 6, step 3/96, (Training Loss: 3.4917, samp_prob: 0.4013):  60%|██████    | 6/10 [1:15:16<49:50, 747.72s/it]

3

[FINISHED] Epoch 6, (Training Loss (per epoch): 13.9275 samp_prob: 0.4013)


Epoch 6, step 4/96, (Training Loss: 3.3573, samp_prob: 0.4013):  60%|██████    | 6/10 [1:15:23<49:50, 747.72s/it]

4

[FINISHED] Epoch 6, (Training Loss (per epoch): 17.2848 samp_prob: 0.4013)


Epoch 6, step 5/96, (Training Loss: 3.3800, samp_prob: 0.4013):  60%|██████    | 6/10 [1:15:31<49:50, 747.72s/it]

5

[FINISHED] Epoch 6, (Training Loss (per epoch): 20.6649 samp_prob: 0.4013)


Epoch 6, step 6/96, (Training Loss: 3.3848, samp_prob: 0.4013):  60%|██████    | 6/10 [1:15:39<49:50, 747.72s/it]

6

[FINISHED] Epoch 6, (Training Loss (per epoch): 24.0497 samp_prob: 0.4013)


Epoch 6, step 7/96, (Training Loss: 3.4353, samp_prob: 0.4013):  60%|██████    | 6/10 [1:15:46<49:50, 747.72s/it]

7

[FINISHED] Epoch 6, (Training Loss (per epoch): 27.4850 samp_prob: 0.4013)


Epoch 6, step 8/96, (Training Loss: 3.4001, samp_prob: 0.4013):  60%|██████    | 6/10 [1:15:54<49:50, 747.72s/it]

8

[FINISHED] Epoch 6, (Training Loss (per epoch): 30.8851 samp_prob: 0.4013)


Epoch 6, step 9/96, (Training Loss: 3.4378, samp_prob: 0.4013):  60%|██████    | 6/10 [1:16:01<49:50, 747.72s/it]

9

[FINISHED] Epoch 6, (Training Loss (per epoch): 34.3230 samp_prob: 0.4013)


Epoch 6, step 10/96, (Training Loss: 3.3602, samp_prob: 0.4013):  60%|██████    | 6/10 [1:16:09<49:50, 747.72s/it]

10

[FINISHED] Epoch 6, (Training Loss (per epoch): 37.6832 samp_prob: 0.4013)


Epoch 6, step 11/96, (Training Loss: 3.4171, samp_prob: 0.4013):  60%|██████    | 6/10 [1:16:17<49:50, 747.72s/it]

11

[FINISHED] Epoch 6, (Training Loss (per epoch): 41.1003 samp_prob: 0.4013)


Epoch 6, step 12/96, (Training Loss: 3.4532, samp_prob: 0.4013):  60%|██████    | 6/10 [1:16:24<49:50, 747.72s/it]

12

[FINISHED] Epoch 6, (Training Loss (per epoch): 44.5535 samp_prob: 0.4013)


Epoch 6, step 13/96, (Training Loss: 3.5436, samp_prob: 0.4013):  60%|██████    | 6/10 [1:16:32<49:50, 747.72s/it]

13

[FINISHED] Epoch 6, (Training Loss (per epoch): 48.0971 samp_prob: 0.4013)


Epoch 6, step 14/96, (Training Loss: 3.5102, samp_prob: 0.4013):  60%|██████    | 6/10 [1:16:39<49:50, 747.72s/it]

14

[FINISHED] Epoch 6, (Training Loss (per epoch): 51.6073 samp_prob: 0.4013)


Epoch 6, step 15/96, (Training Loss: 3.4368, samp_prob: 0.4013):  60%|██████    | 6/10 [1:16:47<49:50, 747.72s/it]

15

[FINISHED] Epoch 6, (Training Loss (per epoch): 55.0441 samp_prob: 0.4013)


Epoch 6, step 16/96, (Training Loss: 3.4168, samp_prob: 0.4013):  60%|██████    | 6/10 [1:16:55<49:50, 747.72s/it]

[[ 1  4 23 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  6 56 ...  2  2  2]
 ...
 [ 1  4 24 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 6, step 16/96......

id: 191
answer: ['<BOS>', 'a', 'man', 'is', 'feeding', 'a', 'baby', 'panda']
prediction: ['<BOS>', 'a', 'are', 'are', 'in', 'a', 'in']
16

[FINISHED] Epoch 6, (Training Loss (per epoch): 58.4609 samp_prob: 0.4013)


Epoch 6, step 17/96, (Training Loss: 3.3963, samp_prob: 0.4013):  60%|██████    | 6/10 [1:17:03<49:50, 747.72s/it]

17

[FINISHED] Epoch 6, (Training Loss (per epoch): 61.8572 samp_prob: 0.4013)


Epoch 6, step 18/96, (Training Loss: 3.2614, samp_prob: 0.4013):  60%|██████    | 6/10 [1:17:10<49:50, 747.72s/it]

18

[FINISHED] Epoch 6, (Training Loss (per epoch): 65.1186 samp_prob: 0.4013)


Epoch 6, step 19/96, (Training Loss: 3.1255, samp_prob: 0.4013):  60%|██████    | 6/10 [1:17:18<49:50, 747.72s/it]

19

[FINISHED] Epoch 6, (Training Loss (per epoch): 68.2440 samp_prob: 0.4013)


Epoch 6, step 20/96, (Training Loss: 3.4296, samp_prob: 0.4013):  60%|██████    | 6/10 [1:17:26<49:50, 747.72s/it]

20

[FINISHED] Epoch 6, (Training Loss (per epoch): 71.6736 samp_prob: 0.4013)


Epoch 6, step 21/96, (Training Loss: 3.3309, samp_prob: 0.4013):  60%|██████    | 6/10 [1:17:33<49:50, 747.72s/it]

21

[FINISHED] Epoch 6, (Training Loss (per epoch): 75.0045 samp_prob: 0.4013)


Epoch 6, step 22/96, (Training Loss: 3.3400, samp_prob: 0.4013):  60%|██████    | 6/10 [1:17:41<49:50, 747.72s/it]

22

[FINISHED] Epoch 6, (Training Loss (per epoch): 78.3445 samp_prob: 0.4013)


Epoch 6, step 23/96, (Training Loss: 3.4410, samp_prob: 0.4013):  60%|██████    | 6/10 [1:17:48<49:50, 747.72s/it]

23

[FINISHED] Epoch 6, (Training Loss (per epoch): 81.7855 samp_prob: 0.4013)


Epoch 6, step 24/96, (Training Loss: 3.5891, samp_prob: 0.4013):  60%|██████    | 6/10 [1:17:56<49:50, 747.72s/it]

24

[FINISHED] Epoch 6, (Training Loss (per epoch): 85.3746 samp_prob: 0.4013)


Epoch 6, step 25/96, (Training Loss: 3.3229, samp_prob: 0.4013):  60%|██████    | 6/10 [1:18:04<49:50, 747.72s/it]

25

[FINISHED] Epoch 6, (Training Loss (per epoch): 88.6975 samp_prob: 0.4013)


Epoch 6, step 26/96, (Training Loss: 3.4402, samp_prob: 0.4013):  60%|██████    | 6/10 [1:18:11<49:50, 747.72s/it]

26

[FINISHED] Epoch 6, (Training Loss (per epoch): 92.1377 samp_prob: 0.4013)


Epoch 6, step 27/96, (Training Loss: 3.3040, samp_prob: 0.4013):  60%|██████    | 6/10 [1:18:19<49:50, 747.72s/it]

27

[FINISHED] Epoch 6, (Training Loss (per epoch): 95.4417 samp_prob: 0.4013)


Epoch 6, step 28/96, (Training Loss: 3.2471, samp_prob: 0.4013):  60%|██████    | 6/10 [1:18:26<49:50, 747.72s/it]

28

[FINISHED] Epoch 6, (Training Loss (per epoch): 98.6887 samp_prob: 0.4013)


Epoch 6, step 29/96, (Training Loss: 3.3660, samp_prob: 0.4013):  60%|██████    | 6/10 [1:18:34<49:50, 747.72s/it]

29

[FINISHED] Epoch 6, (Training Loss (per epoch): 102.0548 samp_prob: 0.4013)


Epoch 6, step 30/96, (Training Loss: 3.4955, samp_prob: 0.4013):  60%|██████    | 6/10 [1:18:42<49:50, 747.72s/it]

30

[FINISHED] Epoch 6, (Training Loss (per epoch): 105.5503 samp_prob: 0.4013)


Epoch 6, step 31/96, (Training Loss: 3.4049, samp_prob: 0.4013):  60%|██████    | 6/10 [1:18:50<49:50, 747.72s/it]

[[ 1  4 39 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 6, step 31/96......

id: 39
answer: ['<BOS>', 'the', 'woman', 'is', 'cutting', 'broccoli']
prediction: ['<BOS>', 'a', 'woman', 'is', 'slicing', 'a']
31

[FINISHED] Epoch 6, (Training Loss (per epoch): 108.9552 samp_prob: 0.4013)


Epoch 6, step 32/96, (Training Loss: 3.3546, samp_prob: 0.4013):  60%|██████    | 6/10 [1:18:58<49:50, 747.72s/it]

32

[FINISHED] Epoch 6, (Training Loss (per epoch): 112.3098 samp_prob: 0.4013)


Epoch 6, step 33/96, (Training Loss: 3.4073, samp_prob: 0.4013):  60%|██████    | 6/10 [1:19:05<49:50, 747.72s/it]

33

[FINISHED] Epoch 6, (Training Loss (per epoch): 115.7171 samp_prob: 0.4013)


Epoch 6, step 34/96, (Training Loss: 3.4005, samp_prob: 0.4013):  60%|██████    | 6/10 [1:19:13<49:50, 747.72s/it]

34

[FINISHED] Epoch 6, (Training Loss (per epoch): 119.1176 samp_prob: 0.4013)


Epoch 6, step 35/96, (Training Loss: 3.4261, samp_prob: 0.4013):  60%|██████    | 6/10 [1:19:20<49:50, 747.72s/it]

35

[FINISHED] Epoch 6, (Training Loss (per epoch): 122.5437 samp_prob: 0.4013)


Epoch 6, step 36/96, (Training Loss: 3.4374, samp_prob: 0.4013):  60%|██████    | 6/10 [1:19:28<49:50, 747.72s/it]

36

[FINISHED] Epoch 6, (Training Loss (per epoch): 125.9811 samp_prob: 0.4013)


Epoch 6, step 37/96, (Training Loss: 3.4131, samp_prob: 0.4013):  60%|██████    | 6/10 [1:19:36<49:50, 747.72s/it]

37

[FINISHED] Epoch 6, (Training Loss (per epoch): 129.3942 samp_prob: 0.4013)


Epoch 6, step 38/96, (Training Loss: 3.3795, samp_prob: 0.4013):  60%|██████    | 6/10 [1:19:43<49:50, 747.72s/it]

38

[FINISHED] Epoch 6, (Training Loss (per epoch): 132.7737 samp_prob: 0.4013)


Epoch 6, step 39/96, (Training Loss: 3.3799, samp_prob: 0.4013):  60%|██████    | 6/10 [1:19:51<49:50, 747.72s/it]

39

[FINISHED] Epoch 6, (Training Loss (per epoch): 136.1535 samp_prob: 0.4013)


Epoch 6, step 40/96, (Training Loss: 3.2892, samp_prob: 0.4013):  60%|██████    | 6/10 [1:19:59<49:50, 747.72s/it]

40

[FINISHED] Epoch 6, (Training Loss (per epoch): 139.4428 samp_prob: 0.4013)


Epoch 6, step 41/96, (Training Loss: 3.3274, samp_prob: 0.4013):  60%|██████    | 6/10 [1:20:06<49:50, 747.72s/it]

41

[FINISHED] Epoch 6, (Training Loss (per epoch): 142.7702 samp_prob: 0.4013)


Epoch 6, step 42/96, (Training Loss: 3.4331, samp_prob: 0.4013):  60%|██████    | 6/10 [1:20:14<49:50, 747.72s/it]

42

[FINISHED] Epoch 6, (Training Loss (per epoch): 146.2032 samp_prob: 0.4013)


Epoch 6, step 43/96, (Training Loss: 3.4005, samp_prob: 0.4013):  60%|██████    | 6/10 [1:20:22<49:50, 747.72s/it]

43

[FINISHED] Epoch 6, (Training Loss (per epoch): 149.6037 samp_prob: 0.4013)


Epoch 6, step 44/96, (Training Loss: 3.3313, samp_prob: 0.4013):  60%|██████    | 6/10 [1:20:29<49:50, 747.72s/it]

44

[FINISHED] Epoch 6, (Training Loss (per epoch): 152.9350 samp_prob: 0.4013)


Epoch 6, step 45/96, (Training Loss: 3.3673, samp_prob: 0.4013):  60%|██████    | 6/10 [1:20:37<49:50, 747.72s/it]

45

[FINISHED] Epoch 6, (Training Loss (per epoch): 156.3024 samp_prob: 0.4013)


Epoch 6, step 46/96, (Training Loss: 3.4353, samp_prob: 0.4013):  60%|██████    | 6/10 [1:20:45<49:50, 747.72s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 12 ...  2  2  2]
 [ 1 20 56 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 6, step 46/96......

id: 142
answer: ['<BOS>', 'a', 'squirrel', 'is', 'dancing', 'in', 'the', 'grass']
prediction: ['<BOS>', 'a', 'baby', 'is', 'is']
46

[FINISHED] Epoch 6, (Training Loss (per epoch): 159.7377 samp_prob: 0.4013)


Epoch 6, step 47/96, (Training Loss: 3.3665, samp_prob: 0.4013):  60%|██████    | 6/10 [1:20:53<49:50, 747.72s/it]

47

[FINISHED] Epoch 6, (Training Loss (per epoch): 163.1042 samp_prob: 0.4013)


Epoch 6, step 48/96, (Training Loss: 3.3538, samp_prob: 0.4013):  60%|██████    | 6/10 [1:21:00<49:50, 747.72s/it]

48

[FINISHED] Epoch 6, (Training Loss (per epoch): 166.4580 samp_prob: 0.4013)


Epoch 6, step 49/96, (Training Loss: 3.4020, samp_prob: 0.4013):  60%|██████    | 6/10 [1:21:08<49:50, 747.72s/it]

49

[FINISHED] Epoch 6, (Training Loss (per epoch): 169.8600 samp_prob: 0.4013)


Epoch 6, step 50/96, (Training Loss: 3.4924, samp_prob: 0.4013):  60%|██████    | 6/10 [1:21:16<49:50, 747.72s/it]

50

[FINISHED] Epoch 6, (Training Loss (per epoch): 173.3523 samp_prob: 0.4013)


Epoch 6, step 51/96, (Training Loss: 3.3123, samp_prob: 0.4013):  60%|██████    | 6/10 [1:21:23<49:50, 747.72s/it]

51

[FINISHED] Epoch 6, (Training Loss (per epoch): 176.6646 samp_prob: 0.4013)


Epoch 6, step 52/96, (Training Loss: 3.3370, samp_prob: 0.4013):  60%|██████    | 6/10 [1:21:31<49:50, 747.72s/it]

52

[FINISHED] Epoch 6, (Training Loss (per epoch): 180.0016 samp_prob: 0.4013)


Epoch 6, step 53/96, (Training Loss: 3.4835, samp_prob: 0.4013):  60%|██████    | 6/10 [1:21:39<49:50, 747.72s/it]

53

[FINISHED] Epoch 6, (Training Loss (per epoch): 183.4851 samp_prob: 0.4013)


Epoch 6, step 54/96, (Training Loss: 3.3232, samp_prob: 0.4013):  60%|██████    | 6/10 [1:21:46<49:50, 747.72s/it]

54

[FINISHED] Epoch 6, (Training Loss (per epoch): 186.8084 samp_prob: 0.4013)


Epoch 6, step 55/96, (Training Loss: 3.4035, samp_prob: 0.4013):  60%|██████    | 6/10 [1:21:54<49:50, 747.72s/it]

55

[FINISHED] Epoch 6, (Training Loss (per epoch): 190.2118 samp_prob: 0.4013)


Epoch 6, step 56/96, (Training Loss: 3.3071, samp_prob: 0.4013):  60%|██████    | 6/10 [1:22:01<49:50, 747.72s/it]

56

[FINISHED] Epoch 6, (Training Loss (per epoch): 193.5190 samp_prob: 0.4013)


Epoch 6, step 57/96, (Training Loss: 3.2398, samp_prob: 0.4013):  60%|██████    | 6/10 [1:22:09<49:50, 747.72s/it]

57

[FINISHED] Epoch 6, (Training Loss (per epoch): 196.7588 samp_prob: 0.4013)


Epoch 6, step 58/96, (Training Loss: 3.2953, samp_prob: 0.4013):  60%|██████    | 6/10 [1:22:17<49:50, 747.72s/it]

58

[FINISHED] Epoch 6, (Training Loss (per epoch): 200.0541 samp_prob: 0.4013)


Epoch 6, step 59/96, (Training Loss: 3.3807, samp_prob: 0.4013):  60%|██████    | 6/10 [1:22:24<49:50, 747.72s/it]

59

[FINISHED] Epoch 6, (Training Loss (per epoch): 203.4348 samp_prob: 0.4013)


Epoch 6, step 60/96, (Training Loss: 3.3130, samp_prob: 0.4013):  60%|██████    | 6/10 [1:22:32<49:50, 747.72s/it]

60

[FINISHED] Epoch 6, (Training Loss (per epoch): 206.7478 samp_prob: 0.4013)


Epoch 6, step 61/96, (Training Loss: 3.4675, samp_prob: 0.4013):  60%|██████    | 6/10 [1:22:40<49:50, 747.72s/it]

[[ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 23 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4 12 ...  2  2  2]]

[Train. Prediction] Epoch 6, step 61/96......

id: 62
answer: ['<BOS>', 'two', 'men', 'are', 'walking', 'together']
prediction: ['<BOS>', 'two', 'men', 'are', 'are']
61

[FINISHED] Epoch 6, (Training Loss (per epoch): 210.2154 samp_prob: 0.4013)


Epoch 6, step 62/96, (Training Loss: 3.3940, samp_prob: 0.4013):  60%|██████    | 6/10 [1:22:48<49:50, 747.72s/it]

62

[FINISHED] Epoch 6, (Training Loss (per epoch): 213.6094 samp_prob: 0.4013)


Epoch 6, step 63/96, (Training Loss: 3.4137, samp_prob: 0.4013):  60%|██████    | 6/10 [1:22:55<49:50, 747.72s/it]

63

[FINISHED] Epoch 6, (Training Loss (per epoch): 217.0230 samp_prob: 0.4013)


Epoch 6, step 64/96, (Training Loss: 3.4666, samp_prob: 0.4013):  60%|██████    | 6/10 [1:23:03<49:50, 747.72s/it]

64

[FINISHED] Epoch 6, (Training Loss (per epoch): 220.4896 samp_prob: 0.4013)


Epoch 6, step 65/96, (Training Loss: 3.1886, samp_prob: 0.4013):  60%|██████    | 6/10 [1:23:11<49:50, 747.72s/it]

65

[FINISHED] Epoch 6, (Training Loss (per epoch): 223.6781 samp_prob: 0.4013)


Epoch 6, step 66/96, (Training Loss: 3.4877, samp_prob: 0.4013):  60%|██████    | 6/10 [1:23:18<49:50, 747.72s/it]

66

[FINISHED] Epoch 6, (Training Loss (per epoch): 227.1659 samp_prob: 0.4013)


Epoch 6, step 67/96, (Training Loss: 3.4793, samp_prob: 0.4013):  60%|██████    | 6/10 [1:23:26<49:50, 747.72s/it]

67

[FINISHED] Epoch 6, (Training Loss (per epoch): 230.6452 samp_prob: 0.4013)


Epoch 6, step 68/96, (Training Loss: 3.3492, samp_prob: 0.4013):  60%|██████    | 6/10 [1:23:34<49:50, 747.72s/it]

68

[FINISHED] Epoch 6, (Training Loss (per epoch): 233.9944 samp_prob: 0.4013)


Epoch 6, step 69/96, (Training Loss: 3.2586, samp_prob: 0.4013):  60%|██████    | 6/10 [1:23:41<49:50, 747.72s/it]

69

[FINISHED] Epoch 6, (Training Loss (per epoch): 237.2530 samp_prob: 0.4013)


Epoch 6, step 70/96, (Training Loss: 3.4384, samp_prob: 0.4013):  60%|██████    | 6/10 [1:23:49<49:50, 747.72s/it]

70

[FINISHED] Epoch 6, (Training Loss (per epoch): 240.6914 samp_prob: 0.4013)


Epoch 6, step 71/96, (Training Loss: 3.3106, samp_prob: 0.4013):  60%|██████    | 6/10 [1:23:56<49:50, 747.72s/it]

71

[FINISHED] Epoch 6, (Training Loss (per epoch): 244.0020 samp_prob: 0.4013)


Epoch 6, step 72/96, (Training Loss: 3.4580, samp_prob: 0.4013):  60%|██████    | 6/10 [1:24:04<49:50, 747.72s/it]

72

[FINISHED] Epoch 6, (Training Loss (per epoch): 247.4600 samp_prob: 0.4013)


Epoch 6, step 73/96, (Training Loss: 3.3431, samp_prob: 0.4013):  60%|██████    | 6/10 [1:24:12<49:50, 747.72s/it]

73

[FINISHED] Epoch 6, (Training Loss (per epoch): 250.8031 samp_prob: 0.4013)


Epoch 6, step 74/96, (Training Loss: 3.3770, samp_prob: 0.4013):  60%|██████    | 6/10 [1:24:19<49:50, 747.72s/it]

74

[FINISHED] Epoch 6, (Training Loss (per epoch): 254.1801 samp_prob: 0.4013)


Epoch 6, step 75/96, (Training Loss: 3.3903, samp_prob: 0.4013):  60%|██████    | 6/10 [1:24:27<49:50, 747.72s/it]

75

[FINISHED] Epoch 6, (Training Loss (per epoch): 257.5704 samp_prob: 0.4013)


Epoch 6, step 76/96, (Training Loss: 3.4217, samp_prob: 0.4013):  60%|██████    | 6/10 [1:24:35<49:50, 747.72s/it]

[[ 1  4  8 ...  2  2  2]
 [ 1  4 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 6, step 76/96......

id: 147
answer: ['<BOS>', 'a', 'woman', 'is', 'putting', 'sticks', 'lengthwise', 'into', 'large', 'pieces', 'of', 'uncooked', 'shrimp']
prediction: ['<BOS>', 'a', 'woman', 'is', 'the']
76

[FINISHED] Epoch 6, (Training Loss (per epoch): 260.9921 samp_prob: 0.4013)


Epoch 6, step 77/96, (Training Loss: 3.3444, samp_prob: 0.4013):  60%|██████    | 6/10 [1:24:43<49:50, 747.72s/it]

77

[FINISHED] Epoch 6, (Training Loss (per epoch): 264.3365 samp_prob: 0.4013)


Epoch 6, step 78/96, (Training Loss: 3.2288, samp_prob: 0.4013):  60%|██████    | 6/10 [1:24:50<49:50, 747.72s/it]

78

[FINISHED] Epoch 6, (Training Loss (per epoch): 267.5653 samp_prob: 0.4013)


Epoch 6, step 79/96, (Training Loss: 3.2620, samp_prob: 0.4013):  60%|██████    | 6/10 [1:24:58<49:50, 747.72s/it]

79

[FINISHED] Epoch 6, (Training Loss (per epoch): 270.8273 samp_prob: 0.4013)


Epoch 6, step 80/96, (Training Loss: 3.3793, samp_prob: 0.4013):  60%|██████    | 6/10 [1:25:06<49:50, 747.72s/it]

80

[FINISHED] Epoch 6, (Training Loss (per epoch): 274.2066 samp_prob: 0.4013)


Epoch 6, step 81/96, (Training Loss: 3.2235, samp_prob: 0.4013):  60%|██████    | 6/10 [1:25:13<49:50, 747.72s/it]

81

[FINISHED] Epoch 6, (Training Loss (per epoch): 277.4301 samp_prob: 0.4013)


Epoch 6, step 82/96, (Training Loss: 3.4946, samp_prob: 0.4013):  60%|██████    | 6/10 [1:25:21<49:50, 747.72s/it]

82

[FINISHED] Epoch 6, (Training Loss (per epoch): 280.9247 samp_prob: 0.4013)


Epoch 6, step 83/96, (Training Loss: 3.4030, samp_prob: 0.4013):  60%|██████    | 6/10 [1:25:28<49:50, 747.72s/it]

83

[FINISHED] Epoch 6, (Training Loss (per epoch): 284.3278 samp_prob: 0.4013)


Epoch 6, step 84/96, (Training Loss: 3.3076, samp_prob: 0.4013):  60%|██████    | 6/10 [1:25:36<49:50, 747.72s/it]

84

[FINISHED] Epoch 6, (Training Loss (per epoch): 287.6353 samp_prob: 0.4013)


Epoch 6, step 85/96, (Training Loss: 3.4420, samp_prob: 0.4013):  60%|██████    | 6/10 [1:25:44<49:50, 747.72s/it]

85

[FINISHED] Epoch 6, (Training Loss (per epoch): 291.0773 samp_prob: 0.4013)


Epoch 6, step 86/96, (Training Loss: 3.2788, samp_prob: 0.4013):  60%|██████    | 6/10 [1:25:51<49:50, 747.72s/it]

86

[FINISHED] Epoch 6, (Training Loss (per epoch): 294.3561 samp_prob: 0.4013)


Epoch 6, step 87/96, (Training Loss: 3.4081, samp_prob: 0.4013):  60%|██████    | 6/10 [1:25:59<49:50, 747.72s/it]

87

[FINISHED] Epoch 6, (Training Loss (per epoch): 297.7642 samp_prob: 0.4013)


Epoch 6, step 88/96, (Training Loss: 3.3878, samp_prob: 0.4013):  60%|██████    | 6/10 [1:26:07<49:50, 747.72s/it]

88

[FINISHED] Epoch 6, (Training Loss (per epoch): 301.1520 samp_prob: 0.4013)


Epoch 6, step 89/96, (Training Loss: 3.3704, samp_prob: 0.4013):  60%|██████    | 6/10 [1:26:14<49:50, 747.72s/it]

89

[FINISHED] Epoch 6, (Training Loss (per epoch): 304.5224 samp_prob: 0.4013)


Epoch 6, step 90/96, (Training Loss: 3.2704, samp_prob: 0.4013):  60%|██████    | 6/10 [1:26:22<49:50, 747.72s/it]

90

[FINISHED] Epoch 6, (Training Loss (per epoch): 307.7928 samp_prob: 0.4013)


Epoch 6, step 91/96, (Training Loss: 3.3709, samp_prob: 0.4013):  60%|██████    | 6/10 [1:26:30<49:50, 747.72s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  4 12 ...  2  2  2]
 [ 1  4 24 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 6, step 91/96......

id: 27
answer: ['<BOS>', 'a', 'baby', 'is', 'hugging', 'a', 'stuffed', 'animal']
prediction: ['<BOS>', 'a', 'baby', 'is', 'playing', 'a', 'a']
91

[FINISHED] Epoch 6, (Training Loss (per epoch): 311.1637 samp_prob: 0.4013)


Epoch 6, step 92/96, (Training Loss: 3.2214, samp_prob: 0.4013):  60%|██████    | 6/10 [1:26:38<49:50, 747.72s/it]

92

[FINISHED] Epoch 6, (Training Loss (per epoch): 314.3852 samp_prob: 0.4013)


Epoch 6, step 93/96, (Training Loss: 3.2832, samp_prob: 0.4013):  60%|██████    | 6/10 [1:26:45<49:50, 747.72s/it]

93

[FINISHED] Epoch 6, (Training Loss (per epoch): 317.6684 samp_prob: 0.4013)


Epoch 6, step 94/96, (Training Loss: 3.4118, samp_prob: 0.4013):  60%|██████    | 6/10 [1:26:53<49:50, 747.72s/it]

94

[FINISHED] Epoch 6, (Training Loss (per epoch): 321.0802 samp_prob: 0.4013)


Epoch 6, step 95/96, (Training Loss: 3.3693, samp_prob: 0.4013):  70%|███████   | 7/10 [1:27:01<37:12, 744.03s/it]

95

[FINISHED] Epoch 6, (Training Loss (per epoch): 324.4495 samp_prob: 0.4013)


Epoch 7, step 0/96, (Training Loss: 3.4267, samp_prob: 0.3100):  70%|███████   | 7/10 [1:27:08<37:12, 744.03s/it] 

0

[FINISHED] Epoch 7, (Training Loss (per epoch): 3.4267 samp_prob: 0.3100)


Epoch 7, step 1/96, (Training Loss: 3.3737, samp_prob: 0.3100):  70%|███████   | 7/10 [1:27:16<37:12, 744.03s/it]

[[ 1  4 24 ...  2  2  2]
 [ 1  4 17 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  6 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 7, step 1/96......

id: 113
answer: ['<BOS>', 'a', 'woman', 'is', 'cutting', 'an', 'onion']
prediction: ['<BOS>', 'a', 'woman', 'is', 'a', 'onion']
1

[FINISHED] Epoch 7, (Training Loss (per epoch): 6.8004 samp_prob: 0.3100)


Epoch 7, step 2/96, (Training Loss: 3.3448, samp_prob: 0.3100):  70%|███████   | 7/10 [1:27:24<37:12, 744.03s/it]

2

[FINISHED] Epoch 7, (Training Loss (per epoch): 10.1452 samp_prob: 0.3100)


Epoch 7, step 3/96, (Training Loss: 3.4077, samp_prob: 0.3100):  70%|███████   | 7/10 [1:27:32<37:12, 744.03s/it]

3

[FINISHED] Epoch 7, (Training Loss (per epoch): 13.5530 samp_prob: 0.3100)


Epoch 7, step 4/96, (Training Loss: 3.2589, samp_prob: 0.3100):  70%|███████   | 7/10 [1:27:39<37:12, 744.03s/it]

4

[FINISHED] Epoch 7, (Training Loss (per epoch): 16.8118 samp_prob: 0.3100)


Epoch 7, step 5/96, (Training Loss: 3.2700, samp_prob: 0.3100):  70%|███████   | 7/10 [1:27:47<37:12, 744.03s/it]

5

[FINISHED] Epoch 7, (Training Loss (per epoch): 20.0818 samp_prob: 0.3100)


Epoch 7, step 6/96, (Training Loss: 3.2944, samp_prob: 0.3100):  70%|███████   | 7/10 [1:27:55<37:12, 744.03s/it]

6

[FINISHED] Epoch 7, (Training Loss (per epoch): 23.3762 samp_prob: 0.3100)


Epoch 7, step 7/96, (Training Loss: 3.3435, samp_prob: 0.3100):  70%|███████   | 7/10 [1:28:02<37:12, 744.03s/it]

7

[FINISHED] Epoch 7, (Training Loss (per epoch): 26.7197 samp_prob: 0.3100)


Epoch 7, step 8/96, (Training Loss: 3.3153, samp_prob: 0.3100):  70%|███████   | 7/10 [1:28:10<37:12, 744.03s/it]

8

[FINISHED] Epoch 7, (Training Loss (per epoch): 30.0351 samp_prob: 0.3100)


Epoch 7, step 9/96, (Training Loss: 3.3270, samp_prob: 0.3100):  70%|███████   | 7/10 [1:28:18<37:12, 744.03s/it]

9

[FINISHED] Epoch 7, (Training Loss (per epoch): 33.3621 samp_prob: 0.3100)


Epoch 7, step 10/96, (Training Loss: 3.2851, samp_prob: 0.3100):  70%|███████   | 7/10 [1:28:25<37:12, 744.03s/it]

10

[FINISHED] Epoch 7, (Training Loss (per epoch): 36.6471 samp_prob: 0.3100)


Epoch 7, step 11/96, (Training Loss: 3.3351, samp_prob: 0.3100):  70%|███████   | 7/10 [1:28:33<37:12, 744.03s/it]

11

[FINISHED] Epoch 7, (Training Loss (per epoch): 39.9822 samp_prob: 0.3100)


Epoch 7, step 12/96, (Training Loss: 3.3338, samp_prob: 0.3100):  70%|███████   | 7/10 [1:28:41<37:12, 744.03s/it]

12

[FINISHED] Epoch 7, (Training Loss (per epoch): 43.3160 samp_prob: 0.3100)


Epoch 7, step 13/96, (Training Loss: 3.4402, samp_prob: 0.3100):  70%|███████   | 7/10 [1:28:48<37:12, 744.03s/it]

13

[FINISHED] Epoch 7, (Training Loss (per epoch): 46.7562 samp_prob: 0.3100)


Epoch 7, step 14/96, (Training Loss: 3.4247, samp_prob: 0.3100):  70%|███████   | 7/10 [1:28:56<37:12, 744.03s/it]

14

[FINISHED] Epoch 7, (Training Loss (per epoch): 50.1809 samp_prob: 0.3100)


Epoch 7, step 15/96, (Training Loss: 3.3925, samp_prob: 0.3100):  70%|███████   | 7/10 [1:29:04<37:12, 744.03s/it]

15

[FINISHED] Epoch 7, (Training Loss (per epoch): 53.5734 samp_prob: 0.3100)


Epoch 7, step 16/96, (Training Loss: 3.2884, samp_prob: 0.3100):  70%|███████   | 7/10 [1:29:12<37:12, 744.03s/it]

[[ 1  4 23 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4 56 ...  2  2  2]
 ...
 [ 1  4 23 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 7, step 16/96......

id: 126
answer: ['<BOS>', 'a', 'lady', 'sliced', 'a', 'lemon', 'with', 'a', 'tool']
prediction: ['<BOS>', 'a', 'is', 'is', 'a', 'a']
16

[FINISHED] Epoch 7, (Training Loss (per epoch): 56.8618 samp_prob: 0.3100)


Epoch 7, step 17/96, (Training Loss: 3.2631, samp_prob: 0.3100):  70%|███████   | 7/10 [1:29:19<37:12, 744.03s/it]

17

[FINISHED] Epoch 7, (Training Loss (per epoch): 60.1248 samp_prob: 0.3100)


Epoch 7, step 18/96, (Training Loss: 3.1743, samp_prob: 0.3100):  70%|███████   | 7/10 [1:29:27<37:12, 744.03s/it]

18

[FINISHED] Epoch 7, (Training Loss (per epoch): 63.2991 samp_prob: 0.3100)


Epoch 7, step 19/96, (Training Loss: 3.0374, samp_prob: 0.3100):  70%|███████   | 7/10 [1:29:35<37:12, 744.03s/it]

19

[FINISHED] Epoch 7, (Training Loss (per epoch): 66.3365 samp_prob: 0.3100)


Epoch 7, step 20/96, (Training Loss: 3.3731, samp_prob: 0.3100):  70%|███████   | 7/10 [1:29:42<37:12, 744.03s/it]

20

[FINISHED] Epoch 7, (Training Loss (per epoch): 69.7096 samp_prob: 0.3100)


Epoch 7, step 21/96, (Training Loss: 3.2575, samp_prob: 0.3100):  70%|███████   | 7/10 [1:29:50<37:12, 744.03s/it]

21

[FINISHED] Epoch 7, (Training Loss (per epoch): 72.9671 samp_prob: 0.3100)


Epoch 7, step 22/96, (Training Loss: 3.2142, samp_prob: 0.3100):  70%|███████   | 7/10 [1:29:58<37:12, 744.03s/it]

22

[FINISHED] Epoch 7, (Training Loss (per epoch): 76.1813 samp_prob: 0.3100)


Epoch 7, step 23/96, (Training Loss: 3.3624, samp_prob: 0.3100):  70%|███████   | 7/10 [1:30:05<37:12, 744.03s/it]

23

[FINISHED] Epoch 7, (Training Loss (per epoch): 79.5437 samp_prob: 0.3100)


Epoch 7, step 24/96, (Training Loss: 3.4892, samp_prob: 0.3100):  70%|███████   | 7/10 [1:30:13<37:12, 744.03s/it]

24

[FINISHED] Epoch 7, (Training Loss (per epoch): 83.0328 samp_prob: 0.3100)


Epoch 7, step 25/96, (Training Loss: 3.2484, samp_prob: 0.3100):  70%|███████   | 7/10 [1:30:20<37:12, 744.03s/it]

25

[FINISHED] Epoch 7, (Training Loss (per epoch): 86.2812 samp_prob: 0.3100)


Epoch 7, step 26/96, (Training Loss: 3.2999, samp_prob: 0.3100):  70%|███████   | 7/10 [1:30:28<37:12, 744.03s/it]

26

[FINISHED] Epoch 7, (Training Loss (per epoch): 89.5812 samp_prob: 0.3100)


Epoch 7, step 27/96, (Training Loss: 3.2030, samp_prob: 0.3100):  70%|███████   | 7/10 [1:30:36<37:12, 744.03s/it]

27

[FINISHED] Epoch 7, (Training Loss (per epoch): 92.7842 samp_prob: 0.3100)


Epoch 7, step 28/96, (Training Loss: 3.1667, samp_prob: 0.3100):  70%|███████   | 7/10 [1:30:43<37:12, 744.03s/it]

28

[FINISHED] Epoch 7, (Training Loss (per epoch): 95.9509 samp_prob: 0.3100)


Epoch 7, step 29/96, (Training Loss: 3.2708, samp_prob: 0.3100):  70%|███████   | 7/10 [1:30:51<37:12, 744.03s/it]

29

[FINISHED] Epoch 7, (Training Loss (per epoch): 99.2216 samp_prob: 0.3100)


Epoch 7, step 30/96, (Training Loss: 3.3990, samp_prob: 0.3100):  70%|███████   | 7/10 [1:30:59<37:12, 744.03s/it]

30

[FINISHED] Epoch 7, (Training Loss (per epoch): 102.6206 samp_prob: 0.3100)


Epoch 7, step 31/96, (Training Loss: 3.3093, samp_prob: 0.3100):  70%|███████   | 7/10 [1:31:07<37:12, 744.03s/it]

[[ 1  4 12 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 7, step 31/96......

id: 113
answer: ['<BOS>', 'a', 'small', 'panda', 'is', 'laying', 'close', 'to', 'a', 'larger', 'panda']
prediction: ['<BOS>', 'a', 'panda', 'are', 'on']
31

[FINISHED] Epoch 7, (Training Loss (per epoch): 105.9299 samp_prob: 0.3100)


Epoch 7, step 32/96, (Training Loss: 3.2727, samp_prob: 0.3100):  70%|███████   | 7/10 [1:31:14<37:12, 744.03s/it]

32

[FINISHED] Epoch 7, (Training Loss (per epoch): 109.2026 samp_prob: 0.3100)


Epoch 7, step 33/96, (Training Loss: 3.3521, samp_prob: 0.3100):  70%|███████   | 7/10 [1:31:22<37:12, 744.03s/it]

33

[FINISHED] Epoch 7, (Training Loss (per epoch): 112.5547 samp_prob: 0.3100)


Epoch 7, step 34/96, (Training Loss: 3.3097, samp_prob: 0.3100):  70%|███████   | 7/10 [1:31:30<37:12, 744.03s/it]

34

[FINISHED] Epoch 7, (Training Loss (per epoch): 115.8644 samp_prob: 0.3100)


Epoch 7, step 35/96, (Training Loss: 3.3188, samp_prob: 0.3100):  70%|███████   | 7/10 [1:31:37<37:12, 744.03s/it]

35

[FINISHED] Epoch 7, (Training Loss (per epoch): 119.1833 samp_prob: 0.3100)


Epoch 7, step 36/96, (Training Loss: 3.3599, samp_prob: 0.3100):  70%|███████   | 7/10 [1:31:45<37:12, 744.03s/it]

36

[FINISHED] Epoch 7, (Training Loss (per epoch): 122.5432 samp_prob: 0.3100)


Epoch 7, step 37/96, (Training Loss: 3.3119, samp_prob: 0.3100):  70%|███████   | 7/10 [1:31:53<37:12, 744.03s/it]

37

[FINISHED] Epoch 7, (Training Loss (per epoch): 125.8551 samp_prob: 0.3100)


Epoch 7, step 38/96, (Training Loss: 3.2918, samp_prob: 0.3100):  70%|███████   | 7/10 [1:32:00<37:12, 744.03s/it]

38

[FINISHED] Epoch 7, (Training Loss (per epoch): 129.1470 samp_prob: 0.3100)


Epoch 7, step 39/96, (Training Loss: 3.2834, samp_prob: 0.3100):  70%|███████   | 7/10 [1:32:08<37:12, 744.03s/it]

39

[FINISHED] Epoch 7, (Training Loss (per epoch): 132.4304 samp_prob: 0.3100)


Epoch 7, step 40/96, (Training Loss: 3.2076, samp_prob: 0.3100):  70%|███████   | 7/10 [1:32:15<37:12, 744.03s/it]

40

[FINISHED] Epoch 7, (Training Loss (per epoch): 135.6380 samp_prob: 0.3100)


Epoch 7, step 41/96, (Training Loss: 3.2346, samp_prob: 0.3100):  70%|███████   | 7/10 [1:32:23<37:12, 744.03s/it]

41

[FINISHED] Epoch 7, (Training Loss (per epoch): 138.8726 samp_prob: 0.3100)


Epoch 7, step 42/96, (Training Loss: 3.3432, samp_prob: 0.3100):  70%|███████   | 7/10 [1:32:31<37:12, 744.03s/it]

42

[FINISHED] Epoch 7, (Training Loss (per epoch): 142.2158 samp_prob: 0.3100)


Epoch 7, step 43/96, (Training Loss: 3.3022, samp_prob: 0.3100):  70%|███████   | 7/10 [1:32:38<37:12, 744.03s/it]

43

[FINISHED] Epoch 7, (Training Loss (per epoch): 145.5180 samp_prob: 0.3100)


Epoch 7, step 44/96, (Training Loss: 3.2473, samp_prob: 0.3100):  70%|███████   | 7/10 [1:32:46<37:12, 744.03s/it]

44

[FINISHED] Epoch 7, (Training Loss (per epoch): 148.7652 samp_prob: 0.3100)


Epoch 7, step 45/96, (Training Loss: 3.2667, samp_prob: 0.3100):  70%|███████   | 7/10 [1:32:54<37:12, 744.03s/it]

45

[FINISHED] Epoch 7, (Training Loss (per epoch): 152.0319 samp_prob: 0.3100)


Epoch 7, step 46/96, (Training Loss: 3.3500, samp_prob: 0.3100):  70%|███████   | 7/10 [1:33:02<37:12, 744.03s/it]

[[ 1  6  7 ...  2  2  2]
 [ 1  4 23 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 12 ...  2  2  2]
 [ 1 56 56 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 7, step 46/96......

id: 247
answer: ['<BOS>', 'several', 'men', 'in', 'a', 'group', 'start', 'fighting', 'each', 'other']
prediction: ['<BOS>', 'a', 'are', 'are', 'are']
46

[FINISHED] Epoch 7, (Training Loss (per epoch): 155.3819 samp_prob: 0.3100)


Epoch 7, step 47/96, (Training Loss: 3.2601, samp_prob: 0.3100):  70%|███████   | 7/10 [1:33:09<37:12, 744.03s/it]

47

[FINISHED] Epoch 7, (Training Loss (per epoch): 158.6421 samp_prob: 0.3100)


Epoch 7, step 48/96, (Training Loss: 3.2546, samp_prob: 0.3100):  70%|███████   | 7/10 [1:33:17<37:12, 744.03s/it]

48

[FINISHED] Epoch 7, (Training Loss (per epoch): 161.8967 samp_prob: 0.3100)


Epoch 7, step 49/96, (Training Loss: 3.3009, samp_prob: 0.3100):  70%|███████   | 7/10 [1:33:24<37:12, 744.03s/it]

49

[FINISHED] Epoch 7, (Training Loss (per epoch): 165.1976 samp_prob: 0.3100)


Epoch 7, step 50/96, (Training Loss: 3.3951, samp_prob: 0.3100):  70%|███████   | 7/10 [1:33:32<37:12, 744.03s/it]

50

[FINISHED] Epoch 7, (Training Loss (per epoch): 168.5927 samp_prob: 0.3100)


Epoch 7, step 51/96, (Training Loss: 3.2132, samp_prob: 0.3100):  70%|███████   | 7/10 [1:33:40<37:12, 744.03s/it]

51

[FINISHED] Epoch 7, (Training Loss (per epoch): 171.8059 samp_prob: 0.3100)


Epoch 7, step 52/96, (Training Loss: 3.2120, samp_prob: 0.3100):  70%|███████   | 7/10 [1:33:48<37:12, 744.03s/it]

52

[FINISHED] Epoch 7, (Training Loss (per epoch): 175.0179 samp_prob: 0.3100)


Epoch 7, step 53/96, (Training Loss: 3.3742, samp_prob: 0.3100):  70%|███████   | 7/10 [1:33:55<37:12, 744.03s/it]

53

[FINISHED] Epoch 7, (Training Loss (per epoch): 178.3922 samp_prob: 0.3100)


Epoch 7, step 54/96, (Training Loss: 3.1937, samp_prob: 0.3100):  70%|███████   | 7/10 [1:34:03<37:12, 744.03s/it]

54

[FINISHED] Epoch 7, (Training Loss (per epoch): 181.5859 samp_prob: 0.3100)


Epoch 7, step 55/96, (Training Loss: 3.2589, samp_prob: 0.3100):  70%|███████   | 7/10 [1:34:11<37:12, 744.03s/it]

55

[FINISHED] Epoch 7, (Training Loss (per epoch): 184.8447 samp_prob: 0.3100)


Epoch 7, step 56/96, (Training Loss: 3.2234, samp_prob: 0.3100):  70%|███████   | 7/10 [1:34:18<37:12, 744.03s/it]

56

[FINISHED] Epoch 7, (Training Loss (per epoch): 188.0681 samp_prob: 0.3100)


Epoch 7, step 57/96, (Training Loss: 3.0999, samp_prob: 0.3100):  70%|███████   | 7/10 [1:34:26<37:12, 744.03s/it]

57

[FINISHED] Epoch 7, (Training Loss (per epoch): 191.1680 samp_prob: 0.3100)


Epoch 7, step 58/96, (Training Loss: 3.1835, samp_prob: 0.3100):  70%|███████   | 7/10 [1:34:34<37:12, 744.03s/it]

58

[FINISHED] Epoch 7, (Training Loss (per epoch): 194.3515 samp_prob: 0.3100)


Epoch 7, step 59/96, (Training Loss: 3.2622, samp_prob: 0.3100):  70%|███████   | 7/10 [1:34:41<37:12, 744.03s/it]

59

[FINISHED] Epoch 7, (Training Loss (per epoch): 197.6137 samp_prob: 0.3100)


Epoch 7, step 60/96, (Training Loss: 3.2247, samp_prob: 0.3100):  70%|███████   | 7/10 [1:34:49<37:12, 744.03s/it]

60

[FINISHED] Epoch 7, (Training Loss (per epoch): 200.8384 samp_prob: 0.3100)


Epoch 7, step 61/96, (Training Loss: 3.3539, samp_prob: 0.3100):  70%|███████   | 7/10 [1:34:57<37:12, 744.03s/it]

[[ 1  4 12 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 23 ...  2  2  2]
 [ 1  4 17 ...  2  2  2]
 [ 1 20 12 ...  2  2  2]]

[Train. Prediction] Epoch 7, step 61/96......

id: 193
answer: ['<BOS>', 'someone', 'is', 'playing', 'a', 'piano']
prediction: ['<BOS>', 'a', 'man', 'is', 'the', 'a', 'the']
61

[FINISHED] Epoch 7, (Training Loss (per epoch): 204.1923 samp_prob: 0.3100)


Epoch 7, step 62/96, (Training Loss: 3.2875, samp_prob: 0.3100):  70%|███████   | 7/10 [1:35:05<37:12, 744.03s/it]

62

[FINISHED] Epoch 7, (Training Loss (per epoch): 207.4798 samp_prob: 0.3100)


Epoch 7, step 63/96, (Training Loss: 3.3140, samp_prob: 0.3100):  70%|███████   | 7/10 [1:35:12<37:12, 744.03s/it]

63

[FINISHED] Epoch 7, (Training Loss (per epoch): 210.7939 samp_prob: 0.3100)


Epoch 7, step 64/96, (Training Loss: 3.3539, samp_prob: 0.3100):  70%|███████   | 7/10 [1:35:20<37:12, 744.03s/it]

64

[FINISHED] Epoch 7, (Training Loss (per epoch): 214.1478 samp_prob: 0.3100)


Epoch 7, step 65/96, (Training Loss: 3.1101, samp_prob: 0.3100):  70%|███████   | 7/10 [1:35:28<37:12, 744.03s/it]

65

[FINISHED] Epoch 7, (Training Loss (per epoch): 217.2579 samp_prob: 0.3100)


Epoch 7, step 66/96, (Training Loss: 3.4054, samp_prob: 0.3100):  70%|███████   | 7/10 [1:35:35<37:12, 744.03s/it]

66

[FINISHED] Epoch 7, (Training Loss (per epoch): 220.6633 samp_prob: 0.3100)


Epoch 7, step 67/96, (Training Loss: 3.3881, samp_prob: 0.3100):  70%|███████   | 7/10 [1:35:43<37:12, 744.03s/it]

67

[FINISHED] Epoch 7, (Training Loss (per epoch): 224.0513 samp_prob: 0.3100)


Epoch 7, step 68/96, (Training Loss: 3.2410, samp_prob: 0.3100):  70%|███████   | 7/10 [1:35:51<37:12, 744.03s/it]

68

[FINISHED] Epoch 7, (Training Loss (per epoch): 227.2924 samp_prob: 0.3100)


Epoch 7, step 69/96, (Training Loss: 3.1240, samp_prob: 0.3100):  70%|███████   | 7/10 [1:35:58<37:12, 744.03s/it]

69

[FINISHED] Epoch 7, (Training Loss (per epoch): 230.4164 samp_prob: 0.3100)


Epoch 7, step 70/96, (Training Loss: 3.3625, samp_prob: 0.3100):  70%|███████   | 7/10 [1:36:06<37:12, 744.03s/it]

70

[FINISHED] Epoch 7, (Training Loss (per epoch): 233.7789 samp_prob: 0.3100)


Epoch 7, step 71/96, (Training Loss: 3.1848, samp_prob: 0.3100):  70%|███████   | 7/10 [1:36:14<37:12, 744.03s/it]

71

[FINISHED] Epoch 7, (Training Loss (per epoch): 236.9637 samp_prob: 0.3100)


Epoch 7, step 72/96, (Training Loss: 3.3284, samp_prob: 0.3100):  70%|███████   | 7/10 [1:36:21<37:12, 744.03s/it]

72

[FINISHED] Epoch 7, (Training Loss (per epoch): 240.2922 samp_prob: 0.3100)


Epoch 7, step 73/96, (Training Loss: 3.2529, samp_prob: 0.3100):  70%|███████   | 7/10 [1:36:29<37:12, 744.03s/it]

73

[FINISHED] Epoch 7, (Training Loss (per epoch): 243.5450 samp_prob: 0.3100)


Epoch 7, step 74/96, (Training Loss: 3.2457, samp_prob: 0.3100):  70%|███████   | 7/10 [1:36:36<37:12, 744.03s/it]

74

[FINISHED] Epoch 7, (Training Loss (per epoch): 246.7908 samp_prob: 0.3100)


Epoch 7, step 75/96, (Training Loss: 3.3108, samp_prob: 0.3100):  70%|███████   | 7/10 [1:36:44<37:12, 744.03s/it]

75

[FINISHED] Epoch 7, (Training Loss (per epoch): 250.1016 samp_prob: 0.3100)


Epoch 7, step 76/96, (Training Loss: 3.3294, samp_prob: 0.3100):  70%|███████   | 7/10 [1:36:52<37:12, 744.03s/it]

[[ 1  4  8 ...  2  2  2]
 [ 1  4 39 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 7, step 76/96......

id: 150
answer: ['<BOS>', 'a', 'cat', 'is', 'running', 'up', 'a', 'wall']
prediction: ['<BOS>', 'a', 'cat', 'is', 'on', 'a']
76

[FINISHED] Epoch 7, (Training Loss (per epoch): 253.4310 samp_prob: 0.3100)


Epoch 7, step 77/96, (Training Loss: 3.2152, samp_prob: 0.3100):  70%|███████   | 7/10 [1:37:00<37:12, 744.03s/it]

77

[FINISHED] Epoch 7, (Training Loss (per epoch): 256.6462 samp_prob: 0.3100)


Epoch 7, step 78/96, (Training Loss: 3.1280, samp_prob: 0.3100):  70%|███████   | 7/10 [1:37:07<37:12, 744.03s/it]

78

[FINISHED] Epoch 7, (Training Loss (per epoch): 259.7742 samp_prob: 0.3100)


Epoch 7, step 79/96, (Training Loss: 3.1594, samp_prob: 0.3100):  70%|███████   | 7/10 [1:37:15<37:12, 744.03s/it]

79

[FINISHED] Epoch 7, (Training Loss (per epoch): 262.9336 samp_prob: 0.3100)


Epoch 7, step 80/96, (Training Loss: 3.2882, samp_prob: 0.3100):  70%|███████   | 7/10 [1:37:23<37:12, 744.03s/it]

80

[FINISHED] Epoch 7, (Training Loss (per epoch): 266.2218 samp_prob: 0.3100)


Epoch 7, step 81/96, (Training Loss: 3.1300, samp_prob: 0.3100):  70%|███████   | 7/10 [1:37:31<37:12, 744.03s/it]

81

[FINISHED] Epoch 7, (Training Loss (per epoch): 269.3518 samp_prob: 0.3100)


Epoch 7, step 82/96, (Training Loss: 3.3667, samp_prob: 0.3100):  70%|███████   | 7/10 [1:37:38<37:12, 744.03s/it]

82

[FINISHED] Epoch 7, (Training Loss (per epoch): 272.7186 samp_prob: 0.3100)


Epoch 7, step 83/96, (Training Loss: 3.2727, samp_prob: 0.3100):  70%|███████   | 7/10 [1:37:46<37:12, 744.03s/it]

83

[FINISHED] Epoch 7, (Training Loss (per epoch): 275.9913 samp_prob: 0.3100)


Epoch 7, step 84/96, (Training Loss: 3.2109, samp_prob: 0.3100):  70%|███████   | 7/10 [1:37:53<37:12, 744.03s/it]

84

[FINISHED] Epoch 7, (Training Loss (per epoch): 279.2022 samp_prob: 0.3100)


Epoch 7, step 85/96, (Training Loss: 3.3413, samp_prob: 0.3100):  70%|███████   | 7/10 [1:38:01<37:12, 744.03s/it]

85

[FINISHED] Epoch 7, (Training Loss (per epoch): 282.5435 samp_prob: 0.3100)


Epoch 7, step 86/96, (Training Loss: 3.1436, samp_prob: 0.3100):  70%|███████   | 7/10 [1:38:09<37:12, 744.03s/it]

86

[FINISHED] Epoch 7, (Training Loss (per epoch): 285.6871 samp_prob: 0.3100)


Epoch 7, step 87/96, (Training Loss: 3.3053, samp_prob: 0.3100):  70%|███████   | 7/10 [1:38:16<37:12, 744.03s/it]

87

[FINISHED] Epoch 7, (Training Loss (per epoch): 288.9924 samp_prob: 0.3100)


Epoch 7, step 88/96, (Training Loss: 3.3061, samp_prob: 0.3100):  70%|███████   | 7/10 [1:38:24<37:12, 744.03s/it]

88

[FINISHED] Epoch 7, (Training Loss (per epoch): 292.2985 samp_prob: 0.3100)


Epoch 7, step 89/96, (Training Loss: 3.2496, samp_prob: 0.3100):  70%|███████   | 7/10 [1:38:32<37:12, 744.03s/it]

89

[FINISHED] Epoch 7, (Training Loss (per epoch): 295.5481 samp_prob: 0.3100)


Epoch 7, step 90/96, (Training Loss: 3.1898, samp_prob: 0.3100):  70%|███████   | 7/10 [1:38:39<37:12, 744.03s/it]

90

[FINISHED] Epoch 7, (Training Loss (per epoch): 298.7379 samp_prob: 0.3100)


Epoch 7, step 91/96, (Training Loss: 3.2693, samp_prob: 0.3100):  70%|███████   | 7/10 [1:38:47<37:12, 744.03s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  6 12 ...  2  2  2]
 [ 1  4 38 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 7, step 91/96......

id: 101
answer: ['<BOS>', 'a', 'monkey', 'is', 'riding', 'a', 'bicycle', 'around', 'a', 'stage']
prediction: ['<BOS>', 'a', 'man', 'is', 'riding', 'a', 'a', 'a']
91

[FINISHED] Epoch 7, (Training Loss (per epoch): 302.0072 samp_prob: 0.3100)


Epoch 7, step 92/96, (Training Loss: 3.1303, samp_prob: 0.3100):  70%|███████   | 7/10 [1:38:55<37:12, 744.03s/it]

92

[FINISHED] Epoch 7, (Training Loss (per epoch): 305.1375 samp_prob: 0.3100)


Epoch 7, step 93/96, (Training Loss: 3.1730, samp_prob: 0.3100):  70%|███████   | 7/10 [1:39:03<37:12, 744.03s/it]

93

[FINISHED] Epoch 7, (Training Loss (per epoch): 308.3105 samp_prob: 0.3100)


Epoch 7, step 94/96, (Training Loss: 3.3230, samp_prob: 0.3100):  70%|███████   | 7/10 [1:39:10<37:12, 744.03s/it]

94

[FINISHED] Epoch 7, (Training Loss (per epoch): 311.6334 samp_prob: 0.3100)


Epoch 7, step 95/96, (Training Loss: 3.2820, samp_prob: 0.3100):  80%|████████  | 8/10 [1:39:18<24:44, 742.08s/it]

95

[FINISHED] Epoch 7, (Training Loss (per epoch): 314.9154 samp_prob: 0.3100)


Epoch 8, step 0/96, (Training Loss: 3.2878, samp_prob: 0.2315):  80%|████████  | 8/10 [1:39:26<24:44, 742.08s/it] 

0

[FINISHED] Epoch 8, (Training Loss (per epoch): 3.2878 samp_prob: 0.2315)


Epoch 8, step 1/96, (Training Loss: 3.2780, samp_prob: 0.2315):  80%|████████  | 8/10 [1:39:34<24:44, 742.08s/it]

[[ 1  4 24 ...  2  2  2]
 [ 1  4 17 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 8, step 1/96......

id: 5
answer: ['<BOS>', 'a', 'woman', 'squeezes', 'juice', 'out', 'of', 'a', 'handful', 'of', 'pickles']
prediction: ['<BOS>', 'a', 'woman', 'is', 'a']
1

[FINISHED] Epoch 8, (Training Loss (per epoch): 6.5658 samp_prob: 0.2315)


Epoch 8, step 2/96, (Training Loss: 3.2160, samp_prob: 0.2315):  80%|████████  | 8/10 [1:39:41<24:44, 742.08s/it]

2

[FINISHED] Epoch 8, (Training Loss (per epoch): 9.7817 samp_prob: 0.2315)


Epoch 8, step 3/96, (Training Loss: 3.3359, samp_prob: 0.2315):  80%|████████  | 8/10 [1:39:49<24:44, 742.08s/it]

3

[FINISHED] Epoch 8, (Training Loss (per epoch): 13.1176 samp_prob: 0.2315)


Epoch 8, step 4/96, (Training Loss: 3.1463, samp_prob: 0.2315):  80%|████████  | 8/10 [1:39:57<24:44, 742.08s/it]

4

[FINISHED] Epoch 8, (Training Loss (per epoch): 16.2640 samp_prob: 0.2315)


Epoch 8, step 5/96, (Training Loss: 3.1778, samp_prob: 0.2315):  80%|████████  | 8/10 [1:40:04<24:44, 742.08s/it]

5

[FINISHED] Epoch 8, (Training Loss (per epoch): 19.4417 samp_prob: 0.2315)


Epoch 8, step 6/96, (Training Loss: 3.1715, samp_prob: 0.2315):  80%|████████  | 8/10 [1:40:12<24:44, 742.08s/it]

6

[FINISHED] Epoch 8, (Training Loss (per epoch): 22.6132 samp_prob: 0.2315)


Epoch 8, step 7/96, (Training Loss: 3.2261, samp_prob: 0.2315):  80%|████████  | 8/10 [1:40:20<24:44, 742.08s/it]

7

[FINISHED] Epoch 8, (Training Loss (per epoch): 25.8393 samp_prob: 0.2315)


Epoch 8, step 8/96, (Training Loss: 3.2053, samp_prob: 0.2315):  80%|████████  | 8/10 [1:40:27<24:44, 742.08s/it]

8

[FINISHED] Epoch 8, (Training Loss (per epoch): 29.0445 samp_prob: 0.2315)


Epoch 8, step 9/96, (Training Loss: 3.2401, samp_prob: 0.2315):  80%|████████  | 8/10 [1:40:35<24:44, 742.08s/it]

9

[FINISHED] Epoch 8, (Training Loss (per epoch): 32.2846 samp_prob: 0.2315)


Epoch 8, step 10/96, (Training Loss: 3.1411, samp_prob: 0.2315):  80%|████████  | 8/10 [1:40:42<24:44, 742.08s/it]

10

[FINISHED] Epoch 8, (Training Loss (per epoch): 35.4257 samp_prob: 0.2315)


Epoch 8, step 11/96, (Training Loss: 3.1926, samp_prob: 0.2315):  80%|████████  | 8/10 [1:40:50<24:44, 742.08s/it]

11

[FINISHED] Epoch 8, (Training Loss (per epoch): 38.6183 samp_prob: 0.2315)


Epoch 8, step 12/96, (Training Loss: 3.2849, samp_prob: 0.2315):  80%|████████  | 8/10 [1:40:58<24:44, 742.08s/it]

12

[FINISHED] Epoch 8, (Training Loss (per epoch): 41.9032 samp_prob: 0.2315)


Epoch 8, step 13/96, (Training Loss: 3.3208, samp_prob: 0.2315):  80%|████████  | 8/10 [1:41:05<24:44, 742.08s/it]

13

[FINISHED] Epoch 8, (Training Loss (per epoch): 45.2240 samp_prob: 0.2315)


Epoch 8, step 14/96, (Training Loss: 3.2920, samp_prob: 0.2315):  80%|████████  | 8/10 [1:41:13<24:44, 742.08s/it]

14

[FINISHED] Epoch 8, (Training Loss (per epoch): 48.5160 samp_prob: 0.2315)


Epoch 8, step 15/96, (Training Loss: 3.2601, samp_prob: 0.2315):  80%|████████  | 8/10 [1:41:21<24:44, 742.08s/it]

15

[FINISHED] Epoch 8, (Training Loss (per epoch): 51.7761 samp_prob: 0.2315)


Epoch 8, step 16/96, (Training Loss: 3.1954, samp_prob: 0.2315):  80%|████████  | 8/10 [1:41:29<24:44, 742.08s/it]

[[ 1  4 23 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1 56 56 ...  2  2  2]
 ...
 [ 1  4 23 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 8, step 16/96......

id: 83
answer: ['<BOS>', 'superman', 'is', 'lifting', 'heavy', 'rocks']
prediction: ['<BOS>', 'a', 'woman', 'is', 'a', 'the']
16

[FINISHED] Epoch 8, (Training Loss (per epoch): 54.9716 samp_prob: 0.2315)


Epoch 8, step 17/96, (Training Loss: 3.1716, samp_prob: 0.2315):  80%|████████  | 8/10 [1:41:37<24:44, 742.08s/it]

17

[FINISHED] Epoch 8, (Training Loss (per epoch): 58.1432 samp_prob: 0.2315)


Epoch 8, step 18/96, (Training Loss: 3.0680, samp_prob: 0.2315):  80%|████████  | 8/10 [1:41:44<24:44, 742.08s/it]

18

[FINISHED] Epoch 8, (Training Loss (per epoch): 61.2112 samp_prob: 0.2315)


Epoch 8, step 19/96, (Training Loss: 2.9190, samp_prob: 0.2315):  80%|████████  | 8/10 [1:41:52<24:44, 742.08s/it]

19

[FINISHED] Epoch 8, (Training Loss (per epoch): 64.1302 samp_prob: 0.2315)


Epoch 8, step 20/96, (Training Loss: 3.2629, samp_prob: 0.2315):  80%|████████  | 8/10 [1:41:59<24:44, 742.08s/it]

20

[FINISHED] Epoch 8, (Training Loss (per epoch): 67.3931 samp_prob: 0.2315)


Epoch 8, step 21/96, (Training Loss: 3.1619, samp_prob: 0.2315):  80%|████████  | 8/10 [1:42:07<24:44, 742.08s/it]

21

[FINISHED] Epoch 8, (Training Loss (per epoch): 70.5550 samp_prob: 0.2315)


Epoch 8, step 22/96, (Training Loss: 3.1363, samp_prob: 0.2315):  80%|████████  | 8/10 [1:42:15<24:44, 742.08s/it]

22

[FINISHED] Epoch 8, (Training Loss (per epoch): 73.6913 samp_prob: 0.2315)


Epoch 8, step 23/96, (Training Loss: 3.2478, samp_prob: 0.2315):  80%|████████  | 8/10 [1:42:22<24:44, 742.08s/it]

23

[FINISHED] Epoch 8, (Training Loss (per epoch): 76.9391 samp_prob: 0.2315)


Epoch 8, step 24/96, (Training Loss: 3.4400, samp_prob: 0.2315):  80%|████████  | 8/10 [1:42:30<24:44, 742.08s/it]

24

[FINISHED] Epoch 8, (Training Loss (per epoch): 80.3791 samp_prob: 0.2315)


Epoch 8, step 25/96, (Training Loss: 3.1047, samp_prob: 0.2315):  80%|████████  | 8/10 [1:42:38<24:44, 742.08s/it]

25

[FINISHED] Epoch 8, (Training Loss (per epoch): 83.4838 samp_prob: 0.2315)


Epoch 8, step 26/96, (Training Loss: 3.1995, samp_prob: 0.2315):  80%|████████  | 8/10 [1:42:45<24:44, 742.08s/it]

26

[FINISHED] Epoch 8, (Training Loss (per epoch): 86.6834 samp_prob: 0.2315)


Epoch 8, step 27/96, (Training Loss: 3.0916, samp_prob: 0.2315):  80%|████████  | 8/10 [1:42:53<24:44, 742.08s/it]

27

[FINISHED] Epoch 8, (Training Loss (per epoch): 89.7750 samp_prob: 0.2315)


Epoch 8, step 28/96, (Training Loss: 3.0588, samp_prob: 0.2315):  80%|████████  | 8/10 [1:43:01<24:44, 742.08s/it]

28

[FINISHED] Epoch 8, (Training Loss (per epoch): 92.8338 samp_prob: 0.2315)


Epoch 8, step 29/96, (Training Loss: 3.1683, samp_prob: 0.2315):  80%|████████  | 8/10 [1:43:08<24:44, 742.08s/it]

29

[FINISHED] Epoch 8, (Training Loss (per epoch): 96.0021 samp_prob: 0.2315)


Epoch 8, step 30/96, (Training Loss: 3.3232, samp_prob: 0.2315):  80%|████████  | 8/10 [1:43:16<24:44, 742.08s/it]

30

[FINISHED] Epoch 8, (Training Loss (per epoch): 99.3253 samp_prob: 0.2315)


Epoch 8, step 31/96, (Training Loss: 3.2069, samp_prob: 0.2315):  80%|████████  | 8/10 [1:43:24<24:44, 742.08s/it]

[[ 1 20 12 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  5 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 8, step 31/96......

id: 69
answer: ['<BOS>', 'the', 'tv', 'reporter', 'is', 'reporting', 'the', 'news']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'a']
31

[FINISHED] Epoch 8, (Training Loss (per epoch): 102.5322 samp_prob: 0.2315)


Epoch 8, step 32/96, (Training Loss: 3.1680, samp_prob: 0.2315):  80%|████████  | 8/10 [1:43:32<24:44, 742.08s/it]

32

[FINISHED] Epoch 8, (Training Loss (per epoch): 105.7002 samp_prob: 0.2315)


Epoch 8, step 33/96, (Training Loss: 3.2176, samp_prob: 0.2315):  80%|████████  | 8/10 [1:43:39<24:44, 742.08s/it]

33

[FINISHED] Epoch 8, (Training Loss (per epoch): 108.9178 samp_prob: 0.2315)


Epoch 8, step 34/96, (Training Loss: 3.2398, samp_prob: 0.2315):  80%|████████  | 8/10 [1:43:47<24:44, 742.08s/it]

34

[FINISHED] Epoch 8, (Training Loss (per epoch): 112.1576 samp_prob: 0.2315)


Epoch 8, step 35/96, (Training Loss: 3.2181, samp_prob: 0.2315):  80%|████████  | 8/10 [1:43:54<24:44, 742.08s/it]

35

[FINISHED] Epoch 8, (Training Loss (per epoch): 115.3757 samp_prob: 0.2315)


Epoch 8, step 36/96, (Training Loss: 3.2507, samp_prob: 0.2315):  80%|████████  | 8/10 [1:44:02<24:44, 742.08s/it]

36

[FINISHED] Epoch 8, (Training Loss (per epoch): 118.6264 samp_prob: 0.2315)


Epoch 8, step 37/96, (Training Loss: 3.1847, samp_prob: 0.2315):  80%|████████  | 8/10 [1:44:10<24:44, 742.08s/it]

37

[FINISHED] Epoch 8, (Training Loss (per epoch): 121.8111 samp_prob: 0.2315)


Epoch 8, step 38/96, (Training Loss: 3.1717, samp_prob: 0.2315):  80%|████████  | 8/10 [1:44:17<24:44, 742.08s/it]

38

[FINISHED] Epoch 8, (Training Loss (per epoch): 124.9828 samp_prob: 0.2315)


Epoch 8, step 39/96, (Training Loss: 3.2131, samp_prob: 0.2315):  80%|████████  | 8/10 [1:44:25<24:44, 742.08s/it]

39

[FINISHED] Epoch 8, (Training Loss (per epoch): 128.1959 samp_prob: 0.2315)


Epoch 8, step 40/96, (Training Loss: 3.1041, samp_prob: 0.2315):  80%|████████  | 8/10 [1:44:32<24:44, 742.08s/it]

40

[FINISHED] Epoch 8, (Training Loss (per epoch): 131.3000 samp_prob: 0.2315)


Epoch 8, step 41/96, (Training Loss: 3.1477, samp_prob: 0.2315):  80%|████████  | 8/10 [1:44:40<24:44, 742.08s/it]

41

[FINISHED] Epoch 8, (Training Loss (per epoch): 134.4476 samp_prob: 0.2315)


Epoch 8, step 42/96, (Training Loss: 3.2217, samp_prob: 0.2315):  80%|████████  | 8/10 [1:44:48<24:44, 742.08s/it]

42

[FINISHED] Epoch 8, (Training Loss (per epoch): 137.6693 samp_prob: 0.2315)


Epoch 8, step 43/96, (Training Loss: 3.2195, samp_prob: 0.2315):  80%|████████  | 8/10 [1:44:55<24:44, 742.08s/it]

43

[FINISHED] Epoch 8, (Training Loss (per epoch): 140.8888 samp_prob: 0.2315)


Epoch 8, step 44/96, (Training Loss: 3.1402, samp_prob: 0.2315):  80%|████████  | 8/10 [1:45:03<24:44, 742.08s/it]

44

[FINISHED] Epoch 8, (Training Loss (per epoch): 144.0290 samp_prob: 0.2315)


Epoch 8, step 45/96, (Training Loss: 3.1858, samp_prob: 0.2315):  80%|████████  | 8/10 [1:45:11<24:44, 742.08s/it]

45

[FINISHED] Epoch 8, (Training Loss (per epoch): 147.2148 samp_prob: 0.2315)


Epoch 8, step 46/96, (Training Loss: 3.2598, samp_prob: 0.2315):  80%|████████  | 8/10 [1:45:19<24:44, 742.08s/it]

[[ 1  4 56 ...  2  2  2]
 [ 1  4 23 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1 20 12 ...  2  2  2]
 [ 1  4 56 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 8, step 46/96......

id: 126
answer: ['<BOS>', 'the', 'man', 'is', 'playing', 'the', 'wooden', 'flute']
prediction: ['<BOS>', 'a', 'man', 'is', 'playing', 'a', 'flute']
46

[FINISHED] Epoch 8, (Training Loss (per epoch): 150.4746 samp_prob: 0.2315)


Epoch 8, step 47/96, (Training Loss: 3.1588, samp_prob: 0.2315):  80%|████████  | 8/10 [1:45:26<24:44, 742.08s/it]

47

[FINISHED] Epoch 8, (Training Loss (per epoch): 153.6334 samp_prob: 0.2315)


Epoch 8, step 48/96, (Training Loss: 3.1660, samp_prob: 0.2315):  80%|████████  | 8/10 [1:45:34<24:44, 742.08s/it]

48

[FINISHED] Epoch 8, (Training Loss (per epoch): 156.7994 samp_prob: 0.2315)


Epoch 8, step 49/96, (Training Loss: 3.2391, samp_prob: 0.2315):  80%|████████  | 8/10 [1:45:42<24:44, 742.08s/it]

49

[FINISHED] Epoch 8, (Training Loss (per epoch): 160.0384 samp_prob: 0.2315)


Epoch 8, step 50/96, (Training Loss: 3.3259, samp_prob: 0.2315):  80%|████████  | 8/10 [1:45:49<24:44, 742.08s/it]

50

[FINISHED] Epoch 8, (Training Loss (per epoch): 163.3643 samp_prob: 0.2315)


Epoch 8, step 51/96, (Training Loss: 3.1352, samp_prob: 0.2315):  80%|████████  | 8/10 [1:45:57<24:44, 742.08s/it]

51

[FINISHED] Epoch 8, (Training Loss (per epoch): 166.4995 samp_prob: 0.2315)


Epoch 8, step 52/96, (Training Loss: 3.1337, samp_prob: 0.2315):  80%|████████  | 8/10 [1:46:05<24:44, 742.08s/it]

52

[FINISHED] Epoch 8, (Training Loss (per epoch): 169.6332 samp_prob: 0.2315)


Epoch 8, step 53/96, (Training Loss: 3.2943, samp_prob: 0.2315):  80%|████████  | 8/10 [1:46:12<24:44, 742.08s/it]

53

[FINISHED] Epoch 8, (Training Loss (per epoch): 172.9275 samp_prob: 0.2315)


Epoch 8, step 54/96, (Training Loss: 3.1321, samp_prob: 0.2315):  80%|████████  | 8/10 [1:46:20<24:44, 742.08s/it]

54

[FINISHED] Epoch 8, (Training Loss (per epoch): 176.0596 samp_prob: 0.2315)


Epoch 8, step 55/96, (Training Loss: 3.1460, samp_prob: 0.2315):  80%|████████  | 8/10 [1:46:27<24:44, 742.08s/it]

55

[FINISHED] Epoch 8, (Training Loss (per epoch): 179.2056 samp_prob: 0.2315)


Epoch 8, step 56/96, (Training Loss: 3.0991, samp_prob: 0.2315):  80%|████████  | 8/10 [1:46:35<24:44, 742.08s/it]

56

[FINISHED] Epoch 8, (Training Loss (per epoch): 182.3047 samp_prob: 0.2315)


Epoch 8, step 57/96, (Training Loss: 3.0394, samp_prob: 0.2315):  80%|████████  | 8/10 [1:46:43<24:44, 742.08s/it]

57

[FINISHED] Epoch 8, (Training Loss (per epoch): 185.3440 samp_prob: 0.2315)


Epoch 8, step 58/96, (Training Loss: 3.1070, samp_prob: 0.2315):  80%|████████  | 8/10 [1:46:51<24:44, 742.08s/it]

58

[FINISHED] Epoch 8, (Training Loss (per epoch): 188.4510 samp_prob: 0.2315)


Epoch 8, step 59/96, (Training Loss: 3.1989, samp_prob: 0.2315):  80%|████████  | 8/10 [1:46:58<24:44, 742.08s/it]

59

[FINISHED] Epoch 8, (Training Loss (per epoch): 191.6499 samp_prob: 0.2315)


Epoch 8, step 60/96, (Training Loss: 3.1238, samp_prob: 0.2315):  80%|████████  | 8/10 [1:47:06<24:44, 742.08s/it]

60

[FINISHED] Epoch 8, (Training Loss (per epoch): 194.7737 samp_prob: 0.2315)


Epoch 8, step 61/96, (Training Loss: 3.2556, samp_prob: 0.2315):  80%|████████  | 8/10 [1:47:14<24:44, 742.08s/it]

[[ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 23 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  6 12 ...  2  2  2]]

[Train. Prediction] Epoch 8, step 61/96......

id: 45
answer: ['<BOS>', 'a', 'tiger', 'is', 'wrestling', 'with', 'its', 'trainer']
prediction: ['<BOS>', 'a', 'man', 'is', 'playing', 'a', 'a']
61

[FINISHED] Epoch 8, (Training Loss (per epoch): 198.0293 samp_prob: 0.2315)


Epoch 8, step 62/96, (Training Loss: 3.2184, samp_prob: 0.2315):  80%|████████  | 8/10 [1:47:21<24:44, 742.08s/it]

62

[FINISHED] Epoch 8, (Training Loss (per epoch): 201.2477 samp_prob: 0.2315)


Epoch 8, step 63/96, (Training Loss: 3.2038, samp_prob: 0.2315):  80%|████████  | 8/10 [1:47:29<24:44, 742.08s/it]

63

[FINISHED] Epoch 8, (Training Loss (per epoch): 204.4516 samp_prob: 0.2315)


Epoch 8, step 64/96, (Training Loss: 3.2799, samp_prob: 0.2315):  80%|████████  | 8/10 [1:47:37<24:44, 742.08s/it]

64

[FINISHED] Epoch 8, (Training Loss (per epoch): 207.7314 samp_prob: 0.2315)


Epoch 8, step 65/96, (Training Loss: 3.0225, samp_prob: 0.2315):  80%|████████  | 8/10 [1:47:44<24:44, 742.08s/it]

65

[FINISHED] Epoch 8, (Training Loss (per epoch): 210.7539 samp_prob: 0.2315)


Epoch 8, step 66/96, (Training Loss: 3.3196, samp_prob: 0.2315):  80%|████████  | 8/10 [1:47:52<24:44, 742.08s/it]

66

[FINISHED] Epoch 8, (Training Loss (per epoch): 214.0736 samp_prob: 0.2315)


Epoch 8, step 67/96, (Training Loss: 3.3362, samp_prob: 0.2315):  80%|████████  | 8/10 [1:48:00<24:44, 742.08s/it]

67

[FINISHED] Epoch 8, (Training Loss (per epoch): 217.4098 samp_prob: 0.2315)


Epoch 8, step 68/96, (Training Loss: 3.1655, samp_prob: 0.2315):  80%|████████  | 8/10 [1:48:07<24:44, 742.08s/it]

68

[FINISHED] Epoch 8, (Training Loss (per epoch): 220.5753 samp_prob: 0.2315)


Epoch 8, step 69/96, (Training Loss: 3.0773, samp_prob: 0.2315):  80%|████████  | 8/10 [1:48:15<24:44, 742.08s/it]

69

[FINISHED] Epoch 8, (Training Loss (per epoch): 223.6526 samp_prob: 0.2315)


Epoch 8, step 70/96, (Training Loss: 3.2559, samp_prob: 0.2315):  80%|████████  | 8/10 [1:48:23<24:44, 742.08s/it]

70

[FINISHED] Epoch 8, (Training Loss (per epoch): 226.9085 samp_prob: 0.2315)


Epoch 8, step 71/96, (Training Loss: 3.1226, samp_prob: 0.2315):  80%|████████  | 8/10 [1:48:30<24:44, 742.08s/it]

71

[FINISHED] Epoch 8, (Training Loss (per epoch): 230.0310 samp_prob: 0.2315)


Epoch 8, step 72/96, (Training Loss: 3.2573, samp_prob: 0.2315):  80%|████████  | 8/10 [1:48:38<24:44, 742.08s/it]

72

[FINISHED] Epoch 8, (Training Loss (per epoch): 233.2883 samp_prob: 0.2315)


Epoch 8, step 73/96, (Training Loss: 3.1457, samp_prob: 0.2315):  80%|████████  | 8/10 [1:48:45<24:44, 742.08s/it]

73

[FINISHED] Epoch 8, (Training Loss (per epoch): 236.4340 samp_prob: 0.2315)


Epoch 8, step 74/96, (Training Loss: 3.1803, samp_prob: 0.2315):  80%|████████  | 8/10 [1:48:53<24:44, 742.08s/it]

74

[FINISHED] Epoch 8, (Training Loss (per epoch): 239.6143 samp_prob: 0.2315)


Epoch 8, step 75/96, (Training Loss: 3.2343, samp_prob: 0.2315):  80%|████████  | 8/10 [1:49:01<24:44, 742.08s/it]

75

[FINISHED] Epoch 8, (Training Loss (per epoch): 242.8486 samp_prob: 0.2315)


Epoch 8, step 76/96, (Training Loss: 3.2381, samp_prob: 0.2315):  80%|████████  | 8/10 [1:49:09<24:44, 742.08s/it]

[[ 1  4 17 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 ...
 [ 1  4 17 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 8, step 76/96......

id: 201
answer: ['<BOS>', 'a', 'woman', 'is', 'firing', 'shotgun']
prediction: ['<BOS>', 'a', 'woman', 'is', 'a', 'a']
76

[FINISHED] Epoch 8, (Training Loss (per epoch): 246.0867 samp_prob: 0.2315)


Epoch 8, step 77/96, (Training Loss: 3.1325, samp_prob: 0.2315):  80%|████████  | 8/10 [1:49:16<24:44, 742.08s/it]

77

[FINISHED] Epoch 8, (Training Loss (per epoch): 249.2192 samp_prob: 0.2315)


Epoch 8, step 78/96, (Training Loss: 3.0323, samp_prob: 0.2315):  80%|████████  | 8/10 [1:49:24<24:44, 742.08s/it]

78

[FINISHED] Epoch 8, (Training Loss (per epoch): 252.2515 samp_prob: 0.2315)


Epoch 8, step 79/96, (Training Loss: 3.0714, samp_prob: 0.2315):  80%|████████  | 8/10 [1:49:31<24:44, 742.08s/it]

79

[FINISHED] Epoch 8, (Training Loss (per epoch): 255.3229 samp_prob: 0.2315)


Epoch 8, step 80/96, (Training Loss: 3.1787, samp_prob: 0.2315):  80%|████████  | 8/10 [1:49:39<24:44, 742.08s/it]

80

[FINISHED] Epoch 8, (Training Loss (per epoch): 258.5016 samp_prob: 0.2315)


Epoch 8, step 81/96, (Training Loss: 3.0145, samp_prob: 0.2315):  80%|████████  | 8/10 [1:49:47<24:44, 742.08s/it]

81

[FINISHED] Epoch 8, (Training Loss (per epoch): 261.5161 samp_prob: 0.2315)


Epoch 8, step 82/96, (Training Loss: 3.2891, samp_prob: 0.2315):  80%|████████  | 8/10 [1:49:54<24:44, 742.08s/it]

82

[FINISHED] Epoch 8, (Training Loss (per epoch): 264.8052 samp_prob: 0.2315)


Epoch 8, step 83/96, (Training Loss: 3.1975, samp_prob: 0.2315):  80%|████████  | 8/10 [1:50:02<24:44, 742.08s/it]

83

[FINISHED] Epoch 8, (Training Loss (per epoch): 268.0027 samp_prob: 0.2315)


Epoch 8, step 84/96, (Training Loss: 3.1046, samp_prob: 0.2315):  80%|████████  | 8/10 [1:50:09<24:44, 742.08s/it]

84

[FINISHED] Epoch 8, (Training Loss (per epoch): 271.1073 samp_prob: 0.2315)


Epoch 8, step 85/96, (Training Loss: 3.2081, samp_prob: 0.2315):  80%|████████  | 8/10 [1:50:17<24:44, 742.08s/it]

85

[FINISHED] Epoch 8, (Training Loss (per epoch): 274.3154 samp_prob: 0.2315)


Epoch 8, step 86/96, (Training Loss: 3.0694, samp_prob: 0.2315):  80%|████████  | 8/10 [1:50:25<24:44, 742.08s/it]

86

[FINISHED] Epoch 8, (Training Loss (per epoch): 277.3848 samp_prob: 0.2315)


Epoch 8, step 87/96, (Training Loss: 3.2146, samp_prob: 0.2315):  80%|████████  | 8/10 [1:50:32<24:44, 742.08s/it]

87

[FINISHED] Epoch 8, (Training Loss (per epoch): 280.5994 samp_prob: 0.2315)


Epoch 8, step 88/96, (Training Loss: 3.2155, samp_prob: 0.2315):  80%|████████  | 8/10 [1:50:40<24:44, 742.08s/it]

88

[FINISHED] Epoch 8, (Training Loss (per epoch): 283.8150 samp_prob: 0.2315)


Epoch 8, step 89/96, (Training Loss: 3.1580, samp_prob: 0.2315):  80%|████████  | 8/10 [1:50:47<24:44, 742.08s/it]

89

[FINISHED] Epoch 8, (Training Loss (per epoch): 286.9730 samp_prob: 0.2315)


Epoch 8, step 90/96, (Training Loss: 3.1077, samp_prob: 0.2315):  80%|████████  | 8/10 [1:50:55<24:44, 742.08s/it]

90

[FINISHED] Epoch 8, (Training Loss (per epoch): 290.0808 samp_prob: 0.2315)


Epoch 8, step 91/96, (Training Loss: 3.1627, samp_prob: 0.2315):  80%|████████  | 8/10 [1:51:03<24:44, 742.08s/it]

[[  1   4   7 ...   2   2   2]
 [  1   4 105 ...   2   2   2]
 [  1   4 132 ...   2   2   2]
 ...
 [  1   4   7 ...   2   2   2]
 [  1   4   8 ...   2   2   2]
 [  1   4   7 ...   2   2   2]]

[Train. Prediction] Epoch 8, step 91/96......

id: 155
answer: ['<BOS>', 'a', 'man', 'pours', 'sauce', 'into', 'a', 'plastic', 'bag', 'full', 'of', 'meat']
prediction: ['<BOS>', 'a', 'man', 'is', 'pouring', 'a', 'a']
91

[FINISHED] Epoch 8, (Training Loss (per epoch): 293.2434 samp_prob: 0.2315)


Epoch 8, step 92/96, (Training Loss: 3.0246, samp_prob: 0.2315):  80%|████████  | 8/10 [1:51:11<24:44, 742.08s/it]

92

[FINISHED] Epoch 8, (Training Loss (per epoch): 296.2680 samp_prob: 0.2315)


Epoch 8, step 93/96, (Training Loss: 3.1028, samp_prob: 0.2315):  80%|████████  | 8/10 [1:51:18<24:44, 742.08s/it]

93

[FINISHED] Epoch 8, (Training Loss (per epoch): 299.3709 samp_prob: 0.2315)


Epoch 8, step 94/96, (Training Loss: 3.2352, samp_prob: 0.2315):  80%|████████  | 8/10 [1:51:26<24:44, 742.08s/it]

94

[FINISHED] Epoch 8, (Training Loss (per epoch): 302.6061 samp_prob: 0.2315)


Epoch 8, step 95/96, (Training Loss: 3.1707, samp_prob: 0.2315):  90%|█████████ | 9/10 [1:51:34<12:20, 740.12s/it]

95

[FINISHED] Epoch 8, (Training Loss (per epoch): 305.7768 samp_prob: 0.2315)


Epoch 9, step 0/96, (Training Loss: 3.2114, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:51:41<12:20, 740.12s/it] 

0

[FINISHED] Epoch 9, (Training Loss (per epoch): 3.2114 samp_prob: 0.1680)


Epoch 9, step 1/96, (Training Loss: 3.2086, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:51:49<12:20, 740.12s/it]

[[ 1  4 24 ...  2  2  2]
 [ 1  6  5 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4 12 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 9, step 1/96......

id: 144
answer: ['<BOS>', 'a', 'police', 'officer', 'points', 'the', 'gun', 'at', 'a', 'white', 'car', 'parked', 'on', 'one', 'side', 'of', 'a', 'highway', 'and', 'screams', 'at', 'the', 'person', 'inside', 'it']
prediction: ['<BOS>', 'a', 'man', 'is', 'is', 'car', 'a']
1

[FINISHED] Epoch 9, (Training Loss (per epoch): 6.4200 samp_prob: 0.1680)


Epoch 9, step 2/96, (Training Loss: 3.1296, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:51:57<12:20, 740.12s/it]

2

[FINISHED] Epoch 9, (Training Loss (per epoch): 9.5496 samp_prob: 0.1680)


Epoch 9, step 3/96, (Training Loss: 3.2269, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:52:04<12:20, 740.12s/it]

3

[FINISHED] Epoch 9, (Training Loss (per epoch): 12.7765 samp_prob: 0.1680)


Epoch 9, step 4/96, (Training Loss: 3.0600, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:52:12<12:20, 740.12s/it]

4

[FINISHED] Epoch 9, (Training Loss (per epoch): 15.8365 samp_prob: 0.1680)


Epoch 9, step 5/96, (Training Loss: 3.0883, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:52:20<12:20, 740.12s/it]

5

[FINISHED] Epoch 9, (Training Loss (per epoch): 18.9248 samp_prob: 0.1680)


Epoch 9, step 6/96, (Training Loss: 3.0962, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:52:27<12:20, 740.12s/it]

6

[FINISHED] Epoch 9, (Training Loss (per epoch): 22.0211 samp_prob: 0.1680)


Epoch 9, step 7/96, (Training Loss: 3.1639, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:52:35<12:20, 740.12s/it]

7

[FINISHED] Epoch 9, (Training Loss (per epoch): 25.1850 samp_prob: 0.1680)


Epoch 9, step 8/96, (Training Loss: 3.1364, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:52:42<12:20, 740.12s/it]

8

[FINISHED] Epoch 9, (Training Loss (per epoch): 28.3214 samp_prob: 0.1680)


Epoch 9, step 9/96, (Training Loss: 3.1233, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:52:50<12:20, 740.12s/it]

9

[FINISHED] Epoch 9, (Training Loss (per epoch): 31.4447 samp_prob: 0.1680)


Epoch 9, step 10/96, (Training Loss: 3.0589, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:52:58<12:20, 740.12s/it]

10

[FINISHED] Epoch 9, (Training Loss (per epoch): 34.5036 samp_prob: 0.1680)


Epoch 9, step 11/96, (Training Loss: 3.1493, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:53:05<12:20, 740.12s/it]

11

[FINISHED] Epoch 9, (Training Loss (per epoch): 37.6529 samp_prob: 0.1680)


Epoch 9, step 12/96, (Training Loss: 3.1882, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:53:13<12:20, 740.12s/it]

12

[FINISHED] Epoch 9, (Training Loss (per epoch): 40.8411 samp_prob: 0.1680)


Epoch 9, step 13/96, (Training Loss: 3.2418, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:53:21<12:20, 740.12s/it]

13

[FINISHED] Epoch 9, (Training Loss (per epoch): 44.0828 samp_prob: 0.1680)


Epoch 9, step 14/96, (Training Loss: 3.2156, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:53:28<12:20, 740.12s/it]

14

[FINISHED] Epoch 9, (Training Loss (per epoch): 47.2985 samp_prob: 0.1680)


Epoch 9, step 15/96, (Training Loss: 3.1675, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:53:36<12:20, 740.12s/it]

15

[FINISHED] Epoch 9, (Training Loss (per epoch): 50.4660 samp_prob: 0.1680)


Epoch 9, step 16/96, (Training Loss: 3.1043, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:53:44<12:20, 740.12s/it]

[[ 1  4 23 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4 56 ...  2  2  2]
 ...
 [ 1  4 24 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 9, step 16/96......

id: 232
answer: ['<BOS>', 'several', 'teams', 'are', 'playing', 'soccer']
prediction: ['<BOS>', 'men', 'men', 'soccer', 'are', 'the', 'soccer']
16

[FINISHED] Epoch 9, (Training Loss (per epoch): 53.5703 samp_prob: 0.1680)


Epoch 9, step 17/96, (Training Loss: 3.0537, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:53:51<12:20, 740.12s/it]

17

[FINISHED] Epoch 9, (Training Loss (per epoch): 56.6240 samp_prob: 0.1680)


Epoch 9, step 18/96, (Training Loss: 2.9847, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:53:59<12:20, 740.12s/it]

18

[FINISHED] Epoch 9, (Training Loss (per epoch): 59.6087 samp_prob: 0.1680)


Epoch 9, step 19/96, (Training Loss: 2.8623, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:54:07<12:20, 740.12s/it]

19

[FINISHED] Epoch 9, (Training Loss (per epoch): 62.4710 samp_prob: 0.1680)


Epoch 9, step 20/96, (Training Loss: 3.1806, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:54:14<12:20, 740.12s/it]

20

[FINISHED] Epoch 9, (Training Loss (per epoch): 65.6516 samp_prob: 0.1680)


Epoch 9, step 21/96, (Training Loss: 3.0802, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:54:22<12:20, 740.12s/it]

21

[FINISHED] Epoch 9, (Training Loss (per epoch): 68.7318 samp_prob: 0.1680)


Epoch 9, step 22/96, (Training Loss: 3.0543, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:54:30<12:20, 740.12s/it]

22

[FINISHED] Epoch 9, (Training Loss (per epoch): 71.7861 samp_prob: 0.1680)


Epoch 9, step 23/96, (Training Loss: 3.1677, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:54:37<12:20, 740.12s/it]

23

[FINISHED] Epoch 9, (Training Loss (per epoch): 74.9538 samp_prob: 0.1680)


Epoch 9, step 24/96, (Training Loss: 3.3242, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:54:45<12:20, 740.12s/it]

24

[FINISHED] Epoch 9, (Training Loss (per epoch): 78.2780 samp_prob: 0.1680)


Epoch 9, step 25/96, (Training Loss: 3.0523, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:54:52<12:20, 740.12s/it]

25

[FINISHED] Epoch 9, (Training Loss (per epoch): 81.3303 samp_prob: 0.1680)


Epoch 9, step 26/96, (Training Loss: 3.1015, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:55:00<12:20, 740.12s/it]

26

[FINISHED] Epoch 9, (Training Loss (per epoch): 84.4318 samp_prob: 0.1680)


Epoch 9, step 27/96, (Training Loss: 2.9918, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:55:08<12:20, 740.12s/it]

27

[FINISHED] Epoch 9, (Training Loss (per epoch): 87.4236 samp_prob: 0.1680)


Epoch 9, step 28/96, (Training Loss: 2.9659, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:55:15<12:20, 740.12s/it]

28

[FINISHED] Epoch 9, (Training Loss (per epoch): 90.3894 samp_prob: 0.1680)


Epoch 9, step 29/96, (Training Loss: 3.0595, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:55:23<12:20, 740.12s/it]

29

[FINISHED] Epoch 9, (Training Loss (per epoch): 93.4490 samp_prob: 0.1680)


Epoch 9, step 30/96, (Training Loss: 3.2254, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:55:30<12:20, 740.12s/it]

30

[FINISHED] Epoch 9, (Training Loss (per epoch): 96.6744 samp_prob: 0.1680)


Epoch 9, step 31/96, (Training Loss: 3.1123, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:55:38<12:20, 740.12s/it]

[[ 1  4 39 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  6 17 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]]

[Train. Prediction] Epoch 9, step 31/96......

id: 113
answer: ['<BOS>', 'a', 'small', 'panda', 'is', 'laying', 'close', 'to', 'a', 'larger', 'panda']
prediction: ['<BOS>', 'two', 'panda', 'panda', 'are', 'are']
31

[FINISHED] Epoch 9, (Training Loss (per epoch): 99.7867 samp_prob: 0.1680)


Epoch 9, step 32/96, (Training Loss: 3.0800, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:55:46<12:20, 740.12s/it]

32

[FINISHED] Epoch 9, (Training Loss (per epoch): 102.8668 samp_prob: 0.1680)


Epoch 9, step 33/96, (Training Loss: 3.1210, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:55:54<12:20, 740.12s/it]

33

[FINISHED] Epoch 9, (Training Loss (per epoch): 105.9877 samp_prob: 0.1680)


Epoch 9, step 34/96, (Training Loss: 3.1395, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:56:01<12:20, 740.12s/it]

34

[FINISHED] Epoch 9, (Training Loss (per epoch): 109.1272 samp_prob: 0.1680)


Epoch 9, step 35/96, (Training Loss: 3.1520, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:56:09<12:20, 740.12s/it]

35

[FINISHED] Epoch 9, (Training Loss (per epoch): 112.2792 samp_prob: 0.1680)


Epoch 9, step 36/96, (Training Loss: 3.1813, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:56:16<12:20, 740.12s/it]

36

[FINISHED] Epoch 9, (Training Loss (per epoch): 115.4605 samp_prob: 0.1680)


Epoch 9, step 37/96, (Training Loss: 3.1037, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:56:24<12:20, 740.12s/it]

37

[FINISHED] Epoch 9, (Training Loss (per epoch): 118.5642 samp_prob: 0.1680)


Epoch 9, step 38/96, (Training Loss: 3.0897, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:56:32<12:20, 740.12s/it]

38

[FINISHED] Epoch 9, (Training Loss (per epoch): 121.6539 samp_prob: 0.1680)


Epoch 9, step 39/96, (Training Loss: 3.1246, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:56:39<12:20, 740.12s/it]

39

[FINISHED] Epoch 9, (Training Loss (per epoch): 124.7785 samp_prob: 0.1680)


Epoch 9, step 40/96, (Training Loss: 3.0296, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:56:47<12:20, 740.12s/it]

40

[FINISHED] Epoch 9, (Training Loss (per epoch): 127.8081 samp_prob: 0.1680)


Epoch 9, step 41/96, (Training Loss: 3.0451, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:56:54<12:20, 740.12s/it]

41

[FINISHED] Epoch 9, (Training Loss (per epoch): 130.8532 samp_prob: 0.1680)


Epoch 9, step 42/96, (Training Loss: 3.1493, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:57:02<12:20, 740.12s/it]

42

[FINISHED] Epoch 9, (Training Loss (per epoch): 134.0025 samp_prob: 0.1680)


Epoch 9, step 43/96, (Training Loss: 3.1527, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:57:10<12:20, 740.12s/it]

43

[FINISHED] Epoch 9, (Training Loss (per epoch): 137.1552 samp_prob: 0.1680)


Epoch 9, step 44/96, (Training Loss: 3.0902, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:57:17<12:20, 740.12s/it]

44

[FINISHED] Epoch 9, (Training Loss (per epoch): 140.2454 samp_prob: 0.1680)


Epoch 9, step 45/96, (Training Loss: 3.0972, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:57:25<12:20, 740.12s/it]

45

[FINISHED] Epoch 9, (Training Loss (per epoch): 143.3426 samp_prob: 0.1680)


Epoch 9, step 46/96, (Training Loss: 3.1707, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:57:33<12:20, 740.12s/it]

[[  1   4   7 ...   2   2   2]
 [  1   4  90 ...   2   2   2]
 [  1   4   7 ...   2   2   2]
 ...
 [  1  20  51 ...   2   2   2]
 [  1 103  56 ...   2   2   2]
 [  1   4   8 ...   2   2   2]]

[Train. Prediction] Epoch 9, step 46/96......

id: 108
answer: ['<BOS>', 'a', 'dog', 'is', 'jumping', 'in', 'front', 'of', 'a', 'walking', 'baby']
prediction: ['<BOS>', 'a', 'cat', 'is', 'a', 'a', 'a']
46

[FINISHED] Epoch 9, (Training Loss (per epoch): 146.5133 samp_prob: 0.1680)


Epoch 9, step 47/96, (Training Loss: 3.0847, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:57:41<12:20, 740.12s/it]

47

[FINISHED] Epoch 9, (Training Loss (per epoch): 149.5979 samp_prob: 0.1680)


Epoch 9, step 48/96, (Training Loss: 3.0969, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:57:48<12:20, 740.12s/it]

48

[FINISHED] Epoch 9, (Training Loss (per epoch): 152.6948 samp_prob: 0.1680)


Epoch 9, step 49/96, (Training Loss: 3.0867, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:57:56<12:20, 740.12s/it]

49

[FINISHED] Epoch 9, (Training Loss (per epoch): 155.7814 samp_prob: 0.1680)


Epoch 9, step 50/96, (Training Loss: 3.1990, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:58:04<12:20, 740.12s/it]

50

[FINISHED] Epoch 9, (Training Loss (per epoch): 158.9804 samp_prob: 0.1680)


Epoch 9, step 51/96, (Training Loss: 3.0236, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:58:11<12:20, 740.12s/it]

51

[FINISHED] Epoch 9, (Training Loss (per epoch): 162.0041 samp_prob: 0.1680)


Epoch 9, step 52/96, (Training Loss: 3.0278, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:58:19<12:20, 740.12s/it]

52

[FINISHED] Epoch 9, (Training Loss (per epoch): 165.0319 samp_prob: 0.1680)


Epoch 9, step 53/96, (Training Loss: 3.1681, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:58:26<12:20, 740.12s/it]

53

[FINISHED] Epoch 9, (Training Loss (per epoch): 168.2000 samp_prob: 0.1680)


Epoch 9, step 54/96, (Training Loss: 3.0438, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:58:34<12:20, 740.12s/it]

54

[FINISHED] Epoch 9, (Training Loss (per epoch): 171.2438 samp_prob: 0.1680)


Epoch 9, step 55/96, (Training Loss: 3.0769, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:58:42<12:20, 740.12s/it]

55

[FINISHED] Epoch 9, (Training Loss (per epoch): 174.3207 samp_prob: 0.1680)


Epoch 9, step 56/96, (Training Loss: 3.0275, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:58:49<12:20, 740.12s/it]

56

[FINISHED] Epoch 9, (Training Loss (per epoch): 177.3482 samp_prob: 0.1680)


Epoch 9, step 57/96, (Training Loss: 2.9481, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:58:57<12:20, 740.12s/it]

57

[FINISHED] Epoch 9, (Training Loss (per epoch): 180.2963 samp_prob: 0.1680)


Epoch 9, step 58/96, (Training Loss: 3.0056, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:59:04<12:20, 740.12s/it]

58

[FINISHED] Epoch 9, (Training Loss (per epoch): 183.3020 samp_prob: 0.1680)


Epoch 9, step 59/96, (Training Loss: 3.0919, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:59:12<12:20, 740.12s/it]

59

[FINISHED] Epoch 9, (Training Loss (per epoch): 186.3938 samp_prob: 0.1680)


Epoch 9, step 60/96, (Training Loss: 3.0355, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:59:20<12:20, 740.12s/it]

60

[FINISHED] Epoch 9, (Training Loss (per epoch): 189.4294 samp_prob: 0.1680)


Epoch 9, step 61/96, (Training Loss: 3.1627, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:59:28<12:20, 740.12s/it]

[[  1   4   7 ...   2   2   2]
 [  1   4   7 ...   2   2   2]
 [  1   4   7 ...   2   2   2]
 ...
 [  1   4  23 ...   2   2   2]
 [  1   4  17 ...   2   2   2]
 [  1  20 105 ...   2   6   2]]

[Train. Prediction] Epoch 9, step 61/96......

id: 39
answer: ['<BOS>', 'a', 'man', 'tight', 'roping', 'on', 'the', 'beach']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'on', 'on', 'on']
61

[FINISHED] Epoch 9, (Training Loss (per epoch): 192.5921 samp_prob: 0.1680)


Epoch 9, step 62/96, (Training Loss: 3.1539, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:59:35<12:20, 740.12s/it]

62

[FINISHED] Epoch 9, (Training Loss (per epoch): 195.7459 samp_prob: 0.1680)


Epoch 9, step 63/96, (Training Loss: 3.1243, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:59:43<12:20, 740.12s/it]

63

[FINISHED] Epoch 9, (Training Loss (per epoch): 198.8703 samp_prob: 0.1680)


Epoch 9, step 64/96, (Training Loss: 3.1708, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:59:51<12:20, 740.12s/it]

64

[FINISHED] Epoch 9, (Training Loss (per epoch): 202.0410 samp_prob: 0.1680)


Epoch 9, step 65/96, (Training Loss: 2.8905, samp_prob: 0.1680):  90%|█████████ | 9/10 [1:59:58<12:20, 740.12s/it]

65

[FINISHED] Epoch 9, (Training Loss (per epoch): 204.9315 samp_prob: 0.1680)


Epoch 9, step 66/96, (Training Loss: 3.2088, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:00:06<12:20, 740.12s/it]

66

[FINISHED] Epoch 9, (Training Loss (per epoch): 208.1403 samp_prob: 0.1680)


Epoch 9, step 67/96, (Training Loss: 3.2239, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:00:13<12:20, 740.12s/it]

67

[FINISHED] Epoch 9, (Training Loss (per epoch): 211.3642 samp_prob: 0.1680)


Epoch 9, step 68/96, (Training Loss: 3.0698, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:00:21<12:20, 740.12s/it]

68

[FINISHED] Epoch 9, (Training Loss (per epoch): 214.4340 samp_prob: 0.1680)


Epoch 9, step 69/96, (Training Loss: 2.9337, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:00:29<12:20, 740.12s/it]

69

[FINISHED] Epoch 9, (Training Loss (per epoch): 217.3677 samp_prob: 0.1680)


Epoch 9, step 70/96, (Training Loss: 3.1841, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:00:37<12:20, 740.12s/it]

70

[FINISHED] Epoch 9, (Training Loss (per epoch): 220.5517 samp_prob: 0.1680)


Epoch 9, step 71/96, (Training Loss: 3.0077, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:00:44<12:20, 740.12s/it]

71

[FINISHED] Epoch 9, (Training Loss (per epoch): 223.5594 samp_prob: 0.1680)


Epoch 9, step 72/96, (Training Loss: 3.1475, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:00:52<12:20, 740.12s/it]

72

[FINISHED] Epoch 9, (Training Loss (per epoch): 226.7069 samp_prob: 0.1680)


Epoch 9, step 73/96, (Training Loss: 3.0322, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:00:59<12:20, 740.12s/it]

73

[FINISHED] Epoch 9, (Training Loss (per epoch): 229.7391 samp_prob: 0.1680)


Epoch 9, step 74/96, (Training Loss: 3.1045, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:01:07<12:20, 740.12s/it]

74

[FINISHED] Epoch 9, (Training Loss (per epoch): 232.8436 samp_prob: 0.1680)


Epoch 9, step 75/96, (Training Loss: 3.1369, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:01:15<12:20, 740.12s/it]

75

[FINISHED] Epoch 9, (Training Loss (per epoch): 235.9805 samp_prob: 0.1680)


Epoch 9, step 76/96, (Training Loss: 3.1672, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:01:23<12:20, 740.12s/it]

[[ 1  4 17 ...  2  2  2]
 [ 1  6 39 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 ...
 [ 1  4  7 ...  2  2  2]
 [ 1  4  8 ...  2  2  2]
 [ 1  4  7 ...  2  2  2]]

[Train. Prediction] Epoch 9, step 76/96......

id: 186
answer: ['<BOS>', 'a', 'person', 'is', 'playing', 'a', 'football']
prediction: ['<BOS>', 'a', 'man', 'is', 'kicking', 'a', 'a', 'ball']
76

[FINISHED] Epoch 9, (Training Loss (per epoch): 239.1478 samp_prob: 0.1680)


Epoch 9, step 77/96, (Training Loss: 3.0419, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:01:30<12:20, 740.12s/it]

77

[FINISHED] Epoch 9, (Training Loss (per epoch): 242.1897 samp_prob: 0.1680)


Epoch 9, step 78/96, (Training Loss: 2.9515, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:01:38<12:20, 740.12s/it]

78

[FINISHED] Epoch 9, (Training Loss (per epoch): 245.1412 samp_prob: 0.1680)


Epoch 9, step 79/96, (Training Loss: 3.0107, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:01:45<12:20, 740.12s/it]

79

[FINISHED] Epoch 9, (Training Loss (per epoch): 248.1519 samp_prob: 0.1680)


Epoch 9, step 80/96, (Training Loss: 3.0932, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:01:53<12:20, 740.12s/it]

80

[FINISHED] Epoch 9, (Training Loss (per epoch): 251.2451 samp_prob: 0.1680)


Epoch 9, step 81/96, (Training Loss: 2.9371, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:02:01<12:20, 740.12s/it]

81

[FINISHED] Epoch 9, (Training Loss (per epoch): 254.1821 samp_prob: 0.1680)


Epoch 9, step 82/96, (Training Loss: 3.1828, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:02:08<12:20, 740.12s/it]

82

[FINISHED] Epoch 9, (Training Loss (per epoch): 257.3650 samp_prob: 0.1680)


Epoch 9, step 83/96, (Training Loss: 3.0983, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:02:16<12:20, 740.12s/it]

83

[FINISHED] Epoch 9, (Training Loss (per epoch): 260.4632 samp_prob: 0.1680)


Epoch 9, step 84/96, (Training Loss: 3.0149, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:02:23<12:20, 740.12s/it]

84

[FINISHED] Epoch 9, (Training Loss (per epoch): 263.4781 samp_prob: 0.1680)


Epoch 9, step 85/96, (Training Loss: 3.1547, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:02:31<12:20, 740.12s/it]

85

[FINISHED] Epoch 9, (Training Loss (per epoch): 266.6328 samp_prob: 0.1680)


Epoch 9, step 86/96, (Training Loss: 2.9704, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:02:39<12:20, 740.12s/it]

86

[FINISHED] Epoch 9, (Training Loss (per epoch): 269.6032 samp_prob: 0.1680)


Epoch 9, step 87/96, (Training Loss: 3.1633, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:02:46<12:20, 740.12s/it]

87

[FINISHED] Epoch 9, (Training Loss (per epoch): 272.7665 samp_prob: 0.1680)


Epoch 9, step 88/96, (Training Loss: 3.1403, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:02:54<12:20, 740.12s/it]

88

[FINISHED] Epoch 9, (Training Loss (per epoch): 275.9067 samp_prob: 0.1680)


Epoch 9, step 89/96, (Training Loss: 3.0831, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:03:02<12:20, 740.12s/it]

89

[FINISHED] Epoch 9, (Training Loss (per epoch): 278.9898 samp_prob: 0.1680)


Epoch 9, step 90/96, (Training Loss: 3.0252, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:03:09<12:20, 740.12s/it]

90

[FINISHED] Epoch 9, (Training Loss (per epoch): 282.0150 samp_prob: 0.1680)


Epoch 9, step 91/96, (Training Loss: 3.0971, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:03:17<12:20, 740.12s/it]

[[  1   4   7 ...   2   2   2]
 [  1  20  12 ...   2   2   2]
 [  1   4 132 ...   2   2   2]
 ...
 [  1   4   7 ...   2   2   2]
 [  1   4   8 ...   2   2   2]
 [  1   4   7 ...   2   2   2]]

[Train. Prediction] Epoch 9, step 91/96......

id: 16
answer: ['<BOS>', 'someone', 'stirred', 'the', 'ot', 'of', 'chili']
prediction: ['<BOS>', 'a', 'man', 'is', 'a', 'into', 'a']
91

[FINISHED] Epoch 9, (Training Loss (per epoch): 285.1122 samp_prob: 0.1680)


Epoch 9, step 92/96, (Training Loss: 2.9416, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:03:25<12:20, 740.12s/it]

92

[FINISHED] Epoch 9, (Training Loss (per epoch): 288.0537 samp_prob: 0.1680)


Epoch 9, step 93/96, (Training Loss: 3.0037, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:03:33<12:20, 740.12s/it]

93

[FINISHED] Epoch 9, (Training Loss (per epoch): 291.0574 samp_prob: 0.1680)


Epoch 9, step 94/96, (Training Loss: 3.1286, samp_prob: 0.1680):  90%|█████████ | 9/10 [2:03:40<12:20, 740.12s/it]

94

[FINISHED] Epoch 9, (Training Loss (per epoch): 294.1860 samp_prob: 0.1680)


Epoch 9, step 95/96, (Training Loss: 3.0916, samp_prob: 0.1680): 100%|██████████| 10/10 [2:03:48<00:00, 742.82s/it]

95

[FINISHED] Epoch 9, (Training Loss (per epoch): 297.2776 samp_prob: 0.1680)





In [21]:
# model = Video_Caption_Generator(dim_image=n_features, 
#                                 n_words = n_words, 
#                                 dim_hidden = n_hidden, 
#                                 batch_size=batch_size, 
#                                 n_lstm_steps=80,
#                                 n_video_lstm_step=80,
#                                 n_caption_lstm_step=80,
#                                 bias_init_vector=bias_init_vector)

In [22]:
# tf_loss, tf_video, tf_video_mask, tf_caption, tf_caption_mask, tf_probs = model.build_model()


In [23]:
# n_words = n_words

# with tf.Graph().as_default() as graph:
    

#     weights_enc = tf.Variable(tf.random_uniform([n_features, n_hidden],-0.1,0.1),name="weights_enc")
#     bias_enc = tf.Variable(tf.zeros([n_hidden]),name="bias_enc")

#     weights_dec = tf.Variable(tf.random_uniform([n_hidden, n_words],-0.1,0.1),name="weights_dec")
#     bias_dec = tf.Variable(tf.zeros([n_words]),name="bias_dec")


#     x_video = tf.placeholder(tf.float32, (None, no_of_frames, n_features),'video_features') #inputs

#     batch_size = tf.shape(x_video)[0]
    
#     x_video_drop = tf.nn.dropout(x_video, 0.5)
    
#     x_video_flat = tf.reshape(x_video_drop,[-1,n_features])

#     y_label = tf.placeholder(tf.int32,(None, sizeof_sentence),'captions') #outputs


#     #sampling = tf.placeholder(tf.bool, [sizeof_sentence], name='sampling')
#     padding = tf.zeros([batch_size, n_hidden])

#     loss = 0.0

#     ########## DATA ###########
#     # Example: For i = 0
#     #batch_x = np.array(vid_batch[0])
#     #batch_y = np.array(intencode_batch[0])
#     ###########################

#     input_embedding = tf.matmul(x_video_flat,weights_enc) + bias_enc
#     input_embedding = tf.reshape(input_embedding,[-1, no_of_frames,n_hidden])
#     input_embed = tf.transpose(input_embedding, perm=[1, 0, 2])

#     with tf.device("/cpu:0"):
#         output_embedding = tf.Variable(tf.random_uniform((n_words, n_hidden),-0.1,0.1), name='dec_embedding')
#     # output_embed = tf.nn.embedding_lookup(output_embedding,y_label)
    
#     ## ENCODING #################################
    
#     with tf.variable_scope("LSTM1"):
#         lstm1 = tf.nn.rnn_cell.BasicLSTMCell(n_hidden,state_is_tuple=True)
#         lstm1 = tf.contrib.rnn.DropoutWrapper(lstm1, output_keep_prob=0.5)    

#     with tf.variable_scope("LSTM2"):
#         lstm2 = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, state_is_tuple=True)
#         lstm2 = tf.contrib.rnn.DropoutWrapper(lstm2, output_keep_prob=0.5)    


#     state1 = lstm1.zero_state(batch_size, dtype=tf.float32)
#     state2 = lstm2.zero_state(batch_size, dtype=tf.float32)
    
#     for i in range(0, no_of_frames):
        
#         if i > 0:
#                 tf.get_variable_scope().reuse_variables()
                
#         with tf.variable_scope("LSTM1"):
#             output1, state1 = lstm1(input_embed[i,:,:], state1)

#         with tf.variable_scope("LSTM2"):
#             output2, state2 = lstm2(tf.concat([padding, output1], axis=1), state2)
    
#     ## DECODING ##################################
    
#     bos = tf.ones([batch_size, n_hidden])
#     padding_in = tf.zeros([batch_size, n_hidden])

#     logits = []
#     cross_ent_list=[]
#     max_prob_index = None


#     for i in range(0, MAX_WORDS):
        
#         tf.get_variable_scope().reuse_variables()

        
#         with tf.variable_scope("LSTM1"):
#             output1, state1 = lstm1(padding_in, state1)
            
#         if i == 0:
            
#             with tf.variable_scope("LSTM2"):
#                 con = tf.concat([bos, output1], axis=1)
#                 output2, state2 = lstm2(con, state2)
                
#         else:
            
#             with tf.device("/cpu:0"):
            
#                 feed_in = y_label[:,i]
#                 #feed_in = tf.argmax()
#                 output_embed = tf.nn.embedding_lookup(output_embedding,feed_in)
                
#             with tf.variable_scope("LSTM2"):
#                 con = tf.concat([output_embed, output1], axis=1)
#                 output2, state2 = lstm2(con, state2)

#         logit_words = tf.matmul(output2, weights_dec) + bias_dec
#         logits.append(logit_words)

#         word_i = y_label[:,i]

#         one_hot_labels = tf.one_hot(word_i, n_words, on_value = 1, off_value = None, axis = 1) 
#         cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit_words, labels=one_hot_labels)
#         cross_ent_list.append(cross_entropy)
        
        
#         #current_loss = tf.reduce_sum(cross_entropy)/batch_size
#         #loss = loss + current_loss

#     cross_entropy_tensor = tf.stack(cross_ent_list, 1)
#     loss = tf.reduce_sum(cross_entropy_tensor, axis=1)
#     loss = tf.divide(loss, tf.cast(tf.Variable(sizeof_sentence), tf.float32))

#     loss = tf.reduce_mean(loss, axis=0)
    
#     summary = tf.summary.scalar('training_loss', loss)

#     params = tf.trainable_variables()
#     #optimizer = tf.train.AdamOptimizer(learning_rate)#.minimize(loss_op)
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate)
#     train_op = optimizer.minimize(loss)

#     #train_step = optimizer.minimize(loss)
    
# #     gradients, variables = zip(*optimizer.compute_gradients(loss))
# #     gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
# #     train_op = optimizer.apply_gradients(zip(gradients, params))
    
# #     logits = tf.stack(logits, axis = 0)
# #     logits = tf.reshape(logits, (sizeof_sentence, batch_size, n_words))
# #     logits = tf.transpose(logits, [1, 0, 2])
# #     preds = tf.argmax(logits,2)
# #     correct_pred = tf.equal(tf.argmax(preds,1), tf.argmax(y_label,1))
# #     accuracy = tf.reduce_mean(correct_pred)

#     logits = tf.stack(logits,axis=0)
#     logits = tf.transpose(logits, [1, 0, 2])
#     output_preds = tf.argmax(logits,2)
    
#     #correct_pred = tf.equal(tf.argmax(output_preds, 1), tf.argmax(y_label, 1))
#     #accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
#     saver = tf.train.Saver(max_to_keep=3)


In [24]:
# loss

In [25]:
# run_opts = tf.RunOptions(report_tensor_allocations_upon_oom = True)

# gpu_config = tf.ConfigProto()

# with tf.Session(graph=graph,config=gpu_config) as sess:

#     loss_list_train = []
#     loss_list_test = []
#     preds_dict = {}

#     sess.run(tf.global_variables_initializer())
#     epochs = 10

#     #training
#     n=0

#     for epoch in range(epochs):

#         for i in range(n_batches):

#             batch_x = np.array(vid_batch[i])
#             #batch_x = np.reshape(batch_x,[-1,n_features])
#             batch_y = np.array(intencode_batch[i])

#             _, batch_loss, preds = sess.run([train_op, loss, logits], feed_dict = {x_video: batch_x, y_label: batch_y})        

#             loss_list_train.append(batch_loss)
#             print("train: %f " % (batch_loss))

            
#             n = n+1

       
#     #testing
    
#         saver.save(sess,ckpt_path, global_step=n)
#         print('Model saved at ' + ckpt_path)
        
    
#     for i in range(n_batches_test):

#         batch_x_test = np.array(vid_batch_test[i])
#         #batch_x = np.reshape(batch_x,[-1,n_features])
#         batch_y_test = np.array(intencode_batch_test[i])

#         acc = sess.run(accuracy, feed_dict = {x_video: batch_x_test, y_label: batch_y_test})        
#         print("accuracy %f" % acc)

# #         loss_list_test.append(batch_loss)
# #         print("test:", batch_loss)
    
# #         preds_dict[i] = batch_preds 

In [26]:
# with tf.variable_scope("encoding") as encoding_scope:
#     lstm_enc = tf.contrib.rnn.BasicLSTMCell(n_hidden)
#     _, last_state = tf.nn.dynamic_rnn(lstm_enc, inputs=input_embed, dtype=tf.float32)

In [27]:
# with tf.variable_scope("decoding") as decoding_scope:
#     # TODO: create the decoder LSTMs, this is very similar to the above
#     # you will need to set initial_state=last_state from the encoder
#     lstm_dec = tf.contrib.rnn.BasicLSTMCell(n_hidden)
#     dec_outputs, _ = tf.nn.dynamic_rnn(lstm_dec,inputs=output_embed, dtype=tf.float32)

In [28]:
# #connect outputs to 
# logits = tf.contrib.layers.fully_connected(dec_outputs, num_outputs=len(index2token), activation_fn=None) 

# with tf.name_scope("optimization"):
#     # Loss function
#     loss = tf.contrib.seq2seq.sequence_loss(logits, targets, tf.ones([batch_size, sizeof_sentence]))
#     # Optimizer
#     optimizer = tf.train.RMSPropOptimizer(1e-3).minimize(loss)

In [29]:
# output_dec.get_shape().as_list()

In [30]:
# state_dec[0].get_shape().as_list()

In [31]:
# x_video.get_shape().as_list()

In [32]:
# from utilities import show_graph
# show_graph(tf.get_default_graph().as_graph_def())

In [33]:
# def RNN(x, weights1, biases1):
    
#     x = tf.unstack(x,no_of_frames,1)
    
#     lstm_encoder = tf.keras.layers.LSTM(n_hidden, return_state=True) #reuse=tf.AUTO_REUSE)
#     output_encoder,state_h,state_c = lstm_encoder(x) #,dtype=tf.float32)
#     encoder_states = [state_h,state_c]
    
#     decoder
    
#     return tf.matmul(output1[-1],weights1) + bias1

In [34]:
# np.shape(vid_batch[1])

In [35]:
# logits = RNN(x_video,weights1,bias1)
# prediction = tf.nn.softmax(logits)


# loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_label))


# optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
# train_op = optimizer.minimize(loss_op)

# # Evaluate model (with test logits, for dropout to be disabled)
# correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(y_label, 1))
# accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [36]:
# batch_y = np.array(intencode_batch[1])
# np.shape(batch_y)

In [37]:
# batch_x = np.array(vid_batch[0])
# print(np.shape(batch_x))
# batch_x = np.reshape(batch_x,[-1,n_features])
# np.shape(batch_x)

In [38]:
# preds_dict[0]
# def predicted_sentence(preds_dict):
    

In [39]:
# batch_x = np.array(vid_batch[0])
# batch_y = np.array(intencode_batch[0])

# with tf.Session() as sess:
#     sess.run(init)

#     sess.run(train_op, feed_dict={x_video: batch_x, y_label: batch_y})

In [40]:


# image_emb = tf.nn.xw_plus_b(x_video, weights1, bias1) 
# #image_emb = tf.reshape(image_emb, [batch_size, no_of_frames, n_hidden])

# #lstm2 = tf.keras.layers.LSTMCell(n_hidden)

# padding = tf.zeros([batch_size, n_hidden])


# #Only read the frames


        

            
                
# logit_words = tf.nn.xw_plus_b(output2, weights2, bias2)
# cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logit_words,onehot_encoded)

# loss = tf.reduce_sum(cross_entropy)

In [41]:
# with tf.Session() as sess:
#     with sess.as_default():
#         print(tf.nn.embedding_lookup(onehot_encoded,[1]).eval())

In [42]:
# inputs