In [None]:
# Reference Github
# https://github.com/ChanCheeKean/DataScience/blob/main/13%20-%20NLP/C04%20-%20BERT%20(Pytorch%20Scratch).ipynb

In [None]:
!pip install transformers datasets tokenizers
!wget http://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip
!unzip -qq cornell_movie_dialogs_corpus.zip
!rm cornell_movie_dialogs_corpus.zip
!mkdir datasets
!mv cornell\ movie-dialogs\ corpus/movie_conversations.txt ./datasets
!mv cornell\ movie-dialogs\ corpus/movie_lines.txt ./datasets

--2025-10-19 05:16:48--  http://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip
Resolving www.cs.cornell.edu (www.cs.cornell.edu)... 132.236.207.53
Connecting to www.cs.cornell.edu (www.cs.cornell.edu)|132.236.207.53|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip [following]
--2025-10-19 05:16:49--  https://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip
Connecting to www.cs.cornell.edu (www.cs.cornell.edu)|132.236.207.53|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9916637 (9.5M) [application/zip]
Saving to: ‘cornell_movie_dialogs_corpus.zip’


2025-10-19 05:16:53 (3.96 MB/s) - ‘cornell_movie_dialogs_corpus.zip’ saved [9916637/9916637]



In [None]:
import ast
import itertools
import math
import os
import random
import re
from pathlib import Path

import numpy as np
import torch
import torch.nn.functional as F
import tqdm
import transformers, datasets
from tokenizers import BertWordPieceTokenizer
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer

# 1) Tokenization (WordPiece Tokenizer)

[Huggingface WordPieceTokenizer](https://huggingface.co/learn/nlp-course/chapter6/6?fw=pt)

The tokenizer's primary job is to split the input text into smaller tokens. These tokens are usually words, subwords (WordPiece tokens), or characters, depending on the specific tokenizer and its configuration.

Subword Tokenization (WordPiece): BERT often uses subword tokenization, where words are further divided into smaller units called subword tokens. For instance, "unhappiness" might be broken down into ["un", "##hap", "##piness"]


By dividing the frequency of the pair by the product of the frequencies of each of its parts, the algorithm prioritizes the merging of pairs where the individual parts are less frequent in the vocabulary.

**score=(freq_of_pair)/(freq_of_first_element×freq_of_second_element)**

## 1.1 Tokenizer from Scratch

In [None]:
from collections import defaultdict
from transformers import AutoTokenizer
# Pretrained tokenizer; only its pre-tokenizer (word splitting) is used here.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

corpus = [
    "This is the Hugging Face Course.",
    "This chapter is about tokenization.",
    "This section shows several tokenizer algorithms.",
    "Hopefully, you will be able to understand how they are trained and generate tokens.",
]

### count how often each pre-tokenized word occurs in the corpus ###
word_freqs = defaultdict(int)
pre_tokenize = tokenizer.backend_tokenizer.pre_tokenizer.pre_tokenize_str
for text in corpus:
    # pre_tokenize yields (word, offsets) tuples; only the word is needed
    new_words = [piece for piece, _span in pre_tokenize(text)]
    print(new_words)
    for word in new_words:
        word_freqs[word] += 1

print(f"\nFinal Word Frequency: {word_freqs}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

['This', 'is', 'the', 'Hugging', 'Face', 'Course', '.']
['This', 'chapter', 'is', 'about', 'tokenization', '.']
['This', 'section', 'shows', 'several', 'tokenizer', 'algorithms', '.']
['Hopefully', ',', 'you', 'will', 'be', 'able', 'to', 'understand', 'how', 'they', 'are', 'trained', 'and', 'generate', 'tokens', '.']

Final Word Frequency: defaultdict(<class 'int'>, {'This': 3, 'is': 2, 'the': 1, 'Hugging': 1, 'Face': 1, 'Course': 1, '.': 4, 'chapter': 1, 'about': 1, 'tokenization': 1, 'section': 1, 'shows': 1, 'several': 1, 'tokenizer': 1, 'algorithms': 1, 'Hopefully': 1, ',': 1, 'you': 1, 'will': 1, 'be': 1, 'able': 1, 'to': 1, 'understand': 1, 'how': 1, 'they': 1, 'are': 1, 'trained': 1, 'and': 1, 'generate': 1, 'tokens': 1})


In [None]:
### build the character alphabet ###
# The first character of a word is kept as-is; every following character is
# stored with the "##" continuation prefix.
seen = set()
for word in word_freqs.keys():
    seen.add(word[0])
    seen.update(f"##{letter}" for letter in word[1:])

alphabet = sorted(seen)
print(f'All alphabets: {alphabet}')

### initial vocab = special tokens + alphabet; initial split = one token per character ###
special_tokens = ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]
vocab = special_tokens + list(alphabet)
splits = {
    word: list(word[:1]) + [f"##{ch}" for ch in word[1:]]
    for word in word_freqs
}
print(f'\nSplitted Words: {splits}')

All alphabets: ['##a', '##b', '##c', '##d', '##e', '##f', '##g', '##h', '##i', '##k', '##l', '##m', '##n', '##o', '##p', '##r', '##s', '##t', '##u', '##v', '##w', '##y', '##z', ',', '.', 'C', 'F', 'H', 'T', 'a', 'b', 'c', 'g', 'h', 'i', 's', 't', 'u', 'w', 'y']

Splitted Words: {'This': ['T', '##h', '##i', '##s'], 'is': ['i', '##s'], 'the': ['t', '##h', '##e'], 'Hugging': ['H', '##u', '##g', '##g', '##i', '##n', '##g'], 'Face': ['F', '##a', '##c', '##e'], 'Course': ['C', '##o', '##u', '##r', '##s', '##e'], '.': ['.'], 'chapter': ['c', '##h', '##a', '##p', '##t', '##e', '##r'], 'about': ['a', '##b', '##o', '##u', '##t'], 'tokenization': ['t', '##o', '##k', '##e', '##n', '##i', '##z', '##a', '##t', '##i', '##o', '##n'], 'section': ['s', '##e', '##c', '##t', '##i', '##o', '##n'], 'shows': ['s', '##h', '##o', '##w', '##s'], 'several': ['s', '##e', '##v', '##e', '##r', '##a', '##l'], 'tokenizer': ['t', '##o', '##k', '##e', '##n', '##i', '##z', '##e', '##r'], 'algorithms': ['a', '##l', '##g'

In [None]:
 ### compute score for merging ###
def compute_pair_scores(splits):
    """Score every adjacent token pair in the current word splits.

    score(pair) = freq(pair) / (freq(first) * freq(second)), where frequencies
    are weighted by the word counts in the module-level ``word_freqs``.

    :param splits: dict mapping each word to its current list of tokens
    :return: dict mapping (first, second) token pairs to their score
    """
    unit_freqs = defaultdict(int)
    pair_freqs = defaultdict(int)

    for word, freq in word_freqs.items():
        pieces = splits[word]
        # every token of the word contributes the word's frequency once
        for piece in pieces:
            unit_freqs[piece] += freq
        # every adjacent pair likewise
        for left, right in zip(pieces, pieces[1:]):
            pair_freqs[(left, right)] += freq

    return {
        pair: count / (unit_freqs[pair[0]] * unit_freqs[pair[1]])
        for pair, count in pair_freqs.items()
    }

pair_scores = compute_pair_scores(splits)
print(f'Scores for each Pair: {pair_scores}')

Scores for each Pair: {('T', '##h'): 0.125, ('##h', '##i'): 0.03409090909090909, ('##i', '##s'): 0.02727272727272727, ('i', '##s'): 0.1, ('t', '##h'): 0.03571428571428571, ('##h', '##e'): 0.011904761904761904, ('H', '##u'): 0.1, ('##u', '##g'): 0.05, ('##g', '##g'): 0.0625, ('##g', '##i'): 0.022727272727272728, ('##i', '##n'): 0.01652892561983471, ('##n', '##g'): 0.022727272727272728, ('F', '##a'): 0.14285714285714285, ('##a', '##c'): 0.07142857142857142, ('##c', '##e'): 0.023809523809523808, ('C', '##o'): 0.07692307692307693, ('##o', '##u'): 0.046153846153846156, ('##u', '##r'): 0.022222222222222223, ('##r', '##s'): 0.022222222222222223, ('##s', '##e'): 0.004761904761904762, ('c', '##h'): 0.125, ('##h', '##a'): 0.017857142857142856, ('##a', '##p'): 0.07142857142857142, ('##p', '##t'): 0.07142857142857142, ('##t', '##e'): 0.013605442176870748, ('##e', '##r'): 0.026455026455026454, ('a', '##b'): 0.2, ('##b', '##o'): 0.038461538461538464, ('##u', '##t'): 0.02857142857142857, ('t', '##o')

In [None]:
### merge pair ###
def merge_pair(a, b, splits):
    """Merge every adjacent occurrence of tokens ``a`` and ``b`` in all word splits.

    Iterates over the words in the module-level ``word_freqs`` and rewrites
    ``splits`` in place.

    :param a: first token of the pair
    :param b: second token of the pair (its "##" prefix, if any, is dropped on merge)
    :param splits: dict mapping each word to its current list of tokens
    :return: the same ``splits`` dict, updated
    """
    merged = a + b[2:] if b.startswith("##") else a + b
    for word in word_freqs:
        split = splits[word]
        if len(split) == 1:
            continue
        i = 0
        while i < len(split) - 1:
            if split[i] == a and split[i + 1] == b:
                # stay on the same index: the merged token may pair up again
                split = split[:i] + [merged] + split[i + 2 :]
            else:
                i += 1
        splits[word] = split
    return splits

### finding pair with best score ###
# max() returns the first maximal key, matching a manual "strictly greater" scan.
best_pair = max(pair_scores, key=pair_scores.get)
max_score = pair_scores[best_pair]
print(best_pair, max_score)

# Derive the new vocab entry from the winning pair instead of hard-coding it.
# NOTE: this cell mutates `vocab` and `splits`; re-running it repeats the merge.
first, second = best_pair
vocab.append(first + second[2:] if second.startswith("##") else first + second)

splits = merge_pair(first, second, splits)
print(splits["about"])

('a', '##b') 0.2
['ab', '##o', '##u', '##t']


In [None]:
### keep merging the best-scoring pair until the vocab reaches the target size
vocab_size = 70
while len(vocab) < vocab_size:
    scores = compute_pair_scores(splits)
    if not scores:
        # Every word is already a single token: nothing left to merge, so stop
        # instead of failing on an empty best pair.
        break
    # max() returns the first maximal key, same as a "strictly greater" scan
    best_pair = max(scores, key=scores.get)
    max_score = scores[best_pair]
    splits = merge_pair(*best_pair, splits)
    new_token = (
        best_pair[0] + best_pair[1][2:]
        if best_pair[1].startswith("##")
        else best_pair[0] + best_pair[1]
    )
    vocab.append(new_token)

print(f'Final Vocab: {vocab}')

Final Vocab: ['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]', '##a', '##b', '##c', '##d', '##e', '##f', '##g', '##h', '##i', '##k', '##l', '##m', '##n', '##o', '##p', '##r', '##s', '##t', '##u', '##v', '##w', '##y', '##z', ',', '.', 'C', 'F', 'H', 'T', 'a', 'b', 'c', 'g', 'h', 'i', 's', 't', 'u', 'w', 'y', 'ab', '##fu', 'Fa', 'Fac', '##ct', '##ful', '##full', '##fully', 'Th', 'ch', '##hm', 'cha', 'chap', 'chapt', '##thm', 'Hu', 'Hug', 'Hugg', 'sh', 'th', 'is', '##thms', '##za', '##zat', '##ut']


In [None]:
### to encode a word ###
def encode_word(word):
    """Greedy longest-prefix-match encoding of ``word`` against the global ``vocab``.

    :param word: a single pre-tokenized word
    :return: list of vocab tokens, or ["[UNK]"] if some remainder has no
        prefix in the vocab
    """
    tokens = []
    remaining = word
    while remaining:
        # look for the longest prefix of the remainder that is in the vocab
        for end in range(len(remaining), 0, -1):
            if remaining[:end] in vocab:
                break
        else:
            return ["[UNK]"]
        tokens.append(remaining[:end])
        remaining = remaining[end:]
        if remaining:
            # what is left continues the word, so it carries the "##" prefix
            remaining = f"##{remaining}"
    return tokens

print(encode_word("Hugging"))
print(encode_word("HOgging"))

['Hugg', '##i', '##n', '##g']
['[UNK]']


## 1.2 Tokenizer Training

In [None]:
### data processing
MAX_LEN = 64

### loading all data into memory
corpus_movie_conv = './datasets/movie_conversations.txt'
corpus_movie_lines = './datasets/movie_lines.txt'
with open(corpus_movie_conv, 'r', encoding='iso-8859-1') as conv_file:
    conv = conv_file.readlines()
with open(corpus_movie_lines, 'r', encoding='iso-8859-1') as lines_file:
    lines = lines_file.readlines()

### splitting text using the corpus field separator
# first field is the line id, last field is the utterance text
lines_dic = {}
for line in lines:
    objects = line.split(" +++$+++ ")
    lines_dic[objects[0]] = objects[-1]

### generate question answer pairs
pairs = []
for con in conv:
    # The last field is a Python-list literal of line ids. Parse it with
    # ast.literal_eval rather than eval so file content is never executed.
    ids = ast.literal_eval(con.split(" +++$+++ ")[-1])
    # consecutive utterances form a (first, second) pair
    for first_id, second_id in zip(ids, ids[1:]):
        first = lines_dic[first_id].strip()
        second = lines_dic[second_id].strip()
        # keep at most MAX_LEN whitespace-separated words per utterance
        pairs.append([
            ' '.join(first.split()[:MAX_LEN]),
            ' '.join(second.split()[:MAX_LEN]),
        ])

# sample
print(pairs[20])

["I really, really, really wanna go, but I can't. Not unless my sister goes.", "I'm workin' on it. But she doesn't seem to be goin' for him."]


In [None]:
# WordPiece tokenizer

### save data as txt files (10K samples per file) for tokenizer training
# exist_ok so the cell can be re-run without failing on the existing directory
os.makedirs('./data', exist_ok=True)
text_data = []
file_count = 0

for sample in tqdm.tqdm([x[0] for x in pairs]):
    text_data.append(sample)

    # once we hit the 10K mark, save to file
    if len(text_data) == 10000:
        with open(f'./data/text_{file_count}.txt', 'w', encoding='utf-8') as fp:
            fp.write('\n'.join(text_data))
        text_data = []
        file_count += 1

# Write the final partial chunk too; otherwise the last (< 10K) samples are
# silently left out of the tokenizer training data.
if text_data:
    with open(f'./data/text_{file_count}.txt', 'w', encoding='utf-8') as fp:
        fp.write('\n'.join(text_data))
    text_data = []
    file_count += 1

paths = [str(x) for x in Path('./data').glob('**/*.txt')]
print(len(paths))

100%|██████████| 221616/221616 [00:00<00:00, 2003385.67it/s]

22





In [None]:
### training own tokenizer
tokenizer = BertWordPieceTokenizer(
    clean_text=True,
    handle_chinese_chars=False,
    strip_accents=False,
    lowercase=True
)

# [PAD] is listed first so it receives id 0; the dataset labels, the embedding
# padding_idx and the attention mask below all rely on PAD == 0.
tokenizer.train(
    files=paths,
    vocab_size=30_000,
    min_frequency=5,
    limit_alphabet=1000,
    wordpieces_prefix='##',
    special_tokens=['[PAD]', '[CLS]', '[SEP]', '[MASK]', '[UNK]']
    )

# exist_ok so the cell can be re-run without failing on the existing directory
os.makedirs('./bert-it-1', exist_ok=True)
tokenizer.save_model('./bert-it-1', 'bert-it')

# reload the trained vocab through the transformers tokenizer and try it out
tokenizer = BertTokenizer.from_pretrained('./bert-it-1/bert-it-vocab.txt', local_files_only=True)
token_ids = tokenizer('I like surfboarding!')['input_ids']
print(token_ids)
print(tokenizer.convert_ids_to_tokens(token_ids))

[1, 48, 250, 4033, 3588, 154, 5, 2]
['[CLS]', 'i', 'like', 'surf', '##board', '##ing', '!', '[SEP]']




# 2) Pre-processing

In [None]:
class BERTDataset(Dataset):
    """Dataset producing BERT pre-training samples (masked LM + next sentence).

    Each item is a dict of tensors of length ``seq_len`` (except ``is_next``):
        bert_input    : token ids of "[CLS] s1 [SEP] s2 [SEP]", padded with [PAD]
        bert_label    : original ids at altered positions, IGNORE_LABEL elsewhere
        segment_label : 1 for the first sentence, 2 for the second, SEGMENT_PAD for padding
        is_next       : 1 if s2 is the real follow-up of s1, else 0

    :param data_pair: sequence of [sentence_1, sentence_2] string pairs
    :param tokenizer: callable tokenizer exposing ``vocab`` with the
        [PAD]/[CLS]/[SEP]/[MASK] entries; calling it on a word must return
        ``input_ids`` wrapped in one leading and one trailing special token
    :param seq_len: fixed length every sample is truncated/padded to
    """

    # Label value meaning "nothing to predict here". It is the literal 0, not
    # the [PAD] token id, so labels stay correct if [PAD] is not id 0.
    # NOTE(review): a loss that skips these positions must use ignore_index=0.
    IGNORE_LABEL = 0
    # Segment id for padded positions (real segments are 1 and 2).
    SEGMENT_PAD = 0

    def __init__(self, data_pair, tokenizer, seq_len=64):

        self.tokenizer = tokenizer
        self.seq_len = seq_len
        self.corpus_lines = len(data_pair)
        self.lines = data_pair

    def __len__(self):
        return self.corpus_lines

    def __getitem__(self, item):

        # Step 1: get random sentence pair, either negative or positive (saved as is_next_label)
        t1, t2, is_next_label = self.get_sent(item)

        # Step 2: replace random words in sentence with mask / random words
        t1_random, t1_label = self.random_word(t1)
        t2_random, t2_label = self.random_word(t2)

        # Look the special-token ids up once
        vocab = self.tokenizer.vocab
        cls_id, sep_id, pad_id = vocab['[CLS]'], vocab['[SEP]'], vocab['[PAD]']
        ignore = self.IGNORE_LABEL

        # Step 3: add CLS and SEP around the sentences; the special tokens are
        # never prediction targets, so their labels are IGNORE_LABEL
        t1 = [cls_id] + t1_random + [sep_id]
        t2 = t2_random + [sep_id]
        t1_label = [ignore] + t1_label + [ignore]
        t2_label = t2_label + [ignore]

        # Step 4: combine sentence 1 and 2 as one input, truncated to seq_len
        # NOTE(review): truncation can cut off the trailing [SEP] of long pairs
        segment_label = ([1] * len(t1) + [2] * len(t2))[:self.seq_len]
        bert_input = (t1 + t2)[:self.seq_len]
        bert_label = (t1_label + t2_label)[:self.seq_len]

        # Step 5: pad every sequence to seq_len, each with its own pad value
        n_pad = self.seq_len - len(bert_input)
        bert_input.extend([pad_id] * n_pad)
        bert_label.extend([ignore] * n_pad)
        segment_label.extend([self.SEGMENT_PAD] * n_pad)

        output = {"bert_input": bert_input,
                  "bert_label": bert_label,
                  "segment_label": segment_label,
                  "is_next": is_next_label}

        return {key: torch.tensor(value) for key, value in output.items()}

    def random_word(self, sentence):
        """Apply the 15% / 80-10-10 masking rule word by word.

        A whitespace-separated word is altered as a whole: all of its
        sub-word ids are masked, randomized or kept together.

        :param sentence: raw sentence string
        :return: (token ids after masking, labels) as two flat lists of equal
            length; labels hold the original id at altered positions and
            IGNORE_LABEL elsewhere
        """
        vocab = self.tokenizer.vocab
        mask_id = vocab['[MASK]']
        vocab_len = len(vocab)
        ignore = self.IGNORE_LABEL

        output = []
        output_label = []

        for token in sentence.split():
            prob = random.random()

            # sub-word ids of this word, without the wrapping special tokens
            token_ids = self.tokenizer(token)['input_ids'][1:-1]

            # 15% chance of altering the word
            if prob < 0.15:
                prob /= 0.15

                # 80% chance: replace with the mask token
                if prob < 0.8:
                    output.extend([mask_id] * len(token_ids))

                # 10% chance: replace with random tokens
                elif prob < 0.9:
                    output.extend(random.randrange(vocab_len) for _ in token_ids)

                # 10% chance: keep the original tokens
                else:
                    output.extend(token_ids)

                # the model must predict the original ids at these positions
                output_label.extend(token_ids)

            else:
                output.extend(token_ids)
                output_label.extend([ignore] * len(token_ids))

        assert len(output) == len(output_label)
        return output, output_label

    def get_sent(self, index):
        '''return a sentence pair and its is_next label (1 = real pair, 0 = random second sentence)'''
        t1, t2 = self.get_corpus_line(index)

        # negative or positive pair, for next sentence prediction
        if random.random() > 0.5:
            return t1, t2, 1
        else:
            # NOTE(review): the random line can be the true follow-up by chance
            return t1, self.get_random_line(), 0

    def get_corpus_line(self, item):
        '''return the sentence pair stored at index ``item``'''
        return self.lines[item][0], self.lines[item][1]

    def get_random_line(self):
        '''return the second sentence of a randomly chosen pair'''
        return self.lines[random.randrange(len(self.lines))][1]

In [None]:
# smoke test: build the dataset/loader and pull a single batch
print("\n")
train_data = BERTDataset(pairs, tokenizer=tokenizer, seq_len=MAX_LEN)
train_loader = DataLoader(
    train_data,
    batch_size=32,
    shuffle=True,
    pin_memory=True,
)
batch_iterator = iter(train_loader)
sample_data = next(batch_iterator)
print('Batch Size', sample_data['bert_input'].shape)

# inspect one random sample; token id 3 is [MASK]
sample_index = random.randrange(len(train_data))
result = train_data[sample_index]
result



Batch Size torch.Size([32, 64])


{'bert_input': tensor([    1,     3,     3,     3,     3,     2,    48,   204,    11,    59,
           210,    15, 11528,    48,   460,    15,   266,    11,    58,   213,
           218,   717,    11,    58,     3,     3,     2,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0]),
 'bert_label': tensor([   0,  569,    5,  569,    5,    0,    0,    0,    0,    0,    0,    0,
          250,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
         1003,   17,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0]),
 'segment_label': t

# 3) Modeling

In [None]:
### embedding
class PositionalEmbedding(torch.nn.Module):
    """Fixed sinusoidal positional encoding.

    PE(pos, 2k)   = sin(pos / 10000^(2k / d_model))
    PE(pos, 2k+1) = cos(pos / 10000^(2k / d_model))

    The table is stored as a non-persistent buffer: it follows the module
    across ``.to(device)`` calls, receives no gradient, and is not written to
    ``state_dict``.

    :param d_model: embedding dimension (odd values are supported)
    :param max_len: number of positions to precompute
    """

    def __init__(self, d_model, max_len=128):
        super().__init__()

        # (max_len, 1) column of positions
        position = torch.arange(max_len, dtype=torch.float).unsqueeze(1)
        # 1 / 10000^(2k / d_model) for every even index 2k, computed in log space
        div_term = torch.exp(
            torch.arange(0, d_model, 2, dtype=torch.float) * (-math.log(10000.0) / d_model)
        )

        pe = torch.zeros(max_len, d_model, dtype=torch.float)
        pe[:, 0::2] = torch.sin(position * div_term)
        # an odd d_model has one fewer cosine column than sine columns
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])

        # include the batch dimension: (1, max_len, d_model)
        self.register_buffer('pe', pe.unsqueeze(0), persistent=False)

    def forward(self, x):
        """Return the encodings for the first ``x.size(1)`` positions.

        :param x: tensor whose second dimension is the sequence length
        :return: tensor of shape (1, min(x.size(1), max_len), d_model)
        """
        return self.pe[:, :x.size(1)]

class BERTEmbedding(torch.nn.Module):
    """
    BERT input embedding: the sum of three components, followed by dropout.
        1. token embedding    : learned lookup table over the vocabulary
        2. position embedding : PositionalEmbedding output
        3. segment embedding  : learned table over segment ids (sent_A:1, sent_B:2, pad:0)
    """

    def __init__(self, vocab_size, embed_size, seq_len=64, dropout=0.1):
        """
        :param vocab_size: total vocab size
        :param embed_size: embedding size of token embedding
        :param seq_len: number of positions passed to PositionalEmbedding as max_len
        :param dropout: dropout rate
        """
        super().__init__()
        self.embed_size = embed_size

        # (m, seq_len) --> (m, seq_len, embed_size)
        # index 0 is the padding index for both tables, so its row is not trained
        self.token = torch.nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embed_size, padding_idx=0)
        self.segment = torch.nn.Embedding(
            num_embeddings=3, embedding_dim=embed_size, padding_idx=0)
        self.position = PositionalEmbedding(d_model=embed_size, max_len=seq_len)
        self.dropout = torch.nn.Dropout(p=dropout)

    def forward(self, sequence, segment_label):
        token_vecs = self.token(sequence)
        position_vecs = self.position(sequence)
        segment_vecs = self.segment(segment_label)
        summed = token_vecs + position_vecs + segment_vecs
        return self.dropout(summed)

### smoke test: embed the sample batch and check the output shape
embed_layer = BERTEmbedding(
    vocab_size=len(tokenizer.vocab),
    embed_size=768,
    seq_len=MAX_LEN,
)
embed_result = embed_layer(sample_data['bert_input'], sample_data['segment_label'])
print(embed_result.shape)

torch.Size([32, 64, 768])


In [None]:
### attention layers
class MultiHeadedAttention(torch.nn.Module):
    """Multi-head scaled dot-product attention with an output projection."""

    def __init__(self, heads, d_model, dropout=0.1):
        super().__init__()

        assert d_model % heads == 0
        self.d_k = d_model // heads
        self.heads = heads
        self.dropout = torch.nn.Dropout(dropout)

        self.query = torch.nn.Linear(d_model, d_model)
        self.key = torch.nn.Linear(d_model, d_model)
        self.value = torch.nn.Linear(d_model, d_model)
        self.output_linear = torch.nn.Linear(d_model, d_model)

    def _split_heads(self, projected):
        # (batch_size, max_len, d_model) --> (batch_size, h, max_len, d_k)
        batch_size = projected.shape[0]
        return projected.view(batch_size, -1, self.heads, self.d_k).transpose(1, 2)

    def forward(self, query, key, value, mask):
        """
        query, key, value of shape: (batch_size, max_len, d_model)
        mask: broadcastable against the (batch_size, h, max_len, max_len)
              scores; positions where mask == 0 are excluded from attention
        """
        q = self._split_heads(self.query(query))
        k = self._split_heads(self.key(key))
        v = self._split_heads(self.value(value))

        # scaled dot product: (batch_size, h, max_len, max_len)
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(q.size(-1))

        # a large negative score makes the softmax weight of masked positions ~0
        scores = scores.masked_fill(mask == 0, -1e9)

        # attention weights over the key positions, with dropout
        weights = self.dropout(F.softmax(scores, dim=-1))

        # weighted sum of values: (batch_size, h, max_len, d_k)
        context = torch.matmul(weights, v)

        # merge heads back: (batch_size, max_len, d_model)
        batch_size = context.shape[0]
        merged = context.transpose(1, 2).contiguous().view(batch_size, -1, self.heads * self.d_k)

        return self.output_linear(merged)

class FeedForward(torch.nn.Module):
    "Position-wise feed-forward network: Linear -> GELU -> Dropout -> Linear"

    def __init__(self, d_model, middle_dim=2048, dropout=0.1):
        super().__init__()

        self.fc1 = torch.nn.Linear(in_features=d_model, out_features=middle_dim)
        self.fc2 = torch.nn.Linear(in_features=middle_dim, out_features=d_model)
        self.dropout = torch.nn.Dropout(dropout)
        self.activation = torch.nn.GELU()

    def forward(self, x):
        hidden = self.fc1(x)
        hidden = self.activation(hidden)
        hidden = self.dropout(hidden)
        return self.fc2(hidden)

class EncoderLayer(torch.nn.Module):
    """One Transformer encoder block: self-attention and feed-forward, each
    followed by dropout, a residual connection and layer normalization.

    :param d_model: hidden size of the block
    :param heads: number of attention heads
    :param feed_forward_hidden: inner size of the feed-forward network
    :param dropout: dropout rate, now also forwarded to the attention and
        feed-forward sublayers (previously they always used their own 0.1 default)
    """

    def __init__(
        self,
        d_model=768,
        heads=12,
        feed_forward_hidden=768 * 4,
        dropout=0.1
        ):
        super(EncoderLayer, self).__init__()
        # NOTE(review): one LayerNorm is shared by both sublayers; the reference
        # Transformer uses a separate one per sublayer. Kept as-is so the
        # parameter names in existing checkpoints stay valid.
        self.layernorm = torch.nn.LayerNorm(d_model)
        self.self_multihead = MultiHeadedAttention(heads, d_model, dropout=dropout)
        self.feed_forward = FeedForward(d_model, middle_dim=feed_forward_hidden, dropout=dropout)
        self.dropout = torch.nn.Dropout(dropout)

    def forward(self, embeddings, mask):
        # embeddings: (batch_size, max_len, d_model)
        # mask: passed through to the attention layer unchanged
        # result: (batch_size, max_len, d_model)
        interacted = self.dropout(self.self_multihead(embeddings, embeddings, embeddings, mask))
        # residual connection + norm around the attention sublayer
        interacted = self.layernorm(interacted + embeddings)
        # residual connection + norm around the feed-forward sublayer
        feed_forward_out = self.dropout(self.feed_forward(interacted))
        encoded = self.layernorm(feed_forward_out + interacted)
        return encoded

### smoke test: run one encoder block on the embedded sample batch
# padding mask: (batch, seq) --> (batch, 1, seq, seq); True where the key token is not PAD (id 0)
input_ids = sample_data['bert_input']
mask = (input_ids > 0).unsqueeze(1).repeat(1, input_ids.size(1), 1).unsqueeze(1)
transformer_block = EncoderLayer()
transformer_result = transformer_block(embed_result, mask)
transformer_result.shape

torch.Size([32, 64, 768])

In [None]:
class BERT(torch.nn.Module):
    """
    BERT model : Bidirectional Encoder Representations from Transformers.
    """

    def __init__(self, vocab_size, d_model=768, n_layers=12, heads=12, dropout=0.1, seq_len=64):
        """
        :param vocab_size: vocab_size of total words
        :param d_model: BERT model hidden size
        :param n_layers: numbers of Transformer blocks(layers)
        :param heads: number of attention heads
        :param dropout: dropout rate, used by the embedding and every encoder block
        :param seq_len: number of positions the positional embedding covers
        """

        super().__init__()
        self.d_model = d_model
        self.n_layers = n_layers
        self.heads = heads

        # paper noted they used 4*hidden_size for ff_network_hidden_size
        self.feed_forward_hidden = d_model * 4

        # embedding for BERT, sum of positional, segment, token embeddings
        self.embedding = BERTEmbedding(
            vocab_size=vocab_size, embed_size=d_model, seq_len=seq_len, dropout=dropout)

        # multi-layers transformer blocks, deep network
        self.encoder_blocks = torch.nn.ModuleList(
            [EncoderLayer(d_model, heads, self.feed_forward_hidden, dropout) for _ in range(n_layers)])

    def forward(self, x, segment_info):
        """
        :param x: token ids, (batch_size, seq_len); id 0 is treated as padding
        :param segment_info: segment ids with the same shape as x
        :return: hidden states, (batch_size, seq_len, d_model)
        """
        # attention masking for padded token
        # (batch_size, seq_len) --> (batch_size, 1, seq_len, seq_len)
        mask = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1)

        # embedding the indexed sequence to sequence of vectors
        x = self.embedding(x, segment_info)

        # running over multiple transformer blocks; call the module (not
        # .forward) so registered hooks run
        for encoder in self.encoder_blocks:
            x = encoder(x, mask)
        return x

class NextSentencePrediction(torch.nn.Module):
    """
    Binary classification head (is_next / is_not_next) over the first token.
    """

    def __init__(self, hidden):
        """
        :param hidden: BERT model output size
        """
        super().__init__()
        self.linear = torch.nn.Linear(in_features=hidden, out_features=2)
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def forward(self, x):
        # position 0 holds the [CLS] token; only its hidden state is classified
        cls_hidden = x[:, 0]
        logits = self.linear(cls_hidden)
        return self.softmax(logits)

class MaskedLanguageModel(torch.nn.Module):
    """
    Per-position classification head over the vocabulary; returns the
    log-probabilities used to predict the original token at masked positions.
    """

    def __init__(self, hidden, vocab_size):
        """
        :param hidden: output size of BERT model
        :param vocab_size: total vocab size
        """
        super().__init__()
        self.linear = torch.nn.Linear(in_features=hidden, out_features=vocab_size)
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def forward(self, x):
        logits = self.linear(x)
        return self.softmax(logits)

class BERTLM(torch.nn.Module):
    """
    BERT pre-training model: a shared BERT encoder with two heads,
    next sentence prediction and masked language modeling.
    """

    def __init__(self, bert: BERT, vocab_size):
        """
        :param bert: BERT model which should be trained
        :param vocab_size: total vocab size for masked_lm
        """
        super().__init__()
        self.bert = bert
        hidden_size = self.bert.d_model
        self.next_sentence = NextSentencePrediction(hidden_size)
        self.mask_lm = MaskedLanguageModel(hidden_size, vocab_size)

    def forward(self, x, segment_label):
        # encode once, then feed the same hidden states to both heads
        hidden_states = self.bert(x, segment_label)
        next_sentence_out = self.next_sentence(hidden_states)
        mask_lm_out = self.mask_lm(hidden_states)
        return next_sentence_out, mask_lm_out

### smoke test: encoder alone, then encoder + both pre-training heads
n_vocab = len(tokenizer.vocab)
input_ids, segment_ids = sample_data['bert_input'], sample_data['segment_label']

bert_model = BERT(n_vocab)
bert_result = bert_model(input_ids, segment_ids)
print(bert_result.shape)

bert_lm = BERTLM(bert_model, len(tokenizer.vocab))
final_result = bert_lm(input_ids, segment_ids)
next_sentence_out, mask_lm_out = final_result[0], final_result[1]
print(next_sentence_out.shape, mask_lm_out.shape)

torch.Size([32, 64, 768])
torch.Size([32, 2]) torch.Size([32, 64, 21160])


# 4) WandB

In [None]:
import wandb

# SECURITY: never hard-code the API key in the notebook. Set WANDB_API_KEY in
# the environment (or a Colab secret exported to it). With key=None wandb falls
# back to its own credential lookup.
# NOTE(review): the key previously committed in this cell is exposed and should
# be revoked at wandb.ai/settings.
wandb.login(key=os.environ.get("WANDB_API_KEY"))
run = wandb.init(
    name = "baseline-bert", ## Wandb creates random run names if you skip this field
    project = "idl-midterm", ### Project should be created in your wandb account
)

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mekumara[0m ([33mekumara-carnegie-mellon-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
import os
checkpoint_dir = 'checkpoints'

# Make sure the checkpoint directory is present (no error if it already exists)
Path(checkpoint_dir).mkdir(parents=True, exist_ok=True)

In [None]:
def save_model(model, optimizer, metrics, epoch, path):
    """Write a training checkpoint to ``path`` with torch.save.

    The checkpoint is a dict with the keys 'model_state_dict',
    'optimizer_state_dict', 'metric' and 'epoch'.
    """
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'metric': metrics,
        'epoch': epoch,
    }
    torch.save(checkpoint, path)


def load_model(model, optimizer=None, path='./checkpoint.pth', map_location='cpu'):
    """Restore a checkpoint written by ``save_model``.

    :param model: module whose parameters are overwritten from the checkpoint
    :param optimizer: optional optimizer whose state is restored as well
    :param path: checkpoint file
    :param map_location: forwarded to ``torch.load``. The default 'cpu' lets a
        checkpoint saved on GPU load on a CPU-only machine; the state is then
        copied into ``model`` / ``optimizer`` when their state dicts are loaded.
    :return: (model, optimizer, epoch, metrics); optimizer is None when none was passed
    """
    checkpoint = torch.load(path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    metrics = checkpoint['metric']
    return model, optimizer, epoch, metrics

# 5) Training

In [None]:
### optimizer
class ScheduledOptim():
    '''Wraps an optimizer and sets its learning rate before every step.

    lr = d_model^-0.5 * min(step^-0.5, step * n_warmup_steps^-1.5)
    '''

    def __init__(self, optimizer, d_model, n_warmup_steps):
        self._optimizer = optimizer
        self.n_warmup_steps = n_warmup_steps
        self.n_current_steps = 0
        self.init_lr = np.power(d_model, -0.5)

    def step_and_update_lr(self):
        "Update the learning rate, then step the inner optimizer"
        self._update_learning_rate()
        self._optimizer.step()

    def zero_grad(self):
        "Clear the gradients held by the inner optimizer"
        self._optimizer.zero_grad()

    def _get_lr_scale(self):
        step = self.n_current_steps
        # inverse-square-root decay term
        decay = np.power(step, -0.5)
        # linear warm-up term
        warmup = np.power(self.n_warmup_steps, -1.5) * step
        return np.min([decay, warmup])

    def _update_learning_rate(self):
        ''' Advance the step counter and write the new lr into every param group '''

        self.n_current_steps += 1
        new_lr = self.init_lr * self._get_lr_scale()

        for group in self._optimizer.param_groups:
            group['lr'] = new_lr

In [None]:
### trainer
class BERTTrainer:
    """Run train / test passes of a BERT language model over a data loader.

    Each batch is expected to be a dict of tensors with the keys
    ``"bert_input"``, ``"segment_label"``, ``"is_next"`` and ``"bert_label"``.
    The model is called with ``(bert_input, segment_label)`` and must return
    ``(next_sent_output, mask_lm_output)``. Both outputs are fed to
    ``NLLLoss``, so presumably they are log-probabilities -- confirm against
    the model heads.

    Args:
        model: the language model; must expose ``model.bert.d_model``.
        train_dataloader: loader used by ``train``.
        test_dataloader: optional loader used by ``test``.
        lr, weight_decay, betas: forwarded to ``Adam``. Note that ``lr`` is
            only the initial value: ``ScheduledOptim`` overwrites the learning
            rate of every param group on each step.
        warmup_steps: forwarded to ``ScheduledOptim`` as ``n_warmup_steps``.
        log_freq: write the running stats every ``log_freq`` batches.
        device: device the model and every batch are moved to.
        save_freq: log to wandb (and, while training, write a checkpoint)
            every ``save_freq`` batches.
        checkpoint_path: file the checkpoint is written to; defaults to
            ``checkpoints/best_ret.pth``.
    """

    def __init__(
        self,
        model,
        train_dataloader,
        test_dataloader=None,
        lr= 1e-4,
        weight_decay=0.01,
        betas=(0.9, 0.999),
        warmup_steps=10000,
        log_freq=10,
        device='cuda',
        save_freq=50,
        checkpoint_path=None,
        ):

        self.device = device
        # Batches are moved to `device` in `iteration`, so the model has to live
        # there as well; this is a no-op when it is already on that device.
        self.model = model.to(device)
        self.train_data = train_dataloader
        self.test_data = test_dataloader

        # Setting the Adam optimizer with hyper-param
        self.optim = Adam(self.model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)
        self.optim_schedule = ScheduledOptim(
            self.optim, self.model.bert.d_model, n_warmup_steps=warmup_steps
            )

        # Masked-token loss: label 0 is skipped (presumably 0 marks positions
        # that were not masked / padding -- confirm against the dataset).
        self.criterion = torch.nn.NLLLoss(ignore_index=0)
        # Next-sentence loss needs its own criterion: 0 is a real class there
        # (assumes is_next holds 0/1 class indices -- confirm against the
        # dataset). Reusing ignore_index=0 would silently drop every label-0
        # sample from that loss.
        self.nsp_criterion = torch.nn.NLLLoss()

        self.log_freq = log_freq
        self.save_freq = save_freq
        if checkpoint_path is None:
            checkpoint_path = os.path.join('checkpoints', 'best_ret.pth')
        self.checkpoint_path = checkpoint_path
        # Make sure the target folder exists so the first save cannot fail.
        checkpoint_folder = os.path.dirname(self.checkpoint_path)
        if checkpoint_folder:
            os.makedirs(checkpoint_folder, exist_ok=True)

        print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))

    def train(self, epoch):
        """Run one optimisation pass over the training loader."""
        self.iteration(epoch, self.train_data)

    def test(self, epoch):
        """Run one evaluation pass over the test loader.

        Raises:
            ValueError: if the trainer was built without ``test_dataloader``.
        """
        if self.test_data is None:
            raise ValueError("test_dataloader was not provided to BERTTrainer")
        self.iteration(epoch, self.test_data, train=False)

    def iteration(self, epoch, data_loader, train=True):
        """Process every batch of ``data_loader`` once.

        Args:
            epoch: epoch index, used for logging and stored in the checkpoint.
            data_loader: iterable of batch dicts that also supports ``len()``.
            train: when true, back-propagate and step the optimizer; when
                false, run in eval mode without gradients and never write a
                checkpoint.
        """
        avg_loss = 0.0
        total_correct = 0
        total_element = 0

        mode = "train" if train else "test"
        # Switch dropout & co. to the right behaviour for this pass.
        self.model.train(train)

        n_batches = len(data_loader)

        # progress bar
        data_iter = tqdm.tqdm(
            enumerate(data_loader),
            desc="EP_%s:%d" % (mode, epoch),
            total=n_batches,
            bar_format="{l_bar}{r_bar}"
        )

        for i, data in data_iter:

            # 0. batch_data will be sent into the device(GPU or cpu)
            data = {key: value.to(self.device) for key, value in data.items()}

            # Gradients are only tracked while training.
            with torch.set_grad_enabled(train):
                # 1. forward the next_sentence_prediction and masked_lm model
                next_sent_output, mask_lm_output = self.model(data["bert_input"], data["segment_label"])

                # 2-1. NLL(negative log likelihood) loss of is_next classification result
                next_loss = self.nsp_criterion(next_sent_output, data["is_next"])

                # 2-2. NLLLoss of predicting masked token word
                # transpose to (m, vocab_size, seq_len) vs (m, seq_len)
                mask_loss = self.criterion(mask_lm_output.transpose(1, 2), data["bert_label"])

                # 2-3. Adding next_loss and mask_loss : 3.4 Pre-training Procedure
                loss = next_loss + mask_loss

            # 3. backward and optimization only in train
            if train:
                self.optim_schedule.zero_grad()
                loss.backward()
                self.optim_schedule.step_and_update_lr()

            # next sentence prediction accuracy
            correct = next_sent_output.argmax(dim=-1).eq(data["is_next"]).sum().item()
            batch_loss = loss.item()
            avg_loss += batch_loss
            total_correct += correct
            total_element += data["is_next"].nelement()

            running_loss = avg_loss / (i + 1)
            running_acc = total_correct / total_element * 100

            if i % self.log_freq == 0:
                post_fix = {
                    "epoch": epoch,
                    "iter": i,
                    "avg_loss": running_loss,
                    "avg_acc": running_acc,
                    "loss": batch_loss
                }
                data_iter.write(str(post_fix))

            # Saving to WandB
            if i % self.save_freq == 0:
                metrics = {
                    "avg_loss": running_loss,
                    "avg_acc": running_acc,
                }
                wandb.log(metrics)

                # Only a training pass may overwrite the checkpoint. Keyword
                # arguments keep metrics/epoch from being swapped
                # (save_model's positional order is metrics, then epoch).
                if train:
                    save_model(
                        self.model,
                        self.optim,
                        metrics=metrics,
                        epoch=epoch,
                        path=self.checkpoint_path,
                    )
                    wandb.save(self.checkpoint_path)
                    # NOTE: this is a periodic snapshot, not a "best so far"
                    # selection; the message is kept for log compatibility.
                    print("Saved best retrieval model")

        # max(..., 1) keeps the summary from dividing by zero on an empty loader.
        print(
            f"EP{epoch}, {mode}: \
            avg_loss={avg_loss / max(n_batches, 1)}, \
            total_acc={total_correct * 100.0 / max(total_element, 1)}"
        )






### test
# Smoke run: dataset -> loader -> model -> trainer, then train for `epochs` passes on CPU.
epochs = 2

train_data = BERTDataset(pairs, seq_len=MAX_LEN, tokenizer=tokenizer)
train_loader = DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True,
    pin_memory=True,
)

bert_model = BERT(len(tokenizer.vocab))
bert_lm = BERTLM(bert_model, len(tokenizer.vocab))
bert_trainer = BERTTrainer(
    model=bert_lm,
    train_dataloader=train_loader,
    device='cpu',
)

for epoch in range(epochs):
    bert_trainer.train(epoch=epoch)

Total Parameters: 117562794


EP_train:0:   0%|| 0/6926 [00:04<?, ?it/s]

{'epoch': 0, 'iter': 0, 'avg_loss': 10.690760612487793, 'avg_acc': 46.875, 'loss': 10.690760612487793}


EP_train:0:   0%|| 1/6926 [00:05<11:28:12,  5.96s/it]

Saved best retrieval model


EP_train:0:   0%|| 11/6926 [00:56<8:49:12,  4.59s/it]

{'epoch': 0, 'iter': 10, 'avg_loss': 10.646477005698465, 'avg_acc': 51.42045454545454, 'loss': 10.454598426818848}


EP_train:0:   0%|| 21/6926 [01:39<8:15:24,  4.30s/it]

{'epoch': 0, 'iter': 20, 'avg_loss': 10.493613016037713, 'avg_acc': 52.67857142857143, 'loss': 10.180461883544922}


EP_train:0:   0%|| 31/6926 [02:21<8:00:16,  4.18s/it]

{'epoch': 0, 'iter': 30, 'avg_loss': 10.358673526394751, 'avg_acc': 52.21774193548387, 'loss': 10.065469741821289}


EP_train:0:   1%|| 41/6926 [03:04<8:07:53,  4.25s/it]

{'epoch': 0, 'iter': 40, 'avg_loss': 10.247738372988817, 'avg_acc': 52.21036585365854, 'loss': 9.798974990844727}


EP_train:0:   1%|| 50/6926 [03:46<8:07:25,  4.25s/it]

{'epoch': 0, 'iter': 50, 'avg_loss': 10.141657511393229, 'avg_acc': 51.470588235294116, 'loss': 9.658942222595215}


EP_train:0:   1%|| 51/6926 [03:48<9:10:21,  4.80s/it]

Saved best retrieval model


EP_train:0:   1%|| 61/6926 [04:32<8:14:51,  4.33s/it]

{'epoch': 0, 'iter': 60, 'avg_loss': 10.036072324533931, 'avg_acc': 50.870901639344254, 'loss': 9.240114212036133}


EP_train:0:   1%|| 71/6926 [05:16<8:23:35,  4.41s/it]

{'epoch': 0, 'iter': 70, 'avg_loss': 9.93045312585965, 'avg_acc': 50.70422535211267, 'loss': 9.379166603088379}


EP_train:0:   1%|| 81/6926 [05:58<7:58:56,  4.20s/it]

{'epoch': 0, 'iter': 80, 'avg_loss': 9.808631143452208, 'avg_acc': 50.11574074074075, 'loss': 8.86337661743164}


EP_train:0:   1%|| 91/6926 [06:40<8:14:37,  4.34s/it]

{'epoch': 0, 'iter': 90, 'avg_loss': 9.706417660136799, 'avg_acc': 49.65659340659341, 'loss': 8.724859237670898}


EP_train:0:   1%|| 100/6926 [07:23<8:17:49,  4.38s/it]

{'epoch': 0, 'iter': 100, 'avg_loss': 9.61286975369595, 'avg_acc': 49.62871287128713, 'loss': 8.615191459655762}


EP_train:0:   1%|| 101/6926 [07:25<8:58:23,  4.73s/it]

Saved best retrieval model


EP_train:0:   2%|| 111/6926 [08:09<8:26:09,  4.46s/it]

{'epoch': 0, 'iter': 110, 'avg_loss': 9.51564342052013, 'avg_acc': 49.35247747747748, 'loss': 8.398938179016113}


EP_train:0:   2%|| 121/6926 [08:52<7:56:14,  4.20s/it]

{'epoch': 0, 'iter': 120, 'avg_loss': 9.429592369016538, 'avg_acc': 49.199380165289256, 'loss': 8.46854019165039}


EP_train:0:   2%|| 131/6926 [09:35<8:13:33,  4.36s/it]

{'epoch': 0, 'iter': 130, 'avg_loss': 9.354604073153197, 'avg_acc': 49.35591603053435, 'loss': 8.648183822631836}


EP_train:0:   2%|| 141/6926 [10:17<7:53:46,  4.19s/it]

{'epoch': 0, 'iter': 140, 'avg_loss': 9.28503464976101, 'avg_acc': 49.77836879432624, 'loss': 8.523433685302734}


EP_train:0:   2%|| 150/6926 [11:00<7:54:10,  4.20s/it]

{'epoch': 0, 'iter': 150, 'avg_loss': 9.227450361314988, 'avg_acc': 49.64817880794702, 'loss': 8.39984130859375}


EP_train:0:   2%|| 151/6926 [11:02<9:07:33,  4.85s/it]

Saved best retrieval model


EP_train:0:   2%|| 161/6926 [11:45<8:09:29,  4.34s/it]

{'epoch': 0, 'iter': 160, 'avg_loss': 9.159559723753366, 'avg_acc': 49.767080745341616, 'loss': 8.261091232299805}


EP_train:0:   2%|| 171/6926 [12:29<8:05:22,  4.31s/it]

{'epoch': 0, 'iter': 170, 'avg_loss': 9.097580845593011, 'avg_acc': 49.92690058479532, 'loss': 8.235220909118652}


EP_train:0:   3%|| 181/6926 [13:12<7:54:37,  4.22s/it]

{'epoch': 0, 'iter': 180, 'avg_loss': 9.043909233578002, 'avg_acc': 50.017265193370164, 'loss': 8.305355072021484}


EP_train:0:   3%|| 191/6926 [13:54<7:55:20,  4.23s/it]

{'epoch': 0, 'iter': 190, 'avg_loss': 8.993996844865888, 'avg_acc': 49.967277486910994, 'loss': 7.944213390350342}


EP_train:0:   3%|| 200/6926 [14:37<7:59:58,  4.28s/it]

{'epoch': 0, 'iter': 200, 'avg_loss': 8.948381502236893, 'avg_acc': 50.01554726368159, 'loss': 8.196093559265137}


EP_train:0:   3%|| 201/6926 [14:39<8:46:18,  4.70s/it]

Saved best retrieval model


EP_train:0:   3%|| 211/6926 [15:24<8:13:31,  4.41s/it]

{'epoch': 0, 'iter': 210, 'avg_loss': 8.902123148407416, 'avg_acc': 50.355450236966824, 'loss': 7.987750053405762}


EP_train:0:   3%|| 221/6926 [16:08<8:08:42,  4.37s/it]

{'epoch': 0, 'iter': 220, 'avg_loss': 8.861587891211876, 'avg_acc': 50.254524886877824, 'loss': 8.094436645507812}


EP_train:0:   3%|| 231/6926 [16:50<7:46:18,  4.18s/it]

{'epoch': 0, 'iter': 230, 'avg_loss': 8.823937554380079, 'avg_acc': 50.21645021645021, 'loss': 7.700632572174072}


EP_train:0:   3%|| 241/6926 [17:33<8:09:20,  4.39s/it]

{'epoch': 0, 'iter': 240, 'avg_loss': 8.784534135794738, 'avg_acc': 50.142634854771785, 'loss': 7.734757900238037}


EP_train:0:   4%|| 250/6926 [18:15<7:58:07,  4.30s/it]

{'epoch': 0, 'iter': 250, 'avg_loss': 8.75065777121312, 'avg_acc': 50.199203187250994, 'loss': 7.916275501251221}


EP_train:0:   4%|| 251/6926 [18:20<10:18:22,  5.56s/it]

Saved best retrieval model


EP_train:0:   4%|| 261/6926 [19:05<8:16:15,  4.47s/it]

{'epoch': 0, 'iter': 260, 'avg_loss': 8.713793392839102, 'avg_acc': 49.940134099616856, 'loss': 7.671811103820801}


EP_train:0:   4%|| 271/6926 [19:49<8:08:03,  4.40s/it]

{'epoch': 0, 'iter': 270, 'avg_loss': 8.680683491414763, 'avg_acc': 50.01153136531366, 'loss': 8.132514953613281}


EP_train:0:   4%|| 281/6926 [20:31<7:47:29,  4.22s/it]

{'epoch': 0, 'iter': 280, 'avg_loss': 8.647273666069601, 'avg_acc': 49.988879003558715, 'loss': 7.51145601272583}


EP_train:0:   4%|| 291/6926 [21:15<8:02:40,  4.36s/it]

{'epoch': 0, 'iter': 290, 'avg_loss': 8.617369209368205, 'avg_acc': 50.075171821305844, 'loss': 8.088874816894531}


EP_train:0:   4%|| 300/6926 [21:57<7:54:09,  4.29s/it]

{'epoch': 0, 'iter': 300, 'avg_loss': 8.588375031354023, 'avg_acc': 50.01038205980066, 'loss': 7.532581329345703}


EP_train:0:   4%|| 301/6926 [22:01<9:56:09,  5.40s/it]

Saved best retrieval model


EP_train:0:   4%|| 311/6926 [22:45<8:10:38,  4.45s/it]

{'epoch': 0, 'iter': 310, 'avg_loss': 8.558218216972719, 'avg_acc': 49.979903536977496, 'loss': 7.777943134307861}


EP_train:0:   5%|| 321/6926 [23:30<8:01:53,  4.38s/it]

{'epoch': 0, 'iter': 320, 'avg_loss': 8.52342805387075, 'avg_acc': 49.980529595015575, 'loss': 7.570486068725586}


EP_train:0:   5%|| 331/6926 [24:12<7:46:48,  4.25s/it]

{'epoch': 0, 'iter': 330, 'avg_loss': 8.497213814553897, 'avg_acc': 50.0094410876133, 'loss': 7.587167263031006}


EP_train:0:   5%|| 341/6926 [24:56<7:56:34,  4.34s/it]

{'epoch': 0, 'iter': 340, 'avg_loss': 8.468269612432575, 'avg_acc': 50.0733137829912, 'loss': 7.3385090827941895}


EP_train:0:   5%|| 350/6926 [25:38<7:50:10,  4.29s/it]

{'epoch': 0, 'iter': 350, 'avg_loss': 8.439055821834467, 'avg_acc': 50.035612535612536, 'loss': 7.450539588928223}


EP_train:0:   5%|| 351/6926 [25:40<8:35:20,  4.70s/it]

Saved best retrieval model


EP_train:0:   5%|| 361/6926 [26:26<8:15:47,  4.53s/it]

{'epoch': 0, 'iter': 360, 'avg_loss': 8.412821950674717, 'avg_acc': 49.99134349030471, 'loss': 7.43425178527832}


EP_train:0:   5%|| 371/6926 [27:10<8:03:09,  4.42s/it]

{'epoch': 0, 'iter': 370, 'avg_loss': 8.384412666536727, 'avg_acc': 49.90734501347709, 'loss': 7.599414825439453}


EP_train:0:   6%|| 381/6926 [27:53<7:46:49,  4.28s/it]

{'epoch': 0, 'iter': 380, 'avg_loss': 8.3580590220574, 'avg_acc': 49.737532808398946, 'loss': 7.4740471839904785}


EP_train:0:   6%|| 391/6926 [28:36<8:02:50,  4.43s/it]

{'epoch': 0, 'iter': 390, 'avg_loss': 8.329935845816532, 'avg_acc': 49.736253196930946, 'loss': 6.960076332092285}


EP_train:0:   6%|| 400/6926 [29:18<7:49:04,  4.31s/it]

{'epoch': 0, 'iter': 400, 'avg_loss': 8.303705681589179, 'avg_acc': 49.836346633416454, 'loss': 7.194542407989502}


EP_train:0:   6%|| 401/6926 [29:20<8:41:53,  4.80s/it]

Saved best retrieval model


EP_train:0:   6%|| 411/6926 [30:06<8:08:58,  4.50s/it]

{'epoch': 0, 'iter': 410, 'avg_loss': 8.279543856924757, 'avg_acc': 49.85553527980535, 'loss': 7.311093807220459}


EP_train:0:   6%|| 421/6926 [30:50<7:46:10,  4.30s/it]

{'epoch': 0, 'iter': 420, 'avg_loss': 8.251034692460738, 'avg_acc': 49.7624703087886, 'loss': 6.898789882659912}


EP_train:0:   6%|| 431/6926 [31:33<7:48:28,  4.33s/it]

{'epoch': 0, 'iter': 430, 'avg_loss': 8.224460747844935, 'avg_acc': 49.86223897911833, 'loss': 6.939790725708008}


EP_train:0:   6%|| 441/6926 [32:16<7:40:10,  4.26s/it]

{'epoch': 0, 'iter': 440, 'avg_loss': 8.199203365784383, 'avg_acc': 49.851190476190474, 'loss': 6.993175506591797}


EP_train:0:   6%|| 450/6926 [32:58<7:38:03,  4.24s/it]

{'epoch': 0, 'iter': 450, 'avg_loss': 8.171193003390158, 'avg_acc': 49.91685144124168, 'loss': 6.693414688110352}


EP_train:0:   7%|| 451/6926 [33:00<8:40:08,  4.82s/it]

Saved best retrieval model


EP_train:0:   7%|| 461/6926 [33:45<8:16:08,  4.60s/it]

{'epoch': 0, 'iter': 460, 'avg_loss': 8.149705346908073, 'avg_acc': 49.830531453362255, 'loss': 7.356692314147949}


EP_train:0:   7%|| 471/6926 [34:29<7:40:56,  4.28s/it]

{'epoch': 0, 'iter': 470, 'avg_loss': 8.126916198973444, 'avg_acc': 49.89384288747346, 'loss': 7.243117809295654}


EP_train:0:   7%|| 481/6926 [35:12<7:45:21,  4.33s/it]

{'epoch': 0, 'iter': 480, 'avg_loss': 8.101669584143435, 'avg_acc': 49.857068607068605, 'loss': 7.109935760498047}


EP_train:0:   7%|| 491/6926 [35:55<7:35:38,  4.25s/it]

{'epoch': 0, 'iter': 490, 'avg_loss': 8.081600795209772, 'avg_acc': 49.84725050916497, 'loss': 7.063744068145752}


EP_train:0:   7%|| 500/6926 [36:38<7:37:46,  4.27s/it]

{'epoch': 0, 'iter': 500, 'avg_loss': 8.057259960326844, 'avg_acc': 49.831586826347305, 'loss': 7.040037155151367}


EP_train:0:   7%|| 501/6926 [36:40<8:50:06,  4.95s/it]

Saved best retrieval model


EP_train:0:   7%|| 511/6926 [37:25<7:55:23,  4.45s/it]

{'epoch': 0, 'iter': 510, 'avg_loss': 8.034603735705412, 'avg_acc': 49.8899217221135, 'loss': 6.9747467041015625}


EP_train:0:   8%|| 521/6926 [38:10<7:44:27,  4.35s/it]

{'epoch': 0, 'iter': 520, 'avg_loss': 8.00827222868028, 'avg_acc': 49.92802303262956, 'loss': 7.030702590942383}


EP_train:0:   8%|| 531/6926 [38:53<7:47:09,  4.38s/it]

{'epoch': 0, 'iter': 530, 'avg_loss': 7.985155306742465, 'avg_acc': 50.00588512241054, 'loss': 6.527298450469971}


EP_train:0:   8%|| 541/6926 [39:36<7:36:17,  4.29s/it]

{'epoch': 0, 'iter': 540, 'avg_loss': 7.961790918642844, 'avg_acc': 49.98267097966728, 'loss': 6.264068126678467}


EP_train:0:   8%|| 550/6926 [40:20<7:35:01,  4.28s/it]

{'epoch': 0, 'iter': 550, 'avg_loss': 7.938423694153663, 'avg_acc': 50.01134301270418, 'loss': 6.695698261260986}


EP_train:0:   8%|| 551/6926 [40:21<8:52:18,  5.01s/it]

Saved best retrieval model


EP_train:0:   8%|| 561/6926 [41:06<7:44:29,  4.38s/it]

{'epoch': 0, 'iter': 560, 'avg_loss': 7.914496613058814, 'avg_acc': 49.94986631016043, 'loss': 6.39730167388916}


EP_train:0:   8%|| 571/6926 [41:51<7:36:04,  4.31s/it]

{'epoch': 0, 'iter': 570, 'avg_loss': 7.890501479968925, 'avg_acc': 49.90696147110333, 'loss': 6.911412239074707}


EP_train:0:   8%|| 581/6926 [42:34<7:51:06,  4.45s/it]

{'epoch': 0, 'iter': 580, 'avg_loss': 7.867538154227804, 'avg_acc': 49.91932013769363, 'loss': 6.453516960144043}


EP_train:0:   9%|| 591/6926 [43:17<7:27:57,  4.24s/it]

{'epoch': 0, 'iter': 590, 'avg_loss': 7.843743287165556, 'avg_acc': 49.957698815566836, 'loss': 6.574686050415039}


EP_train:0:   9%|| 600/6926 [43:59<7:26:17,  4.23s/it]

{'epoch': 0, 'iter': 600, 'avg_loss': 7.8212417175686495, 'avg_acc': 49.95320299500832, 'loss': 6.4789910316467285}


EP_train:0:   9%|| 601/6926 [44:01<8:25:45,  4.80s/it]

Saved best retrieval model


EP_train:0:   9%|| 611/6926 [44:45<7:44:46,  4.42s/it]

{'epoch': 0, 'iter': 610, 'avg_loss': 7.797546969693148, 'avg_acc': 50.06137479541734, 'loss': 5.884100914001465}


EP_train:0:   9%|| 621/6926 [45:31<7:44:54,  4.42s/it]

{'epoch': 0, 'iter': 620, 'avg_loss': 7.775191188818399, 'avg_acc': 50.10064412238325, 'loss': 6.115276336669922}


EP_train:0:   9%|| 631/6926 [46:13<7:22:27,  4.22s/it]

{'epoch': 0, 'iter': 630, 'avg_loss': 7.7536875457280034, 'avg_acc': 50.15352614896989, 'loss': 6.218494415283203}


EP_train:0:   9%|| 641/6926 [46:56<7:25:37,  4.25s/it]

{'epoch': 0, 'iter': 640, 'avg_loss': 7.733514373648371, 'avg_acc': 50.195007800312005, 'loss': 6.296043395996094}


EP_train:0:   9%|| 650/6926 [47:39<7:32:14,  4.32s/it]

{'epoch': 0, 'iter': 650, 'avg_loss': 7.7116830579696165, 'avg_acc': 50.22561443932412, 'loss': 6.713423252105713}


EP_train:0:   9%|| 651/6926 [47:40<8:23:53,  4.82s/it]

Saved best retrieval model


EP_train:0:  10%|| 661/6926 [48:25<7:40:28,  4.41s/it]

{'epoch': 0, 'iter': 660, 'avg_loss': 7.693629688286024, 'avg_acc': 50.184379727685325, 'loss': 6.756500244140625}


EP_train:0:  10%|| 671/6926 [49:09<7:46:13,  4.47s/it]

{'epoch': 0, 'iter': 670, 'avg_loss': 7.672980561519344, 'avg_acc': 50.1443740685544, 'loss': 6.087534427642822}


EP_train:0:  10%|| 681/6926 [49:52<7:20:31,  4.23s/it]

{'epoch': 0, 'iter': 680, 'avg_loss': 7.654182978838726, 'avg_acc': 50.13307635829663, 'loss': 6.073360443115234}


EP_train:0:  10%|| 691/6926 [50:34<7:38:01,  4.41s/it]

{'epoch': 0, 'iter': 690, 'avg_loss': 7.635043428534192, 'avg_acc': 50.16732995658466, 'loss': 6.500669479370117}


EP_train:0:  10%|| 700/6926 [51:17<7:27:08,  4.31s/it]

{'epoch': 0, 'iter': 700, 'avg_loss': 7.615971034671713, 'avg_acc': 50.23626961483595, 'loss': 6.216538906097412}


EP_train:0:  10%|| 701/6926 [51:19<8:19:23,  4.81s/it]

Saved best retrieval model


EP_train:0:  10%|| 711/6926 [52:04<7:46:27,  4.50s/it]

{'epoch': 0, 'iter': 710, 'avg_loss': 7.594702308188008, 'avg_acc': 50.215365682137836, 'loss': 6.194437503814697}


EP_train:0:  10%|| 721/6926 [52:49<7:33:57,  4.39s/it]

{'epoch': 0, 'iter': 720, 'avg_loss': 7.577523170661662, 'avg_acc': 50.2123786407767, 'loss': 6.093455791473389}


EP_train:0:  11%|| 731/6926 [53:32<7:36:45,  4.42s/it]

{'epoch': 0, 'iter': 730, 'avg_loss': 7.558755824406072, 'avg_acc': 50.2265731874145, 'loss': 6.035729885101318}


EP_train:0:  11%|| 741/6926 [54:16<7:30:16,  4.37s/it]

{'epoch': 0, 'iter': 740, 'avg_loss': 7.541424859390568, 'avg_acc': 50.24038461538461, 'loss': 5.851457595825195}


EP_train:0:  11%|| 750/6926 [54:58<7:21:43,  4.29s/it]

{'epoch': 0, 'iter': 750, 'avg_loss': 7.522869437734551, 'avg_acc': 50.22470039946738, 'loss': 6.356179714202881}


EP_train:0:  11%|| 751/6926 [55:00<8:03:33,  4.70s/it]

Saved best retrieval model


EP_train:0:  11%|| 761/6926 [55:45<7:46:01,  4.54s/it]

{'epoch': 0, 'iter': 760, 'avg_loss': 7.50497322483539, 'avg_acc': 50.31208935611038, 'loss': 6.24270486831665}


EP_train:0:  11%|| 771/6926 [56:30<7:40:18,  4.49s/it]

{'epoch': 0, 'iter': 770, 'avg_loss': 7.485819889568324, 'avg_acc': 50.360732814526585, 'loss': 6.346202373504639}


EP_train:0:  11%|| 781/6926 [57:12<7:15:41,  4.25s/it]

{'epoch': 0, 'iter': 780, 'avg_loss': 7.468860498005847, 'avg_acc': 50.36811779769527, 'loss': 6.448996543884277}


EP_train:0:  11%|| 791/6926 [57:56<7:35:40,  4.46s/it]

{'epoch': 0, 'iter': 790, 'avg_loss': 7.452080355884152, 'avg_acc': 50.33580910240202, 'loss': 6.2828192710876465}


EP_train:0:  12%|| 800/6926 [58:38<7:17:56,  4.29s/it]

{'epoch': 0, 'iter': 800, 'avg_loss': 7.436083894841531, 'avg_acc': 50.362827715355806, 'loss': 6.434177398681641}


EP_train:0:  12%|| 801/6926 [58:40<8:08:51,  4.79s/it]

Saved best retrieval model


EP_train:0:  12%|| 811/6926 [59:25<7:33:52,  4.45s/it]

{'epoch': 0, 'iter': 810, 'avg_loss': 7.420256092574829, 'avg_acc': 50.31211467324292, 'loss': 6.362661361694336}


EP_train:0:  12%|| 821/6926 [1:00:09<7:13:02,  4.26s/it]

{'epoch': 0, 'iter': 820, 'avg_loss': 7.405882606552812, 'avg_acc': 50.28547503045067, 'loss': 6.234382629394531}


EP_train:0:  12%|| 831/6926 [1:00:52<7:19:52,  4.33s/it]

{'epoch': 0, 'iter': 830, 'avg_loss': 7.390290581649248, 'avg_acc': 50.30460288808665, 'loss': 5.988588333129883}


EP_train:0:  12%|| 841/6926 [1:01:35<7:07:03,  4.21s/it]

{'epoch': 0, 'iter': 840, 'avg_loss': 7.375400180907368, 'avg_acc': 50.30098097502973, 'loss': 5.7018585205078125}


EP_train:0:  12%|| 850/6926 [1:02:19<7:31:37,  4.46s/it]

{'epoch': 0, 'iter': 850, 'avg_loss': 7.359289836659415, 'avg_acc': 50.37455934195064, 'loss': 6.193755149841309}


EP_train:0:  12%|| 851/6926 [1:02:20<8:12:07,  4.86s/it]

Saved best retrieval model


EP_train:0:  12%|| 861/6926 [1:03:06<7:35:53,  4.51s/it]

{'epoch': 0, 'iter': 860, 'avg_loss': 7.345063926173974, 'avg_acc': 50.34843205574913, 'loss': 5.926050662994385}


EP_train:0:  13%|| 871/6926 [1:03:50<7:17:01,  4.33s/it]

{'epoch': 0, 'iter': 870, 'avg_loss': 7.331413930921412, 'avg_acc': 50.32649253731343, 'loss': 6.013798236846924}


EP_train:0:  13%|| 881/6926 [1:04:34<7:12:32,  4.29s/it]

{'epoch': 0, 'iter': 880, 'avg_loss': 7.317966981318428, 'avg_acc': 50.33342792281499, 'loss': 6.030752658843994}


EP_train:0:  13%|| 891/6926 [1:05:16<7:13:02,  4.31s/it]

{'epoch': 0, 'iter': 890, 'avg_loss': 7.303826560075034, 'avg_acc': 50.34722222222222, 'loss': 5.9634552001953125}


EP_train:0:  13%|| 900/6926 [1:05:59<7:22:06,  4.40s/it]

{'epoch': 0, 'iter': 900, 'avg_loss': 7.289087603015455, 'avg_acc': 50.315621531631514, 'loss': 5.8802971839904785}


EP_train:0:  13%|| 901/6926 [1:06:01<8:09:50,  4.88s/it]

Saved best retrieval model


EP_train:0:  13%|| 911/6926 [1:06:47<7:31:07,  4.50s/it]

{'epoch': 0, 'iter': 910, 'avg_loss': 7.27502114058326, 'avg_acc': 50.319017563117455, 'loss': 5.999695301055908}


EP_train:0:  13%|| 921/6926 [1:07:31<7:13:36,  4.33s/it]

{'epoch': 0, 'iter': 920, 'avg_loss': 7.260901007408946, 'avg_acc': 50.366449511400646, 'loss': 6.099306106567383}


EP_train:0:  13%|| 931/6926 [1:08:15<7:14:05,  4.34s/it]

{'epoch': 0, 'iter': 930, 'avg_loss': 7.246011810323221, 'avg_acc': 50.355800214822764, 'loss': 5.957241058349609}


EP_train:0:  14%|| 941/6926 [1:08:58<7:04:54,  4.26s/it]

{'epoch': 0, 'iter': 940, 'avg_loss': 7.233733280558895, 'avg_acc': 50.358660998937296, 'loss': 6.277170658111572}


EP_train:0:  14%|| 950/6926 [1:09:41<7:22:15,  4.44s/it]

{'epoch': 0, 'iter': 950, 'avg_loss': 7.220800435881509, 'avg_acc': 50.35817560462671, 'loss': 5.6109619140625}


EP_train:0:  14%|| 951/6926 [1:09:42<7:56:03,  4.78s/it]

Saved best retrieval model


EP_train:0:  14%|| 961/6926 [1:10:27<7:28:00,  4.51s/it]

{'epoch': 0, 'iter': 960, 'avg_loss': 7.208943904872739, 'avg_acc': 50.32843392299687, 'loss': 6.314985275268555}


EP_train:0:  14%|| 971/6926 [1:11:10<7:04:25,  4.28s/it]

{'epoch': 0, 'iter': 970, 'avg_loss': 7.196581143674841, 'avg_acc': 50.363671472708546, 'loss': 5.829097270965576}


EP_train:0:  14%|| 981/6926 [1:11:54<7:17:25,  4.41s/it]

{'epoch': 0, 'iter': 980, 'avg_loss': 7.185136652624692, 'avg_acc': 50.32810907237513, 'loss': 5.8015899658203125}


EP_train:0:  14%|| 991/6926 [1:12:36<7:00:59,  4.26s/it]

{'epoch': 0, 'iter': 990, 'avg_loss': 7.1734583452419125, 'avg_acc': 50.318491422805245, 'loss': 5.964298725128174}


EP_train:0:  14%|| 1000/6926 [1:13:20<7:01:04,  4.26s/it]

{'epoch': 0, 'iter': 1000, 'avg_loss': 7.160881334965999, 'avg_acc': 50.312187812187815, 'loss': 5.986917018890381}


EP_train:0:  14%|| 1001/6926 [1:13:22<8:08:22,  4.95s/it]

Saved best retrieval model


EP_train:0:  15%|| 1011/6926 [1:14:07<7:17:54,  4.44s/it]

{'epoch': 0, 'iter': 1010, 'avg_loss': 7.148645583529147, 'avg_acc': 50.302917903066266, 'loss': 5.994096279144287}


EP_train:0:  15%|| 1021/6926 [1:14:51<7:06:53,  4.34s/it]

{'epoch': 0, 'iter': 1020, 'avg_loss': 7.137995062332078, 'avg_acc': 50.2968903036239, 'loss': 5.974404335021973}


EP_train:0:  15%|| 1031/6926 [1:15:34<7:15:36,  4.43s/it]

{'epoch': 0, 'iter': 1030, 'avg_loss': 7.1258917553232894, 'avg_acc': 50.315227934044614, 'loss': 5.906978130340576}


EP_train:0:  15%|| 1041/6926 [1:16:17<6:55:22,  4.23s/it]

{'epoch': 0, 'iter': 1040, 'avg_loss': 7.113862044071487, 'avg_acc': 50.28518251681076, 'loss': 5.977720260620117}


EP_train:0:  15%|| 1050/6926 [1:17:00<6:54:47,  4.24s/it]

{'epoch': 0, 'iter': 1050, 'avg_loss': 7.101457202014869, 'avg_acc': 50.279495718363464, 'loss': 5.763434886932373}


EP_train:0:  15%|| 1051/6926 [1:17:02<8:06:35,  4.97s/it]

Saved best retrieval model


EP_train:0:  15%|| 1061/6926 [1:17:47<7:18:37,  4.49s/it]

{'epoch': 0, 'iter': 1060, 'avg_loss': 7.089359555797236, 'avg_acc': 50.238572101790766, 'loss': 5.927086353302002}


EP_train:0:  15%|| 1071/6926 [1:18:32<7:07:51,  4.38s/it]

{'epoch': 0, 'iter': 1070, 'avg_loss': 7.0783302202946, 'avg_acc': 50.224673202614376, 'loss': 6.231943130493164}


EP_train:0:  16%|| 1081/6926 [1:19:14<6:47:47,  4.19s/it]

{'epoch': 0, 'iter': 1080, 'avg_loss': 7.068109515857961, 'avg_acc': 50.222594819611466, 'loss': 5.862454891204834}


EP_train:0:  16%|| 1091/6926 [1:19:57<6:57:34,  4.29s/it]

{'epoch': 0, 'iter': 1090, 'avg_loss': 7.059001859670162, 'avg_acc': 50.19477543538039, 'loss': 5.998575210571289}


EP_train:0:  16%|| 1100/6926 [1:20:40<6:56:05,  4.29s/it]

{'epoch': 0, 'iter': 1100, 'avg_loss': 7.050482921444428, 'avg_acc': 50.20152134423251, 'loss': 6.19914436340332}


EP_train:0:  16%|| 1101/6926 [1:20:42<7:53:43,  4.88s/it]

Saved best retrieval model


EP_train:0:  16%|| 1111/6926 [1:21:27<7:14:01,  4.48s/it]

{'epoch': 0, 'iter': 1110, 'avg_loss': 7.041398447553496, 'avg_acc': 50.18845634563457, 'loss': 5.741978645324707}


EP_train:0:  16%|| 1121/6926 [1:22:11<7:00:26,  4.35s/it]

{'epoch': 0, 'iter': 1120, 'avg_loss': 7.030261126508892, 'avg_acc': 50.200713648528094, 'loss': 6.062503814697266}


EP_train:0:  16%|| 1131/6926 [1:22:54<6:51:00,  4.26s/it]

{'epoch': 0, 'iter': 1130, 'avg_loss': 7.0209630726713925, 'avg_acc': 50.21828028293546, 'loss': 5.812361240386963}


EP_train:0:  16%|| 1141/6926 [1:23:37<6:54:54,  4.30s/it]

{'epoch': 0, 'iter': 1140, 'avg_loss': 7.011217281130925, 'avg_acc': 50.202673093777385, 'loss': 6.36078405380249}


EP_train:0:  17%|| 1150/6926 [1:24:20<6:54:04,  4.30s/it]

{'epoch': 0, 'iter': 1150, 'avg_loss': 7.0012238400589375, 'avg_acc': 50.16561685490878, 'loss': 5.389241695404053}


EP_train:0:  17%|| 1151/6926 [1:24:22<7:35:57,  4.74s/it]

Saved best retrieval model


EP_train:0:  17%|| 1161/6926 [1:25:06<7:01:16,  4.38s/it]

{'epoch': 0, 'iter': 1160, 'avg_loss': 6.991933901077915, 'avg_acc': 50.196490094745904, 'loss': 6.0083794593811035}


EP_train:0:  17%|| 1171/6926 [1:25:51<7:05:20,  4.43s/it]

{'epoch': 0, 'iter': 1170, 'avg_loss': 6.9820668442853995, 'avg_acc': 50.17613151152861, 'loss': 6.098381519317627}


EP_train:0:  17%|| 1181/6926 [1:26:34<6:45:58,  4.24s/it]

{'epoch': 0, 'iter': 1180, 'avg_loss': 6.972683828630052, 'avg_acc': 50.193162574089754, 'loss': 5.99972677230835}


EP_train:0:  17%|| 1191/6926 [1:27:17<6:50:38,  4.30s/it]

{'epoch': 0, 'iter': 1190, 'avg_loss': 6.962981099545906, 'avg_acc': 50.236146095717885, 'loss': 5.438050746917725}


EP_train:0:  17%|| 1200/6926 [1:28:00<6:53:42,  4.33s/it]

{'epoch': 0, 'iter': 1200, 'avg_loss': 6.953642870960982, 'avg_acc': 50.226373855120734, 'loss': 5.843163013458252}


EP_train:0:  17%|| 1201/6926 [1:28:02<7:33:23,  4.75s/it]

Saved best retrieval model


EP_train:0:  17%|| 1211/6926 [1:28:46<7:02:46,  4.44s/it]

{'epoch': 0, 'iter': 1210, 'avg_loss': 6.94512887634983, 'avg_acc': 50.23482658959537, 'loss': 5.8827595710754395}


EP_train:0:  18%|| 1221/6926 [1:29:32<7:07:27,  4.50s/it]

{'epoch': 0, 'iter': 1220, 'avg_loss': 6.936114928255698, 'avg_acc': 50.24570024570024, 'loss': 5.293395042419434}


EP_train:0:  18%|| 1231/6926 [1:30:14<6:46:14,  4.28s/it]

{'epoch': 0, 'iter': 1230, 'avg_loss': 6.927641145394741, 'avg_acc': 50.27162875710805, 'loss': 6.201196670532227}


EP_train:0:  18%|| 1241/6926 [1:30:57<6:51:25,  4.34s/it]

{'epoch': 0, 'iter': 1240, 'avg_loss': 6.919307940434679, 'avg_acc': 50.30469379532635, 'loss': 6.192110061645508}


EP_train:0:  18%|| 1250/6926 [1:31:41<6:55:27,  4.39s/it]

{'epoch': 0, 'iter': 1250, 'avg_loss': 6.91116536321114, 'avg_acc': 50.32224220623501, 'loss': 5.633075714111328}


EP_train:0:  18%|| 1251/6926 [1:31:43<7:40:23,  4.87s/it]

Saved best retrieval model


EP_train:0:  18%|| 1261/6926 [1:32:28<7:04:26,  4.50s/it]

{'epoch': 0, 'iter': 1260, 'avg_loss': 6.9024831010272445, 'avg_acc': 50.28003568596352, 'loss': 5.71074914932251}


EP_train:0:  18%|| 1271/6926 [1:33:11<6:45:43,  4.30s/it]

{'epoch': 0, 'iter': 1270, 'avg_loss': 6.894303694379133, 'avg_acc': 50.28766719118805, 'loss': 6.072355270385742}


EP_train:0:  18%|| 1281/6926 [1:33:54<6:44:36,  4.30s/it]

{'epoch': 0, 'iter': 1280, 'avg_loss': 6.88575996708628, 'avg_acc': 50.28786104605777, 'loss': 5.968165397644043}


EP_train:0:  19%|| 1291/6926 [1:34:37<6:51:06,  4.38s/it]

{'epoch': 0, 'iter': 1290, 'avg_loss': 6.8773431449228255, 'avg_acc': 50.273528272656854, 'loss': 6.440834045410156}


EP_train:0:  19%|| 1300/6926 [1:35:21<6:53:30,  4.41s/it]

{'epoch': 0, 'iter': 1300, 'avg_loss': 6.869656455782539, 'avg_acc': 50.257013835511145, 'loss': 5.718069076538086}


EP_train:0:  19%|| 1301/6926 [1:35:22<7:30:51,  4.81s/it]

Saved best retrieval model


EP_train:0:  19%|| 1311/6926 [1:36:08<7:03:58,  4.53s/it]

{'epoch': 0, 'iter': 1310, 'avg_loss': 6.86256950598105, 'avg_acc': 50.247902364607164, 'loss': 5.987573146820068}


EP_train:0:  19%|| 1321/6926 [1:36:51<6:35:46,  4.24s/it]

{'epoch': 0, 'iter': 1320, 'avg_loss': 6.854588245822118, 'avg_acc': 50.22946631339894, 'loss': 5.840002536773682}


EP_train:0:  19%|| 1331/6926 [1:37:35<6:47:41,  4.37s/it]

{'epoch': 0, 'iter': 1330, 'avg_loss': 6.846933047096144, 'avg_acc': 50.25356874530428, 'loss': 5.883354663848877}


EP_train:0:  19%|| 1341/6926 [1:38:17<6:34:36,  4.24s/it]

{'epoch': 0, 'iter': 1340, 'avg_loss': 6.838906746849385, 'avg_acc': 50.21206189410887, 'loss': 5.26711893081665}


EP_train:0:  19%|| 1350/6926 [1:39:00<6:34:49,  4.25s/it]

{'epoch': 0, 'iter': 1350, 'avg_loss': 6.831764542730008, 'avg_acc': 50.20586602516654, 'loss': 5.829281806945801}


EP_train:0:  20%|| 1351/6926 [1:39:02<7:23:52,  4.78s/it]

Saved best retrieval model


EP_train:0:  20%|| 1361/6926 [1:39:47<7:03:51,  4.57s/it]

{'epoch': 0, 'iter': 1360, 'avg_loss': 6.8250206177239905, 'avg_acc': 50.22042615723733, 'loss': 5.112452983856201}


EP_train:0:  20%|| 1371/6926 [1:40:30<6:32:59,  4.24s/it]

{'epoch': 0, 'iter': 1370, 'avg_loss': 6.818632273670533, 'avg_acc': 50.22109773887673, 'loss': 5.6088714599609375}


EP_train:0:  20%|| 1381/6926 [1:41:14<6:46:37,  4.40s/it]

{'epoch': 0, 'iter': 1380, 'avg_loss': 6.810885889300568, 'avg_acc': 50.219496741491675, 'loss': 5.527835369110107}


EP_train:0:  20%|| 1391/6926 [1:41:56<6:28:24,  4.21s/it]

{'epoch': 0, 'iter': 1390, 'avg_loss': 6.804720819981984, 'avg_acc': 50.21791876347951, 'loss': 5.893643856048584}


EP_train:0:  20%|| 1400/6926 [1:42:39<6:31:51,  4.25s/it]

{'epoch': 0, 'iter': 1400, 'avg_loss': 6.798516391942027, 'avg_acc': 50.234207708779444, 'loss': 6.114810943603516}


EP_train:0:  20%|| 1401/6926 [1:42:41<7:43:15,  5.03s/it]

Saved best retrieval model


EP_train:0:  20%|| 1411/6926 [1:43:26<6:44:41,  4.40s/it]

{'epoch': 0, 'iter': 1410, 'avg_loss': 6.791167186558627, 'avg_acc': 50.234762579730685, 'loss': 6.045042514801025}


EP_train:0:  21%|| 1421/6926 [1:44:10<6:39:10,  4.35s/it]

{'epoch': 0, 'iter': 1420, 'avg_loss': 6.784784095879594, 'avg_acc': 50.24850457424349, 'loss': 5.544323444366455}


EP_train:0:  21%|| 1431/6926 [1:44:53<6:34:20,  4.31s/it]

{'epoch': 0, 'iter': 1430, 'avg_loss': 6.778532150823032, 'avg_acc': 50.270789657582114, 'loss': 5.927574157714844}


EP_train:0:  21%|| 1441/6926 [1:45:36<6:32:10,  4.29s/it]

{'epoch': 0, 'iter': 1440, 'avg_loss': 6.772601140529889, 'avg_acc': 50.312283136710626, 'loss': 6.163679599761963}


EP_train:0:  21%|| 1450/6926 [1:46:20<6:28:27,  4.26s/it]

{'epoch': 0, 'iter': 1450, 'avg_loss': 6.766662626575388, 'avg_acc': 50.307977257064096, 'loss': 5.664700984954834}


EP_train:0:  21%|| 1451/6926 [1:46:21<7:32:36,  4.96s/it]

Saved best retrieval model


EP_train:0:  21%|| 1461/6926 [1:47:06<6:45:49,  4.46s/it]

{'epoch': 0, 'iter': 1460, 'avg_loss': 6.759358662599411, 'avg_acc': 50.3187029431896, 'loss': 5.088288307189941}


EP_train:0:  21%|| 1471/6926 [1:47:51<6:34:12,  4.34s/it]

{'epoch': 0, 'iter': 1470, 'avg_loss': 6.755225784514406, 'avg_acc': 50.348402447314754, 'loss': 6.119655132293701}


EP_train:0:  21%|| 1481/6926 [1:48:33<6:18:55,  4.18s/it]

{'epoch': 0, 'iter': 1480, 'avg_loss': 6.749019250283766, 'avg_acc': 50.384031060094536, 'loss': 5.860291004180908}


EP_train:0:  22%|| 1491/6926 [1:49:16<6:27:54,  4.28s/it]

{'epoch': 0, 'iter': 1490, 'avg_loss': 6.744062917493159, 'avg_acc': 50.40451039570758, 'loss': 5.760251522064209}


EP_train:0:  22%|| 1500/6926 [1:49:59<6:28:48,  4.30s/it]

{'epoch': 0, 'iter': 1500, 'avg_loss': 6.738279732126621, 'avg_acc': 50.39556962025317, 'loss': 5.72380256652832}


EP_train:0:  22%|| 1501/6926 [1:50:01<7:07:39,  4.73s/it]

Saved best retrieval model


EP_train:0:  22%|| 1511/6926 [1:50:45<6:37:04,  4.40s/it]

{'epoch': 0, 'iter': 1510, 'avg_loss': 6.732605637184916, 'avg_acc': 50.39708802117803, 'loss': 6.08982515335083}


EP_train:0:  22%|| 1521/6926 [1:51:30<6:43:29,  4.48s/it]

{'epoch': 0, 'iter': 1520, 'avg_loss': 6.725798682113136, 'avg_acc': 50.38214990138067, 'loss': 5.23518180847168}


EP_train:0:  22%|| 1531/6926 [1:52:12<6:18:59,  4.21s/it]

{'epoch': 0, 'iter': 1530, 'avg_loss': 6.719122153804786, 'avg_acc': 50.4041476159373, 'loss': 5.734588146209717}


EP_train:0:  22%|| 1541/6926 [1:52:54<6:17:44,  4.21s/it]

{'epoch': 0, 'iter': 1540, 'avg_loss': 6.713256712473498, 'avg_acc': 50.38530175210902, 'loss': 5.917321681976318}


EP_train:0:  22%|| 1550/6926 [1:53:37<6:26:47,  4.32s/it]

{'epoch': 0, 'iter': 1550, 'avg_loss': 6.707162568063447, 'avg_acc': 50.392891682785304, 'loss': 6.052020072937012}


EP_train:0:  22%|| 1551/6926 [1:53:39<7:14:01,  4.84s/it]

Saved best retrieval model


EP_train:0:  23%|| 1561/6926 [1:54:24<6:50:59,  4.60s/it]

{'epoch': 0, 'iter': 1560, 'avg_loss': 6.7024020678259335, 'avg_acc': 50.36835361947469, 'loss': 5.874720096588135}


EP_train:0:  23%|| 1571/6926 [1:55:07<6:20:10,  4.26s/it]

{'epoch': 0, 'iter': 1570, 'avg_loss': 6.696387818783882, 'avg_acc': 50.34412794398472, 'loss': 5.6764984130859375}


EP_train:0:  23%|| 1581/6926 [1:55:51<6:29:44,  4.37s/it]

{'epoch': 0, 'iter': 1580, 'avg_loss': 6.691885214678326, 'avg_acc': 50.363693864642634, 'loss': 5.651889801025391}


EP_train:0:  23%|| 1591/6926 [1:56:33<6:16:32,  4.23s/it]

{'epoch': 0, 'iter': 1590, 'avg_loss': 6.686401524534771, 'avg_acc': 50.381049654305464, 'loss': 5.990461349487305}


EP_train:0:  23%|| 1600/6926 [1:57:17<6:20:19,  4.28s/it]

{'epoch': 0, 'iter': 1600, 'avg_loss': 6.681555516864269, 'avg_acc': 50.39038101186758, 'loss': 6.5478363037109375}


EP_train:0:  23%|| 1601/6926 [1:57:19<7:24:41,  5.01s/it]

Saved best retrieval model


EP_train:0:  23%|| 1611/6926 [1:58:03<6:29:54,  4.40s/it]

{'epoch': 0, 'iter': 1610, 'avg_loss': 6.676237914667301, 'avg_acc': 50.38601800124146, 'loss': 5.719473361968994}


EP_train:0:  23%|| 1621/6926 [1:58:48<6:21:42,  4.32s/it]

{'epoch': 0, 'iter': 1620, 'avg_loss': 6.670923581320441, 'avg_acc': 50.3778531770512, 'loss': 5.77672004699707}


EP_train:0:  24%|| 1631/6926 [1:59:32<6:35:07,  4.48s/it]

{'epoch': 0, 'iter': 1630, 'avg_loss': 6.6656049082865385, 'avg_acc': 50.39278050275904, 'loss': 5.978705883026123}


EP_train:0:  24%|| 1641/6926 [2:00:15<6:16:53,  4.28s/it]

{'epoch': 0, 'iter': 1640, 'avg_loss': 6.660366784211414, 'avg_acc': 50.40181291895186, 'loss': 5.923183441162109}


EP_train:0:  24%|| 1650/6926 [2:00:59<6:14:55,  4.26s/it]

{'epoch': 0, 'iter': 1650, 'avg_loss': 6.654836878207581, 'avg_acc': 50.414521502119925, 'loss': 5.469391822814941}


EP_train:0:  24%|| 1651/6926 [2:01:01<7:10:38,  4.90s/it]

Saved best retrieval model


EP_train:0:  24%|| 1661/6926 [2:01:45<6:33:08,  4.48s/it]

{'epoch': 0, 'iter': 1660, 'avg_loss': 6.650366195779762, 'avg_acc': 50.42519566526189, 'loss': 5.7305073738098145}


EP_train:0:  24%|| 1671/6926 [2:02:30<6:15:45,  4.29s/it]

{'epoch': 0, 'iter': 1670, 'avg_loss': 6.644989363523674, 'avg_acc': 50.43948234590066, 'loss': 5.8946533203125}


EP_train:0:  24%|| 1681/6926 [2:03:13<6:28:57,  4.45s/it]

{'epoch': 0, 'iter': 1680, 'avg_loss': 6.640289670863654, 'avg_acc': 50.43500892325996, 'loss': 5.794483661651611}


EP_train:0:  24%|| 1691/6926 [2:03:56<6:10:15,  4.24s/it]

{'epoch': 0, 'iter': 1690, 'avg_loss': 6.635143175130762, 'avg_acc': 50.41210821998817, 'loss': 5.549271583557129}


EP_train:0:  25%|| 1700/6926 [2:04:39<6:11:12,  4.26s/it]

{'epoch': 0, 'iter': 1700, 'avg_loss': 6.6291854024143655, 'avg_acc': 50.380291005291, 'loss': 5.387052536010742}


EP_train:0:  25%|| 1701/6926 [2:04:41<7:04:17,  4.87s/it]

Saved best retrieval model


EP_train:0:  25%|| 1711/6926 [2:05:26<6:26:38,  4.45s/it]

{'epoch': 0, 'iter': 1710, 'avg_loss': 6.623897965507574, 'avg_acc': 50.39267971946231, 'loss': 6.010360240936279}


EP_train:0:  25%|| 1721/6926 [2:06:11<6:23:14,  4.42s/it]

{'epoch': 0, 'iter': 1720, 'avg_loss': 6.619659194414315, 'avg_acc': 50.377687391051715, 'loss': 6.440831661224365}


EP_train:0:  25%|| 1731/6926 [2:06:53<6:11:09,  4.29s/it]

{'epoch': 0, 'iter': 1730, 'avg_loss': 6.615138777276122, 'avg_acc': 50.36286828422877, 'loss': 5.931415557861328}


EP_train:0:  25%|| 1741/6926 [2:07:37<6:12:26,  4.31s/it]

{'epoch': 0, 'iter': 1740, 'avg_loss': 6.6098141385384634, 'avg_acc': 50.362578977599085, 'loss': 5.655142784118652}


EP_train:0:  25%|| 1750/6926 [2:08:19<6:06:25,  4.25s/it]

{'epoch': 0, 'iter': 1750, 'avg_loss': 6.606659919176695, 'avg_acc': 50.342661336379216, 'loss': 5.450732231140137}


EP_train:0:  25%|| 1751/6926 [2:08:21<7:02:22,  4.90s/it]

Saved best retrieval model


EP_train:0:  25%|| 1761/6926 [2:09:06<6:20:24,  4.42s/it]

{'epoch': 0, 'iter': 1760, 'avg_loss': 6.601827114837404, 'avg_acc': 50.34071550255537, 'loss': 5.293978691101074}


EP_train:0:  26%|| 1771/6926 [2:09:50<6:13:12,  4.34s/it]

{'epoch': 0, 'iter': 1770, 'avg_loss': 6.597281156859783, 'avg_acc': 50.34761434217956, 'loss': 6.178958415985107}


EP_train:0:  26%|| 1781/6926 [2:10:34<6:05:53,  4.27s/it]

{'epoch': 0, 'iter': 1780, 'avg_loss': 6.592671956057497, 'avg_acc': 50.32811622683886, 'loss': 5.815314769744873}


EP_train:0:  26%|| 1791/6926 [2:11:16<6:09:14,  4.31s/it]

{'epoch': 0, 'iter': 1790, 'avg_loss': 6.5879663513200635, 'avg_acc': 50.305346175321056, 'loss': 6.194333076477051}


EP_train:0:  26%|| 1800/6926 [2:12:00<6:05:39,  4.28s/it]

{'epoch': 0, 'iter': 1800, 'avg_loss': 6.584102093677002, 'avg_acc': 50.279358689616885, 'loss': 6.365004062652588}


EP_train:0:  26%|| 1801/6926 [2:12:01<6:49:49,  4.80s/it]

Saved best retrieval model


EP_train:0:  26%|| 1811/6926 [2:12:46<6:17:23,  4.43s/it]

{'epoch': 0, 'iter': 1810, 'avg_loss': 6.580012137425137, 'avg_acc': 50.284718387631145, 'loss': 6.168032646179199}


EP_train:0:  26%|| 1821/6926 [2:13:30<6:10:17,  4.35s/it]

{'epoch': 0, 'iter': 1820, 'avg_loss': 6.57599076852898, 'avg_acc': 50.27972267984624, 'loss': 6.119528293609619}


EP_train:0:  26%|| 1831/6926 [2:14:12<5:58:08,  4.22s/it]

{'epoch': 0, 'iter': 1830, 'avg_loss': 6.571321536386553, 'avg_acc': 50.273074822501364, 'loss': 5.71048641204834}


EP_train:0:  27%|| 1841/6926 [2:14:56<6:11:31,  4.38s/it]

{'epoch': 0, 'iter': 1840, 'avg_loss': 6.566627792791463, 'avg_acc': 50.29026344378056, 'loss': 5.74479341506958}


EP_train:0:  27%|| 1850/6926 [2:15:38<6:09:49,  4.37s/it]

{'epoch': 0, 'iter': 1850, 'avg_loss': 6.56280013099353, 'avg_acc': 50.290383576445166, 'loss': 6.395748615264893}


EP_train:0:  27%|| 1851/6926 [2:15:40<6:53:53,  4.89s/it]

Saved best retrieval model


EP_train:0:  27%|| 1861/6926 [2:16:26<6:20:10,  4.50s/it]

{'epoch': 0, 'iter': 1860, 'avg_loss': 6.559245966122651, 'avg_acc': 50.282106394411606, 'loss': 6.02446985244751}


EP_train:0:  27%|| 1871/6926 [2:17:10<6:10:12,  4.39s/it]

{'epoch': 0, 'iter': 1870, 'avg_loss': 6.555020809428441, 'avg_acc': 50.308992517370385, 'loss': 5.6470537185668945}


EP_train:0:  27%|| 1881/6926 [2:17:52<5:54:47,  4.22s/it]

{'epoch': 0, 'iter': 1880, 'avg_loss': 6.551124362854296, 'avg_acc': 50.305688463583195, 'loss': 5.969376564025879}


EP_train:0:  27%|| 1891/6926 [2:18:36<6:08:52,  4.40s/it]

{'epoch': 0, 'iter': 1890, 'avg_loss': 6.546781213872336, 'avg_acc': 50.30241935483871, 'loss': 5.5698089599609375}


EP_train:0:  27%|| 1900/6926 [2:19:18<5:58:19,  4.28s/it]

{'epoch': 0, 'iter': 1900, 'avg_loss': 6.543100018930209, 'avg_acc': 50.29260915307733, 'loss': 5.721053123474121}


EP_train:0:  27%|| 1901/6926 [2:19:20<6:35:37,  4.72s/it]

Saved best retrieval model


EP_train:0:  28%|| 1911/6926 [2:20:05<6:14:12,  4.48s/it]

{'epoch': 0, 'iter': 1910, 'avg_loss': 6.539375448900639, 'avg_acc': 50.31070120355835, 'loss': 6.0144500732421875}


EP_train:0:  28%|| 1921/6926 [2:20:49<6:00:24,  4.32s/it]

{'epoch': 0, 'iter': 1920, 'avg_loss': 6.536183623334258, 'avg_acc': 50.29932326913066, 'loss': 5.959018707275391}


EP_train:0:  28%|| 1931/6926 [2:21:32<5:57:27,  4.29s/it]

{'epoch': 0, 'iter': 1930, 'avg_loss': 6.532822220267439, 'avg_acc': 50.296154842050754, 'loss': 6.161360740661621}


EP_train:0:  28%|| 1941/6926 [2:22:15<5:55:11,  4.28s/it]

{'epoch': 0, 'iter': 1940, 'avg_loss': 6.529408343117118, 'avg_acc': 50.30911901081917, 'loss': 6.036574363708496}


EP_train:0:  28%|| 1950/6926 [2:22:58<6:02:03,  4.37s/it]

{'epoch': 0, 'iter': 1950, 'avg_loss': 6.526086287887081, 'avg_acc': 50.31233982573039, 'loss': 5.956161022186279}


EP_train:0:  28%|| 1951/6926 [2:23:00<6:50:09,  4.95s/it]

Saved best retrieval model


EP_train:0:  28%|| 1961/6926 [2:23:45<6:13:33,  4.51s/it]

{'epoch': 0, 'iter': 1960, 'avg_loss': 6.521732927766885, 'avg_acc': 50.3139342172361, 'loss': 5.874303817749023}


EP_train:0:  28%|| 1971/6926 [2:24:29<5:55:56,  4.31s/it]

{'epoch': 0, 'iter': 1970, 'avg_loss': 6.518047782988914, 'avg_acc': 50.31709791983765, 'loss': 6.087735652923584}


EP_train:0:  29%|| 1981/6926 [2:25:13<5:57:30,  4.34s/it]

{'epoch': 0, 'iter': 1980, 'avg_loss': 6.513850540419169, 'avg_acc': 50.31234225138819, 'loss': 5.513937950134277}


EP_train:0:  29%|| 1991/6926 [2:25:56<5:58:28,  4.36s/it]

{'epoch': 0, 'iter': 1990, 'avg_loss': 6.511422571855714, 'avg_acc': 50.30920391762933, 'loss': 6.152897834777832}


EP_train:0:  29%|| 2000/6926 [2:26:39<5:54:21,  4.32s/it]

{'epoch': 0, 'iter': 2000, 'avg_loss': 6.507826743871793, 'avg_acc': 50.304535232383806, 'loss': 5.407118797302246}


EP_train:0:  29%|| 2001/6926 [2:26:42<7:16:02,  5.31s/it]

Saved best retrieval model


EP_train:0:  29%|| 2011/6926 [2:27:27<6:09:01,  4.50s/it]

{'epoch': 0, 'iter': 2010, 'avg_loss': 6.505494876977284, 'avg_acc': 50.29214321233218, 'loss': 5.795477390289307}


EP_train:0:  29%|| 2021/6926 [2:28:13<6:12:27,  4.56s/it]

{'epoch': 0, 'iter': 2020, 'avg_loss': 6.50237686123251, 'avg_acc': 50.2906976744186, 'loss': 5.730064868927002}


EP_train:0:  29%|| 2031/6926 [2:28:57<5:54:16,  4.34s/it]

{'epoch': 0, 'iter': 2030, 'avg_loss': 6.498539589356461, 'avg_acc': 50.312346134908914, 'loss': 5.266168117523193}


EP_train:0:  29%|| 2041/6926 [2:29:40<5:47:24,  4.27s/it]

{'epoch': 0, 'iter': 2040, 'avg_loss': 6.494783816881942, 'avg_acc': 50.34450024497795, 'loss': 5.9767165184021}


EP_train:0:  30%|| 2050/6926 [2:30:23<5:53:26,  4.35s/it]

{'epoch': 0, 'iter': 2050, 'avg_loss': 6.492308302311593, 'avg_acc': 50.347391516333495, 'loss': 5.979189872741699}


EP_train:0:  30%|| 2051/6926 [2:30:25<6:28:58,  4.79s/it]

Saved best retrieval model


EP_train:0:  30%|| 2061/6926 [2:31:10<6:02:09,  4.47s/it]

{'epoch': 0, 'iter': 2060, 'avg_loss': 6.489881361878537, 'avg_acc': 50.35025473071325, 'loss': 6.020854949951172}


EP_train:0:  30%|| 2071/6926 [2:31:54<6:00:33,  4.46s/it]

{'epoch': 0, 'iter': 2070, 'avg_loss': 6.486170269106391, 'avg_acc': 50.345545630130374, 'loss': 5.933510780334473}


EP_train:0:  30%|| 2081/6926 [2:32:37<5:45:32,  4.28s/it]

{'epoch': 0, 'iter': 2080, 'avg_loss': 6.482547104215461, 'avg_acc': 50.35139356078808, 'loss': 5.755361557006836}


EP_train:0:  30%|| 2091/6926 [2:33:21<5:56:15,  4.42s/it]

{'epoch': 0, 'iter': 2090, 'avg_loss': 6.479303889956444, 'avg_acc': 50.34522955523673, 'loss': 5.993529796600342}


EP_train:0:  30%|| 2100/6926 [2:34:03<5:43:30,  4.27s/it]

{'epoch': 0, 'iter': 2100, 'avg_loss': 6.476558638322586, 'avg_acc': 50.352510709186106, 'loss': 5.5897698402404785}


EP_train:0:  30%|| 2101/6926 [2:34:10<8:25:36,  6.29s/it]

Saved best retrieval model


EP_train:0:  30%|| 2111/6926 [2:34:54<5:57:35,  4.46s/it]

{'epoch': 0, 'iter': 2110, 'avg_loss': 6.473904952343354, 'avg_acc': 50.37008526764567, 'loss': 5.745266437530518}


EP_train:0:  31%|| 2121/6926 [2:35:39<5:57:53,  4.47s/it]

{'epoch': 0, 'iter': 2120, 'avg_loss': 6.4706863812937145, 'avg_acc': 50.39191419141914, 'loss': 5.835095405578613}


EP_train:0:  31%|| 2131/6926 [2:36:24<5:48:35,  4.36s/it]

{'epoch': 0, 'iter': 2130, 'avg_loss': 6.467656868827673, 'avg_acc': 50.38567573908963, 'loss': 5.885980606079102}


EP_train:0:  31%|| 2141/6926 [2:37:07<5:46:02,  4.34s/it]

{'epoch': 0, 'iter': 2140, 'avg_loss': 6.4646438269457045, 'avg_acc': 50.38679355441382, 'loss': 5.447040557861328}


EP_train:0:  31%|| 2150/6926 [2:37:50<5:46:06,  4.35s/it]

{'epoch': 0, 'iter': 2150, 'avg_loss': 6.461426742461492, 'avg_acc': 50.37482566248257, 'loss': 5.592595100402832}


EP_train:0:  31%|| 2151/6926 [2:37:55<7:22:15,  5.56s/it]

Saved best retrieval model


EP_train:0:  31%|| 2161/6926 [2:38:39<5:56:12,  4.49s/it]

{'epoch': 0, 'iter': 2160, 'avg_loss': 6.4584904537880545, 'avg_acc': 50.37164507172606, 'loss': 6.173855781555176}


EP_train:0:  31%|| 2171/6926 [2:39:25<5:55:09,  4.48s/it]

{'epoch': 0, 'iter': 2170, 'avg_loss': 6.4552457687328735, 'avg_acc': 50.349781206817134, 'loss': 5.406325817108154}


EP_train:0:  31%|| 2181/6926 [2:40:09<5:39:41,  4.30s/it]

{'epoch': 0, 'iter': 2180, 'avg_loss': 6.4526710298180525, 'avg_acc': 50.34817744154058, 'loss': 6.239919662475586}


EP_train:0:  32%|| 2191/6926 [2:40:53<5:45:22,  4.38s/it]

{'epoch': 0, 'iter': 2190, 'avg_loss': 6.4491691985450235, 'avg_acc': 50.34230944774076, 'loss': 6.032062530517578}


EP_train:0:  32%|| 2200/6926 [2:41:35<5:36:30,  4.27s/it]

{'epoch': 0, 'iter': 2200, 'avg_loss': 6.4466503726954025, 'avg_acc': 50.3478532485234, 'loss': 5.880181789398193}


EP_train:0:  32%|| 2201/6926 [2:41:42<8:24:37,  6.41s/it]

Saved best retrieval model


EP_train:0:  32%|| 2211/6926 [2:42:27<5:51:31,  4.47s/it]

{'epoch': 0, 'iter': 2210, 'avg_loss': 6.444003278990236, 'avg_acc': 50.34203980099502, 'loss': 5.877943992614746}


EP_train:0:  32%|| 2221/6926 [2:43:13<6:03:07,  4.63s/it]

{'epoch': 0, 'iter': 2220, 'avg_loss': 6.440858920164765, 'avg_acc': 50.3475348941918, 'loss': 5.704591751098633}


EP_train:0:  32%|| 2231/6926 [2:43:57<5:45:42,  4.42s/it]

{'epoch': 0, 'iter': 2230, 'avg_loss': 6.438072107341566, 'avg_acc': 50.3375728372927, 'loss': 5.660091876983643}


EP_train:0:  32%|| 2241/6926 [2:44:41<5:35:12,  4.29s/it]

{'epoch': 0, 'iter': 2240, 'avg_loss': 6.434854425739252, 'avg_acc': 50.34303882195449, 'loss': 5.564455032348633}


EP_train:0:  32%|| 2250/6926 [2:45:24<5:32:13,  4.26s/it]

{'epoch': 0, 'iter': 2250, 'avg_loss': 6.4325812238949975, 'avg_acc': 50.35678587294535, 'loss': 5.858077526092529}


EP_train:0:  33%|| 2251/6926 [2:45:32<8:49:30,  6.80s/it]

Saved best retrieval model


EP_train:0:  33%|| 2261/6926 [2:46:17<5:53:47,  4.55s/it]

{'epoch': 0, 'iter': 2260, 'avg_loss': 6.430658435357561, 'avg_acc': 50.34691508182221, 'loss': 5.725393772125244}


EP_train:0:  33%|| 2271/6926 [2:47:03<6:03:24,  4.68s/it]

{'epoch': 0, 'iter': 2270, 'avg_loss': 6.427816837802956, 'avg_acc': 50.32749889916336, 'loss': 5.719665050506592}


EP_train:0:  33%|| 2281/6926 [2:47:47<5:37:03,  4.35s/it]

{'epoch': 0, 'iter': 2280, 'avg_loss': 6.425469509711761, 'avg_acc': 50.33702323542306, 'loss': 5.815391540527344}


EP_train:0:  33%|| 2291/6926 [2:48:31<5:32:14,  4.30s/it]

{'epoch': 0, 'iter': 2290, 'avg_loss': 6.42270066653834, 'avg_acc': 50.33555216062855, 'loss': 5.8050103187561035}


EP_train:0:  33%|| 2300/6926 [2:49:13<5:30:47,  4.29s/it]

{'epoch': 0, 'iter': 2300, 'avg_loss': 6.41951293253163, 'avg_acc': 50.315080399826165, 'loss': 5.657952785491943}


EP_train:0:  33%|| 2301/6926 [2:49:19<7:51:52,  6.12s/it]

Saved best retrieval model


EP_train:0:  33%|| 2311/6926 [2:50:03<5:41:40,  4.44s/it]

{'epoch': 0, 'iter': 2310, 'avg_loss': 6.417114644069168, 'avg_acc': 50.3272392903505, 'loss': 5.988988399505615}


EP_train:0:  34%|| 2321/6926 [2:50:48<5:42:47,  4.47s/it]

{'epoch': 0, 'iter': 2320, 'avg_loss': 6.414860261097801, 'avg_acc': 50.33660060318829, 'loss': 6.195119380950928}


EP_train:0:  34%|| 2331/6926 [2:51:33<5:33:30,  4.35s/it]

{'epoch': 0, 'iter': 2330, 'avg_loss': 6.412593746635342, 'avg_acc': 50.33917846417847, 'loss': 5.524521350860596}


EP_train:0:  34%|| 2341/6926 [2:52:16<5:29:20,  4.31s/it]

{'epoch': 0, 'iter': 2340, 'avg_loss': 6.409894873982869, 'avg_acc': 50.351078598889366, 'loss': 5.318197250366211}


EP_train:0:  34%|| 2350/6926 [2:52:58<5:20:40,  4.20s/it]

{'epoch': 0, 'iter': 2350, 'avg_loss': 6.407354502107984, 'avg_acc': 50.35888983411314, 'loss': 5.7339653968811035}


EP_train:0:  34%|| 2351/6926 [2:53:04<7:39:26,  6.03s/it]

Saved best retrieval model


EP_train:0:  34%|| 2361/6926 [2:53:47<5:33:19,  4.38s/it]

{'epoch': 0, 'iter': 2360, 'avg_loss': 6.404431118494573, 'avg_acc': 50.352075391783146, 'loss': 5.385279178619385}


EP_train:0:  34%|| 2371/6926 [2:54:32<5:35:59,  4.43s/it]

{'epoch': 0, 'iter': 2370, 'avg_loss': 6.400972929246212, 'avg_acc': 50.34268241248419, 'loss': 5.308396816253662}


EP_train:0:  34%|| 2381/6926 [2:55:16<5:26:32,  4.31s/it]

{'epoch': 0, 'iter': 2380, 'avg_loss': 6.3987410259767525, 'avg_acc': 50.3412431751365, 'loss': 6.012655735015869}


EP_train:0:  35%|| 2391/6926 [2:55:59<5:22:19,  4.26s/it]

{'epoch': 0, 'iter': 2390, 'avg_loss': 6.395933697882779, 'avg_acc': 50.329360100376405, 'loss': 5.680666446685791}


EP_train:0:  35%|| 2400/6926 [2:56:42<5:25:42,  4.32s/it]

{'epoch': 0, 'iter': 2400, 'avg_loss': 6.3932142215984555, 'avg_acc': 50.34881299458559, 'loss': 5.505230903625488}


EP_train:0:  35%|| 2401/6926 [2:56:49<8:15:33,  6.57s/it]

Saved best retrieval model


EP_train:0:  35%|| 2411/6926 [2:57:33<5:33:41,  4.43s/it]

{'epoch': 0, 'iter': 2410, 'avg_loss': 6.390598609209159, 'avg_acc': 50.32922024056408, 'loss': 6.136170387268066}


EP_train:0:  35%|| 2421/6926 [2:58:18<5:33:33,  4.44s/it]

{'epoch': 0, 'iter': 2420, 'avg_loss': 6.387776983222505, 'avg_acc': 50.334314332920286, 'loss': 5.256832122802734}


EP_train:0:  35%|| 2431/6926 [2:59:02<5:20:38,  4.28s/it]

{'epoch': 0, 'iter': 2430, 'avg_loss': 6.385867526628313, 'avg_acc': 50.3162278897573, 'loss': 6.280676364898682}


EP_train:0:  35%|| 2441/6926 [2:59:45<5:19:32,  4.27s/it]

{'epoch': 0, 'iter': 2440, 'avg_loss': 6.384225848267089, 'avg_acc': 50.32901474805408, 'loss': 6.081324577331543}


EP_train:0:  35%|| 2450/6926 [3:00:27<5:18:29,  4.27s/it]

{'epoch': 0, 'iter': 2450, 'avg_loss': 6.38171061434487, 'avg_acc': 50.33404732762138, 'loss': 5.518251419067383}


EP_train:0:  35%|| 2451/6926 [3:00:32<6:58:24,  5.61s/it]

Saved best retrieval model


EP_train:0:  36%|| 2461/6926 [3:01:15<5:27:26,  4.40s/it]

{'epoch': 0, 'iter': 2460, 'avg_loss': 6.37944900945937, 'avg_acc': 50.349197480698905, 'loss': 5.883328437805176}


EP_train:0:  36%|| 2471/6926 [3:02:01<5:31:50,  4.47s/it]

{'epoch': 0, 'iter': 2470, 'avg_loss': 6.377334275548635, 'avg_acc': 50.33640226628895, 'loss': 5.4878926277160645}


EP_train:0:  36%|| 2481/6926 [3:02:44<5:18:20,  4.30s/it]

{'epoch': 0, 'iter': 2480, 'avg_loss': 6.375003905797956, 'avg_acc': 50.31993147924224, 'loss': 5.625571250915527}


EP_train:0:  36%|| 2491/6926 [3:03:27<5:13:23,  4.24s/it]

{'epoch': 0, 'iter': 2490, 'avg_loss': 6.371975160420348, 'avg_acc': 50.324919710959456, 'loss': 5.656708717346191}


EP_train:0:  36%|| 2500/6926 [3:04:09<5:13:26,  4.25s/it]

{'epoch': 0, 'iter': 2500, 'avg_loss': 6.369313337859131, 'avg_acc': 50.31737305077969, 'loss': 5.900962829589844}


EP_train:0:  36%|| 2501/6926 [3:04:17<8:01:00,  6.52s/it]

Saved best retrieval model


EP_train:0:  36%|| 2511/6926 [3:05:00<5:24:08,  4.40s/it]

{'epoch': 0, 'iter': 2510, 'avg_loss': 6.367466335038368, 'avg_acc': 50.31859816806053, 'loss': 5.603295803070068}


EP_train:0:  36%|| 2521/6926 [3:05:46<5:29:12,  4.48s/it]

{'epoch': 0, 'iter': 2520, 'avg_loss': 6.365100842815976, 'avg_acc': 50.32353232844109, 'loss': 5.7093825340271}


EP_train:0:  37%|| 2531/6926 [3:06:30<5:16:52,  4.33s/it]

{'epoch': 0, 'iter': 2530, 'avg_loss': 6.363211764784398, 'avg_acc': 50.33089687870407, 'loss': 5.36748743057251}


EP_train:0:  37%|| 2541/6926 [3:07:12<5:12:48,  4.28s/it]

{'epoch': 0, 'iter': 2540, 'avg_loss': 6.361575120079944, 'avg_acc': 50.31606650924832, 'loss': 6.143217086791992}


EP_train:0:  37%|| 2550/6926 [3:07:55<5:17:53,  4.36s/it]

{'epoch': 0, 'iter': 2550, 'avg_loss': 6.359225526037332, 'avg_acc': 50.302577420619365, 'loss': 5.140578269958496}


EP_train:0:  37%|| 2551/6926 [3:08:00<7:09:51,  5.90s/it]

Saved best retrieval model


EP_train:0:  37%|| 2561/6926 [3:08:44<5:20:51,  4.41s/it]

{'epoch': 0, 'iter': 2560, 'avg_loss': 6.357155211499686, 'avg_acc': 50.30139593908629, 'loss': 5.964903354644775}


EP_train:0:  37%|| 2571/6926 [3:09:30<5:30:54,  4.56s/it]

{'epoch': 0, 'iter': 2570, 'avg_loss': 6.3554500708177555, 'avg_acc': 50.300223648385845, 'loss': 5.828378200531006}


EP_train:0:  37%|| 2581/6926 [3:10:15<5:18:22,  4.40s/it]

{'epoch': 0, 'iter': 2580, 'avg_loss': 6.353048167363566, 'avg_acc': 50.3039035257652, 'loss': 5.619988441467285}


EP_train:0:  37%|| 2591/6926 [3:10:58<5:14:51,  4.36s/it]

{'epoch': 0, 'iter': 2590, 'avg_loss': 6.351233874541459, 'avg_acc': 50.30996719413354, 'loss': 5.899896621704102}


EP_train:0:  38%|| 2600/6926 [3:11:42<5:08:10,  4.27s/it]

{'epoch': 0, 'iter': 2600, 'avg_loss': 6.349414563463175, 'avg_acc': 50.3087754709727, 'loss': 5.458110809326172}


EP_train:0:  38%|| 2601/6926 [3:11:50<8:17:04,  6.90s/it]

Saved best retrieval model


EP_train:0:  38%|| 2611/6926 [3:12:36<5:31:15,  4.61s/it]

{'epoch': 0, 'iter': 2610, 'avg_loss': 6.347335967739986, 'avg_acc': 50.292033703561856, 'loss': 5.649038314819336}


EP_train:0:  38%|| 2621/6926 [3:13:21<5:28:31,  4.58s/it]

{'epoch': 0, 'iter': 2620, 'avg_loss': 6.3454000894371125, 'avg_acc': 50.288534910339564, 'loss': 6.113557815551758}


EP_train:0:  38%|| 2631/6926 [3:14:05<5:10:00,  4.33s/it]

{'epoch': 0, 'iter': 2630, 'avg_loss': 6.343313268634676, 'avg_acc': 50.27674838464462, 'loss': 6.075923919677734}


EP_train:0:  38%|| 2641/6926 [3:14:49<5:17:20,  4.44s/it]

{'epoch': 0, 'iter': 2640, 'avg_loss': 6.341566618262525, 'avg_acc': 50.27215070049223, 'loss': 6.097493648529053}


EP_train:0:  38%|| 2650/6926 [3:15:31<5:09:31,  4.34s/it]

{'epoch': 0, 'iter': 2650, 'avg_loss': 6.339800409441487, 'avg_acc': 50.256978498679736, 'loss': 5.765728950500488}


EP_train:0:  38%|| 2651/6926 [3:15:40<8:03:39,  6.79s/it]

Saved best retrieval model


EP_train:0:  38%|| 2661/6926 [3:16:24<5:17:11,  4.46s/it]

{'epoch': 0, 'iter': 2660, 'avg_loss': 6.338160407493904, 'avg_acc': 50.247792183389706, 'loss': 5.782388687133789}


EP_train:0:  39%|| 2671/6926 [3:17:10<5:19:32,  4.51s/it]

{'epoch': 0, 'iter': 2670, 'avg_loss': 6.33610535935488, 'avg_acc': 50.2620741295395, 'loss': 5.67753267288208}


EP_train:0:  39%|| 2681/6926 [3:17:54<5:06:22,  4.33s/it]

{'epoch': 0, 'iter': 2680, 'avg_loss': 6.33339401627156, 'avg_acc': 50.2622622155912, 'loss': 5.70634126663208}


EP_train:0:  39%|| 2691/6926 [3:18:37<5:03:51,  4.30s/it]

{'epoch': 0, 'iter': 2690, 'avg_loss': 6.331617486826862, 'avg_acc': 50.25199739873653, 'loss': 5.752986431121826}


EP_train:0:  39%|| 2700/6926 [3:19:20<4:56:40,  4.21s/it]

{'epoch': 0, 'iter': 2700, 'avg_loss': 6.3294816144260375, 'avg_acc': 50.25106442058497, 'loss': 5.844699859619141}


EP_train:0:  39%|| 2701/6926 [3:19:28<7:49:53,  6.67s/it]

Saved best retrieval model


EP_train:0:  39%|| 2711/6926 [3:20:12<5:14:27,  4.48s/it]

{'epoch': 0, 'iter': 2710, 'avg_loss': 6.327252813301945, 'avg_acc': 50.24898561416451, 'loss': 5.323202610015869}


EP_train:0:  39%|| 2721/6926 [3:20:57<5:14:13,  4.48s/it]

{'epoch': 0, 'iter': 2720, 'avg_loss': 6.3250074549573, 'avg_acc': 50.23773428886439, 'loss': 5.652805805206299}


EP_train:0:  39%|| 2731/6926 [3:21:41<4:59:44,  4.29s/it]

{'epoch': 0, 'iter': 2730, 'avg_loss': 6.322594772892005, 'avg_acc': 50.23457524716222, 'loss': 5.572971820831299}


EP_train:0:  40%|| 2741/6926 [3:22:24<4:59:15,  4.29s/it]

{'epoch': 0, 'iter': 2740, 'avg_loss': 6.3209977064720695, 'avg_acc': 50.225738781466625, 'loss': 6.24658203125}


EP_train:0:  40%|| 2750/6926 [3:23:07<4:58:52,  4.29s/it]

{'epoch': 0, 'iter': 2750, 'avg_loss': 6.318500926996395, 'avg_acc': 50.24763722282807, 'loss': 5.68319845199585}


EP_train:0:  40%|| 2751/6926 [3:23:15<7:43:02,  6.65s/it]

Saved best retrieval model


EP_train:0:  40%|| 2761/6926 [3:23:59<5:09:43,  4.46s/it]

{'epoch': 0, 'iter': 2760, 'avg_loss': 6.316597606254116, 'avg_acc': 50.24560847519015, 'loss': 5.74418830871582}


EP_train:0:  40%|| 2771/6926 [3:24:45<5:12:17,  4.51s/it]

{'epoch': 0, 'iter': 2770, 'avg_loss': 6.314561099276479, 'avg_acc': 50.242466618549265, 'loss': 5.788613796234131}


EP_train:0:  40%|| 2781/6926 [3:25:28<4:52:42,  4.24s/it]

{'epoch': 0, 'iter': 2780, 'avg_loss': 6.312722427988172, 'avg_acc': 50.24271844660194, 'loss': 5.695810794830322}


EP_train:0:  40%|| 2791/6926 [3:26:11<4:56:27,  4.30s/it]

{'epoch': 0, 'iter': 2790, 'avg_loss': 6.310758265232536, 'avg_acc': 50.23401110713006, 'loss': 5.932072639465332}


EP_train:0:  40%|| 2800/6926 [3:26:54<4:55:04,  4.29s/it]

{'epoch': 0, 'iter': 2800, 'avg_loss': 6.309707510007445, 'avg_acc': 50.24098536237058, 'loss': 5.651429653167725}


EP_train:0:  40%|| 2801/6926 [3:27:02<7:38:24,  6.67s/it]

Saved best retrieval model


EP_train:0:  41%|| 2811/6926 [3:27:45<4:59:38,  4.37s/it]

{'epoch': 0, 'iter': 2810, 'avg_loss': 6.307211673543869, 'avg_acc': 50.23679295624333, 'loss': 5.9347147941589355}


EP_train:0:  41%|| 2821/6926 [3:28:30<5:02:52,  4.43s/it]

{'epoch': 0, 'iter': 2820, 'avg_loss': 6.305421984056259, 'avg_acc': 50.228199220134705, 'loss': 5.852119445800781}


EP_train:0:  41%|| 2831/6926 [3:29:14<4:53:41,  4.30s/it]

{'epoch': 0, 'iter': 2830, 'avg_loss': 6.303973099739886, 'avg_acc': 50.247262451430586, 'loss': 6.09130334854126}


EP_train:0:  41%|| 2841/6926 [3:29:57<4:51:54,  4.29s/it]

{'epoch': 0, 'iter': 2840, 'avg_loss': 6.302194249751659, 'avg_acc': 50.243092221048926, 'loss': 6.427637577056885}


EP_train:0:  41%|| 2850/6926 [3:30:39<4:48:18,  4.24s/it]

{'epoch': 0, 'iter': 2850, 'avg_loss': 6.300690065890352, 'avg_acc': 50.25429673798667, 'loss': 5.687358379364014}


EP_train:0:  41%|| 2851/6926 [3:30:45<6:50:11,  6.04s/it]

Saved best retrieval model


EP_train:0:  41%|| 2861/6926 [3:31:29<5:01:14,  4.45s/it]

{'epoch': 0, 'iter': 2860, 'avg_loss': 6.298533233134574, 'avg_acc': 50.24466969591052, 'loss': 5.585170745849609}


EP_train:0:  41%|| 2871/6926 [3:32:13<4:58:50,  4.42s/it]

{'epoch': 0, 'iter': 2870, 'avg_loss': 6.297384371370524, 'avg_acc': 50.25361372344131, 'loss': 6.061003684997559}


EP_train:0:  42%|| 2881/6926 [3:32:57<4:50:04,  4.30s/it]

{'epoch': 0, 'iter': 2880, 'avg_loss': 6.29568419742816, 'avg_acc': 50.27442728219368, 'loss': 6.030300140380859}


EP_train:0:  42%|| 2891/6926 [3:33:40<4:48:47,  4.29s/it]

{'epoch': 0, 'iter': 2890, 'avg_loss': 6.293505731316761, 'avg_acc': 50.27888273953649, 'loss': 5.328939437866211}


EP_train:0:  42%|| 2900/6926 [3:34:23<4:50:39,  4.33s/it]

{'epoch': 0, 'iter': 2900, 'avg_loss': 6.291969790289378, 'avg_acc': 50.27361254739745, 'loss': 5.989199161529541}


EP_train:0:  42%|| 2901/6926 [3:34:28<6:26:28,  5.76s/it]

Saved best retrieval model


EP_train:0:  42%|| 2911/6926 [3:35:12<4:55:03,  4.41s/it]

{'epoch': 0, 'iter': 2910, 'avg_loss': 6.29006705665785, 'avg_acc': 50.28662830642391, 'loss': 5.5309529304504395}


EP_train:0:  42%|| 2921/6926 [3:35:57<5:00:01,  4.49s/it]

{'epoch': 0, 'iter': 2920, 'avg_loss': 6.288607107617601, 'avg_acc': 50.28885655597398, 'loss': 5.896523952484131}


EP_train:0:  42%|| 2931/6926 [3:36:41<4:42:39,  4.25s/it]

{'epoch': 0, 'iter': 2930, 'avg_loss': 6.287227639725736, 'avg_acc': 50.2942681678608, 'loss': 5.97820520401001}


EP_train:0:  42%|| 2941/6926 [3:37:24<4:45:33,  4.30s/it]

{'epoch': 0, 'iter': 2940, 'avg_loss': 6.285152112331086, 'avg_acc': 50.29751785107106, 'loss': 5.530604362487793}


EP_train:0:  43%|| 2950/6926 [3:38:06<4:41:57,  4.25s/it]

{'epoch': 0, 'iter': 2950, 'avg_loss': 6.283885970030021, 'avg_acc': 50.28380210098272, 'loss': 5.647937297821045}


EP_train:0:  43%|| 2951/6926 [3:38:14<7:12:12,  6.52s/it]

Saved best retrieval model


EP_train:0:  43%|| 2961/6926 [3:38:59<5:02:38,  4.58s/it]

{'epoch': 0, 'iter': 2960, 'avg_loss': 6.281703033466591, 'avg_acc': 50.29339750084431, 'loss': 5.225362300872803}


EP_train:0:  43%|| 2971/6926 [3:39:44<4:55:07,  4.48s/it]

{'epoch': 0, 'iter': 2970, 'avg_loss': 6.279821490085875, 'avg_acc': 50.28925445977785, 'loss': 5.56142520904541}


EP_train:0:  43%|| 2981/6926 [3:40:28<4:40:25,  4.26s/it]

{'epoch': 0, 'iter': 2980, 'avg_loss': 6.27791700887824, 'avg_acc': 50.27884937940289, 'loss': 5.497019290924072}


EP_train:0:  43%|| 2991/6926 [3:41:11<4:41:34,  4.29s/it]

{'epoch': 0, 'iter': 2990, 'avg_loss': 6.27590226999536, 'avg_acc': 50.26120026746908, 'loss': 5.743422031402588}


EP_train:0:  43%|| 3000/6926 [3:41:54<4:40:14,  4.28s/it]

{'epoch': 0, 'iter': 3000, 'avg_loss': 6.274355177798298, 'avg_acc': 50.265536487837394, 'loss': 5.632959365844727}


EP_train:0:  43%|| 3001/6926 [3:42:01<7:08:21,  6.55s/it]

Saved best retrieval model


EP_train:0:  43%|| 3011/6926 [3:42:46<4:52:29,  4.48s/it]

{'epoch': 0, 'iter': 3010, 'avg_loss': 6.2727215480899465, 'avg_acc': 50.2584274327466, 'loss': 5.842557430267334}


EP_train:0:  44%|| 3021/6926 [3:43:30<4:50:13,  4.46s/it]

{'epoch': 0, 'iter': 3020, 'avg_loss': 6.271516743893262, 'avg_acc': 50.25343429328038, 'loss': 5.449770450592041}


EP_train:0:  44%|| 3031/6926 [3:44:14<4:36:38,  4.26s/it]

{'epoch': 0, 'iter': 3030, 'avg_loss': 6.2700290123018565, 'avg_acc': 50.2567222038931, 'loss': 5.260344505310059}


EP_train:0:  44%|| 3041/6926 [3:44:57<4:37:13,  4.28s/it]

{'epoch': 0, 'iter': 3040, 'avg_loss': 6.268257886396745, 'avg_acc': 50.26718184807629, 'loss': 5.4676666259765625}


EP_train:0:  44%|| 3050/6926 [3:45:40<4:37:01,  4.29s/it]

{'epoch': 0, 'iter': 3050, 'avg_loss': 6.266234790641962, 'avg_acc': 50.271427400852176, 'loss': 5.740623950958252}


EP_train:0:  44%|| 3051/6926 [3:45:47<7:01:28,  6.53s/it]

Saved best retrieval model


EP_train:0:  44%|| 3061/6926 [3:46:31<4:47:19,  4.46s/it]

{'epoch': 0, 'iter': 3060, 'avg_loss': 6.2651668964200296, 'avg_acc': 50.27258248938256, 'loss': 5.411430358886719}


EP_train:0:  44%|| 3071/6926 [3:47:16<4:48:42,  4.49s/it]

{'epoch': 0, 'iter': 3070, 'avg_loss': 6.263920817535112, 'avg_acc': 50.256431129925105, 'loss': 5.9320173263549805}


EP_train:0:  44%|| 3081/6926 [3:48:00<4:37:40,  4.33s/it]

{'epoch': 0, 'iter': 3080, 'avg_loss': 6.262169860392877, 'avg_acc': 50.2555988315482, 'loss': 5.65457010269165}


EP_train:0:  45%|| 3091/6926 [3:48:42<4:30:40,  4.23s/it]

{'epoch': 0, 'iter': 3090, 'avg_loss': 6.260790768891467, 'avg_acc': 50.2487059204141, 'loss': 6.662380218505859}


EP_train:0:  45%|| 3100/6926 [3:49:25<4:33:31,  4.29s/it]

{'epoch': 0, 'iter': 3100, 'avg_loss': 6.259374046663975, 'avg_acc': 50.248911641406, 'loss': 5.711381912231445}


EP_train:0:  45%|| 3101/6926 [3:49:32<6:58:08,  6.56s/it]

Saved best retrieval model


EP_train:0:  45%|| 3111/6926 [3:50:16<4:39:24,  4.39s/it]

{'epoch': 0, 'iter': 3110, 'avg_loss': 6.257692717003769, 'avg_acc': 50.25413854066216, 'loss': 5.426158905029297}


EP_train:0:  45%|| 3121/6926 [3:51:01<4:45:53,  4.51s/it]

{'epoch': 0, 'iter': 3120, 'avg_loss': 6.25637332524523, 'avg_acc': 50.25532681832746, 'loss': 5.585482597351074}


EP_train:0:  45%|| 3131/6926 [3:51:45<4:31:12,  4.29s/it]

{'epoch': 0, 'iter': 3130, 'avg_loss': 6.255209438034004, 'avg_acc': 50.253513254551265, 'loss': 5.960078716278076}


EP_train:0:  45%|| 3141/6926 [3:52:27<4:30:36,  4.29s/it]

{'epoch': 0, 'iter': 3140, 'avg_loss': 6.25402999463183, 'avg_acc': 50.26464501751035, 'loss': 5.839365005493164}


EP_train:0:  45%|| 3150/6926 [3:53:10<4:31:34,  4.32s/it]

{'epoch': 0, 'iter': 3150, 'avg_loss': 6.252038107793016, 'avg_acc': 50.26479688987623, 'loss': 5.795003890991211}


EP_train:0:  45%|| 3151/6926 [3:53:18<6:58:50,  6.66s/it]

Saved best retrieval model


EP_train:0:  46%|| 3161/6926 [3:54:02<4:39:00,  4.45s/it]

{'epoch': 0, 'iter': 3160, 'avg_loss': 6.250412461728244, 'avg_acc': 50.25802752293578, 'loss': 5.614687442779541}


EP_train:0:  46%|| 3171/6926 [3:54:47<4:36:03,  4.41s/it]

{'epoch': 0, 'iter': 3170, 'avg_loss': 6.2485166453142265, 'avg_acc': 50.26706874802901, 'loss': 5.721968173980713}


EP_train:0:  46%|| 3181/6926 [3:55:31<4:29:06,  4.31s/it]

{'epoch': 0, 'iter': 3180, 'avg_loss': 6.247056375543714, 'avg_acc': 50.27998270983968, 'loss': 5.449464321136475}


EP_train:0:  46%|| 3191/6926 [3:56:13<4:27:06,  4.29s/it]

{'epoch': 0, 'iter': 3190, 'avg_loss': 6.245585794248614, 'avg_acc': 50.26833281103102, 'loss': 5.638674259185791}


EP_train:0:  46%|| 3200/6926 [3:56:56<4:30:29,  4.36s/it]

{'epoch': 0, 'iter': 3200, 'avg_loss': 6.2440029120154765, 'avg_acc': 50.273352077475785, 'loss': 5.799440860748291}


EP_train:0:  46%|| 3201/6926 [3:57:03<6:19:56,  6.12s/it]

Saved best retrieval model


EP_train:0:  46%|| 3211/6926 [3:57:46<4:33:08,  4.41s/it]

{'epoch': 0, 'iter': 3210, 'avg_loss': 6.242765431090777, 'avg_acc': 50.28320616630333, 'loss': 5.854527950286865}


EP_train:0:  47%|| 3221/6926 [3:58:32<4:36:58,  4.49s/it]

{'epoch': 0, 'iter': 3220, 'avg_loss': 6.24102278238348, 'avg_acc': 50.28620769947222, 'loss': 5.797836780548096}


EP_train:0:  47%|| 3231/6926 [3:59:16<4:24:22,  4.29s/it]

{'epoch': 0, 'iter': 3230, 'avg_loss': 6.239498261068303, 'avg_acc': 50.296928195605076, 'loss': 5.695809364318848}


EP_train:0:  47%|| 3241/6926 [3:59:59<4:25:20,  4.32s/it]

{'epoch': 0, 'iter': 3240, 'avg_loss': 6.238391889756346, 'avg_acc': 50.30565411909904, 'loss': 5.449069023132324}


EP_train:0:  47%|| 3250/6926 [4:00:41<4:23:24,  4.30s/it]

{'epoch': 0, 'iter': 3250, 'avg_loss': 6.237173533080285, 'avg_acc': 50.30663641956321, 'loss': 5.6749653816223145}


EP_train:0:  47%|| 3251/6926 [4:00:49<6:43:23,  6.59s/it]

Saved best retrieval model


EP_train:0:  47%|| 3261/6926 [4:01:33<4:32:39,  4.46s/it]

{'epoch': 0, 'iter': 3260, 'avg_loss': 6.235795972976755, 'avg_acc': 50.29898804047838, 'loss': 5.991837024688721}


EP_train:0:  47%|| 3271/6926 [4:02:18<4:28:20,  4.41s/it]

{'epoch': 0, 'iter': 3270, 'avg_loss': 6.234570284423315, 'avg_acc': 50.31144909813513, 'loss': 5.786679267883301}


EP_train:0:  47%|| 3281/6926 [4:03:02<4:18:37,  4.26s/it]

{'epoch': 0, 'iter': 3280, 'avg_loss': 6.2333091364078355, 'avg_acc': 50.314309661688505, 'loss': 5.481211185455322}


EP_train:0:  48%|| 3291/6926 [4:03:44<4:18:06,  4.26s/it]

{'epoch': 0, 'iter': 3290, 'avg_loss': 6.231999642288963, 'avg_acc': 50.318102400486175, 'loss': 5.761023044586182}


EP_train:0:  48%|| 3300/6926 [4:04:26<4:18:35,  4.28s/it]

{'epoch': 0, 'iter': 3300, 'avg_loss': 6.230721575821649, 'avg_acc': 50.323765525598304, 'loss': 6.059300899505615}


EP_train:0:  48%|| 3301/6926 [4:04:34<6:39:19,  6.61s/it]

Saved best retrieval model


EP_train:0:  48%|| 3311/6926 [4:05:18<4:26:00,  4.42s/it]

{'epoch': 0, 'iter': 3310, 'avg_loss': 6.228998161305549, 'avg_acc': 50.31051797040169, 'loss': 5.841487884521484}


EP_train:0:  48%|| 3321/6926 [4:06:04<4:29:30,  4.49s/it]

{'epoch': 0, 'iter': 3320, 'avg_loss': 6.227311249491466, 'avg_acc': 50.30393706714845, 'loss': 5.947638511657715}


EP_train:0:  48%|| 3331/6926 [4:06:49<4:35:31,  4.60s/it]

{'epoch': 0, 'iter': 3330, 'avg_loss': 6.225724502427196, 'avg_acc': 50.308653557490246, 'loss': 5.360589981079102}


EP_train:0:  48%|| 3341/6926 [4:07:31<4:12:14,  4.22s/it]

{'epoch': 0, 'iter': 3340, 'avg_loss': 6.224391503949181, 'avg_acc': 50.30492367554624, 'loss': 5.851339340209961}


EP_train:0:  48%|| 3350/6926 [4:08:14<4:08:36,  4.17s/it]

{'epoch': 0, 'iter': 3350, 'avg_loss': 6.223011243382485, 'avg_acc': 50.29375559534467, 'loss': 5.654421329498291}


EP_train:0:  48%|| 3351/6926 [4:08:21<6:35:00,  6.63s/it]

Saved best retrieval model


EP_train:0:  49%|| 3361/6926 [4:09:05<4:22:26,  4.42s/it]

{'epoch': 0, 'iter': 3360, 'avg_loss': 6.221460183225052, 'avg_acc': 50.27707527521571, 'loss': 5.319999694824219}


EP_train:0:  49%|| 3371/6926 [4:09:50<4:23:23,  4.45s/it]

{'epoch': 0, 'iter': 3370, 'avg_loss': 6.219567120092908, 'avg_acc': 50.28181548501928, 'loss': 5.731027126312256}


EP_train:0:  49%|| 3381/6926 [4:10:33<4:14:10,  4.30s/it]

{'epoch': 0, 'iter': 3380, 'avg_loss': 6.218381786092574, 'avg_acc': 50.28652765454008, 'loss': 5.310576915740967}


EP_train:0:  49%|| 3391/6926 [4:11:16<4:09:36,  4.24s/it]

{'epoch': 0, 'iter': 3390, 'avg_loss': 6.217413198065525, 'avg_acc': 50.28107490415806, 'loss': 5.467369556427002}


EP_train:0:  49%|| 3400/6926 [4:11:58<4:12:56,  4.30s/it]

{'epoch': 0, 'iter': 3400, 'avg_loss': 6.215762336756755, 'avg_acc': 50.28024845633637, 'loss': 5.93480110168457}


EP_train:0:  49%|| 3401/6926 [4:12:06<6:23:20,  6.53s/it]

Saved best retrieval model


EP_train:0:  49%|| 3411/6926 [4:12:50<4:17:54,  4.40s/it]

{'epoch': 0, 'iter': 3410, 'avg_loss': 6.2143142781834975, 'avg_acc': 50.285839929639394, 'loss': 5.871475696563721}


EP_train:0:  49%|| 3421/6926 [4:13:35<4:21:21,  4.47s/it]

{'epoch': 0, 'iter': 3420, 'avg_loss': 6.21358469361208, 'avg_acc': 50.272215726395785, 'loss': 5.444864273071289}


EP_train:0:  50%|| 3431/6926 [4:14:19<4:10:09,  4.29s/it]

{'epoch': 0, 'iter': 3430, 'avg_loss': 6.212495702795231, 'avg_acc': 50.27961964441854, 'loss': 5.361253261566162}


EP_train:0:  50%|| 3441/6926 [4:15:01<4:06:59,  4.25s/it]

{'epoch': 0, 'iter': 3440, 'avg_loss': 6.211370404395071, 'avg_acc': 50.273358035454805, 'loss': 6.030195713043213}


EP_train:0:  50%|| 3450/6926 [4:15:44<4:10:29,  4.32s/it]

{'epoch': 0, 'iter': 3450, 'avg_loss': 6.209735689353888, 'avg_acc': 50.27528252680382, 'loss': 5.498987197875977}


EP_train:0:  50%|| 3451/6926 [4:15:50<5:48:47,  6.02s/it]

Saved best retrieval model


EP_train:0:  50%|| 3461/6926 [4:16:33<4:13:55,  4.40s/it]

{'epoch': 0, 'iter': 3460, 'avg_loss': 6.208228271636478, 'avg_acc': 50.270875469517485, 'loss': 5.835233211517334}


EP_train:0:  50%|| 3471/6926 [4:17:18<4:15:41,  4.44s/it]

{'epoch': 0, 'iter': 3470, 'avg_loss': 6.207078651679899, 'avg_acc': 50.27549697493517, 'loss': 6.171468257904053}


EP_train:0:  50%|| 3481/6926 [4:18:01<4:06:05,  4.29s/it]

{'epoch': 0, 'iter': 3480, 'avg_loss': 6.205918077969133, 'avg_acc': 50.2729100833094, 'loss': 5.872857093811035}


EP_train:0:  50%|| 3491/6926 [4:18:44<4:06:53,  4.31s/it]

{'epoch': 0, 'iter': 3490, 'avg_loss': 6.204868418820025, 'avg_acc': 50.269442853050705, 'loss': 5.803617000579834}


EP_train:0:  51%|| 3500/6926 [4:19:26<4:00:21,  4.21s/it]

{'epoch': 0, 'iter': 3500, 'avg_loss': 6.203235888951031, 'avg_acc': 50.274921451013995, 'loss': 5.363388538360596}


EP_train:0:  51%|| 3501/6926 [4:19:34<6:09:06,  6.47s/it]

Saved best retrieval model


EP_train:0:  51%|| 3511/6926 [4:20:18<4:13:59,  4.46s/it]

{'epoch': 0, 'iter': 3510, 'avg_loss': 6.201729617476022, 'avg_acc': 50.272358302477926, 'loss': 5.921825408935547}


EP_train:0:  51%|| 3521/6926 [4:21:03<4:16:11,  4.51s/it]

{'epoch': 0, 'iter': 3520, 'avg_loss': 6.200552084743215, 'avg_acc': 50.264484521442775, 'loss': 5.986263751983643}


EP_train:0:  51%|| 3531/6926 [4:21:47<4:03:06,  4.30s/it]

{'epoch': 0, 'iter': 3530, 'avg_loss': 6.199457406389986, 'avg_acc': 50.259310393656186, 'loss': 5.790916919708252}


EP_train:0:  51%|| 3541/6926 [4:22:30<4:03:27,  4.32s/it]

{'epoch': 0, 'iter': 3540, 'avg_loss': 6.197996528968067, 'avg_acc': 50.266520756848344, 'loss': 5.243339538574219}


EP_train:0:  51%|| 3550/6926 [4:23:13<3:59:22,  4.25s/it]

{'epoch': 0, 'iter': 3550, 'avg_loss': 6.196760511707165, 'avg_acc': 50.26929034074909, 'loss': 5.9207892417907715}


EP_train:0:  51%|| 3551/6926 [4:23:19<5:40:33,  6.05s/it]

Saved best retrieval model


EP_train:0:  51%|| 3561/6926 [4:24:03<4:06:24,  4.39s/it]

{'epoch': 0, 'iter': 3560, 'avg_loss': 6.195107254878887, 'avg_acc': 50.26590143218197, 'loss': 5.320302963256836}


EP_train:0:  52%|| 3571/6926 [4:24:48<4:11:53,  4.50s/it]

{'epoch': 0, 'iter': 3570, 'avg_loss': 6.19433444289392, 'avg_acc': 50.26253150378045, 'loss': 6.020817279815674}


EP_train:0:  52%|| 3581/6926 [4:25:33<3:58:56,  4.29s/it]

{'epoch': 0, 'iter': 3580, 'avg_loss': 6.192751053424229, 'avg_acc': 50.269652331750905, 'loss': 5.707965850830078}


EP_train:0:  52%|| 3591/6926 [4:26:16<4:01:56,  4.35s/it]

{'epoch': 0, 'iter': 3590, 'avg_loss': 6.191334690565653, 'avg_acc': 50.264550264550266, 'loss': 5.578312873840332}


EP_train:0:  52%|| 3600/6926 [4:26:59<3:54:20,  4.23s/it]

{'epoch': 0, 'iter': 3600, 'avg_loss': 6.190132978127354, 'avg_acc': 50.25860871980006, 'loss': 5.725433826446533}


EP_train:0:  52%|| 3601/6926 [4:27:05<5:37:30,  6.09s/it]

Saved best retrieval model


EP_train:0:  52%|| 3611/6926 [4:27:49<4:07:04,  4.47s/it]

{'epoch': 0, 'iter': 3610, 'avg_loss': 6.18887789313465, 'avg_acc': 50.257027139296596, 'loss': 5.643332481384277}


EP_train:0:  52%|| 3621/6926 [4:28:34<4:04:52,  4.45s/it]

{'epoch': 0, 'iter': 3620, 'avg_loss': 6.187819291794047, 'avg_acc': 50.25631731565866, 'loss': 5.497509956359863}


EP_train:0:  52%|| 3631/6926 [4:29:19<3:56:18,  4.30s/it]

{'epoch': 0, 'iter': 3630, 'avg_loss': 6.186607321967963, 'avg_acc': 50.26593913522446, 'loss': 5.907775402069092}


EP_train:0:  53%|| 3641/6926 [4:30:02<3:56:40,  4.32s/it]

{'epoch': 0, 'iter': 3640, 'avg_loss': 6.185288321739433, 'avg_acc': 50.25834248832738, 'loss': 5.4971604347229}


EP_train:0:  53%|| 3650/6926 [4:30:45<3:55:41,  4.32s/it]

{'epoch': 0, 'iter': 3650, 'avg_loss': 6.184434500108905, 'avg_acc': 50.24479594631608, 'loss': 5.683117389678955}


EP_train:0:  53%|| 3651/6926 [4:30:53<6:06:12,  6.71s/it]

Saved best retrieval model


EP_train:0:  53%|| 3661/6926 [4:31:37<4:03:57,  4.48s/it]

{'epoch': 0, 'iter': 3660, 'avg_loss': 6.183214130703786, 'avg_acc': 50.23900573613766, 'loss': 5.389834880828857}


EP_train:0:  53%|| 3671/6926 [4:32:24<4:08:10,  4.57s/it]

{'epoch': 0, 'iter': 3670, 'avg_loss': 6.181859475330383, 'avg_acc': 50.248569871969494, 'loss': 5.67887020111084}


EP_train:0:  53%|| 3681/6926 [4:33:09<4:09:45,  4.62s/it]

{'epoch': 0, 'iter': 3680, 'avg_loss': 6.180506574625038, 'avg_acc': 50.230915512089105, 'loss': 5.828097820281982}


EP_train:0:  53%|| 3691/6926 [4:33:52<3:54:49,  4.36s/it]

{'epoch': 0, 'iter': 3690, 'avg_loss': 6.179173033530071, 'avg_acc': 50.229443240314275, 'loss': 5.045592784881592}


EP_train:0:  53%|| 3700/6926 [4:34:37<3:55:36,  4.38s/it]

{'epoch': 0, 'iter': 3700, 'avg_loss': 6.177755277242508, 'avg_acc': 50.2203796271278, 'loss': 5.914209842681885}


EP_train:0:  53%|| 3701/6926 [4:34:45<6:11:05,  6.90s/it]

Saved best retrieval model


EP_train:0:  54%|| 3711/6926 [4:35:32<4:10:52,  4.68s/it]

{'epoch': 0, 'iter': 3710, 'avg_loss': 6.176595554490579, 'avg_acc': 50.228206682834816, 'loss': 5.749670505523682}


EP_train:0:  54%|| 3721/6926 [4:36:19<4:09:26,  4.67s/it]

{'epoch': 0, 'iter': 3720, 'avg_loss': 6.1754823012942754, 'avg_acc': 50.224234076861066, 'loss': 5.974781036376953}


EP_train:0:  54%|| 3731/6926 [4:37:04<3:59:41,  4.50s/it]

{'epoch': 0, 'iter': 3730, 'avg_loss': 6.174239057537315, 'avg_acc': 50.22195792012866, 'loss': 5.257681369781494}


EP_train:0:  54%|| 3741/6926 [4:37:49<3:58:03,  4.48s/it]

{'epoch': 0, 'iter': 3740, 'avg_loss': 6.173092702658091, 'avg_acc': 50.230553327987174, 'loss': 5.690974235534668}


EP_train:0:  54%|| 3750/6926 [4:38:34<4:00:00,  4.53s/it]

{'epoch': 0, 'iter': 3750, 'avg_loss': 6.17174797190313, 'avg_acc': 50.23743668355105, 'loss': 6.191376686096191}


EP_train:0:  54%|| 3751/6926 [4:38:42<6:02:33,  6.85s/it]

Saved best retrieval model


EP_train:0:  54%|| 3761/6926 [4:39:29<4:06:04,  4.67s/it]

{'epoch': 0, 'iter': 3760, 'avg_loss': 6.170657553339347, 'avg_acc': 50.247607019409735, 'loss': 5.420197010040283}


EP_train:0:  54%|| 3771/6926 [4:40:17<4:08:29,  4.73s/it]

{'epoch': 0, 'iter': 3770, 'avg_loss': 6.169470721800932, 'avg_acc': 50.24197825510475, 'loss': 5.556365966796875}


EP_train:0:  55%|| 3781/6926 [4:41:04<4:00:41,  4.59s/it]

{'epoch': 0, 'iter': 3780, 'avg_loss': 6.168182035061267, 'avg_acc': 50.24795027770431, 'loss': 5.726067066192627}


EP_train:0:  55%|| 3791/6926 [4:41:51<4:03:31,  4.66s/it]

{'epoch': 0, 'iter': 3790, 'avg_loss': 6.167065435775318, 'avg_acc': 50.2464719071485, 'loss': 6.000579833984375}


EP_train:0:  55%|| 3800/6926 [4:42:39<4:12:49,  4.85s/it]

{'epoch': 0, 'iter': 3800, 'avg_loss': 6.166024576893169, 'avg_acc': 50.23184688239937, 'loss': 5.945281505584717}


EP_train:0:  55%|| 3801/6926 [4:42:45<5:46:16,  6.65s/it]

Saved best retrieval model


EP_train:0:  55%|| 3811/6926 [4:43:36<4:34:14,  5.28s/it]

{'epoch': 0, 'iter': 3810, 'avg_loss': 6.16498614346002, 'avg_acc': 50.2271385463133, 'loss': 6.054616451263428}


EP_train:0:  55%|| 3821/6926 [4:44:27<4:26:44,  5.15s/it]

{'epoch': 0, 'iter': 3820, 'avg_loss': 6.163571991180444, 'avg_acc': 50.23881182936404, 'loss': 5.572788715362549}


EP_train:0:  55%|| 3831/6926 [4:45:20<4:31:00,  5.25s/it]

{'epoch': 0, 'iter': 3830, 'avg_loss': 6.162631827961587, 'avg_acc': 50.22513703993735, 'loss': 5.593054294586182}


EP_train:0:  55%|| 3841/6926 [4:46:13<4:37:04,  5.39s/it]

{'epoch': 0, 'iter': 3840, 'avg_loss': 6.161503781505129, 'avg_acc': 50.20909268419682, 'loss': 6.11372709274292}


EP_train:0:  56%|| 3850/6926 [4:47:09<4:45:53,  5.58s/it]

{'epoch': 0, 'iter': 3850, 'avg_loss': 6.160565502455995, 'avg_acc': 50.20449233965204, 'loss': 5.931253910064697}


EP_train:0:  56%|| 3851/6926 [4:47:16<6:39:22,  7.79s/it]

Saved best retrieval model


EP_train:0:  56%|| 3861/6926 [4:48:14<4:55:09,  5.78s/it]

{'epoch': 0, 'iter': 3860, 'avg_loss': 6.159298021414835, 'avg_acc': 50.20315332815333, 'loss': 5.595473289489746}


EP_train:0:  56%|| 3871/6926 [4:49:15<5:11:55,  6.13s/it]

{'epoch': 0, 'iter': 3870, 'avg_loss': 6.158217885887798, 'avg_acc': 50.20020666494446, 'loss': 5.650211334228516}


EP_train:0:  56%|| 3881/6926 [4:50:20<5:26:03,  6.42s/it]

{'epoch': 0, 'iter': 3880, 'avg_loss': 6.15738116217409, 'avg_acc': 50.206937644936865, 'loss': 5.654520511627197}


EP_train:0:  56%|| 3891/6926 [4:51:30<5:52:30,  6.97s/it]

{'epoch': 0, 'iter': 3890, 'avg_loss': 6.156082052462697, 'avg_acc': 50.20479953739398, 'loss': 5.518021583557129}


EP_train:0:  56%|| 3900/6926 [4:52:43<6:18:03,  7.50s/it]

{'epoch': 0, 'iter': 3900, 'avg_loss': 6.155034687787379, 'avg_acc': 50.21308638810561, 'loss': 6.4183669090271}


EP_train:0:  56%|| 3901/6926 [4:52:46<6:59:35,  8.32s/it]

Saved best retrieval model


EP_train:0:  56%|| 3911/6926 [4:54:03<6:29:11,  7.75s/it]

{'epoch': 0, 'iter': 3910, 'avg_loss': 6.153883781252661, 'avg_acc': 50.19895806699054, 'loss': 5.492412567138672}


EP_train:0:  57%|| 3921/6926 [4:55:20<6:38:52,  7.96s/it]

{'epoch': 0, 'iter': 3920, 'avg_loss': 6.152967483405707, 'avg_acc': 50.1984506503443, 'loss': 5.534367084503174}


EP_train:0:  57%|| 3931/6926 [4:56:44<6:52:51,  8.27s/it]

{'epoch': 0, 'iter': 3930, 'avg_loss': 6.151979574020568, 'avg_acc': 50.20351055711016, 'loss': 5.812830448150635}


EP_train:0:  57%|| 3941/6926 [4:58:14<7:35:27,  9.15s/it]

{'epoch': 0, 'iter': 3940, 'avg_loss': 6.15040219843433, 'avg_acc': 50.197443542248166, 'loss': 5.680819034576416}


EP_train:0:  57%|| 3950/6926 [4:59:44<7:47:49,  9.43s/it]

{'epoch': 0, 'iter': 3950, 'avg_loss': 6.149446516150434, 'avg_acc': 50.19615287269046, 'loss': 5.704598903656006}


EP_train:0:  57%|| 3951/6926 [4:59:51<9:34:19, 11.58s/it]

Saved best retrieval model


EP_train:0:  57%|| 3961/6926 [5:01:28<8:03:31,  9.78s/it]

{'epoch': 0, 'iter': 3960, 'avg_loss': 6.14832325687134, 'avg_acc': 50.18697929815703, 'loss': 5.716264724731445}


EP_train:0:  57%|| 3971/6926 [5:03:06<8:11:41,  9.98s/it]

{'epoch': 0, 'iter': 3970, 'avg_loss': 6.14704631156028, 'avg_acc': 50.1833606144548, 'loss': 5.889084815979004}


EP_train:0:  57%|| 3981/6926 [5:04:50<8:02:54,  9.84s/it]

{'epoch': 0, 'iter': 3980, 'avg_loss': 6.145689940698121, 'avg_acc': 50.17897513187641, 'loss': 5.5328288078308105}


EP_train:0:  58%|| 3991/6926 [5:06:33<8:35:56, 10.55s/it]

{'epoch': 0, 'iter': 3990, 'avg_loss': 6.145216521390008, 'avg_acc': 50.173828614382366, 'loss': 5.830503940582275}


EP_train:0:  58%|| 4000/6926 [5:08:23<8:54:19, 10.96s/it]

{'epoch': 0, 'iter': 4000, 'avg_loss': 6.144057665607745, 'avg_acc': 50.16792676830792, 'loss': 5.767881870269775}


EP_train:0:  58%|| 4001/6926 [5:08:31<10:54:13, 13.42s/it]

Saved best retrieval model


EP_train:0:  58%|| 4011/6926 [5:10:29<9:43:59, 12.02s/it]

{'epoch': 0, 'iter': 4010, 'avg_loss': 6.143169707177257, 'avg_acc': 50.16361256544503, 'loss': 5.525236129760742}


EP_train:0:  58%|| 4021/6926 [5:12:21<8:50:42, 10.96s/it]

{'epoch': 0, 'iter': 4020, 'avg_loss': 6.14234300087827, 'avg_acc': 50.14921661278289, 'loss': 6.033045291900635}


EP_train:0:  58%|| 4031/6926 [5:14:17<9:02:51, 11.25s/it]

{'epoch': 0, 'iter': 4030, 'avg_loss': 6.1414106137638385, 'avg_acc': 50.148846440089315, 'loss': 6.163334369659424}


EP_train:0:  58%|| 4041/6926 [5:16:24<10:21:11, 12.92s/it]

{'epoch': 0, 'iter': 4040, 'avg_loss': 6.140882841226813, 'avg_acc': 50.13610492452363, 'loss': 6.089602947235107}


EP_train:0:  58%|| 4050/6926 [5:18:31<9:48:59, 12.29s/it]

{'epoch': 0, 'iter': 4050, 'avg_loss': 6.139835512099576, 'avg_acc': 50.134997531473715, 'loss': 5.784618854522705}


EP_train:0:  58%|| 4051/6926 [5:18:39<11:55:27, 14.93s/it]

Saved best retrieval model


EP_train:0:  59%|| 4061/6926 [5:20:46<10:09:10, 12.76s/it]

{'epoch': 0, 'iter': 4060, 'avg_loss': 6.1387460983847495, 'avg_acc': 50.13928219650332, 'loss': 5.795241355895996}


EP_train:0:  59%|| 4071/6926 [5:22:51<9:46:12, 12.32s/it]

{'epoch': 0, 'iter': 4070, 'avg_loss': 6.1375610999995445, 'avg_acc': 50.135101940555145, 'loss': 5.954671382904053}


EP_train:0:  59%|| 4081/6926 [5:24:57<10:10:41, 12.88s/it]

{'epoch': 0, 'iter': 4080, 'avg_loss': 6.136556741040994, 'avg_acc': 50.140896839010054, 'loss': 6.113717079162598}


EP_train:0:  59%|| 4091/6926 [5:26:59<9:49:53, 12.48s/it]

{'epoch': 0, 'iter': 4090, 'avg_loss': 6.135358682444555, 'avg_acc': 50.1527743827915, 'loss': 6.148143291473389}


EP_train:0:  59%|| 4100/6926 [5:29:12<10:33:07, 13.44s/it]

{'epoch': 0, 'iter': 4100, 'avg_loss': 6.134333620534179, 'avg_acc': 50.161545964398925, 'loss': 5.535987854003906}


EP_train:0:  59%|| 4101/6926 [5:29:20<12:05:19, 15.41s/it]

Saved best retrieval model


EP_train:0:  59%|| 4111/6926 [5:31:28<9:42:15, 12.41s/it]

{'epoch': 0, 'iter': 4110, 'avg_loss': 6.133276744295982, 'avg_acc': 50.1634334711749, 'loss': 5.9814839363098145}


EP_train:0:  60%|| 4121/6926 [5:33:41<10:05:30, 12.95s/it]

{'epoch': 0, 'iter': 4120, 'avg_loss': 6.132282276982042, 'avg_acc': 50.17820310604222, 'loss': 5.160263538360596}


EP_train:0:  60%|| 4131/6926 [5:35:48<9:48:06, 12.62s/it]

{'epoch': 0, 'iter': 4130, 'avg_loss': 6.131469841183487, 'avg_acc': 50.17701525054466, 'loss': 5.649102210998535}


EP_train:0:  60%|| 4141/6926 [5:38:04<10:28:45, 13.55s/it]

{'epoch': 0, 'iter': 4140, 'avg_loss': 6.130831252201489, 'avg_acc': 50.17960637527167, 'loss': 6.006808757781982}


EP_train:0:  60%|| 4150/6926 [5:40:07<9:40:07, 12.54s/it]

{'epoch': 0, 'iter': 4150, 'avg_loss': 6.129525120072811, 'avg_acc': 50.179173693086, 'loss': 5.651297569274902}


EP_train:0:  60%|| 4151/6926 [5:40:13<10:51:47, 14.09s/it]

Saved best retrieval model


EP_train:0:  60%|| 4161/6926 [5:42:27<10:22:01, 13.50s/it]

{'epoch': 0, 'iter': 4160, 'avg_loss': 6.128952256133711, 'avg_acc': 50.1870043258832, 'loss': 6.10413122177124}


EP_train:0:  60%|| 4171/6926 [5:44:32<9:21:30, 12.23s/it]

{'epoch': 0, 'iter': 4170, 'avg_loss': 6.12800329144811, 'avg_acc': 50.18580676096859, 'loss': 5.5118632316589355}


EP_train:0:  60%|| 4181/6926 [5:46:34<9:08:55, 12.00s/it]

{'epoch': 0, 'iter': 4180, 'avg_loss': 6.126984535920736, 'avg_acc': 50.18909949772782, 'loss': 5.261847019195557}


EP_train:0:  61%|| 4191/6926 [5:48:38<9:33:14, 12.58s/it]

{'epoch': 0, 'iter': 4190, 'avg_loss': 6.126050737358681, 'avg_acc': 50.17224409448819, 'loss': 5.5466742515563965}


EP_train:0:  61%|| 4200/6926 [5:50:39<9:17:25, 12.27s/it]

{'epoch': 0, 'iter': 4200, 'avg_loss': 6.1248277497558306, 'avg_acc': 50.16067602951678, 'loss': 5.338186740875244}


EP_train:0:  61%|| 4201/6926 [5:50:47<10:53:24, 14.39s/it]

Saved best retrieval model


EP_train:0:  61%|| 4211/6926 [5:52:52<9:42:34, 12.87s/it]

{'epoch': 0, 'iter': 4210, 'avg_loss': 6.12379006619296, 'avg_acc': 50.157326050819286, 'loss': 5.744561195373535}


EP_train:0:  61%|| 4221/6926 [5:54:45<8:45:21, 11.65s/it]

{'epoch': 0, 'iter': 4220, 'avg_loss': 6.122892028656765, 'avg_acc': 50.15769367448472, 'loss': 6.021520614624023}


EP_train:0:  61%|| 4231/6926 [5:56:41<8:32:40, 11.41s/it]

{'epoch': 0, 'iter': 4230, 'avg_loss': 6.12228341362939, 'avg_acc': 50.16470692507682, 'loss': 5.923513889312744}


EP_train:0:  61%|| 4241/6926 [5:58:35<9:09:26, 12.28s/it]

{'epoch': 0, 'iter': 4240, 'avg_loss': 6.121555252025724, 'avg_acc': 50.159160575336, 'loss': 6.186089038848877}


EP_train:0:  61%|| 4250/6926 [6:00:29<8:29:48, 11.43s/it]

{'epoch': 0, 'iter': 4250, 'avg_loss': 6.120355980055833, 'avg_acc': 50.16613737944014, 'loss': 5.93173885345459}


EP_train:0:  61%|| 4251/6926 [6:00:37<10:13:42, 13.77s/it]

Saved best retrieval model


EP_train:0:  62%|| 4261/6926 [6:02:29<8:15:39, 11.16s/it]

{'epoch': 0, 'iter': 4260, 'avg_loss': 6.119372777327829, 'avg_acc': 50.170881248533206, 'loss': 5.7677388191223145}


EP_train:0:  62%|| 4271/6926 [6:04:20<7:59:56, 10.85s/it]

{'epoch': 0, 'iter': 4270, 'avg_loss': 6.118526783739759, 'avg_acc': 50.174871224537576, 'loss': 5.4158830642700195}


EP_train:0:  62%|| 4281/6926 [6:06:03<7:34:24, 10.31s/it]

{'epoch': 0, 'iter': 4280, 'avg_loss': 6.117547668438319, 'avg_acc': 50.16935295491708, 'loss': 5.718791484832764}


EP_train:0:  62%|| 4291/6926 [6:07:42<7:11:53,  9.83s/it]

{'epoch': 0, 'iter': 4290, 'avg_loss': 6.116721455408015, 'avg_acc': 50.16895828478211, 'loss': 5.444436073303223}


EP_train:0:  62%|| 4300/6926 [6:09:22<7:30:09, 10.29s/it]

{'epoch': 0, 'iter': 4300, 'avg_loss': 6.115876526312617, 'avg_acc': 50.16783887468031, 'loss': 5.972867012023926}


EP_train:0:  62%|| 4301/6926 [6:09:30<9:14:52, 12.68s/it]

Saved best retrieval model


EP_train:0:  62%|| 4311/6926 [6:11:09<7:03:07,  9.71s/it]

{'epoch': 0, 'iter': 4310, 'avg_loss': 6.114846105007029, 'avg_acc': 50.16889932730225, 'loss': 5.619204521179199}


EP_train:0:  62%|| 4321/6926 [6:12:45<6:57:56,  9.63s/it]

{'epoch': 0, 'iter': 4320, 'avg_loss': 6.1140242462714385, 'avg_acc': 50.15259777829206, 'loss': 5.115797996520996}


EP_train:0:  63%|| 4331/6926 [6:14:20<6:34:49,  9.13s/it]

{'epoch': 0, 'iter': 4330, 'avg_loss': 6.113120372343272, 'avg_acc': 50.15873932117294, 'loss': 5.13975715637207}


EP_train:0:  63%|| 4341/6926 [6:15:52<6:49:46,  9.51s/it]

{'epoch': 0, 'iter': 4340, 'avg_loss': 6.112229410060346, 'avg_acc': 50.15333448514168, 'loss': 5.80522346496582}


EP_train:0:  63%|| 4350/6926 [6:17:22<6:19:53,  8.85s/it]

{'epoch': 0, 'iter': 4350, 'avg_loss': 6.111311883275412, 'avg_acc': 50.15585497586762, 'loss': 5.818477630615234}


EP_train:0:  63%|| 4351/6926 [6:17:30<8:05:40, 11.32s/it]

Saved best retrieval model


EP_train:0:  63%|| 4361/6926 [6:19:02<6:33:46,  9.21s/it]

{'epoch': 0, 'iter': 4360, 'avg_loss': 6.1104165277085265, 'avg_acc': 50.154781013529, 'loss': 6.093033790588379}


EP_train:0:  63%|| 4371/6926 [6:20:30<6:12:59,  8.76s/it]

{'epoch': 0, 'iter': 4370, 'avg_loss': 6.10951326084857, 'avg_acc': 50.15371196522535, 'loss': 5.853055477142334}


EP_train:0:  63%|| 4381/6926 [6:21:54<5:59:50,  8.48s/it]

{'epoch': 0, 'iter': 4380, 'avg_loss': 6.108464426821265, 'avg_acc': 50.153361104770596, 'loss': 5.882725715637207}


EP_train:0:  63%|| 4391/6926 [6:23:19<6:04:20,  8.62s/it]

{'epoch': 0, 'iter': 4390, 'avg_loss': 6.107750626474761, 'avg_acc': 50.15230015941698, 'loss': 6.145420074462891}


EP_train:0:  64%|| 4400/6926 [6:24:44<6:08:31,  8.75s/it]

{'epoch': 0, 'iter': 4400, 'avg_loss': 6.106914226317021, 'avg_acc': 50.15053396955238, 'loss': 5.986966133117676}


EP_train:0:  64%|| 4401/6926 [6:24:50<7:10:18, 10.23s/it]

Saved best retrieval model


EP_train:0:  64%|| 4411/6926 [6:26:17<6:09:34,  8.82s/it]

{'epoch': 0, 'iter': 4410, 'avg_loss': 6.1059717883384845, 'avg_acc': 50.15090115620041, 'loss': 5.444355010986328}


EP_train:0:  64%|| 4421/6926 [6:27:42<5:50:47,  8.40s/it]

{'epoch': 0, 'iter': 4420, 'avg_loss': 6.105151483738107, 'avg_acc': 50.154800950011314, 'loss': 5.570769786834717}


EP_train:0:  64%|| 4431/6926 [6:29:11<6:06:03,  8.80s/it]

{'epoch': 0, 'iter': 4430, 'avg_loss': 6.104602432767523, 'avg_acc': 50.15374633265629, 'loss': 6.523870944976807}


EP_train:0:  64%|| 4441/6926 [6:30:38<6:01:17,  8.72s/it]

{'epoch': 0, 'iter': 4440, 'avg_loss': 6.103922325221034, 'avg_acc': 50.14143773924792, 'loss': 5.280694007873535}


EP_train:0:  64%|| 4450/6926 [6:32:05<5:52:11,  8.53s/it]

{'epoch': 0, 'iter': 4450, 'avg_loss': 6.10316531177693, 'avg_acc': 50.13690743653112, 'loss': 5.900311470031738}


EP_train:0:  64%|| 4451/6926 [6:32:12<7:34:57, 11.03s/it]

Saved best retrieval model


EP_train:0:  64%|| 4461/6926 [6:33:50<6:51:57, 10.03s/it]

{'epoch': 0, 'iter': 4460, 'avg_loss': 6.102342507038061, 'avg_acc': 50.133097960098624, 'loss': 5.7909955978393555}


EP_train:0:  65%|| 4471/6926 [6:35:24<6:27:44,  9.48s/it]

{'epoch': 0, 'iter': 4470, 'avg_loss': 6.101363852903416, 'avg_acc': 50.130004473272194, 'loss': 5.406078815460205}


EP_train:0:  65%|| 4481/6926 [6:36:59<6:32:52,  9.64s/it]

{'epoch': 0, 'iter': 4480, 'avg_loss': 6.100507005467209, 'avg_acc': 50.127622182548535, 'loss': 5.602375507354736}


EP_train:0:  65%|| 4491/6926 [6:38:35<6:35:12,  9.74s/it]

{'epoch': 0, 'iter': 4490, 'avg_loss': 6.100046441433451, 'avg_acc': 50.12316299265197, 'loss': 5.879668712615967}


EP_train:0:  65%|| 4500/6926 [6:40:22<7:19:33, 10.87s/it]

{'epoch': 0, 'iter': 4500, 'avg_loss': 6.099236712935659, 'avg_acc': 50.11941790713175, 'loss': 5.585508823394775}


EP_train:0:  65%|| 4501/6926 [6:40:30<8:51:24, 13.15s/it]

Saved best retrieval model


EP_train:0:  65%|| 4511/6926 [6:42:19<7:22:46, 11.00s/it]

{'epoch': 0, 'iter': 4510, 'avg_loss': 6.098348060063316, 'avg_acc': 50.12400243848371, 'loss': 6.007582664489746}


EP_train:0:  65%|| 4521/6926 [6:44:16<7:49:30, 11.71s/it]

{'epoch': 0, 'iter': 4520, 'avg_loss': 6.097503967555164, 'avg_acc': 50.1306403450564, 'loss': 5.710354804992676}


EP_train:0:  65%|| 4531/6926 [6:46:10<7:33:49, 11.37s/it]

{'epoch': 0, 'iter': 4530, 'avg_loss': 6.096670978355029, 'avg_acc': 50.13931803133966, 'loss': 5.605131149291992}


EP_train:0:  66%|| 4541/6926 [6:48:07<7:53:24, 11.91s/it]

{'epoch': 0, 'iter': 4540, 'avg_loss': 6.095803279494484, 'avg_acc': 50.133505835719006, 'loss': 5.74021577835083}


EP_train:0:  66%|| 4550/6926 [6:50:02<7:38:09, 11.57s/it]

{'epoch': 0, 'iter': 4550, 'avg_loss': 6.094985039777427, 'avg_acc': 50.144885739397935, 'loss': 5.818680763244629}


EP_train:0:  66%|| 4551/6926 [6:50:10<8:55:57, 13.54s/it]

Saved best retrieval model


EP_train:0:  66%|| 4561/6926 [6:52:11<7:57:03, 12.10s/it]

{'epoch': 0, 'iter': 4560, 'avg_loss': 6.0942057645313055, 'avg_acc': 50.15141964481473, 'loss': 5.664440631866455}


EP_train:0:  66%|| 4571/6926 [6:54:21<8:35:10, 13.13s/it]

{'epoch': 0, 'iter': 4570, 'avg_loss': 6.093605378346432, 'avg_acc': 50.160659593086855, 'loss': 5.227679252624512}


EP_train:0:  66%|| 4581/6926 [6:56:26<7:53:42, 12.12s/it]

{'epoch': 0, 'iter': 4580, 'avg_loss': 6.0928081738535065, 'avg_acc': 50.170541366513866, 'loss': 5.489528179168701}


EP_train:0:  66%|| 4591/6926 [6:58:41<8:48:13, 13.57s/it]

{'epoch': 0, 'iter': 4590, 'avg_loss': 6.091611782519341, 'avg_acc': 50.168808538444786, 'loss': 5.751520156860352}


EP_train:0:  66%|| 4600/6926 [7:01:03<9:18:28, 14.41s/it]

{'epoch': 0, 'iter': 4600, 'avg_loss': 6.090592887603156, 'avg_acc': 50.167762442947186, 'loss': 5.863829612731934}


EP_train:0:  66%|| 4601/6926 [7:01:11<10:59:29, 17.02s/it]

Saved best retrieval model


EP_train:0:  67%|| 4611/6926 [7:03:34<9:28:35, 14.74s/it]

{'epoch': 0, 'iter': 4610, 'avg_loss': 6.0900971169224585, 'avg_acc': 50.176886792452834, 'loss': 5.996328353881836}


EP_train:0:  67%|| 4621/6926 [7:05:55<9:17:53, 14.52s/it]

{'epoch': 0, 'iter': 4620, 'avg_loss': 6.089401083213379, 'avg_acc': 50.176504003462455, 'loss': 5.809654235839844}


EP_train:0:  67%|| 4631/6926 [7:08:18<9:17:12, 14.57s/it]

{'epoch': 0, 'iter': 4630, 'avg_loss': 6.088588004646639, 'avg_acc': 50.168025264521695, 'loss': 5.853965759277344}


EP_train:0:  67%|| 4641/6926 [7:10:47<9:52:39, 15.56s/it]

{'epoch': 0, 'iter': 4640, 'avg_loss': 6.0879058284097844, 'avg_acc': 50.16900991165697, 'loss': 5.6214680671691895}


EP_train:0:  67%|| 4650/6926 [7:13:08<9:11:46, 14.55s/it]

{'epoch': 0, 'iter': 4650, 'avg_loss': 6.086819623419352, 'avg_acc': 50.162599440980436, 'loss': 5.838261604309082}


EP_train:0:  67%|| 4651/6926 [7:13:16<10:47:48, 17.09s/it]

Saved best retrieval model


EP_train:0:  67%|| 4661/6926 [7:15:50<9:25:07, 14.97s/it]

{'epoch': 0, 'iter': 4660, 'avg_loss': 6.085976744383554, 'avg_acc': 50.167614245869984, 'loss': 5.614851474761963}


EP_train:0:  67%|| 4671/6926 [7:18:26<10:09:22, 16.21s/it]

{'epoch': 0, 'iter': 4670, 'avg_loss': 6.0853085114649135, 'avg_acc': 50.153874973239134, 'loss': 5.76530122756958}


EP_train:0:  68%|| 4681/6926 [7:21:04<9:57:46, 15.98s/it]

{'epoch': 0, 'iter': 4680, 'avg_loss': 6.084654147440082, 'avg_acc': 50.16356013672292, 'loss': 5.479865550994873}


EP_train:0:  68%|| 4691/6926 [7:23:48<10:23:19, 16.73s/it]

{'epoch': 0, 'iter': 4690, 'avg_loss': 6.084136604170026, 'avg_acc': 50.16387763803027, 'loss': 6.211831569671631}


EP_train:0:  68%|| 4700/6926 [7:26:32<10:03:53, 16.28s/it]

{'epoch': 0, 'iter': 4700, 'avg_loss': 6.083296862340333, 'avg_acc': 50.167517549457564, 'loss': 5.726365089416504}


EP_train:0:  68%|| 4701/6926 [7:26:38<11:14:52, 18.20s/it]

Saved best retrieval model


EP_train:0:  68%|| 4711/6926 [7:29:22<10:23:58, 16.90s/it]

{'epoch': 0, 'iter': 4710, 'avg_loss': 6.082825359481329, 'avg_acc': 50.15323179791976, 'loss': 5.978856563568115}


EP_train:0:  68%|| 4721/6926 [7:32:20<10:55:46, 17.84s/it]

{'epoch': 0, 'iter': 4720, 'avg_loss': 6.081661793822531, 'avg_acc': 50.16283626350349, 'loss': 5.269367694854736}


EP_train:0:  68%|| 4731/6926 [7:35:20<11:09:49, 18.31s/it]

{'epoch': 0, 'iter': 4730, 'avg_loss': 6.080770514828285, 'avg_acc': 50.16117099978863, 'loss': 5.721548557281494}


EP_train:0:  68%|| 4741/6926 [7:38:38<12:00:13, 19.78s/it]

{'epoch': 0, 'iter': 4740, 'avg_loss': 6.079878965879487, 'avg_acc': 50.16017190466147, 'loss': 5.694086074829102}


EP_train:0:  69%|| 4750/6926 [7:42:01<12:14:05, 20.24s/it]

{'epoch': 0, 'iter': 4750, 'avg_loss': 6.0791489580660665, 'avg_acc': 50.16049252788887, 'loss': 5.877301216125488}


EP_train:0:  69%|| 4751/6926 [7:42:08<13:45:32, 22.77s/it]

Saved best retrieval model


EP_train:0:  69%|| 4761/6926 [7:45:45<13:12:52, 21.97s/it]

{'epoch': 0, 'iter': 4760, 'avg_loss': 6.0783615251519905, 'avg_acc': 50.15359168241966, 'loss': 5.704066276550293}


EP_train:0:  69%|| 4771/6926 [7:49:36<13:41:49, 22.88s/it]

{'epoch': 0, 'iter': 4770, 'avg_loss': 6.077862701983703, 'avg_acc': 50.152614755816394, 'loss': 5.9593329429626465}


EP_train:0:  69%|| 4781/6926 [7:53:41<14:59:32, 25.16s/it]

{'epoch': 0, 'iter': 4780, 'avg_loss': 6.077238438843035, 'avg_acc': 50.13791570801087, 'loss': 5.700730323791504}


EP_train:0:  69%|| 4791/6926 [7:57:54<15:27:31, 26.07s/it]

{'epoch': 0, 'iter': 4790, 'avg_loss': 6.0762901842606585, 'avg_acc': 50.13828010853684, 'loss': 5.663917541503906}


EP_train:0:  69%|| 4800/6926 [8:02:22<16:06:43, 27.28s/it]

{'epoch': 0, 'iter': 4800, 'avg_loss': 6.075592184632899, 'avg_acc': 50.13343574255364, 'loss': 5.410046100616455}


EP_train:0:  69%|| 4801/6926 [8:02:29<17:08:44, 29.05s/it]

Saved best retrieval model


EP_train:0:  69%|| 4811/6926 [8:07:07<16:03:25, 27.33s/it]

{'epoch': 0, 'iter': 4810, 'avg_loss': 6.075107769420008, 'avg_acc': 50.13250883392226, 'loss': 6.142484664916992}


EP_train:0:  70%|| 4821/6926 [8:11:58<17:15:20, 29.51s/it]

{'epoch': 0, 'iter': 4820, 'avg_loss': 6.074476512975402, 'avg_acc': 50.131585770587016, 'loss': 5.418532371520996}


EP_train:0:  70%|| 4831/6926 [8:16:54<17:16:53, 29.70s/it]

{'epoch': 0, 'iter': 4830, 'avg_loss': 6.0737788898805185, 'avg_acc': 50.1351945766922, 'loss': 5.607942581176758}


EP_train:0:  70%|| 4841/6926 [8:21:52<17:20:36, 29.95s/it]

{'epoch': 0, 'iter': 4840, 'avg_loss': 6.072968078629829, 'avg_acc': 50.13556083453832, 'loss': 5.66105842590332}


EP_train:0:  70%|| 4850/6926 [8:26:49<16:57:57, 29.42s/it]

{'epoch': 0, 'iter': 4850, 'avg_loss': 6.072080916509115, 'avg_acc': 50.1410791589363, 'loss': 5.873908519744873}


EP_train:0:  70%|| 4851/6926 [8:26:55<18:11:18, 31.56s/it]

Saved best retrieval model


EP_train:0:  70%|| 4861/6926 [8:31:58<17:18:37, 30.18s/it]

{'epoch': 0, 'iter': 4860, 'avg_loss': 6.071088284235682, 'avg_acc': 50.14786052252623, 'loss': 5.9908833503723145}


EP_train:0:  70%|| 4871/6926 [8:36:54<16:48:09, 29.44s/it]

{'epoch': 0, 'iter': 4870, 'avg_loss': 6.070672290326633, 'avg_acc': 50.152689386163004, 'loss': 6.275834083557129}


EP_train:0:  70%|| 4881/6926 [8:41:42<16:12:12, 28.52s/it]

{'epoch': 0, 'iter': 4880, 'avg_loss': 6.069755879417596, 'avg_acc': 50.14469371030527, 'loss': 5.536945819854736}


EP_train:0:  71%|| 4891/6926 [8:46:33<16:25:09, 29.05s/it]

{'epoch': 0, 'iter': 4890, 'avg_loss': 6.069140871520228, 'avg_acc': 50.14503680228992, 'loss': 5.645779609680176}


EP_train:0:  71%|| 4900/6926 [8:51:16<15:52:13, 28.20s/it]

{'epoch': 0, 'iter': 4900, 'avg_loss': 6.068308880854032, 'avg_acc': 50.14984186900633, 'loss': 6.022558689117432}


EP_train:0:  71%|| 4901/6926 [8:51:18<16:19:31, 29.02s/it]

Saved best retrieval model


EP_train:0:  71%|| 4911/6926 [8:56:00<15:41:11, 28.03s/it]

{'epoch': 0, 'iter': 4910, 'avg_loss': 6.067225301758477, 'avg_acc': 50.14635512115658, 'loss': 5.373054504394531}


EP_train:0:  71%|| 4921/6926 [9:00:37<15:17:47, 27.47s/it]

{'epoch': 0, 'iter': 4920, 'avg_loss': 6.066385637379061, 'avg_acc': 50.139072343019706, 'loss': 5.6651129722595215}


EP_train:0:  71%|| 4931/6926 [9:05:14<15:20:58, 27.70s/it]

{'epoch': 0, 'iter': 4930, 'avg_loss': 6.0658727639878505, 'avg_acc': 50.13372034070168, 'loss': 5.45974063873291}


EP_train:0:  71%|| 4941/6926 [9:09:47<15:01:44, 27.26s/it]

{'epoch': 0, 'iter': 4940, 'avg_loss': 6.0649469245713, 'avg_acc': 50.13534709572961, 'loss': 6.001916408538818}


EP_train:0:  71%|| 4950/6926 [9:14:22<15:06:27, 27.52s/it]

{'epoch': 0, 'iter': 4950, 'avg_loss': 6.064186339184771, 'avg_acc': 50.13570490809938, 'loss': 5.4578328132629395}


EP_train:0:  71%|| 4951/6926 [9:14:28<16:04:11, 29.29s/it]

Saved best retrieval model


EP_train:0:  72%|| 4961/6926 [9:19:02<14:50:54, 27.20s/it]

{'epoch': 0, 'iter': 4960, 'avg_loss': 6.063863762923004, 'avg_acc': 50.1278724047571, 'loss': 5.897871017456055}


EP_train:0:  72%|| 4971/6926 [9:23:32<14:41:21, 27.05s/it]

{'epoch': 0, 'iter': 4970, 'avg_loss': 6.063304238097813, 'avg_acc': 50.13075839871253, 'loss': 5.495190143585205}


EP_train:0:  72%|| 4981/6926 [9:27:58<14:20:54, 26.56s/it]

{'epoch': 0, 'iter': 4980, 'avg_loss': 6.062623691712008, 'avg_acc': 50.12986850030114, 'loss': 5.8766655921936035}


EP_train:0:  72%|| 4991/6926 [9:32:22<14:06:27, 26.25s/it]

{'epoch': 0, 'iter': 4990, 'avg_loss': 6.0619144613578975, 'avg_acc': 50.13524343818874, 'loss': 5.577427864074707}


EP_train:0:  72%|| 5000/6926 [9:36:39<13:39:26, 25.53s/it]

{'epoch': 0, 'iter': 5000, 'avg_loss': 6.0613747436364775, 'avg_acc': 50.13997200559888, 'loss': 6.709413051605225}


EP_train:0:  72%|| 5001/6926 [9:36:45<14:35:43, 27.30s/it]

Saved best retrieval model


EP_train:0:  72%|| 5011/6926 [9:41:03<13:31:29, 25.43s/it]

{'epoch': 0, 'iter': 5010, 'avg_loss': 6.060476881185945, 'avg_acc': 50.13969267611256, 'loss': 5.113682746887207}


EP_train:0:  72%|| 5021/6926 [9:45:17<13:25:27, 25.37s/it]

{'epoch': 0, 'iter': 5020, 'avg_loss': 6.059930577930571, 'avg_acc': 50.140036845249945, 'loss': 5.558013916015625}


EP_train:0:  73%|| 5031/6926 [9:49:29<13:14:22, 25.15s/it]

{'epoch': 0, 'iter': 5030, 'avg_loss': 6.059482629150834, 'avg_acc': 50.146591134963224, 'loss': 5.336319446563721}


EP_train:0:  73%|| 5041/6926 [9:53:42<13:12:22, 25.22s/it]

{'epoch': 0, 'iter': 5040, 'avg_loss': 6.058899913603199, 'avg_acc': 50.15063975401706, 'loss': 5.635272979736328}


EP_train:0:  73%|| 5050/6926 [9:57:51<12:58:15, 24.89s/it]

{'epoch': 0, 'iter': 5050, 'avg_loss': 6.058099334062007, 'avg_acc': 50.14848544842605, 'loss': 5.487118244171143}


EP_train:0:  73%|| 5051/6926 [9:57:57<13:51:21, 26.60s/it]

Saved best retrieval model


EP_train:0:  73%|| 5061/6926 [10:02:08<12:56:55, 24.99s/it]

{'epoch': 0, 'iter': 5060, 'avg_loss': 6.057291962322664, 'avg_acc': 50.14819205690575, 'loss': 5.681541919708252}


EP_train:0:  73%|| 5071/6926 [10:06:18<12:54:34, 25.05s/it]

{'epoch': 0, 'iter': 5070, 'avg_loss': 6.056676941636375, 'avg_acc': 50.147899822520216, 'loss': 5.868673324584961}


EP_train:0:  73%|| 5081/6926 [10:10:27<12:46:32, 24.93s/it]

{'epoch': 0, 'iter': 5080, 'avg_loss': 6.055991246178966, 'avg_acc': 50.141458374335755, 'loss': 5.7510085105896}


EP_train:0:  74%|| 5091/6926 [10:14:33<12:30:34, 24.54s/it]

{'epoch': 0, 'iter': 5090, 'avg_loss': 6.055353306103633, 'avg_acc': 50.14793262620311, 'loss': 5.735667705535889}


EP_train:0:  74%|| 5100/6926 [10:18:39<12:30:57, 24.68s/it]

{'epoch': 0, 'iter': 5100, 'avg_loss': 6.054754476691199, 'avg_acc': 50.1464173691433, 'loss': 6.059706687927246}


EP_train:0:  74%|| 5101/6926 [10:18:41<12:47:53, 25.25s/it]

Saved best retrieval model


EP_train:0:  74%|| 5111/6926 [10:22:47<12:23:01, 24.56s/it]

{'epoch': 0, 'iter': 5110, 'avg_loss': 6.054081965509727, 'avg_acc': 50.13818235179026, 'loss': 5.622142791748047}


EP_train:0:  74%|| 5121/6926 [10:26:51<12:14:07, 24.40s/it]

{'epoch': 0, 'iter': 5120, 'avg_loss': 6.053310362721439, 'avg_acc': 50.12997949619214, 'loss': 5.802914142608643}


EP_train:0:  74%|| 5131/6926 [10:30:54<12:08:31, 24.35s/it]

{'epoch': 0, 'iter': 5130, 'avg_loss': 6.052597096825501, 'avg_acc': 50.13947086337945, 'loss': 5.642709255218506}


EP_train:0:  74%|| 5141/6926 [10:34:57<12:03:07, 24.31s/it]

{'epoch': 0, 'iter': 5140, 'avg_loss': 6.051937610252987, 'avg_acc': 50.13798385528108, 'loss': 5.878718852996826}


EP_train:0:  74%|| 5150/6926 [10:38:59<11:55:19, 24.17s/it]

{'epoch': 0, 'iter': 5150, 'avg_loss': 6.051362953008297, 'avg_acc': 50.14499611725879, 'loss': 5.745620250701904}


EP_train:0:  74%|| 5151/6926 [10:39:01<12:11:10, 24.72s/it]

Saved best retrieval model


EP_train:0:  75%|| 5161/6926 [10:43:05<11:59:22, 24.45s/it]

{'epoch': 0, 'iter': 5160, 'avg_loss': 6.050913507128565, 'avg_acc': 50.14410966866887, 'loss': 6.369186878204346}


EP_train:0:  75%|| 5171/6926 [10:47:09<11:51:36, 24.33s/it]

{'epoch': 0, 'iter': 5170, 'avg_loss': 6.050408626428035, 'avg_acc': 50.134161670856706, 'loss': 5.774247169494629}


EP_train:0:  75%|| 5181/6926 [10:51:14<11:53:30, 24.53s/it]

{'epoch': 0, 'iter': 5180, 'avg_loss': 6.049812072631683, 'avg_acc': 50.13088689442192, 'loss': 5.643261909484863}


EP_train:0:  75%|| 5191/6926 [10:55:19<11:49:17, 24.53s/it]

{'epoch': 0, 'iter': 5190, 'avg_loss': 6.0490964235678, 'avg_acc': 50.13725679059912, 'loss': 6.085369110107422}


EP_train:0:  75%|| 5200/6926 [10:59:24<11:44:04, 24.48s/it]

{'epoch': 0, 'iter': 5200, 'avg_loss': 6.048256601239369, 'avg_acc': 50.13038358008075, 'loss': 6.1174235343933105}


EP_train:0:  75%|| 5201/6926 [10:59:26<12:01:35, 25.10s/it]

Saved best retrieval model


EP_train:0:  75%|| 5211/6926 [11:03:31<11:39:12, 24.46s/it]

{'epoch': 0, 'iter': 5210, 'avg_loss': 6.0476842898367185, 'avg_acc': 50.12893398579927, 'loss': 5.562658309936523}


EP_train:0:  75%|| 5221/6926 [11:07:35<11:35:09, 24.46s/it]

{'epoch': 0, 'iter': 5220, 'avg_loss': 6.046937660201942, 'avg_acc': 50.131679754836235, 'loss': 6.137288570404053}


EP_train:0:  76%|| 5231/6926 [11:11:40<11:33:34, 24.55s/it]

{'epoch': 0, 'iter': 5230, 'avg_loss': 6.046331661058826, 'avg_acc': 50.126051424201876, 'loss': 5.916568756103516}


EP_train:0:  76%|| 5241/6926 [11:15:44<11:28:03, 24.50s/it]

{'epoch': 0, 'iter': 5240, 'avg_loss': 6.045569059480763, 'avg_acc': 50.131177256248804, 'loss': 5.842819690704346}


EP_train:0:  76%|| 5250/6926 [11:19:48<11:22:05, 24.42s/it]

{'epoch': 0, 'iter': 5250, 'avg_loss': 6.045130255834826, 'avg_acc': 50.13152256713007, 'loss': 5.725595474243164}


EP_train:0:  76%|| 5251/6926 [11:19:56<12:27:09, 26.76s/it]

Saved best retrieval model


EP_train:0:  76%|| 5261/6926 [11:23:59<11:10:46, 24.17s/it]

{'epoch': 0, 'iter': 5260, 'avg_loss': 6.044951333493883, 'avg_acc': 50.13067857821707, 'loss': 6.138767242431641}


EP_train:0:  76%|| 5271/6926 [11:28:01<11:10:12, 24.30s/it]

{'epoch': 0, 'iter': 5270, 'avg_loss': 6.0443804556720515, 'avg_acc': 50.13220925820527, 'loss': 5.707474708557129}


EP_train:0:  76%|| 5281/6926 [11:32:05<11:08:57, 24.40s/it]

{'epoch': 0, 'iter': 5280, 'avg_loss': 6.043933011437654, 'avg_acc': 50.12722495739443, 'loss': 6.0576629638671875}


EP_train:0:  76%|| 5291/6926 [11:36:06<10:53:35, 23.98s/it]

{'epoch': 0, 'iter': 5290, 'avg_loss': 6.043243952902385, 'avg_acc': 50.12639387639387, 'loss': 5.902557849884033}


EP_train:0:  77%|| 5300/6926 [11:40:06<10:50:31, 24.00s/it]

{'epoch': 0, 'iter': 5300, 'avg_loss': 6.0426037879602745, 'avg_acc': 50.11849179400113, 'loss': 5.871405124664307}


EP_train:0:  77%|| 5301/6926 [11:40:14<11:55:14, 26.41s/it]

Saved best retrieval model


EP_train:0:  77%|| 5311/6926 [11:44:18<10:55:23, 24.35s/it]

{'epoch': 0, 'iter': 5310, 'avg_loss': 6.041958540088824, 'avg_acc': 50.11709188476746, 'loss': 6.085629463195801}


EP_train:0:  77%|| 5321/6926 [11:48:17<10:40:14, 23.93s/it]

{'epoch': 0, 'iter': 5320, 'avg_loss': 6.041396775341553, 'avg_acc': 50.11922101108814, 'loss': 5.808494567871094}


EP_train:0:  77%|| 5331/6926 [11:52:19<10:47:31, 24.36s/it]

{'epoch': 0, 'iter': 5330, 'avg_loss': 6.040767768784013, 'avg_acc': 50.12427311948978, 'loss': 5.713390350341797}


EP_train:0:  77%|| 5341/6926 [11:56:22<10:38:30, 24.17s/it]

{'epoch': 0, 'iter': 5340, 'avg_loss': 6.039986043136188, 'avg_acc': 50.12813611683205, 'loss': 5.262433052062988}


EP_train:0:  77%|| 5350/6926 [12:00:21<10:29:51, 23.98s/it]

{'epoch': 0, 'iter': 5350, 'avg_loss': 6.039247726863889, 'avg_acc': 50.127312651840775, 'loss': 5.265787601470947}


EP_train:0:  77%|| 5351/6926 [12:00:29<11:28:25, 26.23s/it]

Saved best retrieval model


EP_train:0:  77%|| 5361/6926 [12:04:31<10:27:39, 24.06s/it]

{'epoch': 0, 'iter': 5360, 'avg_loss': 6.038825732606313, 'avg_acc': 50.139316358888266, 'loss': 6.17657470703125}


EP_train:0:  78%|| 5371/6926 [12:08:32<10:27:26, 24.21s/it]

{'epoch': 0, 'iter': 5370, 'avg_loss': 6.038335440878406, 'avg_acc': 50.13672965928132, 'loss': 5.88759708404541}


EP_train:0:  78%|| 5381/6926 [12:12:34<10:24:16, 24.24s/it]

{'epoch': 0, 'iter': 5380, 'avg_loss': 6.0381056981954275, 'avg_acc': 50.144025274112614, 'loss': 5.717916965484619}


EP_train:0:  78%|| 5391/6926 [12:16:33<10:11:40, 23.91s/it]

{'epoch': 0, 'iter': 5390, 'avg_loss': 6.037478664991166, 'avg_acc': 50.1472361342979, 'loss': 6.080750465393066}


EP_train:0:  78%|| 5400/6926 [12:20:33<10:12:34, 24.09s/it]

{'epoch': 0, 'iter': 5400, 'avg_loss': 6.0368396863741385, 'avg_acc': 50.141756156267355, 'loss': 5.5526814460754395}


EP_train:0:  78%|| 5401/6926 [12:20:41<11:09:31, 26.34s/it]

Saved best retrieval model


EP_train:0:  78%|| 5411/6926 [12:24:42<10:03:41, 23.91s/it]

{'epoch': 0, 'iter': 5410, 'avg_loss': 6.036109628837753, 'avg_acc': 50.143804287562375, 'loss': 5.886404991149902}




{'epoch': 0, 'iter': 5420, 'avg_loss': 6.035103006174678, 'avg_acc': 50.148727172108465, 'loss': 5.606515407562256}

EP_train:0:  78%|| 5421/6926 [12:28:41<9:59:31, 23.90s/it]




EP_train:0:  78%|| 5431/6926 [12:32:41<9:59:27, 24.06s/it] 

{'epoch': 0, 'iter': 5430, 'avg_loss': 6.034381815268176, 'avg_acc': 50.1403977168109, 'loss': 5.84054708480835}


EP_train:0:  79%|| 5441/6926 [12:36:39<9:48:41, 23.79s/it]

{'epoch': 0, 'iter': 5440, 'avg_loss': 6.033504968091772, 'avg_acc': 50.13554493659254, 'loss': 5.701897144317627}


EP_train:0:  79%|| 5450/6926 [12:40:39<9:52:37, 24.09s/it]

{'epoch': 0, 'iter': 5450, 'avg_loss': 6.032939919986281, 'avg_acc': 50.13701614382682, 'loss': 5.669153213500977}


EP_train:0:  79%|| 5451/6926 [12:40:41<10:10:25, 24.83s/it]

Saved best retrieval model


EP_train:0:  79%|| 5461/6926 [12:44:43<9:46:56, 24.04s/it]

{'epoch': 0, 'iter': 5460, 'avg_loss': 6.032493443306582, 'avg_acc': 50.13504852591101, 'loss': 5.296822547912598}


EP_train:0:  79%|| 5471/6926 [12:48:41<9:38:18, 23.85s/it]

{'epoch': 0, 'iter': 5470, 'avg_loss': 6.031979992105982, 'avg_acc': 50.13194571376348, 'loss': 5.764551639556885}


EP_train:0:  79%|| 5481/6926 [12:52:41<9:33:12, 23.80s/it]

{'epoch': 0, 'iter': 5480, 'avg_loss': 6.031208689598636, 'avg_acc': 50.1311348294107, 'loss': 5.136031627655029}


EP_train:0:  79%|| 5491/6926 [12:56:39<9:30:57, 23.87s/it]

{'epoch': 0, 'iter': 5490, 'avg_loss': 6.030398459669845, 'avg_acc': 50.12179020214897, 'loss': 5.800483226776123}


EP_train:0:  79%|| 5500/6926 [13:00:38<9:26:01, 23.82s/it]

{'epoch': 0, 'iter': 5500, 'avg_loss': 6.0296348321440005, 'avg_acc': 50.11872841301581, 'loss': 5.531843662261963}


EP_train:0:  79%|| 5501/6926 [13:00:45<10:22:23, 26.21s/it]

Saved best retrieval model


EP_train:0:  80%|| 5511/6926 [13:04:50<9:34:12, 24.35s/it]

{'epoch': 0, 'iter': 5510, 'avg_loss': 6.029105065241415, 'avg_acc': 50.11511068771548, 'loss': 5.57829475402832}


EP_train:0:  80%|| 5521/6926 [13:08:49<9:16:10, 23.75s/it]

{'epoch': 0, 'iter': 5520, 'avg_loss': 6.028489948204301, 'avg_acc': 50.120562398116284, 'loss': 5.64046573638916}


EP_train:0:  80%|| 5531/6926 [13:12:48<9:19:10, 24.05s/it]

{'epoch': 0, 'iter': 5530, 'avg_loss': 6.0282094960961885, 'avg_acc': 50.11469444946665, 'loss': 5.735381126403809}


EP_train:0:  80%|| 5541/6926 [13:16:46<9:09:08, 23.79s/it]

{'epoch': 0, 'iter': 5540, 'avg_loss': 6.027746896927934, 'avg_acc': 50.116179390001804, 'loss': 5.896994590759277}


EP_train:0:  80%|| 5550/6926 [13:20:44<9:05:18, 23.78s/it]

{'epoch': 0, 'iter': 5550, 'avg_loss': 6.027381607915363, 'avg_acc': 50.11540713384976, 'loss': 5.859691143035889}


EP_train:0:  80%|| 5551/6926 [13:20:52<9:58:03, 26.10s/it]

Saved best retrieval model


EP_train:0:  80%|| 5561/6926 [13:24:51<9:01:46, 23.81s/it]

{'epoch': 0, 'iter': 5560, 'avg_loss': 6.0267002559827185, 'avg_acc': 50.111827908649516, 'loss': 6.000855922698975}


EP_train:0:  80%|| 5571/6926 [13:28:50<9:01:47, 23.99s/it]

{'epoch': 0, 'iter': 5570, 'avg_loss': 6.02634503430127, 'avg_acc': 50.116114701130854, 'loss': 5.7850141525268555}


EP_train:0:  81%|| 5581/6926 [13:32:49<8:59:06, 24.05s/it]

{'epoch': 0, 'iter': 5580, 'avg_loss': 6.0258694325610955, 'avg_acc': 50.116466583049636, 'loss': 5.799572944641113}


EP_train:0:  81%|| 5591/6926 [13:36:47<8:49:57, 23.82s/it]

{'epoch': 0, 'iter': 5590, 'avg_loss': 6.025524134287103, 'avg_acc': 50.11066893221249, 'loss': 5.578330993652344}


EP_train:0:  81%|| 5600/6926 [13:40:46<8:47:46, 23.88s/it]

{'epoch': 0, 'iter': 5600, 'avg_loss': 6.025114222820945, 'avg_acc': 50.10656579182289, 'loss': 6.093204021453857}


EP_train:0:  81%|| 5601/6926 [13:40:53<9:31:59, 25.90s/it]

Saved best retrieval model


EP_train:0:  81%|| 5611/6926 [13:44:53<8:42:46, 23.85s/it]

{'epoch': 0, 'iter': 5610, 'avg_loss': 6.0245912385653995, 'avg_acc': 50.10693281055071, 'loss': 5.977016448974609}


EP_train:0:  81%|| 5621/6926 [13:48:52<8:38:11, 23.82s/it]

{'epoch': 0, 'iter': 5620, 'avg_loss': 6.024055819179723, 'avg_acc': 50.102850916207075, 'loss': 5.849597930908203}


EP_train:0:  81%|| 5631/6926 [13:52:49<8:31:47, 23.71s/it]

{'epoch': 0, 'iter': 5630, 'avg_loss': 6.023538654258293, 'avg_acc': 50.096563665423545, 'loss': 6.09079647064209}


EP_train:0:  81%|| 5641/6926 [13:56:47<8:26:30, 23.65s/it]

{'epoch': 0, 'iter': 5640, 'avg_loss': 6.023164341369856, 'avg_acc': 50.09528452402057, 'loss': 5.717893123626709}


EP_train:0:  82%|| 5650/6926 [14:00:44<8:23:06, 23.66s/it]

{'epoch': 0, 'iter': 5650, 'avg_loss': 6.022488721808044, 'avg_acc': 50.095115908688726, 'loss': 6.039089202880859}


EP_train:0:  82%|| 5651/6926 [14:00:51<9:13:27, 26.04s/it]

Saved best retrieval model


EP_train:0:  82%|| 5661/6926 [14:04:50<8:23:44, 23.89s/it]

{'epoch': 0, 'iter': 5660, 'avg_loss': 6.021937611709107, 'avg_acc': 50.09660395689808, 'loss': 6.028720855712891}


EP_train:0:  82%|| 5671/6926 [14:08:46<8:13:51, 23.61s/it]

{'epoch': 0, 'iter': 5670, 'avg_loss': 6.021389651227052, 'avg_acc': 50.09478046199965, 'loss': 5.710719108581543}


EP_train:0:  82%|| 5681/6926 [14:12:45<8:11:10, 23.67s/it]

{'epoch': 0, 'iter': 5680, 'avg_loss': 6.020782034490713, 'avg_acc': 50.1023147333216, 'loss': 5.727542400360107}


EP_train:0:  82%|| 5691/6926 [14:16:43<8:13:33, 23.98s/it]

{'epoch': 0, 'iter': 5690, 'avg_loss': 6.020180839789267, 'avg_acc': 50.10048761201897, 'loss': 5.4988274574279785}


EP_train:0:  82%|| 5700/6926 [14:20:39<8:03:40, 23.67s/it]

{'epoch': 0, 'iter': 5700, 'avg_loss': 6.019400827300442, 'avg_acc': 50.107985441150674, 'loss': 5.661442279815674}


EP_train:0:  82%|| 5701/6926 [14:20:48<9:00:01, 26.45s/it]

Saved best retrieval model


EP_train:0:  82%|| 5711/6926 [14:24:46<8:02:38, 23.83s/it]

{'epoch': 0, 'iter': 5710, 'avg_loss': 6.018419740162888, 'avg_acc': 50.109985116441955, 'loss': 5.468029022216797}


EP_train:0:  83%|| 5721/6926 [14:28:45<7:56:20, 23.72s/it]

{'epoch': 0, 'iter': 5720, 'avg_loss': 6.018011976134379, 'avg_acc': 50.11252403425974, 'loss': 5.74117374420166}


EP_train:0:  83%|| 5731/6926 [14:32:43<7:58:25, 24.02s/it]

{'epoch': 0, 'iter': 5730, 'avg_loss': 6.017584873855769, 'avg_acc': 50.1128729715582, 'loss': 6.006847858428955}


EP_train:0:  83%|| 5741/6926 [14:36:41<7:47:28, 23.67s/it]

{'epoch': 0, 'iter': 5740, 'avg_loss': 6.0172368730532755, 'avg_acc': 50.112132032746906, 'loss': 5.486562252044678}


EP_train:0:  83%|| 5750/6926 [14:40:40<7:49:17, 23.94s/it]

{'epoch': 0, 'iter': 5750, 'avg_loss': 6.016880701363885, 'avg_acc': 50.110850286906626, 'loss': 5.946107864379883}


EP_train:0:  83%|| 5751/6926 [14:40:48<8:40:48, 26.59s/it]

Saved best retrieval model


EP_train:0:  83%|| 5761/6926 [14:44:47<7:44:42, 23.93s/it]

{'epoch': 0, 'iter': 5760, 'avg_loss': 6.0167797804913405, 'avg_acc': 50.11174275299427, 'loss': 5.878998756408691}


EP_train:0:  83%|| 5771/6926 [14:48:45<7:38:32, 23.82s/it]

{'epoch': 0, 'iter': 5770, 'avg_loss': 6.016322390579966, 'avg_acc': 50.118588632819275, 'loss': 5.937015533447266}


EP_train:0:  83%|| 5781/6926 [14:52:45<7:36:52, 23.94s/it]

{'epoch': 0, 'iter': 5780, 'avg_loss': 6.015825824575881, 'avg_acc': 50.116761805915935, 'loss': 5.769193649291992}


EP_train:0:  84%|| 5791/6926 [14:56:41<7:27:05, 23.64s/it]

{'epoch': 0, 'iter': 5790, 'avg_loss': 6.01510451219177, 'avg_acc': 50.11332239682266, 'loss': 5.677530288696289}


EP_train:0:  84%|| 5800/6926 [15:00:41<7:30:36, 24.01s/it]

{'epoch': 0, 'iter': 5800, 'avg_loss': 6.0148200660794995, 'avg_acc': 50.112049646612654, 'loss': 5.694416522979736}


EP_train:0:  84%|| 5801/6926 [15:00:49<8:15:36, 26.43s/it]

Saved best retrieval model


EP_train:0:  84%|| 5811/6926 [15:04:47<7:22:05, 23.79s/it]

{'epoch': 0, 'iter': 5810, 'avg_loss': 6.014452164400016, 'avg_acc': 50.116159008776464, 'loss': 5.800669193267822}


EP_train:0:  84%|| 5821/6926 [15:08:46<7:15:59, 23.67s/it]

{'epoch': 0, 'iter': 5820, 'avg_loss': 6.014150449611583, 'avg_acc': 50.11005411441333, 'loss': 5.913928985595703}


EP_train:0:  84%|| 5831/6926 [15:12:43<7:16:23, 23.91s/it]

{'epoch': 0, 'iter': 5830, 'avg_loss': 6.0136661881178775, 'avg_acc': 50.11629651860744, 'loss': 5.437040328979492}


EP_train:0:  84%|| 5841/6926 [15:16:40<7:06:51, 23.61s/it]

{'epoch': 0, 'iter': 5840, 'avg_loss': 6.013366274302228, 'avg_acc': 50.11663242595446, 'loss': 6.080371379852295}


EP_train:0:  84%|| 5850/6926 [15:20:37<7:04:32, 23.67s/it]

{'epoch': 0, 'iter': 5850, 'avg_loss': 6.012879432195111, 'avg_acc': 50.11750128183217, 'loss': 5.80903959274292}


EP_train:0:  84%|| 5851/6926 [15:20:45<7:47:02, 26.07s/it]

Saved best retrieval model


EP_train:0:  85%|| 5861/6926 [15:24:42<7:00:09, 23.67s/it]

{'epoch': 0, 'iter': 5860, 'avg_loss': 6.012396911125788, 'avg_acc': 50.109836205425694, 'loss': 5.466487407684326}


EP_train:0:  85%|| 5871/6926 [15:28:40<6:55:15, 23.62s/it]

{'epoch': 0, 'iter': 5870, 'avg_loss': 6.011776186339271, 'avg_acc': 50.106455459035935, 'loss': 5.6409783363342285}


EP_train:0:  85%|| 5881/6926 [15:32:37<6:56:15, 23.90s/it]

{'epoch': 0, 'iter': 5880, 'avg_loss': 6.011299218597957, 'avg_acc': 50.109994048631194, 'loss': 5.957446575164795}


EP_train:0:  85%|| 5891/6926 [15:36:34<6:50:38, 23.81s/it]

{'epoch': 0, 'iter': 5890, 'avg_loss': 6.01087793982549, 'avg_acc': 50.10503310134102, 'loss': 5.903570652008057}


EP_train:0:  85%|| 5900/6926 [15:40:32<6:44:29, 23.65s/it]

{'epoch': 0, 'iter': 5900, 'avg_loss': 6.010593863104061, 'avg_acc': 50.11120996441281, 'loss': 5.9296956062316895}


EP_train:0:  85%|| 5901/6926 [15:40:39<7:22:55, 25.93s/it]

Saved best retrieval model


EP_train:0:  85%|| 5911/6926 [15:44:36<6:40:38, 23.68s/it]

{'epoch': 0, 'iter': 5910, 'avg_loss': 6.009874310152237, 'avg_acc': 50.11366520047369, 'loss': 5.876570224761963}


EP_train:0:  85%|| 5921/6926 [15:48:35<6:38:04, 23.77s/it]

{'epoch': 0, 'iter': 5920, 'avg_loss': 6.009535756229528, 'avg_acc': 50.11927883803412, 'loss': 5.65482759475708}


EP_train:0:  86%|| 5931/6926 [15:52:33<6:35:37, 23.86s/it]

{'epoch': 0, 'iter': 5930, 'avg_loss': 6.008869472874133, 'avg_acc': 50.123292867981796, 'loss': 5.218014717102051}


EP_train:0:  86%|| 5941/6926 [15:56:32<6:29:44, 23.74s/it]

{'epoch': 0, 'iter': 5940, 'avg_loss': 6.008422763755759, 'avg_acc': 50.124137350614376, 'loss': 5.901102542877197}


EP_train:0:  86%|| 5950/6926 [16:00:29<6:28:20, 23.87s/it]

{'epoch': 0, 'iter': 5950, 'avg_loss': 6.008036866105519, 'avg_acc': 50.13128045706604, 'loss': 5.839601993560791}


EP_train:0:  86%|| 5951/6926 [16:00:37<7:07:44, 26.32s/it]

Saved best retrieval model


EP_train:0:  86%|| 5961/6926 [16:04:36<6:25:38, 23.98s/it]

{'epoch': 0, 'iter': 5960, 'avg_loss': 6.007469114211757, 'avg_acc': 50.13630263378628, 'loss': 5.497256278991699}


EP_train:0:  86%|| 5971/6926 [16:08:32<6:16:46, 23.67s/it]

{'epoch': 0, 'iter': 5970, 'avg_loss': 6.007038632822364, 'avg_acc': 50.13607435940378, 'loss': 5.823355674743652}


EP_train:0:  86%|| 5981/6926 [16:12:29<6:12:05, 23.63s/it]

{'epoch': 0, 'iter': 5980, 'avg_loss': 6.006701746227709, 'avg_acc': 50.13009948169203, 'loss': 5.5844221115112305}


EP_train:0:  87%|| 5991/6926 [16:16:28<6:14:56, 24.06s/it]

{'epoch': 0, 'iter': 5990, 'avg_loss': 6.006482670968173, 'avg_acc': 50.12936070772825, 'loss': 5.587740898132324}


EP_train:0:  87%|| 6000/6926 [16:20:25<6:04:15, 23.60s/it]

{'epoch': 0, 'iter': 6000, 'avg_loss': 6.006135679447458, 'avg_acc': 50.12914514247625, 'loss': 5.148585796356201}


EP_train:0:  87%|| 6001/6926 [16:20:32<6:40:09, 25.96s/it]

Saved best retrieval model


EP_train:0:  87%|| 6011/6926 [16:24:31<6:06:52, 24.06s/it]

{'epoch': 0, 'iter': 6010, 'avg_loss': 6.005680895430775, 'avg_acc': 50.12477125270338, 'loss': 5.698615550994873}


EP_train:0:  87%|| 6021/6926 [16:28:31<5:58:28, 23.77s/it]

{'epoch': 0, 'iter': 6020, 'avg_loss': 6.005523829974124, 'avg_acc': 50.12248795881082, 'loss': 5.456023216247559}


EP_train:0:  87%|| 6031/6926 [16:32:29<5:55:28, 23.83s/it]

{'epoch': 0, 'iter': 6030, 'avg_loss': 6.004824544517303, 'avg_acc': 50.12073039296966, 'loss': 5.645665645599365}


EP_train:0:  87%|| 6041/6926 [16:36:26<5:51:41, 23.84s/it]

{'epoch': 0, 'iter': 6040, 'avg_loss': 6.004284225420296, 'avg_acc': 50.118978645919555, 'loss': 5.883465766906738}


EP_train:0:  87%|| 6050/6926 [16:40:25<5:48:30, 23.87s/it]

{'epoch': 0, 'iter': 6050, 'avg_loss': 6.003840820707776, 'avg_acc': 50.119814906627, 'loss': 5.416088581085205}


EP_train:0:  87%|| 6051/6926 [16:40:27<5:54:20, 24.30s/it]

Saved best retrieval model


EP_train:0:  88%|| 6061/6926 [16:44:28<5:43:02, 23.79s/it]

{'epoch': 0, 'iter': 6060, 'avg_loss': 6.003275867421878, 'avg_acc': 50.12219518231314, 'loss': 5.445356845855713}


EP_train:0:  88%|| 6071/6926 [16:48:27<5:42:41, 24.05s/it]

{'epoch': 0, 'iter': 6070, 'avg_loss': 6.002976571957886, 'avg_acc': 50.12765606984022, 'loss': 5.836785316467285}


EP_train:0:  88%|| 6081/6926 [16:52:26<5:35:52, 23.85s/it]

{'epoch': 0, 'iter': 6080, 'avg_loss': 6.002243203368749, 'avg_acc': 50.12230718631804, 'loss': 5.959044933319092}


EP_train:0:  88%|| 6091/6926 [16:56:24<5:32:53, 23.92s/it]

{'epoch': 0, 'iter': 6090, 'avg_loss': 6.001856118895662, 'avg_acc': 50.11389755376785, 'loss': 5.9417829513549805}


EP_train:0:  88%|| 6100/6926 [17:00:25<5:28:28, 23.86s/it]

{'epoch': 0, 'iter': 6100, 'avg_loss': 6.001260774015618, 'avg_acc': 50.11114981150631, 'loss': 5.875706195831299}


EP_train:0:  88%|| 6101/6926 [17:00:26<5:34:36, 24.34s/it]

Saved best retrieval model


EP_train:0:  88%|| 6111/6926 [17:04:31<5:31:42, 24.42s/it]

{'epoch': 0, 'iter': 6110, 'avg_loss': 6.000818247725659, 'avg_acc': 50.108922434953364, 'loss': 5.481055736541748}


EP_train:0:  88%|| 6121/6926 [17:08:30<5:20:20, 23.88s/it]

{'epoch': 0, 'iter': 6120, 'avg_loss': 6.000433206675081, 'avg_acc': 50.10670233621958, 'loss': 5.764591693878174}


EP_train:0:  89%|| 6131/6926 [17:12:29<5:18:31, 24.04s/it]

{'epoch': 0, 'iter': 6130, 'avg_loss': 5.999829139158828, 'avg_acc': 50.10550888925135, 'loss': 5.923630714416504}


EP_train:0:  89%|| 6141/6926 [17:16:29<5:11:13, 23.79s/it]

{'epoch': 0, 'iter': 6140, 'avg_loss': 5.999147479531431, 'avg_acc': 50.107372577756074, 'loss': 5.707187652587891}


EP_train:0:  89%|| 6150/6926 [17:20:27<5:09:14, 23.91s/it]

{'epoch': 0, 'iter': 6150, 'avg_loss': 5.998802615770226, 'avg_acc': 50.1011014469192, 'loss': 5.4215922355651855}


EP_train:0:  89%|| 6151/6926 [17:20:29<5:15:21, 24.42s/it]

Saved best retrieval model


EP_train:0:  89%|| 6161/6926 [17:24:29<5:06:37, 24.05s/it]

{'epoch': 0, 'iter': 6160, 'avg_loss': 5.998491894326182, 'avg_acc': 50.09738678785911, 'loss': 5.526274681091309}


EP_train:0:  89%|| 6171/6926 [17:28:26<4:58:06, 23.69s/it]

{'epoch': 0, 'iter': 6170, 'avg_loss': 5.997767054035914, 'avg_acc': 50.09267136606709, 'loss': 6.178563594818115}


EP_train:0:  89%|| 6181/6926 [17:32:23<4:54:04, 23.68s/it]

{'epoch': 0, 'iter': 6180, 'avg_loss': 5.997335862658244, 'avg_acc': 50.09555492638732, 'loss': 5.377073764801025}


EP_train:0:  89%|| 6191/6926 [17:36:19<4:49:34, 23.64s/it]

{'epoch': 0, 'iter': 6190, 'avg_loss': 5.99686436952749, 'avg_acc': 50.106505411080605, 'loss': 5.6456427574157715}


EP_train:0:  90%|| 6200/6926 [17:40:18<4:47:04, 23.73s/it]

{'epoch': 0, 'iter': 6200, 'avg_loss': 5.996490891528426, 'avg_acc': 50.111373165618446, 'loss': 5.641448497772217}


EP_train:0:  90%|| 6201/6926 [17:40:19<4:53:20, 24.28s/it]

Saved best retrieval model


EP_train:0:  90%|| 6211/6926 [17:44:19<4:44:26, 23.87s/it]

{'epoch': 0, 'iter': 6210, 'avg_loss': 5.996301761016698, 'avg_acc': 50.1076718724843, 'loss': 6.052840709686279}


EP_train:0:  90%|| 6221/6926 [17:48:17<4:39:03, 23.75s/it]

{'epoch': 0, 'iter': 6220, 'avg_loss': 5.99578917898409, 'avg_acc': 50.11101511011091, 'loss': 5.5181050300598145}


EP_train:0:  90%|| 6231/6926 [17:52:15<4:35:25, 23.78s/it]

{'epoch': 0, 'iter': 6230, 'avg_loss': 5.995252596626043, 'avg_acc': 50.10732627186647, 'loss': 5.439176559448242}


EP_train:0:  90%|| 6241/6926 [17:56:16<4:34:34, 24.05s/it]

{'epoch': 0, 'iter': 6240, 'avg_loss': 5.994971874031076, 'avg_acc': 50.11166079153981, 'loss': 5.798752784729004}


EP_train:0:  90%|| 6250/6926 [18:00:13<4:27:37, 23.75s/it]

{'epoch': 0, 'iter': 6250, 'avg_loss': 5.994647535471473, 'avg_acc': 50.111482162853946, 'loss': 6.147024631500244}


EP_train:0:  90%|| 6251/6926 [18:00:15<4:32:53, 24.26s/it]

Saved best retrieval model


EP_train:0:  90%|| 6261/6926 [18:04:14<4:23:34, 23.78s/it]

{'epoch': 0, 'iter': 6260, 'avg_loss': 5.994257122920812, 'avg_acc': 50.11180322632167, 'loss': 5.097224235534668}


EP_train:0:  91%|| 6271/6926 [18:08:12<4:20:56, 23.90s/it]

{'epoch': 0, 'iter': 6270, 'avg_loss': 5.99372390087969, 'avg_acc': 50.108136660819646, 'loss': 5.582675933837891}


EP_train:0:  91%|| 6281/6926 [18:12:10<4:14:20, 23.66s/it]

{'epoch': 0, 'iter': 6280, 'avg_loss': 5.993402066266455, 'avg_acc': 50.113437350740334, 'loss': 5.589354038238525}


EP_train:0:  91%|| 6291/6926 [18:16:08<4:12:09, 23.83s/it]

{'epoch': 0, 'iter': 6290, 'avg_loss': 5.99314279750314, 'avg_acc': 50.12368860276586, 'loss': 6.2076897621154785}


EP_train:0:  91%|| 6300/6926 [18:20:04<4:05:55, 23.57s/it]

{'epoch': 0, 'iter': 6300, 'avg_loss': 5.992751159056503, 'avg_acc': 50.124980161879066, 'loss': 5.716874122619629}


EP_train:0:  91%|| 6301/6926 [18:20:06<4:11:05, 24.10s/it]

Saved best retrieval model


EP_train:0:  91%|| 6311/6926 [18:24:06<4:04:32, 23.86s/it]

{'epoch': 0, 'iter': 6310, 'avg_loss': 5.9923724729235435, 'avg_acc': 50.11834495325622, 'loss': 5.564499855041504}


EP_train:0:  91%|| 6321/6926 [18:28:02<3:59:32, 23.76s/it]

{'epoch': 0, 'iter': 6320, 'avg_loss': 5.99202887986661, 'avg_acc': 50.11271950640721, 'loss': 5.628361225128174}


EP_train:0:  91%|| 6331/6926 [18:32:01<3:55:33, 23.75s/it]

{'epoch': 0, 'iter': 6330, 'avg_loss': 5.99171673571813, 'avg_acc': 50.10859263939346, 'loss': 5.907199859619141}


EP_train:0:  92%|| 6341/6926 [18:35:59<3:52:05, 23.80s/it]

{'epoch': 0, 'iter': 6340, 'avg_loss': 5.991132928038824, 'avg_acc': 50.103493139883305, 'loss': 6.124569892883301}


EP_train:0:  92%|| 6350/6926 [18:39:56<3:47:48, 23.73s/it]

{'epoch': 0, 'iter': 6350, 'avg_loss': 5.990886098023494, 'avg_acc': 50.09988584474886, 'loss': 5.846919059753418}


EP_train:0:  92%|| 6351/6926 [18:39:57<3:51:54, 24.20s/it]

Saved best retrieval model


EP_train:0:  92%|| 6361/6926 [18:43:58<3:44:51, 23.88s/it]

{'epoch': 0, 'iter': 6360, 'avg_loss': 5.990380645191832, 'avg_acc': 50.09334224178589, 'loss': 5.837847709655762}


EP_train:0:  92%|| 6371/6926 [18:47:54<3:39:23, 23.72s/it]

{'epoch': 0, 'iter': 6370, 'avg_loss': 5.989903581290244, 'avg_acc': 50.092705226808974, 'loss': 5.0577263832092285}


EP_train:0:  92%|| 6381/6926 [18:51:51<3:34:20, 23.60s/it]

{'epoch': 0, 'iter': 6380, 'avg_loss': 5.989439935524287, 'avg_acc': 50.0940291490362, 'loss': 5.790432453155518}


EP_train:0:  92%|| 6391/6926 [18:55:49<3:31:09, 23.68s/it]

{'epoch': 0, 'iter': 6390, 'avg_loss': 5.988877584743604, 'avg_acc': 50.091437177280554, 'loss': 5.546839714050293}


EP_train:0:  92%|| 6400/6926 [18:59:47<3:29:50, 23.94s/it]

{'epoch': 0, 'iter': 6400, 'avg_loss': 5.988604730413289, 'avg_acc': 50.089341509139196, 'loss': 5.923066139221191}


EP_train:0:  92%|| 6401/6926 [18:59:49<3:33:39, 24.42s/it]

Saved best retrieval model


EP_train:0:  93%|| 6411/6926 [19:03:50<3:24:38, 23.84s/it]

{'epoch': 0, 'iter': 6410, 'avg_loss': 5.987980913026268, 'avg_acc': 50.09261425674622, 'loss': 5.041503429412842}


EP_train:0:  93%|| 6421/6926 [19:07:48<3:21:01, 23.88s/it]

{'epoch': 0, 'iter': 6420, 'avg_loss': 5.987690715436307, 'avg_acc': 50.08614312412397, 'loss': 5.882030010223389}


EP_train:0:  93%|| 6431/6926 [19:11:48<3:16:23, 23.80s/it]

{'epoch': 0, 'iter': 6430, 'avg_loss': 5.987362596188265, 'avg_acc': 50.0869810293889, 'loss': 5.881158828735352}


EP_train:0:  93%|| 6441/6926 [19:15:45<3:12:22, 23.80s/it]

{'epoch': 0, 'iter': 6440, 'avg_loss': 5.987058575563116, 'avg_acc': 50.08199425555038, 'loss': 5.733219623565674}


EP_train:0:  93%|| 6450/6926 [19:19:42<3:08:20, 23.74s/it]

{'epoch': 0, 'iter': 6450, 'avg_loss': 5.986751210170279, 'avg_acc': 50.08235157339948, 'loss': 5.669462203979492}


EP_train:0:  93%|| 6451/6926 [19:19:50<3:26:15, 26.05s/it]

Saved best retrieval model


EP_train:0:  93%|| 6461/6926 [19:23:50<3:04:34, 23.82s/it]

{'epoch': 0, 'iter': 6460, 'avg_loss': 5.9863864809823575, 'avg_acc': 50.089962854047364, 'loss': 6.138749122619629}


EP_train:0:  93%|| 6471/6926 [19:27:49<3:00:53, 23.85s/it]

{'epoch': 0, 'iter': 6470, 'avg_loss': 5.985981632690123, 'avg_acc': 50.0864433626951, 'loss': 6.006915092468262}


EP_train:0:  94%|| 6481/6926 [19:31:46<2:56:28, 23.79s/it]

{'epoch': 0, 'iter': 6480, 'avg_loss': 5.985879050197286, 'avg_acc': 50.08679216170344, 'loss': 6.0520806312561035}


EP_train:0:  94%|| 6491/6926 [19:35:45<2:52:25, 23.78s/it]

{'epoch': 0, 'iter': 6490, 'avg_loss': 5.985496952948411, 'avg_acc': 50.079436912648276, 'loss': 5.623685836791992}


EP_train:0:  94%|| 6500/6926 [19:39:43<2:49:15, 23.84s/it]

{'epoch': 0, 'iter': 6500, 'avg_loss': 5.9853198834518855, 'avg_acc': 50.08171819720043, 'loss': 6.006680488586426}


EP_train:0:  94%|| 6501/6926 [19:39:51<3:04:52, 26.10s/it]

Saved best retrieval model


EP_train:0:  94%|| 6511/6926 [19:43:51<2:44:51, 23.83s/it]

{'epoch': 0, 'iter': 6510, 'avg_loss': 5.984919941529684, 'avg_acc': 50.08351251727845, 'loss': 5.787853240966797}


EP_train:0:  94%|| 6521/6926 [19:47:48<2:40:08, 23.72s/it]

{'epoch': 0, 'iter': 6520, 'avg_loss': 5.984751818279166, 'avg_acc': 50.090572764913354, 'loss': 5.816812515258789}


EP_train:0:  94%|| 6531/6926 [19:51:46<2:37:12, 23.88s/it]

{'epoch': 0, 'iter': 6530, 'avg_loss': 5.984528793016048, 'avg_acc': 50.09043408360129, 'loss': 5.906198024749756}


EP_train:0:  94%|| 6541/6926 [19:55:43<2:31:35, 23.62s/it]

{'epoch': 0, 'iter': 6540, 'avg_loss': 5.9839289108559806, 'avg_acc': 50.09411787188503, 'loss': 5.173263072967529}


EP_train:0:  95%|| 6550/6926 [19:59:43<2:30:43, 24.05s/it]

{'epoch': 0, 'iter': 6550, 'avg_loss': 5.983595533855743, 'avg_acc': 50.10256067775912, 'loss': 6.048302173614502}


EP_train:0:  95%|| 6551/6926 [19:59:44<2:33:23, 24.54s/it]

Saved best retrieval model


EP_train:0:  95%|| 6561/6926 [20:03:46<2:25:34, 23.93s/it]

{'epoch': 0, 'iter': 6560, 'avg_loss': 5.9834074795273375, 'avg_acc': 50.101451760402384, 'loss': 5.539169788360596}


EP_train:0:  95%|| 6571/6926 [20:07:43<2:20:40, 23.78s/it]

{'epoch': 0, 'iter': 6570, 'avg_loss': 5.983059261727235, 'avg_acc': 50.10367523968955, 'loss': 5.739795684814453}


EP_train:0:  95%|| 6581/6926 [20:11:41<2:16:18, 23.70s/it]

{'epoch': 0, 'iter': 6580, 'avg_loss': 5.982895316187869, 'avg_acc': 50.10161829509193, 'loss': 5.706655979156494}


EP_train:0:  95%|| 6591/6926 [20:15:39<2:12:24, 23.71s/it]

{'epoch': 0, 'iter': 6590, 'avg_loss': 5.982616928021294, 'avg_acc': 50.10051585495372, 'loss': 5.56647253036499}




{'epoch': 0, 'iter': 6600, 'avg_loss': 5.982445098869441, 'avg_acc': 50.09562945008332, 'loss': 6.005154132843018}


EP_train:0:  95%|| 6601/6926 [20:19:39<2:12:24, 24.45s/it]

Saved best retrieval model


EP_train:0:  95%|| 6611/6926 [20:23:41<2:05:33, 23.92s/it]

{'epoch': 0, 'iter': 6610, 'avg_loss': 5.982296967322482, 'avg_acc': 50.095012101043714, 'loss': 5.807528972625732}


EP_train:0:  96%|| 6621/6926 [20:27:38<2:01:04, 23.82s/it]

{'epoch': 0, 'iter': 6620, 'avg_loss': 5.9818290325292125, 'avg_acc': 50.096284549161766, 'loss': 5.711157321929932}


EP_train:0:  96%|| 6631/6926 [20:31:36<1:56:25, 23.68s/it]

{'epoch': 0, 'iter': 6630, 'avg_loss': 5.9813800227712175, 'avg_acc': 50.100380787211584, 'loss': 5.663313388824463}


EP_train:0:  96%|| 6641/6926 [20:35:33<1:52:40, 23.72s/it]

{'epoch': 0, 'iter': 6640, 'avg_loss': 5.981130831537073, 'avg_acc': 50.105405812377654, 'loss': 6.147189617156982}


EP_train:0:  96%|| 6650/6926 [20:39:30<1:49:14, 23.75s/it]

{'epoch': 0, 'iter': 6650, 'avg_loss': 5.980677112495607, 'avg_acc': 50.10477747707112, 'loss': 5.425020694732666}


EP_train:0:  96%|| 6651/6926 [20:39:32<1:51:18, 24.29s/it]

Saved best retrieval model


EP_train:0:  96%|| 6661/6926 [20:43:31<1:44:59, 23.77s/it]

{'epoch': 0, 'iter': 6660, 'avg_loss': 5.980434207976392, 'avg_acc': 50.10462017715058, 'loss': 5.944828987121582}


EP_train:0:  96%|| 6671/6926 [20:47:29<1:41:47, 23.95s/it]

{'epoch': 0, 'iter': 6670, 'avg_loss': 5.980145956476023, 'avg_acc': 50.09931044820867, 'loss': 5.914470672607422}


EP_train:0:  96%|| 6681/6926 [20:51:29<1:37:18, 23.83s/it]

{'epoch': 0, 'iter': 6680, 'avg_loss': 5.979874083817745, 'avg_acc': 50.102903756922615, 'loss': 5.770446300506592}


EP_train:0:  97%|| 6691/6926 [20:55:28<1:33:41, 23.92s/it]

{'epoch': 0, 'iter': 6690, 'avg_loss': 5.979357836154202, 'avg_acc': 50.10041473621283, 'loss': 5.772716522216797}


EP_train:0:  97%|| 6700/6926 [20:59:26<1:29:24, 23.74s/it]

{'epoch': 0, 'iter': 6700, 'avg_loss': 5.9789065738308524, 'avg_acc': 50.090937919713475, 'loss': 5.325893878936768}


EP_train:0:  97%|| 6701/6926 [20:59:27<1:30:51, 24.23s/it]

Saved best retrieval model


EP_train:0:  97%|| 6711/6926 [21:03:28<1:26:12, 24.06s/it]

{'epoch': 0, 'iter': 6710, 'avg_loss': 5.978417873098356, 'avg_acc': 50.08940545373268, 'loss': 5.613637924194336}


EP_train:0:  97%|| 6721/6926 [21:07:25<1:20:59, 23.70s/it]

{'epoch': 0, 'iter': 6720, 'avg_loss': 5.978153690627033, 'avg_acc': 50.09578187769678, 'loss': 5.681968688964844}


EP_train:0:  97%|| 6731/6926 [21:11:24<1:17:18, 23.79s/it]

{'epoch': 0, 'iter': 6730, 'avg_loss': 5.977688618921946, 'avg_acc': 50.09656811766454, 'loss': 5.340581893920898}


EP_train:0:  97%|| 6741/6926 [21:15:21<1:13:26, 23.82s/it]

{'epoch': 0, 'iter': 6740, 'avg_loss': 5.977257803486992, 'avg_acc': 50.0996699302774, 'loss': 5.994866371154785}


EP_train:0:  97%|| 6750/6926 [21:19:20<1:09:56, 23.84s/it]

{'epoch': 0, 'iter': 6750, 'avg_loss': 5.976829349191219, 'avg_acc': 50.095356243519475, 'loss': 5.626577377319336}


EP_train:0:  97%|| 6751/6926 [21:19:21<1:11:00, 24.34s/it]

Saved best retrieval model


EP_train:0:  98%|| 6761/6926 [21:23:20<1:05:49, 23.94s/it]

{'epoch': 0, 'iter': 6760, 'avg_loss': 5.976232164623011, 'avg_acc': 50.09567741458364, 'loss': 5.489656448364258}


EP_train:0:  98%|| 6771/6926 [21:27:18<1:01:09, 23.67s/it]

{'epoch': 0, 'iter': 6770, 'avg_loss': 5.975881407873931, 'avg_acc': 50.0983052724856, 'loss': 5.54753303527832}


EP_train:0:  98%|| 6781/6926 [21:31:16<57:13, 23.68s/it]

{'epoch': 0, 'iter': 6780, 'avg_loss': 5.975358678358984, 'avg_acc': 50.094012682495205, 'loss': 5.843764781951904}


EP_train:0:  98%|| 6791/6926 [21:35:13<53:45, 23.90s/it]

{'epoch': 0, 'iter': 6790, 'avg_loss': 5.975104550693193, 'avg_acc': 50.09571491680165, 'loss': 5.72597599029541}


EP_train:0:  98%|| 6800/6926 [21:39:11<49:58, 23.79s/it]

{'epoch': 0, 'iter': 6800, 'avg_loss': 5.974977238566328, 'avg_acc': 50.08868181149831, 'loss': 5.608332633972168}


EP_train:0:  98%|| 6801/6926 [21:39:13<50:33, 24.27s/it]

Saved best retrieval model


EP_train:0:  98%|| 6811/6926 [21:43:13<46:04, 24.04s/it]

{'epoch': 0, 'iter': 6810, 'avg_loss': 5.974841539598285, 'avg_acc': 50.08946924093378, 'loss': 6.181768417358398}


EP_train:0:  98%|| 6821/6926 [21:47:10<41:30, 23.72s/it]

{'epoch': 0, 'iter': 6820, 'avg_loss': 5.974572516766973, 'avg_acc': 50.093461369300684, 'loss': 5.7095255851745605}


EP_train:0:  99%|| 6831/6926 [21:51:09<37:41, 23.81s/it]

{'epoch': 0, 'iter': 6830, 'avg_loss': 5.974465894203482, 'avg_acc': 50.09469696969697, 'loss': 6.010291576385498}


EP_train:0:  99%|| 6841/6926 [21:55:08<33:56, 23.96s/it]

{'epoch': 0, 'iter': 6840, 'avg_loss': 5.974121236292317, 'avg_acc': 50.09364493495103, 'loss': 5.850353717803955}


EP_train:0:  99%|| 6850/6926 [21:59:08<30:21, 23.97s/it]

{'epoch': 0, 'iter': 6850, 'avg_loss': 5.973965905422956, 'avg_acc': 50.08438549116917, 'loss': 5.769160747528076}


EP_train:0:  99%|| 6851/6926 [21:59:10<30:32, 24.44s/it]

Saved best retrieval model


EP_train:0:  99%|| 6861/6926 [22:03:10<26:03, 24.05s/it]

{'epoch': 0, 'iter': 6860, 'avg_loss': 5.9737133784503325, 'avg_acc': 50.086995335956864, 'loss': 5.815117835998535}


EP_train:0:  99%|| 6871/6926 [22:07:11<22:02, 24.04s/it]

{'epoch': 0, 'iter': 6870, 'avg_loss': 5.973445369069223, 'avg_acc': 50.08595910347838, 'loss': 5.671870231628418}


EP_train:0:  99%|| 6881/6926 [22:11:10<17:55, 23.90s/it]

{'epoch': 0, 'iter': 6880, 'avg_loss': 5.9733760253303085, 'avg_acc': 50.08492588286586, 'loss': 5.716362953186035}


EP_train:0:  99%|| 6891/6926 [22:15:08<13:51, 23.77s/it]

{'epoch': 0, 'iter': 6890, 'avg_loss': 5.973078276557131, 'avg_acc': 50.08616311130461, 'loss': 5.533623218536377}


EP_train:0: 100%|| 6900/6926 [22:19:11<10:30, 24.26s/it]

{'epoch': 0, 'iter': 6900, 'avg_loss': 5.972761377703019, 'avg_acc': 50.09192508332126, 'loss': 5.806764125823975}


EP_train:0: 100%|| 6901/6926 [22:19:12<10:18, 24.74s/it]

Saved best retrieval model


EP_train:0: 100%|| 6911/6926 [22:23:14<05:59, 23.98s/it]

{'epoch': 0, 'iter': 6910, 'avg_loss': 5.972566638353713, 'avg_acc': 50.09495731442628, 'loss': 5.64250373840332}


EP_train:0: 100%|| 6921/6926 [22:27:13<01:59, 23.99s/it]

{'epoch': 0, 'iter': 6920, 'avg_loss': 5.97237973233176, 'avg_acc': 50.09798078312383, 'loss': 5.856398105621338}


EP_train:0: 100%|| 6926/6926 [22:29:00<00:00, 11.69s/it]


EP0, train:             avg_loss=5.9721945791687885,             total_acc=50.09836834885568


EP_train:1:   0%|| 0/6926 [00:23<?, ?it/s]

{'epoch': 1, 'iter': 0, 'avg_loss': 5.796343803405762, 'avg_acc': 50.0, 'loss': 5.796343803405762}


EP_train:1:   0%|| 1/6926 [00:25<49:00:35, 25.48s/it]

Saved best retrieval model


EP_train:1:   0%|| 11/6926 [04:24<45:46:49, 23.83s/it]

{'epoch': 1, 'iter': 10, 'avg_loss': 5.737021402879194, 'avg_acc': 49.43181818181818, 'loss': 5.387223720550537}


EP_train:1:   0%|| 21/6926 [08:23<45:42:24, 23.83s/it]

{'epoch': 1, 'iter': 20, 'avg_loss': 5.716131142207554, 'avg_acc': 50.148809523809526, 'loss': 5.459205150604248}


EP_train:1:   0%|| 23/6926 [09:32<47:44:19, 24.90s/it]


KeyboardInterrupt: 