In [1]:
import torch
import os
from os.path import exists
import torch.nn as nn
# from torch.nn.functional import log_softmax, pad, one_hot
import math
import copy
import time
from torch.optim.lr_scheduler import LambdaLR
import pandas as pd
from torch.utils.data import DataLoader
import random
import json
import csv
from pathlib import Path
import shutil
import re
import threading

### utils.py ###

class Dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
    
    def __iadd__(self, other):
        for k, v in self.items():
            if k in other and other[k]:
                self[k] += other[k]
            # end
        # end

        return self
    # end
# end


# Takes the file paths as arguments
def parse_csv_file_to_json(path_file_csv):
    # create a dictionary
    elements = []

    # Open a csv reader called DictReader
    with open(path_file_csv, encoding='utf-8') as file_csv:
    #with open(path_file_csv) as file_csv:
        reader_csv = csv.DictReader(file_csv)

        # Convert each row into a dictionary
        # and add it to data
        for dict_head_value in reader_csv:
            element = {}

            for head, value in dict_head_value.items():
                if value and (value[0] in ["[", "{"]):
                    element[head] = value
                else:
                    element[head] = value

            elements.append(element)
        # end
    # end

    return elements
# end

### utils.py ###



### core.py ###

"Produce N identical layers."
def clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
# end


class MultiHeadedAttention(nn.Module):

    "Take in model size and number of heads."
    def __init__(self, h, d_model, dropout=0.1):
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        # We assume d_v always equals d_k
        self.d_k = d_model // h
        self.h = h
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)
    # end


    "Compute 'Scaled Dot Product Attention'"
    def attention(self, query, key, value, mask=None, dropout=None):
        d_k = query.size(-1)
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
        if mask is not None:
            # print('jinyuj: scores: {}, mask: {}'.format(scores.shape, mask.shape))
            scores = scores.masked_fill(mask == 0, -1e9)
        # end
        p_attn = scores.softmax(dim=-1)
        if dropout is not None:
            p_attn = dropout(p_attn)
        # end
        return torch.matmul(p_attn, value), p_attn
    # end


    "Implements Figure 2"
    def forward(self, query, key, value, mask=None):
        if mask is not None:
            # Same mask applied to all h heads.
            mask = mask.unsqueeze(1)
        nbatches = query.size(0)

        # 1) Do all the linear projections in batch from d_model => h x d_k
        # print('jinyuj: self.h: {}, self.d_k: {}'.format(self.h, self.d_k))
        query, key, value = [
            lin(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
            for lin, x in zip(self.linears, (query, key, value))
        ]

        # 2) Apply attention on all the projected vectors in batch.
        x, self.attn = self.attention(
            query, key, value, mask=mask, dropout=self.dropout
        )

        # 3) "Concat" using a view and apply a final linear.
        x = (
            x.transpose(1, 2)
            .contiguous()
            .view(nbatches, -1, self.h * self.d_k)
        )
        del query
        del key
        del value
        return self.linears[-1](x)
    # end
# end class


"""
A residual connection followed by a layer norm.
Note for code simplicity the norm is first as opposed to last.
"""
class ResidualLayer(nn.Module):

    def __init__(self, size, dropout=0.1, eps=1e-6):
        super(ResidualLayer, self).__init__()
        self.norm = torch.nn.LayerNorm(size, eps)
        self.dropout = nn.Dropout(p=dropout)
    # end

    "Apply residual connection to any sublayer with the same size."
    def forward(self, x, sublayer):
        return x + self.dropout(sublayer(self.norm(x)))
    # end
# end class


class PositionwiseFeedForward(nn.Module):
    "Implements FFN equation."

    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.w_2(self.dropout(self.w_1(x).relu()))
    # end
# end


class SimpleIDEmbeddings(nn.Module):
    def __init__(self, size_vocab, dim_hidden, id_pad):
        super(SimpleIDEmbeddings, self).__init__()
        self.lut = nn.Embedding(size_vocab, dim_hidden, padding_idx=id_pad)
        self.dim_hidden = dim_hidden

    def forward(self, x):
        result = self.lut(x)
        return result * math.sqrt(self.dim_hidden)
    # end

    def get_shape(self):
        return (self.lut.num_embeddings, self.lut.embedding_dim)
    # end
# end


"Implement the PE function."
class PositionalEncoding(nn.Module):

    def __init__(self, dim_positional, max_len=512):
        super(PositionalEncoding, self).__init__()

        # Compute the positional encodings once in log space.
        self.dim_positional = dim_positional
        pe = torch.zeros(max_len, dim_positional)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, dim_positional, 2) * -(math.log(10000.0) / dim_positional)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).to('cuda')
        self.register_buffer("pe", pe)

    def forward(self, x):
        x = x + self.pe[:, : x.size(1)].requires_grad_(False)
        return x
    # end
# end


class SimpleEmbedder(nn.Module):    # no segment embedder as we do not need that
    def __init__(self, size_vocab=None, dim_hidden=128, dropout=0.1, id_pad=0):
        super(SimpleEmbedder, self).__init__()
        self.size_vocab = size_vocab
        self.dim_hidden = dim_hidden
        self.id_pad = id_pad

        self.embedder = nn.Sequential(
            SimpleIDEmbeddings(size_vocab, dim_hidden, id_pad),
            PositionalEncoding(dim_hidden),
            nn.Dropout(p=dropout)
        )
    # end

    def forward(self, ids_input):   # (batch, seqs_with_padding)
        return self.embedder(ids_input)
    # end

    def get_vocab_size(self):
        return self.size_vocab
    # end
# end

### core.py ###



class SimpleEncoderLayer(nn.Module):

    def __init__(self, dim_hidden, dim_feedforward, n_head, dropout=0.1):
        super(SimpleEncoderLayer, self).__init__()

        self.n_head = n_head
        self.dim_hidden = dim_hidden
        self.dim_feedforward = dim_feedforward

        self.layer_attention = MultiHeadedAttention(n_head, dim_hidden)
        self.layer_feedforward = PositionwiseFeedForward(dim_hidden, dim_feedforward, dropout)
        self.layers_residual = clones(ResidualLayer(dim_hidden, dropout), 2)
    # end

    def forward(self, embeddings, masks, *args):
        embeddings = self.layers_residual[0](embeddings, lambda embeddings: self.layer_attention(embeddings, embeddings, embeddings, masks))
        return self.layers_residual[1](embeddings, self.layer_feedforward)
    # end
# end



class SimpleDecoderLayer(nn.Module):

    def __init__(self, dim_hidden, dim_feedforward, n_head, dropout=0.1):
        super(SimpleDecoderLayer, self).__init__()

        self.n_head = n_head
        self.dim_hidden = dim_hidden
        self.dim_feedforward = dim_feedforward

        self.layer_attention_decoder = MultiHeadedAttention(n_head, dim_hidden)
        self.layer_attention_encoder = MultiHeadedAttention(n_head, dim_hidden)
        self.layer_feedforward = PositionwiseFeedForward(dim_hidden, dim_feedforward, dropout)
        self.layers_residual = clones(ResidualLayer(dim_hidden, dropout), 3)

    def forward(self, embeddings, masks_encoder, output_encoder, masks_decoder, *args):
        embeddings = self.layers_residual[0](embeddings, lambda embeddings: self.layer_attention_decoder(embeddings, embeddings, embeddings, masks_decoder))
        embeddings = self.layers_residual[1](embeddings, lambda embeddings: self.layer_attention_encoder(embeddings, output_encoder, output_encoder, masks_encoder))
        return self.layers_residual[2](embeddings, self.layer_feedforward)
    # end
# end


class SimpleTransformerStack(nn.Module):

    def __init__(self, obj_layer, n_layers):
        super(SimpleTransformerStack, self).__init__()
        self.layers = clones(obj_layer, n_layers)

        self.norm = torch.nn.LayerNorm(obj_layer.dim_hidden)
    # end

    def forward(self, embedding_encoder=None, masks_encoder=None, output_encoder=None, embedding_decoder=None, masks_decoder=None ,noncache=False, **kwargs):  # input -> (batch, len_seq, vocab)

        if output_encoder is not None and embedding_decoder is not None and masks_decoder is not None:
            embeddings = embedding_decoder
        else:
            embeddings = embedding_encoder
        # end

        for layer in self.layers:
            embeddings = layer(embeddings, masks_encoder, output_encoder, masks_decoder)
        # end

        output = self.norm(embeddings)
        return output
    # end

# end


class SimpleEncoderDecoder(nn.Module):
    def __init__(self, encoder, decoder, embedder_encoder, embedder_decoder, pooling=True):
        super(SimpleEncoderDecoder, self).__init__()

        self.pooling = pooling
        
        self.embedder_encoder = embedder_encoder
        self.encoder = encoder

        self.embedder_decoder = embedder_decoder
        self.decoder = decoder

    # end

    def forward(self, ids_encoder=None, masks_encoder=None, ids_decoder=None, masks_decoder=None, **kwargs):
        output_encoder = None
        output_encoder_pooled = None
        output_decoder = None
        
        # output_encoder = self.embed_and_encode(input_ids=ids_encoder, attention_mask=masks_encoder)
        output_encoder = self.embed_and_encode(ids_encoder=ids_encoder, masks_encoder=masks_encoder)
        output = output_encoder
        
        if self.pooling:
            output_encoder_refilled = output_encoder.masked_fill(masks_encoder.transpose(-1,-2)==False, 0)
            output_encoder_pooled = torch.mean(output_encoder_refilled, dim=-2)
            
            output_encoder_pooled_expanded = output_encoder_pooled.unsqueeze(-2).expand(output_encoder.shape)
            output = output_encoder_pooled_expanded
        # end
        
        if self.embedder_decoder and self.decoder:
            output_decoder = self.embed_and_decode(ids_decoder=ids_decoder, masks_encoder=masks_encoder, output_encoder=output, masks_decoder=masks_decoder)
        # end if
        
        return {'output_encoder': output_encoder, 'output_encoder_pooled': output_encoder_pooled, 'output_decoder': output_decoder}
    # end
    
    def embed_and_encode(self, ids_encoder=None, masks_encoder=None, **kwargs):
        
        inputs_embeds = self.embedder_encoder(input_ids=ids_encoder)
        output_encoder = self.encoder(
            inputs_embeds=inputs_embeds,
            attention_mask=masks_encoder,
        )
        
        return output_encoder.last_hidden_state
    # end

    
    def embed_and_decode(self, ids_decoder=None, masks_encoder=None, output_encoder=None, masks_decoder=None, **kwargs):
        
        embedding_decoder = self.embedder_decoder(ids_decoder)
        output_decoder = self.decoder(
            masks_encoder=masks_encoder,
            output_encoder=output_encoder,    #(len_seq, dim_hidden) -> (1, dim_hidden)
            embedding_decoder=embedding_decoder,
            masks_decoder=masks_decoder,
        )

        return output_decoder
    # end
    

    def get_vocab_size(self, name_embedder):
        embedder = getattr(self, f'embedder_{name_embedder}')
        return embedder.get_vocab_size()
    # end

# end

class LinearAndNorm(nn.Module):
    def __init__(self, dim_in = None, dim_out = None, dropout=0.1, eps_norm=1e-12):
        super(LinearAndNorm, self).__init__()

        self.linear = torch.nn.Linear(dim_in, dim_out)
        self.norm = torch.nn.LayerNorm(dim_out, eps_norm)
        self.dropout = torch.nn.Dropout(p=dropout)
    # end

    def forward(self, seqs_in):
        return self.dropout(self.norm(self.linear(seqs_in).relu()))
    # end
# end




class Batch:

    def __init__(self, **kwargs):
        self.kwargs = {}
        for k, v in kwargs.items():
            if v is not None and type(v) is not bool:
                self.kwargs[k] = v.cuda()
            # end
        # end
        
    # end

    def __call__(self):
        return self.kwargs
    # end
# end



class Collator_Base:

    def __init__(self, tokenizer, size_seq_max, need_masked=0.3):
        self.tokenizer = tokenizer
        self.size_seq_max = size_seq_max
        self.need_masked = need_masked

        index_special_token_2_id = {k: v for k, v in zip(tokenizer.all_special_tokens, tokenizer.all_special_ids)}

        self.id_pad = index_special_token_2_id['[PAD]']
        self.id_mask = index_special_token_2_id['[MASK]']
        self.id_cls = index_special_token_2_id['[CLS]']
        self.id_sep = index_special_token_2_id['[SEP]']
        self.id_unk = index_special_token_2_id['[UNK]']
        
        self.regex_special_token = re.compile(r'\[(PAD|MASK|CLS|SEP|EOL|UNK)\]')
        
        self.index_randtoken_start = 999
        self.index_randtoken_end = 30521
    # end

    def _preprocess(self, line):
        line = re.sub(self.regex_special_token, r'<\1>', line)
        line = re.sub(r'''('|"|`){2}''', '', line)
        line = re.sub(r'\.{2,3}', '', line)
        line = re.sub(r' {2,}', ' ', line)
        line = line.lstrip().rstrip()
        return line
    # end
    
    def _get_random_tokens(self):
        return random.randint(self.index_randtoken_start, self.index_randtoken_end)
    # end

    
    def pad_sequences(self, sequences, size_seq_max, need_diagonal=False,
                      need_masked=0):  # need_diagonal and need_masked cannot both set, one for bert seq one for s2s seq
        
        sequences = copy.deepcopy(sequences)
        
        id_pad = self.id_pad
        id_mask = self.id_mask

        sequences_masked_padded = []
        labels_padded = []

        for sequence in sequences:

            len_seq = len(sequence)
            label = copy.deepcopy(sequence)

            if need_masked:
                indexs_masked = list(range(1, len_seq - 1))  # 0 = cls, -1 = sep
                random.shuffle(indexs_masked)
                anchor_mask_all = round(need_masked * (len_seq - 2)) or 1
                anchor_mask_replace = int(anchor_mask_all / 2)

                if anchor_mask_replace:  # not 0
                    indexs_replaced = indexs_masked[:anchor_mask_replace]
                    for index_replaced in indexs_replaced:
                        sequence[index_replaced] = self._get_random_tokens()
                    # end
                # end

                indexs_masked = indexs_masked[anchor_mask_replace:anchor_mask_all]
            # end


            count_pad = size_seq_max - len_seq
            
            label = torch.LongTensor(label)
            label_padded = torch.cat((label, torch.LongTensor([id_pad] * count_pad)))
            labels_padded.append(label_padded)

            if need_masked:

                sequence_masked = torch.LongTensor(sequence)
                sequence_masked.index_fill_(0, torch.LongTensor(indexs_masked), id_mask)
                sequence_masked_padded = torch.cat((sequence_masked, torch.LongTensor([id_pad] * count_pad)))

                sequences_masked_padded.append(sequence_masked_padded)
            # end
        #   # end for

        inputs = torch.stack(labels_padded)  # (batch, size_seq_max)
        if need_masked:
            inputs_masked_padded = torch.stack(sequences_masked_padded)
        # end

        masks_segment = (inputs != self.id_pad).unsqueeze(-2)  # (nbatch, 1, seq)
        masks_attention = self.make_std_mask(inputs, self.id_pad) if need_diagonal else masks_segment

        if need_masked:
            masks_masked = (inputs_masked_padded != id_mask).unsqueeze(-2)
            masks_attention = masks_attention & masks_masked
            return inputs_masked_padded, masks_attention, masks_segment, inputs  # (inputs, masks_attention, masks_segment, labels)
        else:
            return inputs, masks_attention, masks_segment, None
        # end
    # end


    def subsequent_mask(self, size):
        "Mask out subsequent positions."
        attn_shape = (1, size, size)
        subsequent_mask = torch.triu(torch.ones(attn_shape), diagonal=1).type(
            torch.uint8
        )
        return subsequent_mask == 0

    def make_std_mask(self, tgt, pad):
        "Create a mask to hide padding and future words."
        tgt_mask = (tgt != pad).unsqueeze(-2)
        tgt_mask = tgt_mask & self.subsequent_mask(tgt.size(-1)).type_as(
            tgt_mask.data
        )
        return tgt_mask
    # end
# end


class Collator_SC(Collator_Base):

    def __call__(self, list_corpus_source):

        tokens_input_encoder = []
        tokens_input_decoder = []
        tokens_label_decoder = []
        labels_similarity = []
        labels_sc = []

        for corpus_source in list_corpus_source:  # (line0, line1, sim), output of zip remove single case
            if len(corpus_source) == 3:  # (line0, line1, sim)
                corpus_line = [corpus_source[0], corpus_source[1]]
                labels_similarity.append(corpus_source[2])
            elif len(corpus_source) == 2:  # (line, label_sc)
                corpus_line = [corpus_source[0]]
                labels_sc.append(corpus_source[1])
            else:
                corpus_line = [corpus_source[0]]
            # end

            for line in corpus_line:
                tokens = self.tokenizer.encode(self._preprocess(line), add_special_tokens=False)

                # TODO: check edge
                if len(tokens) > self.size_seq_max - 2:
                    tokens = tokens[:self.size_seq_max - 2]
                # end

                tokens_input_encoder.append([self.id_cls] + tokens + [self.id_sep])
                tokens_input_decoder.append([self.id_cls] + tokens)
                tokens_label_decoder.append(tokens + [self.id_sep])
            # end

        # end

        inputs_encoder, masks_encoder, segments_encoder, labels_encoder = self.pad_sequences(tokens_input_encoder,
                                                                                             self.size_seq_max,
                                                                                             need_masked=self.need_masked)
        inputs_decoder, masks_decoder, segments_decoder, _ = self.pad_sequences(tokens_input_decoder, self.size_seq_max,
                                                                                need_diagonal=True)
        labels_decoder, masks_label, segments_label, _ = self.pad_sequences(tokens_label_decoder, self.size_seq_max)
        # labels_similarity = torch.Tensor(labels_similarity).unsqueeze(0).transpose(0,1)
        labels_similarity = torch.Tensor(labels_similarity)
        labels_sc = torch.LongTensor(labels_sc)

        return Batch(
            ids_encoder=inputs_encoder,  # contains [mask]s
            masks_encoder=masks_encoder,
            labels_encoder=labels_encoder,  # doesn't contain [mask]
            segments_encoder=segments_encoder,
            ids_decoder=inputs_decoder,
            masks_decoder=masks_decoder,
            labels_decoder=labels_decoder,
            segments_label=segments_label,
            labels_similarity=labels_similarity,
            labels_sc=labels_sc
        )

    # end
# end


class SimpleEncodedDataset(torch.utils.data.Dataset):

    # info_file_rows = {'path_file': 1,000,000,...}
    def __init__(self, folder_dataset_base, info_file_rows, split=0.001):
        self.folder_dataset_base = folder_dataset_base
        self.list_tokenized_eval = []
        self.dict_filename_loaded = {filename: False for filename, num_rows in info_file_rows.items()}
        self.list_corpus_idx_filename_train = []

        for filename, num_lines in info_file_rows.items():
            idxs_eval = list(range(num_lines))
            random.shuffle(idxs_eval)
            idxs_eval = idxs_eval[:round(len(idxs_eval) * split)]

            for idx_eval in idxs_eval:
                self.list_tokenized_eval.append((idx_eval, filename))
            # end

            set_idxs_eval = set(idxs_eval)
            for idx_train in range(num_lines):
                if idx_train in set_idxs_eval:
                    continue
                # end

                self.list_corpus_idx_filename_train.append((idx_train, filename))
            # end
        # end

        self.is_train = True
        self.rows_cached = []
        self.filename_cached = None
    # end


    def __getitem__(self, idx):  # should not have problem now
        # if eval, use all cached eval tokenized
        if not self.is_train:
            return self.list_tokenized_eval[idx]
        # end

        # if train
        idxs_in_file, filename_current = self.list_corpus_idx_filename_train[idx]

        # if file not fully used
        if filename_current != self.filename_cached:

            # load new file
            print('switch from {} to {}'.format(self.filename_cached, filename_current))
            path_file = os.path.join(self.folder_dataset_base, filename_current)
            with open(path_file, 'r') as file:  # update rows_cached
                self.rows_cached = file.read().splitlines()
            # end

            self.filename_cached = filename_current

            if not self.dict_filename_loaded[filename_current]:
                for id_list_eval, tokenized_eval in enumerate(self.list_tokenized_eval):
                    if type(tokenized_eval) is tuple:
                        if tokenized_eval[1] == filename_current:
                            self.list_tokenized_eval[id_list_eval] = self._fransfer_one_line_to_tokenized(self.rows_cached[tokenized_eval[0]])
                        # end
                    # end
                # end
                self.dict_filename_loaded[filename_current] = True
            # end
        # end

        return self._fransfer_one_line_to_tokenized(self.rows_cached[idxs_in_file])
    # end

    def __len__(self):
        if self.is_train:
            return len(self.list_corpus_idx_filename_train)
        else:
            return len(self.list_tokenized_eval)
        # end
    # end

    def _fransfer_one_line_to_tokenized(self, str_line):
        tokenized = [int(t) for t in str_line.split(', ') if t]
        return tokenized
    # end

    def train(self):
        self.is_train = True
    # end

    def eval(self):
        self.is_train = False
    # end
# end



In [2]:
def GOSV(path_base, filename_base, postfix, index_label_2_id, split=0.1):
    filename = f'{filename_base}{postfix}'
    path_file = os.path.join(path_base, filename)
    contents = parse_csv_file_to_json(path_file)
    
    list_corpus = [(content['processed'], index_label_2_id[content['target']]) for content in contents]
    
    indexs_all = list(range(len(list_corpus)))
    random.shuffle(indexs_all)
    
    index_split = int(split * len(list_corpus))
    
    indexs_eval = indexs_all[:index_split]
    indexs_train = indexs_all[index_split:]
    
    list_corpus_eval = [list_corpus[i_e] for i_e in indexs_eval]
    list_corpus_train = [list_corpus[i_t] for i_t in indexs_train]
    
    return list_corpus_train, list_corpus_eval, None
# end

In [3]:
class DistilBertEncoderHead_MLM(nn.Module):

    @classmethod
    def get_info_accuracy_template(cls):
        return Dotdict({
            'corrects_segmented': 0,
            'corrects_masked': 0,
            'num_segmented': 0,
            'num_masked': 0 
        })
    # end
    
    def __init__(self, model, size_vocab, dim_hidden=128, dropout=0.1):
        super(DistilBertEncoderHead_MLM, self).__init__()
        
        self.ffn = LinearAndNorm(dim_in=dim_hidden, dim_out=dim_hidden, dropout=dropout)
        self.extractor = torch.nn.Linear(dim_hidden, size_vocab, bias=False)
        self.extractor.weight = nn.Parameter(model.embedder_encoder.word_embeddings.weight)

        self.func_loss = torch.nn.CrossEntropyLoss().cuda()
    # end


    def forward(self, output_encoder=None, labels_encoder=None, segments_encoder=None, masks_encoder=None, **kwargs):   # labels_input -> (batch, seq, labels)
        output_ffn = self.ffn(output_encoder)
        output_mlm = self.extractor(output_ffn) # output_mlm = prediction_logits
        
        return {'output': output_mlm, 'labels_encoder': labels_encoder, 'segments_encoder': segments_encoder, 'masks_encoder': masks_encoder}


    
    def compute_loss(self, output=None, labels_encoder=None, segments_encoder=None, masks_encoder=None):
        
        output_mlm = output
        labels_mlm = labels_encoder
        
        info_acc = DistilBertEncoderHead_MLM.get_info_accuracy_template()
        
        segments_encoder_2d = segments_encoder.transpose(-1,-2)[:,:,0]
        hidden_mlm_segmented = output_mlm.masked_select(segments_encoder_2d.unsqueeze(-1)).reshape(-1, output_mlm.shape[-1]) # should be (segmented_all_batchs, size_vocab)
        
        loss_segments = self.func_loss(hidden_mlm_segmented, labels_mlm.masked_select(segments_encoder_2d))
        info_acc.corrects_segmented = torch.sum(hidden_mlm_segmented.argmax(-1) == labels_mlm.masked_select(segments_encoder_2d)).cpu().item()
        info_acc.num_segmented = hidden_mlm_segmented.shape[0]
        
        masks_masked = torch.logical_xor(masks_encoder, segments_encoder) & segments_encoder # True is masked
        masks_masked_perbatch = masks_masked[:,0,:]
        hidden_mlm_masked = output_mlm.masked_select(masks_masked_perbatch.unsqueeze(-1)).reshape(-1, output_mlm.shape[-1])

        if hidden_mlm_masked.shape[0] != 0:
            loss_masked = self.func_loss(hidden_mlm_masked, labels_mlm.masked_select(masks_masked_perbatch))       
            info_acc.corrects_masked = torch.sum(hidden_mlm_masked.argmax(-1) == labels_mlm.masked_select(masks_masked_perbatch)).cpu().item()
            info_acc.num_masked = hidden_mlm_masked.shape[0]
        else:
            loss_masked = 0
            info_acc.corrects_masked = 0
            info_acc.num_masked = 1
        # end
        
        loss_mlm = loss_segments + loss_masked * 3
        
        return loss_mlm, info_acc
    # end
# end


class SimpleDecoderHead_S2S(nn.Module):

    @classmethod
    def get_info_accuracy_template(cls):
        return Dotdict({
            'corrects_segmented': 0,
            'num_segmented': 0 
        })
    # end


    def __init__(self, model, size_vocab, dim_hidden=128, dropout=0.1):
        super(SimpleDecoderHead_S2S, self).__init__()
        
        self.ffn = LinearAndNorm(dim_in=dim_hidden, dim_out=dim_hidden, dropout=dropout)
        self.extractor = torch.nn.Linear(dim_hidden, size_vocab, bias=False)
        self.extractor.weight = nn.Parameter(model.embedder_decoder.embedder[0].lut.weight)

        self.func_loss = torch.nn.CrossEntropyLoss().cuda()
    # end


    def forward(self, output_decoder=None, labels_decoder=None, segments_label=None, **kwargs):   # labels_input -> (batch, seq, labels)
        
        output_ffn = self.ffn(output_decoder)
        output_s2s = self.extractor(output_ffn)   # output_mlm = prediction_logits
        
        return {'output': output_s2s, 'labels_decoder': labels_decoder, 'segments_label': segments_label}
    # end


    def compute_loss(self, output=None, labels_decoder=None, segments_label=None):
        output_s2s = output
        labels_s2s = labels_decoder
        
        info_acc = SimpleDecoderHead_S2S.get_info_accuracy_template()
        
        segments_label_2d = segments_label.transpose(-1,-2)[:,:,0]
        hidden_s2s_segmented = output_s2s.masked_select(segments_label_2d.unsqueeze(-1)).reshape(-1, output_s2s.shape[-1])

        loss_segments = self.func_loss(hidden_s2s_segmented, labels_s2s.masked_select(segments_label_2d))
        info_acc.corrects_segmented = torch.sum(hidden_s2s_segmented.argmax(-1) == labels_s2s.masked_select(segments_label_2d)).cpu().item()
        info_acc.num_segmented = hidden_s2s_segmented.shape[0]
        
        return loss_segments * 4, info_acc
    # end
# end


class SimpleEncoderHead_AveragePooling_SC(nn.Module):  # SC-> SequenceClassification

    @classmethod
    def get_info_accuracy_template(cls):
        return Dotdict({
            'corrects': 0,
            'num': 0 
        })
    # end
    
    def __init__(self, num_labels, dim_hidden=128, dropout=0.1):
        super(SimpleEncoderHead_AveragePooling_SC, self).__init__()
        
        self.ffn = LinearAndNorm(dim_in=dim_hidden, dim_out=dim_hidden, dropout=dropout)
        self.classifier = torch.nn.Linear(dim_hidden, num_labels, bias=False)
        self.func_loss = torch.nn.CrossEntropyLoss()
    # end


    def forward(self, output_encoder_pooled=None, labels_sc=None, **kwargs):   # labels_input -> (batch, seq, labels)
        output_ffn = self.ffn(output_encoder_pooled)
        output_sc = self.classifier(output_ffn) # output_sc = prediction_logits

        return {'output': output_sc, 'labels_sc': labels_sc}
    # end
    
    def compute_loss(self, output=None, labels_sc=None):
        
        labels_sc = labels_sc
        output_sc = output
        
        info_acc = SimpleEncoderHead_AveragePooling_SC.get_info_accuracy_template()
        
        loss_sc = self.func_loss(output_sc, labels_sc)
        info_acc.corrects = torch.sum(output_sc.argmax(-1) == labels_sc).cpu().item()
        info_acc.num = output_sc.shape[0]
        
        return loss_sc, info_acc
    # end
# end


In [4]:
class Trainer(nn.Module):
    def __init__(self, model):
        super(Trainer, self).__init__()
        self.index_name_head = set()
        self.model = model
    # end

    def register(self, head):
        name_head = head.__class__.__name__
        setattr(self, name_head, head)
        self.index_name_head.add(name_head)
        return self
    # end

    def forward(self, **kwargs):
        output_model = self.model(**kwargs)
        dict_head_output = {}
        
        for name in self.index_name_head:
            head = getattr(self, name)
            dict_head_output[name] = head.forward(**{**output_model, **kwargs})
        # end
        
        return dict_head_output
    # end

    def get_head(self, name_klass):
        if type(name_klass) is type:
            name_klass = klass.__name__
        # end
        
        return getattr(self, name_klass)
    # end
# end


class SaverAndLoader:
    def __init__(self, path_checkpoints='./checkpoints'):
        self.dict_name_item = {}
        self.path_checkpoints = path_checkpoints
        self.metadata = None
    # end
    
    def add_item(self, item, name=None):
        if not name:
            name = item.__class__.__name__
        # end
        
        self.dict_name_item[name] = item
        return self
    # end
    
    
    def update_checkpoint(self, name_checkpoint, name_checkpoint_previous=None, metadata=None):  # epoch_n
        if not self.dict_name_item:
            print(f'[ALERT] no item added, skip saving checkpoint.')
            return
        # end
        
        if name_checkpoint_previous:
            result = self._delete_checkpoint_folder(name_checkpoint_previous)
            if result:
                print(f'[INFO] {name_checkpoint_previous} is cleared.')
            else:
                print(f'[ALERT] {name_checkpoint_previous} fail to be cleared.')
            # end
        # end
        
        folder_checkpoint = self._create_checkpoint_folder(name_checkpoint)
        for name_item, item in self.dict_name_item.items():
            path_checkpoint_item = os.path.join(folder_checkpoint, f'{name_item}.pt')
            
            if hasattr(item, 'save_pretrained'):
                item.save_pretrained(path_checkpoint_item)
            elif hasattr(item, 'save'):
                item.save(path_checkpoint_item)
            else:
                torch.save(item.state_dict(), path_checkpoint_item)
            # end
            
            size_file_saved_MB = os.path.getsize(path_checkpoint_item) / 1024 / 1024
            print(f'[INFO] {name_item} is saved, {size_file_saved_MB} MB')
        # end
        
        if metadata:
            path_file_metadata = os.path.join(folder_checkpoint, 'metadata.json')
            with open(path_file_metadata,'w+') as file:
                file.write(json.dumps(metadata, indent=4))
            # end
            print(f'[INFO] metadata updated at {path_file_metadata}, : {metadata}')
            self.metadata = metadata
        # end
        
        print(f'[INFO] {name_checkpoint} is saved')
    # end
    
    
    def load_item_special(self, name_checkpoint, klass_item, name_item_default):
        name_item = klass_item.__name__
        path_checkpoint_item = os.path.join(self.path_checkpoints, name_checkpoint, f'{name_item}.pt')
        
        if os.path.exists(path_checkpoint_item):
            target = path_checkpoint_item
        else:
            target = name_item_default
        # end
            
        if hasattr(klass_item, 'from_pretrained'):
            instance_item = klass_item.from_pretrained(target)
        else:
            instance_item = klass_item(target)
        # end

        print(f'[INFO] {klass_item} loaded for {target}')
        
        return instance_item
    # end

    
    def load_item_state(self, name_checkpoint, instance_item, name_item=None):
        if not name_item:
            name_item = instance_item.__class__.__name__
        # end
        
        path_checkpoint_item = os.path.join(self.path_checkpoints, name_checkpoint, f'{name_item}.pt')
        if not os.path.exists(path_checkpoint_item):
            print(f'[ERROR] {path_checkpoint_item} not exists')
            return None
        # end
        
        if issubclass(instance_item.__class__, torch.nn.Module):
            instance_item.load_state_dict(torch.load(path_checkpoint_item), strict=False)
        else:
            instance_item.load_state_dict(torch.load(path_checkpoint_item))
        # end
        
        print(f'[INFO] {name_item} loaded for {name_checkpoint}.')
        return instance_item
    # end
    
    def load_metadata(self, name_checkpoint):
        path_folder_checkpoint = os.path.join(self.path_checkpoints, name_checkpoint)
        path_metadata = os.path.join(path_folder_checkpoint, 'metadata.json')
        
        if os.path.exists(path_metadata):
            with open(path_metadata, 'r') as file:
                self.metadata = json.load(file)
            # end
            print(f'[INFO] {path_metadata} loaded: {self.metadata}')
        else:
            print(f'[WARN] no metadata found.')
        # end
    # end
    
    
    def list_items(self):
        return list(self.dict_name_item.keys())
    # end
    
    def _create_checkpoint_folder(self, name_checkpoint):
        path_folder_target = os.path.join(self.path_checkpoints, name_checkpoint)
        Path(path_folder_target).mkdir(parents=True, exist_ok=True)
        return path_folder_target
    # end
    
    def _delete_checkpoint_folder(self, name_checkpoint_previous):
        path_folder_target = os.path.join(self.path_checkpoints, name_checkpoint_previous)
        if os.path.exists(path_folder_target):
            shutil.rmtree(path_folder_target, ignore_errors=True)
        # end
        return (not os.path.exists(path_folder_target))
    # end
# end


In [5]:

class Builder:

    @classmethod
    def build_model_with_s2s_from_pretrained_encoder(cls, klass_encoder, name_encoder):
        
        encoder_base = klass_encoder.from_pretrained(name_encoder)
        
        size_vocab = encoder_base.embeddings.word_embeddings.num_embeddings
        dim_hidden = encoder_base.embeddings.word_embeddings.embedding_dim
        n_head = int(dim_hidden / 64)
        dim_feedforward = encoder_base.transformer.layer[0].ffn.lin1.out_features
        n_layer = len(encoder_base.transformer.layer)
        
        embedder_encoder = encoder_base.embeddings
        encoderstack = encoder_base
        
        embedder_decoder = SimpleEmbedder(size_vocab=size_vocab, dim_hidden=dim_hidden)
        sample_decoder = SimpleDecoderLayer(dim_hidden, dim_feedforward, n_head)
        decoderstack = SimpleTransformerStack(sample_decoder, n_layer)

        model = SimpleEncoderDecoder(encoderstack, decoderstack, embedder_encoder, embedder_decoder, pooling=True)
        head_s2s = SimpleDecoderHead_S2S(model, size_vocab, dim_hidden)
        trainer = Trainer(model).register(head_s2s)
        return trainer
    # end
    
    
    
    @classmethod
    def load_model_with_3heads_from_distilbert(cls, klass_encoder, name_encoder, loader, num_labels, name_checkpoint):
        
        encoder_base = loader.load_item_special(name_checkpoint, klass_encoder, name_encoder)
        
        size_vocab = encoder_base.embeddings.word_embeddings.num_embeddings
        dim_hidden = encoder_base.embeddings.word_embeddings.embedding_dim
        n_head = int(dim_hidden / 64)
        dim_feedforward = encoder_base.transformer.layer[0].ffn.lin1.out_features
        n_layer = len(encoder_base.transformer.layer)
        
        embedder_encoder = encoder_base.embeddings
        encoderstack = encoder_base
        
        embedder_decoder = SimpleEmbedder(size_vocab=size_vocab, dim_hidden=dim_hidden)
        sample_decoder = SimpleDecoderLayer(dim_hidden, dim_feedforward, n_head)
        decoderstack = SimpleTransformerStack(sample_decoder, n_layer)
        model = SimpleEncoderDecoder(encoderstack, decoderstack, embedder_encoder, embedder_decoder, pooling=True)
        head_s2s = SimpleDecoderHead_S2S(model, size_vocab, dim_hidden)
        head_sc = SimpleEncoderHead_AveragePooling_SC(num_labels, dim_hidden)
        head_mlm = DistilBertEncoderHead_MLM(model, size_vocab, dim_hidden)
        
        loader.load_item_state(name_checkpoint, embedder_decoder)
        loader.load_item_state(name_checkpoint, decoderstack)
        loader.load_item_state(name_checkpoint, head_sc)
        loader.load_item_state(name_checkpoint, head_s2s)
        loader.load_item_state(name_checkpoint, head_mlm)

        loader.add_item(encoder_base)
        loader.add_item(embedder_decoder)
        loader.add_item(decoderstack)
        loader.add_item(head_s2s)
        loader.add_item(head_sc)
        loader.add_item(head_mlm)

        trainer = Trainer(model).register(head_s2s).register(head_sc).register(head_mlm)
        return trainer
    # end
# end


In [6]:
def train_a_batch(batch, trainer, optimizer=None, scheduler=None):
    dict_head_output = trainer.forward(**batch())
    
    loss_s2s, info_acc_s2s = trainer.module.get_head('SimpleDecoderHead_S2S').compute_loss(**dict_head_output['SimpleDecoderHead_S2S'])
    loss_mlm, info_acc_mlm = trainer.module.get_head('DistilBertEncoderHead_MLM').compute_loss(**dict_head_output['DistilBertEncoderHead_MLM'])
    loss_sc, info_acc_sc = trainer.module.get_head('SimpleEncoderHead_AveragePooling_SC').compute_loss(**dict_head_output['SimpleEncoderHead_AveragePooling_SC'])
    
    # crossentropy loss
    loss_all = loss_s2s + loss_mlm + loss_sc
    loss_all_value = loss_all.item()
    
    
    loss_all.backward()
    
    if optimizer:
        optimizer.step()
        optimizer.zero_grad(set_to_none=True)
    # end
    
    if scheduler:
        scheduler.step()
    # end
    
    return loss_all_value, Dotdict({'s2s': info_acc_s2s, 'mlm': info_acc_mlm, 'sc': info_acc_sc})
# end


def evaluate_a_batch(batch, trainer, *args, **kwargs):
    
    with torch.no_grad():
        dict_head_output = trainer.forward(**batch())
    # end
    
    loss_s2s, info_acc_s2s = trainer.module.get_head('SimpleDecoderHead_S2S').compute_loss(**dict_head_output['SimpleDecoderHead_S2S'])
    loss_mlm, info_acc_mlm = trainer.module.get_head('DistilBertEncoderHead_MLM').compute_loss(**dict_head_output['DistilBertEncoderHead_MLM'])
    loss_sc, info_acc_sc = trainer.module.get_head('SimpleEncoderHead_AveragePooling_SC').compute_loss(**dict_head_output['SimpleEncoderHead_AveragePooling_SC'])
    
    # crossentropy loss
    loss_all = loss_s2s + loss_mlm + loss_sc
    loss_all_value = loss_all.item()
    
    return loss_all_value, Dotdict({'s2s': info_acc_s2s, 'mlm': info_acc_mlm, 'sc': info_acc_sc})
# end


# For s2s head
def greedy_generate(model, head, tokenizer, collator, **kwargs):
    id_start = collator.id_cls
    id_end = collator.id_sep
    id_pad = collator.id_pad
    size_seq_max = collator.size_seq_max

    ids_encoder_twin = kwargs['ids_encoder']
    masks_encoder_twin = kwargs['masks_encoder']
    
    ids_decoder_all = []
    
    for j in range(ids_encoder_twin.shape[0]):
        ids_encoder = ids_encoder_twin[j,].unsqueeze(0)
        masks_encoder = masks_encoder_twin[j,].unsqueeze(0)

        output_encoder = model.embed_and_encode(ids_encoder=ids_encoder, masks_encoder=masks_encoder)
        output_encoder_refilled = output_encoder.masked_fill(masks_encoder.transpose(-1,-2)==False, 0)
        output_encoder_pooled = torch.mean(output_encoder_refilled, dim=-2)
        output_encoder_pooled_expanded = output_encoder_pooled.unsqueeze(-2).expand(output_encoder.shape)
        output_encoder = output_encoder_pooled_expanded

        ids_decoder = torch.zeros(1, 1).fill_(id_start).type_as(ids_encoder.data)

        for i in range(size_seq_max - 1):
            masks_decoder = collator.subsequent_mask(ids_decoder.size(1)).type_as(ids_encoder.data)
            output_decoder = model.embed_and_decode(ids_decoder=ids_decoder, masks_encoder=masks_encoder, output_encoder=output_encoder, masks_decoder=masks_decoder)

            output_ffn = head.ffn(output_decoder)
            output_s2s = head.extractor(output_ffn)   # output_mlm = prediction_logits

            logits_nextword = torch.softmax(output_s2s[:, -1], dim=-1)  # mynote: select dim2=-1, remain=all; last is the next

            id_nextword = torch.argmax(logits_nextword, dim=-1)
            id_nextword = id_nextword.data[0]

            if id_nextword == id_end:
                break
            # end

            ids_decoder = torch.cat([ids_decoder, torch.zeros(1, 1).type_as(ids_encoder.data).fill_(id_nextword)], dim=1)
        # end
        
        ids_pad = torch.full((1, size_seq_max - ids_decoder.shape[-1]), id_pad).type_as(ids_decoder.data)
        
        ids_decoder_all.append(torch.cat([ids_decoder, ids_pad], dim=-1).squeeze(0))
    # end for 

    return torch.stack(ids_decoder_all)
# end


In [7]:
def main(
    folder_data, folder_output, version_data, version_data_last, postfix_train, postfix_test,
    tokenizer, collator, index_label_2_labelid, index_labelid_2_label, num_labels,
    epochs, seq_max, batch_size, dim_hidden, dim_feedforward, n_head, n_layer,
    lr_base_optimizer, betas_optimizer, eps_optimizer, warmup, gpus
):

    loader = SaverAndLoader('checkpoints_distilbert_0')
    trainer = Builder.load_model_with_3heads_from_distilbert(DistilBertModel, 'distilbert-base-uncased', loader, num_labels, str(version_data_last))
    trainer = trainer.to('cuda')
    trainer = torch.nn.DataParallel(trainer, device_ids=gpus)

    train_source, valid_source, _ = GOSV(folder_data, version_data, postfix_train, index_label_2_labelid, split=0.1)
    test_source, _, _ = GOSV(folder_data, version_data, postfix_test, index_label_2_labelid, split=0)
    
    dataloader_train = DataLoader(train_source, batch_size, shuffle=False, collate_fn=collator)
    dataloader_eval = DataLoader(valid_source, batch_size, shuffle=False, collate_fn=collator)
    dataloader_test = DataLoader(test_source, 1, shuffle=False, collate_fn=collator)

    optimizer = torch.optim.AdamW(trainer.parameters(), lr=1e-4, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01)
    lr_scheduler = transformers.get_scheduler(
        name="cosine_with_restarts", optimizer=optimizer, num_warmup_steps=1000, num_training_steps=len(dataloader_train) * epochs
    )
    
    ### start train/eval epochs ####################################
    for e in range(epochs):
        
        info_acc_heads_train = Dotdict({
            'mlm': DistilBertEncoderHead_MLM.get_info_accuracy_template(),
            'sc': SimpleEncoderHead_AveragePooling_SC.get_info_accuracy_template(),
            's2s': SimpleDecoderHead_S2S.get_info_accuracy_template(),
        })


        info_acc_heads_eval = Dotdict({
            'mlm': DistilBertEncoderHead_MLM.get_info_accuracy_template(),
            'sc': SimpleEncoderHead_AveragePooling_SC.get_info_accuracy_template(),
            's2s': SimpleDecoderHead_S2S.get_info_accuracy_template(),
        })

        # train phase
        
        losss_per_e = []
        for i, batch in enumerate(tqdm(dataloader_train)):
            loss_current, info_acc_heads_batch = train_a_batch(batch, trainer, optimizer, lr_scheduler)
            info_acc_heads_train += info_acc_heads_batch

            losss_per_e.append(loss_current)
        # end

        loss_average_per_e = sum(losss_per_e) / len(losss_per_e)
        print('[{}] Epoch: {} training ends. Status: Average loss: {}, Average MLM accuracy: {}, Average SC accuracy: {}, Average S2S accuracy: {}'.format(
            datetime.utcnow(), e, loss_average_per_e,
            info_acc_heads_train.mlm.corrects_masked / info_acc_heads_train.mlm.num_masked,
            info_acc_heads_train.sc.corrects / info_acc_heads_train.sc.num,
            info_acc_heads_train.s2s.corrects_segmented / info_acc_heads_train.s2s.num_segmented,
        ))

        # eval phase start
        losss_per_e = []
        for i, batch in enumerate(tqdm(dataloader_eval)):
            loss_current, info_acc_heads_batch = evaluate_a_batch(batch, trainer)
            info_acc_heads_eval += info_acc_heads_batch

            losss_per_e.append(loss_current)
        # end

        loss_average_per_e = sum(losss_per_e) / len(losss_per_e)
        print('[{}] Epoch: {} Evalutation ends. Status: Average loss: {}, Average MLM accuracy: {}, Average SC accuracy: {}, Average S2S accuracy: {}'.format(        
            datetime.utcnow(), e, loss_average_per_e,
            info_acc_heads_eval.mlm.corrects_masked / info_acc_heads_eval.mlm.num_masked,
            info_acc_heads_eval.sc.corrects / info_acc_heads_eval.sc.num,
            info_acc_heads_eval.s2s.corrects_segmented / info_acc_heads_eval.s2s.num_segmented,
        ))
        # eval phase end
    # end
    ### end train/eval epochs ####################################
    
    
    ### start test  ##############################################
    trainer.eval()
    
    list_corpus_test = []
    for i, batch in enumerate(tqdm(dataloader_test)):

        info_batch = batch()
        with torch.no_grad():
            dict_head_output = trainer.forward(**info_batch)
        # end

        # this is to calcluate s2s acc
        loss_s2s, info_acc_s2s = trainer.module.get_head('SimpleDecoderHead_S2S').compute_loss(**dict_head_output['SimpleDecoderHead_S2S'])
        num_s2s = info_acc_s2s['num_segmented']
        corrects_s2s = info_acc_s2s['corrects_segmented']
        info_s2s = {'num': num_s2s, 'corrects': corrects_s2s}
        # end

        # this is to calculate sc
        label_sc = dict_head_output['SimpleEncoderHead_AveragePooling_SC']['labels_sc'].squeeze(0).detach().cpu()
        logit_sc = dict_head_output['SimpleEncoderHead_AveragePooling_SC']['output'].squeeze(0).detach().cpu()
        
        pred_sc = logit_sc.argmax(-1) # (seq)
        conf_sc = torch.index_select(logit_sc.softmax(-1), -1, pred_sc)
        info_sc = {'pred': pred_sc.item(), 'label': label_sc.item(), 'conf': conf_sc.item()}
        
        # this is to calculate greedy decode acc
        list_acc_greedy_decode = []
        result = greedy_generate(trainer.module.model, trainer.module.get_head('SimpleDecoderHead_S2S'), tokenizer, collator, **info_batch)
        result_cpu_list = result.cpu().tolist()
        labels_decoder_cpu_list = info_batch['labels_decoder'].cpu().tolist()
        
        for result_cpu, labels_decoder in zip(result_cpu_list, labels_decoder_cpu_list):
        
            sentence_predicted = tokenizer.decode(result_cpu).split(' [PAD]')[0]
            set_tokens_predicted = set(sentence_predicted.split()) - set(['[CLS]','[SEP]'])
            # print('predicted: {}'.format(set_tokens_predicted))

            sentence_origin = tokenizer.decode(labels_decoder).split(' [PAD]')[0]
            set_tokens_origin = set(sentence_origin.split()) - set(['[CLS]','[SEP]'])
            # print('origin: {}'.format(set_tokens_origin))
            set_hit = set_tokens_predicted & set_tokens_origin
            list_acc_greedy_decode.append({'num': len(set_tokens_origin), 'corrects': len(set_hit)})
        # end for
        info_greedy = {'greedy': list_acc_greedy_decode}
        # end this

        list_corpus_test.append({'info_sc': info_sc,'info_s2s': info_s2s, 'info_greedy': info_greedy})
    # end for
    
    path_file_output = os.path.join(folder_output, f'{version_data}.json')
    with open(path_file_output, 'w+') as file:
        file.write(json.dumps(list_corpus_test))
    # end
        
    ### end   test  ##############################################
    
    loader.update_checkpoint(str(version_data), str(version_data_last))
# end


In [8]:
import re
import json
import transformers
from torch.utils.data import DataLoader, Dataset
from torchtext.data.functional import to_map_style_dataset
from transformers import AutoTokenizer, DistilBertModel
from datetime import datetime
from tqdm import tqdm


GPUS = [0]
torch.cuda.set_device(GPUS[0])

epochs=8

# source
seq_max = 256
batch_size = 16


# model & head
dim_hidden = 768
# dim_feedforward = 768
dim_feedforward = 3072
n_head = 12
n_layer = 6

# optimizer
lr_base_optimizer = 1e-4
betas_optimizer = (0.9, 0.999)
eps_optimizer = 1e-9

# scheduler
warmup = 200

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
collator = Collator_SC(tokenizer, seq_max)  # TODO: here

# labels
index_label_2_labelid = {label:id_label for id_label, label in enumerate(sorted(["product","testcase","testbed","usererror","targetvm","nimbus","infra"]))}
index_labelid_2_label = {id_label: label for label, id_label in index_label_2_labelid.items()}
num_labels=len(index_label_2_labelid)


folder_data = 'data'
folder_output = 'outputs_gosv_distilbert'
postfix_train = '_train_0.35_15.csv'
postfix_test = '_test.csv'

versions_data = sorted([int(filename.split('_test.csv')[0]) for filename in os.listdir(folder_data) if '_test.csv' in filename and filename[0] != '.'])
# versions_data = versions_data[1:]     #TODO remove

version_data_last = 202205240000
for version_data in versions_data:
    main(
        folder_data, folder_output, version_data, version_data_last, postfix_train, postfix_test,
        tokenizer, collator, index_label_2_labelid, index_labelid_2_label, num_labels,
        epochs, seq_max, batch_size, dim_hidden, dim_feedforward, n_head, n_layer,
        lr_base_optimizer, betas_optimizer, eps_optimizer, warmup, GPUS
    )
    
    print('[INFO] finish {}'.format(version_data))
    version_data_last = version_data
# end

[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202205240000/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202205240000.
[INFO] SimpleTransformerStack loaded for 202205240000.
[ERROR] checkpoints_distilbert_0/202205240000/SimpleEncoderHead_AveragePooling_SC.pt not exists
[INFO] SimpleDecoderHead_S2S loaded for 202205240000.
[ERROR] checkpoints_distilbert_0/202205240000/DistilBertEncoderHead_MLM.pt not exists


  1%|          | 6/1099 [00:03<09:16,  1.97it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (959 > 512). Running this sequence through the model will result in indexing errors
100%|██████████| 1099/1099 [08:39<00:00,  2.12it/s]
  1%|          | 1/123 [00:00<00:23,  5.14it/s]

[2024-01-07 12:53:34.046099] Epoch: 0 training ends. Status: Average loss: 17.814967320982817, Average MLM accuracy: 0.6112179693079746, Average SC accuracy: 0.770836889116459, Average S2S accuracy: 0.6563106630406336


100%|██████████| 123/123 [00:23<00:00,  5.29it/s]
  0%|          | 0/1099 [00:00<?, ?it/s]

[2024-01-07 12:53:57.294862] Epoch: 0 Evalutation ends. Status: Average loss: 5.216482904868397, Average MLM accuracy: 0.8164133738601824, Average SC accuracy: 0.9708141321044547, Average S2S accuracy: 0.8496993173003116


100%|██████████| 1099/1099 [08:41<00:00,  2.11it/s]
  1%|          | 1/123 [00:00<00:23,  5.22it/s]

[2024-01-07 13:02:38.982100] Epoch: 1 training ends. Status: Average loss: 3.6548315772585047, Average MLM accuracy: 0.856833529205038, Average SC accuracy: 0.9838994140069409, Average S2S accuracy: 0.8906421037060638


100%|██████████| 123/123 [00:23<00:00,  5.27it/s]
  0%|          | 0/1099 [00:00<?, ?it/s]

[2024-01-07 13:03:02.329203] Epoch: 1 Evalutation ends. Status: Average loss: 2.7108537520819564, Average MLM accuracy: 0.8835190813914218, Average SC accuracy: 0.9877112135176651, Average S2S accuracy: 0.912753724856267


100%|██████████| 1099/1099 [08:41<00:00,  2.11it/s]
  1%|          | 1/123 [00:00<00:23,  5.23it/s]

[2024-01-07 13:11:43.928007] Epoch: 2 training ends. Status: Average loss: 2.242826208241752, Average MLM accuracy: 0.8998673614758775, Average SC accuracy: 0.9920350457984867, Average S2S accuracy: 0.9287791723086376


100%|██████████| 123/123 [00:23<00:00,  5.29it/s]
  0%|          | 0/1099 [00:00<?, ?it/s]

[2024-01-07 13:12:07.190544] Epoch: 2 Evalutation ends. Status: Average loss: 1.8781375511874998, Average MLM accuracy: 0.912225599459642, Average SC accuracy: 0.9912954429083461, Average S2S accuracy: 0.9392788698600542


100%|██████████| 1099/1099 [08:41<00:00,  2.11it/s]
  1%|          | 1/123 [00:00<00:22,  5.32it/s]

[2024-01-07 13:20:48.931507] Epoch: 3 training ends. Status: Average loss: 1.6675181298281954, Average MLM accuracy: 0.9186332563702391, Average SC accuracy: 0.9939124992888434, Average S2S accuracy: 0.9473561439641341


100%|██████████| 123/123 [00:23<00:00,  5.28it/s]
  0%|          | 0/1099 [00:00<?, ?it/s]

[2024-01-07 13:21:12.214827] Epoch: 3 Evalutation ends. Status: Average loss: 1.480723783252685, Average MLM accuracy: 0.9265113137453563, Average SC accuracy: 0.9933435739887353, Average S2S accuracy: 0.9524346911076205


100%|██████████| 1099/1099 [08:41<00:00,  2.11it/s]
  1%|          | 1/123 [00:00<00:22,  5.43it/s]

[2024-01-07 13:29:53.640991] Epoch: 4 training ends. Status: Average loss: 1.3201876048395262, Average MLM accuracy: 0.930948611074372, Average SC accuracy: 0.996415770609319, Average S2S accuracy: 0.9588970563135077


100%|██████████| 123/123 [00:23<00:00,  5.28it/s]
  0%|          | 0/1099 [00:00<?, ?it/s]

[2024-01-07 13:30:16.941704] Epoch: 4 Evalutation ends. Status: Average loss: 1.2127141517897446, Average MLM accuracy: 0.9369132049983114, Average SC accuracy: 0.9943676395289298, Average S2S accuracy: 0.9601106146330552


100%|██████████| 1099/1099 [08:41<00:00,  2.11it/s]
  0%|          | 0/123 [00:00<?, ?it/s]

[2024-01-07 13:38:58.661637] Epoch: 5 training ends. Status: Average loss: 1.0828731208632922, Average MLM accuracy: 0.9412233730996981, Average SC accuracy: 0.9981794390396541, Average S2S accuracy: 0.9664565388072779


100%|██████████| 123/123 [00:23<00:00,  5.28it/s]
  0%|          | 0/1099 [00:00<?, ?it/s]

[2024-01-07 13:39:21.975938] Epoch: 5 Evalutation ends. Status: Average loss: 1.0686523689305394, Average MLM accuracy: 0.9419790611279973, Average SC accuracy: 0.996415770609319, Average S2S accuracy: 0.9661090184476334


100%|██████████| 1099/1099 [08:42<00:00,  2.10it/s]
  1%|          | 1/123 [00:00<00:23,  5.17it/s]

[2024-01-07 13:48:04.183358] Epoch: 6 training ends. Status: Average loss: 0.9444738935892749, Average MLM accuracy: 0.9470919664889336, Average SC accuracy: 0.9989190419297946, Average S2S accuracy: 0.970729265045572


100%|██████████| 123/123 [00:23<00:00,  5.30it/s]
  0%|          | 0/1099 [00:00<?, ?it/s]

[2024-01-07 13:48:27.404503] Epoch: 6 Evalutation ends. Status: Average loss: 0.9802532915661974, Average MLM accuracy: 0.9468760553866937, Average SC accuracy: 0.9953917050691244, Average S2S accuracy: 0.9689353849907737


100%|██████████| 1099/1099 [08:42<00:00,  2.10it/s]
  1%|          | 1/123 [00:00<00:23,  5.28it/s]

[2024-01-07 13:57:09.935713] Epoch: 7 training ends. Status: Average loss: 0.8757479597049155, Average MLM accuracy: 0.9505912050455544, Average SC accuracy: 0.9996017522899243, Average S2S accuracy: 0.9730016044058578


100%|██████████| 123/123 [00:23<00:00,  5.31it/s]
  0%|          | 0/84 [00:00<?, ?it/s]

[2024-01-07 13:57:33.101792] Epoch: 7 Evalutation ends. Status: Average loss: 0.9394208949998142, Average MLM accuracy: 0.9487673083417765, Average SC accuracy: 0.9974398361495136, Average S2S accuracy: 0.9693878069733987


100%|██████████| 84/84 [02:19<00:00,  1.66s/it]


[INFO] 202205240000 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202206171000 is saved
[INFO] finish 202206171000
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202206171000/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202206171000.
[INFO] SimpleTransformerStack loaded for 202206171000.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202206171000.
[INFO] SimpleDecoderHead_S2S loaded for 202206171000.
[INFO] DistilBertEncoderHead_MLM loaded for 202206171000.


100%|██████████| 114/114 [00:55<00:00,  2.06it/s]
  8%|▊         | 1/13 [00:00<00:02,  5.14it/s]

[2024-01-07 14:01:20.765694] Epoch: 0 training ends. Status: Average loss: 11.762188058150443, Average MLM accuracy: 0.7689515908966872, Average SC accuracy: 0.6012068019747668, Average S2S accuracy: 0.7946625061586468


100%|██████████| 13/13 [00:02<00:00,  5.24it/s]
  0%|          | 0/114 [00:00<?, ?it/s]

[2024-01-07 14:01:23.249075] Epoch: 0 Evalutation ends. Status: Average loss: 7.926815693195049, Average MLM accuracy: 0.7945238672158953, Average SC accuracy: 0.8415841584158416, Average S2S accuracy: 0.828152898735561


100%|██████████| 114/114 [00:55<00:00,  2.06it/s]
  8%|▊         | 1/13 [00:00<00:02,  5.04it/s]

[2024-01-07 14:02:18.591622] Epoch: 1 training ends. Status: Average loss: 6.510778745015462, Average MLM accuracy: 0.813349342356601, Average SC accuracy: 0.9177180471749863, Average S2S accuracy: 0.8503243553949745


100%|██████████| 13/13 [00:02<00:00,  5.18it/s]
  0%|          | 0/114 [00:00<?, ?it/s]

[2024-01-07 14:02:21.101981] Epoch: 1 Evalutation ends. Status: Average loss: 4.88905745286208, Average MLM accuracy: 0.8444390598497699, Average SC accuracy: 0.9554455445544554, Average S2S accuracy: 0.876252596290493


100%|██████████| 114/114 [00:55<00:00,  2.06it/s]
  8%|▊         | 1/13 [00:00<00:02,  5.11it/s]

[2024-01-07 14:03:16.339447] Epoch: 2 training ends. Status: Average loss: 4.208862296321936, Average MLM accuracy: 0.8555640451891066, Average SC accuracy: 0.9731212287438289, Average S2S accuracy: 0.8903842995565774


100%|██████████| 13/13 [00:02<00:00,  5.09it/s]
  0%|          | 0/114 [00:00<?, ?it/s]

[2024-01-07 14:03:18.894146] Epoch: 2 Evalutation ends. Status: Average loss: 3.447206607231727, Average MLM accuracy: 0.8735158710928035, Average SC accuracy: 0.9801980198019802, Average S2S accuracy: 0.9050030973290092


100%|██████████| 114/114 [00:55<00:00,  2.06it/s]
  8%|▊         | 1/13 [00:00<00:02,  5.08it/s]

[2024-01-07 14:04:14.130563] Epoch: 3 training ends. Status: Average loss: 3.0832792583264803, Average MLM accuracy: 0.8789772417180592, Average SC accuracy: 0.9846407021393307, Average S2S accuracy: 0.9143044834948267


100%|██████████| 13/13 [00:02<00:00,  5.20it/s]
  0%|          | 0/114 [00:00<?, ?it/s]

[2024-01-07 14:04:16.632228] Epoch: 3 Evalutation ends. Status: Average loss: 2.6842597539608297, Average MLM accuracy: 0.8890235037557548, Average SC accuracy: 0.9801980198019802, Average S2S accuracy: 0.9227854097584084


100%|██████████| 114/114 [00:55<00:00,  2.07it/s]
  8%|▊         | 1/13 [00:00<00:02,  5.05it/s]

[2024-01-07 14:05:11.693239] Epoch: 4 training ends. Status: Average loss: 2.4385487907811214, Average MLM accuracy: 0.8951318015608798, Average SC accuracy: 0.9945145364783324, Average S2S accuracy: 0.9299638692724586


100%|██████████| 13/13 [00:02<00:00,  5.02it/s]
  0%|          | 0/114 [00:00<?, ?it/s]

[2024-01-07 14:05:14.284787] Epoch: 4 Evalutation ends. Status: Average loss: 2.173705449471107, Average MLM accuracy: 0.9055003634601405, Average SC accuracy: 0.995049504950495, Average S2S accuracy: 0.9348103341471413


100%|██████████| 114/114 [00:55<00:00,  2.07it/s]
  8%|▊         | 1/13 [00:00<00:02,  5.07it/s]

[2024-01-07 14:06:09.488175] Epoch: 5 training ends. Status: Average loss: 1.9477687833601969, Average MLM accuracy: 0.9130055121977843, Average SC accuracy: 0.9945145364783324, Average S2S accuracy: 0.9398546559369355


100%|██████████| 13/13 [00:02<00:00,  5.01it/s]
  0%|          | 0/114 [00:00<?, ?it/s]

[2024-01-07 14:06:12.084930] Epoch: 5 Evalutation ends. Status: Average loss: 1.8259183993706336, Average MLM accuracy: 0.9197964623212987, Average SC accuracy: 0.9900990099009901, Average S2S accuracy: 0.9404219655285501


100%|██████████| 114/114 [00:55<00:00,  2.07it/s]
  8%|▊         | 1/13 [00:00<00:02,  5.15it/s]

[2024-01-07 14:07:07.147375] Epoch: 6 training ends. Status: Average loss: 1.7372685859077854, Average MLM accuracy: 0.9180811002565082, Average SC accuracy: 0.9989029072956664, Average S2S accuracy: 0.9473189357858434


100%|██████████| 13/13 [00:02<00:00,  5.18it/s]
  0%|          | 0/114 [00:00<?, ?it/s]

[2024-01-07 14:07:09.656624] Epoch: 6 Evalutation ends. Status: Average loss: 1.6667013993630042, Average MLM accuracy: 0.9263387448509813, Average SC accuracy: 0.995049504950495, Average S2S accuracy: 0.9438472470210982


100%|██████████| 114/114 [00:55<00:00,  2.06it/s]
  8%|▊         | 1/13 [00:00<00:02,  5.27it/s]

[2024-01-07 14:08:04.903071] Epoch: 7 training ends. Status: Average loss: 1.5140744479078996, Average MLM accuracy: 0.9273044807073078, Average SC accuracy: 0.9934174437739989, Average S2S accuracy: 0.9527098045656102


100%|██████████| 13/13 [00:02<00:00,  5.06it/s]
  0%|          | 0/28 [00:00<?, ?it/s]

[2024-01-07 14:08:07.474304] Epoch: 7 Evalutation ends. Status: Average loss: 1.6630092584169829, Average MLM accuracy: 0.9229464502059608, Average SC accuracy: 0.995049504950495, Average S2S accuracy: 0.9454505702729293


100%|██████████| 28/28 [00:42<00:00,  1.51s/it]


[INFO] 202206171000 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202207021500 is saved
[INFO] finish 202207021500
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202207021500/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202207021500.
[INFO] SimpleTransformerStack loaded for 202207021500.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207021500.
[INFO] SimpleDecoderHead_S2S loaded for 202207021500.
[INFO] DistilBertEncoderHead_MLM loaded for 202207021500.


100%|██████████| 68/68 [00:33<00:00,  2.04it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:09:54.785493] Epoch: 0 training ends. Status: Average loss: 9.635117124108707, Average MLM accuracy: 0.8319914526653184, Average SC accuracy: 0.4842592592592593, Average S2S accuracy: 0.8480654437964734


100%|██████████| 8/8 [00:01<00:00,  5.23it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:09:56.316754] Epoch: 0 Evalutation ends. Status: Average loss: 6.077065289020538, Average MLM accuracy: 0.844151212553495, Average SC accuracy: 0.75, Average S2S accuracy: 0.8677265671002623


100%|██████████| 68/68 [00:33<00:00,  2.05it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:10:29.447345] Epoch: 1 training ends. Status: Average loss: 5.516400305663838, Average MLM accuracy: 0.8488190178196665, Average SC accuracy: 0.9314814814814815, Average S2S accuracy: 0.8737354063312749


100%|██████████| 8/8 [00:01<00:00,  5.31it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:10:30.954349] Epoch: 1 Evalutation ends. Status: Average loss: 4.224519342184067, Average MLM accuracy: 0.8659058487874465, Average SC accuracy: 0.975, Average S2S accuracy: 0.8934746533911461


100%|██████████| 68/68 [00:33<00:00,  2.04it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:11:04.362992] Epoch: 2 training ends. Status: Average loss: 4.099269183243022, Average MLM accuracy: 0.8640439577212196, Average SC accuracy: 0.9953703703703703, Average S2S accuracy: 0.8962889976054353


100%|██████████| 8/8 [00:01<00:00,  5.18it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:11:05.908325] Epoch: 2 Evalutation ends. Status: Average loss: 3.201701521873474, Average MLM accuracy: 0.8862339514978602, Average SC accuracy: 1.0, Average S2S accuracy: 0.91113966061774


100%|██████████| 68/68 [00:32<00:00,  2.07it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:11:38.809141] Epoch: 3 training ends. Status: Average loss: 3.057203271809746, Average MLM accuracy: 0.884839928263441, Average SC accuracy: 0.9981481481481481, Average S2S accuracy: 0.9152908422222477


100%|██████████| 8/8 [00:01<00:00,  5.27it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:11:40.328151] Epoch: 3 Evalutation ends. Status: Average loss: 2.692049413919449, Average MLM accuracy: 0.8969329529243937, Average SC accuracy: 1.0, Average S2S accuracy: 0.9219527862534126


100%|██████████| 68/68 [00:32<00:00,  2.06it/s]
 12%|█▎        | 1/8 [00:00<00:01,  5.25it/s]

[2024-01-07 14:12:13.303801] Epoch: 4 training ends. Status: Average loss: 2.3635363736573387, Average MLM accuracy: 0.9038424848322967, Average SC accuracy: 0.9981481481481481, Average S2S accuracy: 0.929451999862513


100%|██████████| 8/8 [00:01<00:00,  5.22it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:12:14.836917] Epoch: 4 Evalutation ends. Status: Average loss: 2.2018468379974365, Average MLM accuracy: 0.9054921540656206, Average SC accuracy: 1.0, Average S2S accuracy: 0.9353353674856807


100%|██████████| 68/68 [00:32<00:00,  2.07it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:12:47.732284] Epoch: 5 training ends. Status: Average loss: 1.9653021780883564, Average MLM accuracy: 0.9109779829816461, Average SC accuracy: 0.9990740740740741, Average S2S accuracy: 0.9380392067001982


100%|██████████| 8/8 [00:01<00:00,  5.31it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:12:49.241581] Epoch: 5 Evalutation ends. Status: Average loss: 1.712263897061348, Average MLM accuracy: 0.9233238231098431, Average SC accuracy: 1.0, Average S2S accuracy: 0.9409025212783042


100%|██████████| 68/68 [00:32<00:00,  2.07it/s]
 12%|█▎        | 1/8 [00:00<00:01,  5.28it/s]

[2024-01-07 14:13:22.057219] Epoch: 6 training ends. Status: Average loss: 1.6450000370250029, Average MLM accuracy: 0.9253634525126875, Average SC accuracy: 1.0, Average S2S accuracy: 0.9449421981874635


100%|██████████| 8/8 [00:01<00:00,  5.24it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:13:23.584078] Epoch: 6 Evalutation ends. Status: Average loss: 1.6285381019115448, Average MLM accuracy: 0.9179743223965763, Average SC accuracy: 1.0, Average S2S accuracy: 0.9461484931213533


100%|██████████| 68/68 [00:33<00:00,  2.05it/s]
 12%|█▎        | 1/8 [00:00<00:01,  5.19it/s]

[2024-01-07 14:13:56.831035] Epoch: 7 training ends. Status: Average loss: 1.411150328376714, Average MLM accuracy: 0.9330331590796352, Average SC accuracy: 1.0, Average S2S accuracy: 0.9506135355919387


100%|██████████| 8/8 [00:01<00:00,  5.65it/s]
  0%|          | 0/25 [00:00<?, ?it/s]

[2024-01-07 14:13:58.247756] Epoch: 7 Evalutation ends. Status: Average loss: 1.3954379558563232, Average MLM accuracy: 0.9397289586305279, Average SC accuracy: 1.0, Average S2S accuracy: 0.9485038274182325


100%|██████████| 25/25 [00:37<00:00,  1.51s/it]


[INFO] 202207021500 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202207041600 is saved
[INFO] finish 202207041600
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202207041600/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202207041600.
[INFO] SimpleTransformerStack loaded for 202207041600.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207041600.
[INFO] SimpleDecoderHead_S2S loaded for 202207041600.
[INFO] DistilBertEncoderHead_MLM loaded for 202207041600.


100%|██████████| 41/41 [00:19<00:00,  2.07it/s]
 20%|██        | 1/5 [00:00<00:00,  5.08it/s]

[2024-01-07 14:15:27.050311] Epoch: 0 training ends. Status: Average loss: 9.515302251024943, Average MLM accuracy: 0.8226477935054122, Average SC accuracy: 0.5324074074074074, Average S2S accuracy: 0.8453390935937973


100%|██████████| 5/5 [00:00<00:00,  5.47it/s]
  0%|          | 0/41 [00:00<?, ?it/s]

[2024-01-07 14:15:27.965915] Epoch: 0 Evalutation ends. Status: Average loss: 8.710472106933594, Average MLM accuracy: 0.829683698296837, Average SC accuracy: 0.625, Average S2S accuracy: 0.8373560065825563


100%|██████████| 41/41 [00:19<00:00,  2.08it/s]
 20%|██        | 1/5 [00:00<00:00,  5.04it/s]

[2024-01-07 14:15:47.704179] Epoch: 1 training ends. Status: Average loss: 7.020535724919017, Average MLM accuracy: 0.8397862892034416, Average SC accuracy: 0.7469135802469136, Average S2S accuracy: 0.8623694288785232


100%|██████████| 5/5 [00:00<00:00,  5.43it/s]
  0%|          | 0/41 [00:00<?, ?it/s]

[2024-01-07 14:15:48.625919] Epoch: 1 Evalutation ends. Status: Average loss: 6.7658744812011715, Average MLM accuracy: 0.8369829683698297, Average SC accuracy: 0.8611111111111112, Average S2S accuracy: 0.8552751874200036


100%|██████████| 41/41 [00:19<00:00,  2.08it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2024-01-07 14:16:08.382586] Epoch: 2 training ends. Status: Average loss: 5.541399223048512, Average MLM accuracy: 0.8427005273383291, Average SC accuracy: 0.9197530864197531, Average S2S accuracy: 0.8773022780160494


100%|██████████| 5/5 [00:00<00:00,  5.26it/s]
  0%|          | 0/41 [00:00<?, ?it/s]

[2024-01-07 14:16:09.335161] Epoch: 2 Evalutation ends. Status: Average loss: 5.749992179870605, Average MLM accuracy: 0.8369829683698297, Average SC accuracy: 0.9027777777777778, Average S2S accuracy: 0.8753885536661181


100%|██████████| 41/41 [00:19<00:00,  2.07it/s]
 20%|██        | 1/5 [00:00<00:00,  5.10it/s]

[2024-01-07 14:16:29.158607] Epoch: 3 training ends. Status: Average loss: 4.425810784828372, Average MLM accuracy: 0.8541493200111019, Average SC accuracy: 0.9691358024691358, Average S2S accuracy: 0.8928821129303238


100%|██████████| 5/5 [00:00<00:00,  5.37it/s]
  0%|          | 0/41 [00:00<?, ?it/s]

[2024-01-07 14:16:30.090456] Epoch: 3 Evalutation ends. Status: Average loss: 4.43701753616333, Average MLM accuracy: 0.8540145985401459, Average SC accuracy: 0.9444444444444444, Average S2S accuracy: 0.8897421832144816


100%|██████████| 41/41 [00:19<00:00,  2.07it/s]
 20%|██        | 1/5 [00:00<00:00,  5.05it/s]

[2024-01-07 14:16:49.889946] Epoch: 4 training ends. Status: Average loss: 3.541132682707252, Average MLM accuracy: 0.8715653621981682, Average SC accuracy: 0.9907407407407407, Average S2S accuracy: 0.9094219912552568


100%|██████████| 5/5 [00:00<00:00,  5.47it/s]
  0%|          | 0/41 [00:00<?, ?it/s]

[2024-01-07 14:16:50.806120] Epoch: 4 Evalutation ends. Status: Average loss: 3.4956140518188477, Average MLM accuracy: 0.8728710462287105, Average SC accuracy: 0.9861111111111112, Average S2S accuracy: 0.9034558420186506


100%|██████████| 41/41 [00:19<00:00,  2.08it/s]
 20%|██        | 1/5 [00:00<00:00,  5.04it/s]

[2024-01-07 14:17:10.545445] Epoch: 5 training ends. Status: Average loss: 2.90801934207358, Average MLM accuracy: 0.8863447127393839, Average SC accuracy: 0.9938271604938271, Average S2S accuracy: 0.9200555155537468


100%|██████████| 5/5 [00:00<00:00,  5.38it/s]
  0%|          | 0/41 [00:00<?, ?it/s]

[2024-01-07 14:17:11.478227] Epoch: 5 Evalutation ends. Status: Average loss: 2.9375860691070557, Average MLM accuracy: 0.8868613138686131, Average SC accuracy: 1.0, Average S2S accuracy: 0.9152495885902359


100%|██████████| 41/41 [00:19<00:00,  2.08it/s]
 20%|██        | 1/5 [00:00<00:00,  5.13it/s]

[2024-01-07 14:17:31.201471] Epoch: 6 training ends. Status: Average loss: 2.5198568629055487, Average MLM accuracy: 0.8944629475437136, Average SC accuracy: 0.9984567901234568, Average S2S accuracy: 0.9287689530309197


100%|██████████| 5/5 [00:00<00:00,  5.47it/s]
  0%|          | 0/41 [00:00<?, ?it/s]

[2024-01-07 14:17:32.117023] Epoch: 6 Evalutation ends. Status: Average loss: 2.4201481103897096, Average MLM accuracy: 0.9075425790754258, Average SC accuracy: 0.9722222222222222, Average S2S accuracy: 0.9267690619857378


100%|██████████| 41/41 [00:19<00:00,  2.07it/s]
 20%|██        | 1/5 [00:00<00:00,  5.03it/s]

[2024-01-07 14:17:51.882988] Epoch: 7 training ends. Status: Average loss: 2.1475719329787464, Average MLM accuracy: 0.9085484318623369, Average SC accuracy: 0.9938271604938271, Average S2S accuracy: 0.936031890137641


100%|██████████| 5/5 [00:00<00:00,  5.40it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2024-01-07 14:17:52.809913] Epoch: 7 Evalutation ends. Status: Average loss: 2.4151899099349974, Average MLM accuracy: 0.8978102189781022, Average SC accuracy: 1.0, Average S2S accuracy: 0.9320716767233498


100%|██████████| 39/39 [01:00<00:00,  1.56s/it]


[INFO] 202207041600 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202207151000 is saved
[INFO] finish 202207151000
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202207151000/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202207151000.
[INFO] SimpleTransformerStack loaded for 202207151000.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207151000.
[INFO] SimpleDecoderHead_S2S loaded for 202207151000.
[INFO] DistilBertEncoderHead_MLM loaded for 202207151000.


100%|██████████| 89/89 [00:43<00:00,  2.05it/s]
 10%|█         | 1/10 [00:00<00:01,  5.13it/s]

[2024-01-07 14:20:08.311786] Epoch: 0 training ends. Status: Average loss: 5.889685788851105, Average MLM accuracy: 0.8697862694300518, Average SC accuracy: 0.689703808180536, Average S2S accuracy: 0.9006820763593991


100%|██████████| 10/10 [00:01<00:00,  5.06it/s]
  0%|          | 0/89 [00:00<?, ?it/s]

[2024-01-07 14:20:10.288018] Epoch: 0 Evalutation ends. Status: Average loss: 2.906373453140259, Average MLM accuracy: 0.8917495611468695, Average SC accuracy: 0.9872611464968153, Average S2S accuracy: 0.9212334801762114


100%|██████████| 89/89 [00:43<00:00,  2.06it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[2024-01-07 14:20:53.544949] Epoch: 1 training ends. Status: Average loss: 2.5650442302896734, Average MLM accuracy: 0.89889896373057, Average SC accuracy: 0.9816643159379408, Average S2S accuracy: 0.9293741376536379


100%|██████████| 10/10 [00:01<00:00,  5.12it/s]
  0%|          | 0/89 [00:00<?, ?it/s]

[2024-01-07 14:20:55.499803] Epoch: 1 Evalutation ends. Status: Average loss: 2.0251110196113586, Average MLM accuracy: 0.9210064365125804, Average SC accuracy: 0.9872611464968153, Average S2S accuracy: 0.9398237885462555


100%|██████████| 89/89 [00:43<00:00,  2.06it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[2024-01-07 14:21:38.708226] Epoch: 2 training ends. Status: Average loss: 1.7941714043027899, Average MLM accuracy: 0.9238665803108809, Average SC accuracy: 0.9978843441466855, Average S2S accuracy: 0.9447708291688809


100%|██████████| 10/10 [00:01<00:00,  5.15it/s]
  0%|          | 0/89 [00:00<?, ?it/s]

[2024-01-07 14:21:40.649949] Epoch: 2 Evalutation ends. Status: Average loss: 1.6057424664497375, Average MLM accuracy: 0.9286132241076653, Average SC accuracy: 0.9936305732484076, Average S2S accuracy: 0.9474008810572687


100%|██████████| 89/89 [00:43<00:00,  2.06it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[2024-01-07 14:22:23.774563] Epoch: 3 training ends. Status: Average loss: 1.4829696691438052, Average MLM accuracy: 0.9315090673575129, Average SC accuracy: 0.998589562764457, Average S2S accuracy: 0.9513624463090357


100%|██████████| 10/10 [00:01<00:00,  5.09it/s]
  0%|          | 0/89 [00:00<?, ?it/s]

[2024-01-07 14:22:25.741402] Epoch: 3 Evalutation ends. Status: Average loss: 1.2122354686260224, Average MLM accuracy: 0.9414862492685782, Average SC accuracy: 0.9936305732484076, Average S2S accuracy: 0.9557268722466961


100%|██████████| 89/89 [00:42<00:00,  2.07it/s]
 10%|█         | 1/10 [00:00<00:01,  5.05it/s]

[2024-01-07 14:23:08.689638] Epoch: 4 training ends. Status: Average loss: 1.2472392688976246, Average MLM accuracy: 0.9396373056994819, Average SC accuracy: 1.0, Average S2S accuracy: 0.9578370524940397


100%|██████████| 10/10 [00:01<00:00,  5.38it/s]
  0%|          | 0/89 [00:00<?, ?it/s]

[2024-01-07 14:23:10.548473] Epoch: 4 Evalutation ends. Status: Average loss: 1.1429334938526154, Average MLM accuracy: 0.9449970743124634, Average SC accuracy: 0.9936305732484076, Average S2S accuracy: 0.9596916299559471


100%|██████████| 89/89 [00:42<00:00,  2.07it/s]
 10%|█         | 1/10 [00:00<00:01,  5.03it/s]

[2024-01-07 14:23:53.504187] Epoch: 5 training ends. Status: Average loss: 1.1354308510094546, Average MLM accuracy: 0.9417746113989637, Average SC accuracy: 1.0, Average S2S accuracy: 0.9611133592382587


100%|██████████| 10/10 [00:01<00:00,  5.05it/s]
  0%|          | 0/89 [00:00<?, ?it/s]

[2024-01-07 14:23:55.485691] Epoch: 5 Evalutation ends. Status: Average loss: 1.071858686208725, Average MLM accuracy: 0.9476301930953774, Average SC accuracy: 1.0, Average S2S accuracy: 0.9595154185022027


100%|██████████| 89/89 [00:43<00:00,  2.06it/s]
 10%|█         | 1/10 [00:00<00:01,  5.01it/s]

[2024-01-07 14:24:38.755769] Epoch: 6 training ends. Status: Average loss: 1.0587836698199926, Average MLM accuracy: 0.9409974093264248, Average SC accuracy: 1.0, Average S2S accuracy: 0.9648089552384342


100%|██████████| 10/10 [00:01<00:00,  5.10it/s]
  0%|          | 0/89 [00:00<?, ?it/s]

[2024-01-07 14:24:40.718571] Epoch: 6 Evalutation ends. Status: Average loss: 1.0082664906978607, Average MLM accuracy: 0.9429490930368637, Average SC accuracy: 1.0, Average S2S accuracy: 0.9624229074889867


100%|██████████| 89/89 [00:43<00:00,  2.05it/s]
 10%|█         | 1/10 [00:00<00:01,  5.02it/s]

[2024-01-07 14:25:24.035071] Epoch: 7 training ends. Status: Average loss: 0.9293818286295688, Average MLM accuracy: 0.9499676165803109, Average SC accuracy: 1.0, Average S2S accuracy: 0.966807892388925


100%|██████████| 10/10 [00:01<00:00,  5.10it/s]
  0%|          | 0/28 [00:00<?, ?it/s]

[2024-01-07 14:25:25.998095] Epoch: 7 Evalutation ends. Status: Average loss: 0.9378378391265869, Average MLM accuracy: 0.9534815681685196, Average SC accuracy: 1.0, Average S2S accuracy: 0.9645814977973568


100%|██████████| 28/28 [00:43<00:00,  1.54s/it]


[INFO] 202207151000 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202207221500 is saved
[INFO] finish 202207221500
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202207221500/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202207221500.
[INFO] SimpleTransformerStack loaded for 202207221500.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207221500.
[INFO] SimpleDecoderHead_S2S loaded for 202207221500.
[INFO] DistilBertEncoderHead_MLM loaded for 202207221500.


100%|██████████| 68/68 [00:33<00:00,  2.05it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:27:22.181553] Epoch: 0 training ends. Status: Average loss: 2.4392897188663483, Average MLM accuracy: 0.8994295777343899, Average SC accuracy: 0.9416666666666667, Average S2S accuracy: 0.9263161525064234


100%|██████████| 8/8 [00:01<00:00,  4.96it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:27:23.796778] Epoch: 0 Evalutation ends. Status: Average loss: 1.891677662730217, Average MLM accuracy: 0.9104582021685904, Average SC accuracy: 1.0, Average S2S accuracy: 0.9404924044002095


100%|██████████| 68/68 [00:33<00:00,  2.05it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:27:56.896974] Epoch: 1 training ends. Status: Average loss: 1.5982560042072744, Average MLM accuracy: 0.9261896558324719, Average SC accuracy: 0.9990740740740741, Average S2S accuracy: 0.9457674466727596


100%|██████████| 8/8 [00:01<00:00,  5.23it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:27:58.428800] Epoch: 1 Evalutation ends. Status: Average loss: 1.3933252021670341, Average MLM accuracy: 0.937740468695348, Average SC accuracy: 1.0, Average S2S accuracy: 0.9489261393399686


100%|██████████| 68/68 [00:33<00:00,  2.05it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:28:31.600161] Epoch: 2 training ends. Status: Average loss: 1.395983793279704, Average MLM accuracy: 0.9317790283679798, Average SC accuracy: 1.0, Average S2S accuracy: 0.951883338219149


100%|██████████| 8/8 [00:01<00:00,  5.22it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:28:33.138016] Epoch: 2 Evalutation ends. Status: Average loss: 1.2706120237708092, Average MLM accuracy: 0.9317943336831059, Average SC accuracy: 1.0, Average S2S accuracy: 0.9545311681508644


100%|██████████| 68/68 [00:32<00:00,  2.07it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:29:06.027623] Epoch: 3 training ends. Status: Average loss: 1.2313992004184162, Average MLM accuracy: 0.9408904712683281, Average SC accuracy: 1.0, Average S2S accuracy: 0.955401125461997


100%|██████████| 8/8 [00:01<00:00,  5.26it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:29:07.550214] Epoch: 3 Evalutation ends. Status: Average loss: 1.1866464987397194, Average MLM accuracy: 0.944386149003148, Average SC accuracy: 1.0, Average S2S accuracy: 0.9566265060240964


100%|██████████| 68/68 [00:32<00:00,  2.08it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:29:40.213924] Epoch: 4 training ends. Status: Average loss: 1.1268754557651632, Average MLM accuracy: 0.9423069560889705, Average SC accuracy: 1.0, Average S2S accuracy: 0.9597926114971863


100%|██████████| 8/8 [00:01<00:00,  5.34it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:29:41.715080] Epoch: 4 Evalutation ends. Status: Average loss: 1.0971015617251396, Average MLM accuracy: 0.9422875131164743, Average SC accuracy: 1.0, Average S2S accuracy: 0.9565741225772656


100%|██████████| 68/68 [00:32<00:00,  2.06it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:30:14.691537] Epoch: 5 training ends. Status: Average loss: 1.0637115020962322, Average MLM accuracy: 0.9462118601891198, Average SC accuracy: 1.0, Average S2S accuracy: 0.9607525305650877


100%|██████████| 8/8 [00:01<00:00,  5.25it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:30:16.217495] Epoch: 5 Evalutation ends. Status: Average loss: 1.008591242134571, Average MLM accuracy: 0.9489331934242742, Average SC accuracy: 1.0, Average S2S accuracy: 0.9587742273441593


100%|██████████| 68/68 [00:33<00:00,  2.05it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:30:49.430720] Epoch: 6 training ends. Status: Average loss: 1.020790498922853, Average MLM accuracy: 0.9480494621186019, Average SC accuracy: 1.0, Average S2S accuracy: 0.9630287458398717


100%|██████████| 8/8 [00:01<00:00,  5.17it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 14:30:50.980260] Epoch: 6 Evalutation ends. Status: Average loss: 1.065606139600277, Average MLM accuracy: 0.9429870584120322, Average SC accuracy: 1.0, Average S2S accuracy: 0.9586170770036668


100%|██████████| 68/68 [00:33<00:00,  2.06it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:31:24.047771] Epoch: 7 training ends. Status: Average loss: 0.9802092357593424, Average MLM accuracy: 0.9488534129627503, Average SC accuracy: 1.0, Average S2S accuracy: 0.9643277979916424


100%|██████████| 8/8 [00:01<00:00,  5.20it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:31:25.587099] Epoch: 7 Evalutation ends. Status: Average loss: 1.0028955936431885, Average MLM accuracy: 0.9482336481287164, Average SC accuracy: 1.0, Average S2S accuracy: 0.9601885804085909


100%|██████████| 3/3 [00:03<00:00,  1.18s/it]


[INFO] 202207221500 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202207260728 is saved
[INFO] finish 202207260728
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202207260728/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202207260728.
[INFO] SimpleTransformerStack loaded for 202207260728.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207260728.
[INFO] SimpleDecoderHead_S2S loaded for 202207260728.
[INFO] DistilBertEncoderHead_MLM loaded for 202207260728.


100%|██████████| 3/3 [00:01<00:00,  2.41it/s]
100%|██████████| 1/1 [00:00<00:00, 12.70it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:32:16.024607] Epoch: 0 training ends. Status: Average loss: 7.89420747756958, Average MLM accuracy: 0.832807570977918, Average SC accuracy: 0.8536585365853658, Average S2S accuracy: 0.8379022646007152
[2024-01-07 14:32:16.104846] Epoch: 0 Evalutation ends. Status: Average loss: 8.919853210449219, Average MLM accuracy: 0.8307692307692308, Average SC accuracy: 1.0, Average S2S accuracy: 0.8236658932714617


100%|██████████| 3/3 [00:01<00:00,  2.41it/s]
100%|██████████| 1/1 [00:00<00:00, 13.47it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:32:17.352601] Epoch: 1 training ends. Status: Average loss: 7.948771794637044, Average MLM accuracy: 0.8264984227129337, Average SC accuracy: 0.9024390243902439, Average S2S accuracy: 0.8374255065554231
[2024-01-07 14:32:17.428581] Epoch: 1 Evalutation ends. Status: Average loss: 8.198777198791504, Average MLM accuracy: 0.8461538461538461, Average SC accuracy: 1.0, Average S2S accuracy: 0.8352668213457076


100%|██████████| 3/3 [00:01<00:00,  2.44it/s]
100%|██████████| 1/1 [00:00<00:00, 13.54it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:32:18.660200] Epoch: 2 training ends. Status: Average loss: 8.309551239013672, Average MLM accuracy: 0.8233438485804416, Average SC accuracy: 0.8292682926829268, Average S2S accuracy: 0.8431466030989273
[2024-01-07 14:32:18.735652] Epoch: 2 Evalutation ends. Status: Average loss: 8.201781272888184, Average MLM accuracy: 0.7846153846153846, Average SC accuracy: 1.0, Average S2S accuracy: 0.839907192575406


100%|██████████| 3/3 [00:01<00:00,  2.43it/s]
100%|██████████| 1/1 [00:00<00:00, 11.88it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:32:19.972482] Epoch: 3 training ends. Status: Average loss: 8.703080336252848, Average MLM accuracy: 0.8170347003154574, Average SC accuracy: 0.8780487804878049, Average S2S accuracy: 0.8357568533969011
[2024-01-07 14:32:20.058260] Epoch: 3 Evalutation ends. Status: Average loss: 8.61390495300293, Average MLM accuracy: 0.8307692307692308, Average SC accuracy: 1.0, Average S2S accuracy: 0.839907192575406


100%|██████████| 3/3 [00:01<00:00,  2.43it/s]
100%|██████████| 1/1 [00:00<00:00, 14.80it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:32:21.294260] Epoch: 4 training ends. Status: Average loss: 8.327290376027426, Average MLM accuracy: 0.8186119873817035, Average SC accuracy: 0.926829268292683, Average S2S accuracy: 0.8417163289630513
[2024-01-07 14:32:21.363204] Epoch: 4 Evalutation ends. Status: Average loss: 8.48620319366455, Average MLM accuracy: 0.7846153846153846, Average SC accuracy: 1.0, Average S2S accuracy: 0.8445475638051044


100%|██████████| 3/3 [00:01<00:00,  2.41it/s]
100%|██████████| 1/1 [00:00<00:00, 13.53it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:32:22.610218] Epoch: 5 training ends. Status: Average loss: 7.939464410146077, Average MLM accuracy: 0.8201892744479495, Average SC accuracy: 0.9512195121951219, Average S2S accuracy: 0.8438617401668653
[2024-01-07 14:32:22.685674] Epoch: 5 Evalutation ends. Status: Average loss: 7.5998358726501465, Average MLM accuracy: 0.8307692307692308, Average SC accuracy: 1.0, Average S2S accuracy: 0.8352668213457076


100%|██████████| 3/3 [00:01<00:00,  2.41it/s]
100%|██████████| 1/1 [00:00<00:00, 12.93it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:32:23.930814] Epoch: 6 training ends. Status: Average loss: 6.994482040405273, Average MLM accuracy: 0.8375394321766562, Average SC accuracy: 0.975609756097561, Average S2S accuracy: 0.8517282479141836
[2024-01-07 14:32:24.009946] Epoch: 6 Evalutation ends. Status: Average loss: 10.033530235290527, Average MLM accuracy: 0.7384615384615385, Average SC accuracy: 0.75, Average S2S accuracy: 0.8352668213457076


100%|██████████| 3/3 [00:01<00:00,  2.38it/s]
100%|██████████| 1/1 [00:00<00:00, 13.15it/s]
  0%|          | 0/20 [00:00<?, ?it/s]

[2024-01-07 14:32:25.274516] Epoch: 7 training ends. Status: Average loss: 7.243401050567627, Average MLM accuracy: 0.8138801261829653, Average SC accuracy: 1.0, Average S2S accuracy: 0.8500595947556615
[2024-01-07 14:32:25.352654] Epoch: 7 Evalutation ends. Status: Average loss: 6.4517364501953125, Average MLM accuracy: 0.8461538461538461, Average SC accuracy: 1.0, Average S2S accuracy: 0.8538283062645011


100%|██████████| 20/20 [00:44<00:00,  2.24s/it]


[INFO] 202207260728 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202208031141 is saved
[INFO] finish 202208031141
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202208031141/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202208031141.
[INFO] SimpleTransformerStack loaded for 202208031141.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202208031141.
[INFO] SimpleDecoderHead_S2S loaded for 202208031141.
[INFO] DistilBertEncoderHead_MLM loaded for 202208031141.


100%|██████████| 46/46 [00:22<00:00,  2.02it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 14:34:09.491422] Epoch: 0 training ends. Status: Average loss: 9.677568497865096, Average MLM accuracy: 0.8496523478426516, Average SC accuracy: 0.39780521262002744, Average S2S accuracy: 0.8509076367167974


100%|██████████| 6/6 [00:01<00:00,  5.65it/s]
  0%|          | 0/46 [00:00<?, ?it/s]

[2024-01-07 14:34:10.554157] Epoch: 0 Evalutation ends. Status: Average loss: 6.6069315274556475, Average MLM accuracy: 0.8476430976430976, Average SC accuracy: 0.8395061728395061, Average S2S accuracy: 0.8563957151858853


100%|██████████| 46/46 [00:22<00:00,  2.02it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 14:34:33.341036] Epoch: 1 training ends. Status: Average loss: 5.162183828975843, Average MLM accuracy: 0.8625107153062196, Average SC accuracy: 0.9410150891632373, Average S2S accuracy: 0.8807575303140666


100%|██████████| 6/6 [00:01<00:00,  5.34it/s]
  0%|          | 0/46 [00:00<?, ?it/s]

[2024-01-07 14:34:34.467032] Epoch: 1 Evalutation ends. Status: Average loss: 4.539217829704285, Average MLM accuracy: 0.8695286195286195, Average SC accuracy: 1.0, Average S2S accuracy: 0.8962822936357908


100%|██████████| 46/46 [00:22<00:00,  2.03it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 14:34:57.168021] Epoch: 2 training ends. Status: Average loss: 3.41664078961248, Average MLM accuracy: 0.8819411372511667, Average SC accuracy: 0.9917695473251029, Average S2S accuracy: 0.9095719610951626


100%|██████████| 6/6 [00:01<00:00,  5.46it/s]
  0%|          | 0/46 [00:00<?, ?it/s]

[2024-01-07 14:34:58.269207] Epoch: 2 Evalutation ends. Status: Average loss: 3.043222705523173, Average MLM accuracy: 0.8964646464646465, Average SC accuracy: 0.9876543209876543, Average S2S accuracy: 0.9182734719596723


100%|██████████| 46/46 [00:22<00:00,  2.01it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 14:35:21.142098] Epoch: 3 training ends. Status: Average loss: 2.3811476023300835, Average MLM accuracy: 0.9058957995999619, Average SC accuracy: 1.0, Average S2S accuracy: 0.928995815301998


100%|██████████| 6/6 [00:01<00:00,  5.47it/s]
  0%|          | 0/46 [00:00<?, ?it/s]

[2024-01-07 14:35:22.240708] Epoch: 3 Evalutation ends. Status: Average loss: 2.540780464808146, Average MLM accuracy: 0.9090909090909091, Average SC accuracy: 1.0, Average S2S accuracy: 0.9341524889729048


100%|██████████| 46/46 [00:22<00:00,  2.02it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 14:35:45.033692] Epoch: 4 training ends. Status: Average loss: 1.7711612919102544, Average MLM accuracy: 0.9196590151442995, Average SC accuracy: 1.0, Average S2S accuracy: 0.9419926589256895


100%|██████████| 6/6 [00:01<00:00,  5.53it/s]
  0%|          | 0/46 [00:00<?, ?it/s]

[2024-01-07 14:35:46.119766] Epoch: 4 Evalutation ends. Status: Average loss: 1.681884268919627, Average MLM accuracy: 0.92003367003367, Average SC accuracy: 1.0, Average S2S accuracy: 0.941650913673598


100%|██████████| 46/46 [00:22<00:00,  2.02it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 14:36:08.864851] Epoch: 5 training ends. Status: Average loss: 1.4577063244322073, Average MLM accuracy: 0.9304695685303362, Average SC accuracy: 1.0, Average S2S accuracy: 0.9499978576631389


100%|██████████| 6/6 [00:01<00:00,  5.41it/s]
  0%|          | 0/46 [00:00<?, ?it/s]

[2024-01-07 14:36:09.974894] Epoch: 5 Evalutation ends. Status: Average loss: 1.5510368347167969, Average MLM accuracy: 0.9234006734006734, Average SC accuracy: 1.0, Average S2S accuracy: 0.9471329552614997


100%|██████████| 46/46 [00:22<00:00,  2.02it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 14:36:32.731528] Epoch: 6 training ends. Status: Average loss: 1.1902339432550513, Average MLM accuracy: 0.9413753690827698, Average SC accuracy: 0.9986282578875172, Average S2S accuracy: 0.9565391262122056


100%|██████████| 6/6 [00:01<00:00,  5.40it/s]
  0%|          | 0/46 [00:00<?, ?it/s]

[2024-01-07 14:36:33.844192] Epoch: 6 Evalutation ends. Status: Average loss: 1.0698603391647339, Average MLM accuracy: 0.9457070707070707, Average SC accuracy: 1.0, Average S2S accuracy: 0.9543163201008191


100%|██████████| 46/46 [00:22<00:00,  2.03it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 14:36:56.539490] Epoch: 7 training ends. Status: Average loss: 1.1034042407637057, Average MLM accuracy: 0.9445185255738642, Average SC accuracy: 1.0, Average S2S accuracy: 0.9592598940257366


100%|██████████| 6/6 [00:01<00:00,  5.38it/s]
  0%|          | 0/29 [00:00<?, ?it/s]

[2024-01-07 14:36:57.657477] Epoch: 7 Evalutation ends. Status: Average loss: 0.949873353044192, Average MLM accuracy: 0.9457070707070707, Average SC accuracy: 1.0, Average S2S accuracy: 0.9560176433522369


100%|██████████| 29/29 [01:03<00:00,  2.20s/it]


[INFO] 202208031141 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202208172100 is saved
[INFO] finish 202208172100
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202208172100/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202208172100.
[INFO] SimpleTransformerStack loaded for 202208172100.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202208172100.
[INFO] SimpleDecoderHead_S2S loaded for 202208172100.
[INFO] DistilBertEncoderHead_MLM loaded for 202208172100.


100%|██████████| 61/61 [00:30<00:00,  2.02it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 14:39:11.095108] Epoch: 0 training ends. Status: Average loss: 10.993288290305216, Average MLM accuracy: 0.8299416920878866, Average SC accuracy: 0.32613168724279834, Average S2S accuracy: 0.8414449241310663


100%|██████████| 7/7 [00:01<00:00,  4.73it/s]
  0%|          | 0/61 [00:00<?, ?it/s]

[2024-01-07 14:39:12.576997] Epoch: 0 Evalutation ends. Status: Average loss: 8.899551527840751, Average MLM accuracy: 0.822401614530777, Average SC accuracy: 0.5092592592592593, Average S2S accuracy: 0.8476934711368792


100%|██████████| 61/61 [00:30<00:00,  2.03it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 14:39:42.657080] Epoch: 1 training ends. Status: Average loss: 6.886541819963299, Average MLM accuracy: 0.8384368845812257, Average SC accuracy: 0.6676954732510288, Average S2S accuracy: 0.866555168462598


100%|██████████| 7/7 [00:01<00:00,  4.78it/s]
  0%|          | 0/61 [00:00<?, ?it/s]

[2024-01-07 14:39:44.123423] Epoch: 1 Evalutation ends. Status: Average loss: 5.9542098726545065, Average MLM accuracy: 0.8412378069290279, Average SC accuracy: 0.7777777777777778, Average S2S accuracy: 0.8710360473909755


100%|██████████| 61/61 [00:30<00:00,  2.02it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 14:40:14.313835] Epoch: 2 training ends. Status: Average loss: 4.758258053513824, Average MLM accuracy: 0.8580916708499053, Average SC accuracy: 0.9310699588477366, Average S2S accuracy: 0.8862081737057141


100%|██████████| 7/7 [00:01<00:00,  4.94it/s]
  0%|          | 0/61 [00:00<?, ?it/s]

[2024-01-07 14:40:15.731691] Epoch: 2 Evalutation ends. Status: Average loss: 4.376091820853097, Average MLM accuracy: 0.8600739993272788, Average SC accuracy: 0.9722222222222222, Average S2S accuracy: 0.8876733047643055


100%|██████████| 61/61 [00:30<00:00,  2.02it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 14:40:45.872136] Epoch: 3 training ends. Status: Average loss: 3.704405710345409, Average MLM accuracy: 0.8759315750859172, Average SC accuracy: 0.9897119341563786, Average S2S accuracy: 0.9021458581696548


100%|██████████| 7/7 [00:01<00:00,  4.82it/s]
  0%|          | 0/61 [00:00<?, ?it/s]

[2024-01-07 14:40:47.326659] Epoch: 3 Evalutation ends. Status: Average loss: 3.4997083800179616, Average MLM accuracy: 0.8748738647830474, Average SC accuracy: 1.0, Average S2S accuracy: 0.9007814469372322


100%|██████████| 61/61 [00:30<00:00,  2.03it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 14:41:17.440915] Epoch: 4 training ends. Status: Average loss: 2.990019552043227, Average MLM accuracy: 0.8844267675792563, Average SC accuracy: 0.9897119341563786, Average S2S accuracy: 0.9166714892533478


100%|██████████| 7/7 [00:01<00:00,  5.02it/s]
  0%|          | 0/61 [00:00<?, ?it/s]

[2024-01-07 14:41:18.837693] Epoch: 4 Evalutation ends. Status: Average loss: 2.587134667805263, Average MLM accuracy: 0.8990918264379415, Average SC accuracy: 1.0, Average S2S accuracy: 0.9172170405848248


100%|██████████| 61/61 [00:30<00:00,  2.03it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 14:41:48.845946] Epoch: 5 training ends. Status: Average loss: 2.4395208436934674, Average MLM accuracy: 0.8991775109085994, Average SC accuracy: 1.0, Average S2S accuracy: 0.9296461764603757


100%|██████████| 7/7 [00:01<00:00,  4.80it/s]
  0%|          | 0/61 [00:00<?, ?it/s]

[2024-01-07 14:41:50.305567] Epoch: 5 Evalutation ends. Status: Average loss: 2.619860989706857, Average MLM accuracy: 0.8812647157753112, Average SC accuracy: 1.0, Average S2S accuracy: 0.9284093773632468


100%|██████████| 61/61 [00:30<00:00,  2.02it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 14:42:20.553357] Epoch: 6 training ends. Status: Average loss: 2.1012623720481747, Average MLM accuracy: 0.9064756535506043, Average SC accuracy: 0.9989711934156379, Average S2S accuracy: 0.9377539091887638


100%|██████████| 7/7 [00:01<00:00,  4.84it/s]
  0%|          | 0/61 [00:00<?, ?it/s]

[2024-01-07 14:42:22.000894] Epoch: 6 Evalutation ends. Status: Average loss: 2.09310850075313, Average MLM accuracy: 0.9078372014799866, Average SC accuracy: 0.9907407407407407, Average S2S accuracy: 0.9340055457524578


100%|██████████| 61/61 [00:30<00:00,  2.01it/s]
 14%|█▍        | 1/7 [00:00<00:01,  5.14it/s]

[2024-01-07 14:42:52.287007] Epoch: 7 training ends. Status: Average loss: 1.830347453961607, Average MLM accuracy: 0.9155114492026103, Average SC accuracy: 1.0, Average S2S accuracy: 0.9440271299436336


100%|██████████| 7/7 [00:01<00:00,  4.92it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:42:53.711054] Epoch: 7 Evalutation ends. Status: Average loss: 1.8980909926550729, Average MLM accuracy: 0.9145644130507904, Average SC accuracy: 1.0, Average S2S accuracy: 0.9385429795815478


100%|██████████| 3/3 [00:05<00:00,  1.97s/it]


[INFO] 202208172100 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202208190706 is saved
[INFO] finish 202208190706
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202208190706/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202208190706.
[INFO] SimpleTransformerStack loaded for 202208190706.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202208190706.
[INFO] SimpleDecoderHead_S2S loaded for 202208190706.
[INFO] DistilBertEncoderHead_MLM loaded for 202208190706.


100%|██████████| 3/3 [00:01<00:00,  2.31it/s]
100%|██████████| 1/1 [00:00<00:00, 10.84it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:43:34.606743] Epoch: 0 training ends. Status: Average loss: 5.022616863250732, Average MLM accuracy: 0.8926174496644296, Average SC accuracy: 0.1951219512195122, Average S2S accuracy: 0.9243297780340156
[2024-01-07 14:43:34.700517] Epoch: 0 Evalutation ends. Status: Average loss: 4.044739723205566, Average MLM accuracy: 0.9074074074074074, Average SC accuracy: 0.5, Average S2S accuracy: 0.9371508379888268


100%|██████████| 3/3 [00:01<00:00,  2.34it/s]
100%|██████████| 1/1 [00:00<00:00, 12.59it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:43:35.986897] Epoch: 1 training ends. Status: Average loss: 3.9248514970143638, Average MLM accuracy: 0.9194630872483222, Average SC accuracy: 0.43902439024390244, Average S2S accuracy: 0.9331219371576823
[2024-01-07 14:43:36.068020] Epoch: 1 Evalutation ends. Status: Average loss: 5.335386753082275, Average MLM accuracy: 0.9166666666666666, Average SC accuracy: 0.5, Average S2S accuracy: 0.9413407821229051


100%|██████████| 3/3 [00:01<00:00,  2.30it/s]
100%|██████████| 1/1 [00:00<00:00, 12.73it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:43:37.374920] Epoch: 2 training ends. Status: Average loss: 4.089390277862549, Average MLM accuracy: 0.925215723873442, Average SC accuracy: 0.34146341463414637, Average S2S accuracy: 0.928221389449409
[2024-01-07 14:43:37.454840] Epoch: 2 Evalutation ends. Status: Average loss: 4.157168388366699, Average MLM accuracy: 0.9074074074074074, Average SC accuracy: 0.5, Average S2S accuracy: 0.9357541899441341


100%|██████████| 3/3 [00:01<00:00,  2.36it/s]
100%|██████████| 1/1 [00:00<00:00, 11.94it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:43:38.729014] Epoch: 3 training ends. Status: Average loss: 3.8674262364705405, Average MLM accuracy: 0.9060402684563759, Average SC accuracy: 0.43902439024390244, Average S2S accuracy: 0.9230325742288844
[2024-01-07 14:43:38.814470] Epoch: 3 Evalutation ends. Status: Average loss: 4.731461524963379, Average MLM accuracy: 0.9074074074074074, Average SC accuracy: 0.5, Average S2S accuracy: 0.9371508379888268


100%|██████████| 3/3 [00:01<00:00,  2.30it/s]
100%|██████████| 1/1 [00:00<00:00, 12.48it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:43:40.119803] Epoch: 4 training ends. Status: Average loss: 3.652512788772583, Average MLM accuracy: 0.900287631831256, Average SC accuracy: 0.5121951219512195, Average S2S accuracy: 0.9296627270106659
[2024-01-07 14:43:40.202762] Epoch: 4 Evalutation ends. Status: Average loss: 3.329683303833008, Average MLM accuracy: 0.9166666666666666, Average SC accuracy: 0.75, Average S2S accuracy: 0.9231843575418994


100%|██████████| 3/3 [00:01<00:00,  2.34it/s]
100%|██████████| 1/1 [00:00<00:00, 11.42it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:43:41.486424] Epoch: 5 training ends. Status: Average loss: 3.5229763984680176, Average MLM accuracy: 0.8935762224352828, Average SC accuracy: 0.6097560975609756, Average S2S accuracy: 0.9348515422311905
[2024-01-07 14:43:41.575542] Epoch: 5 Evalutation ends. Status: Average loss: 4.643406867980957, Average MLM accuracy: 0.8425925925925926, Average SC accuracy: 0.75, Average S2S accuracy: 0.9315642458100558


100%|██████████| 3/3 [00:01<00:00,  2.35it/s]
100%|██████████| 1/1 [00:00<00:00, 12.30it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 14:43:42.851863] Epoch: 6 training ends. Status: Average loss: 2.9412675698598227, Average MLM accuracy: 0.9022051773729626, Average SC accuracy: 0.7317073170731707, Average S2S accuracy: 0.936004612280196
[2024-01-07 14:43:42.934891] Epoch: 6 Evalutation ends. Status: Average loss: 3.7078866958618164, Average MLM accuracy: 0.9074074074074074, Average SC accuracy: 0.75, Average S2S accuracy: 0.9329608938547486


100%|██████████| 3/3 [00:01<00:00,  2.33it/s]
100%|██████████| 1/1 [00:00<00:00, 12.89it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 14:43:44.225739] Epoch: 7 training ends. Status: Average loss: 3.012552181879679, Average MLM accuracy: 0.9031639501438159, Average SC accuracy: 0.8048780487804879, Average S2S accuracy: 0.9319688671086769
[2024-01-07 14:43:44.305105] Epoch: 7 Evalutation ends. Status: Average loss: 3.758840322494507, Average MLM accuracy: 0.8888888888888888, Average SC accuracy: 0.75, Average S2S accuracy: 0.9287709497206704


100%|██████████| 2/2 [00:02<00:00,  1.36s/it]


[INFO] 202208190706 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202208240500 is saved
[INFO] finish 202208240500
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202208240500/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202208240500.
[INFO] SimpleTransformerStack loaded for 202208240500.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202208240500.
[INFO] SimpleDecoderHead_S2S loaded for 202208240500.
[INFO] DistilBertEncoderHead_MLM loaded for 202208240500.


100%|██████████| 2/2 [00:00<00:00,  2.42it/s]
100%|██████████| 1/1 [00:00<00:00, 14.42it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 14:44:27.764332] Epoch: 0 training ends. Status: Average loss: 9.981627941131592, Average MLM accuracy: 0.8066666666666666, Average SC accuracy: 0.5555555555555556, Average S2S accuracy: 0.8332219251336899
[2024-01-07 14:44:27.835226] Epoch: 0 Evalutation ends. Status: Average loss: 9.931421279907227, Average MLM accuracy: 0.8461538461538461, Average SC accuracy: 0.0, Average S2S accuracy: 0.8362068965517241


100%|██████████| 2/2 [00:00<00:00,  2.49it/s]
100%|██████████| 1/1 [00:00<00:00, 14.32it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 14:44:28.639080] Epoch: 1 training ends. Status: Average loss: 9.510560512542725, Average MLM accuracy: 0.8266666666666667, Average SC accuracy: 0.5185185185185185, Average S2S accuracy: 0.8419117647058824
[2024-01-07 14:44:28.710893] Epoch: 1 Evalutation ends. Status: Average loss: 8.911649703979492, Average MLM accuracy: 0.8653846153846154, Average SC accuracy: 0.0, Average S2S accuracy: 0.8563218390804598


100%|██████████| 2/2 [00:00<00:00,  2.46it/s]
100%|██████████| 1/1 [00:00<00:00, 16.89it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 14:44:29.525195] Epoch: 2 training ends. Status: Average loss: 10.410637855529785, Average MLM accuracy: 0.7955555555555556, Average SC accuracy: 0.48148148148148145, Average S2S accuracy: 0.8362299465240641
[2024-01-07 14:44:29.585686] Epoch: 2 Evalutation ends. Status: Average loss: 7.6095194816589355, Average MLM accuracy: 0.8461538461538461, Average SC accuracy: 0.6666666666666666, Average S2S accuracy: 0.8505747126436781


100%|██████████| 2/2 [00:00<00:00,  2.50it/s]
100%|██████████| 1/1 [00:00<00:00, 16.74it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 14:44:30.386457] Epoch: 3 training ends. Status: Average loss: 9.673038482666016, Average MLM accuracy: 0.8133333333333334, Average SC accuracy: 0.5185185185185185, Average S2S accuracy: 0.8372326203208557
[2024-01-07 14:44:30.447367] Epoch: 3 Evalutation ends. Status: Average loss: 7.311341285705566, Average MLM accuracy: 0.8461538461538461, Average SC accuracy: 0.6666666666666666, Average S2S accuracy: 0.8103448275862069


100%|██████████| 2/2 [00:00<00:00,  2.49it/s]
100%|██████████| 1/1 [00:00<00:00, 16.30it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 14:44:31.252697] Epoch: 4 training ends. Status: Average loss: 9.3927583694458, Average MLM accuracy: 0.8111111111111111, Average SC accuracy: 0.6296296296296297, Average S2S accuracy: 0.8372326203208557
[2024-01-07 14:44:31.315566] Epoch: 4 Evalutation ends. Status: Average loss: 10.702653884887695, Average MLM accuracy: 0.7884615384615384, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.8333333333333334


100%|██████████| 2/2 [00:00<00:00,  2.48it/s]
100%|██████████| 1/1 [00:00<00:00, 17.35it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 14:44:32.123084] Epoch: 5 training ends. Status: Average loss: 8.939143180847168, Average MLM accuracy: 0.8266666666666667, Average SC accuracy: 0.7407407407407407, Average S2S accuracy: 0.8368983957219251
[2024-01-07 14:44:32.182356] Epoch: 5 Evalutation ends. Status: Average loss: 9.350645065307617, Average MLM accuracy: 0.8076923076923077, Average SC accuracy: 0.6666666666666666, Average S2S accuracy: 0.8448275862068966


100%|██████████| 2/2 [00:00<00:00,  2.45it/s]
100%|██████████| 1/1 [00:00<00:00, 15.26it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 14:44:33.001099] Epoch: 6 training ends. Status: Average loss: 9.16410207748413, Average MLM accuracy: 0.8066666666666666, Average SC accuracy: 0.6666666666666666, Average S2S accuracy: 0.8475935828877005
[2024-01-07 14:44:33.068076] Epoch: 6 Evalutation ends. Status: Average loss: 8.414825439453125, Average MLM accuracy: 0.7884615384615384, Average SC accuracy: 0.6666666666666666, Average S2S accuracy: 0.8563218390804598


100%|██████████| 2/2 [00:00<00:00,  2.45it/s]
100%|██████████| 1/1 [00:00<00:00, 17.33it/s]
  0%|          | 0/37 [00:00<?, ?it/s]

[2024-01-07 14:44:33.885725] Epoch: 7 training ends. Status: Average loss: 9.169209480285645, Average MLM accuracy: 0.8044444444444444, Average SC accuracy: 0.6666666666666666, Average S2S accuracy: 0.838903743315508
[2024-01-07 14:44:33.944983] Epoch: 7 Evalutation ends. Status: Average loss: 9.135176658630371, Average MLM accuracy: 0.7307692307692307, Average SC accuracy: 1.0, Average S2S accuracy: 0.8333333333333334


100%|██████████| 37/37 [01:12<00:00,  1.95s/it]


[INFO] 202208240500 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202209081034 is saved
[INFO] finish 202209081034
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202209081034/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202209081034.
[INFO] SimpleTransformerStack loaded for 202209081034.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202209081034.
[INFO] SimpleDecoderHead_S2S loaded for 202209081034.
[INFO] DistilBertEncoderHead_MLM loaded for 202209081034.


100%|██████████| 117/117 [00:57<00:00,  2.03it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 14:47:29.156498] Epoch: 0 training ends. Status: Average loss: 6.8482618311531525, Average MLM accuracy: 0.8366416309012875, Average SC accuracy: 0.6951154052603328, Average S2S accuracy: 0.8681531889414718


100%|██████████| 13/13 [00:02<00:00,  4.89it/s]
  0%|          | 0/117 [00:00<?, ?it/s]

[2024-01-07 14:47:31.815097] Epoch: 0 Evalutation ends. Status: Average loss: 4.532434481840867, Average MLM accuracy: 0.8574181117533719, Average SC accuracy: 0.9033816425120773, Average S2S accuracy: 0.8895702292286233


100%|██████████| 117/117 [00:57<00:00,  2.04it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 14:48:29.305615] Epoch: 1 training ends. Status: Average loss: 3.5670893548900247, Average MLM accuracy: 0.8717811158798283, Average SC accuracy: 0.9334406870638755, Average S2S accuracy: 0.908114269922968


100%|██████████| 13/13 [00:02<00:00,  4.91it/s]
  0%|          | 0/117 [00:00<?, ?it/s]

[2024-01-07 14:48:31.953818] Epoch: 1 Evalutation ends. Status: Average loss: 2.6287651520508986, Average MLM accuracy: 0.8912243825538623, Average SC accuracy: 0.9710144927536232, Average S2S accuracy: 0.923072874174277


100%|██████████| 117/117 [00:58<00:00,  2.01it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 14:49:30.109259] Epoch: 2 training ends. Status: Average loss: 2.2673236987529655, Average MLM accuracy: 0.9044681177191907, Average SC accuracy: 0.9618894256575417, Average S2S accuracy: 0.9333022441765404


100%|██████████| 13/13 [00:02<00:00,  4.85it/s]
  0%|          | 0/117 [00:00<?, ?it/s]

[2024-01-07 14:49:32.790422] Epoch: 2 Evalutation ends. Status: Average loss: 1.8296588475887592, Average MLM accuracy: 0.9201261166579086, Average SC accuracy: 0.9903381642512077, Average S2S accuracy: 0.9392846803695029


100%|██████████| 117/117 [00:57<00:00,  2.02it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 14:50:30.703835] Epoch: 3 training ends. Status: Average loss: 1.7117751816399076, Average MLM accuracy: 0.9228617412630288, Average SC accuracy: 0.9715512614063339, Average S2S accuracy: 0.9457494213962486


100%|██████████| 13/13 [00:02<00:00,  4.73it/s]
  0%|          | 0/117 [00:00<?, ?it/s]

[2024-01-07 14:50:33.456856] Epoch: 3 Evalutation ends. Status: Average loss: 1.5750922881639922, Average MLM accuracy: 0.9267822736030829, Average SC accuracy: 0.9903381642512077, Average S2S accuracy: 0.945600968497513


100%|██████████| 117/117 [00:57<00:00,  2.02it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 14:51:31.311553] Epoch: 4 training ends. Status: Average loss: 1.4300830787063663, Average MLM accuracy: 0.9315220723482526, Average SC accuracy: 0.974771873322598, Average S2S accuracy: 0.9529920434787615


100%|██████████| 13/13 [00:02<00:00,  4.78it/s]
  0%|          | 0/117 [00:00<?, ?it/s]

[2024-01-07 14:51:34.034173] Epoch: 4 Evalutation ends. Status: Average loss: 1.3833547280384944, Average MLM accuracy: 0.931511648274654, Average SC accuracy: 0.9806763285024155, Average S2S accuracy: 0.9527857462431245


100%|██████████| 117/117 [00:57<00:00,  2.02it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 14:52:31.903064] Epoch: 5 training ends. Status: Average loss: 1.2782926798885703, Average MLM accuracy: 0.93579475781729, Average SC accuracy: 0.9817498658078369, Average S2S accuracy: 0.9574884567112278


100%|██████████| 13/13 [00:02<00:00,  4.77it/s]
  0%|          | 0/117 [00:00<?, ?it/s]

[2024-01-07 14:52:34.631734] Epoch: 5 Evalutation ends. Status: Average loss: 1.3532302654706514, Average MLM accuracy: 0.9301103520756699, Average SC accuracy: 0.9806763285024155, Average S2S accuracy: 0.9548122220175277


100%|██████████| 117/117 [00:57<00:00,  2.04it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 14:53:31.891202] Epoch: 6 training ends. Status: Average loss: 1.1607520677085617, Average MLM accuracy: 0.9397225628448804, Average SC accuracy: 0.9817498658078369, Average S2S accuracy: 0.9611241608806291


100%|██████████| 13/13 [00:02<00:00,  5.00it/s]
  0%|          | 0/117 [00:00<?, ?it/s]

[2024-01-07 14:53:34.494850] Epoch: 6 Evalutation ends. Status: Average loss: 1.12365945485922, Average MLM accuracy: 0.9421965317919075, Average SC accuracy: 0.9806763285024155, Average S2S accuracy: 0.959996841855936


100%|██████████| 117/117 [00:58<00:00,  2.02it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 14:54:32.553758] Epoch: 7 training ends. Status: Average loss: 1.0354087541246006, Average MLM accuracy: 0.9471949724095647, Average SC accuracy: 0.9812130971551262, Average S2S accuracy: 0.9641467177910579


100%|██████████| 13/13 [00:02<00:00,  4.92it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 14:54:35.195359] Epoch: 7 Evalutation ends. Status: Average loss: 0.9648493207418002, Average MLM accuracy: 0.9521807672096689, Average SC accuracy: 0.9806763285024155, Average S2S accuracy: 0.9638655683343422


100%|██████████| 7/7 [00:14<00:00,  2.05s/it]


[INFO] 202209081034 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202209081634 is saved
[INFO] finish 202209081634
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202209081634/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202209081634.
[INFO] SimpleTransformerStack loaded for 202209081634.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202209081634.
[INFO] SimpleDecoderHead_S2S loaded for 202209081634.
[INFO] DistilBertEncoderHead_MLM loaded for 202209081634.


100%|██████████| 8/8 [00:03<00:00,  2.15it/s]
100%|██████████| 1/1 [00:00<00:00,  5.91it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:55:29.957362] Epoch: 0 training ends. Status: Average loss: 4.47921147942543, Average MLM accuracy: 0.8811706629055007, Average SC accuracy: 0.9918032786885246, Average S2S accuracy: 0.8884183781494812
[2024-01-07 14:55:30.128184] Epoch: 0 Evalutation ends. Status: Average loss: 6.021142959594727, Average MLM accuracy: 0.8452722063037249, Average SC accuracy: 1.0, Average S2S accuracy: 0.8657747086750108


100%|██████████| 8/8 [00:03<00:00,  2.14it/s]
100%|██████████| 1/1 [00:00<00:00,  5.82it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:55:33.865881] Epoch: 1 training ends. Status: Average loss: 4.079125642776489, Average MLM accuracy: 0.8896332863187588, Average SC accuracy: 0.9918032786885246, Average S2S accuracy: 0.8942409485496506
[2024-01-07 14:55:34.039284] Epoch: 1 Evalutation ends. Status: Average loss: 5.338700771331787, Average MLM accuracy: 0.8624641833810889, Average SC accuracy: 1.0, Average S2S accuracy: 0.8739749676305567


100%|██████████| 8/8 [00:03<00:00,  2.13it/s]
100%|██████████| 1/1 [00:00<00:00,  6.02it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:55:37.794910] Epoch: 2 training ends. Status: Average loss: 4.204493880271912, Average MLM accuracy: 0.8744710860366713, Average SC accuracy: 0.9918032786885246, Average S2S accuracy: 0.8946114757569341
[2024-01-07 14:55:37.962133] Epoch: 2 Evalutation ends. Status: Average loss: 5.265561103820801, Average MLM accuracy: 0.8510028653295129, Average SC accuracy: 1.0, Average S2S accuracy: 0.8692274492878722


100%|██████████| 8/8 [00:03<00:00,  2.14it/s]
100%|██████████| 1/1 [00:00<00:00,  5.87it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:55:41.701067] Epoch: 3 training ends. Status: Average loss: 3.914519786834717, Average MLM accuracy: 0.8698871650211566, Average SC accuracy: 1.0, Average S2S accuracy: 0.8996929917425365
[2024-01-07 14:55:41.873868] Epoch: 3 Evalutation ends. Status: Average loss: 5.05389404296875, Average MLM accuracy: 0.8481375358166189, Average SC accuracy: 1.0, Average S2S accuracy: 0.8821752265861027


100%|██████████| 8/8 [00:03<00:00,  2.12it/s]
100%|██████████| 1/1 [00:00<00:00,  5.71it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:55:45.642676] Epoch: 4 training ends. Status: Average loss: 3.5512629747390747, Average MLM accuracy: 0.8811706629055007, Average SC accuracy: 1.0, Average S2S accuracy: 0.9069976709718399
[2024-01-07 14:55:45.819254] Epoch: 4 Evalutation ends. Status: Average loss: 4.225988388061523, Average MLM accuracy: 0.8796561604584527, Average SC accuracy: 1.0, Average S2S accuracy: 0.8877859300820026


100%|██████████| 8/8 [00:03<00:00,  2.13it/s]
100%|██████████| 1/1 [00:00<00:00,  5.85it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:55:49.578442] Epoch: 5 training ends. Status: Average loss: 3.3771631717681885, Average MLM accuracy: 0.8758815232722144, Average SC accuracy: 1.0, Average S2S accuracy: 0.9109676053355917
[2024-01-07 14:55:49.751002] Epoch: 5 Evalutation ends. Status: Average loss: 4.215287685394287, Average MLM accuracy: 0.8424068767908309, Average SC accuracy: 1.0, Average S2S accuracy: 0.8916702632714717


100%|██████████| 8/8 [00:03<00:00,  2.13it/s]
100%|██████████| 1/1 [00:00<00:00,  5.88it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 14:55:53.500832] Epoch: 6 training ends. Status: Average loss: 3.2170281410217285, Average MLM accuracy: 0.8730606488011283, Average SC accuracy: 1.0, Average S2S accuracy: 0.9130849036629262
[2024-01-07 14:55:53.672493] Epoch: 6 Evalutation ends. Status: Average loss: 3.533682346343994, Average MLM accuracy: 0.8681948424068768, Average SC accuracy: 1.0, Average S2S accuracy: 0.8972809667673716


100%|██████████| 8/8 [00:03<00:00,  2.14it/s]
100%|██████████| 1/1 [00:00<00:00,  5.74it/s]
  0%|          | 0/22 [00:00<?, ?it/s]

[2024-01-07 14:55:57.418907] Epoch: 7 training ends. Status: Average loss: 2.5440843999385834, Average MLM accuracy: 0.9044428772919605, Average SC accuracy: 1.0, Average S2S accuracy: 0.9246241795468981
[2024-01-07 14:55:57.594910] Epoch: 7 Evalutation ends. Status: Average loss: 3.4470455646514893, Average MLM accuracy: 0.8681948424068768, Average SC accuracy: 1.0, Average S2S accuracy: 0.8968493741907639


100%|██████████| 22/22 [00:29<00:00,  1.33s/it]


[INFO] 202209081634 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202209151400 is saved
[INFO] finish 202209151400
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202209151400/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202209151400.
[INFO] SimpleTransformerStack loaded for 202209151400.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202209151400.
[INFO] SimpleDecoderHead_S2S loaded for 202209151400.
[INFO] DistilBertEncoderHead_MLM loaded for 202209151400.


100%|██████████| 36/36 [00:17<00:00,  2.09it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2024-01-07 14:57:20.338679] Epoch: 0 training ends. Status: Average loss: 8.310297131538391, Average MLM accuracy: 0.8335269709543568, Average SC accuracy: 0.48500881834215165, Average S2S accuracy: 0.8695706455922194


100%|██████████| 4/4 [00:00<00:00,  5.06it/s]
  0%|          | 0/36 [00:00<?, ?it/s]

[2024-01-07 14:57:21.131270] Epoch: 0 Evalutation ends. Status: Average loss: 5.506852149963379, Average MLM accuracy: 0.8468536770280516, Average SC accuracy: 0.873015873015873, Average S2S accuracy: 0.8777347310847766


100%|██████████| 36/36 [00:17<00:00,  2.08it/s]
 25%|██▌       | 1/4 [00:00<00:00,  5.05it/s]

[2024-01-07 14:57:38.415262] Epoch: 1 training ends. Status: Average loss: 4.7776111430592, Average MLM accuracy: 0.8492116182572614, Average SC accuracy: 0.9523809523809523, Average S2S accuracy: 0.8902830318239136


100%|██████████| 4/4 [00:00<00:00,  5.00it/s]
  0%|          | 0/36 [00:00<?, ?it/s]

[2024-01-07 14:57:39.216132] Epoch: 1 Evalutation ends. Status: Average loss: 3.9621925354003906, Average MLM accuracy: 0.8536770280515542, Average SC accuracy: 0.9682539682539683, Average S2S accuracy: 0.9070191431175935


100%|██████████| 36/36 [00:17<00:00,  2.10it/s]
 25%|██▌       | 1/4 [00:00<00:00,  5.13it/s]

[2024-01-07 14:57:56.349991] Epoch: 2 training ends. Status: Average loss: 3.5113918251461453, Average MLM accuracy: 0.8745228215767635, Average SC accuracy: 0.9805996472663139, Average S2S accuracy: 0.9176873041437257


100%|██████████| 4/4 [00:00<00:00,  5.15it/s]
  0%|          | 0/36 [00:00<?, ?it/s]

[2024-01-07 14:57:57.128377] Epoch: 2 Evalutation ends. Status: Average loss: 2.6216235756874084, Average MLM accuracy: 0.8968915845337376, Average SC accuracy: 1.0, Average S2S accuracy: 0.9291248860528715


100%|██████████| 36/36 [00:17<00:00,  2.10it/s]
 25%|██▌       | 1/4 [00:00<00:00,  5.25it/s]

[2024-01-07 14:58:14.272865] Epoch: 3 training ends. Status: Average loss: 2.7243240541881986, Average MLM accuracy: 0.8955186721991701, Average SC accuracy: 0.9964726631393298, Average S2S accuracy: 0.9305591969736694


100%|██████████| 4/4 [00:00<00:00,  5.28it/s]
  0%|          | 0/36 [00:00<?, ?it/s]

[2024-01-07 14:58:15.032511] Epoch: 3 Evalutation ends. Status: Average loss: 2.1034069061279297, Average MLM accuracy: 0.9128127369219106, Average SC accuracy: 1.0, Average S2S accuracy: 0.9380127620783957


100%|██████████| 36/36 [00:17<00:00,  2.09it/s]
 25%|██▌       | 1/4 [00:00<00:00,  5.09it/s]

[2024-01-07 14:58:32.286767] Epoch: 4 training ends. Status: Average loss: 2.3209023078282676, Average MLM accuracy: 0.9030705394190871, Average SC accuracy: 0.9982363315696648, Average S2S accuracy: 0.9421451490068292


100%|██████████| 4/4 [00:00<00:00,  4.95it/s]
  0%|          | 0/36 [00:00<?, ?it/s]

[2024-01-07 14:58:33.096492] Epoch: 4 Evalutation ends. Status: Average loss: 1.8359241485595703, Average MLM accuracy: 0.9166034874905231, Average SC accuracy: 1.0, Average S2S accuracy: 0.9456472196900638


100%|██████████| 36/36 [00:17<00:00,  2.07it/s]
 25%|██▌       | 1/4 [00:00<00:00,  5.26it/s]

[2024-01-07 14:58:50.526231] Epoch: 5 training ends. Status: Average loss: 1.9131746490796406, Average MLM accuracy: 0.9175103734439835, Average SC accuracy: 1.0, Average S2S accuracy: 0.9467395782613581


100%|██████████| 4/4 [00:00<00:00,  5.07it/s]
  0%|          | 0/36 [00:00<?, ?it/s]

[2024-01-07 14:58:51.317454] Epoch: 5 Evalutation ends. Status: Average loss: 1.5427784323692322, Average MLM accuracy: 0.9302501895375285, Average SC accuracy: 1.0, Average S2S accuracy: 0.9514585232452142


100%|██████████| 36/36 [00:17<00:00,  2.10it/s]
 25%|██▌       | 1/4 [00:00<00:00,  5.72it/s]

[2024-01-07 14:59:08.431381] Epoch: 6 training ends. Status: Average loss: 1.6706663469473522, Average MLM accuracy: 0.9247302904564315, Average SC accuracy: 1.0, Average S2S accuracy: 0.9539183739715594


100%|██████████| 4/4 [00:00<00:00,  5.52it/s]
  0%|          | 0/36 [00:00<?, ?it/s]

[2024-01-07 14:59:09.158776] Epoch: 6 Evalutation ends. Status: Average loss: 1.4778027832508087, Average MLM accuracy: 0.9158453373768006, Average SC accuracy: 1.0, Average S2S accuracy: 0.9584092980856882


100%|██████████| 36/36 [00:17<00:00,  2.09it/s]
 25%|██▌       | 1/4 [00:00<00:00,  5.19it/s]

[2024-01-07 14:59:26.421717] Epoch: 7 training ends. Status: Average loss: 1.4456655813588037, Average MLM accuracy: 0.9303734439834025, Average SC accuracy: 1.0, Average S2S accuracy: 0.9590995917450091


100%|██████████| 4/4 [00:00<00:00,  5.19it/s]
  0%|          | 0/95 [00:00<?, ?it/s]

[2024-01-07 14:59:27.194195] Epoch: 7 Evalutation ends. Status: Average loss: 1.3236964344978333, Average MLM accuracy: 0.9272175890826384, Average SC accuracy: 1.0, Average S2S accuracy: 0.960232452142206


100%|██████████| 95/95 [02:54<00:00,  1.84s/it]


[INFO] 202209151400 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202210102130 is saved
[INFO] finish 202210102130
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202210102130/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202210102130.
[INFO] SimpleTransformerStack loaded for 202210102130.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202210102130.
[INFO] SimpleDecoderHead_S2S loaded for 202210102130.
[INFO] DistilBertEncoderHead_MLM loaded for 202210102130.


100%|██████████| 193/193 [01:35<00:00,  2.03it/s]
  5%|▍         | 1/22 [00:00<00:04,  5.09it/s]

[2024-01-07 15:04:42.198086] Epoch: 0 training ends. Status: Average loss: 8.044287299862797, Average MLM accuracy: 0.8150091519219036, Average SC accuracy: 0.8148148148148148, Average S2S accuracy: 0.8531934512297734


100%|██████████| 22/22 [00:04<00:00,  4.95it/s]
  0%|          | 0/193 [00:00<?, ?it/s]

[2024-01-07 15:04:46.641696] Epoch: 0 Evalutation ends. Status: Average loss: 4.339210521091115, Average MLM accuracy: 0.8599265115243291, Average SC accuracy: 0.9853801169590644, Average S2S accuracy: 0.8984156726281097


100%|██████████| 193/193 [01:35<00:00,  2.03it/s]
  0%|          | 0/22 [00:00<?, ?it/s]

[2024-01-07 15:06:21.948246] Epoch: 1 training ends. Status: Average loss: 3.1777888709399367, Average MLM accuracy: 0.8829286150091519, Average SC accuracy: 0.9889538661468485, Average S2S accuracy: 0.9155097918083545


100%|██████████| 22/22 [00:04<00:00,  5.17it/s]
  0%|          | 0/193 [00:00<?, ?it/s]

[2024-01-07 15:06:26.203935] Epoch: 1 Evalutation ends. Status: Average loss: 2.4061022346669976, Average MLM accuracy: 0.9047990201536578, Average SC accuracy: 0.9970760233918129, Average S2S accuracy: 0.9324778746256671


100%|██████████| 193/193 [01:35<00:00,  2.02it/s]
  0%|          | 0/22 [00:00<?, ?it/s]

[2024-01-07 15:08:01.534467] Epoch: 2 training ends. Status: Average loss: 1.991989674345817, Average MLM accuracy: 0.9147773032336791, Average SC accuracy: 0.9983755685510072, Average S2S accuracy: 0.9397862905517252


100%|██████████| 22/22 [00:04<00:00,  4.97it/s]
  0%|          | 0/193 [00:00<?, ?it/s]

[2024-01-07 15:08:05.962606] Epoch: 2 Evalutation ends. Status: Average loss: 1.6862205158580432, Average MLM accuracy: 0.926734216679657, Average SC accuracy: 1.0, Average S2S accuracy: 0.9479028993023606


100%|██████████| 193/193 [01:35<00:00,  2.03it/s]
  0%|          | 0/22 [00:00<?, ?it/s]

[2024-01-07 15:09:41.223327] Epoch: 3 training ends. Status: Average loss: 1.5386206470622918, Average MLM accuracy: 0.9269676632092739, Average SC accuracy: 1.0, Average S2S accuracy: 0.9510235432359255


100%|██████████| 22/22 [00:04<00:00,  5.00it/s]
  0%|          | 0/193 [00:00<?, ?it/s]

[2024-01-07 15:09:45.627220] Epoch: 3 Evalutation ends. Status: Average loss: 1.3750830428166823, Average MLM accuracy: 0.9340830642467431, Average SC accuracy: 1.0, Average S2S accuracy: 0.9553477322536932


100%|██████████| 193/193 [01:35<00:00,  2.03it/s]
  0%|          | 0/22 [00:00<?, ?it/s]

[2024-01-07 15:11:20.780976] Epoch: 4 training ends. Status: Average loss: 1.2647460455103858, Average MLM accuracy: 0.936790726052471, Average SC accuracy: 0.9990253411306043, Average S2S accuracy: 0.958281011976015


100%|██████████| 22/22 [00:04<00:00,  4.98it/s]
  0%|          | 0/193 [00:00<?, ?it/s]

[2024-01-07 15:11:25.202064] Epoch: 4 Evalutation ends. Status: Average loss: 1.2777701860124415, Average MLM accuracy: 0.9349738336488141, Average SC accuracy: 1.0, Average S2S accuracy: 0.9583591253576029


100%|██████████| 193/193 [01:34<00:00,  2.03it/s]
  0%|          | 0/22 [00:00<?, ?it/s]

[2024-01-07 15:13:00.044806] Epoch: 5 training ends. Status: Average loss: 1.119335633176596, Average MLM accuracy: 0.9396583282489323, Average SC accuracy: 1.0, Average S2S accuracy: 0.9624111147977936


100%|██████████| 22/22 [00:04<00:00,  4.94it/s]
  0%|          | 0/193 [00:00<?, ?it/s]

[2024-01-07 15:13:04.503627] Epoch: 5 Evalutation ends. Status: Average loss: 1.0538779334588484, Average MLM accuracy: 0.9446609508963367, Average SC accuracy: 1.0, Average S2S accuracy: 0.9635788734043799


100%|██████████| 193/193 [01:35<00:00,  2.02it/s]
  5%|▍         | 1/22 [00:00<00:04,  5.05it/s]

[2024-01-07 15:14:39.995572] Epoch: 6 training ends. Status: Average loss: 0.9167081244251271, Average MLM accuracy: 0.9483343502135448, Average SC accuracy: 1.0, Average S2S accuracy: 0.9696667503808384


100%|██████████| 22/22 [00:04<00:00,  5.02it/s]
  0%|          | 0/193 [00:00<?, ?it/s]

[2024-01-07 15:14:44.377995] Epoch: 6 Evalutation ends. Status: Average loss: 0.9158408425071023, Average MLM accuracy: 0.9506736443603162, Average SC accuracy: 1.0, Average S2S accuracy: 0.9681963428303749


100%|██████████| 193/193 [01:34<00:00,  2.04it/s]
  0%|          | 0/22 [00:00<?, ?it/s]

[2024-01-07 15:16:18.874397] Epoch: 7 training ends. Status: Average loss: 0.7889720260170457, Average MLM accuracy: 0.9542281879194631, Average SC accuracy: 1.0, Average S2S accuracy: 0.974280806662426


100%|██████████| 22/22 [00:04<00:00,  4.92it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

[2024-01-07 15:16:23.348441] Epoch: 7 Evalutation ends. Status: Average loss: 0.8394198539582166, Average MLM accuracy: 0.9560182607727424, Average SC accuracy: 1.0, Average S2S accuracy: 0.969969718769344


100%|██████████| 12/12 [00:19<00:00,  1.63s/it]


[INFO] 202210102130 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202210110644 is saved
[INFO] finish 202210110644
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202210110644/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202210110644.
[INFO] SimpleTransformerStack loaded for 202210110644.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202210110644.
[INFO] SimpleDecoderHead_S2S loaded for 202210110644.
[INFO] DistilBertEncoderHead_MLM loaded for 202210110644.


100%|██████████| 23/23 [00:10<00:00,  2.12it/s]
 33%|███▎      | 1/3 [00:00<00:00,  5.05it/s]

[2024-01-07 15:17:24.643899] Epoch: 0 training ends. Status: Average loss: 9.479487211807914, Average MLM accuracy: 0.920734310516579, Average SC accuracy: 0.28493150684931506, Average S2S accuracy: 0.9440722587273389


100%|██████████| 3/3 [00:00<00:00,  5.89it/s]
  0%|          | 0/23 [00:00<?, ?it/s]

[2024-01-07 15:17:25.155223] Epoch: 0 Evalutation ends. Status: Average loss: 9.437661170959473, Average MLM accuracy: 0.9152317880794701, Average SC accuracy: 0.225, Average S2S accuracy: 0.9417495029821074


100%|██████████| 23/23 [00:10<00:00,  2.11it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 15:17:36.038109] Epoch: 1 training ends. Status: Average loss: 6.08734307081803, Average MLM accuracy: 0.9244343247474028, Average SC accuracy: 0.3917808219178082, Average S2S accuracy: 0.9455063033753558


100%|██████████| 3/3 [00:00<00:00,  5.94it/s]
  0%|          | 0/23 [00:00<?, ?it/s]

[2024-01-07 15:17:36.544688] Epoch: 1 Evalutation ends. Status: Average loss: 4.1789398193359375, Average MLM accuracy: 0.9218543046357616, Average SC accuracy: 0.6, Average S2S accuracy: 0.9407554671968191


100%|██████████| 23/23 [00:10<00:00,  2.10it/s]
 33%|███▎      | 1/3 [00:00<00:00,  5.15it/s]

[2024-01-07 15:17:47.473997] Epoch: 2 training ends. Status: Average loss: 3.2047365852024243, Average MLM accuracy: 0.9195958445994024, Average SC accuracy: 0.7041095890410959, Average S2S accuracy: 0.9473898247040945


100%|██████████| 3/3 [00:00<00:00,  6.06it/s]
  0%|          | 0/23 [00:00<?, ?it/s]

[2024-01-07 15:17:47.970491] Epoch: 2 Evalutation ends. Status: Average loss: 2.881769140561422, Average MLM accuracy: 0.9033112582781457, Average SC accuracy: 0.775, Average S2S accuracy: 0.9461232604373757


100%|██████████| 23/23 [00:11<00:00,  2.09it/s]
 33%|███▎      | 1/3 [00:00<00:00,  5.26it/s]

[2024-01-07 15:17:58.990627] Epoch: 3 training ends. Status: Average loss: 2.48728316763173, Average MLM accuracy: 0.9230112423509321, Average SC accuracy: 0.8575342465753425, Average S2S accuracy: 0.9526337193125147


100%|██████████| 3/3 [00:00<00:00,  6.10it/s]
  0%|          | 0/23 [00:00<?, ?it/s]

[2024-01-07 15:17:59.483928] Epoch: 3 Evalutation ends. Status: Average loss: 2.3181944688161216, Average MLM accuracy: 0.9298013245033112, Average SC accuracy: 0.85, Average S2S accuracy: 0.9495029821073558


100%|██████████| 23/23 [00:10<00:00,  2.10it/s]
 33%|███▎      | 1/3 [00:00<00:00,  5.07it/s]

[2024-01-07 15:18:10.456966] Epoch: 4 training ends. Status: Average loss: 2.2391025605409043, Average MLM accuracy: 0.9193112281201081, Average SC accuracy: 0.936986301369863, Average S2S accuracy: 0.9542175895207723


100%|██████████| 3/3 [00:00<00:00,  5.94it/s]
  0%|          | 0/23 [00:00<?, ?it/s]

[2024-01-07 15:18:10.963273] Epoch: 4 Evalutation ends. Status: Average loss: 2.2621387243270874, Average MLM accuracy: 0.9298013245033112, Average SC accuracy: 0.9, Average S2S accuracy: 0.9538767395626243


100%|██████████| 23/23 [00:10<00:00,  2.10it/s]
 33%|███▎      | 1/3 [00:00<00:00,  5.42it/s]

[2024-01-07 15:18:21.920030] Epoch: 5 training ends. Status: Average loss: 2.032361090183258, Average MLM accuracy: 0.9274227977799915, Average SC accuracy: 0.936986301369863, Average S2S accuracy: 0.9578990175723978


100%|██████████| 3/3 [00:00<00:00,  6.22it/s]
  0%|          | 0/23 [00:00<?, ?it/s]

[2024-01-07 15:18:22.403682] Epoch: 5 Evalutation ends. Status: Average loss: 2.066739281018575, Average MLM accuracy: 0.9298013245033112, Average SC accuracy: 0.925, Average S2S accuracy: 0.9568588469184891


100%|██████████| 23/23 [00:11<00:00,  2.08it/s]
 33%|███▎      | 1/3 [00:00<00:00,  5.04it/s]

[2024-01-07 15:18:33.460584] Epoch: 6 training ends. Status: Average loss: 1.8468331150386645, Average MLM accuracy: 0.9346805180019924, Average SC accuracy: 0.9506849315068493, Average S2S accuracy: 0.9595685023865071


100%|██████████| 3/3 [00:00<00:00,  5.90it/s]
  0%|          | 0/23 [00:00<?, ?it/s]

[2024-01-07 15:18:33.970423] Epoch: 6 Evalutation ends. Status: Average loss: 2.06770396232605, Average MLM accuracy: 0.919205298013245, Average SC accuracy: 0.925, Average S2S accuracy: 0.9578528827037773


100%|██████████| 23/23 [00:10<00:00,  2.10it/s]
 33%|███▎      | 1/3 [00:00<00:00,  5.13it/s]

[2024-01-07 15:18:44.942787] Epoch: 7 training ends. Status: Average loss: 1.716817171677299, Average MLM accuracy: 0.9388074569517575, Average SC accuracy: 0.9534246575342465, Average S2S accuracy: 0.9619443076988934


100%|██████████| 3/3 [00:00<00:00,  5.95it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2024-01-07 15:18:45.448728] Epoch: 7 Evalutation ends. Status: Average loss: 1.952022393544515, Average MLM accuracy: 0.9311258278145695, Average SC accuracy: 0.9, Average S2S accuracy: 0.9596421471172962


100%|██████████| 1/1 [00:04<00:00,  4.81s/it]


[INFO] 202210110644 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202210121247 is saved
[INFO] finish 202210121247
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202210121247/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202210121247.
[INFO] SimpleTransformerStack loaded for 202210121247.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202210121247.
[INFO] SimpleDecoderHead_S2S loaded for 202210121247.
[INFO] DistilBertEncoderHead_MLM loaded for 202210121247.


100%|██████████| 1/1 [00:00<00:00,  2.03it/s]
100%|██████████| 1/1 [00:00<00:00, 16.11it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2024-01-07 15:19:30.355372] Epoch: 0 training ends. Status: Average loss: 14.204964637756348, Average MLM accuracy: 0.7030075187969925, Average SC accuracy: 0.6428571428571429, Average S2S accuracy: 0.7316526610644257
[2024-01-07 15:19:30.419149] Epoch: 0 Evalutation ends. Status: Average loss: 12.232487678527832, Average MLM accuracy: 0.8157894736842105, Average SC accuracy: 0.0, Average S2S accuracy: 0.7450980392156863


100%|██████████| 1/1 [00:00<00:00,  2.12it/s]
100%|██████████| 1/1 [00:00<00:00, 18.65it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2024-01-07 15:19:30.891513] Epoch: 1 training ends. Status: Average loss: 13.74630355834961, Average MLM accuracy: 0.7048872180451128, Average SC accuracy: 0.5714285714285714, Average S2S accuracy: 0.7369747899159664
[2024-01-07 15:19:30.946588] Epoch: 1 Evalutation ends. Status: Average loss: 12.879925727844238, Average MLM accuracy: 0.7105263157894737, Average SC accuracy: 0.0, Average S2S accuracy: 0.7215686274509804


100%|██████████| 1/1 [00:00<00:00,  2.07it/s]
100%|██████████| 1/1 [00:00<00:00, 17.92it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2024-01-07 15:19:31.429781] Epoch: 2 training ends. Status: Average loss: 14.268159866333008, Average MLM accuracy: 0.6973684210526315, Average SC accuracy: 0.2857142857142857, Average S2S accuracy: 0.7400560224089636
[2024-01-07 15:19:31.486709] Epoch: 2 Evalutation ends. Status: Average loss: 13.62256145477295, Average MLM accuracy: 0.7631578947368421, Average SC accuracy: 1.0, Average S2S accuracy: 0.7333333333333333


100%|██████████| 1/1 [00:00<00:00,  2.06it/s]
100%|██████████| 1/1 [00:00<00:00, 16.51it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2024-01-07 15:19:31.973219] Epoch: 3 training ends. Status: Average loss: 13.759286880493164, Average MLM accuracy: 0.7199248120300752, Average SC accuracy: 0.5, Average S2S accuracy: 0.7355742296918768
[2024-01-07 15:19:32.035622] Epoch: 3 Evalutation ends. Status: Average loss: 12.85170841217041, Average MLM accuracy: 0.7894736842105263, Average SC accuracy: 1.0, Average S2S accuracy: 0.7529411764705882


100%|██████████| 1/1 [00:00<00:00,  2.05it/s]
100%|██████████| 1/1 [00:00<00:00, 18.54it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2024-01-07 15:19:32.525449] Epoch: 4 training ends. Status: Average loss: 14.531805038452148, Average MLM accuracy: 0.693609022556391, Average SC accuracy: 0.2857142857142857, Average S2S accuracy: 0.7380952380952381
[2024-01-07 15:19:32.580921] Epoch: 4 Evalutation ends. Status: Average loss: 10.976428985595703, Average MLM accuracy: 0.7631578947368421, Average SC accuracy: 0.0, Average S2S accuracy: 0.7294117647058823


100%|██████████| 1/1 [00:00<00:00,  2.09it/s]
100%|██████████| 1/1 [00:00<00:00, 16.89it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2024-01-07 15:19:33.061211] Epoch: 5 training ends. Status: Average loss: 13.651839256286621, Average MLM accuracy: 0.7274436090225563, Average SC accuracy: 0.42857142857142855, Average S2S accuracy: 0.738655462184874
[2024-01-07 15:19:33.122354] Epoch: 5 Evalutation ends. Status: Average loss: 12.669811248779297, Average MLM accuracy: 0.7368421052631579, Average SC accuracy: 1.0, Average S2S accuracy: 0.7568627450980392


100%|██████████| 1/1 [00:00<00:00,  2.07it/s]
100%|██████████| 1/1 [00:00<00:00, 17.31it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2024-01-07 15:19:33.607062] Epoch: 6 training ends. Status: Average loss: 13.454585075378418, Average MLM accuracy: 0.7124060150375939, Average SC accuracy: 0.42857142857142855, Average S2S accuracy: 0.7389355742296919
[2024-01-07 15:19:33.666074] Epoch: 6 Evalutation ends. Status: Average loss: 12.037069320678711, Average MLM accuracy: 0.7368421052631579, Average SC accuracy: 1.0, Average S2S accuracy: 0.7568627450980392


100%|██████████| 1/1 [00:00<00:00,  2.02it/s]
100%|██████████| 1/1 [00:00<00:00, 22.56it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 15:19:34.162864] Epoch: 7 training ends. Status: Average loss: 13.964005470275879, Average MLM accuracy: 0.6879699248120301, Average SC accuracy: 0.42857142857142855, Average S2S accuracy: 0.7364145658263306
[2024-01-07 15:19:34.208229] Epoch: 7 Evalutation ends. Status: Average loss: 11.433520317077637, Average MLM accuracy: 0.7894736842105263, Average SC accuracy: 1.0, Average S2S accuracy: 0.7529411764705882


100%|██████████| 3/3 [00:08<00:00,  2.74s/it]


[INFO] 202210121247 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202210130649 is saved
[INFO] finish 202210130649
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202210130649/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202210130649.
[INFO] SimpleTransformerStack loaded for 202210130649.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202210130649.
[INFO] SimpleDecoderHead_S2S loaded for 202210130649.
[INFO] DistilBertEncoderHead_MLM loaded for 202210130649.


100%|██████████| 3/3 [00:01<00:00,  2.30it/s]
100%|██████████| 1/1 [00:00<00:00, 12.55it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 15:20:22.857143] Epoch: 0 training ends. Status: Average loss: 3.193876107533773, Average MLM accuracy: 0.8924485125858124, Average SC accuracy: 1.0, Average S2S accuracy: 0.919917864476386
[2024-01-07 15:20:22.938656] Epoch: 0 Evalutation ends. Status: Average loss: 3.4827938079833984, Average MLM accuracy: 0.875, Average SC accuracy: 1.0, Average S2S accuracy: 0.9209659714599341


100%|██████████| 3/3 [00:01<00:00,  2.30it/s]
100%|██████████| 1/1 [00:00<00:00, 12.76it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 15:20:24.243802] Epoch: 1 training ends. Status: Average loss: 3.1165921688079834, Average MLM accuracy: 0.8924485125858124, Average SC accuracy: 1.0, Average S2S accuracy: 0.9174081679215149
[2024-01-07 15:20:24.323750] Epoch: 1 Evalutation ends. Status: Average loss: 2.6508328914642334, Average MLM accuracy: 0.9264705882352942, Average SC accuracy: 1.0, Average S2S accuracy: 0.9308452250274424


100%|██████████| 3/3 [00:01<00:00,  2.31it/s]
100%|██████████| 1/1 [00:00<00:00, 11.02it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 15:20:25.623279] Epoch: 2 training ends. Status: Average loss: 3.1766359011332193, Average MLM accuracy: 0.9000762776506483, Average SC accuracy: 1.0, Average S2S accuracy: 0.918777093315081
[2024-01-07 15:20:25.715514] Epoch: 2 Evalutation ends. Status: Average loss: 3.448474407196045, Average MLM accuracy: 0.8823529411764706, Average SC accuracy: 1.0, Average S2S accuracy: 0.9264544456641054


100%|██████████| 3/3 [00:01<00:00,  2.25it/s]
100%|██████████| 1/1 [00:00<00:00, 11.94it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 15:20:27.048537] Epoch: 3 training ends. Status: Average loss: 3.315661350886027, Average MLM accuracy: 0.8939740655987796, Average SC accuracy: 1.0, Average S2S accuracy: 0.918777093315081
[2024-01-07 15:20:27.133812] Epoch: 3 Evalutation ends. Status: Average loss: 2.9294536113739014, Average MLM accuracy: 0.9338235294117647, Average SC accuracy: 1.0, Average S2S accuracy: 0.9198682766190999


100%|██████████| 3/3 [00:01<00:00,  2.28it/s]
100%|██████████| 1/1 [00:00<00:00, 12.12it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 15:20:28.453745] Epoch: 4 training ends. Status: Average loss: 3.586582819620768, Average MLM accuracy: 0.8771929824561403, Average SC accuracy: 1.0, Average S2S accuracy: 0.9146703171343828
[2024-01-07 15:20:28.537926] Epoch: 4 Evalutation ends. Status: Average loss: 3.053008794784546, Average MLM accuracy: 0.875, Average SC accuracy: 1.0, Average S2S accuracy: 0.9264544456641054


100%|██████████| 3/3 [00:01<00:00,  2.25it/s]
100%|██████████| 1/1 [00:00<00:00, 13.32it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 15:20:29.869617] Epoch: 5 training ends. Status: Average loss: 3.058168967564901, Average MLM accuracy: 0.9054157131960335, Average SC accuracy: 1.0, Average S2S accuracy: 0.917636322153776
[2024-01-07 15:20:29.945994] Epoch: 5 Evalutation ends. Status: Average loss: 2.763751268386841, Average MLM accuracy: 0.8970588235294118, Average SC accuracy: 1.0, Average S2S accuracy: 0.9330406147091108


100%|██████████| 3/3 [00:01<00:00,  2.29it/s]
100%|██████████| 1/1 [00:00<00:00, 12.71it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 15:20:31.255521] Epoch: 6 training ends. Status: Average loss: 3.121002515157064, Average MLM accuracy: 0.8878718535469108, Average SC accuracy: 1.0, Average S2S accuracy: 0.9156970111795574
[2024-01-07 15:20:31.335817] Epoch: 6 Evalutation ends. Status: Average loss: 2.7045302391052246, Average MLM accuracy: 0.8970588235294118, Average SC accuracy: 1.0, Average S2S accuracy: 0.9363336992316136


100%|██████████| 3/3 [00:01<00:00,  2.32it/s]
100%|██████████| 1/1 [00:00<00:00, 12.27it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 15:20:32.628709] Epoch: 7 training ends. Status: Average loss: 3.1456112066904702, Average MLM accuracy: 0.8802440884820748, Average SC accuracy: 1.0, Average S2S accuracy: 0.9228838694957792
[2024-01-07 15:20:32.711820] Epoch: 7 Evalutation ends. Status: Average loss: 3.3540639877319336, Average MLM accuracy: 0.875, Average SC accuracy: 1.0, Average S2S accuracy: 0.9308452250274424


100%|██████████| 11/11 [00:35<00:00,  3.24s/it]


[INFO] 202210130649 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202210181310 is saved
[INFO] finish 202210181310
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202210181310/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202210181310.
[INFO] SimpleTransformerStack loaded for 202210181310.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202210181310.
[INFO] SimpleDecoderHead_S2S loaded for 202210181310.
[INFO] DistilBertEncoderHead_MLM loaded for 202210181310.


100%|██████████| 17/17 [00:08<00:00,  2.00it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:21:55.930533] Epoch: 0 training ends. Status: Average loss: 13.114077904645134, Average MLM accuracy: 0.799421892673118, Average SC accuracy: 0.26296296296296295, Average S2S accuracy: 0.8152464007818667


100%|██████████| 2/2 [00:00<00:00,  4.89it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 15:21:56.340657] Epoch: 0 Evalutation ends. Status: Average loss: 11.562343120574951, Average MLM accuracy: 0.7850574712643679, Average SC accuracy: 0.43333333333333335, Average S2S accuracy: 0.8208390646492435


100%|██████████| 17/17 [00:08<00:00,  2.02it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:22:04.776702] Epoch: 1 training ends. Status: Average loss: 11.167953547309427, Average MLM accuracy: 0.8006786477315572, Average SC accuracy: 0.43333333333333335, Average S2S accuracy: 0.8196631958801639


100%|██████████| 2/2 [00:00<00:00,  4.98it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 15:22:05.179319] Epoch: 1 Evalutation ends. Status: Average loss: 9.919548034667969, Average MLM accuracy: 0.7827586206896552, Average SC accuracy: 0.6, Average S2S accuracy: 0.8218707015130674


100%|██████████| 17/17 [00:08<00:00,  2.01it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:22:13.652379] Epoch: 2 training ends. Status: Average loss: 9.150292620939368, Average MLM accuracy: 0.7951489254744251, Average SC accuracy: 0.5925925925925926, Average S2S accuracy: 0.8272375296019246


100%|██████████| 2/2 [00:00<00:00,  4.93it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 15:22:14.060153] Epoch: 2 Evalutation ends. Status: Average loss: 8.703383922576904, Average MLM accuracy: 0.7942528735632184, Average SC accuracy: 0.6, Average S2S accuracy: 0.8325309491059147


100%|██████████| 17/17 [00:08<00:00,  2.01it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:22:22.524723] Epoch: 3 training ends. Status: Average loss: 7.659470614264993, Average MLM accuracy: 0.8045745884127183, Average SC accuracy: 0.7185185185185186, Average S2S accuracy: 0.8377250686012856


100%|██████████| 2/2 [00:00<00:00,  4.95it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 15:22:22.930935] Epoch: 3 Evalutation ends. Status: Average loss: 7.033999681472778, Average MLM accuracy: 0.8, Average SC accuracy: 0.8333333333333334, Average S2S accuracy: 0.8469738651994498


100%|██████████| 17/17 [00:08<00:00,  2.01it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:22:31.400724] Epoch: 4 training ends. Status: Average loss: 6.501824771656709, Average MLM accuracy: 0.8196556491139877, Average SC accuracy: 0.9148148148148149, Average S2S accuracy: 0.8500545051310002


100%|██████████| 2/2 [00:00<00:00,  5.09it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 15:22:31.795109] Epoch: 4 Evalutation ends. Status: Average loss: 6.297447443008423, Average MLM accuracy: 0.8103448275862069, Average SC accuracy: 0.9333333333333333, Average S2S accuracy: 0.8552269601100413


100%|██████████| 17/17 [00:08<00:00,  2.01it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:22:40.246684] Epoch: 5 training ends. Status: Average loss: 5.800651297849767, Average MLM accuracy: 0.8143772778685434, Average SC accuracy: 0.9703703703703703, Average S2S accuracy: 0.8607675826034658


100%|██████████| 2/2 [00:00<00:00,  4.87it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 15:22:40.659030] Epoch: 5 Evalutation ends. Status: Average loss: 5.967469692230225, Average MLM accuracy: 0.8241379310344827, Average SC accuracy: 0.9666666666666667, Average S2S accuracy: 0.8627922971114168


100%|██████████| 17/17 [00:08<00:00,  2.01it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:22:49.135357] Epoch: 6 training ends. Status: Average loss: 5.051235844107235, Average MLM accuracy: 0.8385069749905744, Average SC accuracy: 0.9592592592592593, Average S2S accuracy: 0.870785249783859


100%|██████████| 2/2 [00:00<00:00,  4.92it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 15:22:49.543008] Epoch: 6 Evalutation ends. Status: Average loss: 5.0835793018341064, Average MLM accuracy: 0.8344827586206897, Average SC accuracy: 0.9666666666666667, Average S2S accuracy: 0.8698418156808804


100%|██████████| 17/17 [00:08<00:00,  2.00it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:22:58.059831] Epoch: 7 training ends. Status: Average loss: 4.6168127901413865, Average MLM accuracy: 0.838884001508106, Average SC accuracy: 0.9888888888888889, Average S2S accuracy: 0.8806149682366651


100%|██████████| 2/2 [00:00<00:00,  4.90it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 15:22:58.470847] Epoch: 7 Evalutation ends. Status: Average loss: 4.534365892410278, Average MLM accuracy: 0.8413793103448276, Average SC accuracy: 1.0, Average S2S accuracy: 0.8813617606602476


100%|██████████| 8/8 [00:21<00:00,  2.65s/it]


[INFO] 202210181310 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202210200116 is saved
[INFO] finish 202210200116
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202210200116/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202210200116.
[INFO] SimpleTransformerStack loaded for 202210200116.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202210200116.
[INFO] SimpleDecoderHead_S2S loaded for 202210200116.
[INFO] DistilBertEncoderHead_MLM loaded for 202210200116.


100%|██████████| 11/11 [00:05<00:00,  2.13it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:24:04.641559] Epoch: 0 training ends. Status: Average loss: 11.434135263616389, Average MLM accuracy: 0.8339645206298585, Average SC accuracy: 0.06172839506172839, Average S2S accuracy: 0.8638169489500254


100%|██████████| 2/2 [00:00<00:00,  6.99it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 15:24:04.929151] Epoch: 0 Evalutation ends. Status: Average loss: 10.10607624053955, Average MLM accuracy: 0.8493589743589743, Average SC accuracy: 0.1111111111111111, Average S2S accuracy: 0.8628434886499403


100%|██████████| 11/11 [00:05<00:00,  2.13it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:24:10.103284] Epoch: 1 training ends. Status: Average loss: 10.598578799854625, Average MLM accuracy: 0.8445286027506478, Average SC accuracy: 0.07407407407407407, Average S2S accuracy: 0.8671027869880814


100%|██████████| 2/2 [00:00<00:00,  6.75it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 15:24:10.400948] Epoch: 1 Evalutation ends. Status: Average loss: 8.516211986541748, Average MLM accuracy: 0.8461538461538461, Average SC accuracy: 0.2777777777777778, Average S2S accuracy: 0.869295101553166


100%|██████████| 11/11 [00:05<00:00,  2.14it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:24:15.543982] Epoch: 2 training ends. Status: Average loss: 9.948950507424094, Average MLM accuracy: 0.8339645206298585, Average SC accuracy: 0.08024691358024691, Average S2S accuracy: 0.8692236460853721


100%|██████████| 2/2 [00:00<00:00,  7.25it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 15:24:15.822028] Epoch: 2 Evalutation ends. Status: Average loss: 9.152724742889404, Average MLM accuracy: 0.8141025641025641, Average SC accuracy: 0.0, Average S2S accuracy: 0.8623655913978494


100%|██████████| 11/11 [00:05<00:00,  2.13it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:24:20.989781] Epoch: 3 training ends. Status: Average loss: 8.402018243616277, Average MLM accuracy: 0.8391469005381702, Average SC accuracy: 0.13580246913580246, Average S2S accuracy: 0.8726289691429937


100%|██████████| 2/2 [00:00<00:00,  7.02it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 15:24:21.276478] Epoch: 3 Evalutation ends. Status: Average loss: 7.791592597961426, Average MLM accuracy: 0.8605769230769231, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.8702508960573476


100%|██████████| 11/11 [00:05<00:00,  2.10it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:24:26.510005] Epoch: 4 training ends. Status: Average loss: 6.962450070814653, Average MLM accuracy: 0.8449272473589795, Average SC accuracy: 0.3395061728395062, Average S2S accuracy: 0.8748693132598501


100%|██████████| 2/2 [00:00<00:00,  7.05it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 15:24:26.795301] Epoch: 4 Evalutation ends. Status: Average loss: 7.183140516281128, Average MLM accuracy: 0.8365384615384616, Average SC accuracy: 0.6111111111111112, Average S2S accuracy: 0.8764635603345281


100%|██████████| 11/11 [00:05<00:00,  2.12it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:24:31.991619] Epoch: 5 training ends. Status: Average loss: 5.458438938314265, Average MLM accuracy: 0.845525214271477, Average SC accuracy: 0.6790123456790124, Average S2S accuracy: 0.8795292290229113


100%|██████████| 2/2 [00:00<00:00,  7.13it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 15:24:32.273240] Epoch: 5 Evalutation ends. Status: Average loss: 6.341653108596802, Average MLM accuracy: 0.8365384615384616, Average SC accuracy: 0.8333333333333334, Average S2S accuracy: 0.8783751493428913


100%|██████████| 11/11 [00:05<00:00,  2.13it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:24:37.444647] Epoch: 6 training ends. Status: Average loss: 5.031796758825129, Average MLM accuracy: 0.8405421566673311, Average SC accuracy: 0.8518518518518519, Average S2S accuracy: 0.8811721480419392


100%|██████████| 2/2 [00:00<00:00,  7.01it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 15:24:37.732099] Epoch: 6 Evalutation ends. Status: Average loss: 5.51567816734314, Average MLM accuracy: 0.8173076923076923, Average SC accuracy: 0.7777777777777778, Average S2S accuracy: 0.8833930704898447


100%|██████████| 11/11 [00:05<00:00,  2.12it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 15:24:42.932211] Epoch: 7 training ends. Status: Average loss: 4.46821269122037, Average MLM accuracy: 0.8554913294797688, Average SC accuracy: 0.9012345679012346, Average S2S accuracy: 0.888162021686531


100%|██████████| 2/2 [00:00<00:00,  7.08it/s]
  0%|          | 0/147 [00:00<?, ?it/s]

[2024-01-07 15:24:43.216202] Epoch: 7 Evalutation ends. Status: Average loss: 4.782933473587036, Average MLM accuracy: 0.8413461538461539, Average SC accuracy: 1.0, Average S2S accuracy: 0.8724014336917563


100%|██████████| 147/147 [07:36<00:00,  3.10s/it]


[INFO] 202210200116 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202212132000 is saved
[INFO] finish 202212132000
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202212132000/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202212132000.
[INFO] SimpleTransformerStack loaded for 202212132000.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202212132000.
[INFO] SimpleDecoderHead_S2S loaded for 202212132000.
[INFO] DistilBertEncoderHead_MLM loaded for 202212132000.


100%|██████████| 401/401 [03:22<00:00,  1.98it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

[2024-01-07 15:36:28.610312] Epoch: 0 training ends. Status: Average loss: 5.4595340552769995, Average MLM accuracy: 0.8445738581074, Average SC accuracy: 0.8788398565414003, Average S2S accuracy: 0.8779197252128838


100%|██████████| 45/45 [00:09<00:00,  4.63it/s]
  0%|          | 0/401 [00:00<?, ?it/s]

[2024-01-07 15:36:38.328107] Epoch: 0 Evalutation ends. Status: Average loss: 3.251995144950019, Average MLM accuracy: 0.8696256444743331, Average SC accuracy: 0.9985955056179775, Average S2S accuracy: 0.9111388732167325


100%|██████████| 401/401 [03:21<00:00,  1.99it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

[2024-01-07 15:40:00.102495] Epoch: 1 training ends. Status: Average loss: 2.305763268411308, Average MLM accuracy: 0.8982489233548413, Average SC accuracy: 0.9984406673943552, Average S2S accuracy: 0.9308869447720346


100%|██████████| 45/45 [00:09<00:00,  4.65it/s]
  0%|          | 0/401 [00:00<?, ?it/s]

[2024-01-07 15:40:09.772332] Epoch: 1 Evalutation ends. Status: Average loss: 1.8365456183751425, Average MLM accuracy: 0.9136964806097287, Average SC accuracy: 1.0, Average S2S accuracy: 0.9410144810725128


100%|██████████| 401/401 [03:21<00:00,  1.99it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

[2024-01-07 15:43:31.191474] Epoch: 2 training ends. Status: Average loss: 1.5672435017297988, Average MLM accuracy: 0.9222819388096272, Average SC accuracy: 0.9996881334788711, Average S2S accuracy: 0.948146337890698


100%|██████████| 45/45 [00:09<00:00,  4.70it/s]
  0%|          | 0/401 [00:00<?, ?it/s]

[2024-01-07 15:43:40.767952] Epoch: 2 Evalutation ends. Status: Average loss: 1.4453477912478976, Average MLM accuracy: 0.9246357318986774, Average SC accuracy: 1.0, Average S2S accuracy: 0.9512775046344806


100%|██████████| 401/401 [03:21<00:00,  1.99it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

[2024-01-07 15:47:02.067972] Epoch: 3 training ends. Status: Average loss: 1.2444348477960525, Average MLM accuracy: 0.934178680679272, Average SC accuracy: 1.0, Average S2S accuracy: 0.9581720917093032


100%|██████████| 45/45 [00:09<00:00,  4.69it/s]
  0%|          | 0/401 [00:00<?, ?it/s]

[2024-01-07 15:47:11.663638] Epoch: 3 Evalutation ends. Status: Average loss: 1.1768911335203383, Average MLM accuracy: 0.9374131360681461, Average SC accuracy: 1.0, Average S2S accuracy: 0.9595322532978695


100%|██████████| 401/401 [03:21<00:00,  1.99it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

[2024-01-07 15:50:33.630581] Epoch: 4 training ends. Status: Average loss: 1.0184615811802205, Average MLM accuracy: 0.9435753102684252, Average SC accuracy: 1.0, Average S2S accuracy: 0.9655424967901146


100%|██████████| 45/45 [00:09<00:00,  4.63it/s]
  0%|          | 0/401 [00:00<?, ?it/s]

[2024-01-07 15:50:43.354918] Epoch: 4 Evalutation ends. Status: Average loss: 1.0022632135285272, Average MLM accuracy: 0.9471867294328626, Average SC accuracy: 1.0, Average S2S accuracy: 0.9637906020794713


100%|██████████| 401/401 [03:22<00:00,  1.98it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

[2024-01-07 15:54:05.886817] Epoch: 5 training ends. Status: Average loss: 0.8714863888343374, Average MLM accuracy: 0.9485206421446073, Average SC accuracy: 1.0, Average S2S accuracy: 0.9710004230023601


100%|██████████| 45/45 [00:09<00:00,  4.64it/s]
  0%|          | 0/401 [00:00<?, ?it/s]

[2024-01-07 15:54:15.581285] Epoch: 5 Evalutation ends. Status: Average loss: 0.9357544355922275, Average MLM accuracy: 0.947052230441605, Average SC accuracy: 1.0, Average S2S accuracy: 0.9675720695306413


100%|██████████| 401/401 [03:22<00:00,  1.98it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

[2024-01-07 15:57:38.275733] Epoch: 6 training ends. Status: Average loss: 0.7627806977886809, Average MLM accuracy: 0.9543492472216816, Average SC accuracy: 1.0, Average S2S accuracy: 0.9745847327536856


100%|██████████| 45/45 [00:09<00:00,  4.66it/s]
  0%|          | 0/401 [00:00<?, ?it/s]

[2024-01-07 15:57:47.944576] Epoch: 6 Evalutation ends. Status: Average loss: 0.8218455698755053, Average MLM accuracy: 0.9530150190540237, Average SC accuracy: 1.0, Average S2S accuracy: 0.9714005534510088


100%|██████████| 401/401 [03:22<00:00,  1.98it/s]
  0%|          | 0/45 [00:00<?, ?it/s]

[2024-01-07 16:01:10.648577] Epoch: 7 training ends. Status: Average loss: 0.7066856018474275, Average MLM accuracy: 0.9570938814617423, Average SC accuracy: 1.0, Average S2S accuracy: 0.9766227670931068


100%|██████████| 45/45 [00:09<00:00,  4.70it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2024-01-07 16:01:20.228263] Epoch: 7 Evalutation ends. Status: Average loss: 0.7933448036511739, Average MLM accuracy: 0.953866845998655, Average SC accuracy: 1.0, Average S2S accuracy: 0.9721796835119959


100%|██████████| 5/5 [00:16<00:00,  3.24s/it]


[INFO] 202212132000 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202302070551 is saved
[INFO] finish 202302070551
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202302070551/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202302070551.
[INFO] SimpleTransformerStack loaded for 202302070551.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202302070551.
[INFO] SimpleDecoderHead_S2S loaded for 202302070551.
[INFO] DistilBertEncoderHead_MLM loaded for 202302070551.


100%|██████████| 7/7 [00:03<00:00,  2.03it/s]
100%|██████████| 1/1 [00:00<00:00,  5.74it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:02:08.127902] Epoch: 0 training ends. Status: Average loss: 5.2237303256988525, Average MLM accuracy: 0.8963169297431136, Average SC accuracy: 0.8703703703703703, Average S2S accuracy: 0.9046451374109373
[2024-01-07 16:02:08.303688] Epoch: 0 Evalutation ends. Status: Average loss: 4.297547340393066, Average MLM accuracy: 0.8970976253298153, Average SC accuracy: 0.9166666666666666, Average S2S accuracy: 0.910271546635183


100%|██████████| 7/7 [00:03<00:00,  2.05it/s]
100%|██████████| 1/1 [00:00<00:00,  5.94it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:02:11.722557] Epoch: 1 training ends. Status: Average loss: 4.8463347639356344, Average MLM accuracy: 0.9015784586815228, Average SC accuracy: 0.9444444444444444, Average S2S accuracy: 0.9059868603682798
[2024-01-07 16:02:11.892335] Epoch: 1 Evalutation ends. Status: Average loss: 4.122052192687988, Average MLM accuracy: 0.9076517150395779, Average SC accuracy: 0.9166666666666666, Average S2S accuracy: 0.9169618260527351


100%|██████████| 7/7 [00:03<00:00,  2.02it/s]
100%|██████████| 1/1 [00:00<00:00,  5.84it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:02:15.360332] Epoch: 2 training ends. Status: Average loss: 5.001253979546683, Average MLM accuracy: 0.8870318786753327, Average SC accuracy: 0.9074074074074074, Average S2S accuracy: 0.9070047191635051
[2024-01-07 16:02:15.533057] Epoch: 2 Evalutation ends. Status: Average loss: 3.6195404529571533, Average MLM accuracy: 0.920844327176781, Average SC accuracy: 1.0, Average S2S accuracy: 0.9236521054702873


100%|██████████| 7/7 [00:03<00:00,  2.06it/s]
100%|██████████| 1/1 [00:00<00:00,  5.84it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:02:18.935656] Epoch: 3 training ends. Status: Average loss: 4.49456126349313, Average MLM accuracy: 0.889817393995667, Average SC accuracy: 0.9907407407407407, Average S2S accuracy: 0.9110298880355325
[2024-01-07 16:02:19.108062] Epoch: 3 Evalutation ends. Status: Average loss: 4.125542640686035, Average MLM accuracy: 0.8970976253298153, Average SC accuracy: 1.0, Average S2S accuracy: 0.9244391971664699


100%|██████████| 7/7 [00:03<00:00,  2.03it/s]
100%|██████████| 1/1 [00:00<00:00,  5.73it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:02:22.552176] Epoch: 4 training ends. Status: Average loss: 4.331792422703335, Average MLM accuracy: 0.8910554008047045, Average SC accuracy: 0.9907407407407407, Average S2S accuracy: 0.9134820024058481
[2024-01-07 16:02:22.728424] Epoch: 4 Evalutation ends. Status: Average loss: 3.080254077911377, Average MLM accuracy: 0.9287598944591029, Average SC accuracy: 1.0, Average S2S accuracy: 0.9244391971664699


100%|██████████| 7/7 [00:03<00:00,  2.06it/s]
100%|██████████| 1/1 [00:00<00:00,  5.54it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:02:26.132337] Epoch: 5 training ends. Status: Average loss: 3.91190436908177, Average MLM accuracy: 0.8941504178272981, Average SC accuracy: 1.0, Average S2S accuracy: 0.9166743777181456
[2024-01-07 16:02:26.319942] Epoch: 5 Evalutation ends. Status: Average loss: 3.1036007404327393, Average MLM accuracy: 0.920844327176781, Average SC accuracy: 1.0, Average S2S accuracy: 0.931129476584022


100%|██████████| 7/7 [00:03<00:00,  2.05it/s]
100%|██████████| 1/1 [00:00<00:00,  5.70it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:02:29.732945] Epoch: 6 training ends. Status: Average loss: 3.7383759362357005, Average MLM accuracy: 0.8950789229340761, Average SC accuracy: 1.0, Average S2S accuracy: 0.9203294161191821
[2024-01-07 16:02:29.909735] Epoch: 6 Evalutation ends. Status: Average loss: 2.6324660778045654, Average MLM accuracy: 0.9234828496042217, Average SC accuracy: 1.0, Average S2S accuracy: 0.9374262101534829


100%|██████████| 7/7 [00:03<00:00,  2.04it/s]
100%|██████████| 1/1 [00:00<00:00,  5.84it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:02:33.342825] Epoch: 7 training ends. Status: Average loss: 3.579082829611642, Average MLM accuracy: 0.8938409161250387, Average SC accuracy: 1.0, Average S2S accuracy: 0.9232441935782363
[2024-01-07 16:02:33.515373] Epoch: 7 Evalutation ends. Status: Average loss: 2.8242247104644775, Average MLM accuracy: 0.920844327176781, Average SC accuracy: 1.0, Average S2S accuracy: 0.9315230224321134


100%|██████████| 2/2 [00:04<00:00,  2.01s/it]


[INFO] 202302070551 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202302071150 is saved
[INFO] finish 202302071150
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202302071150/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202302071150.
[INFO] SimpleTransformerStack loaded for 202302071150.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202302071150.
[INFO] SimpleDecoderHead_S2S loaded for 202302071150.
[INFO] DistilBertEncoderHead_MLM loaded for 202302071150.


100%|██████████| 2/2 [00:00<00:00,  2.35it/s]
100%|██████████| 1/1 [00:00<00:00, 18.40it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:03:23.387056] Epoch: 0 training ends. Status: Average loss: 5.9195544719696045, Average MLM accuracy: 0.8441011235955056, Average SC accuracy: 1.0, Average S2S accuracy: 0.8791278577476714
[2024-01-07 16:03:23.442924] Epoch: 0 Evalutation ends. Status: Average loss: 11.206652641296387, Average MLM accuracy: 0.6491228070175439, Average SC accuracy: 1.0, Average S2S accuracy: 0.8567639257294429


100%|██████████| 2/2 [00:00<00:00,  2.39it/s]
100%|██████████| 1/1 [00:00<00:00, 15.77it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:03:24.282893] Epoch: 1 training ends. Status: Average loss: 5.784548044204712, Average MLM accuracy: 0.8581460674157303, Average SC accuracy: 1.0, Average S2S accuracy: 0.8831498729889924
[2024-01-07 16:03:24.347626] Epoch: 1 Evalutation ends. Status: Average loss: 6.508799076080322, Average MLM accuracy: 0.8771929824561403, Average SC accuracy: 1.0, Average S2S accuracy: 0.8647214854111406


100%|██████████| 2/2 [00:00<00:00,  2.40it/s]
100%|██████████| 1/1 [00:00<00:00, 15.24it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:03:25.181601] Epoch: 2 training ends. Status: Average loss: 6.769122362136841, Average MLM accuracy: 0.8132022471910112, Average SC accuracy: 1.0, Average S2S accuracy: 0.8742591024555462
[2024-01-07 16:03:25.248480] Epoch: 2 Evalutation ends. Status: Average loss: 9.721503257751465, Average MLM accuracy: 0.8070175438596491, Average SC accuracy: 1.0, Average S2S accuracy: 0.8726790450928382


100%|██████████| 2/2 [00:00<00:00,  2.36it/s]
100%|██████████| 1/1 [00:00<00:00, 15.15it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:03:26.097792] Epoch: 3 training ends. Status: Average loss: 5.922796964645386, Average MLM accuracy: 0.8328651685393258, Average SC accuracy: 1.0, Average S2S accuracy: 0.8774343776460627
[2024-01-07 16:03:26.165540] Epoch: 3 Evalutation ends. Status: Average loss: 8.113926887512207, Average MLM accuracy: 0.7894736842105263, Average SC accuracy: 1.0, Average S2S accuracy: 0.8859416445623343


100%|██████████| 2/2 [00:00<00:00,  2.35it/s]
100%|██████████| 1/1 [00:00<00:00, 15.12it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:03:27.017567] Epoch: 4 training ends. Status: Average loss: 6.081466436386108, Average MLM accuracy: 0.8441011235955056, Average SC accuracy: 1.0, Average S2S accuracy: 0.8712955122777307
[2024-01-07 16:03:27.085207] Epoch: 4 Evalutation ends. Status: Average loss: 5.95161771774292, Average MLM accuracy: 0.8596491228070176, Average SC accuracy: 1.0, Average S2S accuracy: 0.8992042440318302


100%|██████████| 2/2 [00:00<00:00,  2.39it/s]
100%|██████████| 1/1 [00:00<00:00, 18.50it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:03:27.925271] Epoch: 5 training ends. Status: Average loss: 6.041656255722046, Average MLM accuracy: 0.8469101123595506, Average SC accuracy: 1.0, Average S2S accuracy: 0.8810330228619814
[2024-01-07 16:03:27.981157] Epoch: 5 Evalutation ends. Status: Average loss: 7.699670791625977, Average MLM accuracy: 0.8421052631578947, Average SC accuracy: 1.0, Average S2S accuracy: 0.8806366047745358


100%|██████████| 2/2 [00:00<00:00,  2.36it/s]
100%|██████████| 1/1 [00:00<00:00, 15.31it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:03:28.829870] Epoch: 6 training ends. Status: Average loss: 5.9659013748168945, Average MLM accuracy: 0.8455056179775281, Average SC accuracy: 1.0, Average S2S accuracy: 0.8776460626587638
[2024-01-07 16:03:28.896588] Epoch: 6 Evalutation ends. Status: Average loss: 8.209550857543945, Average MLM accuracy: 0.7894736842105263, Average SC accuracy: 1.0, Average S2S accuracy: 0.8779840848806366


100%|██████████| 2/2 [00:00<00:00,  2.37it/s]
100%|██████████| 1/1 [00:00<00:00, 15.45it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

[2024-01-07 16:03:29.741091] Epoch: 7 training ends. Status: Average loss: 6.016155004501343, Average MLM accuracy: 0.8398876404494382, Average SC accuracy: 1.0, Average S2S accuracy: 0.8835732430143945
[2024-01-07 16:03:29.807268] Epoch: 7 Evalutation ends. Status: Average loss: 8.565337181091309, Average MLM accuracy: 0.7543859649122807, Average SC accuracy: 1.0, Average S2S accuracy: 0.883289124668435


100%|██████████| 16/16 [00:45<00:00,  2.86s/it]


[INFO] 202302071150 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202304102150 is saved
[INFO] finish 202304102150
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202304102150/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202304102150.
[INFO] SimpleTransformerStack loaded for 202304102150.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202304102150.
[INFO] SimpleDecoderHead_S2S loaded for 202304102150.
[INFO] DistilBertEncoderHead_MLM loaded for 202304102150.


100%|██████████| 24/24 [00:12<00:00,  1.99it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:05:04.314538] Epoch: 0 training ends. Status: Average loss: 12.17647365729014, Average MLM accuracy: 0.8266513309234308, Average SC accuracy: 0.4444444444444444, Average S2S accuracy: 0.847109265583921


100%|██████████| 3/3 [00:00<00:00,  5.29it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:05:04.883326] Epoch: 0 Evalutation ends. Status: Average loss: 9.966163476308187, Average MLM accuracy: 0.8329519450800915, Average SC accuracy: 0.5952380952380952, Average S2S accuracy: 0.8605368360936607


100%|██████████| 24/24 [00:12<00:00,  1.96it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:05:17.143667] Epoch: 1 training ends. Status: Average loss: 9.856812377770742, Average MLM accuracy: 0.8273907328294446, Average SC accuracy: 0.5608465608465608, Average S2S accuracy: 0.8513968402172044


100%|██████████| 3/3 [00:00<00:00,  5.18it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:05:17.724149] Epoch: 1 Evalutation ends. Status: Average loss: 8.83530330657959, Average MLM accuracy: 0.8299008390541571, Average SC accuracy: 0.5238095238095238, Average S2S accuracy: 0.8683038263849229


100%|██████████| 24/24 [00:12<00:00,  1.99it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:05:29.808899] Epoch: 2 training ends. Status: Average loss: 8.168728013833364, Average MLM accuracy: 0.8250903713440684, Average SC accuracy: 0.6746031746031746, Average S2S accuracy: 0.8572692203739649


100%|██████████| 3/3 [00:00<00:00,  5.13it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:05:30.395068] Epoch: 2 Evalutation ends. Status: Average loss: 7.414387543996175, Average MLM accuracy: 0.8360030511060259, Average SC accuracy: 0.7142857142857143, Average S2S accuracy: 0.8680753854940034


100%|██████████| 24/24 [00:12<00:00,  1.99it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:05:42.449603] Epoch: 3 training ends. Status: Average loss: 6.83220632870992, Average MLM accuracy: 0.8279658232007887, Average SC accuracy: 0.8650793650793651, Average S2S accuracy: 0.8619253544313128


100%|██████████| 3/3 [00:00<00:00,  5.23it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:05:43.025057] Epoch: 3 Evalutation ends. Status: Average loss: 5.988915125528972, Average MLM accuracy: 0.8535469107551488, Average SC accuracy: 0.8333333333333334, Average S2S accuracy: 0.8732153055396916


100%|██████████| 24/24 [00:12<00:00,  2.00it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:05:55.052618] Epoch: 4 training ends. Status: Average loss: 5.913630247116089, Average MLM accuracy: 0.835770621097601, Average SC accuracy: 0.9497354497354498, Average S2S accuracy: 0.869824811420428


100%|██████████| 3/3 [00:00<00:00,  5.26it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:05:55.626015] Epoch: 4 Evalutation ends. Status: Average loss: 5.5228250821431475, Average MLM accuracy: 0.8428680396643783, Average SC accuracy: 0.9523809523809523, Average S2S accuracy: 0.8776699029126214


100%|██████████| 24/24 [00:12<00:00,  1.98it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:06:07.748333] Epoch: 5 training ends. Status: Average loss: 5.338665266831716, Average MLM accuracy: 0.8423430824843904, Average SC accuracy: 0.9761904761904762, Average S2S accuracy: 0.8780928278336078


100%|██████████| 3/3 [00:00<00:00,  5.13it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:06:08.334355] Epoch: 5 Evalutation ends. Status: Average loss: 4.9347914059956866, Average MLM accuracy: 0.8466819221967964, Average SC accuracy: 0.9285714285714286, Average S2S accuracy: 0.8877213021130782


100%|██████████| 24/24 [00:12<00:00,  2.00it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:06:20.343661] Epoch: 6 training ends. Status: Average loss: 4.6863028307755785, Average MLM accuracy: 0.854337824515281, Average SC accuracy: 0.9841269841269841, Average S2S accuracy: 0.8873313840634905


100%|██████████| 3/3 [00:00<00:00,  5.20it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:06:20.923342] Epoch: 6 Evalutation ends. Status: Average loss: 4.4199090003967285, Average MLM accuracy: 0.8649885583524027, Average SC accuracy: 0.9761904761904762, Average S2S accuracy: 0.8970873786407767


100%|██████████| 24/24 [00:11<00:00,  2.01it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:06:32.889942] Epoch: 7 training ends. Status: Average loss: 4.3330472608407335, Average MLM accuracy: 0.8514623726585606, Average SC accuracy: 0.9973544973544973, Average S2S accuracy: 0.8941988746652252


100%|██████████| 3/3 [00:00<00:00,  5.18it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 16:06:33.471236] Epoch: 7 Evalutation ends. Status: Average loss: 4.00932240486145, Average MLM accuracy: 0.8558352402745996, Average SC accuracy: 1.0, Average S2S accuracy: 0.9009708737864077


100%|██████████| 11/11 [00:39<00:00,  3.64s/it]


[INFO] 202304102150 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202304110353 is saved
[INFO] finish 202304110353
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202304110353/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202304110353.
[INFO] SimpleTransformerStack loaded for 202304110353.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202304110353.
[INFO] SimpleDecoderHead_S2S loaded for 202304110353.
[INFO] DistilBertEncoderHead_MLM loaded for 202304110353.


100%|██████████| 24/24 [00:12<00:00,  1.98it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:08:02.109602] Epoch: 0 training ends. Status: Average loss: 8.908516128857931, Average MLM accuracy: 0.8189195455599351, Average SC accuracy: 0.5052910052910053, Average S2S accuracy: 0.8443981963232744


100%|██████████| 3/3 [00:00<00:00,  4.88it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:08:02.726391] Epoch: 0 Evalutation ends. Status: Average loss: 7.800165971120198, Average MLM accuracy: 0.8404040404040404, Average SC accuracy: 0.5476190476190477, Average S2S accuracy: 0.8526453429893381


100%|██████████| 24/24 [00:12<00:00,  1.98it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:08:14.841229] Epoch: 1 training ends. Status: Average loss: 7.707891205946605, Average MLM accuracy: 0.8119638302805472, Average SC accuracy: 0.6957671957671958, Average S2S accuracy: 0.848329286622731


100%|██████████| 3/3 [00:00<00:00,  4.95it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:08:15.449137] Epoch: 1 Evalutation ends. Status: Average loss: 6.4831366539001465, Average MLM accuracy: 0.8262626262626263, Average SC accuracy: 0.7142857142857143, Average S2S accuracy: 0.8652182659424663


100%|██████████| 24/24 [00:12<00:00,  1.97it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:08:27.605041] Epoch: 2 training ends. Status: Average loss: 6.509627282619476, Average MLM accuracy: 0.8203106886158127, Average SC accuracy: 0.8412698412698413, Average S2S accuracy: 0.8583188807954677


100%|██████████| 3/3 [00:00<00:00,  4.94it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:08:28.214362] Epoch: 2 Evalutation ends. Status: Average loss: 5.21148665746053, Average MLM accuracy: 0.8363636363636363, Average SC accuracy: 0.8809523809523809, Average S2S accuracy: 0.8715550191108429


100%|██████████| 24/24 [00:12<00:00,  1.95it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:08:40.509783] Epoch: 3 training ends. Status: Average loss: 5.545106967290242, Average MLM accuracy: 0.8305124043589149, Average SC accuracy: 0.9523809523809523, Average S2S accuracy: 0.866874783211932


100%|██████████| 3/3 [00:00<00:00,  4.96it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:08:41.115908] Epoch: 3 Evalutation ends. Status: Average loss: 4.677212238311768, Average MLM accuracy: 0.8444444444444444, Average SC accuracy: 1.0, Average S2S accuracy: 0.8797022731844699


100%|██████████| 24/24 [00:12<00:00,  1.97it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:08:53.272866] Epoch: 4 training ends. Status: Average loss: 4.9208309451739, Average MLM accuracy: 0.8397866913980988, Average SC accuracy: 0.9947089947089947, Average S2S accuracy: 0.8755925540524916


100%|██████████| 3/3 [00:00<00:00,  5.09it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:08:53.867518] Epoch: 4 Evalutation ends. Status: Average loss: 4.332878589630127, Average MLM accuracy: 0.8336700336700337, Average SC accuracy: 1.0, Average S2S accuracy: 0.8876483604908469


100%|██████████| 24/24 [00:12<00:00,  1.98it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:09:05.988919] Epoch: 5 training ends. Status: Average loss: 4.517930716276169, Average MLM accuracy: 0.8436509776644254, Average SC accuracy: 0.9920634920634921, Average S2S accuracy: 0.8884957798589432


100%|██████████| 3/3 [00:00<00:00,  4.96it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:09:06.595605] Epoch: 5 Evalutation ends. Status: Average loss: 4.1666271686553955, Average MLM accuracy: 0.857912457912458, Average SC accuracy: 1.0, Average S2S accuracy: 0.8955944477972239


100%|██████████| 24/24 [00:12<00:00,  1.96it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:09:18.814545] Epoch: 6 training ends. Status: Average loss: 3.9831136663754783, Average MLM accuracy: 0.860731122961589, Average SC accuracy: 0.9973544973544973, Average S2S accuracy: 0.8954676841253324


100%|██████████| 3/3 [00:00<00:00,  4.90it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2024-01-07 16:09:19.427821] Epoch: 6 Evalutation ends. Status: Average loss: 3.7073825200398765, Average MLM accuracy: 0.8565656565656565, Average SC accuracy: 1.0, Average S2S accuracy: 0.9027358680346007


100%|██████████| 24/24 [00:12<00:00,  1.98it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2024-01-07 16:09:31.578946] Epoch: 7 training ends. Status: Average loss: 3.577641357978185, Average MLM accuracy: 0.8690779812968544, Average SC accuracy: 1.0, Average S2S accuracy: 0.9039773384206267


100%|██████████| 3/3 [00:00<00:00,  4.96it/s]
  0%|          | 0/26 [00:00<?, ?it/s]

[2024-01-07 16:09:32.185486] Epoch: 7 Evalutation ends. Status: Average loss: 3.290250062942505, Average MLM accuracy: 0.868013468013468, Average SC accuracy: 1.0, Average S2S accuracy: 0.9104807885737276


100%|██████████| 26/26 [01:16<00:00,  2.96s/it]


[INFO] 202304110353 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202304110953 is saved
[INFO] finish 202304110953
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202304110953/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202304110953.
[INFO] SimpleTransformerStack loaded for 202304110953.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202304110953.
[INFO] SimpleDecoderHead_S2S loaded for 202304110953.
[INFO] DistilBertEncoderHead_MLM loaded for 202304110953.


100%|██████████| 68/68 [00:34<00:00,  1.97it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 16:11:51.879323] Epoch: 0 training ends. Status: Average loss: 7.151860689415651, Average MLM accuracy: 0.8492061323203891, Average SC accuracy: 0.549074074074074, Average S2S accuracy: 0.8765365067053132


100%|██████████| 8/8 [00:01<00:00,  4.75it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 16:11:53.565590] Epoch: 0 Evalutation ends. Status: Average loss: 5.550716400146484, Average MLM accuracy: 0.8520420640743458, Average SC accuracy: 0.8083333333333333, Average S2S accuracy: 0.8820132314777587


100%|██████████| 68/68 [00:34<00:00,  1.97it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 16:12:28.151921] Epoch: 1 training ends. Status: Average loss: 4.523497532395756, Average MLM accuracy: 0.8606564096958434, Average SC accuracy: 0.9074074074074074, Average S2S accuracy: 0.8926251760527444


100%|██████████| 8/8 [00:01<00:00,  4.70it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 16:12:29.856441] Epoch: 1 Evalutation ends. Status: Average loss: 4.007551610469818, Average MLM accuracy: 0.8654927855221326, Average SC accuracy: 0.9666666666666667, Average S2S accuracy: 0.8987901604590811


100%|██████████| 68/68 [00:34<00:00,  1.98it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 16:13:04.282727] Epoch: 2 training ends. Status: Average loss: 3.400945849278394, Average MLM accuracy: 0.8751947093706447, Average SC accuracy: 0.9833333333333333, Average S2S accuracy: 0.9096038047804699


100%|██████████| 8/8 [00:01<00:00,  4.71it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 16:13:05.982679] Epoch: 2 Evalutation ends. Status: Average loss: 3.170511841773987, Average MLM accuracy: 0.8811445341159208, Average SC accuracy: 0.9666666666666667, Average S2S accuracy: 0.9160422530063234


100%|██████████| 68/68 [00:34<00:00,  1.97it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 16:13:40.464388] Epoch: 3 training ends. Status: Average loss: 2.6841647660031036, Average MLM accuracy: 0.8890771459022218, Average SC accuracy: 0.9972222222222222, Average S2S accuracy: 0.9237941662754383


100%|██████████| 8/8 [00:01<00:00,  4.76it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 16:13:42.145476] Epoch: 3 Evalutation ends. Status: Average loss: 2.54176789522171, Average MLM accuracy: 0.8953289312790413, Average SC accuracy: 0.9916666666666667, Average S2S accuracy: 0.9249607076282028


100%|██████████| 68/68 [00:34<00:00,  1.97it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 16:14:16.700505] Epoch: 4 training ends. Status: Average loss: 2.1791300282758823, Average MLM accuracy: 0.901784494302189, Average SC accuracy: 0.9981481481481481, Average S2S accuracy: 0.9358902655589802


100%|██████████| 8/8 [00:01<00:00,  4.79it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 16:14:18.374651] Epoch: 4 Evalutation ends. Status: Average loss: 2.031500071287155, Average MLM accuracy: 0.9117143555881634, Average SC accuracy: 1.0, Average S2S accuracy: 0.9354874081655031


100%|██████████| 68/68 [00:34<00:00,  1.98it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 16:14:52.726107] Epoch: 5 training ends. Status: Average loss: 1.8656835608622606, Average MLM accuracy: 0.9120596835460334, Average SC accuracy: 0.9990740740740741, Average S2S accuracy: 0.943818252332061


100%|██████████| 8/8 [00:01<00:00,  4.76it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 16:14:54.408142] Epoch: 5 Evalutation ends. Status: Average loss: 1.7879390865564346, Average MLM accuracy: 0.9207630227439472, Average SC accuracy: 1.0, Average S2S accuracy: 0.9391425125187324


100%|██████████| 68/68 [00:34<00:00,  1.97it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 16:15:29.004491] Epoch: 6 training ends. Status: Average loss: 1.6864201058359707, Average MLM accuracy: 0.9176891755253737, Average SC accuracy: 0.9981481481481481, Average S2S accuracy: 0.9483333673531873


100%|██████████| 8/8 [00:01<00:00,  4.82it/s]
  0%|          | 0/68 [00:00<?, ?it/s]

[2024-01-07 16:15:30.667446] Epoch: 6 Evalutation ends. Status: Average loss: 1.6219951808452606, Average MLM accuracy: 0.9293225727561751, Average SC accuracy: 1.0, Average S2S accuracy: 0.9439672502649951


100%|██████████| 68/68 [00:34<00:00,  1.97it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 16:16:05.244066] Epoch: 7 training ends. Status: Average loss: 1.463145582114949, Average MLM accuracy: 0.9234279780285847, Average SC accuracy: 1.0, Average S2S accuracy: 0.9543303872139781


100%|██████████| 8/8 [00:01<00:00,  4.72it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:16:06.939069] Epoch: 7 Evalutation ends. Status: Average loss: 1.5368415862321854, Average MLM accuracy: 0.9276106627537295, Average SC accuracy: 1.0, Average S2S accuracy: 0.9504733360137432


100%|██████████| 6/6 [00:21<00:00,  3.59s/it]


[INFO] 202304110953 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202304140402 is saved
[INFO] finish 202304140402
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202304140402/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202304140402.
[INFO] SimpleTransformerStack loaded for 202304140402.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202304140402.
[INFO] SimpleDecoderHead_S2S loaded for 202304140402.
[INFO] DistilBertEncoderHead_MLM loaded for 202304140402.


100%|██████████| 6/6 [00:02<00:00,  2.25it/s]
100%|██████████| 1/1 [00:00<00:00,  7.27it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:17:15.989637] Epoch: 0 training ends. Status: Average loss: 5.916920026143392, Average MLM accuracy: 0.8463810930576071, Average SC accuracy: 0.5061728395061729, Average S2S accuracy: 0.8705531961833324
[2024-01-07 16:17:16.128670] Epoch: 0 Evalutation ends. Status: Average loss: 7.563488960266113, Average MLM accuracy: 0.8571428571428571, Average SC accuracy: 0.4444444444444444, Average S2S accuracy: 0.8774954627949183


100%|██████████| 6/6 [00:02<00:00,  2.25it/s]
100%|██████████| 1/1 [00:00<00:00,  7.00it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:17:18.794998] Epoch: 1 training ends. Status: Average loss: 5.745885411898295, Average MLM accuracy: 0.8452732644017725, Average SC accuracy: 0.6049382716049383, Average S2S accuracy: 0.8722078208593017
[2024-01-07 16:17:18.939549] Epoch: 1 Evalutation ends. Status: Average loss: 6.502096176147461, Average MLM accuracy: 0.8571428571428571, Average SC accuracy: 0.4444444444444444, Average S2S accuracy: 0.8747731397459165


100%|██████████| 6/6 [00:02<00:00,  2.23it/s]
100%|██████████| 1/1 [00:00<00:00,  6.70it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:17:21.631519] Epoch: 2 training ends. Status: Average loss: 5.078354199727376, Average MLM accuracy: 0.8489660265878878, Average SC accuracy: 0.7160493827160493, Average S2S accuracy: 0.8711598918978545
[2024-01-07 16:17:21.782300] Epoch: 2 Evalutation ends. Status: Average loss: 5.988649368286133, Average MLM accuracy: 0.8480243161094225, Average SC accuracy: 0.5555555555555556, Average S2S accuracy: 0.8779491833030852


100%|██████████| 6/6 [00:02<00:00,  2.24it/s]
100%|██████████| 1/1 [00:00<00:00,  6.96it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:17:24.465775] Epoch: 3 training ends. Status: Average loss: 4.9029635190963745, Average MLM accuracy: 0.8474889217134417, Average SC accuracy: 0.8271604938271605, Average S2S accuracy: 0.874027908002868
[2024-01-07 16:17:24.610683] Epoch: 3 Evalutation ends. Status: Average loss: 4.271271228790283, Average MLM accuracy: 0.8723404255319149, Average SC accuracy: 0.6666666666666666, Average S2S accuracy: 0.8779491833030852


100%|██████████| 6/6 [00:02<00:00,  2.23it/s]
100%|██████████| 1/1 [00:00<00:00,  6.96it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:17:27.297610] Epoch: 4 training ends. Status: Average loss: 4.4083198706309, Average MLM accuracy: 0.8600443131462334, Average SC accuracy: 0.8765432098765432, Average S2S accuracy: 0.8775577739782693
[2024-01-07 16:17:27.442798] Epoch: 4 Evalutation ends. Status: Average loss: 4.269934177398682, Average MLM accuracy: 0.8814589665653495, Average SC accuracy: 0.7777777777777778, Average S2S accuracy: 0.8915607985480943


100%|██████████| 6/6 [00:02<00:00,  2.23it/s]
100%|██████████| 1/1 [00:00<00:00,  6.99it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:17:30.130525] Epoch: 5 training ends. Status: Average loss: 3.8052829106648765, Average MLM accuracy: 0.8607828655834564, Average SC accuracy: 0.9753086419753086, Average S2S accuracy: 0.880536098395014
[2024-01-07 16:17:30.275136] Epoch: 5 Evalutation ends. Status: Average loss: 3.835242986679077, Average MLM accuracy: 0.8723404255319149, Average SC accuracy: 0.8888888888888888, Average S2S accuracy: 0.882940108892922


100%|██████████| 6/6 [00:02<00:00,  2.26it/s]
100%|██████████| 1/1 [00:00<00:00,  6.90it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:17:32.937081] Epoch: 6 training ends. Status: Average loss: 3.971314867337545, Average MLM accuracy: 0.854135893648449, Average SC accuracy: 1.0, Average S2S accuracy: 0.883183497876565
[2024-01-07 16:17:33.083104] Epoch: 6 Evalutation ends. Status: Average loss: 3.878369092941284, Average MLM accuracy: 0.8541033434650456, Average SC accuracy: 1.0, Average S2S accuracy: 0.8874773139745916


100%|██████████| 6/6 [00:02<00:00,  2.26it/s]
100%|██████████| 1/1 [00:00<00:00,  7.02it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2024-01-07 16:17:35.736716] Epoch: 7 training ends. Status: Average loss: 3.8267316619555154, Average MLM accuracy: 0.8497045790251108, Average SC accuracy: 1.0, Average S2S accuracy: 0.8872649054106226
[2024-01-07 16:17:35.880928] Epoch: 7 Evalutation ends. Status: Average loss: 3.5873401165008545, Average MLM accuracy: 0.8875379939209727, Average SC accuracy: 1.0, Average S2S accuracy: 0.8888384754990926


100%|██████████| 5/5 [00:14<00:00,  3.00s/it]


[INFO] 202304140402 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202304191023 is saved
[INFO] finish 202304191023
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202304191023/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202304191023.
[INFO] SimpleTransformerStack loaded for 202304191023.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202304191023.
[INFO] SimpleDecoderHead_S2S loaded for 202304191023.
[INFO] DistilBertEncoderHead_MLM loaded for 202304191023.


100%|██████████| 6/6 [00:02<00:00,  2.24it/s]
100%|██████████| 1/1 [00:00<00:00,  7.16it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:18:25.259343] Epoch: 0 training ends. Status: Average loss: 11.420117060343424, Average MLM accuracy: 0.8278908951210142, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.8582003902215081
[2024-01-07 16:18:25.401016] Epoch: 0 Evalutation ends. Status: Average loss: 9.9931001663208, Average MLM accuracy: 0.8590604026845637, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.8823232323232323


100%|██████████| 6/6 [00:02<00:00,  2.27it/s]
100%|██████████| 1/1 [00:00<00:00,  7.22it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:18:28.040633] Epoch: 1 training ends. Status: Average loss: 11.046744108200073, Average MLM accuracy: 0.8282750672301191, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.860954894984506
[2024-01-07 16:18:28.180524] Epoch: 1 Evalutation ends. Status: Average loss: 9.360074996948242, Average MLM accuracy: 0.8590604026845637, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.8782828282828283


100%|██████████| 6/6 [00:02<00:00,  2.29it/s]
100%|██████████| 1/1 [00:00<00:00,  7.45it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:18:30.796885] Epoch: 2 training ends. Status: Average loss: 10.908458391825357, Average MLM accuracy: 0.8194391087207069, Average SC accuracy: 0.32098765432098764, Average S2S accuracy: 0.8584873178009871
[2024-01-07 16:18:30.932249] Epoch: 2 Evalutation ends. Status: Average loss: 8.74905014038086, Average MLM accuracy: 0.8657718120805369, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.8782828282828283


100%|██████████| 6/6 [00:02<00:00,  2.25it/s]
100%|██████████| 1/1 [00:00<00:00,  6.80it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:18:33.596625] Epoch: 3 training ends. Status: Average loss: 10.871202945709229, Average MLM accuracy: 0.8044563964656166, Average SC accuracy: 0.2962962962962963, Average S2S accuracy: 0.8613565935957764
[2024-01-07 16:18:33.745576] Epoch: 3 Evalutation ends. Status: Average loss: 7.5669660568237305, Average MLM accuracy: 0.8691275167785235, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.8747474747474747


100%|██████████| 6/6 [00:02<00:00,  2.27it/s]
100%|██████████| 1/1 [00:00<00:00,  7.47it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:18:36.393554] Epoch: 4 training ends. Status: Average loss: 9.724324385325113, Average MLM accuracy: 0.8332693046484825, Average SC accuracy: 0.2716049382716049, Average S2S accuracy: 0.862561689429588
[2024-01-07 16:18:36.528337] Epoch: 4 Evalutation ends. Status: Average loss: 6.816292762756348, Average MLM accuracy: 0.8154362416107382, Average SC accuracy: 0.4444444444444444, Average S2S accuracy: 0.8888888888888888


100%|██████████| 6/6 [00:02<00:00,  2.27it/s]
100%|██████████| 1/1 [00:00<00:00,  7.39it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:18:39.172812] Epoch: 5 training ends. Status: Average loss: 7.595500548680623, Average MLM accuracy: 0.8593930080676143, Average SC accuracy: 0.25925925925925924, Average S2S accuracy: 0.864455411454149
[2024-01-07 16:18:39.310068] Epoch: 5 Evalutation ends. Status: Average loss: 5.282875061035156, Average MLM accuracy: 0.8590604026845637, Average SC accuracy: 0.7777777777777778, Average S2S accuracy: 0.8772727272727273


100%|██████████| 6/6 [00:02<00:00,  2.28it/s]
100%|██████████| 1/1 [00:00<00:00,  7.51it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:18:41.938009] Epoch: 6 training ends. Status: Average loss: 7.1385438442230225, Average MLM accuracy: 0.822512485593546, Average SC accuracy: 0.4444444444444444, Average S2S accuracy: 0.8672099162171468
[2024-01-07 16:18:42.072248] Epoch: 6 Evalutation ends. Status: Average loss: 5.008134365081787, Average MLM accuracy: 0.8456375838926175, Average SC accuracy: 0.6666666666666666, Average S2S accuracy: 0.8858585858585859


100%|██████████| 6/6 [00:02<00:00,  2.28it/s]
100%|██████████| 1/1 [00:00<00:00,  7.68it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2024-01-07 16:18:44.706862] Epoch: 7 training ends. Status: Average loss: 5.704757769902547, Average MLM accuracy: 0.8467153284671532, Average SC accuracy: 0.7037037037037037, Average S2S accuracy: 0.8692184092734994
[2024-01-07 16:18:44.839429] Epoch: 7 Evalutation ends. Status: Average loss: 3.8672726154327393, Average MLM accuracy: 0.87248322147651, Average SC accuracy: 0.8888888888888888, Average S2S accuracy: 0.8858585858585859


100%|██████████| 8/8 [00:17<00:00,  2.19s/it]


[INFO] 202304191023 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202304270449 is saved
[INFO] finish 202304270449
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202304270449/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202304270449.
[INFO] SimpleTransformerStack loaded for 202304270449.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202304270449.
[INFO] SimpleDecoderHead_S2S loaded for 202304270449.
[INFO] DistilBertEncoderHead_MLM loaded for 202304270449.


100%|██████████| 12/12 [00:06<00:00,  1.99it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:19:41.928076] Epoch: 0 training ends. Status: Average loss: 3.7009155551592507, Average MLM accuracy: 0.8884950048340315, Average SC accuracy: 0.8835978835978836, Average S2S accuracy: 0.9063981327750908


100%|██████████| 2/2 [00:00<00:00,  6.56it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

[2024-01-07 16:19:42.234722] Epoch: 0 Evalutation ends. Status: Average loss: 3.270758271217346, Average MLM accuracy: 0.8867924528301887, Average SC accuracy: 0.9047619047619048, Average S2S accuracy: 0.9073191857947164


100%|██████████| 12/12 [00:05<00:00,  2.01it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:19:48.203086] Epoch: 1 training ends. Status: Average loss: 3.418259620666504, Average MLM accuracy: 0.8910731550112794, Average SC accuracy: 0.9365079365079365, Average S2S accuracy: 0.9078899877282899


100%|██████████| 2/2 [00:00<00:00,  6.85it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

[2024-01-07 16:19:48.496703] Epoch: 1 Evalutation ends. Status: Average loss: 2.887826085090637, Average MLM accuracy: 0.8896952104499274, Average SC accuracy: 1.0, Average S2S accuracy: 0.9116500649631876


100%|██████████| 12/12 [00:06<00:00,  1.99it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:19:54.525829] Epoch: 2 training ends. Status: Average loss: 2.9984677036603293, Average MLM accuracy: 0.8926844988720593, Average SC accuracy: 0.9735449735449735, Average S2S accuracy: 0.9133039774778027


100%|██████████| 2/2 [00:00<00:00,  6.54it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

[2024-01-07 16:19:54.833510] Epoch: 2 Evalutation ends. Status: Average loss: 2.7494207620620728, Average MLM accuracy: 0.8940493468795355, Average SC accuracy: 1.0, Average S2S accuracy: 0.9174967518406236


100%|██████████| 12/12 [00:05<00:00,  2.00it/s]
 50%|█████     | 1/2 [00:00<00:00,  5.03it/s]

[2024-01-07 16:20:00.829750] Epoch: 3 training ends. Status: Average loss: 2.9514119227727256, Average MLM accuracy: 0.8896229455365775, Average SC accuracy: 1.0, Average S2S accuracy: 0.9188142159339734


100%|██████████| 2/2 [00:00<00:00,  7.42it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

[2024-01-07 16:20:01.101453] Epoch: 3 Evalutation ends. Status: Average loss: 2.6809641122817993, Average MLM accuracy: 0.9042089985486212, Average SC accuracy: 1.0, Average S2S accuracy: 0.9174967518406236


100%|██████████| 12/12 [00:06<00:00,  1.98it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:20:07.149990] Epoch: 4 training ends. Status: Average loss: 2.5005058149496713, Average MLM accuracy: 0.9031582339671286, Average SC accuracy: 0.9947089947089947, Average S2S accuracy: 0.9254313145167112


100%|██████████| 2/2 [00:00<00:00,  6.01it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

[2024-01-07 16:20:07.484451] Epoch: 4 Evalutation ends. Status: Average loss: 2.3373918533325195, Average MLM accuracy: 0.888243831640058, Average SC accuracy: 1.0, Average S2S accuracy: 0.9272412299696838


100%|██████████| 12/12 [00:05<00:00,  2.04it/s]
 50%|█████     | 1/2 [00:00<00:00,  5.23it/s]

[2024-01-07 16:20:13.367616] Epoch: 5 training ends. Status: Average loss: 2.407801737387975, Average MLM accuracy: 0.9089590718659362, Average SC accuracy: 0.9947089947089947, Average S2S accuracy: 0.9282706513631223


100%|██████████| 2/2 [00:00<00:00,  7.59it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

[2024-01-07 16:20:13.632853] Epoch: 5 Evalutation ends. Status: Average loss: 2.4335598945617676, Average MLM accuracy: 0.8998548621190131, Average SC accuracy: 1.0, Average S2S accuracy: 0.9268081420528367


100%|██████████| 12/12 [00:05<00:00,  2.04it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:20:19.527466] Epoch: 6 training ends. Status: Average loss: 2.3675299982229867, Average MLM accuracy: 0.897357396068321, Average SC accuracy: 1.0, Average S2S accuracy: 0.9316874804494815


100%|██████████| 2/2 [00:00<00:00,  6.65it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

[2024-01-07 16:20:19.829513] Epoch: 6 Evalutation ends. Status: Average loss: 2.4097471237182617, Average MLM accuracy: 0.8984034833091437, Average SC accuracy: 1.0, Average S2S accuracy: 0.9289735816370723


100%|██████████| 12/12 [00:05<00:00,  2.03it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:20:25.754721] Epoch: 7 training ends. Status: Average loss: 2.1347878177960715, Average MLM accuracy: 0.9023525620367386, Average SC accuracy: 1.0, Average S2S accuracy: 0.9360667966024206


100%|██████████| 2/2 [00:00<00:00,  6.58it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:20:26.060344] Epoch: 7 Evalutation ends. Status: Average loss: 2.0787758827209473, Average MLM accuracy: 0.9071117561683599, Average SC accuracy: 1.0, Average S2S accuracy: 0.9361195322650498


100%|██████████| 7/7 [00:21<00:00,  3.05s/it]


[INFO] 202304270449 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202305091133 is saved
[INFO] finish 202305091133
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202305091133/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202305091133.
[INFO] SimpleTransformerStack loaded for 202305091133.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202305091133.
[INFO] SimpleDecoderHead_S2S loaded for 202305091133.
[INFO] DistilBertEncoderHead_MLM loaded for 202305091133.


100%|██████████| 13/13 [00:06<00:00,  2.08it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:21:27.414055] Epoch: 0 training ends. Status: Average loss: 6.374896452977107, Average MLM accuracy: 0.8818966778867262, Average SC accuracy: 0.35960591133004927, Average S2S accuracy: 0.9035699451282163


100%|██████████| 2/2 [00:00<00:00,  7.03it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:21:27.700031] Epoch: 0 Evalutation ends. Status: Average loss: 5.823512077331543, Average MLM accuracy: 0.8815612382234186, Average SC accuracy: 0.3181818181818182, Average S2S accuracy: 0.9081776170383765


100%|██████████| 13/13 [00:06<00:00,  2.03it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:21:34.107271] Epoch: 1 training ends. Status: Average loss: 5.187028719828679, Average MLM accuracy: 0.8716522757207669, Average SC accuracy: 0.4630541871921182, Average S2S accuracy: 0.906127713529939


100%|██████████| 2/2 [00:00<00:00,  6.36it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:21:34.423988] Epoch: 1 Evalutation ends. Status: Average loss: 3.7830151319503784, Average MLM accuracy: 0.9004037685060565, Average SC accuracy: 0.6363636363636364, Average S2S accuracy: 0.9166164356037774


100%|██████████| 13/13 [00:06<00:00,  2.05it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:21:40.766755] Epoch: 2 training ends. Status: Average loss: 3.9182595106271596, Average MLM accuracy: 0.8799941460559052, Average SC accuracy: 0.6995073891625616, Average S2S accuracy: 0.910434383402925


100%|██████████| 2/2 [00:00<00:00,  6.20it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:21:41.090493] Epoch: 2 Evalutation ends. Status: Average loss: 3.7189953327178955, Average MLM accuracy: 0.8613728129205922, Average SC accuracy: 0.8636363636363636, Average S2S accuracy: 0.9158127385975487


100%|██████████| 13/13 [00:06<00:00,  2.01it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:21:47.546133] Epoch: 3 training ends. Status: Average loss: 3.2894187707167406, Average MLM accuracy: 0.8824820722962096, Average SC accuracy: 0.9014778325123153, Average S2S accuracy: 0.9120739785322345


100%|██████████| 2/2 [00:00<00:00,  6.20it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:21:47.870210] Epoch: 3 Evalutation ends. Status: Average loss: 2.6204771995544434, Average MLM accuracy: 0.9084791386271871, Average SC accuracy: 0.9545454545454546, Average S2S accuracy: 0.918625678119349


100%|██████████| 13/13 [00:06<00:00,  2.02it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:21:54.301131] Epoch: 4 training ends. Status: Average loss: 2.9602525601020226, Average MLM accuracy: 0.8824820722962096, Average SC accuracy: 0.9901477832512315, Average S2S accuracy: 0.9151782786437269


100%|██████████| 2/2 [00:00<00:00,  6.38it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:21:54.616385] Epoch: 4 Evalutation ends. Status: Average loss: 2.5711669325828552, Average MLM accuracy: 0.9165545087483177, Average SC accuracy: 0.9090909090909091, Average S2S accuracy: 0.9252561784207354


100%|██████████| 13/13 [00:06<00:00,  2.03it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:22:01.031093] Epoch: 5 training ends. Status: Average loss: 2.7085205775040846, Average MLM accuracy: 0.8877506219815601, Average SC accuracy: 0.9901477832512315, Average S2S accuracy: 0.9217147978925737


100%|██████████| 2/2 [00:00<00:00,  5.96it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:22:01.368458] Epoch: 5 Evalutation ends. Status: Average loss: 2.4492196440696716, Average MLM accuracy: 0.9111709286675639, Average SC accuracy: 0.9545454545454546, Average S2S accuracy: 0.9318866787221217


100%|██████████| 13/13 [00:06<00:00,  2.02it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:22:07.810969] Epoch: 6 training ends. Status: Average loss: 2.6607452722696157, Average MLM accuracy: 0.8858480901507391, Average SC accuracy: 0.9950738916256158, Average S2S accuracy: 0.9240102310736069


100%|██████████| 2/2 [00:00<00:00,  6.53it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:22:08.118940] Epoch: 6 Evalutation ends. Status: Average loss: 2.5089093446731567, Average MLM accuracy: 0.8909825033647375, Average SC accuracy: 1.0, Average S2S accuracy: 0.9300783604581073


100%|██████████| 13/13 [00:06<00:00,  2.02it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:22:14.547853] Epoch: 7 training ends. Status: Average loss: 2.4001229909750132, Average MLM accuracy: 0.8979950241475194, Average SC accuracy: 0.9950738916256158, Average S2S accuracy: 0.9273112826006165


100%|██████████| 2/2 [00:00<00:00,  6.20it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

[2024-01-07 16:22:14.871635] Epoch: 7 Evalutation ends. Status: Average loss: 2.035104811191559, Average MLM accuracy: 0.9125168236877523, Average SC accuracy: 1.0, Average S2S accuracy: 0.9330922242314648


100%|██████████| 15/15 [00:33<00:00,  2.24s/it]


[INFO] 202305091133 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202305221222 is saved
[INFO] finish 202305221222
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202305221222/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202305221222.
[INFO] SimpleTransformerStack loaded for 202305221222.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202305221222.
[INFO] SimpleDecoderHead_S2S loaded for 202305221222.
[INFO] DistilBertEncoderHead_MLM loaded for 202305221222.


100%|██████████| 28/28 [00:14<00:00,  1.97it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2024-01-07 16:23:34.314518] Epoch: 0 training ends. Status: Average loss: 11.677641153335571, Average MLM accuracy: 0.8235585076314302, Average SC accuracy: 0.28699551569506726, Average S2S accuracy: 0.8468710075915152


100%|██████████| 4/4 [00:00<00:00,  5.53it/s]
  0%|          | 0/28 [00:00<?, ?it/s]

[2024-01-07 16:23:35.038880] Epoch: 0 Evalutation ends. Status: Average loss: 8.49845266342163, Average MLM accuracy: 0.8103674540682415, Average SC accuracy: 0.5510204081632653, Average S2S accuracy: 0.8522727272727273


100%|██████████| 28/28 [00:14<00:00,  1.98it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2024-01-07 16:23:49.184051] Epoch: 1 training ends. Status: Average loss: 8.75386667251587, Average MLM accuracy: 0.8196014697569248, Average SC accuracy: 0.6121076233183856, Average S2S accuracy: 0.8536072895439811


100%|██████████| 4/4 [00:00<00:00,  5.44it/s]
  0%|          | 0/28 [00:00<?, ?it/s]

[2024-01-07 16:23:49.921568] Epoch: 1 Evalutation ends. Status: Average loss: 6.667018413543701, Average MLM accuracy: 0.8116797900262467, Average SC accuracy: 0.7346938775510204, Average S2S accuracy: 0.8575626959247649


100%|██████████| 28/28 [00:14<00:00,  1.98it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2024-01-07 16:24:04.046441] Epoch: 2 training ends. Status: Average loss: 6.957707047462463, Average MLM accuracy: 0.823063877897117, Average SC accuracy: 0.8789237668161435, Average S2S accuracy: 0.8637011540370179


100%|██████████| 4/4 [00:00<00:00,  5.80it/s]
  0%|          | 0/28 [00:00<?, ?it/s]

[2024-01-07 16:24:04.737870] Epoch: 2 Evalutation ends. Status: Average loss: 5.5328062772750854, Average MLM accuracy: 0.8313648293963255, Average SC accuracy: 0.9591836734693877, Average S2S accuracy: 0.8720611285266457


100%|██████████| 28/28 [00:14<00:00,  1.98it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2024-01-07 16:24:18.881075] Epoch: 3 training ends. Status: Average loss: 5.796312161854336, Average MLM accuracy: 0.8379027699265121, Average SC accuracy: 0.9484304932735426, Average S2S accuracy: 0.8740906547285954


100%|██████████| 4/4 [00:00<00:00,  5.58it/s]
  0%|          | 0/28 [00:00<?, ?it/s]

[2024-01-07 16:24:19.599913] Epoch: 3 Evalutation ends. Status: Average loss: 4.659641087055206, Average MLM accuracy: 0.8346456692913385, Average SC accuracy: 0.9183673469387755, Average S2S accuracy: 0.8787225705329154


100%|██████████| 28/28 [00:13<00:00,  2.02it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2024-01-07 16:24:33.464963] Epoch: 4 training ends. Status: Average loss: 4.875838705471584, Average MLM accuracy: 0.848855285472018, Average SC accuracy: 0.9910313901345291, Average S2S accuracy: 0.8840261426866995


100%|██████████| 4/4 [00:00<00:00,  5.63it/s]
  0%|          | 0/28 [00:00<?, ?it/s]

[2024-01-07 16:24:34.178516] Epoch: 4 Evalutation ends. Status: Average loss: 3.8357666730880737, Average MLM accuracy: 0.863517060367454, Average SC accuracy: 0.9591836734693877, Average S2S accuracy: 0.8925352664576802


100%|██████████| 28/28 [00:13<00:00,  2.00it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2024-01-07 16:24:48.168974] Epoch: 5 training ends. Status: Average loss: 4.084079725401742, Average MLM accuracy: 0.8578292820802713, Average SC accuracy: 0.9887892376681614, Average S2S accuracy: 0.8967173823526307


100%|██████████| 4/4 [00:00<00:00,  5.57it/s]
  0%|          | 0/28 [00:00<?, ?it/s]

[2024-01-07 16:24:48.889495] Epoch: 5 Evalutation ends. Status: Average loss: 4.040361642837524, Average MLM accuracy: 0.865485564304462, Average SC accuracy: 1.0, Average S2S accuracy: 0.9009600313479624


100%|██████████| 28/28 [00:13<00:00,  2.02it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2024-01-07 16:25:02.779197] Epoch: 6 training ends. Status: Average loss: 3.538552258695875, Average MLM accuracy: 0.8668032786885246, Average SC accuracy: 1.0, Average S2S accuracy: 0.9057976370220988


100%|██████████| 4/4 [00:00<00:00,  5.69it/s]
  0%|          | 0/28 [00:00<?, ?it/s]

[2024-01-07 16:25:03.483821] Epoch: 6 Evalutation ends. Status: Average loss: 3.005684494972229, Average MLM accuracy: 0.8779527559055118, Average SC accuracy: 0.9795918367346939, Average S2S accuracy: 0.9105603448275862


100%|██████████| 28/28 [00:14<00:00,  1.95it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2024-01-07 16:25:17.859566] Epoch: 7 training ends. Status: Average loss: 3.0305339821747372, Average MLM accuracy: 0.8785330695308083, Average SC accuracy: 0.9977578475336323, Average S2S accuracy: 0.9147406320279587


100%|██████████| 4/4 [00:00<00:00,  5.63it/s]
  0%|          | 0/69 [00:00<?, ?it/s]

[2024-01-07 16:25:18.570885] Epoch: 7 Evalutation ends. Status: Average loss: 2.6374127864837646, Average MLM accuracy: 0.8891076115485564, Average SC accuracy: 1.0, Average S2S accuracy: 0.9148706896551724


100%|██████████| 69/69 [03:34<00:00,  3.11s/it]


[INFO] 202305221222 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202307030930 is saved
[INFO] finish 202307030930
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202307030930/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202307030930.
[INFO] SimpleTransformerStack loaded for 202307030930.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202307030930.
[INFO] SimpleDecoderHead_S2S loaded for 202307030930.
[INFO] DistilBertEncoderHead_MLM loaded for 202307030930.


100%|██████████| 148/148 [01:14<00:00,  1.98it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 16:30:44.275576] Epoch: 0 training ends. Status: Average loss: 4.383730024904819, Average MLM accuracy: 0.888589077153023, Average SC accuracy: 0.7257723233178164, Average S2S accuracy: 0.9140849998122185


100%|██████████| 17/17 [00:03<00:00,  4.67it/s]
  0%|          | 0/148 [00:00<?, ?it/s]

[2024-01-07 16:30:47.916816] Epoch: 0 Evalutation ends. Status: Average loss: 3.0224389609168556, Average MLM accuracy: 0.9004662004662005, Average SC accuracy: 0.9083969465648855, Average S2S accuracy: 0.9257517978721552


100%|██████████| 148/148 [01:14<00:00,  1.99it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 16:32:02.456805] Epoch: 1 training ends. Status: Average loss: 2.530618658742389, Average MLM accuracy: 0.9054005793420896, Average SC accuracy: 0.9784172661870504, Average S2S accuracy: 0.9316554228346317


100%|██████████| 17/17 [00:03<00:00,  4.71it/s]
  0%|          | 0/148 [00:00<?, ?it/s]

[2024-01-07 16:32:06.067686] Epoch: 1 Evalutation ends. Status: Average loss: 2.140017221955692, Average MLM accuracy: 0.9156177156177157, Average SC accuracy: 0.9961832061068703, Average S2S accuracy: 0.9372442494210242


100%|██████████| 148/148 [01:14<00:00,  1.98it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 16:33:20.936005] Epoch: 2 training ends. Status: Average loss: 1.9763560077628575, Average MLM accuracy: 0.9131119135484042, Average SC accuracy: 0.996191282268303, Average S2S accuracy: 0.9431120739582571


100%|██████████| 17/17 [00:03<00:00,  4.67it/s]
  0%|          | 0/148 [00:00<?, ?it/s]

[2024-01-07 16:33:24.574527] Epoch: 2 Evalutation ends. Status: Average loss: 1.8203080962685978, Average MLM accuracy: 0.9163170163170163, Average SC accuracy: 0.9961832061068703, Average S2S accuracy: 0.9450800118407077


100%|██████████| 148/148 [01:14<00:00,  1.99it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 16:34:39.114391] Epoch: 3 training ends. Status: Average loss: 1.6174347324145806, Average MLM accuracy: 0.9232834675872651, Average SC accuracy: 0.9991536182818451, Average S2S accuracy: 0.9505383400506418


100%|██████████| 17/17 [00:03<00:00,  4.84it/s]
  0%|          | 0/148 [00:00<?, ?it/s]

[2024-01-07 16:34:42.625866] Epoch: 3 Evalutation ends. Status: Average loss: 1.5971257265876322, Average MLM accuracy: 0.9238927738927739, Average SC accuracy: 1.0, Average S2S accuracy: 0.9481620783924498


100%|██████████| 148/148 [01:14<00:00,  1.98it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 16:35:57.499054] Epoch: 4 training ends. Status: Average loss: 1.418057564142588, Average MLM accuracy: 0.9273044720447601, Average SC accuracy: 0.9987304274227676, Average S2S accuracy: 0.9560057480920406


100%|██████████| 17/17 [00:03<00:00,  4.72it/s]
  0%|          | 0/148 [00:00<?, ?it/s]

[2024-01-07 16:36:01.104882] Epoch: 4 Evalutation ends. Status: Average loss: 1.4280515488456278, Average MLM accuracy: 0.9284382284382284, Average SC accuracy: 1.0, Average S2S accuracy: 0.9529157742603911


100%|██████████| 148/148 [01:14<00:00,  1.98it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 16:37:15.682910] Epoch: 5 training ends. Status: Average loss: 1.2791631890309823, Average MLM accuracy: 0.9311138446887028, Average SC accuracy: 0.9995768091409225, Average S2S accuracy: 0.9597752156028677


100%|██████████| 17/17 [00:03<00:00,  4.80it/s]
  0%|          | 0/148 [00:00<?, ?it/s]

[2024-01-07 16:37:19.228050] Epoch: 5 Evalutation ends. Status: Average loss: 1.2857033820713268, Average MLM accuracy: 0.9347319347319347, Average SC accuracy: 0.9923664122137404, Average S2S accuracy: 0.9555451078723293


100%|██████████| 148/148 [01:14<00:00,  1.98it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 16:38:33.934376] Epoch: 6 training ends. Status: Average loss: 1.1816369041397765, Average MLM accuracy: 0.9353729349364444, Average SC accuracy: 1.0, Average S2S accuracy: 0.9628765761296049


100%|██████████| 17/17 [00:03<00:00,  4.78it/s]
  0%|          | 0/148 [00:00<?, ?it/s]

[2024-01-07 16:38:37.496853] Epoch: 6 Evalutation ends. Status: Average loss: 1.1817616960581612, Average MLM accuracy: 0.9406759906759907, Average SC accuracy: 1.0, Average S2S accuracy: 0.9580699646520051


100%|██████████| 148/148 [01:14<00:00,  1.98it/s]
  0%|          | 0/17 [00:00<?, ?it/s]

[2024-01-07 16:39:52.283446] Epoch: 7 training ends. Status: Average loss: 0.9926001312764915, Average MLM accuracy: 0.9424096927370607, Average SC accuracy: 1.0, Average S2S accuracy: 0.968851982676658


100%|██████████| 17/17 [00:03<00:00,  4.65it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

[2024-01-07 16:39:55.939769] Epoch: 7 Evalutation ends. Status: Average loss: 0.9492937081000384, Average MLM accuracy: 0.95, Average SC accuracy: 1.0, Average S2S accuracy: 0.9659057270716885


100%|██████████| 10/10 [00:30<00:00,  3.05s/it]


[INFO] 202307030930 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202307050919 is saved
[INFO] finish 202307050919
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202307050919/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202307050919.
[INFO] SimpleTransformerStack loaded for 202307050919.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202307050919.
[INFO] SimpleDecoderHead_S2S loaded for 202307050919.
[INFO] DistilBertEncoderHead_MLM loaded for 202307050919.


100%|██████████| 13/13 [00:06<00:00,  1.99it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:41:23.195254] Epoch: 0 training ends. Status: Average loss: 10.127184867858887, Average MLM accuracy: 0.8559977097051246, Average SC accuracy: 0.3399014778325123, Average S2S accuracy: 0.8861666844976998


100%|██████████| 2/2 [00:00<00:00,  6.23it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:41:23.517930] Epoch: 0 Evalutation ends. Status: Average loss: 10.33432674407959, Average MLM accuracy: 0.8442857142857143, Average SC accuracy: 0.4090909090909091, Average S2S accuracy: 0.8869098712446352


100%|██████████| 13/13 [00:06<00:00,  2.01it/s]
 50%|█████     | 1/2 [00:00<00:00,  5.10it/s]

[2024-01-07 16:41:29.994347] Epoch: 1 training ends. Status: Average loss: 8.816838961381178, Average MLM accuracy: 0.8704551961064987, Average SC accuracy: 0.41379310344827586, Average S2S accuracy: 0.8893120787418423


100%|██████████| 2/2 [00:00<00:00,  6.92it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:41:30.285045] Epoch: 1 Evalutation ends. Status: Average loss: 9.272378921508789, Average MLM accuracy: 0.8414285714285714, Average SC accuracy: 0.45454545454545453, Average S2S accuracy: 0.8854077253218884


100%|██████████| 13/13 [00:06<00:00,  2.04it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:41:36.665529] Epoch: 2 training ends. Status: Average loss: 6.73866448035607, Average MLM accuracy: 0.8694531920984827, Average SC accuracy: 0.49261083743842365, Average S2S accuracy: 0.8948753610784209


100%|██████████| 2/2 [00:00<00:00,  6.27it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:41:36.985883] Epoch: 2 Evalutation ends. Status: Average loss: 5.948390483856201, Average MLM accuracy: 0.8657142857142858, Average SC accuracy: 0.5909090909090909, Average S2S accuracy: 0.8924892703862661


100%|██████████| 13/13 [00:06<00:00,  1.99it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:41:43.506367] Epoch: 3 training ends. Status: Average loss: 5.179042596083421, Average MLM accuracy: 0.8704551961064987, Average SC accuracy: 0.645320197044335, Average S2S accuracy: 0.8993045897079277


100%|██████████| 2/2 [00:00<00:00,  6.20it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:41:43.830693] Epoch: 3 Evalutation ends. Status: Average loss: 5.349549412727356, Average MLM accuracy: 0.8585714285714285, Average SC accuracy: 0.7727272727272727, Average S2S accuracy: 0.898068669527897


100%|██████████| 13/13 [00:06<00:00,  1.98it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:41:50.384941] Epoch: 4 training ends. Status: Average loss: 4.339795992924617, Average MLM accuracy: 0.8734612081305468, Average SC accuracy: 0.7586206896551724, Average S2S accuracy: 0.9033700652615813


100%|██████████| 2/2 [00:00<00:00,  6.41it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:41:50.698774] Epoch: 4 Evalutation ends. Status: Average loss: 4.755452394485474, Average MLM accuracy: 0.8442857142857143, Average SC accuracy: 0.8636363636363636, Average S2S accuracy: 0.9049356223175966


100%|██████████| 13/13 [00:06<00:00,  1.98it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:41:57.268698] Epoch: 5 training ends. Status: Average loss: 3.8815160531264086, Average MLM accuracy: 0.8741769252791297, Average SC accuracy: 0.9113300492610837, Average S2S accuracy: 0.9073927463357226


100%|██████████| 2/2 [00:00<00:00,  6.16it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:41:57.594376] Epoch: 5 Evalutation ends. Status: Average loss: 4.096198678016663, Average MLM accuracy: 0.8871428571428571, Average SC accuracy: 0.9545454545454546, Average S2S accuracy: 0.9092274678111588


100%|██████████| 13/13 [00:06<00:00,  1.98it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:42:04.158729] Epoch: 6 training ends. Status: Average loss: 3.7142642644735484, Average MLM accuracy: 0.8697394789579158, Average SC accuracy: 0.9458128078817734, Average S2S accuracy: 0.9128490424735209


100%|██████████| 2/2 [00:00<00:00,  6.25it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

[2024-01-07 16:42:04.479915] Epoch: 6 Evalutation ends. Status: Average loss: 3.581321358680725, Average MLM accuracy: 0.8742857142857143, Average SC accuracy: 0.9090909090909091, Average S2S accuracy: 0.9154506437768241


100%|██████████| 13/13 [00:06<00:00,  2.01it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:42:10.958230] Epoch: 7 training ends. Status: Average loss: 3.202613280369685, Average MLM accuracy: 0.8847695390781564, Average SC accuracy: 0.9655172413793104, Average S2S accuracy: 0.9152241360864448


100%|██████████| 2/2 [00:00<00:00,  6.23it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2024-01-07 16:42:11.280083] Epoch: 7 Evalutation ends. Status: Average loss: 3.8871328830718994, Average MLM accuracy: 0.8742857142857143, Average SC accuracy: 0.8636363636363636, Average S2S accuracy: 0.9145922746781115


100%|██████████| 6/6 [00:18<00:00,  3.07s/it]


[INFO] 202307050919 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202307070327 is saved
[INFO] finish 202307070327
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202307070327/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202307070327.
[INFO] SimpleTransformerStack loaded for 202307070327.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202307070327.
[INFO] SimpleDecoderHead_S2S loaded for 202307070327.
[INFO] DistilBertEncoderHead_MLM loaded for 202307070327.


100%|██████████| 7/7 [00:03<00:00,  1.92it/s]
100%|██████████| 1/1 [00:00<00:00,  5.73it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:43:10.383604] Epoch: 0 training ends. Status: Average loss: 7.752309117998395, Average MLM accuracy: 0.8958024691358024, Average SC accuracy: 0.4537037037037037, Average S2S accuracy: 0.9176518248444002
[2024-01-07 16:43:10.559775] Epoch: 0 Evalutation ends. Status: Average loss: 9.950197219848633, Average MLM accuracy: 0.8851351351351351, Average SC accuracy: 0.25, Average S2S accuracy: 0.9164149043303121


100%|██████████| 7/7 [00:03<00:00,  2.00it/s]
100%|██████████| 1/1 [00:00<00:00,  5.57it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:43:14.062637] Epoch: 1 training ends. Status: Average loss: 7.407309055328369, Average MLM accuracy: 0.8975308641975308, Average SC accuracy: 0.4722222222222222, Average S2S accuracy: 0.9174308547858432
[2024-01-07 16:43:14.243531] Epoch: 1 Evalutation ends. Status: Average loss: 8.206974029541016, Average MLM accuracy: 0.8918918918918919, Average SC accuracy: 0.4166666666666667, Average S2S accuracy: 0.9113796576032226


100%|██████████| 7/7 [00:03<00:00,  2.00it/s]
100%|██████████| 1/1 [00:00<00:00,  5.81it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:43:17.747706] Epoch: 2 training ends. Status: Average loss: 6.451399666922433, Average MLM accuracy: 0.8982716049382716, Average SC accuracy: 0.46296296296296297, Average S2S accuracy: 0.9205980922918278
[2024-01-07 16:43:17.920884] Epoch: 2 Evalutation ends. Status: Average loss: 7.234025478363037, Average MLM accuracy: 0.8963963963963963, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.9123867069486404


100%|██████████| 7/7 [00:03<00:00,  2.02it/s]
100%|██████████| 1/1 [00:00<00:00,  5.70it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:43:21.381747] Epoch: 3 training ends. Status: Average loss: 5.082991940634591, Average MLM accuracy: 0.9034567901234568, Average SC accuracy: 0.49074074074074076, Average S2S accuracy: 0.9214083158398704
[2024-01-07 16:43:21.559088] Epoch: 3 Evalutation ends. Status: Average loss: 5.596089839935303, Average MLM accuracy: 0.8873873873873874, Average SC accuracy: 0.3333333333333333, Average S2S accuracy: 0.9174219536757301


100%|██████████| 7/7 [00:03<00:00,  2.04it/s]
100%|██████████| 1/1 [00:00<00:00,  5.81it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:43:24.986147] Epoch: 4 training ends. Status: Average loss: 4.0482359273093085, Average MLM accuracy: 0.8967901234567901, Average SC accuracy: 0.6666666666666666, Average S2S accuracy: 0.9232129046514197
[2024-01-07 16:43:25.159258] Epoch: 4 Evalutation ends. Status: Average loss: 3.9789888858795166, Average MLM accuracy: 0.9054054054054054, Average SC accuracy: 0.5833333333333334, Average S2S accuracy: 0.9224572004028198


100%|██████████| 7/7 [00:03<00:00,  2.04it/s]
100%|██████████| 1/1 [00:00<00:00,  5.96it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:43:28.588889] Epoch: 5 training ends. Status: Average loss: 3.292083978652954, Average MLM accuracy: 0.9101234567901234, Average SC accuracy: 0.8333333333333334, Average S2S accuracy: 0.9232497329945126
[2024-01-07 16:43:28.757707] Epoch: 5 Evalutation ends. Status: Average loss: 3.4530229568481445, Average MLM accuracy: 0.9121621621621622, Average SC accuracy: 0.8333333333333334, Average S2S accuracy: 0.9261497146693521


100%|██████████| 7/7 [00:03<00:00,  2.04it/s]
100%|██████████| 1/1 [00:00<00:00,  5.87it/s]
  0%|          | 0/7 [00:00<?, ?it/s]

[2024-01-07 16:43:32.191332] Epoch: 6 training ends. Status: Average loss: 2.9023853029523576, Average MLM accuracy: 0.9118518518518518, Average SC accuracy: 0.9537037037037037, Average S2S accuracy: 0.926932567303797
[2024-01-07 16:43:32.362757] Epoch: 6 Evalutation ends. Status: Average loss: 3.205089569091797, Average MLM accuracy: 0.9256756756756757, Average SC accuracy: 0.9166666666666666, Average S2S accuracy: 0.9197717354817053


100%|██████████| 7/7 [00:03<00:00,  2.03it/s]
100%|██████████| 1/1 [00:00<00:00,  5.77it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

[2024-01-07 16:43:35.809506] Epoch: 7 training ends. Status: Average loss: 2.9176201139177596, Average MLM accuracy: 0.9017283950617284, Average SC accuracy: 0.9814814814814815, Average S2S accuracy: 0.9284425293706036
[2024-01-07 16:43:35.983751] Epoch: 7 Evalutation ends. Status: Average loss: 3.012810468673706, Average MLM accuracy: 0.9099099099099099, Average SC accuracy: 1.0, Average S2S accuracy: 0.9288351795904666


100%|██████████| 12/12 [00:35<00:00,  2.96s/it]


[INFO] 202307070327 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202307191009 is saved
[INFO] finish 202307191009
[INFO] <class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'> loaded for checkpoints_distilbert_0/202307191009/DistilBertModel.pt
[INFO] SimpleEmbedder loaded for 202307191009.
[INFO] SimpleTransformerStack loaded for 202307191009.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202307191009.
[INFO] SimpleDecoderHead_S2S loaded for 202307191009.
[INFO] DistilBertEncoderHead_MLM loaded for 202307191009.


100%|██████████| 14/14 [00:06<00:00,  2.05it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:44:46.883152] Epoch: 0 training ends. Status: Average loss: 9.27550983428955, Average MLM accuracy: 0.8641602634467618, Average SC accuracy: 0.3611111111111111, Average S2S accuracy: 0.8883483187584678


100%|██████████| 2/2 [00:00<00:00,  5.88it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[2024-01-07 16:44:47.225337] Epoch: 0 Evalutation ends. Status: Average loss: 9.70907211303711, Average MLM accuracy: 0.8520408163265306, Average SC accuracy: 0.2916666666666667, Average S2S accuracy: 0.8923518164435946


100%|██████████| 14/14 [00:06<00:00,  2.05it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:44:54.069880] Epoch: 1 training ends. Status: Average loss: 8.179764986038208, Average MLM accuracy: 0.8630625686059276, Average SC accuracy: 0.39351851851851855, Average S2S accuracy: 0.8914069877242682


100%|██████████| 2/2 [00:00<00:00,  5.74it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[2024-01-07 16:44:54.419548] Epoch: 1 Evalutation ends. Status: Average loss: 8.96018362045288, Average MLM accuracy: 0.8469387755102041, Average SC accuracy: 0.2916666666666667, Average S2S accuracy: 0.8862332695984704


100%|██████████| 14/14 [00:06<00:00,  2.04it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:45:01.271198] Epoch: 2 training ends. Status: Average loss: 6.85984992980957, Average MLM accuracy: 0.8703347969264544, Average SC accuracy: 0.46296296296296297, Average S2S accuracy: 0.8943014328529786


100%|██████████| 2/2 [00:00<00:00,  6.04it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[2024-01-07 16:45:01.603512] Epoch: 2 Evalutation ends. Status: Average loss: 7.100705146789551, Average MLM accuracy: 0.8724489795918368, Average SC accuracy: 0.2916666666666667, Average S2S accuracy: 0.8959847036328872


100%|██████████| 14/14 [00:06<00:00,  2.04it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:45:08.460685] Epoch: 3 training ends. Status: Average loss: 5.537075247083392, Average MLM accuracy: 0.8743139407244785, Average SC accuracy: 0.5740740740740741, Average S2S accuracy: 0.9002545469474894


100%|██████████| 2/2 [00:00<00:00,  5.66it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[2024-01-07 16:45:08.816052] Epoch: 3 Evalutation ends. Status: Average loss: 5.509334087371826, Average MLM accuracy: 0.8596938775510204, Average SC accuracy: 0.7083333333333334, Average S2S accuracy: 0.8925430210325048


100%|██████████| 14/14 [00:06<00:00,  2.03it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:45:15.705565] Epoch: 4 training ends. Status: Average loss: 4.638342874390738, Average MLM accuracy: 0.8703347969264544, Average SC accuracy: 0.7916666666666666, Average S2S accuracy: 0.9029026563205649


100%|██████████| 2/2 [00:00<00:00,  5.81it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[2024-01-07 16:45:16.051498] Epoch: 4 Evalutation ends. Status: Average loss: 4.528460264205933, Average MLM accuracy: 0.8788265306122449, Average SC accuracy: 0.875, Average S2S accuracy: 0.904397705544933


100%|██████████| 14/14 [00:06<00:00,  2.05it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:45:22.881904] Epoch: 5 training ends. Status: Average loss: 4.1201737608228415, Average MLM accuracy: 0.8792535675082327, Average SC accuracy: 0.8703703703703703, Average S2S accuracy: 0.9070698361867225


100%|██████████| 2/2 [00:00<00:00,  6.08it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[2024-01-07 16:45:23.213289] Epoch: 5 Evalutation ends. Status: Average loss: 4.507698655128479, Average MLM accuracy: 0.889030612244898, Average SC accuracy: 0.875, Average S2S accuracy: 0.9063097514340345


100%|██████████| 14/14 [00:06<00:00,  2.05it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:45:30.040286] Epoch: 6 training ends. Status: Average loss: 3.887909037726266, Average MLM accuracy: 0.8695115257958288, Average SC accuracy: 0.9212962962962963, Average S2S accuracy: 0.9103132569692491


100%|██████████| 2/2 [00:00<00:00,  5.91it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

[2024-01-07 16:45:30.380627] Epoch: 6 Evalutation ends. Status: Average loss: 3.2564752101898193, Average MLM accuracy: 0.9094387755102041, Average SC accuracy: 0.9166666666666666, Average S2S accuracy: 0.9133843212237094


100%|██████████| 14/14 [00:06<00:00,  2.05it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2024-01-07 16:45:37.206773] Epoch: 7 training ends. Status: Average loss: 3.226884961128235, Average MLM accuracy: 0.8887211855104281, Average SC accuracy: 0.9629629629629629, Average S2S accuracy: 0.9178470254957507


100%|██████████| 2/2 [00:00<00:00,  5.85it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

[2024-01-07 16:45:37.551015] Epoch: 7 Evalutation ends. Status: Average loss: 3.1616878509521484, Average MLM accuracy: 0.8979591836734694, Average SC accuracy: 0.9583333333333334, Average S2S accuracy: 0.9208413001912046


100%|██████████| 11/11 [00:42<00:00,  3.90s/it]


[INFO] 202307191009 is cleared.
[INFO] DistilBertModel is saved, 6.103515625e-05 MB
[INFO] SimpleEmbedder is saved, 90.92114543914795 MB
[INFO] SimpleTransformerStack is saved, 216.4018907546997 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] DistilBertEncoderHead_MLM is saved, 91.68087100982666 MB
[INFO] 202308030456 is saved
[INFO] finish 202308030456


In [9]:
print('done')

done
