In [1]:
import torch
import os
from os.path import exists
import torch.nn as nn
# from torch.nn.functional import log_softmax, pad, one_hot
import math
import copy
import time
from torch.optim.lr_scheduler import LambdaLR
import pandas as pd
from torch.utils.data import DataLoader
import random
import json
import csv
from pathlib import Path
import shutil
import re

### utils.py ###

class Dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
    
    def __iadd__(self, other):
        for k, v in self.items():
            if k in other and other[k]:
                self[k] += other[k]
            # end
        # end

        return self
    # end
# end



# Takes the file paths as arguments
def parse_csv_file_to_json(path_file_csv):
    # create a dictionary
    elements = []

    # Open a csv reader called DictReader
    with open(path_file_csv, encoding='utf-8') as file_csv:
    #with open(path_file_csv) as file_csv:
        reader_csv = csv.DictReader(file_csv)

        # Convert each row into a dictionary
        # and add it to data
        for dict_head_value in reader_csv:
            element = {}

            for head, value in dict_head_value.items():
                #print(value)
                if value and (value[0] in ["[", "{"]):
                    #element[head] = eval(value)
                    element[head] = value
                else:
                    element[head] = value

            elements.append(element)
        # end
    # end

    return elements
# end

### utils.py ###




### core.py ###

"Produce N identical layers."
def clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
# end



class MultiHeadedAttention(nn.Module):

    "Take in model size and number of heads."
    def __init__(self, h, d_model, dropout=0.1):
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        # We assume d_v always equals d_k
        self.d_k = d_model // h
        self.h = h
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)
    # end


    "Compute 'Scaled Dot Product Attention'"
    def attention(self, query, key, value, mask=None, dropout=None):
        d_k = query.size(-1)
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
        if mask is not None:
            # print('jinyuj: scores: {}, mask: {}'.format(scores.shape, mask.shape))
            scores = scores.masked_fill(mask == 0, -1e9)
        # end
        p_attn = scores.softmax(dim=-1)
        if dropout is not None:
            p_attn = dropout(p_attn)
        # end
        return torch.matmul(p_attn, value), p_attn
    # end


    "Implements Figure 2"
    def forward(self, query, key, value, mask=None):
        if mask is not None:
            # Same mask applied to all h heads.
            mask = mask.unsqueeze(1)
        nbatches = query.size(0)

        # 1) Do all the linear projections in batch from d_model => h x d_k
        query, key, value = [
            lin(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
            for lin, x in zip(self.linears, (query, key, value))
        ]

        # 2) Apply attention on all the projected vectors in batch.
        x, self.attn = self.attention(
            query, key, value, mask=mask, dropout=self.dropout
        )

        # 3) "Concat" using a view and apply a final linear.
        x = (
            x.transpose(1, 2)
            .contiguous()
            .view(nbatches, -1, self.h * self.d_k)
        )
        del query
        del key
        del value
        return self.linears[-1](x)
    # end
# end class


"""
A residual connection followed by a layer norm.
Note for code simplicity the norm is first as opposed to last.
"""
class ResidualLayer(nn.Module):

    def __init__(self, size, dropout=0.1, eps=1e-6):
        super(ResidualLayer, self).__init__()
        self.norm = torch.nn.LayerNorm(size, eps)
        self.dropout = nn.Dropout(p=dropout)
    # end

    "Apply residual connection to any sublayer with the same size."
    def forward(self, x, sublayer):
        return x + self.dropout(sublayer(self.norm(x)))
    # end
# end class


class PositionwiseFeedForward(nn.Module):
    "Implements FFN equation."

    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.w_2(self.dropout(self.w_1(x).relu()))
    # end
# end


class SimpleIDEmbeddings(nn.Module):
    def __init__(self, size_vocab, dim_hidden, id_pad):
        super(SimpleIDEmbeddings, self).__init__()
        self.lut = nn.Embedding(size_vocab, dim_hidden, padding_idx=id_pad)
        self.dim_hidden = dim_hidden

    def forward(self, x):
        result = self.lut(x)
        return result * math.sqrt(self.dim_hidden)
    # end

    def get_shape(self):
        return (self.lut.num_embeddings, self.lut.embedding_dim)
    # end
# end


"Implement the PE function."
class PositionalEncoding(nn.Module):

    def __init__(self, dim_positional, max_len=512):
        super(PositionalEncoding, self).__init__()

        # Compute the positional encodings once in log space.
        self.dim_positional = dim_positional
        pe = torch.zeros(max_len, dim_positional)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, dim_positional, 2) * -(math.log(10000.0) / dim_positional)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).to('cuda')
        self.register_buffer("pe", pe)

    def forward(self, x):
        x = x + self.pe[:, : x.size(1)].requires_grad_(False)
        return x
    # end
# end


class SimpleEmbedder(nn.Module):    # no segment embedder as we do not need that
    def __init__(self, size_vocab=None, dim_hidden=128, dropout=0.1, id_pad=0):
        super(SimpleEmbedder, self).__init__()
        self.size_vocab = size_vocab
        self.dim_hidden = dim_hidden
        self.id_pad = id_pad

        self.embedder = nn.Sequential(
            SimpleIDEmbeddings(size_vocab, dim_hidden, id_pad),
            PositionalEncoding(dim_hidden),
            nn.Dropout(p=dropout)
        )
    # end

    def forward(self, ids_input):   # (batch, seqs_with_padding)
        return self.embedder(ids_input)
    # end

    def get_vocab_size(self):
        return self.size_vocab
    # end
# end

### core.py ###



class SimpleEncoderLayer(nn.Module):

    def __init__(self, dim_hidden, dim_feedforward, n_head, dropout=0.1):
        super(SimpleEncoderLayer, self).__init__()

        self.n_head = n_head
        self.dim_hidden = dim_hidden
        self.dim_feedforward = dim_feedforward

        self.layer_attention = MultiHeadedAttention(n_head, dim_hidden)
        self.layer_feedforward = PositionwiseFeedForward(dim_hidden, dim_feedforward, dropout)
        self.layers_residual = clones(ResidualLayer(dim_hidden, dropout), 2)
    # end

    def forward(self, embeddings, masks, *args):
        embeddings = self.layers_residual[0](embeddings, lambda embeddings: self.layer_attention(embeddings, embeddings, embeddings, masks))
        return self.layers_residual[1](embeddings, self.layer_feedforward)
    # end
# end



class SimpleDecoderLayer(nn.Module):

    def __init__(self, dim_hidden, dim_feedforward, n_head, dropout=0.1):
        super(SimpleDecoderLayer, self).__init__()

        self.n_head = n_head
        self.dim_hidden = dim_hidden
        self.dim_feedforward = dim_feedforward

        self.layer_attention_decoder = MultiHeadedAttention(n_head, dim_hidden)
        self.layer_attention_encoder = MultiHeadedAttention(n_head, dim_hidden)
        self.layer_feedforward = PositionwiseFeedForward(dim_hidden, dim_feedforward, dropout)
        self.layers_residual = clones(ResidualLayer(dim_hidden, dropout), 3)

    def forward(self, embeddings, masks_encoder, output_encoder, masks_decoder, *args):
        embeddings = self.layers_residual[0](embeddings, lambda embeddings: self.layer_attention_decoder(embeddings, embeddings, embeddings, masks_decoder))
        embeddings = self.layers_residual[1](embeddings, lambda embeddings: self.layer_attention_encoder(embeddings, output_encoder, output_encoder, masks_encoder))
        return self.layers_residual[2](embeddings, self.layer_feedforward)
    # end
# end


class SimpleTransformerStack(nn.Module):

    def __init__(self, obj_layer, n_layers):
        super(SimpleTransformerStack, self).__init__()
        self.layers = clones(obj_layer, n_layers)

        self.norm = torch.nn.LayerNorm(obj_layer.dim_hidden)
        self.keys_cache = ['output']
        self.cache = Dotdict({
            'output': None
        })
    # end

    def forward(self, embedding_encoder=None, masks_encoder=None, output_encoder=None, embedding_decoder=None, masks_decoder=None ,noncache=False, **kwargs):  # input -> (batch, len_seq, vocab)

        if output_encoder is not None and embedding_decoder is not None and masks_decoder is not None:
            embeddings = embedding_decoder
        else:
            embeddings = embedding_encoder
        # end

        for layer in self.layers:
            embeddings = layer(embeddings, masks_encoder, output_encoder, masks_decoder)
        # end

        output = self.norm(embeddings)

        if not noncache:
            self.cache.output = output
        # end

        return output
    # end

    # def get_vocab_size(self):
    #     return self.embedder.embedder_token.shape[-1]
    # # end

    def clear_cache(self):
        for key_cache in self.keys_cache:
            self.cache[key_cache] = None
        # end
    # end
# end


class SimpleEncoderDecoder(nn.Module):
    def __init__(self, encoder, decoder, embedder_encoder, embedder_decoder, pooling=False):
        super(SimpleEncoderDecoder, self).__init__()

        self.pooling = pooling
        
        self.embedder_encoder = embedder_encoder
        self.encoder = encoder

        self.embedder_decoder = embedder_decoder
        self.decoder = decoder
        
        self.keys_cache = ['output_encoder_pooled']
        self.cache = Dotdict({
            'output_encoder_pooled': None
        })
    # end

    def forward(self, ids_encoder=None, masks_encoder=None, ids_decoder=None, masks_decoder=None, nocache=False, **kwargs):
        
        output_encoder = self.embed_and_encode(ids_encoder=ids_encoder, masks_encoder=masks_encoder, nocache=nocache)
        output = output_encoder
        
        if self.pooling:
            output_encoder_refilled = output_encoder.masked_fill(masks_encoder.transpose(-1,-2)==False, 0)
            output_encoder_pooled = torch.mean(output_encoder_refilled, dim=-2)
            self.cache.output_encoder_pooled = output_encoder_pooled
            
            output_encoder_pooled_expanded = output_encoder_pooled.unsqueeze(-2).expand(output_encoder.shape)
            output = output_encoder_pooled_expanded
        # end
        
        if self.embedder_decoder and self.decoder:
            output_decoder = self.embed_and_decode(ids_decoder=ids_decoder, masks_encoder=masks_encoder, output_encoder=output, masks_decoder=masks_decoder, nocache=nocache)
            output = output_decoder
        # end if
        
        return output
    # end
    
    def embed_and_encode(self, ids_encoder=None, masks_encoder=None, nocache=False, **kwargs):
        self.encoder.clear_cache()
        
        embedding_encoder = self.embedder_encoder(ids_encoder)
        output_encoder = self.encoder(
            embedding_encoder=embedding_encoder,
            masks_encoder=masks_encoder,
            nocache=nocache
        )
        
        return output_encoder
    # end

    
    def embed_and_decode(self, ids_decoder=None, masks_encoder=None, output_encoder=None, masks_decoder=None, nocache=False, **kwargs):
        self.decoder.clear_cache()
        
        embedding_decoder = self.embedder_decoder(ids_decoder)
        output_decoder = self.decoder(
            masks_encoder=masks_encoder,
            output_encoder=output_encoder,    #(len_seq, dim_hidden) -> (1, dim_hidden)
            embedding_decoder=embedding_decoder,
            masks_decoder=masks_decoder,
            nocache=nocache
        )

        return output_decoder
    # end
    

    def clear_cache(self):
        self.encoder.clear_cache()
        
        for key_cache in self.keys_cache:
            self.cache[key_cache] = None
        # end
        
        if self.decoder:
            self.decoder.clear_cache()
        # end
    # end


    def get_vocab_size(self, name_embedder):
        embedder = getattr(self, f'embedder_{name_embedder}')
        return embedder.get_vocab_size()
    # end

# end

class LinearAndNorm(nn.Module):
    def __init__(self, dim_in = None, dim_out = None, dropout=0.1, eps_norm=1e-12):
        super(LinearAndNorm, self).__init__()

        self.linear = torch.nn.Linear(dim_in, dim_out)
        self.norm = torch.nn.LayerNorm(dim_out, eps_norm)
        self.dropout = torch.nn.Dropout(p=dropout)
    # end

    def forward(self, seqs_in):
        return self.dropout(self.norm(self.linear(seqs_in).relu()))
    # end
# end



class Batch:
    DEVICE = 'cuda'

    def __init__(self, **kwargs):
        self.kwargs = {}
        for k, v in kwargs.items():
            if v is not None and type(v) is not bool:
                self.kwargs[k] = v.to(Batch.DEVICE)
        # end
    # end

    def __call__(self):
        return self.kwargs
    # end
# end



class Collator_Base:

    def __init__(self, tokenizer, size_seq_max, need_masked=0.3):
        self.tokenizer = tokenizer
        self.size_seq_max = size_seq_max
        self.need_masked = need_masked

        index_special_token_2_id = {k: v for k, v in zip(tokenizer.all_special_tokens, tokenizer.all_special_ids)}

        self.id_pad = index_special_token_2_id['[PAD]']
        self.id_mask = index_special_token_2_id['[MASK]']
        self.id_cls = index_special_token_2_id['[CLS]']
        self.id_sep = index_special_token_2_id['[SEP]']
        self.id_unk = index_special_token_2_id['[UNK]']

        self.regex_special_token = re.compile(r'\[(PAD|MASK|CLS|SEP|EOL|UNK)\]')
    # end

    def _preprocess(self, line):
        line = re.sub(self.regex_special_token, r'<\1>', line)
        line = re.sub(r'''('|"|`){2}''', '', line)
        line = re.sub(r'\.{2,3}', '', line)
        line = re.sub(r' {2,}', ' ', line)
        line = line.lstrip().rstrip()
        return line
    # end

    # return masks_attention?, return masks_segment?
    def pad_sequences(self, sequences, size_seq_max, need_diagonal=False,
                      need_masked=0):
        id_pad = self.id_pad
        id_mask = self.id_mask

        sequences_padded = []
        sequences_masked_padded = []

        for sequence in sequences:
            len_seq = len(sequence)

            count_pad = size_seq_max - len_seq

            sequence = torch.LongTensor(sequence)
            sequence_padded = torch.cat((sequence, torch.LongTensor([id_pad] * count_pad)))
            sequences_padded.append(sequence_padded)

            if need_masked:
                index_masked = list(range(1, len_seq - 1))
                random.shuffle(index_masked)
                anchor_mask = int(need_masked * (len_seq - 2)) or 1
                index_masked = torch.LongTensor(index_masked[:anchor_mask])
                # index_masked = torch.LongTensor(index_masked[:int(need_masked * (len_seq-2))])

                sequence_masked = sequence.detach().clone()
                sequence_masked.index_fill_(0, index_masked, id_mask)
                sequence_masked_padded = torch.cat((sequence_masked, torch.LongTensor([id_pad] * count_pad)))

                sequences_masked_padded.append(sequence_masked_padded)
            # end
        #   # end for

        inputs = torch.stack(sequences_padded)  # (batch, size_seq_max)
        if need_masked:
            inputs_masked_padded = torch.stack(sequences_masked_padded)
        # end

        masks_segment = (inputs != self.id_pad).unsqueeze(-2)  # (nbatch, 1, seq)
        masks_attention = self.make_std_mask(inputs, self.id_pad) if need_diagonal else masks_segment

        if need_masked:
            masks_masked = (inputs_masked_padded != id_mask).unsqueeze(-2)
            masks_attention = masks_attention & masks_masked
            return inputs_masked_padded, masks_attention, masks_segment, inputs  # (inputs, masks_attention, masks_segment, labels)
        else:
            return inputs, masks_attention, masks_segment, None
        # end

    # end

    def subsequent_mask(self, size):
        "Mask out subsequent positions."
        attn_shape = (1, size, size)
        subsequent_mask = torch.triu(torch.ones(attn_shape), diagonal=1).type(
            torch.uint8
        )
        return subsequent_mask == 0

    def make_std_mask(self, tgt, pad):
        "Create a mask to hide padding and future words."
        tgt_mask = (tgt != pad).unsqueeze(-2)
        tgt_mask = tgt_mask & self.subsequent_mask(tgt.size(-1)).type_as(
            tgt_mask.data
        )
        return tgt_mask
    # end
# end


class Collator_SC(Collator_Base):

    def __call__(self, list_corpus_source):

        tokens_input_encoder = []
        tokens_input_decoder = []
        tokens_label_decoder = []
        labels_similarity = []
        labels_sc = []

        for corpus_source in list_corpus_source:  # (line0, line1, sim), output of zip remove single case
            if len(corpus_source) == 3:  # (line0, line1, sim)
                corpus_line = [corpus_source[0], corpus_source[1]]
                labels_similarity.append(corpus_source[2])
            elif len(corpus_source) == 2:  # (line, label_sc)
                corpus_line = [corpus_source[0]]
                labels_sc.append(corpus_source[1])
            else:
                corpus_line = [corpus_source[0]]
            # end

            for line in corpus_line:
                tokens = self.tokenizer.encode(self._preprocess(line), add_special_tokens=False)

                # TODO: check edge
                if len(tokens) > self.size_seq_max - 2:
                    tokens = tokens[:self.size_seq_max - 2]
                # end

                tokens_input_encoder.append([self.id_cls] + tokens + [self.id_sep])
                tokens_input_decoder.append([self.id_cls] + tokens)
                tokens_label_decoder.append(tokens + [self.id_sep])
            # end

        # end

        inputs_encoder, masks_encoder, segments_encoder, labels_encoder = self.pad_sequences(tokens_input_encoder,
                                                                                             self.size_seq_max,
                                                                                             need_masked=self.need_masked)
        inputs_decoder, masks_decoder, segments_decoder, _ = self.pad_sequences(tokens_input_decoder, self.size_seq_max,
                                                                                need_diagonal=True)
        labels_decoder, masks_label, segments_label, _ = self.pad_sequences(tokens_label_decoder, self.size_seq_max)
        # labels_similarity = torch.Tensor(labels_similarity).unsqueeze(0).transpose(0,1)
        labels_similarity = torch.Tensor(labels_similarity)
        labels_sc = torch.LongTensor(labels_sc)

        return Batch(
            ids_encoder=inputs_encoder,  # contains [mask]s
            masks_encoder=masks_encoder,
            labels_encoder=labels_encoder,  # doesn't contain [mask]
            segments_encoder=segments_encoder,
            ids_decoder=inputs_decoder,
            masks_decoder=masks_decoder,
            labels_decoder=labels_decoder,
            segments_label=segments_label,
            labels_similarity=labels_similarity,
            labels_sc=labels_sc
        )

    # end
# end


In [2]:
import spacy


def GOSV(path_base, filename_base, postfix, index_label_2_id, split=0.1):
    filename = f'{filename_base}{postfix}'
    path_file = os.path.join(path_base, filename)
    contents = parse_csv_file_to_json(path_file)
    
    list_corpus = [(content['processed'], index_label_2_id[content['target']]) for content in contents]
    
    indexs_all = list(range(len(list_corpus)))
    random.shuffle(indexs_all)
    
    index_split = int(split * len(list_corpus))
    
    indexs_eval = indexs_all[:index_split]
    indexs_train = indexs_all[index_split:]
    
    list_corpus_eval = [list_corpus[i_e] for i_e in indexs_eval]
    list_corpus_train = [list_corpus[i_t] for i_t in indexs_train]
    
    return list_corpus_train, list_corpus_eval, None
# end

In [3]:
class SimpleEncoderHead_MLM(nn.Module):

    @classmethod
    def get_info_accuracy_template(cls):
        return Dotdict({
            'corrects_segmented': 0,
            'corrects_masked': 0,
            'num_segmented': 0,
            'num_masked': 0 
        })
    # end
    
    def __init__(self, model, size_vocab, dim_hidden=128, dropout=0.1):
        super(SimpleEncoderHead_MLM, self).__init__()
        
        self.ffn = LinearAndNorm(dim_in=dim_hidden, dim_out=dim_hidden, dropout=dropout)
        self.extractor = torch.nn.Linear(dim_hidden, size_vocab, bias=False)
        self.extractor.weight = nn.Parameter(model.embedder_encoder.embedder[0].lut.weight)
        
        self.keys_cache = ['labels_mlm', 'masks_encoder', 'segments_encoder', 'output']
        self.cache = Dotdict({
            'labels_mlm': None,
            'masks_encoder': None,
            'segments_encoder': None,
            'output': None
        })
        
        self.func_loss = torch.nn.CrossEntropyLoss()
    # end


    def forward(self, model, labels_encoder=None, segments_encoder=None, masks_encoder=None, nocache=False, **kwargs):   # labels_input -> (batch, seq, labels)
        output_encoder = model.encoder.cache.output
        output_ffn = self.ffn(output_encoder)
        output_mlm = self.extractor(output_ffn) # output_mlm = prediction_logits

        if not nocache:
            self.cache.labels_mlm = labels_encoder
            self.cache.masks_encoder = masks_encoder
            self.cache.segments_encoder = segments_encoder
            self.cache.output = output_mlm
        # end

        return output_mlm
    # end
    
    def get_loss(self):
        
        labels_mlm = self.cache.labels_mlm
        masks_encoder = self.cache.masks_encoder
        segments_encoder = self.cache.segments_encoder
        output_mlm = self.cache.output
        
        info_acc = SimpleEncoderHead_MLM.get_info_accuracy_template()
        
        segments_encoder_2d = segments_encoder.transpose(-1,-2)[:,:,0]
        hidden_mlm_segmented = output_mlm.masked_select(segments_encoder_2d.unsqueeze(-1)).reshape(-1, output_mlm.shape[-1]) # should be (segmented_all_batchs, size_vocab)
        
        loss_segments = self.func_loss(hidden_mlm_segmented, labels_mlm.masked_select(segments_encoder_2d))
        info_acc.corrects_segmented = torch.sum(hidden_mlm_segmented.argmax(-1) == labels_mlm.masked_select(segments_encoder_2d)).cpu().item()
        info_acc.num_segmented = hidden_mlm_segmented.shape[0]
        
        masks_masked = torch.logical_xor(masks_encoder, segments_encoder) & segments_encoder # True is masked
        masks_masked_perbatch = masks_masked[:,0,:]
        hidden_mlm_masked = output_mlm.masked_select(masks_masked_perbatch.unsqueeze(-1)).reshape(-1, output_mlm.shape[-1])

        if hidden_mlm_masked.shape[0] != 0:
            loss_masked = self.func_loss(hidden_mlm_masked, labels_mlm.masked_select(masks_masked_perbatch))       
            info_acc.corrects_masked = torch.sum(hidden_mlm_masked.argmax(-1) == labels_mlm.masked_select(masks_masked_perbatch)).cpu().item()
            info_acc.num_masked = hidden_mlm_masked.shape[0]
        else:
            loss_masked = 0
            info_acc.corrects_masked = 0
            info_acc.num_masked = 1
        # end
        
        loss_mlm = loss_segments + loss_masked * 3
        
        return loss_mlm, info_acc
    # end
    
    
    def clear_cache(self):
        for key_cache in self.keys_cache:
            self.cache[key_cache] = None
        # end
    # end
# end

In [4]:
class SimpleDecoderHead_S2S(nn.Module):

    @classmethod
    def get_info_accuracy_template(cls):
        return Dotdict({
            'corrects_segmented': 0,
            'num_segmented': 0 
        })
    # end


    def __init__(self, model, size_vocab, dim_hidden=128, dropout=0.1):
        super(SimpleDecoderHead_S2S, self).__init__()
        
        self.ffn = LinearAndNorm(dim_in=dim_hidden, dim_out=dim_hidden, dropout=dropout)
        self.extractor = torch.nn.Linear(dim_hidden, size_vocab, bias=False)
        self.extractor.weight = nn.Parameter(model.embedder_decoder.embedder[0].lut.weight)

        self.func_loss = torch.nn.CrossEntropyLoss()
        
        self.keys_cache = ['output', 'labels_s2s', 'segments_decoder']
        self.cache = Dotdict({
            'output': None,
            'labels_s2s': None,
            'segments_decoder': None
        })

    # end



    def forward(self, model, labels_decoder=None, segments_label=None, nocache=False, **kwargs):   # labels_input -> (batch, seq, labels)
        output_decoder = model.decoder.cache.output
        output_ffn = self.ffn(output_decoder)
        output_s2s = self.extractor(output_ffn)   # output_mlm = prediction_logits
        
        if not nocache:
            self.cache.segments_label = segments_label
            self.cache.labels_s2s =  labels_decoder
            self.cache.output = output_s2s
        # end

        return output_s2s
    # end


    def get_loss(self):
        labels_s2s = self.cache.labels_s2s
        output_s2s = self.cache.output
        info_acc = SimpleDecoderHead_S2S.get_info_accuracy_template()
        
        segments_label = self.cache.segments_label
        segments_label_2d = segments_label.transpose(-1,-2)[:,:,0]
        hidden_s2s_segmented = output_s2s.masked_select(segments_label_2d.unsqueeze(-1)).reshape(-1, output_s2s.shape[-1])

        loss_segments = self.func_loss(hidden_s2s_segmented, labels_s2s.masked_select(segments_label_2d))
        info_acc.corrects_segmented = torch.sum(hidden_s2s_segmented.argmax(-1) == labels_s2s.masked_select(segments_label_2d)).cpu().item()
        info_acc.num_segmented = hidden_s2s_segmented.shape[0]
        
        return loss_segments * 4, info_acc
    # end


    def evaluate(self):
        pass
    # end


    def clear_cache(self):
        for key_cache in self.keys_cache:
            self.cache[key_cache] = None
        # end
    # end
# end



In [5]:
class SimpleEncoderHead_AveragePooling_SC(nn.Module):  # SC-> SequenceClassification

    @classmethod
    def get_info_accuracy_template(cls):
        return Dotdict({
            'corrects': 0,
            'num': 0 
        })
    # end
    
    def __init__(self, num_labels, dim_hidden=128, dropout=0.1):
        super(SimpleEncoderHead_AveragePooling_SC, self).__init__()
        
        self.ffn = LinearAndNorm(dim_in=dim_hidden, dim_out=dim_hidden, dropout=dropout)
        self.classifier = torch.nn.Linear(dim_hidden, num_labels, bias=False)
        
        self.keys_cache = ['labels_sc', 'output']
        self.cache = Dotdict({
            'labels_sc': None,
            'output': None
        })
        
        self.func_loss = torch.nn.CrossEntropyLoss()
    # end


    def forward(self, model, labels_sc=None, nocache=False, **kwargs):   # labels_input -> (batch, seq, labels)
        output_encoder_pooled = model.cache.output_encoder_pooled
        output_ffn = self.ffn(output_encoder_pooled)
        output_sc = self.classifier(output_ffn) # output_sc = prediction_logits

        if not nocache:
            self.cache.labels_sc = labels_sc
            self.cache.output = output_sc
        # end

        return output_sc
    # end
    
    def get_loss(self):
        
        labels_sc = self.cache.labels_sc
        output_sc = self.cache.output
        
        info_acc = SimpleEncoderHead_AveragePooling_SC.get_info_accuracy_template()
        
        loss_sc = self.func_loss(output_sc, labels_sc)
        info_acc.corrects = torch.sum(output_sc.argmax(-1) == labels_sc).cpu().item()
        info_acc.num = output_sc.shape[0]
        
        return loss_sc, info_acc
    # end
    
    
    def clear_cache(self):
        for key_cache in self.keys_cache:
            self.cache[key_cache] = None
        # end
    # end
# end

In [6]:
class HeadManager(nn.Module):
    def __init__(self):
        super(HeadManager, self).__init__()
        self.index_name_head = set()
    # end

    def register(self, head):
        name_head = head.__class__.__name__
        setattr(self, name_head, head)
        self.index_name_head.add(name_head)
        return self
    # end

    def forward(self, model, **kwargs):
        for name in self.index_name_head:
            head = getattr(self, name)
            head.forward(model, **kwargs)
        # end
    # end

    def get_head(self, klass):
        return getattr(self, klass.__name__)
    # end

    def clear_cache(self):
        for name_head in self.index_name_head:
            getattr(self, name_head).clear_cache()
        # end
    # end
# end


class Trainer(nn.Module):
    def __init__(self, model=None, manager=None):
        super(Trainer, self).__init__()
        self.model = model
        self.manager = manager
    # end

    def forward(self, **kwargs):
        self.clear_cache()
        
        self.model.forward(**kwargs)
        self.manager.forward(self.model, **kwargs)
    # end
    
    def clear_cache(self):
        self.model.clear_cache() if self.model else None
        self.manager.clear_cache() if self.manager else None
    # end
# end


class ModelLoader:
    def __init__(self, path_checkpoints='./checkpoints'):
        self.dict_name_item = {}
        self.path_checkpoints = path_checkpoints
    # end
    
    def add_item(self, item, name=None):
        if not name:
            name = item.__class__.__name__
        # end
        
        self.dict_name_item[name] = item
        return self
    # end
    
    
    def update_checkpoint(self, name_checkpoint, name_checkpoint_previous=None):  # epoch_n
        if not self.dict_name_item:
            print(f'[ALERT] no item added, skip saving checkpoint.')
            return
        # end
        
        if name_checkpoint_previous:
            result = self._delete_checkpoint_folder(name_checkpoint_previous)
            if result:
                print(f'[INFO] {name_checkpoint_previous} is cleared.')
            else:
                print(f'[ALERT] {name_checkpoint_previous} fail to be cleared.')
            # end
        # end
        
        folder_checkpoint = self._create_checkpoint_folder(name_checkpoint)
        for name_item, item in self.dict_name_item.items():
            path_checkpoint_item = os.path.join(folder_checkpoint, f'{name_item}.pt')
            torch.save(item.state_dict(), path_checkpoint_item)
            
            size_file_saved_MB = os.path.getsize(path_checkpoint_item) / 1024 / 1024
            print(f'[INFO] {name_item} is saved, {size_file_saved_MB} MB')
        # end
        
        print(f'[INFO] {name_checkpoint} is saved')
    # end

    
    def load_item_state(self, name_checkpoint, instance_item, name_item=None):
        
        if not name_item:
            name_item = instance_item.__class__.__name__
        # end
        
        if name_checkpoint is None:
            print('[ALERT] ignoring loading {} due to no checkpoint'.format(name_item))
            return
        # end
        
        
        path_checkpoint_item = os.path.join(self.path_checkpoints, name_checkpoint, f'{name_item}.pt')
        if not os.path.exists(path_checkpoint_item):
            print(f'[ERROR] {path_checkpoint_item} not exists')
            return
        # end
        if issubclass(instance_item.__class__, torch.nn.Module):
            instance_item.load_state_dict(torch.load(path_checkpoint_item), strict=False)
        else:
            instance_item.load_state_dict(torch.load(path_checkpoint_item))
        # end
        
        print(f'[INFO] {name_item} loaded for {name_checkpoint}.')
        return instance_item
    # end
    
    
    def list_items(self):
        return list(self.dict_name_item.keys())
    # end
    
    def _create_checkpoint_folder(self, name_checkpoint):
        path_folder_target = os.path.join(self.path_checkpoints, name_checkpoint)
        Path(path_folder_target).mkdir(parents=True, exist_ok=True)
        return path_folder_target
    # end
    
    def _delete_checkpoint_folder(self, name_checkpoint_previous):
        path_folder_target = os.path.join(self.path_checkpoints, name_checkpoint_previous)
        if os.path.exists(path_folder_target):
            shutil.rmtree(path_folder_target, ignore_errors=True)
        # end
        return (not os.path.exists(path_folder_target))
    # end
# end

In [7]:
class Builder:
    
    @classmethod
    def build_model_with_mlm_sc_s2s(cls, size_vocab, dim_hidden, dim_feedforward, n_head, n_layer, num_labels):
        embedder_encoder = SimpleEmbedder(size_vocab=size_vocab, dim_hidden=dim_hidden)
        sample_encoder = SimpleEncoderLayer(dim_hidden, dim_feedforward, n_head)
        encoderstack = SimpleTransformerStack(sample_encoder, n_layer)
        
        embedder_decoder = SimpleEmbedder(size_vocab=size_vocab, dim_hidden=dim_hidden)
        sample_decoder = SimpleDecoderLayer(dim_hidden, dim_feedforward, n_head)
        decoderstack = SimpleTransformerStack(sample_decoder, n_layer)

        model = SimpleEncoderDecoder(encoderstack, decoderstack, embedder_encoder, embedder_decoder, pooling=True)
        head_mlm = SimpleEncoderHead_MLM(model, size_vocab, dim_hidden)
        head_sc = SimpleEncoderHead_AveragePooling_SC(num_labels, dim_hidden)
        head_s2s = SimpleDecoderHead_S2S(model, size_vocab, dim_hidden)

        manager = HeadManager().register(head_mlm).register(head_sc).register(head_s2s)
        trainer = Trainer(model=model, manager=manager)

        return trainer
    # end
    
    @classmethod
    def load_model_with_mlm_sc_s2s(cls, size_vocab, dim_hidden, dim_feedforward, n_head, n_layer, num_labels, name_checkpoint):
        embedder_encoder = SimpleEmbedder(size_vocab=size_vocab, dim_hidden=dim_hidden)
        sample_encoder = SimpleEncoderLayer(dim_hidden, dim_feedforward, n_head)
        encoderstack = SimpleTransformerStack(sample_encoder, n_layer)
        
        embedder_decoder = SimpleEmbedder(size_vocab=size_vocab, dim_hidden=dim_hidden)
        sample_decoder = SimpleDecoderLayer(dim_hidden, dim_feedforward, n_head)
        decoderstack = SimpleTransformerStack(sample_decoder, n_layer)

        model = SimpleEncoderDecoder(encoderstack, decoderstack, embedder_encoder, embedder_decoder, pooling=True)
        head_mlm = SimpleEncoderHead_MLM(model, size_vocab, dim_hidden)
        head_sc = SimpleEncoderHead_AveragePooling_SC(num_labels, dim_hidden)
        head_s2s = SimpleDecoderHead_S2S(model, size_vocab, dim_hidden)
        
        loader = ModelLoader()
        loader.add_item(model).load_item_state(name_checkpoint, model)
        loader.add_item(head_s2s).load_item_state(name_checkpoint, head_s2s)
        loader.add_item(head_sc).load_item_state(name_checkpoint, head_sc)
        loader.add_item(head_mlm).load_item_state(name_checkpoint, head_mlm)

        manager = HeadManager().register(head_mlm).register(head_sc).register(head_s2s)
        trainer = Trainer(model=model, manager=manager)

        return trainer, loader
    # end

# end

def train_a_batch(batch, trainer, optimizer=None, scheduler=None):
    trainer.train()
    trainer.forward(**batch())
    
    loss_s2s, info_acc_s2s = trainer.manager.get_head(SimpleDecoderHead_S2S).get_loss()
    loss_mlm, info_acc_mlm = trainer.manager.get_head(SimpleEncoderHead_MLM).get_loss()
    loss_sc, info_acc_sc = trainer.manager.get_head(SimpleEncoderHead_AveragePooling_SC).get_loss()

    # crossentropy loss
    loss_all = loss_mlm + loss_sc + loss_s2s
    loss_all_value = loss_all.item()
    
    loss_all.backward()
    
    if optimizer:
        optimizer.step()
        optimizer.zero_grad(set_to_none=True)
    # end
    
    if scheduler:
        scheduler.step()
    # end
    
    trainer.clear_cache()
    return loss_all_value, Dotdict({'mlm': info_acc_mlm, 'sc': info_acc_sc, 's2s': info_acc_s2s})
# end


def evaluate_a_batch(batch, trainer, *args, **kwargs):
    trainer.eval()
    with torch.no_grad():
        trainer.forward(**batch())
    # end
    
    loss_s2s, info_acc_s2s = trainer.manager.get_head(SimpleDecoderHead_S2S).get_loss()
    loss_mlm, info_acc_mlm = trainer.manager.get_head(SimpleEncoderHead_MLM).get_loss()
    loss_sc, info_acc_sc = trainer.manager.get_head(SimpleEncoderHead_AveragePooling_SC).get_loss()
    
    # crossentropy loss
    loss_all = loss_mlm + loss_sc + loss_s2s
    loss_all_value = loss_all.item()
    
    trainer.clear_cache()
    return loss_all_value, Dotdict({'mlm': info_acc_mlm, 'sc': info_acc_sc, 's2s': info_acc_s2s})
# end

In [8]:
def main(
    folder_data, folder_output, version_data, version_data_last, postfix_train, postfix_test,
    tokenizer, collator, index_label_2_labelid, index_labelid_2_label,
    epochs, seq_max, batch_size, dim_hidden, dim_feedforward, n_head, n_layer,
    lr_base_optimizer, betas_optimizer, eps_optimizer, warmup
):

    trainer, loader = Builder.load_model_with_mlm_sc_s2s(tokenizer.vocab_size, dim_hidden, dim_feedforward, n_head, n_layer, num_labels, str(version_data_last))
    trainer = trainer.to('cuda')

    train_source, valid_source, _ = GOSV(folder_data, version_data, postfix_train, index_label_2_labelid, split=0.1)
    test_source, _, _ = GOSV(folder_data, version_data, postfix_test, index_label_2_labelid, split=0)
    

    dataloader_train = DataLoader(train_source, batch_size, shuffle=False, collate_fn=collator)
    dataloader_eval = DataLoader(valid_source, batch_size, shuffle=False, collate_fn=collator)
    dataloader_test = DataLoader(test_source, 1, shuffle=False, collate_fn=collator)


    for p in trainer.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
        # end
    # end


    optimizer = torch.optim.Adam(trainer.parameters(), lr=1e-4, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.96)
    
    ### start train/eval epochs ####################################
    for e in range(epochs):
        
        info_acc_heads_train = Dotdict({
            'mlm': SimpleEncoderHead_MLM.get_info_accuracy_template(),
            'sc': SimpleEncoderHead_AveragePooling_SC.get_info_accuracy_template(),
            's2s': SimpleDecoderHead_S2S.get_info_accuracy_template(),
        })


        info_acc_heads_eval = Dotdict({
            'mlm': SimpleEncoderHead_MLM.get_info_accuracy_template(),
            'sc': SimpleEncoderHead_AveragePooling_SC.get_info_accuracy_template(),
            's2s': SimpleDecoderHead_S2S.get_info_accuracy_template(),
        })

        # train phase
        
        losss_per_e = []
        for i, batch in enumerate(tqdm(dataloader_train)):
            loss_current, info_acc_heads_batch = train_a_batch(batch, trainer, optimizer, None)
            info_acc_heads_train += info_acc_heads_batch

            losss_per_e.append(loss_current)
            # if i % 200 == 0:
            #     print('Epoch: {} Batch: {}, loss: {}, rate: {}, acc_mlm: {}, acc_sc: {}, acc_s2s: {}'.format(
            #         e, i, loss_current, optimizer.param_groups[0]['lr'],
            #         info_acc_heads_batch.mlm.corrects_masked / info_acc_heads_batch.mlm.num_masked,
            #         info_acc_heads_batch.sc.corrects / info_acc_heads_batch.sc.num,
            #         info_acc_heads_batch.s2s.corrects_segmented / info_acc_heads_batch.s2s.num_segmented,
            #     ), end='\r')
            # # end
        # end

        loss_average_per_e = sum(losss_per_e) / len(losss_per_e)
        print('[{}] Epoch: {} training ends. Status: Average loss: {}, Average MLM accuracy: {}, Average SC accuracy: {}, Average S2S accuracy: {}'.format(
            datetime.utcnow(), e, loss_average_per_e,
            info_acc_heads_train.mlm.corrects_masked / info_acc_heads_train.mlm.num_masked,
            info_acc_heads_train.sc.corrects / info_acc_heads_train.sc.num,
            info_acc_heads_train.s2s.corrects_segmented / info_acc_heads_train.s2s.num_segmented,
        ))

        if e % 2 == 0:
            lr_scheduler.step() # schedule per 2 epoch
        # end


        # eval phase start
        losss_per_e = []
        for i, batch in enumerate(tqdm(dataloader_eval)):
            loss_current, info_acc_heads_batch = evaluate_a_batch(batch, trainer)
            info_acc_heads_eval += info_acc_heads_batch

            losss_per_e.append(loss_current)
        # end

        loss_average_per_e = sum(losss_per_e) / len(losss_per_e)
        # print('[{}] Epoch: {} Evalutation ends. Status: Average loss: {}, Average MLM accuracy: {}, Average SC accuracy: {}'.format(
        print('[{}] Epoch: {} Evalutation ends. Status: Average loss: {}, Average MLM accuracy: {}, Average SC accuracy: {}, Average S2S accuracy: {}'.format(        
            datetime.utcnow(), e, loss_average_per_e,
            info_acc_heads_eval.mlm.corrects_masked / info_acc_heads_eval.mlm.num_masked,
            info_acc_heads_eval.sc.corrects / info_acc_heads_eval.sc.num,
            info_acc_heads_eval.s2s.corrects_segmented / info_acc_heads_eval.s2s.num_segmented,
        ))
        # eval phase end
    # end
    ### end train/eval epochs ####################################
    
    
    ### start test  ##############################################
    trainer.eval()
    
    list_corpus_test = []
    for i, batch in enumerate(tqdm(dataloader_test)):
        with torch.no_grad():
            trainer.forward(**batch())
        # end

        label_sc = trainer.manager.get_head(SimpleEncoderHead_AveragePooling_SC).cache.labels_sc.squeeze(0).detach().cpu() # (batch, label)  -> (label)
        logit_sc = trainer.manager.get_head(SimpleEncoderHead_AveragePooling_SC).cache.output.squeeze(0).detach().cpu()  #(batch, seq, num_label) -> (seq, num_label)
        
        pred_sc = logit_sc.argmax(-1) # (seq)
        conf_sc = torch.index_select(logit_sc.softmax(-1), -1, pred_sc)
        list_corpus_test.append((pred_sc.item(), label_sc.item(), conf_sc.item()))
        
        trainer.clear_cache()
    # end for
    
    path_file_output = os.path.join(folder_output, f'{version_data}.json')
    with open(path_file_output, 'w+') as file:
        file.write(json.dumps(list_corpus_test))
    # end
        
    ### end   test  ##############################################
    
    loader.update_checkpoint(str(version_data), str(version_data_last))
# end


In [9]:
import re
import json
import transformers
from torch.utils.data import DataLoader, Dataset
from torchtext.data.functional import to_map_style_dataset
from transformers import AutoTokenizer
from datetime import datetime
from tqdm import tqdm


gpu = 0
torch.cuda.set_device(gpu)

epochs=6

# source
seq_max = 256
batch_size = 28


# model & head
dim_hidden = 768
# dim_feedforward = 768
dim_feedforward = 3072
n_head = 12
n_layer = 12

# optimizer
lr_base_optimizer = 1e-4
betas_optimizer = (0.9, 0.999)
eps_optimizer = 1e-9

# scheduler
warmup = 200

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
collator = Collator_SC(tokenizer, seq_max)  # TODO: here

# labels
index_label_2_labelid = {label:id_label for id_label, label in enumerate(sorted(["product","testcase","testbed","usererror","targetvm","nimbus","infra"]))}
index_labelid_2_label = {id_label: label for label, id_label in index_label_2_labelid.items()}
num_labels=len(index_label_2_labelid)




folder_data = 'data'
folder_output = 'outputs_gosv'
postfix_train = '_train_0.35_15.csv'
postfix_test = '_test.csv'

versions_data = sorted([int(filename.split('_test.csv')[0]) for filename in os.listdir(folder_data) if '_test.csv' in filename and filename[0] != '.'])
version_data_last = None
for version_data in versions_data:
    main(
        folder_data, folder_output, version_data, version_data_last, postfix_train, postfix_test,
        tokenizer, collator, index_label_2_labelid, index_labelid_2_label,
        epochs, seq_max, batch_size, dim_hidden, dim_feedforward, n_head, n_layer,
        lr_base_optimizer, betas_optimizer, eps_optimizer, warmup
    )
    
    print('[INFO] finish {}'.format(version_data))
    version_data_last = version_data
# end

[ERROR] ./checkpoints/None/SimpleEncoderDecoder.pt not exists
[ERROR] ./checkpoints/None/SimpleDecoderHead_S2S.pt not exists
[ERROR] ./checkpoints/None/SimpleEncoderHead_AveragePooling_SC.pt not exists
[ERROR] ./checkpoints/None/SimpleEncoderHead_MLM.pt not exists


  2%|▏         | 11/628 [00:11<10:04,  1.02it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (672 > 512). Running this sequence through the model will result in indexing errors
100%|██████████| 628/628 [10:16<00:00,  1.02it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

[2023-12-04 06:16:25.986731] Epoch: 0 training ends. Status: Average loss: 25.033128858371903, Average MLM accuracy: 0.19527302458228712, Average SC accuracy: 0.8044034818228366, Average S2S accuracy: 0.6023284848746004


100%|██████████| 70/70 [00:23<00:00,  2.96it/s]
  0%|          | 0/628 [00:00<?, ?it/s]

[2023-12-04 06:16:49.656024] Epoch: 0 Evalutation ends. Status: Average loss: 16.24704031263079, Average MLM accuracy: 0.2377125636794145, Average SC accuracy: 0.9795186891961085, Average S2S accuracy: 0.6995704327666962


100%|██████████| 628/628 [10:13<00:00,  1.02it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

[2023-12-04 06:27:02.809814] Epoch: 1 training ends. Status: Average loss: 14.037578323084837, Average MLM accuracy: 0.3286699060727239, Average SC accuracy: 0.97610513739546, Average S2S accuracy: 0.7024094500153653


100%|██████████| 70/70 [00:23<00:00,  2.94it/s]
  0%|          | 0/628 [00:00<?, ?it/s]

[2023-12-04 06:27:26.595550] Epoch: 1 Evalutation ends. Status: Average loss: 11.120327636173792, Average MLM accuracy: 0.5079823491425701, Average SC accuracy: 0.982078853046595, Average S2S accuracy: 0.7147652590778204


100%|██████████| 628/628 [10:14<00:00,  1.02it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

[2023-12-04 06:37:41.429520] Epoch: 2 training ends. Status: Average loss: 10.449114535264908, Average MLM accuracy: 0.5311676586378995, Average SC accuracy: 0.9838425214769301, Average S2S accuracy: 0.7134718510059975


100%|██████████| 70/70 [00:23<00:00,  2.92it/s]
  0%|          | 0/628 [00:00<?, ?it/s]

[2023-12-04 06:38:05.391087] Epoch: 2 Evalutation ends. Status: Average loss: 8.746621942520141, Average MLM accuracy: 0.6305517686733156, Average SC accuracy: 0.9882232462877624, Average S2S accuracy: 0.7251766260876863


100%|██████████| 628/628 [10:12<00:00,  1.02it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

[2023-12-04 06:48:18.306486] Epoch: 3 training ends. Status: Average loss: 8.938873216604732, Average MLM accuracy: 0.6128076660776838, Average SC accuracy: 0.9860613301473516, Average S2S accuracy: 0.719010705768823


100%|██████████| 70/70 [00:23<00:00,  2.96it/s]
  0%|          | 0/628 [00:00<?, ?it/s]

[2023-12-04 06:48:41.990679] Epoch: 3 Evalutation ends. Status: Average loss: 7.8436979498182025, Average MLM accuracy: 0.6762215684867618, Average SC accuracy: 0.9882232462877624, Average S2S accuracy: 0.725968624283399


100%|██████████| 628/628 [10:15<00:00,  1.02it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

[2023-12-04 06:58:57.153093] Epoch: 4 training ends. Status: Average loss: 8.238358928139801, Average MLM accuracy: 0.6536625437862302, Average SC accuracy: 0.9866302554474597, Average S2S accuracy: 0.722348533385647


100%|██████████| 70/70 [00:23<00:00,  2.92it/s]
  0%|          | 0/628 [00:00<?, ?it/s]

[2023-12-04 06:59:21.111819] Epoch: 4 Evalutation ends. Status: Average loss: 7.257947404044015, Average MLM accuracy: 0.7138731434311545, Average SC accuracy: 0.9866871479774706, Average S2S accuracy: 0.73154408178037


100%|██████████| 628/628 [10:16<00:00,  1.02it/s]
  0%|          | 0/70 [00:00<?, ?it/s]

[2023-12-04 07:09:37.794725] Epoch: 5 training ends. Status: Average loss: 7.825895150755621, Average MLM accuracy: 0.6808080690659971, Average SC accuracy: 0.9875974284576435, Average S2S accuracy: 0.7245144075993836


100%|██████████| 70/70 [00:23<00:00,  2.93it/s]
  2%|▏         | 2/84 [00:00<00:06, 13.37it/s]

[2023-12-04 07:10:01.687680] Epoch: 5 Evalutation ends. Status: Average loss: 6.973699930735997, Average MLM accuracy: 0.7353088900050226, Average SC accuracy: 0.982078853046595, Average S2S accuracy: 0.7356089731822067


100%|██████████| 84/84 [00:04<00:00, 20.27it/s]


[INFO] None is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202206171000 is saved
[INFO] finish 202206171000
[INFO] SimpleEncoderDecoder loaded for 202206171000.
[INFO] SimpleDecoderHead_S2S loaded for 202206171000.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202206171000.
[INFO] SimpleEncoderHead_MLM loaded for 202206171000.


100%|██████████| 66/66 [01:04<00:00,  1.02it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2023-12-04 07:11:35.838679] Epoch: 0 training ends. Status: Average loss: 59.6023976875074, Average MLM accuracy: 0.16470473781496253, Average SC accuracy: 0.3263850795392211, Average S2S accuracy: 0.26012727689686804


100%|██████████| 8/8 [00:02<00:00,  3.15it/s]
  0%|          | 0/66 [00:00<?, ?it/s]

[2023-12-04 07:11:38.380978] Epoch: 0 Evalutation ends. Status: Average loss: 40.02669334411621, Average MLM accuracy: 0.16782773907536416, Average SC accuracy: 0.5643564356435643, Average S2S accuracy: 0.4970347991496028


100%|██████████| 66/66 [01:04<00:00,  1.02it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2023-12-04 07:12:42.874850] Epoch: 1 training ends. Status: Average loss: 33.30804683222915, Average MLM accuracy: 0.17013884063207926, Average SC accuracy: 0.7158529895776193, Average S2S accuracy: 0.5068716419866335


100%|██████████| 8/8 [00:02<00:00,  3.11it/s]
  0%|          | 0/66 [00:00<?, ?it/s]

[2023-12-04 07:12:45.446562] Epoch: 1 Evalutation ends. Status: Average loss: 29.37020993232727, Average MLM accuracy: 0.16972767574414185, Average SC accuracy: 0.8366336633663366, Average S2S accuracy: 0.4994218790794823


100%|██████████| 66/66 [01:04<00:00,  1.02it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2023-12-04 07:13:50.056387] Epoch: 2 training ends. Status: Average loss: 27.24579464305531, Average MLM accuracy: 0.17073645296235043, Average SC accuracy: 0.9149753154141524, Average S2S accuracy: 0.5596129275324335


100%|██████████| 8/8 [00:02<00:00,  3.15it/s]
  0%|          | 0/66 [00:00<?, ?it/s]

[2023-12-04 07:13:52.597400] Epoch: 2 Evalutation ends. Status: Average loss: 25.235288381576538, Average MLM accuracy: 0.1669411019632679, Average SC accuracy: 0.9752475247524752, Average S2S accuracy: 0.601059266718884


100%|██████████| 66/66 [01:04<00:00,  1.02it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2023-12-04 07:14:57.058759] Epoch: 3 training ends. Status: Average loss: 24.069485288677793, Average MLM accuracy: 0.17369671869136796, Average SC accuracy: 0.9775095995611629, Average S2S accuracy: 0.6278502162232997


100%|██████████| 8/8 [00:02<00:00,  3.08it/s]
  0%|          | 0/66 [00:00<?, ?it/s]

[2023-12-04 07:14:59.656623] Epoch: 3 Evalutation ends. Status: Average loss: 22.667009353637695, Average MLM accuracy: 0.16922102596580113, Average SC accuracy: 0.9900990099009901, Average S2S accuracy: 0.6512625414941628


100%|██████████| 66/66 [01:04<00:00,  1.03it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2023-12-04 07:16:03.986083] Epoch: 4 training ends. Status: Average loss: 21.9983447970766, Average MLM accuracy: 0.17254318791433296, Average SC accuracy: 0.9901261656609983, Average S2S accuracy: 0.6608529353950989


100%|██████████| 8/8 [00:02<00:00,  3.04it/s]
  0%|          | 0/66 [00:00<?, ?it/s]

[2023-12-04 07:16:06.616489] Epoch: 4 Evalutation ends. Status: Average loss: 20.794263124465942, Average MLM accuracy: 0.1694743508549715, Average SC accuracy: 0.9851485148514851, Average S2S accuracy: 0.6761030920144717


100%|██████████| 66/66 [01:04<00:00,  1.03it/s]
  0%|          | 0/8 [00:00<?, ?it/s]

[2023-12-04 07:17:10.742905] Epoch: 5 training ends. Status: Average loss: 20.05579844388095, Average MLM accuracy: 0.17768543354689867, Average SC accuracy: 0.9928688974218322, Average S2S accuracy: 0.6802393198794391


100%|██████████| 8/8 [00:02<00:00,  3.02it/s]
 11%|█         | 3/28 [00:00<00:01, 18.99it/s]

[2023-12-04 07:17:13.390028] Epoch: 5 Evalutation ends. Status: Average loss: 19.042710065841675, Average MLM accuracy: 0.18720709309689676, Average SC accuracy: 0.9900990099009901, Average S2S accuracy: 0.692066689045541


100%|██████████| 28/28 [00:01<00:00, 26.78it/s]


[INFO] 202206171000 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202207021500 is saved
[INFO] finish 202207021500
[INFO] SimpleEncoderDecoder loaded for 202207021500.
[INFO] SimpleDecoderHead_S2S loaded for 202207021500.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207021500.
[INFO] SimpleEncoderHead_MLM loaded for 202207021500.


100%|██████████| 39/39 [00:39<00:00,  1.01s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:18:21.838091] Epoch: 0 training ends. Status: Average loss: 66.78886951544347, Average MLM accuracy: 0.1652065810687438, Average SC accuracy: 0.5185185185185185, Average S2S accuracy: 0.2057820199166121


100%|██████████| 5/5 [00:01<00:00,  3.08it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:18:23.463014] Epoch: 0 Evalutation ends. Status: Average loss: 52.458499908447266, Average MLM accuracy: 0.17586694975230008, Average SC accuracy: 0.8, Average S2S accuracy: 0.18496626039650574


100%|██████████| 39/39 [00:38<00:00,  1.01it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:19:02.014596] Epoch: 1 training ends. Status: Average loss: 43.205277858636315, Average MLM accuracy: 0.17606495600318564, Average SC accuracy: 0.7583333333333333, Average S2S accuracy: 0.36878467316770613


100%|██████████| 5/5 [00:01<00:00,  3.10it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:19:03.627831] Epoch: 1 Evalutation ends. Status: Average loss: 35.38463668823242, Average MLM accuracy: 0.17144373673036092, Average SC accuracy: 0.75, Average S2S accuracy: 0.5283778835591358


100%|██████████| 39/39 [00:38<00:00,  1.01it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:19:42.193675] Epoch: 2 training ends. Status: Average loss: 31.94781097998986, Average MLM accuracy: 0.17719158524504186, Average SC accuracy: 0.9712962962962963, Average S2S accuracy: 0.5270838358430102


100%|██████████| 5/5 [00:01<00:00,  3.15it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:19:43.782467] Epoch: 2 Evalutation ends. Status: Average loss: 29.102899932861327, Average MLM accuracy: 0.17781316348195328, Average SC accuracy: 1.0, Average S2S accuracy: 0.5283778835591358


100%|██████████| 39/39 [00:38<00:00,  1.01it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:20:22.322076] Epoch: 3 training ends. Status: Average loss: 28.118008393507736, Average MLM accuracy: 0.17592898350847885, Average SC accuracy: 0.9981481481481481, Average S2S accuracy: 0.5298691751949738


100%|██████████| 5/5 [00:01<00:00,  3.07it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:20:23.952240] Epoch: 3 Evalutation ends. Status: Average loss: 26.552506256103516, Average MLM accuracy: 0.17179759377211606, Average SC accuracy: 1.0, Average S2S accuracy: 0.54836009834179


100%|██████████| 39/39 [00:38<00:00,  1.00it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:21:02.949474] Epoch: 4 training ends. Status: Average loss: 25.78124877734062, Average MLM accuracy: 0.17649229812940698, Average SC accuracy: 1.0, Average S2S accuracy: 0.5822163261086799


100%|██████████| 5/5 [00:01<00:00,  3.15it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:21:04.536321] Epoch: 4 Evalutation ends. Status: Average loss: 24.560700607299804, Average MLM accuracy: 0.1735668789808917, Average SC accuracy: 1.0, Average S2S accuracy: 0.613328451116807


100%|██████████| 39/39 [00:38<00:00,  1.02it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:21:42.831584] Epoch: 5 training ends. Status: Average loss: 23.844105744973206, Average MLM accuracy: 0.17857073483421068, Average SC accuracy: 1.0, Average S2S accuracy: 0.6207573825850247


100%|██████████| 5/5 [00:01<00:00,  3.29it/s]
 12%|█▏        | 3/25 [00:00<00:00, 29.28it/s]

[2023-12-04 07:21:44.351515] Epoch: 5 Evalutation ends. Status: Average loss: 22.487924575805664, Average MLM accuracy: 0.17604387827317763, Average SC accuracy: 1.0, Average S2S accuracy: 0.6423078934979338


100%|██████████| 25/25 [00:01<00:00, 24.97it/s]


[INFO] 202207021500 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202207041600 is saved
[INFO] finish 202207041600
[INFO] SimpleEncoderDecoder loaded for 202207041600.
[INFO] SimpleDecoderHead_S2S loaded for 202207041600.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207041600.
[INFO] SimpleEncoderHead_MLM loaded for 202207041600.


100%|██████████| 24/24 [00:23<00:00,  1.04it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:22:33.690855] Epoch: 0 training ends. Status: Average loss: 71.41076978047688, Average MLM accuracy: 0.1658314511575381, Average SC accuracy: 0.3148148148148148, Average S2S accuracy: 0.228483403936426


100%|██████████| 3/3 [00:00<00:00,  3.11it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2023-12-04 07:22:34.656024] Epoch: 0 Evalutation ends. Status: Average loss: 61.87786356608073, Average MLM accuracy: 0.1792841305036427, Average SC accuracy: 0.3888888888888889, Average S2S accuracy: 0.28400374181478016


100%|██████████| 24/24 [00:22<00:00,  1.05it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:22:57.455600] Epoch: 1 training ends. Status: Average loss: 54.297985553741455, Average MLM accuracy: 0.17550112930547712, Average SC accuracy: 0.5015432098765432, Average S2S accuracy: 0.2978756622292536


100%|██████████| 3/3 [00:00<00:00,  3.31it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2023-12-04 07:22:58.363674] Epoch: 1 Evalutation ends. Status: Average loss: 46.72112274169922, Average MLM accuracy: 0.164079822616408, Average SC accuracy: 0.3888888888888889, Average S2S accuracy: 0.3453695042095416


100%|██████████| 24/24 [00:23<00:00,  1.04it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:23:21.456938] Epoch: 2 training ends. Status: Average loss: 41.117675145467125, Average MLM accuracy: 0.17832439299830605, Average SC accuracy: 0.5462962962962963, Average S2S accuracy: 0.487921146580347


100%|██████████| 3/3 [00:00<00:00,  3.23it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2023-12-04 07:23:22.386781] Epoch: 2 Evalutation ends. Status: Average loss: 36.37058766682943, Average MLM accuracy: 0.16376306620209058, Average SC accuracy: 0.4583333333333333, Average S2S accuracy: 0.5180542563143125


100%|██████████| 24/24 [00:22<00:00,  1.05it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:23:45.288946] Epoch: 3 training ends. Status: Average loss: 33.37723231315613, Average MLM accuracy: 0.17761857707509882, Average SC accuracy: 0.7484567901234568, Average S2S accuracy: 0.5265672325322398


100%|██████████| 3/3 [00:00<00:00,  3.29it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2023-12-04 07:23:46.201991] Epoch: 3 Evalutation ends. Status: Average loss: 31.255734125773113, Average MLM accuracy: 0.17770034843205576, Average SC accuracy: 0.875, Average S2S accuracy: 0.5180542563143125


100%|██████████| 24/24 [00:22<00:00,  1.05it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:24:08.964649] Epoch: 4 training ends. Status: Average loss: 29.781593322753906, Average MLM accuracy: 0.17401891586674195, Average SC accuracy: 0.9290123456790124, Average S2S accuracy: 0.5265984574872238


100%|██████████| 3/3 [00:00<00:00,  3.29it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

[2023-12-04 07:24:09.878967] Epoch: 4 Evalutation ends. Status: Average loss: 28.835294087727863, Average MLM accuracy: 0.17421602787456447, Average SC accuracy: 0.9722222222222222, Average S2S accuracy: 0.5180542563143125


100%|██████████| 24/24 [00:22<00:00,  1.05it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:24:32.653132] Epoch: 5 training ends. Status: Average loss: 27.82017993927002, Average MLM accuracy: 0.17588932806324112, Average SC accuracy: 0.9953703703703703, Average S2S accuracy: 0.5265464158955837


100%|██████████| 3/3 [00:00<00:00,  3.29it/s]
 10%|█         | 4/39 [00:00<00:01, 32.09it/s]

[2023-12-04 07:24:33.566048] Epoch: 5 Evalutation ends. Status: Average loss: 27.10377375284831, Average MLM accuracy: 0.16693063034526448, Average SC accuracy: 1.0, Average S2S accuracy: 0.5221702525724976


100%|██████████| 39/39 [00:01<00:00, 25.75it/s]


[INFO] 202207041600 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202207151000 is saved
[INFO] finish 202207151000
[INFO] SimpleEncoderDecoder loaded for 202207151000.
[INFO] SimpleDecoderHead_S2S loaded for 202207151000.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207151000.
[INFO] SimpleEncoderHead_MLM loaded for 202207151000.


100%|██████████| 51/51 [00:50<00:00,  1.02it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2023-12-04 07:26:18.390864] Epoch: 0 training ends. Status: Average loss: 61.68713737936581, Average MLM accuracy: 0.17266115866026815, Average SC accuracy: 0.5366713681241185, Average S2S accuracy: 0.3117329740299538


100%|██████████| 6/6 [00:02<00:00,  2.92it/s]
  0%|          | 0/51 [00:00<?, ?it/s]

[2023-12-04 07:26:20.445335] Epoch: 0 Evalutation ends. Status: Average loss: 43.39048703511556, Average MLM accuracy: 0.17208734157886701, Average SC accuracy: 0.7707006369426752, Average S2S accuracy: 0.5257806172950599


100%|██████████| 51/51 [00:50<00:00,  1.02it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2023-12-04 07:27:10.633714] Epoch: 1 training ends. Status: Average loss: 34.74462808347216, Average MLM accuracy: 0.18044492818153335, Average SC accuracy: 0.9245416078984485, Average S2S accuracy: 0.5272906806496647


100%|██████████| 6/6 [00:02<00:00,  2.92it/s]
  0%|          | 0/51 [00:00<?, ?it/s]

[2023-12-04 07:27:12.686941] Epoch: 1 Evalutation ends. Status: Average loss: 29.107173919677734, Average MLM accuracy: 0.1809436555199267, Average SC accuracy: 1.0, Average S2S accuracy: 0.5272203725366688


100%|██████████| 51/51 [00:50<00:00,  1.01it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2023-12-04 07:28:03.296279] Epoch: 2 training ends. Status: Average loss: 27.364798265344955, Average MLM accuracy: 0.17628918682695954, Average SC accuracy: 0.9964739069111425, Average S2S accuracy: 0.534445941541859


100%|██████████| 6/6 [00:02<00:00,  2.99it/s]
  0%|          | 0/51 [00:00<?, ?it/s]

[2023-12-04 07:28:05.307246] Epoch: 2 Evalutation ends. Status: Average loss: 25.138713518778484, Average MLM accuracy: 0.18048557031607879, Average SC accuracy: 1.0, Average S2S accuracy: 0.5753621884279673


100%|██████████| 51/51 [00:50<00:00,  1.02it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2023-12-04 07:28:55.375761] Epoch: 3 training ends. Status: Average loss: 24.01051367965399, Average MLM accuracy: 0.1793400286944046, Average SC accuracy: 0.9992947813822285, Average S2S accuracy: 0.6068838376714027


100%|██████████| 6/6 [00:02<00:00,  2.92it/s]
  0%|          | 0/51 [00:00<?, ?it/s]

[2023-12-04 07:28:57.429852] Epoch: 3 Evalutation ends. Status: Average loss: 22.30602200826009, Average MLM accuracy: 0.1803328752481295, Average SC accuracy: 1.0, Average S2S accuracy: 0.626608476558985


100%|██████████| 51/51 [00:50<00:00,  1.01it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2023-12-04 07:29:47.750545] Epoch: 4 training ends. Status: Average loss: 21.2986167084937, Average MLM accuracy: 0.18265472715579084, Average SC accuracy: 1.0, Average S2S accuracy: 0.6534295150863642


100%|██████████| 6/6 [00:02<00:00,  2.90it/s]
  0%|          | 0/51 [00:00<?, ?it/s]

[2023-12-04 07:29:49.823285] Epoch: 4 Evalutation ends. Status: Average loss: 19.534364382425945, Average MLM accuracy: 0.18384486181096352, Average SC accuracy: 1.0, Average S2S accuracy: 0.676460001799694


100%|██████████| 51/51 [00:50<00:00,  1.01it/s]
  0%|          | 0/6 [00:00<?, ?it/s]

[2023-12-04 07:30:40.188512] Epoch: 5 training ends. Status: Average loss: 18.99428311516257, Average MLM accuracy: 0.18461716057322844, Average SC accuracy: 0.9894217207334274, Average S2S accuracy: 0.6817781625912649


100%|██████████| 6/6 [00:02<00:00,  2.88it/s]
  7%|▋         | 2/28 [00:00<00:01, 19.42it/s]

[2023-12-04 07:30:42.276758] Epoch: 5 Evalutation ends. Status: Average loss: 17.530580520629883, Average MLM accuracy: 0.19422812643151627, Average SC accuracy: 1.0, Average S2S accuracy: 0.6787996040673085


100%|██████████| 28/28 [00:01<00:00, 19.17it/s]


[INFO] 202207151000 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202207221500 is saved
[INFO] finish 202207221500
[INFO] SimpleEncoderDecoder loaded for 202207221500.
[INFO] SimpleDecoderHead_S2S loaded for 202207221500.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207221500.
[INFO] SimpleEncoderHead_MLM loaded for 202207221500.


100%|██████████| 39/39 [00:38<00:00,  1.00it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:32:12.357249] Epoch: 0 training ends. Status: Average loss: 64.67467400966547, Average MLM accuracy: 0.17309891643567427, Average SC accuracy: 0.5425925925925926, Average S2S accuracy: 0.31271101430168236


100%|██████████| 5/5 [00:01<00:00,  3.21it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:32:13.915415] Epoch: 0 Evalutation ends. Status: Average loss: 49.82836761474609, Average MLM accuracy: 0.18061113132048018, Average SC accuracy: 0.6916666666666667, Average S2S accuracy: 0.39149828030954426


100%|██████████| 39/39 [00:38<00:00,  1.01it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:32:52.543902] Epoch: 1 training ends. Status: Average loss: 40.66525601118039, Average MLM accuracy: 0.17967008470798038, Average SC accuracy: 0.7675925925925926, Average S2S accuracy: 0.471365108480697


100%|██████████| 5/5 [00:01<00:00,  3.14it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:32:54.138479] Epoch: 1 Evalutation ends. Status: Average loss: 33.2971794128418, Average MLM accuracy: 0.1811567842851946, Average SC accuracy: 0.8916666666666667, Average S2S accuracy: 0.5303095442820293


100%|██████████| 39/39 [00:38<00:00,  1.01it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:33:32.804426] Epoch: 2 training ends. Status: Average loss: 30.424632047995544, Average MLM accuracy: 0.1782356703948516, Average SC accuracy: 0.9768518518518519, Average S2S accuracy: 0.5277234817001519


100%|██████████| 5/5 [00:01<00:00,  3.14it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:33:34.400236] Epoch: 2 Evalutation ends. Status: Average loss: 28.228779602050782, Average MLM accuracy: 0.18006547835576572, Average SC accuracy: 1.0, Average S2S accuracy: 0.5303095442820293


100%|██████████| 39/39 [00:38<00:00,  1.01it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:34:12.971512] Epoch: 3 training ends. Status: Average loss: 27.20786251165928, Average MLM accuracy: 0.17658803233247397, Average SC accuracy: 1.0, Average S2S accuracy: 0.5403571121492649


100%|██████████| 5/5 [00:01<00:00,  3.20it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:34:14.534085] Epoch: 3 Evalutation ends. Status: Average loss: 25.815877151489257, Average MLM accuracy: 0.1867951982539105, Average SC accuracy: 1.0, Average S2S accuracy: 0.5817927773000859


100%|██████████| 39/39 [00:38<00:00,  1.02it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:34:52.903381] Epoch: 4 training ends. Status: Average loss: 24.9584290431096, Average MLM accuracy: 0.1738355075694431, Average SC accuracy: 1.0, Average S2S accuracy: 0.6025164082428134


100%|██████████| 5/5 [00:01<00:00,  3.12it/s]
  0%|          | 0/39 [00:00<?, ?it/s]

[2023-12-04 07:34:54.507823] Epoch: 4 Evalutation ends. Status: Average loss: 23.571857452392578, Average MLM accuracy: 0.17624590760276465, Average SC accuracy: 1.0, Average S2S accuracy: 0.6157029234737748


100%|██████████| 39/39 [00:38<00:00,  1.01it/s]
  0%|          | 0/5 [00:00<?, ?it/s]

[2023-12-04 07:35:33.278610] Epoch: 5 training ends. Status: Average loss: 23.00507501455454, Average MLM accuracy: 0.17656864835526953, Average SC accuracy: 1.0, Average S2S accuracy: 0.633447020721676


100%|██████████| 5/5 [00:01<00:00,  3.23it/s]
100%|██████████| 3/3 [00:00<00:00, 31.17it/s]


[2023-12-04 07:35:34.826207] Epoch: 5 Evalutation ends. Status: Average loss: 21.712709808349608, Average MLM accuracy: 0.1860676609676246, Average SC accuracy: 1.0, Average S2S accuracy: 0.6509565778159931
[INFO] 202207221500 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202207260728 is saved
[INFO] finish 202207260728
[INFO] SimpleEncoderDecoder loaded for 202207260728.
[INFO] SimpleDecoderHead_S2S loaded for 202207260728.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202207260728.
[INFO] SimpleEncoderHead_MLM loaded for 202207260728.


100%|██████████| 2/2 [00:01<00:00,  1.37it/s]
100%|██████████| 1/1 [00:00<00:00, 15.62it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:36:02.593157] Epoch: 0 training ends. Status: Average loss: 81.87540435791016, Average MLM accuracy: 0.0007930214115781126, Average SC accuracy: 0.34146341463414637, Average S2S accuracy: 0.059670304156025075
[2023-12-04 07:36:02.658895] Epoch: 0 Evalutation ends. Status: Average loss: 77.72419738769531, Average MLM accuracy: 0.20430107526881722, Average SC accuracy: 1.0, Average S2S accuracy: 0.23510971786833856


100%|██████████| 2/2 [00:01<00:00,  1.39it/s]
100%|██████████| 1/1 [00:00<00:00, 13.84it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:36:04.100982] Epoch: 1 training ends. Status: Average loss: 76.69381713867188, Average MLM accuracy: 0.17049960348929422, Average SC accuracy: 1.0, Average S2S accuracy: 0.2563269096819132
[2023-12-04 07:36:04.175032] Epoch: 1 Evalutation ends. Status: Average loss: 75.46866607666016, Average MLM accuracy: 0.15053763440860216, Average SC accuracy: 1.0, Average S2S accuracy: 0.26959247648902823


100%|██████████| 2/2 [00:01<00:00,  1.36it/s]
100%|██████████| 1/1 [00:00<00:00, 13.46it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:36:05.651382] Epoch: 2 training ends. Status: Average loss: 74.47569274902344, Average MLM accuracy: 0.1641554321966693, Average SC accuracy: 1.0, Average S2S accuracy: 0.2749013234269793
[2023-12-04 07:36:05.727314] Epoch: 2 Evalutation ends. Status: Average loss: 74.06683349609375, Average MLM accuracy: 0.10752688172043011, Average SC accuracy: 1.0, Average S2S accuracy: 0.2445141065830721


100%|██████████| 2/2 [00:01<00:00,  1.36it/s]
100%|██████████| 1/1 [00:00<00:00, 13.90it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:36:07.200109] Epoch: 3 training ends. Status: Average loss: 72.86184692382812, Average MLM accuracy: 0.16653449643140364, Average SC accuracy: 1.0, Average S2S accuracy: 0.276062224286046
[2023-12-04 07:36:07.273800] Epoch: 3 Evalutation ends. Status: Average loss: 72.54127502441406, Average MLM accuracy: 0.13978494623655913, Average SC accuracy: 1.0, Average S2S accuracy: 0.23510971786833856


100%|██████████| 2/2 [00:01<00:00,  1.36it/s]
100%|██████████| 1/1 [00:00<00:00, 14.53it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:36:08.743701] Epoch: 4 training ends. Status: Average loss: 71.44586181640625, Average MLM accuracy: 0.16098334655035687, Average SC accuracy: 1.0, Average S2S accuracy: 0.2742047829115394
[2023-12-04 07:36:08.814291] Epoch: 4 Evalutation ends. Status: Average loss: 71.83132934570312, Average MLM accuracy: 0.12903225806451613, Average SC accuracy: 1.0, Average S2S accuracy: 0.2445141065830721


100%|██████████| 2/2 [00:01<00:00,  1.38it/s]
100%|██████████| 1/1 [00:00<00:00, 14.88it/s]
 10%|█         | 2/20 [00:00<00:01, 17.35it/s]

[2023-12-04 07:36:10.260581] Epoch: 5 training ends. Status: Average loss: 70.0330696105957, Average MLM accuracy: 0.16732751784298175, Average SC accuracy: 1.0, Average S2S accuracy: 0.2816345484095658
[2023-12-04 07:36:10.329670] Epoch: 5 Evalutation ends. Status: Average loss: 70.12541198730469, Average MLM accuracy: 0.10752688172043011, Average SC accuracy: 1.0, Average S2S accuracy: 0.26332288401253917


100%|██████████| 20/20 [00:01<00:00, 13.15it/s]


[INFO] 202207260728 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202208031141 is saved
[INFO] finish 202208031141
[INFO] SimpleEncoderDecoder loaded for 202208031141.
[INFO] SimpleDecoderHead_S2S loaded for 202208031141.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202208031141.
[INFO] SimpleEncoderHead_MLM loaded for 202208031141.


100%|██████████| 27/27 [00:26<00:00,  1.00it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:37:00.644949] Epoch: 0 training ends. Status: Average loss: 68.90669674343533, Average MLM accuracy: 0.16189125522811404, Average SC accuracy: 0.7517146776406035, Average S2S accuracy: 0.21584031366957762


100%|██████████| 3/3 [00:01<00:00,  2.88it/s]
  0%|          | 0/27 [00:00<?, ?it/s]

[2023-12-04 07:37:01.689276] Epoch: 0 Evalutation ends. Status: Average loss: 57.503098805745445, Average MLM accuracy: 0.17303128371089535, Average SC accuracy: 0.9259259259259259, Average S2S accuracy: 0.26476422035958797


100%|██████████| 27/27 [00:26<00:00,  1.02it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:37:28.223709] Epoch: 1 training ends. Status: Average loss: 48.737848635073064, Average MLM accuracy: 0.17395798278928898, Average SC accuracy: 0.9917695473251029, Average S2S accuracy: 0.35949385136339335


100%|██████████| 3/3 [00:01<00:00,  2.76it/s]
  0%|          | 0/27 [00:00<?, ?it/s]

[2023-12-04 07:37:29.312599] Epoch: 1 Evalutation ends. Status: Average loss: 39.725056966145836, Average MLM accuracy: 0.17281553398058253, Average SC accuracy: 0.9876543209876543, Average S2S accuracy: 0.5131486339497089


100%|██████████| 27/27 [00:26<00:00,  1.03it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:37:55.587351] Epoch: 2 training ends. Status: Average loss: 34.898294024997284, Average MLM accuracy: 0.17290034132974377, Average SC accuracy: 1.0, Average S2S accuracy: 0.5091855284263055


100%|██████████| 3/3 [00:01<00:00,  2.88it/s]
  0%|          | 0/27 [00:00<?, ?it/s]

[2023-12-04 07:37:56.630147] Epoch: 2 Evalutation ends. Status: Average loss: 30.54215431213379, Average MLM accuracy: 0.1745415318230852, Average SC accuracy: 0.9876543209876543, Average S2S accuracy: 0.5210186192334763


100%|██████████| 27/27 [00:26<00:00,  1.03it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:38:22.762936] Epoch: 3 training ends. Status: Average loss: 29.068974177042644, Average MLM accuracy: 0.17496754963703667, Average SC accuracy: 1.0, Average S2S accuracy: 0.5163856710033862


100%|██████████| 3/3 [00:01<00:00,  2.75it/s]
  0%|          | 0/27 [00:00<?, ?it/s]

[2023-12-04 07:38:23.856285] Epoch: 3 Evalutation ends. Status: Average loss: 27.359283447265625, Average MLM accuracy: 0.17411003236245956, Average SC accuracy: 0.9876543209876543, Average S2S accuracy: 0.5210186192334763


100%|██████████| 27/27 [00:26<00:00,  1.03it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:38:50.183184] Epoch: 4 training ends. Status: Average loss: 26.5838948002568, Average MLM accuracy: 0.17306860247103506, Average SC accuracy: 1.0, Average S2S accuracy: 0.5163714132953128


100%|██████████| 3/3 [00:01<00:00,  2.71it/s]
  0%|          | 0/27 [00:00<?, ?it/s]

[2023-12-04 07:38:51.291152] Epoch: 4 Evalutation ends. Status: Average loss: 25.231102625528973, Average MLM accuracy: 0.17432578209277239, Average SC accuracy: 0.9876543209876543, Average S2S accuracy: 0.5244737347239107


100%|██████████| 27/27 [00:26<00:00,  1.03it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

[2023-12-04 07:39:17.582930] Epoch: 5 training ends. Status: Average loss: 24.558164455272532, Average MLM accuracy: 0.17323686361232632, Average SC accuracy: 1.0, Average S2S accuracy: 0.560698627695598


100%|██████████| 3/3 [00:01<00:00,  2.89it/s]
 10%|█         | 3/29 [00:00<00:00, 28.88it/s]

[2023-12-04 07:39:18.622996] Epoch: 5 Evalutation ends. Status: Average loss: 23.31017303466797, Average MLM accuracy: 0.17411003236245956, Average SC accuracy: 0.9876543209876543, Average S2S accuracy: 0.6326700364706636


100%|██████████| 29/29 [00:01<00:00, 22.16it/s]


[INFO] 202208031141 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202208172100 is saved
[INFO] finish 202208172100
[INFO] SimpleEncoderDecoder loaded for 202208172100.
[INFO] SimpleDecoderHead_S2S loaded for 202208172100.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202208172100.
[INFO] SimpleEncoderHead_MLM loaded for 202208172100.


100%|██████████| 35/35 [00:34<00:00,  1.00it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2023-12-04 07:40:20.566442] Epoch: 0 training ends. Status: Average loss: 66.36116583687918, Average MLM accuracy: 0.1692752835171664, Average SC accuracy: 0.31275720164609055, Average S2S accuracy: 0.27443818770598827


100%|██████████| 4/4 [00:01<00:00,  2.78it/s]
  0%|          | 0/35 [00:00<?, ?it/s]

[2023-12-04 07:40:22.005923] Epoch: 0 Evalutation ends. Status: Average loss: 52.66818618774414, Average MLM accuracy: 0.1806730250134965, Average SC accuracy: 0.4351851851851852, Average S2S accuracy: 0.34251171708564127


100%|██████████| 35/35 [00:34<00:00,  1.01it/s]
  0%|          | 0/4 [00:00<?, ?it/s]

[2023-12-04 07:40:56.810223] Epoch: 1 training ends. Status: Average loss: 43.40543071201869, Average MLM accuracy: 0.16993552897312414, Average SC accuracy: 0.4063786008230453, Average S2S accuracy: 0.4188672299648562


100%|██████████| 4/4 [00:01<00:00,  2.81it/s]
  0%|          | 0/35 [00:00<?, ?it/s]

[2023-12-04 07:40:58.234575] Epoch: 1 Evalutation ends. Status: Average loss: 35.62223720550537, Average MLM accuracy: 0.16627676804030952, Average SC accuracy: 0.4722222222222222, Average S2S accuracy: 0.5128887942053686


100%|██████████| 35/35 [00:35<00:00,  1.00s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

[2023-12-04 07:41:33.336368] Epoch: 2 training ends. Status: Average loss: 32.42517340523856, Average MLM accuracy: 0.1747514370048159, Average SC accuracy: 0.6018518518518519, Average S2S accuracy: 0.5176783218392127


100%|██████████| 4/4 [00:01<00:00,  2.87it/s]
  0%|          | 0/35 [00:00<?, ?it/s]

[2023-12-04 07:41:34.730665] Epoch: 2 Evalutation ends. Status: Average loss: 30.20267343521118, Average MLM accuracy: 0.16609681482814467, Average SC accuracy: 0.7592592592592593, Average S2S accuracy: 0.513048572645931


100%|██████████| 35/35 [00:35<00:00,  1.00s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

[2023-12-04 07:42:09.821457] Epoch: 3 training ends. Status: Average loss: 28.477603149414062, Average MLM accuracy: 0.17329501320490912, Average SC accuracy: 0.9053497942386831, Average S2S accuracy: 0.5190357592734258


100%|██████████| 4/4 [00:01<00:00,  2.76it/s]
  0%|          | 0/35 [00:00<?, ?it/s]

[2023-12-04 07:42:11.271084] Epoch: 3 Evalutation ends. Status: Average loss: 27.064322471618652, Average MLM accuracy: 0.181212884649991, Average SC accuracy: 1.0, Average S2S accuracy: 0.5430336599914785


100%|██████████| 35/35 [00:35<00:00,  1.00s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

[2023-12-04 07:42:46.324353] Epoch: 4 training ends. Status: Average loss: 25.991373552594865, Average MLM accuracy: 0.1752174926207861, Average SC accuracy: 0.992798353909465, Average S2S accuracy: 0.5744721236418436


100%|██████████| 4/4 [00:01<00:00,  2.74it/s]
  0%|          | 0/35 [00:00<?, ?it/s]

[2023-12-04 07:42:47.788207] Epoch: 4 Evalutation ends. Status: Average loss: 24.924739837646484, Average MLM accuracy: 0.17167536440525463, Average SC accuracy: 1.0, Average S2S accuracy: 0.6136557307200682


100%|██████████| 35/35 [00:35<00:00,  1.01s/it]
  0%|          | 0/4 [00:00<?, ?it/s]

[2023-12-04 07:43:23.044224] Epoch: 5 training ends. Status: Average loss: 24.083067866734098, Average MLM accuracy: 0.17411061053285692, Average SC accuracy: 1.0, Average S2S accuracy: 0.6220974709100008


100%|██████████| 4/4 [00:01<00:00,  2.73it/s]
 67%|██████▋   | 2/3 [00:00<00:00, 14.05it/s]

[2023-12-04 07:43:24.508636] Epoch: 5 Evalutation ends. Status: Average loss: 23.294678688049316, Average MLM accuracy: 0.17617419470937556, Average SC accuracy: 1.0, Average S2S accuracy: 0.6349595227950575


100%|██████████| 3/3 [00:00<00:00, 12.75it/s]


[INFO] 202208172100 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202208190706 is saved
[INFO] finish 202208190706
[INFO] SimpleEncoderDecoder loaded for 202208190706.
[INFO] SimpleDecoderHead_S2S loaded for 202208190706.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202208190706.
[INFO] SimpleEncoderHead_MLM loaded for 202208190706.


100%|██████████| 2/2 [00:01<00:00,  1.32it/s]
100%|██████████| 1/1 [00:00<00:00, 14.37it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:43:49.181613] Epoch: 0 training ends. Status: Average loss: 82.63142395019531, Average MLM accuracy: 0.0014619883040935672, Average SC accuracy: 0.3170731707317073, Average S2S accuracy: 0.043246360098025086
[2023-12-04 07:43:49.253050] Epoch: 0 Evalutation ends. Status: Average loss: 77.33135986328125, Average MLM accuracy: 0.16587677725118483, Average SC accuracy: 1.0, Average S2S accuracy: 0.1701534170153417


100%|██████████| 2/2 [00:01<00:00,  1.35it/s]
100%|██████████| 1/1 [00:00<00:00, 11.77it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:43:50.732599] Epoch: 1 training ends. Status: Average loss: 77.38998413085938, Average MLM accuracy: 0.14571150097465888, Average SC accuracy: 1.0, Average S2S accuracy: 0.17269713132478015
[2023-12-04 07:43:50.819194] Epoch: 1 Evalutation ends. Status: Average loss: 74.71690368652344, Average MLM accuracy: 0.18009478672985782, Average SC accuracy: 1.0, Average S2S accuracy: 0.2203626220362622


100%|██████████| 2/2 [00:01<00:00,  1.32it/s]
100%|██████████| 1/1 [00:00<00:00, 12.08it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:43:52.336671] Epoch: 2 training ends. Status: Average loss: 75.08311080932617, Average MLM accuracy: 0.15838206627680312, Average SC accuracy: 1.0, Average S2S accuracy: 0.2029695833933977
[2023-12-04 07:43:52.421154] Epoch: 2 Evalutation ends. Status: Average loss: 72.84557342529297, Average MLM accuracy: 0.1895734597156398, Average SC accuracy: 1.0, Average S2S accuracy: 0.2105997210599721


100%|██████████| 2/2 [00:01<00:00,  1.32it/s]
100%|██████████| 1/1 [00:00<00:00, 15.22it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:43:53.935908] Epoch: 3 training ends. Status: Average loss: 73.35622787475586, Average MLM accuracy: 0.15838206627680312, Average SC accuracy: 1.0, Average S2S accuracy: 0.20700591033588006
[2023-12-04 07:43:54.003213] Epoch: 3 Evalutation ends. Status: Average loss: 71.2105712890625, Average MLM accuracy: 0.17061611374407584, Average SC accuracy: 1.0, Average S2S accuracy: 0.19665271966527198


100%|██████████| 2/2 [00:01<00:00,  1.36it/s]
100%|██████████| 1/1 [00:00<00:00, 15.56it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

[2023-12-04 07:43:55.480264] Epoch: 4 training ends. Status: Average loss: 71.90263748168945, Average MLM accuracy: 0.15935672514619884, Average SC accuracy: 1.0, Average S2S accuracy: 0.20988900100908173
[2023-12-04 07:43:55.546202] Epoch: 4 Evalutation ends. Status: Average loss: 69.94770812988281, Average MLM accuracy: 0.1895734597156398, Average SC accuracy: 1.0, Average S2S accuracy: 0.1701534170153417


100%|██████████| 2/2 [00:01<00:00,  1.33it/s]
100%|██████████| 1/1 [00:00<00:00, 11.65it/s]
100%|██████████| 2/2 [00:00<00:00, 19.83it/s]


[2023-12-04 07:43:57.050653] Epoch: 5 training ends. Status: Average loss: 70.44095993041992, Average MLM accuracy: 0.15253411306042886, Average SC accuracy: 1.0, Average S2S accuracy: 0.1910047570996108
[2023-12-04 07:43:57.138090] Epoch: 5 Evalutation ends. Status: Average loss: 68.4708480834961, Average MLM accuracy: 0.1943127962085308, Average SC accuracy: 1.0, Average S2S accuracy: 0.1701534170153417
[INFO] 202208190706 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202208240500 is saved
[INFO] finish 202208240500
[INFO] SimpleEncoderDecoder loaded for 202208240500.
[INFO] SimpleDecoderHead_S2S loaded for 202208240500.
[INFO] SimpleEncoderHead_AveragePooling_SC loaded for 202208240500.
[INFO] SimpleEncoderHead_MLM loaded for 202208240500.


100%|██████████| 1/1 [00:00<00:00,  1.07it/s]
100%|██████████| 1/1 [00:00<00:00, 21.01it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2023-12-04 07:44:24.351899] Epoch: 0 training ends. Status: Average loss: 86.33868408203125, Average MLM accuracy: 0.0, Average SC accuracy: 0.0, Average S2S accuracy: 0.0
[2023-12-04 07:44:24.401704] Epoch: 0 Evalutation ends. Status: Average loss: 79.4050521850586, Average MLM accuracy: 0.0, Average SC accuracy: 1.0, Average S2S accuracy: 0.27514792899408286


100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
100%|██████████| 1/1 [00:00<00:00, 19.84it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2023-12-04 07:44:25.329862] Epoch: 1 training ends. Status: Average loss: 79.775146484375, Average MLM accuracy: 0.0, Average SC accuracy: 1.0, Average S2S accuracy: 0.1988674217188541
[2023-12-04 07:44:25.381814] Epoch: 1 Evalutation ends. Status: Average loss: 77.24745178222656, Average MLM accuracy: 0.18181818181818182, Average SC accuracy: 1.0, Average S2S accuracy: 0.1893491124260355


100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
100%|██████████| 1/1 [00:00<00:00, 19.80it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2023-12-04 07:44:26.308062] Epoch: 2 training ends. Status: Average loss: 77.72224426269531, Average MLM accuracy: 0.1431818181818182, Average SC accuracy: 1.0, Average S2S accuracy: 0.20919387075283144
[2023-12-04 07:44:26.360267] Epoch: 2 Evalutation ends. Status: Average loss: 75.71564483642578, Average MLM accuracy: 0.1919191919191919, Average SC accuracy: 1.0, Average S2S accuracy: 0.22189349112426035


100%|██████████| 1/1 [00:00<00:00,  1.09it/s]
100%|██████████| 1/1 [00:00<00:00, 17.79it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2023-12-04 07:44:27.283406] Epoch: 3 training ends. Status: Average loss: 76.25047302246094, Average MLM accuracy: 0.15568181818181817, Average SC accuracy: 1.0, Average S2S accuracy: 0.22051965356429049
[2023-12-04 07:44:27.341348] Epoch: 3 Evalutation ends. Status: Average loss: 74.60443115234375, Average MLM accuracy: 0.15151515151515152, Average SC accuracy: 1.0, Average S2S accuracy: 0.2958579881656805


100%|██████████| 1/1 [00:00<00:00,  1.08it/s]
100%|██████████| 1/1 [00:00<00:00, 15.73it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

[2023-12-04 07:44:28.268502] Epoch: 4 training ends. Status: Average loss: 74.97944641113281, Average MLM accuracy: 0.17613636363636365, Average SC accuracy: 1.0, Average S2S accuracy: 0.2401732178547635
[2023-12-04 07:44:28.333607] Epoch: 4 Evalutation ends. Status: Average loss: 73.35182189941406, Average MLM accuracy: 0.15151515151515152, Average SC accuracy: 1.0, Average S2S accuracy: 0.33136094674556216


100%|██████████| 1/1 [00:00<00:00,  1.09it/s]
100%|██████████| 1/1 [00:00<00:00, 20.88it/s]
 11%|█         | 4/37 [00:00<00:01, 31.14it/s]

[2023-12-04 07:44:29.249446] Epoch: 5 training ends. Status: Average loss: 73.98074340820312, Average MLM accuracy: 0.18409090909090908, Average SC accuracy: 1.0, Average S2S accuracy: 0.26015989340439705
[2023-12-04 07:44:29.299009] Epoch: 5 Evalutation ends. Status: Average loss: 72.36466979980469, Average MLM accuracy: 0.2727272727272727, Average SC accuracy: 1.0, Average S2S accuracy: 0.4319526627218935


100%|██████████| 37/37 [00:01<00:00, 30.70it/s]


[INFO] 202208240500 is cleared.
[INFO] SimpleEncoderDecoder is saved, 614.9739637374878 MB
[INFO] SimpleDecoderHead_S2S is saved, 91.68078327178955 MB
[INFO] SimpleEncoderHead_AveragePooling_SC is saved, 2.2815237045288086 MB
[INFO] SimpleEncoderHead_MLM is saved, 91.68078327178955 MB
[INFO] 202209081034 is saved
[INFO] finish 202209081034


KeyboardInterrupt: 