In [1]:
from collections import Counter
from typing import *
import random
import json

In [2]:
from tqdm import tqdm
import numpy as np
from sklearn.model_selection import KFold

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader, Subset
from torch.cuda.amp import autocast, GradScaler

from transformers import AutoConfig, AutoModel, AutoTokenizer, AdamW, get_constant_schedule_with_warmup, get_linear_schedule_with_warmup

In [3]:
from trainer import BasicTrainer
from model import DepParser
from utils import arc_rel_loss, uas_las

## Config

In [4]:
class CFG:
    data_file = '/root/diag_dep/data_testset/1to500_1013.json'
    plm = 'hfl/chinese-electra-180g-large-discriminator'
    num_folds = 5
    random_seed = 42
    num_epochs = 10
    batch_size = 64
    lr = 2e-5
    weight_decay = 0.01
    dropout = 0.2
    grad_clip = 1
    scheduler = 'linear'
    warmup_ratio = 0.1
    num_early_stop = 5
    max_length = 80
    num_labels = 40
    hidden_size = 400
    print_every = 1e9
    eval_every = 50
    cuda = True
    fp16 = True

## Seed and Device

In [5]:
def seed_everything(seed=CFG.random_seed):
    np.random.seed(seed%(2**32-1))
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic =True
    torch.backends.cudnn.benchmark = False

seed_everything()

In [6]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f'Using device: {device}')

Using device: cuda


## Data

In [7]:
rel_dct = {
    'root': '根节点',
    'sasubj-obj': '同主同宾',
    'sasubj': '同主语',
    'dfsubj': '不同主语',
    'subj': '主语',
    'subj-in': '内部主语',
    'obj': '宾语',
    'pred': '谓语',
    'att': '定语',
    'adv': '状语',
    'cmp': '补语',
    'coo': '并列',
    'pobj': '介宾',
    'iobj': '间宾',
    'de': '的',
    'adjct': '附加',
    'app': '称呼',
    'exp': '解释',
    'punc': '标点',
    'frag': '片段',
    'repet': '重复',
    # rst
    'attr': '归属',
    'bckg': '背景',
    'cause': '因果',
    'comp': '比较',
    'cond': '状况',
    'cont': '对比',
    'elbr': '阐述',
    'enbm': '目的',
    'eval': '评价',
    'expl': '解释-例证',
    'joint': '联合',
    'manner': '方式',
    'rstm': '重申',
    'temp': '时序',
    'tp-chg': '主题变更',
    'prob-sol': '问题-解决',
    'qst-ans': '疑问-回答',
    'stm-rsp': '陈述-回应',
    'req-proc': '需求-处理',
}

In [8]:
rel2id = {key:idx for idx, key in enumerate(rel_dct.keys())}
print(rel2id)

{'root': 0, 'sasubj-obj': 1, 'sasubj': 2, 'dfsubj': 3, 'subj': 4, 'subj-in': 5, 'obj': 6, 'pred': 7, 'att': 8, 'adv': 9, 'cmp': 10, 'coo': 11, 'pobj': 12, 'iobj': 13, 'de': 14, 'adjct': 15, 'app': 16, 'exp': 17, 'punc': 18, 'frag': 19, 'repet': 20, 'attr': 21, 'bckg': 22, 'cause': 23, 'comp': 24, 'cond': 25, 'cont': 26, 'elbr': 27, 'enbm': 28, 'eval': 29, 'expl': 30, 'joint': 31, 'manner': 32, 'rstm': 33, 'temp': 34, 'tp-chg': 35, 'prob-sol': 36, 'qst-ans': 37, 'stm-rsp': 38, 'req-proc': 39}


In [9]:
tokenizer = AutoTokenizer.from_pretrained(CFG.plm)
print(len(tokenizer))
 
num_added_toks = tokenizer.add_tokens(['[root]', '[qst]', '[ans]'], special_tokens=True)
tokenizer.root_token = '[root]'
tokenizer.root_token_ids = tokenizer('[root]')['input_ids'][1]
print(f"add token: {tokenizer.root_token} {tokenizer.root_token_ids}")

tokenizer.qst_token = '[qst]'
tokenizer.qst_token_ids = tokenizer('[qst]')['input_ids'][1]
print(f"add token: {tokenizer.qst_token} {tokenizer.qst_token_ids}")

tokenizer.ans_token = '[ans]'
tokenizer.ans_token_ids = tokenizer('[ans]')['input_ids'][1]
print(f"add token: {tokenizer.ans_token} {tokenizer.ans_token_ids}")
print(len(tokenizer))

CFG.tokenizer = tokenizer

21128
add token: [root] 21128
add token: [qst] 21129
add token: [ans] 21130
21131


In [10]:
class Dependency():
    def __init__(self, idx, word, head, rel):
        self.id = idx
        self.word = word
        self.tag = '_'
        self.head = head
        self.rel = rel

    def __str__(self):
        # example:  1	上海	_	NR	NR	_	2	nn	_	_
        values = [str(self.idx), self.word, "_", self.tag, "_", "_", str(self.head), self.rel, "_", "_"]
        return '\t'.join(values)

    def __repr__(self):
        return f"({self.word}, {self.tag}, {self.head}, {self.rel})"

In [11]:
def load_annoted(data_file):
    with open(CFG.data_file, 'r', encoding='utf-8') as f:
        data = json.load(f)[:500]  # have annotated 500 data
        
    sample_lst:List[List[Dependency]] = []
    
    for d in data:
        rel_dct = {}
        for tripple in d['relationship']:
            head, rel, tail = tripple
            head_uttr_idx, head_word_idx = [int(x) for x in head.split('-')]
            tail_uttr_idx, tail_word_idx = [int(x) for x in tail.split('-')]
            if head_uttr_idx != tail_uttr_idx:
                continue
            
            if not rel_dct.get(head_uttr_idx, None):
                rel_dct[head_uttr_idx] = {tail_word_idx: [head_word_idx, rel]}
            else:
                rel_dct[head_uttr_idx][tail_word_idx] = [head_word_idx, rel]
            
        for item in d['dialog']:
            turn = item['turn']
            utterance = item['utterance']
            # dep_lst:List[Dependency] = [Dependency(0, '[root]', -1, '_')]
            dep_lst:List[Dependency] = []
            
            for word_idx, word in enumerate(utterance.split(' ')):
                head_word_idx, rel = rel_dct[turn].get(word_idx + 1, [word_idx, 'adjct'])  # some word annoted missed, padded with last word and 'adjct'
                dep_lst.append(Dependency(word_idx + 1, word, head_word_idx, rel))  # start from 1
            
            sample_lst.append(dep_lst)
        
    return sample_lst

In [12]:
class DialogDataset(Dataset):
    def __init__(self, cfg):
        self.cfg = cfg
        self.inputs, self.offsets, self.heads, self.rels, self.masks = self.read_data()
        
    def read_data(self):
        inputs, offsets = [], []
        tags, heads, rels, masks = [], [], [], []
        
        for deps in load_annoted(self.cfg.data_file):
            # another sentence
            seq_len = len(deps)

            word_lst = [] 
#                 head_tokens = np.ones(self.cfg.max_length, dtype=np.int64)*(-1)  # root index is 0, thus using -1 for padding 
            head_tokens = np.zeros(self.cfg.max_length, dtype=np.int64)  # same as root index is 0, constrainting by mask 
            rel_tokens = np.zeros(self.cfg.max_length, dtype=np.int64)
            mask_tokens = np.zeros(self.cfg.max_length, dtype=np.int64)
            for i, dep in enumerate(deps):
                if i == seq_len or i + 1== self.cfg.max_length:
                    break

                word_lst.append(dep.word)

                if dep.head == -1 or dep.head + 1 >= self.cfg.max_length:
                    head_tokens[i+1] = 0
                    mask_tokens[i+1] = 0
                else:
                    head_tokens[i+1] = int(dep.head)
                    mask_tokens[i+1] = 1
#                     head_tokens[i] = dep.head if dep.head != '_' else 0
                rel_tokens[i+1] = rel2id.get(dep.rel, 0)

            tokenized = tokenizer.encode_plus(word_lst, 
                                              padding='max_length', 
                                              truncation=True,
                                              max_length=self.cfg.max_length, 
                                              return_offsets_mapping=True, 
                                              return_tensors='pt',
                                              is_split_into_words=True)
            inputs.append({"input_ids": tokenized['input_ids'][0],
                          "token_type_ids": tokenized['token_type_ids'][0],
                           "attention_mask": tokenized['attention_mask'][0]
                          })

#                 sentence_word_idx = np.zeros(self.cfg.max_length, dtype=np.int64)
            sentence_word_idx = []
            for idx, (start, end) in enumerate(tokenized.offset_mapping[0][1:]):
                if start == 0 and end != 0:
                    sentence_word_idx.append(idx)
#                         sentence_word_idx[idx] = idx
            if len(sentence_word_idx) < self.cfg.max_length - 1:
                sentence_word_idx.extend([0]* (self.cfg.max_length - 1 - len(sentence_word_idx)))
            offsets.append(torch.as_tensor(sentence_word_idx))
#                 offsets.append(sentence_word_idx)

            heads.append(head_tokens)
            rels.append(rel_tokens)
            masks.append(mask_tokens)
                    
        return inputs, offsets, heads, rels, masks

    def __getitem__(self, idx):
        return self.inputs[idx], self.offsets[idx], self.heads[idx], self.rels[idx], self.masks[idx]
    
    def __len__(self):
        return len(self.rels)

In [13]:
dataset = DialogDataset(CFG)

## Tranining

In [14]:
kfold = KFold(n_splits=CFG.num_folds, shuffle=True)

In [15]:
for fold, (train_ids, val_ids) in enumerate(kfold.split(dataset)):
    print(f'FOLD {fold}')
    print(f'{len(train_ids)}/{len(val_ids)}')
    print('--------------------------------')

    if CFG.cuda and torch.cuda.is_available:
        torch.cuda.empty_cache()

    random.shuffle(train_ids)
    random.shuffle(val_ids)

    tr_dataset = Subset(dataset, train_ids)
    va_dataset = Subset(dataset, val_ids)
    
    tr_iter = DataLoader(tr_dataset, batch_size=CFG.batch_size)
    va_iter = DataLoader(va_dataset, batch_size=CFG.batch_size)
    
    model = DepParser(CFG)
    
    optim = AdamW(model.parameters(), 
                      lr=CFG.lr,
                      weight_decay=CFG.weight_decay
                      )

    training_step = int(CFG.num_epochs * (len(train_ids) / CFG.batch_size))
    warmup_step = int(CFG.warmup_ratio * training_step)  
    lr_scheduler = get_linear_schedule_with_warmup(optimizer=optim, 
                                                        num_warmup_steps=warmup_step, 
                                                        num_training_steps=training_step)

    trainer = BasicTrainer(optim=optim, 
                        lr_scheduler=lr_scheduler,
                        trainset_size=len(train_ids), 
                        loss_fn=arc_rel_loss, 
                        metrics_fn=uas_las, 
                        config=CFG)
    
    best_res, best_state_dict = trainer.train(model=model, train_iter=tr_iter, val_iter=va_iter)
    print(best_res)
    with open("/root/autodl-tmp/diag_dep/k-fold/res.txt", 'a+') as f:
        f.write(f'{fold}\t {str(best_res)}\n')
    
    torch.save(best_state_dict, f"/root/autodl-tmp/diag_dep/k-fold/{fold}/model.bin")

FOLD 0
9988/2497
--------------------------------


Some weights of the model checkpoint at hfl/chinese-electra-180g-large-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/10 [00:00<?, ?it/s]

--epoch 0, step 0, loss 12.48440170288086
  {'UAS': 0.029143897996357013, 'LAS': 0.0}
--Evaluation:
-loss: 5.746634006500244  UAS: 0.21628747228269732  LAS: 0.14235672989230103 

--Best Evaluation: 
-loss: 5.746634006500244  UAS: 0.21628747228269732  LAS: 0.14235672989230103 

--Evaluation:
-loss: 3.9041459560394287  UAS: 0.45574888375386735  LAS: 0.4121431218729301 

--Best Evaluation: 
-loss: 3.9041459560394287  UAS: 0.45574888375386735  LAS: 0.4121431218729301 

--Evaluation:
-loss: 2.881474494934082  UAS: 0.6247411406799045  LAS: 0.5983832697489353 

--Best Evaluation: 
-loss: 2.881474494934082  UAS: 0.6247411406799045  LAS: 0.5983832697489353 



 10%|█         | 1/10 [01:24<12:36, 84.02s/it]

--Evaluation:
-loss: 1.9541970491409302  UAS: 0.7727363686054901  LAS: 0.743163196687916 

--Best Evaluation: 
-loss: 1.9541970491409302  UAS: 0.7727363686054901  LAS: 0.743163196687916 

--Evaluation:
-loss: 1.5896639823913574  UAS: 0.8274045206828886  LAS: 0.7998001201430561 

--Best Evaluation: 
-loss: 1.5896639823913574  UAS: 0.8274045206828886  LAS: 0.7998001201430561 

--Evaluation:
-loss: 1.5325212478637695  UAS: 0.8528775678542752  LAS: 0.8245511262398989 

--Best Evaluation: 
-loss: 1.5325212478637695  UAS: 0.8528775678542752  LAS: 0.8245511262398989 



 20%|██        | 2/10 [02:49<11:17, 84.65s/it]

--Evaluation:
-loss: 1.3750243186950684  UAS: 0.8663015706998187  LAS: 0.8386045014998323 

--Best Evaluation: 
-loss: 1.3750243186950684  UAS: 0.8663015706998187  LAS: 0.8386045014998323 

--Evaluation:
-loss: 1.3982182741165161  UAS: 0.8768280256908938  LAS: 0.8506549240067878 

--Best Evaluation: 
-loss: 1.3982182741165161  UAS: 0.8768280256908938  LAS: 0.8506549240067878 

--Evaluation:
-loss: 1.260231375694275  UAS: 0.8854478448435396  LAS: 0.8591977963083924 

--Best Evaluation: 
-loss: 1.260231375694275  UAS: 0.8854478448435396  LAS: 0.8591977963083924 



 30%|███       | 3/10 [04:14<09:53, 84.77s/it]

--Evaluation:
-loss: 1.2658209800720215  UAS: 0.888610991774791  LAS: 0.8631404276055684 

--Best Evaluation: 
-loss: 1.2658209800720215  UAS: 0.888610991774791  LAS: 0.8631404276055684 

--Evaluation:
-loss: 1.2143877744674683  UAS: 0.8910650733778098  LAS: 0.866115974306757 

--Best Evaluation: 
-loss: 1.2143877744674683  UAS: 0.8910650733778098  LAS: 0.866115974306757 

--Evaluation:
-loss: 1.1883589029312134  UAS: 0.8956653221299815  LAS: 0.8695149009082542 

--Best Evaluation: 
-loss: 1.1883589029312134  UAS: 0.8956653221299815  LAS: 0.8695149009082542 



 40%|████      | 4/10 [05:38<08:28, 84.79s/it]

--Evaluation:
-loss: 1.1978946924209595  UAS: 0.8978227836790518  LAS: 0.8725857406066289 

--Best Evaluation: 
-loss: 1.1978946924209595  UAS: 0.8978227836790518  LAS: 0.8725857406066289 

--Evaluation:
-loss: 1.1764860153198242  UAS: 0.9033512862028832  LAS: 0.8782086412617214 

--Best Evaluation: 
-loss: 1.1764860153198242  UAS: 0.9033512862028832  LAS: 0.8782086412617214 

--Evaluation:
-loss: 1.1967955827713013  UAS: 0.9029925398583031  LAS: 0.8784047383801096 

--Best Evaluation: 
-loss: 1.1967955827713013  UAS: 0.9029925398583031  LAS: 0.8784047383801096 



 50%|█████     | 5/10 [07:03<07:04, 84.81s/it]

--Evaluation:
-loss: 1.170707106590271  UAS: 0.9059841664250716  LAS: 0.8810457338417186 

--Best Evaluation: 
-loss: 1.170707106590271  UAS: 0.9059841664250716  LAS: 0.8810457338417186 

--Evaluation:
-loss: 1.1870372295379639  UAS: 0.9057538976588627  LAS: 0.8808462519932644 

--Best Evaluation: 
-loss: 1.170707106590271  UAS: 0.9059841664250716  LAS: 0.8810457338417186 

--Evaluation:
-loss: 1.1939013004302979  UAS: 0.9050844137505143  LAS: 0.8801627421399135 

--Best Evaluation: 
-loss: 1.170707106590271  UAS: 0.9059841664250716  LAS: 0.8810457338417186 



 60%|██████    | 6/10 [08:28<05:39, 84.84s/it]

--Evaluation:
-loss: 1.1789199113845825  UAS: 0.9083659743971704  LAS: 0.8828047298782629 

--Best Evaluation: 
-loss: 1.1789199113845825  UAS: 0.9083659743971704  LAS: 0.8828047298782629 

--Evaluation:
-loss: 1.1752846240997314  UAS: 0.910187320595002  LAS: 0.8854039237006205 

--Best Evaluation: 
-loss: 1.1752846240997314  UAS: 0.910187320595002  LAS: 0.8854039237006205 

--Evaluation:
-loss: 1.200128436088562  UAS: 0.9107606156134085  LAS: 0.8853995622632526 

--Best Evaluation: 
-loss: 1.1752846240997314  UAS: 0.910187320595002  LAS: 0.8854039237006205 



 70%|███████   | 7/10 [09:53<04:14, 84.87s/it]

--Evaluation:
-loss: 1.1666827201843262  UAS: 0.9108383253195534  LAS: 0.8857600289418904 

--Best Evaluation: 
-loss: 1.1666827201843262  UAS: 0.9108383253195534  LAS: 0.8857600289418904 

--Evaluation:
-loss: 1.1646100282669067  UAS: 0.9123057054357392  LAS: 0.8874526019351511 

--Best Evaluation: 
-loss: 1.1646100282669067  UAS: 0.9123057054357392  LAS: 0.8874526019351511 

--Evaluation:
-loss: 1.1600685119628906  UAS: 0.912246451879384  LAS: 0.8878535881781574 

--Best Evaluation: 
-loss: 1.1600685119628906  UAS: 0.912246451879384  LAS: 0.8878535881781574 

--Evaluation:
-loss: 1.165381669998169  UAS: 0.909334248109533  LAS: 0.8844950093669424 

--Best Evaluation: 
-loss: 1.1600685119628906  UAS: 0.912246451879384  LAS: 0.8878535881781574 



 80%|████████  | 8/10 [11:23<02:53, 86.57s/it]

--Evaluation:
-loss: 1.1821353435516357  UAS: 0.9102144787421321  LAS: 0.8850171910086816 

--Best Evaluation: 
-loss: 1.1600685119628906  UAS: 0.912246451879384  LAS: 0.8878535881781574 

--Evaluation:
-loss: 1.1655914783477783  UAS: 0.9123163368224632  LAS: 0.8875200286278054 

--Best Evaluation: 
-loss: 1.1600685119628906  UAS: 0.912246451879384  LAS: 0.8878535881781574 

--Evaluation:
-loss: 1.168093204498291  UAS: 0.9123320724020237  LAS: 0.8869150972445775 

--Best Evaluation: 
-loss: 1.1600685119628906  UAS: 0.912246451879384  LAS: 0.8878535881781574 



 90%|█████████ | 9/10 [13:10<01:27, 87.80s/it]

--Evaluation:
-loss: 1.1871541738510132  UAS: 0.9131237265439021  LAS: 0.8877065254998554 

--Best Evaluation: 
-loss: 1.1600685119628906  UAS: 0.912246451879384  LAS: 0.8878535881781574 

--early stopping, training finished.
[1.1600685119628906, 0.912246451879384, 0.8878535881781574]





FOLD 1
9988/2497
--------------------------------


Some weights of the model checkpoint at hfl/chinese-electra-180g-large-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/10 [00:00<?, ?it/s]

--epoch 0, step 0, loss 12.119668960571289
  {'UAS': 0.037422037422037424, 'LAS': 0.0}
--Evaluation:
-loss: 5.907177925109863  UAS: 0.22048870820137828  LAS: 0.15766536738905407 

--Best Evaluation: 
-loss: 5.907177925109863  UAS: 0.22048870820137828  LAS: 0.15766536738905407 

--Evaluation:
-loss: 4.036846160888672  UAS: 0.4440117224944476  LAS: 0.39853861919402966 

--Best Evaluation: 
-loss: 4.036846160888672  UAS: 0.4440117224944476  LAS: 0.39853861919402966 

--Evaluation:
-loss: 2.893026351928711  UAS: 0.6220735922294647  LAS: 0.5953583168314043 

--Best Evaluation: 
-loss: 2.893026351928711  UAS: 0.6220735922294647  LAS: 0.5953583168314043 



 10%|█         | 1/10 [01:24<12:44, 84.94s/it]

--Evaluation:
-loss: 2.0721752643585205  UAS: 0.7496286761280382  LAS: 0.7182132284757309 

--Best Evaluation: 
-loss: 2.0721752643585205  UAS: 0.7496286761280382  LAS: 0.7182132284757309 

--Evaluation:
-loss: 1.717354655265808  UAS: 0.8065772538234403  LAS: 0.778800410909781 

--Best Evaluation: 
-loss: 1.717354655265808  UAS: 0.8065772538234403  LAS: 0.778800410909781 

--Evaluation:
-loss: 1.5158298015594482  UAS: 0.8437442253117928  LAS: 0.816230213198375 

--Best Evaluation: 
-loss: 1.5158298015594482  UAS: 0.8437442253117928  LAS: 0.816230213198375 



 20%|██        | 2/10 [02:49<11:20, 85.01s/it]

--Evaluation:
-loss: 1.3810391426086426  UAS: 0.8614247427434674  LAS: 0.8351549877230366 

--Best Evaluation: 
-loss: 1.3810391426086426  UAS: 0.8614247427434674  LAS: 0.8351549877230366 

--Evaluation:
-loss: 1.4089467525482178  UAS: 0.8664665057306429  LAS: 0.8388042999743837 

--Best Evaluation: 
-loss: 1.4089467525482178  UAS: 0.8664665057306429  LAS: 0.8388042999743837 

--Evaluation:
-loss: 1.2257263660430908  UAS: 0.8870780886120538  LAS: 0.8586582606594658 

--Best Evaluation: 
-loss: 1.2257263660430908  UAS: 0.8870780886120538  LAS: 0.8586582606594658 



 30%|███       | 3/10 [04:15<09:55, 85.04s/it]

--Evaluation:
-loss: 1.1999554634094238  UAS: 0.8936547811357818  LAS: 0.8663453051766324 

--Best Evaluation: 
-loss: 1.1999554634094238  UAS: 0.8936547811357818  LAS: 0.8663453051766324 

--Evaluation:
-loss: 1.2007324695587158  UAS: 0.8940192045658382  LAS: 0.8686957693402595 

--Best Evaluation: 
-loss: 1.2007324695587158  UAS: 0.8940192045658382  LAS: 0.8686957693402595 

--Evaluation:
-loss: 1.1760200262069702  UAS: 0.9003620984332683  LAS: 0.8751417893855296 

--Best Evaluation: 
-loss: 1.1760200262069702  UAS: 0.9003620984332683  LAS: 0.8751417893855296 



 40%|████      | 4/10 [05:40<08:30, 85.01s/it]

--Evaluation:
-loss: 1.150768756866455  UAS: 0.9031265358246308  LAS: 0.8775906033564478 

--Best Evaluation: 
-loss: 1.150768756866455  UAS: 0.9031265358246308  LAS: 0.8775906033564478 

--Evaluation:
-loss: 1.128247857093811  UAS: 0.9089034516555976  LAS: 0.8829454963469455 

--Best Evaluation: 
-loss: 1.128247857093811  UAS: 0.9089034516555976  LAS: 0.8829454963469455 

--Evaluation:
-loss: 1.1109331846237183  UAS: 0.9088911299933398  LAS: 0.8830139446912303 

--Best Evaluation: 
-loss: 1.1109331846237183  UAS: 0.9088911299933398  LAS: 0.8830139446912303 



 50%|█████     | 5/10 [07:04<07:04, 84.96s/it]

--Evaluation:
-loss: 1.0847294330596924  UAS: 0.9101721294403001  LAS: 0.8837086017496838 

--Best Evaluation: 
-loss: 1.0847294330596924  UAS: 0.9101721294403001  LAS: 0.8837086017496838 

--Evaluation:
-loss: 1.093933343887329  UAS: 0.9108419180243202  LAS: 0.8847317740253272 

--Best Evaluation: 
-loss: 1.093933343887329  UAS: 0.9108419180243202  LAS: 0.8847317740253272 

--Evaluation:
-loss: 1.068822979927063  UAS: 0.9117336001641736  LAS: 0.8859596731096222 

--Best Evaluation: 
-loss: 1.068822979927063  UAS: 0.9117336001641736  LAS: 0.8859596731096222 



 60%|██████    | 6/10 [08:29<05:39, 84.95s/it]

--Evaluation:
-loss: 1.09757399559021  UAS: 0.9149622483839213  LAS: 0.889798823313693 

--Best Evaluation: 
-loss: 1.09757399559021  UAS: 0.9149622483839213  LAS: 0.889798823313693 

--Evaluation:
-loss: 1.1229772567749023  UAS: 0.9105373336259563  LAS: 0.885336104114973 

--Best Evaluation: 
-loss: 1.09757399559021  UAS: 0.9149622483839213  LAS: 0.889798823313693 

--Evaluation:
-loss: 1.097947597503662  UAS: 0.9138816177316403  LAS: 0.8883246483708802 

--Best Evaluation: 
-loss: 1.09757399559021  UAS: 0.9149622483839213  LAS: 0.889798823313693 



 70%|███████   | 7/10 [09:54<04:14, 84.91s/it]

--Evaluation:
-loss: 1.1059123277664185  UAS: 0.9170924222653803  LAS: 0.891312422227545 

--Best Evaluation: 
-loss: 1.1059123277664185  UAS: 0.9170924222653803  LAS: 0.891312422227545 

--Evaluation:
-loss: 1.1051300764083862  UAS: 0.9164135499370709  LAS: 0.8898873897360492 

--Best Evaluation: 
-loss: 1.1059123277664185  UAS: 0.9170924222653803  LAS: 0.891312422227545 

--Evaluation:
-loss: 1.1196799278259277  UAS: 0.9181761864290404  LAS: 0.8922584454928922 

--Best Evaluation: 
-loss: 1.1196799278259277  UAS: 0.9181761864290404  LAS: 0.8922584454928922 

--Evaluation:
-loss: 1.1007863283157349  UAS: 0.9195411085167744  LAS: 0.8940347446576941 

--Best Evaluation: 
-loss: 1.1007863283157349  UAS: 0.9195411085167744  LAS: 0.8940347446576941 



 80%|████████  | 8/10 [11:24<02:53, 86.59s/it]

--Evaluation:
-loss: 1.0916019678115845  UAS: 0.9192494777875462  LAS: 0.8927960296042073 

--Best Evaluation: 
-loss: 1.1007863283157349  UAS: 0.9195411085167744  LAS: 0.8940347446576941 

--Evaluation:
-loss: 1.0769044160842896  UAS: 0.9192322330264785  LAS: 0.8929850435445874 

--Best Evaluation: 
-loss: 1.1007863283157349  UAS: 0.9195411085167744  LAS: 0.8940347446576941 

--Evaluation:
-loss: 1.092016339302063  UAS: 0.9200813363293016  LAS: 0.8943448573572993 

--Best Evaluation: 
-loss: 1.092016339302063  UAS: 0.9200813363293016  LAS: 0.8943448573572993 



 90%|█████████ | 9/10 [12:49<01:26, 86.10s/it]

--Evaluation:
-loss: 1.0823379755020142  UAS: 0.9203559065313502  LAS: 0.8949600622139633 

--Best Evaluation: 
-loss: 1.0823379755020142  UAS: 0.9203559065313502  LAS: 0.8949600622139633 

--Evaluation:
-loss: 1.0836344957351685  UAS: 0.9201665899159545  LAS: 0.8947464570016186 

--Best Evaluation: 
-loss: 1.0823379755020142  UAS: 0.9203559065313502  LAS: 0.8949600622139633 

--Evaluation:
-loss: 1.0894182920455933  UAS: 0.9200012186597618  LAS: 0.8947867987476064 

--Best Evaluation: 
-loss: 1.0823379755020142  UAS: 0.9203559065313502  LAS: 0.8949600622139633 



100%|██████████| 10/10 [14:14<00:00, 85.49s/it]


--training finished.
[1.0823379755020142, 0.9203559065313502, 0.8949600622139633]
FOLD 2
9988/2497
--------------------------------


Some weights of the model checkpoint at hfl/chinese-electra-180g-large-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/10 [00:00<?, ?it/s]

--epoch 0, step 0, loss 11.927385330200195
  {'UAS': 0.03206412825651302, 'LAS': 0.0}
--Evaluation:
-loss: 5.554752349853516  UAS: 0.21642105889821847  LAS: 0.14041485110970192 

--Best Evaluation: 
-loss: 5.554752349853516  UAS: 0.21642105889821847  LAS: 0.14041485110970192 

--Evaluation:
-loss: 3.9769082069396973  UAS: 0.4402693187332621  LAS: 0.3986943710960296 

--Best Evaluation: 
-loss: 3.9769082069396973  UAS: 0.4402693187332621  LAS: 0.3986943710960296 

--Evaluation:
-loss: 2.847323179244995  UAS: 0.6258732655297812  LAS: 0.5986051297035058 

--Best Evaluation: 
-loss: 2.847323179244995  UAS: 0.6258732655297812  LAS: 0.5986051297035058 



 10%|█         | 1/10 [01:25<12:45, 85.07s/it]

--Evaluation:
-loss: 2.110053300857544  UAS: 0.7533474675878659  LAS: 0.7242431904368365 

--Best Evaluation: 
-loss: 2.110053300857544  UAS: 0.7533474675878659  LAS: 0.7242431904368365 

--Evaluation:
-loss: 1.6753031015396118  UAS: 0.8094130880151306  LAS: 0.7784956551998948 

--Best Evaluation: 
-loss: 1.6753031015396118  UAS: 0.8094130880151306  LAS: 0.7784956551998948 

--Evaluation:
-loss: 1.5487282276153564  UAS: 0.839407364497031  LAS: 0.8086232399439542 

--Best Evaluation: 
-loss: 1.5487282276153564  UAS: 0.839407364497031  LAS: 0.8086232399439542 



 20%|██        | 2/10 [02:50<11:20, 85.05s/it]

--Evaluation:
-loss: 1.4803733825683594  UAS: 0.8550165245384541  LAS: 0.8272154655048836 

--Best Evaluation: 
-loss: 1.4803733825683594  UAS: 0.8550165245384541  LAS: 0.8272154655048836 

--Evaluation:
-loss: 1.3487017154693604  UAS: 0.8684949013514268  LAS: 0.8405962995645406 

--Best Evaluation: 
-loss: 1.3487017154693604  UAS: 0.8684949013514268  LAS: 0.8405962995645406 

--Evaluation:
-loss: 1.393399953842163  UAS: 0.8723797647614083  LAS: 0.8457179239238097 

--Best Evaluation: 
-loss: 1.393399953842163  UAS: 0.8723797647614083  LAS: 0.8457179239238097 



 30%|███       | 3/10 [04:15<09:55, 85.03s/it]

--Evaluation:
-loss: 1.2407268285751343  UAS: 0.8903253509641585  LAS: 0.8619463191605972 

--Best Evaluation: 
-loss: 1.2407268285751343  UAS: 0.8903253509641585  LAS: 0.8619463191605972 

--Evaluation:
-loss: 1.25491201877594  UAS: 0.8933566715752985  LAS: 0.865145171745076 

--Best Evaluation: 
-loss: 1.25491201877594  UAS: 0.8933566715752985  LAS: 0.865145171745076 

--Evaluation:
-loss: 1.2165993452072144  UAS: 0.8958860688961943  LAS: 0.8676234818080911 

--Best Evaluation: 
-loss: 1.2165993452072144  UAS: 0.8958860688961943  LAS: 0.8676234818080911 



 40%|████      | 4/10 [05:40<08:30, 85.00s/it]

--Evaluation:
-loss: 1.1774487495422363  UAS: 0.8995486750213956  LAS: 0.8701839567324632 

--Best Evaluation: 
-loss: 1.1774487495422363  UAS: 0.8995486750213956  LAS: 0.8701839567324632 

--Evaluation:
-loss: 1.1633238792419434  UAS: 0.9057859252450785  LAS: 0.8774196965610412 

--Best Evaluation: 
-loss: 1.1633238792419434  UAS: 0.9057859252450785  LAS: 0.8774196965610412 

--Evaluation:
-loss: 1.139679193496704  UAS: 0.907080019701317  LAS: 0.879109340452051 

--Best Evaluation: 
-loss: 1.139679193496704  UAS: 0.907080019701317  LAS: 0.879109340452051 



 50%|█████     | 5/10 [07:04<07:04, 84.95s/it]

--Evaluation:
-loss: 1.1874316930770874  UAS: 0.9035263388903276  LAS: 0.8760559025558277 

--Best Evaluation: 
-loss: 1.139679193496704  UAS: 0.907080019701317  LAS: 0.879109340452051 

--Evaluation:
-loss: 1.158015251159668  UAS: 0.908773337771951  LAS: 0.8806860089520256 

--Best Evaluation: 
-loss: 1.158015251159668  UAS: 0.908773337771951  LAS: 0.8806860089520256 

--Evaluation:
-loss: 1.1764236688613892  UAS: 0.9088428518888223  LAS: 0.8809885150781156 

--Best Evaluation: 
-loss: 1.1764236688613892  UAS: 0.9088428518888223  LAS: 0.8809885150781156 



 60%|██████    | 6/10 [08:29<05:39, 84.93s/it]

--Evaluation:
-loss: 1.1520694494247437  UAS: 0.9091357242077256  LAS: 0.881706300397268 

--Best Evaluation: 
-loss: 1.1520694494247437  UAS: 0.9091357242077256  LAS: 0.881706300397268 

--Evaluation:
-loss: 1.1523665189743042  UAS: 0.9102190959421407  LAS: 0.8827156081967781 

--Best Evaluation: 
-loss: 1.1523665189743042  UAS: 0.9102190959421407  LAS: 0.8827156081967781 

--Evaluation:
-loss: 1.1394261121749878  UAS: 0.9125924944298986  LAS: 0.884976272721687 

--Best Evaluation: 
-loss: 1.1394261121749878  UAS: 0.9125924944298986  LAS: 0.884976272721687 



 70%|███████   | 7/10 [09:54<04:14, 84.86s/it]

--Evaluation:
-loss: 1.13454008102417  UAS: 0.911678708489796  LAS: 0.8839328709907208 

--Best Evaluation: 
-loss: 1.1394261121749878  UAS: 0.9125924944298986  LAS: 0.884976272721687 

--Evaluation:
-loss: 1.161439299583435  UAS: 0.9125148471064669  LAS: 0.8856906118250402 

--Best Evaluation: 
-loss: 1.161439299583435  UAS: 0.9125148471064669  LAS: 0.8856906118250402 

--Evaluation:
-loss: 1.152514934539795  UAS: 0.9142840858172859  LAS: 0.8866033360205873 

--Best Evaluation: 
-loss: 1.152514934539795  UAS: 0.9142840858172859  LAS: 0.8866033360205873 

--Evaluation:
-loss: 1.1414563655853271  UAS: 0.9141575738401063  LAS: 0.8867212388922804 

--Best Evaluation: 
-loss: 1.1414563655853271  UAS: 0.9141575738401063  LAS: 0.8867212388922804 



 80%|████████  | 8/10 [11:24<02:53, 86.54s/it]

--Evaluation:
-loss: 1.1506189107894897  UAS: 0.9130648456488092  LAS: 0.8866390860087163 

--Best Evaluation: 
-loss: 1.1414563655853271  UAS: 0.9141575738401063  LAS: 0.8867212388922804 

--Evaluation:
-loss: 1.1589581966400146  UAS: 0.9149771819092682  LAS: 0.8880433985823152 

--Best Evaluation: 
-loss: 1.1589581966400146  UAS: 0.9149771819092682  LAS: 0.8880433985823152 

--Evaluation:
-loss: 1.154518485069275  UAS: 0.9157314563552509  LAS: 0.8880511884285927 

--Best Evaluation: 
-loss: 1.154518485069275  UAS: 0.9157314563552509  LAS: 0.8880511884285927 



 90%|█████████ | 9/10 [12:49<01:25, 85.95s/it]

--Evaluation:
-loss: 1.1604574918746948  UAS: 0.9147245215325551  LAS: 0.8881251991524636 

--Best Evaluation: 
-loss: 1.1604574918746948  UAS: 0.9147245215325551  LAS: 0.8881251991524636 

--Evaluation:
-loss: 1.158319115638733  UAS: 0.9157789567878022  LAS: 0.8892334204598814 

--Best Evaluation: 
-loss: 1.158319115638733  UAS: 0.9157789567878022  LAS: 0.8892334204598814 

--Evaluation:
-loss: 1.1578172445297241  UAS: 0.9166002845825507  LAS: 0.8899630503259024 

--Best Evaluation: 
-loss: 1.1578172445297241  UAS: 0.9166002845825507  LAS: 0.8899630503259024 



100%|██████████| 10/10 [14:14<00:00, 85.41s/it]


--training finished.
[1.1578172445297241, 0.9166002845825507, 0.8899630503259024]
FOLD 3
9988/2497
--------------------------------


Some weights of the model checkpoint at hfl/chinese-electra-180g-large-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/10 [00:00<?, ?it/s]

--epoch 0, step 0, loss 11.601288795471191
  {'UAS': 0.02092050209205021, 'LAS': 0.0}
--Evaluation:
-loss: 6.2674455642700195  UAS: 0.19686602298862685  LAS: 0.13908262996956444 

--Best Evaluation: 
-loss: 6.2674455642700195  UAS: 0.19686602298862685  LAS: 0.13908262996956444 

--Evaluation:
-loss: 4.060400009155273  UAS: 0.4346893673042161  LAS: 0.3880838163951574 

--Best Evaluation: 
-loss: 4.060400009155273  UAS: 0.4346893673042161  LAS: 0.3880838163951574 

--Evaluation:
-loss: 2.899322509765625  UAS: 0.6100254243508192  LAS: 0.5857240822811293 

--Best Evaluation: 
-loss: 2.899322509765625  UAS: 0.6100254243508192  LAS: 0.5857240822811293 



 10%|█         | 1/10 [01:24<12:42, 84.68s/it]

--Evaluation:
-loss: 1.967405080795288  UAS: 0.7504875014328946  LAS: 0.722481124698275 

--Best Evaluation: 
-loss: 1.967405080795288  UAS: 0.7504875014328946  LAS: 0.722481124698275 

--Evaluation:
-loss: 1.865120768547058  UAS: 0.8063608932067005  LAS: 0.7801761847203753 

--Best Evaluation: 
-loss: 1.865120768547058  UAS: 0.8063608932067005  LAS: 0.7801761847203753 

--Evaluation:
-loss: 1.5034565925598145  UAS: 0.8424321458932026  LAS: 0.8157495901495043 

--Best Evaluation: 
-loss: 1.5034565925598145  UAS: 0.8424321458932026  LAS: 0.8157495901495043 



 20%|██        | 2/10 [02:49<11:17, 84.72s/it]

--Evaluation:
-loss: 1.439486026763916  UAS: 0.8604051457773454  LAS: 0.8346100414102544 

--Best Evaluation: 
-loss: 1.439486026763916  UAS: 0.8604051457773454  LAS: 0.8346100414102544 

--Evaluation:
-loss: 1.4368647336959839  UAS: 0.866640575851679  LAS: 0.8410647889133785 

--Best Evaluation: 
-loss: 1.4368647336959839  UAS: 0.866640575851679  LAS: 0.8410647889133785 

--Evaluation:
-loss: 1.3684102296829224  UAS: 0.8785545815759224  LAS: 0.8514826157325007 

--Best Evaluation: 
-loss: 1.3684102296829224  UAS: 0.8785545815759224  LAS: 0.8514826157325007 



 30%|███       | 3/10 [04:14<09:52, 84.71s/it]

--Evaluation:
-loss: 1.322923183441162  UAS: 0.8898108972808092  LAS: 0.8635465895461831 

--Best Evaluation: 
-loss: 1.322923183441162  UAS: 0.8898108972808092  LAS: 0.8635465895461831 

--Evaluation:
-loss: 1.2758820056915283  UAS: 0.8890659506928561  LAS: 0.8620556437923589 

--Best Evaluation: 
-loss: 1.322923183441162  UAS: 0.8898108972808092  LAS: 0.8635465895461831 

--Evaluation:
-loss: 1.2525831460952759  UAS: 0.8961317698758625  LAS: 0.8684792622201962 

--Best Evaluation: 
-loss: 1.2525831460952759  UAS: 0.8961317698758625  LAS: 0.8684792622201962 



 40%|████      | 4/10 [05:38<08:28, 84.74s/it]

--Evaluation:
-loss: 1.2435920238494873  UAS: 0.897740543702626  LAS: 0.869774191581902 

--Best Evaluation: 
-loss: 1.2435920238494873  UAS: 0.897740543702626  LAS: 0.869774191581902 

--Evaluation:
-loss: 1.2191853523254395  UAS: 0.9028433742266915  LAS: 0.8757363510536531 

--Best Evaluation: 
-loss: 1.2191853523254395  UAS: 0.9028433742266915  LAS: 0.8757363510536531 

--Evaluation:
-loss: 1.2062904834747314  UAS: 0.9041147838716885  LAS: 0.877275392558757 

--Best Evaluation: 
-loss: 1.2062904834747314  UAS: 0.9041147838716885  LAS: 0.877275392558757 



 50%|█████     | 5/10 [07:03<07:03, 84.71s/it]

--Evaluation:
-loss: 1.2119927406311035  UAS: 0.9062118374366055  LAS: 0.8788591600788701 

--Best Evaluation: 
-loss: 1.2119927406311035  UAS: 0.9062118374366055  LAS: 0.8788591600788701 

--Evaluation:
-loss: 1.1670113801956177  UAS: 0.908936071623557  LAS: 0.8818650759389595 

--Best Evaluation: 
-loss: 1.1670113801956177  UAS: 0.908936071623557  LAS: 0.8818650759389595 

--Evaluation:
-loss: 1.1494710445404053  UAS: 0.9093196829341097  LAS: 0.8818600790928437 

--Best Evaluation: 
-loss: 1.1670113801956177  UAS: 0.908936071623557  LAS: 0.8818650759389595 



 60%|██████    | 6/10 [08:28<05:38, 84.72s/it]

--Evaluation:
-loss: 1.1803704500198364  UAS: 0.9092697377410566  LAS: 0.8834704499882051 

--Best Evaluation: 
-loss: 1.1803704500198364  UAS: 0.9092697377410566  LAS: 0.8834704499882051 

--Evaluation:
-loss: 1.1976077556610107  UAS: 0.9100586246504608  LAS: 0.883530538485491 

--Best Evaluation: 
-loss: 1.1976077556610107  UAS: 0.9100586246504608  LAS: 0.883530538485491 

--Evaluation:
-loss: 1.1530638933181763  UAS: 0.9110707044178352  LAS: 0.8844561907501038 

--Best Evaluation: 
-loss: 1.1530638933181763  UAS: 0.9110707044178352  LAS: 0.8844561907501038 



 70%|███████   | 7/10 [09:53<04:14, 84.72s/it]

--Evaluation:
-loss: 1.1718052625656128  UAS: 0.9110875494785234  LAS: 0.8841455315747215 

--Best Evaluation: 
-loss: 1.1530638933181763  UAS: 0.9110707044178352  LAS: 0.8844561907501038 

--Evaluation:
-loss: 1.1937066316604614  UAS: 0.9121472014689431  LAS: 0.8844575575395612 

--Best Evaluation: 
-loss: 1.1937066316604614  UAS: 0.9121472014689431  LAS: 0.8844575575395612 

--Evaluation:
-loss: 1.1699999570846558  UAS: 0.9111336023584417  LAS: 0.8844123402707214 

--Best Evaluation: 
-loss: 1.1937066316604614  UAS: 0.9121472014689431  LAS: 0.8844575575395612 

--Evaluation:
-loss: 1.201255440711975  UAS: 0.9117833032752849  LAS: 0.8852615358683289 

--Best Evaluation: 
-loss: 1.201255440711975  UAS: 0.9117833032752849  LAS: 0.8852615358683289 



 80%|████████  | 8/10 [11:22<02:52, 86.36s/it]

--Evaluation:
-loss: 1.1894842386245728  UAS: 0.9148203027942594  LAS: 0.8879722033228654 

--Best Evaluation: 
-loss: 1.1894842386245728  UAS: 0.9148203027942594  LAS: 0.8879722033228654 

--Evaluation:
-loss: 1.1871442794799805  UAS: 0.9138190378613531  LAS: 0.8876255892615775 

--Best Evaluation: 
-loss: 1.1894842386245728  UAS: 0.9148203027942594  LAS: 0.8879722033228654 

--Evaluation:
-loss: 1.191157579421997  UAS: 0.9129392206144135  LAS: 0.8869925387043457 

--Best Evaluation: 
-loss: 1.1894842386245728  UAS: 0.9148203027942594  LAS: 0.8879722033228654 



 90%|█████████ | 9/10 [12:47<01:25, 85.87s/it]

--Evaluation:
-loss: 1.18968665599823  UAS: 0.9147206369997559  LAS: 0.888509183037752 

--Best Evaluation: 
-loss: 1.18968665599823  UAS: 0.9147206369997559  LAS: 0.888509183037752 

--Evaluation:
-loss: 1.1909281015396118  UAS: 0.9156567412226464  LAS: 0.8894889887090223 

--Best Evaluation: 
-loss: 1.1909281015396118  UAS: 0.9156567412226464  LAS: 0.8894889887090223 

--Evaluation:
-loss: 1.1856646537780762  UAS: 0.9153214731693653  LAS: 0.8891587387850813 

--Best Evaluation: 
-loss: 1.1909281015396118  UAS: 0.9156567412226464  LAS: 0.8894889887090223 



100%|██████████| 10/10 [14:12<00:00, 85.24s/it]


--training finished.
[1.1909281015396118, 0.9156567412226464, 0.8894889887090223]
FOLD 4
9988/2497
--------------------------------


Some weights of the model checkpoint at hfl/chinese-electra-180g-large-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/10 [00:00<?, ?it/s]

--epoch 0, step 0, loss 12.343706130981445
  {'UAS': 0.048879837067209775, 'LAS': 0.0}
--Evaluation:
-loss: 6.089404582977295  UAS: 0.21617890063360243  LAS: 0.14369135054748144 

--Best Evaluation: 
-loss: 6.089404582977295  UAS: 0.21617890063360243  LAS: 0.14369135054748144 

--Evaluation:
-loss: 3.9046108722686768  UAS: 0.4365997956267718  LAS: 0.38750280457647746 

--Best Evaluation: 
-loss: 3.9046108722686768  UAS: 0.4365997956267718  LAS: 0.38750280457647746 

--Evaluation:
-loss: 2.843172311782837  UAS: 0.6174521983312731  LAS: 0.5881951050364129 

--Best Evaluation: 
-loss: 2.843172311782837  UAS: 0.6174521983312731  LAS: 0.5881951050364129 



 10%|█         | 1/10 [01:24<12:41, 84.59s/it]

--Evaluation:
-loss: 2.047179937362671  UAS: 0.7455836672901783  LAS: 0.7172803712136764 

--Best Evaluation: 
-loss: 2.047179937362671  UAS: 0.7455836672901783  LAS: 0.7172803712136764 

--Evaluation:
-loss: 1.8255021572113037  UAS: 0.8010613460694622  LAS: 0.7722085546448663 

--Best Evaluation: 
-loss: 1.8255021572113037  UAS: 0.8010613460694622  LAS: 0.7722085546448663 

--Evaluation:
-loss: 1.51848566532135  UAS: 0.8440022892829874  LAS: 0.81512135429448 

--Best Evaluation: 
-loss: 1.51848566532135  UAS: 0.8440022892829874  LAS: 0.81512135429448 



 20%|██        | 2/10 [02:49<11:16, 84.62s/it]

--Evaluation:
-loss: 1.4715056419372559  UAS: 0.8568291081624244  LAS: 0.827491059185625 

--Best Evaluation: 
-loss: 1.4715056419372559  UAS: 0.8568291081624244  LAS: 0.827491059185625 

--Evaluation:
-loss: 1.3410851955413818  UAS: 0.8731319121480499  LAS: 0.8444971911180926 

--Best Evaluation: 
-loss: 1.3410851955413818  UAS: 0.8731319121480499  LAS: 0.8444971911180926 

--Evaluation:
-loss: 1.2804311513900757  UAS: 0.8793769837264699  LAS: 0.8519355120859523 

--Best Evaluation: 
-loss: 1.2804311513900757  UAS: 0.8793769837264699  LAS: 0.8519355120859523 



 30%|███       | 3/10 [04:14<09:53, 84.72s/it]

--Evaluation:
-loss: 1.2406094074249268  UAS: 0.8911605670432416  LAS: 0.8631825055746348 

--Best Evaluation: 
-loss: 1.2406094074249268  UAS: 0.8911605670432416  LAS: 0.8631825055746348 

--Evaluation:
-loss: 1.2643330097198486  UAS: 0.8846414217391025  LAS: 0.8573563738241726 

--Best Evaluation: 
-loss: 1.2406094074249268  UAS: 0.8911605670432416  LAS: 0.8631825055746348 

--Evaluation:
-loss: 1.2124613523483276  UAS: 0.8968341294742822  LAS: 0.8687988086608991 

--Best Evaluation: 
-loss: 1.2124613523483276  UAS: 0.8968341294742822  LAS: 0.8687988086608991 



 40%|████      | 4/10 [05:38<08:28, 84.72s/it]

--Evaluation:
-loss: 1.1998474597930908  UAS: 0.9008945440229085  LAS: 0.8722743892348501 

--Best Evaluation: 
-loss: 1.1998474597930908  UAS: 0.9008945440229085  LAS: 0.8722743892348501 

--Evaluation:
-loss: 1.1845993995666504  UAS: 0.8982523618671318  LAS: 0.871184729095826 

--Best Evaluation: 
-loss: 1.1998474597930908  UAS: 0.9008945440229085  LAS: 0.8722743892348501 

--Evaluation:
-loss: 1.1819638013839722  UAS: 0.9002763217573135  LAS: 0.8732201605693293 

--Best Evaluation: 
-loss: 1.1819638013839722  UAS: 0.9002763217573135  LAS: 0.8732201605693293 



 50%|█████     | 5/10 [07:03<07:03, 84.70s/it]

--Evaluation:
-loss: 1.1897050142288208  UAS: 0.9012188949734569  LAS: 0.873935804900238 

--Best Evaluation: 
-loss: 1.1897050142288208  UAS: 0.9012188949734569  LAS: 0.873935804900238 

--Evaluation:
-loss: 1.1639513969421387  UAS: 0.9058684457087092  LAS: 0.8784088549834466 

--Best Evaluation: 
-loss: 1.1639513969421387  UAS: 0.9058684457087092  LAS: 0.8784088549834466 

--Evaluation:
-loss: 1.1420053243637085  UAS: 0.9054892614254104  LAS: 0.8776867350340452 

--Best Evaluation: 
-loss: 1.1639513969421387  UAS: 0.9058684457087092  LAS: 0.8784088549834466 



 60%|██████    | 6/10 [08:28<05:38, 84.68s/it]

--Evaluation:
-loss: 1.171096682548523  UAS: 0.9077192911598034  LAS: 0.8812538847562057 

--Best Evaluation: 
-loss: 1.171096682548523  UAS: 0.9077192911598034  LAS: 0.8812538847562057 

--Evaluation:
-loss: 1.1252961158752441  UAS: 0.9105119297799014  LAS: 0.882577624617855 

--Best Evaluation: 
-loss: 1.1252961158752441  UAS: 0.9105119297799014  LAS: 0.882577624617855 

--Evaluation:
-loss: 1.1399565935134888  UAS: 0.9113925262260484  LAS: 0.8839179655080117 

--Best Evaluation: 
-loss: 1.1399565935134888  UAS: 0.9113925262260484  LAS: 0.8839179655080117 



 70%|███████   | 7/10 [09:52<04:13, 84.65s/it]

--Evaluation:
-loss: 1.12858247756958  UAS: 0.9112316514198734  LAS: 0.8836155673938347 

--Best Evaluation: 
-loss: 1.1399565935134888  UAS: 0.9113925262260484  LAS: 0.8839179655080117 

--Evaluation:
-loss: 1.1597115993499756  UAS: 0.9125286787468045  LAS: 0.8851265273764841 

--Best Evaluation: 
-loss: 1.1597115993499756  UAS: 0.9125286787468045  LAS: 0.8851265273764841 

--Evaluation:
-loss: 1.148713231086731  UAS: 0.9127723990551335  LAS: 0.885498020347863 

--Best Evaluation: 
-loss: 1.148713231086731  UAS: 0.9127723990551335  LAS: 0.885498020347863 

--Evaluation:
-loss: 1.1654762029647827  UAS: 0.9119770847870027  LAS: 0.8854656372923285 

--Best Evaluation: 
-loss: 1.148713231086731  UAS: 0.9127723990551335  LAS: 0.885498020347863 



 80%|████████  | 8/10 [11:22<02:52, 86.35s/it]

--Evaluation:
-loss: 1.1629225015640259  UAS: 0.9150400818827422  LAS: 0.8877007370138673 

--Best Evaluation: 
-loss: 1.1629225015640259  UAS: 0.9150400818827422  LAS: 0.8877007370138673 

--Evaluation:
-loss: 1.1503024101257324  UAS: 0.9145265990765675  LAS: 0.8875098560897979 

--Best Evaluation: 
-loss: 1.1629225015640259  UAS: 0.9150400818827422  LAS: 0.8877007370138673 

--Evaluation:
-loss: 1.1319113969802856  UAS: 0.9146804494568904  LAS: 0.8877384259433594 

--Best Evaluation: 
-loss: 1.1319113969802856  UAS: 0.9146804494568904  LAS: 0.8877384259433594 



 90%|█████████ | 9/10 [12:47<01:25, 85.84s/it]

--Evaluation:
-loss: 1.1490609645843506  UAS: 0.9145987017320885  LAS: 0.8874926147692626 

--Best Evaluation: 
-loss: 1.1319113969802856  UAS: 0.9146804494568904  LAS: 0.8877384259433594 

--Evaluation:
-loss: 1.1492280960083008  UAS: 0.9153546908173654  LAS: 0.8877205735099376 

--Best Evaluation: 
-loss: 1.1319113969802856  UAS: 0.9146804494568904  LAS: 0.8877384259433594 

--Evaluation:
-loss: 1.148605465888977  UAS: 0.9150865520404247  LAS: 0.8878615341804678 

--Best Evaluation: 
-loss: 1.148605465888977  UAS: 0.9150865520404247  LAS: 0.8878615341804678 



100%|██████████| 10/10 [14:12<00:00, 85.20s/it]


--training finished.
[1.148605465888977, 0.9150865520404247, 0.8878615341804678]


In [None]:
import os
os.system("shutdown")