In [1]:
import os
from collections import Counter
from typing import *
import random
import json

In [2]:
from tqdm import tqdm
import numpy as np
from sklearn.model_selection import KFold

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader, Subset
from torch.cuda.amp import autocast, GradScaler

from transformers import AutoConfig, AutoModel, AutoTokenizer, AdamW, get_constant_schedule_with_warmup, get_linear_schedule_with_warmup

In [3]:
from trainer import BasicTrainer
from model import DepParser
from utils import arc_rel_loss, uas_las

## Config

In [4]:
class CFG:
    data_file = '/root/diag_dep/data_new/1to200_0923.json'
    plm = 'hfl/chinese-electra-180g-large-discriminator'
    num_folds = 5
    trn_folds = [0, 1, 2, 3, 4]
    random_seed = 42
    num_epochs = 30
    batch_size = 4
    lr = 2e-5
    weight_decay = 0.01
    dropout = 0.2
    grad_clip = 1
    scheduler = 'linear'
    warmup_ratio = 0.12
    num_early_stop = 5
    max_length = 512
    num_labels = 40
    hidden_size = 400
    print_every = 1e9
    eval_every = 30
    cuda = True
    fp16 = True

## Seed and Device

In [5]:
def seed_everything(seed=CFG.random_seed):
    np.random.seed(seed%(2**32-1))
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic =True
    torch.backends.cudnn.benchmark = False

seed_everything()

In [6]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f'Using device: {device}')

Using device: cuda


## Data

In [7]:
rel_dct = {
    'root': '根节点',
    'sasubj-obj': '同主同宾',
    'sasubj': '同主语',
    'dfsubj': '不同主语',
    'subj': '主语',
    'subj-in': '内部主语',
    'obj': '宾语',
    'pred': '谓语',
    'att': '定语',
    'adv': '状语',
    'cmp': '补语',
    'coo': '并列',
    'pobj': '介宾',
    'iobj': '间宾',
    'de': '的',
    'adjct': '附加',
    'app': '称呼',
    'exp': '解释',
    'punc': '标点',
    'frag': '片段',
    'repet': '重复',
    # rst
    'attr': '归属',
    'bckg': '背景',
    'cause': '因果',
    'comp': '比较',
    'cond': '状况',
    'cont': '对比',
    'elbr': '阐述',
    'enbm': '目的',
    'eval': '评价',
    'expl': '解释-例证',
    'joint': '联合',
    'manner': '方式',
    'rstm': '重申',
    'temp': '时序',
    'tp-chg': '主题变更',
    'prob-sol': '问题-解决',
    'qst-ans': '疑问-回答',
    'stm-rsp': '陈述-回应',
    'req-proc': '需求-处理',
}

In [8]:
rst_lst = [
    'attr','bckg','cause','comp','cond','cont','elbr','enbm','eval','expl','joint',
    'manner','rstm','temp','tp-chg','prob-sol','qst-ans','stm-rsp','req-proc',
]

In [9]:
rel2id = {key:idx for idx, key in enumerate(rel_dct.keys())}
print(rel2id)

{'root': 0, 'sasubj-obj': 1, 'sasubj': 2, 'dfsubj': 3, 'subj': 4, 'subj-in': 5, 'obj': 6, 'pred': 7, 'att': 8, 'adv': 9, 'cmp': 10, 'coo': 11, 'pobj': 12, 'iobj': 13, 'de': 14, 'adjct': 15, 'app': 16, 'exp': 17, 'punc': 18, 'frag': 19, 'repet': 20, 'attr': 21, 'bckg': 22, 'cause': 23, 'comp': 24, 'cond': 25, 'cont': 26, 'elbr': 27, 'enbm': 28, 'eval': 29, 'expl': 30, 'joint': 31, 'manner': 32, 'rstm': 33, 'temp': 34, 'tp-chg': 35, 'prob-sol': 36, 'qst-ans': 37, 'stm-rsp': 38, 'req-proc': 39}


In [10]:
tokenizer = AutoTokenizer.from_pretrained(CFG.plm)
print(len(tokenizer))
 
num_added_toks = tokenizer.add_tokens(['[root]', '[qst]', '[aws]'], special_tokens=True)
tokenizer.root_token = '[root]'
tokenizer.root_token_ids = tokenizer('[root]')['input_ids'][1]
print(f"add token: {tokenizer.root_token} {tokenizer.root_token_ids}")

tokenizer.root_token = '[qst]'
tokenizer.root_token_ids = tokenizer('[qst]')['input_ids'][1]
print(f"add token: {tokenizer.root_token} {tokenizer.root_token_ids}")

tokenizer.root_token = '[ans]'
tokenizer.root_token_ids = tokenizer('[ans]')['input_ids'][1]
print(f"add token: {tokenizer.root_token} {tokenizer.root_token_ids}")
print(len(tokenizer))

CFG.tokenizer = tokenizer

21128
add token: [root] 21128
add token: [qst] 21129
add token: [ans] 138
21131


In [11]:
with open(CFG.data_file, 'r', encoding='utf-8') as f:
    data = json.load(f)[:200]  # have annotated 200 data

sent_lst = []
max_len, trun_cnt = 0, 0
for d in data:
    word_lst = []
    for item in d['dialog']:
        word_lst.extend(item['utterance'].split(' '))
    if len(word_lst) > max_len:
        max_len = len(word_lst)
    if len(word_lst) > 450:
        trun_cnt += 1
    sent_lst.append(len(word_lst))

print(trun_cnt)
print(max_len)
print(np.mean(sent_lst))

1
626
209.725


In [12]:
class Dependency():
    def __init__(self, idx, word, head, rel):
        self.id = idx
        self.word = word
        self.tag = '_'
        self.head = head
        self.rel = rel

    def __str__(self):
        # example:  1	上海	_	NR	NR	_	2	nn	_	_
        values = [str(self.idx), self.word, "_", self.tag, "_", "_", str(self.head), self.rel, "_", "_"]
        return '\t'.join(values)

    def __repr__(self):
        return f"({self.id}, {self.word}, {self.tag}, {self.head}, {self.rel})"

In [17]:
def load_annoted(data_file):
    with open(CFG.data_file, 'r', encoding='utf-8') as f:
        data = json.load(f)[:200]  # have annotated 200 data
        
    sample_lst:List[List[Dependency]] = []
    
    for d in data:
        rel_dct = {}
        for tripple in d['relationship']:
            head, rel, tail = tripple
            head_uttr_idx, head_word_idx = [int(x) for x in head.split('-')]
            tail_uttr_idx, tail_word_idx = [int(x) for x in tail.split('-')]
            
            if rel == 'root' and head_uttr_idx != 0: # ignore root
                continue
                 
            if not rel_dct.get(tail_uttr_idx, None):
                rel_dct[tail_uttr_idx] = {tail_word_idx: [head, rel]}
            else:
                rel_dct[tail_uttr_idx][tail_word_idx] = [head, rel]
                
        sent_lens_accum = [0]
        for i, item in enumerate(d['dialog']):
            utterance = item['utterance']
            sent_lens_accum.append(sent_lens_accum[i] + len(utterance.split(' ')) + 1)
        
        dep_lst:List[Dependency] = []
        role_lst:List[str] = []
        for item in d['dialog']:
            turn = item['turn']
            utterance = item['utterance']
            # dep_lst:List[Dependency] = [Dependency(0, '[root]', -1, '_')]
            
            role = '[ans]' if item['speaker'] == 'A' else '[qst]'
            dep_lst.append(Dependency(sent_lens_accum[turn], role, -1, '_'))  
            
            for word_idx, word in enumerate(utterance.split(' ')):
                tail2head = rel_dct.get(turn, {1: [f'{turn}-{word_idx}', 'adjct']})
                head, rel = tail2head.get(word_idx + 1, [f'{turn}-{word_idx}', 'adjct'])  # some word annoted missed, padded with last word and 'adjct'
                head_uttr_idx, head_word_idx = [int(x) for x in head.split('-')]
                
                # only parse cross-utterance
                if turn != head_uttr_idx:
                    dep_lst.append(Dependency(sent_lens_accum[turn] + word_idx + 1, word, sent_lens_accum[head_uttr_idx] + head_word_idx, rel))  # add with accumulated length
                else:
                    dep_lst.append(Dependency(sent_lens_accum[turn] + word_idx + 1, word, -1, '_')) 
                
                # only parse rst relation
                # if rel not in rst_lst:
                #     dep_lst.append(Dependency(sent_lens_accum[turn] + word_idx + 1, word, -1, '_')) 
                # else:
                #     dep_lst.append(Dependency(sent_lens_accum[turn] + word_idx + 1, word, sent_lens_accum[head_uttr_idx] + head_word_idx, rel))  # add with accumulated length
        
            role_lst.append(item['speaker'])        
        sample_lst.append(dep_lst)
        
    return sample_lst

In [18]:
data = load_annoted(CFG.data_file)
data[0]

[(0, [ans], _, -1, _),
 (1, 有, _, -1, _),
 (2, 什么, _, -1, _),
 (3, 问题, _, -1, _),
 (4, 我, _, -1, _),
 (5, 可以, _, -1, _),
 (6, 帮, _, -1, _),
 (7, 您, _, -1, _),
 (8, 处理, _, -1, _),
 (9, 或, _, -1, _),
 (10, 解决, _, -1, _),
 (11, 呢, _, -1, _),
 (12, ?, _, -1, _),
 (13, [qst], _, -1, _),
 (14, 你, _, -1, _),
 (15, 好, _, 1, qst-ans),
 (16, [qst], _, -1, _),
 (17, 以前, _, -1, _),
 (18, 的, _, -1, _),
 (19, 手机, _, -1, _),
 (20, 号码, _, -1, _),
 (21, 销, _, 15, elbr),
 (22, 号, _, -1, _),
 (23, 了, _, -1, _),
 (24, ，, _, -1, _),
 (25, 密码, _, -1, _),
 (26, 也, _, -1, _),
 (27, 忘, _, -1, _),
 (28, 了, _, -1, _),
 (29, [qst], _, -1, _),
 (30, 改, _, 27, bckg),
 (31, 密码, _, -1, _),
 (32, 需要, _, -1, _),
 (33, 手机, _, -1, _),
 (34, 验证码, _, -1, _),
 (35, [ans], _, -1, _),
 (36, 还, _, -1, _),
 (37, 记得, _, 30, stm-rsp),
 (38, 您, _, -1, _),
 (39, 以前, _, -1, _),
 (40, 的, _, -1, _),
 (41, 手机, _, -1, _),
 (42, 号码, _, -1, _),
 (43, 吗, _, -1, _),
 (44, [qst], _, -1, _),
 (45, [, _, -1, _),
 (46, 数字, _, 37, qst-ans),
 (47

In [19]:
class DialogDataset(Dataset):
    def __init__(self, cfg):
        self.cfg = cfg
        self.inputs, self.offsets, self.heads, self.rels, self.masks = self.read_data()
        
    def read_data(self):
        inputs, offsets = [], []
        tags, heads, rels, masks = [], [], [], []
        
        for idx, deps in enumerate(load_annoted(self.cfg.data_file)):
            seq_len = len(deps)

            word_lst = [] 
            head_tokens = np.zeros(self.cfg.max_length, dtype=np.int64)  # same as root index is 0, constrainting by mask 
            rel_tokens = np.zeros(self.cfg.max_length, dtype=np.int64)
            mask_tokens = np.zeros(self.cfg.max_length, dtype=np.int64)
            for i, dep in enumerate(deps):
                if i == seq_len or i + 1== self.cfg.max_length:
                    break

                word_lst.append(dep.word)
                
                if int(dep.head) == -1 or int(dep.head) + 1 >= self.cfg.max_length:
                    head_tokens[i+1] = 0
                    mask_tokens[i+1] = 0
                else:
                    head_tokens[i+1] = int(dep.head)
                    mask_tokens[i+1] = 1
#                     head_tokens[i] = dep.head if dep.head != '_' else 0
                rel_tokens[i+1] = rel2id.get(dep.rel, 0)

            tokenized = tokenizer.encode_plus(word_lst, 
                                              padding='max_length', 
                                              truncation=True,
                                              max_length=self.cfg.max_length, 
                                              return_offsets_mapping=True, 
                                              return_tensors='pt',
                                              is_split_into_words=True)
            inputs.append({"input_ids": tokenized['input_ids'][0],
                          "token_type_ids": tokenized['token_type_ids'][0],
                           "attention_mask": tokenized['attention_mask'][0]
                          })

#                 sentence_word_idx = np.zeros(self.cfg.max_length, dtype=np.int64)
            sentence_word_idx = []
            for idx, (start, end) in enumerate(tokenized.offset_mapping[0][1:]):
                if start == 0 and end != 0:
                    sentence_word_idx.append(idx)
#                         sentence_word_idx[idx] = idx
            if len(sentence_word_idx) < self.cfg.max_length - 1:
                sentence_word_idx.extend([0]* (self.cfg.max_length - 1 - len(sentence_word_idx)))
            offsets.append(torch.as_tensor(sentence_word_idx))
#                 offsets.append(sentence_word_idx)

            heads.append(head_tokens)
            rels.append(rel_tokens)
            masks.append(mask_tokens)
                    
        return inputs, offsets, heads, rels, masks

    def __getitem__(self, idx):
        return self.inputs[idx], self.offsets[idx], self.heads[idx], self.rels[idx], self.masks[idx]
    
    def __len__(self):
        return len(self.rels)

In [20]:
dataset = DialogDataset(CFG)

## Tranining

In [21]:
kfold = KFold(n_splits=CFG.num_folds, shuffle=True, random_state=CFG.random_seed)

In [None]:
for fold, (train_ids, val_ids) in enumerate(kfold.split(dataset)):
    if fold not in CFG.trn_folds:
        continue
    print(f'FOLD {fold}')
    print(f'{len(train_ids)}/{len(val_ids)}')
    print('--------------------------------')

    if CFG.cuda and torch.cuda.is_available:
        torch.cuda.empty_cache()

    random.shuffle(train_ids)
    random.shuffle(val_ids)

    tr_dataset = Subset(dataset, train_ids)
    va_dataset = Subset(dataset, val_ids)
    
    tr_iter = DataLoader(tr_dataset, batch_size=CFG.batch_size)
    va_iter = DataLoader(va_dataset, batch_size=CFG.batch_size)
    
    model = DepParser(CFG)
    
    optim = AdamW(model.parameters(), 
                      lr=CFG.lr,
                      weight_decay=CFG.weight_decay
                      )

    training_step = int(CFG.num_epochs * (len(train_ids) / CFG.batch_size))
    warmup_step = int(CFG.warmup_ratio * training_step)  
    lr_scheduler = get_linear_schedule_with_warmup(optimizer=optim, 
                                                        num_warmup_steps=warmup_step, 
                                                        num_training_steps=training_step)

    trainer = BasicTrainer(optim=optim, 
                        lr_scheduler=lr_scheduler,
                        trainset_size=len(train_ids), 
                        loss_fn=arc_rel_loss, 
                        metrics_fn=uas_las, 
                        config=CFG)
    
    best_res, best_state_dict = trainer.train(model=model, train_iter=tr_iter, val_iter=va_iter)
    print(best_res)
    with open("/root/autodl-tmp/diag_dep/k-fold/res.txt", 'a+') as f:
        f.write(f'{fold}\t {str(best_res)}\n')
    
    torch.save(best_state_dict, f"/root/autodl-tmp/diag_dep/k-fold/{fold}/model.bin")

FOLD 0
160/40
--------------------------------


Some weights of the model checkpoint at hfl/chinese-electra-180g-large-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/30 [00:00<?, ?it/s]

--epoch 0, step 0, loss 12.420459747314453
  {'UAS': 0.0125, 'LAS': 0.0}
--Evaluation:
-loss: 9.237998008728027  UAS: 0.008326508477819662  LAS: 0.002053794829024187 

--Best Evaluation: 
-loss: 9.237998008728027  UAS: 0.008326508477819662  LAS: 0.002053794829024187 



  3%|▎         | 1/30 [00:11<05:19, 11.02s/it]

--Evaluation:
-loss: 7.5951433181762695  UAS: 0.014968235494453905  LAS: 0.003648139685141119 

--Best Evaluation: 
-loss: 7.5951433181762695  UAS: 0.014968235494453905  LAS: 0.003648139685141119 



  7%|▋         | 2/30 [00:22<05:09, 11.05s/it]

--Evaluation:
-loss: 7.254802227020264  UAS: 0.019747059709605656  LAS: 0.008428947720345891 

--Best Evaluation: 
-loss: 7.254802227020264  UAS: 0.019747059709605656  LAS: 0.008428947720345891 



 10%|█         | 3/30 [00:33<05:07, 11.39s/it]

--Evaluation:
-loss: 6.797461986541748  UAS: 0.032368498725389605  LAS: 0.02264526990387783 

--Best Evaluation: 
-loss: 6.797461986541748  UAS: 0.032368498725389605  LAS: 0.02264526990387783 

--Evaluation:
-loss: 6.511073589324951  UAS: 0.04527492351320245  LAS: 0.03110681599102032 

--Best Evaluation: 
-loss: 6.511073589324951  UAS: 0.04527492351320245  LAS: 0.03110681599102032 



 13%|█▎        | 4/30 [00:44<04:53, 11.28s/it]

--Evaluation:
-loss: 6.219823837280273  UAS: 0.06567313657064543  LAS: 0.041370523011696816 

--Best Evaluation: 
-loss: 6.219823837280273  UAS: 0.06567313657064543  LAS: 0.041370523011696816 



 17%|█▋        | 5/30 [00:56<04:40, 11.20s/it]

--Evaluation:
-loss: 6.09735107421875  UAS: 0.09978383106436965  LAS: 0.066307697604354 

--Best Evaluation: 
-loss: 6.09735107421875  UAS: 0.09978383106436965  LAS: 0.066307697604354 



 20%|██        | 6/30 [01:07<04:33, 11.40s/it]

--Evaluation:
-loss: 5.697291374206543  UAS: 0.1376150929653745  LAS: 0.09725508860124996 

--Best Evaluation: 
-loss: 5.697291374206543  UAS: 0.1376150929653745  LAS: 0.09725508860124996 

--Evaluation:
-loss: 5.3638153076171875  UAS: 0.19795325278351394  LAS: 0.14035270370371938 

--Best Evaluation: 
-loss: 5.3638153076171875  UAS: 0.19795325278351394  LAS: 0.14035270370371938 



 23%|██▎       | 7/30 [01:18<04:19, 11.30s/it]

--Evaluation:
-loss: 5.113751411437988  UAS: 0.24372962693036354  LAS: 0.17387291645032982 

--Best Evaluation: 
-loss: 5.113751411437988  UAS: 0.24372962693036354  LAS: 0.17387291645032982 



 27%|██▋       | 8/30 [01:30<04:07, 11.24s/it]

--Evaluation:
-loss: 4.912578582763672  UAS: 0.2971943021772584  LAS: 0.21641598272515689 

--Best Evaluation: 
-loss: 4.912578582763672  UAS: 0.2971943021772584  LAS: 0.21641598272515689 



 30%|███       | 9/30 [01:41<04:00, 11.44s/it]

--Evaluation:
-loss: 4.473930358886719  UAS: 0.3684698394185705  LAS: 0.261989951609268 

--Best Evaluation: 
-loss: 4.473930358886719  UAS: 0.3684698394185705  LAS: 0.261989951609268 

--Evaluation:
-loss: 4.67720890045166  UAS: 0.3563118299546073  LAS: 0.26322321824696326 

--Best Evaluation: 
-loss: 4.67720890045166  UAS: 0.3563118299546073  LAS: 0.26322321824696326 



 33%|███▎      | 10/30 [01:53<03:47, 11.35s/it]

--Evaluation:
-loss: 4.476113319396973  UAS: 0.3854136713170101  LAS: 0.2754854944622339 

--Best Evaluation: 
-loss: 4.476113319396973  UAS: 0.3854136713170101  LAS: 0.2754854944622339 



 37%|███▋      | 11/30 [02:04<03:34, 11.29s/it]

--Evaluation:
-loss: 4.435065269470215  UAS: 0.3923501326802664  LAS: 0.279731772148866 

--Best Evaluation: 
-loss: 4.435065269470215  UAS: 0.3923501326802664  LAS: 0.279731772148866 



 40%|████      | 12/30 [02:16<03:26, 11.46s/it]

--Evaluation:
-loss: 4.368740558624268  UAS: 0.4267327053475757  LAS: 0.3172822635871172 

--Best Evaluation: 
-loss: 4.368740558624268  UAS: 0.4267327053475757  LAS: 0.3172822635871172 

--Evaluation:
-loss: 4.581450939178467  UAS: 0.4087768294250834  LAS: 0.30202121572150553 

--Best Evaluation: 
-loss: 4.368740558624268  UAS: 0.4267327053475757  LAS: 0.3172822635871172 



 43%|████▎     | 13/30 [02:27<03:13, 11.37s/it]

--Evaluation:
-loss: 4.643443584442139  UAS: 0.42184379729828025  LAS: 0.31499776148787595 

--Best Evaluation: 
-loss: 4.368740558624268  UAS: 0.4267327053475757  LAS: 0.3172822635871172 



 47%|████▋     | 14/30 [02:38<03:00, 11.29s/it]

--Evaluation:
-loss: 4.539865016937256  UAS: 0.44768208763723755  LAS: 0.3326306435095214 

--Best Evaluation: 
-loss: 4.539865016937256  UAS: 0.44768208763723755  LAS: 0.3326306435095214 



 50%|█████     | 15/30 [02:50<02:51, 11.46s/it]

--Evaluation:
-loss: 4.5826334953308105  UAS: 0.4371837106835824  LAS: 0.3367162119208257 

--Best Evaluation: 
-loss: 4.5826334953308105  UAS: 0.4371837106835824  LAS: 0.3367162119208257 

--Evaluation:
-loss: 4.725501537322998  UAS: 0.4339385354158963  LAS: 0.3262780792039266 

--Best Evaluation: 
-loss: 4.5826334953308105  UAS: 0.4371837106835824  LAS: 0.3367162119208257 



 53%|█████▎    | 16/30 [03:01<02:39, 11.36s/it]

--Evaluation:
-loss: 4.790196895599365  UAS: 0.44920866107582114  LAS: 0.3363960327214688 

--Best Evaluation: 
-loss: 4.5826334953308105  UAS: 0.4371837106835824  LAS: 0.3367162119208257 



 57%|█████▋    | 17/30 [03:12<02:26, 11.31s/it]

--Evaluation:
-loss: 4.740580081939697  UAS: 0.4419349931532855  LAS: 0.34131311781396406 

--Best Evaluation: 
-loss: 4.740580081939697  UAS: 0.4419349931532855  LAS: 0.34131311781396406 



 60%|██████    | 18/30 [03:24<02:17, 11.49s/it]

--Evaluation:
-loss: 4.697535991668701  UAS: 0.44055634585822023  LAS: 0.344872498494059 

--Best Evaluation: 
-loss: 4.697535991668701  UAS: 0.44055634585822023  LAS: 0.344872498494059 

--Evaluation:
-loss: 5.196976184844971  UAS: 0.43695474191276185  LAS: 0.3295741022942424 

--Best Evaluation: 
-loss: 4.697535991668701  UAS: 0.44055634585822023  LAS: 0.344872498494059 



 63%|██████▎   | 19/30 [03:35<02:05, 11.39s/it]

--Evaluation:
-loss: 5.091578960418701  UAS: 0.44453150466677027  LAS: 0.3466463983899431 

--Best Evaluation: 
-loss: 5.091578960418701  UAS: 0.44453150466677027  LAS: 0.3466463983899431 



 67%|██████▋   | 20/30 [03:46<01:53, 11.31s/it]

--Evaluation:
-loss: 4.980196475982666  UAS: 0.4592716842542842  LAS: 0.36068383320972325 

--Best Evaluation: 
-loss: 4.980196475982666  UAS: 0.4592716842542842  LAS: 0.36068383320972325 



 70%|███████   | 21/30 [03:58<01:43, 11.48s/it]

--Evaluation:
-loss: 4.98553991317749  UAS: 0.44577063357778357  LAS: 0.3492707245180393 

--Best Evaluation: 
-loss: 4.980196475982666  UAS: 0.4592716842542842  LAS: 0.36068383320972325 

--Evaluation:
-loss: 5.220759868621826  UAS: 0.44370445799561775  LAS: 0.3444256849070025 

--Best Evaluation: 
-loss: 4.980196475982666  UAS: 0.4592716842542842  LAS: 0.36068383320972325 



 73%|███████▎  | 22/30 [04:09<01:31, 11.38s/it]

--Evaluation:
-loss: 5.102668285369873  UAS: 0.4535242939929435  LAS: 0.35605969745633387 

--Best Evaluation: 
-loss: 4.980196475982666  UAS: 0.4592716842542842  LAS: 0.36068383320972325 



 77%|███████▋  | 23/30 [04:20<01:19, 11.30s/it]

--Evaluation:
-loss: 5.18153715133667  UAS: 0.4623294686694884  LAS: 0.3578992218979399 

--Best Evaluation: 
-loss: 4.980196475982666  UAS: 0.4592716842542842  LAS: 0.36068383320972325 



 77%|███████▋  | 23/30 [04:32<01:23, 11.86s/it]

--Evaluation:
-loss: 5.302924633026123  UAS: 0.4656514945929328  LAS: 0.3603365414112995 

--Best Evaluation: 
-loss: 4.980196475982666  UAS: 0.4592716842542842  LAS: 0.36068383320972325 

--early stopping, training finished.
[4.980196475982666, 0.4592716842542842, 0.36068383320972325]





FOLD 1
160/40
--------------------------------


Some weights of the model checkpoint at hfl/chinese-electra-180g-large-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/30 [00:00<?, ?it/s]

--epoch 0, step 0, loss 15.344053268432617
  {'UAS': 0.008695652173913044, 'LAS': 0.0}
--Evaluation:
-loss: 9.593108177185059  UAS: 0.005233870273064002  LAS: 0.002379619260918253 

--Best Evaluation: 
-loss: 9.593108177185059  UAS: 0.005233870273064002  LAS: 0.002379619260918253 



  3%|▎         | 1/30 [00:11<05:23, 11.17s/it]

--Evaluation:
-loss: 7.6217041015625  UAS: 0.013465050191246362  LAS: 0.005646319132731605 

--Best Evaluation: 
-loss: 7.6217041015625  UAS: 0.013465050191246362  LAS: 0.005646319132731605 



  7%|▋         | 2/30 [00:22<05:12, 11.15s/it]

--Evaluation:
-loss: 7.2412309646606445  UAS: 0.017910814451985457  LAS: 0.009935006185005296 

--Best Evaluation: 
-loss: 7.2412309646606445  UAS: 0.017910814451985457  LAS: 0.009935006185005296 



 10%|█         | 3/30 [00:34<05:10, 11.48s/it]

--Evaluation:
-loss: 6.896188259124756  UAS: 0.03269326225756096  LAS: 0.015012289630893811 

--Best Evaluation: 
-loss: 6.896188259124756  UAS: 0.03269326225756096  LAS: 0.015012289630893811 

--Evaluation:
-loss: 6.640693664550781  UAS: 0.057229758225381865  LAS: 0.03657131880847076 

--Best Evaluation: 
-loss: 6.640693664550781  UAS: 0.057229758225381865  LAS: 0.03657131880847076 



 13%|█▎        | 4/30 [00:45<04:55, 11.37s/it]

--Evaluation:
-loss: 6.469535827636719  UAS: 0.080576559987242  LAS: 0.05915361375736923 

--Best Evaluation: 
-loss: 6.469535827636719  UAS: 0.080576559987242  LAS: 0.05915361375736923 



 17%|█▋        | 5/30 [00:56<04:42, 11.29s/it]

--Evaluation:
-loss: 5.957596302032471  UAS: 0.14630266062353764  LAS: 0.10037627253648676 

--Best Evaluation: 
-loss: 5.957596302032471  UAS: 0.14630266062353764  LAS: 0.10037627253648676 



 20%|██        | 6/30 [01:08<04:35, 11.49s/it]

--Evaluation:
-loss: 5.756810665130615  UAS: 0.17869955506891705  LAS: 0.121845769032863 

--Best Evaluation: 
-loss: 5.756810665130615  UAS: 0.17869955506891705  LAS: 0.121845769032863 

--Evaluation:
-loss: 4.493098735809326  UAS: 0.2562944348903574  LAS: 0.16988305426372544 

--Best Evaluation: 
-loss: 4.493098735809326  UAS: 0.2562944348903574  LAS: 0.16988305426372544 



 23%|██▎       | 7/30 [01:19<04:21, 11.38s/it]

--Evaluation:
-loss: 4.138582229614258  UAS: 0.33457797636535064  LAS: 0.2193959412203823 

--Best Evaluation: 
-loss: 4.138582229614258  UAS: 0.33457797636535064  LAS: 0.2193959412203823 



 27%|██▋       | 8/30 [01:30<04:08, 11.31s/it]

--Evaluation:
-loss: 3.7621376514434814  UAS: 0.4170655903261478  LAS: 0.2795938848565704 

--Best Evaluation: 
-loss: 3.7621376514434814  UAS: 0.4170655903261478  LAS: 0.2795938848565704 



 30%|███       | 9/30 [01:42<04:01, 11.50s/it]

--Evaluation:
-loss: 3.639458417892456  UAS: 0.45339765603214416  LAS: 0.3244622891754768 

--Best Evaluation: 
-loss: 3.639458417892456  UAS: 0.45339765603214416  LAS: 0.3244622891754768 

--Evaluation:
-loss: 4.092998027801514  UAS: 0.45896093822570905  LAS: 0.3384335225121316 

--Best Evaluation: 
-loss: 4.092998027801514  UAS: 0.45896093822570905  LAS: 0.3384335225121316 



 33%|███▎      | 10/30 [01:53<03:48, 11.40s/it]

--Evaluation:
-loss: 4.0768232345581055  UAS: 0.4543949133489624  LAS: 0.35176675668722546 

--Best Evaluation: 
-loss: 4.0768232345581055  UAS: 0.4543949133489624  LAS: 0.35176675668722546 



 37%|███▋      | 11/30 [02:04<03:35, 11.33s/it]

--Evaluation:
-loss: 3.85015869140625  UAS: 0.4875190222540023  LAS: 0.37453331641932697 

--Best Evaluation: 
-loss: 3.85015869140625  UAS: 0.4875190222540023  LAS: 0.37453331641932697 



 40%|████      | 12/30 [02:16<03:26, 11.50s/it]

--Evaluation:
-loss: 3.794692277908325  UAS: 0.5186799917001232  LAS: 0.3957129072816587 

--Best Evaluation: 
-loss: 3.794692277908325  UAS: 0.5186799917001232  LAS: 0.3957129072816587 

--Evaluation:
-loss: 3.672844648361206  UAS: 0.5241887726588745  LAS: 0.41932861889398376 

--Best Evaluation: 
-loss: 3.672844648361206  UAS: 0.5241887726588745  LAS: 0.41932861889398376 



 43%|████▎     | 13/30 [02:28<03:13, 11.39s/it]

--Evaluation:
-loss: 3.5800912380218506  UAS: 0.5434040082789529  LAS: 0.4469169605468729 

--Best Evaluation: 
-loss: 3.5800912380218506  UAS: 0.5434040082789529  LAS: 0.4469169605468729 



 47%|████▋     | 14/30 [02:39<03:01, 11.32s/it]

--Evaluation:
-loss: 3.3812217712402344  UAS: 0.5841552574817654  LAS: 0.4923132014810319 

--Best Evaluation: 
-loss: 3.3812217712402344  UAS: 0.5841552574817654  LAS: 0.4923132014810319 



 50%|█████     | 15/30 [02:51<02:52, 11.50s/it]

--Evaluation:
-loss: 3.671177387237549  UAS: 0.5701353581929208  LAS: 0.47169560529326315 

--Best Evaluation: 
-loss: 3.3812217712402344  UAS: 0.5841552574817654  LAS: 0.4923132014810319 

--Evaluation:
-loss: 3.5767743587493896  UAS: 0.5817544879714955  LAS: 0.4970615287935127 

--Best Evaluation: 
-loss: 3.5767743587493896  UAS: 0.5817544879714955  LAS: 0.4970615287935127 



 53%|█████▎    | 16/30 [03:02<02:39, 11.40s/it]

--Evaluation:
-loss: 3.500401258468628  UAS: 0.5924283148797607  LAS: 0.49568573205868294 

--Best Evaluation: 
-loss: 3.5767743587493896  UAS: 0.5817544879714955  LAS: 0.4970615287935127 



 57%|█████▋    | 17/30 [03:13<02:27, 11.33s/it]

--Evaluation:
-loss: 3.7648704051971436  UAS: 0.589237952332072  LAS: 0.4827067039755605 

--Best Evaluation: 
-loss: 3.5767743587493896  UAS: 0.5817544879714955  LAS: 0.4970615287935127 



 60%|██████    | 18/30 [03:25<02:17, 11.50s/it]

--Evaluation:
-loss: 3.71319317817688  UAS: 0.5990381598232771  LAS: 0.4994183704652948 

--Best Evaluation: 
-loss: 3.71319317817688  UAS: 0.5990381598232771  LAS: 0.4994183704652948 

--Evaluation:
-loss: 3.688588857650757  UAS: 0.6062447445629073  LAS: 0.5110202683044189 

--Best Evaluation: 
-loss: 3.688588857650757  UAS: 0.6062447445629073  LAS: 0.5110202683044189 



 63%|██████▎   | 19/30 [03:36<02:05, 11.40s/it]

--Evaluation:
-loss: 3.618373155593872  UAS: 0.6183148966598788  LAS: 0.5187055255893938 

--Best Evaluation: 
-loss: 3.618373155593872  UAS: 0.6183148966598788  LAS: 0.5187055255893938 



 67%|██████▋   | 20/30 [03:47<01:53, 11.34s/it]

--Evaluation:
-loss: 3.971498966217041  UAS: 0.6036957107752869  LAS: 0.505636185691241 

--Best Evaluation: 
-loss: 3.618373155593872  UAS: 0.6183148966598788  LAS: 0.5187055255893938 



 70%|███████   | 21/30 [03:59<01:43, 11.50s/it]

--Evaluation:
-loss: 3.9098453521728516  UAS: 0.6140714378060135  LAS: 0.5197324076810272 

--Best Evaluation: 
-loss: 3.9098453521728516  UAS: 0.6140714378060135  LAS: 0.5197324076810272 

--Evaluation:
-loss: 3.703080415725708  UAS: 0.6282482596743104  LAS: 0.5275644770912983 

--Best Evaluation: 
-loss: 3.703080415725708  UAS: 0.6282482596743104  LAS: 0.5275644770912983 



 73%|███████▎  | 22/30 [04:10<01:31, 11.39s/it]

--Evaluation:
-loss: 3.9925363063812256  UAS: 0.6262031482659087  LAS: 0.5275732944800317 

--Best Evaluation: 
-loss: 3.9925363063812256  UAS: 0.6262031482659087  LAS: 0.5275732944800317 



 77%|███████▋  | 23/30 [04:21<01:19, 11.32s/it]

--Evaluation:
-loss: 3.88346266746521  UAS: 0.6294773344752199  LAS: 0.532953988474173 

--Best Evaluation: 
-loss: 3.88346266746521  UAS: 0.6294773344752199  LAS: 0.532953988474173 



 80%|████████  | 24/30 [04:33<01:08, 11.48s/it]

--Evaluation:
-loss: 4.023767948150635  UAS: 0.6236511402060441  LAS: 0.5274977554209729 

--Best Evaluation: 
-loss: 3.88346266746521  UAS: 0.6294773344752199  LAS: 0.532953988474173 

--Evaluation:
-loss: 4.039734363555908  UAS: 0.6235936871796721  LAS: 0.5165194468280736 

--Best Evaluation: 
-loss: 3.88346266746521  UAS: 0.6294773344752199  LAS: 0.532953988474173 



 83%|████████▎ | 25/30 [04:44<00:56, 11.38s/it]

--Evaluation:
-loss: 4.113306045532227  UAS: 0.6279559502646642  LAS: 0.5281545214841128 

--Best Evaluation: 
-loss: 3.88346266746521  UAS: 0.6294773344752199  LAS: 0.532953988474173 



 87%|████████▋ | 26/30 [04:55<00:45, 11.31s/it]

--Evaluation:
-loss: 4.093360424041748  UAS: 0.6282744650589878  LAS: 0.5277270865261585 

--Best Evaluation: 
-loss: 3.88346266746521  UAS: 0.6294773344752199  LAS: 0.532953988474173 



 87%|████████▋ | 26/30 [05:07<00:47, 11.84s/it]

--Evaluation:
-loss: 4.153302192687988  UAS: 0.6351307970317447  LAS: 0.5266019115109917 

--Best Evaluation: 
-loss: 3.88346266746521  UAS: 0.6294773344752199  LAS: 0.532953988474173 

--early stopping, training finished.
[3.88346266746521, 0.6294773344752199, 0.532953988474173]





FOLD 2
160/40
--------------------------------


Some weights of the model checkpoint at hfl/chinese-electra-180g-large-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/30 [00:00<?, ?it/s]

--epoch 0, step 0, loss 13.579277038574219
  {'UAS': 0.0, 'LAS': 0.0}
--Evaluation:
-loss: 9.040756225585938  UAS: 0.006117381475472118  LAS: 0.0 

--Best Evaluation: 
-loss: 0  UAS: 0  LAS: 0 



  3%|▎         | 1/30 [00:11<05:25, 11.22s/it]

--Evaluation:
-loss: 7.602872371673584  UAS: 0.022426182167243066  LAS: 0.005869673436877738 

--Best Evaluation: 
-loss: 7.602872371673584  UAS: 0.022426182167243066  LAS: 0.005869673436877738 



  7%|▋         | 2/30 [00:22<05:12, 11.16s/it]

--Evaluation:
-loss: 7.241939067840576  UAS: 0.026077918274109248  LAS: 0.014286777588124504 

--Best Evaluation: 
-loss: 7.241939067840576  UAS: 0.026077918274109248  LAS: 0.014286777588124504 



 10%|█         | 3/30 [00:34<05:09, 11.48s/it]

--Evaluation:
-loss: 6.825639247894287  UAS: 0.036714181339251625  LAS: 0.017157213524694653 

--Best Evaluation: 
-loss: 6.825639247894287  UAS: 0.036714181339251625  LAS: 0.017157213524694653 

--Evaluation:
-loss: 5.935297966003418  UAS: 0.08246960836118891  LAS: 0.04345040921451759 

--Best Evaluation: 
-loss: 5.935297966003418  UAS: 0.08246960836118891  LAS: 0.04345040921451759 



 13%|█▎        | 4/30 [00:45<04:54, 11.34s/it]

--Evaluation:
-loss: 5.0018439292907715  UAS: 0.16234126870910864  LAS: 0.10580226583889571 

--Best Evaluation: 
-loss: 5.0018439292907715  UAS: 0.16234126870910864  LAS: 0.10580226583889571 



 17%|█▋        | 5/30 [00:56<04:41, 11.26s/it]

--Evaluation:
-loss: 4.411702632904053  UAS: 0.24781224671626473  LAS: 0.1417016987483561 

--Best Evaluation: 
-loss: 4.411702632904053  UAS: 0.24781224671626473  LAS: 0.1417016987483561 



 20%|██        | 6/30 [01:08<04:35, 11.47s/it]

--Evaluation:
-loss: 4.214660167694092  UAS: 0.2969055438533055  LAS: 0.1860666603663143 

--Best Evaluation: 
-loss: 4.214660167694092  UAS: 0.2969055438533055  LAS: 0.1860666603663143 

--Evaluation:
-loss: 3.936551809310913  UAS: 0.3597274148433352  LAS: 0.23123454361371146 

--Best Evaluation: 
-loss: 3.936551809310913  UAS: 0.3597274148433352  LAS: 0.23123454361371146 



 23%|██▎       | 7/30 [01:19<04:21, 11.37s/it]

--Evaluation:
-loss: 3.688556671142578  UAS: 0.41187075099147574  LAS: 0.2783759245985598 

--Best Evaluation: 
-loss: 3.688556671142578  UAS: 0.41187075099147574  LAS: 0.2783759245985598 



 27%|██▋       | 8/30 [01:30<04:08, 11.31s/it]

--Evaluation:
-loss: 3.4497878551483154  UAS: 0.48100469352082414  LAS: 0.33154117309878883 

--Best Evaluation: 
-loss: 3.4497878551483154  UAS: 0.48100469352082414  LAS: 0.33154117309878883 



 30%|███       | 9/30 [01:42<04:01, 11.50s/it]

--Evaluation:
-loss: 3.192493438720703  UAS: 0.5414466252350192  LAS: 0.39755755034790713 

--Best Evaluation: 
-loss: 3.192493438720703  UAS: 0.5414466252350192  LAS: 0.39755755034790713 

--Evaluation:
-loss: 3.226663589477539  UAS: 0.5309244268584623  LAS: 0.41057526366315134 

--Best Evaluation: 
-loss: 3.226663589477539  UAS: 0.5309244268584623  LAS: 0.41057526366315134 



 33%|███▎      | 10/30 [01:53<03:47, 11.40s/it]

--Evaluation:
-loss: 2.951122760772705  UAS: 0.590912427201504  LAS: 0.44986835664690206 

--Best Evaluation: 
-loss: 2.951122760772705  UAS: 0.590912427201504  LAS: 0.44986835664690206 



 37%|███▋      | 11/30 [02:04<03:35, 11.33s/it]

--Evaluation:
-loss: 2.951988458633423  UAS: 0.595829470012997  LAS: 0.46388064239927607 

--Best Evaluation: 
-loss: 2.951988458633423  UAS: 0.595829470012997  LAS: 0.46388064239927607 



 40%|████      | 12/30 [02:16<03:27, 11.50s/it]

--Evaluation:
-loss: 3.0204856395721436  UAS: 0.6145231666690847  LAS: 0.4846705316020585 

--Best Evaluation: 
-loss: 3.0204856395721436  UAS: 0.6145231666690847  LAS: 0.4846705316020585 

--Evaluation:
-loss: 2.9875566959381104  UAS: 0.6189866599186453  LAS: 0.5026981956832759 

--Best Evaluation: 
-loss: 2.9875566959381104  UAS: 0.6189866599186453  LAS: 0.5026981956832759 



 43%|████▎     | 13/30 [02:27<03:13, 11.40s/it]

--Evaluation:
-loss: 2.9966869354248047  UAS: 0.6237675067930277  LAS: 0.5088315219492969 

--Best Evaluation: 
-loss: 2.9966869354248047  UAS: 0.6237675067930277  LAS: 0.5088315219492969 



 47%|████▋     | 14/30 [02:39<03:01, 11.33s/it]

--Evaluation:
-loss: 2.916478395462036  UAS: 0.6672833203417562  LAS: 0.5454064890379732 

--Best Evaluation: 
-loss: 2.916478395462036  UAS: 0.6672833203417562  LAS: 0.5454064890379732 



 50%|█████     | 15/30 [02:51<02:52, 11.50s/it]

--Evaluation:
-loss: 3.0277204513549805  UAS: 0.6407685145662981  LAS: 0.5290551339755877 

--Best Evaluation: 
-loss: 2.916478395462036  UAS: 0.6672833203417562  LAS: 0.5454064890379732 

--Evaluation:
-loss: 2.913745403289795  UAS: 0.666976983798276  LAS: 0.5591909402774979 

--Best Evaluation: 
-loss: 2.913745403289795  UAS: 0.666976983798276  LAS: 0.5591909402774979 



 53%|█████▎    | 16/30 [03:02<02:39, 11.40s/it]

--Evaluation:
-loss: 2.8823962211608887  UAS: 0.6889108965331208  LAS: 0.5749471645945412 

--Best Evaluation: 
-loss: 2.8823962211608887  UAS: 0.6889108965331208  LAS: 0.5749471645945412 



 57%|█████▋    | 17/30 [03:13<02:27, 11.33s/it]

--Evaluation:
-loss: 3.1718201637268066  UAS: 0.6567635185024294  LAS: 0.5478470766317038 

--Best Evaluation: 
-loss: 2.8823962211608887  UAS: 0.6889108965331208  LAS: 0.5749471645945412 



 60%|██████    | 18/30 [03:25<02:18, 11.51s/it]

--Evaluation:
-loss: 3.1069211959838867  UAS: 0.6764945013597974  LAS: 0.5550836631418481 

--Best Evaluation: 
-loss: 2.8823962211608887  UAS: 0.6889108965331208  LAS: 0.5749471645945412 

--Evaluation:
-loss: 3.190096616744995  UAS: 0.6819459055182369  LAS: 0.5577603847026762 

--Best Evaluation: 
-loss: 2.8823962211608887  UAS: 0.6889108965331208  LAS: 0.5749471645945412 



 63%|██████▎   | 19/30 [03:36<02:05, 11.40s/it]

--Evaluation:
-loss: 3.1591219902038574  UAS: 0.6830018953881235  LAS: 0.5615565008592693 

--Best Evaluation: 
-loss: 2.8823962211608887  UAS: 0.6889108965331208  LAS: 0.5749471645945412 



 67%|██████▋   | 20/30 [03:47<01:53, 11.34s/it]

--Evaluation:
-loss: 3.296130895614624  UAS: 0.688365580375629  LAS: 0.5798421111650134 

--Best Evaluation: 
-loss: 3.296130895614624  UAS: 0.688365580375629  LAS: 0.5798421111650134 



 70%|███████   | 21/30 [03:59<01:43, 11.50s/it]

--Evaluation:
-loss: 3.2786526679992676  UAS: 0.6899198291153212  LAS: 0.5772904407371535 

--Best Evaluation: 
-loss: 3.296130895614624  UAS: 0.688365580375629  LAS: 0.5798421111650134 

--Evaluation:
-loss: 3.3262925148010254  UAS: 0.6923846792344099  LAS: 0.5804535257681327 

--Best Evaluation: 
-loss: 3.3262925148010254  UAS: 0.6923846792344099  LAS: 0.5804535257681327 



 73%|███████▎  | 22/30 [04:10<01:31, 11.41s/it]

--Evaluation:
-loss: 3.3750576972961426  UAS: 0.6998137476648666  LAS: 0.5862947332897301 

--Best Evaluation: 
-loss: 3.3750576972961426  UAS: 0.6998137476648666  LAS: 0.5862947332897301 



 77%|███████▋  | 23/30 [04:21<01:19, 11.32s/it]

--Evaluation:
-loss: 3.410722494125366  UAS: 0.684790974376389  LAS: 0.5756877283893401 

--Best Evaluation: 
-loss: 3.3750576972961426  UAS: 0.6998137476648666  LAS: 0.5862947332897301 



 80%|████████  | 24/30 [04:33<01:08, 11.48s/it]

--Evaluation:
-loss: 3.413269519805908  UAS: 0.6912649911487984  LAS: 0.5855343171939629 

--Best Evaluation: 
-loss: 3.3750576972961426  UAS: 0.6998137476648666  LAS: 0.5862947332897301 

--Evaluation:
-loss: 3.4641880989074707  UAS: 0.7002526625470588  LAS: 0.5966465125367684 

--Best Evaluation: 
-loss: 3.4641880989074707  UAS: 0.7002526625470588  LAS: 0.5966465125367684 



 83%|████████▎ | 25/30 [04:44<00:56, 11.38s/it]

--Evaluation:
-loss: 3.5556862354278564  UAS: 0.7038430753630631  LAS: 0.5889287883733048 

--Best Evaluation: 
-loss: 3.4641880989074707  UAS: 0.7002526625470588  LAS: 0.5966465125367684 



 87%|████████▋ | 26/30 [04:56<00:45, 11.32s/it]

--Evaluation:
-loss: 3.525531768798828  UAS: 0.7022213736041978  LAS: 0.5879602702715545 

--Best Evaluation: 
-loss: 3.4641880989074707  UAS: 0.7002526625470588  LAS: 0.5966465125367684 



 90%|█████████ | 27/30 [05:07<00:34, 11.48s/it]

--Evaluation:
-loss: 3.520514726638794  UAS: 0.7118414233340481  LAS: 0.5979324867428811 

--Best Evaluation: 
-loss: 3.520514726638794  UAS: 0.7118414233340481  LAS: 0.5979324867428811 

--Evaluation:
-loss: 3.5702531337738037  UAS: 0.708996053363937  LAS: 0.5940504129303517 

--Best Evaluation: 
-loss: 3.520514726638794  UAS: 0.7118414233340481  LAS: 0.5979324867428811 



 93%|█████████▎| 28/30 [05:19<00:22, 11.38s/it]

--Evaluation:
-loss: 3.538168430328369  UAS: 0.7113252481530596  LAS: 0.5965681446727171 

--Best Evaluation: 
-loss: 3.520514726638794  UAS: 0.7118414233340481  LAS: 0.5979324867428811 



 97%|█████████▋| 29/30 [05:30<00:11, 11.31s/it]

--Evaluation:
-loss: 3.583054304122925  UAS: 0.7120807900576698  LAS: 0.5963032784140622 

--Best Evaluation: 
-loss: 3.520514726638794  UAS: 0.7118414233340481  LAS: 0.5979324867428811 



100%|██████████| 30/30 [05:42<00:00, 11.40s/it]

--Evaluation:
-loss: 3.5821754932403564  UAS: 0.7116534396303194  LAS: 0.5958759279867117 

--Best Evaluation: 
-loss: 3.520514726638794  UAS: 0.7118414233340481  LAS: 0.5979324867428811 

--training finished.
[3.520514726638794, 0.7118414233340481, 0.5979324867428811]





FOLD 3
160/40
--------------------------------


Some weights of the model checkpoint at hfl/chinese-electra-180g-large-discriminator were not used when initializing ElectraModel: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  0%|          | 0/30 [00:00<?, ?it/s]

--epoch 0, step 0, loss 14.71179485321045
  {'UAS': 0.0, 'LAS': 0.0}
--Evaluation:
-loss: 9.100911140441895  UAS: 0.0021527777777777778  LAS: 0.0 

--Best Evaluation: 
-loss: 0  UAS: 0  LAS: 0 



  3%|▎         | 1/30 [00:11<05:23, 11.16s/it]

--Evaluation:
-loss: 7.549331188201904  UAS: 0.015232050139749542  LAS: 0.005628832682256789 

--Best Evaluation: 
-loss: 7.549331188201904  UAS: 0.015232050139749542  LAS: 0.005628832682256789 



  7%|▋         | 2/30 [00:22<05:12, 11.15s/it]

--Evaluation:
-loss: 7.788130283355713  UAS: 0.010831175193099948  LAS: 0.0027640036730945824 

--Best Evaluation: 
-loss: 7.549331188201904  UAS: 0.015232050139749542  LAS: 0.005628832682256789 



 10%|█         | 3/30 [00:34<05:09, 11.47s/it]

--Evaluation:
-loss: 6.635403633117676  UAS: 0.03374759157492426  LAS: 0.015850304677553984 

--Best Evaluation: 
-loss: 6.635403633117676  UAS: 0.03374759157492426  LAS: 0.015850304677553984 

--Evaluation:
-loss: 6.091067790985107  UAS: 0.050003148759864266  LAS: 0.03575394428678255 

--Best Evaluation: 
-loss: 6.091067790985107  UAS: 0.050003148759864266  LAS: 0.03575394428678255 



 13%|█▎        | 4/30 [00:45<04:54, 11.34s/it]

--Evaluation:
-loss: 5.639241695404053  UAS: 0.08388768784274286  LAS: 0.04611867523408851 

--Best Evaluation: 
-loss: 5.639241695404053  UAS: 0.08388768784274286  LAS: 0.04611867523408851 



 17%|█▋        | 5/30 [00:56<04:41, 11.26s/it]

--Evaluation:
-loss: 4.8495612144470215  UAS: 0.17930304929196356  LAS: 0.11656831018942326 

--Best Evaluation: 
-loss: 4.8495612144470215  UAS: 0.17930304929196356  LAS: 0.11656831018942326 



 20%|██        | 6/30 [01:08<04:35, 11.47s/it]

--Evaluation:
-loss: 4.420243263244629  UAS: 0.2450870852067733  LAS: 0.14772015085699838 

--Best Evaluation: 
-loss: 4.420243263244629  UAS: 0.2450870852067733  LAS: 0.14772015085699838 

--Evaluation:
-loss: 4.2650227546691895  UAS: 0.27707497577283297  LAS: 0.1853599092062494 

--Best Evaluation: 
-loss: 4.2650227546691895  UAS: 0.27707497577283297  LAS: 0.1853599092062494 



 23%|██▎       | 7/30 [01:19<04:21, 11.37s/it]

--Evaluation:
-loss: 3.903627872467041  UAS: 0.3479518628111582  LAS: 0.24076258545834733 

--Best Evaluation: 
-loss: 3.903627872467041  UAS: 0.3479518628111582  LAS: 0.24076258545834733 



 27%|██▋       | 8/30 [01:30<04:08, 11.31s/it]

--Evaluation:
-loss: 3.856147050857544  UAS: 0.40421908693082687  LAS: 0.3033233525834895 

--Best Evaluation: 
-loss: 3.856147050857544  UAS: 0.40421908693082687  LAS: 0.3033233525834895 



 30%|███       | 9/30 [01:42<04:01, 11.50s/it]

--Evaluation:
-loss: 3.4778900146484375  UAS: 0.44882411931000477  LAS: 0.33622292447988916 

--Best Evaluation: 
-loss: 3.4778900146484375  UAS: 0.44882411931000477  LAS: 0.33622292447988916 

--Evaluation:
-loss: 3.3942792415618896  UAS: 0.502438839923465  LAS: 0.3866880975976771 

--Best Evaluation: 
-loss: 3.3942792415618896  UAS: 0.502438839923465  LAS: 0.3866880975976771 



 33%|███▎      | 10/30 [01:53<03:47, 11.40s/it]

--Evaluation:
-loss: 3.2812390327453613  UAS: 0.5303830460561776  LAS: 0.41767265354451216 

--Best Evaluation: 
-loss: 3.2812390327453613  UAS: 0.5303830460561776  LAS: 0.41767265354451216 



 37%|███▋      | 11/30 [02:04<03:35, 11.33s/it]

--Evaluation:
-loss: 3.248544692993164  UAS: 0.5402743382271818  LAS: 0.4228520454132873 

--Best Evaluation: 
-loss: 3.248544692993164  UAS: 0.5402743382271818  LAS: 0.4228520454132873 



 40%|████      | 12/30 [02:16<03:26, 11.50s/it]

--Evaluation:
-loss: 3.3177764415740967  UAS: 0.5523887283235553  LAS: 0.44693705355303787 

--Best Evaluation: 
-loss: 3.3177764415740967  UAS: 0.5523887283235553  LAS: 0.44693705355303787 

--Evaluation:
-loss: 3.2159554958343506  UAS: 0.5849936874975026  LAS: 0.47858194932450226 

--Best Evaluation: 
-loss: 3.2159554958343506  UAS: 0.5849936874975026  LAS: 0.47858194932450226 



 43%|████▎     | 13/30 [02:27<03:13, 11.40s/it]

--Evaluation:
-loss: 3.3567912578582764  UAS: 0.5777035990484622  LAS: 0.47962008896739006 

--Best Evaluation: 
-loss: 3.3567912578582764  UAS: 0.5777035990484622  LAS: 0.47962008896739006 



 47%|████▋     | 14/30 [02:39<03:01, 11.33s/it]

--Evaluation:
-loss: 3.2534000873565674  UAS: 0.5920171161613659  LAS: 0.4868162715742893 

--Best Evaluation: 
-loss: 3.2534000873565674  UAS: 0.5920171161613659  LAS: 0.4868162715742893 



 50%|█████     | 15/30 [02:51<02:52, 11.50s/it]

--Evaluation:
-loss: 3.305962085723877  UAS: 0.6086582437228103  LAS: 0.49687698004728525 

--Best Evaluation: 
-loss: 3.305962085723877  UAS: 0.6086582437228103  LAS: 0.49687698004728525 

--Evaluation:
-loss: 3.3987081050872803  UAS: 0.6083432377376121  LAS: 0.5079892202591555 

--Best Evaluation: 
-loss: 3.3987081050872803  UAS: 0.6083432377376121  LAS: 0.5079892202591555 



 53%|█████▎    | 16/30 [03:02<02:39, 11.40s/it]

--Evaluation:
-loss: 3.2910892963409424  UAS: 0.6281974231241065  LAS: 0.5247692785294907 

--Best Evaluation: 
-loss: 3.2910892963409424  UAS: 0.6281974231241065  LAS: 0.5247692785294907 



In [None]:
os.system('shutdown')