In [23]:
import os
import argparse
import pickle

import pandas as pd

from sklearn.model_selection import StratifiedKFold, train_test_split
from datasets import load_metric
#from evaluate import load

from seqeval.metrics import classification_report
import torch

# huggingface tokenizer/model
from transformers import AutoModelForTokenClassification

# huggingface trainer
from transformers import Trainer
from transformers import TrainingArguments


# Customize encoder
from ner.ner_dataset import NERCollator
from ner.ner_dataset import NERDatasetPreEncoded


Command-line equivalent of this notebook (script version): `python hf_trainer.py --model_folder models --data_fn data/encoded/train.klue_roberta-base.encoded.pickle --use_kfold --n_splits 5`

# **batch size = 1, 2, 4, 8, ... 조절하기 (2^k)**

In [None]:
# Per-device batch size; used below as the default value of --batch_size_per_device.
batch_size = 4

In [None]:
import os
import argparse

# Switch off the Weights & Biases integration for this run.
# NOTE(review): transformers reports this variable as deprecated (removal announced for v5)
# and suggests controlling logging integrations through `report_to` instead.
os.environ["WANDB_DISABLED"] = "true"

def define_argparser(args=None):
    """
    Build the training configuration from argparse options.

    A notebook has no real command line, so by default an empty argument list
    is parsed and every option simply takes its default value.

    Args:
        args: Optional list of command-line style strings (for example
            ``['--n_splits', '3', '--no_kfold']``) used to override defaults.
            ``None`` (the default) parses an empty list.

    Returns:
        argparse.Namespace holding the parsed configuration.
    """
    p = argparse.ArgumentParser()

    p.add_argument('--model_folder', required=False, default='models',
                   help="Directory to save trained model.")
    p.add_argument('--data_fn', required=False,
                   default='data/encoded/train.klue_roberta-base.encoded.pickle',
                   help="Data file name encoded by encoding.py to train the model.")

    p.add_argument('--valid_ratio', type=float, default=0.2)
    # The default comes from the module-level `batch_size` set in an earlier cell.
    p.add_argument('--batch_size_per_device', type=int, default=batch_size)
    p.add_argument('--n_epochs_per_fold', type=int, default=50)
    p.add_argument('--warmup_ratio', type=float, default=0.2)
    p.add_argument('--max_length', type=int, default=512)

    # `store_true` combined with `default=True` can never yield False, so a
    # companion switch writing to the same destination is provided to turn
    # k-fold off. `--use_kfold` is kept so existing invocations still work.
    p.add_argument('--use_kfold', action='store_true', default=True)
    p.add_argument('--no_kfold', dest='use_kfold', action='store_false',
                   help="Disable k-fold and use a single train/validation split.")
    p.add_argument('--n_splits', type=int, default=5)
    p.add_argument('--fold_i', type=int, default=None,
                   help="It would be used to train with a certain fold of data due to resource limitation.")

    config = p.parse_args(args=[] if args is None else args)

    return config




In [25]:
# Build the run configuration from the parser defaults and print it for the record.
config = define_argparser()
print(config)

Namespace(model_folder='models', data_fn='data/encoded/train.klue_roberta-base.encoded.pickle', valid_ratio=0.2, batch_size_per_device=4, n_epochs_per_fold=50, warmup_ratio=0.2, max_length=512, use_kfold=True, n_splits=5, fold_i=None)


In [26]:
def load_data(fn, use_kfold=False, n_splits=5, shuffle=True):
    """
    Load the pickled, pre-encoded dataset and return its rows as a DataFrame.

    The pickle must contain a dict with a 'data' entry; that entry becomes the
    DataFrame and every other entry is handed back unchanged.

    Args:
        fn: Path of the pickle file to read.
        use_kfold: When True, add a 'fold' column holding the index of the
            stratified fold (by 'sentence_class') in which each row is used
            for validation, plus an 'id' column numbering the rows from 0.
        n_splits: Number of folds to create when use_kfold is True.
        shuffle: Whether the rows are shuffled before being assigned to folds.

    Returns:
        A tuple (data, dataset): the DataFrame and the loaded dict with its
        'data' entry removed.
    """
    # NOTE: pickle.load can execute arbitrary code, so only open trusted files.
    with open(fn, "rb") as f:
        dataset = pickle.load(f)
    data = pd.DataFrame(dataset.pop('data'))

    if use_kfold:
        # scikit-learn raises ValueError when a random_state is combined with
        # shuffle=False, so the fixed seed is passed only when shuffling.
        skf = StratifiedKFold(
            n_splits=n_splits,
            shuffle=shuffle,
            random_state=42 if shuffle else None,
        )
        data['fold'] = -1  # placeholder, overwritten for every row below
        fold_col = data.columns.get_loc('fold')
        for n_fold, (_, v_idx) in enumerate(skf.split(data, data['sentence_class'])):
            # split() yields row positions, so assign positionally rather than by label.
            data.iloc[v_idx, fold_col] = n_fold
        # Sequential identifier for every row.
        data['id'] = list(range(len(data)))

    return data, dataset

In [27]:
# Peek at the raw payload that encoding.py saved earlier.
with open(config.data_fn, "rb") as f:
    org_data = pickle.load(f)

# Top-level entries stored in the pickle.
org_data.keys()

dict_keys(['data', 'label_info', 'pad_token', 'pretrained_model_name'])

In [28]:
# Separate the 'data' entry (returned as a DataFrame) from the remaining entries of the payload.
data, data_args = load_data(
    config.data_fn,
    use_kfold=config.use_kfold,
    n_splits=config.n_splits,
    shuffle=True,
)



In [29]:
# Display the encoded rows, including the 'fold' and 'id' columns added by load_data.
data

Unnamed: 0,input_ids,attention_mask,labels,sentence_class,fold,id
0,"[0, 544, 15229, 1545, 11610, 2446, 1545, 2302,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",MC,0,0
1,"[0, 6146, 1258, 2137, 2383, 2]","[1, 1, 1, 1, 1, 1]","[-100, 0, 9, 10, 10, -100]",NM,2,1
2,"[0, 15229, 1545, 9888, 2446, 16, 1545, 5281, 2...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, ...",NC,2,2
3,"[0, 12, 21, 13, 3729, 2145, 555, 2073, 5183, 2...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17,...",NM,2,3
4,"[0, 12, 22, 13, 9679, 2052, 3817, 7750, 19521,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[-100, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 29, 3...",QT,4,4
...,...,...,...,...,...,...
265,"[0, 9964, 2079, 4121, 2]","[1, 1, 1, 1, 1]","[-100, 0, 0, 0, -100]",Out,0,265
266,"[0, 15229, 1545, 26462, 2446, 16, 1545, 19578,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Out,0,266
267,"[0, 21, 18, 5025, 2125, 15463, 2]","[1, 1, 1, 1, 1, 1, 1]","[-100, 0, 0, 19, 20, 20, -100]",DP,4,267
268,"[0, 21, 18, 4103, 2178, 2088, 12, 5331, 2612, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Out,3,268


In [30]:
# Display the entries that remained after 'data' was removed from the payload.
data_args

{'label_info': {'label_list': ['CN',
   'NC',
   'RL',
   'LC',
   'NM',
   'GD',
   'AG',
   'JB',
   'PS',
   'DP',
   'TI',
   'MC',
   'PC',
   'BP',
   'QT',
   'RA',
   'EQ'],
  'label_to_index': {'O': 0,
   'B-CN': 1,
   'I-CN': 2,
   'B-NC': 3,
   'I-NC': 4,
   'B-RL': 5,
   'I-RL': 6,
   'B-LC': 7,
   'I-LC': 8,
   'B-NM': 9,
   'I-NM': 10,
   'B-GD': 11,
   'I-GD': 12,
   'B-AG': 13,
   'I-AG': 14,
   'B-JB': 15,
   'I-JB': 16,
   'B-PS': 17,
   'I-PS': 18,
   'B-DP': 19,
   'I-DP': 20,
   'B-TI': 21,
   'I-TI': 22,
   'B-MC': 23,
   'I-MC': 24,
   'B-PC': 25,
   'I-PC': 26,
   'B-BP': 27,
   'I-BP': 28,
   'B-QT': 29,
   'I-QT': 30,
   'B-RA': 31,
   'I-RA': 32,
   'B-EQ': 33,
   'I-EQ': 34},
  'index_to_label': {0: 'O',
   1: 'B-CN',
   2: 'I-CN',
   3: 'B-NC',
   4: 'I-NC',
   5: 'B-RL',
   6: 'I-RL',
   7: 'B-LC',
   8: 'I-LC',
   9: 'B-NM',
   10: 'I-NM',
   11: 'B-GD',
   12: 'I-GD',
   13: 'B-AG',
   14: 'I-AG',
   15: 'B-JB',
   16: 'I-JB',
   17: 'B-PS',
   18: 'I-PS

In [31]:
# Mapping from integer label id to tag string (e.g. 0 -> 'O').
index_to_label = data_args['label_info']['index_to_label']


In [32]:
# Row count per 'sentence_class' value, the column used to stratify the folds.
print(data['sentence_class'].value_counts())

sentence_class
Out    78
NM     34
DP     31
NC     22
JB     13
QT     12
PS     12
AG     11
TI      9
RL      9
LC      8
PC      8
CN      8
BP      7
MC      6
GD      2
Name: count, dtype: int64


In [33]:
def split_dataset(data, use_kfold=False, n_fold=None, valid_ratio=.2, shuffle=False):
    """
    Split data into a training and a validation dataset.

    Args:
        data: DataFrame with 'input_ids', 'attention_mask' and 'labels'
            columns, plus 'fold' (k-fold mode) or 'sentence_class'
            (ratio mode with shuffling).
        use_kfold: When True, rows whose 'fold' equals n_fold form the
            validation set and all other rows form the training set.
        n_fold: Index of the validation fold; required when use_kfold is True.
        valid_ratio: Fraction of rows used for validation when use_kfold is False.
        shuffle: Only used when use_kfold is False. With shuffling the split is
            stratified on 'sentence_class'; without it the rows are cut in order.

    Returns:
        A tuple (train_dataset, valid_dataset) of NERDatasetPreEncoded objects.

    Raises:
        ValueError: If use_kfold is True but n_fold is None.
    """
    if use_kfold:
        if n_fold is None:
            # Comparing 'fold' with None matches no row, which used to produce
            # an empty validation set without any warning.
            raise ValueError("'n_fold' must be given when 'use_kfold' is True.")
        in_valid_fold = data['fold'] == n_fold
        train = data[~in_valid_fold]
        valid = data[in_valid_fold]
    else:
        # train_test_split rejects stratify together with shuffle=False, so
        # stratification is requested only when the rows are shuffled.
        train, valid = train_test_split(
            data,
            test_size=valid_ratio,
            random_state=42,
            shuffle=shuffle,
            stratify=data['sentence_class'] if shuffle else None,
        )

    # Wrap the pre-encoded columns of each partition in the project's dataset class.
    train_dataset = NERDatasetPreEncoded(
        train['input_ids'].values, train['attention_mask'].values, train['labels'].values)
    valid_dataset = NERDatasetPreEncoded(
        valid['input_ids'].values, valid['attention_mask'].values, valid['labels'].values)
    return train_dataset, valid_dataset

In [34]:
n_fold = 1

# The model name is reused in file paths below, where '/' would act as a
# path separator, so every '/' is replaced with '_'.
pretrained_model_name = data_args['pretrained_model_name'].replace('/', '_')

pad_token = data_args['pad_token']
index_to_label = data_args['label_info']['index_to_label']
label_to_index = data_args['label_info']['label_to_index']

train_dataset, valid_dataset = split_dataset(
    data,
    use_kfold=config.use_kfold,
    n_fold=n_fold,
    valid_ratio=config.valid_ratio,
    shuffle=True,
)
print('|train| =', len(train_dataset), '|valid| =', len(valid_dataset))


|train| = 216 |valid| = 54


In [35]:
def get_pretrained_model(model_name: str, num_labels: int):
    """
    Load a pretrained token-classification model through Hugging Face's
    AutoModelForTokenClassification.

    Kept as its own function so that the loading logic can be changed in one
    place later.
    """
    return AutoModelForTokenClassification.from_pretrained(
        model_name,
        num_labels=num_labels,
    )

In [36]:
# The classifier gets one output per entry of label_to_index.
model = get_pretrained_model(
    data_args['pretrained_model_name'],
    num_labels=len(label_to_index),
)

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [37]:
# torch.cuda.device_count() is 0 on a CPU-only machine; clamp to one device so
# the batch size stays positive and the division below cannot fail.
n_devices = max(1, torch.cuda.device_count())
total_batch_size = config.batch_size_per_device * n_devices

# A last, partially filled batch still costs one optimizer step per epoch,
# hence the ceiling division (written with integer arithmetic).
n_steps_per_epoch = -(-len(train_dataset) // total_batch_size)
n_total_iterations = n_steps_per_epoch * config.n_epochs_per_fold
n_warmup_steps = int(n_total_iterations * config.warmup_ratio)
print(
    '# of total_iters =', n_total_iterations,
    '# of warmup_iters =', n_warmup_steps,
)


# of total_iters = 2700 # of warmup_iters = 540


In [38]:
import accelerate
import transformers

# Record the library versions this notebook was run with.
transformers.__version__, accelerate.__version__

('4.46.3', '1.1.1')

In [39]:
training_args = TrainingArguments(
    output_dir=f"./checkpoints/{pretrained_model_name}.{n_fold}",
    num_train_epochs=config.n_epochs_per_fold,
    per_device_train_batch_size=config.batch_size_per_device,
    per_device_eval_batch_size=config.batch_size_per_device,
    warmup_steps=n_warmup_steps,
    weight_decay=0.01,
    # Mixed precision needs a CUDA device; fall back to full precision on CPU.
    fp16=torch.cuda.is_available(),
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`.
    eval_strategy='epoch',
    # Evaluation and saving must use the same strategy for load_best_model_at_end.
    # `save_steps` is not passed because it only applies to save_strategy='steps'.
    save_strategy='epoch',
    save_total_limit=1,  # keep at most one checkpoint on disk
    logging_steps=max(1, n_total_iterations // 100),
    load_best_model_at_end=True,
    # Explicitly disable external logging integrations instead of relying on
    # the deprecated WANDB_DISABLED environment variable.
    report_to="none",
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [40]:
class compute_metrics():
    """
    Callable handed to Trainer(compute_metrics=...) that scores token
    predictions with seqeval at the entity level.
    """

    def __init__(self, index_to_label):
        # Mapping from integer label id to tag string, used to decode ids.
        self.index_to_label = index_to_label

    def __call__(self, pred):
        """
        Compute entity-level precision, recall and F1 plus token accuracy.

        Args:
            pred: Object exposing `label_ids` and `predictions`; the class
                scores are reduced with argmax over axis 2.

        Returns:
            dict with the keys "precision", "recall", "f1" and "accuracy".
            A per-entity classification report is printed as a side effect.
        """
        # seqeval is called directly: `datasets.load_metric` is deprecated and
        # used to be re-loaded on every evaluation. Imported locally so this
        # block does not depend on changes to the import cell.
        from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score

        labels = pred.label_ids
        predictions = pred.predictions.argmax(2)

        # Positions whose gold label is negative (special tokens / padding) are
        # dropped from both sequences in a single pass.
        true_predictions, true_labels = [], []
        for prediction, label in zip(predictions, labels):
            kept = [(p, l) for p, l in zip(prediction, label) if l >= 0]
            true_predictions.append([self.index_to_label[p] for p, _ in kept])
            true_labels.append([self.index_to_label[l] for _, l in kept])

        # zero_division=0 yields the same 0.0 scores as the default but
        # without a warning for every entity type that is never predicted.
        eval_results = {
            "precision": precision_score(true_labels, true_predictions, zero_division=0),
            "recall": recall_score(true_labels, true_predictions, zero_division=0),
            "f1": f1_score(true_labels, true_predictions, zero_division=0),
            "accuracy": accuracy_score(true_labels, true_predictions),
        }
        print(classification_report(true_labels, true_predictions, zero_division=0))

        return eval_results

In [41]:
# Batch collator built with the dataset's pad token; with_text=False as before.
# NOTE(review): the padding behaviour lives in ner.ner_dataset.NERCollator — confirm there.
collator = NERCollator(pad_token=pad_token, with_text=False)

# Metric callable that decodes label ids through index_to_label.
metrics_fn = compute_metrics(index_to_label)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collator,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    compute_metrics=metrics_fn,
)

In [42]:
# Run training; evaluation and checkpointing follow the strategies set in training_args.
trainer.train()

  1%|          | 28/2700 [00:05<09:47,  4.55it/s]

{'loss': 3.5329, 'grad_norm': 9.7932767868042, 'learning_rate': 2.3148148148148148e-06, 'epoch': 0.5}


  2%|▏         | 54/2700 [00:11<09:22,  4.71it/s]

{'loss': 2.5653, 'grad_norm': 9.613458633422852, 'learning_rate': 4.814814814814815e-06, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

  2%|▏         | 54/2700 [00:13<09:22,  4.71it/s]

              precision    recall  f1-score   support

          AG       0.00      0.00      0.00         2
          BP       0.00      0.00      0.00        13
          CN       0.00      0.00      0.00         2
          DP       0.00      0.00      0.00         6
          EQ       0.00      0.00      0.00         4
          GD       0.00      0.00      0.00         1
          JB       0.00      0.00      0.00         3
          LC       0.00      0.00      0.00        16
          MC       0.00      0.00      0.00         2
          NC       0.00      0.00      0.00        27
          NM       0.00      0.00      0.00        32
          PC       0.00      0.00      0.00         4
          PS       0.00      0.00      0.00        40
          QT       0.00      0.00      0.00         2
          RL       0.00      0.00      0.00         6
          TI       0.00      0.00      0.00        18

   micro avg       0.00      0.00      0.00       178
   macro avg       0.00   

  3%|▎         | 82/2700 [00:20<09:26,  4.62it/s]

{'loss': 1.3913, 'grad_norm': 1.5160547494888306, 'learning_rate': 7.314814814814815e-06, 'epoch': 1.5}


  4%|▍         | 108/2700 [00:26<07:01,  6.14it/s]

{'loss': 1.0775, 'grad_norm': 4.074846267700195, 'learning_rate': 9.814814814814815e-06, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

  4%|▍         | 108/2700 [00:27<07:01,  6.14it/s]

              precision    recall  f1-score   support

          AG       0.00      0.00      0.00         2
          BP       0.00      0.00      0.00        13
          CN       0.00      0.00      0.00         2
          DP       0.00      0.00      0.00         6
          EQ       0.00      0.00      0.00         4
          GD       0.00      0.00      0.00         1
          JB       0.00      0.00      0.00         3
          LC       0.00      0.00      0.00        16
          MC       0.00      0.00      0.00         2
          NC       0.00      0.00      0.00        27
          NM       0.00      0.00      0.00        32
          PC       0.00      0.00      0.00         4
          PS       0.00      0.00      0.00        40
          QT       0.00      0.00      0.00         2
          RL       0.00      0.00      0.00         6
          TI       0.00      0.00      0.00        18

   micro avg       0.00      0.00      0.00       178
   macro avg       0.00   

  5%|▌         | 135/2700 [00:35<10:04,  4.24it/s]

{'loss': 0.6899, 'grad_norm': 2.4217934608459473, 'learning_rate': 1.2314814814814815e-05, 'epoch': 2.5}


  6%|▌         | 162/2700 [00:41<08:47,  4.81it/s]

{'loss': 0.5419, 'grad_norm': 3.9544365406036377, 'learning_rate': 1.4814814814814815e-05, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))

  6%|▌         | 162/2700 [00:42<08:47,  4.81it/s]

              precision    recall  f1-score   support

          AG       0.00      0.00      0.00         2
          BP       0.57      0.62      0.59        13
          CN       0.00      0.00      0.00         2
          DP       0.00      0.00      0.00         6
          EQ       0.00      0.00      0.00         4
          GD       0.00      0.00      0.00         1
          JB       0.00      0.00      0.00         3
          LC       0.50      0.44      0.47        16
          MC       0.00      0.00      0.00         2
          NC       0.50      0.52      0.51        27
          NM       0.94      1.00      0.97        32
          PC       0.00      0.00      0.00         4
          PS       0.87      1.00      0.93        40
          QT       0.00      0.00      0.00         2
          RL       0.00      0.00      0.00         6
          TI       0.61      0.61      0.61        18

   micro avg       0.73      0.63      0.67       178
   macro avg       0.25   

  7%|▋         | 190/2700 [00:50<09:35,  4.36it/s]

{'loss': 0.4158, 'grad_norm': 2.231895685195923, 'learning_rate': 1.7314814814814813e-05, 'epoch': 3.5}


  8%|▊         | 216/2700 [00:56<09:50,  4.20it/s]

{'loss': 0.2858, 'grad_norm': 2.0006933212280273, 'learning_rate': 1.9814814814814816e-05, 'epoch': 4.0}


  _warn_prf(average, modifier, msg_start, len(result))

  8%|▊         | 216/2700 [00:57<09:50,  4.20it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.40      0.62      0.48        13
          CN       0.00      0.00      0.00         2
          DP       0.00      0.00      0.00         6
          EQ       1.00      0.25      0.40         4
          GD       0.00      0.00      0.00         1
          JB       0.00      0.00      0.00         3
          LC       0.69      0.56      0.62        16
          MC       0.00      0.00      0.00         2
          NC       0.62      0.78      0.69        27
          NM       0.97      1.00      0.98        32
          PC       0.00      0.00      0.00         4
          PS       1.00      0.95      0.97        40
          QT       0.00      0.00      0.00         2
          RL       0.00      0.00      0.00         6
          TI       0.65      0.61      0.63        18

   micro avg       0.76      0.69      0.72       178
   macro avg       0.40   

  9%|▉         | 244/2700 [01:05<09:14,  4.43it/s]

{'loss': 0.2343, 'grad_norm': 3.931413173675537, 'learning_rate': 2.2314814814814816e-05, 'epoch': 4.5}


 10%|█         | 270/2700 [01:11<09:09,  4.43it/s]

{'loss': 0.1852, 'grad_norm': 7.568154811859131, 'learning_rate': 2.4814814814814816e-05, 'epoch': 5.0}


  _warn_prf(average, modifier, msg_start, len(result))

 10%|█         | 270/2700 [01:12<09:09,  4.43it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.38      0.62      0.47        13
          CN       0.50      0.50      0.50         2
          DP       0.67      0.67      0.67         6
          EQ       0.60      0.75      0.67         4
          GD       1.00      1.00      1.00         1
          JB       0.00      0.00      0.00         3
          LC       0.65      0.81      0.72        16
          MC       0.00      0.00      0.00         2
          NC       0.62      0.85      0.72        27
          NM       0.97      1.00      0.98        32
          PC       0.00      0.00      0.00         4
          PS       0.93      1.00      0.96        40
          QT       0.50      0.50      0.50         2
          RL       0.00      0.00      0.00         6
          TI       0.38      0.56      0.45        18

   micro avg       0.69      0.78      0.73       178
   macro avg       0.51   

 11%|█         | 297/2700 [01:20<07:46,  5.15it/s]

{'loss': 0.1787, 'grad_norm': 3.4148693084716797, 'learning_rate': 2.7314814814814816e-05, 'epoch': 5.5}


 12%|█▏        | 324/2700 [01:26<08:51,  4.47it/s]

{'loss': 0.1422, 'grad_norm': 6.265837669372559, 'learning_rate': 2.981481481481482e-05, 'epoch': 6.0}


  _warn_prf(average, modifier, msg_start, len(result))

 12%|█▏        | 324/2700 [01:27<08:51,  4.47it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.67      0.77      0.71        13
          CN       0.50      0.50      0.50         2
          DP       1.00      1.00      1.00         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.88      0.88      0.88        16
          MC       0.00      0.00      0.00         2
          NC       0.79      0.81      0.80        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.50      0.67         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.00      0.00      0.00         6
          TI       0.89      0.89      0.89        18

   micro avg       0.89      0.85      0.87       178
   macro avg       0.75   

 13%|█▎        | 352/2700 [01:36<08:45,  4.47it/s]

{'loss': 0.0881, 'grad_norm': 3.018141269683838, 'learning_rate': 3.231481481481481e-05, 'epoch': 6.5}


 14%|█▍        | 378/2700 [01:42<08:06,  4.77it/s]

{'loss': 0.0867, 'grad_norm': 0.14779935777187347, 'learning_rate': 3.481481481481482e-05, 'epoch': 7.0}


  _warn_prf(average, modifier, msg_start, len(result))

 14%|█▍        | 378/2700 [01:43<08:06,  4.77it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.79      0.85      0.81        13
          CN       1.00      1.00      1.00         2
          DP       0.83      0.83      0.83         6
          EQ       0.60      0.75      0.67         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.72      0.81      0.76        16
          MC       0.67      1.00      0.80         2
          NC       0.81      0.78      0.79        27
          NM       0.94      1.00      0.97        32
          PC       1.00      0.75      0.86         4
          PS       1.00      0.97      0.99        40
          QT       0.50      0.50      0.50         2
          RL       0.00      0.00      0.00         6
          TI       0.94      0.89      0.91        18

   micro avg       0.88      0.86      0.87       178
   macro avg       0.80   

 15%|█▌        | 406/2700 [01:51<08:15,  4.63it/s]

{'loss': 0.0772, 'grad_norm': 0.12775884568691254, 'learning_rate': 3.731481481481482e-05, 'epoch': 7.5}


 16%|█▌        | 432/2700 [01:57<06:33,  5.77it/s]

{'loss': 0.0525, 'grad_norm': 2.8596303462982178, 'learning_rate': 3.981481481481482e-05, 'epoch': 8.0}



 16%|█▌        | 432/2700 [01:58<06:33,  5.77it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.79      0.85      0.81        13
          CN       1.00      1.00      1.00         2
          DP       1.00      0.83      0.91         6
          EQ       0.43      0.75      0.55         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.59      0.81      0.68        16
          MC       0.33      1.00      0.50         2
          NC       0.85      0.85      0.85        27
          NM       0.97      1.00      0.98        32
          PC       0.67      0.50      0.57         4
          PS       1.00      1.00      1.00        40
          QT       0.33      0.50      0.40         2
          RL       0.50      0.17      0.25         6
          TI       0.70      0.78      0.74        18

   micro avg       0.81      0.87      0.84       178
   macro avg       0.76   

 17%|█▋        | 460/2700 [02:06<07:14,  5.15it/s]

{'loss': 0.0472, 'grad_norm': 1.6878100633621216, 'learning_rate': 4.231481481481482e-05, 'epoch': 8.5}


 18%|█▊        | 486/2700 [02:12<07:55,  4.66it/s]

{'loss': 0.0551, 'grad_norm': 3.935394287109375, 'learning_rate': 4.481481481481482e-05, 'epoch': 9.0}



 18%|█▊        | 486/2700 [02:13<07:55,  4.66it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       1.00      1.00      1.00        13
          CN       1.00      1.00      1.00         2
          DP       1.00      1.00      1.00         6
          EQ       0.60      0.75      0.67         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.76      0.81      0.79        16
          MC       1.00      1.00      1.00         2
          NC       0.91      0.74      0.82        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      0.97      0.99        40
          QT       0.33      0.50      0.40         2
          RL       0.50      0.17      0.25         6
          TI       0.89      0.89      0.89        18

   micro avg       0.92      0.88      0.90       178
   macro avg       0.87   

 19%|█▉        | 513/2700 [02:21<07:27,  4.89it/s]

{'loss': 0.0479, 'grad_norm': 0.11536461114883423, 'learning_rate': 4.731481481481482e-05, 'epoch': 9.5}


 20%|██        | 540/2700 [02:27<08:24,  4.28it/s]

{'loss': 0.0531, 'grad_norm': 5.861788749694824, 'learning_rate': 4.981481481481482e-05, 'epoch': 10.0}



 20%|██        | 540/2700 [02:28<08:24,  4.28it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.92      0.92      0.92        13
          CN       1.00      1.00      1.00         2
          DP       1.00      1.00      1.00         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.79      0.94      0.86        16
          MC       0.50      1.00      0.67         2
          NC       0.88      0.78      0.82        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.50      0.67         4
          PS       0.98      1.00      0.99        40
          QT       1.00      0.50      0.67         2
          RL       0.50      0.17      0.25         6
          TI       0.82      0.78      0.80        18

   micro avg       0.91      0.88      0.89       178
   macro avg       0.88   

 21%|██        | 567/2700 [02:36<06:44,  5.28it/s]

{'loss': 0.0332, 'grad_norm': 0.4009791612625122, 'learning_rate': 4.94212962962963e-05, 'epoch': 10.5}


 22%|██▏       | 594/2700 [02:42<07:50,  4.48it/s]

{'loss': 0.0271, 'grad_norm': 0.3277985155582428, 'learning_rate': 4.87962962962963e-05, 'epoch': 11.0}



 22%|██▏       | 594/2700 [02:43<07:50,  4.48it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.85      0.85      0.85        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.67      0.88      0.76        16
          MC       0.50      1.00      0.67         2
          NC       0.79      0.70      0.75        27
          NM       1.00      1.00      1.00        32
          PC       0.50      0.50      0.50         4
          PS       0.98      1.00      0.99        40
          QT       1.00      0.50      0.67         2
          RL       0.33      0.17      0.22         6
          TI       0.94      0.83      0.88        18

   micro avg       0.86      0.86      0.86       178
   macro avg       0.82   

 23%|██▎       | 622/2700 [02:52<08:22,  4.13it/s]

{'loss': 0.0329, 'grad_norm': 8.791969299316406, 'learning_rate': 4.81712962962963e-05, 'epoch': 11.5}


 24%|██▍       | 648/2700 [02:57<07:25,  4.60it/s]

{'loss': 0.033, 'grad_norm': 15.44568157196045, 'learning_rate': 4.75462962962963e-05, 'epoch': 12.0}



 24%|██▍       | 648/2700 [02:58<07:25,  4.60it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       1.00      1.00      1.00        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.63      0.75      0.69        16
          MC       0.50      1.00      0.67         2
          NC       0.77      0.74      0.75        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.50      0.67         4
          PS       1.00      1.00      1.00        40
          QT       0.33      0.50      0.40         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.94      0.94        18

   micro avg       0.88      0.88      0.88       178
   macro avg       0.83   

 25%|██▌       | 675/2700 [03:06<07:24,  4.56it/s]

{'loss': 0.1475, 'grad_norm': 1.5192660093307495, 'learning_rate': 4.6921296296296296e-05, 'epoch': 12.5}


 26%|██▌       | 702/2700 [03:12<06:57,  4.79it/s]

{'loss': 0.0479, 'grad_norm': 17.264196395874023, 'learning_rate': 4.62962962962963e-05, 'epoch': 13.0}



 26%|██▌       | 702/2700 [03:13<06:57,  4.79it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.92      0.85      0.88        13
          CN       1.00      1.00      1.00         2
          DP       0.86      1.00      0.92         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.83      0.94      0.88        16
          MC       0.50      1.00      0.67         2
          NC       0.76      0.81      0.79        27
          NM       1.00      1.00      1.00        32
          PC       0.50      0.50      0.50         4
          PS       0.98      1.00      0.99        40
          QT       0.33      0.50      0.40         2
          RL       0.50      0.17      0.25         6
          TI       0.94      0.94      0.94        18

   micro avg       0.88      0.89      0.89       178
   macro avg       0.80   

 27%|██▋       | 730/2700 [03:22<07:06,  4.61it/s]

{'loss': 0.0306, 'grad_norm': 0.35023757815361023, 'learning_rate': 4.56712962962963e-05, 'epoch': 13.5}


 28%|██▊       | 756/2700 [03:28<08:02,  4.03it/s]

{'loss': 0.0238, 'grad_norm': 0.870308518409729, 'learning_rate': 4.50462962962963e-05, 'epoch': 14.0}



 28%|██▊       | 756/2700 [03:29<08:02,  4.03it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.71      0.92      0.80        13
          CN       1.00      1.00      1.00         2
          DP       0.86      1.00      0.92         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.64      0.88      0.74        16
          MC       0.50      1.00      0.67         2
          NC       1.00      1.00      1.00        27
          NM       1.00      1.00      1.00        32
          PC       0.50      0.50      0.50         4
          PS       0.98      1.00      0.99        40
          QT       0.33      0.50      0.40         2
          RL       0.25      0.17      0.20         6
          TI       1.00      0.94      0.97        18

   micro avg       0.86      0.92      0.89       178
   macro avg       0.76   

 29%|██▉       | 784/2700 [03:37<06:39,  4.79it/s]

{'loss': 0.0152, 'grad_norm': 0.052217934280633926, 'learning_rate': 4.44212962962963e-05, 'epoch': 14.5}


 30%|███       | 810/2700 [03:43<07:10,  4.39it/s]

{'loss': 0.0317, 'grad_norm': 0.07361984997987747, 'learning_rate': 4.3796296296296294e-05, 'epoch': 15.0}



 30%|███       | 810/2700 [03:44<07:10,  4.39it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.78      0.88      0.82        16
          MC       0.40      1.00      0.57         2
          NC       0.89      0.89      0.89        27
          NM       1.00      1.00      1.00        32
          PC       0.75      0.75      0.75         4
          PS       1.00      1.00      1.00        40
          QT       0.25      0.50      0.33         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.94      0.94        18

   micro avg       0.87      0.92      0.89       178
   macro avg       0.79   

 31%|███       | 838/2700 [03:52<05:31,  5.62it/s]

{'loss': 0.02, 'grad_norm': 0.08334632962942123, 'learning_rate': 4.31712962962963e-05, 'epoch': 15.5}


 32%|███▏      | 864/2700 [03:58<07:08,  4.28it/s]

{'loss': 0.0161, 'grad_norm': 0.83243727684021, 'learning_rate': 4.25462962962963e-05, 'epoch': 16.0}



 32%|███▏      | 864/2700 [03:59<07:08,  4.28it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.80      0.92      0.86        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.78      0.88      0.82        16
          MC       0.40      1.00      0.57         2
          NC       1.00      1.00      1.00        27
          NM       1.00      1.00      1.00        32
          PC       0.75      0.75      0.75         4
          PS       1.00      1.00      1.00        40
          QT       0.25      0.50      0.33         2
          RL       0.50      0.33      0.40         6
          TI       1.00      0.94      0.97        18

   micro avg       0.89      0.93      0.91       178
   macro avg       0.79   

 33%|███▎      | 892/2700 [04:07<05:51,  5.14it/s]

{'loss': 0.012, 'grad_norm': 0.4018574655056, 'learning_rate': 4.1921296296296296e-05, 'epoch': 16.5}


 34%|███▍      | 918/2700 [04:13<05:35,  5.30it/s]

{'loss': 0.0081, 'grad_norm': 0.03940945118665695, 'learning_rate': 4.12962962962963e-05, 'epoch': 17.0}



 34%|███▍      | 918/2700 [04:14<05:35,  5.30it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.76      1.00      0.87        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.60      0.75      0.67        16
          MC       0.50      1.00      0.67         2
          NC       0.92      0.89      0.91        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       0.98      1.00      0.99        40
          QT       0.50      0.50      0.50         2
          RL       0.25      0.17      0.20         6
          TI       0.94      0.89      0.91        18

   micro avg       0.86      0.90      0.88       178
   macro avg       0.79   

 35%|███▌      | 946/2700 [04:23<06:57,  4.20it/s]

{'loss': 0.005, 'grad_norm': 0.024226795881986618, 'learning_rate': 4.06712962962963e-05, 'epoch': 17.5}


 36%|███▌      | 972/2700 [04:28<05:36,  5.14it/s]

{'loss': 0.0123, 'grad_norm': 0.03516397625207901, 'learning_rate': 4.00462962962963e-05, 'epoch': 18.0}



 36%|███▌      | 972/2700 [04:29<05:36,  5.14it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.93      1.00      0.96        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.60      0.75      0.67        16
          MC       0.40      1.00      0.57         2
          NC       0.93      0.93      0.93        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       0.98      1.00      0.99        40
          QT       0.50      0.50      0.50         2
          RL       0.25      0.17      0.20         6
          TI       0.94      0.89      0.91        18

   micro avg       0.87      0.90      0.89       178
   macro avg       0.79   

 37%|███▋      | 999/2700 [04:38<05:24,  5.23it/s]

{'loss': 0.0064, 'grad_norm': 0.040530260652303696, 'learning_rate': 3.94212962962963e-05, 'epoch': 18.5}


 38%|███▊      | 1026/2700 [04:44<06:37,  4.22it/s]

{'loss': 0.0074, 'grad_norm': 0.027649549767374992, 'learning_rate': 3.8796296296296295e-05, 'epoch': 19.0}



 38%|███▊      | 1026/2700 [04:45<06:37,  4.22it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.86      0.92      0.89        13
          CN       1.00      1.00      1.00         2
          DP       0.86      1.00      0.92         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.68      0.81      0.74        16
          MC       0.40      1.00      0.57         2
          NC       0.96      0.93      0.94        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       0.98      1.00      0.99        40
          QT       0.50      0.50      0.50         2
          RL       0.25      0.17      0.20         6
          TI       1.00      0.94      0.97        18

   micro avg       0.89      0.91      0.90       178
   macro avg       0.81   

 39%|███▉      | 1054/2700 [04:53<06:18,  4.35it/s]

{'loss': 0.0063, 'grad_norm': 0.09331066906452179, 'learning_rate': 3.81712962962963e-05, 'epoch': 19.5}


 40%|████      | 1080/2700 [04:59<05:54,  4.57it/s]

{'loss': 0.0073, 'grad_norm': 0.18588124215602875, 'learning_rate': 3.75462962962963e-05, 'epoch': 20.0}



 40%|████      | 1080/2700 [05:00<05:54,  4.57it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.80      0.92      0.86        13
          CN       1.00      1.00      1.00         2
          DP       0.67      1.00      0.80         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.75      0.94      0.83        16
          MC       0.40      1.00      0.57         2
          NC       0.96      0.93      0.94        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       0.98      1.00      0.99        40
          QT       0.50      0.50      0.50         2
          RL       0.25      0.17      0.20         6
          TI       0.83      0.83      0.83        18

   micro avg       0.87      0.91      0.89       178
   macro avg       0.78   

 41%|████      | 1108/2700 [05:08<06:32,  4.06it/s]

{'loss': 0.0091, 'grad_norm': 0.018527626991271973, 'learning_rate': 3.6921296296296297e-05, 'epoch': 20.5}


 42%|████▏     | 1134/2700 [05:14<06:32,  3.99it/s]

{'loss': 0.0063, 'grad_norm': 0.03002607636153698, 'learning_rate': 3.62962962962963e-05, 'epoch': 21.0}



 42%|████▏     | 1134/2700 [05:16<06:32,  3.99it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.80      0.92      0.86        13
          CN       1.00      1.00      1.00         2
          DP       0.67      1.00      0.80         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.78      0.88      0.82        16
          MC       0.40      1.00      0.57         2
          NC       0.89      0.89      0.89        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       1.00      0.50      0.67         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.92      0.91       178
   macro avg       0.87   

 43%|████▎     | 1162/2700 [05:24<04:59,  5.13it/s]

{'loss': 0.0043, 'grad_norm': 0.015370615758001804, 'learning_rate': 3.56712962962963e-05, 'epoch': 21.5}


 44%|████▍     | 1188/2700 [05:30<05:40,  4.45it/s]

{'loss': 0.0071, 'grad_norm': 0.021671447902917862, 'learning_rate': 3.50462962962963e-05, 'epoch': 22.0}



 44%|████▍     | 1188/2700 [05:31<05:40,  4.45it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.80      0.92      0.86        13
          CN       1.00      1.00      1.00         2
          DP       0.67      1.00      0.80         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.78      0.88      0.82        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       1.00      0.50      0.67         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.91      0.91       178
   macro avg       0.85   

 45%|████▌     | 1215/2700 [05:40<05:50,  4.24it/s]

{'loss': 0.0077, 'grad_norm': 0.07377448678016663, 'learning_rate': 3.44212962962963e-05, 'epoch': 22.5}


 46%|████▌     | 1242/2700 [05:45<05:10,  4.69it/s]

{'loss': 0.0041, 'grad_norm': 0.016496485099196434, 'learning_rate': 3.3796296296296295e-05, 'epoch': 23.0}



 46%|████▌     | 1242/2700 [05:46<05:10,  4.69it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.81      1.00      0.90        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.74      0.88      0.80        16
          MC       0.40      1.00      0.57         2
          NC       1.00      1.00      1.00        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.33      0.50      0.40         2
          RL       0.50      0.33      0.40         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.94      0.92       178
   macro avg       0.81   

 47%|████▋     | 1270/2700 [05:54<04:21,  5.48it/s]

{'loss': 0.0071, 'grad_norm': 0.028951099142432213, 'learning_rate': 3.31712962962963e-05, 'epoch': 23.5}


 48%|████▊     | 1296/2700 [06:00<05:48,  4.03it/s]

{'loss': 0.0069, 'grad_norm': 0.03373130410909653, 'learning_rate': 3.25462962962963e-05, 'epoch': 24.0}



 48%|████▊     | 1296/2700 [06:01<05:48,  4.03it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.80      0.92      0.86        13
          CN       1.00      1.00      1.00         2
          DP       0.86      1.00      0.92         6
          EQ       0.75      0.75      0.75         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.78      0.88      0.82        16
          MC       0.40      1.00      0.57         2
          NC       1.00      1.00      1.00        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.92      0.93      0.92       178
   macro avg       0.86   

 49%|████▉     | 1324/2700 [06:09<04:30,  5.09it/s]

{'loss': 0.0067, 'grad_norm': 0.016329729929566383, 'learning_rate': 3.19212962962963e-05, 'epoch': 24.5}


 50%|█████     | 1350/2700 [06:15<04:56,  4.55it/s]

{'loss': 0.005, 'grad_norm': 0.01729365810751915, 'learning_rate': 3.1296296296296295e-05, 'epoch': 25.0}



 50%|█████     | 1350/2700 [06:16<04:56,  4.55it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.81      1.00      0.90        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.78      0.88      0.82        16
          MC       0.40      1.00      0.57         2
          NC       0.90      0.96      0.93        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.33      0.50      0.40         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.89      0.93      0.91       178
   macro avg       0.82   

 51%|█████     | 1378/2700 [06:24<05:15,  4.19it/s]

{'loss': 0.0028, 'grad_norm': 0.05708681419491768, 'learning_rate': 3.06712962962963e-05, 'epoch': 25.5}


 52%|█████▏    | 1404/2700 [06:30<04:22,  4.94it/s]

{'loss': 0.0046, 'grad_norm': 0.03403954580426216, 'learning_rate': 3.00462962962963e-05, 'epoch': 26.0}



 52%|█████▏    | 1404/2700 [06:31<04:22,  4.94it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.81      1.00      0.90        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.80      1.00      0.89        16
          MC       0.40      1.00      0.57         2
          NC       0.90      0.96      0.93        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.33      0.50      0.40         2
          RL       0.50      0.33      0.40         6
          TI       0.94      0.89      0.91        18

   micro avg       0.88      0.94      0.91       178
   macro avg       0.81   

 53%|█████▎    | 1432/2700 [06:39<05:02,  4.19it/s]

{'loss': 0.0032, 'grad_norm': 0.010683266445994377, 'learning_rate': 2.9421296296296297e-05, 'epoch': 26.5}


 54%|█████▍    | 1458/2700 [06:46<04:55,  4.20it/s]

{'loss': 0.0024, 'grad_norm': 0.013154189102351665, 'learning_rate': 2.87962962962963e-05, 'epoch': 27.0}



 54%|█████▍    | 1458/2700 [06:47<04:55,  4.20it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.81      1.00      0.90        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.78      0.88      0.82        16
          MC       0.40      1.00      0.57         2
          NC       0.96      0.93      0.94        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.90      0.93      0.91       178
   macro avg       0.83   

 55%|█████▌    | 1486/2700 [06:55<04:35,  4.40it/s]

{'loss': 0.0021, 'grad_norm': 0.018668407574295998, 'learning_rate': 2.8171296296296297e-05, 'epoch': 27.5}


 56%|█████▌    | 1512/2700 [07:00<03:58,  4.99it/s]

{'loss': 0.0023, 'grad_norm': 0.009988418780267239, 'learning_rate': 2.75462962962963e-05, 'epoch': 28.0}



 56%|█████▌    | 1512/2700 [07:02<03:58,  4.99it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.81      1.00      0.90        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.78      0.88      0.82        16
          MC       0.40      1.00      0.57         2
          NC       0.96      0.93      0.94        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.90      0.93      0.91       178
   macro avg       0.83   

 57%|█████▋    | 1539/2700 [07:09<04:01,  4.81it/s]

{'loss': 0.0019, 'grad_norm': 0.008721800521016121, 'learning_rate': 2.69212962962963e-05, 'epoch': 28.5}


 58%|█████▊    | 1566/2700 [07:15<04:32,  4.16it/s]

{'loss': 0.0022, 'grad_norm': 0.013841909356415272, 'learning_rate': 2.6296296296296296e-05, 'epoch': 29.0}



 58%|█████▊    | 1566/2700 [07:17<04:32,  4.16it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.81      1.00      0.90        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.78      0.88      0.82        16
          MC       0.40      1.00      0.57         2
          NC       0.96      0.93      0.94        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.90      0.93      0.91       178
   macro avg       0.83   

 59%|█████▉    | 1594/2700 [07:25<04:00,  4.60it/s]

{'loss': 0.0021, 'grad_norm': 0.018391083925962448, 'learning_rate': 2.5671296296296297e-05, 'epoch': 29.5}


 60%|██████    | 1620/2700 [07:30<03:59,  4.51it/s]

{'loss': 0.0019, 'grad_norm': 0.012724419124424458, 'learning_rate': 2.5046296296296296e-05, 'epoch': 30.0}



 60%|██████    | 1620/2700 [07:32<03:59,  4.51it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.71      0.92      0.80        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       1.00      1.00      1.00        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.90      0.94      0.92       178
   macro avg       0.83   

 61%|██████    | 1648/2700 [07:40<02:52,  6.11it/s]

{'loss': 0.0021, 'grad_norm': 0.03486620634794235, 'learning_rate': 2.4421296296296298e-05, 'epoch': 30.5}


 62%|██████▏   | 1674/2700 [07:46<04:07,  4.15it/s]

{'loss': 0.0018, 'grad_norm': 0.010272719897329807, 'learning_rate': 2.3796296296296296e-05, 'epoch': 31.0}



 62%|██████▏   | 1674/2700 [07:47<04:07,  4.15it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.93      0.96      0.95        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.94      0.92       178
   macro avg       0.83   

 63%|██████▎   | 1702/2700 [07:55<03:55,  4.24it/s]

{'loss': 0.0018, 'grad_norm': 0.01106494665145874, 'learning_rate': 2.3171296296296298e-05, 'epoch': 31.5}


 64%|██████▍   | 1728/2700 [08:01<03:39,  4.43it/s]

{'loss': 0.0019, 'grad_norm': 0.013011044822633266, 'learning_rate': 2.25462962962963e-05, 'epoch': 32.0}



 64%|██████▍   | 1728/2700 [08:02<03:39,  4.43it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.71      0.92      0.80        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.89      0.89      0.89        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.89      0.93      0.91       178
   macro avg       0.83   

 65%|██████▌   | 1756/2700 [08:10<03:37,  4.34it/s]

{'loss': 0.0018, 'grad_norm': 0.009713856503367424, 'learning_rate': 2.1921296296296298e-05, 'epoch': 32.5}


 66%|██████▌   | 1782/2700 [08:16<03:27,  4.42it/s]

{'loss': 0.0017, 'grad_norm': 0.00991462729871273, 'learning_rate': 2.1296296296296296e-05, 'epoch': 33.0}



 66%|██████▌   | 1782/2700 [08:17<03:27,  4.42it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.71      0.92      0.80        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.93      0.96      0.95        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.94      0.92       178
   macro avg       0.83   

 67%|██████▋   | 1810/2700 [08:26<03:00,  4.94it/s]

{'loss': 0.0017, 'grad_norm': 0.008174682967364788, 'learning_rate': 2.0671296296296298e-05, 'epoch': 33.5}


 68%|██████▊   | 1836/2700 [08:31<02:52,  5.00it/s]

{'loss': 0.0014, 'grad_norm': 0.008447281084954739, 'learning_rate': 2.0046296296296296e-05, 'epoch': 34.0}



 68%|██████▊   | 1836/2700 [08:33<02:52,  5.00it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.71      0.92      0.80        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.93      0.96      0.95        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.94      0.92       178
   macro avg       0.83   

 69%|██████▉   | 1864/2700 [08:41<02:39,  5.24it/s]

{'loss': 0.0015, 'grad_norm': 0.012660518288612366, 'learning_rate': 1.9421296296296298e-05, 'epoch': 34.5}


 70%|███████   | 1890/2700 [08:46<02:53,  4.67it/s]

{'loss': 0.0017, 'grad_norm': 0.009579584002494812, 'learning_rate': 1.8796296296296296e-05, 'epoch': 35.0}



 70%|███████   | 1890/2700 [08:47<02:53,  4.67it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.71      0.92      0.80        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.93      0.91       178
   macro avg       0.83   

 71%|███████   | 1918/2700 [08:56<02:48,  4.63it/s]

{'loss': 0.0016, 'grad_norm': 0.009001577273011208, 'learning_rate': 1.8171296296296298e-05, 'epoch': 35.5}


 72%|███████▏  | 1944/2700 [09:01<02:41,  4.69it/s]

{'loss': 0.0015, 'grad_norm': 0.011994605883955956, 'learning_rate': 1.7546296296296297e-05, 'epoch': 36.0}



 72%|███████▏  | 1944/2700 [09:02<02:41,  4.69it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.71      0.92      0.80        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.93      0.91       178
   macro avg       0.83   

 73%|███████▎  | 1972/2700 [09:11<02:53,  4.21it/s]

{'loss': 0.0016, 'grad_norm': 0.0067756627686321735, 'learning_rate': 1.6921296296296295e-05, 'epoch': 36.5}


 74%|███████▍  | 1998/2700 [09:17<02:44,  4.27it/s]

{'loss': 0.0016, 'grad_norm': 0.009427295997738838, 'learning_rate': 1.62962962962963e-05, 'epoch': 37.0}



 74%|███████▍  | 1998/2700 [09:18<02:44,  4.27it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.76      1.00      0.87        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.93      0.96      0.95        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.95      0.93       178
   macro avg       0.83   

 75%|███████▌  | 2026/2700 [09:26<02:24,  4.67it/s]

{'loss': 0.0015, 'grad_norm': 0.007617342751473188, 'learning_rate': 1.56712962962963e-05, 'epoch': 37.5}


 76%|███████▌  | 2052/2700 [09:32<02:22,  4.55it/s]

{'loss': 0.0014, 'grad_norm': 0.006592307239770889, 'learning_rate': 1.5046296296296297e-05, 'epoch': 38.0}



 76%|███████▌  | 2052/2700 [09:33<02:22,  4.55it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.76      1.00      0.87        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.93      0.96      0.95        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.95      0.93       178
   macro avg       0.83   

 77%|███████▋  | 2079/2700 [09:41<02:24,  4.29it/s]

{'loss': 0.0044, 'grad_norm': 0.011196748353540897, 'learning_rate': 1.4421296296296297e-05, 'epoch': 38.5}


 78%|███████▊  | 2106/2700 [09:47<01:38,  6.05it/s]

{'loss': 0.0016, 'grad_norm': 0.02472580410540104, 'learning_rate': 1.3796296296296297e-05, 'epoch': 39.0}



 78%|███████▊  | 2106/2700 [09:48<01:38,  6.05it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.93      0.96      0.95        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.90      0.94      0.92       178
   macro avg       0.85   

 79%|███████▉  | 2134/2700 [09:57<01:56,  4.85it/s]

{'loss': 0.0016, 'grad_norm': 0.006823010742664337, 'learning_rate': 1.3171296296296299e-05, 'epoch': 39.5}


 80%|████████  | 2160/2700 [10:02<01:52,  4.82it/s]

{'loss': 0.0014, 'grad_norm': 0.007592393551021814, 'learning_rate': 1.2546296296296297e-05, 'epoch': 40.0}



 80%|████████  | 2160/2700 [10:03<01:52,  4.82it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.71      0.92      0.80        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       1.00      1.00      1.00        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.91      0.94      0.93       178
   macro avg       0.85   

 81%|████████  | 2188/2700 [10:12<01:53,  4.50it/s]

{'loss': 0.0023, 'grad_norm': 0.010903012938797474, 'learning_rate': 1.1921296296296297e-05, 'epoch': 40.5}


 82%|████████▏ | 2214/2700 [10:17<01:50,  4.40it/s]

{'loss': 0.0014, 'grad_norm': 0.007080866955220699, 'learning_rate': 1.1296296296296297e-05, 'epoch': 41.0}



 82%|████████▏ | 2214/2700 [10:19<01:50,  4.40it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.90      0.92      0.91       178
   macro avg       0.85   

 83%|████████▎ | 2242/2700 [10:27<01:34,  4.82it/s]

{'loss': 0.0014, 'grad_norm': 0.007955792360007763, 'learning_rate': 1.0671296296296297e-05, 'epoch': 41.5}


 84%|████████▍ | 2268/2700 [10:33<01:46,  4.07it/s]

{'loss': 0.0013, 'grad_norm': 0.005066937766969204, 'learning_rate': 1.0046296296296297e-05, 'epoch': 42.0}



 84%|████████▍ | 2268/2700 [10:34<01:46,  4.07it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       0.94      0.89      0.91        18

   micro avg       0.90      0.92      0.91       178
   macro avg       0.85   

 85%|████████▌ | 2296/2700 [10:43<01:29,  4.49it/s]

{'loss': 0.0013, 'grad_norm': 0.01598438248038292, 'learning_rate': 9.421296296296296e-06, 'epoch': 42.5}


 86%|████████▌ | 2322/2700 [10:49<01:21,  4.66it/s]

{'loss': 0.0014, 'grad_norm': 0.006875732447952032, 'learning_rate': 8.796296296296297e-06, 'epoch': 43.0}



 86%|████████▌ | 2322/2700 [10:50<01:21,  4.66it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.91      0.93      0.92       178
   macro avg       0.85   

 87%|████████▋ | 2350/2700 [10:58<01:13,  4.76it/s]

{'loss': 0.0014, 'grad_norm': 0.009354098699986935, 'learning_rate': 8.171296296296296e-06, 'epoch': 43.5}


 88%|████████▊ | 2376/2700 [11:04<01:13,  4.44it/s]

{'loss': 0.0041, 'grad_norm': 0.0066269333474338055, 'learning_rate': 7.569444444444444e-06, 'epoch': 44.0}



 88%|████████▊ | 2376/2700 [11:05<01:13,  4.44it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.91      0.93      0.92       178
   macro avg       0.85   

 89%|████████▉ | 2403/2700 [11:13<01:00,  4.93it/s]

{'loss': 0.0014, 'grad_norm': 0.009227657690644264, 'learning_rate': 6.944444444444445e-06, 'epoch': 44.5}


 90%|█████████ | 2430/2700 [11:19<01:04,  4.16it/s]

{'loss': 0.0014, 'grad_norm': 0.006560611538589001, 'learning_rate': 6.319444444444445e-06, 'epoch': 45.0}



 90%|█████████ | 2430/2700 [11:20<01:04,  4.16it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.91      0.93      0.92       178
   macro avg       0.85   

 91%|█████████ | 2458/2700 [11:28<00:47,  5.06it/s]

{'loss': 0.0013, 'grad_norm': 0.005644718650728464, 'learning_rate': 5.694444444444445e-06, 'epoch': 45.5}


 92%|█████████▏| 2484/2700 [11:34<00:46,  4.64it/s]

{'loss': 0.0013, 'grad_norm': 0.006196257192641497, 'learning_rate': 5.069444444444445e-06, 'epoch': 46.0}



 92%|█████████▏| 2484/2700 [11:35<00:46,  4.64it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       1.00      0.67      0.80         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.91      0.93      0.92       178
   macro avg       0.85   

 93%|█████████▎| 2512/2700 [11:43<00:40,  4.64it/s]

{'loss': 0.0013, 'grad_norm': 0.008749344386160374, 'learning_rate': 4.444444444444445e-06, 'epoch': 46.5}


 94%|█████████▍| 2538/2700 [11:49<00:34,  4.71it/s]

{'loss': 0.0013, 'grad_norm': 0.007088693790137768, 'learning_rate': 3.819444444444445e-06, 'epoch': 47.0}



 94%|█████████▍| 2538/2700 [11:50<00:34,  4.71it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.93      0.91       178
   macro avg       0.83   

 95%|█████████▌| 2566/2700 [11:58<00:28,  4.68it/s]

{'loss': 0.0012, 'grad_norm': 0.007950672879815102, 'learning_rate': 3.1944444444444443e-06, 'epoch': 47.5}


 96%|█████████▌| 2592/2700 [12:04<00:24,  4.49it/s]

{'loss': 0.0015, 'grad_norm': 0.013601520098745823, 'learning_rate': 2.5694444444444443e-06, 'epoch': 48.0}



 96%|█████████▌| 2592/2700 [12:05<00:24,  4.49it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       0.92      0.85      0.88        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.90      0.93      0.91       178
   macro avg       0.83   

 97%|█████████▋| 2620/2700 [12:13<00:14,  5.66it/s]

{'loss': 0.0012, 'grad_norm': 0.006208320148289204, 'learning_rate': 1.9444444444444444e-06, 'epoch': 48.5}


 98%|█████████▊| 2646/2700 [12:19<00:10,  4.99it/s]

{'loss': 0.0015, 'grad_norm': 0.010723874904215336, 'learning_rate': 1.3194444444444444e-06, 'epoch': 49.0}



 98%|█████████▊| 2646/2700 [12:20<00:10,  4.99it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       1.00      1.00      1.00        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.91      0.95      0.93       178
   macro avg       0.84   

 99%|█████████▉| 2674/2700 [12:28<00:04,  5.21it/s]

{'loss': 0.0012, 'grad_norm': 0.00562496529892087, 'learning_rate': 6.944444444444445e-07, 'epoch': 49.5}


100%|██████████| 2700/2700 [12:34<00:00,  5.34it/s]

{'loss': 0.0013, 'grad_norm': 0.007181345950812101, 'learning_rate': 6.944444444444445e-08, 'epoch': 50.0}



100%|██████████| 2700/2700 [12:37<00:00,  5.34it/s]

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.75      0.92      0.83        13
          CN       1.00      1.00      1.00         2
          DP       0.75      1.00      0.86         6
          EQ       0.80      1.00      0.89         4
          GD       1.00      1.00      1.00         1
          JB       0.67      0.67      0.67         3
          LC       0.84      1.00      0.91        16
          MC       0.40      1.00      0.57         2
          NC       1.00      1.00      1.00        27
          NM       1.00      1.00      1.00        32
          PC       1.00      0.75      0.86         4
          PS       1.00      1.00      1.00        40
          QT       0.50      0.50      0.50         2
          RL       0.67      0.33      0.44         6
          TI       1.00      0.94      0.97        18

   micro avg       0.91      0.95      0.93       178
   macro avg       0.84   

100%|██████████| 2700/2700 [12:40<00:00,  5.34it/s]

{'train_runtime': 760.3166, 'train_samples_per_second': 14.205, 'train_steps_per_second': 3.551, 'train_loss': 0.12428200391293676, 'epoch': 50.0}


100%|██████████| 2700/2700 [12:40<00:00,  3.55it/s]


TrainOutput(global_step=2700, training_loss=0.12428200391293676, metrics={'train_runtime': 760.3166, 'train_samples_per_second': 14.205, 'train_steps_per_second': 3.551, 'total_flos': 358534639166760.0, 'train_loss': 0.12428200391293676, 'epoch': 50.0})

# **Result(batch 4, epoch 100, max_length 100)**


              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       0.67      0.60      0.63        10
          CN       0.50      0.50      0.50         2
          DP       0.75      0.75      0.75         8
          EQ       1.00      0.60      0.75         5
          GD       1.00      1.00      1.00         2
          JB       0.86      1.00      0.92         6
          LC       0.81      0.89      0.85        19
          MC       1.00      1.00      1.00         1
          NC       0.95      0.90      0.93        21
          NM       0.93      1.00      0.97        28
          PC       0.33      0.50      0.40         2
          PS       1.00      1.00      1.00        35
          QT       0.67      0.80      0.73         5
          RL       0.75      0.60      0.67         5
          TI       0.80      0.84      0.82        19

   micro avg       0.87      0.88      0.87       170

   macro avg       0.81      0.81      0.81       170
   
weighted avg       0.87      0.88      0.87       170


{'eval_loss': 0.18080152571201324, 'eval_precision': 0.8670520231213873, 'eval_recall': 0.8823529411764706, 'eval_f1': 0.8746355685131196, 'eval_accuracy': 0.9728424864212432, 'eval_runtime': 1.3323, 'eval_samples_per_second': 40.531, 'eval_steps_per_second': 10.508, 'epoch': 100.0}


# **Result(batch 4, epoch 100, max_length 512)**

              precision    recall  f1-score   support

          BP       0.50      0.67      0.57         3
          CN       1.00      1.00      1.00         3
          DP       0.90      0.90      0.90        10
          EQ       0.00      0.00      0.00         0
          JB       0.00      0.00      0.00         2
          LC       0.58      0.70      0.64        20
          MC       0.00      0.00      0.00         1
          NC       0.88      0.88      0.88         8
          NM       0.98      1.00      0.99        49
          PC       0.50      1.00      0.67         1
          PS       0.93      0.98      0.96        66
          QT       0.50      0.50      0.50         4
          RL       0.33      0.17      0.22         6
          TI       0.72      0.68      0.70        19
          

   micro avg       0.84      0.86      0.85       192

   macro avg       0.56      0.61      0.57       192
   
weighted avg       0.83      0.86      0.85       192

# **Result(batch 1, epoch 100, max_length 512)**

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         3
          BP       0.78      0.70      0.74        20
          CN       1.00      1.00      1.00         1
          DP       0.60      0.86      0.71         7
          EQ       0.91      0.77      0.83        13
          GD       0.60      0.60      0.60         5
          JB       0.83      0.71      0.77         7
          LC       0.76      0.93      0.83        27
          MC       0.67      1.00      0.80         2
          NC       0.82      0.88      0.85        32
          NM       1.00      1.00      1.00        35
          PC       0.33      0.33      0.33         3
          PS       0.95      1.00      0.98        41
          QT       1.00      1.00      1.00         8
          RL       0.50      0.33      0.40         3
          TI       0.60      0.62      0.61        24

   micro avg       0.82      0.86      0.84       231

   macro avg       0.77      0.80      0.78       231
   
weighted avg       0.83      0.86      0.84       231

# **Result(batch 4, epoch 20, max_length 100)**

              precision    recall  f1-score   support

          AG       1.00      1.00      1.00         2
          BP       1.00      1.00      1.00         5
          CN       1.00      1.00      1.00         1
          DP       0.56      0.71      0.63         7
          EQ       1.00      1.00      1.00         3
          GD       0.60      0.75      0.67         4
          JB       0.50      0.67      0.57         3
          LC       0.70      0.82      0.76        17
          MC       1.00      1.00      1.00         1
          NC       0.91      1.00      0.95        20
          NM       0.94      0.97      0.95        30
          PC       1.00      1.00      1.00         1
          PS       1.00      1.00      1.00        36
          QT       0.80      1.00      0.89         4
          RL       1.00      0.33      0.50         3
          TI       0.85      0.85      0.85        13

   micro avg       0.87      0.92      0.89       150
   macro avg       0.87      0.88      0.86       150
weighted avg       0.88      0.92      0.89       150

{'eval_loss': 0.12187973409891129, 'eval_precision': 0.8679245283018868, 'eval_recall': 0.92, 'eval_f1': 0.8932038834951457, 'eval_accuracy': 0.9761215629522432, 'eval_runtime': 1.233, 'eval_samples_per_second': 43.795, 'eval_steps_per_second': 11.354, 'epoch': 20.0}

In [43]:
# Build the checkpoint file name from the run settings:
#   <model>.<epochs>_epochs.<max_length>_length.<fold>_fold.pth
fn_prefix = '.'.join([
    pretrained_model_name,
    f"{config.n_epochs_per_fold}_epochs",
    f"{config.max_length}_length",
    f"{n_fold}_fold",
    "pth",
])
model_fn = os.path.join(config.model_folder, fn_prefix)

# torch.save does not create missing parent directories; make sure the target
# folder exists so a long training run is not lost to a failed save.
if config.model_folder:
    os.makedirs(config.model_folder, exist_ok=True)

# NOTE(review): the checkpoint embeds `config` (an argparse.Namespace), so it is
# a pickled Python object rather than a pure tensor file; recent PyTorch
# versions presumably need torch.load(..., weights_only=False) to read it back
# -- confirm against the loading code.
torch.save({
    # 'rnn', 'cnn' and 'vocab' are stored as None; presumably placeholders kept
    # so the checkpoint layout matches what the loader expects -- TODO confirm.
    'rnn': None,
    'cnn': None,
    # Weights of the model held by the trainer.
    'bert': trainer.model.state_dict(),
    'config': config,
    'vocab': None,
    'classes': index_to_label,
    # Taken from data_args, not from the `pretrained_model_name` variable used
    # in the file name above -- the two may differ; verify which one consumers need.
    'pretrained_model_name': data_args['pretrained_model_name']
}, model_fn)

In [44]:
# Display the path passed to torch.save in the previous cell.
model_fn

'models/klue_roberta-base.50_epochs.512_length.1_fold.pth'