In [1]:
# Run the `nvidia-smi` shell command (IPython `!` escape) to print the GPU,
# driver and CUDA status visible to this kernel.
!nvidia-smi

Sun Mar  9 02:03:40 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.127.08             Driver Version: 550.127.08     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100S-PCIE-32GB          On  |   00000000:81:00.0 Off |                    0 |
| N/A   32C    P0             25W /  250W |       1MiB /  32768MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                     

In [2]:
import flair
import matplotlib.pyplot as plt
import os
import pandas as pd

from codealltag_data_processor_v2025 import CodealltagDataProcessor
from flair.data import Corpus, Dictionary
from flair.datasets import CSVClassificationCorpus
from flair.embeddings import TransformerDocumentEmbeddings
from flair.models import TextClassifier
from flair.nn import Model
from flair.trainers import ModelTrainer
from pathlib import Path

In [3]:
# Directory Flair uses as its cache root.
# A non-empty FLAIR_CACHE_ROOT environment variable takes precedence so the
# notebook is not tied to one user's home directory; when it is unset (or
# empty) the original hard-coded location is used, so existing runs behave
# exactly as before.
flair.cache_root = Path(
    os.environ.get("FLAIR_CACHE_ROOT") or os.path.join(*['/home', 's81481', '.flair'])
)

In [4]:
# Project-specific data processor, built for data version '20220513' from the
# YAML config file below. It is used later to derive the train/dev/test splits.
# NOTE(review): `config_path` is passed as a one-element list of path parts —
# confirm this matches what CodealltagDataProcessor expects.
cdp_2022 = CodealltagDataProcessor(data_version='20220513', config_path=['codealltag_data_processor.yml'])

In [5]:
# Tag selecting which syntheticity dataset to train on. It is interpolated into
# the input CSV name (data_syntheticity_dataset_3000_<tag>.csv) and into the
# per-fold output directory under logs/.
# Supported values are listed in the trailing comment.
model_tag = "G29B" # "MT5" | "L318B" | "G29B"

In [6]:
def prepare_flair_corpus(data_folder: Path, splits=None) -> CSVClassificationCorpus:
    """Write the train/dev/test splits as TSV files and load them as a Flair corpus.

    For each split, the ``TextTokenized`` and ``Type`` columns are written to
    ``<data_folder>/<split>.csv`` (tab-separated, header row ``text``/``label``),
    then the three files are loaded into a ``CSVClassificationCorpus``.

    Args:
        data_folder: Directory that receives ``train.csv``, ``dev.csv`` and
            ``test.csv``. It is created if it does not exist yet.
        splits: Optional mapping with the keys ``"train"``, ``"dev"`` and
            ``"test"``, each holding a DataFrame with ``TextTokenized`` and
            ``Type`` columns. When omitted, the notebook-level variables
            ``train_df``, ``dev_df`` and ``test_df`` are used, which is the
            original behaviour of this function.

    Returns:
        The corpus read back from the three files, with label type ``"label"``.

    Raises:
        KeyError: If ``splits`` is given but lacks one of the required keys.
    """
    split_names = ("train", "dev", "test")

    if splits is None:
        # Backward-compatible default: fall back to the frames the calling
        # cell left in the notebook namespace.
        splits = {"train": train_df, "dev": dev_df, "test": test_df}

    missing = [name for name in split_names if name not in splits]
    if missing:
        raise KeyError(f"splits is missing required entries: {missing}")

    data_folder = Path(data_folder)
    data_folder.mkdir(parents=True, exist_ok=True)

    for name in split_names:
        # Column order matters: it must match column_name_map below.
        splits[name][['TextTokenized', 'Type']].to_csv(
            data_folder / f"{name}.csv",
            sep='\t',
            index=False,
            header=['text', 'label'],
        )

    column_name_map = {0: "text", 1: "label"}
    label_type = "label"
    corpus = CSVClassificationCorpus(
        data_folder,
        column_name_map,
        skip_header=True,  # the files start with the text/label header row
        delimiter="\t",
        label_type=label_type,
        train_file='train.csv',
        dev_file='dev.csv',
        test_file='test.csv'
    )
    return corpus

In [7]:
# Fine-tune one 'deepset/gelectra-large' text classifier per split k = 1..5.
# Each run writes its checkpoints, best model and TensorBoard logs to its own
# directory under logs/.

# Loop-invariant inputs are prepared once instead of on every iteration.
syntheticity_data = pd.read_csv(f"data_syntheticity_dataset_3000_{model_tag}.csv", index_col=0)
tmp_data_dir = Path("tmp_data_dir")

for i in range(0, 5):
    k = i + 1  # 1-based split id, passed to the splitter and used in the output path

    # A fresh copy is handed over so that every split starts from the unmodified
    # data, exactly as when the CSV was re-read on each iteration.
    # NOTE(review): 500 is presumably the dev/test split size and k the fold
    # index — confirm against CodealltagDataProcessor.
    dataset = cdp_2022.get_train_dev_test_datasetdict_for_syntheticity_dataset(syntheticity_data.copy(), 500, k)

    # These names are read as notebook globals by prepare_flair_corpus().
    train_df = dataset["train"].to_pandas()
    dev_df = dataset["dev"].to_pandas()
    test_df = dataset["test"].to_pandas()

    # Start from an empty scratch directory so no file from a previous split survives.
    tmp_data_dir.mkdir(parents=True, exist_ok=True)
    for file in tmp_data_dir.glob("*"):
        if file.is_file():
            file.unlink()

    # NOTE(review): the saved cell output shows runs under logs/PDSC_V2/...,
    # while this path uses logs/PDSC/... — confirm which directory is intended.
    model_dir_path = os.path.join(*["logs", "PDSC", "GELECTRA", model_tag, str(len(syntheticity_data)//1000)+"K", "k"+str(k)])
    os.makedirs(model_dir_path, exist_ok=True)

    corpus: Corpus = prepare_flair_corpus(data_folder=tmp_data_dir)
    label_dict: Dictionary = corpus.make_label_dictionary(label_type="label")
    classifier = TextClassifier(
        document_embeddings=TransformerDocumentEmbeddings('deepset/gelectra-large', fine_tune=True),
        label_dictionary=label_dict,
        label_type='label',
        multi_label=False
    )
    trainer: ModelTrainer = ModelTrainer(classifier, corpus)
    trainer.fine_tune(
        model_dir_path,
        learning_rate=5.0e-6,
        mini_batch_size=4,
        max_epochs=10,
        checkpoint=True,
        write_weights=True,
        use_tensorboard=True,
        tensorboard_log_dir=model_dir_path,
        save_final_model=False,
        use_final_model_for_eval=False
    )

2025-03-09 02:06:16,599 Reading data from tmp_data_dir
2025-03-09 02:06:16,599 Train: tmp_data_dir/train.csv
2025-03-09 02:06:16,599 Dev: tmp_data_dir/dev.csv
2025-03-09 02:06:16,600 Test: tmp_data_dir/test.csv
2025-03-09 02:06:16,613 Computing label dictionary. Progress:


2000it [00:01, 1654.57it/s]

2025-03-09 02:06:17,848 Dictionary created for label 'label' with 3 values: ORIG (seen 1000 times), PSEUD (seen 1000 times)



  return torch.load(checkpoint_file, map_location="cpu")


2025-03-09 02:06:26,476 tensorboard logging path is logs/PDSC_V2/GELECTRA/G29B/3K/k1
2025-03-09 02:06:32,000 ----------------------------------------------------------------------------------------------------
2025-03-09 02:06:32,002 Model: "TextClassifier(
  (decoder): Linear(in_features=1024, out_features=3, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
  (locked_dropout): LockedDropout(p=0.0)
  (word_dropout): WordDropout(p=0.0)
  (loss_function): CrossEntropyLoss()
  (document_embeddings): TransformerDocumentEmbeddings(
    (model): ElectraModel(
      (embeddings): ElectraEmbeddings(
        (word_embeddings): Embedding(31102, 1024, padding_idx=0)
        (position_embeddings): Embedding(512, 1024)
        (token_type_embeddings): Embedding(2, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): ElectraEncoder(
        (layer): ModuleList(
          (0-23): 24 x ElectraLa

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.93it/s]

2025-03-09 02:07:54,408 Evaluating as a multi-label problem: False
2025-03-09 02:07:54,417 DEV : loss 1.0070340633392334 - f1-score (micro avg)  0.5





2025-03-09 02:07:54,673 BAD EPOCHS (no improvement): 4
2025-03-09 02:07:59,914 saving best model
2025-03-09 02:08:06,394 ----------------------------------------------------------------------------------------------------
2025-03-09 02:08:14,151 epoch 2 - iter 50/500 - loss 0.32939278 - samples/sec: 26.18 - lr: 0.000005
2025-03-09 02:08:21,789 epoch 2 - iter 100/500 - loss 0.25690874 - samples/sec: 27.44 - lr: 0.000005
2025-03-09 02:08:29,533 epoch 2 - iter 150/500 - loss 0.23036828 - samples/sec: 27.08 - lr: 0.000005
2025-03-09 02:08:37,524 epoch 2 - iter 200/500 - loss 0.21800934 - samples/sec: 26.19 - lr: 0.000005
2025-03-09 02:08:44,919 epoch 2 - iter 250/500 - loss 0.21079357 - samples/sec: 28.36 - lr: 0.000005
2025-03-09 02:08:52,054 epoch 2 - iter 300/500 - loss 0.20649138 - samples/sec: 29.45 - lr: 0.000005
2025-03-09 02:08:59,570 epoch 2 - iter 350/500 - loss 0.20188078 - samples/sec: 27.89 - lr: 0.000005
2025-03-09 02:09:07,451 epoch 2 - iter 400/500 - loss 0.19765101 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.79it/s]


2025-03-09 02:09:28,689 Evaluating as a multi-label problem: False
2025-03-09 02:09:28,697 DEV : loss 0.1654728800058365 - f1-score (micro avg)  0.678
2025-03-09 02:09:28,958 BAD EPOCHS (no improvement): 4
2025-03-09 02:09:34,424 saving best model
2025-03-09 02:09:39,842 ----------------------------------------------------------------------------------------------------
2025-03-09 02:09:47,729 epoch 3 - iter 50/500 - loss 0.12073174 - samples/sec: 25.73 - lr: 0.000004
2025-03-09 02:09:56,020 epoch 3 - iter 100/500 - loss 0.12651123 - samples/sec: 25.51 - lr: 0.000004
2025-03-09 02:10:03,175 epoch 3 - iter 150/500 - loss 0.12101520 - samples/sec: 29.34 - lr: 0.000004
2025-03-09 02:10:10,861 epoch 3 - iter 200/500 - loss 0.11591819 - samples/sec: 27.25 - lr: 0.000004
2025-03-09 02:10:18,486 epoch 3 - iter 250/500 - loss 0.11552274 - samples/sec: 27.52 - lr: 0.000004
2025-03-09 02:10:26,337 epoch 3 - iter 300/500 - loss 0.11408100 - samples/sec: 26.65 - lr: 0.000004
2025-03-09 02:10:34,13

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.89it/s]


2025-03-09 02:11:02,915 Evaluating as a multi-label problem: False
2025-03-09 02:11:02,922 DEV : loss 0.13278734683990479 - f1-score (micro avg)  0.808
2025-03-09 02:11:03,460 BAD EPOCHS (no improvement): 4
2025-03-09 02:11:08,503 saving best model
2025-03-09 02:11:13,695 ----------------------------------------------------------------------------------------------------
2025-03-09 02:11:21,056 epoch 4 - iter 50/500 - loss 0.08089427 - samples/sec: 27.56 - lr: 0.000004
2025-03-09 02:11:28,706 epoch 4 - iter 100/500 - loss 0.08241286 - samples/sec: 27.40 - lr: 0.000004
2025-03-09 02:11:36,589 epoch 4 - iter 150/500 - loss 0.06939714 - samples/sec: 26.58 - lr: 0.000004
2025-03-09 02:11:44,497 epoch 4 - iter 200/500 - loss 0.07871837 - samples/sec: 26.50 - lr: 0.000004
2025-03-09 02:11:52,643 epoch 4 - iter 250/500 - loss 0.07970293 - samples/sec: 27.73 - lr: 0.000004
2025-03-09 02:12:00,567 epoch 4 - iter 300/500 - loss 0.08215805 - samples/sec: 26.42 - lr: 0.000004
2025-03-09 02:12:08,1

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.80it/s]

2025-03-09 02:12:37,550 Evaluating as a multi-label problem: False
2025-03-09 02:12:37,558 DEV : loss 0.21050098538398743 - f1-score (micro avg)  0.8





2025-03-09 02:12:38,127 BAD EPOCHS (no improvement): 4
2025-03-09 02:12:43,252 ----------------------------------------------------------------------------------------------------
2025-03-09 02:12:51,099 epoch 5 - iter 50/500 - loss 0.02374298 - samples/sec: 25.86 - lr: 0.000003
2025-03-09 02:12:58,738 epoch 5 - iter 100/500 - loss 0.03177641 - samples/sec: 27.93 - lr: 0.000003
2025-03-09 02:13:05,956 epoch 5 - iter 150/500 - loss 0.03555310 - samples/sec: 29.08 - lr: 0.000003
2025-03-09 02:13:14,278 epoch 5 - iter 200/500 - loss 0.03235690 - samples/sec: 25.10 - lr: 0.000003
2025-03-09 02:13:21,637 epoch 5 - iter 250/500 - loss 0.03199446 - samples/sec: 28.51 - lr: 0.000003
2025-03-09 02:13:29,206 epoch 5 - iter 300/500 - loss 0.03452832 - samples/sec: 27.68 - lr: 0.000003
2025-03-09 02:13:37,209 epoch 5 - iter 350/500 - loss 0.03903043 - samples/sec: 26.11 - lr: 0.000003
2025-03-09 02:13:44,621 epoch 5 - iter 400/500 - loss 0.04262837 - samples/sec: 28.32 - lr: 0.000003
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.81it/s]

2025-03-09 02:14:06,098 Evaluating as a multi-label problem: False
2025-03-09 02:14:06,106 DEV : loss 0.20196621119976044 - f1-score (micro avg)  0.806





2025-03-09 02:14:06,666 BAD EPOCHS (no improvement): 4
2025-03-09 02:14:11,705 ----------------------------------------------------------------------------------------------------
2025-03-09 02:14:18,864 epoch 6 - iter 50/500 - loss 0.00673004 - samples/sec: 28.33 - lr: 0.000003
2025-03-09 02:14:26,366 epoch 6 - iter 100/500 - loss 0.01527146 - samples/sec: 27.95 - lr: 0.000003
2025-03-09 02:14:34,166 epoch 6 - iter 150/500 - loss 0.02118177 - samples/sec: 26.85 - lr: 0.000003
2025-03-09 02:14:41,634 epoch 6 - iter 200/500 - loss 0.02213852 - samples/sec: 28.09 - lr: 0.000003
2025-03-09 02:14:49,306 epoch 6 - iter 250/500 - loss 0.02337053 - samples/sec: 27.45 - lr: 0.000003
2025-03-09 02:14:57,695 epoch 6 - iter 300/500 - loss 0.02457398 - samples/sec: 24.93 - lr: 0.000002
2025-03-09 02:15:05,539 epoch 6 - iter 350/500 - loss 0.03054396 - samples/sec: 26.69 - lr: 0.000002
2025-03-09 02:15:13,169 epoch 6 - iter 400/500 - loss 0.03159195 - samples/sec: 27.46 - lr: 0.000002
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.83it/s]

2025-03-09 02:15:34,420 Evaluating as a multi-label problem: False
2025-03-09 02:15:34,428 DEV : loss 0.23457534611225128 - f1-score (micro avg)  0.822





2025-03-09 02:15:34,680 BAD EPOCHS (no improvement): 4
2025-03-09 02:15:39,783 saving best model
2025-03-09 02:15:45,036 ----------------------------------------------------------------------------------------------------
2025-03-09 02:15:51,982 epoch 7 - iter 50/500 - loss 0.01001899 - samples/sec: 29.22 - lr: 0.000002
2025-03-09 02:15:59,599 epoch 7 - iter 100/500 - loss 0.02052981 - samples/sec: 27.49 - lr: 0.000002
2025-03-09 02:16:07,502 epoch 7 - iter 150/500 - loss 0.02181051 - samples/sec: 26.48 - lr: 0.000002
2025-03-09 02:16:15,601 epoch 7 - iter 200/500 - loss 0.02466200 - samples/sec: 25.83 - lr: 0.000002
2025-03-09 02:16:23,730 epoch 7 - iter 250/500 - loss 0.02631246 - samples/sec: 25.74 - lr: 0.000002
2025-03-09 02:16:31,633 epoch 7 - iter 300/500 - loss 0.02325173 - samples/sec: 26.47 - lr: 0.000002
2025-03-09 02:16:39,638 epoch 7 - iter 350/500 - loss 0.02357794 - samples/sec: 26.13 - lr: 0.000002
2025-03-09 02:16:47,233 epoch 7 - iter 400/500 - loss 0.02256261 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.80it/s]

2025-03-09 02:17:07,717 Evaluating as a multi-label problem: False
2025-03-09 02:17:07,725 DEV : loss 0.31262731552124023 - f1-score (micro avg)  0.804





2025-03-09 02:17:07,976 BAD EPOCHS (no improvement): 4
2025-03-09 02:17:13,048 ----------------------------------------------------------------------------------------------------
2025-03-09 02:17:20,604 epoch 8 - iter 50/500 - loss 0.00607131 - samples/sec: 26.86 - lr: 0.000002
2025-03-09 02:17:27,833 epoch 8 - iter 100/500 - loss 0.01137895 - samples/sec: 29.42 - lr: 0.000002
2025-03-09 02:17:35,915 epoch 8 - iter 150/500 - loss 0.01864725 - samples/sec: 25.87 - lr: 0.000002
2025-03-09 02:17:44,482 epoch 8 - iter 200/500 - loss 0.01928553 - samples/sec: 24.35 - lr: 0.000001
2025-03-09 02:17:52,205 epoch 8 - iter 250/500 - loss 0.01694365 - samples/sec: 27.12 - lr: 0.000001
2025-03-09 02:18:00,353 epoch 8 - iter 300/500 - loss 0.01536073 - samples/sec: 25.68 - lr: 0.000001
2025-03-09 02:18:07,627 epoch 8 - iter 350/500 - loss 0.01372281 - samples/sec: 28.86 - lr: 0.000001
2025-03-09 02:18:15,383 epoch 8 - iter 400/500 - loss 0.01339059 - samples/sec: 27.00 - lr: 0.000001
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.82it/s]


2025-03-09 02:18:36,562 Evaluating as a multi-label problem: False
2025-03-09 02:18:36,569 DEV : loss 0.3091127574443817 - f1-score (micro avg)  0.818
2025-03-09 02:18:36,827 BAD EPOCHS (no improvement): 4
2025-03-09 02:18:41,936 ----------------------------------------------------------------------------------------------------
2025-03-09 02:18:49,706 epoch 9 - iter 50/500 - loss 0.01435573 - samples/sec: 27.00 - lr: 0.000001
2025-03-09 02:18:57,220 epoch 9 - iter 100/500 - loss 0.01986344 - samples/sec: 27.91 - lr: 0.000001
2025-03-09 02:19:04,915 epoch 9 - iter 150/500 - loss 0.01729076 - samples/sec: 27.21 - lr: 0.000001
2025-03-09 02:19:12,942 epoch 9 - iter 200/500 - loss 0.01545313 - samples/sec: 26.05 - lr: 0.000001
2025-03-09 02:19:20,733 epoch 9 - iter 250/500 - loss 0.01396191 - samples/sec: 26.86 - lr: 0.000001
2025-03-09 02:19:28,570 epoch 9 - iter 300/500 - loss 0.01179952 - samples/sec: 26.69 - lr: 0.000001
2025-03-09 02:19:36,873 epoch 9 - iter 350/500 - loss 0.01191017

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.88it/s]

2025-03-09 02:20:05,621 Evaluating as a multi-label problem: False
2025-03-09 02:20:05,629 DEV : loss 0.3255111873149872 - f1-score (micro avg)  0.82





2025-03-09 02:20:06,171 BAD EPOCHS (no improvement): 4
2025-03-09 02:20:11,293 ----------------------------------------------------------------------------------------------------
2025-03-09 02:20:18,562 epoch 10 - iter 50/500 - loss 0.00443440 - samples/sec: 27.91 - lr: 0.000001
2025-03-09 02:20:27,150 epoch 10 - iter 100/500 - loss 0.01078157 - samples/sec: 24.72 - lr: 0.000000
2025-03-09 02:20:34,833 epoch 10 - iter 150/500 - loss 0.00828740 - samples/sec: 27.28 - lr: 0.000000
2025-03-09 02:20:42,176 epoch 10 - iter 200/500 - loss 0.00707670 - samples/sec: 28.57 - lr: 0.000000
2025-03-09 02:20:49,709 epoch 10 - iter 250/500 - loss 0.00637768 - samples/sec: 27.82 - lr: 0.000000
2025-03-09 02:20:57,525 epoch 10 - iter 300/500 - loss 0.00995382 - samples/sec: 26.81 - lr: 0.000000
2025-03-09 02:21:04,858 epoch 10 - iter 350/500 - loss 0.00934097 - samples/sec: 28.62 - lr: 0.000000
2025-03-09 02:21:12,616 epoch 10 - iter 400/500 - loss 0.00944737 - samples/sec: 27.78 - lr: 0.000000
2025-

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.73it/s]


2025-03-09 02:21:34,457 Evaluating as a multi-label problem: False
2025-03-09 02:21:34,465 DEV : loss 0.3139808475971222 - f1-score (micro avg)  0.824
2025-03-09 02:21:35,050 BAD EPOCHS (no improvement): 4
2025-03-09 02:21:40,385 saving best model
2025-03-09 02:21:46,581 ----------------------------------------------------------------------------------------------------
2025-03-09 02:21:46,584 loading file logs/PDSC_V2/GELECTRA/G29B/3K/k1/best-model.pt


100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.89it/s]

2025-03-09 02:22:06,553 Evaluating as a multi-label problem: False
2025-03-09 02:22:06,560 0.822	0.822	0.822	0.822
2025-03-09 02:22:06,561 
Results:
- F-score (micro) 0.822
- F-score (macro) 0.8214
- Accuracy 0.822

By class:
              precision    recall  f1-score   support

       PSEUD     0.7885    0.8800    0.8318       250
        ORIG     0.8643    0.7640    0.8110       250

    accuracy                         0.8220       500
   macro avg     0.8264    0.8220    0.8214       500
weighted avg     0.8264    0.8220    0.8214       500

2025-03-09 02:22:06,561 ----------------------------------------------------------------------------------------------------





2025-03-09 02:22:06,637 Reading data from tmp_data_dir
2025-03-09 02:22:06,637 Train: tmp_data_dir/train.csv
2025-03-09 02:22:06,637 Dev: tmp_data_dir/dev.csv
2025-03-09 02:22:06,638 Test: tmp_data_dir/test.csv
2025-03-09 02:22:06,651 Computing label dictionary. Progress:


2000it [00:01, 1937.90it/s]

2025-03-09 02:22:07,685 Dictionary created for label 'label' with 3 values: ORIG (seen 1000 times), PSEUD (seen 1000 times)



  return torch.load(checkpoint_file, map_location="cpu")


2025-03-09 02:22:20,921 tensorboard logging path is logs/PDSC_V2/GELECTRA/G29B/3K/k2
2025-03-09 02:22:20,943 ----------------------------------------------------------------------------------------------------
2025-03-09 02:22:20,945 Model: "TextClassifier(
  (decoder): Linear(in_features=1024, out_features=3, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
  (locked_dropout): LockedDropout(p=0.0)
  (word_dropout): WordDropout(p=0.0)
  (loss_function): CrossEntropyLoss()
  (document_embeddings): TransformerDocumentEmbeddings(
    (model): ElectraModel(
      (embeddings): ElectraEmbeddings(
        (word_embeddings): Embedding(31102, 1024, padding_idx=0)
        (position_embeddings): Embedding(512, 1024)
        (token_type_embeddings): Embedding(2, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): ElectraEncoder(
        (layer): ModuleList(
          (0-23): 24 x ElectraLa

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.65it/s]

2025-03-09 02:23:42,805 Evaluating as a multi-label problem: False
2025-03-09 02:23:42,812 DEV : loss 1.1682065725326538 - f1-score (micro avg)  0.5





2025-03-09 02:23:43,074 BAD EPOCHS (no improvement): 4
2025-03-09 02:23:48,791 saving best model
2025-03-09 02:23:55,621 ----------------------------------------------------------------------------------------------------
2025-03-09 02:24:02,771 epoch 2 - iter 50/500 - loss 0.29755195 - samples/sec: 28.39 - lr: 0.000005
2025-03-09 02:24:10,750 epoch 2 - iter 100/500 - loss 0.24379061 - samples/sec: 26.23 - lr: 0.000005
2025-03-09 02:24:18,218 epoch 2 - iter 150/500 - loss 0.22518392 - samples/sec: 28.07 - lr: 0.000005
2025-03-09 02:24:25,614 epoch 2 - iter 200/500 - loss 0.21525708 - samples/sec: 28.40 - lr: 0.000005
2025-03-09 02:24:32,998 epoch 2 - iter 250/500 - loss 0.20673071 - samples/sec: 28.45 - lr: 0.000005
2025-03-09 02:24:40,848 epoch 2 - iter 300/500 - loss 0.20698943 - samples/sec: 26.68 - lr: 0.000005
2025-03-09 02:24:48,564 epoch 2 - iter 350/500 - loss 0.20291576 - samples/sec: 27.14 - lr: 0.000005
2025-03-09 02:24:56,790 epoch 2 - iter 400/500 - loss 0.19966468 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.56it/s]

2025-03-09 02:25:17,527 Evaluating as a multi-label problem: False
2025-03-09 02:25:17,535 DEV : loss 0.16280719637870789 - f1-score (micro avg)  0.716





2025-03-09 02:25:18,082 BAD EPOCHS (no improvement): 4
2025-03-09 02:25:23,279 saving best model
2025-03-09 02:25:28,227 ----------------------------------------------------------------------------------------------------
2025-03-09 02:25:35,532 epoch 3 - iter 50/500 - loss 0.11662549 - samples/sec: 27.81 - lr: 0.000004
2025-03-09 02:25:42,956 epoch 3 - iter 100/500 - loss 0.11810268 - samples/sec: 28.25 - lr: 0.000004
2025-03-09 02:25:50,547 epoch 3 - iter 150/500 - loss 0.11373375 - samples/sec: 27.59 - lr: 0.000004
2025-03-09 02:25:58,200 epoch 3 - iter 200/500 - loss 0.11667737 - samples/sec: 27.36 - lr: 0.000004
2025-03-09 02:26:06,084 epoch 3 - iter 250/500 - loss 0.11218897 - samples/sec: 26.55 - lr: 0.000004
2025-03-09 02:26:14,257 epoch 3 - iter 300/500 - loss 0.10798135 - samples/sec: 25.58 - lr: 0.000004
2025-03-09 02:26:21,813 epoch 3 - iter 350/500 - loss 0.10695509 - samples/sec: 27.77 - lr: 0.000004
2025-03-09 02:26:29,273 epoch 3 - iter 400/500 - loss 0.10554906 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.64it/s]


2025-03-09 02:26:50,600 Evaluating as a multi-label problem: False
2025-03-09 02:26:50,608 DEV : loss 0.1445467472076416 - f1-score (micro avg)  0.834
2025-03-09 02:26:51,187 BAD EPOCHS (no improvement): 4
2025-03-09 02:26:56,323 saving best model
2025-03-09 02:27:01,183 ----------------------------------------------------------------------------------------------------
2025-03-09 02:27:08,479 epoch 4 - iter 50/500 - loss 0.05262766 - samples/sec: 27.83 - lr: 0.000004
2025-03-09 02:27:16,155 epoch 4 - iter 100/500 - loss 0.05277209 - samples/sec: 27.85 - lr: 0.000004
2025-03-09 02:27:23,745 epoch 4 - iter 150/500 - loss 0.04997096 - samples/sec: 27.64 - lr: 0.000004
2025-03-09 02:27:31,131 epoch 4 - iter 200/500 - loss 0.05501050 - samples/sec: 28.43 - lr: 0.000004
2025-03-09 02:27:38,587 epoch 4 - iter 250/500 - loss 0.06462547 - samples/sec: 28.11 - lr: 0.000004
2025-03-09 02:27:46,035 epoch 4 - iter 300/500 - loss 0.06255679 - samples/sec: 28.18 - lr: 0.000004
2025-03-09 02:27:53,43

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.59it/s]

2025-03-09 02:28:22,732 Evaluating as a multi-label problem: False
2025-03-09 02:28:22,739 DEV : loss 0.17341069877147675 - f1-score (micro avg)  0.86





2025-03-09 02:28:23,003 BAD EPOCHS (no improvement): 4
2025-03-09 02:28:27,912 saving best model
2025-03-09 02:28:33,252 ----------------------------------------------------------------------------------------------------
2025-03-09 02:28:40,803 epoch 5 - iter 50/500 - loss 0.04325542 - samples/sec: 26.87 - lr: 0.000003
2025-03-09 02:28:48,584 epoch 5 - iter 100/500 - loss 0.03133554 - samples/sec: 26.90 - lr: 0.000003
2025-03-09 02:28:56,184 epoch 5 - iter 150/500 - loss 0.03336327 - samples/sec: 27.58 - lr: 0.000003
2025-03-09 02:29:03,706 epoch 5 - iter 200/500 - loss 0.03486111 - samples/sec: 27.89 - lr: 0.000003
2025-03-09 02:29:11,577 epoch 5 - iter 250/500 - loss 0.03647746 - samples/sec: 26.60 - lr: 0.000003
2025-03-09 02:29:19,096 epoch 5 - iter 300/500 - loss 0.03506222 - samples/sec: 27.89 - lr: 0.000003
2025-03-09 02:29:27,155 epoch 5 - iter 350/500 - loss 0.03372428 - samples/sec: 25.95 - lr: 0.000003
2025-03-09 02:29:35,057 epoch 5 - iter 400/500 - loss 0.03202661 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.67it/s]

2025-03-09 02:29:56,034 Evaluating as a multi-label problem: False
2025-03-09 02:29:56,041 DEV : loss 0.23496483266353607 - f1-score (micro avg)  0.844





2025-03-09 02:29:56,304 BAD EPOCHS (no improvement): 4
2025-03-09 02:30:01,130 ----------------------------------------------------------------------------------------------------
2025-03-09 02:30:08,472 epoch 6 - iter 50/500 - loss 0.03282336 - samples/sec: 27.65 - lr: 0.000003
2025-03-09 02:30:16,406 epoch 6 - iter 100/500 - loss 0.02110225 - samples/sec: 26.39 - lr: 0.000003
2025-03-09 02:30:24,209 epoch 6 - iter 150/500 - loss 0.01633555 - samples/sec: 26.84 - lr: 0.000003
2025-03-09 02:30:32,034 epoch 6 - iter 200/500 - loss 0.01972059 - samples/sec: 26.76 - lr: 0.000003
2025-03-09 02:30:39,937 epoch 6 - iter 250/500 - loss 0.01845259 - samples/sec: 26.49 - lr: 0.000003
2025-03-09 02:30:47,107 epoch 6 - iter 300/500 - loss 0.01787693 - samples/sec: 29.31 - lr: 0.000002
2025-03-09 02:30:54,948 epoch 6 - iter 350/500 - loss 0.01795150 - samples/sec: 26.71 - lr: 0.000002
2025-03-09 02:31:03,043 epoch 6 - iter 400/500 - loss 0.01962907 - samples/sec: 25.84 - lr: 0.000002
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.63it/s]

2025-03-09 02:31:23,848 Evaluating as a multi-label problem: False
2025-03-09 02:31:23,857 DEV : loss 0.21690240502357483 - f1-score (micro avg)  0.846





2025-03-09 02:31:24,406 BAD EPOCHS (no improvement): 4
2025-03-09 02:31:29,337 ----------------------------------------------------------------------------------------------------
2025-03-09 02:31:36,697 epoch 7 - iter 50/500 - loss 0.00211856 - samples/sec: 27.58 - lr: 0.000002
2025-03-09 02:31:44,251 epoch 7 - iter 100/500 - loss 0.00854347 - samples/sec: 27.78 - lr: 0.000002
2025-03-09 02:31:51,488 epoch 7 - iter 150/500 - loss 0.01382980 - samples/sec: 29.03 - lr: 0.000002
2025-03-09 02:31:58,677 epoch 7 - iter 200/500 - loss 0.01436456 - samples/sec: 29.25 - lr: 0.000002
2025-03-09 02:32:06,361 epoch 7 - iter 250/500 - loss 0.01407075 - samples/sec: 27.27 - lr: 0.000002
2025-03-09 02:32:14,472 epoch 7 - iter 300/500 - loss 0.01433560 - samples/sec: 25.77 - lr: 0.000002
2025-03-09 02:32:22,001 epoch 7 - iter 350/500 - loss 0.01249979 - samples/sec: 27.86 - lr: 0.000002
2025-03-09 02:32:29,696 epoch 7 - iter 400/500 - loss 0.01425931 - samples/sec: 27.29 - lr: 0.000002
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.53it/s]

2025-03-09 02:32:50,710 Evaluating as a multi-label problem: False
2025-03-09 02:32:50,718 DEV : loss 0.24917937815189362 - f1-score (micro avg)  0.852





2025-03-09 02:32:51,315 BAD EPOCHS (no improvement): 4
2025-03-09 02:32:56,241 ----------------------------------------------------------------------------------------------------
2025-03-09 02:33:03,492 epoch 8 - iter 50/500 - loss 0.00407805 - samples/sec: 28.00 - lr: 0.000002
2025-03-09 02:33:10,741 epoch 8 - iter 100/500 - loss 0.00651307 - samples/sec: 28.97 - lr: 0.000002
2025-03-09 02:33:18,674 epoch 8 - iter 150/500 - loss 0.00531536 - samples/sec: 26.39 - lr: 0.000002
2025-03-09 02:33:26,462 epoch 8 - iter 200/500 - loss 0.00657285 - samples/sec: 26.91 - lr: 0.000001
2025-03-09 02:33:34,156 epoch 8 - iter 250/500 - loss 0.00578564 - samples/sec: 27.24 - lr: 0.000001
2025-03-09 02:33:41,824 epoch 8 - iter 300/500 - loss 0.00491030 - samples/sec: 27.32 - lr: 0.000001
2025-03-09 02:33:49,548 epoch 8 - iter 350/500 - loss 0.00474843 - samples/sec: 27.12 - lr: 0.000001
2025-03-09 02:33:57,398 epoch 8 - iter 400/500 - loss 0.00536032 - samples/sec: 26.74 - lr: 0.000001
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.62it/s]

2025-03-09 02:34:18,484 Evaluating as a multi-label problem: False
2025-03-09 02:34:18,492 DEV : loss 0.28951847553253174 - f1-score (micro avg)  0.852





2025-03-09 02:34:19,077 BAD EPOCHS (no improvement): 4
2025-03-09 02:34:24,022 ----------------------------------------------------------------------------------------------------
2025-03-09 02:34:31,614 epoch 9 - iter 50/500 - loss 0.01436708 - samples/sec: 26.73 - lr: 0.000001
2025-03-09 02:34:39,586 epoch 9 - iter 100/500 - loss 0.00728044 - samples/sec: 26.25 - lr: 0.000001
2025-03-09 02:34:46,942 epoch 9 - iter 150/500 - loss 0.00506873 - samples/sec: 28.53 - lr: 0.000001
2025-03-09 02:34:54,459 epoch 9 - iter 200/500 - loss 0.00524028 - samples/sec: 27.88 - lr: 0.000001
2025-03-09 02:35:02,129 epoch 9 - iter 250/500 - loss 0.00452530 - samples/sec: 27.32 - lr: 0.000001
2025-03-09 02:35:09,352 epoch 9 - iter 300/500 - loss 0.00491091 - samples/sec: 29.08 - lr: 0.000001
2025-03-09 02:35:16,587 epoch 9 - iter 350/500 - loss 0.00503962 - samples/sec: 29.02 - lr: 0.000001
2025-03-09 02:35:24,340 epoch 9 - iter 400/500 - loss 0.00551101 - samples/sec: 27.00 - lr: 0.000001
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.50it/s]

2025-03-09 02:35:45,941 Evaluating as a multi-label problem: False
2025-03-09 02:35:45,949 DEV : loss 0.2851616442203522 - f1-score (micro avg)  0.856





2025-03-09 02:35:46,209 BAD EPOCHS (no improvement): 4
2025-03-09 02:35:51,225 ----------------------------------------------------------------------------------------------------
2025-03-09 02:35:58,180 epoch 10 - iter 50/500 - loss 0.00117513 - samples/sec: 29.19 - lr: 0.000001
2025-03-09 02:36:06,320 epoch 10 - iter 100/500 - loss 0.00063555 - samples/sec: 25.80 - lr: 0.000000
2025-03-09 02:36:13,909 epoch 10 - iter 150/500 - loss 0.00048373 - samples/sec: 27.63 - lr: 0.000000
2025-03-09 02:36:21,564 epoch 10 - iter 200/500 - loss 0.00054199 - samples/sec: 27.38 - lr: 0.000000
2025-03-09 02:36:29,927 epoch 10 - iter 250/500 - loss 0.00051051 - samples/sec: 24.95 - lr: 0.000000
2025-03-09 02:36:37,277 epoch 10 - iter 300/500 - loss 0.00054026 - samples/sec: 28.54 - lr: 0.000000
2025-03-09 02:36:44,581 epoch 10 - iter 350/500 - loss 0.00176500 - samples/sec: 28.74 - lr: 0.000000
2025-03-09 02:36:52,915 epoch 10 - iter 400/500 - loss 0.00162352 - samples/sec: 25.15 - lr: 0.000000
2025-

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.63it/s]

2025-03-09 02:37:13,844 Evaluating as a multi-label problem: False
2025-03-09 02:37:13,852 DEV : loss 0.29346781969070435 - f1-score (micro avg)  0.854





2025-03-09 02:37:14,116 BAD EPOCHS (no improvement): 4
2025-03-09 02:37:19,011 ----------------------------------------------------------------------------------------------------
2025-03-09 02:37:19,013 loading file logs/PDSC_V2/GELECTRA/G29B/3K/k2/best-model.pt


100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.31it/s]

2025-03-09 02:37:37,071 Evaluating as a multi-label problem: False
2025-03-09 02:37:37,078 0.848	0.848	0.848	0.848
2025-03-09 02:37:37,079 
Results:
- F-score (micro) 0.848
- F-score (macro) 0.8479
- Accuracy 0.848

By class:
              precision    recall  f1-score   support

        ORIG     0.8346    0.8680    0.8510       250
       PSEUD     0.8625    0.8280    0.8449       250

    accuracy                         0.8480       500
   macro avg     0.8486    0.8480    0.8479       500
weighted avg     0.8486    0.8480    0.8479       500

2025-03-09 02:37:37,079 ----------------------------------------------------------------------------------------------------





2025-03-09 02:37:37,157 Reading data from tmp_data_dir
2025-03-09 02:37:37,158 Train: tmp_data_dir/train.csv
2025-03-09 02:37:37,158 Dev: tmp_data_dir/dev.csv
2025-03-09 02:37:37,158 Test: tmp_data_dir/test.csv
2025-03-09 02:37:37,171 Computing label dictionary. Progress:


2000it [00:01, 1458.52it/s]

2025-03-09 02:37:38,544 Dictionary created for label 'label' with 3 values: ORIG (seen 1000 times), PSEUD (seen 1000 times)



  return torch.load(checkpoint_file, map_location="cpu")


2025-03-09 02:37:44,860 tensorboard logging path is logs/PDSC_V2/GELECTRA/G29B/3K/k3
2025-03-09 02:37:44,867 ----------------------------------------------------------------------------------------------------
2025-03-09 02:37:44,868 Model: "TextClassifier(
  (decoder): Linear(in_features=1024, out_features=3, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
  (locked_dropout): LockedDropout(p=0.0)
  (word_dropout): WordDropout(p=0.0)
  (loss_function): CrossEntropyLoss()
  (document_embeddings): TransformerDocumentEmbeddings(
    (model): ElectraModel(
      (embeddings): ElectraEmbeddings(
        (word_embeddings): Embedding(31102, 1024, padding_idx=0)
        (position_embeddings): Embedding(512, 1024)
        (token_type_embeddings): Embedding(2, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): ElectraEncoder(
        (layer): ModuleList(
          (0-23): 24 x ElectraLa

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.73it/s]

2025-03-09 02:39:07,546 Evaluating as a multi-label problem: False
2025-03-09 02:39:07,554 DEV : loss 1.1799248456954956 - f1-score (micro avg)  0.5





2025-03-09 02:39:08,094 BAD EPOCHS (no improvement): 4
2025-03-09 02:39:13,213 saving best model
2025-03-09 02:39:20,040 ----------------------------------------------------------------------------------------------------
2025-03-09 02:39:27,585 epoch 2 - iter 50/500 - loss 0.27911917 - samples/sec: 26.89 - lr: 0.000005
2025-03-09 02:39:35,274 epoch 2 - iter 100/500 - loss 0.24009130 - samples/sec: 27.26 - lr: 0.000005
2025-03-09 02:39:43,210 epoch 2 - iter 150/500 - loss 0.22666732 - samples/sec: 26.35 - lr: 0.000005
2025-03-09 02:39:50,597 epoch 2 - iter 200/500 - loss 0.21635227 - samples/sec: 28.43 - lr: 0.000005
2025-03-09 02:39:58,516 epoch 2 - iter 250/500 - loss 0.21134198 - samples/sec: 26.43 - lr: 0.000005
2025-03-09 02:40:06,521 epoch 2 - iter 300/500 - loss 0.20629209 - samples/sec: 26.12 - lr: 0.000005
2025-03-09 02:40:14,185 epoch 2 - iter 350/500 - loss 0.20162346 - samples/sec: 27.33 - lr: 0.000005
2025-03-09 02:40:21,338 epoch 2 - iter 400/500 - loss 0.19967882 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.76it/s]

2025-03-09 02:40:43,106 Evaluating as a multi-label problem: False
2025-03-09 02:40:43,113 DEV : loss 0.1740008145570755 - f1-score (micro avg)  0.504





2025-03-09 02:40:43,376 BAD EPOCHS (no improvement): 4
2025-03-09 02:40:48,248 saving best model
2025-03-09 02:40:53,111 ----------------------------------------------------------------------------------------------------
2025-03-09 02:41:00,281 epoch 3 - iter 50/500 - loss 0.18399057 - samples/sec: 28.33 - lr: 0.000004
2025-03-09 02:41:07,878 epoch 3 - iter 100/500 - loss 0.18161125 - samples/sec: 27.58 - lr: 0.000004
2025-03-09 02:41:15,695 epoch 3 - iter 150/500 - loss 0.18087672 - samples/sec: 26.77 - lr: 0.000004
2025-03-09 02:41:23,371 epoch 3 - iter 200/500 - loss 0.18029465 - samples/sec: 27.31 - lr: 0.000004
2025-03-09 02:41:31,303 epoch 3 - iter 250/500 - loss 0.17971759 - samples/sec: 26.40 - lr: 0.000004
2025-03-09 02:41:38,984 epoch 3 - iter 300/500 - loss 0.17966611 - samples/sec: 27.27 - lr: 0.000004
2025-03-09 02:41:46,848 epoch 3 - iter 350/500 - loss 0.17925026 - samples/sec: 26.60 - lr: 0.000004
2025-03-09 02:41:54,862 epoch 3 - iter 400/500 - loss 0.17949082 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.80it/s]

2025-03-09 02:42:16,396 Evaluating as a multi-label problem: False
2025-03-09 02:42:16,403 DEV : loss 0.1745978593826294 - f1-score (micro avg)  0.5





2025-03-09 02:42:16,669 BAD EPOCHS (no improvement): 4
2025-03-09 02:42:21,547 ----------------------------------------------------------------------------------------------------
2025-03-09 02:42:29,212 epoch 4 - iter 50/500 - loss 0.17951241 - samples/sec: 26.47 - lr: 0.000004
2025-03-09 02:42:36,523 epoch 4 - iter 100/500 - loss 0.17799640 - samples/sec: 28.73 - lr: 0.000004
2025-03-09 02:42:44,193 epoch 4 - iter 150/500 - loss 0.17853765 - samples/sec: 27.33 - lr: 0.000004
2025-03-09 02:42:52,221 epoch 4 - iter 200/500 - loss 0.17858554 - samples/sec: 26.07 - lr: 0.000004
2025-03-09 02:43:00,463 epoch 4 - iter 250/500 - loss 0.17815321 - samples/sec: 26.34 - lr: 0.000004
2025-03-09 02:43:08,400 epoch 4 - iter 300/500 - loss 0.17766944 - samples/sec: 26.38 - lr: 0.000004
2025-03-09 02:43:15,991 epoch 4 - iter 350/500 - loss 0.17784225 - samples/sec: 27.63 - lr: 0.000004
2025-03-09 02:43:23,597 epoch 4 - iter 400/500 - loss 0.17758685 - samples/sec: 27.56 - lr: 0.000003
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.84it/s]

2025-03-09 02:43:44,643 Evaluating as a multi-label problem: False
2025-03-09 02:43:44,650 DEV : loss 0.1736781746149063 - f1-score (micro avg)  0.496





2025-03-09 02:43:44,914 BAD EPOCHS (no improvement): 4
2025-03-09 02:43:49,821 ----------------------------------------------------------------------------------------------------
2025-03-09 02:43:57,361 epoch 5 - iter 50/500 - loss 0.17597067 - samples/sec: 26.92 - lr: 0.000003
2025-03-09 02:44:05,274 epoch 5 - iter 100/500 - loss 0.17521727 - samples/sec: 27.53 - lr: 0.000003
2025-03-09 02:44:12,674 epoch 5 - iter 150/500 - loss 0.17418397 - samples/sec: 28.34 - lr: 0.000003
2025-03-09 02:44:20,460 epoch 5 - iter 200/500 - loss 0.17493666 - samples/sec: 26.89 - lr: 0.000003
2025-03-09 02:44:28,175 epoch 5 - iter 250/500 - loss 0.17529862 - samples/sec: 27.19 - lr: 0.000003
2025-03-09 02:44:35,916 epoch 5 - iter 300/500 - loss 0.17490072 - samples/sec: 27.07 - lr: 0.000003
2025-03-09 02:44:44,080 epoch 5 - iter 350/500 - loss 0.17499036 - samples/sec: 25.62 - lr: 0.000003
2025-03-09 02:44:51,704 epoch 5 - iter 400/500 - loss 0.17489844 - samples/sec: 27.53 - lr: 0.000003
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.85it/s]

2025-03-09 02:45:12,540 Evaluating as a multi-label problem: False
2025-03-09 02:45:12,547 DEV : loss 0.17425857484340668 - f1-score (micro avg)  0.496





2025-03-09 02:45:13,093 BAD EPOCHS (no improvement): 4
2025-03-09 02:45:17,985 ----------------------------------------------------------------------------------------------------
2025-03-09 02:45:25,724 epoch 6 - iter 50/500 - loss 0.17564812 - samples/sec: 26.24 - lr: 0.000003
2025-03-09 02:45:33,182 epoch 6 - iter 100/500 - loss 0.17636477 - samples/sec: 28.13 - lr: 0.000003
2025-03-09 02:45:40,647 epoch 6 - iter 150/500 - loss 0.17694867 - samples/sec: 28.09 - lr: 0.000003
2025-03-09 02:45:48,319 epoch 6 - iter 200/500 - loss 0.17697793 - samples/sec: 27.30 - lr: 0.000003
2025-03-09 02:45:55,806 epoch 6 - iter 250/500 - loss 0.17646609 - samples/sec: 28.00 - lr: 0.000003
2025-03-09 02:46:03,838 epoch 6 - iter 300/500 - loss 0.17570505 - samples/sec: 26.08 - lr: 0.000002
2025-03-09 02:46:11,431 epoch 6 - iter 350/500 - loss 0.17614616 - samples/sec: 27.60 - lr: 0.000002
2025-03-09 02:46:19,158 epoch 6 - iter 400/500 - loss 0.17655845 - samples/sec: 27.13 - lr: 0.000002
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.86it/s]

2025-03-09 02:46:40,800 Evaluating as a multi-label problem: False
2025-03-09 02:46:40,808 DEV : loss 0.17453239858150482 - f1-score (micro avg)  0.5





2025-03-09 02:46:41,387 BAD EPOCHS (no improvement): 4
2025-03-09 02:46:46,307 ----------------------------------------------------------------------------------------------------
2025-03-09 02:46:54,229 epoch 7 - iter 50/500 - loss 0.17531666 - samples/sec: 25.59 - lr: 0.000002
2025-03-09 02:47:02,038 epoch 7 - iter 100/500 - loss 0.17428954 - samples/sec: 26.83 - lr: 0.000002
2025-03-09 02:47:09,649 epoch 7 - iter 150/500 - loss 0.17414199 - samples/sec: 27.54 - lr: 0.000002
2025-03-09 02:47:16,851 epoch 7 - iter 200/500 - loss 0.17428005 - samples/sec: 29.16 - lr: 0.000002
2025-03-09 02:47:24,208 epoch 7 - iter 250/500 - loss 0.17411689 - samples/sec: 28.53 - lr: 0.000002
2025-03-09 02:47:32,697 epoch 7 - iter 300/500 - loss 0.17506515 - samples/sec: 24.59 - lr: 0.000002
2025-03-09 02:47:40,620 epoch 7 - iter 350/500 - loss 0.17573018 - samples/sec: 26.42 - lr: 0.000002
2025-03-09 02:47:48,247 epoch 7 - iter 400/500 - loss 0.17564821 - samples/sec: 27.45 - lr: 0.000002
2025-03-09 02

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.90it/s]


2025-03-09 02:48:09,439 Evaluating as a multi-label problem: False
2025-03-09 02:48:09,447 DEV : loss 0.1742907166481018 - f1-score (micro avg)  0.486
2025-03-09 02:48:10,022 BAD EPOCHS (no improvement): 4
2025-03-09 02:48:15,162 ----------------------------------------------------------------------------------------------------
2025-03-09 02:48:22,561 epoch 8 - iter 50/500 - loss 0.17800147 - samples/sec: 27.41 - lr: 0.000002
2025-03-09 02:48:30,318 epoch 8 - iter 100/500 - loss 0.17736662 - samples/sec: 27.00 - lr: 0.000002
2025-03-09 02:48:38,322 epoch 8 - iter 150/500 - loss 0.17609905 - samples/sec: 26.13 - lr: 0.000002
2025-03-09 02:48:46,415 epoch 8 - iter 200/500 - loss 0.17743606 - samples/sec: 25.83 - lr: 0.000001
2025-03-09 02:48:54,257 epoch 8 - iter 250/500 - loss 0.17694658 - samples/sec: 26.74 - lr: 0.000001
2025-03-09 02:49:01,792 epoch 8 - iter 300/500 - loss 0.17650981 - samples/sec: 27.81 - lr: 0.000001
2025-03-09 02:49:09,299 epoch 8 - iter 350/500 - loss 0.17624886

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.80it/s]


2025-03-09 02:49:39,079 Evaluating as a multi-label problem: False
2025-03-09 02:49:39,087 DEV : loss 0.18465004861354828 - f1-score (micro avg)  0.544
2025-03-09 02:49:39,350 BAD EPOCHS (no improvement): 4
2025-03-09 02:49:44,268 saving best model
2025-03-09 02:49:49,462 ----------------------------------------------------------------------------------------------------
2025-03-09 02:49:56,451 epoch 9 - iter 50/500 - loss 0.16537085 - samples/sec: 29.04 - lr: 0.000001
2025-03-09 02:50:04,085 epoch 9 - iter 100/500 - loss 0.16685047 - samples/sec: 27.44 - lr: 0.000001
2025-03-09 02:50:11,824 epoch 9 - iter 150/500 - loss 0.16421926 - samples/sec: 27.08 - lr: 0.000001
2025-03-09 02:50:19,769 epoch 9 - iter 200/500 - loss 0.16909809 - samples/sec: 26.32 - lr: 0.000001
2025-03-09 02:50:27,147 epoch 9 - iter 250/500 - loss 0.16809498 - samples/sec: 28.45 - lr: 0.000001
2025-03-09 02:50:35,017 epoch 9 - iter 300/500 - loss 0.16643884 - samples/sec: 26.73 - lr: 0.000001
2025-03-09 02:50:42,5

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.77it/s]


2025-03-09 02:51:12,390 Evaluating as a multi-label problem: False
2025-03-09 02:51:12,398 DEV : loss 0.20700104534626007 - f1-score (micro avg)  0.574
2025-03-09 02:51:12,657 BAD EPOCHS (no improvement): 4
2025-03-09 02:51:17,647 saving best model
2025-03-09 02:51:22,492 ----------------------------------------------------------------------------------------------------
2025-03-09 02:51:30,174 epoch 10 - iter 50/500 - loss 0.15511726 - samples/sec: 26.41 - lr: 0.000001
2025-03-09 02:51:38,512 epoch 10 - iter 100/500 - loss 0.15514675 - samples/sec: 26.07 - lr: 0.000000
2025-03-09 02:51:46,228 epoch 10 - iter 150/500 - loss 0.15115171 - samples/sec: 27.15 - lr: 0.000000
2025-03-09 02:51:53,885 epoch 10 - iter 200/500 - loss 0.14952321 - samples/sec: 27.41 - lr: 0.000000
2025-03-09 02:52:01,244 epoch 10 - iter 250/500 - loss 0.15005018 - samples/sec: 28.51 - lr: 0.000000
2025-03-09 02:52:08,751 epoch 10 - iter 300/500 - loss 0.14980345 - samples/sec: 27.92 - lr: 0.000000
2025-03-09 02:5

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 22.88it/s]


2025-03-09 02:52:45,464 Evaluating as a multi-label problem: False
2025-03-09 02:52:45,473 DEV : loss 0.2200390100479126 - f1-score (micro avg)  0.604
2025-03-09 02:52:46,025 BAD EPOCHS (no improvement): 4
2025-03-09 02:52:51,225 saving best model
2025-03-09 02:52:56,085 ----------------------------------------------------------------------------------------------------
2025-03-09 02:52:56,088 loading file logs/PDSC_V2/GELECTRA/G29B/3K/k3/best-model.pt


100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.34it/s]

2025-03-09 02:53:14,845 Evaluating as a multi-label problem: False
2025-03-09 02:53:14,852 0.626	0.626	0.626	0.626
2025-03-09 02:53:14,852 
Results:
- F-score (micro) 0.626
- F-score (macro) 0.62
- Accuracy 0.626

By class:
              precision    recall  f1-score   support

       PSEUD     0.6006    0.7520    0.6679       250
        ORIG     0.6684    0.5000    0.5721       250

    accuracy                         0.6260       500
   macro avg     0.6345    0.6260    0.6200       500
weighted avg     0.6345    0.6260    0.6200       500

2025-03-09 02:53:14,853 ----------------------------------------------------------------------------------------------------





2025-03-09 02:53:14,930 Reading data from tmp_data_dir
2025-03-09 02:53:14,930 Train: tmp_data_dir/train.csv
2025-03-09 02:53:14,930 Dev: tmp_data_dir/dev.csv
2025-03-09 02:53:14,931 Test: tmp_data_dir/test.csv
2025-03-09 02:53:14,944 Computing label dictionary. Progress:


2000it [00:01, 1957.14it/s]

2025-03-09 02:53:15,967 Dictionary created for label 'label' with 3 values: ORIG (seen 1000 times), PSEUD (seen 1000 times)



  return torch.load(checkpoint_file, map_location="cpu")


2025-03-09 02:53:23,950 tensorboard logging path is logs/PDSC_V2/GELECTRA/G29B/3K/k4
2025-03-09 02:53:24,402 ----------------------------------------------------------------------------------------------------
2025-03-09 02:53:24,404 Model: "TextClassifier(
  (decoder): Linear(in_features=1024, out_features=3, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
  (locked_dropout): LockedDropout(p=0.0)
  (word_dropout): WordDropout(p=0.0)
  (loss_function): CrossEntropyLoss()
  (document_embeddings): TransformerDocumentEmbeddings(
    (model): ElectraModel(
      (embeddings): ElectraEmbeddings(
        (word_embeddings): Embedding(31102, 1024, padding_idx=0)
        (position_embeddings): Embedding(512, 1024)
        (token_type_embeddings): Embedding(2, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): ElectraEncoder(
        (layer): ModuleList(
          (0-23): 24 x ElectraLa

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.90it/s]

2025-03-09 02:54:46,228 Evaluating as a multi-label problem: False
2025-03-09 02:54:46,235 DEV : loss 1.0993297100067139 - f1-score (micro avg)  0.5





2025-03-09 02:54:46,503 BAD EPOCHS (no improvement): 4
2025-03-09 02:54:52,104 saving best model
2025-03-09 02:54:59,226 ----------------------------------------------------------------------------------------------------
2025-03-09 02:55:06,804 epoch 2 - iter 50/500 - loss 0.34014958 - samples/sec: 26.78 - lr: 0.000005
2025-03-09 02:55:14,551 epoch 2 - iter 100/500 - loss 0.26411686 - samples/sec: 27.05 - lr: 0.000005
2025-03-09 02:55:22,478 epoch 2 - iter 150/500 - loss 0.23605682 - samples/sec: 26.40 - lr: 0.000005
2025-03-09 02:55:30,516 epoch 2 - iter 200/500 - loss 0.21925927 - samples/sec: 26.02 - lr: 0.000005
2025-03-09 02:55:38,175 epoch 2 - iter 250/500 - loss 0.20856539 - samples/sec: 27.37 - lr: 0.000005
2025-03-09 02:55:45,802 epoch 2 - iter 300/500 - loss 0.19826574 - samples/sec: 27.50 - lr: 0.000005
2025-03-09 02:55:53,460 epoch 2 - iter 350/500 - loss 0.19106973 - samples/sec: 27.36 - lr: 0.000005
2025-03-09 02:56:00,991 epoch 2 - iter 400/500 - loss 0.18191342 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.85it/s]

2025-03-09 02:56:21,831 Evaluating as a multi-label problem: False
2025-03-09 02:56:21,838 DEV : loss 0.12635788321495056 - f1-score (micro avg)  0.776





2025-03-09 02:56:22,111 BAD EPOCHS (no improvement): 4
2025-03-09 02:56:27,177 saving best model
2025-03-09 02:56:32,193 ----------------------------------------------------------------------------------------------------
2025-03-09 02:56:40,104 epoch 3 - iter 50/500 - loss 0.08248246 - samples/sec: 25.63 - lr: 0.000004
2025-03-09 02:56:47,456 epoch 3 - iter 100/500 - loss 0.09465222 - samples/sec: 28.58 - lr: 0.000004
2025-03-09 02:56:55,601 epoch 3 - iter 150/500 - loss 0.09759991 - samples/sec: 25.68 - lr: 0.000004
2025-03-09 02:57:03,193 epoch 3 - iter 200/500 - loss 0.10460970 - samples/sec: 27.64 - lr: 0.000004
2025-03-09 02:57:10,596 epoch 3 - iter 250/500 - loss 0.10144229 - samples/sec: 28.34 - lr: 0.000004
2025-03-09 02:57:18,090 epoch 3 - iter 300/500 - loss 0.10006553 - samples/sec: 27.99 - lr: 0.000004
2025-03-09 02:57:25,459 epoch 3 - iter 350/500 - loss 0.09616156 - samples/sec: 28.47 - lr: 0.000004
2025-03-09 02:57:33,220 epoch 3 - iter 400/500 - loss 0.09915457 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.89it/s]

2025-03-09 02:57:54,286 Evaluating as a multi-label problem: False
2025-03-09 02:57:54,294 DEV : loss 0.16065356135368347 - f1-score (micro avg)  0.804





2025-03-09 02:57:54,850 BAD EPOCHS (no improvement): 4
2025-03-09 02:57:59,754 saving best model
2025-03-09 02:58:04,624 ----------------------------------------------------------------------------------------------------
2025-03-09 02:58:11,730 epoch 4 - iter 50/500 - loss 0.06264683 - samples/sec: 28.57 - lr: 0.000004
2025-03-09 02:58:18,843 epoch 4 - iter 100/500 - loss 0.05155066 - samples/sec: 29.55 - lr: 0.000004
2025-03-09 02:58:26,386 epoch 4 - iter 150/500 - loss 0.04769386 - samples/sec: 27.81 - lr: 0.000004
2025-03-09 02:58:34,359 epoch 4 - iter 200/500 - loss 0.05228512 - samples/sec: 26.25 - lr: 0.000004
2025-03-09 02:58:41,493 epoch 4 - iter 250/500 - loss 0.05275952 - samples/sec: 29.48 - lr: 0.000004
2025-03-09 02:58:49,245 epoch 4 - iter 300/500 - loss 0.04846530 - samples/sec: 26.99 - lr: 0.000004
2025-03-09 02:58:56,978 epoch 4 - iter 350/500 - loss 0.04965688 - samples/sec: 27.10 - lr: 0.000004
2025-03-09 02:59:04,945 epoch 4 - iter 400/500 - loss 0.05132355 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.90it/s]

2025-03-09 02:59:26,643 Evaluating as a multi-label problem: False
2025-03-09 02:59:26,651 DEV : loss 0.21514379978179932 - f1-score (micro avg)  0.794





2025-03-09 02:59:27,245 BAD EPOCHS (no improvement): 4
2025-03-09 02:59:32,153 ----------------------------------------------------------------------------------------------------
2025-03-09 02:59:39,331 epoch 5 - iter 50/500 - loss 0.02274053 - samples/sec: 28.29 - lr: 0.000003
2025-03-09 02:59:46,652 epoch 5 - iter 100/500 - loss 0.01475410 - samples/sec: 28.70 - lr: 0.000003
2025-03-09 02:59:54,019 epoch 5 - iter 150/500 - loss 0.01952765 - samples/sec: 28.50 - lr: 0.000003
2025-03-09 03:00:01,757 epoch 5 - iter 200/500 - loss 0.01898554 - samples/sec: 27.08 - lr: 0.000003
2025-03-09 03:00:09,357 epoch 5 - iter 250/500 - loss 0.02443609 - samples/sec: 27.61 - lr: 0.000003
2025-03-09 03:00:17,526 epoch 5 - iter 300/500 - loss 0.02998129 - samples/sec: 25.59 - lr: 0.000003
2025-03-09 03:00:24,764 epoch 5 - iter 350/500 - loss 0.02992507 - samples/sec: 29.01 - lr: 0.000003
2025-03-09 03:00:32,159 epoch 5 - iter 400/500 - loss 0.03066500 - samples/sec: 28.43 - lr: 0.000003
2025-03-09 03

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.80it/s]

2025-03-09 03:00:53,770 Evaluating as a multi-label problem: False
2025-03-09 03:00:53,778 DEV : loss 0.2804914712905884 - f1-score (micro avg)  0.794





2025-03-09 03:00:54,048 BAD EPOCHS (no improvement): 4
2025-03-09 03:00:58,950 ----------------------------------------------------------------------------------------------------
2025-03-09 03:01:06,417 epoch 6 - iter 50/500 - loss 0.02168673 - samples/sec: 27.18 - lr: 0.000003
2025-03-09 03:01:13,921 epoch 6 - iter 100/500 - loss 0.01543846 - samples/sec: 27.97 - lr: 0.000003
2025-03-09 03:01:21,315 epoch 6 - iter 150/500 - loss 0.01283153 - samples/sec: 28.40 - lr: 0.000003
2025-03-09 03:01:28,693 epoch 6 - iter 200/500 - loss 0.01552602 - samples/sec: 28.47 - lr: 0.000003
2025-03-09 03:01:36,173 epoch 6 - iter 250/500 - loss 0.01463585 - samples/sec: 28.04 - lr: 0.000003
2025-03-09 03:01:43,411 epoch 6 - iter 300/500 - loss 0.01416336 - samples/sec: 29.01 - lr: 0.000002
2025-03-09 03:01:51,297 epoch 6 - iter 350/500 - loss 0.01349876 - samples/sec: 26.53 - lr: 0.000002
2025-03-09 03:01:59,213 epoch 6 - iter 400/500 - loss 0.01451000 - samples/sec: 26.45 - lr: 0.000002
2025-03-09 03

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.87it/s]

2025-03-09 03:02:20,095 Evaluating as a multi-label problem: False
2025-03-09 03:02:20,103 DEV : loss 0.29091155529022217 - f1-score (micro avg)  0.81





2025-03-09 03:02:20,372 BAD EPOCHS (no improvement): 4
2025-03-09 03:02:25,263 saving best model
2025-03-09 03:02:30,315 ----------------------------------------------------------------------------------------------------
2025-03-09 03:02:37,299 epoch 7 - iter 50/500 - loss 0.00343085 - samples/sec: 29.08 - lr: 0.000002
2025-03-09 03:02:44,757 epoch 7 - iter 100/500 - loss 0.00726025 - samples/sec: 28.15 - lr: 0.000002
2025-03-09 03:02:52,408 epoch 7 - iter 150/500 - loss 0.00956498 - samples/sec: 27.40 - lr: 0.000002
2025-03-09 03:03:00,402 epoch 7 - iter 200/500 - loss 0.00976475 - samples/sec: 26.17 - lr: 0.000002
2025-03-09 03:03:08,265 epoch 7 - iter 250/500 - loss 0.00916973 - samples/sec: 26.61 - lr: 0.000002
2025-03-09 03:03:16,207 epoch 7 - iter 300/500 - loss 0.00963570 - samples/sec: 26.36 - lr: 0.000002
2025-03-09 03:03:23,877 epoch 7 - iter 350/500 - loss 0.00959552 - samples/sec: 27.31 - lr: 0.000002
2025-03-09 03:03:31,749 epoch 7 - iter 400/500 - loss 0.01062457 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.85it/s]

2025-03-09 03:03:53,053 Evaluating as a multi-label problem: False
2025-03-09 03:03:53,060 DEV : loss 0.3562854826450348 - f1-score (micro avg)  0.796





2025-03-09 03:03:53,333 BAD EPOCHS (no improvement): 4
2025-03-09 03:03:58,225 ----------------------------------------------------------------------------------------------------
2025-03-09 03:04:05,997 epoch 8 - iter 50/500 - loss 0.00055276 - samples/sec: 26.10 - lr: 0.000002
2025-03-09 03:04:13,909 epoch 8 - iter 100/500 - loss 0.00109424 - samples/sec: 26.43 - lr: 0.000002
2025-03-09 03:04:21,521 epoch 8 - iter 150/500 - loss 0.00281292 - samples/sec: 27.55 - lr: 0.000002
2025-03-09 03:04:29,017 epoch 8 - iter 200/500 - loss 0.00321801 - samples/sec: 27.97 - lr: 0.000001
2025-03-09 03:04:36,173 epoch 8 - iter 250/500 - loss 0.00278502 - samples/sec: 29.36 - lr: 0.000001
2025-03-09 03:04:43,696 epoch 8 - iter 300/500 - loss 0.00358684 - samples/sec: 27.88 - lr: 0.000001
2025-03-09 03:04:51,890 epoch 8 - iter 350/500 - loss 0.00391596 - samples/sec: 25.50 - lr: 0.000001
2025-03-09 03:04:59,653 epoch 8 - iter 400/500 - loss 0.00348177 - samples/sec: 26.97 - lr: 0.000001
2025-03-09 03

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.83it/s]

2025-03-09 03:05:20,607 Evaluating as a multi-label problem: False





2025-03-09 03:05:20,615 DEV : loss 0.3330846428871155 - f1-score (micro avg)  0.808
2025-03-09 03:05:21,173 BAD EPOCHS (no improvement): 4
2025-03-09 03:05:26,096 ----------------------------------------------------------------------------------------------------
2025-03-09 03:05:33,678 epoch 9 - iter 50/500 - loss 0.00494890 - samples/sec: 26.78 - lr: 0.000001
2025-03-09 03:05:41,315 epoch 9 - iter 100/500 - loss 0.00415358 - samples/sec: 27.49 - lr: 0.000001
2025-03-09 03:05:49,038 epoch 9 - iter 150/500 - loss 0.00381157 - samples/sec: 27.13 - lr: 0.000001
2025-03-09 03:05:56,199 epoch 9 - iter 200/500 - loss 0.00460687 - samples/sec: 29.33 - lr: 0.000001
2025-03-09 03:06:04,044 epoch 9 - iter 250/500 - loss 0.00377010 - samples/sec: 26.71 - lr: 0.000001
2025-03-09 03:06:11,630 epoch 9 - iter 300/500 - loss 0.00317406 - samples/sec: 27.62 - lr: 0.000001
2025-03-09 03:06:18,987 epoch 9 - iter 350/500 - loss 0.00303888 - samples/sec: 28.55 - lr: 0.000001
2025-03-09 03:06:26,808 epoch 

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.77it/s]

2025-03-09 03:06:48,235 Evaluating as a multi-label problem: False
2025-03-09 03:06:48,243 DEV : loss 0.36564967036247253 - f1-score (micro avg)  0.806





2025-03-09 03:06:48,831 BAD EPOCHS (no improvement): 4
2025-03-09 03:06:53,746 ----------------------------------------------------------------------------------------------------
2025-03-09 03:07:01,320 epoch 10 - iter 50/500 - loss 0.00024576 - samples/sec: 26.81 - lr: 0.000001
2025-03-09 03:07:09,037 epoch 10 - iter 100/500 - loss 0.00020678 - samples/sec: 27.14 - lr: 0.000000
2025-03-09 03:07:16,755 epoch 10 - iter 150/500 - loss 0.00165061 - samples/sec: 27.15 - lr: 0.000000
2025-03-09 03:07:24,331 epoch 10 - iter 200/500 - loss 0.00130773 - samples/sec: 27.67 - lr: 0.000000
2025-03-09 03:07:31,655 epoch 10 - iter 250/500 - loss 0.00201104 - samples/sec: 28.65 - lr: 0.000000
2025-03-09 03:07:39,632 epoch 10 - iter 300/500 - loss 0.00184530 - samples/sec: 26.25 - lr: 0.000000
2025-03-09 03:07:47,210 epoch 10 - iter 350/500 - loss 0.00280645 - samples/sec: 27.65 - lr: 0.000000
2025-03-09 03:07:54,703 epoch 10 - iter 400/500 - loss 0.00310394 - samples/sec: 28.02 - lr: 0.000000
2025-

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 21.82it/s]

2025-03-09 03:08:16,276 Evaluating as a multi-label problem: False
2025-03-09 03:08:16,284 DEV : loss 0.36126160621643066 - f1-score (micro avg)  0.808





2025-03-09 03:08:16,558 BAD EPOCHS (no improvement): 4
2025-03-09 03:08:21,432 ----------------------------------------------------------------------------------------------------
2025-03-09 03:08:21,435 loading file logs/PDSC_V2/GELECTRA/G29B/3K/k4/best-model.pt


100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.27it/s]

2025-03-09 03:08:39,711 Evaluating as a multi-label problem: False
2025-03-09 03:08:39,719 0.85	0.85	0.85	0.85
2025-03-09 03:08:39,719 
Results:
- F-score (micro) 0.85
- F-score (macro) 0.85
- Accuracy 0.85

By class:
              precision    recall  f1-score   support

       PSEUD     0.8378    0.8680    0.8527       250
        ORIG     0.8631    0.8320    0.8473       250

    accuracy                         0.8500       500
   macro avg     0.8505    0.8500    0.8500       500
weighted avg     0.8505    0.8500    0.8500       500

2025-03-09 03:08:39,720 ----------------------------------------------------------------------------------------------------





2025-03-09 03:08:39,959 Reading data from tmp_data_dir
2025-03-09 03:08:39,960 Train: tmp_data_dir/train.csv
2025-03-09 03:08:39,961 Dev: tmp_data_dir/dev.csv
2025-03-09 03:08:39,961 Test: tmp_data_dir/test.csv
2025-03-09 03:08:39,973 Computing label dictionary. Progress:


2000it [00:01, 1929.33it/s]

2025-03-09 03:08:41,012 Dictionary created for label 'label' with 3 values: ORIG (seen 1000 times), PSEUD (seen 1000 times)



  return torch.load(checkpoint_file, map_location="cpu")


2025-03-09 03:08:47,997 tensorboard logging path is logs/PDSC_V2/GELECTRA/G29B/3K/k5
2025-03-09 03:08:48,005 ----------------------------------------------------------------------------------------------------
2025-03-09 03:08:48,007 Model: "TextClassifier(
  (decoder): Linear(in_features=1024, out_features=3, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
  (locked_dropout): LockedDropout(p=0.0)
  (word_dropout): WordDropout(p=0.0)
  (loss_function): CrossEntropyLoss()
  (document_embeddings): TransformerDocumentEmbeddings(
    (model): ElectraModel(
      (embeddings): ElectraEmbeddings(
        (word_embeddings): Embedding(31102, 1024, padding_idx=0)
        (position_embeddings): Embedding(512, 1024)
        (token_type_embeddings): Embedding(2, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): ElectraEncoder(
        (layer): ModuleList(
          (0-23): 24 x ElectraLa

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 24.40it/s]

2025-03-09 03:10:11,110 Evaluating as a multi-label problem: False
2025-03-09 03:10:11,118 DEV : loss 1.2460074424743652 - f1-score (micro avg)  0.5





2025-03-09 03:10:11,642 BAD EPOCHS (no improvement): 4
2025-03-09 03:10:16,778 saving best model
2025-03-09 03:10:23,554 ----------------------------------------------------------------------------------------------------
2025-03-09 03:10:30,639 epoch 2 - iter 50/500 - loss 0.28321111 - samples/sec: 28.66 - lr: 0.000005
2025-03-09 03:10:38,382 epoch 2 - iter 100/500 - loss 0.23410798 - samples/sec: 27.06 - lr: 0.000005
2025-03-09 03:10:46,382 epoch 2 - iter 150/500 - loss 0.21849653 - samples/sec: 26.16 - lr: 0.000005
2025-03-09 03:10:54,465 epoch 2 - iter 200/500 - loss 0.21069441 - samples/sec: 25.88 - lr: 0.000005
2025-03-09 03:11:01,638 epoch 2 - iter 250/500 - loss 0.20627667 - samples/sec: 29.29 - lr: 0.000005
2025-03-09 03:11:09,689 epoch 2 - iter 300/500 - loss 0.20218890 - samples/sec: 26.00 - lr: 0.000005
2025-03-09 03:11:17,766 epoch 2 - iter 350/500 - loss 0.19995203 - samples/sec: 25.87 - lr: 0.000005
2025-03-09 03:11:25,789 epoch 2 - iter 400/500 - loss 0.19784771 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.07it/s]

2025-03-09 03:11:46,682 Evaluating as a multi-label problem: False
2025-03-09 03:11:46,689 DEV : loss 0.17325349152088165 - f1-score (micro avg)  0.524





2025-03-09 03:11:46,945 BAD EPOCHS (no improvement): 4
2025-03-09 03:11:51,971 saving best model
2025-03-09 03:11:56,839 ----------------------------------------------------------------------------------------------------
2025-03-09 03:12:04,409 epoch 3 - iter 50/500 - loss 0.17542849 - samples/sec: 26.84 - lr: 0.000004
2025-03-09 03:12:12,225 epoch 3 - iter 100/500 - loss 0.17712977 - samples/sec: 26.81 - lr: 0.000004
2025-03-09 03:12:20,164 epoch 3 - iter 150/500 - loss 0.17663798 - samples/sec: 26.36 - lr: 0.000004
2025-03-09 03:12:27,885 epoch 3 - iter 200/500 - loss 0.17722882 - samples/sec: 27.14 - lr: 0.000004
2025-03-09 03:12:35,799 epoch 3 - iter 250/500 - loss 0.17654867 - samples/sec: 26.44 - lr: 0.000004
2025-03-09 03:12:43,477 epoch 3 - iter 300/500 - loss 0.17648666 - samples/sec: 27.30 - lr: 0.000004
2025-03-09 03:12:51,061 epoch 3 - iter 350/500 - loss 0.17576622 - samples/sec: 27.62 - lr: 0.000004
2025-03-09 03:12:59,321 epoch 3 - iter 400/500 - loss 0.17605150 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 24.45it/s]

2025-03-09 03:13:20,583 Evaluating as a multi-label problem: False
2025-03-09 03:13:20,590 DEV : loss 0.16934840381145477 - f1-score (micro avg)  0.518





2025-03-09 03:13:20,846 BAD EPOCHS (no improvement): 4
2025-03-09 03:13:25,853 ----------------------------------------------------------------------------------------------------
2025-03-09 03:13:33,516 epoch 4 - iter 50/500 - loss 0.15574467 - samples/sec: 26.49 - lr: 0.000004
2025-03-09 03:13:41,458 epoch 4 - iter 100/500 - loss 0.15347760 - samples/sec: 26.39 - lr: 0.000004
2025-03-09 03:13:49,149 epoch 4 - iter 150/500 - loss 0.15278456 - samples/sec: 27.24 - lr: 0.000004
2025-03-09 03:13:56,498 epoch 4 - iter 200/500 - loss 0.14863610 - samples/sec: 28.55 - lr: 0.000004
2025-03-09 03:14:04,648 epoch 4 - iter 250/500 - loss 0.14460188 - samples/sec: 25.66 - lr: 0.000004
2025-03-09 03:14:11,984 epoch 4 - iter 300/500 - loss 0.13939014 - samples/sec: 28.61 - lr: 0.000004
2025-03-09 03:14:20,137 epoch 4 - iter 350/500 - loss 0.13441942 - samples/sec: 25.64 - lr: 0.000004
2025-03-09 03:14:27,837 epoch 4 - iter 400/500 - loss 0.13016391 - samples/sec: 27.22 - lr: 0.000003
2025-03-09 03

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 24.41it/s]

2025-03-09 03:14:49,034 Evaluating as a multi-label problem: False
2025-03-09 03:14:49,042 DEV : loss 0.15144293010234833 - f1-score (micro avg)  0.796





2025-03-09 03:14:49,302 BAD EPOCHS (no improvement): 4
2025-03-09 03:14:54,367 saving best model
2025-03-09 03:14:59,326 ----------------------------------------------------------------------------------------------------
2025-03-09 03:15:06,994 epoch 5 - iter 50/500 - loss 0.05510845 - samples/sec: 26.48 - lr: 0.000003
2025-03-09 03:15:14,894 epoch 5 - iter 100/500 - loss 0.08141910 - samples/sec: 26.49 - lr: 0.000003
2025-03-09 03:15:22,875 epoch 5 - iter 150/500 - loss 0.08127576 - samples/sec: 26.24 - lr: 0.000003
2025-03-09 03:15:30,819 epoch 5 - iter 200/500 - loss 0.08057107 - samples/sec: 26.34 - lr: 0.000003
2025-03-09 03:15:38,928 epoch 5 - iter 250/500 - loss 0.07286670 - samples/sec: 25.79 - lr: 0.000003
2025-03-09 03:15:46,682 epoch 5 - iter 300/500 - loss 0.07625801 - samples/sec: 26.99 - lr: 0.000003
2025-03-09 03:15:54,375 epoch 5 - iter 350/500 - loss 0.07482140 - samples/sec: 27.22 - lr: 0.000003
2025-03-09 03:16:02,337 epoch 5 - iter 400/500 - loss 0.07280247 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 24.45it/s]

2025-03-09 03:16:23,084 Evaluating as a multi-label problem: False
2025-03-09 03:16:23,093 DEV : loss 0.1447685807943344 - f1-score (micro avg)  0.85





2025-03-09 03:16:23,647 BAD EPOCHS (no improvement): 4
2025-03-09 03:16:28,725 saving best model
2025-03-09 03:16:33,632 ----------------------------------------------------------------------------------------------------
2025-03-09 03:16:41,433 epoch 6 - iter 50/500 - loss 0.03735685 - samples/sec: 26.02 - lr: 0.000003
2025-03-09 03:16:48,772 epoch 6 - iter 100/500 - loss 0.04406414 - samples/sec: 28.60 - lr: 0.000003
2025-03-09 03:16:56,211 epoch 6 - iter 150/500 - loss 0.04489136 - samples/sec: 28.22 - lr: 0.000003
2025-03-09 03:17:03,776 epoch 6 - iter 200/500 - loss 0.04620665 - samples/sec: 27.72 - lr: 0.000003
2025-03-09 03:17:11,544 epoch 6 - iter 250/500 - loss 0.04925480 - samples/sec: 26.95 - lr: 0.000003
2025-03-09 03:17:19,625 epoch 6 - iter 300/500 - loss 0.04936539 - samples/sec: 25.88 - lr: 0.000002
2025-03-09 03:17:27,484 epoch 6 - iter 350/500 - loss 0.04678524 - samples/sec: 26.63 - lr: 0.000002
2025-03-09 03:17:35,216 epoch 6 - iter 400/500 - loss 0.04631672 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 24.47it/s]

2025-03-09 03:17:56,643 Evaluating as a multi-label problem: False
2025-03-09 03:17:56,651 DEV : loss 0.14727164804935455 - f1-score (micro avg)  0.868





2025-03-09 03:17:57,230 BAD EPOCHS (no improvement): 4
2025-03-09 03:18:02,269 saving best model
2025-03-09 03:18:07,152 ----------------------------------------------------------------------------------------------------
2025-03-09 03:18:14,842 epoch 7 - iter 50/500 - loss 0.02007418 - samples/sec: 26.38 - lr: 0.000002
2025-03-09 03:18:22,237 epoch 7 - iter 100/500 - loss 0.02633195 - samples/sec: 28.37 - lr: 0.000002
2025-03-09 03:18:30,461 epoch 7 - iter 150/500 - loss 0.02566188 - samples/sec: 25.41 - lr: 0.000002
2025-03-09 03:18:38,465 epoch 7 - iter 200/500 - loss 0.02707551 - samples/sec: 26.15 - lr: 0.000002
2025-03-09 03:18:46,146 epoch 7 - iter 250/500 - loss 0.02675634 - samples/sec: 27.27 - lr: 0.000002
2025-03-09 03:18:53,835 epoch 7 - iter 300/500 - loss 0.02398688 - samples/sec: 27.25 - lr: 0.000002
2025-03-09 03:19:01,594 epoch 7 - iter 350/500 - loss 0.02249460 - samples/sec: 27.03 - lr: 0.000002
2025-03-09 03:19:09,501 epoch 7 - iter 400/500 - loss 0.02307178 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 24.48it/s]

2025-03-09 03:19:31,078 Evaluating as a multi-label problem: False
2025-03-09 03:19:31,085 DEV : loss 0.187855526804924 - f1-score (micro avg)  0.874





2025-03-09 03:19:31,340 BAD EPOCHS (no improvement): 4
2025-03-09 03:19:36,408 saving best model
2025-03-09 03:19:41,405 ----------------------------------------------------------------------------------------------------
2025-03-09 03:19:49,281 epoch 8 - iter 50/500 - loss 0.02647776 - samples/sec: 25.75 - lr: 0.000002
2025-03-09 03:19:56,864 epoch 8 - iter 100/500 - loss 0.02314758 - samples/sec: 27.71 - lr: 0.000002
2025-03-09 03:20:04,687 epoch 8 - iter 150/500 - loss 0.01582190 - samples/sec: 26.77 - lr: 0.000002
2025-03-09 03:20:12,107 epoch 8 - iter 200/500 - loss 0.01400460 - samples/sec: 28.27 - lr: 0.000001
2025-03-09 03:20:19,682 epoch 8 - iter 250/500 - loss 0.01288645 - samples/sec: 27.67 - lr: 0.000001
2025-03-09 03:20:27,711 epoch 8 - iter 300/500 - loss 0.01294674 - samples/sec: 26.05 - lr: 0.000001
2025-03-09 03:20:36,093 epoch 8 - iter 350/500 - loss 0.01248877 - samples/sec: 25.85 - lr: 0.000001
2025-03-09 03:20:43,775 epoch 8 - iter 400/500 - loss 0.01205569 - sampl

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 24.35it/s]

2025-03-09 03:21:04,352 Evaluating as a multi-label problem: False
2025-03-09 03:21:04,360 DEV : loss 0.2120482623577118 - f1-score (micro avg)  0.856





2025-03-09 03:21:04,621 BAD EPOCHS (no improvement): 4
2025-03-09 03:21:09,674 ----------------------------------------------------------------------------------------------------
2025-03-09 03:21:17,219 epoch 9 - iter 50/500 - loss 0.00876841 - samples/sec: 26.89 - lr: 0.000001
2025-03-09 03:21:25,040 epoch 9 - iter 100/500 - loss 0.02042541 - samples/sec: 26.79 - lr: 0.000001
2025-03-09 03:21:32,513 epoch 9 - iter 150/500 - loss 0.01960203 - samples/sec: 28.06 - lr: 0.000001
2025-03-09 03:21:40,766 epoch 9 - iter 200/500 - loss 0.01510279 - samples/sec: 25.33 - lr: 0.000001
2025-03-09 03:21:48,907 epoch 9 - iter 250/500 - loss 0.01438247 - samples/sec: 25.69 - lr: 0.000001
2025-03-09 03:21:56,668 epoch 9 - iter 300/500 - loss 0.01209541 - samples/sec: 26.97 - lr: 0.000001
2025-03-09 03:22:04,349 epoch 9 - iter 350/500 - loss 0.01213079 - samples/sec: 27.29 - lr: 0.000001
2025-03-09 03:22:12,303 epoch 9 - iter 400/500 - loss 0.01070029 - samples/sec: 26.31 - lr: 0.000001
2025-03-09 03

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 24.46it/s]

2025-03-09 03:22:33,003 Evaluating as a multi-label problem: False
2025-03-09 03:22:33,011 DEV : loss 0.24829865992069244 - f1-score (micro avg)  0.846





2025-03-09 03:22:33,271 BAD EPOCHS (no improvement): 4
2025-03-09 03:22:38,364 ----------------------------------------------------------------------------------------------------
2025-03-09 03:22:45,850 epoch 10 - iter 50/500 - loss 0.01320022 - samples/sec: 27.11 - lr: 0.000001
2025-03-09 03:22:53,570 epoch 10 - iter 100/500 - loss 0.00814225 - samples/sec: 27.14 - lr: 0.000000
2025-03-09 03:23:01,284 epoch 10 - iter 150/500 - loss 0.00715945 - samples/sec: 27.15 - lr: 0.000000
2025-03-09 03:23:09,139 epoch 10 - iter 200/500 - loss 0.00706120 - samples/sec: 26.65 - lr: 0.000000
2025-03-09 03:23:17,149 epoch 10 - iter 250/500 - loss 0.00682371 - samples/sec: 26.15 - lr: 0.000000
2025-03-09 03:23:24,849 epoch 10 - iter 300/500 - loss 0.00948452 - samples/sec: 27.23 - lr: 0.000000
2025-03-09 03:23:32,288 epoch 10 - iter 350/500 - loss 0.00976867 - samples/sec: 28.23 - lr: 0.000000
2025-03-09 03:23:40,229 epoch 10 - iter 400/500 - loss 0.00955585 - samples/sec: 26.34 - lr: 0.000000
2025-

100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 24.48it/s]

2025-03-09 03:24:01,377 Evaluating as a multi-label problem: False
2025-03-09 03:24:01,385 DEV : loss 0.23265114426612854 - f1-score (micro avg)  0.86





2025-03-09 03:24:01,932 BAD EPOCHS (no improvement): 4
2025-03-09 03:24:07,047 ----------------------------------------------------------------------------------------------------
2025-03-09 03:24:07,050 loading file logs/PDSC_V2/GELECTRA/G29B/3K/k5/best-model.pt


100%|█████████████████████████████████████████| 125/125 [00:05<00:00, 23.23it/s]

2025-03-09 03:24:25,258 Evaluating as a multi-label problem: False
2025-03-09 03:24:25,266 0.846	0.846	0.846	0.846
2025-03-09 03:24:25,267 
Results:
- F-score (micro) 0.846
- F-score (macro) 0.8456
- Accuracy 0.846

By class:
              precision    recall  f1-score   support

       PSEUD     0.8145    0.8960    0.8533       250
        ORIG     0.8844    0.7960    0.8379       250

    accuracy                         0.8460       500
   macro avg     0.8495    0.8460    0.8456       500
weighted avg     0.8495    0.8460    0.8456       500

2025-03-09 03:24:25,267 ----------------------------------------------------------------------------------------------------



