In [1]:
# if running in Colab run this first, otherwise make sure flair[word-embeddings] is installed
import torch
!pip install flair[word-embeddings]
import flair
# it looks like we have to restart the Colab runtime after installation



In [3]:
import os
from pathlib import Path

from flair.data import Corpus, Sentence
from flair.datasets import ColumnCorpus
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings, CharacterEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer


if __name__ == "__main__":
    # load corpus
    columns = {0: 'text', 1: 'ner'}
    data_folder = '/home/dataset' # upload dataset to this folder in Colab
    corpus: Corpus = ColumnCorpus(data_folder, columns,
                                train_file='train.txt',
                                test_file='test.txt',
                                dev_file='dev.txt',
                                column_delimiter='\t')

    # extract the labels from the corpus
    label_type = 'ner'
    label_dict = corpus.make_label_dictionary(label_type=label_type, add_unk=False)

    # train model
    embedding_types = [
        WordEmbeddings('twitter'), # tried 'glove', better results with 'twitter'
        FlairEmbeddings('news-forward'),
        FlairEmbeddings('news-backward')
    ]

    embeddings = StackedEmbeddings(embeddings=embedding_types)

    tagger = SequenceTagger(hidden_size=256,
                            embeddings=embeddings,
                            tag_dictionary=label_dict,
                            tag_type=label_type,
                            tag_format="BIO")

    trainer = ModelTrainer(tagger, corpus)

    # tuning learning rate
    learning_rates = [0.01, 0.005, 0.0025, 0.001]
    for lr in learning_rates:
        path = '/home/model-lr-' + str(lr)
        trainer.train(path,
                      learning_rate=lr,
                      mini_batch_size=32,
                      max_epochs=25)


2025-05-01 14:57:24,406 Reading data from /home/dataset
2025-05-01 14:57:24,407 Train: /home/dataset/train.txt
2025-05-01 14:57:24,408 Dev: /home/dataset/dev.txt
2025-05-01 14:57:24,410 Test: /home/dataset/test.txt
2025-05-01 14:57:26,364 Computing label dictionary. Progress:


4it [00:00, 5121.25it/s]
6338it [00:00, 39848.75it/s]

2025-05-01 14:57:26,534 Dictionary created for label 'ner' with 3 values: PER (seen 3101 times), ORG (seen 2267 times), LOC (seen 1996 times)





2025-05-01 14:57:43,940 SequenceTagger predicts: Dictionary with 7 tags: O, B-PER, I-PER, B-ORG, I-ORG, B-LOC, I-LOC
2025-05-01 14:57:44,132 ----------------------------------------------------------------------------------------------------
2025-05-01 14:57:44,133 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'twitter'
      (embedding): Embedding(1193515, 100)
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn): LSTM(100, 2048)
      )
    )
    (list_embedding_2): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn): LSTM(100, 2048)
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=4196, out_features=4196, bias=True)
  (rnn): LSTM(41

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2025-05-01 14:57:49,337 epoch 1 - iter 19/199 - loss 2.45095682 - time (sec): 5.18 - samples/sec: 1818.13 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:58:00,251 epoch 1 - iter 38/199 - loss 1.65906477 - time (sec): 16.10 - samples/sec: 1191.93 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:58:04,957 epoch 1 - iter 57/199 - loss 1.30387725 - time (sec): 20.80 - samples/sec: 1375.84 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:58:09,612 epoch 1 - iter 76/199 - loss 1.10409348 - time (sec): 25.46 - samples/sec: 1492.43 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:58:14,565 epoch 1 - iter 95/199 - loss 0.96391163 - time (sec): 30.41 - samples/sec: 1567.98 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:58:19,216 epoch 1 - iter 114/199 - loss 0.86495553 - time (sec): 35.06 - samples/sec: 1629.35 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:58:24,209 epoch 1 - iter 133/199 - loss 0.79371887 - time (sec): 40.06 - samples/sec: 1669.63 - lr: 0.010000 - momentum: 0.00000

100%|██████████| 16/16 [00:07<00:00,  2.20it/s]

2025-05-01 14:58:52,331 DEV : loss 0.281042218208313 - f1-score (micro avg)  0.2896
2025-05-01 14:58:52,418  - 0 epochs without improvement
2025-05-01 14:58:52,419 saving best model





2025-05-01 14:59:06,728 ----------------------------------------------------------------------------------------------------
2025-05-01 14:59:08,676 epoch 2 - iter 19/199 - loss 0.31571540 - time (sec): 1.95 - samples/sec: 4928.01 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:10,554 epoch 2 - iter 38/199 - loss 0.31869651 - time (sec): 3.82 - samples/sec: 5000.67 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:12,861 epoch 2 - iter 57/199 - loss 0.31038453 - time (sec): 6.13 - samples/sec: 4641.60 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:14,825 epoch 2 - iter 76/199 - loss 0.30776057 - time (sec): 8.10 - samples/sec: 4689.77 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:16,664 epoch 2 - iter 95/199 - loss 0.30470669 - time (sec): 9.93 - samples/sec: 4800.31 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:18,547 epoch 2 - iter 114/199 - loss 0.29771326 - time (sec): 11.82 - samples/sec: 4833.63 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:20,4

100%|██████████| 16/16 [00:04<00:00,  3.56it/s]

2025-05-01 14:59:31,897 DEV : loss 0.21926377713680267 - f1-score (micro avg)  0.4923
2025-05-01 14:59:31,961  - 0 epochs without improvement
2025-05-01 14:59:31,962 saving best model





2025-05-01 14:59:38,442 ----------------------------------------------------------------------------------------------------
2025-05-01 14:59:42,171 epoch 3 - iter 19/199 - loss 0.26330778 - time (sec): 3.73 - samples/sec: 2543.30 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:43,983 epoch 3 - iter 38/199 - loss 0.26300577 - time (sec): 5.54 - samples/sec: 3406.03 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:45,782 epoch 3 - iter 57/199 - loss 0.25287364 - time (sec): 7.34 - samples/sec: 3881.59 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:48,082 epoch 3 - iter 76/199 - loss 0.25234294 - time (sec): 9.64 - samples/sec: 3950.09 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:49,971 epoch 3 - iter 95/199 - loss 0.24885947 - time (sec): 11.53 - samples/sec: 4134.92 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:51,911 epoch 3 - iter 114/199 - loss 0.24974972 - time (sec): 13.47 - samples/sec: 4268.55 - lr: 0.010000 - momentum: 0.000000
2025-05-01 14:59:53,

100%|██████████| 16/16 [00:03<00:00,  4.31it/s]

2025-05-01 15:00:04,300 DEV : loss 0.17771248519420624 - f1-score (micro avg)  0.6147





2025-05-01 15:00:04,370  - 0 epochs without improvement
2025-05-01 15:00:04,371 saving best model
2025-05-01 15:00:22,521 ----------------------------------------------------------------------------------------------------
2025-05-01 15:00:24,600 epoch 4 - iter 19/199 - loss 0.20893424 - time (sec): 2.08 - samples/sec: 4654.58 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:00:26,527 epoch 4 - iter 38/199 - loss 0.21197864 - time (sec): 4.00 - samples/sec: 4842.75 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:00:28,395 epoch 4 - iter 57/199 - loss 0.21397906 - time (sec): 5.87 - samples/sec: 4959.50 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:00:30,219 epoch 4 - iter 76/199 - loss 0.21664841 - time (sec): 7.70 - samples/sec: 4972.89 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:00:32,087 epoch 4 - iter 95/199 - loss 0.21357390 - time (sec): 9.56 - samples/sec: 5015.04 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:00:34,217 epoch 4 - iter 114/199 - loss 0.21378203 - 

100%|██████████| 16/16 [00:04<00:00,  3.71it/s]

2025-05-01 15:00:47,016 DEV : loss 0.16606396436691284 - f1-score (micro avg)  0.6355
2025-05-01 15:00:47,081  - 0 epochs without improvement
2025-05-01 15:00:47,083 saving best model





2025-05-01 15:01:03,189 ----------------------------------------------------------------------------------------------------
2025-05-01 15:01:05,154 epoch 5 - iter 19/199 - loss 0.19499963 - time (sec): 1.96 - samples/sec: 4864.93 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:07,064 epoch 5 - iter 38/199 - loss 0.19496428 - time (sec): 3.87 - samples/sec: 4987.95 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:09,082 epoch 5 - iter 57/199 - loss 0.19562748 - time (sec): 5.89 - samples/sec: 4905.21 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:11,263 epoch 5 - iter 76/199 - loss 0.19636573 - time (sec): 8.07 - samples/sec: 4762.34 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:13,099 epoch 5 - iter 95/199 - loss 0.19043085 - time (sec): 9.91 - samples/sec: 4833.72 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:14,947 epoch 5 - iter 114/199 - loss 0.19164522 - time (sec): 11.76 - samples/sec: 4867.59 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:16,8

100%|██████████| 16/16 [00:03<00:00,  4.35it/s]

2025-05-01 15:01:27,463 DEV : loss 0.16350066661834717 - f1-score (micro avg)  0.6384





2025-05-01 15:01:27,530  - 0 epochs without improvement
2025-05-01 15:01:27,531 saving best model
2025-05-01 15:01:42,087 ----------------------------------------------------------------------------------------------------
2025-05-01 15:01:44,146 epoch 6 - iter 19/199 - loss 0.19911013 - time (sec): 2.06 - samples/sec: 4639.29 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:46,377 epoch 6 - iter 38/199 - loss 0.20140125 - time (sec): 4.29 - samples/sec: 4471.80 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:48,279 epoch 6 - iter 57/199 - loss 0.19901345 - time (sec): 6.19 - samples/sec: 4662.54 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:50,093 epoch 6 - iter 76/199 - loss 0.19630793 - time (sec): 8.00 - samples/sec: 4809.34 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:51,993 epoch 6 - iter 95/199 - loss 0.19332679 - time (sec): 9.90 - samples/sec: 4862.73 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:01:53,800 epoch 6 - iter 114/199 - loss 0.18864860 - 

100%|██████████| 16/16 [00:03<00:00,  4.29it/s]

2025-05-01 15:02:07,077 DEV : loss 0.15201134979724884 - f1-score (micro avg)  0.666





2025-05-01 15:02:07,144  - 0 epochs without improvement
2025-05-01 15:02:07,145 saving best model
2025-05-01 15:02:19,648 ----------------------------------------------------------------------------------------------------
2025-05-01 15:02:21,922 epoch 7 - iter 19/199 - loss 0.17019046 - time (sec): 2.27 - samples/sec: 4246.38 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:02:24,054 epoch 7 - iter 38/199 - loss 0.17644946 - time (sec): 4.40 - samples/sec: 4296.48 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:02:25,856 epoch 7 - iter 57/199 - loss 0.17943889 - time (sec): 6.21 - samples/sec: 4594.55 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:02:27,746 epoch 7 - iter 76/199 - loss 0.17247997 - time (sec): 8.10 - samples/sec: 4698.33 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:02:29,604 epoch 7 - iter 95/199 - loss 0.17288817 - time (sec): 9.95 - samples/sec: 4796.70 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:02:31,498 epoch 7 - iter 114/199 - loss 0.17512595 - 

100%|██████████| 16/16 [00:03<00:00,  4.42it/s]

2025-05-01 15:02:43,813 DEV : loss 0.14719776809215546 - f1-score (micro avg)  0.6746





2025-05-01 15:02:43,882  - 0 epochs without improvement
2025-05-01 15:02:43,886 saving best model
2025-05-01 15:02:59,905 ----------------------------------------------------------------------------------------------------
2025-05-01 15:03:01,845 epoch 8 - iter 19/199 - loss 0.17101075 - time (sec): 1.94 - samples/sec: 4851.45 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:03,667 epoch 8 - iter 38/199 - loss 0.17109090 - time (sec): 3.76 - samples/sec: 5070.45 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:05,455 epoch 8 - iter 57/199 - loss 0.16532453 - time (sec): 5.55 - samples/sec: 5124.14 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:07,287 epoch 8 - iter 76/199 - loss 0.16595084 - time (sec): 7.38 - samples/sec: 5182.31 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:09,096 epoch 8 - iter 95/199 - loss 0.17131976 - time (sec): 9.19 - samples/sec: 5181.57 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:11,405 epoch 8 - iter 114/199 - loss 0.16818488 - 

100%|██████████| 16/16 [00:04<00:00,  3.70it/s]

2025-05-01 15:03:24,019 DEV : loss 0.1430206298828125 - f1-score (micro avg)  0.6849





2025-05-01 15:03:24,085  - 0 epochs without improvement
2025-05-01 15:03:24,086 saving best model
2025-05-01 15:03:45,075 ----------------------------------------------------------------------------------------------------
2025-05-01 15:03:47,596 epoch 9 - iter 19/199 - loss 0.16014225 - time (sec): 2.52 - samples/sec: 3903.70 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:49,452 epoch 9 - iter 38/199 - loss 0.15759776 - time (sec): 4.37 - samples/sec: 4408.26 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:51,257 epoch 9 - iter 57/199 - loss 0.16195465 - time (sec): 6.18 - samples/sec: 4649.79 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:53,054 epoch 9 - iter 76/199 - loss 0.16112028 - time (sec): 7.98 - samples/sec: 4792.69 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:54,850 epoch 9 - iter 95/199 - loss 0.16103722 - time (sec): 9.77 - samples/sec: 4868.14 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:03:56,676 epoch 9 - iter 114/199 - loss 0.16248447 - 

100%|██████████| 16/16 [00:05<00:00,  3.16it/s]

2025-05-01 15:04:10,548 DEV : loss 0.13999243080615997 - f1-score (micro avg)  0.6946
2025-05-01 15:04:10,659  - 0 epochs without improvement
2025-05-01 15:04:10,662 saving best model





2025-05-01 15:04:26,019 ----------------------------------------------------------------------------------------------------
2025-05-01 15:04:28,001 epoch 10 - iter 19/199 - loss 0.14783927 - time (sec): 1.98 - samples/sec: 5021.39 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:04:29,863 epoch 10 - iter 38/199 - loss 0.15403283 - time (sec): 3.84 - samples/sec: 5145.29 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:04:31,700 epoch 10 - iter 57/199 - loss 0.15545963 - time (sec): 5.68 - samples/sec: 5181.90 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:04:33,844 epoch 10 - iter 76/199 - loss 0.15691792 - time (sec): 7.82 - samples/sec: 4985.56 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:04:35,959 epoch 10 - iter 95/199 - loss 0.15685195 - time (sec): 9.94 - samples/sec: 4866.72 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:04:37,813 epoch 10 - iter 114/199 - loss 0.15837932 - time (sec): 11.79 - samples/sec: 4915.05 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:0

100%|██████████| 16/16 [00:03<00:00,  4.18it/s]

2025-05-01 15:04:50,022 DEV : loss 0.1332768052816391 - f1-score (micro avg)  0.7021





2025-05-01 15:04:50,087  - 0 epochs without improvement
2025-05-01 15:04:50,088 saving best model
2025-05-01 15:05:00,919 ----------------------------------------------------------------------------------------------------
2025-05-01 15:05:02,769 epoch 11 - iter 19/199 - loss 0.16446019 - time (sec): 1.85 - samples/sec: 5150.85 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:04,614 epoch 11 - iter 38/199 - loss 0.16763468 - time (sec): 3.69 - samples/sec: 5176.97 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:06,374 epoch 11 - iter 57/199 - loss 0.16370033 - time (sec): 5.45 - samples/sec: 5216.60 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:08,320 epoch 11 - iter 76/199 - loss 0.16385554 - time (sec): 7.40 - samples/sec: 5166.53 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:10,608 epoch 11 - iter 95/199 - loss 0.16035088 - time (sec): 9.69 - samples/sec: 4934.52 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:12,528 epoch 11 - iter 114/199 - loss 0.15868

100%|██████████| 16/16 [00:04<00:00,  3.81it/s]

2025-05-01 15:05:24,940 DEV : loss 0.14920419454574585 - f1-score (micro avg)  0.6365





2025-05-01 15:05:25,011  - 1 epochs without improvement
2025-05-01 15:05:25,012 ----------------------------------------------------------------------------------------------------
2025-05-01 15:05:27,042 epoch 12 - iter 19/199 - loss 0.14967415 - time (sec): 2.03 - samples/sec: 4656.56 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:28,988 epoch 12 - iter 38/199 - loss 0.14611337 - time (sec): 3.98 - samples/sec: 4781.53 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:30,772 epoch 12 - iter 57/199 - loss 0.14877595 - time (sec): 5.76 - samples/sec: 4959.07 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:32,737 epoch 12 - iter 76/199 - loss 0.15042837 - time (sec): 7.72 - samples/sec: 4964.97 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:34,879 epoch 12 - iter 95/199 - loss 0.14874411 - time (sec): 9.87 - samples/sec: 4840.08 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:05:36,765 epoch 12 - iter 114/199 - loss 0.15135432 - time (sec): 11.75 - samples/sec: 488

100%|██████████| 16/16 [00:04<00:00,  3.20it/s]

2025-05-01 15:05:50,325 DEV : loss 0.13053101301193237 - f1-score (micro avg)  0.7027





2025-05-01 15:05:50,390  - 0 epochs without improvement
2025-05-01 15:05:50,392 saving best model
2025-05-01 15:05:56,408 ----------------------------------------------------------------------------------------------------
2025-05-01 15:05:58,852 epoch 13 - iter 19/199 - loss 0.14473841 - time (sec): 2.44 - samples/sec: 3895.04 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:00,864 epoch 13 - iter 38/199 - loss 0.14627230 - time (sec): 4.45 - samples/sec: 4355.79 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:02,730 epoch 13 - iter 57/199 - loss 0.14712369 - time (sec): 6.32 - samples/sec: 4545.81 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:04,542 epoch 13 - iter 76/199 - loss 0.15026665 - time (sec): 8.13 - samples/sec: 4690.81 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:06,420 epoch 13 - iter 95/199 - loss 0.14926294 - time (sec): 10.01 - samples/sec: 4759.08 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:08,419 epoch 13 - iter 114/199 - loss 0.1484

100%|██████████| 16/16 [00:03<00:00,  4.10it/s]

2025-05-01 15:06:20,918 DEV : loss 0.13439616560935974 - f1-score (micro avg)  0.6961
2025-05-01 15:06:21,025  - 1 epochs without improvement
2025-05-01 15:06:21,028 ----------------------------------------------------------------------------------------------------





2025-05-01 15:06:23,309 epoch 14 - iter 19/199 - loss 0.13894199 - time (sec): 2.28 - samples/sec: 4167.92 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:25,203 epoch 14 - iter 38/199 - loss 0.13883273 - time (sec): 4.17 - samples/sec: 4615.33 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:27,124 epoch 14 - iter 57/199 - loss 0.14430050 - time (sec): 6.09 - samples/sec: 4716.17 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:28,938 epoch 14 - iter 76/199 - loss 0.14469257 - time (sec): 7.91 - samples/sec: 4862.56 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:30,783 epoch 14 - iter 95/199 - loss 0.14672811 - time (sec): 9.75 - samples/sec: 4925.12 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:32,766 epoch 14 - iter 114/199 - loss 0.14658934 - time (sec): 11.74 - samples/sec: 4907.69 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:34,933 epoch 14 - iter 133/199 - loss 0.14871365 - time (sec): 13.90 - samples/sec: 4841.85 - lr: 0.010000 - momentum: 0.00

100%|██████████| 16/16 [00:03<00:00,  4.08it/s]

2025-05-01 15:06:45,052 DEV : loss 0.1286841779947281 - f1-score (micro avg)  0.6873
2025-05-01 15:06:45,165  - 2 epochs without improvement
2025-05-01 15:06:45,168 ----------------------------------------------------------------------------------------------------





2025-05-01 15:06:47,439 epoch 15 - iter 19/199 - loss 0.13982757 - time (sec): 2.27 - samples/sec: 4250.85 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:49,313 epoch 15 - iter 38/199 - loss 0.14099357 - time (sec): 4.14 - samples/sec: 4640.85 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:51,109 epoch 15 - iter 57/199 - loss 0.14773677 - time (sec): 5.94 - samples/sec: 4834.21 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:52,888 epoch 15 - iter 76/199 - loss 0.14810734 - time (sec): 7.72 - samples/sec: 4935.40 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:54,700 epoch 15 - iter 95/199 - loss 0.14652784 - time (sec): 9.53 - samples/sec: 5010.03 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:56,595 epoch 15 - iter 114/199 - loss 0.14766096 - time (sec): 11.43 - samples/sec: 5010.64 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:06:58,776 epoch 15 - iter 133/199 - loss 0.14591434 - time (sec): 13.61 - samples/sec: 4923.65 - lr: 0.010000 - momentum: 0.00

100%|██████████| 16/16 [00:04<00:00,  3.21it/s]

2025-05-01 15:07:09,996 DEV : loss 0.13569656014442444 - f1-score (micro avg)  0.6717
2025-05-01 15:07:10,065  - 3 epochs without improvement
2025-05-01 15:07:10,067 ----------------------------------------------------------------------------------------------------





2025-05-01 15:07:12,047 epoch 16 - iter 19/199 - loss 0.13455216 - time (sec): 1.98 - samples/sec: 4950.11 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:07:13,896 epoch 16 - iter 38/199 - loss 0.14074371 - time (sec): 3.83 - samples/sec: 5110.16 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:07:15,670 epoch 16 - iter 57/199 - loss 0.14321794 - time (sec): 5.60 - samples/sec: 5190.13 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:07:17,478 epoch 16 - iter 76/199 - loss 0.14227634 - time (sec): 7.41 - samples/sec: 5216.50 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:07:19,221 epoch 16 - iter 95/199 - loss 0.14048544 - time (sec): 9.15 - samples/sec: 5248.81 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:07:21,317 epoch 16 - iter 114/199 - loss 0.14221539 - time (sec): 11.25 - samples/sec: 5120.51 - lr: 0.010000 - momentum: 0.000000
2025-05-01 15:07:23,479 epoch 16 - iter 133/199 - loss 0.14220871 - time (sec): 13.41 - samples/sec: 5015.44 - lr: 0.010000 - momentum: 0.00

100%|██████████| 16/16 [00:04<00:00,  3.76it/s]

2025-05-01 15:07:34,034 DEV : loss 0.13104672729969025 - f1-score (micro avg)  0.6847
2025-05-01 15:07:34,097  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.005]
2025-05-01 15:07:34,098 ----------------------------------------------------------------------------------------------------





2025-05-01 15:07:36,007 epoch 17 - iter 19/199 - loss 0.12572332 - time (sec): 1.91 - samples/sec: 4932.00 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:07:37,804 epoch 17 - iter 38/199 - loss 0.13016257 - time (sec): 3.70 - samples/sec: 5215.55 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:07:39,624 epoch 17 - iter 57/199 - loss 0.13266959 - time (sec): 5.52 - samples/sec: 5242.43 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:07:41,427 epoch 17 - iter 76/199 - loss 0.13231923 - time (sec): 7.33 - samples/sec: 5244.71 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:07:43,172 epoch 17 - iter 95/199 - loss 0.13516139 - time (sec): 9.07 - samples/sec: 5279.91 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:07:45,202 epoch 17 - iter 114/199 - loss 0.13438655 - time (sec): 11.10 - samples/sec: 5164.25 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:07:47,270 epoch 17 - iter 133/199 - loss 0.13591455 - time (sec): 13.17 - samples/sec: 5092.63 - lr: 0.005000 - momentum: 0.00

100%|██████████| 16/16 [00:04<00:00,  3.80it/s]

2025-05-01 15:07:57,782 DEV : loss 0.12349792569875717 - f1-score (micro avg)  0.7121
2025-05-01 15:07:57,896  - 0 epochs without improvement
2025-05-01 15:07:57,897 saving best model





2025-05-01 15:08:19,312 ----------------------------------------------------------------------------------------------------
2025-05-01 15:08:21,818 epoch 18 - iter 19/199 - loss 0.14177451 - time (sec): 2.50 - samples/sec: 3892.66 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:23,744 epoch 18 - iter 38/199 - loss 0.13998654 - time (sec): 4.43 - samples/sec: 4352.19 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:25,601 epoch 18 - iter 57/199 - loss 0.14144443 - time (sec): 6.29 - samples/sec: 4616.89 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:27,434 epoch 18 - iter 76/199 - loss 0.14286955 - time (sec): 8.12 - samples/sec: 4776.37 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:29,225 epoch 18 - iter 95/199 - loss 0.14015797 - time (sec): 9.91 - samples/sec: 4883.59 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:31,074 epoch 18 - iter 114/199 - loss 0.13970448 - time (sec): 11.76 - samples/sec: 4916.37 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:0

100%|██████████| 16/16 [00:03<00:00,  4.34it/s]

2025-05-01 15:08:43,368 DEV : loss 0.12639190256595612 - f1-score (micro avg)  0.6956





2025-05-01 15:08:43,432  - 1 epochs without improvement
2025-05-01 15:08:43,433 ----------------------------------------------------------------------------------------------------
2025-05-01 15:08:45,857 epoch 19 - iter 19/199 - loss 0.13344494 - time (sec): 2.42 - samples/sec: 3967.53 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:47,695 epoch 19 - iter 38/199 - loss 0.12963280 - time (sec): 4.26 - samples/sec: 4519.56 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:50,463 epoch 19 - iter 57/199 - loss 0.13243047 - time (sec): 7.03 - samples/sec: 4111.13 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:52,301 epoch 19 - iter 76/199 - loss 0.13478395 - time (sec): 8.87 - samples/sec: 4336.00 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:54,117 epoch 19 - iter 95/199 - loss 0.13584145 - time (sec): 10.68 - samples/sec: 4478.33 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:08:56,073 epoch 19 - iter 114/199 - loss 0.13618268 - time (sec): 12.64 - samples/sec: 45

100%|██████████| 16/16 [00:04<00:00,  3.99it/s]

2025-05-01 15:09:08,558 DEV : loss 0.13073018193244934 - f1-score (micro avg)  0.681
2025-05-01 15:09:08,662  - 2 epochs without improvement
2025-05-01 15:09:08,663 ----------------------------------------------------------------------------------------------------





2025-05-01 15:09:10,888 epoch 20 - iter 19/199 - loss 0.14270520 - time (sec): 2.22 - samples/sec: 4331.13 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:12,773 epoch 20 - iter 38/199 - loss 0.13804096 - time (sec): 4.11 - samples/sec: 4715.37 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:14,688 epoch 20 - iter 57/199 - loss 0.13732320 - time (sec): 6.02 - samples/sec: 4799.14 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:16,596 epoch 20 - iter 76/199 - loss 0.13890933 - time (sec): 7.93 - samples/sec: 4842.21 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:18,487 epoch 20 - iter 95/199 - loss 0.13744690 - time (sec): 9.82 - samples/sec: 4879.61 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:20,560 epoch 20 - iter 114/199 - loss 0.13421060 - time (sec): 11.90 - samples/sec: 4846.83 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:22,726 epoch 20 - iter 133/199 - loss 0.13643326 - time (sec): 14.06 - samples/sec: 4786.29 - lr: 0.005000 - momentum: 0.00

100%|██████████| 16/16 [00:04<00:00,  3.67it/s]

2025-05-01 15:09:33,508 DEV : loss 0.12422606348991394 - f1-score (micro avg)  0.7019
2025-05-01 15:09:33,572  - 3 epochs without improvement
2025-05-01 15:09:33,573 ----------------------------------------------------------------------------------------------------





2025-05-01 15:09:35,549 epoch 21 - iter 19/199 - loss 0.13933987 - time (sec): 1.97 - samples/sec: 4799.27 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:37,435 epoch 21 - iter 38/199 - loss 0.13761217 - time (sec): 3.86 - samples/sec: 4923.86 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:39,224 epoch 21 - iter 57/199 - loss 0.13724051 - time (sec): 5.65 - samples/sec: 5006.33 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:41,081 epoch 21 - iter 76/199 - loss 0.13583697 - time (sec): 7.51 - samples/sec: 5050.52 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:42,969 epoch 21 - iter 95/199 - loss 0.13514599 - time (sec): 9.39 - samples/sec: 5073.43 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:45,316 epoch 21 - iter 114/199 - loss 0.13388416 - time (sec): 11.74 - samples/sec: 4878.98 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:09:47,246 epoch 21 - iter 133/199 - loss 0.13473046 - time (sec): 13.67 - samples/sec: 4881.01 - lr: 0.005000 - momentum: 0.00

100%|██████████| 16/16 [00:04<00:00,  3.72it/s]

2025-05-01 15:09:58,139 DEV : loss 0.12326453626155853 - f1-score (micro avg)  0.7011





2025-05-01 15:09:58,205  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0025]
2025-05-01 15:09:58,205 ----------------------------------------------------------------------------------------------------
2025-05-01 15:10:00,112 epoch 22 - iter 19/199 - loss 0.13924189 - time (sec): 1.90 - samples/sec: 4957.13 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:01,989 epoch 22 - iter 38/199 - loss 0.13130835 - time (sec): 3.78 - samples/sec: 5031.76 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:03,794 epoch 22 - iter 57/199 - loss 0.13357117 - time (sec): 5.59 - samples/sec: 5113.78 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:05,635 epoch 22 - iter 76/199 - loss 0.13155184 - time (sec): 7.43 - samples/sec: 5152.76 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:07,502 epoch 22 - iter 95/199 - loss 0.13010816 - time (sec): 9.29 - samples/sec: 5146.37 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:09,874 epoch 22 - iter 114/199

100%|██████████| 16/16 [00:04<00:00,  3.70it/s]

2025-05-01 15:10:23,357 DEV : loss 0.12698207795619965 - f1-score (micro avg)  0.6793





2025-05-01 15:10:23,458  - 1 epochs without improvement
2025-05-01 15:10:23,459 ----------------------------------------------------------------------------------------------------
2025-05-01 15:10:25,383 epoch 23 - iter 19/199 - loss 0.14073092 - time (sec): 1.92 - samples/sec: 4978.02 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:27,223 epoch 23 - iter 38/199 - loss 0.13184948 - time (sec): 3.76 - samples/sec: 5073.17 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:29,092 epoch 23 - iter 57/199 - loss 0.13216831 - time (sec): 5.63 - samples/sec: 5125.47 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:30,805 epoch 23 - iter 76/199 - loss 0.13106461 - time (sec): 7.34 - samples/sec: 5200.31 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:33,013 epoch 23 - iter 95/199 - loss 0.13016888 - time (sec): 9.55 - samples/sec: 5011.01 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:35,021 epoch 23 - iter 114/199 - loss 0.13062981 - time (sec): 11.56 - samples/sec: 497

100%|██████████| 16/16 [00:04<00:00,  3.74it/s]

2025-05-01 15:10:47,400 DEV : loss 0.1262766569852829 - f1-score (micro avg)  0.6766





2025-05-01 15:10:47,474  - 2 epochs without improvement
2025-05-01 15:10:47,475 ----------------------------------------------------------------------------------------------------
2025-05-01 15:10:49,417 epoch 24 - iter 19/199 - loss 0.12937996 - time (sec): 1.94 - samples/sec: 4930.28 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:51,237 epoch 24 - iter 38/199 - loss 0.13127579 - time (sec): 3.76 - samples/sec: 5071.53 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:53,101 epoch 24 - iter 57/199 - loss 0.12953458 - time (sec): 5.62 - samples/sec: 5098.31 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:54,969 epoch 24 - iter 76/199 - loss 0.13041287 - time (sec): 7.49 - samples/sec: 5119.03 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:57,310 epoch 24 - iter 95/199 - loss 0.12977822 - time (sec): 9.83 - samples/sec: 4879.57 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:10:59,298 epoch 24 - iter 114/199 - loss 0.13032499 - time (sec): 11.82 - samples/sec: 487

100%|██████████| 16/16 [00:04<00:00,  3.69it/s]

2025-05-01 15:11:11,754 DEV : loss 0.12246964126825333 - f1-score (micro avg)  0.6911





2025-05-01 15:11:11,823  - 3 epochs without improvement
2025-05-01 15:11:11,824 ----------------------------------------------------------------------------------------------------
2025-05-01 15:11:13,703 epoch 25 - iter 19/199 - loss 0.13309657 - time (sec): 1.88 - samples/sec: 4984.39 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:11:15,566 epoch 25 - iter 38/199 - loss 0.12726129 - time (sec): 3.74 - samples/sec: 5141.28 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:11:17,382 epoch 25 - iter 57/199 - loss 0.12831761 - time (sec): 5.56 - samples/sec: 5137.21 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:11:19,188 epoch 25 - iter 76/199 - loss 0.12989220 - time (sec): 7.36 - samples/sec: 5210.05 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:11:21,418 epoch 25 - iter 95/199 - loss 0.12973222 - time (sec): 9.59 - samples/sec: 4963.99 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:11:23,368 epoch 25 - iter 114/199 - loss 0.12992026 - time (sec): 11.54 - samples/sec: 496

100%|██████████| 16/16 [00:05<00:00,  3.13it/s]

2025-05-01 15:11:36,572 DEV : loss 0.12500421702861786 - f1-score (micro avg)  0.6841





2025-05-01 15:11:36,636  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00125]
2025-05-01 15:11:59,722 ----------------------------------------------------------------------------------------------------
2025-05-01 15:11:59,723 Loading model from best epoch ...
2025-05-01 15:12:03,883 SequenceTagger predicts: Dictionary with 9 tags: O, B-PER, I-PER, B-ORG, I-ORG, B-LOC, I-LOC, <START>, <STOP>


100%|██████████| 32/32 [00:11<00:00,  2.85it/s]


2025-05-01 15:12:15,691 
Results:
- F-score (micro) 0.7109
- F-score (macro) 0.6387
- Accuracy 0.6018

By class:
              precision    recall  f1-score   support

         PER     0.8383    0.8508    0.8445      1602
         ORG     0.5804    0.4192    0.4868       792
         LOC     0.7405    0.4834    0.5849       602

   micro avg     0.7665    0.6629    0.7109      2996
   macro avg     0.7197    0.5845    0.6387      2996
weighted avg     0.7504    0.6629    0.6978      2996

2025-05-01 15:12:15,692 ----------------------------------------------------------------------------------------------------
2025-05-01 15:12:15,697 ----------------------------------------------------------------------------------------------------
2025-05-01 15:12:15,698 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'twitter'
      (embedding): Embedding(1193515, 100)
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(


  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2025-05-01 15:12:17,457 epoch 1 - iter 19/199 - loss 0.12675135 - time (sec): 1.74 - samples/sec: 5590.97 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:12:19,530 epoch 1 - iter 38/199 - loss 0.13026125 - time (sec): 3.81 - samples/sec: 5088.58 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:12:21,462 epoch 1 - iter 57/199 - loss 0.12936839 - time (sec): 5.74 - samples/sec: 4966.73 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:12:23,237 epoch 1 - iter 76/199 - loss 0.13395215 - time (sec): 7.52 - samples/sec: 5043.64 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:12:25,092 epoch 1 - iter 95/199 - loss 0.13475331 - time (sec): 9.37 - samples/sec: 5051.99 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:12:26,925 epoch 1 - iter 114/199 - loss 0.13417852 - time (sec): 11.20 - samples/sec: 5091.17 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:12:28,684 epoch 1 - iter 133/199 - loss 0.13499164 - time (sec): 12.96 - samples/sec: 5139.93 - lr: 0.005000 - momentum: 0.000000
20

100%|██████████| 16/16 [00:04<00:00,  3.73it/s]

2025-05-01 15:12:45,246 DEV : loss 0.12516522407531738 - f1-score (micro avg)  0.7017





2025-05-01 15:12:45,314  - 0 epochs without improvement
2025-05-01 15:12:45,316  - 0 epochs without improvement
2025-05-01 15:12:45,317 saving best model
2025-05-01 15:13:02,144 ----------------------------------------------------------------------------------------------------
2025-05-01 15:13:04,022 epoch 2 - iter 19/199 - loss 0.13343646 - time (sec): 1.88 - samples/sec: 4969.56 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:13:06,004 epoch 2 - iter 38/199 - loss 0.13484069 - time (sec): 3.86 - samples/sec: 4949.60 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:13:08,237 epoch 2 - iter 57/199 - loss 0.13286307 - time (sec): 6.09 - samples/sec: 4675.35 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:13:10,117 epoch 2 - iter 76/199 - loss 0.13489646 - time (sec): 7.97 - samples/sec: 4794.93 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:13:11,869 epoch 2 - iter 95/199 - loss 0.13455579 - time (sec): 9.72 - samples/sec: 4894.26 - lr: 0.005000 - momentum: 0.000000
2025-05-01 

100%|██████████| 16/16 [00:03<00:00,  4.27it/s]

2025-05-01 15:13:26,192 DEV : loss 0.12632349133491516 - f1-score (micro avg)  0.6847





2025-05-01 15:13:26,259  - 1 epochs without improvement
2025-05-01 15:13:26,262  - 1 epochs without improvement
2025-05-01 15:13:26,263 ----------------------------------------------------------------------------------------------------
2025-05-01 15:13:28,283 epoch 3 - iter 19/199 - loss 0.14795930 - time (sec): 2.02 - samples/sec: 4875.01 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:13:30,388 epoch 3 - iter 38/199 - loss 0.13908050 - time (sec): 4.12 - samples/sec: 4711.73 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:13:32,535 epoch 3 - iter 57/199 - loss 0.13722296 - time (sec): 6.27 - samples/sec: 4594.85 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:13:34,387 epoch 3 - iter 76/199 - loss 0.13497587 - time (sec): 8.12 - samples/sec: 4745.42 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:13:36,201 epoch 3 - iter 95/199 - loss 0.13538871 - time (sec): 9.94 - samples/sec: 4847.62 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:13:38,048 epoch 3 - iter 114/199 - loss

100%|██████████| 16/16 [00:03<00:00,  4.43it/s]

2025-05-01 15:13:50,208 DEV : loss 0.12076157331466675 - f1-score (micro avg)  0.7117





2025-05-01 15:13:50,274  - 0 epochs without improvement
2025-05-01 15:13:50,275  - 0 epochs without improvement
2025-05-01 15:13:50,277 saving best model
2025-05-01 15:14:07,649 ----------------------------------------------------------------------------------------------------
2025-05-01 15:14:09,605 epoch 4 - iter 19/199 - loss 0.14786844 - time (sec): 1.95 - samples/sec: 4931.68 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:14:11,402 epoch 4 - iter 38/199 - loss 0.13929893 - time (sec): 3.75 - samples/sec: 5154.51 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:14:13,193 epoch 4 - iter 57/199 - loss 0.14080208 - time (sec): 5.54 - samples/sec: 5194.54 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:14:15,015 epoch 4 - iter 76/199 - loss 0.14068899 - time (sec): 7.36 - samples/sec: 5212.75 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:14:16,800 epoch 4 - iter 95/199 - loss 0.13801703 - time (sec): 9.14 - samples/sec: 5245.41 - lr: 0.005000 - momentum: 0.000000
2025-05-01 

100%|██████████| 16/16 [00:04<00:00,  3.74it/s]

2025-05-01 15:14:31,532 DEV : loss 0.12385790050029755 - f1-score (micro avg)  0.7054





2025-05-01 15:14:31,597  - 1 epochs without improvement
2025-05-01 15:14:31,598  - 1 epochs without improvement
2025-05-01 15:14:31,599 ----------------------------------------------------------------------------------------------------
2025-05-01 15:14:34,528 epoch 5 - iter 19/199 - loss 0.13385154 - time (sec): 2.93 - samples/sec: 3315.78 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:14:36,311 epoch 5 - iter 38/199 - loss 0.12807364 - time (sec): 4.71 - samples/sec: 4104.19 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:14:38,149 epoch 5 - iter 57/199 - loss 0.13018609 - time (sec): 6.55 - samples/sec: 4450.23 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:14:39,924 epoch 5 - iter 76/199 - loss 0.12883973 - time (sec): 8.32 - samples/sec: 4653.05 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:14:41,840 epoch 5 - iter 95/199 - loss 0.13225289 - time (sec): 10.24 - samples/sec: 4712.85 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:14:43,974 epoch 5 - iter 114/199 - los

100%|██████████| 16/16 [00:04<00:00,  3.71it/s]

2025-05-01 15:14:56,288 DEV : loss 0.12343230098485947 - f1-score (micro avg)  0.6929





2025-05-01 15:14:56,360  - 2 epochs without improvement
2025-05-01 15:14:56,362  - 2 epochs without improvement
2025-05-01 15:14:56,363 ----------------------------------------------------------------------------------------------------
2025-05-01 15:14:58,230 epoch 6 - iter 19/199 - loss 0.12930617 - time (sec): 1.87 - samples/sec: 5049.54 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:00,058 epoch 6 - iter 38/199 - loss 0.13234553 - time (sec): 3.69 - samples/sec: 5142.52 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:01,797 epoch 6 - iter 57/199 - loss 0.13394285 - time (sec): 5.43 - samples/sec: 5193.90 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:03,535 epoch 6 - iter 76/199 - loss 0.13508482 - time (sec): 7.17 - samples/sec: 5243.31 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:05,309 epoch 6 - iter 95/199 - loss 0.13358968 - time (sec): 8.95 - samples/sec: 5294.66 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:07,571 epoch 6 - iter 114/199 - loss

100%|██████████| 16/16 [00:04<00:00,  3.80it/s]

2025-05-01 15:15:19,804 DEV : loss 0.1256045252084732 - f1-score (micro avg)  0.6825





2025-05-01 15:15:19,868  - 3 epochs without improvement
2025-05-01 15:15:19,869  - 3 epochs without improvement
2025-05-01 15:15:19,870 ----------------------------------------------------------------------------------------------------
2025-05-01 15:15:21,749 epoch 7 - iter 19/199 - loss 0.13703230 - time (sec): 1.88 - samples/sec: 5096.26 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:23,553 epoch 7 - iter 38/199 - loss 0.13536909 - time (sec): 3.68 - samples/sec: 5216.45 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:25,395 epoch 7 - iter 57/199 - loss 0.13236942 - time (sec): 5.52 - samples/sec: 5193.32 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:27,219 epoch 7 - iter 76/199 - loss 0.13199673 - time (sec): 7.35 - samples/sec: 5243.09 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:29,058 epoch 7 - iter 95/199 - loss 0.13078531 - time (sec): 9.19 - samples/sec: 5225.46 - lr: 0.005000 - momentum: 0.000000
2025-05-01 15:15:31,261 epoch 7 - iter 114/199 - loss

100%|██████████| 16/16 [00:04<00:00,  3.82it/s]

2025-05-01 15:15:43,588 DEV : loss 0.11918040364980698 - f1-score (micro avg)  0.7095





2025-05-01 15:15:43,655  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0025]
2025-05-01 15:15:43,658  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00125]
2025-05-01 15:15:43,659 ----------------------------------------------------------------------------------------------------
2025-05-01 15:15:45,552 epoch 8 - iter 19/199 - loss 0.12185187 - time (sec): 1.89 - samples/sec: 4988.76 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:15:47,354 epoch 8 - iter 38/199 - loss 0.12521089 - time (sec): 3.69 - samples/sec: 5187.30 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:15:49,130 epoch 8 - iter 57/199 - loss 0.12484582 - time (sec): 5.47 - samples/sec: 5232.99 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:15:50,940 epoch 8 - iter 76/199 - loss 0.12691005 - time (sec): 7.28 - samples/sec: 5265.42 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:15:52,765 epoch 8 - iter 95/199 - loss 0.12870602 - time (sec): 9.

100%|██████████| 16/16 [00:05<00:00,  3.13it/s]

2025-05-01 15:16:08,244 DEV : loss 0.12214288860559464 - f1-score (micro avg)  0.6949





2025-05-01 15:16:08,309  - 1 epochs without improvement
2025-05-01 15:16:08,309  - 1 epochs without improvement
2025-05-01 15:16:08,311 ----------------------------------------------------------------------------------------------------
2025-05-01 15:16:10,258 epoch 9 - iter 19/199 - loss 0.13505093 - time (sec): 1.95 - samples/sec: 5076.62 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:12,076 epoch 9 - iter 38/199 - loss 0.12381372 - time (sec): 3.76 - samples/sec: 5170.44 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:13,865 epoch 9 - iter 57/199 - loss 0.12733848 - time (sec): 5.55 - samples/sec: 5231.09 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:15,596 epoch 9 - iter 76/199 - loss 0.12852504 - time (sec): 7.28 - samples/sec: 5288.35 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:17,390 epoch 9 - iter 95/199 - loss 0.12907071 - time (sec): 9.08 - samples/sec: 5290.40 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:19,707 epoch 9 - iter 114/199 - loss

100%|██████████| 16/16 [00:04<00:00,  3.76it/s]

2025-05-01 15:16:32,072 DEV : loss 0.12425435334444046 - f1-score (micro avg)  0.6842





2025-05-01 15:16:32,140  - 2 epochs without improvement
2025-05-01 15:16:32,141  - 2 epochs without improvement
2025-05-01 15:16:32,142 ----------------------------------------------------------------------------------------------------
2025-05-01 15:16:34,050 epoch 10 - iter 19/199 - loss 0.13470290 - time (sec): 1.91 - samples/sec: 4878.34 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:35,941 epoch 10 - iter 38/199 - loss 0.13299987 - time (sec): 3.80 - samples/sec: 5040.36 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:37,790 epoch 10 - iter 57/199 - loss 0.13100446 - time (sec): 5.65 - samples/sec: 5107.11 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:39,608 epoch 10 - iter 76/199 - loss 0.13320774 - time (sec): 7.46 - samples/sec: 5146.61 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:41,384 epoch 10 - iter 95/199 - loss 0.13298956 - time (sec): 9.24 - samples/sec: 5180.63 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:43,588 epoch 10 - iter 114/199 

100%|██████████| 16/16 [00:04<00:00,  3.72it/s]

2025-05-01 15:16:55,959 DEV : loss 0.12147682905197144 - f1-score (micro avg)  0.6925





2025-05-01 15:16:56,025  - 3 epochs without improvement
2025-05-01 15:16:56,026  - 3 epochs without improvement
2025-05-01 15:16:56,027 ----------------------------------------------------------------------------------------------------
2025-05-01 15:16:57,938 epoch 11 - iter 19/199 - loss 0.13520808 - time (sec): 1.91 - samples/sec: 4917.61 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:16:59,707 epoch 11 - iter 38/199 - loss 0.13685468 - time (sec): 3.68 - samples/sec: 5109.38 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:17:01,469 epoch 11 - iter 57/199 - loss 0.13576267 - time (sec): 5.44 - samples/sec: 5246.16 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:17:03,280 epoch 11 - iter 76/199 - loss 0.13398348 - time (sec): 7.25 - samples/sec: 5265.32 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:17:05,092 epoch 11 - iter 95/199 - loss 0.13394722 - time (sec): 9.06 - samples/sec: 5268.24 - lr: 0.001250 - momentum: 0.000000
2025-05-01 15:17:07,393 epoch 11 - iter 114/199 

100%|██████████| 16/16 [00:04<00:00,  3.79it/s]

2025-05-01 15:17:19,829 DEV : loss 0.12115610390901566 - f1-score (micro avg)  0.6951





2025-05-01 15:17:19,901  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.000625]
2025-05-01 15:17:19,902  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0003125]
2025-05-01 15:17:19,905 ----------------------------------------------------------------------------------------------------
2025-05-01 15:17:21,851 epoch 12 - iter 19/199 - loss 0.12959766 - time (sec): 1.94 - samples/sec: 5000.44 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:17:23,764 epoch 12 - iter 38/199 - loss 0.12758411 - time (sec): 3.86 - samples/sec: 5071.00 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:17:25,563 epoch 12 - iter 57/199 - loss 0.12650574 - time (sec): 5.66 - samples/sec: 5123.43 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:17:27,335 epoch 12 - iter 76/199 - loss 0.12926226 - time (sec): 7.43 - samples/sec: 5194.23 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:17:29,055 epoch 12 - iter 95/199 - loss 0.13039376 - time 

100%|██████████| 16/16 [00:05<00:00,  3.10it/s]

2025-05-01 15:17:44,369 DEV : loss 0.12100240588188171 - f1-score (micro avg)  0.6928





2025-05-01 15:17:44,435  - 1 epochs without improvement
2025-05-01 15:17:44,436  - 1 epochs without improvement
2025-05-01 15:17:44,437 ----------------------------------------------------------------------------------------------------
2025-05-01 15:17:46,335 epoch 13 - iter 19/199 - loss 0.12186433 - time (sec): 1.90 - samples/sec: 5026.59 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:17:48,127 epoch 13 - iter 38/199 - loss 0.12578476 - time (sec): 3.69 - samples/sec: 5149.76 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:17:49,951 epoch 13 - iter 57/199 - loss 0.12553482 - time (sec): 5.51 - samples/sec: 5216.92 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:17:51,817 epoch 13 - iter 76/199 - loss 0.12546433 - time (sec): 7.38 - samples/sec: 5223.06 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:17:53,652 epoch 13 - iter 95/199 - loss 0.12738393 - time (sec): 9.21 - samples/sec: 5212.92 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:17:55,957 epoch 13 - iter 114/199 

100%|██████████| 16/16 [00:04<00:00,  3.74it/s]

2025-05-01 15:18:08,168 DEV : loss 0.12102121114730835 - f1-score (micro avg)  0.6918





2025-05-01 15:18:08,234  - 2 epochs without improvement
2025-05-01 15:18:08,235  - 2 epochs without improvement
2025-05-01 15:18:08,236 ----------------------------------------------------------------------------------------------------
2025-05-01 15:18:10,163 epoch 14 - iter 19/199 - loss 0.12103762 - time (sec): 1.93 - samples/sec: 5040.05 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:18:11,925 epoch 14 - iter 38/199 - loss 0.12434521 - time (sec): 3.69 - samples/sec: 5164.97 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:18:13,708 epoch 14 - iter 57/199 - loss 0.12738246 - time (sec): 5.47 - samples/sec: 5220.06 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:18:15,479 epoch 14 - iter 76/199 - loss 0.12964234 - time (sec): 7.24 - samples/sec: 5220.59 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:18:17,294 epoch 14 - iter 95/199 - loss 0.12748688 - time (sec): 9.06 - samples/sec: 5251.15 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:18:19,495 epoch 14 - iter 114/199 

100%|██████████| 16/16 [00:04<00:00,  3.75it/s]


2025-05-01 15:18:31,920 DEV : loss 0.12079218775033951 - f1-score (micro avg)  0.6922
2025-05-01 15:18:31,987  - 3 epochs without improvement
2025-05-01 15:18:31,988  - 3 epochs without improvement
2025-05-01 15:18:31,989 ----------------------------------------------------------------------------------------------------
2025-05-01 15:18:33,913 epoch 15 - iter 19/199 - loss 0.14510856 - time (sec): 1.92 - samples/sec: 5015.76 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:18:35,746 epoch 15 - iter 38/199 - loss 0.13770827 - time (sec): 3.76 - samples/sec: 5155.19 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:18:37,563 epoch 15 - iter 57/199 - loss 0.13172813 - time (sec): 5.57 - samples/sec: 5203.90 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:18:39,426 epoch 15 - iter 76/199 - loss 0.13122477 - time (sec): 7.44 - samples/sec: 5189.96 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:18:41,206 epoch 15 - iter 95/199 - loss 0.13120297 - time (sec): 9.22 - samples/sec: 5224.9

100%|██████████| 16/16 [00:04<00:00,  3.87it/s]

2025-05-01 15:18:55,528 DEV : loss 0.12063528597354889 - f1-score (micro avg)  0.6948
2025-05-01 15:18:55,657  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00015625]
2025-05-01 15:18:55,659  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [7.8125e-05]
2025-05-01 15:18:55,660 ----------------------------------------------------------------------------------------------------
2025-05-01 15:18:55,661 learning rate too small - quitting training!
2025-05-01 15:18:55,662 ----------------------------------------------------------------------------------------------------
2025-05-01 15:18:55,663 Saving model ...





2025-05-01 15:19:19,115 Done.
2025-05-01 15:19:19,116 ----------------------------------------------------------------------------------------------------
2025-05-01 15:19:19,118 Loading model from best epoch ...
2025-05-01 15:19:23,670 SequenceTagger predicts: Dictionary with 9 tags: O, B-PER, I-PER, B-ORG, I-ORG, B-LOC, I-LOC, <START>, <STOP>


100%|██████████| 32/32 [00:13<00:00,  2.29it/s]

2025-05-01 15:19:38,411 
Results:
- F-score (micro) 0.719
- F-score (macro) 0.6473
- Accuracy 0.6101

By class:
              precision    recall  f1-score   support

         PER     0.8403    0.8571    0.8486      1602
         ORG     0.6119    0.4141    0.4940       792
         LOC     0.7438    0.5017    0.5992       602

   micro avg     0.7776    0.6686    0.7190      2996
   macro avg     0.7320    0.5910    0.6473      2996
weighted avg     0.7605    0.6686    0.7047      2996

2025-05-01 15:19:38,412 ----------------------------------------------------------------------------------------------------
2025-05-01 15:19:38,417 ----------------------------------------------------------------------------------------------------
2025-05-01 15:19:38,418 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'twitter'
      (embedding): Embedding(1193515, 100)
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
 


  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2025-05-01 15:19:40,100 epoch 1 - iter 19/199 - loss 0.13723003 - time (sec): 1.66 - samples/sec: 5848.20 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:19:42,136 epoch 1 - iter 38/199 - loss 0.13706699 - time (sec): 3.70 - samples/sec: 5218.31 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:19:44,125 epoch 1 - iter 57/199 - loss 0.13351545 - time (sec): 5.69 - samples/sec: 5097.30 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:19:45,952 epoch 1 - iter 76/199 - loss 0.13334308 - time (sec): 7.52 - samples/sec: 5129.06 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:19:47,738 epoch 1 - iter 95/199 - loss 0.13442780 - time (sec): 9.30 - samples/sec: 5166.95 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:19:49,542 epoch 1 - iter 114/199 - loss 0.13600481 - time (sec): 11.11 - samples/sec: 5190.75 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:19:51,360 epoch 1 - iter 133/199 - loss 0.13574969 - time (sec): 12.92 - samples/sec: 5203.53 - lr: 0.002500 - momentum: 0.000000
20

100%|██████████| 16/16 [00:08<00:00,  1.94it/s]

2025-05-01 15:20:06,441 DEV : loss 0.12117098271846771 - f1-score (micro avg)  0.7063
2025-05-01 15:20:06,556  - 0 epochs without improvement
2025-05-01 15:20:06,558  - 0 epochs without improvement
2025-05-01 15:20:06,563  - 0 epochs without improvement
2025-05-01 15:20:06,564 saving best model





2025-05-01 15:20:30,893 ----------------------------------------------------------------------------------------------------
2025-05-01 15:20:32,796 epoch 2 - iter 19/199 - loss 0.13386088 - time (sec): 1.90 - samples/sec: 5092.72 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:20:34,563 epoch 2 - iter 38/199 - loss 0.13064380 - time (sec): 3.67 - samples/sec: 5212.92 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:20:36,369 epoch 2 - iter 57/199 - loss 0.13155360 - time (sec): 5.47 - samples/sec: 5165.69 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:20:38,193 epoch 2 - iter 76/199 - loss 0.13333979 - time (sec): 7.30 - samples/sec: 5234.09 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:20:40,205 epoch 2 - iter 95/199 - loss 0.13186403 - time (sec): 9.31 - samples/sec: 5152.59 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:20:42,356 epoch 2 - iter 114/199 - loss 0.13370857 - time (sec): 11.46 - samples/sec: 5033.71 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:20:44,1

100%|██████████| 16/16 [00:04<00:00,  3.74it/s]

2025-05-01 15:20:54,560 DEV : loss 0.12482283264398575 - f1-score (micro avg)  0.6911





2025-05-01 15:20:54,625  - 1 epochs without improvement
2025-05-01 15:20:54,626  - 1 epochs without improvement
2025-05-01 15:20:54,627  - 1 epochs without improvement
2025-05-01 15:20:54,628 ----------------------------------------------------------------------------------------------------
2025-05-01 15:20:57,543 epoch 3 - iter 19/199 - loss 0.13157891 - time (sec): 2.91 - samples/sec: 3251.23 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:20:59,329 epoch 3 - iter 38/199 - loss 0.13527472 - time (sec): 4.70 - samples/sec: 4064.14 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:01,086 epoch 3 - iter 57/199 - loss 0.13377506 - time (sec): 6.46 - samples/sec: 4427.30 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:02,799 epoch 3 - iter 76/199 - loss 0.13608763 - time (sec): 8.17 - samples/sec: 4647.44 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:05,036 epoch 3 - iter 95/199 - loss 0.13413334 - time (sec): 10.41 - samples/sec: 4591.23 - lr: 0.002500 - momentum: 0.000

100%|██████████| 16/16 [00:04<00:00,  3.72it/s]

2025-05-01 15:21:19,270 DEV : loss 0.12172404676675797 - f1-score (micro avg)  0.7018





2025-05-01 15:21:19,337  - 2 epochs without improvement
2025-05-01 15:21:19,338  - 2 epochs without improvement
2025-05-01 15:21:19,339  - 2 epochs without improvement
2025-05-01 15:21:19,340 ----------------------------------------------------------------------------------------------------
2025-05-01 15:21:21,192 epoch 4 - iter 19/199 - loss 0.12738781 - time (sec): 1.85 - samples/sec: 4978.59 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:23,033 epoch 4 - iter 38/199 - loss 0.12866483 - time (sec): 3.69 - samples/sec: 5125.28 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:24,885 epoch 4 - iter 57/199 - loss 0.13051007 - time (sec): 5.54 - samples/sec: 5137.34 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:26,735 epoch 4 - iter 76/199 - loss 0.13148306 - time (sec): 7.39 - samples/sec: 5179.54 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:28,913 epoch 4 - iter 95/199 - loss 0.13081816 - time (sec): 9.57 - samples/sec: 4991.75 - lr: 0.002500 - momentum: 0.0000

100%|██████████| 16/16 [00:04<00:00,  3.78it/s]

2025-05-01 15:21:43,061 DEV : loss 0.13002142310142517 - f1-score (micro avg)  0.6717





2025-05-01 15:21:43,130  - 3 epochs without improvement
2025-05-01 15:21:43,131  - 3 epochs without improvement
2025-05-01 15:21:43,132  - 3 epochs without improvement
2025-05-01 15:21:43,133 ----------------------------------------------------------------------------------------------------
2025-05-01 15:21:45,035 epoch 5 - iter 19/199 - loss 0.12868486 - time (sec): 1.90 - samples/sec: 4994.80 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:46,820 epoch 5 - iter 38/199 - loss 0.12713831 - time (sec): 3.69 - samples/sec: 5098.66 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:48,602 epoch 5 - iter 57/199 - loss 0.12732715 - time (sec): 5.47 - samples/sec: 5169.80 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:50,360 epoch 5 - iter 76/199 - loss 0.13102872 - time (sec): 7.23 - samples/sec: 5229.68 - lr: 0.002500 - momentum: 0.000000
2025-05-01 15:21:52,293 epoch 5 - iter 95/199 - loss 0.13268633 - time (sec): 9.16 - samples/sec: 5173.40 - lr: 0.002500 - momentum: 0.0000

100%|██████████| 16/16 [00:04<00:00,  3.71it/s]

2025-05-01 15:22:06,891 DEV : loss 0.12125910073518753 - f1-score (micro avg)  0.7001





2025-05-01 15:22:06,961  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00125]
2025-05-01 15:22:06,963  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.000625]
2025-05-01 15:22:06,964  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0003125]
2025-05-01 15:22:06,965 ----------------------------------------------------------------------------------------------------
2025-05-01 15:22:08,923 epoch 6 - iter 19/199 - loss 0.13263196 - time (sec): 1.96 - samples/sec: 4995.25 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:22:10,677 epoch 6 - iter 38/199 - loss 0.13027523 - time (sec): 3.71 - samples/sec: 5154.57 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:22:12,478 epoch 6 - iter 57/199 - loss 0.13248609 - time (sec): 5.51 - samples/sec: 5173.77 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:22:14,288 epoch 6 - iter 76/199 - loss 0.13188077 - time (sec): 7.32 - samples/sec: 5180.4

100%|██████████| 16/16 [00:05<00:00,  3.04it/s]

2025-05-01 15:22:31,912 DEV : loss 0.12222783267498016 - f1-score (micro avg)  0.6968





2025-05-01 15:22:31,980  - 1 epochs without improvement
2025-05-01 15:22:31,981  - 1 epochs without improvement
2025-05-01 15:22:31,982  - 1 epochs without improvement
2025-05-01 15:22:31,983 ----------------------------------------------------------------------------------------------------
2025-05-01 15:22:33,920 epoch 7 - iter 19/199 - loss 0.13113055 - time (sec): 1.94 - samples/sec: 5070.59 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:22:35,731 epoch 7 - iter 38/199 - loss 0.12444812 - time (sec): 3.75 - samples/sec: 5181.43 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:22:37,572 epoch 7 - iter 57/199 - loss 0.12459921 - time (sec): 5.59 - samples/sec: 5227.82 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:22:39,388 epoch 7 - iter 76/199 - loss 0.12319828 - time (sec): 7.40 - samples/sec: 5235.98 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:22:41,675 epoch 7 - iter 95/199 - loss 0.12447915 - time (sec): 9.69 - samples/sec: 4965.41 - lr: 0.000313 - momentum: 0.0000

100%|██████████| 16/16 [00:04<00:00,  3.72it/s]

2025-05-01 15:22:55,634 DEV : loss 0.1219942569732666 - f1-score (micro avg)  0.6994





2025-05-01 15:22:55,701  - 2 epochs without improvement
2025-05-01 15:22:55,703  - 2 epochs without improvement
2025-05-01 15:22:55,705  - 2 epochs without improvement
2025-05-01 15:22:55,706 ----------------------------------------------------------------------------------------------------
2025-05-01 15:22:57,682 epoch 8 - iter 19/199 - loss 0.13610980 - time (sec): 1.97 - samples/sec: 4868.26 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:22:59,488 epoch 8 - iter 38/199 - loss 0.13471907 - time (sec): 3.78 - samples/sec: 5037.01 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:23:01,311 epoch 8 - iter 57/199 - loss 0.13090276 - time (sec): 5.60 - samples/sec: 5116.80 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:23:03,114 epoch 8 - iter 76/199 - loss 0.12909966 - time (sec): 7.41 - samples/sec: 5186.10 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:23:05,270 epoch 8 - iter 95/199 - loss 0.12851537 - time (sec): 9.56 - samples/sec: 5042.54 - lr: 0.000313 - momentum: 0.0000

100%|██████████| 16/16 [00:04<00:00,  3.75it/s]

2025-05-01 15:23:19,426 DEV : loss 0.12264607101678848 - f1-score (micro avg)  0.6962





2025-05-01 15:23:19,492  - 3 epochs without improvement
2025-05-01 15:23:19,494  - 3 epochs without improvement
2025-05-01 15:23:19,494  - 3 epochs without improvement
2025-05-01 15:23:19,495 ----------------------------------------------------------------------------------------------------
2025-05-01 15:23:21,429 epoch 9 - iter 19/199 - loss 0.11529728 - time (sec): 1.93 - samples/sec: 5028.36 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:23:23,258 epoch 9 - iter 38/199 - loss 0.12390930 - time (sec): 3.76 - samples/sec: 5098.89 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:23:25,037 epoch 9 - iter 57/199 - loss 0.12576081 - time (sec): 5.54 - samples/sec: 5172.95 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:23:26,920 epoch 9 - iter 76/199 - loss 0.12440047 - time (sec): 7.42 - samples/sec: 5188.78 - lr: 0.000313 - momentum: 0.000000
2025-05-01 15:23:28,947 epoch 9 - iter 95/199 - loss 0.12456493 - time (sec): 9.45 - samples/sec: 5081.28 - lr: 0.000313 - momentum: 0.0000

100%|██████████| 16/16 [00:04<00:00,  3.75it/s]

2025-05-01 15:23:43,166 DEV : loss 0.12114343047142029 - f1-score (micro avg)  0.7019





2025-05-01 15:23:43,230  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00015625]
2025-05-01 15:23:43,233  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [7.8125e-05]
2025-05-01 15:23:43,234 ----------------------------------------------------------------------------------------------------
2025-05-01 15:23:43,236 learning rate too small - quitting training!
2025-05-01 15:23:43,237 ----------------------------------------------------------------------------------------------------
2025-05-01 15:23:43,239 Saving model ...
2025-05-01 15:24:04,228 Done.
2025-05-01 15:24:04,230 ----------------------------------------------------------------------------------------------------
2025-05-01 15:24:04,231 Loading model from best epoch ...
2025-05-01 15:24:08,375 SequenceTagger predicts: Dictionary with 9 tags: O, B-PER, I-PER, B-ORG, I-ORG, B-LOC, I-LOC, <START>, <STOP>


100%|██████████| 32/32 [00:13<00:00,  2.36it/s]

2025-05-01 15:24:23,083 
Results:
- F-score (micro) 0.7105
- F-score (macro) 0.6405
- Accuracy 0.5998

By class:
              precision    recall  f1-score   support

         PER     0.8313    0.8489    0.8400      1602
         ORG     0.5863    0.4205    0.4897       792
         LOC     0.7388    0.4934    0.5916       602

   micro avg     0.7636    0.6642    0.7105      2996
   macro avg     0.7188    0.5876    0.6405      2996
weighted avg     0.7479    0.6642    0.6975      2996

2025-05-01 15:24:23,083 ----------------------------------------------------------------------------------------------------
2025-05-01 15:24:23,088 ----------------------------------------------------------------------------------------------------
2025-05-01 15:24:23,090 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'twitter'
      (embedding): Embedding(1193515, 100)
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(



  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2025-05-01 15:24:24,878 epoch 1 - iter 19/199 - loss 0.12253580 - time (sec): 1.76 - samples/sec: 5457.72 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:24:26,701 epoch 1 - iter 38/199 - loss 0.12690485 - time (sec): 3.58 - samples/sec: 5355.34 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:24:28,967 epoch 1 - iter 57/199 - loss 0.13184176 - time (sec): 5.85 - samples/sec: 4936.62 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:24:30,882 epoch 1 - iter 76/199 - loss 0.13194920 - time (sec): 7.76 - samples/sec: 4978.51 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:24:32,676 epoch 1 - iter 95/199 - loss 0.13203694 - time (sec): 9.56 - samples/sec: 5062.31 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:24:34,420 epoch 1 - iter 114/199 - loss 0.13421871 - time (sec): 11.30 - samples/sec: 5111.57 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:24:36,162 epoch 1 - iter 133/199 - loss 0.13289331 - time (sec): 13.04 - samples/sec: 5145.24 - lr: 0.001000 - momentum: 0.000000
20

100%|██████████| 16/16 [00:07<00:00,  2.12it/s]

2025-05-01 15:24:50,369 DEV : loss 0.12153401970863342 - f1-score (micro avg)  0.7104





2025-05-01 15:24:50,444  - 0 epochs without improvement
2025-05-01 15:24:50,445  - 0 epochs without improvement
2025-05-01 15:24:50,446  - 0 epochs without improvement
2025-05-01 15:24:50,449  - 0 epochs without improvement
2025-05-01 15:24:50,450 saving best model
2025-05-01 15:25:01,876 ----------------------------------------------------------------------------------------------------
2025-05-01 15:25:04,071 epoch 2 - iter 19/199 - loss 0.12186533 - time (sec): 2.19 - samples/sec: 4355.54 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:06,149 epoch 2 - iter 38/199 - loss 0.12751700 - time (sec): 4.27 - samples/sec: 4430.67 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:07,954 epoch 2 - iter 57/199 - loss 0.12916859 - time (sec): 6.08 - samples/sec: 4687.96 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:09,730 epoch 2 - iter 76/199 - loss 0.13002310 - time (sec): 7.85 - samples/sec: 4864.34 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:11,475 epoch 2 - iter 95

100%|██████████| 16/16 [00:03<00:00,  4.37it/s]

2025-05-01 15:25:25,349 DEV : loss 0.12197193503379822 - f1-score (micro avg)  0.7042





2025-05-01 15:25:25,428  - 1 epochs without improvement
2025-05-01 15:25:25,429  - 1 epochs without improvement
2025-05-01 15:25:25,430  - 1 epochs without improvement
2025-05-01 15:25:25,431  - 1 epochs without improvement
2025-05-01 15:25:25,432 ----------------------------------------------------------------------------------------------------
2025-05-01 15:25:28,758 epoch 3 - iter 19/199 - loss 0.12582272 - time (sec): 3.32 - samples/sec: 2893.38 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:30,712 epoch 3 - iter 38/199 - loss 0.12653882 - time (sec): 5.28 - samples/sec: 3615.01 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:32,576 epoch 3 - iter 57/199 - loss 0.12494306 - time (sec): 7.14 - samples/sec: 4044.65 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:34,343 epoch 3 - iter 76/199 - loss 0.12642901 - time (sec): 8.91 - samples/sec: 4307.36 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:36,091 epoch 3 - iter 95/199 - loss 0.12590765 - time (sec): 10.66

100%|██████████| 16/16 [00:03<00:00,  4.33it/s]

2025-05-01 15:25:50,020 DEV : loss 0.12209952622652054 - f1-score (micro avg)  0.7042





2025-05-01 15:25:50,085  - 2 epochs without improvement
2025-05-01 15:25:50,086  - 2 epochs without improvement
2025-05-01 15:25:50,087  - 2 epochs without improvement
2025-05-01 15:25:50,088  - 2 epochs without improvement
2025-05-01 15:25:50,089 ----------------------------------------------------------------------------------------------------
2025-05-01 15:25:52,268 epoch 4 - iter 19/199 - loss 0.14048560 - time (sec): 2.18 - samples/sec: 4309.20 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:54,397 epoch 4 - iter 38/199 - loss 0.13004818 - time (sec): 4.31 - samples/sec: 4422.49 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:56,239 epoch 4 - iter 57/199 - loss 0.13532954 - time (sec): 6.15 - samples/sec: 4646.71 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:58,073 epoch 4 - iter 76/199 - loss 0.13332298 - time (sec): 7.98 - samples/sec: 4762.68 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:25:59,968 epoch 4 - iter 95/199 - loss 0.13136889 - time (sec): 9.88 

100%|██████████| 16/16 [00:03<00:00,  4.40it/s]

2025-05-01 15:26:13,883 DEV : loss 0.12155045568943024 - f1-score (micro avg)  0.6981





2025-05-01 15:26:13,950  - 3 epochs without improvement
2025-05-01 15:26:13,951  - 3 epochs without improvement
2025-05-01 15:26:13,952  - 3 epochs without improvement
2025-05-01 15:26:13,953  - 3 epochs without improvement
2025-05-01 15:26:13,954 ----------------------------------------------------------------------------------------------------
2025-05-01 15:26:16,154 epoch 5 - iter 19/199 - loss 0.12663361 - time (sec): 2.20 - samples/sec: 4380.51 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:26:18,276 epoch 5 - iter 38/199 - loss 0.12395343 - time (sec): 4.32 - samples/sec: 4416.80 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:26:20,139 epoch 5 - iter 57/199 - loss 0.12461628 - time (sec): 6.18 - samples/sec: 4637.35 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:26:21,989 epoch 5 - iter 76/199 - loss 0.12643641 - time (sec): 8.03 - samples/sec: 4749.27 - lr: 0.001000 - momentum: 0.000000
2025-05-01 15:26:23,849 epoch 5 - iter 95/199 - loss 0.12594716 - time (sec): 9.89 

100%|██████████| 16/16 [00:03<00:00,  4.26it/s]

2025-05-01 15:26:38,461 DEV : loss 0.12236926704645157 - f1-score (micro avg)  0.7009





2025-05-01 15:26:38,531  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0005]
2025-05-01 15:26:38,532  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00025]
2025-05-01 15:26:38,533  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.000125]
2025-05-01 15:26:38,534  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [6.25e-05]
2025-05-01 15:26:38,535 ----------------------------------------------------------------------------------------------------
2025-05-01 15:26:38,536 learning rate too small - quitting training!
2025-05-01 15:26:38,537 ----------------------------------------------------------------------------------------------------
2025-05-01 15:26:38,538 Saving model ...
2025-05-01 15:26:55,929 Done.
2025-05-01 15:26:55,930 ----------------------------------------------------------------------------------------------------
2025-05-01 15:26:55,931 Load

100%|██████████| 32/32 [00:14<00:00,  2.26it/s]

2025-05-01 15:27:14,310 
Results:
- F-score (micro) 0.7132
- F-score (macro) 0.6452
- Accuracy 0.6028

By class:
              precision    recall  f1-score   support

         PER     0.8344    0.8458    0.8400      1602
         ORG     0.5978    0.4129    0.4884       792
         LOC     0.7428    0.5133    0.6071       602

   micro avg     0.7696    0.6646    0.7132      2996
   macro avg     0.7250    0.5907    0.6452      2996
weighted avg     0.7534    0.6646    0.7003      2996

2025-05-01 15:27:14,311 ----------------------------------------------------------------------------------------------------



