In [None]:
# if running in Colab run this first, otherwise make sure flair[word-embeddings] is installed
import torch
!pip install flair[word-embeddings]
import flair

Collecting flair[word-embeddings]
  Downloading flair-0.15.1-py3-none-any.whl.metadata (12 kB)
Collecting boto3>=1.20.27 (from flair[word-embeddings])
  Downloading boto3-1.38.5-py3-none-any.whl.metadata (6.6 kB)
Collecting conllu<5.0.0,>=4.0 (from flair[word-embeddings])
  Downloading conllu-4.5.3-py2.py3-none-any.whl.metadata (19 kB)
Collecting ftfy>=6.1.0 (from flair[word-embeddings])
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting langdetect>=1.0.9 (from flair[word-embeddings])
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m35.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mpld3>=0.3 (from flair[word-embeddings])
  Downloading mpld3-0.5.10-py3-none-any.whl.metadata (5.1 kB)
Collecting pptree>=3.1 (from flair[word-embeddings])
  Downloading pptree-3.1.tar.gz (3.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [2]:
import os
from pathlib import Path

from flair.data import Corpus, Sentence
from flair.datasets import ColumnCorpus
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings, CharacterEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer


if __name__ == "__main__":
    # Train a BIO-tagged NER SequenceTagger on a tab-separated column corpus
    # and store the training artefacts in `model_folder`.

    # Locations used by this run; edit here to point at another dataset/output.
    data_folder = Path('/home/dataset')
    model_folder = Path('/home/model')

    # load corpus
    # Column 0 of each line is the token text, column 1 its NER tag.
    columns = {0: 'text', 1: 'ner'}
    corpus: Corpus = ColumnCorpus(data_folder, columns,
                                  train_file='train.txt',
                                  test_file='test.txt',
                                  dev_file='dev.txt',
                                  column_delimiter='\t')

    # extract the labels from the corpus
    label_type = 'ner'
    label_dict = corpus.make_label_dictionary(label_type=label_type, add_unk=False)

    # build the embedding stack: GloVe word vectors plus the forward and
    # backward 'news' Flair language models. Only this stack is handed to the
    # tagger, so no other embedding model is loaded.
    embedding_types = [
        WordEmbeddings('glove'),
        FlairEmbeddings('news-forward'),
        FlairEmbeddings('news-backward')
    ]
    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # sequence tagger predicting `label_type` tags in BIO format
    tagger = SequenceTagger(hidden_size=256,
                            embeddings=embeddings,
                            tag_dictionary=label_dict,
                            tag_type=label_type,
                            tag_format="BIO")

    # train model
    trainer = ModelTrainer(tagger, corpus)
    trainer.train(model_folder,
                  learning_rate=0.05,
                  mini_batch_size=32,
                  max_epochs=25)


2025-04-29 21:28:25,777 Reading data from /home/dataset
2025-04-29 21:28:25,779 Train: /home/dataset/train.txt
2025-04-29 21:28:25,779 Dev: /home/dataset/dev.txt
2025-04-29 21:28:25,780 Test: /home/dataset/test.txt
2025-04-29 21:28:27,550 Computing label dictionary. Progress:


4it [00:00, 6428.05it/s]
6338it [00:00, 44566.02it/s]

2025-04-29 21:28:27,702 Dictionary created for label 'ner' with 3 values: PER (seen 3101 times), ORG (seen 2267 times), LOC (seen 1996 times)





2025-04-29 21:28:32,464 SequenceTagger predicts: Dictionary with 7 tags: O, B-PER, I-PER, B-ORG, I-ORG, B-LOC, I-LOC
2025-04-29 21:28:32,750 ----------------------------------------------------------------------------------------------------
2025-04-29 21:28:32,751 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'glove'
      (embedding): Embedding(400001, 100)
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn): LSTM(100, 2048)
      )
    )
    (list_embedding_2): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn): LSTM(100, 2048)
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=4196, out_features=4196, bias=True)
  (rnn): LSTM(4196,

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2025-04-29 21:28:37,762 epoch 1 - iter 19/199 - loss 0.74520324 - time (sec): 4.99 - samples/sec: 1920.14 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:28:43,008 epoch 1 - iter 38/199 - loss 0.58468204 - time (sec): 10.23 - samples/sec: 1861.55 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:28:50,906 epoch 1 - iter 57/199 - loss 0.51655225 - time (sec): 18.13 - samples/sec: 1599.80 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:28:56,379 epoch 1 - iter 76/199 - loss 0.47114497 - time (sec): 23.60 - samples/sec: 1638.25 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:29:01,242 epoch 1 - iter 95/199 - loss 0.43729659 - time (sec): 28.47 - samples/sec: 1699.71 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:29:06,303 epoch 1 - iter 114/199 - loss 0.41154454 - time (sec): 33.53 - samples/sec: 1732.01 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:29:11,210 epoch 1 - iter 133/199 - loss 0.39274511 - time (sec): 38.43 - samples/sec: 1761.02 - lr: 0.050000 - momentum: 0.00000

100%|██████████| 16/16 [00:07<00:00,  2.28it/s]

2025-04-29 21:29:36,398 DEV : loss 0.2040068358182907 - f1-score (micro avg)  0.5631
2025-04-29 21:29:36,458  - 0 epochs without improvement
2025-04-29 21:29:36,459 saving best model





2025-04-29 21:29:41,655 ----------------------------------------------------------------------------------------------------
2025-04-29 21:29:43,589 epoch 2 - iter 19/199 - loss 0.22503790 - time (sec): 1.93 - samples/sec: 4857.37 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:29:45,478 epoch 2 - iter 38/199 - loss 0.22038894 - time (sec): 3.82 - samples/sec: 5030.73 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:29:47,288 epoch 2 - iter 57/199 - loss 0.21685497 - time (sec): 5.63 - samples/sec: 5108.60 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:29:49,043 epoch 2 - iter 76/199 - loss 0.21338605 - time (sec): 7.39 - samples/sec: 5177.07 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:29:50,805 epoch 2 - iter 95/199 - loss 0.21191249 - time (sec): 9.15 - samples/sec: 5216.08 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:29:52,953 epoch 2 - iter 114/199 - loss 0.21053970 - time (sec): 11.30 - samples/sec: 5079.90 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:29:54,9

100%|██████████| 16/16 [00:04<00:00,  3.32it/s]

2025-04-29 21:30:06,001 DEV : loss 0.14997170865535736 - f1-score (micro avg)  0.667





2025-04-29 21:30:06,064  - 0 epochs without improvement
2025-04-29 21:30:06,065 saving best model
2025-04-29 21:30:07,439 ----------------------------------------------------------------------------------------------------
2025-04-29 21:30:09,453 epoch 3 - iter 19/199 - loss 0.18461194 - time (sec): 2.01 - samples/sec: 4862.25 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:11,269 epoch 3 - iter 38/199 - loss 0.17734795 - time (sec): 3.83 - samples/sec: 5047.39 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:13,047 epoch 3 - iter 57/199 - loss 0.16820113 - time (sec): 5.61 - samples/sec: 5136.72 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:14,740 epoch 3 - iter 76/199 - loss 0.17193677 - time (sec): 7.30 - samples/sec: 5211.98 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:16,891 epoch 3 - iter 95/199 - loss 0.17094010 - time (sec): 9.45 - samples/sec: 5044.86 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:18,923 epoch 3 - iter 114/199 - loss 0.17110601 - 

100%|██████████| 16/16 [00:04<00:00,  3.79it/s]

2025-04-29 21:30:31,136 DEV : loss 0.14142167568206787 - f1-score (micro avg)  0.6586





2025-04-29 21:30:31,199  - 1 epochs without improvement
2025-04-29 21:30:31,200 ----------------------------------------------------------------------------------------------------
2025-04-29 21:30:33,059 epoch 4 - iter 19/199 - loss 0.14326565 - time (sec): 1.86 - samples/sec: 5034.74 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:34,883 epoch 4 - iter 38/199 - loss 0.15509221 - time (sec): 3.68 - samples/sec: 5178.42 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:36,628 epoch 4 - iter 57/199 - loss 0.14961443 - time (sec): 5.43 - samples/sec: 5276.35 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:38,451 epoch 4 - iter 76/199 - loss 0.14871500 - time (sec): 7.25 - samples/sec: 5285.34 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:40,379 epoch 4 - iter 95/199 - loss 0.15090824 - time (sec): 9.18 - samples/sec: 5206.33 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:30:42,481 epoch 4 - iter 114/199 - loss 0.15054868 - time (sec): 11.28 - samples/sec: 5068.77 -

100%|██████████| 16/16 [00:06<00:00,  2.55it/s]

2025-04-29 21:30:57,123 DEV : loss 0.12427879869937897 - f1-score (micro avg)  0.7011





2025-04-29 21:30:57,184  - 0 epochs without improvement
2025-04-29 21:30:57,185 saving best model
2025-04-29 21:30:58,728 ----------------------------------------------------------------------------------------------------
2025-04-29 21:31:00,719 epoch 5 - iter 19/199 - loss 0.13061804 - time (sec): 1.99 - samples/sec: 4894.96 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:02,599 epoch 5 - iter 38/199 - loss 0.14014809 - time (sec): 3.87 - samples/sec: 4978.94 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:04,544 epoch 5 - iter 57/199 - loss 0.14585441 - time (sec): 5.81 - samples/sec: 5034.85 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:06,749 epoch 5 - iter 76/199 - loss 0.14387569 - time (sec): 8.02 - samples/sec: 4824.64 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:08,615 epoch 5 - iter 95/199 - loss 0.14322406 - time (sec): 9.89 - samples/sec: 4894.94 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:10,540 epoch 5 - iter 114/199 - loss 0.14243363 - 

100%|██████████| 16/16 [00:04<00:00,  3.66it/s]

2025-04-29 21:31:23,594 DEV : loss 0.11951862275600433 - f1-score (micro avg)  0.7018





2025-04-29 21:31:23,670  - 0 epochs without improvement
2025-04-29 21:31:23,671 saving best model
2025-04-29 21:31:25,016 ----------------------------------------------------------------------------------------------------
2025-04-29 21:31:27,046 epoch 6 - iter 19/199 - loss 0.14191400 - time (sec): 2.03 - samples/sec: 4710.84 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:29,131 epoch 6 - iter 38/199 - loss 0.13773819 - time (sec): 4.11 - samples/sec: 4611.16 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:31,325 epoch 6 - iter 57/199 - loss 0.13929896 - time (sec): 6.31 - samples/sec: 4540.76 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:33,167 epoch 6 - iter 76/199 - loss 0.13700264 - time (sec): 8.15 - samples/sec: 4742.61 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:35,076 epoch 6 - iter 95/199 - loss 0.13628819 - time (sec): 10.06 - samples/sec: 4812.13 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:31:37,089 epoch 6 - iter 114/199 - loss 0.13665050 -

100%|██████████| 16/16 [00:03<00:00,  4.37it/s]

2025-04-29 21:31:49,946 DEV : loss 0.12049930542707443 - f1-score (micro avg)  0.7087





2025-04-29 21:31:50,022  - 0 epochs without improvement
2025-04-29 21:31:50,024 saving best model
2025-04-29 21:31:56,947 ----------------------------------------------------------------------------------------------------
2025-04-29 21:31:58,915 epoch 7 - iter 19/199 - loss 0.12966341 - time (sec): 1.97 - samples/sec: 4944.09 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:00,685 epoch 7 - iter 38/199 - loss 0.13572465 - time (sec): 3.74 - samples/sec: 5201.65 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:02,433 epoch 7 - iter 57/199 - loss 0.13184281 - time (sec): 5.48 - samples/sec: 5269.24 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:04,406 epoch 7 - iter 76/199 - loss 0.13313410 - time (sec): 7.46 - samples/sec: 5124.99 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:06,579 epoch 7 - iter 95/199 - loss 0.13237823 - time (sec): 9.63 - samples/sec: 4949.03 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:08,696 epoch 7 - iter 114/199 - loss 0.13060786 - 

100%|██████████| 16/16 [00:03<00:00,  4.19it/s]

2025-04-29 21:32:21,248 DEV : loss 0.11126990616321564 - f1-score (micro avg)  0.7106





2025-04-29 21:32:21,309  - 0 epochs without improvement
2025-04-29 21:32:21,310 saving best model
2025-04-29 21:32:27,781 ----------------------------------------------------------------------------------------------------
2025-04-29 21:32:30,264 epoch 8 - iter 19/199 - loss 0.12265531 - time (sec): 2.48 - samples/sec: 3806.48 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:32,171 epoch 8 - iter 38/199 - loss 0.11877405 - time (sec): 4.39 - samples/sec: 4364.26 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:33,904 epoch 8 - iter 57/199 - loss 0.11898844 - time (sec): 6.12 - samples/sec: 4652.06 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:35,718 epoch 8 - iter 76/199 - loss 0.12112891 - time (sec): 7.93 - samples/sec: 4808.26 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:37,522 epoch 8 - iter 95/199 - loss 0.12147113 - time (sec): 9.74 - samples/sec: 4914.40 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:39,378 epoch 8 - iter 114/199 - loss 0.12021978 - 

100%|██████████| 16/16 [00:04<00:00,  3.63it/s]

2025-04-29 21:32:52,319 DEV : loss 0.10542764514684677 - f1-score (micro avg)  0.7104
2025-04-29 21:32:52,423  - 1 epochs without improvement
2025-04-29 21:32:52,426 ----------------------------------------------------------------------------------------------------





2025-04-29 21:32:54,776 epoch 9 - iter 19/199 - loss 0.13471035 - time (sec): 2.34 - samples/sec: 4114.00 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:56,712 epoch 9 - iter 38/199 - loss 0.12691850 - time (sec): 4.28 - samples/sec: 4474.49 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:32:58,566 epoch 9 - iter 57/199 - loss 0.12232281 - time (sec): 6.13 - samples/sec: 4725.75 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:00,365 epoch 9 - iter 76/199 - loss 0.12199253 - time (sec): 7.93 - samples/sec: 4846.70 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:02,142 epoch 9 - iter 95/199 - loss 0.11937272 - time (sec): 9.71 - samples/sec: 4946.95 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:03,921 epoch 9 - iter 114/199 - loss 0.11933291 - time (sec): 11.49 - samples/sec: 5015.13 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:06,136 epoch 9 - iter 133/199 - loss 0.12040378 - time (sec): 13.70 - samples/sec: 4879.67 - lr: 0.050000 - momentum: 0.000000
20

100%|██████████| 16/16 [00:03<00:00,  4.21it/s]

2025-04-29 21:33:16,423 DEV : loss 0.12664693593978882 - f1-score (micro avg)  0.6452
2025-04-29 21:33:16,535  - 2 epochs without improvement
2025-04-29 21:33:16,538 ----------------------------------------------------------------------------------------------------





2025-04-29 21:33:18,981 epoch 10 - iter 19/199 - loss 0.11904784 - time (sec): 2.44 - samples/sec: 3950.91 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:20,896 epoch 10 - iter 38/199 - loss 0.11563415 - time (sec): 4.36 - samples/sec: 4483.11 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:22,775 epoch 10 - iter 57/199 - loss 0.11723042 - time (sec): 6.24 - samples/sec: 4696.36 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:24,574 epoch 10 - iter 76/199 - loss 0.11942302 - time (sec): 8.03 - samples/sec: 4784.95 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:26,386 epoch 10 - iter 95/199 - loss 0.11651802 - time (sec): 9.85 - samples/sec: 4874.19 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:28,167 epoch 10 - iter 114/199 - loss 0.11811432 - time (sec): 11.63 - samples/sec: 4951.33 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:30,413 epoch 10 - iter 133/199 - loss 0.11604651 - time (sec): 13.87 - samples/sec: 4835.78 - lr: 0.050000 - momentum: 0.00

100%|██████████| 16/16 [00:03<00:00,  4.39it/s]

2025-04-29 21:33:40,359 DEV : loss 0.10616296529769897 - f1-score (micro avg)  0.7094
2025-04-29 21:33:40,475  - 3 epochs without improvement
2025-04-29 21:33:40,479 ----------------------------------------------------------------------------------------------------





2025-04-29 21:33:42,909 epoch 11 - iter 19/199 - loss 0.11092961 - time (sec): 2.43 - samples/sec: 3956.98 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:44,806 epoch 11 - iter 38/199 - loss 0.10994175 - time (sec): 4.33 - samples/sec: 4404.85 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:46,710 epoch 11 - iter 57/199 - loss 0.10987695 - time (sec): 6.23 - samples/sec: 4614.91 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:48,563 epoch 11 - iter 76/199 - loss 0.11098550 - time (sec): 8.08 - samples/sec: 4760.19 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:50,467 epoch 11 - iter 95/199 - loss 0.11036714 - time (sec): 9.99 - samples/sec: 4824.34 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:52,468 epoch 11 - iter 114/199 - loss 0.11076904 - time (sec): 11.99 - samples/sec: 4825.11 - lr: 0.050000 - momentum: 0.000000
2025-04-29 21:33:54,742 epoch 11 - iter 133/199 - loss 0.11146885 - time (sec): 14.26 - samples/sec: 4720.99 - lr: 0.050000 - momentum: 0.00

100%|██████████| 16/16 [00:04<00:00,  3.29it/s]

2025-04-29 21:34:05,943 DEV : loss 0.11135422438383102 - f1-score (micro avg)  0.6722
2025-04-29 21:34:06,064  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.025]
2025-04-29 21:34:06,068 ----------------------------------------------------------------------------------------------------





2025-04-29 21:34:08,179 epoch 12 - iter 19/199 - loss 0.11252221 - time (sec): 2.11 - samples/sec: 4490.73 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:10,057 epoch 12 - iter 38/199 - loss 0.10835506 - time (sec): 3.99 - samples/sec: 4731.98 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:11,907 epoch 12 - iter 57/199 - loss 0.10648966 - time (sec): 5.84 - samples/sec: 4898.22 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:13,702 epoch 12 - iter 76/199 - loss 0.10858401 - time (sec): 7.63 - samples/sec: 5007.94 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:15,435 epoch 12 - iter 95/199 - loss 0.10954134 - time (sec): 9.37 - samples/sec: 5083.76 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:17,550 epoch 12 - iter 114/199 - loss 0.10720951 - time (sec): 11.48 - samples/sec: 4977.79 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:19,700 epoch 12 - iter 133/199 - loss 0.10701409 - time (sec): 13.63 - samples/sec: 4919.84 - lr: 0.025000 - momentum: 0.00

100%|██████████| 16/16 [00:04<00:00,  3.98it/s]

2025-04-29 21:34:29,982 DEV : loss 0.1156812235713005 - f1-score (micro avg)  0.6417
2025-04-29 21:34:30,096  - 1 epochs without improvement
2025-04-29 21:34:30,100 ----------------------------------------------------------------------------------------------------





2025-04-29 21:34:32,249 epoch 13 - iter 19/199 - loss 0.10688716 - time (sec): 2.15 - samples/sec: 4509.36 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:33,997 epoch 13 - iter 38/199 - loss 0.10209617 - time (sec): 3.89 - samples/sec: 4886.51 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:35,781 epoch 13 - iter 57/199 - loss 0.10016767 - time (sec): 5.68 - samples/sec: 5066.20 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:37,620 epoch 13 - iter 76/199 - loss 0.10186993 - time (sec): 7.52 - samples/sec: 5117.47 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:39,413 epoch 13 - iter 95/199 - loss 0.10329858 - time (sec): 9.31 - samples/sec: 5173.81 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:41,424 epoch 13 - iter 114/199 - loss 0.10448679 - time (sec): 11.32 - samples/sec: 5095.42 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:43,626 epoch 13 - iter 133/199 - loss 0.10499578 - time (sec): 13.52 - samples/sec: 4956.26 - lr: 0.025000 - momentum: 0.00

100%|██████████| 16/16 [00:03<00:00,  4.09it/s]


2025-04-29 21:34:53,861 DEV : loss 0.11211037635803223 - f1-score (micro avg)  0.6641
2025-04-29 21:34:53,965  - 2 epochs without improvement
2025-04-29 21:34:53,968 ----------------------------------------------------------------------------------------------------
2025-04-29 21:34:56,165 epoch 14 - iter 19/199 - loss 0.11202638 - time (sec): 2.19 - samples/sec: 4343.72 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:57,979 epoch 14 - iter 38/199 - loss 0.10550374 - time (sec): 4.01 - samples/sec: 4790.03 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:34:59,824 epoch 14 - iter 57/199 - loss 0.10885327 - time (sec): 5.85 - samples/sec: 4948.16 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:35:01,631 epoch 14 - iter 76/199 - loss 0.10747239 - time (sec): 7.66 - samples/sec: 5054.84 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:35:03,453 epoch 14 - iter 95/199 - loss 0.10658049 - time (sec): 9.48 - samples/sec: 5098.80 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:35:0

100%|██████████| 16/16 [00:05<00:00,  3.19it/s]

2025-04-29 21:35:18,762 DEV : loss 0.10428605228662491 - f1-score (micro avg)  0.7013
2025-04-29 21:35:18,873  - 3 epochs without improvement
2025-04-29 21:35:18,877 ----------------------------------------------------------------------------------------------------





2025-04-29 21:35:20,815 epoch 15 - iter 19/199 - loss 0.10348306 - time (sec): 1.94 - samples/sec: 4798.00 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:35:22,680 epoch 15 - iter 38/199 - loss 0.10123742 - time (sec): 3.80 - samples/sec: 5010.35 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:35:24,463 epoch 15 - iter 57/199 - loss 0.09961154 - time (sec): 5.58 - samples/sec: 5141.10 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:35:26,180 epoch 15 - iter 76/199 - loss 0.10251497 - time (sec): 7.30 - samples/sec: 5227.53 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:35:27,983 epoch 15 - iter 95/199 - loss 0.10310004 - time (sec): 9.10 - samples/sec: 5238.23 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:35:29,970 epoch 15 - iter 114/199 - loss 0.10318410 - time (sec): 11.09 - samples/sec: 5170.10 - lr: 0.025000 - momentum: 0.000000
2025-04-29 21:35:32,213 epoch 15 - iter 133/199 - loss 0.10253864 - time (sec): 13.33 - samples/sec: 5016.35 - lr: 0.025000 - momentum: 0.00

100%|██████████| 16/16 [00:03<00:00,  4.10it/s]


2025-04-29 21:35:42,288 DEV : loss 0.1175757572054863 - f1-score (micro avg)  0.6866
2025-04-29 21:35:42,385  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0125]
2025-04-29 21:35:42,388 ----------------------------------------------------------------------------------------------------
2025-04-29 21:35:44,543 epoch 16 - iter 19/199 - loss 0.09653198 - time (sec): 2.15 - samples/sec: 4458.33 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:35:46,303 epoch 16 - iter 38/199 - loss 0.10407271 - time (sec): 3.91 - samples/sec: 4837.56 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:35:48,018 epoch 16 - iter 57/199 - loss 0.10220262 - time (sec): 5.63 - samples/sec: 5060.33 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:35:49,861 epoch 16 - iter 76/199 - loss 0.09926255 - time (sec): 7.47 - samples/sec: 5119.99 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:35:51,634 epoch 16 - iter 95/199 - loss 0.09854838 - time (sec): 9.25 - samples/sec: 5181.4

100%|██████████| 16/16 [00:03<00:00,  4.29it/s]

2025-04-29 21:36:05,701 DEV : loss 0.1059340313076973 - f1-score (micro avg)  0.7123
2025-04-29 21:36:05,808  - 0 epochs without improvement
2025-04-29 21:36:05,811 saving best model





2025-04-29 21:36:08,420 ----------------------------------------------------------------------------------------------------
2025-04-29 21:36:10,371 epoch 17 - iter 19/199 - loss 0.09081830 - time (sec): 1.95 - samples/sec: 4803.53 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:12,227 epoch 17 - iter 38/199 - loss 0.09208455 - time (sec): 3.80 - samples/sec: 4961.98 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:14,031 epoch 17 - iter 57/199 - loss 0.09548158 - time (sec): 5.61 - samples/sec: 5049.92 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:15,785 epoch 17 - iter 76/199 - loss 0.09779608 - time (sec): 7.36 - samples/sec: 5134.18 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:17,641 epoch 17 - iter 95/199 - loss 0.09813457 - time (sec): 9.22 - samples/sec: 5125.81 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:19,965 epoch 17 - iter 114/199 - loss 0.09816064 - time (sec): 11.54 - samples/sec: 4937.27 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:3

100%|██████████| 16/16 [00:04<00:00,  3.24it/s]

2025-04-29 21:36:33,141 DEV : loss 0.11533748358488083 - f1-score (micro avg)  0.6634





2025-04-29 21:36:33,223  - 1 epochs without improvement
2025-04-29 21:36:33,224 ----------------------------------------------------------------------------------------------------
2025-04-29 21:36:35,197 epoch 18 - iter 19/199 - loss 0.09033833 - time (sec): 1.97 - samples/sec: 4953.23 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:37,024 epoch 18 - iter 38/199 - loss 0.09216936 - time (sec): 3.80 - samples/sec: 5069.23 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:38,830 epoch 18 - iter 57/199 - loss 0.09678208 - time (sec): 5.60 - samples/sec: 5083.00 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:40,722 epoch 18 - iter 76/199 - loss 0.09471698 - time (sec): 7.50 - samples/sec: 5103.92 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:42,897 epoch 18 - iter 95/199 - loss 0.09580852 - time (sec): 9.67 - samples/sec: 4948.05 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:36:45,077 epoch 18 - iter 114/199 - loss 0.09528933 - time (sec): 11.85 - samples/sec: 486

100%|██████████| 16/16 [00:04<00:00,  3.80it/s]

2025-04-29 21:36:57,315 DEV : loss 0.10491728037595749 - f1-score (micro avg)  0.69





2025-04-29 21:36:57,379  - 2 epochs without improvement
2025-04-29 21:36:57,380 ----------------------------------------------------------------------------------------------------
2025-04-29 21:36:59,327 epoch 19 - iter 19/199 - loss 0.08403495 - time (sec): 1.95 - samples/sec: 4866.55 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:01,190 epoch 19 - iter 38/199 - loss 0.08647315 - time (sec): 3.81 - samples/sec: 4999.96 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:03,041 epoch 19 - iter 57/199 - loss 0.09343561 - time (sec): 5.66 - samples/sec: 5076.18 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:04,864 epoch 19 - iter 76/199 - loss 0.09532400 - time (sec): 7.48 - samples/sec: 5118.82 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:06,951 epoch 19 - iter 95/199 - loss 0.09473760 - time (sec): 9.57 - samples/sec: 5024.95 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:09,017 epoch 19 - iter 114/199 - loss 0.09727567 - time (sec): 11.64 - samples/sec: 494

100%|██████████| 16/16 [00:04<00:00,  3.79it/s]

2025-04-29 21:37:21,252 DEV : loss 0.10673127323389053 - f1-score (micro avg)  0.6883





2025-04-29 21:37:21,316  - 3 epochs without improvement
2025-04-29 21:37:21,317 ----------------------------------------------------------------------------------------------------
2025-04-29 21:37:23,213 epoch 20 - iter 19/199 - loss 0.09493021 - time (sec): 1.90 - samples/sec: 5127.34 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:25,056 epoch 20 - iter 38/199 - loss 0.09635107 - time (sec): 3.74 - samples/sec: 5210.18 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:26,805 epoch 20 - iter 57/199 - loss 0.09546454 - time (sec): 5.49 - samples/sec: 5336.24 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:28,607 epoch 20 - iter 76/199 - loss 0.09690662 - time (sec): 7.29 - samples/sec: 5324.66 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:30,565 epoch 20 - iter 95/199 - loss 0.09511365 - time (sec): 9.25 - samples/sec: 5198.39 - lr: 0.012500 - momentum: 0.000000
2025-04-29 21:37:32,738 epoch 20 - iter 114/199 - loss 0.09488838 - time (sec): 11.42 - samples/sec: 504

100%|██████████| 16/16 [00:04<00:00,  3.24it/s]

2025-04-29 21:37:45,613 DEV : loss 0.10848555713891983 - f1-score (micro avg)  0.6853





2025-04-29 21:37:45,691  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00625]
2025-04-29 21:37:45,693 ----------------------------------------------------------------------------------------------------
2025-04-29 21:37:47,565 epoch 21 - iter 19/199 - loss 0.09778334 - time (sec): 1.87 - samples/sec: 5043.47 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:37:49,322 epoch 21 - iter 38/199 - loss 0.09843306 - time (sec): 3.63 - samples/sec: 5233.05 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:37:51,124 epoch 21 - iter 57/199 - loss 0.09587674 - time (sec): 5.43 - samples/sec: 5246.58 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:37:52,866 epoch 21 - iter 76/199 - loss 0.09557397 - time (sec): 7.17 - samples/sec: 5282.51 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:37:54,876 epoch 21 - iter 95/199 - loss 0.09805032 - time (sec): 9.18 - samples/sec: 5219.50 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:37:56,951 epoch 21 - iter 114/19

100%|██████████| 16/16 [00:04<00:00,  3.81it/s]

2025-04-29 21:38:09,149 DEV : loss 0.10669784992933273 - f1-score (micro avg)  0.689





2025-04-29 21:38:09,210  - 1 epochs without improvement
2025-04-29 21:38:09,212 ----------------------------------------------------------------------------------------------------
2025-04-29 21:38:11,146 epoch 22 - iter 19/199 - loss 0.09197768 - time (sec): 1.93 - samples/sec: 5056.02 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:13,008 epoch 22 - iter 38/199 - loss 0.09368230 - time (sec): 3.79 - samples/sec: 5128.16 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:14,728 epoch 22 - iter 57/199 - loss 0.09502320 - time (sec): 5.51 - samples/sec: 5243.27 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:16,502 epoch 22 - iter 76/199 - loss 0.09589235 - time (sec): 7.29 - samples/sec: 5264.04 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:18,379 epoch 22 - iter 95/199 - loss 0.09455090 - time (sec): 9.16 - samples/sec: 5216.43 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:20,578 epoch 22 - iter 114/199 - loss 0.09500564 - time (sec): 11.36 - samples/sec: 504

100%|██████████| 16/16 [00:04<00:00,  3.79it/s]

2025-04-29 21:38:32,838 DEV : loss 0.10549450665712357 - f1-score (micro avg)  0.6941





2025-04-29 21:38:32,900  - 2 epochs without improvement
2025-04-29 21:38:32,901 ----------------------------------------------------------------------------------------------------
2025-04-29 21:38:34,891 epoch 23 - iter 19/199 - loss 0.08798550 - time (sec): 1.99 - samples/sec: 4898.76 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:36,689 epoch 23 - iter 38/199 - loss 0.09456343 - time (sec): 3.79 - samples/sec: 5104.41 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:38,463 epoch 23 - iter 57/199 - loss 0.09227338 - time (sec): 5.56 - samples/sec: 5258.10 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:40,254 epoch 23 - iter 76/199 - loss 0.09148614 - time (sec): 7.35 - samples/sec: 5241.40 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:42,114 epoch 23 - iter 95/199 - loss 0.09109307 - time (sec): 9.21 - samples/sec: 5227.33 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:38:44,378 epoch 23 - iter 114/199 - loss 0.09254390 - time (sec): 11.48 - samples/sec: 505

100%|██████████| 16/16 [00:04<00:00,  3.29it/s]

2025-04-29 21:38:57,146 DEV : loss 0.10584674775600433 - f1-score (micro avg)  0.6844





2025-04-29 21:38:57,208  - 3 epochs without improvement
2025-04-29 21:38:57,209 ----------------------------------------------------------------------------------------------------
2025-04-29 21:38:59,177 epoch 24 - iter 19/199 - loss 0.10001950 - time (sec): 1.97 - samples/sec: 5104.33 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:39:01,016 epoch 24 - iter 38/199 - loss 0.09852757 - time (sec): 3.81 - samples/sec: 5124.90 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:39:02,912 epoch 24 - iter 57/199 - loss 0.09848982 - time (sec): 5.70 - samples/sec: 5103.04 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:39:04,698 epoch 24 - iter 76/199 - loss 0.09495485 - time (sec): 7.49 - samples/sec: 5176.16 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:39:06,648 epoch 24 - iter 95/199 - loss 0.09477328 - time (sec): 9.44 - samples/sec: 5119.42 - lr: 0.006250 - momentum: 0.000000
2025-04-29 21:39:08,783 epoch 24 - iter 114/199 - loss 0.09273926 - time (sec): 11.57 - samples/sec: 499

100%|██████████| 16/16 [00:04<00:00,  3.82it/s]

2025-04-29 21:39:20,974 DEV : loss 0.10416173189878464 - f1-score (micro avg)  0.6931





2025-04-29 21:39:21,058  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.003125]
2025-04-29 21:39:21,062 ----------------------------------------------------------------------------------------------------
2025-04-29 21:39:22,901 epoch 25 - iter 19/199 - loss 0.09778538 - time (sec): 1.84 - samples/sec: 5048.38 - lr: 0.003125 - momentum: 0.000000
2025-04-29 21:39:24,657 epoch 25 - iter 38/199 - loss 0.09315055 - time (sec): 3.59 - samples/sec: 5265.54 - lr: 0.003125 - momentum: 0.000000
2025-04-29 21:39:26,381 epoch 25 - iter 57/199 - loss 0.09308489 - time (sec): 5.32 - samples/sec: 5378.05 - lr: 0.003125 - momentum: 0.000000
2025-04-29 21:39:28,131 epoch 25 - iter 76/199 - loss 0.09323335 - time (sec): 7.07 - samples/sec: 5412.87 - lr: 0.003125 - momentum: 0.000000
2025-04-29 21:39:29,944 epoch 25 - iter 95/199 - loss 0.09117204 - time (sec): 8.88 - samples/sec: 5378.62 - lr: 0.003125 - momentum: 0.000000
2025-04-29 21:39:32,251 epoch 25 - iter 114/1

100%|██████████| 16/16 [00:04<00:00,  3.86it/s]

2025-04-29 21:39:44,351 DEV : loss 0.10540113598108292 - f1-score (micro avg)  0.6932





2025-04-29 21:39:44,425  - 1 epochs without improvement
2025-04-29 21:39:46,138 ----------------------------------------------------------------------------------------------------
2025-04-29 21:39:46,140 Loading model from best epoch ...
2025-04-29 21:39:47,614 SequenceTagger predicts: Dictionary with 9 tags: O, B-PER, I-PER, B-ORG, I-ORG, B-LOC, I-LOC, <START>, <STOP>


100%|██████████| 32/32 [00:12<00:00,  2.64it/s]


2025-04-29 21:40:00,140 
Results:
- F-score (micro) 0.7216
- F-score (macro) 0.6719
- Accuracy 0.6063

By class:
              precision    recall  f1-score   support

         PER     0.8304    0.8315    0.8309      1602
         ORG     0.5786    0.4785    0.5238       792
         LOC     0.7630    0.5831    0.6610       602

   micro avg     0.7584    0.6883    0.7216      2996
   macro avg     0.7240    0.6310    0.6719      2996
weighted avg     0.7503    0.6883    0.7156      2996

2025-04-29 21:40:00,140 ----------------------------------------------------------------------------------------------------
