In [1]:
import sys
from pathlib import Path

In [2]:
# Make the directory two levels above the current working directory importable
# (presumably where the project-local `src` package imported later lives —
# confirm against the repository layout).
module_path = Path.cwd().parent.parent
# sys.path holds plain strings, and a Path object never compares equal to a
# str, so the membership test must use the string form. Testing the Path
# itself is always "not in", which appends a duplicate entry on every re-run.
if str(module_path) not in sys.path:
    sys.path.append(str(module_path))

In [3]:
from flair.datasets import ColumnCorpus
from flair.data import Corpus, Sentence
from flair.embeddings import WordEmbeddings, StackedEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from src.loader import TextLoader

In [5]:
# Project-local text loader (from src.loader); used further down to draw a
# random text for a qualitative check of the trained tagger.
loader = TextLoader()

In [6]:
# Layout of the column-formatted data files: column 0 carries the token text,
# column 1 the tag that is trained on under the label type "ner".
columns = {
    0: "text",
    1: "ner",
}

# Read the train/dev/test splits from the local "data/" folder. Columns are
# separated by a single space and "<DOC>" is passed as the document separator
# token.
corpus: Corpus = ColumnCorpus(
    data_folder="data/",
    column_format=columns,
    column_delimiter=" ",
    document_separator_token="<DOC>",
    train_file="NER_train.txt",
    dev_file="NER_dev.txt",
    test_file="NER_test.txt",
)

# Drop empty sentences from the corpus before any training happens.
corpus.filter_empty_sentences()


2022-12-03 15:15:09,878 Reading data from data
2022-12-03 15:15:09,878 Train: data/NER_train.txt
2022-12-03 15:15:09,879 Dev: data/NER_dev.txt
2022-12-03 15:15:09,879 Test: data/NER_test.txt
2022-12-03 15:15:47,173 Filtering empty sentences
2022-12-03 15:15:49,734 Corpus: 250135 train + 46666 dev + 15696 test sentences


In [7]:
# Build the dictionary of 'ner' labels found in the corpus; it is handed to
# the SequenceTagger below as its tag dictionary.
label_dict = corpus.make_label_dictionary(label_type='ner')

2022-12-03 15:15:49,755 Computing label dictionary. Progress:


250135it [00:02, 94395.77it/s]

2022-12-03 15:15:52,408 Dictionary created for label 'ner' with 3 values: DE (seen 115092 times), DS (seen 114349 times)





In [8]:
# Token representations fed to the tagger. Only the pre-trained 'glove' word
# embeddings are used; append further embedding objects to this list to have
# them stacked together.
embedding_types = [WordEmbeddings("glove")]
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

In [9]:
# Sequence labelling model over the stacked embeddings, predicting the "ner"
# label type from the dictionary built above. A CRF layer is enabled on top
# of the recurrent encoder (hidden size 256).
tagger = SequenceTagger(
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type="ner",
    hidden_size=256,
    use_crf=True,
)


2022-12-03 15:15:54,240 SequenceTagger predicts: Dictionary with 3 tags: <unk>, DE, DS


In [10]:
# Bind the tagger to the corpus; the actual training run is started in the next cell.
trainer = ModelTrainer(tagger, corpus)

In [11]:
# Run training and write model files and checkpoints under "models/".
# The call is deliberately left as the cell's last expression so the returned
# result dict (test score plus the per-epoch histories) is displayed.
# NOTE(review): embeddings_storage_mode="gpu" presumably requires a CUDA
# device — confirm before running on a CPU-only machine.
trainer.train(
    base_path="models/",
    max_epochs=20,
    mini_batch_size=32,
    learning_rate=0.1,
    patience=4,
    checkpoint=True,
    embeddings_storage_mode="gpu",
)


2022-12-03 15:15:55,935 ----------------------------------------------------------------------------------------------------
2022-12-03 15:15:55,936 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'glove'
      (embedding): Embedding(400001, 100)
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=100, out_features=100, bias=True)
  (rnn): LSTM(100, 256, batch_first=True, bidirectional=True)
  (linear): Linear(in_features=512, out_features=5, bias=True)
  (loss_function): ViterbiLoss()
  (crf): CRF()
)"
2022-12-03 15:15:55,936 ----------------------------------------------------------------------------------------------------
2022-12-03 15:15:55,936 Corpus: "Corpus: 250135 train + 46666 dev + 15696 test sentences"
2022-12-03 15:15:55,937 ----------------------------------------------------------------------------------------------------
2022-12-03 15:15:55,

100%|██████████| 1459/1459 [01:43<00:00, 14.06it/s]


2022-12-03 15:23:18,384 Evaluating as a multi-label problem: False
2022-12-03 15:23:21,337 DEV : loss 0.10346631705760956 - f1-score (micro avg)  0.0017
2022-12-03 15:23:21,886 BAD EPOCHS (no improvement): 0
2022-12-03 15:23:22,593 saving best model
2022-12-03 15:23:23,188 ----------------------------------------------------------------------------------------------------
2022-12-03 15:23:56,567 epoch 2 - iter 781/7817 - loss 0.10559244 - samples/sec: 750.63 - lr: 0.100000
2022-12-03 15:24:28,921 epoch 2 - iter 1562/7817 - loss 0.10599293 - samples/sec: 773.97 - lr: 0.100000
2022-12-03 15:25:01,290 epoch 2 - iter 2343/7817 - loss 0.10602334 - samples/sec: 773.57 - lr: 0.100000
2022-12-03 15:25:33,560 epoch 2 - iter 3124/7817 - loss 0.10576322 - samples/sec: 775.94 - lr: 0.100000
2022-12-03 15:26:06,131 epoch 2 - iter 3905/7817 - loss 0.10575062 - samples/sec: 768.76 - lr: 0.100000
2022-12-03 15:26:38,588 epoch 2 - iter 4686/7817 - loss 0.10542264 - samples/sec: 771.47 - lr: 0.100000
20

100%|██████████| 1459/1459 [01:42<00:00, 14.17it/s]


2022-12-03 15:30:34,438 Evaluating as a multi-label problem: False
2022-12-03 15:30:37,426 DEV : loss 0.096551813185215 - f1-score (micro avg)  0.0213
2022-12-03 15:30:37,981 BAD EPOCHS (no improvement): 0
2022-12-03 15:30:38,339 saving best model
2022-12-03 15:30:38,706 ----------------------------------------------------------------------------------------------------
2022-12-03 15:31:12,050 epoch 3 - iter 781/7817 - loss 0.10363700 - samples/sec: 751.01 - lr: 0.100000
2022-12-03 15:31:45,454 epoch 3 - iter 1562/7817 - loss 0.10369032 - samples/sec: 749.54 - lr: 0.100000
2022-12-03 15:32:18,197 epoch 3 - iter 2343/7817 - loss 0.10351495 - samples/sec: 764.70 - lr: 0.100000
2022-12-03 15:32:50,718 epoch 3 - iter 3124/7817 - loss 0.10308222 - samples/sec: 769.90 - lr: 0.100000
2022-12-03 15:33:23,985 epoch 3 - iter 3905/7817 - loss 0.10296485 - samples/sec: 752.63 - lr: 0.100000
2022-12-03 15:33:57,178 epoch 3 - iter 4686/7817 - loss 0.10311434 - samples/sec: 754.31 - lr: 0.100000
2022

100%|██████████| 1459/1459 [01:42<00:00, 14.21it/s]


2022-12-03 15:37:50,853 Evaluating as a multi-label problem: False
2022-12-03 15:37:53,841 DEV : loss 0.09422393143177032 - f1-score (micro avg)  0.0171
2022-12-03 15:37:54,380 BAD EPOCHS (no improvement): 1
2022-12-03 15:37:54,739 ----------------------------------------------------------------------------------------------------
2022-12-03 15:38:27,094 epoch 4 - iter 781/7817 - loss 0.10332028 - samples/sec: 774.04 - lr: 0.100000
2022-12-03 15:38:59,551 epoch 4 - iter 1562/7817 - loss 0.10183680 - samples/sec: 771.48 - lr: 0.100000
2022-12-03 15:39:31,629 epoch 4 - iter 2343/7817 - loss 0.10188033 - samples/sec: 780.60 - lr: 0.100000
2022-12-03 15:40:04,369 epoch 4 - iter 3124/7817 - loss 0.10203601 - samples/sec: 764.78 - lr: 0.100000
2022-12-03 15:40:37,030 epoch 4 - iter 3905/7817 - loss 0.10203372 - samples/sec: 766.62 - lr: 0.100000
2022-12-03 15:41:09,883 epoch 4 - iter 4686/7817 - loss 0.10192181 - samples/sec: 762.13 - lr: 0.100000
2022-12-03 15:41:42,694 epoch 4 - iter 5467/

100%|██████████| 1459/1459 [01:43<00:00, 14.10it/s]


2022-12-03 15:45:05,716 Evaluating as a multi-label problem: False
2022-12-03 15:45:08,668 DEV : loss 0.0936627984046936 - f1-score (micro avg)  0.0123
2022-12-03 15:45:09,198 BAD EPOCHS (no improvement): 2
2022-12-03 15:45:09,550 ----------------------------------------------------------------------------------------------------
2022-12-03 15:45:41,773 epoch 5 - iter 781/7817 - loss 0.10125472 - samples/sec: 777.18 - lr: 0.100000
2022-12-03 15:46:14,588 epoch 5 - iter 1562/7817 - loss 0.10150659 - samples/sec: 763.04 - lr: 0.100000
2022-12-03 15:46:47,508 epoch 5 - iter 2343/7817 - loss 0.10103585 - samples/sec: 760.60 - lr: 0.100000
2022-12-03 15:47:20,749 epoch 5 - iter 3124/7817 - loss 0.10066712 - samples/sec: 753.22 - lr: 0.100000
2022-12-03 15:47:53,762 epoch 5 - iter 3905/7817 - loss 0.10049923 - samples/sec: 758.42 - lr: 0.100000
2022-12-03 15:48:26,053 epoch 5 - iter 4686/7817 - loss 0.10100579 - samples/sec: 775.43 - lr: 0.100000
2022-12-03 15:48:59,244 epoch 5 - iter 5467/7

100%|██████████| 1459/1459 [01:34<00:00, 15.37it/s]


2022-12-03 15:52:20,321 Evaluating as a multi-label problem: False
2022-12-03 15:52:23,301 DEV : loss 0.09435790777206421 - f1-score (micro avg)  0.0201
2022-12-03 15:52:23,843 BAD EPOCHS (no improvement): 3
2022-12-03 15:52:24,193 ----------------------------------------------------------------------------------------------------
2022-12-03 15:52:57,101 epoch 6 - iter 781/7817 - loss 0.09966140 - samples/sec: 760.99 - lr: 0.100000
2022-12-03 15:53:29,401 epoch 6 - iter 1562/7817 - loss 0.10011713 - samples/sec: 775.22 - lr: 0.100000
2022-12-03 15:54:02,044 epoch 6 - iter 2343/7817 - loss 0.10013401 - samples/sec: 767.08 - lr: 0.100000
2022-12-03 15:54:34,348 epoch 6 - iter 3124/7817 - loss 0.10040702 - samples/sec: 775.11 - lr: 0.100000
2022-12-03 15:55:06,837 epoch 6 - iter 3905/7817 - loss 0.10054164 - samples/sec: 770.71 - lr: 0.100000
2022-12-03 15:55:39,744 epoch 6 - iter 4686/7817 - loss 0.10052054 - samples/sec: 760.92 - lr: 0.100000
2022-12-03 15:56:12,603 epoch 6 - iter 5467/

100%|██████████| 1459/1459 [01:35<00:00, 15.25it/s]


2022-12-03 15:59:27,326 Evaluating as a multi-label problem: False
2022-12-03 15:59:30,251 DEV : loss 0.09946397691965103 - f1-score (micro avg)  0.0031
2022-12-03 15:59:30,791 BAD EPOCHS (no improvement): 4
2022-12-03 15:59:31,146 ----------------------------------------------------------------------------------------------------
2022-12-03 16:00:04,507 epoch 7 - iter 781/7817 - loss 0.10057298 - samples/sec: 750.62 - lr: 0.100000
2022-12-03 16:00:36,461 epoch 7 - iter 1562/7817 - loss 0.10065679 - samples/sec: 783.59 - lr: 0.100000
2022-12-03 16:01:09,798 epoch 7 - iter 2343/7817 - loss 0.10023673 - samples/sec: 751.08 - lr: 0.100000
2022-12-03 16:01:50,329 epoch 7 - iter 3124/7817 - loss 0.10018430 - samples/sec: 617.53 - lr: 0.100000
2022-12-03 16:02:24,632 epoch 7 - iter 3905/7817 - loss 0.10040950 - samples/sec: 729.89 - lr: 0.100000
2022-12-03 16:02:57,069 epoch 7 - iter 4686/7817 - loss 0.10043280 - samples/sec: 771.92 - lr: 0.100000
2022-12-03 16:03:30,076 epoch 7 - iter 5467/

100%|██████████| 1459/1459 [01:35<00:00, 15.27it/s]


2022-12-03 16:06:44,662 Evaluating as a multi-label problem: False
2022-12-03 16:06:47,619 DEV : loss 0.09262198954820633 - f1-score (micro avg)  0.013
2022-12-03 16:06:48,152 Epoch     7: reducing learning rate of group 0 to 5.0000e-02.
2022-12-03 16:06:48,153 BAD EPOCHS (no improvement): 5
2022-12-03 16:06:48,512 ----------------------------------------------------------------------------------------------------
2022-12-03 16:07:20,887 epoch 8 - iter 781/7817 - loss 0.09737187 - samples/sec: 773.52 - lr: 0.050000
2022-12-03 16:07:54,258 epoch 8 - iter 1562/7817 - loss 0.09710005 - samples/sec: 750.30 - lr: 0.050000
2022-12-03 16:08:26,872 epoch 8 - iter 2343/7817 - loss 0.09702389 - samples/sec: 767.74 - lr: 0.050000
2022-12-03 16:08:59,779 epoch 8 - iter 3124/7817 - loss 0.09691899 - samples/sec: 760.92 - lr: 0.050000
2022-12-03 16:09:32,199 epoch 8 - iter 3905/7817 - loss 0.09695555 - samples/sec: 772.30 - lr: 0.050000
2022-12-03 16:10:04,805 epoch 8 - iter 4686/7817 - loss 0.09710

100%|██████████| 1459/1459 [01:42<00:00, 14.23it/s]


2022-12-03 16:14:00,236 Evaluating as a multi-label problem: False
2022-12-03 16:14:03,249 DEV : loss 0.09051612764596939 - f1-score (micro avg)  0.0225
2022-12-03 16:14:03,786 BAD EPOCHS (no improvement): 0
2022-12-03 16:14:04,148 saving best model
2022-12-03 16:14:04,488 ----------------------------------------------------------------------------------------------------
2022-12-03 16:14:37,275 epoch 9 - iter 781/7817 - loss 0.09666813 - samples/sec: 763.80 - lr: 0.050000
2022-12-03 16:15:10,016 epoch 9 - iter 1562/7817 - loss 0.09614295 - samples/sec: 764.76 - lr: 0.050000
2022-12-03 16:15:43,051 epoch 9 - iter 2343/7817 - loss 0.09569747 - samples/sec: 757.95 - lr: 0.050000
2022-12-03 16:16:16,446 epoch 9 - iter 3124/7817 - loss 0.09547012 - samples/sec: 749.77 - lr: 0.050000
2022-12-03 16:16:48,908 epoch 9 - iter 3905/7817 - loss 0.09562298 - samples/sec: 771.34 - lr: 0.050000
2022-12-03 16:17:22,730 epoch 9 - iter 4686/7817 - loss 0.09584551 - samples/sec: 740.30 - lr: 0.050000
20

100%|██████████| 1459/1459 [01:46<00:00, 13.76it/s]


2022-12-03 16:21:20,885 Evaluating as a multi-label problem: False
2022-12-03 16:21:23,871 DEV : loss 0.08946173638105392 - f1-score (micro avg)  0.0142
2022-12-03 16:21:24,421 BAD EPOCHS (no improvement): 1
2022-12-03 16:21:24,785 ----------------------------------------------------------------------------------------------------
2022-12-03 16:21:58,659 epoch 10 - iter 781/7817 - loss 0.09434126 - samples/sec: 739.26 - lr: 0.050000
2022-12-03 16:22:31,125 epoch 10 - iter 1562/7817 - loss 0.09453068 - samples/sec: 771.26 - lr: 0.050000
2022-12-03 16:23:03,762 epoch 10 - iter 2343/7817 - loss 0.09528655 - samples/sec: 767.21 - lr: 0.050000
2022-12-03 16:23:36,521 epoch 10 - iter 3124/7817 - loss 0.09520042 - samples/sec: 764.35 - lr: 0.050000
2022-12-03 16:24:09,365 epoch 10 - iter 3905/7817 - loss 0.09538200 - samples/sec: 762.34 - lr: 0.050000
2022-12-03 16:24:41,969 epoch 10 - iter 4686/7817 - loss 0.09542441 - samples/sec: 767.97 - lr: 0.050000
2022-12-03 16:25:14,866 epoch 10 - ite

100%|██████████| 1459/1459 [01:43<00:00, 14.12it/s]


2022-12-03 16:28:37,606 Evaluating as a multi-label problem: False
2022-12-03 16:28:40,576 DEV : loss 0.08909204602241516 - f1-score (micro avg)  0.0204
2022-12-03 16:28:41,108 BAD EPOCHS (no improvement): 2
2022-12-03 16:28:41,473 ----------------------------------------------------------------------------------------------------
2022-12-03 16:29:14,856 epoch 11 - iter 781/7817 - loss 0.09426788 - samples/sec: 750.16 - lr: 0.050000
2022-12-03 16:29:47,428 epoch 11 - iter 1562/7817 - loss 0.09481089 - samples/sec: 768.72 - lr: 0.050000
2022-12-03 16:30:20,507 epoch 11 - iter 2343/7817 - loss 0.09510180 - samples/sec: 756.94 - lr: 0.050000
2022-12-03 16:30:53,064 epoch 11 - iter 3124/7817 - loss 0.09522518 - samples/sec: 769.07 - lr: 0.050000
2022-12-03 16:31:25,963 epoch 11 - iter 3905/7817 - loss 0.09520673 - samples/sec: 761.11 - lr: 0.050000
2022-12-03 16:32:00,521 epoch 11 - iter 4686/7817 - loss 0.09538240 - samples/sec: 724.49 - lr: 0.050000
2022-12-03 16:32:34,120 epoch 11 - ite

100%|██████████| 1459/1459 [01:43<00:00, 14.04it/s]


2022-12-03 16:35:58,106 Evaluating as a multi-label problem: False
2022-12-03 16:36:01,155 DEV : loss 0.09031546860933304 - f1-score (micro avg)  0.0133
2022-12-03 16:36:01,696 BAD EPOCHS (no improvement): 3
2022-12-03 16:36:02,072 ----------------------------------------------------------------------------------------------------
2022-12-03 16:36:34,949 epoch 12 - iter 781/7817 - loss 0.09517229 - samples/sec: 761.74 - lr: 0.050000
2022-12-03 16:37:08,074 epoch 12 - iter 1562/7817 - loss 0.09538028 - samples/sec: 755.90 - lr: 0.050000
2022-12-03 16:37:41,253 epoch 12 - iter 2343/7817 - loss 0.09511751 - samples/sec: 754.67 - lr: 0.050000
2022-12-03 16:38:14,314 epoch 12 - iter 3124/7817 - loss 0.09525418 - samples/sec: 757.33 - lr: 0.050000
2022-12-03 16:38:47,026 epoch 12 - iter 3905/7817 - loss 0.09524923 - samples/sec: 765.43 - lr: 0.050000
2022-12-03 16:39:19,155 epoch 12 - iter 4686/7817 - loss 0.09527270 - samples/sec: 779.35 - lr: 0.050000
2022-12-03 16:39:51,495 epoch 12 - ite

100%|██████████| 1459/1459 [01:43<00:00, 14.05it/s]


2022-12-03 16:43:15,816 Evaluating as a multi-label problem: False
2022-12-03 16:43:18,799 DEV : loss 0.08996888995170593 - f1-score (micro avg)  0.0154
2022-12-03 16:43:19,346 BAD EPOCHS (no improvement): 4
2022-12-03 16:43:19,721 ----------------------------------------------------------------------------------------------------
2022-12-03 16:43:52,621 epoch 13 - iter 781/7817 - loss 0.09573348 - samples/sec: 761.15 - lr: 0.050000
2022-12-03 16:44:25,651 epoch 13 - iter 1562/7817 - loss 0.09494220 - samples/sec: 758.07 - lr: 0.050000
2022-12-03 16:44:58,839 epoch 13 - iter 2343/7817 - loss 0.09486797 - samples/sec: 754.44 - lr: 0.050000
2022-12-03 16:45:31,535 epoch 13 - iter 3124/7817 - loss 0.09479519 - samples/sec: 765.84 - lr: 0.050000
2022-12-03 16:46:04,686 epoch 13 - iter 3905/7817 - loss 0.09487388 - samples/sec: 755.27 - lr: 0.050000
2022-12-03 16:46:37,343 epoch 13 - iter 4686/7817 - loss 0.09483974 - samples/sec: 766.76 - lr: 0.050000
2022-12-03 16:47:09,695 epoch 13 - ite

100%|██████████| 1459/1459 [01:43<00:00, 14.07it/s]


2022-12-03 16:50:32,788 Evaluating as a multi-label problem: False
2022-12-03 16:50:35,798 DEV : loss 0.0890735611319542 - f1-score (micro avg)  0.0224
2022-12-03 16:50:36,335 Epoch    13: reducing learning rate of group 0 to 2.5000e-02.
2022-12-03 16:50:36,336 BAD EPOCHS (no improvement): 5
2022-12-03 16:50:36,701 ----------------------------------------------------------------------------------------------------
2022-12-03 16:51:08,690 epoch 14 - iter 781/7817 - loss 0.09432333 - samples/sec: 782.90 - lr: 0.025000
2022-12-03 16:51:41,862 epoch 14 - iter 1562/7817 - loss 0.09376324 - samples/sec: 754.79 - lr: 0.025000
2022-12-03 16:52:15,307 epoch 14 - iter 2343/7817 - loss 0.09352787 - samples/sec: 748.64 - lr: 0.025000
2022-12-03 16:52:48,534 epoch 14 - iter 3124/7817 - loss 0.09373283 - samples/sec: 753.54 - lr: 0.025000
2022-12-03 16:53:21,183 epoch 14 - iter 3905/7817 - loss 0.09344375 - samples/sec: 766.94 - lr: 0.025000
2022-12-03 16:53:53,773 epoch 14 - iter 4686/7817 - loss 0

100%|██████████| 1459/1459 [01:43<00:00, 14.09it/s]


2022-12-03 16:57:50,156 Evaluating as a multi-label problem: False
2022-12-03 16:57:53,133 DEV : loss 0.0892150029540062 - f1-score (micro avg)  0.0308
2022-12-03 16:57:53,664 BAD EPOCHS (no improvement): 0
2022-12-03 16:57:54,031 saving best model
2022-12-03 16:57:54,373 ----------------------------------------------------------------------------------------------------
2022-12-03 16:58:27,956 epoch 15 - iter 781/7817 - loss 0.09084190 - samples/sec: 745.93 - lr: 0.025000
2022-12-03 16:59:03,861 epoch 15 - iter 1562/7817 - loss 0.09203448 - samples/sec: 697.34 - lr: 0.025000
2022-12-03 16:59:40,005 epoch 15 - iter 2343/7817 - loss 0.09237090 - samples/sec: 692.75 - lr: 0.025000
2022-12-03 17:00:14,050 epoch 15 - iter 3124/7817 - loss 0.09254739 - samples/sec: 735.46 - lr: 0.025000
2022-12-03 17:00:47,681 epoch 15 - iter 3905/7817 - loss 0.09273024 - samples/sec: 744.52 - lr: 0.025000
2022-12-03 17:01:22,921 epoch 15 - iter 4686/7817 - loss 0.09296470 - samples/sec: 710.47 - lr: 0.0250

100%|██████████| 1459/1459 [01:38<00:00, 14.76it/s]


2022-12-03 17:05:30,232 Evaluating as a multi-label problem: False
2022-12-03 17:05:33,387 DEV : loss 0.0882866233587265 - f1-score (micro avg)  0.0186
2022-12-03 17:05:33,956 BAD EPOCHS (no improvement): 1
2022-12-03 17:05:34,666 ----------------------------------------------------------------------------------------------------
2022-12-03 17:05:44,566 ----------------------------------------------------------------------------------------------------
2022-12-03 17:05:44,567 Exiting from training early.
2022-12-03 17:05:44,568 Saving model ...
2022-12-03 17:05:44,928 Done.
2022-12-03 17:05:44,930 ----------------------------------------------------------------------------------------------------
2022-12-03 17:05:44,932 loading file models/best-model.pt
2022-12-03 17:05:45,340 SequenceTagger predicts: Dictionary with 5 tags: <unk>, DE, DS, <START>, <STOP>


100%|██████████| 491/491 [00:35<00:00, 13.71it/s]


2022-12-03 17:06:21,583 Evaluating as a multi-label problem: False
2022-12-03 17:06:22,592 0.0159	0.3609	0.0305	0.0159
2022-12-03 17:06:22,593 
Results:
- F-score (micro) 0.0305
- F-score (macro) 0.2834
- Accuracy 0.0159

By class:
              precision    recall  f1-score   support

       <unk>     0.0000    0.0000    0.0000         0
          DE     0.5210    0.4189    0.4644      7302
          DS     0.5322    0.3024    0.3856      7243

   micro avg     0.0159    0.3609    0.0305     14545
   macro avg     0.3511    0.2404    0.2834     14545
weighted avg     0.5266    0.3609    0.4252     14545

2022-12-03 17:06:22,594 ----------------------------------------------------------------------------------------------------


{'test_score': 0.030521966919514233,
 'dev_score_history': [0.0016527694180800817,
  0.02129778670059778,
  0.01713351357604014,
  0.012251667963917476,
  0.020119477779760146,
  0.0030741903292402944,
  0.013012373223959078,
  0.02248197736313677,
  0.014241657239541775,
  0.020433170670498952,
  0.013316263211862297,
  0.015400360354708235,
  0.022407475301586304,
  0.03076935142034259,
  0.018611791323646758],
 'train_loss_history': [0.11162724334505603,
  0.10472340281774903,
  0.10285513800329801,
  0.10183264383369618,
  0.10100515115303266,
  0.10053029851898,
  0.10019411184330691,
  0.09662685535563979,
  0.09592075308415242,
  0.09555069143122688,
  0.09527929121687452,
  0.09511848705341996,
  0.09487610242898997,
  0.09324911343296113,
  0.09291608804392594],
 'dev_loss_history': [0.10346631705760956,
  0.096551813185215,
  0.09422393143177032,
  0.0936627984046936,
  0.09435790777206421,
  0.09946397691965103,
  0.09262198954820633,
  0.09051612764596939,
  0.0894617363810

In [12]:
# Reload a trained tagger from disk for inference.
# NOTE(review): the test-set scores logged by the training cell were computed
# after loading models/best-model.pt, whereas this cell loads
# models/final-model.pt (presumably the state saved when training exited
# early) — confirm which checkpoint is intended here.
model = SequenceTagger.load("models/final-model.pt")

2022-12-03 17:10:16,747 loading file models/final-model.pt
2022-12-03 17:10:17,370 SequenceTagger predicts: Dictionary with 5 tags: <unk>, DE, DS, <START>, <STOP>


In [13]:
# Draw one text through the project loader.
# NOTE(review): the selection is presumably unseeded, so re-running this cell
# may yield a different text — confirm in src.loader.
text = loader.load_random_text()

# Wrap the raw text in a flair Sentence and run the tagger on it. The return
# value of predict() is not used; the tagged result is read back from `sent`
# itself in a later cell.
sent = Sentence(text.text)
model.predict(sent)

In [15]:
# Print every discourse attached to the loaded text, one after another, so
# they can be compared by eye with the tagger's prediction.
for disc in text.discourses:
    print(disc)

--- 1621354891977 (0 -> 40 | 0 -> 6) - Position ---
The driverless Cars article I am against
---------------------------------------------------
--- 1621354905027 (49 -> 91 | 8 -> 15) - Claim ---
What benefits would we get from these cars
--------------------------------------------------
--- 1621354913903 (92 -> 152 | 16 -> 27) - Claim ---
Will auto body parts run out of buisness because of the cars
----------------------------------------------------
--- 1621354921464 (153 -> 192 | 28 -> 34) - Claim ---
What will happen to taxi drivers jobs. 
-----------------------------------------------------
--- 1621354933349 (193 -> 238 | 35 -> 43) - Claim ---
What benefits would we get out of these cars?
-----------------------------------------------------
--- 1621354948467 (239 -> 278 | 44 -> 51) - Counterclaim ---
Maybe we will get less wrecks with them
------------------------------------------------------------
--- 1621354967258 (279 -> 526 | 52 -> 96) - Rebuttal ---
but think of this what

In [14]:
# Show the sentence together with the tags attached by model.predict(sent);
# left as the last expression so the string is displayed as the cell output.
sent.to_tagged_string()

'Sentence: "The driverless Cars article I am against because What benefits would we get from these cars Will auto body parts run out of buisness because of the cars What will happen to taxi drivers jobs . What benefits would we get out of these cars ? Maybe we will get less wrecks with them but think of this what will happen if your car sensors and autopilot does n\'t work anymore the passangers will have to drive but what if they do n\'t know how to drive because the driving classes got taken away because these cars do n\'t need a driver . It also shows in the text that if something bad happens who \'s fault is it the manufacturer or the driver in this case it would be the manufacturers fault because they did n\'t check the car throughly and shipped it somwhere to be sold to some people then thoese people get stuck in the middle of nowhere because of the driverless car and the manufacturer but what will happen if the car gets struck by lightning because of all the electrical systems t