In [1]:
!pip install flair

Collecting flair
  Downloading flair-0.13.0-py3-none-any.whl (387 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m387.2/387.2 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting boto3>=1.20.27 (from flair)
  Downloading boto3-1.33.2-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.1/139.1 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bpemb>=0.3.2 (from flair)
  Downloading bpemb-0.3.4-py3-none-any.whl (19 kB)
Collecting conllu>=4.0 (from flair)
  Downloading conllu-4.5.3-py2.py3-none-any.whl (16 kB)
Collecting deprecated>=1.2.13 (from flair)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting ftfy>=6.1.0 (from flair)
  Downloading ftfy-6.1.3-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.4/53.4 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
Collecting janome>=0.4.2 (from flair)
  Downloading Janome-0.5.0-py2.py3-none-any.whl (19.7

In [2]:
from flair.data import Sentence
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from flair.datasets import ColumnCorpus
from flair.datasets import SentenceDataset
from flair.data import Token
from tqdm import tqdm
import gensim
from gensim.models.word2vec import Word2Vec
import pandas as pd
pd.set_option('max.colwidth', 0)

# Prepare Dataset

In [3]:
dataset_file = "mypos-ver.3.0-flair.txt"

# Define the columns in dataset
columns = {0: 'text', 1: 'pos'}

# Initialize the corpus
corpus = ColumnCorpus(data_folder='.', column_format=columns, train_file=dataset_file)

2023-11-29 05:26:07,201 Reading data from .
2023-11-29 05:26:07,206 Train: mypos-ver.3.0-flair.txt
2023-11-29 05:26:07,208 Dev: None
2023-11-29 05:26:07,209 Test: None
2023-11-29 05:26:19,071 No test split found. Using 0% (i.e. 4320 samples) of the train split as test data
2023-11-29 05:26:19,102 No dev split found. Using 0% (i.e. 3888 samples) of the train split as dev data


# Load pre-trained FlairEmbeddings model



In [4]:
from flair.embeddings import FlairEmbeddings, StackedEmbeddings, WordEmbeddings
flair_forward_embedding = FlairEmbeddings('best-lm-fw.pt')
flair_backward_embedding = FlairEmbeddings('best-lm-bw.pt')

In [5]:
# Stack the embeddings
embedding_types = [

    FlairEmbeddings('best-lm-fw.pt'),
    FlairEmbeddings('best-lm-bw.pt')
]

embeddings = StackedEmbeddings(embeddings=embedding_types)

In [6]:
# test stacked embedding
sentence = Sentence('ဒါ ပေါ့ ။ ကိစ္စ မ ရှိ ပါ ဘူး ။')
embeddings.embed(sentence)

for token in sentence:
    print(token)
    print(token.embedding)

Token[0]: "ဒါ"
tensor([-3.7974e-01,  2.1368e-05,  4.7706e-04,  7.7587e-04, -2.9287e-03,
         4.2294e-02, -7.3442e-04, -5.4222e-04,  7.6816e-03,  9.7218e-04,
        -2.0761e-02,  4.2603e-02, -2.0649e-04,  1.3703e-01, -7.4877e-01,
        -1.1288e-02,  3.6933e-02,  1.6755e-02, -4.5331e-03,  1.4367e-04,
         2.4181e-01, -3.2227e-02,  5.6861e-02,  2.0548e-01, -6.6127e-02,
         1.8289e-03,  3.1663e-02, -5.9485e-05, -2.4779e-02, -6.3073e-02,
        -1.3252e-03,  8.0238e-02,  1.2424e-03,  1.7425e-02, -2.8310e-02,
         2.3591e-02,  4.0888e-03, -3.2991e-03,  9.5268e-03,  5.7763e-01,
        -1.5888e-02, -2.5222e-01, -2.3787e-01,  3.0005e-01,  5.3408e-03,
        -1.8336e-01,  2.2487e-04,  2.1046e-03,  1.5220e-02, -2.8578e-04,
         1.7536e-03, -4.2489e-03,  7.3850e-04, -1.4167e-02,  4.7915e-01,
         6.5196e-02,  5.6772e-01, -7.6392e-03,  8.3006e-01, -5.1166e-02,
         8.1143e-02,  4.9698e-01, -4.7595e-05, -7.5586e-02,  5.3671e-01,
         2.5885e-02, -1.1890e-01,  7

In [7]:
from sklearn.metrics.pairwise import cosine_similarity

sentence_1 = Sentence('ဒီ လို ပဲ ပေါ့ ကိစ္စ မ ရှိ ပါ ဘူး')
sentence_2 = Sentence('ဒါ ပေါ့ ကိစ္စ မ ရှိ ပါ ဘူး')

embeddings.embed([sentence_1, sentence_2])

for i1 in sentence_1:
    print(i1.embedding)

for i2 in sentence_2:
    print(i2.embedding)

embedding_1 = i1.embedding.unsqueeze(0).cpu().numpy()
embedding_2 = i2.embedding.unsqueeze(0).cpu().numpy()

cosine_similarity(embedding_1, embedding_2)

tensor([ 2.9254e-02, -6.4189e-05, -8.8054e-03, -1.0163e-01, -1.7472e-03,
         4.8585e-04, -2.2885e-04,  2.7267e-04,  1.1107e-02, -8.9043e-02,
        -1.3916e-02,  5.3967e-03, -1.2641e-04,  6.2256e-02, -7.3497e-01,
         5.1309e-02,  7.0814e-02,  1.7537e-02, -4.2601e-02, -3.8446e-06,
         2.6575e-01, -1.4385e-03, -1.0783e-01,  9.9054e-02, -4.3456e-02,
         1.9507e-03, -3.2171e-03, -7.6641e-07,  3.2656e-04,  8.8832e-02,
        -1.8836e-03,  6.6630e-04,  6.5986e-04,  2.5487e-02, -1.6228e-01,
         2.2297e-02, -3.1839e-03, -5.1704e-01,  1.0487e-02,  8.4584e-02,
        -1.0984e-04, -3.0345e-02,  3.5269e-02,  6.3918e-02,  1.4744e-01,
         6.0292e-02,  1.1620e-04,  1.6999e-03,  1.9981e-03, -2.6658e-04,
         2.3658e-04, -2.6566e-01, -3.5826e-05, -3.9221e-02,  2.1956e-01,
        -2.0117e-02, -3.2813e-01, -2.6102e-03,  7.4787e-01, -3.8880e-01,
        -7.5354e-01,  4.8404e-01, -6.5893e-05, -7.0573e-01, -7.3216e-02,
        -8.6228e-04, -6.8223e-02,  7.6088e-01, -1.4

array([[0.999768]], dtype=float32)

In [8]:
from sklearn.metrics.pairwise import cosine_similarity

sentence_1 = Sentence('သူ တို့ က သူငယ်ချင်း တွေ')
sentence_2 = Sentence('ဒါ ပေါ့ ကိစ္စ မ ရှိ ပါ ဘူး')

embeddings.embed([sentence_1, sentence_2])

for i1 in sentence_1:
    print(i1.embedding)

for i2 in sentence_2:
    print(i2.embedding)

embedding_1 = i1.embedding.unsqueeze(0).cpu().numpy()
embedding_2 = i2.embedding.unsqueeze(0).cpu().numpy()

cosine_similarity(embedding_1, embedding_2)

tensor([ 1.7913e-02,  6.2571e-03, -2.4316e-02,  2.2651e-01, -1.9427e-01,
        -4.2907e-01, -2.7153e-03,  4.6092e-05, -2.8229e-02,  5.6901e-03,
        -1.3612e-02,  7.5855e-02,  8.3727e-06,  1.4217e-02, -7.0944e-01,
        -5.4399e-03, -1.0358e-01,  3.5143e-02,  8.9646e-03,  5.2612e-02,
         2.4288e-01, -6.0744e-04, -4.7541e-04, -2.0992e-02, -1.6508e-01,
         3.5462e-02,  2.4394e-02,  2.1493e-04,  8.2009e-05,  6.3666e-01,
        -1.8794e-01,  1.3086e-02,  2.0589e-03,  3.4556e-03, -5.1886e-03,
         2.3821e-01,  4.4327e-02, -1.0760e-02,  1.6948e-02,  6.6744e-01,
        -5.0410e-03, -1.9614e-02, -3.6799e-01,  4.3213e-01, -4.8506e-02,
        -1.8792e-01, -3.3974e-05,  1.8042e-04,  1.3670e-04, -1.6820e-01,
         2.1544e-04, -9.5751e-02,  3.9907e-04, -3.0664e-03,  9.4965e-02,
        -1.2144e-01, -1.8456e-02, -5.4789e-03,  4.7942e-02, -2.3379e-01,
         1.9085e-01,  2.6867e-03,  3.5216e-05,  7.0800e-02,  6.0919e-03,
        -2.8406e-01, -7.8888e-01,  7.8967e-01,  1.3

array([[0.1703175]], dtype=float32)

# Sequence Tagging

In [9]:
label_type = 'pos'

# Create a label dictionary from the corpus
label_dict = corpus.make_label_dictionary(label_type=label_type, add_unk=True)

# Create model
model = SequenceTagger(hidden_size=256,
                      embeddings=embeddings,
                      tag_dictionary=label_dict,
                      tag_type=label_type)

# Create the trainer and train the model
trainer = ModelTrainer(model, corpus)
trainer.train('pos_tagger', learning_rate=0.1, mini_batch_size=32, max_epochs=10)

2023-11-29 05:26:37,605 Computing label dictionary. Progress:


0it [00:00, ?it/s]
34989it [00:01, 22056.41it/s]

2023-11-29 05:26:39,234 Dictionary created for label 'pos' with 17 values: part (seen 108167 times), n (seen 85233 times), ppm (seen 70276 times), v (seen 62551 times), punc (seen 43844 times), pron (seen 16425 times), conj (seen 14449 times), adj (seen 12826 times), adv (seen 8706 times), num (seen 4789 times), tn (seen 4662 times), fw (seen 2655 times), int (seen 530 times), abb (seen 289 times), sb (seen 212 times), O (seen 1 times)
2023-11-29 05:26:39,237 SequenceTagger predicts: Dictionary with 17 tags: <unk>, part, n, ppm, v, punc, pron, conj, adj, adv, num, tn, fw, int, abb, sb, O
2023-11-29 05:26:39,255 ----------------------------------------------------------------------------------------------------
2023-11-29 05:26:39,256 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.1, inplace=False)
        (encoder): Embedding(173, 100)
        (rnn): LSTM(100, 128)
      )
    )





2023-11-29 05:26:39,287 ----------------------------------------------------------------------------------------------------
2023-11-29 05:26:39,289 Final evaluation on model from best epoch (best-model.pt)
2023-11-29 05:26:39,290  - metric: "('micro avg', 'f1-score')"
2023-11-29 05:26:39,291 ----------------------------------------------------------------------------------------------------
2023-11-29 05:26:39,292 Computation:
2023-11-29 05:26:39,293  - compute on device: cuda:0
2023-11-29 05:26:39,294  - embedding storage: cpu
2023-11-29 05:26:39,295 ----------------------------------------------------------------------------------------------------
2023-11-29 05:26:39,297 Model training base path: "pos_tagger"
2023-11-29 05:26:39,298 ----------------------------------------------------------------------------------------------------
2023-11-29 05:26:39,299 ----------------------------------------------------------------------------------------------------
2023-11-29 05:26:49,585 epo

100%|██████████| 61/61 [00:16<00:00,  3.70it/s]


2023-11-29 05:28:41,683 DEV : loss 0.29482945799827576 - f1-score (micro avg)  0.9064
2023-11-29 05:28:41,882  - 0 epochs without improvement
2023-11-29 05:28:41,884 saving best model
2023-11-29 05:28:41,900 ----------------------------------------------------------------------------------------------------
2023-11-29 05:28:49,367 epoch 2 - iter 109/1094 - loss 0.45897452 - time (sec): 7.46 - samples/sec: 5696.78 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:28:58,053 epoch 2 - iter 218/1094 - loss 0.45638050 - time (sec): 16.15 - samples/sec: 5335.32 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:29:05,563 epoch 2 - iter 327/1094 - loss 0.45419994 - time (sec): 23.66 - samples/sec: 5490.26 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:29:14,300 epoch 2 - iter 436/1094 - loss 0.45184167 - time (sec): 32.40 - samples/sec: 5375.75 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:29:22,599 epoch 2 - iter 545/1094 - loss 0.44761628 - time (sec): 40.70 - samples/sec: 5321.89 - l

100%|██████████| 61/61 [00:10<00:00,  6.00it/s]


2023-11-29 05:30:14,619 DEV : loss 0.2526632249355316 - f1-score (micro avg)  0.9192
2023-11-29 05:30:14,960  - 0 epochs without improvement
2023-11-29 05:30:14,965 saving best model
2023-11-29 05:30:14,986 ----------------------------------------------------------------------------------------------------
2023-11-29 05:30:23,131 epoch 3 - iter 109/1094 - loss 0.40923118 - time (sec): 8.14 - samples/sec: 5315.44 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:30:31,492 epoch 3 - iter 218/1094 - loss 0.40788566 - time (sec): 16.50 - samples/sec: 5224.78 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:30:38,870 epoch 3 - iter 327/1094 - loss 0.40142819 - time (sec): 23.88 - samples/sec: 5428.17 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:30:47,539 epoch 3 - iter 436/1094 - loss 0.40251276 - time (sec): 32.55 - samples/sec: 5320.25 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:30:55,274 epoch 3 - iter 545/1094 - loss 0.40011776 - time (sec): 40.28 - samples/sec: 5377.84 - lr

100%|██████████| 61/61 [00:10<00:00,  5.91it/s]


2023-11-29 05:31:48,578 DEV : loss 0.23656189441680908 - f1-score (micro avg)  0.9263
2023-11-29 05:31:48,824  - 0 epochs without improvement
2023-11-29 05:31:48,828 saving best model
2023-11-29 05:31:48,858 ----------------------------------------------------------------------------------------------------
2023-11-29 05:31:57,676 epoch 4 - iter 109/1094 - loss 0.38520394 - time (sec): 8.82 - samples/sec: 5004.44 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:32:06,799 epoch 4 - iter 218/1094 - loss 0.38287326 - time (sec): 17.94 - samples/sec: 4910.48 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:32:14,146 epoch 4 - iter 327/1094 - loss 0.37729394 - time (sec): 25.29 - samples/sec: 5169.23 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:32:24,530 epoch 4 - iter 436/1094 - loss 0.37382847 - time (sec): 35.67 - samples/sec: 4867.64 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:32:33,184 epoch 4 - iter 545/1094 - loss 0.37217174 - time (sec): 44.32 - samples/sec: 4907.93 - l

100%|██████████| 61/61 [00:10<00:00,  5.86it/s]


2023-11-29 05:33:24,661 DEV : loss 0.21701253950595856 - f1-score (micro avg)  0.9327
2023-11-29 05:33:24,994  - 0 epochs without improvement
2023-11-29 05:33:24,999 saving best model
2023-11-29 05:33:25,029 ----------------------------------------------------------------------------------------------------
2023-11-29 05:33:33,690 epoch 5 - iter 109/1094 - loss 0.37167474 - time (sec): 8.66 - samples/sec: 5052.04 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:33:42,346 epoch 5 - iter 218/1094 - loss 0.36514643 - time (sec): 17.31 - samples/sec: 5022.17 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:33:49,774 epoch 5 - iter 327/1094 - loss 0.35938344 - time (sec): 24.74 - samples/sec: 5285.91 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:33:58,375 epoch 5 - iter 436/1094 - loss 0.35748123 - time (sec): 33.34 - samples/sec: 5201.07 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:34:05,976 epoch 5 - iter 545/1094 - loss 0.35607360 - time (sec): 40.94 - samples/sec: 5299.23 - l

100%|██████████| 61/61 [00:14<00:00,  4.27it/s]


2023-11-29 05:35:02,236 DEV : loss 0.20653484761714935 - f1-score (micro avg)  0.9368
2023-11-29 05:35:02,591  - 0 epochs without improvement
2023-11-29 05:35:02,594 saving best model
2023-11-29 05:35:02,626 ----------------------------------------------------------------------------------------------------
2023-11-29 05:35:10,375 epoch 6 - iter 109/1094 - loss 0.34616211 - time (sec): 7.75 - samples/sec: 5651.06 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:35:18,922 epoch 6 - iter 218/1094 - loss 0.34310871 - time (sec): 16.29 - samples/sec: 5321.17 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:35:26,311 epoch 6 - iter 327/1094 - loss 0.34250808 - time (sec): 23.68 - samples/sec: 5500.75 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:35:35,219 epoch 6 - iter 436/1094 - loss 0.34356504 - time (sec): 32.59 - samples/sec: 5355.45 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:35:42,861 epoch 6 - iter 545/1094 - loss 0.34215472 - time (sec): 40.23 - samples/sec: 5396.01 - l

100%|██████████| 61/61 [00:10<00:00,  5.86it/s]


2023-11-29 05:36:35,522 DEV : loss 0.20146511495113373 - f1-score (micro avg)  0.9375
2023-11-29 05:36:35,861  - 0 epochs without improvement
2023-11-29 05:36:35,863 saving best model
2023-11-29 05:36:35,887 ----------------------------------------------------------------------------------------------------
2023-11-29 05:36:44,425 epoch 7 - iter 109/1094 - loss 0.32506458 - time (sec): 8.54 - samples/sec: 5027.81 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:36:53,107 epoch 7 - iter 218/1094 - loss 0.33027289 - time (sec): 17.22 - samples/sec: 5050.57 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:37:00,593 epoch 7 - iter 327/1094 - loss 0.32880290 - time (sec): 24.70 - samples/sec: 5294.54 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:37:09,221 epoch 7 - iter 436/1094 - loss 0.32776910 - time (sec): 33.33 - samples/sec: 5225.29 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:37:16,628 epoch 7 - iter 545/1094 - loss 0.32979546 - time (sec): 40.74 - samples/sec: 5332.82 - l

100%|██████████| 61/61 [00:10<00:00,  5.56it/s]


2023-11-29 05:38:09,380 DEV : loss 0.19755591452121735 - f1-score (micro avg)  0.9395
2023-11-29 05:38:09,593  - 0 epochs without improvement
2023-11-29 05:38:09,595 saving best model
2023-11-29 05:38:09,612 ----------------------------------------------------------------------------------------------------
2023-11-29 05:38:17,876 epoch 8 - iter 109/1094 - loss 0.32375892 - time (sec): 8.26 - samples/sec: 5146.07 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:38:25,665 epoch 8 - iter 218/1094 - loss 0.32695563 - time (sec): 16.05 - samples/sec: 5347.80 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:38:33,790 epoch 8 - iter 327/1094 - loss 0.32498909 - time (sec): 24.17 - samples/sec: 5341.24 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:38:42,160 epoch 8 - iter 436/1094 - loss 0.32349983 - time (sec): 32.54 - samples/sec: 5284.97 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:38:50,051 epoch 8 - iter 545/1094 - loss 0.32348827 - time (sec): 40.44 - samples/sec: 5339.12 - l

100%|██████████| 61/61 [00:11<00:00,  5.37it/s]


2023-11-29 05:39:43,283 DEV : loss 0.18913514912128448 - f1-score (micro avg)  0.9421
2023-11-29 05:39:43,480  - 0 epochs without improvement
2023-11-29 05:39:43,481 saving best model
2023-11-29 05:39:43,502 ----------------------------------------------------------------------------------------------------
2023-11-29 05:39:51,860 epoch 9 - iter 109/1094 - loss 0.31627871 - time (sec): 8.36 - samples/sec: 5141.16 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:39:59,549 epoch 9 - iter 218/1094 - loss 0.31759759 - time (sec): 16.04 - samples/sec: 5385.27 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:40:08,309 epoch 9 - iter 327/1094 - loss 0.31473363 - time (sec): 24.80 - samples/sec: 5211.26 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:40:16,671 epoch 9 - iter 436/1094 - loss 0.31540236 - time (sec): 33.17 - samples/sec: 5205.77 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:40:24,165 epoch 9 - iter 545/1094 - loss 0.31555377 - time (sec): 40.66 - samples/sec: 5308.01 - l

100%|██████████| 61/61 [00:11<00:00,  5.31it/s]


2023-11-29 05:41:17,630 DEV : loss 0.18710601329803467 - f1-score (micro avg)  0.9417
2023-11-29 05:41:17,821  - 1 epochs without improvement
2023-11-29 05:41:17,826 ----------------------------------------------------------------------------------------------------
2023-11-29 05:41:26,272 epoch 10 - iter 109/1094 - loss 0.31374987 - time (sec): 8.44 - samples/sec: 5058.02 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:41:34,010 epoch 10 - iter 218/1094 - loss 0.31357421 - time (sec): 16.18 - samples/sec: 5326.73 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:41:42,682 epoch 10 - iter 327/1094 - loss 0.31042679 - time (sec): 24.85 - samples/sec: 5207.29 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:41:50,798 epoch 10 - iter 436/1094 - loss 0.31104852 - time (sec): 32.97 - samples/sec: 5246.14 - lr: 0.100000 - momentum: 0.000000
2023-11-29 05:41:58,847 epoch 10 - iter 545/1094 - loss 0.31120267 - time (sec): 41.02 - samples/sec: 5289.04 - lr: 0.100000 - momentum: 0.000000
2023

100%|██████████| 61/61 [00:13<00:00,  4.46it/s]


2023-11-29 05:42:55,599 DEV : loss 0.1849522590637207 - f1-score (micro avg)  0.9433
2023-11-29 05:42:55,791  - 0 epochs without improvement
2023-11-29 05:42:55,792 saving best model
2023-11-29 05:42:55,826 ----------------------------------------------------------------------------------------------------
2023-11-29 05:42:55,832 Loading model from best epoch ...
2023-11-29 05:42:55,849 SequenceTagger predicts: Dictionary with 19 tags: <unk>, part, n, ppm, v, punc, pron, conj, adj, adv, num, tn, fw, int, abb, sb, O, <START>, <STOP>


100%|██████████| 68/68 [00:09<00:00,  6.86it/s]


2023-11-29 05:43:06,314 
Results:
- F-score (micro) 0.9415
- F-score (macro) 0.8643
- Accuracy 0.9415

By class:
              precision    recall  f1-score   support

        part     0.9701    0.9408    0.9552     13532
           n     0.8943    0.9571    0.9247     10430
         ppm     0.9802    0.9819    0.9810      8616
           v     0.9353    0.9233    0.9293      7655
        punc     0.9985    0.9996    0.9991      5386
        pron     0.9562    0.9576    0.9569      2074
        conj     0.8821    0.9366    0.9086      1814
         adj     0.8173    0.6588    0.7295      1650
         adv     0.7181    0.7287    0.7233      1080
         num     0.9984    0.9952    0.9968       630
          tn     0.9457    0.9294    0.9375       581
          fw     0.9962    1.0000    0.9981       263
         int     0.9074    0.8167    0.8596        60
          sb     1.0000    0.7692    0.8696        39
         abb     1.0000    0.1081    0.1951        37

    accuracy         

{'test_score': 0.9415194904080079}

# Predict

In [10]:
# load the model you trained
model = SequenceTagger.load('pos_tagger/final-model.pt')

sentence = Sentence('အသစ် ဝယ် ထား တဲ့ ဆွယ်တာ က အသီးထ ပါ တယ်')

model.predict(sentence)
print(sentence.to_tagged_string())

2023-11-29 05:58:27,562 SequenceTagger predicts: Dictionary with 19 tags: <unk>, part, n, ppm, v, punc, pron, conj, adj, adv, num, tn, fw, int, abb, sb, O, <START>, <STOP>
Sentence[9]: "အသစ် ဝယ် ထား တဲ့ ဆွယ်တာ က အသီးထ ပါ တယ်" → ["အသစ်"/n, "ဝယ်"/v, "ထား"/part, "တဲ့"/part, "ဆွယ်တာ"/n, "က"/ppm, "အသီးထ"/v, "ပါ"/part, "တယ်"/ppm]
