In [None]:
!pip install flair
!pip install torch



In [None]:
# Make Google Drive available under /content/drive so later cells can read the
# training data from it and write the trained model back to it.
from google.colab import drive

# force_remount=True asks Colab to mount again even if the drive is already mounted.
drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, PooledFlairEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
import torch

# Label type the tagger is trained on; it is also the name of the label column below.
label_type = 'ner'
# Column layout of the data files: column 0 is the token text, column 1 its NER tag.
columns = {0: 'text', 1: 'ner'}

# Read the corpus from the mounted Drive folder. Only a train file is present there,
# so Flair samples the dev and test splits out of the train split (see the log output).
corpus: Corpus = ColumnCorpus(
    data_folder='/content/drive/MyDrive/training_data/flair/',
    column_format=columns,
)

# Build the dictionary of labels found in the corpus and show it.
label_dictionary = corpus.make_label_dictionary(label_type=label_type)
print(label_dictionary)

2024-01-22 09:30:20,603 Reading data from /content/drive/MyDrive/training_data/flair
2024-01-22 09:30:20,604 Train: /content/drive/MyDrive/training_data/flair/train.txt
2024-01-22 09:30:20,605 Dev: None
2024-01-22 09:30:20,607 Test: None
2024-01-22 09:30:30,789 No test split found. Using 0% (i.e. 1402 samples) of the train split as test data
2024-01-22 09:30:30,799 No dev split found. Using 0% (i.e. 1262 samples) of the train split as dev data
2024-01-22 09:30:30,800 Computing label dictionary. Progress:


0it [00:00, ?it/s]
11355it [00:00, 31279.95it/s]

2024-01-22 09:30:31,174 Dictionary created for label 'ner' with 2 values: ORG (seen 36268 times), AWARD (seen 28808 times)
Dictionary with 2 tags: ORG, AWARD





In [None]:
# Embedding stack: GloVe word vectors first, followed by pooled Flair embeddings
# for the forward and then the backward news language model (min pooling).
embedding_types = [WordEmbeddings('glove')]
embedding_types.extend(
    PooledFlairEmbeddings(lm_name, pooling='min')
    for lm_name in ('news-forward', 'news-backward')
)
embeddings = StackedEmbeddings(embeddings=embedding_types)

# Sequence tagger over the stacked embeddings, predicting the labels collected
# in label_dictionary for the configured label type.
tagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dictionary,
    tag_type=label_type,
)

# Train for up to 50 epochs and write checkpoints/logs below resources/taggers/example-ner.
# train_with_dev=True: the dev split is included in the training data (Flair trainer option).
trainer = ModelTrainer(tagger, corpus)
trainer.train(
    'resources/taggers/example-ner',
    train_with_dev=True,
    max_epochs=50,
)

2024-01-22 09:30:42,945 SequenceTagger predicts: Dictionary with 9 tags: O, S-ORG, B-ORG, E-ORG, I-ORG, S-AWARD, B-AWARD, E-AWARD, I-AWARD
2024-01-22 09:30:43,711 ----------------------------------------------------------------------------------------------------
2024-01-22 09:30:43,712 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'glove'
      (embedding): Embedding(400001, 100)
    )
    (list_embedding_1): PooledFlairEmbeddings(
      (context_embeddings): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (list_embedding_2): PooledFlairEmbeddings(
      (context_embeddings): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
  )
  (word_dro

In [None]:
import os
import shutil

# Persist the trained model: the Colab working directory is where training wrote
# final-model.pt, and the copy on Google Drive is what is kept.
model_path = 'resources/taggers/example-ner/final-model.pt'
# Use the 'MyDrive' spelling of the mount path, matching the path the training data
# is read from elsewhere in this notebook (that spelling is known to resolve here).
drive_path = '/content/drive/MyDrive/models/flair/50-epoch/final-model.pt'
drive_dir = os.path.dirname(drive_path)

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
# of testing os.path.exists() before calling os.makedirs().
os.makedirs(drive_dir, exist_ok=True)
shutil.copy(model_path, drive_path)
