<a href="https://colab.research.google.com/github/chewzzz1014/fyp/blob/master/ner/src/train_ner_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Train NER Models

In [1]:
# mount drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!mkdir spacy_ner_data

In [None]:
import json
import random
from sklearn.model_selection import train_test_split
import spacy
from spacy.tokens import DocBin

# Load JSON data
with open('/content/drive/MyDrive/FYP/Implementation/Resume Dataset/200_resumes_annotated.json', "r") as f:
    data = json.load(f)

def remove_overlapping_entities(entities):
    """Remove overlapping entities from the list."""
    entities = sorted(entities, key=lambda x: x[0])  # Sort by start position
    non_overlapping = []
    last_end = -1
    for start, end, label in entities:
        if start >= last_end:  # Only add if there's no overlap with the previous entity
            non_overlapping.append((start, end, label))
            last_end = end
    return non_overlapping

# Function to convert JSON data to Spacy's DocBin format
def convert_to_spacy_format(data):
    nlp = spacy.blank("en")  # Load a blank Spacy model
    doc_bin = DocBin()  # Container for our docs

    for item in data:
        text = item['data']['Text']  # Full document text
        entities = []

        for annotation in item['annotations'][0]['result']:
            start = annotation['value']['start']
            end = annotation['value']['end']
            label = annotation['value']['labels'][0]  # Entity label
            entities.append((start, end, label))

        entities = remove_overlapping_entities(entities)  # Remove overlapping entities
        # Create a Spacy doc and add entities to it
        doc = nlp.make_doc(text)
        spans = [doc.char_span(start, end, label=label) for start, end, label in entities]
        # Filter out None spans if Spacy can't align the character indices with tokens
        spans = [span for span in spans if span is not None]
        doc.ents = spans  # Assign entities to the doc
        doc_bin.add(doc)

    return doc_bin

# Split data into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Convert train and test sets to Spacy format
train_doc_bin = convert_to_spacy_format(train_data)
test_doc_bin = convert_to_spacy_format(test_data)

# Save the train and test data to .spacy files
train_doc_bin.to_disk("spacy_ner_data/train_data.spacy")
test_doc_bin.to_disk("spacy_ner_data/test_data.spacy")

## Spacy NER

In [None]:
# create base_config.cfg and paste the config generated from spacy widget
# update train and test file path
!touch base_config.cfg

In [None]:
# generate config.cfg from base_config.cfg
!python -m spacy init fill-config base_config.cfg config.cfg

[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [None]:
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# train model using hyperparameters set in config.cfg
# save trained model in spacy-output/ dir
!python -m spacy train config.cfg --output ./spacy_output
!cp -r ./spacy_output /content/drive/MyDrive/FYP/Implementation/

In [None]:
# evaluate trained model performance
# store output and visualization into result/ dir
!python -m spacy evaluate spacy_output/model-best spacy_ner_data/test_data.spacy -dp spacy_output

[38;5;4mℹ Using CPU[0m
[1m

TOK     100.00
NER P   51.12 
NER R   41.26 
NER F   45.66 
SPEED   2395  

[1m

                P       R       F
NAME        89.66   78.79   83.87
JOB         72.00   32.43   44.72
DEG         62.16   63.89   63.01
UNI         38.89   34.15   36.36
EMAIL       63.33   95.00   76.00
LOC         39.39   31.71   35.14
WORK PER    75.45   83.00   79.05
COMPANY     28.42   36.49   31.95
SKILL       40.96   28.96   33.93
PHONE       89.66   83.87   86.67
STUDY PER   65.62   58.33   61.76

<IPython.core.display.HTML object>
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/spacy/__main__.py", line 4, in <module>
    setup_cli()
  File "/usr/local/lib/python3.10/dist-packages/spacy/cli/_util.py", line 87, in setup_cli
    c

In [None]:
# make prediction
import spacy
resume_text = '''
John Doe lives at 1234 Elm Street in Los Angeles, CA 90001. He can be reached at +1 (555) 123-4567 or via email at john.doe@example.com. John is a results-driven software engineer with over 5 years of experience in web development and cloud infrastructure, with strong knowledge of JavaScript, Python, and cloud technologies like AWS and Azure. Currently, he works as a Software Engineer at Google LLC in San Francisco, CA, where he has been employed since August 2019. In this role, he has developed scalable web applications using JavaScript, Node.js, and React, deployed and maintained cloud infrastructure on AWS, reducing downtime by 20%, and led a team of 4 engineers to enhance backend performance by 30%. Previously, he worked as a Junior Developer at Tech Innovators Inc. in Austin, TX, from July 2017 to July 2019, where he created RESTful APIs using Python and Flask, collaborated with front-end developers to build and deploy user-facing applications, and wrote unit and integration tests, improving code coverage by 15%.

John holds a Master of Science in Computer Science from the University of California, Berkeley, with a graduation date of May 2017, and a Bachelor of Science in Information Technology from the University of Texas at Austin, graduated in May 2015. His skillset includes proficiency in programming languages like Python, JavaScript, and Java; frameworks such as React, Flask, and Django; cloud platforms including AWS, Google Cloud, and Azure; as well as other tools like Git, Docker, Kubernetes, and SQL. He is certified as an AWS Certified Solutions Architect – Associate, earned in 2020, and as a Google Professional Cloud Architect, earned in 2021'
'''
nlp = spacy.load("spacy-output/model-best")
doc = nlp(resume_text.lower())

print(doc.ents)

for ent in doc.ents:
    print(f"{ent.text}: {ent.label_}")

(john doe, in los, (555) 123-4567, john.doe@example.com, john is, aws, restful apis, master of science, bachelor of science in information technology, python, aws, azure, git, docker)
john doe: NAME
in los: LOC
(555) 123-4567: PHONE
john.doe@example.com: EMAIL
john is: NAME
aws: SKILL
restful apis: SKILL
master of science: DEG
bachelor of science in information technology: DEG
python: SKILL
aws: SKILL
azure: SKILL
git: SKILL
docker: SKILL


In [None]:
from spacy import displacy
displacy.render(doc, style="ent", jupyter=True)

## Flair NER

In [2]:
!pip install flair

Collecting flair
  Downloading flair-0.14.0-py3-none-any.whl.metadata (12 kB)
Collecting boto3>=1.20.27 (from flair)
  Downloading boto3-1.35.54-py3-none-any.whl.metadata (6.7 kB)
Collecting conllu<5.0.0,>=4.0 (from flair)
  Downloading conllu-4.5.3-py2.py3-none-any.whl.metadata (19 kB)
Collecting ftfy>=6.1.0 (from flair)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting langdetect>=1.0.9 (from flair)
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mpld3>=0.3 (from flair)
  Downloading mpld3-0.5.10-py3-none-any.whl.metadata (5.1 kB)
Collecting pptree>=3.1 (from flair)
  Downloading pptree-3.1.tar.gz (3.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pytorch-revgrad>=0.2.0 (from flair)
  Downloading pytorch_revgrad-0.2.0-py3-none-any.whl.metadata (1.7 kB)


In [None]:
# convert spacy data into flair data
import spacy
from spacy.tokens import DocBin
import os

def convert_spacy_to_flair(input_file, output_file):
    """
    Convert SpaCy binary format to Flair's CoNLL format.

    Args:
        input_file (str): Path to SpaCy binary file (.spacy)
        output_file (str): Path to output file for Flair format
    """
    # Load spaCy model
    nlp = spacy.blank("en")

    # Load the DocBin
    doc_bin = DocBin().from_disk(input_file)
    docs = list(doc_bin.get_docs(nlp.vocab))

    with open(output_file, 'w', encoding='utf-8') as f:
        for doc in docs:
            tokens = [(t.text, t.ent_iob_, t.ent_type_) for t in doc]

            # Write tokens in CoNLL format
            for token in tokens:
                text, iob, ent_type = token

                # Convert spaCy IOB to CoNLL format
                if iob == 'O':
                    tag = 'O'
                else:
                    tag = f'{iob}-{ent_type}' if ent_type else 'O'

                # Write line: token and NER tag
                f.write(f'{text} {tag}\n')

            # Empty line between sentences
            f.write('\n')

def convert_spacy_json_to_flair(input_file, output_file):
    """
    Convert SpaCy JSON format to Flair's CoNLL format.

    Args:
        input_file (str): Path to JSON file with SpaCy annotations
        output_file (str): Path to output file for Flair format
    """
    import json

    nlp = spacy.blank("en")

    with open(input_file, 'r', encoding='utf-8') as f:
        training_data = json.load(f)

    with open(output_file, 'w', encoding='utf-8') as f:
        for example in training_data:
            text = example['text']
            ents = example.get('entities', [])

            # Create a spaCy doc
            doc = nlp(text)

            # Add entities to doc
            spans = []
            for start, end, label in ents:
                span = doc.char_span(start, end, label=label)
                if span is not None:
                    spans.append(span)
            doc.ents = spans

            # Convert to CoNLL format
            tokens = [(t.text, t.ent_iob_, t.ent_type_) for t in doc]

            for token in tokens:
                text, iob, ent_type = token
                if iob == 'O':
                    tag = 'O'
                else:
                    tag = f'{iob}-{ent_type}' if ent_type else 'O'
                f.write(f'{text} {tag}\n')

            f.write('\n')

# Example usage for JSON format
flair_train_json = "flair_train.txt"
flair_test_json = "flair_test.txt"

convert_spacy_to_flair('/content/spacy_ner_data/train_data.spacy', flair_train_json)
convert_spacy_to_flair('/content/spacy_ner_data/test_data.spacy', flair_test_json)

FileNotFoundError: [Errno 2] No such file or directory: '/content/spacy_ner_data/train_data.spacy'

In [None]:
# convert spacy data into flair data
import spacy
from spacy.training import Corpus

!python -m spacy download de_core_news_sm
nlp = spacy.load("de_core_news_sm")
corpus = Corpus("/content/spacy_ner_data/test_data.spacy")

data = corpus(nlp)

# Flair supports BIO and BIOES, see https://github.com/flairNLP/flair/issues/875
def rename_biluo_to_bioes(old_tag):
    new_tag = ""
    try:
        if old_tag.startswith("L"):
            new_tag = "E" + old_tag[1:]
        elif old_tag.startswith("U"):
            new_tag = "S" + old_tag[1:]
        else:
            new_tag = old_tag
    except:
        pass
    return new_tag


def generate_corpus():
    corpus = []
    n_ex = 0
    for example in data:
        n_ex += 1
        text = example.text
        doc = nlp(text)
        tags = example.get_aligned_ner()
        # Check if it's an empty list of NER tags.
        if None in tags:
            pass
        else:
            new_tags = [rename_biluo_to_bioes(tag) for tag in tags]
            for token, tag in zip(doc,new_tags):
                row = token.text +' '+ token.pos_ +' ' +tag + '\n'
                corpus.append(row)
            corpus.append('\n')
    return corpus

def write_file(filepath):
    with open(filepath, 'w', encoding='utf-8') as f:
        corpus = generate_corpus()
        f.writelines(corpus)

def main():
    write_file('flair_test.txt')

main()

Collecting de-core-news-sm==3.7.0
  Using cached https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.7.0/de_core_news_sm-3.7.0-py3-none-any.whl (14.6 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('de_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:
# convert json into flair data
import json
import random
from typing import List, Dict, Tuple
import spacy
from collections import defaultdict

class NERConverter:
    def __init__(self):
        self.nlp = spacy.load("en_core_web_sm")

    def get_bioes_label(self, token_index: int, entity_length: int, current_position: int, label: str) -> str:
        """
        Convert to BIOES format
        - S-: Single token entity
        - B-: Beginning of multi-token entity
        - I-: Inside of multi-token entity
        - E-: End of multi-token entity
        - O: Outside
        """
        if entity_length == 1:
            return f'S-{label}'
        if current_position == 0:
            return f'B-{label}'
        if current_position == entity_length - 1:
            return f'E-{label}'
        return f'I-{label}'

    def convert_to_bioes_format(self, json_data: List[dict]) -> List[List[Tuple[str, str]]]:
        """Convert JSON annotations to BIOES format."""
        all_sentences = []

        for item in json_data:
            text = item['data']['Text']
            doc = self.nlp(text)

            # Initialize character-level labels
            char_labels = ['O'] * len(text)

            # First pass: identify entity boundaries and lengths
            entity_spans = []
            if item['annotations'] and len(item['annotations']) > 0:
                for ann in item['annotations'][0]['result']:
                    if 'value' in ann:
                        start = ann['value']['start']
                        end = ann['value']['end']
                        label = ann['value']['labels'][0]
                        entity_spans.append((start, end, label))

            # Sort spans by start position
            entity_spans.sort(key=lambda x: x[0])

            # Second pass: apply BIOES labels
            for start, end, label in entity_spans:
                # Get tokens that are part of this entity
                entity_text = text[start:end]
                entity_doc = self.nlp(entity_text)
                entity_length = len([token for token in entity_doc if not token.is_space])

                # Set labels for the entire span
                current_token_idx = 0
                for i in range(start, end):
                    if i == start or text[i-1].isspace():
                        char_labels[i] = self.get_bioes_label(i, entity_length, current_token_idx, label)
                        current_token_idx += 1
                    else:
                        char_labels[i] = char_labels[i-1]

            # Convert to token-level labels
            current_sentence = []
            for sent in doc.sents:
                for token in sent:
                    # Get the most common label for the token's characters
                    token_chars_labels = char_labels[token.idx:token.idx + len(token.text)]
                    label_counts = defaultdict(int)
                    for char_label in token_chars_labels:
                        label_counts[char_label] += 1

                    token_label = max(label_counts.items(), key=lambda x: x[1])[0]
                    current_sentence.append((token.text, token_label))

                if current_sentence:
                    all_sentences.append(current_sentence)
                    current_sentence = []

        return all_sentences

    def write_flair_file(self, sentences: List[List[Tuple[str, str]]], filename: str):
        """Write sentences in BIOES format to file."""
        with open(filename, 'w', encoding='utf-8') as f:
            for sentence in sentences:
                for token, label in sentence:
                    f.write(f'{token} {label}\n')
                f.write('\n')

    def convert_and_split(self, json_data: List[dict], train_file: str, test_file: str, test_ratio: float = 0.2):
        """Convert JSON to BIOES format and split into train/test sets."""
        all_sentences = self.convert_to_bioes_format(json_data)

        # Shuffle and split
        random.shuffle(all_sentences)
        split_idx = int(len(all_sentences) * (1 - test_ratio))

        train_sentences = all_sentences[:split_idx]
        test_sentences = all_sentences[split_idx:]

        # Write to files
        self.write_flair_file(train_sentences, train_file)
        self.write_flair_file(test_sentences, test_file)

        return len(train_sentences), len(test_sentences)

def main():
    # Load JSON data
    with open('/content/drive/MyDrive/FYP/Implementation/Resume Dataset/200_resumes_annotated.json', 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Convert and split data
    converter = NERConverter()
    train_count, test_count = converter.convert_and_split(
        json_data,
        train_file='flair_train.txt',
        test_file='flair_test.txt',
        test_ratio=0.2
    )

    print(f'Created {train_count} training sentences and {test_count} test sentences')

main()

Created 298 training sentences and 75 test sentences


In [4]:
from flair.data import Corpus
from flair.datasets import ColumnCorpus

# Define columns for CoNLL (0: word, 1: label)
columns = {0: 'text', 1: 'ner'}

# Set data folder and file names
data_folder = './'
train_file = 'flair_train.txt'
test_file = 'flair_test.txt'

# Load the corpus
corpus: Corpus = ColumnCorpus(data_folder, columns,
                              train_file=train_file,
                              test_file=test_file,
                              dev_file=None)

2024-11-06 09:54:22,403 Reading data from .
2024-11-06 09:54:22,404 Train: flair_train.txt
2024-11-06 09:54:22,408 Dev: None
2024-11-06 09:54:22,409 Test: flair_test.txt
2024-11-06 09:54:23,941 No dev split found. Using 10% (i.e. 30 samples) of the train split as dev data


In [5]:
tag_dictionary = corpus.make_label_dictionary(label_type='ner')
print("Labels:", tag_dictionary.get_items())

2024-11-06 09:54:28,036 Computing label dictionary. Progress:


0it [00:00, ?it/s]
268it [00:00, 8780.66it/s]

2024-11-06 09:54:28,112 Dictionary created for label 'ner' with 11 values: SKILL (seen 2247 times), JOB (seen 551 times), WORK (seen 469 times), COMPANY (seen 382 times), LOC (seen 257 times), UNI (seen 160 times), DEG (seen 150 times), PHONE (seen 142 times), STUDY (seen 142 times), NAME (seen 140 times), EMAIL (seen 111 times)
Labels: ['SKILL', 'JOB', 'WORK', 'COMPANY', 'LOC', 'UNI', 'DEG', 'PHONE', 'STUDY', 'NAME', 'EMAIL']





In [6]:
from collections import Counter

def count_labels(file_path):
    with open(file_path, 'r') as file:
        labels = [line.split()[-1] for line in file if line.strip()]
    return Counter(labels)

print("Train label distribution:", count_labels('flair_train.txt'))
print("Test label distribution:", count_labels('flair_test.txt'))

Train label distribution: Counter({'O': 72376, 'S-SKILL': 1465, 'PER': 1243, 'B-SKILL': 1006, 'E-SKILL': 1004, 'E-JOB': 547, 'B-JOB': 538, 'E-COMPANY': 350, 'B-COMPANY': 348, 'I-JOB': 297, 'I-DEG': 263, 'I-COMPANY': 262, 'I-SKILL': 199, 'E-UNI': 177, 'B-UNI': 176, 'S-LOC': 176, 'E-DEG': 161, 'B-DEG': 160, 'B-NAME': 156, 'E-NAME': 156, 'I-UNI': 139, 'E-PHONE': 132, 'B-PHONE': 131, 'S-EMAIL': 121, 'B-LOC': 106, 'E-LOC': 106, 'I-PHONE': 84, 'S-COMPANY': 66, 'S-JOB': 62, 'S-PHONE': 20, 'S-DEG': 6, 'I-LOC': 5, 'I-NAME': 5, 'B-EMAIL': 1, 'E-EMAIL': 1})
Test label distribution: Counter({'O': 16717, 'S-SKILL': 326, 'PER': 268, 'B-SKILL': 207, 'E-SKILL': 205, 'E-JOB': 112, 'B-JOB': 109, 'E-COMPANY': 77, 'B-COMPANY': 74, 'I-DEG': 71, 'I-JOB': 60, 'I-COMPANY': 53, 'B-DEG': 41, 'E-DEG': 41, 'B-UNI': 38, 'E-UNI': 38, 'I-SKILL': 37, 'B-NAME': 35, 'E-NAME': 35, 'I-UNI': 32, 'S-LOC': 29, 'S-EMAIL': 27, 'E-PHONE': 25, 'B-PHONE': 24, 'B-LOC': 22, 'E-LOC': 22, 'I-PHONE': 22, 'S-JOB': 13, 'S-COMPANY': 13,

In [7]:
# create NER tagger
from flair.embeddings import WordEmbeddings, StackedEmbeddings, TransformerWordEmbeddings, FlairEmbeddings
from flair.models import SequenceTagger

# using LSTM-CRF on top of frozen embeddings
# combine flair and glove embeddings
# embeddings = StackedEmbeddings([
#                 WordEmbeddings('glove'),
#                 FlairEmbeddings('news-forward'),
#                 FlairEmbeddings('news-backward'),
#             ])
# tagger = SequenceTagger(hidden_size=256,
#                          embeddings=embeddings,
#                          tag_dictionary=tag_dictionary,
#                          tag_type='ner',
#                          use_crf=True,
#                          tag_format = 'BIOES')

# using transformer embedding
# embeddings = TransformerWordEmbeddings('bert-base-uncased',
#                                       fine_tune=True,
#                                       layers='-1',
#                                       subtoken_pooling='first')
embeddings = TransformerWordEmbeddings(
    'roberta-base',  # or 'bert-base-uncased'
    fine_tune=True,
    layers='-1,-2,-3,-4',  # Use last 4 layers
    subtoken_pooling='first',
    allow_long_sentences=True
)
tagger = SequenceTagger(hidden_size=128,
                         embeddings=embeddings,
                         tag_dictionary=tag_dictionary,
                         tag_type='ner',
                         use_crf=False,
                         use_rnn=False,
                         reproject_embeddings=False,
                         tag_format = 'BIOES')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

2024-11-06 09:54:45,764 SequenceTagger predicts: Dictionary with 45 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-NAME, B-NAME, E-NAME, I-NAME, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL


In [8]:
# train flair ner model
from flair.trainers import ModelTrainer
from flair.training_utils import EvaluationMetric

trainer = ModelTrainer(tagger, corpus)

trainer.train(
    base_path='flair_output/',
    learning_rate=0.01,
    mini_batch_size=4,
    max_epochs=50,
    train_with_dev=False
)
!cp -r ./flair_output /content/drive/MyDrive/FYP/Implementation/

2024-11-06 09:54:47,941 ----------------------------------------------------------------------------------------------------
2024-11-06 09:54:47,944 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50266, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2024-11-06 09:54:53,013 epoch 1 - iter 6/67 - loss 2.90346011 - time (sec): 5.04 - samples/sec: 1374.31 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:54:56,684 epoch 1 - iter 12/67 - loss 1.77811724 - time (sec): 8.71 - samples/sec: 1819.59 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:54:59,329 epoch 1 - iter 18/67 - loss 1.49312882 - time (sec): 11.35 - samples/sec: 2088.20 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:01,952 epoch 1 - iter 24/67 - loss 1.39814584 - time (sec): 13.98 - samples/sec: 2171.47 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:04,335 epoch 1 - iter 30/67 - loss 1.31976729 - time (sec): 16.36 - samples/sec: 2185.75 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:06,968 epoch 1 - iter 36/67 - loss 1.24680779 - time (sec): 18.99 - samples/sec: 2217.39 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:10,014 epoch 1 - iter 42/67 - loss 1.19058710 - time (sec): 22.04 - samples/sec: 2203.59 - lr: 0.010000 - momentum: 0.000000
2024-11-0

100%|██████████| 1/1 [00:02<00:00,  2.00s/it]

2024-11-06 09:55:22,000 DEV : loss 0.5709929466247559 - f1-score (micro avg)  0.0
2024-11-06 09:55:22,015  - 0 epochs without improvement
2024-11-06 09:55:22,019 ----------------------------------------------------------------------------------------------------





2024-11-06 09:55:24,807 epoch 2 - iter 6/67 - loss 0.64740486 - time (sec): 2.78 - samples/sec: 2781.21 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:28,298 epoch 2 - iter 12/67 - loss 0.65430567 - time (sec): 6.28 - samples/sec: 2566.67 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:30,540 epoch 2 - iter 18/67 - loss 0.68795021 - time (sec): 8.52 - samples/sec: 2476.08 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:33,162 epoch 2 - iter 24/67 - loss 0.66716287 - time (sec): 11.14 - samples/sec: 2440.79 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:35,245 epoch 2 - iter 30/67 - loss 0.68608918 - time (sec): 13.22 - samples/sec: 2432.79 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:37,728 epoch 2 - iter 36/67 - loss 0.68326514 - time (sec): 15.71 - samples/sec: 2436.42 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:55:39,523 epoch 2 - iter 42/67 - loss 0.69568908 - time (sec): 17.50 - samples/sec: 2448.05 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:03<00:00,  3.51s/it]

2024-11-06 09:55:55,453 DEV : loss 0.552460253238678 - f1-score (micro avg)  0.0
2024-11-06 09:55:55,468  - 0 epochs without improvement
2024-11-06 09:55:55,469 ----------------------------------------------------------------------------------------------------





2024-11-06 09:55:58,507 epoch 3 - iter 6/67 - loss 0.47992346 - time (sec): 3.03 - samples/sec: 1968.73 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:01,392 epoch 3 - iter 12/67 - loss 0.53165529 - time (sec): 5.92 - samples/sec: 1978.34 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:04,312 epoch 3 - iter 18/67 - loss 0.59361112 - time (sec): 8.84 - samples/sec: 2176.42 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:06,604 epoch 3 - iter 24/67 - loss 0.61569445 - time (sec): 11.13 - samples/sec: 2252.45 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:09,331 epoch 3 - iter 30/67 - loss 0.60176336 - time (sec): 13.86 - samples/sec: 2311.61 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:12,416 epoch 3 - iter 36/67 - loss 0.60908204 - time (sec): 16.94 - samples/sec: 2348.87 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:15,395 epoch 3 - iter 42/67 - loss 0.61823835 - time (sec): 19.92 - samples/sec: 2286.46 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:05<00:00,  5.05s/it]

2024-11-06 09:56:32,486 DEV : loss 0.5389795303344727 - f1-score (micro avg)  0.0
2024-11-06 09:56:32,501  - 0 epochs without improvement
2024-11-06 09:56:32,502 ----------------------------------------------------------------------------------------------------





2024-11-06 09:56:34,924 epoch 4 - iter 6/67 - loss 0.60814302 - time (sec): 2.42 - samples/sec: 2659.14 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:37,893 epoch 4 - iter 12/67 - loss 0.54065810 - time (sec): 5.39 - samples/sec: 2381.61 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:40,895 epoch 4 - iter 18/67 - loss 0.54992082 - time (sec): 8.39 - samples/sec: 2264.38 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:43,511 epoch 4 - iter 24/67 - loss 0.54490396 - time (sec): 11.01 - samples/sec: 2274.47 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:47,059 epoch 4 - iter 30/67 - loss 0.51715610 - time (sec): 14.55 - samples/sec: 2260.29 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:49,149 epoch 4 - iter 36/67 - loss 0.52613268 - time (sec): 16.64 - samples/sec: 2266.81 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:56:51,870 epoch 4 - iter 42/67 - loss 0.53270014 - time (sec): 19.36 - samples/sec: 2275.40 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:03<00:00,  3.53s/it]

2024-11-06 09:57:08,412 DEV : loss 0.5626357793807983 - f1-score (micro avg)  0.0
2024-11-06 09:57:08,427  - 1 epochs without improvement
2024-11-06 09:57:08,429 ----------------------------------------------------------------------------------------------------





2024-11-06 09:57:11,637 epoch 5 - iter 6/67 - loss 0.48812058 - time (sec): 3.20 - samples/sec: 1364.79 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:14,383 epoch 5 - iter 12/67 - loss 0.52345664 - time (sec): 5.95 - samples/sec: 1902.63 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:17,445 epoch 5 - iter 18/67 - loss 0.51612303 - time (sec): 9.01 - samples/sec: 2123.83 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:20,363 epoch 5 - iter 24/67 - loss 0.51934656 - time (sec): 11.93 - samples/sec: 2086.61 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:23,783 epoch 5 - iter 30/67 - loss 0.52086682 - time (sec): 15.35 - samples/sec: 2110.27 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:26,853 epoch 5 - iter 36/67 - loss 0.51865827 - time (sec): 18.42 - samples/sec: 2151.93 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:29,788 epoch 5 - iter 42/67 - loss 0.52803136 - time (sec): 21.36 - samples/sec: 2110.03 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:03<00:00,  3.43s/it]

2024-11-06 09:57:46,292 DEV : loss 0.557333767414093 - f1-score (micro avg)  0.0
2024-11-06 09:57:46,307  - 2 epochs without improvement
2024-11-06 09:57:46,310 ----------------------------------------------------------------------------------------------------





2024-11-06 09:57:48,706 epoch 6 - iter 6/67 - loss 0.47130300 - time (sec): 2.39 - samples/sec: 2471.35 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:52,562 epoch 6 - iter 12/67 - loss 0.48196330 - time (sec): 6.25 - samples/sec: 2169.61 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:55,058 epoch 6 - iter 18/67 - loss 0.45963138 - time (sec): 8.75 - samples/sec: 2104.12 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:57,401 epoch 6 - iter 24/67 - loss 0.47485225 - time (sec): 11.09 - samples/sec: 2157.55 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:57:59,872 epoch 6 - iter 30/67 - loss 0.47447075 - time (sec): 13.56 - samples/sec: 2152.88 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:58:03,187 epoch 6 - iter 36/67 - loss 0.47661970 - time (sec): 16.87 - samples/sec: 2287.14 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:58:06,248 epoch 6 - iter 42/67 - loss 0.47893293 - time (sec): 19.94 - samples/sec: 2252.21 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:05<00:00,  5.65s/it]

2024-11-06 09:58:24,745 DEV : loss 0.5368471145629883 - f1-score (micro avg)  0.0
2024-11-06 09:58:24,760  - 0 epochs without improvement
2024-11-06 09:58:24,762 ----------------------------------------------------------------------------------------------------





2024-11-06 09:58:27,737 epoch 7 - iter 6/67 - loss 0.53374887 - time (sec): 2.97 - samples/sec: 2466.42 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:58:30,856 epoch 7 - iter 12/67 - loss 0.52523912 - time (sec): 6.09 - samples/sec: 2232.12 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:58:32,964 epoch 7 - iter 18/67 - loss 0.49192620 - time (sec): 8.20 - samples/sec: 2220.68 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:58:36,238 epoch 7 - iter 24/67 - loss 0.47023870 - time (sec): 11.47 - samples/sec: 2284.88 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:58:38,921 epoch 7 - iter 30/67 - loss 0.46672394 - time (sec): 14.16 - samples/sec: 2250.56 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:58:42,105 epoch 7 - iter 36/67 - loss 0.46257774 - time (sec): 17.34 - samples/sec: 2267.75 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:58:46,206 epoch 7 - iter 42/67 - loss 0.45573706 - time (sec): 21.44 - samples/sec: 2213.12 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:03<00:00,  3.50s/it]

2024-11-06 09:59:01,414 DEV : loss 0.5441924929618835 - f1-score (micro avg)  0.0
2024-11-06 09:59:01,429  - 1 epochs without improvement
2024-11-06 09:59:01,430 ----------------------------------------------------------------------------------------------------





2024-11-06 09:59:04,169 epoch 8 - iter 6/67 - loss 0.43771599 - time (sec): 2.73 - samples/sec: 2318.46 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:08,007 epoch 8 - iter 12/67 - loss 0.39941966 - time (sec): 6.57 - samples/sec: 2276.04 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:10,910 epoch 8 - iter 18/67 - loss 0.40498497 - time (sec): 9.48 - samples/sec: 2251.81 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:13,839 epoch 8 - iter 24/67 - loss 0.38367204 - time (sec): 12.40 - samples/sec: 2265.42 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:17,043 epoch 8 - iter 30/67 - loss 0.39214521 - time (sec): 15.61 - samples/sec: 2253.28 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:19,933 epoch 8 - iter 36/67 - loss 0.41592870 - time (sec): 18.50 - samples/sec: 2262.37 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:23,351 epoch 8 - iter 42/67 - loss 0.41639016 - time (sec): 21.92 - samples/sec: 2253.93 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:03<00:00,  3.38s/it]

2024-11-06 09:59:38,391 DEV : loss 0.5453396439552307 - f1-score (micro avg)  0.0
2024-11-06 09:59:38,406  - 2 epochs without improvement
2024-11-06 09:59:38,407 ----------------------------------------------------------------------------------------------------





2024-11-06 09:59:41,069 epoch 9 - iter 6/67 - loss 0.42019997 - time (sec): 2.66 - samples/sec: 2167.30 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:44,429 epoch 9 - iter 12/67 - loss 0.40353938 - time (sec): 6.02 - samples/sec: 2331.02 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:47,103 epoch 9 - iter 18/67 - loss 0.42184808 - time (sec): 8.69 - samples/sec: 2198.72 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:50,366 epoch 9 - iter 24/67 - loss 0.41919254 - time (sec): 11.95 - samples/sec: 2201.74 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:53,332 epoch 9 - iter 30/67 - loss 0.42266405 - time (sec): 14.92 - samples/sec: 2140.67 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:56,049 epoch 9 - iter 36/67 - loss 0.42354993 - time (sec): 17.64 - samples/sec: 2128.40 - lr: 0.010000 - momentum: 0.000000
2024-11-06 09:59:59,821 epoch 9 - iter 42/67 - loss 0.40084631 - time (sec): 21.41 - samples/sec: 2128.41 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:05<00:00,  5.48s/it]

2024-11-06 10:00:17,164 DEV : loss 0.5372605323791504 - f1-score (micro avg)  0.0
2024-11-06 10:00:17,194  - 3 epochs without improvement
2024-11-06 10:00:17,198 ----------------------------------------------------------------------------------------------------





2024-11-06 10:00:19,339 epoch 10 - iter 6/67 - loss 0.46728057 - time (sec): 2.14 - samples/sec: 2481.17 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:00:22,395 epoch 10 - iter 12/67 - loss 0.45213174 - time (sec): 5.19 - samples/sec: 2309.58 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:00:25,894 epoch 10 - iter 18/67 - loss 0.43423860 - time (sec): 8.69 - samples/sec: 2237.01 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:00:29,048 epoch 10 - iter 24/67 - loss 0.42655219 - time (sec): 11.85 - samples/sec: 2217.06 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:00:31,519 epoch 10 - iter 30/67 - loss 0.41433226 - time (sec): 14.32 - samples/sec: 2208.38 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:00:34,388 epoch 10 - iter 36/67 - loss 0.41361749 - time (sec): 17.19 - samples/sec: 2201.57 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:00:36,969 epoch 10 - iter 42/67 - loss 0.40431321 - time (sec): 19.77 - samples/sec: 2218.91 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.38s/it]

2024-11-06 10:00:53,907 DEV : loss 0.5363765358924866 - f1-score (micro avg)  0.0
2024-11-06 10:00:53,921  - 0 epochs without improvement
2024-11-06 10:00:53,924 ----------------------------------------------------------------------------------------------------





2024-11-06 10:00:56,016 epoch 11 - iter 6/67 - loss 0.36660704 - time (sec): 2.09 - samples/sec: 2363.38 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:00:58,893 epoch 11 - iter 12/67 - loss 0.36824480 - time (sec): 4.96 - samples/sec: 2460.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:02,555 epoch 11 - iter 18/67 - loss 0.36424527 - time (sec): 8.63 - samples/sec: 2405.11 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:05,502 epoch 11 - iter 24/67 - loss 0.37131673 - time (sec): 11.57 - samples/sec: 2267.48 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:09,993 epoch 11 - iter 30/67 - loss 0.34255879 - time (sec): 16.07 - samples/sec: 2265.95 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:13,201 epoch 11 - iter 36/67 - loss 0.35278440 - time (sec): 19.27 - samples/sec: 2247.46 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:16,357 epoch 11 - iter 42/67 - loss 0.35757865 - time (sec): 22.43 - samples/sec: 2217.25 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:05<00:00,  5.16s/it]

2024-11-06 10:01:32,375 DEV : loss 0.5366661548614502 - f1-score (micro avg)  0.0
2024-11-06 10:01:32,390  - 1 epochs without improvement
2024-11-06 10:01:32,391 ----------------------------------------------------------------------------------------------------





2024-11-06 10:01:35,636 epoch 12 - iter 6/67 - loss 0.35174184 - time (sec): 3.24 - samples/sec: 1929.09 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:38,430 epoch 12 - iter 12/67 - loss 0.37942836 - time (sec): 6.03 - samples/sec: 2053.87 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:40,851 epoch 12 - iter 18/67 - loss 0.39174736 - time (sec): 8.46 - samples/sec: 2115.93 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:43,466 epoch 12 - iter 24/67 - loss 0.38234093 - time (sec): 11.07 - samples/sec: 2235.96 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:46,818 epoch 12 - iter 30/67 - loss 0.37489460 - time (sec): 14.42 - samples/sec: 2219.04 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:50,301 epoch 12 - iter 36/67 - loss 0.35738617 - time (sec): 17.91 - samples/sec: 2300.85 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:01:53,807 epoch 12 - iter 42/67 - loss 0.35502562 - time (sec): 21.41 - samples/sec: 2242.92 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.40s/it]

2024-11-06 10:02:09,556 DEV : loss 0.5276150107383728 - f1-score (micro avg)  0.0
2024-11-06 10:02:09,574  - 0 epochs without improvement
2024-11-06 10:02:09,576 ----------------------------------------------------------------------------------------------------





2024-11-06 10:02:12,196 epoch 13 - iter 6/67 - loss 0.38573270 - time (sec): 2.62 - samples/sec: 2224.13 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:02:15,345 epoch 13 - iter 12/67 - loss 0.38983286 - time (sec): 5.76 - samples/sec: 2127.72 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:02:17,933 epoch 13 - iter 18/67 - loss 0.40320176 - time (sec): 8.35 - samples/sec: 2204.78 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:02:21,909 epoch 13 - iter 24/67 - loss 0.35074538 - time (sec): 12.33 - samples/sec: 2220.73 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:02:24,856 epoch 13 - iter 30/67 - loss 0.35086239 - time (sec): 15.27 - samples/sec: 2247.81 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:02:27,745 epoch 13 - iter 36/67 - loss 0.34418209 - time (sec): 18.16 - samples/sec: 2227.51 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:02:30,835 epoch 13 - iter 42/67 - loss 0.34367708 - time (sec): 21.25 - samples/sec: 2213.58 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.86s/it]

2024-11-06 10:02:46,828 DEV : loss 0.5382773280143738 - f1-score (micro avg)  0.0
2024-11-06 10:02:46,843  - 1 epochs without improvement
2024-11-06 10:02:46,845 ----------------------------------------------------------------------------------------------------





2024-11-06 10:02:49,633 epoch 14 - iter 6/67 - loss 0.32719320 - time (sec): 2.79 - samples/sec: 2749.65 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:02:52,823 epoch 14 - iter 12/67 - loss 0.34441939 - time (sec): 5.98 - samples/sec: 2408.16 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:02:55,215 epoch 14 - iter 18/67 - loss 0.35884497 - time (sec): 8.37 - samples/sec: 2251.87 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:02:57,844 epoch 14 - iter 24/67 - loss 0.34127994 - time (sec): 11.00 - samples/sec: 2255.55 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:03:00,182 epoch 14 - iter 30/67 - loss 0.34444166 - time (sec): 13.33 - samples/sec: 2255.31 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:03:03,399 epoch 14 - iter 36/67 - loss 0.34310265 - time (sec): 16.55 - samples/sec: 2244.85 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:03:06,414 epoch 14 - iter 42/67 - loss 0.33321689 - time (sec): 19.57 - samples/sec: 2250.59 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:05<00:00,  5.67s/it]

2024-11-06 10:03:25,934 DEV : loss 0.5313313603401184 - f1-score (micro avg)  0.0
2024-11-06 10:03:25,963  - 2 epochs without improvement
2024-11-06 10:03:25,968 ----------------------------------------------------------------------------------------------------





2024-11-06 10:03:28,587 epoch 15 - iter 6/67 - loss 0.35855055 - time (sec): 2.62 - samples/sec: 2385.05 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:03:31,130 epoch 15 - iter 12/67 - loss 0.35018162 - time (sec): 5.16 - samples/sec: 2381.05 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:03:33,353 epoch 15 - iter 18/67 - loss 0.33706988 - time (sec): 7.38 - samples/sec: 2355.94 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:03:36,356 epoch 15 - iter 24/67 - loss 0.33780285 - time (sec): 10.38 - samples/sec: 2323.70 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:03:39,245 epoch 15 - iter 30/67 - loss 0.32643655 - time (sec): 13.27 - samples/sec: 2311.97 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:03:42,566 epoch 15 - iter 36/67 - loss 0.32431365 - time (sec): 16.59 - samples/sec: 2263.59 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:03:45,269 epoch 15 - iter 42/67 - loss 0.32266969 - time (sec): 19.30 - samples/sec: 2312.21 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.48s/it]

2024-11-06 10:04:02,740 DEV : loss 0.532965362071991 - f1-score (micro avg)  0.0
2024-11-06 10:04:02,755  - 3 epochs without improvement
2024-11-06 10:04:02,756 ----------------------------------------------------------------------------------------------------





2024-11-06 10:04:06,043 epoch 16 - iter 6/67 - loss 0.30406599 - time (sec): 3.28 - samples/sec: 2241.02 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:04:09,109 epoch 16 - iter 12/67 - loss 0.31910816 - time (sec): 6.35 - samples/sec: 2209.17 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:04:11,938 epoch 16 - iter 18/67 - loss 0.33843912 - time (sec): 9.18 - samples/sec: 2237.48 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:04:14,753 epoch 16 - iter 24/67 - loss 0.32895677 - time (sec): 11.99 - samples/sec: 2287.62 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:04:17,530 epoch 16 - iter 30/67 - loss 0.32619594 - time (sec): 14.77 - samples/sec: 2292.14 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:04:20,257 epoch 16 - iter 36/67 - loss 0.32915595 - time (sec): 17.50 - samples/sec: 2277.60 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:04:23,584 epoch 16 - iter 42/67 - loss 0.31501147 - time (sec): 20.82 - samples/sec: 2267.52 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:04<00:00,  4.20s/it]

2024-11-06 10:04:39,740 DEV : loss 0.5449539422988892 - f1-score (micro avg)  0.0
2024-11-06 10:04:39,756  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.005]
2024-11-06 10:04:39,757 ----------------------------------------------------------------------------------------------------





2024-11-06 10:04:42,351 epoch 17 - iter 6/67 - loss 0.35768495 - time (sec): 2.59 - samples/sec: 2154.82 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:04:45,165 epoch 17 - iter 12/67 - loss 0.31884516 - time (sec): 5.40 - samples/sec: 2084.59 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:04:47,993 epoch 17 - iter 18/67 - loss 0.28582641 - time (sec): 8.23 - samples/sec: 2164.40 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:04:51,564 epoch 17 - iter 24/67 - loss 0.28208547 - time (sec): 11.80 - samples/sec: 2183.33 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:04:54,553 epoch 17 - iter 30/67 - loss 0.27812905 - time (sec): 14.79 - samples/sec: 2240.83 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:04:57,632 epoch 17 - iter 36/67 - loss 0.27589909 - time (sec): 17.87 - samples/sec: 2215.57 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:05:00,096 epoch 17 - iter 42/67 - loss 0.28601510 - time (sec): 20.33 - samples/sec: 2219.55 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:04<00:00,  4.30s/it]

2024-11-06 10:05:17,148 DEV : loss 0.5247790217399597 - f1-score (micro avg)  0.0
2024-11-06 10:05:17,172  - 0 epochs without improvement
2024-11-06 10:05:17,175 ----------------------------------------------------------------------------------------------------





2024-11-06 10:05:19,501 epoch 18 - iter 6/67 - loss 0.33756843 - time (sec): 2.32 - samples/sec: 2430.72 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:05:22,068 epoch 18 - iter 12/67 - loss 0.31269284 - time (sec): 4.89 - samples/sec: 2181.32 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:05:25,241 epoch 18 - iter 18/67 - loss 0.29015560 - time (sec): 8.06 - samples/sec: 2284.11 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:05:27,905 epoch 18 - iter 24/67 - loss 0.28870866 - time (sec): 10.72 - samples/sec: 2155.01 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:05:31,010 epoch 18 - iter 30/67 - loss 0.29219505 - time (sec): 13.83 - samples/sec: 2224.80 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:05:33,251 epoch 18 - iter 36/67 - loss 0.28682804 - time (sec): 16.07 - samples/sec: 2189.63 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:05:37,132 epoch 18 - iter 42/67 - loss 0.27073610 - time (sec): 19.95 - samples/sec: 2195.33 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.61s/it]

2024-11-06 10:05:54,030 DEV : loss 0.5294201970100403 - f1-score (micro avg)  0.0
2024-11-06 10:05:54,045  - 1 epochs without improvement
2024-11-06 10:05:54,046 ----------------------------------------------------------------------------------------------------





2024-11-06 10:05:57,208 epoch 19 - iter 6/67 - loss 0.24454341 - time (sec): 3.16 - samples/sec: 2164.04 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:05:59,900 epoch 19 - iter 12/67 - loss 0.26924540 - time (sec): 5.85 - samples/sec: 2168.29 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:02,998 epoch 19 - iter 18/67 - loss 0.25310434 - time (sec): 8.95 - samples/sec: 2169.49 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:06,372 epoch 19 - iter 24/67 - loss 0.26045703 - time (sec): 12.32 - samples/sec: 2227.00 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:10,349 epoch 19 - iter 30/67 - loss 0.25602143 - time (sec): 16.30 - samples/sec: 2245.94 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:13,438 epoch 19 - iter 36/67 - loss 0.25931379 - time (sec): 19.39 - samples/sec: 2255.41 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:16,257 epoch 19 - iter 42/67 - loss 0.26445840 - time (sec): 22.21 - samples/sec: 2247.70 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:05<00:00,  5.17s/it]

2024-11-06 10:06:32,131 DEV : loss 0.5321469902992249 - f1-score (micro avg)  0.0
2024-11-06 10:06:32,146  - 2 epochs without improvement
2024-11-06 10:06:32,147 ----------------------------------------------------------------------------------------------------





2024-11-06 10:06:35,738 epoch 20 - iter 6/67 - loss 0.26719882 - time (sec): 3.59 - samples/sec: 2219.83 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:38,748 epoch 20 - iter 12/67 - loss 0.27856008 - time (sec): 6.60 - samples/sec: 2062.71 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:40,988 epoch 20 - iter 18/67 - loss 0.27169464 - time (sec): 8.84 - samples/sec: 2177.03 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:44,076 epoch 20 - iter 24/67 - loss 0.27274553 - time (sec): 11.92 - samples/sec: 2121.90 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:47,376 epoch 20 - iter 30/67 - loss 0.27003518 - time (sec): 15.22 - samples/sec: 2169.23 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:50,473 epoch 20 - iter 36/67 - loss 0.27198449 - time (sec): 18.32 - samples/sec: 2202.27 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:06:53,189 epoch 20 - iter 42/67 - loss 0.27802339 - time (sec): 21.04 - samples/sec: 2233.56 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.51s/it]

2024-11-06 10:07:09,015 DEV : loss 0.5364053845405579 - f1-score (micro avg)  0.0
2024-11-06 10:07:09,031  - 3 epochs without improvement
2024-11-06 10:07:09,033 ----------------------------------------------------------------------------------------------------





2024-11-06 10:07:12,369 epoch 21 - iter 6/67 - loss 0.21676246 - time (sec): 3.33 - samples/sec: 2271.21 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:07:15,504 epoch 21 - iter 12/67 - loss 0.23780672 - time (sec): 6.47 - samples/sec: 2140.71 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:07:18,225 epoch 21 - iter 18/67 - loss 0.24410078 - time (sec): 9.19 - samples/sec: 2219.48 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:07:21,081 epoch 21 - iter 24/67 - loss 0.25055214 - time (sec): 12.04 - samples/sec: 2225.54 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:07:24,179 epoch 21 - iter 30/67 - loss 0.25519226 - time (sec): 15.14 - samples/sec: 2260.28 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:07:26,971 epoch 21 - iter 36/67 - loss 0.25672760 - time (sec): 17.93 - samples/sec: 2242.20 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:07:30,280 epoch 21 - iter 42/67 - loss 0.25644844 - time (sec): 21.24 - samples/sec: 2170.49 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.62s/it]

2024-11-06 10:07:46,236 DEV : loss 0.529495358467102 - f1-score (micro avg)  0.0
2024-11-06 10:07:46,252  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0025]
2024-11-06 10:07:46,254 ----------------------------------------------------------------------------------------------------





2024-11-06 10:07:48,904 epoch 22 - iter 6/67 - loss 0.23535870 - time (sec): 2.64 - samples/sec: 2287.09 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:07:52,413 epoch 22 - iter 12/67 - loss 0.21758957 - time (sec): 6.15 - samples/sec: 2302.74 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:07:55,334 epoch 22 - iter 18/67 - loss 0.23736252 - time (sec): 9.07 - samples/sec: 2229.50 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:07:57,697 epoch 22 - iter 24/67 - loss 0.24954205 - time (sec): 11.44 - samples/sec: 2254.83 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:08:01,280 epoch 22 - iter 30/67 - loss 0.25248282 - time (sec): 15.02 - samples/sec: 2235.93 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:08:04,074 epoch 22 - iter 36/67 - loss 0.24914750 - time (sec): 17.81 - samples/sec: 2237.17 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:08:06,057 epoch 22 - iter 42/67 - loss 0.24864994 - time (sec): 19.80 - samples/sec: 2236.40 - lr: 0.002500 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.74s/it]

2024-11-06 10:08:22,567 DEV : loss 0.5293766260147095 - f1-score (micro avg)  0.0
2024-11-06 10:08:22,594  - 1 epochs without improvement
2024-11-06 10:08:22,598 ----------------------------------------------------------------------------------------------------





2024-11-06 10:08:25,718 epoch 23 - iter 6/67 - loss 0.24361222 - time (sec): 3.12 - samples/sec: 2110.33 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:08:27,981 epoch 23 - iter 12/67 - loss 0.26140442 - time (sec): 5.38 - samples/sec: 2098.96 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:08:30,963 epoch 23 - iter 18/67 - loss 0.23995259 - time (sec): 8.36 - samples/sec: 2223.22 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:08:34,101 epoch 23 - iter 24/67 - loss 0.24508360 - time (sec): 11.50 - samples/sec: 2286.13 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:08:36,962 epoch 23 - iter 30/67 - loss 0.24272221 - time (sec): 14.36 - samples/sec: 2210.68 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:08:39,238 epoch 23 - iter 36/67 - loss 0.24781671 - time (sec): 16.64 - samples/sec: 2255.30 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:08:42,517 epoch 23 - iter 42/67 - loss 0.25653636 - time (sec): 19.91 - samples/sec: 2226.40 - lr: 0.002500 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.99s/it]

2024-11-06 10:08:59,371 DEV : loss 0.5337845683097839 - f1-score (micro avg)  0.0
2024-11-06 10:08:59,387  - 2 epochs without improvement
2024-11-06 10:08:59,389 ----------------------------------------------------------------------------------------------------





2024-11-06 10:09:01,729 epoch 24 - iter 6/67 - loss 0.22095089 - time (sec): 2.34 - samples/sec: 2252.20 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:03,656 epoch 24 - iter 12/67 - loss 0.23870824 - time (sec): 4.26 - samples/sec: 2535.24 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:06,416 epoch 24 - iter 18/67 - loss 0.24573721 - time (sec): 7.02 - samples/sec: 2266.29 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:09,837 epoch 24 - iter 24/67 - loss 0.23105487 - time (sec): 10.44 - samples/sec: 2301.19 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:12,460 epoch 24 - iter 30/67 - loss 0.23614505 - time (sec): 13.07 - samples/sec: 2277.53 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:15,041 epoch 24 - iter 36/67 - loss 0.24612766 - time (sec): 15.65 - samples/sec: 2253.50 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:18,577 epoch 24 - iter 42/67 - loss 0.24474847 - time (sec): 19.18 - samples/sec: 2263.16 - lr: 0.002500 - momentum: 0.000000
202

100%|██████████| 1/1 [00:05<00:00,  5.34s/it]

2024-11-06 10:09:37,717 DEV : loss 0.533233106136322 - f1-score (micro avg)  0.0
2024-11-06 10:09:37,737  - 3 epochs without improvement
2024-11-06 10:09:37,739 ----------------------------------------------------------------------------------------------------





2024-11-06 10:09:40,665 epoch 25 - iter 6/67 - loss 0.25405355 - time (sec): 2.92 - samples/sec: 2320.83 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:43,175 epoch 25 - iter 12/67 - loss 0.25910150 - time (sec): 5.43 - samples/sec: 2431.46 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:46,001 epoch 25 - iter 18/67 - loss 0.25748066 - time (sec): 8.26 - samples/sec: 2231.74 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:49,092 epoch 25 - iter 24/67 - loss 0.23839182 - time (sec): 11.35 - samples/sec: 2239.59 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:52,449 epoch 25 - iter 30/67 - loss 0.24442505 - time (sec): 14.71 - samples/sec: 2233.30 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:55,763 epoch 25 - iter 36/67 - loss 0.23599635 - time (sec): 18.02 - samples/sec: 2254.47 - lr: 0.002500 - momentum: 0.000000
2024-11-06 10:09:58,122 epoch 25 - iter 42/67 - loss 0.23927136 - time (sec): 20.38 - samples/sec: 2268.86 - lr: 0.002500 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.29s/it]

2024-11-06 10:10:13,182 DEV : loss 0.531201958656311 - f1-score (micro avg)  0.0
2024-11-06 10:10:13,199  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00125]
2024-11-06 10:10:13,200 ----------------------------------------------------------------------------------------------------





2024-11-06 10:10:17,415 epoch 26 - iter 6/67 - loss 0.24836142 - time (sec): 4.21 - samples/sec: 1885.81 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:10:20,235 epoch 26 - iter 12/67 - loss 0.24841034 - time (sec): 7.03 - samples/sec: 1958.92 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:10:23,254 epoch 26 - iter 18/67 - loss 0.26117452 - time (sec): 10.05 - samples/sec: 2037.76 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:10:25,875 epoch 26 - iter 24/67 - loss 0.24635652 - time (sec): 12.67 - samples/sec: 2103.11 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:10:28,547 epoch 26 - iter 30/67 - loss 0.23832629 - time (sec): 15.34 - samples/sec: 2127.44 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:10:32,327 epoch 26 - iter 36/67 - loss 0.22617277 - time (sec): 19.12 - samples/sec: 2123.72 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:10:35,210 epoch 26 - iter 42/67 - loss 0.23155717 - time (sec): 22.01 - samples/sec: 2129.13 - lr: 0.001250 - momentum: 0.000000
20

100%|██████████| 1/1 [00:03<00:00,  3.48s/it]

2024-11-06 10:10:50,739 DEV : loss 0.5291683673858643 - f1-score (micro avg)  0.0
2024-11-06 10:10:50,754  - 1 epochs without improvement
2024-11-06 10:10:50,755 ----------------------------------------------------------------------------------------------------





2024-11-06 10:10:52,971 epoch 27 - iter 6/67 - loss 0.22481447 - time (sec): 2.21 - samples/sec: 2821.35 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:10:55,705 epoch 27 - iter 12/67 - loss 0.21491620 - time (sec): 4.94 - samples/sec: 2319.92 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:10:58,572 epoch 27 - iter 18/67 - loss 0.22038750 - time (sec): 7.81 - samples/sec: 2203.43 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:01,828 epoch 27 - iter 24/67 - loss 0.22370391 - time (sec): 11.07 - samples/sec: 2229.42 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:05,254 epoch 27 - iter 30/67 - loss 0.22428417 - time (sec): 14.49 - samples/sec: 2239.66 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:08,666 epoch 27 - iter 36/67 - loss 0.22062822 - time (sec): 17.90 - samples/sec: 2216.56 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:11,837 epoch 27 - iter 42/67 - loss 0.22376622 - time (sec): 21.08 - samples/sec: 2239.49 - lr: 0.001250 - momentum: 0.000000
202

100%|██████████| 1/1 [00:04<00:00,  4.75s/it]

2024-11-06 10:11:28,318 DEV : loss 0.5289976596832275 - f1-score (micro avg)  0.0
2024-11-06 10:11:28,341  - 2 epochs without improvement
2024-11-06 10:11:28,343 ----------------------------------------------------------------------------------------------------





2024-11-06 10:11:31,652 epoch 28 - iter 6/67 - loss 0.21888077 - time (sec): 3.30 - samples/sec: 2283.84 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:34,317 epoch 28 - iter 12/67 - loss 0.23192655 - time (sec): 5.97 - samples/sec: 2108.55 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:37,486 epoch 28 - iter 18/67 - loss 0.22606170 - time (sec): 9.14 - samples/sec: 2243.30 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:39,841 epoch 28 - iter 24/67 - loss 0.22060463 - time (sec): 11.49 - samples/sec: 2296.91 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:42,255 epoch 28 - iter 30/67 - loss 0.23420859 - time (sec): 13.91 - samples/sec: 2267.95 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:44,857 epoch 28 - iter 36/67 - loss 0.22991306 - time (sec): 16.51 - samples/sec: 2263.83 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:11:47,959 epoch 28 - iter 42/67 - loss 0.23662056 - time (sec): 19.61 - samples/sec: 2267.76 - lr: 0.001250 - momentum: 0.000000
202

100%|██████████| 1/1 [00:04<00:00,  4.55s/it]

2024-11-06 10:12:06,085 DEV : loss 0.5297093391418457 - f1-score (micro avg)  0.0
2024-11-06 10:12:06,101  - 3 epochs without improvement
2024-11-06 10:12:06,103 ----------------------------------------------------------------------------------------------------





2024-11-06 10:12:09,308 epoch 29 - iter 6/67 - loss 0.20809080 - time (sec): 3.20 - samples/sec: 2191.70 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:12:12,408 epoch 29 - iter 12/67 - loss 0.19612750 - time (sec): 6.30 - samples/sec: 2198.06 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:12:15,254 epoch 29 - iter 18/67 - loss 0.21330106 - time (sec): 9.15 - samples/sec: 2194.43 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:12:18,250 epoch 29 - iter 24/67 - loss 0.20975799 - time (sec): 12.14 - samples/sec: 2399.38 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:12:22,096 epoch 29 - iter 30/67 - loss 0.21095061 - time (sec): 15.99 - samples/sec: 2260.39 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:12:25,112 epoch 29 - iter 36/67 - loss 0.21477919 - time (sec): 19.01 - samples/sec: 2268.49 - lr: 0.001250 - momentum: 0.000000
2024-11-06 10:12:27,983 epoch 29 - iter 42/67 - loss 0.22141772 - time (sec): 21.88 - samples/sec: 2226.27 - lr: 0.001250 - momentum: 0.000000
202

100%|██████████| 1/1 [00:03<00:00,  3.47s/it]

2024-11-06 10:12:43,303 DEV : loss 0.5305877923965454 - f1-score (micro avg)  0.0
2024-11-06 10:12:43,319  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.000625]
2024-11-06 10:12:43,320 ----------------------------------------------------------------------------------------------------





2024-11-06 10:12:45,661 epoch 30 - iter 6/67 - loss 0.27894369 - time (sec): 2.33 - samples/sec: 2520.42 - lr: 0.000625 - momentum: 0.000000
2024-11-06 10:12:48,850 epoch 30 - iter 12/67 - loss 0.24436809 - time (sec): 5.52 - samples/sec: 2408.45 - lr: 0.000625 - momentum: 0.000000
2024-11-06 10:12:51,576 epoch 30 - iter 18/67 - loss 0.24455421 - time (sec): 8.25 - samples/sec: 2276.10 - lr: 0.000625 - momentum: 0.000000
2024-11-06 10:12:54,152 epoch 30 - iter 24/67 - loss 0.24516993 - time (sec): 10.83 - samples/sec: 2252.27 - lr: 0.000625 - momentum: 0.000000
2024-11-06 10:12:56,734 epoch 30 - iter 30/67 - loss 0.24366488 - time (sec): 13.41 - samples/sec: 2232.14 - lr: 0.000625 - momentum: 0.000000
2024-11-06 10:13:01,055 epoch 30 - iter 36/67 - loss 0.23031952 - time (sec): 17.73 - samples/sec: 2270.82 - lr: 0.000625 - momentum: 0.000000
2024-11-06 10:13:05,310 epoch 30 - iter 42/67 - loss 0.22664710 - time (sec): 21.98 - samples/sec: 2214.10 - lr: 0.000625 - momentum: 0.000000
202

100%|██████████| 1/1 [00:05<00:00,  5.25s/it]

2024-11-06 10:13:21,995 DEV : loss 0.5306172966957092 - f1-score (micro avg)  0.0
2024-11-06 10:13:22,012  - 1 epochs without improvement





2024-11-06 10:13:23,288 ----------------------------------------------------------------------------------------------------
2024-11-06 10:13:23,298 Testing using last state of model ...


100%|██████████| 2/2 [00:04<00:00,  2.29s/it]

2024-11-06 10:13:27,902 
Results:
- F-score (micro) 0.5132
- F-score (macro) 0.6279
- Accuracy 0.3487

By class:
              precision    recall  f1-score   support

       SKILL     0.3525    0.3340    0.3430       533
         JOB     0.5153    0.6720    0.5833       125
        WORK     0.7984    0.8879    0.8408       116
     COMPANY     0.5577    0.6374    0.5949        91
         LOC     0.4615    0.3529    0.4000        51
         DEG     0.6222    0.6667    0.6437        42
         UNI     0.3659    0.3750    0.3704        40
        NAME     0.7045    0.8857    0.7848        35
       PHONE     0.8158    0.9118    0.8611        34
       EMAIL     0.8889    0.8889    0.8889        27
       STUDY     1.0000    0.4242    0.5957        33

   micro avg     0.5083    0.5182    0.5132      1127
   macro avg     0.6439    0.6397    0.6279      1127
weighted avg     0.5052    0.5182    0.5058      1127

2024-11-06 10:13:27,903 --------------------------------------------------




In [9]:
# evaluate model
from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# Load the trained model
model = SequenceTagger.load('/content/drive/MyDrive/FYP/Implementation/flair_output/final-model.pt')

# Evaluate the model on the test set
result = model.evaluate(corpus.test, gold_label_type='ner', mini_batch_size=32)

# Print the results
# print("Evaluation Loss:", eval_loss)
print(result.detailed_results)  # print the precision, recall, and F1-score per entity type

2024-11-06 10:13:48,061 SequenceTagger predicts: Dictionary with 45 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-NAME, B-NAME, E-NAME, I-NAME, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL


100%|██████████| 3/3 [00:03<00:00,  1.28s/it]


Results:
- F-score (micro) 0.5132
- F-score (macro) 0.6279
- Accuracy 0.3487

By class:
              precision    recall  f1-score   support

       SKILL     0.3525    0.3340    0.3430       533
         JOB     0.5153    0.6720    0.5833       125
        WORK     0.7984    0.8879    0.8408       116
     COMPANY     0.5577    0.6374    0.5949        91
         LOC     0.4615    0.3529    0.4000        51
         DEG     0.6222    0.6667    0.6437        42
         UNI     0.3659    0.3750    0.3704        40
        NAME     0.7045    0.8857    0.7848        35
       PHONE     0.8158    0.9118    0.8611        34
       EMAIL     0.8889    0.8889    0.8889        27
       STUDY     1.0000    0.4242    0.5957        33

   micro avg     0.5083    0.5182    0.5132      1127
   macro avg     0.6439    0.6397    0.6279      1127
weighted avg     0.5052    0.5182    0.5058      1127






In [12]:
from flair.models import SequenceTagger
from flair.data import Sentence
import spacy
from spacy import displacy

# Load your trained Flair NER model
tagger = SequenceTagger.load('/content/drive/MyDrive/FYP/Implementation/flair_output/best-model.pt')

resume_text = '''
John Doe lives at 1234 Elm Street in Los Angeles, CA 90001. He can be reached at +1 (555) 123-4567 or via email at john.doe@example.com. John is a results-driven software engineer with over 5 years of experience in web development and cloud infrastructure, with strong knowledge of JavaScript, Python, and cloud technologies like AWS and Azure. Currently, he works as a Software Engineer at Google LLC in San Francisco, CA, where he has been employed since August 2019. In this role, he has developed scalable web applications using JavaScript, Node.js, and React, deployed and maintained cloud infrastructure on AWS, reducing downtime by 20%, and led a team of 4 engineers to enhance backend performance by 30%. Previously, he worked as a Junior Developer at Tech Innovators Inc. in Austin, TX, from July 2017 to July 2019, where he created RESTful APIs using Python and Flask, collaborated with front-end developers to build and deploy user-facing applications, and wrote unit and integration tests, improving code coverage by 15%.

John holds a Master of Science in Computer Science from the University of California, Berkeley, with a graduation date of May 2017, and a Bachelor of Science in Information Technology from the University of Texas at Austin, graduated in May 2015. His skillset includes proficiency in programming languages like Python, JavaScript, and Java; frameworks such as React, Flask, and Django; cloud platforms including AWS, Google Cloud, and Azure; as well as other tools like Git, Docker, Kubernetes, and SQL. He is certified as an AWS Certified Solutions Architect – Associate, earned in 2020, and as a Google Professional Cloud Architect, earned in 2021'
'''

# Step 1: Predict entities using Flair
sentence = Sentence(resume_text)
tagger.predict(sentence)

# Step 2: Convert Flair predictions to spaCy doc format
# Initialize a blank spaCy NLP pipeline
nlp = spacy.blank("en")
doc = nlp(resume_text)

# Extract entities from Flair prediction and convert to spaCy format
ents = []
for entity in sentence.get_spans('ner'):
    start, end = entity.start_position, entity.end_position
    label = entity.tag
    span = doc.char_span(start, end, label=label)
    if span is not None:
        ents.append(span)

# Set the entities in the spaCy doc
doc.ents = ents

# Step 3: Visualize using displacy
# Display in Jupyter or a web page
displacy.render(doc, style="ent", jupyter=True)


2024-11-06 10:18:43,802 SequenceTagger predicts: Dictionary with 47 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, <START>, <STOP>




In [11]:
from flair.models import SequenceTagger
from flair.data import Sentence

# Load the pretrained NER model
tagger = SequenceTagger.load("/content/drive/MyDrive/FYP/Implementation/flair_output/best-model.pt")
# Example text
text = "Apple is looking at buying U.K. startup for $1 billion."

# Create a Sentence object
sentence = Sentence(resume_text)

# Predict entities
tagger.predict(sentence)

# Print the detected entities
for entity in sentence.get_spans("ner"):
    print(f"Entity: {entity.text}, Type: {entity.get_label('ner').value}, Confidence: {entity.score}")


2024-11-06 10:14:28,943 SequenceTagger predicts: Dictionary with 47 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, <START>, <STOP>
Entity: Doe lives, Type: NAME, Confidence: 0.16702505946159363
Entity: 1234 Elm, Type: NAME, Confidence: 0.11599351465702057
Entity: Los, Type: DEG, Confidence: 0.18544061481952667
Entity: 90001, Type: DEG, Confidence: 0.14748063683509827
Entity: 555, Type: DEG, Confidence: 0.11017131805419922
Entity: 123-4567, Type: DEG, Confidence: 0.11569119244813919
Entity: via, Type: DEG, Confidence: 0.20629706978797913
Entity: at, Type: EMAIL, Confidence: 0.19365909695625305
Entity: john.doe, Type: JOB, Confidence: 0.10587572306394577
Entity: @, Ty