<a href="https://colab.research.google.com/github/chewzzz1014/fyp/blob/master/ner/src/train_ner_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Train NER Models

In [1]:
# mount drive

from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!mkdir spacy_ner_data

In [None]:
import json

import random

from sklearn.model_selection import train_test_split

import spacy

from spacy.tokens import DocBin



# Load JSON data

with open('/content/drive/MyDrive/FYP/Implementation/Resume Dataset/200_resumes_annotated.json', "r") as f:

    data = json.load(f)



def remove_overlapping_entities(entities):

    """Remove overlapping entities from the list."""

    entities = sorted(entities, key=lambda x: x[0])  # Sort by start position

    non_overlapping = []

    last_end = -1

    for start, end, label in entities:

        if start >= last_end:  # Only add if there's no overlap with the previous entity

            non_overlapping.append((start, end, label))

            last_end = end

    return non_overlapping



# Function to convert JSON data to Spacy's DocBin format

def convert_to_spacy_format(data):

    nlp = spacy.blank("en")  # Load a blank Spacy model

    doc_bin = DocBin()  # Container for our docs



    for item in data:

        text = item['data']['Text']  # Full document text

        entities = []



        for annotation in item['annotations'][0]['result']:

            start = annotation['value']['start']

            end = annotation['value']['end']

            label = annotation['value']['labels'][0]  # Entity label

            entities.append((start, end, label))



        entities = remove_overlapping_entities(entities)  # Remove overlapping entities

        # Create a Spacy doc and add entities to it

        doc = nlp.make_doc(text)

        spans = [doc.char_span(start, end, label=label) for start, end, label in entities]

        # Filter out None spans if Spacy can't align the character indices with tokens

        spans = [span for span in spans if span is not None]

        doc.ents = spans  # Assign entities to the doc

        doc_bin.add(doc)



    return doc_bin



# Split data into train and test sets

train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)



# Convert train and test sets to Spacy format

train_doc_bin = convert_to_spacy_format(train_data)

test_doc_bin = convert_to_spacy_format(test_data)



# Save the train and test data to .spacy files

train_doc_bin.to_disk("spacy_ner_data/train_data.spacy")

test_doc_bin.to_disk("spacy_ner_data/test_data.spacy")

## Spacy NER

In [None]:
# create base_config.cfg and paste the config generated from spacy widget

# update train and test file path

!touch base_config.cfg

In [None]:
# generate config.cfg from base_config.cfg

!python -m spacy init fill-config base_config.cfg config.cfg

[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [None]:
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# train model using hyperparameters set in config.cfg

# save trained model in spacy-output/ dir

!python -m spacy train config.cfg --output ./spacy_output

!cp -r ./spacy_output /content/drive/MyDrive/FYP/Implementation/

In [None]:
# evaluate trained model performance

# store output and visualization into result/ dir

!python -m spacy evaluate spacy_output/model-best spacy_ner_data/test_data.spacy -dp spacy_output

[38;5;4mℹ Using CPU[0m
[1m

TOK     100.00
NER P   51.12 
NER R   41.26 
NER F   45.66 
SPEED   2395  

[1m

                P       R       F
NAME        89.66   78.79   83.87
JOB         72.00   32.43   44.72
DEG         62.16   63.89   63.01
UNI         38.89   34.15   36.36
EMAIL       63.33   95.00   76.00
LOC         39.39   31.71   35.14
WORK PER    75.45   83.00   79.05
COMPANY     28.42   36.49   31.95
SKILL       40.96   28.96   33.93
PHONE       89.66   83.87   86.67
STUDY PER   65.62   58.33   61.76

<IPython.core.display.HTML object>
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/spacy/__main__.py", line 4, in <module>
    setup_cli()
  File "/usr/local/lib/python3.10/dist-packages/spacy/cli/_util.py", line 87, in setup_cli
    c

In [None]:
# make prediction

import spacy

resume_text = '''

John Doe lives at 1234 Elm Street in Los Angeles, CA 90001. He can be reached at +1 (555) 123-4567 or via email at john.doe@example.com. John is a results-driven software engineer with over 5 years of experience in web development and cloud infrastructure, with strong knowledge of JavaScript, Python, and cloud technologies like AWS and Azure. Currently, he works as a Software Engineer at Google LLC in San Francisco, CA, where he has been employed since August 2019. In this role, he has developed scalable web applications using JavaScript, Node.js, and React, deployed and maintained cloud infrastructure on AWS, reducing downtime by 20%, and led a team of 4 engineers to enhance backend performance by 30%. Previously, he worked as a Junior Developer at Tech Innovators Inc. in Austin, TX, from July 2017 to July 2019, where he created RESTful APIs using Python and Flask, collaborated with front-end developers to build and deploy user-facing applications, and wrote unit and integration tests, improving code coverage by 15%.



John holds a Master of Science in Computer Science from the University of California, Berkeley, with a graduation date of May 2017, and a Bachelor of Science in Information Technology from the University of Texas at Austin, graduated in May 2015. His skillset includes proficiency in programming languages like Python, JavaScript, and Java; frameworks such as React, Flask, and Django; cloud platforms including AWS, Google Cloud, and Azure; as well as other tools like Git, Docker, Kubernetes, and SQL. He is certified as an AWS Certified Solutions Architect – Associate, earned in 2020, and as a Google Professional Cloud Architect, earned in 2021'

'''

nlp = spacy.load("spacy-output/model-best")

doc = nlp(resume_text.lower())



print(doc.ents)



for ent in doc.ents:

    print(f"{ent.text}: {ent.label_}")

(john doe, in los, (555) 123-4567, john.doe@example.com, john is, aws, restful apis, master of science, bachelor of science in information technology, python, aws, azure, git, docker)
john doe: NAME
in los: LOC
(555) 123-4567: PHONE
john.doe@example.com: EMAIL
john is: NAME
aws: SKILL
restful apis: SKILL
master of science: DEG
bachelor of science in information technology: DEG
python: SKILL
aws: SKILL
azure: SKILL
git: SKILL
docker: SKILL


In [None]:
from spacy import displacy

displacy.render(doc, style="ent", jupyter=True)

## Flair NER

In [1]:
!pip install flair

Collecting flair
  Downloading flair-0.14.0-py3-none-any.whl.metadata (12 kB)
Collecting conllu<5.0.0,>=4.0 (from flair)
  Downloading conllu-4.5.3-py2.py3-none-any.whl.metadata (19 kB)
Collecting ftfy>=6.1.0 (from flair)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting gdown>=4.4.0 (from flair)
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting langdetect>=1.0.9 (from flair)
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting pptree>=3.1 (from flair)
  Downloading pptree-3.1.tar.gz (3.0 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting pytorch-revgrad>=0.2.0 (from flair)
  Downloading pytorch_revgrad-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting segtok>=1.5.11 (from flair)
  Downloading segtok-1.5.11-py3-none-any.whl.metadata (9.0

In [None]:
# convert spacy data into flair data

import spacy

from spacy.tokens import DocBin

import os



def convert_spacy_to_flair(input_file, output_file):

    """

    Convert SpaCy binary format to Flair's CoNLL format.



    Args:

        input_file (str): Path to SpaCy binary file (.spacy)

        output_file (str): Path to output file for Flair format

    """

    # Load spaCy model

    nlp = spacy.blank("en")



    # Load the DocBin

    doc_bin = DocBin().from_disk(input_file)

    docs = list(doc_bin.get_docs(nlp.vocab))



    with open(output_file, 'w', encoding='utf-8') as f:

        for doc in docs:

            tokens = [(t.text, t.ent_iob_, t.ent_type_) for t in doc]



            # Write tokens in CoNLL format

            for token in tokens:

                text, iob, ent_type = token



                # Convert spaCy IOB to CoNLL format

                if iob == 'O':

                    tag = 'O'

                else:

                    tag = f'{iob}-{ent_type}' if ent_type else 'O'



                # Write line: token and NER tag

                f.write(f'{text} {tag}\n')



            # Empty line between sentences

            f.write('\n')



def convert_spacy_json_to_flair(input_file, output_file):

    """

    Convert SpaCy JSON format to Flair's CoNLL format.



    Args:

        input_file (str): Path to JSON file with SpaCy annotations

        output_file (str): Path to output file for Flair format

    """

    import json



    nlp = spacy.blank("en")



    with open(input_file, 'r', encoding='utf-8') as f:

        training_data = json.load(f)



    with open(output_file, 'w', encoding='utf-8') as f:

        for example in training_data:

            text = example['text']

            ents = example.get('entities', [])



            # Create a spaCy doc

            doc = nlp(text)



            # Add entities to doc

            spans = []

            for start, end, label in ents:

                span = doc.char_span(start, end, label=label)

                if span is not None:

                    spans.append(span)

            doc.ents = spans



            # Convert to CoNLL format

            tokens = [(t.text, t.ent_iob_, t.ent_type_) for t in doc]



            for token in tokens:

                text, iob, ent_type = token

                if iob == 'O':

                    tag = 'O'

                else:

                    tag = f'{iob}-{ent_type}' if ent_type else 'O'

                f.write(f'{text} {tag}\n')



            f.write('\n')



# Example usage for JSON format

flair_train_json = "flair_train.txt"

flair_test_json = "flair_test.txt"



convert_spacy_to_flair('/content/spacy_ner_data/train_data.spacy', flair_train_json)

convert_spacy_to_flair('/content/spacy_ner_data/test_data.spacy', flair_test_json)

FileNotFoundError: [Errno 2] No such file or directory: '/content/spacy_ner_data/train_data.spacy'

In [None]:
# convert spacy data into flair data

import spacy

from spacy.training import Corpus



!python -m spacy download de_core_news_sm

nlp = spacy.load("de_core_news_sm")

corpus = Corpus("/content/spacy_ner_data/test_data.spacy")



data = corpus(nlp)



# Flair supports BIO and BIOES, see https://github.com/flairNLP/flair/issues/875

def rename_biluo_to_bioes(old_tag):

    new_tag = ""

    try:

        if old_tag.startswith("L"):

            new_tag = "E" + old_tag[1:]

        elif old_tag.startswith("U"):

            new_tag = "S" + old_tag[1:]

        else:

            new_tag = old_tag

    except:

        pass

    return new_tag





def generate_corpus():

    corpus = []

    n_ex = 0

    for example in data:

        n_ex += 1

        text = example.text

        doc = nlp(text)

        tags = example.get_aligned_ner()

        # Check if it's an empty list of NER tags.

        if None in tags:

            pass

        else:

            new_tags = [rename_biluo_to_bioes(tag) for tag in tags]

            for token, tag in zip(doc,new_tags):

                row = token.text +' '+ token.pos_ +' ' +tag + '\n'

                corpus.append(row)

            corpus.append('\n')

    return corpus



def write_file(filepath):

    with open(filepath, 'w', encoding='utf-8') as f:

        corpus = generate_corpus()

        f.writelines(corpus)



def main():

    write_file('flair_test.txt')



main()

Collecting de-core-news-sm==3.7.0
  Using cached https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.7.0/de_core_news_sm-3.7.0-py3-none-any.whl (14.6 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('de_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
# convert json into flair data

import json

import random

from typing import List, Dict, Tuple

import spacy

from collections import defaultdict



class NERConverter:

    def __init__(self):

        self.nlp = spacy.load("en_core_web_sm")



    def get_bioes_label(self, token_index: int, entity_length: int, current_position: int, label: str) -> str:

        """

        Convert to BIOES format

        - S-: Single token entity

        - B-: Beginning of multi-token entity

        - I-: Inside of multi-token entity

        - E-: End of multi-token entity

        - O: Outside

        """

        if entity_length == 1:

            return f'S-{label}'

        if current_position == 0:

            return f'B-{label}'

        if current_position == entity_length - 1:

            return f'E-{label}'

        return f'I-{label}'



    def convert_to_bioes_format(self, json_data: List[dict]) -> List[List[Tuple[str, str]]]:

        """Convert JSON annotations to BIOES format."""

        all_sentences = []



        for item in json_data:

            text = item['data']['Text']

            doc = self.nlp(text)



            # Initialize character-level labels

            char_labels = ['O'] * len(text)



            # First pass: identify entity boundaries and lengths

            entity_spans = []

            if item['annotations'] and len(item['annotations']) > 0:

                for ann in item['annotations'][0]['result']:

                    if 'value' in ann:

                        start = ann['value']['start']

                        end = ann['value']['end']

                        label = ann['value']['labels'][0]

                        entity_spans.append((start, end, label))



            # Sort spans by start position

            entity_spans.sort(key=lambda x: x[0])



            # Second pass: apply BIOES labels

            for start, end, label in entity_spans:

                # Get tokens that are part of this entity

                entity_text = text[start:end]

                entity_doc = self.nlp(entity_text)

                entity_length = len([token for token in entity_doc if not token.is_space])



                # Set labels for the entire span

                current_token_idx = 0

                for i in range(start, end):

                    if i == start or text[i-1].isspace():

                        char_labels[i] = self.get_bioes_label(i, entity_length, current_token_idx, label)

                        current_token_idx += 1

                    else:

                        char_labels[i] = char_labels[i-1]



            # Convert to token-level labels

            current_sentence = []

            for sent in doc.sents:

                for token in sent:

                    # Get the most common label for the token's characters

                    token_chars_labels = char_labels[token.idx:token.idx + len(token.text)]

                    label_counts = defaultdict(int)

                    for char_label in token_chars_labels:

                        label_counts[char_label] += 1



                    token_label = max(label_counts.items(), key=lambda x: x[1])[0]

                    current_sentence.append((token.text, token_label))



                if current_sentence:

                    all_sentences.append(current_sentence)

                    current_sentence = []



        return all_sentences



    def write_flair_file(self, sentences: List[List[Tuple[str, str]]], filename: str):

        """Write sentences in BIOES format to file."""

        with open(filename, 'w', encoding='utf-8') as f:

            for sentence in sentences:

                for token, label in sentence:

                    f.write(f'{token} {label}\n')

                f.write('\n')



    def convert_and_split(self, json_data: List[dict], train_file: str, test_file: str, test_ratio: float = 0.2):

        """Convert JSON to BIOES format and split into train/test sets."""

        all_sentences = self.convert_to_bioes_format(json_data)



        # Shuffle and split

        random.shuffle(all_sentences)

        split_idx = int(len(all_sentences) * (1 - test_ratio))



        train_sentences = all_sentences[:split_idx]

        test_sentences = all_sentences[split_idx:]



        # Write to files

        self.write_flair_file(train_sentences, train_file)

        self.write_flair_file(test_sentences, test_file)



        return len(train_sentences), len(test_sentences)



def main():

    # Load JSON data

    with open('/kaggle/input/resume-dataset/200_resumes_annotated.json', 'r', encoding='utf-8') as f:

        json_data = json.load(f)



    # Convert and split data

    converter = NERConverter()

    train_count, test_count = converter.convert_and_split(

        json_data,

        train_file='flair_train.txt',

        test_file='flair_test.txt',

        test_ratio=0.2

    )



    print(f'Created {train_count} training sentences and {test_count} test sentences')



main()

Created 298 training sentences and 75 test sentences


In [3]:
from flair.data import Corpus

from flair.datasets import ColumnCorpus



# Define columns for CoNLL (0: word, 1: label)

columns = {0: 'text', 1: 'ner'}



# Set data folder and file names

data_folder = './'

train_file = '/kaggle/working/flair_test.txt'

test_file = '/kaggle/working/flair_test.txt'



# Load the corpus

corpus: Corpus = ColumnCorpus(data_folder, columns,

                              train_file=train_file,

                              test_file=test_file,

                              dev_file=None)

2024-11-06 13:28:03,750 Reading data from .
2024-11-06 13:28:03,751 Train: /kaggle/working/flair_test.txt
2024-11-06 13:28:03,752 Dev: None
2024-11-06 13:28:03,753 Test: /kaggle/working/flair_test.txt
2024-11-06 13:28:04,853 No dev split found. Using 10% (i.e. 8 samples) of the train split as dev data


In [4]:
tag_dictionary = corpus.make_label_dictionary(label_type='ner')

print("Labels:", tag_dictionary.get_items())

2024-11-06 13:28:11,657 Computing label dictionary. Progress:


0it [00:00, ?it/s]
67it [00:00, 9462.22it/s]

2024-11-06 13:28:11,698 Dictionary created for label 'ner' with 11 values: SKILL (seen 684 times), JOB (seen 142 times), WORK (seen 122 times), COMPANY (seen 98 times), LOC (seen 60 times), UNI (seen 32 times), NAME (seen 31 times), DEG (seen 31 times), PHONE (seen 29 times), STUDY (seen 26 times), EMAIL (seen 20 times)
Labels: ['SKILL', 'JOB', 'WORK', 'COMPANY', 'LOC', 'UNI', 'NAME', 'DEG', 'PHONE', 'STUDY', 'EMAIL']





In [5]:
from collections import Counter



def count_labels(file_path):

    with open(file_path, 'r') as file:

        labels = [line.split()[-1] for line in file if line.strip()]

    return Counter(labels)



print("Train label distribution:", count_labels('flair_train.txt'))

print("Test label distribution:", count_labels('flair_test.txt'))

Train label distribution: Counter({'O': 68889, 'S-SKILL': 1333, 'PER': 1229, 'B-SKILL': 953, 'E-SKILL': 952, 'E-JOB': 523, 'B-JOB': 512, 'E-COMPANY': 337, 'B-COMPANY': 333, 'I-JOB': 287, 'I-DEG': 275, 'I-COMPANY': 238, 'E-UNI': 177, 'B-UNI': 176, 'I-SKILL': 173, 'S-LOC': 171, 'B-DEG': 171, 'E-DEG': 171, 'B-NAME': 156, 'E-NAME': 156, 'I-UNI': 140, 'E-PHONE': 128, 'B-PHONE': 126, 'S-EMAIL': 124, 'B-LOC': 100, 'E-LOC': 100, 'I-PHONE': 83, 'S-COMPANY': 64, 'S-JOB': 58, 'S-PHONE': 25, 'I-NAME': 10, 'I-LOC': 5, 'S-DEG': 4, 'S-UNI': 2, 'B-EMAIL': 1, 'E-EMAIL': 1})
Test label distribution: Counter({'O': 20204, 'S-SKILL': 458, 'PER': 282, 'B-SKILL': 260, 'E-SKILL': 257, 'E-JOB': 136, 'B-JOB': 135, 'E-COMPANY': 90, 'B-COMPANY': 89, 'I-COMPANY': 77, 'I-JOB': 70, 'I-SKILL': 63, 'I-DEG': 59, 'B-UNI': 38, 'E-UNI': 38, 'B-NAME': 35, 'E-NAME': 35, 'S-LOC': 34, 'I-UNI': 31, 'E-DEG': 31, 'B-DEG': 30, 'B-PHONE': 29, 'E-PHONE': 29, 'B-LOC': 28, 'E-LOC': 28, 'S-EMAIL': 24, 'I-PHONE': 23, 'S-JOB': 17, 'S-CO

In [6]:
# create NER tagger

from flair.embeddings import WordEmbeddings, StackedEmbeddings, TransformerWordEmbeddings, FlairEmbeddings

from flair.models import SequenceTagger



# using LSTM-CRF on top of frozen embeddings

# combine flair and glove embeddings

# embeddings = StackedEmbeddings([

#                 WordEmbeddings('glove'),

#                 FlairEmbeddings('news-forward'),

#                 FlairEmbeddings('news-backward'),

#             ])

# tagger = SequenceTagger(hidden_size=256,

#                          embeddings=embeddings,

#                          tag_dictionary=tag_dictionary,

#                          tag_type='ner',

#                          use_crf=True,

#                          tag_format = 'BIOES')



# using transformer embedding

# embeddings = TransformerWordEmbeddings('bert-base-uncased',

#                                       fine_tune=True,

#                                       layers='-1',

#                                       subtoken_pooling='first')

embeddings = TransformerWordEmbeddings(

    'roberta-base',  # or 'bert-base-uncased'

    fine_tune=True,

    layers='-1,-2,-3,-4',  # Use last 4 layers

    subtoken_pooling='first',

    allow_long_sentences=True

)

tagger = SequenceTagger(hidden_size=256,

                         embeddings=embeddings,

                         tag_dictionary=tag_dictionary,

                         tag_type='ner',

                         use_crf=False,

                         use_rnn=False,

                         reproject_embeddings=False,

                         tag_format = 'BIOES')

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

2024-11-06 13:28:24,978 SequenceTagger predicts: Dictionary with 45 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-NAME, B-NAME, E-NAME, I-NAME, S-DEG, B-DEG, E-DEG, I-DEG, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL


In [8]:
# train flair ner model

from flair.trainers import ModelTrainer

from flair.training_utils import EvaluationMetric



trainer = ModelTrainer(tagger, corpus)



trainer.train(

    base_path='flair_output/',

    learning_rate=0.3,

    mini_batch_size=16,

    max_epochs=150,

    train_with_dev=False

)

!cp -r ./flair_output /kaggle/working/

2024-11-06 13:29:04,157 ----------------------------------------------------------------------------------------------------
2024-11-06 13:29:04,161 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50266, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features

100%|██████████| 1/1 [00:00<00:00,  1.69it/s]

2024-11-06 13:29:13,660 DEV : loss 1.2978410720825195 - f1-score (micro avg)  0.0
2024-11-06 13:29:13,665  - 0 epochs without improvement
2024-11-06 13:29:13,666 ----------------------------------------------------------------------------------------------------





2024-11-06 13:29:13,919 epoch 2 - iter 1/17 - loss 0.86249295 - time (sec): 0.25 - samples/sec: 6059.16 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:14,604 epoch 2 - iter 2/17 - loss 1.11384921 - time (sec): 0.94 - samples/sec: 3750.16 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:15,355 epoch 2 - iter 3/17 - loss 1.18115562 - time (sec): 1.69 - samples/sec: 2496.55 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:15,801 epoch 2 - iter 4/17 - loss 1.14416695 - time (sec): 2.13 - samples/sec: 2440.66 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:16,254 epoch 2 - iter 5/17 - loss 1.20517979 - time (sec): 2.59 - samples/sec: 2527.35 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:16,716 epoch 2 - iter 6/17 - loss 1.15814114 - time (sec): 3.05 - samples/sec: 2435.23 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:17,080 epoch 2 - iter 7/17 - loss 1.15404000 - time (sec): 3.41 - samples/sec: 2355.20 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:17,

100%|██████████| 1/1 [00:00<00:00,  1.45it/s]

2024-11-06 13:29:23,271 DEV : loss 0.9196046590805054 - f1-score (micro avg)  0.0
2024-11-06 13:29:23,275  - 0 epochs without improvement
2024-11-06 13:29:23,277 ----------------------------------------------------------------------------------------------------





2024-11-06 13:29:23,472 epoch 3 - iter 1/17 - loss 0.77415106 - time (sec): 0.19 - samples/sec: 5979.32 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:23,909 epoch 3 - iter 2/17 - loss 1.34678071 - time (sec): 0.63 - samples/sec: 4162.97 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:24,482 epoch 3 - iter 3/17 - loss 1.19104909 - time (sec): 1.20 - samples/sec: 4306.86 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:25,244 epoch 3 - iter 4/17 - loss 1.15533632 - time (sec): 1.96 - samples/sec: 3123.74 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:25,711 epoch 3 - iter 5/17 - loss 1.21935152 - time (sec): 2.43 - samples/sec: 2881.70 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:26,093 epoch 3 - iter 6/17 - loss 1.22705776 - time (sec): 2.81 - samples/sec: 2710.22 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:26,569 epoch 3 - iter 7/17 - loss 1.11000856 - time (sec): 3.29 - samples/sec: 3057.77 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:27,

100%|██████████| 1/1 [00:00<00:00,  1.83it/s]

2024-11-06 13:29:32,754 DEV : loss 1.3302602767944336 - f1-score (micro avg)  0.0
2024-11-06 13:29:32,759  - 1 epochs without improvement
2024-11-06 13:29:32,761 ----------------------------------------------------------------------------------------------------





2024-11-06 13:29:33,044 epoch 4 - iter 1/17 - loss 1.33976475 - time (sec): 0.28 - samples/sec: 5050.33 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:33,603 epoch 4 - iter 2/17 - loss 1.06239767 - time (sec): 0.84 - samples/sec: 2781.85 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:34,015 epoch 4 - iter 3/17 - loss 1.13536655 - time (sec): 1.25 - samples/sec: 2897.71 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:34,412 epoch 4 - iter 4/17 - loss 1.09889204 - time (sec): 1.65 - samples/sec: 2482.45 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:34,828 epoch 4 - iter 5/17 - loss 0.95502331 - time (sec): 2.07 - samples/sec: 2957.18 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:35,423 epoch 4 - iter 6/17 - loss 1.04842362 - time (sec): 2.66 - samples/sec: 2676.34 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:35,948 epoch 4 - iter 7/17 - loss 1.00116433 - time (sec): 3.19 - samples/sec: 2520.41 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:36,

100%|██████████| 1/1 [00:00<00:00,  1.19it/s]

2024-11-06 13:29:42,502 DEV : loss 1.3769663572311401 - f1-score (micro avg)  0.0
2024-11-06 13:29:42,508  - 2 epochs without improvement
2024-11-06 13:29:42,509 ----------------------------------------------------------------------------------------------------





2024-11-06 13:29:42,807 epoch 5 - iter 1/17 - loss 0.74390360 - time (sec): 0.30 - samples/sec: 7318.34 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:43,482 epoch 5 - iter 2/17 - loss 0.84323460 - time (sec): 0.97 - samples/sec: 3337.84 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:43,911 epoch 5 - iter 3/17 - loss 0.84418253 - time (sec): 1.40 - samples/sec: 2763.42 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:44,238 epoch 5 - iter 4/17 - loss 0.84073802 - time (sec): 1.73 - samples/sec: 2833.19 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:44,882 epoch 5 - iter 5/17 - loss 0.84299065 - time (sec): 2.37 - samples/sec: 2863.64 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:45,784 epoch 5 - iter 6/17 - loss 0.85364915 - time (sec): 3.27 - samples/sec: 2471.30 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:46,383 epoch 5 - iter 7/17 - loss 0.92169080 - time (sec): 3.87 - samples/sec: 2343.59 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:46,

100%|██████████| 1/1 [00:00<00:00,  1.03it/s]

2024-11-06 13:29:52,575 DEV : loss 1.0534536838531494 - f1-score (micro avg)  0.0
2024-11-06 13:29:52,581  - 3 epochs without improvement
2024-11-06 13:29:52,582 ----------------------------------------------------------------------------------------------------





2024-11-06 13:29:52,871 epoch 6 - iter 1/17 - loss 0.66732180 - time (sec): 0.29 - samples/sec: 6323.90 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:53,481 epoch 6 - iter 2/17 - loss 0.84853562 - time (sec): 0.90 - samples/sec: 3021.14 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:54,006 epoch 6 - iter 3/17 - loss 0.87282276 - time (sec): 1.42 - samples/sec: 2545.21 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:54,500 epoch 6 - iter 4/17 - loss 0.89716471 - time (sec): 1.91 - samples/sec: 2435.17 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:54,987 epoch 6 - iter 5/17 - loss 0.82567154 - time (sec): 2.40 - samples/sec: 2673.75 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:55,605 epoch 6 - iter 6/17 - loss 0.80565701 - time (sec): 3.02 - samples/sec: 2316.80 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:55,983 epoch 6 - iter 7/17 - loss 0.78354647 - time (sec): 3.40 - samples/sec: 2574.09 - lr: 0.100000 - momentum: 0.000000
2024-11-06 13:29:56,

100%|██████████| 1/1 [00:00<00:00,  1.57it/s]

2024-11-06 13:30:03,158 DEV : loss 0.9470928311347961 - f1-score (micro avg)  0.0
2024-11-06 13:30:03,164  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.05]
2024-11-06 13:30:03,165 ----------------------------------------------------------------------------------------------------





2024-11-06 13:30:03,364 epoch 7 - iter 1/17 - loss 1.63868205 - time (sec): 0.20 - samples/sec: 4772.40 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:03,906 epoch 7 - iter 2/17 - loss 1.24627979 - time (sec): 0.74 - samples/sec: 3493.74 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:04,388 epoch 7 - iter 3/17 - loss 1.23988780 - time (sec): 1.22 - samples/sec: 2688.20 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:04,679 epoch 7 - iter 4/17 - loss 1.17134407 - time (sec): 1.51 - samples/sec: 2964.95 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:05,234 epoch 7 - iter 5/17 - loss 1.04835570 - time (sec): 2.07 - samples/sec: 2994.14 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:05,981 epoch 7 - iter 6/17 - loss 1.00879858 - time (sec): 2.81 - samples/sec: 2561.68 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:06,676 epoch 7 - iter 7/17 - loss 0.90902266 - time (sec): 3.51 - samples/sec: 2616.91 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:07,

100%|██████████| 1/1 [00:00<00:00,  1.17it/s]

2024-11-06 13:30:13,474 DEV : loss 0.975744366645813 - f1-score (micro avg)  0.0
2024-11-06 13:30:13,480  - 1 epochs without improvement
2024-11-06 13:30:13,482 ----------------------------------------------------------------------------------------------------
2024-11-06 13:30:13,637 epoch 8 - iter 1/17 - loss 0.62833754 - time (sec): 0.15 - samples/sec: 5589.90 - lr: 0.050000 - momentum: 0.000000





2024-11-06 13:30:14,132 epoch 8 - iter 2/17 - loss 0.66924452 - time (sec): 0.65 - samples/sec: 3950.27 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:14,907 epoch 8 - iter 3/17 - loss 0.84981812 - time (sec): 1.42 - samples/sec: 2755.86 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:15,543 epoch 8 - iter 4/17 - loss 0.83638730 - time (sec): 2.06 - samples/sec: 2504.88 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:16,166 epoch 8 - iter 5/17 - loss 0.96360884 - time (sec): 2.68 - samples/sec: 2288.75 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:16,707 epoch 8 - iter 6/17 - loss 0.95915947 - time (sec): 3.22 - samples/sec: 2204.97 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:17,212 epoch 8 - iter 7/17 - loss 0.89524921 - time (sec): 3.73 - samples/sec: 2189.96 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:17,878 epoch 8 - iter 8/17 - loss 0.83881946 - time (sec): 4.39 - samples/sec: 2365.49 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:18,

100%|██████████| 1/1 [00:00<00:00,  1.35it/s]

2024-11-06 13:30:24,114 DEV : loss 1.0001401901245117 - f1-score (micro avg)  0.0
2024-11-06 13:30:24,119  - 2 epochs without improvement
2024-11-06 13:30:24,120 ----------------------------------------------------------------------------------------------------





2024-11-06 13:30:24,384 epoch 9 - iter 1/17 - loss 0.96867381 - time (sec): 0.26 - samples/sec: 4566.71 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:24,952 epoch 9 - iter 2/17 - loss 0.72646691 - time (sec): 0.83 - samples/sec: 3028.34 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:25,397 epoch 9 - iter 3/17 - loss 0.82021002 - time (sec): 1.27 - samples/sec: 2339.60 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:25,960 epoch 9 - iter 4/17 - loss 0.70176835 - time (sec): 1.84 - samples/sec: 2542.39 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:26,794 epoch 9 - iter 5/17 - loss 0.72216249 - time (sec): 2.67 - samples/sec: 2200.73 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:27,303 epoch 9 - iter 6/17 - loss 0.71606039 - time (sec): 3.18 - samples/sec: 2160.73 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:27,898 epoch 9 - iter 7/17 - loss 0.73626511 - time (sec): 3.78 - samples/sec: 2160.07 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:28,

100%|██████████| 1/1 [00:00<00:00,  1.27it/s]

2024-11-06 13:30:34,691 DEV : loss 0.9782727360725403 - f1-score (micro avg)  0.0
2024-11-06 13:30:34,697  - 3 epochs without improvement
2024-11-06 13:30:34,698 ----------------------------------------------------------------------------------------------------





2024-11-06 13:30:35,022 epoch 10 - iter 1/17 - loss 0.75888710 - time (sec): 0.32 - samples/sec: 5789.61 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:35,722 epoch 10 - iter 2/17 - loss 0.80223661 - time (sec): 1.02 - samples/sec: 2635.55 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:36,222 epoch 10 - iter 3/17 - loss 0.73950446 - time (sec): 1.52 - samples/sec: 2737.80 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:36,883 epoch 10 - iter 4/17 - loss 0.70305610 - time (sec): 2.18 - samples/sec: 2711.95 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:37,687 epoch 10 - iter 5/17 - loss 0.68062935 - time (sec): 2.99 - samples/sec: 2788.78 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:38,872 epoch 10 - iter 6/17 - loss 0.66304573 - time (sec): 4.17 - samples/sec: 2572.05 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13:30:39,779 epoch 10 - iter 7/17 - loss 0.67829113 - time (sec): 5.08 - samples/sec: 2320.44 - lr: 0.050000 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.41it/s]

2024-11-06 13:30:45,141 DEV : loss 1.0052577257156372 - f1-score (micro avg)  0.0
2024-11-06 13:30:45,146  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.025]
2024-11-06 13:30:45,147 ----------------------------------------------------------------------------------------------------





2024-11-06 13:30:45,412 epoch 11 - iter 1/17 - loss 0.42248320 - time (sec): 0.26 - samples/sec: 5088.50 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:46,045 epoch 11 - iter 2/17 - loss 0.52286142 - time (sec): 0.89 - samples/sec: 3358.04 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:46,627 epoch 11 - iter 3/17 - loss 0.60133409 - time (sec): 1.48 - samples/sec: 2813.21 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:47,187 epoch 11 - iter 4/17 - loss 0.68284444 - time (sec): 2.04 - samples/sec: 2794.40 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:47,825 epoch 11 - iter 5/17 - loss 0.67545826 - time (sec): 2.68 - samples/sec: 2437.70 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:48,502 epoch 11 - iter 6/17 - loss 0.60518797 - time (sec): 3.35 - samples/sec: 2503.00 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:49,134 epoch 11 - iter 7/17 - loss 0.63798738 - time (sec): 3.98 - samples/sec: 2285.37 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.62it/s]

2024-11-06 13:30:55,776 DEV : loss 0.9812911748886108 - f1-score (micro avg)  0.0
2024-11-06 13:30:55,781  - 1 epochs without improvement
2024-11-06 13:30:55,783 ----------------------------------------------------------------------------------------------------





2024-11-06 13:30:56,140 epoch 12 - iter 1/17 - loss 0.69301105 - time (sec): 0.35 - samples/sec: 6237.07 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:56,898 epoch 12 - iter 2/17 - loss 0.65472355 - time (sec): 1.11 - samples/sec: 3186.37 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:57,351 epoch 12 - iter 3/17 - loss 0.69309347 - time (sec): 1.57 - samples/sec: 2616.63 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:57,823 epoch 12 - iter 4/17 - loss 0.72000793 - time (sec): 2.04 - samples/sec: 2656.15 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:58,584 epoch 12 - iter 5/17 - loss 0.69779901 - time (sec): 2.80 - samples/sec: 2531.78 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:59,390 epoch 12 - iter 6/17 - loss 0.66241189 - time (sec): 3.60 - samples/sec: 2361.41 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:30:59,952 epoch 12 - iter 7/17 - loss 0.67982713 - time (sec): 4.17 - samples/sec: 2297.56 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.10it/s]

2024-11-06 13:31:06,254 DEV : loss 1.0119669437408447 - f1-score (micro avg)  0.0
2024-11-06 13:31:06,260  - 2 epochs without improvement
2024-11-06 13:31:06,261 ----------------------------------------------------------------------------------------------------





2024-11-06 13:31:06,532 epoch 13 - iter 1/17 - loss 0.81097806 - time (sec): 0.27 - samples/sec: 5548.87 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:06,978 epoch 13 - iter 2/17 - loss 0.71859269 - time (sec): 0.71 - samples/sec: 2624.73 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:07,388 epoch 13 - iter 3/17 - loss 0.63303943 - time (sec): 1.12 - samples/sec: 3250.59 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:07,989 epoch 13 - iter 4/17 - loss 0.71890318 - time (sec): 1.72 - samples/sec: 2457.07 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:08,284 epoch 13 - iter 5/17 - loss 0.77983938 - time (sec): 2.02 - samples/sec: 2496.96 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:08,855 epoch 13 - iter 6/17 - loss 0.72487627 - time (sec): 2.59 - samples/sec: 2845.28 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:09,722 epoch 13 - iter 7/17 - loss 0.74184589 - time (sec): 3.46 - samples/sec: 2590.28 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.35it/s]

2024-11-06 13:31:16,024 DEV : loss 0.963586151599884 - f1-score (micro avg)  0.0
2024-11-06 13:31:16,030  - 3 epochs without improvement
2024-11-06 13:31:16,031 ----------------------------------------------------------------------------------------------------





2024-11-06 13:31:16,295 epoch 14 - iter 1/17 - loss 0.74791621 - time (sec): 0.26 - samples/sec: 5824.79 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:16,914 epoch 14 - iter 2/17 - loss 0.86315232 - time (sec): 0.88 - samples/sec: 3375.82 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:17,564 epoch 14 - iter 3/17 - loss 0.80231484 - time (sec): 1.53 - samples/sec: 3282.36 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:18,157 epoch 14 - iter 4/17 - loss 0.78061434 - time (sec): 2.12 - samples/sec: 2809.64 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:18,619 epoch 14 - iter 5/17 - loss 0.76577719 - time (sec): 2.58 - samples/sec: 2726.83 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:19,350 epoch 14 - iter 6/17 - loss 0.73002145 - time (sec): 3.32 - samples/sec: 2718.89 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13:31:20,265 epoch 14 - iter 7/17 - loss 0.71525271 - time (sec): 4.23 - samples/sec: 2493.21 - lr: 0.025000 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.69it/s]

2024-11-06 13:31:26,110 DEV : loss 1.0365099906921387 - f1-score (micro avg)  0.0
2024-11-06 13:31:26,114  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0125]
2024-11-06 13:31:26,116 ----------------------------------------------------------------------------------------------------





2024-11-06 13:31:26,369 epoch 15 - iter 1/17 - loss 0.71299071 - time (sec): 0.25 - samples/sec: 4358.99 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:26,985 epoch 15 - iter 2/17 - loss 0.72748292 - time (sec): 0.87 - samples/sec: 2978.32 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:27,821 epoch 15 - iter 3/17 - loss 0.60285611 - time (sec): 1.70 - samples/sec: 2744.26 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:28,761 epoch 15 - iter 4/17 - loss 0.59433155 - time (sec): 2.64 - samples/sec: 2263.57 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:29,277 epoch 15 - iter 5/17 - loss 0.60782531 - time (sec): 3.16 - samples/sec: 2252.26 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:29,694 epoch 15 - iter 6/17 - loss 0.67084320 - time (sec): 3.58 - samples/sec: 2376.89 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:30,188 epoch 15 - iter 7/17 - loss 0.69590986 - time (sec): 4.07 - samples/sec: 2363.73 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.79it/s]

2024-11-06 13:31:36,192 DEV : loss 0.9908255338668823 - f1-score (micro avg)  0.0
2024-11-06 13:31:36,197  - 1 epochs without improvement
2024-11-06 13:31:36,198 ----------------------------------------------------------------------------------------------------





2024-11-06 13:31:36,399 epoch 16 - iter 1/17 - loss 1.00005551 - time (sec): 0.20 - samples/sec: 4239.21 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:36,947 epoch 16 - iter 2/17 - loss 0.79657741 - time (sec): 0.75 - samples/sec: 2922.03 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:37,527 epoch 16 - iter 3/17 - loss 0.74374537 - time (sec): 1.33 - samples/sec: 2663.43 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:38,027 epoch 16 - iter 4/17 - loss 0.73688856 - time (sec): 1.83 - samples/sec: 2620.50 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:38,706 epoch 16 - iter 5/17 - loss 0.68380490 - time (sec): 2.51 - samples/sec: 2903.95 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:39,489 epoch 16 - iter 6/17 - loss 0.70742801 - time (sec): 3.29 - samples/sec: 2518.09 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:40,012 epoch 16 - iter 7/17 - loss 0.66895114 - time (sec): 3.81 - samples/sec: 2625.26 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.39it/s]

2024-11-06 13:31:46,716 DEV : loss 0.9979218244552612 - f1-score (micro avg)  0.0
2024-11-06 13:31:46,721  - 2 epochs without improvement
2024-11-06 13:31:46,722 ----------------------------------------------------------------------------------------------------





2024-11-06 13:31:46,985 epoch 17 - iter 1/17 - loss 0.55179283 - time (sec): 0.26 - samples/sec: 5562.91 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:47,603 epoch 17 - iter 2/17 - loss 0.47486344 - time (sec): 0.88 - samples/sec: 3012.11 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:48,283 epoch 17 - iter 3/17 - loss 0.59634867 - time (sec): 1.56 - samples/sec: 2890.53 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:48,884 epoch 17 - iter 4/17 - loss 0.62367415 - time (sec): 2.16 - samples/sec: 2286.48 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:49,153 epoch 17 - iter 5/17 - loss 0.70238783 - time (sec): 2.43 - samples/sec: 2427.31 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:49,612 epoch 17 - iter 6/17 - loss 0.69818456 - time (sec): 2.89 - samples/sec: 2372.50 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:50,054 epoch 17 - iter 7/17 - loss 0.73239633 - time (sec): 3.33 - samples/sec: 2173.77 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.07it/s]

2024-11-06 13:31:57,071 DEV : loss 0.9757465124130249 - f1-score (micro avg)  0.0
2024-11-06 13:31:57,076  - 3 epochs without improvement
2024-11-06 13:31:57,077 ----------------------------------------------------------------------------------------------------





2024-11-06 13:31:57,348 epoch 18 - iter 1/17 - loss 0.64832506 - time (sec): 0.27 - samples/sec: 6016.22 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:57,976 epoch 18 - iter 2/17 - loss 0.64938447 - time (sec): 0.90 - samples/sec: 3170.76 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:58,466 epoch 18 - iter 3/17 - loss 0.74716851 - time (sec): 1.39 - samples/sec: 2588.47 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:59,233 epoch 18 - iter 4/17 - loss 0.69682540 - time (sec): 2.15 - samples/sec: 2117.65 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:31:59,837 epoch 18 - iter 5/17 - loss 0.64431590 - time (sec): 2.76 - samples/sec: 2326.75 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:32:00,551 epoch 18 - iter 6/17 - loss 0.67214939 - time (sec): 3.47 - samples/sec: 2257.73 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13:32:01,153 epoch 18 - iter 7/17 - loss 0.66699528 - time (sec): 4.07 - samples/sec: 2375.34 - lr: 0.012500 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.38it/s]

2024-11-06 13:32:08,090 DEV : loss 0.9815186262130737 - f1-score (micro avg)  0.0
2024-11-06 13:32:08,095  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00625]
2024-11-06 13:32:08,097 ----------------------------------------------------------------------------------------------------





2024-11-06 13:32:08,296 epoch 19 - iter 1/17 - loss 0.59046281 - time (sec): 0.20 - samples/sec: 4328.09 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:08,671 epoch 19 - iter 2/17 - loss 0.74094000 - time (sec): 0.57 - samples/sec: 2079.36 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:09,070 epoch 19 - iter 3/17 - loss 0.65337753 - time (sec): 0.97 - samples/sec: 2943.01 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:09,706 epoch 19 - iter 4/17 - loss 0.68240891 - time (sec): 1.61 - samples/sec: 2573.27 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:10,209 epoch 19 - iter 5/17 - loss 0.67438089 - time (sec): 2.11 - samples/sec: 2547.42 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:10,810 epoch 19 - iter 6/17 - loss 0.67724466 - time (sec): 2.71 - samples/sec: 2623.66 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:11,808 epoch 19 - iter 7/17 - loss 0.63951871 - time (sec): 3.71 - samples/sec: 2702.50 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.61it/s]

2024-11-06 13:32:18,296 DEV : loss 0.9744807481765747 - f1-score (micro avg)  0.0
2024-11-06 13:32:18,302  - 1 epochs without improvement
2024-11-06 13:32:18,303 ----------------------------------------------------------------------------------------------------





2024-11-06 13:32:18,600 epoch 20 - iter 1/17 - loss 0.77022737 - time (sec): 0.29 - samples/sec: 5951.71 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:19,337 epoch 20 - iter 2/17 - loss 0.62696092 - time (sec): 1.03 - samples/sec: 3009.60 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:19,914 epoch 20 - iter 3/17 - loss 0.67488961 - time (sec): 1.61 - samples/sec: 2358.82 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:20,313 epoch 20 - iter 4/17 - loss 0.72344424 - time (sec): 2.01 - samples/sec: 2466.46 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:20,765 epoch 20 - iter 5/17 - loss 0.71349882 - time (sec): 2.46 - samples/sec: 2287.24 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:21,163 epoch 20 - iter 6/17 - loss 0.69316171 - time (sec): 2.86 - samples/sec: 2210.30 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:21,771 epoch 20 - iter 7/17 - loss 0.66165761 - time (sec): 3.46 - samples/sec: 2257.37 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.39it/s]

2024-11-06 13:32:28,644 DEV : loss 0.9844509959220886 - f1-score (micro avg)  0.0
2024-11-06 13:32:28,649  - 2 epochs without improvement
2024-11-06 13:32:28,650 ----------------------------------------------------------------------------------------------------





2024-11-06 13:32:28,948 epoch 21 - iter 1/17 - loss 0.79379886 - time (sec): 0.29 - samples/sec: 6042.49 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:29,561 epoch 21 - iter 2/17 - loss 0.77245962 - time (sec): 0.91 - samples/sec: 2962.47 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:30,054 epoch 21 - iter 3/17 - loss 0.73486644 - time (sec): 1.40 - samples/sec: 2899.06 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:30,625 epoch 21 - iter 4/17 - loss 0.74573696 - time (sec): 1.97 - samples/sec: 2736.34 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:31,110 epoch 21 - iter 5/17 - loss 0.80710891 - time (sec): 2.46 - samples/sec: 2469.16 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:31,904 epoch 21 - iter 6/17 - loss 0.74307126 - time (sec): 3.25 - samples/sec: 2533.87 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:32,857 epoch 21 - iter 7/17 - loss 0.72415300 - time (sec): 4.20 - samples/sec: 2232.52 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.59it/s]

2024-11-06 13:32:39,062 DEV : loss 0.9707861542701721 - f1-score (micro avg)  0.0
2024-11-06 13:32:39,068  - 3 epochs without improvement
2024-11-06 13:32:39,070 ----------------------------------------------------------------------------------------------------





2024-11-06 13:32:39,369 epoch 22 - iter 1/17 - loss 0.63072056 - time (sec): 0.30 - samples/sec: 5729.56 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:40,058 epoch 22 - iter 2/17 - loss 0.81922162 - time (sec): 0.99 - samples/sec: 2701.41 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:40,711 epoch 22 - iter 3/17 - loss 0.78016105 - time (sec): 1.64 - samples/sec: 2560.99 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:41,340 epoch 22 - iter 4/17 - loss 0.79360571 - time (sec): 2.27 - samples/sec: 2377.69 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:41,836 epoch 22 - iter 5/17 - loss 0.75072758 - time (sec): 2.76 - samples/sec: 2435.46 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:42,432 epoch 22 - iter 6/17 - loss 0.71563964 - time (sec): 3.36 - samples/sec: 2509.47 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13:32:42,992 epoch 22 - iter 7/17 - loss 0.73496275 - time (sec): 3.92 - samples/sec: 2342.64 - lr: 0.006250 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.80it/s]

2024-11-06 13:32:49,308 DEV : loss 0.9694034457206726 - f1-score (micro avg)  0.0
2024-11-06 13:32:49,313  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.003125]
2024-11-06 13:32:49,315 ----------------------------------------------------------------------------------------------------





2024-11-06 13:32:49,581 epoch 23 - iter 1/17 - loss 0.45010662 - time (sec): 0.26 - samples/sec: 4995.08 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:32:50,020 epoch 23 - iter 2/17 - loss 0.58613169 - time (sec): 0.70 - samples/sec: 2411.75 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:32:50,316 epoch 23 - iter 3/17 - loss 0.63064679 - time (sec): 1.00 - samples/sec: 2600.14 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:32:50,851 epoch 23 - iter 4/17 - loss 0.60159375 - time (sec): 1.53 - samples/sec: 2417.09 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:32:51,573 epoch 23 - iter 5/17 - loss 0.62851776 - time (sec): 2.25 - samples/sec: 2693.70 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:32:52,634 epoch 23 - iter 6/17 - loss 0.58247893 - time (sec): 3.32 - samples/sec: 2737.22 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:32:53,605 epoch 23 - iter 7/17 - loss 0.58905324 - time (sec): 4.29 - samples/sec: 2368.16 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.26it/s]

2024-11-06 13:32:59,741 DEV : loss 0.9711229205131531 - f1-score (micro avg)  0.0
2024-11-06 13:32:59,746  - 1 epochs without improvement
2024-11-06 13:32:59,747 ----------------------------------------------------------------------------------------------------





2024-11-06 13:33:00,008 epoch 24 - iter 1/17 - loss 0.38358811 - time (sec): 0.26 - samples/sec: 6239.63 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:00,746 epoch 24 - iter 2/17 - loss 0.50779875 - time (sec): 1.00 - samples/sec: 3788.61 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:01,449 epoch 24 - iter 3/17 - loss 0.61359124 - time (sec): 1.70 - samples/sec: 2639.25 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:01,792 epoch 24 - iter 4/17 - loss 0.67407538 - time (sec): 2.04 - samples/sec: 2492.91 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:02,255 epoch 24 - iter 5/17 - loss 0.63109877 - time (sec): 2.50 - samples/sec: 2434.18 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:02,773 epoch 24 - iter 6/17 - loss 0.65278358 - time (sec): 3.02 - samples/sec: 2226.57 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:03,120 epoch 24 - iter 7/17 - loss 0.66558314 - time (sec): 3.37 - samples/sec: 2172.00 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.69it/s]

2024-11-06 13:33:10,107 DEV : loss 0.9702813625335693 - f1-score (micro avg)  0.0
2024-11-06 13:33:10,112  - 2 epochs without improvement
2024-11-06 13:33:10,113 ----------------------------------------------------------------------------------------------------





2024-11-06 13:33:10,316 epoch 25 - iter 1/17 - loss 0.85208865 - time (sec): 0.20 - samples/sec: 4683.00 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:10,893 epoch 25 - iter 2/17 - loss 0.79084210 - time (sec): 0.78 - samples/sec: 3074.05 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:11,529 epoch 25 - iter 3/17 - loss 0.87441253 - time (sec): 1.41 - samples/sec: 2372.41 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:11,945 epoch 25 - iter 4/17 - loss 0.89475252 - time (sec): 1.83 - samples/sec: 2109.42 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:12,386 epoch 25 - iter 5/17 - loss 0.78513159 - time (sec): 2.27 - samples/sec: 2497.87 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:13,226 epoch 25 - iter 6/17 - loss 0.70599780 - time (sec): 3.11 - samples/sec: 2435.12 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:13,994 epoch 25 - iter 7/17 - loss 0.72391381 - time (sec): 3.88 - samples/sec: 2138.83 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.17it/s]

2024-11-06 13:33:20,514 DEV : loss 0.9703528881072998 - f1-score (micro avg)  0.0
2024-11-06 13:33:20,520  - 3 epochs without improvement
2024-11-06 13:33:20,522 ----------------------------------------------------------------------------------------------------





2024-11-06 13:33:20,800 epoch 26 - iter 1/17 - loss 0.57050566 - time (sec): 0.27 - samples/sec: 5141.81 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:21,318 epoch 26 - iter 2/17 - loss 0.58269591 - time (sec): 0.79 - samples/sec: 2542.01 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:21,798 epoch 26 - iter 3/17 - loss 0.69778183 - time (sec): 1.27 - samples/sec: 2879.18 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:22,435 epoch 26 - iter 4/17 - loss 0.65774528 - time (sec): 1.91 - samples/sec: 2441.09 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:22,950 epoch 26 - iter 5/17 - loss 0.66008118 - time (sec): 2.42 - samples/sec: 2172.62 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:23,356 epoch 26 - iter 6/17 - loss 0.70011898 - time (sec): 2.83 - samples/sec: 2188.49 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13:33:23,914 epoch 26 - iter 7/17 - loss 0.69084802 - time (sec): 3.39 - samples/sec: 2167.13 - lr: 0.003125 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.28it/s]

2024-11-06 13:33:30,917 DEV : loss 0.9680372476577759 - f1-score (micro avg)  0.0
2024-11-06 13:33:30,922  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0015625]
2024-11-06 13:33:30,924 ----------------------------------------------------------------------------------------------------





2024-11-06 13:33:31,219 epoch 27 - iter 1/17 - loss 0.77046438 - time (sec): 0.29 - samples/sec: 5497.80 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:31,936 epoch 27 - iter 2/17 - loss 0.69858523 - time (sec): 1.01 - samples/sec: 2738.68 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:32,803 epoch 27 - iter 3/17 - loss 0.60050260 - time (sec): 1.88 - samples/sec: 3025.54 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:33,734 epoch 27 - iter 4/17 - loss 0.63344888 - time (sec): 2.81 - samples/sec: 2277.59 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:34,240 epoch 27 - iter 5/17 - loss 0.63908859 - time (sec): 3.31 - samples/sec: 2386.36 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:35,038 epoch 27 - iter 6/17 - loss 0.65093975 - time (sec): 4.11 - samples/sec: 2363.48 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:35,671 epoch 27 - iter 7/17 - loss 0.68295660 - time (sec): 4.74 - samples/sec: 2155.65 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.39it/s]

2024-11-06 13:33:41,481 DEV : loss 0.9684703350067139 - f1-score (micro avg)  0.0
2024-11-06 13:33:41,486  - 1 epochs without improvement
2024-11-06 13:33:41,487 ----------------------------------------------------------------------------------------------------





2024-11-06 13:33:41,665 epoch 28 - iter 1/17 - loss 0.94672146 - time (sec): 0.17 - samples/sec: 6304.43 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:42,162 epoch 28 - iter 2/17 - loss 0.64479174 - time (sec): 0.67 - samples/sec: 3464.42 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:42,669 epoch 28 - iter 3/17 - loss 0.66231445 - time (sec): 1.18 - samples/sec: 2753.35 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:42,989 epoch 28 - iter 4/17 - loss 0.67679644 - time (sec): 1.50 - samples/sec: 2655.39 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:43,382 epoch 28 - iter 5/17 - loss 0.70928983 - time (sec): 1.89 - samples/sec: 2461.51 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:43,875 epoch 28 - iter 6/17 - loss 0.64732051 - time (sec): 2.38 - samples/sec: 2499.03 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:44,463 epoch 28 - iter 7/17 - loss 0.70814192 - time (sec): 2.97 - samples/sec: 2612.40 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2024-11-06 13:33:52,052 DEV : loss 0.9706310629844666 - f1-score (micro avg)  0.0
2024-11-06 13:33:52,058  - 2 epochs without improvement
2024-11-06 13:33:52,060 ----------------------------------------------------------------------------------------------------





2024-11-06 13:33:52,364 epoch 29 - iter 1/17 - loss 0.99682872 - time (sec): 0.30 - samples/sec: 4295.53 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:53,069 epoch 29 - iter 2/17 - loss 0.86525258 - time (sec): 1.01 - samples/sec: 2752.43 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:53,807 epoch 29 - iter 3/17 - loss 0.78343753 - time (sec): 1.74 - samples/sec: 2638.49 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:54,615 epoch 29 - iter 4/17 - loss 0.72956880 - time (sec): 2.55 - samples/sec: 2365.53 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:55,159 epoch 29 - iter 5/17 - loss 0.77401237 - time (sec): 3.10 - samples/sec: 2264.25 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:55,655 epoch 29 - iter 6/17 - loss 0.73461405 - time (sec): 3.59 - samples/sec: 2358.90 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:33:56,288 epoch 29 - iter 7/17 - loss 0.73060350 - time (sec): 4.23 - samples/sec: 2206.24 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.41it/s]

2024-11-06 13:34:02,822 DEV : loss 0.9713014960289001 - f1-score (micro avg)  0.0
2024-11-06 13:34:02,828  - 3 epochs without improvement
2024-11-06 13:34:02,829 ----------------------------------------------------------------------------------------------------





2024-11-06 13:34:03,140 epoch 30 - iter 1/17 - loss 0.93225134 - time (sec): 0.31 - samples/sec: 5495.20 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:34:03,858 epoch 30 - iter 2/17 - loss 0.83909703 - time (sec): 1.03 - samples/sec: 2603.13 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:34:04,253 epoch 30 - iter 3/17 - loss 0.78904396 - time (sec): 1.42 - samples/sec: 2397.99 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:34:04,775 epoch 30 - iter 4/17 - loss 0.68692919 - time (sec): 1.94 - samples/sec: 2682.60 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:34:05,658 epoch 30 - iter 5/17 - loss 0.66359202 - time (sec): 2.83 - samples/sec: 2263.19 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:34:06,176 epoch 30 - iter 6/17 - loss 0.68684300 - time (sec): 3.34 - samples/sec: 2310.39 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13:34:06,609 epoch 30 - iter 7/17 - loss 0.69729171 - time (sec): 3.78 - samples/sec: 2423.40 - lr: 0.001563 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.29it/s]

2024-11-06 13:34:13,229 DEV : loss 0.9694611430168152 - f1-score (micro avg)  0.0
2024-11-06 13:34:13,233  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00078125]
2024-11-06 13:34:13,235 ----------------------------------------------------------------------------------------------------
2024-11-06 13:34:13,352 epoch 31 - iter 1/17 - loss 0.88009997 - time (sec): 0.11 - samples/sec: 4723.03 - lr: 0.000781 - momentum: 0.000000





2024-11-06 13:34:14,038 epoch 31 - iter 2/17 - loss 0.62979592 - time (sec): 0.80 - samples/sec: 4094.51 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:15,661 epoch 31 - iter 3/17 - loss 0.61686628 - time (sec): 2.42 - samples/sec: 2083.39 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:16,346 epoch 31 - iter 4/17 - loss 0.62234611 - time (sec): 3.11 - samples/sec: 1994.17 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:17,030 epoch 31 - iter 5/17 - loss 0.63926084 - time (sec): 3.79 - samples/sec: 2012.36 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:17,700 epoch 31 - iter 6/17 - loss 0.66717006 - time (sec): 4.46 - samples/sec: 1945.04 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:18,048 epoch 31 - iter 7/17 - loss 0.72124924 - time (sec): 4.81 - samples/sec: 1940.44 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:18,394 epoch 31 - iter 8/17 - loss 0.70649746 - time (sec): 5.16 - samples/sec: 2031.61 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.30it/s]

2024-11-06 13:34:23,856 DEV : loss 0.9694814085960388 - f1-score (micro avg)  0.0
2024-11-06 13:34:23,861  - 1 epochs without improvement
2024-11-06 13:34:23,863 ----------------------------------------------------------------------------------------------------





2024-11-06 13:34:24,237 epoch 32 - iter 1/17 - loss 0.66322183 - time (sec): 0.37 - samples/sec: 6001.70 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:24,866 epoch 32 - iter 2/17 - loss 0.70540246 - time (sec): 1.00 - samples/sec: 2659.92 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:25,226 epoch 32 - iter 3/17 - loss 0.64804884 - time (sec): 1.36 - samples/sec: 2797.67 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:25,921 epoch 32 - iter 4/17 - loss 0.71346174 - time (sec): 2.05 - samples/sec: 2403.11 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:26,569 epoch 32 - iter 5/17 - loss 0.69981182 - time (sec): 2.70 - samples/sec: 2444.93 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:27,202 epoch 32 - iter 6/17 - loss 0.70687398 - time (sec): 3.34 - samples/sec: 2392.39 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:27,705 epoch 32 - iter 7/17 - loss 0.73592001 - time (sec): 3.84 - samples/sec: 2492.41 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.40it/s]

2024-11-06 13:34:34,063 DEV : loss 0.9686384797096252 - f1-score (micro avg)  0.0
2024-11-06 13:34:34,068  - 2 epochs without improvement
2024-11-06 13:34:34,069 ----------------------------------------------------------------------------------------------------





2024-11-06 13:34:34,266 epoch 33 - iter 1/17 - loss 1.12650099 - time (sec): 0.19 - samples/sec: 3188.02 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:34,811 epoch 33 - iter 2/17 - loss 0.79177200 - time (sec): 0.74 - samples/sec: 2378.69 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:35,488 epoch 33 - iter 3/17 - loss 0.69702107 - time (sec): 1.42 - samples/sec: 2480.98 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:36,069 epoch 33 - iter 4/17 - loss 0.72588768 - time (sec): 2.00 - samples/sec: 2110.69 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:36,579 epoch 33 - iter 5/17 - loss 0.68286164 - time (sec): 2.51 - samples/sec: 2205.91 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:37,535 epoch 33 - iter 6/17 - loss 0.62419867 - time (sec): 3.46 - samples/sec: 2283.45 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:38,376 epoch 33 - iter 7/17 - loss 0.65269263 - time (sec): 4.30 - samples/sec: 1942.16 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.27it/s]

2024-11-06 13:34:44,654 DEV : loss 0.9686375856399536 - f1-score (micro avg)  0.0
2024-11-06 13:34:44,659  - 3 epochs without improvement
2024-11-06 13:34:44,660 ----------------------------------------------------------------------------------------------------
2024-11-06 13:34:44,821 epoch 34 - iter 1/17 - loss 0.68246029 - time (sec): 0.16 - samples/sec: 5956.53 - lr: 0.000781 - momentum: 0.000000





2024-11-06 13:34:45,232 epoch 34 - iter 2/17 - loss 0.55271167 - time (sec): 0.57 - samples/sec: 3600.07 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:45,729 epoch 34 - iter 3/17 - loss 0.66912899 - time (sec): 1.07 - samples/sec: 2741.88 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:46,224 epoch 34 - iter 4/17 - loss 0.68561105 - time (sec): 1.56 - samples/sec: 2500.76 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:46,849 epoch 34 - iter 5/17 - loss 0.69864904 - time (sec): 2.19 - samples/sec: 2586.02 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:47,643 epoch 34 - iter 6/17 - loss 0.67001589 - time (sec): 2.98 - samples/sec: 2495.85 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:48,280 epoch 34 - iter 7/17 - loss 0.69336380 - time (sec): 3.62 - samples/sec: 2446.24 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13:34:48,845 epoch 34 - iter 8/17 - loss 0.67419190 - time (sec): 4.18 - samples/sec: 2283.48 - lr: 0.000781 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2024-11-06 13:34:55,044 DEV : loss 0.9685079455375671 - f1-score (micro avg)  0.0
2024-11-06 13:34:55,049  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.000390625]
2024-11-06 13:34:55,051 ----------------------------------------------------------------------------------------------------





2024-11-06 13:34:55,265 epoch 35 - iter 1/17 - loss 0.79747629 - time (sec): 0.21 - samples/sec: 4085.37 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:34:55,734 epoch 35 - iter 2/17 - loss 0.67977349 - time (sec): 0.68 - samples/sec: 3001.28 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:34:56,107 epoch 35 - iter 3/17 - loss 0.88269928 - time (sec): 1.05 - samples/sec: 2772.03 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:34:56,621 epoch 35 - iter 4/17 - loss 0.72479813 - time (sec): 1.57 - samples/sec: 3000.78 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:34:57,623 epoch 35 - iter 5/17 - loss 0.68633652 - time (sec): 2.57 - samples/sec: 2905.10 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:34:58,647 epoch 35 - iter 6/17 - loss 0.68770241 - time (sec): 3.59 - samples/sec: 2425.27 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:34:59,269 epoch 35 - iter 7/17 - loss 0.67315823 - time (sec): 4.22 - samples/sec: 2496.69 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.58it/s]

2024-11-06 13:35:05,575 DEV : loss 0.9686506390571594 - f1-score (micro avg)  0.0
2024-11-06 13:35:05,584  - 1 epochs without improvement
2024-11-06 13:35:05,585 ----------------------------------------------------------------------------------------------------
2024-11-06 13:35:05,722 epoch 36 - iter 1/17 - loss 1.09384010 - time (sec): 0.13 - samples/sec: 5911.07 - lr: 0.000391 - momentum: 0.000000





2024-11-06 13:35:06,432 epoch 36 - iter 2/17 - loss 0.68698856 - time (sec): 0.84 - samples/sec: 4263.92 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:07,529 epoch 36 - iter 3/17 - loss 0.73900596 - time (sec): 1.94 - samples/sec: 2448.77 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:08,129 epoch 36 - iter 4/17 - loss 0.69919104 - time (sec): 2.54 - samples/sec: 2529.31 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:08,892 epoch 36 - iter 5/17 - loss 0.64124445 - time (sec): 3.30 - samples/sec: 2356.70 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:09,521 epoch 36 - iter 6/17 - loss 0.61691914 - time (sec): 3.93 - samples/sec: 2223.63 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:10,042 epoch 36 - iter 7/17 - loss 0.61330496 - time (sec): 4.45 - samples/sec: 2182.88 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:10,549 epoch 36 - iter 8/17 - loss 0.63429709 - time (sec): 4.96 - samples/sec: 2274.90 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.35it/s]

2024-11-06 13:35:15,954 DEV : loss 0.9686141014099121 - f1-score (micro avg)  0.0
2024-11-06 13:35:15,960  - 2 epochs without improvement
2024-11-06 13:35:15,961 ----------------------------------------------------------------------------------------------------





2024-11-06 13:35:16,168 epoch 37 - iter 1/17 - loss 0.93595261 - time (sec): 0.20 - samples/sec: 4702.72 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:16,720 epoch 37 - iter 2/17 - loss 0.76489925 - time (sec): 0.76 - samples/sec: 3885.45 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:17,317 epoch 37 - iter 3/17 - loss 0.78236743 - time (sec): 1.35 - samples/sec: 2771.25 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:17,923 epoch 37 - iter 4/17 - loss 0.63220584 - time (sec): 1.96 - samples/sec: 2967.36 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:18,729 epoch 37 - iter 5/17 - loss 0.62984039 - time (sec): 2.76 - samples/sec: 2685.53 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:19,467 epoch 37 - iter 6/17 - loss 0.63756014 - time (sec): 3.50 - samples/sec: 2613.67 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:20,125 epoch 37 - iter 7/17 - loss 0.64132594 - time (sec): 4.16 - samples/sec: 2361.59 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  2.00it/s]

2024-11-06 13:35:26,148 DEV : loss 0.9687976241111755 - f1-score (micro avg)  0.0
2024-11-06 13:35:26,153  - 3 epochs without improvement
2024-11-06 13:35:26,154 ----------------------------------------------------------------------------------------------------





2024-11-06 13:35:26,464 epoch 38 - iter 1/17 - loss 0.80793724 - time (sec): 0.31 - samples/sec: 4586.11 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:27,213 epoch 38 - iter 2/17 - loss 0.62873320 - time (sec): 1.06 - samples/sec: 3322.57 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:28,189 epoch 38 - iter 3/17 - loss 0.63120443 - time (sec): 2.03 - samples/sec: 2965.09 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:29,216 epoch 38 - iter 4/17 - loss 0.64015745 - time (sec): 3.06 - samples/sec: 2641.21 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:29,836 epoch 38 - iter 5/17 - loss 0.64643063 - time (sec): 3.68 - samples/sec: 2371.90 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:30,248 epoch 38 - iter 6/17 - loss 0.63823825 - time (sec): 4.09 - samples/sec: 2427.05 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13:35:30,769 epoch 38 - iter 7/17 - loss 0.65864996 - time (sec): 4.61 - samples/sec: 2343.73 - lr: 0.000391 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.73it/s]

2024-11-06 13:35:36,611 DEV : loss 0.9681512713432312 - f1-score (micro avg)  0.0
2024-11-06 13:35:36,616  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0001953125]
2024-11-06 13:35:36,617 ----------------------------------------------------------------------------------------------------





2024-11-06 13:35:36,969 epoch 39 - iter 1/17 - loss 0.64856491 - time (sec): 0.35 - samples/sec: 6476.84 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:37,685 epoch 39 - iter 2/17 - loss 0.68419711 - time (sec): 1.07 - samples/sec: 3245.22 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:38,091 epoch 39 - iter 3/17 - loss 0.77000681 - time (sec): 1.47 - samples/sec: 2970.79 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:38,577 epoch 39 - iter 4/17 - loss 0.71893199 - time (sec): 1.96 - samples/sec: 2653.41 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:39,193 epoch 39 - iter 5/17 - loss 0.70620673 - time (sec): 2.57 - samples/sec: 2534.49 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:39,798 epoch 39 - iter 6/17 - loss 0.69363237 - time (sec): 3.18 - samples/sec: 2272.15 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:40,242 epoch 39 - iter 7/17 - loss 0.66658338 - time (sec): 3.62 - samples/sec: 2469.88 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.65it/s]

2024-11-06 13:35:46,955 DEV : loss 0.9681549668312073 - f1-score (micro avg)  0.0
2024-11-06 13:35:46,960  - 1 epochs without improvement
2024-11-06 13:35:46,962 ----------------------------------------------------------------------------------------------------





2024-11-06 13:35:47,374 epoch 40 - iter 1/17 - loss 0.56664751 - time (sec): 0.41 - samples/sec: 4866.34 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:48,196 epoch 40 - iter 2/17 - loss 0.65647534 - time (sec): 1.23 - samples/sec: 3026.47 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:48,796 epoch 40 - iter 3/17 - loss 0.60257890 - time (sec): 1.83 - samples/sec: 2920.69 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:49,350 epoch 40 - iter 4/17 - loss 0.63052546 - time (sec): 2.38 - samples/sec: 2617.31 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:49,629 epoch 40 - iter 5/17 - loss 0.66931256 - time (sec): 2.66 - samples/sec: 2566.97 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:49,949 epoch 40 - iter 6/17 - loss 0.65279968 - time (sec): 2.98 - samples/sec: 2589.29 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:50,433 epoch 40 - iter 7/17 - loss 0.67269347 - time (sec): 3.47 - samples/sec: 2585.28 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.73it/s]

2024-11-06 13:35:57,182 DEV : loss 0.9680436253547668 - f1-score (micro avg)  0.0
2024-11-06 13:35:57,188  - 2 epochs without improvement
2024-11-06 13:35:57,189 ----------------------------------------------------------------------------------------------------





2024-11-06 13:35:57,370 epoch 41 - iter 1/17 - loss 0.63108881 - time (sec): 0.18 - samples/sec: 5728.34 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:57,802 epoch 41 - iter 2/17 - loss 0.59649867 - time (sec): 0.61 - samples/sec: 3982.55 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:58,639 epoch 41 - iter 3/17 - loss 0.50883946 - time (sec): 1.45 - samples/sec: 3326.70 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:35:59,716 epoch 41 - iter 4/17 - loss 0.49790151 - time (sec): 2.52 - samples/sec: 2652.21 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:36:00,412 epoch 41 - iter 5/17 - loss 0.54471634 - time (sec): 3.22 - samples/sec: 2381.38 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:36:00,975 epoch 41 - iter 6/17 - loss 0.56095778 - time (sec): 3.78 - samples/sec: 2421.25 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:36:01,462 epoch 41 - iter 7/17 - loss 0.61294810 - time (sec): 4.27 - samples/sec: 2338.94 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:00<00:00,  1.82it/s]

2024-11-06 13:36:07,379 DEV : loss 0.9680617451667786 - f1-score (micro avg)  0.0
2024-11-06 13:36:07,384  - 3 epochs without improvement
2024-11-06 13:36:07,386 ----------------------------------------------------------------------------------------------------





2024-11-06 13:36:07,755 epoch 42 - iter 1/17 - loss 0.71472049 - time (sec): 0.37 - samples/sec: 4500.31 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:36:08,600 epoch 42 - iter 2/17 - loss 0.61190486 - time (sec): 1.21 - samples/sec: 2960.14 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:36:09,235 epoch 42 - iter 3/17 - loss 0.67778681 - time (sec): 1.85 - samples/sec: 2689.51 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:36:09,700 epoch 42 - iter 4/17 - loss 0.69493492 - time (sec): 2.31 - samples/sec: 2576.91 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:36:10,010 epoch 42 - iter 5/17 - loss 0.74696111 - time (sec): 2.62 - samples/sec: 2491.51 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:36:10,389 epoch 42 - iter 6/17 - loss 0.70109941 - time (sec): 3.00 - samples/sec: 2568.75 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13:36:11,022 epoch 42 - iter 7/17 - loss 0.69622493 - time (sec): 3.63 - samples/sec: 2498.85 - lr: 0.000195 - momentum: 0.000000
2024-11-06 13

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]

2024-11-06 13:36:17,540 DEV : loss 0.9682360887527466 - f1-score (micro avg)  0.0
2024-11-06 13:36:17,546  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [9.765625e-05]
2024-11-06 13:36:17,546 ----------------------------------------------------------------------------------------------------
2024-11-06 13:36:17,547 learning rate too small - quitting training!
2024-11-06 13:36:17,548 ----------------------------------------------------------------------------------------------------
2024-11-06 13:36:17,549 Saving model ...





2024-11-06 13:36:18,948 Done.
2024-11-06 13:36:18,949 ----------------------------------------------------------------------------------------------------
2024-11-06 13:36:18,952 Testing using last state of model ...


100%|██████████| 2/2 [00:05<00:00,  2.84s/it]

2024-11-06 13:36:24,673 
Results:
- F-score (micro) 0.0
- F-score (macro) 0.0
- Accuracy 0.0

By class:
              precision    recall  f1-score   support

       SKILL     0.0000    0.0000    0.0000     718.0
         JOB     0.0000    0.0000    0.0000     154.0
        WORK     0.0000    0.0000    0.0000     130.0
     COMPANY     0.0000    0.0000    0.0000     106.0
         LOC     0.0000    0.0000    0.0000      62.0
         UNI     0.0000    0.0000    0.0000      38.0
        NAME     0.0000    0.0000    0.0000      35.0
         DEG     0.0000    0.0000    0.0000      34.0
       PHONE     0.0000    0.0000    0.0000      33.0
       STUDY     0.0000    0.0000    0.0000      28.0
       EMAIL     0.0000    0.0000    0.0000      24.0

   micro avg     0.0000    0.0000    0.0000    1362.0
   macro avg     0.0000    0.0000    0.0000    1362.0
weighted avg     0.0000    0.0000    0.0000    1362.0

2024-11-06 13:36:24,674 -----------------------------------------------------------




{'test_score': 0.0}

In [10]:
# evaluate model

from flair.data import Corpus

from flair.datasets import ColumnCorpus

from flair.models import SequenceTagger

from flair.trainers import ModelTrainer



# Load the trained model

model = SequenceTagger.load('/content/drive/MyDrive/FYP/Implementation/flair_output/final-model.pt')



# Evaluate the model on the test set

result = model.evaluate(corpus.test, gold_label_type='ner', mini_batch_size=32)



# Print the results

# print("Evaluation Loss:", eval_loss)

print(result.detailed_results)  # print the precision, recall, and F1-score per entity type

2024-11-06 10:57:38,582 SequenceTagger predicts: Dictionary with 45 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-DEG, B-DEG, E-DEG, I-DEG, S-UNI, B-UNI, E-UNI, I-UNI, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL


100%|██████████| 3/3 [00:03<00:00,  1.29s/it]


Results:
- F-score (micro) 0.563
- F-score (macro) 0.7187
- Accuracy 0.3962

By class:
              precision    recall  f1-score   support

       SKILL     0.3408    0.3862    0.3621       593
         JOB     0.5730    0.7020    0.6310       151
        WORK     0.8188    0.9457    0.8777       129
     COMPANY     0.5172    0.5714    0.5430       105
         LOC     0.5701    0.7176    0.6354        85
         DEG     0.5862    0.7907    0.6733        43
         UNI     0.5660    0.6667    0.6122        45
        NAME     0.9388    0.9787    0.9583        47
       PHONE     1.0000    0.9787    0.9892        47
       EMAIL     0.8222    0.9250    0.8706        40
       STUDY     0.8286    0.6905    0.7532        42

   micro avg     0.5281    0.6029    0.5630      1327
   macro avg     0.6874    0.7594    0.7187      1327
weighted avg     0.5324    0.6029    0.5642      1327






In [11]:
from flair.models import SequenceTagger

from flair.data import Sentence

import spacy

from spacy import displacy



# Load your trained Flair NER model

tagger = SequenceTagger.load('/content/drive/MyDrive/FYP/Implementation/flair_output/best-model.pt')



resume_text = '''

John Doe lives at 1234 Elm Street in Los Angeles, CA 90001. He can be reached at +1 (555) 123-4567 or via email at john.doe@example.com. John is a results-driven software engineer with over 5 years of experience in web development and cloud infrastructure, with strong knowledge of JavaScript, Python, and cloud technologies like AWS and Azure. Currently, he works as a Software Engineer at Google LLC in San Francisco, CA, where he has been employed since August 2019. In this role, he has developed scalable web applications using JavaScript, Node.js, and React, deployed and maintained cloud infrastructure on AWS, reducing downtime by 20%, and led a team of 4 engineers to enhance backend performance by 30%. Previously, he worked as a Junior Developer at Tech Innovators Inc. in Austin, TX, from July 2017 to July 2019, where he created RESTful APIs using Python and Flask, collaborated with front-end developers to build and deploy user-facing applications, and wrote unit and integration tests, improving code coverage by 15%.



John holds a Master of Science in Computer Science from the University of California, Berkeley, with a graduation date of May 2017, and a Bachelor of Science in Information Technology from the University of Texas at Austin, graduated in May 2015. His skillset includes proficiency in programming languages like Python, JavaScript, and Java; frameworks such as React, Flask, and Django; cloud platforms including AWS, Google Cloud, and Azure; as well as other tools like Git, Docker, Kubernetes, and SQL. He is certified as an AWS Certified Solutions Architect – Associate, earned in 2020, and as a Google Professional Cloud Architect, earned in 2021'

'''



# Step 1: Predict entities using Flair

sentence = Sentence(resume_text)

tagger.predict(sentence)



# Step 2: Convert Flair predictions to spaCy doc format

# Initialize a blank spaCy NLP pipeline

nlp = spacy.blank("en")

doc = nlp(resume_text)



# Extract entities from Flair prediction and convert to spaCy format

ents = []

for entity in sentence.get_spans('ner'):

    start, end = entity.start_position, entity.end_position

    label = entity.tag

    span = doc.char_span(start, end, label=label)

    if span is not None:

        ents.append(span)



# Set the entities in the spaCy doc

doc.ents = ents



# Step 3: Visualize using displacy

# Display in Jupyter or a web page

displacy.render(doc, style="ent", jupyter=True)


2024-11-06 10:57:55,781 SequenceTagger predicts: Dictionary with 47 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, <START>, <STOP>




In [12]:
from flair.models import SequenceTagger

from flair.data import Sentence



# Load the pretrained NER model

tagger = SequenceTagger.load("/content/drive/MyDrive/FYP/Implementation/flair_output/best-model.pt")

# Example text

text = "Apple is looking at buying U.K. startup for $1 billion."



# Create a Sentence object

sentence = Sentence(resume_text)



# Predict entities

tagger.predict(sentence)



# Print the detected entities

for entity in sentence.get_spans("ner"):

    print(f"Entity: {entity.text}, Type: {entity.get_label('ner').value}, Confidence: {entity.score}")


2024-11-06 10:58:17,528 SequenceTagger predicts: Dictionary with 47 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, <START>, <STOP>
Entity: Doe lives, Type: NAME, Confidence: 0.16702505946159363
Entity: 1234 Elm, Type: NAME, Confidence: 0.11599351465702057
Entity: Los, Type: DEG, Confidence: 0.18544061481952667
Entity: 90001, Type: DEG, Confidence: 0.14748063683509827
Entity: 555, Type: DEG, Confidence: 0.11017131805419922
Entity: 123-4567, Type: DEG, Confidence: 0.11569119244813919
Entity: via, Type: DEG, Confidence: 0.20629706978797913
Entity: at, Type: EMAIL, Confidence: 0.19365909695625305
Entity: john.doe, Type: JOB, Confidence: 0.10587572306394577
Entity: @, Ty