<a href="https://colab.research.google.com/github/chewzzz1014/fyp/blob/master/ner/src/train_ner_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Train NER Models

In [1]:
# mount drive

from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!mkdir spacy_ner_data

In [None]:
import json

import random

from sklearn.model_selection import train_test_split

import spacy

from spacy.tokens import DocBin



# Load JSON data

with open('/content/drive/MyDrive/FYP/Implementation/Resume Dataset/200_resumes_annotated.json', "r") as f:

    data = json.load(f)



def remove_overlapping_entities(entities):

    """Remove overlapping entities from the list."""

    entities = sorted(entities, key=lambda x: x[0])  # Sort by start position

    non_overlapping = []

    last_end = -1

    for start, end, label in entities:

        if start >= last_end:  # Only add if there's no overlap with the previous entity

            non_overlapping.append((start, end, label))

            last_end = end

    return non_overlapping



# Function to convert JSON data to Spacy's DocBin format

def convert_to_spacy_format(data):

    nlp = spacy.blank("en")  # Load a blank Spacy model

    doc_bin = DocBin()  # Container for our docs



    for item in data:

        text = item['data']['Text']  # Full document text

        entities = []



        for annotation in item['annotations'][0]['result']:

            start = annotation['value']['start']

            end = annotation['value']['end']

            label = annotation['value']['labels'][0]  # Entity label

            entities.append((start, end, label))



        entities = remove_overlapping_entities(entities)  # Remove overlapping entities

        # Create a Spacy doc and add entities to it

        doc = nlp.make_doc(text)

        spans = [doc.char_span(start, end, label=label) for start, end, label in entities]

        # Filter out None spans if Spacy can't align the character indices with tokens

        spans = [span for span in spans if span is not None]

        doc.ents = spans  # Assign entities to the doc

        doc_bin.add(doc)



    return doc_bin



# Split data into train and test sets

train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)



# Convert train and test sets to Spacy format

train_doc_bin = convert_to_spacy_format(train_data)

test_doc_bin = convert_to_spacy_format(test_data)



# Save the train and test data to .spacy files

train_doc_bin.to_disk("spacy_ner_data/train_data.spacy")

test_doc_bin.to_disk("spacy_ner_data/test_data.spacy")

## Spacy NER

In [None]:
# create base_config.cfg and paste the config generated from spacy widget

# update train and test file path

!touch base_config.cfg

In [None]:
# generate config.cfg from base_config.cfg

!python -m spacy init fill-config base_config.cfg config.cfg

[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [None]:
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# train model using hyperparameters set in config.cfg

# save trained model in spacy-output/ dir

!python -m spacy train config.cfg --output ./spacy_output

!cp -r ./spacy_output /content/drive/MyDrive/FYP/Implementation/

In [None]:
# evaluate trained model performance

# store output and visualization into result/ dir

!python -m spacy evaluate spacy_output/model-best spacy_ner_data/test_data.spacy -dp spacy_output

[38;5;4mℹ Using CPU[0m
[1m

TOK     100.00
NER P   51.12 
NER R   41.26 
NER F   45.66 
SPEED   2395  

[1m

                P       R       F
NAME        89.66   78.79   83.87
JOB         72.00   32.43   44.72
DEG         62.16   63.89   63.01
UNI         38.89   34.15   36.36
EMAIL       63.33   95.00   76.00
LOC         39.39   31.71   35.14
WORK PER    75.45   83.00   79.05
COMPANY     28.42   36.49   31.95
SKILL       40.96   28.96   33.93
PHONE       89.66   83.87   86.67
STUDY PER   65.62   58.33   61.76

<IPython.core.display.HTML object>
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/spacy/__main__.py", line 4, in <module>
    setup_cli()
  File "/usr/local/lib/python3.10/dist-packages/spacy/cli/_util.py", line 87, in setup_cli
    c

In [None]:
# make prediction

import spacy

resume_text = '''

John Doe lives at 1234 Elm Street in Los Angeles, CA 90001. He can be reached at +1 (555) 123-4567 or via email at john.doe@example.com. John is a results-driven software engineer with over 5 years of experience in web development and cloud infrastructure, with strong knowledge of JavaScript, Python, and cloud technologies like AWS and Azure. Currently, he works as a Software Engineer at Google LLC in San Francisco, CA, where he has been employed since August 2019. In this role, he has developed scalable web applications using JavaScript, Node.js, and React, deployed and maintained cloud infrastructure on AWS, reducing downtime by 20%, and led a team of 4 engineers to enhance backend performance by 30%. Previously, he worked as a Junior Developer at Tech Innovators Inc. in Austin, TX, from July 2017 to July 2019, where he created RESTful APIs using Python and Flask, collaborated with front-end developers to build and deploy user-facing applications, and wrote unit and integration tests, improving code coverage by 15%.



John holds a Master of Science in Computer Science from the University of California, Berkeley, with a graduation date of May 2017, and a Bachelor of Science in Information Technology from the University of Texas at Austin, graduated in May 2015. His skillset includes proficiency in programming languages like Python, JavaScript, and Java; frameworks such as React, Flask, and Django; cloud platforms including AWS, Google Cloud, and Azure; as well as other tools like Git, Docker, Kubernetes, and SQL. He is certified as an AWS Certified Solutions Architect – Associate, earned in 2020, and as a Google Professional Cloud Architect, earned in 2021'

'''

nlp = spacy.load("spacy-output/model-best")

doc = nlp(resume_text.lower())



print(doc.ents)



for ent in doc.ents:

    print(f"{ent.text}: {ent.label_}")

(john doe, in los, (555) 123-4567, john.doe@example.com, john is, aws, restful apis, master of science, bachelor of science in information technology, python, aws, azure, git, docker)
john doe: NAME
in los: LOC
(555) 123-4567: PHONE
john.doe@example.com: EMAIL
john is: NAME
aws: SKILL
restful apis: SKILL
master of science: DEG
bachelor of science in information technology: DEG
python: SKILL
aws: SKILL
azure: SKILL
git: SKILL
docker: SKILL


In [None]:
from spacy import displacy

displacy.render(doc, style="ent", jupyter=True)

## Flair NER

In [1]:
!pip install flair

Collecting flair
  Downloading flair-0.14.0-py3-none-any.whl.metadata (12 kB)
Collecting conllu<5.0.0,>=4.0 (from flair)
  Downloading conllu-4.5.3-py2.py3-none-any.whl.metadata (19 kB)
Collecting ftfy>=6.1.0 (from flair)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting gdown>=4.4.0 (from flair)
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting langdetect>=1.0.9 (from flair)
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting pptree>=3.1 (from flair)
  Downloading pptree-3.1.tar.gz (3.0 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting pytorch-revgrad>=0.2.0 (from flair)
  Downloading pytorch_revgrad-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting segtok>=1.5.11 (from flair)
  Downloading segtok-1.5.11-py3-none-any.whl.metadata (9.0

In [None]:
# convert spacy data into flair data

import spacy

from spacy.tokens import DocBin

import os



def convert_spacy_to_flair(input_file, output_file):

    """

    Convert SpaCy binary format to Flair's CoNLL format.



    Args:

        input_file (str): Path to SpaCy binary file (.spacy)

        output_file (str): Path to output file for Flair format

    """

    # Load spaCy model

    nlp = spacy.blank("en")



    # Load the DocBin

    doc_bin = DocBin().from_disk(input_file)

    docs = list(doc_bin.get_docs(nlp.vocab))



    with open(output_file, 'w', encoding='utf-8') as f:

        for doc in docs:

            tokens = [(t.text, t.ent_iob_, t.ent_type_) for t in doc]



            # Write tokens in CoNLL format

            for token in tokens:

                text, iob, ent_type = token



                # Convert spaCy IOB to CoNLL format

                if iob == 'O':

                    tag = 'O'

                else:

                    tag = f'{iob}-{ent_type}' if ent_type else 'O'



                # Write line: token and NER tag

                f.write(f'{text} {tag}\n')



            # Empty line between sentences

            f.write('\n')



def convert_spacy_json_to_flair(input_file, output_file):

    """

    Convert SpaCy JSON format to Flair's CoNLL format.



    Args:

        input_file (str): Path to JSON file with SpaCy annotations

        output_file (str): Path to output file for Flair format

    """

    import json



    nlp = spacy.blank("en")



    with open(input_file, 'r', encoding='utf-8') as f:

        training_data = json.load(f)



    with open(output_file, 'w', encoding='utf-8') as f:

        for example in training_data:

            text = example['text']

            ents = example.get('entities', [])



            # Create a spaCy doc

            doc = nlp(text)



            # Add entities to doc

            spans = []

            for start, end, label in ents:

                span = doc.char_span(start, end, label=label)

                if span is not None:

                    spans.append(span)

            doc.ents = spans



            # Convert to CoNLL format

            tokens = [(t.text, t.ent_iob_, t.ent_type_) for t in doc]



            for token in tokens:

                text, iob, ent_type = token

                if iob == 'O':

                    tag = 'O'

                else:

                    tag = f'{iob}-{ent_type}' if ent_type else 'O'

                f.write(f'{text} {tag}\n')



            f.write('\n')



# Example usage for JSON format

flair_train_json = "flair_train.txt"

flair_test_json = "flair_test.txt"



convert_spacy_to_flair('/content/spacy_ner_data/train_data.spacy', flair_train_json)

convert_spacy_to_flair('/content/spacy_ner_data/test_data.spacy', flair_test_json)

FileNotFoundError: [Errno 2] No such file or directory: '/content/spacy_ner_data/train_data.spacy'

In [None]:
# convert spacy data into flair data

import spacy

from spacy.training import Corpus



!python -m spacy download de_core_news_sm

nlp = spacy.load("de_core_news_sm")

corpus = Corpus("/content/spacy_ner_data/test_data.spacy")



data = corpus(nlp)



# Flair supports BIO and BIOES, see https://github.com/flairNLP/flair/issues/875

def rename_biluo_to_bioes(old_tag):

    new_tag = ""

    try:

        if old_tag.startswith("L"):

            new_tag = "E" + old_tag[1:]

        elif old_tag.startswith("U"):

            new_tag = "S" + old_tag[1:]

        else:

            new_tag = old_tag

    except:

        pass

    return new_tag





def generate_corpus():

    corpus = []

    n_ex = 0

    for example in data:

        n_ex += 1

        text = example.text

        doc = nlp(text)

        tags = example.get_aligned_ner()

        # Check if it's an empty list of NER tags.

        if None in tags:

            pass

        else:

            new_tags = [rename_biluo_to_bioes(tag) for tag in tags]

            for token, tag in zip(doc,new_tags):

                row = token.text +' '+ token.pos_ +' ' +tag + '\n'

                corpus.append(row)

            corpus.append('\n')

    return corpus



def write_file(filepath):

    with open(filepath, 'w', encoding='utf-8') as f:

        corpus = generate_corpus()

        f.writelines(corpus)



def main():

    write_file('flair_test.txt')



main()

Collecting de-core-news-sm==3.7.0
  Using cached https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.7.0/de_core_news_sm-3.7.0-py3-none-any.whl (14.6 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('de_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
# convert json into flair data

import json

import random

from typing import List, Dict, Tuple

import spacy

from collections import defaultdict



class NERConverter:

    def __init__(self):

        self.nlp = spacy.load("en_core_web_sm")



    def get_bioes_label(self, token_index: int, entity_length: int, current_position: int, label: str) -> str:

        """

        Convert to BIOES format

        - S-: Single token entity

        - B-: Beginning of multi-token entity

        - I-: Inside of multi-token entity

        - E-: End of multi-token entity

        - O: Outside

        """

        if entity_length == 1:

            return f'S-{label}'

        if current_position == 0:

            return f'B-{label}'

        if current_position == entity_length - 1:

            return f'E-{label}'

        return f'I-{label}'



    def convert_to_bioes_format(self, json_data: List[dict]) -> List[List[Tuple[str, str]]]:

        """Convert JSON annotations to BIOES format."""

        all_sentences = []



        for item in json_data:

            text = item['data']['Text']

            doc = self.nlp(text)



            # Initialize character-level labels

            char_labels = ['O'] * len(text)



            # First pass: identify entity boundaries and lengths

            entity_spans = []

            if item['annotations'] and len(item['annotations']) > 0:

                for ann in item['annotations'][0]['result']:

                    if 'value' in ann:

                        start = ann['value']['start']

                        end = ann['value']['end']

                        label = ann['value']['labels'][0]

                        entity_spans.append((start, end, label))



            # Sort spans by start position

            entity_spans.sort(key=lambda x: x[0])



            # Second pass: apply BIOES labels

            for start, end, label in entity_spans:

                # Get tokens that are part of this entity

                entity_text = text[start:end]

                entity_doc = self.nlp(entity_text)

                entity_length = len([token for token in entity_doc if not token.is_space])



                # Set labels for the entire span

                current_token_idx = 0

                for i in range(start, end):

                    if i == start or text[i-1].isspace():

                        char_labels[i] = self.get_bioes_label(i, entity_length, current_token_idx, label)

                        current_token_idx += 1

                    else:

                        char_labels[i] = char_labels[i-1]



            # Convert to token-level labels

            current_sentence = []

            for sent in doc.sents:

                for token in sent:

                    # Get the most common label for the token's characters

                    token_chars_labels = char_labels[token.idx:token.idx + len(token.text)]

                    label_counts = defaultdict(int)

                    for char_label in token_chars_labels:

                        label_counts[char_label] += 1



                    token_label = max(label_counts.items(), key=lambda x: x[1])[0]

                    current_sentence.append((token.text, token_label))



                if current_sentence:

                    all_sentences.append(current_sentence)

                    current_sentence = []



        return all_sentences



    def write_flair_file(self, sentences: List[List[Tuple[str, str]]], filename: str):

        """Write sentences in BIOES format to file."""

        with open(filename, 'w', encoding='utf-8') as f:

            for sentence in sentences:

                for token, label in sentence:

                    f.write(f'{token} {label}\n')

                f.write('\n')



    def convert_and_split(self, json_data: List[dict], train_file: str, test_file: str, test_ratio: float = 0.2):

        """Convert JSON to BIOES format and split into train/test sets."""

        all_sentences = self.convert_to_bioes_format(json_data)



        # Shuffle and split

        random.shuffle(all_sentences)

        split_idx = int(len(all_sentences) * (1 - test_ratio))



        train_sentences = all_sentences[:split_idx]

        test_sentences = all_sentences[split_idx:]



        # Write to files

        self.write_flair_file(train_sentences, train_file)

        self.write_flair_file(test_sentences, test_file)



        return len(train_sentences), len(test_sentences)



def main():

    # Load JSON data

    with open('/kaggle/input/resume-dataset/200_resumes_annotated.json', 'r', encoding='utf-8') as f:

        json_data = json.load(f)



    # Convert and split data

    converter = NERConverter()

    train_count, test_count = converter.convert_and_split(

        json_data,

        train_file='flair_train.txt',

        test_file='flair_test.txt',

        test_ratio=0.2

    )



    print(f'Created {train_count} training sentences and {test_count} test sentences')



main()

Created 298 training sentences and 75 test sentences


In [4]:
from flair.data import Corpus

from flair.datasets import ColumnCorpus



# Define columns for CoNLL (0: word, 1: label)

columns = {0: 'text', 1: 'ner'}



# Set data folder and file names

data_folder = './'

train_file = '/kaggle/working/flair_test.txt'

test_file = '/kaggle/working/flair_test.txt'



# Load the corpus

corpus: Corpus = ColumnCorpus(data_folder, columns,

                              train_file=train_file,

                              test_file=test_file,

                              dev_file=None)

2024-11-06 13:50:53,363 Reading data from .
2024-11-06 13:50:53,365 Train: /kaggle/working/flair_test.txt
2024-11-06 13:50:53,366 Dev: None
2024-11-06 13:50:53,367 Test: /kaggle/working/flair_test.txt
2024-11-06 13:50:54,194 No dev split found. Using 10% (i.e. 8 samples) of the train split as dev data


In [5]:
tag_dictionary = corpus.make_label_dictionary(label_type='ner')

print("Labels:", tag_dictionary.get_items())

2024-11-06 13:50:57,494 Computing label dictionary. Progress:


0it [00:00, ?it/s]
67it [00:00, 9716.42it/s]

2024-11-06 13:50:57,529 Dictionary created for label 'ner' with 11 values: SKILL (seen 435 times), JOB (seen 116 times), WORK (seen 97 times), COMPANY (seen 70 times), LOC (seen 61 times), NAME (seen 37 times), PHONE (seen 37 times), UNI (seen 36 times), DEG (seen 30 times), EMAIL (seen 28 times), STUDY (seen 26 times)
Labels: ['SKILL', 'JOB', 'WORK', 'COMPANY', 'LOC', 'NAME', 'PHONE', 'UNI', 'DEG', 'EMAIL', 'STUDY']





In [6]:
from collections import Counter



def count_labels(file_path):

    with open(file_path, 'r') as file:

        labels = [line.split()[-1] for line in file if line.strip()]

    return Counter(labels)



print("Train label distribution:", count_labels('flair_train.txt'))

print("Test label distribution:", count_labels('flair_test.txt'))

Train label distribution: Counter({'O': 73362, 'S-SKILL': 1509, 'PER': 1232, 'B-SKILL': 1008, 'E-SKILL': 1004, 'E-JOB': 543, 'B-JOB': 533, 'E-COMPANY': 365, 'B-COMPANY': 361, 'I-JOB': 302, 'I-COMPANY': 273, 'I-DEG': 265, 'I-SKILL': 187, 'E-UNI': 172, 'B-UNI': 171, 'S-LOC': 166, 'E-DEG': 165, 'B-DEG': 164, 'B-NAME': 150, 'E-NAME': 150, 'I-UNI': 141, 'E-PHONE': 124, 'B-PHONE': 122, 'S-EMAIL': 118, 'B-LOC': 101, 'E-LOC': 101, 'I-PHONE': 84, 'S-JOB': 60, 'S-COMPANY': 59, 'S-PHONE': 21, 'I-NAME': 9, 'S-DEG': 7, 'I-LOC': 5, 'S-UNI': 2, 'B-EMAIL': 1, 'E-EMAIL': 1})
Test label distribution: Counter({'O': 15731, 'S-SKILL': 282, 'PER': 279, 'B-SKILL': 205, 'E-SKILL': 205, 'E-JOB': 116, 'B-JOB': 114, 'I-DEG': 69, 'E-COMPANY': 62, 'B-COMPANY': 61, 'I-JOB': 55, 'I-SKILL': 49, 'B-UNI': 43, 'E-UNI': 43, 'I-COMPANY': 42, 'B-NAME': 41, 'E-NAME': 41, 'S-LOC': 39, 'B-DEG': 37, 'E-DEG': 37, 'B-PHONE': 33, 'E-PHONE': 33, 'I-UNI': 30, 'S-EMAIL': 30, 'B-LOC': 27, 'E-LOC': 27, 'I-PHONE': 22, 'S-COMPANY': 20, 

In [7]:
# create NER tagger

from flair.embeddings import WordEmbeddings, StackedEmbeddings, TransformerWordEmbeddings, FlairEmbeddings

from flair.models import SequenceTagger



# using LSTM-CRF on top of frozen embeddings

# combine flair and glove embeddings

# embeddings = StackedEmbeddings([

#                 WordEmbeddings('glove'),

#                 FlairEmbeddings('news-forward'),

#                 FlairEmbeddings('news-backward'),

#             ])

# tagger = SequenceTagger(hidden_size=256,

#                          embeddings=embeddings,

#                          tag_dictionary=tag_dictionary,

#                          tag_type='ner',

#                          use_crf=True,

#                          tag_format = 'BIOES')



# using transformer embedding

# embeddings = TransformerWordEmbeddings('bert-base-uncased',

#                                       fine_tune=True,

#                                       layers='-1',

#                                       subtoken_pooling='first')

embeddings = TransformerWordEmbeddings(

    'roberta-base',  # or 'bert-base-uncased'

    fine_tune=True,

    layers='-1,-2,-3,-4',  # Use last 4 layers

    subtoken_pooling='first',

    allow_long_sentences=True

)

tagger = SequenceTagger(hidden_size=256,

                         embeddings=embeddings,

                         tag_dictionary=tag_dictionary,

                         tag_type='ner',

                         use_crf=False,

                         use_rnn=False,

                         reproject_embeddings=False,

                         tag_format = 'BIOES')

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

2024-11-06 13:51:16,469 SequenceTagger predicts: Dictionary with 45 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, S-STUDY, B-STUDY, E-STUDY, I-STUDY


In [8]:
# train flair ner model

from flair.trainers import ModelTrainer

from flair.training_utils import EvaluationMetric



trainer = ModelTrainer(tagger, corpus)



trainer.train(

    base_path='flair_output/',

    learning_rate=5.0e-5,

    mini_batch_size=16,

    max_epochs=50,

    train_with_dev=False

)

!cp -r ./flair_output /kaggle/working/

2024-11-06 13:51:24,215 ----------------------------------------------------------------------------------------------------
2024-11-06 13:51:24,217 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50266, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2024-11-06 13:51:26,829 epoch 1 - iter 1/5 - loss 3.74054938 - time (sec): 2.59 - samples/sec: 1316.54 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:27,598 epoch 1 - iter 2/5 - loss 3.36812881 - time (sec): 3.35 - samples/sec: 2171.90 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:29,493 epoch 1 - iter 3/5 - loss 2.56930247 - time (sec): 5.25 - samples/sec: 2142.94 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:31,534 epoch 1 - iter 4/5 - loss 2.62421327 - time (sec): 7.29 - samples/sec: 2085.58 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:32,861 epoch 1 - iter 5/5 - loss 2.64770448 - time (sec): 8.62 - samples/sec: 1832.90 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:32,862 ----------------------------------------------------------------------------------------------------
2024-11-06 13:51:32,864 EPOCH 1 done: loss 2.6477 - lr: 0.300000


100%|██████████| 1/1 [00:00<00:00,  1.03it/s]

2024-11-06 13:51:33,870 DEV : loss 2.8290953636169434 - f1-score (micro avg)  0.031
2024-11-06 13:51:33,876  - 0 epochs without improvement
2024-11-06 13:51:33,877 saving best model





2024-11-06 13:51:34,696 ----------------------------------------------------------------------------------------------------
2024-11-06 13:51:35,456 epoch 2 - iter 1/5 - loss 2.90082307 - time (sec): 0.76 - samples/sec: 4248.61 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:37,277 epoch 2 - iter 2/5 - loss 2.11704190 - time (sec): 2.58 - samples/sec: 2835.45 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:39,494 epoch 2 - iter 3/5 - loss 1.96507488 - time (sec): 4.80 - samples/sec: 2539.85 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:41,299 epoch 2 - iter 4/5 - loss 3.01271909 - time (sec): 6.60 - samples/sec: 2290.44 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:42,123 epoch 2 - iter 5/5 - loss 2.92544927 - time (sec): 7.42 - samples/sec: 2127.39 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:42,125 ----------------------------------------------------------------------------------------------------
2024-11-06 13:51:42,126 EPOCH 2 done: loss 2.9254 - lr:

100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

2024-11-06 13:51:43,415 DEV : loss 1.5340508222579956 - f1-score (micro avg)  0.0
2024-11-06 13:51:43,423  - 1 epochs without improvement
2024-11-06 13:51:43,425 ----------------------------------------------------------------------------------------------------





2024-11-06 13:51:44,192 epoch 3 - iter 1/5 - loss 1.65645101 - time (sec): 0.76 - samples/sec: 4620.90 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:46,225 epoch 3 - iter 2/5 - loss 1.59045284 - time (sec): 2.80 - samples/sec: 2655.95 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:48,188 epoch 3 - iter 3/5 - loss 1.33881016 - time (sec): 4.76 - samples/sec: 2500.19 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:50,044 epoch 3 - iter 4/5 - loss 1.30969969 - time (sec): 6.62 - samples/sec: 2279.52 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:51,249 epoch 3 - iter 5/5 - loss 1.39895789 - time (sec): 7.82 - samples/sec: 2019.67 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:51,250 ----------------------------------------------------------------------------------------------------
2024-11-06 13:51:51,251 EPOCH 3 done: loss 1.3990 - lr: 0.300000


100%|██████████| 1/1 [00:02<00:00,  2.29s/it]

2024-11-06 13:51:53,574 DEV : loss 2.2958009243011475 - f1-score (micro avg)  0.031
2024-11-06 13:51:53,581  - 0 epochs without improvement
2024-11-06 13:51:53,582 ----------------------------------------------------------------------------------------------------





2024-11-06 13:51:54,340 epoch 4 - iter 1/5 - loss 1.45364818 - time (sec): 0.75 - samples/sec: 4021.28 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:56,230 epoch 4 - iter 2/5 - loss 1.14473794 - time (sec): 2.65 - samples/sec: 2703.44 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:57,914 epoch 4 - iter 3/5 - loss 1.32892521 - time (sec): 4.33 - samples/sec: 2314.47 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:51:59,539 epoch 4 - iter 4/5 - loss 1.53250985 - time (sec): 5.95 - samples/sec: 2486.36 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:01,047 epoch 4 - iter 5/5 - loss 1.52797126 - time (sec): 7.46 - samples/sec: 2116.76 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:01,048 ----------------------------------------------------------------------------------------------------
2024-11-06 13:52:01,048 EPOCH 4 done: loss 1.5280 - lr: 0.300000


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]

2024-11-06 13:52:02,986 DEV : loss 3.342398166656494 - f1-score (micro avg)  0.031
2024-11-06 13:52:02,992  - 1 epochs without improvement
2024-11-06 13:52:02,993 ----------------------------------------------------------------------------------------------------





2024-11-06 13:52:03,755 epoch 5 - iter 1/5 - loss 0.77074295 - time (sec): 0.76 - samples/sec: 5348.51 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:05,634 epoch 5 - iter 2/5 - loss 1.17198170 - time (sec): 2.64 - samples/sec: 2970.77 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:07,517 epoch 5 - iter 3/5 - loss 1.45618358 - time (sec): 4.52 - samples/sec: 2606.37 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:09,477 epoch 5 - iter 4/5 - loss 1.43385515 - time (sec): 6.48 - samples/sec: 2339.76 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:10,798 epoch 5 - iter 5/5 - loss 1.46536084 - time (sec): 7.80 - samples/sec: 2024.28 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:10,799 ----------------------------------------------------------------------------------------------------
2024-11-06 13:52:10,800 EPOCH 5 done: loss 1.4654 - lr: 0.300000


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]

2024-11-06 13:52:12,715 DEV : loss 2.2581286430358887 - f1-score (micro avg)  0.031
2024-11-06 13:52:12,721  - 0 epochs without improvement
2024-11-06 13:52:12,723 ----------------------------------------------------------------------------------------------------





2024-11-06 13:52:13,583 epoch 6 - iter 1/5 - loss 1.38216992 - time (sec): 0.86 - samples/sec: 4756.77 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:15,522 epoch 6 - iter 2/5 - loss 1.30968665 - time (sec): 2.80 - samples/sec: 2520.56 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:17,465 epoch 6 - iter 3/5 - loss 1.47620868 - time (sec): 4.74 - samples/sec: 2450.76 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:19,493 epoch 6 - iter 4/5 - loss 1.67489257 - time (sec): 6.77 - samples/sec: 2259.16 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:20,725 epoch 6 - iter 5/5 - loss 1.69952131 - time (sec): 8.00 - samples/sec: 1974.51 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:20,727 ----------------------------------------------------------------------------------------------------
2024-11-06 13:52:20,728 EPOCH 6 done: loss 1.6995 - lr: 0.300000


100%|██████████| 1/1 [00:01<00:00,  1.23s/it]

2024-11-06 13:52:21,978 DEV : loss 1.0808682441711426 - f1-score (micro avg)  0.0
2024-11-06 13:52:21,984  - 1 epochs without improvement
2024-11-06 13:52:21,985 ----------------------------------------------------------------------------------------------------





2024-11-06 13:52:22,678 epoch 7 - iter 1/5 - loss 1.66432550 - time (sec): 0.69 - samples/sec: 4353.62 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:24,603 epoch 7 - iter 2/5 - loss 1.33801256 - time (sec): 2.62 - samples/sec: 3103.44 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:26,703 epoch 7 - iter 3/5 - loss 1.48200931 - time (sec): 4.71 - samples/sec: 2558.94 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:28,630 epoch 7 - iter 4/5 - loss 2.58581118 - time (sec): 6.64 - samples/sec: 2303.34 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:29,833 epoch 7 - iter 5/5 - loss 2.52930565 - time (sec): 7.85 - samples/sec: 2013.29 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:29,834 ----------------------------------------------------------------------------------------------------
2024-11-06 13:52:29,835 EPOCH 7 done: loss 2.5293 - lr: 0.300000


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]

2024-11-06 13:52:31,584 DEV : loss 4.056033611297607 - f1-score (micro avg)  0.031
2024-11-06 13:52:31,590  - 2 epochs without improvement
2024-11-06 13:52:31,592 ----------------------------------------------------------------------------------------------------





2024-11-06 13:52:32,155 epoch 8 - iter 1/5 - loss 2.49123791 - time (sec): 0.56 - samples/sec: 4650.46 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:34,304 epoch 8 - iter 2/5 - loss 1.50439531 - time (sec): 2.71 - samples/sec: 2785.42 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:36,383 epoch 8 - iter 3/5 - loss 1.54259338 - time (sec): 4.79 - samples/sec: 2210.74 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:38,242 epoch 8 - iter 4/5 - loss 2.94460783 - time (sec): 6.65 - samples/sec: 2281.67 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:39,697 epoch 8 - iter 5/5 - loss 2.84966100 - time (sec): 8.10 - samples/sec: 1949.39 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:39,698 ----------------------------------------------------------------------------------------------------
2024-11-06 13:52:39,699 EPOCH 8 done: loss 2.8497 - lr: 0.300000


100%|██████████| 1/1 [00:01<00:00,  1.81s/it]

2024-11-06 13:52:41,531 DEV : loss 2.803159236907959 - f1-score (micro avg)  0.0
2024-11-06 13:52:41,538  - 3 epochs without improvement
2024-11-06 13:52:41,539 ----------------------------------------------------------------------------------------------------





2024-11-06 13:52:42,432 epoch 9 - iter 1/5 - loss 0.88873479 - time (sec): 0.89 - samples/sec: 5403.96 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:44,687 epoch 9 - iter 2/5 - loss 1.22930138 - time (sec): 3.15 - samples/sec: 2863.22 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:46,581 epoch 9 - iter 3/5 - loss 1.21081717 - time (sec): 5.04 - samples/sec: 2383.68 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:48,322 epoch 9 - iter 4/5 - loss 1.52474078 - time (sec): 6.78 - samples/sec: 2245.06 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:49,491 epoch 9 - iter 5/5 - loss 1.51413965 - time (sec): 7.95 - samples/sec: 1986.88 - lr: 0.300000 - momentum: 0.000000
2024-11-06 13:52:49,493 ----------------------------------------------------------------------------------------------------
2024-11-06 13:52:49,493 EPOCH 9 done: loss 1.5141 - lr: 0.300000


100%|██████████| 1/1 [00:01<00:00,  1.30s/it]

2024-11-06 13:52:50,814 DEV : loss 2.0650739669799805 - f1-score (micro avg)  0.0
2024-11-06 13:52:50,821  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.15]
2024-11-06 13:52:50,822 ----------------------------------------------------------------------------------------------------





2024-11-06 13:52:51,526 epoch 10 - iter 1/5 - loss 1.69242380 - time (sec): 0.70 - samples/sec: 4130.71 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:52:53,355 epoch 10 - iter 2/5 - loss 1.32550120 - time (sec): 2.53 - samples/sec: 2486.85 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:52:55,268 epoch 10 - iter 3/5 - loss 2.08454620 - time (sec): 4.44 - samples/sec: 2249.18 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:52:57,390 epoch 10 - iter 4/5 - loss 1.67379772 - time (sec): 6.56 - samples/sec: 2316.82 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:52:58,894 epoch 10 - iter 5/5 - loss 1.65912664 - time (sec): 8.07 - samples/sec: 1957.51 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:52:58,896 ----------------------------------------------------------------------------------------------------
2024-11-06 13:52:58,896 EPOCH 10 done: loss 1.6591 - lr: 0.150000


100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

2024-11-06 13:53:00,115 DEV : loss 1.4314812421798706 - f1-score (micro avg)  0.0
2024-11-06 13:53:00,122  - 1 epochs without improvement
2024-11-06 13:53:00,123 ----------------------------------------------------------------------------------------------------





2024-11-06 13:53:00,827 epoch 11 - iter 1/5 - loss 2.25933959 - time (sec): 0.70 - samples/sec: 3536.08 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:02,656 epoch 11 - iter 2/5 - loss 1.45957398 - time (sec): 2.53 - samples/sec: 2841.80 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:04,598 epoch 11 - iter 3/5 - loss 1.31224063 - time (sec): 4.47 - samples/sec: 2326.90 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:06,666 epoch 11 - iter 4/5 - loss 1.07850507 - time (sec): 6.54 - samples/sec: 2323.74 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:08,170 epoch 11 - iter 5/5 - loss 1.09764978 - time (sec): 8.04 - samples/sec: 1963.46 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:08,171 ----------------------------------------------------------------------------------------------------
2024-11-06 13:53:08,172 EPOCH 11 done: loss 1.0976 - lr: 0.150000


100%|██████████| 1/1 [00:01<00:00,  1.19s/it]

2024-11-06 13:53:09,385 DEV : loss 1.8986519575119019 - f1-score (micro avg)  0.0
2024-11-06 13:53:09,390  - 2 epochs without improvement
2024-11-06 13:53:09,392 ----------------------------------------------------------------------------------------------------





2024-11-06 13:53:10,187 epoch 12 - iter 1/5 - loss 0.83479103 - time (sec): 0.79 - samples/sec: 4724.89 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:12,327 epoch 12 - iter 2/5 - loss 0.78278876 - time (sec): 2.93 - samples/sec: 2649.95 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:14,532 epoch 12 - iter 3/5 - loss 0.79194291 - time (sec): 5.14 - samples/sec: 2327.73 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:16,568 epoch 12 - iter 4/5 - loss 0.80856671 - time (sec): 7.17 - samples/sec: 2147.21 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:17,710 epoch 12 - iter 5/5 - loss 0.83544996 - time (sec): 8.32 - samples/sec: 1899.27 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:17,712 ----------------------------------------------------------------------------------------------------
2024-11-06 13:53:17,713 EPOCH 12 done: loss 0.8354 - lr: 0.150000


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]

2024-11-06 13:53:19,468 DEV : loss 2.115933895111084 - f1-score (micro avg)  0.031
2024-11-06 13:53:19,474  - 0 epochs without improvement
2024-11-06 13:53:19,476 ----------------------------------------------------------------------------------------------------





2024-11-06 13:53:20,307 epoch 13 - iter 1/5 - loss 0.97282255 - time (sec): 0.83 - samples/sec: 5211.00 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:22,431 epoch 13 - iter 2/5 - loss 0.79531098 - time (sec): 2.95 - samples/sec: 2797.48 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:24,624 epoch 13 - iter 3/5 - loss 0.82919902 - time (sec): 5.15 - samples/sec: 2533.47 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:26,500 epoch 13 - iter 4/5 - loss 0.86410898 - time (sec): 7.02 - samples/sec: 2194.98 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:27,311 epoch 13 - iter 5/5 - loss 0.87086942 - time (sec): 7.83 - samples/sec: 2016.39 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:27,312 ----------------------------------------------------------------------------------------------------
2024-11-06 13:53:27,313 EPOCH 13 done: loss 0.8709 - lr: 0.150000


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]

2024-11-06 13:53:29,072 DEV : loss 2.356039047241211 - f1-score (micro avg)  0.031
2024-11-06 13:53:29,078  - 1 epochs without improvement
2024-11-06 13:53:29,080 ----------------------------------------------------------------------------------------------------





2024-11-06 13:53:29,878 epoch 14 - iter 1/5 - loss 1.07182408 - time (sec): 0.80 - samples/sec: 3820.51 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:32,100 epoch 14 - iter 2/5 - loss 1.27972848 - time (sec): 3.02 - samples/sec: 2155.43 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:34,159 epoch 14 - iter 3/5 - loss 0.90894654 - time (sec): 5.08 - samples/sec: 2358.20 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:36,349 epoch 14 - iter 4/5 - loss 0.91526635 - time (sec): 7.27 - samples/sec: 2132.60 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:37,505 epoch 14 - iter 5/5 - loss 0.93003072 - time (sec): 8.42 - samples/sec: 1875.33 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:37,506 ----------------------------------------------------------------------------------------------------
2024-11-06 13:53:37,506 EPOCH 14 done: loss 0.9300 - lr: 0.150000


100%|██████████| 1/1 [00:01<00:00,  1.15s/it]

2024-11-06 13:53:38,673 DEV : loss 2.117419719696045 - f1-score (micro avg)  0.0
2024-11-06 13:53:38,680  - 2 epochs without improvement
2024-11-06 13:53:38,681 ----------------------------------------------------------------------------------------------------





2024-11-06 13:53:39,476 epoch 15 - iter 1/5 - loss 0.86447509 - time (sec): 0.79 - samples/sec: 4763.39 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:41,479 epoch 15 - iter 2/5 - loss 0.83994932 - time (sec): 2.79 - samples/sec: 2772.13 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:43,400 epoch 15 - iter 3/5 - loss 0.87728277 - time (sec): 4.72 - samples/sec: 2336.76 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:45,346 epoch 15 - iter 4/5 - loss 0.81894452 - time (sec): 6.66 - samples/sec: 2219.62 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:46,663 epoch 15 - iter 5/5 - loss 0.86136870 - time (sec): 7.98 - samples/sec: 1979.69 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:46,664 ----------------------------------------------------------------------------------------------------
2024-11-06 13:53:46,665 EPOCH 15 done: loss 0.8614 - lr: 0.150000


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]

2024-11-06 13:53:48,565 DEV : loss 2.940617799758911 - f1-score (micro avg)  0.031
2024-11-06 13:53:48,572  - 3 epochs without improvement
2024-11-06 13:53:48,573 ----------------------------------------------------------------------------------------------------





2024-11-06 13:53:49,366 epoch 16 - iter 1/5 - loss 0.88862080 - time (sec): 0.79 - samples/sec: 4655.09 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:51,376 epoch 16 - iter 2/5 - loss 0.91851730 - time (sec): 2.80 - samples/sec: 2860.16 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:53,275 epoch 16 - iter 3/5 - loss 0.95691555 - time (sec): 4.70 - samples/sec: 2308.08 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:55,236 epoch 16 - iter 4/5 - loss 0.88644020 - time (sec): 6.66 - samples/sec: 2277.51 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:56,648 epoch 16 - iter 5/5 - loss 0.88700128 - time (sec): 8.07 - samples/sec: 1956.71 - lr: 0.150000 - momentum: 0.000000
2024-11-06 13:53:56,649 ----------------------------------------------------------------------------------------------------
2024-11-06 13:53:56,650 EPOCH 16 done: loss 0.8870 - lr: 0.150000


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]

2024-11-06 13:53:58,485 DEV : loss 2.5294673442840576 - f1-score (micro avg)  0.031
2024-11-06 13:53:58,492  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.075]
2024-11-06 13:53:58,493 ----------------------------------------------------------------------------------------------------





2024-11-06 13:53:59,213 epoch 17 - iter 1/5 - loss 0.85431275 - time (sec): 0.72 - samples/sec: 4810.41 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:00,978 epoch 17 - iter 2/5 - loss 0.82691304 - time (sec): 2.48 - samples/sec: 2745.71 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:02,937 epoch 17 - iter 3/5 - loss 0.81488334 - time (sec): 4.44 - samples/sec: 2438.86 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:04,994 epoch 17 - iter 4/5 - loss 0.77682481 - time (sec): 6.50 - samples/sec: 2158.88 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:06,371 epoch 17 - iter 5/5 - loss 0.76716081 - time (sec): 7.88 - samples/sec: 2005.48 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:06,373 ----------------------------------------------------------------------------------------------------
2024-11-06 13:54:06,373 EPOCH 17 done: loss 0.7672 - lr: 0.075000


100%|██████████| 1/1 [00:02<00:00,  2.00s/it]

2024-11-06 13:54:08,411 DEV : loss 2.6841745376586914 - f1-score (micro avg)  0.031
2024-11-06 13:54:08,418  - 1 epochs without improvement
2024-11-06 13:54:08,419 ----------------------------------------------------------------------------------------------------





2024-11-06 13:54:09,580 epoch 18 - iter 1/5 - loss 0.57505130 - time (sec): 1.16 - samples/sec: 4523.92 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:11,906 epoch 18 - iter 2/5 - loss 0.66931906 - time (sec): 3.48 - samples/sec: 2459.61 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:13,659 epoch 18 - iter 3/5 - loss 0.70107063 - time (sec): 5.24 - samples/sec: 2136.70 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:15,404 epoch 18 - iter 4/5 - loss 0.79350465 - time (sec): 6.98 - samples/sec: 2144.21 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:16,578 epoch 18 - iter 5/5 - loss 0.77320698 - time (sec): 8.16 - samples/sec: 1936.40 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:16,580 ----------------------------------------------------------------------------------------------------
2024-11-06 13:54:16,580 EPOCH 18 done: loss 0.7732 - lr: 0.075000


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


2024-11-06 13:54:18,817 DEV : loss 2.546793222427368 - f1-score (micro avg)  0.031
2024-11-06 13:54:18,824  - 2 epochs without improvement
2024-11-06 13:54:18,825 ----------------------------------------------------------------------------------------------------
2024-11-06 13:54:19,612 epoch 19 - iter 1/5 - loss 0.75032655 - time (sec): 0.78 - samples/sec: 4037.10 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:21,489 epoch 19 - iter 2/5 - loss 0.71971634 - time (sec): 2.66 - samples/sec: 2612.92 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:23,517 epoch 19 - iter 3/5 - loss 0.78063071 - time (sec): 4.69 - samples/sec: 2352.47 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:25,533 epoch 19 - iter 4/5 - loss 0.79776613 - time (sec): 6.70 - samples/sec: 2221.45 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:26,930 epoch 19 - iter 5/5 - loss 0.79723329 - time (sec): 8.10 - samples/sec: 1949.39 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:26,932 ------------

100%|██████████| 1/1 [00:01<00:00,  1.87s/it]

2024-11-06 13:54:28,845 DEV : loss 2.7381420135498047 - f1-score (micro avg)  0.031
2024-11-06 13:54:28,852  - 3 epochs without improvement
2024-11-06 13:54:28,853 ----------------------------------------------------------------------------------------------------





2024-11-06 13:54:29,618 epoch 20 - iter 1/5 - loss 0.73528982 - time (sec): 0.76 - samples/sec: 4969.42 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:31,496 epoch 20 - iter 2/5 - loss 0.80640706 - time (sec): 2.64 - samples/sec: 2903.59 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:33,595 epoch 20 - iter 3/5 - loss 0.78286223 - time (sec): 4.74 - samples/sec: 2416.63 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:35,622 epoch 20 - iter 4/5 - loss 0.80072416 - time (sec): 6.77 - samples/sec: 2154.08 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:36,915 epoch 20 - iter 5/5 - loss 0.78870064 - time (sec): 8.06 - samples/sec: 1959.81 - lr: 0.075000 - momentum: 0.000000
2024-11-06 13:54:36,916 ----------------------------------------------------------------------------------------------------
2024-11-06 13:54:36,917 EPOCH 20 done: loss 0.7887 - lr: 0.075000


100%|██████████| 1/1 [00:01<00:00,  1.94s/it]

2024-11-06 13:54:38,894 DEV : loss 2.2323100566864014 - f1-score (micro avg)  0.031
2024-11-06 13:54:38,901  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0375]
2024-11-06 13:54:38,902 ----------------------------------------------------------------------------------------------------





2024-11-06 13:54:39,960 epoch 21 - iter 1/5 - loss 0.61893586 - time (sec): 1.06 - samples/sec: 5220.53 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:42,001 epoch 21 - iter 2/5 - loss 0.71268625 - time (sec): 3.10 - samples/sec: 2699.81 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:43,502 epoch 21 - iter 3/5 - loss 0.71990152 - time (sec): 4.60 - samples/sec: 2627.50 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:45,300 epoch 21 - iter 4/5 - loss 0.76132239 - time (sec): 6.40 - samples/sec: 2316.98 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:46,527 epoch 21 - iter 5/5 - loss 0.76552746 - time (sec): 7.62 - samples/sec: 2071.92 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:46,529 ----------------------------------------------------------------------------------------------------
2024-11-06 13:54:46,530 EPOCH 21 done: loss 0.7655 - lr: 0.037500


100%|██████████| 1/1 [00:01<00:00,  1.92s/it]

2024-11-06 13:54:48,493 DEV : loss 2.29327654838562 - f1-score (micro avg)  0.031
2024-11-06 13:54:48,498  - 1 epochs without improvement
2024-11-06 13:54:48,500 ----------------------------------------------------------------------------------------------------





2024-11-06 13:54:49,228 epoch 22 - iter 1/5 - loss 0.83584170 - time (sec): 0.72 - samples/sec: 5230.86 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:51,217 epoch 22 - iter 2/5 - loss 0.78264704 - time (sec): 2.71 - samples/sec: 2739.02 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:53,306 epoch 22 - iter 3/5 - loss 0.75879079 - time (sec): 4.80 - samples/sec: 2318.94 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:55,340 epoch 22 - iter 4/5 - loss 0.73663449 - time (sec): 6.84 - samples/sec: 2268.61 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:56,700 epoch 22 - iter 5/5 - loss 0.74613254 - time (sec): 8.20 - samples/sec: 1926.73 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:54:56,702 ----------------------------------------------------------------------------------------------------
2024-11-06 13:54:56,703 EPOCH 22 done: loss 0.7461 - lr: 0.037500


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]

2024-11-06 13:54:58,449 DEV : loss 2.2971131801605225 - f1-score (micro avg)  0.031
2024-11-06 13:54:58,456  - 2 epochs without improvement
2024-11-06 13:54:58,458 ----------------------------------------------------------------------------------------------------





2024-11-06 13:54:59,170 epoch 23 - iter 1/5 - loss 0.82383058 - time (sec): 0.71 - samples/sec: 4662.77 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:00,999 epoch 23 - iter 2/5 - loss 0.75656053 - time (sec): 2.54 - samples/sec: 2704.13 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:03,157 epoch 23 - iter 3/5 - loss 0.74804362 - time (sec): 4.70 - samples/sec: 2585.64 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:05,065 epoch 23 - iter 4/5 - loss 0.77335421 - time (sec): 6.60 - samples/sec: 2168.23 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:06,115 epoch 23 - iter 5/5 - loss 0.75450448 - time (sec): 7.65 - samples/sec: 2063.46 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:06,116 ----------------------------------------------------------------------------------------------------
2024-11-06 13:55:06,117 EPOCH 23 done: loss 0.7545 - lr: 0.037500


100%|██████████| 1/1 [00:02<00:00,  2.39s/it]

2024-11-06 13:55:08,545 DEV : loss 2.331432819366455 - f1-score (micro avg)  0.031
2024-11-06 13:55:08,552  - 3 epochs without improvement
2024-11-06 13:55:08,553 ----------------------------------------------------------------------------------------------------





2024-11-06 13:55:09,407 epoch 24 - iter 1/5 - loss 0.73898129 - time (sec): 0.85 - samples/sec: 4845.37 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:11,322 epoch 24 - iter 2/5 - loss 0.81825438 - time (sec): 2.77 - samples/sec: 2744.88 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:13,349 epoch 24 - iter 3/5 - loss 0.75597919 - time (sec): 4.79 - samples/sec: 2419.10 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:15,440 epoch 24 - iter 4/5 - loss 0.74335864 - time (sec): 6.88 - samples/sec: 2191.12 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:16,701 epoch 24 - iter 5/5 - loss 0.74071918 - time (sec): 8.15 - samples/sec: 1939.09 - lr: 0.037500 - momentum: 0.000000
2024-11-06 13:55:16,703 ----------------------------------------------------------------------------------------------------
2024-11-06 13:55:16,704 EPOCH 24 done: loss 0.7407 - lr: 0.037500


100%|██████████| 1/1 [00:01<00:00,  1.76s/it]

2024-11-06 13:55:18,501 DEV : loss 2.2450926303863525 - f1-score (micro avg)  0.031
2024-11-06 13:55:18,507  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.01875]
2024-11-06 13:55:18,508 ----------------------------------------------------------------------------------------------------





2024-11-06 13:55:19,475 epoch 25 - iter 1/5 - loss 0.71594677 - time (sec): 0.96 - samples/sec: 4734.69 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:21,637 epoch 25 - iter 2/5 - loss 0.74832009 - time (sec): 3.13 - samples/sec: 2549.11 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:23,487 epoch 25 - iter 3/5 - loss 0.77594696 - time (sec): 4.98 - samples/sec: 2244.06 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:25,349 epoch 25 - iter 4/5 - loss 0.74184278 - time (sec): 6.84 - samples/sec: 2171.30 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:26,625 epoch 25 - iter 5/5 - loss 0.75749630 - time (sec): 8.11 - samples/sec: 1946.47 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:26,627 ----------------------------------------------------------------------------------------------------
2024-11-06 13:55:26,628 EPOCH 25 done: loss 0.7575 - lr: 0.018750


100%|██████████| 1/1 [00:01<00:00,  1.79s/it]

2024-11-06 13:55:28,453 DEV : loss 2.3088462352752686 - f1-score (micro avg)  0.031
2024-11-06 13:55:28,459  - 1 epochs without improvement
2024-11-06 13:55:28,461 ----------------------------------------------------------------------------------------------------





2024-11-06 13:55:29,256 epoch 26 - iter 1/5 - loss 0.78872521 - time (sec): 0.79 - samples/sec: 4974.68 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:31,333 epoch 26 - iter 2/5 - loss 0.68602031 - time (sec): 2.87 - samples/sec: 2877.26 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:33,264 epoch 26 - iter 3/5 - loss 0.73624487 - time (sec): 4.80 - samples/sec: 2376.13 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:34,992 epoch 26 - iter 4/5 - loss 0.75863086 - time (sec): 6.53 - samples/sec: 2343.58 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:36,324 epoch 26 - iter 5/5 - loss 0.74581958 - time (sec): 7.86 - samples/sec: 2009.53 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:36,325 ----------------------------------------------------------------------------------------------------
2024-11-06 13:55:36,326 EPOCH 26 done: loss 0.7458 - lr: 0.018750


100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

2024-11-06 13:55:37,583 DEV : loss 2.1410164833068848 - f1-score (micro avg)  0.0
2024-11-06 13:55:37,589  - 2 epochs without improvement
2024-11-06 13:55:37,591 ----------------------------------------------------------------------------------------------------





2024-11-06 13:55:38,467 epoch 27 - iter 1/5 - loss 0.68913750 - time (sec): 0.87 - samples/sec: 5138.84 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:40,559 epoch 27 - iter 2/5 - loss 0.68621015 - time (sec): 2.97 - samples/sec: 3062.52 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:42,668 epoch 27 - iter 3/5 - loss 0.69506016 - time (sec): 5.07 - samples/sec: 2424.05 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:44,529 epoch 27 - iter 4/5 - loss 0.74393277 - time (sec): 6.94 - samples/sec: 2174.23 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:45,741 epoch 27 - iter 5/5 - loss 0.74556843 - time (sec): 8.15 - samples/sec: 1938.54 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:45,742 ----------------------------------------------------------------------------------------------------
2024-11-06 13:55:45,743 EPOCH 27 done: loss 0.7456 - lr: 0.018750


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]

2024-11-06 13:55:47,648 DEV : loss 2.2356834411621094 - f1-score (micro avg)  0.031
2024-11-06 13:55:47,655  - 3 epochs without improvement
2024-11-06 13:55:47,656 ----------------------------------------------------------------------------------------------------





2024-11-06 13:55:48,603 epoch 28 - iter 1/5 - loss 0.62401083 - time (sec): 0.94 - samples/sec: 4285.98 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:50,691 epoch 28 - iter 2/5 - loss 0.65860324 - time (sec): 3.03 - samples/sec: 2478.93 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:52,691 epoch 28 - iter 3/5 - loss 0.71545470 - time (sec): 5.03 - samples/sec: 2347.96 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:54,584 epoch 28 - iter 4/5 - loss 0.73765974 - time (sec): 6.93 - samples/sec: 2228.22 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:55,766 epoch 28 - iter 5/5 - loss 0.75288094 - time (sec): 8.11 - samples/sec: 1948.28 - lr: 0.018750 - momentum: 0.000000
2024-11-06 13:55:55,767 ----------------------------------------------------------------------------------------------------
2024-11-06 13:55:55,769 EPOCH 28 done: loss 0.7529 - lr: 0.018750


100%|██████████| 1/1 [00:02<00:00,  2.16s/it]

2024-11-06 13:55:57,983 DEV : loss 2.289015293121338 - f1-score (micro avg)  0.031
2024-11-06 13:55:57,989  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.009375]
2024-11-06 13:55:57,991 ----------------------------------------------------------------------------------------------------





2024-11-06 13:55:58,949 epoch 29 - iter 1/5 - loss 0.66676250 - time (sec): 0.96 - samples/sec: 3994.38 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:00,898 epoch 29 - iter 2/5 - loss 0.74730701 - time (sec): 2.90 - samples/sec: 2765.90 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:02,918 epoch 29 - iter 3/5 - loss 0.73593024 - time (sec): 4.92 - samples/sec: 2386.32 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:04,849 epoch 29 - iter 4/5 - loss 0.75397980 - time (sec): 6.86 - samples/sec: 2260.06 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:06,127 epoch 29 - iter 5/5 - loss 0.75704652 - time (sec): 8.13 - samples/sec: 1941.83 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:06,129 ----------------------------------------------------------------------------------------------------
2024-11-06 13:56:06,130 EPOCH 29 done: loss 0.7570 - lr: 0.009375


100%|██████████| 1/1 [00:01<00:00,  1.74s/it]

2024-11-06 13:56:07,909 DEV : loss 2.1826369762420654 - f1-score (micro avg)  0.031
2024-11-06 13:56:07,915  - 1 epochs without improvement
2024-11-06 13:56:07,916 ----------------------------------------------------------------------------------------------------





2024-11-06 13:56:08,958 epoch 30 - iter 1/5 - loss 0.64461413 - time (sec): 1.04 - samples/sec: 5464.69 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:10,914 epoch 30 - iter 2/5 - loss 0.69424826 - time (sec): 3.00 - samples/sec: 2776.26 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:12,505 epoch 30 - iter 3/5 - loss 0.69161432 - time (sec): 4.59 - samples/sec: 2692.07 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:14,523 epoch 30 - iter 4/5 - loss 0.72678870 - time (sec): 6.60 - samples/sec: 2328.83 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:15,742 epoch 30 - iter 5/5 - loss 0.73958199 - time (sec): 7.82 - samples/sec: 2018.88 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:15,744 ----------------------------------------------------------------------------------------------------
2024-11-06 13:56:15,745 EPOCH 30 done: loss 0.7396 - lr: 0.009375


100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

2024-11-06 13:56:16,987 DEV : loss 2.1580240726470947 - f1-score (micro avg)  0.0
2024-11-06 13:56:16,994  - 2 epochs without improvement
2024-11-06 13:56:16,995 ----------------------------------------------------------------------------------------------------





2024-11-06 13:56:17,539 epoch 31 - iter 1/5 - loss 1.01514786 - time (sec): 0.54 - samples/sec: 5185.89 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:19,307 epoch 31 - iter 2/5 - loss 0.84136023 - time (sec): 2.31 - samples/sec: 3144.32 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:21,495 epoch 31 - iter 3/5 - loss 0.80869188 - time (sec): 4.50 - samples/sec: 2486.71 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:23,446 epoch 31 - iter 4/5 - loss 0.76065969 - time (sec): 6.45 - samples/sec: 2402.30 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:24,741 epoch 31 - iter 5/5 - loss 0.76143114 - time (sec): 7.74 - samples/sec: 2039.95 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:24,742 ----------------------------------------------------------------------------------------------------
2024-11-06 13:56:24,743 EPOCH 31 done: loss 0.7614 - lr: 0.009375


100%|██████████| 1/1 [00:01<00:00,  1.17s/it]

2024-11-06 13:56:25,936 DEV : loss 2.1954519748687744 - f1-score (micro avg)  0.0
2024-11-06 13:56:25,943  - 3 epochs without improvement
2024-11-06 13:56:25,944 ----------------------------------------------------------------------------------------------------





2024-11-06 13:56:26,677 epoch 32 - iter 1/5 - loss 0.84310255 - time (sec): 0.73 - samples/sec: 5522.75 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:28,558 epoch 32 - iter 2/5 - loss 0.73579576 - time (sec): 2.61 - samples/sec: 2900.76 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:30,658 epoch 32 - iter 3/5 - loss 0.70225921 - time (sec): 4.71 - samples/sec: 2601.64 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:32,832 epoch 32 - iter 4/5 - loss 0.73189071 - time (sec): 6.89 - samples/sec: 2208.15 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:34,061 epoch 32 - iter 5/5 - loss 0.73734777 - time (sec): 8.11 - samples/sec: 1946.46 - lr: 0.009375 - momentum: 0.000000
2024-11-06 13:56:34,063 ----------------------------------------------------------------------------------------------------
2024-11-06 13:56:34,064 EPOCH 32 done: loss 0.7373 - lr: 0.009375


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]

2024-11-06 13:56:35,817 DEV : loss 2.2181007862091064 - f1-score (micro avg)  0.031
2024-11-06 13:56:35,824  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0046875]
2024-11-06 13:56:35,825 ----------------------------------------------------------------------------------------------------





2024-11-06 13:56:36,536 epoch 33 - iter 1/5 - loss 0.80319511 - time (sec): 0.71 - samples/sec: 4145.28 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:38,519 epoch 33 - iter 2/5 - loss 0.72521477 - time (sec): 2.69 - samples/sec: 2707.74 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:40,659 epoch 33 - iter 3/5 - loss 0.73815267 - time (sec): 4.83 - samples/sec: 2431.38 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:42,627 epoch 33 - iter 4/5 - loss 0.73130901 - time (sec): 6.80 - samples/sec: 2185.63 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:43,877 epoch 33 - iter 5/5 - loss 0.74549275 - time (sec): 8.05 - samples/sec: 1962.30 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:43,878 ----------------------------------------------------------------------------------------------------
2024-11-06 13:56:43,879 EPOCH 33 done: loss 0.7455 - lr: 0.004687


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]

2024-11-06 13:56:45,237 DEV : loss 2.1566810607910156 - f1-score (micro avg)  0.0
2024-11-06 13:56:45,243  - 1 epochs without improvement
2024-11-06 13:56:45,244 ----------------------------------------------------------------------------------------------------





2024-11-06 13:56:46,424 epoch 34 - iter 1/5 - loss 0.73508680 - time (sec): 1.18 - samples/sec: 3379.28 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:48,374 epoch 34 - iter 2/5 - loss 0.69308466 - time (sec): 3.13 - samples/sec: 2403.11 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:50,472 epoch 34 - iter 3/5 - loss 0.71765161 - time (sec): 5.23 - samples/sec: 2171.58 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:52,573 epoch 34 - iter 4/5 - loss 0.75083430 - time (sec): 7.33 - samples/sec: 2053.58 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:53,826 epoch 34 - iter 5/5 - loss 0.74945251 - time (sec): 8.58 - samples/sec: 1841.09 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:53,828 ----------------------------------------------------------------------------------------------------
2024-11-06 13:56:53,829 EPOCH 34 done: loss 0.7495 - lr: 0.004687


100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

2024-11-06 13:56:55,086 DEV : loss 2.1500771045684814 - f1-score (micro avg)  0.0
2024-11-06 13:56:55,091  - 2 epochs without improvement
2024-11-06 13:56:55,093 ----------------------------------------------------------------------------------------------------





2024-11-06 13:56:55,818 epoch 35 - iter 1/5 - loss 0.82557674 - time (sec): 0.72 - samples/sec: 3893.47 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:57,609 epoch 35 - iter 2/5 - loss 0.87607723 - time (sec): 2.51 - samples/sec: 2371.02 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:56:59,650 epoch 35 - iter 3/5 - loss 0.77405950 - time (sec): 4.55 - samples/sec: 2399.54 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:57:01,811 epoch 35 - iter 4/5 - loss 0.73599958 - time (sec): 6.72 - samples/sec: 2252.30 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:57:03,150 epoch 35 - iter 5/5 - loss 0.73684965 - time (sec): 8.05 - samples/sec: 1961.03 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:57:03,151 ----------------------------------------------------------------------------------------------------
2024-11-06 13:57:03,152 EPOCH 35 done: loss 0.7368 - lr: 0.004687


100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

2024-11-06 13:57:04,419 DEV : loss 2.1538712978363037 - f1-score (micro avg)  0.0
2024-11-06 13:57:04,425  - 3 epochs without improvement
2024-11-06 13:57:04,426 ----------------------------------------------------------------------------------------------------





2024-11-06 13:57:05,224 epoch 36 - iter 1/5 - loss 0.70672742 - time (sec): 0.80 - samples/sec: 4441.08 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:57:07,308 epoch 36 - iter 2/5 - loss 0.73022370 - time (sec): 2.88 - samples/sec: 2616.25 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:57:09,294 epoch 36 - iter 3/5 - loss 0.72866965 - time (sec): 4.87 - samples/sec: 2401.03 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:57:11,233 epoch 36 - iter 4/5 - loss 0.75626715 - time (sec): 6.80 - samples/sec: 2223.04 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:57:12,443 epoch 36 - iter 5/5 - loss 0.74823965 - time (sec): 8.01 - samples/sec: 1970.73 - lr: 0.004687 - momentum: 0.000000
2024-11-06 13:57:12,445 ----------------------------------------------------------------------------------------------------
2024-11-06 13:57:12,446 EPOCH 36 done: loss 0.7482 - lr: 0.004687


100%|██████████| 1/1 [00:01<00:00,  1.28s/it]

2024-11-06 13:57:13,744 DEV : loss 2.129985809326172 - f1-score (micro avg)  0.0
2024-11-06 13:57:13,750  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00234375]
2024-11-06 13:57:13,751 ----------------------------------------------------------------------------------------------------





2024-11-06 13:57:14,628 epoch 37 - iter 1/5 - loss 0.56462596 - time (sec): 0.88 - samples/sec: 3857.99 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:16,557 epoch 37 - iter 2/5 - loss 0.78213760 - time (sec): 2.80 - samples/sec: 2372.76 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:18,388 epoch 37 - iter 3/5 - loss 0.77642459 - time (sec): 4.63 - samples/sec: 2207.86 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:20,361 epoch 37 - iter 4/5 - loss 0.72386461 - time (sec): 6.61 - samples/sec: 2280.27 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:21,850 epoch 37 - iter 5/5 - loss 0.73843103 - time (sec): 8.10 - samples/sec: 1950.64 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:21,852 ----------------------------------------------------------------------------------------------------
2024-11-06 13:57:21,853 EPOCH 37 done: loss 0.7384 - lr: 0.002344


100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

2024-11-06 13:57:23,121 DEV : loss 2.132415771484375 - f1-score (micro avg)  0.0
2024-11-06 13:57:23,128  - 1 epochs without improvement
2024-11-06 13:57:23,129 ----------------------------------------------------------------------------------------------------





2024-11-06 13:57:23,836 epoch 38 - iter 1/5 - loss 0.87756834 - time (sec): 0.70 - samples/sec: 4340.89 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:25,594 epoch 38 - iter 2/5 - loss 0.78925965 - time (sec): 2.46 - samples/sec: 2736.61 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:27,632 epoch 38 - iter 3/5 - loss 0.73199919 - time (sec): 4.50 - samples/sec: 2323.49 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:29,785 epoch 38 - iter 4/5 - loss 0.74817069 - time (sec): 6.65 - samples/sec: 2213.47 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:31,255 epoch 38 - iter 5/5 - loss 0.73994173 - time (sec): 8.12 - samples/sec: 1944.40 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:31,256 ----------------------------------------------------------------------------------------------------
2024-11-06 13:57:31,257 EPOCH 38 done: loss 0.7399 - lr: 0.002344


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

2024-11-06 13:57:32,633 DEV : loss 2.1255035400390625 - f1-score (micro avg)  0.0
2024-11-06 13:57:32,640  - 2 epochs without improvement
2024-11-06 13:57:32,642 ----------------------------------------------------------------------------------------------------





2024-11-06 13:57:33,447 epoch 39 - iter 1/5 - loss 0.68994239 - time (sec): 0.80 - samples/sec: 4658.70 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:35,458 epoch 39 - iter 2/5 - loss 0.68203082 - time (sec): 2.81 - samples/sec: 2571.75 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:37,456 epoch 39 - iter 3/5 - loss 0.75284653 - time (sec): 4.81 - samples/sec: 2116.43 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:39,363 epoch 39 - iter 4/5 - loss 0.76553309 - time (sec): 6.72 - samples/sec: 2156.87 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:40,712 epoch 39 - iter 5/5 - loss 0.73954218 - time (sec): 8.07 - samples/sec: 1957.83 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:40,713 ----------------------------------------------------------------------------------------------------
2024-11-06 13:57:40,714 EPOCH 39 done: loss 0.7395 - lr: 0.002344


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]

2024-11-06 13:57:42,130 DEV : loss 2.1207151412963867 - f1-score (micro avg)  0.0
2024-11-06 13:57:42,136  - 3 epochs without improvement
2024-11-06 13:57:42,138 ----------------------------------------------------------------------------------------------------





2024-11-06 13:57:43,119 epoch 40 - iter 1/5 - loss 0.67088870 - time (sec): 0.98 - samples/sec: 4967.56 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:45,238 epoch 40 - iter 2/5 - loss 0.73165417 - time (sec): 3.10 - samples/sec: 2486.52 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:46,990 epoch 40 - iter 3/5 - loss 0.71299870 - time (sec): 4.85 - samples/sec: 2264.20 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:48,913 epoch 40 - iter 4/5 - loss 0.73057098 - time (sec): 6.77 - samples/sec: 2240.85 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:50,343 epoch 40 - iter 5/5 - loss 0.73959457 - time (sec): 8.20 - samples/sec: 1925.61 - lr: 0.002344 - momentum: 0.000000
2024-11-06 13:57:50,344 ----------------------------------------------------------------------------------------------------
2024-11-06 13:57:50,346 EPOCH 40 done: loss 0.7396 - lr: 0.002344


100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

2024-11-06 13:57:51,642 DEV : loss 2.134614944458008 - f1-score (micro avg)  0.0
2024-11-06 13:57:51,649  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.001171875]
2024-11-06 13:57:51,650 ----------------------------------------------------------------------------------------------------





2024-11-06 13:57:52,760 epoch 41 - iter 1/5 - loss 0.65002278 - time (sec): 1.11 - samples/sec: 5402.23 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:57:54,904 epoch 41 - iter 2/5 - loss 0.73217133 - time (sec): 3.25 - samples/sec: 2624.89 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:57:56,345 epoch 41 - iter 3/5 - loss 0.78384665 - time (sec): 4.69 - samples/sec: 2504.23 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:57:58,166 epoch 41 - iter 4/5 - loss 0.75416224 - time (sec): 6.51 - samples/sec: 2352.68 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:57:59,422 epoch 41 - iter 5/5 - loss 0.74409538 - time (sec): 7.77 - samples/sec: 2033.27 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:57:59,423 ----------------------------------------------------------------------------------------------------
2024-11-06 13:57:59,424 EPOCH 41 done: loss 0.7441 - lr: 0.001172


100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

2024-11-06 13:58:00,668 DEV : loss 2.128782272338867 - f1-score (micro avg)  0.0
2024-11-06 13:58:00,673  - 1 epochs without improvement
2024-11-06 13:58:00,675 ----------------------------------------------------------------------------------------------------





2024-11-06 13:58:01,391 epoch 42 - iter 1/5 - loss 0.85721241 - time (sec): 0.71 - samples/sec: 4834.75 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:03,847 epoch 42 - iter 2/5 - loss 0.76401431 - time (sec): 3.17 - samples/sec: 2530.25 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:06,048 epoch 42 - iter 3/5 - loss 0.72510282 - time (sec): 5.37 - samples/sec: 2173.07 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:07,982 epoch 42 - iter 4/5 - loss 0.74184871 - time (sec): 7.30 - samples/sec: 2098.72 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:09,166 epoch 42 - iter 5/5 - loss 0.74024160 - time (sec): 8.49 - samples/sec: 1860.76 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:09,167 ----------------------------------------------------------------------------------------------------
2024-11-06 13:58:09,168 EPOCH 42 done: loss 0.7402 - lr: 0.001172


100%|██████████| 1/1 [00:01<00:00,  1.18s/it]

2024-11-06 13:58:10,370 DEV : loss 2.118509531021118 - f1-score (micro avg)  0.0
2024-11-06 13:58:10,376  - 2 epochs without improvement
2024-11-06 13:58:10,378 ----------------------------------------------------------------------------------------------------





2024-11-06 13:58:11,089 epoch 43 - iter 1/5 - loss 0.81324014 - time (sec): 0.71 - samples/sec: 4728.58 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:13,003 epoch 43 - iter 2/5 - loss 0.82735853 - time (sec): 2.62 - samples/sec: 2435.03 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:14,864 epoch 43 - iter 3/5 - loss 0.79906166 - time (sec): 4.48 - samples/sec: 2252.71 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:16,947 epoch 43 - iter 4/5 - loss 0.73434042 - time (sec): 6.57 - samples/sec: 2350.29 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:18,467 epoch 43 - iter 5/5 - loss 0.73345223 - time (sec): 8.09 - samples/sec: 1953.24 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:18,468 ----------------------------------------------------------------------------------------------------
2024-11-06 13:58:18,469 EPOCH 43 done: loss 0.7335 - lr: 0.001172


100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

2024-11-06 13:58:19,728 DEV : loss 2.119107961654663 - f1-score (micro avg)  0.0
2024-11-06 13:58:19,735  - 3 epochs without improvement
2024-11-06 13:58:19,736 ----------------------------------------------------------------------------------------------------





2024-11-06 13:58:20,528 epoch 44 - iter 1/5 - loss 0.76611307 - time (sec): 0.79 - samples/sec: 5922.58 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:22,482 epoch 44 - iter 2/5 - loss 0.80331050 - time (sec): 2.74 - samples/sec: 2680.55 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:24,437 epoch 44 - iter 3/5 - loss 0.73153998 - time (sec): 4.70 - samples/sec: 2540.76 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:26,460 epoch 44 - iter 4/5 - loss 0.74337485 - time (sec): 6.72 - samples/sec: 2260.93 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:27,755 epoch 44 - iter 5/5 - loss 0.74436044 - time (sec): 8.02 - samples/sec: 1970.37 - lr: 0.001172 - momentum: 0.000000
2024-11-06 13:58:27,757 ----------------------------------------------------------------------------------------------------
2024-11-06 13:58:27,758 EPOCH 44 done: loss 0.7444 - lr: 0.001172


100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

2024-11-06 13:58:28,987 DEV : loss 2.112769603729248 - f1-score (micro avg)  0.0
2024-11-06 13:58:28,993  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.0005859375]
2024-11-06 13:58:28,994 ----------------------------------------------------------------------------------------------------





2024-11-06 13:58:29,972 epoch 45 - iter 1/5 - loss 0.61570657 - time (sec): 0.98 - samples/sec: 5492.90 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:32,108 epoch 45 - iter 2/5 - loss 0.71259992 - time (sec): 3.11 - samples/sec: 2642.43 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:33,976 epoch 45 - iter 3/5 - loss 0.72905571 - time (sec): 4.98 - samples/sec: 2451.55 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:35,871 epoch 45 - iter 4/5 - loss 0.73200953 - time (sec): 6.87 - samples/sec: 2262.97 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:37,013 epoch 45 - iter 5/5 - loss 0.74391500 - time (sec): 8.02 - samples/sec: 1970.36 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:37,015 ----------------------------------------------------------------------------------------------------
2024-11-06 13:58:37,016 EPOCH 45 done: loss 0.7439 - lr: 0.000586


100%|██████████| 1/1 [00:01<00:00,  1.14s/it]

2024-11-06 13:58:38,180 DEV : loss 2.1099488735198975 - f1-score (micro avg)  0.0
2024-11-06 13:58:38,186  - 1 epochs without improvement
2024-11-06 13:58:38,188 ----------------------------------------------------------------------------------------------------





2024-11-06 13:58:38,989 epoch 46 - iter 1/5 - loss 0.85056008 - time (sec): 0.80 - samples/sec: 3957.28 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:41,076 epoch 46 - iter 2/5 - loss 0.73808907 - time (sec): 2.89 - samples/sec: 2467.16 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:43,147 epoch 46 - iter 3/5 - loss 0.72266673 - time (sec): 4.96 - samples/sec: 2325.47 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:45,051 epoch 46 - iter 4/5 - loss 0.73980067 - time (sec): 6.86 - samples/sec: 2182.47 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:46,253 epoch 46 - iter 5/5 - loss 0.73126899 - time (sec): 8.06 - samples/sec: 1958.90 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:46,254 ----------------------------------------------------------------------------------------------------
2024-11-06 13:58:46,256 EPOCH 46 done: loss 0.7313 - lr: 0.000586


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

2024-11-06 13:58:47,629 DEV : loss 2.1112000942230225 - f1-score (micro avg)  0.0
2024-11-06 13:58:47,636  - 2 epochs without improvement
2024-11-06 13:58:47,637 ----------------------------------------------------------------------------------------------------





2024-11-06 13:58:48,442 epoch 47 - iter 1/5 - loss 0.85574307 - time (sec): 0.80 - samples/sec: 5876.39 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:50,475 epoch 47 - iter 2/5 - loss 0.83291464 - time (sec): 2.84 - samples/sec: 2885.67 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:52,567 epoch 47 - iter 3/5 - loss 0.76611522 - time (sec): 4.93 - samples/sec: 2447.50 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:54,547 epoch 47 - iter 4/5 - loss 0.76049885 - time (sec): 6.91 - samples/sec: 2155.03 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:55,746 epoch 47 - iter 5/5 - loss 0.75064947 - time (sec): 8.11 - samples/sec: 1948.49 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:55,747 ----------------------------------------------------------------------------------------------------
2024-11-06 13:58:55,748 EPOCH 47 done: loss 0.7506 - lr: 0.000586


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]

2024-11-06 13:58:57,134 DEV : loss 2.1093082427978516 - f1-score (micro avg)  0.0
2024-11-06 13:58:57,141  - 3 epochs without improvement
2024-11-06 13:58:57,141 ----------------------------------------------------------------------------------------------------





2024-11-06 13:58:57,855 epoch 48 - iter 1/5 - loss 0.71501855 - time (sec): 0.71 - samples/sec: 4295.86 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:58:59,636 epoch 48 - iter 2/5 - loss 0.81857390 - time (sec): 2.49 - samples/sec: 2719.34 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:59:01,543 epoch 48 - iter 3/5 - loss 0.78956375 - time (sec): 4.40 - samples/sec: 2425.82 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:59:03,798 epoch 48 - iter 4/5 - loss 0.72419318 - time (sec): 6.65 - samples/sec: 2317.39 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:59:05,248 epoch 48 - iter 5/5 - loss 0.74462830 - time (sec): 8.10 - samples/sec: 1948.95 - lr: 0.000586 - momentum: 0.000000
2024-11-06 13:59:05,250 ----------------------------------------------------------------------------------------------------
2024-11-06 13:59:05,251 EPOCH 48 done: loss 0.7446 - lr: 0.000586


100%|██████████| 1/1 [00:01<00:00,  1.22s/it]

2024-11-06 13:59:06,494 DEV : loss 2.1100640296936035 - f1-score (micro avg)  0.0
2024-11-06 13:59:06,500  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.00029296875]
2024-11-06 13:59:06,502 ----------------------------------------------------------------------------------------------------





2024-11-06 13:59:07,218 epoch 49 - iter 1/5 - loss 0.82886000 - time (sec): 0.71 - samples/sec: 4633.05 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:09,117 epoch 49 - iter 2/5 - loss 0.75906067 - time (sec): 2.61 - samples/sec: 2783.15 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:11,375 epoch 49 - iter 3/5 - loss 0.69443105 - time (sec): 4.87 - samples/sec: 2441.34 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:13,455 epoch 49 - iter 4/5 - loss 0.73883810 - time (sec): 6.95 - samples/sec: 2181.54 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:14,658 epoch 49 - iter 5/5 - loss 0.74572380 - time (sec): 8.15 - samples/sec: 1937.13 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:14,659 ----------------------------------------------------------------------------------------------------
2024-11-06 13:59:14,661 EPOCH 49 done: loss 0.7457 - lr: 0.000293


100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

2024-11-06 13:59:15,932 DEV : loss 2.1088993549346924 - f1-score (micro avg)  0.0
2024-11-06 13:59:15,938  - 1 epochs without improvement
2024-11-06 13:59:15,939 ----------------------------------------------------------------------------------------------------





2024-11-06 13:59:16,687 epoch 50 - iter 1/5 - loss 0.73539700 - time (sec): 0.74 - samples/sec: 4700.21 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:18,916 epoch 50 - iter 2/5 - loss 0.77029299 - time (sec): 2.97 - samples/sec: 2334.86 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:20,752 epoch 50 - iter 3/5 - loss 0.80636977 - time (sec): 4.81 - samples/sec: 2134.24 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:22,661 epoch 50 - iter 4/5 - loss 0.75581440 - time (sec): 6.72 - samples/sec: 2116.58 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:24,208 epoch 50 - iter 5/5 - loss 0.73477771 - time (sec): 8.27 - samples/sec: 1910.94 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:24,209 ----------------------------------------------------------------------------------------------------
2024-11-06 13:59:24,210 EPOCH 50 done: loss 0.7348 - lr: 0.000293


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

2024-11-06 13:59:25,713 DEV : loss 2.1093502044677734 - f1-score (micro avg)  0.0
2024-11-06 13:59:25,718  - 2 epochs without improvement
2024-11-06 13:59:25,719 ----------------------------------------------------------------------------------------------------





2024-11-06 13:59:26,203 epoch 51 - iter 1/5 - loss 0.92854000 - time (sec): 0.48 - samples/sec: 5754.80 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:27,667 epoch 51 - iter 2/5 - loss 0.79905493 - time (sec): 1.94 - samples/sec: 3395.70 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:29,922 epoch 51 - iter 3/5 - loss 0.70280014 - time (sec): 4.20 - samples/sec: 3032.92 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:32,251 epoch 51 - iter 4/5 - loss 0.72306667 - time (sec): 6.53 - samples/sec: 2363.90 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:33,375 epoch 51 - iter 5/5 - loss 0.74123042 - time (sec): 7.65 - samples/sec: 2064.03 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:33,376 ----------------------------------------------------------------------------------------------------
2024-11-06 13:59:33,377 EPOCH 51 done: loss 0.7412 - lr: 0.000293


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

2024-11-06 13:59:34,662 DEV : loss 2.1099021434783936 - f1-score (micro avg)  0.0
2024-11-06 13:59:34,669  - 3 epochs without improvement
2024-11-06 13:59:34,671 ----------------------------------------------------------------------------------------------------





2024-11-06 13:59:35,635 epoch 52 - iter 1/5 - loss 0.65854809 - time (sec): 0.96 - samples/sec: 4484.05 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:37,798 epoch 52 - iter 2/5 - loss 0.71275360 - time (sec): 3.13 - samples/sec: 2662.05 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:39,673 epoch 52 - iter 3/5 - loss 0.76376672 - time (sec): 5.00 - samples/sec: 2256.29 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:41,488 epoch 52 - iter 4/5 - loss 0.75014734 - time (sec): 6.81 - samples/sec: 2257.00 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:42,724 epoch 52 - iter 5/5 - loss 0.74154402 - time (sec): 8.05 - samples/sec: 1962.05 - lr: 0.000293 - momentum: 0.000000
2024-11-06 13:59:42,725 ----------------------------------------------------------------------------------------------------
2024-11-06 13:59:42,726 EPOCH 52 done: loss 0.7415 - lr: 0.000293


100%|██████████| 1/1 [00:01<00:00,  1.18s/it]

2024-11-06 13:59:43,927 DEV : loss 2.110234498977661 - f1-score (micro avg)  0.0
2024-11-06 13:59:43,933  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.000146484375]
2024-11-06 13:59:43,935 ----------------------------------------------------------------------------------------------------





2024-11-06 13:59:44,646 epoch 53 - iter 1/5 - loss 0.92008665 - time (sec): 0.71 - samples/sec: 3526.25 - lr: 0.000146 - momentum: 0.000000
2024-11-06 13:59:46,466 epoch 53 - iter 2/5 - loss 0.75327941 - time (sec): 2.53 - samples/sec: 2679.39 - lr: 0.000146 - momentum: 0.000000
2024-11-06 13:59:48,559 epoch 53 - iter 3/5 - loss 0.69886651 - time (sec): 4.62 - samples/sec: 2319.46 - lr: 0.000146 - momentum: 0.000000
2024-11-06 13:59:50,578 epoch 53 - iter 4/5 - loss 0.76622246 - time (sec): 6.64 - samples/sec: 2223.28 - lr: 0.000146 - momentum: 0.000000
2024-11-06 13:59:51,879 epoch 53 - iter 5/5 - loss 0.73377541 - time (sec): 7.94 - samples/sec: 1988.79 - lr: 0.000146 - momentum: 0.000000
2024-11-06 13:59:51,880 ----------------------------------------------------------------------------------------------------
2024-11-06 13:59:51,881 EPOCH 53 done: loss 0.7338 - lr: 0.000146


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]

2024-11-06 13:59:53,367 DEV : loss 2.109262466430664 - f1-score (micro avg)  0.0
2024-11-06 13:59:53,373  - 1 epochs without improvement
2024-11-06 13:59:53,374 ----------------------------------------------------------------------------------------------------





2024-11-06 13:59:54,047 epoch 54 - iter 1/5 - loss 0.93331876 - time (sec): 0.67 - samples/sec: 3681.29 - lr: 0.000146 - momentum: 0.000000
2024-11-06 13:59:55,777 epoch 54 - iter 2/5 - loss 0.81310183 - time (sec): 2.40 - samples/sec: 2356.31 - lr: 0.000146 - momentum: 0.000000
2024-11-06 13:59:57,801 epoch 54 - iter 3/5 - loss 0.79794922 - time (sec): 4.42 - samples/sec: 2207.36 - lr: 0.000146 - momentum: 0.000000
2024-11-06 13:59:59,982 epoch 54 - iter 4/5 - loss 0.72580916 - time (sec): 6.61 - samples/sec: 2315.73 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:01,464 epoch 54 - iter 5/5 - loss 0.73707362 - time (sec): 8.09 - samples/sec: 1952.89 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:01,466 ----------------------------------------------------------------------------------------------------
2024-11-06 14:00:01,467 EPOCH 54 done: loss 0.7371 - lr: 0.000146


100%|██████████| 1/1 [00:01<00:00,  1.21s/it]

2024-11-06 14:00:02,702 DEV : loss 2.1089272499084473 - f1-score (micro avg)  0.0
2024-11-06 14:00:02,708  - 2 epochs without improvement
2024-11-06 14:00:02,709 ----------------------------------------------------------------------------------------------------





2024-11-06 14:00:03,516 epoch 55 - iter 1/5 - loss 0.79659609 - time (sec): 0.80 - samples/sec: 4625.75 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:05,370 epoch 55 - iter 2/5 - loss 0.82839493 - time (sec): 2.66 - samples/sec: 2760.47 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:07,190 epoch 55 - iter 3/5 - loss 0.80336536 - time (sec): 4.48 - samples/sec: 2447.49 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:09,193 epoch 55 - iter 4/5 - loss 0.77315947 - time (sec): 6.48 - samples/sec: 2256.47 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:10,604 epoch 55 - iter 5/5 - loss 0.74129562 - time (sec): 7.89 - samples/sec: 2001.47 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:10,605 ----------------------------------------------------------------------------------------------------
2024-11-06 14:00:10,607 EPOCH 55 done: loss 0.7413 - lr: 0.000146


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]

2024-11-06 14:00:12,098 DEV : loss 2.1088526248931885 - f1-score (micro avg)  0.0
2024-11-06 14:00:12,103  - 3 epochs without improvement
2024-11-06 14:00:12,105 ----------------------------------------------------------------------------------------------------





2024-11-06 14:00:13,074 epoch 56 - iter 1/5 - loss 0.68379631 - time (sec): 0.97 - samples/sec: 4838.72 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:15,240 epoch 56 - iter 2/5 - loss 0.69164966 - time (sec): 3.13 - samples/sec: 2660.49 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:17,232 epoch 56 - iter 3/5 - loss 0.70538785 - time (sec): 5.13 - samples/sec: 2314.55 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:19,102 epoch 56 - iter 4/5 - loss 0.74475010 - time (sec): 6.99 - samples/sec: 2146.40 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:20,267 epoch 56 - iter 5/5 - loss 0.74285322 - time (sec): 8.16 - samples/sec: 1935.78 - lr: 0.000146 - momentum: 0.000000
2024-11-06 14:00:20,268 ----------------------------------------------------------------------------------------------------
2024-11-06 14:00:20,269 EPOCH 56 done: loss 0.7429 - lr: 0.000146


100%|██████████| 1/1 [00:01<00:00,  1.27s/it]

2024-11-06 14:00:21,557 DEV : loss 2.1083991527557373 - f1-score (micro avg)  0.0
2024-11-06 14:00:21,564  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [7.32421875e-05]
2024-11-06 14:00:21,564 ----------------------------------------------------------------------------------------------------
2024-11-06 14:00:21,565 learning rate too small - quitting training!
2024-11-06 14:00:21,566 ----------------------------------------------------------------------------------------------------
2024-11-06 14:00:21,567 Saving model ...





2024-11-06 14:00:22,548 Done.
2024-11-06 14:00:22,549 ----------------------------------------------------------------------------------------------------
2024-11-06 14:00:22,552 Loading model from best epoch ...
2024-11-06 14:00:25,740 SequenceTagger predicts: Dictionary with 45 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, S-STUDY, B-STUDY, E-STUDY, I-STUDY


100%|██████████| 2/2 [00:08<00:00,  4.43s/it]

2024-11-06 14:00:34,924 
Results:
- F-score (micro) 0.0298
- F-score (macro) 0.0028
- Accuracy 0.0153

By class:
              precision    recall  f1-score   support

       SKILL     0.0158    0.5791    0.0308       487
         JOB     0.0000    0.0000    0.0000       130
        WORK     0.0000    0.0000    0.0000       111
     COMPANY     0.0000    0.0000    0.0000        82
         LOC     0.0000    0.0000    0.0000        66
         UNI     0.0000    0.0000    0.0000        43
        NAME     0.0000    0.0000    0.0000        41
       PHONE     0.0000    0.0000    0.0000        41
         DEG     0.0000    0.0000    0.0000        37
       STUDY     0.0000    0.0000    0.0000        34
       EMAIL     0.0000    0.0000    0.0000        30

   micro avg     0.0158    0.2559    0.0298      1102
   macro avg     0.0014    0.0526    0.0028      1102
weighted avg     0.0070    0.2559    0.0136      1102

2024-11-06 14:00:34,925 --------------------------------------------------


  pid, fd = os.forkpty()


cp: './flair_output' and '/kaggle/working/flair_output' are the same file


In [10]:
# evaluate model

from flair.data import Corpus

from flair.datasets import ColumnCorpus

from flair.models import SequenceTagger

from flair.trainers import ModelTrainer



# Load the trained model

model = SequenceTagger.load('/content/drive/MyDrive/FYP/Implementation/flair_output/final-model.pt')



# Evaluate the model on the test set

result = model.evaluate(corpus.test, gold_label_type='ner', mini_batch_size=32)



# Print the results

# print("Evaluation Loss:", eval_loss)

print(result.detailed_results)  # print the precision, recall, and F1-score per entity type

2024-11-06 10:57:38,582 SequenceTagger predicts: Dictionary with 45 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-DEG, B-DEG, E-DEG, I-DEG, S-UNI, B-UNI, E-UNI, I-UNI, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL


100%|██████████| 3/3 [00:03<00:00,  1.29s/it]


Results:
- F-score (micro) 0.563
- F-score (macro) 0.7187
- Accuracy 0.3962

By class:
              precision    recall  f1-score   support

       SKILL     0.3408    0.3862    0.3621       593
         JOB     0.5730    0.7020    0.6310       151
        WORK     0.8188    0.9457    0.8777       129
     COMPANY     0.5172    0.5714    0.5430       105
         LOC     0.5701    0.7176    0.6354        85
         DEG     0.5862    0.7907    0.6733        43
         UNI     0.5660    0.6667    0.6122        45
        NAME     0.9388    0.9787    0.9583        47
       PHONE     1.0000    0.9787    0.9892        47
       EMAIL     0.8222    0.9250    0.8706        40
       STUDY     0.8286    0.6905    0.7532        42

   micro avg     0.5281    0.6029    0.5630      1327
   macro avg     0.6874    0.7594    0.7187      1327
weighted avg     0.5324    0.6029    0.5642      1327






In [11]:
from flair.models import SequenceTagger

from flair.data import Sentence

import spacy

from spacy import displacy



# Load your trained Flair NER model

tagger = SequenceTagger.load('/content/drive/MyDrive/FYP/Implementation/flair_output/best-model.pt')



resume_text = '''

John Doe lives at 1234 Elm Street in Los Angeles, CA 90001. He can be reached at +1 (555) 123-4567 or via email at john.doe@example.com. John is a results-driven software engineer with over 5 years of experience in web development and cloud infrastructure, with strong knowledge of JavaScript, Python, and cloud technologies like AWS and Azure. Currently, he works as a Software Engineer at Google LLC in San Francisco, CA, where he has been employed since August 2019. In this role, he has developed scalable web applications using JavaScript, Node.js, and React, deployed and maintained cloud infrastructure on AWS, reducing downtime by 20%, and led a team of 4 engineers to enhance backend performance by 30%. Previously, he worked as a Junior Developer at Tech Innovators Inc. in Austin, TX, from July 2017 to July 2019, where he created RESTful APIs using Python and Flask, collaborated with front-end developers to build and deploy user-facing applications, and wrote unit and integration tests, improving code coverage by 15%.



John holds a Master of Science in Computer Science from the University of California, Berkeley, with a graduation date of May 2017, and a Bachelor of Science in Information Technology from the University of Texas at Austin, graduated in May 2015. His skillset includes proficiency in programming languages like Python, JavaScript, and Java; frameworks such as React, Flask, and Django; cloud platforms including AWS, Google Cloud, and Azure; as well as other tools like Git, Docker, Kubernetes, and SQL. He is certified as an AWS Certified Solutions Architect – Associate, earned in 2020, and as a Google Professional Cloud Architect, earned in 2021'

'''



# Step 1: Predict entities using Flair

sentence = Sentence(resume_text)

tagger.predict(sentence)



# Step 2: Convert Flair predictions to spaCy doc format

# Initialize a blank spaCy NLP pipeline

nlp = spacy.blank("en")

doc = nlp(resume_text)



# Extract entities from Flair prediction and convert to spaCy format

ents = []

for entity in sentence.get_spans('ner'):

    start, end = entity.start_position, entity.end_position

    label = entity.tag

    span = doc.char_span(start, end, label=label)

    if span is not None:

        ents.append(span)



# Set the entities in the spaCy doc

doc.ents = ents



# Step 3: Visualize using displacy

# Display in Jupyter or a web page

displacy.render(doc, style="ent", jupyter=True)


2024-11-06 10:57:55,781 SequenceTagger predicts: Dictionary with 47 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, <START>, <STOP>




In [12]:
from flair.models import SequenceTagger

from flair.data import Sentence



# Load the pretrained NER model

tagger = SequenceTagger.load("/content/drive/MyDrive/FYP/Implementation/flair_output/best-model.pt")

# Example text

text = "Apple is looking at buying U.K. startup for $1 billion."



# Create a Sentence object

sentence = Sentence(resume_text)



# Predict entities

tagger.predict(sentence)



# Print the detected entities

for entity in sentence.get_spans("ner"):

    print(f"Entity: {entity.text}, Type: {entity.get_label('ner').value}, Confidence: {entity.score}")


2024-11-06 10:58:17,528 SequenceTagger predicts: Dictionary with 47 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, <START>, <STOP>
Entity: Doe lives, Type: NAME, Confidence: 0.16702505946159363
Entity: 1234 Elm, Type: NAME, Confidence: 0.11599351465702057
Entity: Los, Type: DEG, Confidence: 0.18544061481952667
Entity: 90001, Type: DEG, Confidence: 0.14748063683509827
Entity: 555, Type: DEG, Confidence: 0.11017131805419922
Entity: 123-4567, Type: DEG, Confidence: 0.11569119244813919
Entity: via, Type: DEG, Confidence: 0.20629706978797913
Entity: at, Type: EMAIL, Confidence: 0.19365909695625305
Entity: john.doe, Type: JOB, Confidence: 0.10587572306394577
Entity: @, Ty