<a href="https://colab.research.google.com/github/chewzzz1014/fyp/blob/master/ner/src/train_ner_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Train NER Models

In [1]:
# mount drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!mkdir spacy_ner_data

In [3]:
import json
import random
from sklearn.model_selection import train_test_split
import spacy
from spacy.tokens import DocBin

# Load JSON data
with open('/content/drive/MyDrive/FYP/Implementation/Resume Dataset/200_resumes_annotated.json', "r") as f:
    data = json.load(f)

def remove_overlapping_entities(entities):
    """Remove overlapping entities from the list."""
    entities = sorted(entities, key=lambda x: x[0])  # Sort by start position
    non_overlapping = []
    last_end = -1
    for start, end, label in entities:
        if start >= last_end:  # Only add if there's no overlap with the previous entity
            non_overlapping.append((start, end, label))
            last_end = end
    return non_overlapping

# Function to convert JSON data to Spacy's DocBin format
def convert_to_spacy_format(data):
    nlp = spacy.blank("en")  # Load a blank Spacy model
    doc_bin = DocBin()  # Container for our docs

    for item in data:
        text = item['data']['Text']  # Full document text
        entities = []

        for annotation in item['annotations'][0]['result']:
            start = annotation['value']['start']
            end = annotation['value']['end']
            label = annotation['value']['labels'][0]  # Entity label
            entities.append((start, end, label))

        entities = remove_overlapping_entities(entities)  # Remove overlapping entities
        # Create a Spacy doc and add entities to it
        doc = nlp.make_doc(text)
        spans = [doc.char_span(start, end, label=label) for start, end, label in entities]
        # Filter out None spans if Spacy can't align the character indices with tokens
        spans = [span for span in spans if span is not None]
        doc.ents = spans  # Assign entities to the doc
        doc_bin.add(doc)

    return doc_bin

# Split data into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Convert train and test sets to Spacy format
train_doc_bin = convert_to_spacy_format(train_data)
test_doc_bin = convert_to_spacy_format(test_data)

# Save the train and test data to .spacy files
train_doc_bin.to_disk("spacy_ner_data/train_data.spacy")
test_doc_bin.to_disk("spacy_ner_data/test_data.spacy")

## Spacy NER

In [4]:
# create base_config.cfg and paste the config generated from spacy widget
# update train and test file path
!touch base_config.cfg

In [5]:
# generate config.cfg from base_config.cfg
!python -m spacy init fill-config base_config.cfg config.cfg

[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [6]:
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [8]:
# train model using hyperparameters set in config.cfg
# save trained model in spacy-output/ dir

# use cpu
# !python -m spacy train config.cfg --output ./spacy_output

# use gpu
!python -m spacy train config.cfg --gpu-id 0 --output ./spacy_output

!cp -r ./spacy_output /content/drive/MyDrive/FYP/Implementation/

[38;5;4mℹ Saving to output directory: spacy_output[0m
[38;5;4mℹ Using GPU: 0[0m
[1m
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.0001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00    393.52    0.25    0.84    0.15    0.00
  3     500        318.16  39628.63   22.18   50.26   14.23    0.22
  6    1000        218.25  23196.31   37.41   32.47   44.11    0.37
  9    1500        125.29  21066.13   43.77   52.45   37.56    0.44
 12    2000        141.29  19069.86   47.18   61.24   38.38    0.47
 15    2500        144.90  16835.35   49.74   50.23   49.25    0.50
 18    3000        161.53  16130.90   49.34   53.33   45.90    0.49
 21    3500        167.70  14694.07   50.29   53.01   47.84    0.50
 25    4000        176.30  13344.23   50.41   50.60   50.22    0.50
 28    4500        185.72  12757.72 

In [9]:
# evaluate trained model performance
# store output and visualization into result/ dir
!python -m spacy evaluate spacy_output/model-best spacy_ner_data/test_data.spacy -dp spacy_output

[38;5;4mℹ Using CPU[0m
[38;5;4mℹ To switch to GPU 0, use the option: --gpu-id 0[0m
[1m

TOK     100.00
NER P   53.64 
NER R   48.88 
NER F   51.15 
SPEED   1175  

[1m

                 P       R       F
NAME         97.06   89.19   92.96
LOC          48.57   58.62   53.12
PHONE       100.00   94.74   97.30
EMAIL        85.29   90.62   87.88
SKILL        32.52   26.28   29.07
WORK PER     84.71   94.33   89.26
JOB          66.06   45.86   54.14
STUDY PER    60.38   78.05   68.09
COMPANY      59.30   46.79   52.31
UNI          39.06   52.08   44.64
DEG          62.50   74.47   67.96

<IPython.core.display.HTML object>
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/spacy/__main__.py", line 4, in <module>
    setup_cli()
  File "/usr/local/lib

In [10]:
# make prediction
import spacy
resume_text = '''
John Doe lives at 1234 Elm Street in Los Angeles, CA 90001. He can be reached at +1 (555) 123-4567 or via email at john.doe@example.com. John is a results-driven software engineer with over 5 years of experience in web development and cloud infrastructure, with strong knowledge of JavaScript, Python, and cloud technologies like AWS and Azure. Currently, he works as a Software Engineer at Google LLC in San Francisco, CA, where he has been employed since August 2019. In this role, he has developed scalable web applications using JavaScript, Node.js, and React, deployed and maintained cloud infrastructure on AWS, reducing downtime by 20%, and led a team of 4 engineers to enhance backend performance by 30%. Previously, he worked as a Junior Developer at Tech Innovators Inc. in Austin, TX, from July 2017 to July 2019, where he created RESTful APIs using Python and Flask, collaborated with front-end developers to build and deploy user-facing applications, and wrote unit and integration tests, improving code coverage by 15%.

John holds a Master of Science in Computer Science from the University of California, Berkeley, with a graduation date of May 2017, and a Bachelor of Science in Information Technology from the University of Texas at Austin, graduated in May 2015. His skillset includes proficiency in programming languages like Python, JavaScript, and Java; frameworks such as React, Flask, and Django; cloud platforms including AWS, Google Cloud, and Azure; as well as other tools like Git, Docker, Kubernetes, and SQL. He is certified as an AWS Certified Solutions Architect – Associate, earned in 2020, and as a Google Professional Cloud Architect, earned in 2021'
'''
nlp = spacy.load("spacy_output/model-best")
doc = nlp(resume_text.lower())

print(doc.ents)

for ent in doc.ents:
    print(f"{ent.text}: {ent.label_}")



(john doe, at 1234, in los, (555) 123, john.doe@example.com, software engineer, software engineer, at google llc, at tech innovators inc, july 2017 to july 2019, master of science in computer science, from the university, berkeley, bachelor of science, the university, may 2015, python, associate,)
john doe: NAME
at 1234: PHONE
in los: LOC
(555) 123: PHONE
john.doe@example.com: WORK PER
software engineer: JOB
software engineer: JOB
at google llc: COMPANY
at tech innovators inc: COMPANY
july 2017 to july 2019: STUDY PER
master of science in computer science: DEG
from the university: UNI
berkeley: LOC
bachelor of science: DEG
the university: UNI
may 2015: STUDY PER
python: SKILL
associate,: UNI


In [11]:
from spacy import displacy
displacy.render(doc, style="ent", jupyter=True)

## Flair NER

In [None]:
!pip install flair



In [None]:
# convert spacy data into flair data
import spacy
from spacy.tokens import DocBin
import os

def convert_spacy_to_flair(input_file, output_file):
    """
    Convert SpaCy binary format to Flair's CoNLL format.

    Args:
        input_file (str): Path to SpaCy binary file (.spacy)
        output_file (str): Path to output file for Flair format
    """
    # Load spaCy model
    nlp = spacy.blank("en")

    # Load the DocBin
    doc_bin = DocBin().from_disk(input_file)
    docs = list(doc_bin.get_docs(nlp.vocab))

    with open(output_file, 'w', encoding='utf-8') as f:
        for doc in docs:
            tokens = [(t.text, t.ent_iob_, t.ent_type_) for t in doc]

            # Write tokens in CoNLL format
            for token in tokens:
                text, iob, ent_type = token

                # Convert spaCy IOB to CoNLL format
                if iob == 'O':
                    tag = 'O'
                else:
                    tag = f'{iob}-{ent_type}' if ent_type else 'O'

                # Write line: token and NER tag
                f.write(f'{text} {tag}\n')

            # Empty line between sentences
            f.write('\n')

def convert_spacy_json_to_flair(input_file, output_file):
    """
    Convert SpaCy JSON format to Flair's CoNLL format.

    Args:
        input_file (str): Path to JSON file with SpaCy annotations
        output_file (str): Path to output file for Flair format
    """
    import json

    nlp = spacy.blank("en")

    with open(input_file, 'r', encoding='utf-8') as f:
        training_data = json.load(f)

    with open(output_file, 'w', encoding='utf-8') as f:
        for example in training_data:
            text = example['text']
            ents = example.get('entities', [])

            # Create a spaCy doc
            doc = nlp(text)

            # Add entities to doc
            spans = []
            for start, end, label in ents:
                span = doc.char_span(start, end, label=label)
                if span is not None:
                    spans.append(span)
            doc.ents = spans

            # Convert to CoNLL format
            tokens = [(t.text, t.ent_iob_, t.ent_type_) for t in doc]

            for token in tokens:
                text, iob, ent_type = token
                if iob == 'O':
                    tag = 'O'
                else:
                    tag = f'{iob}-{ent_type}' if ent_type else 'O'
                f.write(f'{text} {tag}\n')

            f.write('\n')

# Example usage for JSON format
flair_train_json = "flair_train.txt"
flair_test_json = "flair_test.txt"

convert_spacy_to_flair('/content/spacy_ner_data/train_data.spacy', flair_train_json)
convert_spacy_to_flair('/content/spacy_ner_data/test_data.spacy', flair_test_json)

FileNotFoundError: [Errno 2] No such file or directory: '/content/spacy_ner_data/train_data.spacy'

In [None]:
# convert spacy data into flair data
import spacy
from spacy.training import Corpus

!python -m spacy download de_core_news_sm
nlp = spacy.load("de_core_news_sm")
corpus = Corpus("/content/spacy_ner_data/test_data.spacy")

data = corpus(nlp)

# Flair supports BIO and BIOES, see https://github.com/flairNLP/flair/issues/875
def rename_biluo_to_bioes(old_tag):
    new_tag = ""
    try:
        if old_tag.startswith("L"):
            new_tag = "E" + old_tag[1:]
        elif old_tag.startswith("U"):
            new_tag = "S" + old_tag[1:]
        else:
            new_tag = old_tag
    except:
        pass
    return new_tag


def generate_corpus():
    corpus = []
    n_ex = 0
    for example in data:
        n_ex += 1
        text = example.text
        doc = nlp(text)
        tags = example.get_aligned_ner()
        # Check if it's an empty list of NER tags.
        if None in tags:
            pass
        else:
            new_tags = [rename_biluo_to_bioes(tag) for tag in tags]
            for token, tag in zip(doc,new_tags):
                row = token.text +' '+ token.pos_ +' ' +tag + '\n'
                corpus.append(row)
            corpus.append('\n')
    return corpus

def write_file(filepath):
    with open(filepath, 'w', encoding='utf-8') as f:
        corpus = generate_corpus()
        f.writelines(corpus)

def main():
    write_file('flair_test.txt')

main()

Collecting de-core-news-sm==3.7.0
  Using cached https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.7.0/de_core_news_sm-3.7.0-py3-none-any.whl (14.6 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('de_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# convert json into flair data
import json
import random
from typing import List, Dict, Tuple
import spacy
from collections import defaultdict

class NERConverter:
    def __init__(self):
        self.nlp = spacy.load("en_core_web_sm")

    def get_bioes_label(self, token_index: int, entity_length: int, current_position: int, label: str) -> str:
        """
        Convert to BIOES format
        - S-: Single token entity
        - B-: Beginning of multi-token entity
        - I-: Inside of multi-token entity
        - E-: End of multi-token entity
        - O: Outside
        """
        if entity_length == 1:
            return f'S-{label}'
        if current_position == 0:
            return f'B-{label}'
        if current_position == entity_length - 1:
            return f'E-{label}'
        return f'I-{label}'

    def convert_to_bioes_format(self, json_data: List[dict]) -> List[List[Tuple[str, str]]]:
        """Convert JSON annotations to BIOES format."""
        all_sentences = []

        for item in json_data:
            text = item['data']['Text']
            doc = self.nlp(text)

            # Initialize character-level labels
            char_labels = ['O'] * len(text)

            # First pass: identify entity boundaries and lengths
            entity_spans = []
            if item['annotations'] and len(item['annotations']) > 0:
                for ann in item['annotations'][0]['result']:
                    if 'value' in ann:
                        start = ann['value']['start']
                        end = ann['value']['end']
                        label = ann['value']['labels'][0]
                        entity_spans.append((start, end, label))

            # Sort spans by start position
            entity_spans.sort(key=lambda x: x[0])

            # Second pass: apply BIOES labels
            for start, end, label in entity_spans:
                # Get tokens that are part of this entity
                entity_text = text[start:end]
                entity_doc = self.nlp(entity_text)
                entity_length = len([token for token in entity_doc if not token.is_space])

                # Set labels for the entire span
                current_token_idx = 0
                for i in range(start, end):
                    if i == start or text[i-1].isspace():
                        char_labels[i] = self.get_bioes_label(i, entity_length, current_token_idx, label)
                        current_token_idx += 1
                    else:
                        char_labels[i] = char_labels[i-1]

            # Convert to token-level labels
            current_sentence = []
            for sent in doc.sents:
                for token in sent:
                    # Get the most common label for the token's characters
                    token_chars_labels = char_labels[token.idx:token.idx + len(token.text)]
                    label_counts = defaultdict(int)
                    for char_label in token_chars_labels:
                        label_counts[char_label] += 1

                    token_label = max(label_counts.items(), key=lambda x: x[1])[0]
                    current_sentence.append((token.text, token_label))

                if current_sentence:
                    all_sentences.append(current_sentence)
                    current_sentence = []

        return all_sentences

    def write_flair_file(self, sentences: List[List[Tuple[str, str]]], filename: str):
        """Write sentences in BIOES format to file."""
        with open(filename, 'w', encoding='utf-8') as f:
            for sentence in sentences:
                for token, label in sentence:
                    f.write(f'{token} {label}\n')
                f.write('\n')

    def convert_and_split(self, json_data: List[dict], train_file: str, test_file: str, test_ratio: float = 0.2):
        """Convert JSON to BIOES format and split into train/test sets."""
        all_sentences = self.convert_to_bioes_format(json_data)

        # Shuffle and split
        random.shuffle(all_sentences)
        split_idx = int(len(all_sentences) * (1 - test_ratio))

        train_sentences = all_sentences[:split_idx]
        test_sentences = all_sentences[split_idx:]

        # Write to files
        self.write_flair_file(train_sentences, train_file)
        self.write_flair_file(test_sentences, test_file)

        return len(train_sentences), len(test_sentences)

def main():
    # Load JSON data
    with open('/content/drive/MyDrive/FYP/Implementation/Resume Dataset/200_resumes_annotated.json', 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Convert and split data
    converter = NERConverter()
    train_count, test_count = converter.convert_and_split(
        json_data,
        train_file='flair_train.txt',
        test_file='flair_test.txt',
        test_ratio=0.2
    )

    print(f'Created {train_count} training sentences and {test_count} test sentences')

main()

Created 298 training sentences and 75 test sentences


In [None]:
from flair.data import Corpus
from flair.datasets import ColumnCorpus

# Define columns for CoNLL (0: word, 1: label)
columns = {0: 'text', 1: 'ner'}

# Set data folder and file names
data_folder = './'
train_file = 'flair_train.txt'
test_file = 'flair_test.txt'

# Load the corpus
corpus: Corpus = ColumnCorpus(data_folder, columns,
                              train_file=train_file,
                              test_file=test_file,
                              dev_file=None)

2024-11-06 10:22:39,922 Reading data from .
2024-11-06 10:22:39,923 Train: flair_train.txt
2024-11-06 10:22:39,927 Dev: None
2024-11-06 10:22:39,928 Test: flair_test.txt
2024-11-06 10:22:41,745 No dev split found. Using 10% (i.e. 30 samples) of the train split as dev data


In [None]:
tag_dictionary = corpus.make_label_dictionary(label_type='ner')
print("Labels:", tag_dictionary.get_items())

2024-11-06 10:22:43,986 Computing label dictionary. Progress:


0it [00:00, ?it/s]
268it [00:00, 12655.92it/s]

2024-11-06 10:22:44,035 Dictionary created for label 'ner' with 11 values: SKILL (seen 2196 times), JOB (seen 520 times), WORK (seen 445 times), COMPANY (seen 353 times), LOC (seen 221 times), DEG (seen 154 times), UNI (seen 152 times), STUDY (seen 133 times), NAME (seen 133 times), PHONE (seen 132 times), EMAIL (seen 99 times)
Labels: ['SKILL', 'JOB', 'WORK', 'COMPANY', 'LOC', 'DEG', 'UNI', 'STUDY', 'NAME', 'PHONE', 'EMAIL']





In [None]:
from collections import Counter

def count_labels(file_path):
    with open(file_path, 'r') as file:
        labels = [line.split()[-1] for line in file if line.strip()]
    return Counter(labels)

print("Train label distribution:", count_labels('flair_train.txt'))
print("Test label distribution:", count_labels('flair_test.txt'))

Train label distribution: Counter({'O': 71084, 'S-SKILL': 1461, 'PER': 1178, 'B-SKILL': 950, 'E-SKILL': 946, 'E-JOB': 521, 'B-JOB': 511, 'E-COMPANY': 340, 'B-COMPANY': 333, 'I-JOB': 283, 'I-DEG': 268, 'I-COMPANY': 254, 'I-SKILL': 191, 'E-UNI': 170, 'B-UNI': 169, 'B-DEG': 160, 'E-DEG': 160, 'S-LOC': 147, 'B-NAME': 144, 'E-NAME': 144, 'I-UNI': 127, 'E-PHONE': 116, 'B-PHONE': 114, 'S-EMAIL': 108, 'B-LOC': 101, 'E-LOC': 101, 'I-PHONE': 82, 'S-COMPANY': 63, 'S-JOB': 62, 'S-PHONE': 23, 'I-NAME': 10, 'S-DEG': 6, 'S-UNI': 2, 'I-LOC': 1, 'B-EMAIL': 1, 'E-EMAIL': 1})
Test label distribution: Counter({'O': 18009, 'PER': 333, 'S-SKILL': 330, 'B-SKILL': 263, 'E-SKILL': 263, 'E-JOB': 138, 'B-JOB': 136, 'B-COMPANY': 89, 'E-COMPANY': 87, 'I-JOB': 74, 'I-DEG': 66, 'I-COMPANY': 61, 'S-LOC': 58, 'B-NAME': 47, 'E-NAME': 47, 'I-SKILL': 45, 'B-UNI': 45, 'E-UNI': 45, 'I-UNI': 44, 'E-DEG': 42, 'B-DEG': 41, 'B-PHONE': 41, 'E-PHONE': 41, 'S-EMAIL': 40, 'B-LOC': 27, 'E-LOC': 27, 'I-PHONE': 24, 'S-COMPANY': 16, '

In [None]:
# create NER tagger
from flair.embeddings import WordEmbeddings, StackedEmbeddings, TransformerWordEmbeddings, FlairEmbeddings
from flair.models import SequenceTagger

# using LSTM-CRF on top of frozen embeddings
# combine flair and glove embeddings
# embeddings = StackedEmbeddings([
#                 WordEmbeddings('glove'),
#                 FlairEmbeddings('news-forward'),
#                 FlairEmbeddings('news-backward'),
#             ])
# tagger = SequenceTagger(hidden_size=256,
#                          embeddings=embeddings,
#                          tag_dictionary=tag_dictionary,
#                          tag_type='ner',
#                          use_crf=True,
#                          tag_format = 'BIOES')

# using transformer embedding
# embeddings = TransformerWordEmbeddings('bert-base-uncased',
#                                       fine_tune=True,
#                                       layers='-1',
#                                       subtoken_pooling='first')
embeddings = TransformerWordEmbeddings(
    'roberta-base',  # or 'bert-base-uncased'
    fine_tune=True,
    layers='-1,-2,-3,-4',  # Use last 4 layers
    subtoken_pooling='first',
    allow_long_sentences=True
)
tagger = SequenceTagger(hidden_size=256,
                         embeddings=embeddings,
                         tag_dictionary=tag_dictionary,
                         tag_type='ner',
                         use_crf=False,
                         use_rnn=False,
                         reproject_embeddings=False,
                         tag_format = 'BIOES')



2024-11-06 10:59:16,123 SequenceTagger predicts: Dictionary with 45 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-DEG, B-DEG, E-DEG, I-DEG, S-UNI, B-UNI, E-UNI, I-UNI, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL


In [None]:
# train flair ner model
from flair.trainers import ModelTrainer
from flair.training_utils import EvaluationMetric

trainer = ModelTrainer(tagger, corpus)

trainer.train(
    base_path='flair_output/',
    learning_rate=0.01,
    mini_batch_size=4,
    max_epochs=50,
    train_with_dev=False
)
!cp -r ./flair_output /content/drive/MyDrive/FYP/Implementation/

2024-11-06 10:23:02,204 ----------------------------------------------------------------------------------------------------
2024-11-06 10:23:02,207 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50266, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2024-11-06 10:23:05,898 epoch 1 - iter 6/67 - loss 2.77562790 - time (sec): 3.66 - samples/sec: 1991.31 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:07,877 epoch 1 - iter 12/67 - loss 2.17279687 - time (sec): 5.64 - samples/sec: 2162.44 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:10,314 epoch 1 - iter 18/67 - loss 1.73979483 - time (sec): 8.08 - samples/sec: 2274.98 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:13,007 epoch 1 - iter 24/67 - loss 1.49670987 - time (sec): 10.77 - samples/sec: 2281.43 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:15,268 epoch 1 - iter 30/67 - loss 1.40065300 - time (sec): 13.03 - samples/sec: 2273.90 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:17,923 epoch 1 - iter 36/67 - loss 1.34974097 - time (sec): 15.69 - samples/sec: 2286.83 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:20,103 epoch 1 - iter 42/67 - loss 1.27069429 - time (sec): 17.87 - samples/sec: 2298.39 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:02<00:00,  2.93s/it]

2024-11-06 10:23:34,284 DEV : loss 0.4303983449935913 - f1-score (micro avg)  0.0
2024-11-06 10:23:34,304  - 0 epochs without improvement
2024-11-06 10:23:34,305 ----------------------------------------------------------------------------------------------------





2024-11-06 10:23:36,425 epoch 2 - iter 6/67 - loss 0.77140476 - time (sec): 2.12 - samples/sec: 2197.95 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:38,579 epoch 2 - iter 12/67 - loss 0.83176617 - time (sec): 4.27 - samples/sec: 2349.66 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:42,071 epoch 2 - iter 18/67 - loss 0.73596175 - time (sec): 7.76 - samples/sec: 2328.55 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:44,358 epoch 2 - iter 24/67 - loss 0.74539854 - time (sec): 10.05 - samples/sec: 2449.61 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:46,695 epoch 2 - iter 30/67 - loss 0.73309234 - time (sec): 12.39 - samples/sec: 2409.24 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:49,279 epoch 2 - iter 36/67 - loss 0.70209214 - time (sec): 14.97 - samples/sec: 2352.57 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:23:52,171 epoch 2 - iter 42/67 - loss 0.71382846 - time (sec): 17.86 - samples/sec: 2394.95 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:07<00:00,  7.91s/it]

2024-11-06 10:24:12,209 DEV : loss 0.4133286774158478 - f1-score (micro avg)  0.0
2024-11-06 10:24:12,230  - 0 epochs without improvement
2024-11-06 10:24:12,231 ----------------------------------------------------------------------------------------------------





2024-11-06 10:24:15,230 epoch 3 - iter 6/67 - loss 0.57467712 - time (sec): 3.00 - samples/sec: 2489.91 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:24:18,605 epoch 3 - iter 12/67 - loss 0.58675001 - time (sec): 6.37 - samples/sec: 2250.57 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:24:21,844 epoch 3 - iter 18/67 - loss 0.56801475 - time (sec): 9.61 - samples/sec: 2192.43 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:24:24,249 epoch 3 - iter 24/67 - loss 0.59568322 - time (sec): 12.01 - samples/sec: 2187.08 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:24:26,879 epoch 3 - iter 30/67 - loss 0.61023264 - time (sec): 14.64 - samples/sec: 2192.54 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:24:29,312 epoch 3 - iter 36/67 - loss 0.62346563 - time (sec): 17.08 - samples/sec: 2196.79 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:24:31,820 epoch 3 - iter 42/67 - loss 0.61800989 - time (sec): 19.58 - samples/sec: 2220.39 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:08<00:00,  8.78s/it]

2024-11-06 10:24:51,635 DEV : loss 0.40528959035873413 - f1-score (micro avg)  0.0
2024-11-06 10:24:51,655  - 0 epochs without improvement
2024-11-06 10:24:51,656 ----------------------------------------------------------------------------------------------------





2024-11-06 10:24:53,668 epoch 4 - iter 6/67 - loss 0.58511808 - time (sec): 2.01 - samples/sec: 2568.56 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:24:56,654 epoch 4 - iter 12/67 - loss 0.61907580 - time (sec): 4.99 - samples/sec: 2386.61 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:24:59,054 epoch 4 - iter 18/67 - loss 0.60025711 - time (sec): 7.39 - samples/sec: 2339.38 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:02,582 epoch 4 - iter 24/67 - loss 0.57515673 - time (sec): 10.92 - samples/sec: 2338.20 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:06,140 epoch 4 - iter 30/67 - loss 0.55029776 - time (sec): 14.48 - samples/sec: 2250.78 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:09,162 epoch 4 - iter 36/67 - loss 0.56295548 - time (sec): 17.50 - samples/sec: 2210.01 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:11,904 epoch 4 - iter 42/67 - loss 0.55711580 - time (sec): 20.24 - samples/sec: 2224.36 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:08<00:00,  8.05s/it]

2024-11-06 10:25:30,432 DEV : loss 0.39869266748428345 - f1-score (micro avg)  0.0
2024-11-06 10:25:30,468  - 0 epochs without improvement
2024-11-06 10:25:30,470 ----------------------------------------------------------------------------------------------------





2024-11-06 10:25:32,297 epoch 5 - iter 6/67 - loss 0.65070348 - time (sec): 1.82 - samples/sec: 2181.01 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:35,272 epoch 5 - iter 12/67 - loss 0.61272730 - time (sec): 4.80 - samples/sec: 1973.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:38,132 epoch 5 - iter 18/67 - loss 0.58559587 - time (sec): 7.66 - samples/sec: 2107.67 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:41,209 epoch 5 - iter 24/67 - loss 0.58730065 - time (sec): 10.73 - samples/sec: 2178.18 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:44,109 epoch 5 - iter 30/67 - loss 0.57515688 - time (sec): 13.63 - samples/sec: 2177.37 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:46,558 epoch 5 - iter 36/67 - loss 0.57917169 - time (sec): 16.08 - samples/sec: 2145.92 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:25:49,504 epoch 5 - iter 42/67 - loss 0.57317858 - time (sec): 19.03 - samples/sec: 2149.62 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:06<00:00,  6.87s/it]

2024-11-06 10:26:09,024 DEV : loss 0.39459559321403503 - f1-score (micro avg)  0.0
2024-11-06 10:26:09,044  - 0 epochs without improvement
2024-11-06 10:26:09,045 ----------------------------------------------------------------------------------------------------





2024-11-06 10:26:11,877 epoch 6 - iter 6/67 - loss 0.51458286 - time (sec): 2.83 - samples/sec: 2323.06 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:26:14,499 epoch 6 - iter 12/67 - loss 0.53442605 - time (sec): 5.45 - samples/sec: 2367.53 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:26:18,324 epoch 6 - iter 18/67 - loss 0.52206453 - time (sec): 9.27 - samples/sec: 2261.40 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:26:21,893 epoch 6 - iter 24/67 - loss 0.49083187 - time (sec): 12.84 - samples/sec: 2229.04 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:26:24,989 epoch 6 - iter 30/67 - loss 0.50693285 - time (sec): 15.94 - samples/sec: 2203.29 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:26:28,190 epoch 6 - iter 36/67 - loss 0.49294709 - time (sec): 19.14 - samples/sec: 2213.73 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:26:30,981 epoch 6 - iter 42/67 - loss 0.49144749 - time (sec): 21.93 - samples/sec: 2154.64 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:07<00:00,  7.30s/it]

2024-11-06 10:26:48,424 DEV : loss 0.394151508808136 - f1-score (micro avg)  0.0
2024-11-06 10:26:48,446  - 0 epochs without improvement
2024-11-06 10:26:48,448 ----------------------------------------------------------------------------------------------------





2024-11-06 10:26:51,818 epoch 7 - iter 6/67 - loss 0.42566419 - time (sec): 3.37 - samples/sec: 2001.60 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:26:54,959 epoch 7 - iter 12/67 - loss 0.48993261 - time (sec): 6.51 - samples/sec: 2179.51 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:26:57,513 epoch 7 - iter 18/67 - loss 0.47972956 - time (sec): 9.06 - samples/sec: 2124.31 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:00,356 epoch 7 - iter 24/67 - loss 0.46290484 - time (sec): 11.90 - samples/sec: 2196.30 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:02,674 epoch 7 - iter 30/67 - loss 0.47832040 - time (sec): 14.22 - samples/sec: 2217.09 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:04,540 epoch 7 - iter 36/67 - loss 0.48322730 - time (sec): 16.09 - samples/sec: 2222.50 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:07,814 epoch 7 - iter 42/67 - loss 0.47518693 - time (sec): 19.36 - samples/sec: 2194.91 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:08<00:00,  8.52s/it]

2024-11-06 10:27:29,142 DEV : loss 0.38722071051597595 - f1-score (micro avg)  0.0
2024-11-06 10:27:29,162  - 0 epochs without improvement
2024-11-06 10:27:29,163 ----------------------------------------------------------------------------------------------------





2024-11-06 10:27:31,707 epoch 8 - iter 6/67 - loss 0.44391930 - time (sec): 2.54 - samples/sec: 2268.80 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:34,288 epoch 8 - iter 12/67 - loss 0.41668351 - time (sec): 5.12 - samples/sec: 2304.50 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:37,632 epoch 8 - iter 18/67 - loss 0.44704208 - time (sec): 8.46 - samples/sec: 2269.46 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:40,599 epoch 8 - iter 24/67 - loss 0.43720343 - time (sec): 11.43 - samples/sec: 2231.19 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:42,788 epoch 8 - iter 30/67 - loss 0.42789018 - time (sec): 13.62 - samples/sec: 2273.40 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:46,787 epoch 8 - iter 36/67 - loss 0.43655424 - time (sec): 17.62 - samples/sec: 2162.47 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:27:49,542 epoch 8 - iter 42/67 - loss 0.43640730 - time (sec): 20.37 - samples/sec: 2143.14 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:08<00:00,  8.63s/it]

2024-11-06 10:28:09,831 DEV : loss 0.3860132396221161 - f1-score (micro avg)  0.0
2024-11-06 10:28:09,852  - 0 epochs without improvement
2024-11-06 10:28:09,853 ----------------------------------------------------------------------------------------------------





2024-11-06 10:28:11,738 epoch 9 - iter 6/67 - loss 0.44525067 - time (sec): 1.88 - samples/sec: 2497.00 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:28:14,343 epoch 9 - iter 12/67 - loss 0.51129143 - time (sec): 4.48 - samples/sec: 2147.17 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:28:17,433 epoch 9 - iter 18/67 - loss 0.44145114 - time (sec): 7.58 - samples/sec: 2198.72 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:28:21,074 epoch 9 - iter 24/67 - loss 0.43254460 - time (sec): 11.22 - samples/sec: 2161.48 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:28:24,756 epoch 9 - iter 30/67 - loss 0.43086096 - time (sec): 14.90 - samples/sec: 2195.16 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:28:26,938 epoch 9 - iter 36/67 - loss 0.44751996 - time (sec): 17.08 - samples/sec: 2150.65 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:28:29,324 epoch 9 - iter 42/67 - loss 0.44458339 - time (sec): 19.47 - samples/sec: 2194.58 - lr: 0.010000 - momentum: 0.000000
2024-11-06

100%|██████████| 1/1 [00:08<00:00,  8.61s/it]

2024-11-06 10:28:49,516 DEV : loss 0.3823254406452179 - f1-score (micro avg)  0.0
2024-11-06 10:28:49,536  - 0 epochs without improvement
2024-11-06 10:28:49,538 ----------------------------------------------------------------------------------------------------





2024-11-06 10:28:52,405 epoch 10 - iter 6/67 - loss 0.36262316 - time (sec): 2.86 - samples/sec: 2250.60 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:28:55,325 epoch 10 - iter 12/67 - loss 0.41264979 - time (sec): 5.78 - samples/sec: 2260.49 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:28:58,146 epoch 10 - iter 18/67 - loss 0.40489271 - time (sec): 8.60 - samples/sec: 2197.55 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:00,295 epoch 10 - iter 24/67 - loss 0.43732712 - time (sec): 10.75 - samples/sec: 2140.58 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:02,580 epoch 10 - iter 30/67 - loss 0.44299041 - time (sec): 13.04 - samples/sec: 2147.01 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:05,390 epoch 10 - iter 36/67 - loss 0.42699736 - time (sec): 15.85 - samples/sec: 2163.67 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:08,255 epoch 10 - iter 42/67 - loss 0.41881314 - time (sec): 18.71 - samples/sec: 2206.12 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.98s/it]

2024-11-06 10:29:28,881 DEV : loss 0.3812136650085449 - f1-score (micro avg)  0.0
2024-11-06 10:29:28,920  - 0 epochs without improvement
2024-11-06 10:29:28,923 ----------------------------------------------------------------------------------------------------





2024-11-06 10:29:31,212 epoch 11 - iter 6/67 - loss 0.40736219 - time (sec): 2.29 - samples/sec: 2272.79 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:34,909 epoch 11 - iter 12/67 - loss 0.39666565 - time (sec): 5.98 - samples/sec: 2028.53 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:38,159 epoch 11 - iter 18/67 - loss 0.38803965 - time (sec): 9.23 - samples/sec: 2043.29 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:40,620 epoch 11 - iter 24/67 - loss 0.38686238 - time (sec): 11.69 - samples/sec: 2060.30 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:42,371 epoch 11 - iter 30/67 - loss 0.39812015 - time (sec): 13.45 - samples/sec: 2098.01 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:45,472 epoch 11 - iter 36/67 - loss 0.38657212 - time (sec): 16.55 - samples/sec: 2177.50 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:29:49,088 epoch 11 - iter 42/67 - loss 0.36768182 - time (sec): 20.16 - samples/sec: 2154.59 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:06<00:00,  6.67s/it]

2024-11-06 10:30:08,088 DEV : loss 0.3779582977294922 - f1-score (micro avg)  0.0
2024-11-06 10:30:08,108  - 0 epochs without improvement
2024-11-06 10:30:08,110 ----------------------------------------------------------------------------------------------------





2024-11-06 10:30:11,672 epoch 12 - iter 6/67 - loss 0.38916434 - time (sec): 3.56 - samples/sec: 2248.06 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:30:14,306 epoch 12 - iter 12/67 - loss 0.37601374 - time (sec): 6.19 - samples/sec: 2224.64 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:30:16,196 epoch 12 - iter 18/67 - loss 0.39433180 - time (sec): 8.08 - samples/sec: 2168.36 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:30:19,733 epoch 12 - iter 24/67 - loss 0.35907673 - time (sec): 11.62 - samples/sec: 2199.16 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:30:22,387 epoch 12 - iter 30/67 - loss 0.36888612 - time (sec): 14.27 - samples/sec: 2182.73 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:30:25,578 epoch 12 - iter 36/67 - loss 0.36975731 - time (sec): 17.46 - samples/sec: 2215.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:30:27,864 epoch 12 - iter 42/67 - loss 0.37809066 - time (sec): 19.75 - samples/sec: 2232.35 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.46s/it]

2024-11-06 10:30:46,807 DEV : loss 0.3780912160873413 - f1-score (micro avg)  0.0
2024-11-06 10:30:46,828  - 1 epochs without improvement
2024-11-06 10:30:46,830 ----------------------------------------------------------------------------------------------------





2024-11-06 10:30:49,181 epoch 13 - iter 6/67 - loss 0.31801356 - time (sec): 2.35 - samples/sec: 2615.82 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:30:53,222 epoch 13 - iter 12/67 - loss 0.33458442 - time (sec): 6.39 - samples/sec: 1990.49 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:30:56,832 epoch 13 - iter 18/67 - loss 0.31341011 - time (sec): 10.00 - samples/sec: 2053.34 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:00,360 epoch 13 - iter 24/67 - loss 0.31800181 - time (sec): 13.53 - samples/sec: 2082.03 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:02,747 epoch 13 - iter 30/67 - loss 0.34399705 - time (sec): 15.91 - samples/sec: 2089.50 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:05,720 epoch 13 - iter 36/67 - loss 0.35754709 - time (sec): 18.89 - samples/sec: 2082.29 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:08,206 epoch 13 - iter 42/67 - loss 0.35855524 - time (sec): 21.37 - samples/sec: 2098.65 - lr: 0.010000 - momentum: 0.000000
20

100%|██████████| 1/1 [00:08<00:00,  8.95s/it]

2024-11-06 10:31:28,484 DEV : loss 0.37649959325790405 - f1-score (micro avg)  0.0
2024-11-06 10:31:28,506  - 0 epochs without improvement
2024-11-06 10:31:28,507 ----------------------------------------------------------------------------------------------------





2024-11-06 10:31:31,040 epoch 14 - iter 6/67 - loss 0.37199438 - time (sec): 2.53 - samples/sec: 2332.35 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:33,824 epoch 14 - iter 12/67 - loss 0.35108357 - time (sec): 5.31 - samples/sec: 2245.96 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:36,438 epoch 14 - iter 18/67 - loss 0.36117006 - time (sec): 7.93 - samples/sec: 2271.70 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:39,302 epoch 14 - iter 24/67 - loss 0.36504886 - time (sec): 10.79 - samples/sec: 2273.29 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:42,028 epoch 14 - iter 30/67 - loss 0.35853612 - time (sec): 13.52 - samples/sec: 2268.43 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:44,605 epoch 14 - iter 36/67 - loss 0.35458049 - time (sec): 16.09 - samples/sec: 2237.76 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:31:47,472 epoch 14 - iter 42/67 - loss 0.35518986 - time (sec): 18.96 - samples/sec: 2206.97 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:09<00:00,  9.07s/it]

2024-11-06 10:32:09,754 DEV : loss 0.3755929470062256 - f1-score (micro avg)  0.0
2024-11-06 10:32:09,775  - 0 epochs without improvement
2024-11-06 10:32:09,777 ----------------------------------------------------------------------------------------------------





2024-11-06 10:32:12,748 epoch 15 - iter 6/67 - loss 0.35698417 - time (sec): 2.97 - samples/sec: 1812.65 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:32:15,534 epoch 15 - iter 12/67 - loss 0.35125014 - time (sec): 5.75 - samples/sec: 2058.21 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:32:17,908 epoch 15 - iter 18/67 - loss 0.37380191 - time (sec): 8.13 - samples/sec: 2075.96 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:32:20,732 epoch 15 - iter 24/67 - loss 0.36760100 - time (sec): 10.95 - samples/sec: 2165.64 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:32:24,172 epoch 15 - iter 30/67 - loss 0.34810161 - time (sec): 14.39 - samples/sec: 2117.46 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:32:26,908 epoch 15 - iter 36/67 - loss 0.35141410 - time (sec): 17.13 - samples/sec: 2080.63 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:32:28,918 epoch 15 - iter 42/67 - loss 0.34197326 - time (sec): 19.14 - samples/sec: 2093.99 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:09<00:00,  9.09s/it]

2024-11-06 10:32:51,490 DEV : loss 0.3735407292842865 - f1-score (micro avg)  0.0
2024-11-06 10:32:51,528  - 0 epochs without improvement
2024-11-06 10:32:51,530 ----------------------------------------------------------------------------------------------------





2024-11-06 10:32:55,382 epoch 16 - iter 6/67 - loss 0.28248790 - time (sec): 3.85 - samples/sec: 2536.35 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:32:57,432 epoch 16 - iter 12/67 - loss 0.30309655 - time (sec): 5.90 - samples/sec: 2257.28 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:00,170 epoch 16 - iter 18/67 - loss 0.30602114 - time (sec): 8.64 - samples/sec: 2341.91 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:03,107 epoch 16 - iter 24/67 - loss 0.31249041 - time (sec): 11.57 - samples/sec: 2235.49 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:06,308 epoch 16 - iter 30/67 - loss 0.31081007 - time (sec): 14.77 - samples/sec: 2188.69 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:09,263 epoch 16 - iter 36/67 - loss 0.31546608 - time (sec): 17.73 - samples/sec: 2175.34 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:11,874 epoch 16 - iter 42/67 - loss 0.31382567 - time (sec): 20.34 - samples/sec: 2180.26 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.80s/it]

2024-11-06 10:33:31,291 DEV : loss 0.3734825551509857 - f1-score (micro avg)  0.0
2024-11-06 10:33:31,326  - 0 epochs without improvement
2024-11-06 10:33:31,331 ----------------------------------------------------------------------------------------------------





2024-11-06 10:33:34,742 epoch 17 - iter 6/67 - loss 0.30421015 - time (sec): 3.41 - samples/sec: 1797.22 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:37,666 epoch 17 - iter 12/67 - loss 0.26891061 - time (sec): 6.33 - samples/sec: 2007.70 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:39,880 epoch 17 - iter 18/67 - loss 0.28520406 - time (sec): 8.55 - samples/sec: 2077.10 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:42,052 epoch 17 - iter 24/67 - loss 0.28963744 - time (sec): 10.72 - samples/sec: 2118.78 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:44,959 epoch 17 - iter 30/67 - loss 0.29101780 - time (sec): 13.62 - samples/sec: 2225.98 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:48,243 epoch 17 - iter 36/67 - loss 0.28610378 - time (sec): 16.91 - samples/sec: 2221.91 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:33:51,486 epoch 17 - iter 42/67 - loss 0.29389447 - time (sec): 20.15 - samples/sec: 2210.07 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:06<00:00,  6.99s/it]

2024-11-06 10:34:10,048 DEV : loss 0.372924268245697 - f1-score (micro avg)  0.0
2024-11-06 10:34:10,070  - 0 epochs without improvement
2024-11-06 10:34:10,072 ----------------------------------------------------------------------------------------------------





2024-11-06 10:34:12,793 epoch 18 - iter 6/67 - loss 0.30757578 - time (sec): 2.72 - samples/sec: 2205.88 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:34:15,625 epoch 18 - iter 12/67 - loss 0.28877725 - time (sec): 5.55 - samples/sec: 2086.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:34:17,944 epoch 18 - iter 18/67 - loss 0.30226233 - time (sec): 7.87 - samples/sec: 2120.94 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:34:20,370 epoch 18 - iter 24/67 - loss 0.30072842 - time (sec): 10.29 - samples/sec: 2116.54 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:34:23,722 epoch 18 - iter 30/67 - loss 0.29877130 - time (sec): 13.65 - samples/sec: 2122.24 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:34:26,814 epoch 18 - iter 36/67 - loss 0.30222418 - time (sec): 16.74 - samples/sec: 2130.53 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:34:29,589 epoch 18 - iter 42/67 - loss 0.30393813 - time (sec): 19.51 - samples/sec: 2118.38 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.55s/it]

2024-11-06 10:34:50,109 DEV : loss 0.37193039059638977 - f1-score (micro avg)  0.0
2024-11-06 10:34:50,129  - 0 epochs without improvement
2024-11-06 10:34:50,130 ----------------------------------------------------------------------------------------------------





2024-11-06 10:34:53,090 epoch 19 - iter 6/67 - loss 0.24864569 - time (sec): 2.96 - samples/sec: 2169.87 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:34:55,891 epoch 19 - iter 12/67 - loss 0.30014073 - time (sec): 5.76 - samples/sec: 2034.84 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:34:58,596 epoch 19 - iter 18/67 - loss 0.31445458 - time (sec): 8.46 - samples/sec: 2112.74 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:02,258 epoch 19 - iter 24/67 - loss 0.29949934 - time (sec): 12.12 - samples/sec: 2164.83 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:05,182 epoch 19 - iter 30/67 - loss 0.29677560 - time (sec): 15.05 - samples/sec: 2123.34 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:07,528 epoch 19 - iter 36/67 - loss 0.29260562 - time (sec): 17.39 - samples/sec: 2155.70 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:10,529 epoch 19 - iter 42/67 - loss 0.28596740 - time (sec): 20.39 - samples/sec: 2130.89 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:09<00:00,  9.35s/it]

2024-11-06 10:35:31,515 DEV : loss 0.3706222474575043 - f1-score (micro avg)  0.0
2024-11-06 10:35:31,536  - 0 epochs without improvement
2024-11-06 10:35:31,537 ----------------------------------------------------------------------------------------------------





2024-11-06 10:35:34,229 epoch 20 - iter 6/67 - loss 0.25171112 - time (sec): 2.69 - samples/sec: 2352.40 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:37,177 epoch 20 - iter 12/67 - loss 0.27694611 - time (sec): 5.63 - samples/sec: 2256.96 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:40,126 epoch 20 - iter 18/67 - loss 0.26485310 - time (sec): 8.58 - samples/sec: 2192.40 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:43,333 epoch 20 - iter 24/67 - loss 0.27543082 - time (sec): 11.79 - samples/sec: 2113.87 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:45,750 epoch 20 - iter 30/67 - loss 0.28081041 - time (sec): 14.21 - samples/sec: 2108.09 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:48,582 epoch 20 - iter 36/67 - loss 0.27385075 - time (sec): 17.04 - samples/sec: 2086.09 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:35:51,084 epoch 20 - iter 42/67 - loss 0.26773926 - time (sec): 19.54 - samples/sec: 2079.32 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.72s/it]

2024-11-06 10:36:12,629 DEV : loss 0.3710145950317383 - f1-score (micro avg)  0.0
2024-11-06 10:36:12,651  - 1 epochs without improvement
2024-11-06 10:36:12,653 ----------------------------------------------------------------------------------------------------





2024-11-06 10:36:15,549 epoch 21 - iter 6/67 - loss 0.29421416 - time (sec): 2.89 - samples/sec: 2151.87 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:36:18,091 epoch 21 - iter 12/67 - loss 0.30530143 - time (sec): 5.43 - samples/sec: 2205.15 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:36:20,448 epoch 21 - iter 18/67 - loss 0.30552678 - time (sec): 7.79 - samples/sec: 2268.94 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:36:23,177 epoch 21 - iter 24/67 - loss 0.29138394 - time (sec): 10.52 - samples/sec: 2200.41 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:36:26,340 epoch 21 - iter 30/67 - loss 0.28475667 - time (sec): 13.68 - samples/sec: 2177.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:36:29,115 epoch 21 - iter 36/67 - loss 0.28392111 - time (sec): 16.46 - samples/sec: 2160.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:36:33,210 epoch 21 - iter 42/67 - loss 0.27736038 - time (sec): 20.55 - samples/sec: 2173.83 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.94s/it]

2024-11-06 10:36:53,516 DEV : loss 0.37051740288734436 - f1-score (micro avg)  0.0
2024-11-06 10:36:53,536  - 0 epochs without improvement
2024-11-06 10:36:53,537 ----------------------------------------------------------------------------------------------------





2024-11-06 10:36:55,638 epoch 22 - iter 6/67 - loss 0.29985075 - time (sec): 2.10 - samples/sec: 2587.56 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:36:58,091 epoch 22 - iter 12/67 - loss 0.29631150 - time (sec): 4.55 - samples/sec: 2251.71 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:01,129 epoch 22 - iter 18/67 - loss 0.25997924 - time (sec): 7.59 - samples/sec: 2339.32 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:05,025 epoch 22 - iter 24/67 - loss 0.26236164 - time (sec): 11.48 - samples/sec: 2195.48 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:07,339 epoch 22 - iter 30/67 - loss 0.26024104 - time (sec): 13.80 - samples/sec: 2190.41 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:09,803 epoch 22 - iter 36/67 - loss 0.25624333 - time (sec): 16.26 - samples/sec: 2147.82 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:13,017 epoch 22 - iter 42/67 - loss 0.25865248 - time (sec): 19.48 - samples/sec: 2140.49 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.40s/it]

2024-11-06 10:37:33,904 DEV : loss 0.36912602186203003 - f1-score (micro avg)  0.0
2024-11-06 10:37:33,957  - 0 epochs without improvement
2024-11-06 10:37:33,959 ----------------------------------------------------------------------------------------------------





2024-11-06 10:37:37,126 epoch 23 - iter 6/67 - loss 0.27439863 - time (sec): 3.16 - samples/sec: 1785.25 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:39,263 epoch 23 - iter 12/67 - loss 0.28372295 - time (sec): 5.30 - samples/sec: 1926.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:42,542 epoch 23 - iter 18/67 - loss 0.28119565 - time (sec): 8.58 - samples/sec: 1909.88 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:45,683 epoch 23 - iter 24/67 - loss 0.27949775 - time (sec): 11.72 - samples/sec: 1924.33 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:48,843 epoch 23 - iter 30/67 - loss 0.26095977 - time (sec): 14.88 - samples/sec: 2095.81 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:52,093 epoch 23 - iter 36/67 - loss 0.24700153 - time (sec): 18.13 - samples/sec: 2076.37 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:37:55,485 epoch 23 - iter 42/67 - loss 0.23965479 - time (sec): 21.52 - samples/sec: 2103.23 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:06<00:00,  6.71s/it]

2024-11-06 10:38:13,454 DEV : loss 0.36991575360298157 - f1-score (micro avg)  0.0
2024-11-06 10:38:13,474  - 1 epochs without improvement
2024-11-06 10:38:13,477 ----------------------------------------------------------------------------------------------------





2024-11-06 10:38:16,597 epoch 24 - iter 6/67 - loss 0.24186739 - time (sec): 3.12 - samples/sec: 2466.81 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:38:20,264 epoch 24 - iter 12/67 - loss 0.21730512 - time (sec): 6.78 - samples/sec: 2195.72 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:38:22,882 epoch 24 - iter 18/67 - loss 0.24717872 - time (sec): 9.40 - samples/sec: 2106.85 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:38:25,633 epoch 24 - iter 24/67 - loss 0.23685257 - time (sec): 12.15 - samples/sec: 2206.37 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:38:28,344 epoch 24 - iter 30/67 - loss 0.24366653 - time (sec): 14.86 - samples/sec: 2203.88 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:38:30,793 epoch 24 - iter 36/67 - loss 0.24289271 - time (sec): 17.31 - samples/sec: 2174.24 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:38:33,253 epoch 24 - iter 42/67 - loss 0.24554889 - time (sec): 19.77 - samples/sec: 2158.05 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.90s/it]

2024-11-06 10:38:53,579 DEV : loss 0.36820122599601746 - f1-score (micro avg)  0.0
2024-11-06 10:38:53,599  - 0 epochs without improvement
2024-11-06 10:38:53,601 ----------------------------------------------------------------------------------------------------





2024-11-06 10:38:56,332 epoch 25 - iter 6/67 - loss 0.25315336 - time (sec): 2.73 - samples/sec: 2297.97 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:38:59,806 epoch 25 - iter 12/67 - loss 0.24993102 - time (sec): 6.20 - samples/sec: 2153.58 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:02,169 epoch 25 - iter 18/67 - loss 0.25187835 - time (sec): 8.56 - samples/sec: 2119.78 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:04,392 epoch 25 - iter 24/67 - loss 0.24672804 - time (sec): 10.79 - samples/sec: 2210.99 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:07,294 epoch 25 - iter 30/67 - loss 0.25812664 - time (sec): 13.69 - samples/sec: 2156.04 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:09,710 epoch 25 - iter 36/67 - loss 0.25581859 - time (sec): 16.11 - samples/sec: 2177.29 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:12,687 epoch 25 - iter 42/67 - loss 0.24789295 - time (sec): 19.08 - samples/sec: 2176.95 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:09<00:00,  9.08s/it]

2024-11-06 10:39:34,401 DEV : loss 0.3651423156261444 - f1-score (micro avg)  0.0
2024-11-06 10:39:34,422  - 0 epochs without improvement
2024-11-06 10:39:34,425 ----------------------------------------------------------------------------------------------------





2024-11-06 10:39:37,138 epoch 26 - iter 6/67 - loss 0.25539664 - time (sec): 2.71 - samples/sec: 2422.62 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:40,853 epoch 26 - iter 12/67 - loss 0.24822648 - time (sec): 6.42 - samples/sec: 2225.65 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:43,375 epoch 26 - iter 18/67 - loss 0.23637572 - time (sec): 8.95 - samples/sec: 2214.99 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:46,160 epoch 26 - iter 24/67 - loss 0.23405385 - time (sec): 11.73 - samples/sec: 2245.99 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:48,451 epoch 26 - iter 30/67 - loss 0.24367831 - time (sec): 14.02 - samples/sec: 2209.45 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:50,550 epoch 26 - iter 36/67 - loss 0.25025783 - time (sec): 16.12 - samples/sec: 2209.20 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:39:53,260 epoch 26 - iter 42/67 - loss 0.24605959 - time (sec): 18.83 - samples/sec: 2176.00 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.79s/it]

2024-11-06 10:40:14,708 DEV : loss 0.3638223111629486 - f1-score (micro avg)  0.0
2024-11-06 10:40:14,735  - 0 epochs without improvement
2024-11-06 10:40:14,736 ----------------------------------------------------------------------------------------------------





2024-11-06 10:40:17,437 epoch 27 - iter 6/67 - loss 0.24353820 - time (sec): 2.70 - samples/sec: 1863.29 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:40:20,941 epoch 27 - iter 12/67 - loss 0.20894408 - time (sec): 6.20 - samples/sec: 2137.45 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:40:23,083 epoch 27 - iter 18/67 - loss 0.21266704 - time (sec): 8.34 - samples/sec: 2206.02 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:40:25,950 epoch 27 - iter 24/67 - loss 0.21296696 - time (sec): 11.21 - samples/sec: 2171.45 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:40:28,910 epoch 27 - iter 30/67 - loss 0.20920290 - time (sec): 14.17 - samples/sec: 2203.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:40:31,611 epoch 27 - iter 36/67 - loss 0.21981705 - time (sec): 16.87 - samples/sec: 2217.41 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:40:35,004 epoch 27 - iter 42/67 - loss 0.21495505 - time (sec): 20.26 - samples/sec: 2159.76 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.97s/it]

2024-11-06 10:40:55,749 DEV : loss 0.3650190532207489 - f1-score (micro avg)  0.0
2024-11-06 10:40:55,770  - 1 epochs without improvement
2024-11-06 10:40:55,772 ----------------------------------------------------------------------------------------------------





2024-11-06 10:40:58,999 epoch 28 - iter 6/67 - loss 0.21861776 - time (sec): 3.22 - samples/sec: 2131.11 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:01,111 epoch 28 - iter 12/67 - loss 0.23228598 - time (sec): 5.34 - samples/sec: 2243.15 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:04,362 epoch 28 - iter 18/67 - loss 0.20150536 - time (sec): 8.59 - samples/sec: 2231.25 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:07,309 epoch 28 - iter 24/67 - loss 0.20711431 - time (sec): 11.53 - samples/sec: 2160.69 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:10,277 epoch 28 - iter 30/67 - loss 0.21260625 - time (sec): 14.50 - samples/sec: 2154.64 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:12,813 epoch 28 - iter 36/67 - loss 0.21242184 - time (sec): 17.04 - samples/sec: 2154.63 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:15,740 epoch 28 - iter 42/67 - loss 0.21278257 - time (sec): 19.96 - samples/sec: 2172.20 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.42s/it]

2024-11-06 10:41:35,434 DEV : loss 0.36470699310302734 - f1-score (micro avg)  0.0
2024-11-06 10:41:35,477  - 2 epochs without improvement
2024-11-06 10:41:35,482 ----------------------------------------------------------------------------------------------------





2024-11-06 10:41:38,362 epoch 29 - iter 6/67 - loss 0.19670081 - time (sec): 2.88 - samples/sec: 1681.01 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:41,732 epoch 29 - iter 12/67 - loss 0.17945686 - time (sec): 6.25 - samples/sec: 2005.03 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:44,501 epoch 29 - iter 18/67 - loss 0.19171793 - time (sec): 9.02 - samples/sec: 2053.15 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:47,215 epoch 29 - iter 24/67 - loss 0.20481121 - time (sec): 11.73 - samples/sec: 2052.96 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:50,121 epoch 29 - iter 30/67 - loss 0.20988637 - time (sec): 14.64 - samples/sec: 2018.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:53,192 epoch 29 - iter 36/67 - loss 0.20532577 - time (sec): 17.71 - samples/sec: 1999.81 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:41:56,514 epoch 29 - iter 42/67 - loss 0.20708647 - time (sec): 21.03 - samples/sec: 2033.35 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.01s/it]

2024-11-06 10:42:15,492 DEV : loss 0.3634025752544403 - f1-score (micro avg)  0.0
2024-11-06 10:42:15,512  - 0 epochs without improvement
2024-11-06 10:42:15,514 ----------------------------------------------------------------------------------------------------





2024-11-06 10:42:18,259 epoch 30 - iter 6/67 - loss 0.16767749 - time (sec): 2.74 - samples/sec: 2286.10 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:42:20,683 epoch 30 - iter 12/67 - loss 0.18782481 - time (sec): 5.17 - samples/sec: 2375.62 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:42:23,838 epoch 30 - iter 18/67 - loss 0.20784101 - time (sec): 8.32 - samples/sec: 2229.84 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:42:26,007 epoch 30 - iter 24/67 - loss 0.21257201 - time (sec): 10.49 - samples/sec: 2282.63 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:42:29,354 epoch 30 - iter 30/67 - loss 0.20925982 - time (sec): 13.84 - samples/sec: 2173.21 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:42:31,486 epoch 30 - iter 36/67 - loss 0.21083281 - time (sec): 15.97 - samples/sec: 2149.26 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:42:34,786 epoch 30 - iter 42/67 - loss 0.20390035 - time (sec): 19.27 - samples/sec: 2181.31 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.38s/it]

2024-11-06 10:42:54,779 DEV : loss 0.36349794268608093 - f1-score (micro avg)  0.0
2024-11-06 10:42:54,799  - 1 epochs without improvement
2024-11-06 10:42:54,800 ----------------------------------------------------------------------------------------------------





2024-11-06 10:42:57,933 epoch 31 - iter 6/67 - loss 0.18696682 - time (sec): 3.13 - samples/sec: 1824.62 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:01,545 epoch 31 - iter 12/67 - loss 0.18330071 - time (sec): 6.74 - samples/sec: 1891.70 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:04,437 epoch 31 - iter 18/67 - loss 0.18019150 - time (sec): 9.63 - samples/sec: 2033.06 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:07,104 epoch 31 - iter 24/67 - loss 0.18261205 - time (sec): 12.30 - samples/sec: 2066.16 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:10,062 epoch 31 - iter 30/67 - loss 0.18669643 - time (sec): 15.26 - samples/sec: 2062.30 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:12,927 epoch 31 - iter 36/67 - loss 0.19029290 - time (sec): 18.12 - samples/sec: 2137.97 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:15,698 epoch 31 - iter 42/67 - loss 0.19048570 - time (sec): 20.89 - samples/sec: 2089.78 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.71s/it]

2024-11-06 10:43:35,511 DEV : loss 0.3652862012386322 - f1-score (micro avg)  0.0
2024-11-06 10:43:35,533  - 2 epochs without improvement
2024-11-06 10:43:35,534 ----------------------------------------------------------------------------------------------------





2024-11-06 10:43:38,713 epoch 32 - iter 6/67 - loss 0.25020840 - time (sec): 3.17 - samples/sec: 2303.92 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:41,187 epoch 32 - iter 12/67 - loss 0.23169178 - time (sec): 5.65 - samples/sec: 2232.71 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:44,073 epoch 32 - iter 18/67 - loss 0.21427052 - time (sec): 8.53 - samples/sec: 2191.32 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:45,931 epoch 32 - iter 24/67 - loss 0.22428790 - time (sec): 10.39 - samples/sec: 2208.03 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:48,630 epoch 32 - iter 30/67 - loss 0.21343332 - time (sec): 13.09 - samples/sec: 2185.21 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:51,437 epoch 32 - iter 36/67 - loss 0.19965857 - time (sec): 15.90 - samples/sec: 2167.66 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:43:53,471 epoch 32 - iter 42/67 - loss 0.20155744 - time (sec): 17.93 - samples/sec: 2202.36 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:09<00:00,  9.10s/it]

2024-11-06 10:44:15,760 DEV : loss 0.3621325194835663 - f1-score (micro avg)  0.0
2024-11-06 10:44:15,782  - 0 epochs without improvement
2024-11-06 10:44:15,784 ----------------------------------------------------------------------------------------------------





2024-11-06 10:44:19,321 epoch 33 - iter 6/67 - loss 0.13974126 - time (sec): 3.53 - samples/sec: 2046.36 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:44:21,981 epoch 33 - iter 12/67 - loss 0.14663339 - time (sec): 6.19 - samples/sec: 2124.24 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:44:24,079 epoch 33 - iter 18/67 - loss 0.16756100 - time (sec): 8.29 - samples/sec: 2167.23 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:44:26,716 epoch 33 - iter 24/67 - loss 0.17407269 - time (sec): 10.93 - samples/sec: 2149.20 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:44:29,179 epoch 33 - iter 30/67 - loss 0.16860193 - time (sec): 13.39 - samples/sec: 2059.38 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:44:31,795 epoch 33 - iter 36/67 - loss 0.16559124 - time (sec): 16.01 - samples/sec: 2108.00 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:44:35,870 epoch 33 - iter 42/67 - loss 0.17353451 - time (sec): 20.08 - samples/sec: 2103.09 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:09<00:00,  9.16s/it]

2024-11-06 10:44:57,310 DEV : loss 0.36427757143974304 - f1-score (micro avg)  0.0
2024-11-06 10:44:57,330  - 1 epochs without improvement
2024-11-06 10:44:57,331 ----------------------------------------------------------------------------------------------------





2024-11-06 10:44:59,422 epoch 34 - iter 6/67 - loss 0.16337936 - time (sec): 2.09 - samples/sec: 2376.12 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:01,877 epoch 34 - iter 12/67 - loss 0.18872791 - time (sec): 4.54 - samples/sec: 2256.66 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:04,534 epoch 34 - iter 18/67 - loss 0.18990570 - time (sec): 7.20 - samples/sec: 2241.51 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:07,539 epoch 34 - iter 24/67 - loss 0.19059964 - time (sec): 10.20 - samples/sec: 2154.93 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:10,286 epoch 34 - iter 30/67 - loss 0.18658376 - time (sec): 12.95 - samples/sec: 2190.49 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:13,830 epoch 34 - iter 36/67 - loss 0.18698114 - time (sec): 16.50 - samples/sec: 2132.48 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:16,353 epoch 34 - iter 42/67 - loss 0.18784103 - time (sec): 19.02 - samples/sec: 2156.75 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.39s/it]

2024-11-06 10:45:36,248 DEV : loss 0.3614729642868042 - f1-score (micro avg)  0.0
2024-11-06 10:45:36,280  - 0 epochs without improvement
2024-11-06 10:45:36,283 ----------------------------------------------------------------------------------------------------





2024-11-06 10:45:39,407 epoch 35 - iter 6/67 - loss 0.18070105 - time (sec): 3.12 - samples/sec: 1668.48 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:42,023 epoch 35 - iter 12/67 - loss 0.17878130 - time (sec): 5.74 - samples/sec: 1856.76 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:44,436 epoch 35 - iter 18/67 - loss 0.18048532 - time (sec): 8.15 - samples/sec: 2099.86 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:47,956 epoch 35 - iter 24/67 - loss 0.16851332 - time (sec): 11.67 - samples/sec: 2057.52 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:50,269 epoch 35 - iter 30/67 - loss 0.16575621 - time (sec): 13.98 - samples/sec: 2114.83 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:52,405 epoch 35 - iter 36/67 - loss 0.16706484 - time (sec): 16.12 - samples/sec: 2157.13 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:45:55,868 epoch 35 - iter 42/67 - loss 0.16982367 - time (sec): 19.58 - samples/sec: 2151.57 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:06<00:00,  6.87s/it]

2024-11-06 10:46:14,343 DEV : loss 0.36218324303627014 - f1-score (micro avg)  0.0
2024-11-06 10:46:14,365  - 1 epochs without improvement
2024-11-06 10:46:14,367 ----------------------------------------------------------------------------------------------------





2024-11-06 10:46:16,592 epoch 36 - iter 6/67 - loss 0.18390375 - time (sec): 2.22 - samples/sec: 2205.43 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:46:19,230 epoch 36 - iter 12/67 - loss 0.15389910 - time (sec): 4.86 - samples/sec: 2118.55 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:46:22,350 epoch 36 - iter 18/67 - loss 0.16311134 - time (sec): 7.98 - samples/sec: 2142.30 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:46:25,419 epoch 36 - iter 24/67 - loss 0.17020382 - time (sec): 11.05 - samples/sec: 2171.11 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:46:28,272 epoch 36 - iter 30/67 - loss 0.17226129 - time (sec): 13.90 - samples/sec: 2104.77 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:46:31,753 epoch 36 - iter 36/67 - loss 0.16770800 - time (sec): 17.38 - samples/sec: 2108.81 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:46:34,264 epoch 36 - iter 42/67 - loss 0.16350856 - time (sec): 19.89 - samples/sec: 2141.40 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.58s/it]

2024-11-06 10:46:54,680 DEV : loss 0.36144164204597473 - f1-score (micro avg)  0.0
2024-11-06 10:46:54,700  - 0 epochs without improvement
2024-11-06 10:46:54,703 ----------------------------------------------------------------------------------------------------





2024-11-06 10:46:56,965 epoch 37 - iter 6/67 - loss 0.15610427 - time (sec): 2.26 - samples/sec: 2341.79 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:00,458 epoch 37 - iter 12/67 - loss 0.14486567 - time (sec): 5.75 - samples/sec: 2037.68 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:02,515 epoch 37 - iter 18/67 - loss 0.16283152 - time (sec): 7.81 - samples/sec: 2120.58 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:04,875 epoch 37 - iter 24/67 - loss 0.15988978 - time (sec): 10.17 - samples/sec: 2102.46 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:07,740 epoch 37 - iter 30/67 - loss 0.15958726 - time (sec): 13.03 - samples/sec: 2148.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:11,226 epoch 37 - iter 36/67 - loss 0.16560332 - time (sec): 16.52 - samples/sec: 2119.83 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:13,797 epoch 37 - iter 42/67 - loss 0.16932737 - time (sec): 19.09 - samples/sec: 2094.28 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:10<00:00, 10.84s/it]

2024-11-06 10:47:37,809 DEV : loss 0.3634187877178192 - f1-score (micro avg)  0.0
2024-11-06 10:47:37,831  - 1 epochs without improvement
2024-11-06 10:47:37,834 ----------------------------------------------------------------------------------------------------





2024-11-06 10:47:40,054 epoch 38 - iter 6/67 - loss 0.19165378 - time (sec): 2.22 - samples/sec: 2245.95 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:42,661 epoch 38 - iter 12/67 - loss 0.18374306 - time (sec): 4.82 - samples/sec: 2157.05 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:44,947 epoch 38 - iter 18/67 - loss 0.17562911 - time (sec): 7.11 - samples/sec: 2279.81 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:47,501 epoch 38 - iter 24/67 - loss 0.16840323 - time (sec): 9.66 - samples/sec: 2215.27 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:50,058 epoch 38 - iter 30/67 - loss 0.16420671 - time (sec): 12.22 - samples/sec: 2215.49 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:52,754 epoch 38 - iter 36/67 - loss 0.16180406 - time (sec): 14.92 - samples/sec: 2286.00 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:47:56,737 epoch 38 - iter 42/67 - loss 0.15875525 - time (sec): 18.90 - samples/sec: 2218.63 - lr: 0.010000 - momentum: 0.000000
2024

100%|██████████| 1/1 [00:08<00:00,  8.97s/it]

2024-11-06 10:48:18,285 DEV : loss 0.36120808124542236 - f1-score (micro avg)  0.0
2024-11-06 10:48:18,309  - 0 epochs without improvement
2024-11-06 10:48:18,311 ----------------------------------------------------------------------------------------------------





2024-11-06 10:48:20,198 epoch 39 - iter 6/67 - loss 0.16046299 - time (sec): 1.88 - samples/sec: 2566.71 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:48:23,013 epoch 39 - iter 12/67 - loss 0.15770652 - time (sec): 4.70 - samples/sec: 2178.08 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:48:25,852 epoch 39 - iter 18/67 - loss 0.14776450 - time (sec): 7.54 - samples/sec: 2235.94 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:48:28,858 epoch 39 - iter 24/67 - loss 0.14720380 - time (sec): 10.54 - samples/sec: 2200.25 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:48:31,615 epoch 39 - iter 30/67 - loss 0.14846599 - time (sec): 13.30 - samples/sec: 2197.79 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:48:34,118 epoch 39 - iter 36/67 - loss 0.15195608 - time (sec): 15.80 - samples/sec: 2176.16 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:48:37,784 epoch 39 - iter 42/67 - loss 0.14291772 - time (sec): 19.47 - samples/sec: 2166.82 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.36s/it]

2024-11-06 10:48:57,583 DEV : loss 0.36151450872421265 - f1-score (micro avg)  0.0
2024-11-06 10:48:57,615  - 1 epochs without improvement
2024-11-06 10:48:57,618 ----------------------------------------------------------------------------------------------------





2024-11-06 10:49:00,195 epoch 40 - iter 6/67 - loss 0.15235956 - time (sec): 2.57 - samples/sec: 2412.01 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:03,186 epoch 40 - iter 12/67 - loss 0.14472337 - time (sec): 5.56 - samples/sec: 2320.66 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:05,454 epoch 40 - iter 18/67 - loss 0.14847230 - time (sec): 7.83 - samples/sec: 2187.72 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:08,329 epoch 40 - iter 24/67 - loss 0.15071820 - time (sec): 10.70 - samples/sec: 2173.54 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:10,884 epoch 40 - iter 30/67 - loss 0.14681529 - time (sec): 13.26 - samples/sec: 2191.67 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:13,475 epoch 40 - iter 36/67 - loss 0.14678020 - time (sec): 15.85 - samples/sec: 2143.30 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:16,637 epoch 40 - iter 42/67 - loss 0.14567777 - time (sec): 19.01 - samples/sec: 2196.07 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:06<00:00,  6.87s/it]

2024-11-06 10:49:36,210 DEV : loss 0.36175355315208435 - f1-score (micro avg)  0.0
2024-11-06 10:49:36,230  - 2 epochs without improvement
2024-11-06 10:49:36,233 ----------------------------------------------------------------------------------------------------





2024-11-06 10:49:39,213 epoch 41 - iter 6/67 - loss 0.13312944 - time (sec): 2.98 - samples/sec: 2187.88 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:41,897 epoch 41 - iter 12/67 - loss 0.12622168 - time (sec): 5.66 - samples/sec: 2127.42 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:44,346 epoch 41 - iter 18/67 - loss 0.13956287 - time (sec): 8.11 - samples/sec: 2231.89 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:47,342 epoch 41 - iter 24/67 - loss 0.14010256 - time (sec): 11.11 - samples/sec: 2166.33 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:50,574 epoch 41 - iter 30/67 - loss 0.13865468 - time (sec): 14.34 - samples/sec: 2132.93 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:53,703 epoch 41 - iter 36/67 - loss 0.13298261 - time (sec): 17.47 - samples/sec: 2164.87 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:49:56,526 epoch 41 - iter 42/67 - loss 0.13558267 - time (sec): 20.29 - samples/sec: 2158.26 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.64s/it]

2024-11-06 10:50:17,258 DEV : loss 0.3614254593849182 - f1-score (micro avg)  0.0
2024-11-06 10:50:17,278  - 3 epochs without improvement
2024-11-06 10:50:17,280 ----------------------------------------------------------------------------------------------------





2024-11-06 10:50:20,455 epoch 42 - iter 6/67 - loss 0.12370605 - time (sec): 3.17 - samples/sec: 2411.06 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:50:22,959 epoch 42 - iter 12/67 - loss 0.12629347 - time (sec): 5.68 - samples/sec: 2473.28 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:50:25,890 epoch 42 - iter 18/67 - loss 0.12963623 - time (sec): 8.61 - samples/sec: 2327.72 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:50:28,655 epoch 42 - iter 24/67 - loss 0.13475490 - time (sec): 11.37 - samples/sec: 2339.72 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:50:31,152 epoch 42 - iter 30/67 - loss 0.13493075 - time (sec): 13.87 - samples/sec: 2229.97 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:50:33,870 epoch 42 - iter 36/67 - loss 0.13114951 - time (sec): 16.59 - samples/sec: 2222.43 - lr: 0.010000 - momentum: 0.000000
2024-11-06 10:50:37,534 epoch 42 - iter 42/67 - loss 0.12574064 - time (sec): 20.25 - samples/sec: 2282.57 - lr: 0.010000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:09<00:00,  9.14s/it]

2024-11-06 10:50:57,662 DEV : loss 0.36349546909332275 - f1-score (micro avg)  0.0
2024-11-06 10:50:57,683  - 4 epochs without improvement (above 'patience')-> annealing learning_rate to [0.005]
2024-11-06 10:50:57,685 ----------------------------------------------------------------------------------------------------





2024-11-06 10:51:00,942 epoch 43 - iter 6/67 - loss 0.13504112 - time (sec): 3.25 - samples/sec: 2099.43 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:04,096 epoch 43 - iter 12/67 - loss 0.13095405 - time (sec): 6.41 - samples/sec: 2076.04 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:06,960 epoch 43 - iter 18/67 - loss 0.11621112 - time (sec): 9.27 - samples/sec: 2177.92 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:10,193 epoch 43 - iter 24/67 - loss 0.12007862 - time (sec): 12.50 - samples/sec: 2131.14 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:12,782 epoch 43 - iter 30/67 - loss 0.11589941 - time (sec): 15.09 - samples/sec: 2205.81 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:15,686 epoch 43 - iter 36/67 - loss 0.11684025 - time (sec): 18.00 - samples/sec: 2139.21 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:18,594 epoch 43 - iter 42/67 - loss 0.11831110 - time (sec): 20.90 - samples/sec: 2137.60 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.71s/it]

2024-11-06 10:51:38,438 DEV : loss 0.3618875741958618 - f1-score (micro avg)  0.0
2024-11-06 10:51:38,459  - 1 epochs without improvement
2024-11-06 10:51:38,461 ----------------------------------------------------------------------------------------------------





2024-11-06 10:51:41,057 epoch 44 - iter 6/67 - loss 0.07311379 - time (sec): 2.59 - samples/sec: 2105.16 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:43,208 epoch 44 - iter 12/67 - loss 0.10318088 - time (sec): 4.74 - samples/sec: 2217.16 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:45,579 epoch 44 - iter 18/67 - loss 0.10547217 - time (sec): 7.11 - samples/sec: 2345.77 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:49,265 epoch 44 - iter 24/67 - loss 0.11622986 - time (sec): 10.80 - samples/sec: 2224.46 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:51,803 epoch 44 - iter 30/67 - loss 0.11224206 - time (sec): 13.34 - samples/sec: 2214.34 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:55,268 epoch 44 - iter 36/67 - loss 0.10771430 - time (sec): 16.80 - samples/sec: 2244.05 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:51:57,749 epoch 44 - iter 42/67 - loss 0.10857132 - time (sec): 19.28 - samples/sec: 2215.60 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.03s/it]

2024-11-06 10:52:17,648 DEV : loss 0.36161041259765625 - f1-score (micro avg)  0.0
2024-11-06 10:52:17,686  - 2 epochs without improvement
2024-11-06 10:52:17,688 ----------------------------------------------------------------------------------------------------





2024-11-06 10:52:20,546 epoch 45 - iter 6/67 - loss 0.12545141 - time (sec): 2.85 - samples/sec: 1832.21 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:52:22,821 epoch 45 - iter 12/67 - loss 0.12551218 - time (sec): 5.13 - samples/sec: 2004.71 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:52:26,701 epoch 45 - iter 18/67 - loss 0.12446105 - time (sec): 9.01 - samples/sec: 2072.64 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:52:29,349 epoch 45 - iter 24/67 - loss 0.11578859 - time (sec): 11.66 - samples/sec: 2156.43 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:52:32,447 epoch 45 - iter 30/67 - loss 0.11169208 - time (sec): 14.76 - samples/sec: 2129.56 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:52:34,939 epoch 45 - iter 36/67 - loss 0.11023153 - time (sec): 17.25 - samples/sec: 2098.19 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:52:37,539 epoch 45 - iter 42/67 - loss 0.11185782 - time (sec): 19.85 - samples/sec: 2134.86 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:06<00:00,  6.97s/it]

2024-11-06 10:52:57,206 DEV : loss 0.3618696928024292 - f1-score (micro avg)  0.0
2024-11-06 10:52:57,227  - 3 epochs without improvement
2024-11-06 10:52:57,228 ----------------------------------------------------------------------------------------------------





2024-11-06 10:52:59,862 epoch 46 - iter 6/67 - loss 0.12049535 - time (sec): 2.63 - samples/sec: 2417.49 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:03,019 epoch 46 - iter 12/67 - loss 0.11280711 - time (sec): 5.79 - samples/sec: 2224.80 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:05,703 epoch 46 - iter 18/67 - loss 0.10766329 - time (sec): 8.47 - samples/sec: 2155.99 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:08,994 epoch 46 - iter 24/67 - loss 0.10908266 - time (sec): 11.76 - samples/sec: 2142.10 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:12,032 epoch 46 - iter 30/67 - loss 0.11335380 - time (sec): 14.80 - samples/sec: 2191.78 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:15,299 epoch 46 - iter 36/67 - loss 0.10894888 - time (sec): 18.07 - samples/sec: 2152.08 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:18,299 epoch 46 - iter 42/67 - loss 0.11017086 - time (sec): 21.07 - samples/sec: 2112.06 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:07<00:00,  7.94s/it]

2024-11-06 10:53:36,976 DEV : loss 0.3607959747314453 - f1-score (micro avg)  0.0
2024-11-06 10:53:36,997  - 0 epochs without improvement
2024-11-06 10:53:36,998 ----------------------------------------------------------------------------------------------------





2024-11-06 10:53:40,384 epoch 47 - iter 6/67 - loss 0.09074466 - time (sec): 3.38 - samples/sec: 1741.22 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:43,717 epoch 47 - iter 12/67 - loss 0.08666254 - time (sec): 6.71 - samples/sec: 1911.60 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:45,566 epoch 47 - iter 18/67 - loss 0.08841613 - time (sec): 8.56 - samples/sec: 1998.51 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:48,352 epoch 47 - iter 24/67 - loss 0.08811180 - time (sec): 11.35 - samples/sec: 2038.48 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:50,888 epoch 47 - iter 30/67 - loss 0.08901189 - time (sec): 13.88 - samples/sec: 2074.10 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:54,468 epoch 47 - iter 36/67 - loss 0.10101839 - time (sec): 17.46 - samples/sec: 2058.08 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:53:57,377 epoch 47 - iter 42/67 - loss 0.10051236 - time (sec): 20.37 - samples/sec: 2106.52 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.44s/it]

2024-11-06 10:54:18,020 DEV : loss 0.36064791679382324 - f1-score (micro avg)  0.0
2024-11-06 10:54:18,044  - 0 epochs without improvement
2024-11-06 10:54:18,045 ----------------------------------------------------------------------------------------------------





2024-11-06 10:54:20,585 epoch 48 - iter 6/67 - loss 0.10539791 - time (sec): 2.54 - samples/sec: 2236.41 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:54:23,151 epoch 48 - iter 12/67 - loss 0.11429277 - time (sec): 5.10 - samples/sec: 2413.75 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:54:26,088 epoch 48 - iter 18/67 - loss 0.10760587 - time (sec): 8.04 - samples/sec: 2236.15 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:54:28,615 epoch 48 - iter 24/67 - loss 0.10507925 - time (sec): 10.57 - samples/sec: 2275.15 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:54:31,560 epoch 48 - iter 30/67 - loss 0.10290293 - time (sec): 13.51 - samples/sec: 2264.86 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:54:33,829 epoch 48 - iter 36/67 - loss 0.09693005 - time (sec): 15.78 - samples/sec: 2349.40 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:54:36,913 epoch 48 - iter 42/67 - loss 0.09961666 - time (sec): 18.86 - samples/sec: 2269.47 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.87s/it]

2024-11-06 10:54:58,229 DEV : loss 0.35979601740837097 - f1-score (micro avg)  0.0
2024-11-06 10:54:58,251  - 0 epochs without improvement
2024-11-06 10:54:58,252 ----------------------------------------------------------------------------------------------------





2024-11-06 10:55:01,622 epoch 49 - iter 6/67 - loss 0.08976887 - time (sec): 3.36 - samples/sec: 2141.49 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:04,475 epoch 49 - iter 12/67 - loss 0.09012650 - time (sec): 6.22 - samples/sec: 2162.28 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:07,069 epoch 49 - iter 18/67 - loss 0.09401330 - time (sec): 8.81 - samples/sec: 2058.74 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:10,726 epoch 49 - iter 24/67 - loss 0.09311855 - time (sec): 12.47 - samples/sec: 2066.76 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:13,264 epoch 49 - iter 30/67 - loss 0.09750576 - time (sec): 15.01 - samples/sec: 2051.67 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:15,928 epoch 49 - iter 36/67 - loss 0.09869074 - time (sec): 17.67 - samples/sec: 2032.16 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:19,241 epoch 49 - iter 42/67 - loss 0.09890522 - time (sec): 20.98 - samples/sec: 2048.24 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.69s/it]

2024-11-06 10:55:39,466 DEV : loss 0.35994070768356323 - f1-score (micro avg)  0.0
2024-11-06 10:55:39,487  - 1 epochs without improvement
2024-11-06 10:55:39,489 ----------------------------------------------------------------------------------------------------





2024-11-06 10:55:42,256 epoch 50 - iter 6/67 - loss 0.12414608 - time (sec): 2.76 - samples/sec: 2557.09 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:44,511 epoch 50 - iter 12/67 - loss 0.12244653 - time (sec): 5.02 - samples/sec: 2111.44 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:47,401 epoch 50 - iter 18/67 - loss 0.10625405 - time (sec): 7.91 - samples/sec: 2176.33 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:50,838 epoch 50 - iter 24/67 - loss 0.10078489 - time (sec): 11.35 - samples/sec: 2149.79 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:54,143 epoch 50 - iter 30/67 - loss 0.10366076 - time (sec): 14.65 - samples/sec: 2237.04 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:55:57,611 epoch 50 - iter 36/67 - loss 0.09959324 - time (sec): 18.12 - samples/sec: 2164.31 - lr: 0.005000 - momentum: 0.000000
2024-11-06 10:56:00,416 epoch 50 - iter 42/67 - loss 0.10065833 - time (sec): 20.92 - samples/sec: 2144.50 - lr: 0.005000 - momentum: 0.000000
202

100%|██████████| 1/1 [00:08<00:00,  8.66s/it]

2024-11-06 10:56:19,904 DEV : loss 0.3591618239879608 - f1-score (micro avg)  0.0
2024-11-06 10:56:19,925  - 0 epochs without improvement





2024-11-06 10:56:26,085 ----------------------------------------------------------------------------------------------------
2024-11-06 10:56:26,089 Testing using last state of model ...


100%|██████████| 2/2 [00:05<00:00,  2.74s/it]

2024-11-06 10:56:31,631 
Results:
- F-score (micro) 0.563
- F-score (macro) 0.7187
- Accuracy 0.3962

By class:
              precision    recall  f1-score   support

       SKILL     0.3408    0.3862    0.3621       593
         JOB     0.5730    0.7020    0.6310       151
        WORK     0.8188    0.9457    0.8777       129
     COMPANY     0.5172    0.5714    0.5430       105
         LOC     0.5701    0.7176    0.6354        85
         DEG     0.5862    0.7907    0.6733        43
         UNI     0.5660    0.6667    0.6122        45
        NAME     0.9388    0.9787    0.9583        47
       PHONE     1.0000    0.9787    0.9892        47
       EMAIL     0.8222    0.9250    0.8706        40
       STUDY     0.8286    0.6905    0.7532        42

   micro avg     0.5281    0.6029    0.5630      1327
   macro avg     0.6874    0.7594    0.7187      1327
weighted avg     0.5324    0.6029    0.5642      1327

2024-11-06 10:56:31,633 ---------------------------------------------------




In [None]:
# evaluate model
from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# Load the trained model
model = SequenceTagger.load('/content/drive/MyDrive/FYP/Implementation/flair_output/final-model.pt')

# Evaluate the model on the test set
result = model.evaluate(corpus.test, gold_label_type='ner', mini_batch_size=32)

# Print the results
# print("Evaluation Loss:", eval_loss)
print(result.detailed_results)  # print the precision, recall, and F1-score per entity type

2024-11-06 10:57:38,582 SequenceTagger predicts: Dictionary with 45 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-DEG, B-DEG, E-DEG, I-DEG, S-UNI, B-UNI, E-UNI, I-UNI, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL


100%|██████████| 3/3 [00:03<00:00,  1.29s/it]


Results:
- F-score (micro) 0.563
- F-score (macro) 0.7187
- Accuracy 0.3962

By class:
              precision    recall  f1-score   support

       SKILL     0.3408    0.3862    0.3621       593
         JOB     0.5730    0.7020    0.6310       151
        WORK     0.8188    0.9457    0.8777       129
     COMPANY     0.5172    0.5714    0.5430       105
         LOC     0.5701    0.7176    0.6354        85
         DEG     0.5862    0.7907    0.6733        43
         UNI     0.5660    0.6667    0.6122        45
        NAME     0.9388    0.9787    0.9583        47
       PHONE     1.0000    0.9787    0.9892        47
       EMAIL     0.8222    0.9250    0.8706        40
       STUDY     0.8286    0.6905    0.7532        42

   micro avg     0.5281    0.6029    0.5630      1327
   macro avg     0.6874    0.7594    0.7187      1327
weighted avg     0.5324    0.6029    0.5642      1327






In [None]:
from flair.models import SequenceTagger
from flair.data import Sentence
import spacy
from spacy import displacy

# Load your trained Flair NER model
tagger = SequenceTagger.load('/content/drive/MyDrive/FYP/Implementation/flair_output/best-model.pt')

resume_text = '''
John Doe lives at 1234 Elm Street in Los Angeles, CA 90001. He can be reached at +1 (555) 123-4567 or via email at john.doe@example.com. John is a results-driven software engineer with over 5 years of experience in web development and cloud infrastructure, with strong knowledge of JavaScript, Python, and cloud technologies like AWS and Azure. Currently, he works as a Software Engineer at Google LLC in San Francisco, CA, where he has been employed since August 2019. In this role, he has developed scalable web applications using JavaScript, Node.js, and React, deployed and maintained cloud infrastructure on AWS, reducing downtime by 20%, and led a team of 4 engineers to enhance backend performance by 30%. Previously, he worked as a Junior Developer at Tech Innovators Inc. in Austin, TX, from July 2017 to July 2019, where he created RESTful APIs using Python and Flask, collaborated with front-end developers to build and deploy user-facing applications, and wrote unit and integration tests, improving code coverage by 15%.

John holds a Master of Science in Computer Science from the University of California, Berkeley, with a graduation date of May 2017, and a Bachelor of Science in Information Technology from the University of Texas at Austin, graduated in May 2015. His skillset includes proficiency in programming languages like Python, JavaScript, and Java; frameworks such as React, Flask, and Django; cloud platforms including AWS, Google Cloud, and Azure; as well as other tools like Git, Docker, Kubernetes, and SQL. He is certified as an AWS Certified Solutions Architect – Associate, earned in 2020, and as a Google Professional Cloud Architect, earned in 2021'
'''

# Step 1: Predict entities using Flair
sentence = Sentence(resume_text)
tagger.predict(sentence)

# Step 2: Convert Flair predictions to spaCy doc format
# Initialize a blank spaCy NLP pipeline
nlp = spacy.blank("en")
doc = nlp(resume_text)

# Extract entities from Flair prediction and convert to spaCy format
ents = []
for entity in sentence.get_spans('ner'):
    start, end = entity.start_position, entity.end_position
    label = entity.tag
    span = doc.char_span(start, end, label=label)
    if span is not None:
        ents.append(span)

# Set the entities in the spaCy doc
doc.ents = ents

# Step 3: Visualize using displacy
# Display in Jupyter or a web page
displacy.render(doc, style="ent", jupyter=True)


2024-11-06 10:57:55,781 SequenceTagger predicts: Dictionary with 47 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, <START>, <STOP>




In [None]:
from flair.models import SequenceTagger
from flair.data import Sentence

# Load the pretrained NER model
tagger = SequenceTagger.load("/content/drive/MyDrive/FYP/Implementation/flair_output/best-model.pt")
# Example text
text = "Apple is looking at buying U.K. startup for $1 billion."

# Create a Sentence object
sentence = Sentence(resume_text)

# Predict entities
tagger.predict(sentence)

# Print the detected entities
for entity in sentence.get_spans("ner"):
    print(f"Entity: {entity.text}, Type: {entity.get_label('ner').value}, Confidence: {entity.score}")


2024-11-06 10:58:17,528 SequenceTagger predicts: Dictionary with 47 tags: O, S-SKILL, B-SKILL, E-SKILL, I-SKILL, S-JOB, B-JOB, E-JOB, I-JOB, S-WORK, B-WORK, E-WORK, I-WORK, S-COMPANY, B-COMPANY, E-COMPANY, I-COMPANY, S-LOC, B-LOC, E-LOC, I-LOC, S-UNI, B-UNI, E-UNI, I-UNI, S-DEG, B-DEG, E-DEG, I-DEG, S-NAME, B-NAME, E-NAME, I-NAME, S-PHONE, B-PHONE, E-PHONE, I-PHONE, S-STUDY, B-STUDY, E-STUDY, I-STUDY, S-EMAIL, B-EMAIL, E-EMAIL, I-EMAIL, <START>, <STOP>
Entity: Doe lives, Type: NAME, Confidence: 0.16702505946159363
Entity: 1234 Elm, Type: NAME, Confidence: 0.11599351465702057
Entity: Los, Type: DEG, Confidence: 0.18544061481952667
Entity: 90001, Type: DEG, Confidence: 0.14748063683509827
Entity: 555, Type: DEG, Confidence: 0.11017131805419922
Entity: 123-4567, Type: DEG, Confidence: 0.11569119244813919
Entity: via, Type: DEG, Confidence: 0.20629706978797913
Entity: at, Type: EMAIL, Confidence: 0.19365909695625305
Entity: john.doe, Type: JOB, Confidence: 0.10587572306394577
Entity: @, Ty