In [1]:
import os

In [2]:
os.chdir("../")

In [3]:
%pwd

'/Users/macbookpro/Documents/Documents - Macbook’s MacBook Pro/career/career_chief_rep'

In [4]:
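# Configuration for spaCy Named Entity Recognition (NER) model training.
# NOTE: this is YAML (the `spacy_ner` section read from config/config.yaml), not Python;
# running it as a code cell raises the SyntaxError shown below. Keep it in a Markdown/raw cell.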
spacy_ner:
  root_dir: artifacts/model_training/spacy_ner  # Root directory for training artifacts
  json_annotated_path: artifacts/model_training/spacy_ner/project-4-at-2024-04-01-07-30-2333a63c.json  # Path to annotated data
  output_path: artifacts/model_training/spacy_ner/output/converted_data.spacy  # Output path for the converted spaCy data
  train_data_path: artifacts/data_transformation/train_data.csv  # Path to unannotated training data
  test_data_path: artifacts/data_transformation/test_data.csv  # Path to unannotated test data
  val_data_path: artifacts/data_transformation/val_data.csv  # Path to unannotated validation data
  spacy_train: artifacts/model_training/spacy_ner/output/train_data.spacy  # Processed spaCy training data
  spacy_dev: artifacts/model_training/spacy_ner/output/dev_data.spacy  # Processed spaCy dev (validation) data
  gpu_allocator: pytorch  # GPU allocator (use 'pytorch' for PyTorch)
  components:  # NLP pipeline components
    - name: "ner"
      factory: "ner"
  training:  # Training parameters
    batch_size: 128
    dropout: 0.5
    optimizer:
      learn_rate: 0.001
    patience: 3
    max_epochs: 20


SyntaxError: invalid syntax (2308916172.py, line 2)

In [5]:
from dataclasses import dataclass
from pathlib import Path
from typing import List, Dict, Any

@dataclass
class SpacyNERConfig:
    """
    Typed settings bundle for training the custom spaCy NER model.

    Instances are meant to be filled from the YAML configuration so the rest of
    the codebase can use attribute access instead of raw dictionary lookups.

    Attributes:
        root_dir (Path): Where training artifacts and results are stored.
        json_annotated_path (Path): Label Studio JSON export holding the annotations.
        output_path (Path): Target file for the converted spaCy data.
        train_data_path (Path): CSV with the unannotated training data.
        test_data_path (Path): CSV with the unannotated test data.
        val_data_path (Path): CSV with the unannotated validation data.
        spacy_train (Path): Processed spaCy training data file.
        spacy_dev (Path): Processed spaCy development (validation) data file.
        gpu_allocator (str): GPU allocator used for training, e.g. 'pytorch'.
        components (List[Dict[str, Any]]): NER pipeline component settings.
        training (Dict[str, Any]): Training parameters.
    """

    # --- artifact and annotation locations ---
    root_dir: Path
    json_annotated_path: Path
    output_path: Path

    # --- unannotated CSV splits ---
    train_data_path: Path
    test_data_path: Path
    val_data_path: Path

    # --- processed spaCy binaries ---
    spacy_train: Path
    spacy_dev: Path

    # --- training set-up ---
    gpu_allocator: str
    components: List[Dict[str, Any]]
    training: Dict[str, Any]


In [6]:
from src.career_chief.constants import *
from src.career_chief.utils.common import read_yaml, create_directories
from src.career_chief import logger
from src.career_chief.entity.config_entity import (DataIngestionConfig, DataValidationConfig, SpacyNERConfig)

class ConfigurationManager:
    """
    Central access point for the pipeline's configuration files.

    On construction the config, params and schema files are read through
    `read_yaml`, and the artifacts root directory named in the config is
    created. Accessor methods then turn sections of the loaded config into
    typed config objects.

    Attributes:
    - config: Loaded configuration settings.
    - params: Loaded pipeline parameters.
    - schema: Loaded schema information.
    """

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH) -> None:
        """
        Load the three settings files and create the artifacts root directory.

        Args:
        - config_filepath (Path): Location of the configuration file.
        - params_filepath (Path): Location of the parameters file.
        - schema_filepath (Path): Location of the schema file.
        """
        # Attribute name doubles as the label used in error logs.
        sources = (
            ("config", config_filepath),
            ("params", params_filepath),
            ("schema", schema_filepath),
        )
        for label, location in sources:
            setattr(self, label, self._read_config_file(location, label))

        # Make sure the artifacts root exists before any stage writes to it.
        create_directories([self.config.artifacts_root])

    def _read_config_file(self, filepath: str, config_name: str) -> dict:
        """
        Load one settings file via `read_yaml`.

        Args:
        - filepath (str): File to load.
        - config_name (str): Human-readable name used only in the error log.

        Returns:
        - dict: Whatever `read_yaml` returns for the file.

        Raises:
        - Exception: Any error from `read_yaml` is logged and re-raised unchanged.
        """
        try:
            content = read_yaml(filepath)
        except Exception as err:
            logger.error(f"Error reading {config_name} file: {filepath}. Error: {err}")
            raise
        return content

    def get_spacy_ner_config(self) -> SpacyNERConfig:
        """
        Build a SpacyNERConfig from the 'spacy_ner' section of the loaded config.

        Path-like entries are wrapped in `Path`; the remaining entries are
        passed through as loaded.

        Returns:
        - SpacyNERConfig: Settings object for spaCy NER model training.

        Raises:
        - KeyError: If the section or one of its required keys is missing.
        """
        path_keys = (
            'root_dir',
            'json_annotated_path',
            'output_path',
            'train_data_path',
            'test_data_path',
            'val_data_path',
            'spacy_train',
            'spacy_dev',
        )
        passthrough_keys = ('gpu_allocator', 'components', 'training')

        try:
            section = self.config['spacy_ner']

            # Keys are read in declaration order, so the first missing one is reported.
            kwargs = {key: Path(section[key]) for key in path_keys}
            for key in passthrough_keys:
                kwargs[key] = section[key]

            return SpacyNERConfig(**kwargs)

        except KeyError as e:
            logger.error(f"A required configuration is missing in the 'spacy_ner' section: {e}")
            raise KeyError(f"Missing configuration in 'spacy_ner': {e}") from e

# Component


In [61]:
import spacy
from spacy.tokens import DocBin
import random
import json
from pathlib import Path
from sklearn.model_selection import train_test_split

from spacy.training import Example
from spacy.util import minibatch, compounding
import random
from src.career_chief import logger

class SpacyCustomNERModel:
    """
    Converts Label Studio annotations to spaCy data and trains a custom NER model.

    `run` drives the whole workflow: load the annotated JSON export, convert it
    to spaCy `Doc` objects, split them into train/dev `.spacy` files, then train
    a blank English pipeline that contains only an `ner` component.

    Attributes:
        config: Settings object. The methods read `json_annotated_path`,
            `spacy_train`, `spacy_dev`, `root_dir`, `gpu_allocator` and
            `training` from it.
        nlp: A blank English pipeline until `train_ner_model` finishes, after
            which it refers to the trained pipeline.
        ner: The `ner` component of `nlp`.
    """

    def __init__(self, config):
        """
        Store the configuration and create a blank English pipeline with `ner`.

        Args:
            config: Settings object (see class docstring for the attributes used).
        """
        self.config = config
        self.nlp = spacy.blank("en")  # Create a blank English model
        # Bind self.ner on both paths so the attribute always exists.
        if "ner" not in self.nlp.pipe_names:
            self.ner = self.nlp.add_pipe("ner")
        else:
            self.ner = self.nlp.get_pipe("ner")

    @staticmethod
    def _spans_overlap(start, end, taken):
        """Return True if the half-open range [start, end) intersects any (s, e) in `taken`."""
        # Two half-open ranges intersect exactly when each starts before the other
        # ends; this also catches a new span lying strictly inside an existing one.
        return any(start < e and s < end for s, e in taken)

    def process_annotations(self, data, nlp):
        """
        Convert annotated JSON items into a DocBin of documents with entities set.

        Only the first annotation set of each item (`annotations[0]["result"]`)
        is used. A result is dropped when it has no labels or offsets, when it
        overlaps a span already accepted for the same document (spaCy rejects
        overlapping entity spans on `doc.ents`), or when its character offsets
        do not align with token boundaries (`alignment_mode="strict"`). Items
        without any annotation set are skipped instead of raising.

        Args:
            data (list): Annotated items, each with a 'data' dict holding 'text'
                and an 'annotations' list.
            nlp (Language): spaCy Language object used to tokenize the text.

        Returns:
            DocBin: The processed documents with their entities marked.
        """
        doc_bin = DocBin()
        skipped_items = 0
        dropped_spans = 0

        for item in data:
            annotation_sets = item.get("annotations") or []
            if not annotation_sets:
                # Nothing was labelled for this item; adding it with zero entities
                # would present unlabelled text as a negative example.
                skipped_items += 1
                continue

            doc = nlp.make_doc(item["data"]["text"])
            ents = []
            taken = []  # (start, end) character ranges already accepted

            for annot in annotation_sets[0].get("result") or []:
                value = annot.get("value") or {}
                labels = value.get("labels")
                if not labels or "start" not in value or "end" not in value:
                    dropped_spans += 1
                    continue

                start, end, label = value["start"], value["end"], labels[0]
                if self._spans_overlap(start, end, taken):
                    dropped_spans += 1
                    continue

                span = doc.char_span(start, end, label=label, alignment_mode="strict")
                if span is None:
                    # Offsets do not fall on token boundaries.
                    dropped_spans += 1
                    continue

                ents.append(span)
                taken.append((start, end))

            doc.ents = ents
            doc_bin.add(doc)

        if skipped_items or dropped_spans:
            logger.warning(
                f"Skipped {skipped_items} item(s) without annotations and "
                f"dropped {dropped_spans} unusable or overlapping span(s)."
            )
        logger.info("Annotations processed successfully.")
        return doc_bin

    def split_and_save_data(self, docs, test_size=0.2):
        """
        Split documents into training and development sets and save both to disk.

        The split uses a fixed `random_state` of 42. Files are written to
        `config.spacy_train` and `config.spacy_dev`; missing parent directories
        are created first.

        Args:
            docs (list): The full list of spaCy Doc objects.
            test_size (float): Proportion of the documents put in the development set.
        """
        train_docs, dev_docs = train_test_split(docs, test_size=test_size, random_state=42)
        logger.info(f"Split data into {len(train_docs)} training and {len(dev_docs)} development examples.")

        train_bin = DocBin(docs=train_docs, store_user_data=True)
        dev_bin = DocBin(docs=dev_docs, store_user_data=True)

        # Writing fails if the output directory is missing, so create it up front.
        for target in (self.config.spacy_train, self.config.spacy_dev):
            Path(target).parent.mkdir(parents=True, exist_ok=True)

        train_bin.to_disk(self.config.spacy_train)
        dev_bin.to_disk(self.config.spacy_dev)
        logger.info("Training and development data saved successfully.")

    def get_labels(self, docs):
        """
        Collect the distinct entity labels present in the given documents.

        Args:
            docs (list of spacy.Doc): Documents whose `ents` are inspected.

        Returns:
            set: The unique `label_` values found.
        """
        return {ent.label_ for doc in docs for ent in doc.ents}

    def train_ner_model(self):
        """
        Train a blank English NER pipeline on the saved train data and save it.

        Reads `config.spacy_train` and `config.spacy_dev`, registers every label
        found in either set, and trains for `config.training["max_epochs"]`
        epochs. The dropout rate comes from `config.training["dropout"]` and
        falls back to 0.5 when absent. The development documents are used only
        to collect labels; no evaluation is performed here. The trained pipeline
        is written to `<root_dir>/trained_model` and stored on `self.nlp`.
        """
        # NOTE(review): any truthy gpu_allocator makes a GPU mandatory; confirm
        # this is intended for CPU-only environments.
        if self.config.gpu_allocator:
            spacy.require_gpu()
        nlp = spacy.blank("en")

        if "ner" not in nlp.pipe_names:
            ner = nlp.add_pipe("ner", last=True)
        else:
            ner = nlp.get_pipe("ner")

        train_docs = list(DocBin().from_disk(self.config.spacy_train).get_docs(nlp.vocab))
        dev_docs = list(DocBin().from_disk(self.config.spacy_dev).get_docs(nlp.vocab))

        for label in self.get_labels(train_docs + dev_docs):
            ner.add_label(label)

        training = self.config.training
        max_epochs = training["max_epochs"]
        dropout = training.get("dropout", 0.5)

        # Train with only the NER component enabled.
        with nlp.select_pipes(enable=["ner"]):
            optimizer = nlp.initialize()

            for itn in range(max_epochs):
                random.shuffle(train_docs)
                losses = {}

                batches = minibatch(train_docs, size=compounding(4., 32., 1.001))
                for batch in batches:
                    # Build each Example from a fresh, unannotated doc plus the gold entity offsets.
                    examples = [
                        Example.from_dict(
                            nlp.make_doc(doc.text),
                            {"entities": [(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents]},
                        )
                        for doc in batch
                    ]
                    nlp.update(examples, sgd=optimizer, drop=dropout, losses=losses)
                print(f"Losses at iteration {itn}: {losses}")

        model_output_dir = Path(self.config.root_dir) / "trained_model"
        nlp.to_disk(model_output_dir)

        # Expose the trained pipeline so callers do not pick up the blank one.
        self.nlp = nlp
        self.ner = ner
        logger.info(f"Custom NER model trained and saved successfully at {model_output_dir}.")

    def run(self):
        """
        Execute the full workflow: load annotations, convert, split, save, train.

        The trained model is saved by `train_ner_model`; nothing is written to
        the model directory afterwards, so the trained weights are not replaced.
        """
        with open(self.config.json_annotated_path, 'r', encoding='utf-8') as f:
            annotated_data = json.load(f)

        # A separate blank pipeline is used purely for tokenization here.
        nlp = spacy.blank("en")
        logger.info("Blank spaCy model initialized.")

        doc_bin = self.process_annotations(annotated_data, nlp)

        # split_and_save_data expects a list of Doc objects, not a DocBin.
        docs = list(doc_bin.get_docs(nlp.vocab))
        self.split_and_save_data(docs)

        self.train_ner_model()

        logger.info("SpacyCustomNERModel pipeline completed successfully.")




# import spacy
# from spacy.tokens import DocBin
# import json
# from pathlib import Path
# from sklearn.model_selection import train_test_split
# from spacy.training import Example
# from spacy.util import minibatch, compounding
# import random

# from src.career_chief import logger
# from src.career_chief.config.configuration import ConfigurationManager

# class SpacyCustomNERModel:
#     """
#     A class for handling the preparation, training, and evaluation of a custom spaCy Named Entity Recognition (NER) model.
    
#     Attributes:
#         config (ConfigurationManager): An instance of ConfigurationManager to access configuration details.
#     """
    
#     def __init__(self, config: SpacyNERConfig):
#         """
#         Initializes SpacyCustomNERModel with the specific configurations for spaCy NER model training.
        
#         Args:
#             config (SpacyNERConfig): Configuration object containing all necessary settings for the NER model.
#         """
#         self.config = config
#         logger.info("SpacyCustomNERModel initialized with provided SpacyNERConfig.")


    
    
#     def process_annotations(self, data, nlp):
#         """
#         Processes annotations from a structured JSON format into spaCy's binary DocBin format.
        
#         Args:
#             data (list): A list of annotated data items.
#             nlp (Language): An instance of a spaCy Language object for document processing.
#         """
#         doc_bin = DocBin()  # Initialize DocBin to store docs

#         for item in data:
#             text = item["data"]["text"]
#             annotations = item["annotations"][0]["result"]
#             doc = nlp.make_doc(text)  # Tokenize the text

#             ents = []
#             existing_spans = set()  # Track spans to avoid overlaps
#             for annot in annotations:
#                 start = annot["value"]["start"]
#                 end = annot["value"]["end"]
#                 label = annot["value"]["labels"][0]

#                 # Check for overlap
#                 if not any((start <= s < end) or (start < e <= end) for s, e in existing_spans):
#                     span = doc.char_span(start, end, label=label, alignment_mode="strict")
#                     if span is not None:
#                         ents.append(span)
#                         existing_spans.add((start, end))

#             # Update the document with the entities
#             doc.ents = ents
#             doc_bin.add(doc)
        
#         logger.info("Annotations processed successfully.")
#         return doc_bin

#     def save_spacy_annotations(self, doc_bin, output_path):
#         """
#         Saves the processed annotations to disk in spaCy's binary format.

#         Args:
#             doc_bin (DocBin): The DocBin object containing processed annotations.
#             output_path (str): Path where the binary file will be saved.
#         """
#         doc_bin.to_disk(output_path)
#         print(f"Processed data saved to: {output_path}")
#         logger.info(f"Processed data saved to: {output_path}")
        
#     def read_spacy_annotations(self, file_path):
#         """
#         Reads and prints spaCy annotations from a .spacy file for verification.
        
#         Args:
#             file_path (str): Path to the .spacy file containing annotated data.
#         """
#         nlp = spacy.blank("en")  # Load the blank English model
#         doc_bin = DocBin().from_disk(file_path)  # Load the DocBin file
#         docs = list(doc_bin.get_docs(nlp.vocab))  # Create a list of Doc objects
        
#         for doc in docs:
#             print(f"Text: {doc.text[:50]}...")  # Print document text
#             for ent in doc.ents:
#                 print(f" - Entity: {ent.text}, Label: {ent.label_}")  # Print each entity and its label
#             print("\n---\n")

#         logger.info(f"Annotations from {file_path} read and printed for verification.")
#         return docs


#     def split_and_save_data(self, annotated_data, test_size=0.2):
#         """
#         Splits the annotated data into training and development sets, then saves them as .spacy files.

#         Args:
#             annotated_data (list): The full list of annotated examples.
#             test_size (float): The proportion of the dataset to include in the test split.
#         """

#         # Initialize blank English model
#         nlp = spacy.blank("en")

#         # Load the processed and annotated data
#         docs = self.read_spacy_annotations(self.config.output_path)

#         # Split the annotated data into training and development sets
#         train_docs, dev_docs = train_test_split(docs, test_size=0.2, random_state=42)
#         logger.info(f"Split annotated data into {len(train_docs)} training and {len(dev_docs)} development examples.")

#         # Create DocBin for train and dev, and add respective docs
#         train_bin = DocBin(attrs=["ENT_IOB", "ENT_TYPE"])
#         for doc in train_docs:
#             train_bin.add(doc)

#         dev_bin = DocBin(attrs=["ENT_IOB", "ENT_TYPE"])
#         for doc in dev_docs:
#             dev_bin.add(doc)

#         # Save the train and dev DocBins
#         train_bin.to_disk(self.config.spacy_train)
#         dev_bin.to_disk(self.config.spacy_dev)
    
#     def get_labels(self, data):
#         """
#         Extracts unique labels from the annotated data.

#         Args:
#             data (list): A list of spacy.Doc objects from training and development data.

#         Returns:
#             set: A set of unique labels.
#         """
#         labels = set()
#         for doc in data:
#             for ent in doc.ents:
#                 labels.add(ent.label_)
#         return labels

    
#     def train_ner_model(self):
#         """
#         Trains a custom NER model using the split and saved training and development datasets.
#         """
#         # Load the spaCy model
#         if self.config.gpu_allocator:
#             spacy.require_gpu()

#         nlp = spacy.blank("en")  # Create a blank English model
        
#         ## Check if the 'ner' component already exists in the pipeline
#         # If not, add it
#         if not nlp.has_pipe("ner"):
#             nlp.add_pipe("ner")

#         # Now, you can access the NER component and add labels, train, etc.
#         ner = nlp.get_pipe("ner")

#         # Load training and development data
#         train_bin = DocBin().from_disk(self.config.spacy_train)
#         dev_bin = DocBin().from_disk(self.config.spacy_dev)

#         train_data = list(train_bin.get_docs(nlp.vocab))
#         dev_data = list(dev_bin.get_docs(nlp.vocab))
        
#         # Add the NER pipe to the pipeline if it doesn't exist
#         # Create a new NER pipe if needed
#         if "ner" not in nlp.pipe_names:
#             ner = nlp.create_pipe("ner")
#             nlp.add_pipe(ner)
#         else:
#             ner = nlp.get_pipe("ner")

#         # Add labels to the NER pipe
#         for label in self.get_labels(train_data + dev_data):  # Implement get_labels to extract unique labels
#             ner.add_label(label)

#         # Get names of other pipes to disable them during training
#         other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
#         with nlp.disable_pipes(*other_pipes):  # Train only NER
#             optimizer = nlp.begin_training()
            
#             for itn in range(self.config.training["max_epochs"]):
#                 losses = {}
#                 batches = spacy.util.minibatch(train_data, size=self.config.training["batch_size"])
#                 for batch in batches:
#                     texts, annotations = zip(*batch)
#                     nlp.update(texts, annotations, sgd=optimizer, drop=self.config.training["dropout"], losses=losses)
#                 print("Losses", losses)  # Optionally, implement more sophisticated logging

#         # Save the trained model
#         nlp.to_disk(Path(self.config.root_dir) / "trained_model")
#         logger.info("Custom NER model trained and saved successfully.")


#     def run(self):
#         """
#         Executes the full pipeline for spaCy NER model training.
#         This includes processing annotations, splitting data, saving processed data,
#         and training the custom NER model.
#         """
#         logger.info("Starting the SpacyCustomNERModel pipeline.")
        
#         # Load annotations from the annotated data file
#         file_path = self.config.json_annotated_path
#         logger.info(f"Loading annotations from {file_path}.")
#         with open(file_path, 'r', encoding='utf-8') as file:
#             annotated_data = json.load(file)
        
#         # Process annotations to create spaCy Doc objects and save them in a DocBin
#         logger.info("Processing annotations.")
#         nlp = spacy.blank("en")  # Initialize a blank spaCy model for processing
#         doc_bin = self.process_annotations(annotated_data, nlp)
        
#         # Save processed annotations in .spacy format for later use
#         output_path = self.config.output_path
#         logger.info(f"Saving processed annotations to {output_path}.")
#         self.save_spacy_annotations(doc_bin, output_path)
        
#         # Optional: Read back the saved annotations for verification
#         logger.info("Reading back saved annotations for verification.")
#         self.read_spacy_annotations(output_path)
        
#         # Split the processed annotations into training and development data
#         logger.info("Splitting processed annotations into training and development datasets.")
#         self.split_and_save_data(annotated_data)  # Assumes split_and_save_data handles the split and saving
        
#         # Train the custom NER model using the split and saved training and development datasets
#         logger.info("Training the custom NER model.")
#         self.train_ner_model()
        
#         logger.info("SpacyCustomNERModel pipeline completed successfully.")




# Pipeline

In [62]:
from src.career_chief import logger
from src.career_chief.config.configuration import ConfigurationManager
# from src.career_chief.components.spacy_ner_custom_model import SpacyCustomNERModel

class SpacyCustomNERModelPipeline:
    """
    Orchestrates the pipeline for training a custom spaCy Named Entity Recognition (NER) model.

    The pipeline obtains the NER settings from a ConfigurationManager, builds a
    SpacyCustomNERModel component from them, and calls the component's `run`
    method. Progress and failures are logged with the stage name as a prefix.
    """

    STAGE_NAME = "Custom NER spaCy Model Training Pipeline"

    def __init__(self):
        """
        Create the ConfigurationManager used to fetch the NER training settings.
        """
        self.config_manager = ConfigurationManager()
        logger.info(f"{self.STAGE_NAME} initialized successfully.")

    def run_spacy_custom_ner_model(self):
        """
        Fetch the configuration, build the component, and run the training.

        Raises:
            Exception: Any error raised while fetching the configuration or
                running the component is logged and then re-raised unchanged.
        """
        try:
            logger.info(f"{self.STAGE_NAME}: Fetching model configuration.")
            spacy_custom_ner_model_config = self.config_manager.get_spacy_ner_config()

            logger.info(f"{self.STAGE_NAME}: Initializing the SpacyCustomNERModel component.")
            spacy_model_training = SpacyCustomNERModel(config=spacy_custom_ner_model_config)

            logger.info(f"{self.STAGE_NAME}: Executing the training pipeline.")
            spacy_model_training.run()

            logger.info(f"{self.STAGE_NAME}: Training pipeline executed successfully.")

        except Exception as e:
            logger.error(f"{self.STAGE_NAME}: Error occurred during model training - {e}")
            # Bare raise re-raises the active exception without naming it again.
            raise

# Entry point: build the pipeline and run the NER training when this cell/script
# is executed directly.
if __name__ == '__main__':
    # `pipeline` stays bound in the namespace afterwards, so it can be inspected
    # in later notebook cells.
    pipeline = SpacyCustomNERModelPipeline()
    pipeline.run_spacy_custom_ner_model()


[2024-04-02 17:23:23,004: 41: career_chief_logger: INFO: common:  yaml file: config/config.yaml loaded successfully]


[2024-04-02 17:23:23,013: 41: career_chief_logger: INFO: common:  yaml file: params.yaml loaded successfully]
[2024-04-02 17:23:23,019: 41: career_chief_logger: INFO: common:  yaml file: schema.yaml loaded successfully]
[2024-04-02 17:23:23,027: 64: career_chief_logger: INFO: common:  Created directory at: artifacts]
[2024-04-02 17:23:23,028: 22: career_chief_logger: INFO: 3306622033:  Custom NER spaCy Model Training Pipeline initialized successfully.]
[2024-04-02 17:23:23,029: 33: career_chief_logger: INFO: 3306622033:  Custom NER spaCy Model Training Pipeline: Fetching model configuration.]
[2024-04-02 17:23:23,031: 36: career_chief_logger: INFO: 3306622033:  Custom NER spaCy Model Training Pipeline: Initializing the SpacyCustomNERModel component.]
[2024-04-02 17:23:23,295: 39: career_chief_logger: INFO: 3306622033:  Custom NER spaCy Model Training Pipeline: Executing the training pipeline.]
[2024-04-02 17:23:23,513: 180: career_chief_logger: INFO: 570671794:  Blank spaCy model initi

[2024-04-02 17:23:24,348] [INFO] Created vocabulary


[2024-04-02 17:23:24,348: 140: spacy: INFO: initialize:  Created vocabulary]


[2024-04-02 17:23:24,351] [INFO] Finished initializing nlp object


[2024-04-02 17:23:24,351: 151: spacy: INFO: initialize:  Finished initializing nlp object]
Losses at iteration 0: {'ner': 19617.19465446472}
Losses at iteration 1: {'ner': 10884.20190668106}
Losses at iteration 2: {'ner': 6195.7482770085335}
Losses at iteration 3: {'ner': 3769.1879676878452}
Losses at iteration 4: {'ner': 3778.7230287976563}
Losses at iteration 5: {'ner': 3491.7997135845944}
Losses at iteration 6: {'ner': 3880.4862605035305}
Losses at iteration 7: {'ner': 3045.6036987900734}
Losses at iteration 8: {'ner': 3435.81584133883}
Losses at iteration 9: {'ner': 2881.633843127638}
Losses at iteration 10: {'ner': 3298.4148988546804}
Losses at iteration 11: {'ner': 3897.402091026772}
Losses at iteration 12: {'ner': 3091.5327505394816}
Losses at iteration 13: {'ner': 2934.867629684275}
Losses at iteration 14: {'ner': 3521.4135258719325}
Losses at iteration 15: {'ner': 2951.705285578966}
Losses at iteration 16: {'ner': 2135.759005185566}
Losses at iteration 17: {'ner': 2025.7460361