In [76]:
import json
import random
import logging
import spacy
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from spacy.training import Example
from spacy.scorer import Scorer
from sklearn.metrics import accuracy_score

In [77]:
def convert_dataturks_to_spacy(dataturks_JSON_FilePath):
    try:
        training_data = []
        lines=[]
        with open(dataturks_JSON_FilePath, 'r') as f:
            lines = f.readlines()

        for line in lines:
            data = json.loads(line)
            text = data['content']
            entities = []
            for annotation in data['annotation']:
                #only a single point in text annotation.
                point = annotation['points'][0]
                labels = annotation['label']
                # handle both list of labels or a single label.
                if not isinstance(labels, list):
                    labels = [labels]

                for label in labels:
                    #dataturks indices are both inclusive [start, end] but spacy is not [start, end)
                    entities.append((point['start'], point['end'] + 1 ,label))


            training_data.append((text, {"entities" : entities}))

        return training_data
    except Exception as e:
        logging.exception("Unable to process " + dataturks_JSON_FilePath + "\n" + "error = " + str(e))
        return None


In [78]:
def clean_annotations(train_data):
    cleaned_data = []
    for text, annotation in train_data:
        entities = annotation['entities']
        cleaned_entities = []
        last_end = -1

        for start, end, label in sorted(entities, key=lambda x: x[0]):
            if start >= last_end:  # Ensure there is no overlap
                cleaned_entities.append((start, end, label))
                last_end = end
            else:
                # Optionally, handle overlaps by merging or adjusting entities
                print(f"Found overlapping entity: {(start, end, label)} in text: '{text}'")
                # Merge entities if desired
                # Example: cleaned_entities[-1] = (cleaned_entities[-1][0], max(cleaned_entities[-1][1], end), label)

        cleaned_data.append((text, {"entities": cleaned_entities}))
    return cleaned_data

In [79]:

def train_spacy():

    TRAIN_DATA = convert_dataturks_to_spacy("traindata.json")
    TRAIN_DATA = clean_annotations(TRAIN_DATA)
    nlp = spacy.blank('en')  # create blank Language class
    # create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe("ner", last=True)


    # add labels
    for _, annotations in TRAIN_DATA:
         for ent in annotations.get('entities'):
            ner.add_label(ent[2])

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):  # only train NER
        optimizer = nlp.begin_training()
        for itn in range(1):
            print("Statring iteration " + str(itn))
            random.shuffle(TRAIN_DATA)
            losses = {}
            for text, annotations in TRAIN_DATA:
                doc = nlp.make_doc(text)
                example = Example.from_dict(doc, annotations)
                try:
                  nlp.update(
                      [example],  # batch of texts
                      drop=0.2,  # dropout - make it harder to memorise data
                      sgd=optimizer,  # callable to update weights
                      losses=losses)
                except Exception as error:
                    print(error)
                    continue
            print(losses)
    #test the model and evaluate it
    TEST_DATA = convert_dataturks_to_spacy("testdata.json")
    c=0
    for text, annot in TEST_DATA:
        with open("resume" + str(c) + ".txt", "w") as f:
            doc_to_test = nlp(text)
            d = {}
            for ent in doc_to_test.ents:
                d[ent.label_] = []
            for ent in doc_to_test.ents:
                d[ent.label_].append(ent.text)

            y_true = []
            y_pred = []
            for ent in doc_to_test.ents:
              try:
                doc_gold_text = nlp.make_doc(text)
                gold = Example.from_dict(doc_gold_text, annot)
                y_true.extend([x.ent_type_ if x.ent_type_ else "Not " + ent.label_ for x in gold.reference])
                y_pred.extend([x.ent_type_ if x.ent_type_ else "Not " + ent.label_ for x in doc_to_test])
              except Exception as e:
                pass
            try:
                print("\nFor Entity " + ent.label_ + ":\n")
                print("Classification Report:\n", classification_report(y_true, y_pred))
                p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
                a = accuracy_score(y_true, y_pred)
                print("Accuracy:", a)
                print("Precision:", p)
                print("Recall:", r)
                print("F-score:", f)
            except Exception as e:
                print(e)
                pass

            c += 1
    print("Accuracy:", a)
    print("Precision:", p)
    print("Recall:", r)
    print("F-score:", f)

In [85]:
train_spacy()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
• knowledge of SLP Multimedia Framework
• Ported Player (Audio and Video) module
• Ported Audio Out and Audio In modules using ALSA library
• ETMs Firmware Client: Wabtec Railway Electronics, German Town

Syam Prasad Devendla
Environment: Windows XP, IAR Workbench Hardware: IOC board (Provided by WRE)
Languages: C
• Serial - Ethernet Bridge
• Serial communication commands handling.
• Supt Link
Client: Schindler Elevator & Escalator Corporation
Environment: Windows Mobile 5.0, embedded VC, Visual Studio 2005
Languages: VC++, MFC
• A Business application. It makes easy for the superintendents to check the status of the
elevators and escalators which are newly installed or being maintained and rate them
according to their performance and report the same to the Sap server using mobility
solutions.
• FldLink
Client: Schindler Elevator & Escalator Corporation
Environment: Windows Mobile 5.0, embedded VC, Visual Studio 2005
Lang

Developer - Infosys Limited

Hyderabad..." with entities "[(0, 11, 'Name'), (12, 21, 'Designation'), (24, 40...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
Senior Technology Support Executive at In..." with entities "[(0, 8, 'Name'), (9, 44, 'Designation'), (47, 55, ...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
IT SUPPORT

Sulthan Bathery, Kerala, ..." with entities "[(0, 12, 'Name'), (13, 23, 'Designation'), (25, 40...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
"Store Executive" - Orange City Ho..." with entities "[(0, 15, 'Name'), (17, 32, 'Designation'), (36, 77...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to ch

[E024] Could not find an optimal move to supervise the parser. Usually, this means that the model can't be updated in a way that's valid and satisfies the correct annotations specified in the GoldParse. For example, are all labels added to the model? If you're training a named entity recognizer, also make sure that none of your annotated entity spans have leading or trailing whitespace or punctuation. You can also use the `debug data` command to validate your JSON-formatted training data. For details, run:
python -m spacy debug data --help


Hoshiarpur, Punjab - Email me on Ind..." with entities "[(0, 13, 'Name'), (14, 24, 'Location'), (55, 98, '...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
Kolkata, West Bengal - Email ..." with entities "[(0, 20, 'Name'), (21, 28, 'Location'), (63, 115, ...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
Kharadi, Pune, 411014, IN - Email ..." with entities "[(0, 15, 'Name'), (16, 23, 'Location'), (63, 111, ...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
Server Support Engineer

Gurgaon, Har..." with entities "[(0, 12, 'Name'), (13, 36, 'Designation'), (38, 45...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignm

[E024] Could not find an optimal move to supervise the parser. Usually, this means that the model can't be updated in a way that's valid and satisfies the correct annotations specified in the GoldParse. For example, are all labels added to the model? If you're training a named entity recognizer, also make sure that none of your annotated entity spans have leading or trailing whitespace or punctuation. You can also use the `debug data` command to validate your JSON-formatted training data. For details, run:
python -m spacy debug data --help


Tamil Nadu - Email me on Indeed..." with entities "[(0, 17, 'Name'), (19, 29, 'Location'), (52, 100, ...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
Principal Consultant at Oracle

Bengalu..." with entities "[(0, 10, 'Name'), (11, 31, 'Designation'), (35, 41...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
SAP ABAP Consultant

Hyderabad, Tel..." with entities "[(0, 14, 'Name'), (15, 23, 'Companies worked at'),...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
New Delhi, Delhi - Email me on Indeed:..." with entities "[(0, 11, 'Name'), (12, 21, 'Location'), (50, 93, '...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the

{'ner': 11767.96976098521}

For Entity Degree:

Classification Report:
                      precision    recall  f1-score   support

       College Name       0.00      0.00      0.00        44
Companies worked at       0.00      0.00      0.00         8
             Degree       0.00      0.00      0.00         0
        Designation       0.00      0.00      0.00        24
      Email Address       0.00      0.00      0.00         0
           Location       1.00      1.00      1.00         4
               Name       1.00      1.00      1.00         8
         Not Degree       0.94      0.49      0.65       245
  Not Email Address       0.94      0.49      0.65       245
       Not Location       0.94      0.49      0.65       245
           Not Name       0.94      0.49      0.65       245
             Skills       0.00      0.00      0.00       172

           accuracy                           0.40      1240
          macro avg       0.48      0.33      0.38      1240
       weig

Application Development Associate - A..." with entities "[(1295, 1622, 'Skills'), (993, 1154, 'Skills'), (9...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Active member of IIIT Committee in ..." with entities "[(1155, 1199, 'Email Address'), (743, 1141, 'Skill...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, mod


For Entity Email Address:

Classification Report:
                      precision    recall  f1-score   support

       College Name       0.00      0.00      0.00        14
Companies worked at       0.00      0.00      0.00         8
        Designation       0.00      0.00      0.00        12
      Email Address       1.00      0.33      0.50         6
           Location       0.00      0.00      0.00         6
               Name       1.00      0.67      0.80         6
  Not Email Address       0.96      1.00      0.98       732
           Not Name       0.96      1.00      0.98       732
             Skills       0.00      0.00      0.00        16

           accuracy                           0.96      1532
          macro avg       0.44      0.33      0.36      1532
       weighted avg       0.92      0.96      0.94      1532

Accuracy: 0.9595300261096605
Precision: 0.924620759746637
Recall: 0.9595300261096605
F-score: 0.9408895622483997

For Entity Degree:

Classification Rep

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
lecturer - oracle tutorials

Mumbai,..." with entities "[(2010, 2013, 'Degree'), (973, 1703, 'Skills'), (9...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.



For Entity Degree:

Classification Report:
                    precision    recall  f1-score   support

     College Name       0.00      0.00      0.00        16
           Degree       0.00      1.00      0.01         8
      Designation       0.00      0.00      0.00        20
    Email Address       0.00      0.00      0.00         0
         Location       0.00      0.00      0.00        12
             Name       1.00      1.00      1.00         8
       Not Degree       0.95      0.29      0.44      1246
Not Email Address       0.95      0.29      0.44       623
         Not Name       0.95      0.29      0.44       623

         accuracy                           0.29      2556
        macro avg       0.43      0.32      0.26      2556
     weighted avg       0.93      0.29      0.43      2556

Accuracy: 0.2863849765258216
Precision: 0.9314310805732742
Recall: 0.2863849765258216
F-score: 0.4335350159442598

For Entity Degree:

Classification Report:
                      preci

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Automation developer

- Email me on In..." with entities "[(2826, 2879, 'Skills'), (2768, 2810, 'Skills'), (...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(ave


For Entity Graduation Year:

max() arg is an empty sequence

For Entity Degree:

Classification Report:
                    precision    recall  f1-score   support

     College Name       0.00      0.00      0.00        15
           Degree       0.05      1.00      0.09        18
      Designation       0.00      0.00      0.00         9
    Email Address       0.00      0.00      0.00         0
         Location       0.00      0.00      0.00         6
             Name       1.00      1.00      1.00         6
       Not Degree       0.99      0.92      0.95       289
Not Email Address       0.99      0.92      0.95       289
         Not Name       0.99      0.92      0.95       289
           Skills       0.00      0.00      0.00       261

         accuracy                           0.70      1182
        macro avg       0.40      0.48      0.39      1182
     weighted avg       0.73      0.70      0.70      1182

Accuracy: 0.6954314720812182
Precision: 0.728467104516943
Recall:

QA Tester

Chennai, Tamil Nadu - Ema..." with entities "[(1970, 2083, 'Skills'), (1675, 1926, 'Skills'), (...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Transaction Processor - Oracle Ind..." with entities "[(1710, 1721, 'Skills'), (1692, 1707, 'Skills'), (...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, modif


For Entity Degree:

Classification Report:
                      precision    recall  f1-score   support

       College Name       0.00      0.00      0.00        12
Companies worked at       0.00      0.00      0.00         9
             Degree       0.00      0.00      0.00         0
        Designation       0.00      0.00      0.00        30
      Email Address       1.00      1.00      1.00         3
    Graduation Year       0.00      0.00      0.00         3
               Name       1.00      1.00      1.00         6
         Not Degree       0.98      0.96      0.97       760
  Not Email Address       0.98      0.96      0.97       760
           Not Name       0.98      0.96      0.97       760
             Skills       0.00      0.00      0.00       606

           accuracy                           0.75      2949
          macro avg       0.45      0.44      0.45      2949
       weighted avg       0.76      0.75      0.75      2949

Accuracy: 0.7466937945066124
Precisio

Senior Associate Consultant - Infosys ..." with entities "[(2394, 2479, 'Skills'), (2254, 2361, 'Skills'), (...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Team member - Oracle

Bangalore, Karnatak..." with entities "[(1439, 1592, 'Skills'), (1280, 1289, 'Location'),...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(avera


For Entity Email Address:

Classification Report:
                      precision    recall  f1-score   support

       College Name       0.00      0.00      0.00        15
Companies worked at       0.00      0.00      0.00        18
      Email Address       0.00      0.00      0.00         0
    Graduation Year       0.00      0.00      0.00         6
           Location       1.00      1.00      1.00         3
               Name       1.00      1.00      1.00         6
  Not Email Address       0.88      1.00      0.93       384
       Not Location       0.88      1.00      0.93       384
           Not Name       0.88      1.00      0.93       384
             Skills       0.00      0.00      0.00       123

           accuracy                           0.88      1323
          macro avg       0.46      0.50      0.48      1323
       weighted avg       0.77      0.88      0.82      1323

Accuracy: 0.8752834467120182
Precision: 0.7699528323915379
Recall: 0.8752834467120182
F-sco

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Senior Analyst - Cisco

New Delh..." with entities "[(1361, 1408, 'Email Address'), (1219, 1348, 'Skil...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.7171717171717171
Precision: 0.7846993888049606
Recall: 0.7171717171717171
F-score: 0.7429951903445007

For Entity Graduation Year:

Classification Report:
                      precision    recall  f1-score   support

Companies worked at       0.00      0.00      0.00        10
             Degree       0.06      1.00      0.12         5
      Email Address       1.00      1.00      1.00         5
    Graduation Year       0.50      0.25      0.33        20
           Location       0.00      0.00      0.00        10
               Name       1.00      1.00      1.00        10
         Not Degree       0.99      0.96      0.98       394
  Not Email Address       0.99      0.96      0.98       394
Not Graduation Year       0.99      0.96      0.98       788
           Not Name       0.99      0.96      0.98       394

           accuracy                           0.95      2030
          macro avg       0.65      0.71      0.64      2030
       weighted avg       0.97      0

Pune, Maharashtra - Email me on Indeed..." with entities "[(868, 876, 'Skills'), (813, 828, 'College Name'),...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Technology Analyst - Infosys Limit..." with entities "[(11663, 12212, 'Skills'), (11476, 11559, 'Skills'...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, mod


For Entity Degree:

Classification Report:
                    precision    recall  f1-score   support

     College Name       0.00      0.00      0.00        15
           Degree       0.03      1.00      0.05        15
      Designation       0.00      0.00      0.00        18
    Email Address       1.00      1.00      1.00         3
  Graduation Year       0.00      0.00      0.00         3
         Location       0.00      0.00      0.00        18
             Name       1.00      1.00      1.00         6
       Not Degree       0.99      0.97      0.98      2099
Not Email Address       0.99      0.97      0.98      2099
         Not Name       0.99      0.97      0.98      2099
           Skills       0.00      0.00      0.00       381

         accuracy                           0.91      6756
        macro avg       0.46      0.54      0.45      6756
     weighted avg       0.93      0.91      0.92      6756

Accuracy: 0.9116341030195382
Precision: 0.9284633549347237
Recall: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Deployed chef for configuration manag..." with entities "[(4611, 4621, 'Skills'), (4531, 4567, 'College Nam...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.



For Entity Degree:

Classification Report:
                      precision    recall  f1-score   support

       College Name       0.00      0.00      0.00        16
Companies worked at       0.00      0.00      0.00        20
             Degree       0.03      1.00      0.06         4
        Designation       0.00      0.00      0.00        88
      Email Address       1.00      1.00      1.00         4
           Location       0.00      0.00      0.00        16
               Name       1.00      1.00      1.00         8
         Not Degree       0.96      0.97      0.97       842
  Not Email Address       0.96      0.97      0.97       842
       Not Location       0.96      0.97      0.97       842
           Not Name       0.96      0.97      0.97       842
             Skills       0.00      0.00      0.00         8

           accuracy                           0.93      3532
          macro avg       0.49      0.57      0.49      3532
       weighted avg       0.92      0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



For Entity Graduation Year:

max() arg is an empty sequence

For Entity Graduation Year:

Classification Report:
                      precision    recall  f1-score   support

       College Name       0.00      0.00      0.00         6
      Email Address       0.00      0.00      0.00         0
    Graduation Year       1.00      1.00      1.00         3
           Location       0.00      0.00      0.00        18
               Name       1.00      1.00      1.00         6
  Not Email Address       0.96      1.00      0.98      1183
Not Graduation Year       0.96      1.00      0.98      1183
           Not Name       0.96      1.00      0.98      1183
             Skills       0.00      0.00      0.00       111

           accuracy                           0.96      3693
          macro avg       0.54      0.56      0.55      3693
       weighted avg       0.93      0.96      0.95      3693

Accuracy: 0.9634443541835905
Precision: 0.9289828043142482
Recall: 0.9634443541835905
F-s

BI / Big Data/ Azure

Hyderabad-Dec..." with entities "[(5893, 6043, 'Skills'), (5879, 5883, 'Graduation ...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9593826157595451
Precision: 0.9351834512981235
Recall: 0.9593826157595451
F-score: 0.9456206323750364

Accuracy: 0.9634443541835905
Precision: 0.9329879420039412
Recall: 0.9634443541835905
F-score: 0.9463293162034578

Accuracy: 0.9601949634443542
Precision: 0.9336098866708394
Recall: 0.9601949634443542
F-score: 0.9457312532094156

Accuracy: 0.9634443541835905
Precision: 0.9289828043142482
Recall: 0.9634443541835905
F-score: 0.9458989924866265