In [15]:
import os
import json
import spacy
from tqdm.notebook import tqdm
from spacy.scorer import Scorer
from spacy.training import Example
import pandas as pd

In [3]:
# Ask spaCy to use a GPU when one is available and print the returned flag
# (the recorded output below shows True for this run).
gpu = spacy.prefer_gpu()
print(gpu)

True


In [4]:
# Load the trained pipeline stored under ../training-output/v2/model-best.
nlp = spacy.load('../training-output/v2/model-best')

In [5]:
# Read the Label Studio export. The encoding is pinned to UTF-8 so the text
# (and therefore the character offsets stored in the annotations) is decoded
# the same way on every platform instead of depending on the locale default.
with open('../label-studio-files/v1/output.json', encoding='utf-8') as f:
    labeled_data = json.load(f)

In [6]:
# Convert the Label Studio export into (text, {"entities": [(start, end, label), ...]})
# pairs. Only the first annotation of each task and the first label of each
# result are used.
training_labeled_data = []
for data in labeled_data:
    text = data["data"]["text"]
    entities = []
    for annotation in data["annotations"][0]["result"]:
        start = annotation["value"]["start"]
        end = annotation["value"]["end"]
        label = annotation["value"]["labels"][0]
        # Drop the span if it shares any character with a span that was already
        # kept (first one wins). This covers nested spans as well as partial
        # overlaps; spaCy does not accept overlapping entity offsets.
        overlaps_kept_span = any(
            start < kept_end and kept_start < end
            for kept_start, kept_end, _ in entities
        )
        if not overlaps_kept_span:
            entities.append((start, end, label))
    training_labeled_data.append((text, {"entities": entities}))

In [7]:
# Scorer built from the loaded pipeline; used in the next cell to compute metrics.
scorer = Scorer(nlp)

In [8]:
# Pair the model's prediction for every text with its annotated entities,
# then score all pairs in one call (the result is the cell output).
job_descriptions = [
    Example.from_dict(nlp(text), annotations)
    for text, annotations in training_labeled_data
]
scorer.score(job_descriptions)

At Cloudflare, we have our eyes set on an..." with entities "[(381, 389, 'TECH_SKILL'), (1407, 1412, 'TECH_SKIL...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
In order ..." with entities "[(327, 343, 'DOMAIN'), (569, 586, 'ROLE'), (731, 7...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
Data Engineers develop modern data architecture a..." with entities "[(1005, 1016, 'SOFT_SKILL'), (1242, 1256, 'TECH_SK...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
We are creating the future of..." with entities "[(3397, 3408, 'OTHER'), (3663, 3667, 'PROG_LANG'),...". Use `spacy.training.offsets_to_biluo_tags(nlp.make_doc(text), entities)` to check the alignment. Mis

{'token_acc': 1.0,
 'token_p': 1.0,
 'token_r': 1.0,
 'token_f': 1.0,
 'ents_p': 0.6464367816091954,
 'ents_r': 0.5103448275862069,
 'ents_f': 0.5703853955375254,
 'ents_per_type': {'ROLE': {'p': 0.68,
   'r': 0.5396825396825397,
   'f': 0.6017699115044247},
  'SOFT_SKILL': {'p': 0.6253521126760564,
   'r': 0.7375415282392026,
   'f': 0.6768292682926829},
  'TECH_SKILL': {'p': 0.6204595997034841,
   'r': 0.5113011606597434,
   'f': 0.5606162089752178},
  'DEGREE': {'p': 0.4732824427480916, 'r': 0.5299145299145299, 'f': 0.5},
  'PROG_LANG': {'p': 0.9041666666666667,
   'r': 0.8509803921568627,
   'f': 0.8767676767676768},
  'REQ': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'RESP': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'OTHER': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'DOMAIN': {'p': 0.0, 'r': 0.0, 'f': 0.0}}}

In [16]:
from itertools import groupby


def doc_to_spans(doc):
    """Convert the entities of a processed document into Label Studio results.

    Each entity in ``doc.ents`` becomes one result dict, so two adjacent
    entities with the same label stay separate, and the reported text is the
    entity's own text rather than a re-joined list of tokens.

    Args:
        doc: A processed document exposing ``ents``; every entity must provide
            ``start_char``, ``end_char``, ``text`` and ``label_``.

    Returns:
        A ``(results, entities)`` tuple: ``results`` is the list of Label
        Studio result dicts in document order, ``entities`` is the set of
        labels that occurred.
    """
    results = []
    entities = set()
    for ent in doc.ents:
        results.append({
            'from_name': 'label',
            'to_name': 'text',
            'type': 'labels',
            'value': {
                # Character offsets as stored on the entity itself.
                'start': ent.start_char,
                'end': ent.end_char,
                'text': ent.text,
                'labels': [ent.label_]
            }
        })
        entities.add(ent.label_)

    return results, entities

In [17]:
# Build one Label Studio task per labelled text, attaching the model's
# predicted spans, and collect every label the model produced.
tasks, entities = [], set()
for record in labeled_data:
    raw_text = record["data"]["text"]
    predicted_spans, predicted_labels = doc_to_spans(nlp(raw_text))
    entities.update(predicted_labels)

    prediction = {'model_version': 'v2', 'result': predicted_spans}
    tasks.append({'data': {'text': raw_text}, 'predictions': [prediction]})

In [18]:
tasks_path = '../label-studio-files/v2/tasks.json'
labels_path = '../label-studio-files/v2/named_entities.txt'

# Create the output directory up front so a fresh checkout does not fail with
# FileNotFoundError when the v2 folder is missing.
os.makedirs(os.path.dirname(tasks_path), exist_ok=True)

# Save Label Studio tasks.json
print(f'Save {len(tasks)} tasks to "tasks.json"')
with open(tasks_path, mode='w', encoding='utf-8') as f:
    json.dump(tasks, f, indent=2)

# Save class labels as a txt file (one label per line, sorted)
print('Named entities are saved to "named_entities.txt"')
with open(labels_path, mode='w', encoding='utf-8') as f:
    f.write('\n'.join(sorted(entities)))

Save 121 tasks to "tasks.json"
Named entities are saved to "named_entities.txt"


## Model Comparison - spaCy vs. Custom Model

In [9]:
# Load the two pipelines to compare: the stock en_core_web_md package and the
# custom model (same path the earlier `nlp` pipeline was loaded from).
spacy_nlp = spacy.load('en_core_web_md')
custom_nlp = spacy.load('../training-output/v2/model-best')

In [18]:
# One Scorer per pipeline being compared.
# NOTE(review): `performance` is not read by any cell visible here — confirm it is still needed.
performance = []
spacy_scores = Scorer(spacy_nlp)
custom_scores = Scorer(custom_nlp)

In [11]:
# For every labelled text, build one Example per pipeline: the pipeline's
# prediction paired with the same annotated entities.
spacy_examples, custom_examples = [], []
for text, entities in training_labeled_data:
    spacy_examples.append(Example.from_dict(spacy_nlp(text), entities))
    custom_examples.append(Example.from_dict(custom_nlp(text), entities))

In [12]:
# Score the stock en_core_web_md predictions against the annotations.
# NOTE(review): the output below lists both stock labels (ORG, DATE, ...) and the
# annotation labels (TECH_SKILL, ROLE, ...), all at 0.0 — presumably because the
# two label sets do not overlap, so no predicted entity can match; confirm.
spacy_scores.score(spacy_examples)

{'token_acc': 1.0,
 'token_p': 1.0,
 'token_r': 1.0,
 'token_f': 1.0,
 'tag_acc': None,
 'sents_p': None,
 'sents_r': None,
 'sents_f': None,
 'dep_uas': None,
 'dep_las': None,
 'dep_las_per_type': None,
 'pos_acc': None,
 'morph_acc': None,
 'morph_micro_p': None,
 'morph_micro_r': None,
 'morph_micro_f': None,
 'morph_per_feat': None,
 'lemma_acc': None,
 'ents_p': 0.0,
 'ents_r': 0.0,
 'ents_f': 0.0,
 'ents_per_type': {'PRODUCT': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'DATE': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'ORG': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'REQ': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'TECH_SKILL': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'SOFT_SKILL': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'DEGREE': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'ROLE': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'RESP': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'OTHER': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'PROG_LANG': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'PERSON': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'GPE': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'ORDI

In [13]:
# Score the custom model's predictions against the same annotations.
custom_scores.score(custom_examples)

{'token_acc': 1.0,
 'token_p': 1.0,
 'token_r': 1.0,
 'token_f': 1.0,
 'ents_p': 0.6464367816091954,
 'ents_r': 0.5103448275862069,
 'ents_f': 0.5703853955375254,
 'ents_per_type': {'ROLE': {'p': 0.68,
   'r': 0.5396825396825397,
   'f': 0.6017699115044247},
  'SOFT_SKILL': {'p': 0.6253521126760564,
   'r': 0.7375415282392026,
   'f': 0.6768292682926829},
  'TECH_SKILL': {'p': 0.6204595997034841,
   'r': 0.5113011606597434,
   'f': 0.5606162089752178},
  'DEGREE': {'p': 0.4732824427480916, 'r': 0.5299145299145299, 'f': 0.5},
  'PROG_LANG': {'p': 0.9041666666666667,
   'r': 0.8509803921568627,
   'f': 0.8767676767676768},
  'REQ': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'RESP': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'OTHER': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'DOMAIN': {'p': 0.0, 'r': 0.0, 'f': 0.0}}}

In [25]:
# Same scoring call as the previous cell, but the result is kept in a variable
# so the per-entity table below can be built from it.
metrics_score = custom_scores.score(custom_examples)
metrics_score

{'token_acc': 1.0,
 'token_p': 1.0,
 'token_r': 1.0,
 'token_f': 1.0,
 'ents_p': 0.6464367816091954,
 'ents_r': 0.5103448275862069,
 'ents_f': 0.5703853955375254,
 'ents_per_type': {'ROLE': {'p': 0.68,
   'r': 0.5396825396825397,
   'f': 0.6017699115044247},
  'SOFT_SKILL': {'p': 0.6253521126760564,
   'r': 0.7375415282392026,
   'f': 0.6768292682926829},
  'TECH_SKILL': {'p': 0.6204595997034841,
   'r': 0.5113011606597434,
   'f': 0.5606162089752178},
  'DEGREE': {'p': 0.4732824427480916, 'r': 0.5299145299145299, 'f': 0.5},
  'PROG_LANG': {'p': 0.9041666666666667,
   'r': 0.8509803921568627,
   'f': 0.8767676767676768},
  'REQ': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'RESP': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'OTHER': {'p': 0.0, 'r': 0.0, 'f': 0.0},
  'DOMAIN': {'p': 0.0, 'r': 0.0, 'f': 0.0}}}

In [26]:

# Flatten the per-entity precision/recall/F-score mapping into one row per label.
per_type = metrics_score['ents_per_type']
scores = [
    [label, prf['p'], prf['r'], prf['f']]
    for label, prf in per_type.items()
]

df = pd.DataFrame(scores, columns=['Entity', 'Precision', 'Recall', 'F-score'])
df

Unnamed: 0,Entity,Precision,Recall,F-score
0,ROLE,0.68,0.539683,0.60177
1,SOFT_SKILL,0.625352,0.737542,0.676829
2,TECH_SKILL,0.62046,0.511301,0.560616
3,DEGREE,0.473282,0.529915,0.5
4,PROG_LANG,0.904167,0.85098,0.876768
5,REQ,0.0,0.0,0.0
6,RESP,0.0,0.0,0.0
7,OTHER,0.0,0.0,0.0
8,DOMAIN,0.0,0.0,0.0


In [14]:
from spacy import displacy

# Visualise the entities both pipelines find in the first labelled text,
# stock pipeline first, custom model second.
first_text, _ = training_labeled_data[0]
spacy_doc, custom_doc = spacy_nlp(first_text), custom_nlp(first_text)
for rendered_doc in (spacy_doc, custom_doc):
    displacy.render(rendered_doc, style='ent', jupyter=True)

### References
1. https://labelstud.io/blog/evaluating-named-entity-recognition-parsers-with-spacy-and-label-studio/