In [1]:
%load_ext autoreload
%autoreload 2
    
# generic
import os
import sys    
module_path = os.path.abspath(os.path.join('../src'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
# custom
from train import *

In [2]:
# Load pre-existing spacy model
import spacy
nlp=spacy.load('en_core_web_sm')

# Getting the pipeline component
ner=nlp.get_pipe("ner")

In [3]:
train_dir = "../traindata_edited.json"
test_dir = "../testdata.json"

In [4]:
training_data = []
lines = []

with open(train_dir, "rb") as f:
    lines = f.readlines()

for line in lines:
    data = json.loads(line)
    text = data["content"]
    entities = []
    for annotation in data["annotation"]:
        # only a single point in text annotation.
        point = annotation["points"][0]
        labels = annotation["label"]
        # handle both list of labels or a single label.
        if not isinstance(labels, list):
            labels = [labels]

        for label in labels:
            # dataturks indices are both inclusive [start, end] but spacy is not [start, end)
            entities.append((point["start"], point["end"] + 1, label))

    training_data.append((text, {"entities": entities}))

In [5]:
#https://github.com/explosion/spaCy/issues/3558
import re


def trim_entity_spans(data: list) -> list:
    """Removes leading and trailing white spaces from entity spans.

    Args:
        data (list): The data to be cleaned in spaCy JSON format.

    Returns:
        list: The cleaned data.
    """
    invalid_span_tokens = re.compile(r'\s')

    cleaned_data = []
    for text, annotations in data:
        entities = annotations['entities']
        valid_entities = []
        for start, end, label in entities:
            valid_start = start
            valid_end = end
            while valid_start < len(text) and invalid_span_tokens.match(
                    text[valid_start]):
                valid_start += 1
            while valid_end > 1 and invalid_span_tokens.match(
                    text[valid_end - 1]):
                valid_end -= 1
            valid_entities.append([valid_start, valid_end, label])
        cleaned_data.append([text, {'entities': valid_entities}])

    return cleaned_data

In [6]:
training_data = trim_entity_spans(training_data)

In [7]:
# Adding labels to the `ner`

for _, annotations in training_data:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

In [8]:
# Disable pipeline components you dont need to change
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
unaffected_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]

In [9]:
# Import requirements
import random
from spacy.util import minibatch, compounding
from pathlib import Path

# TRAINING THE MODEL
with nlp.disable_pipes(*unaffected_pipes):

  # Training for 30 iterations
  for iteration in range(30):
    print("iteration:", iteration)
    # shuufling examples  before every iteration
    random.shuffle(training_data)
    losses = {}
    # batch up the examples using spaCy's minibatch
    batches = minibatch(training_data, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        texts, annotations = zip(*batch)
        #print(annotations)
        nlp.update(
                    texts,  # batch of texts
                    annotations,  # batch of annotations
                    drop=0,  # dropout - make it harder to memorise data
                    losses=losses,
                )
        print("Losses", losses)

Escalation Specialist - HiPower Support ..." with entities "[[2182, 2210, 'College Name'], [2177, 2180, 'Degre...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
HR Executive

Bengaluru, Karnataka..." with entities "[[1818, 1892, 'Skills'], [1761, 1765, 'Graduation ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Senior Corporate Account Executive ..." with entities "[[2635, 2639, 'Graduation Year'], [2568, 2590, 'Co...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Kottayam, Kerala - Email me on Indeed..." with entities "[[2165, 2214, 'Skills'], [2145, 2155, 'Degree'],

Losses {'ner': 456.84234046936035}


Quality Analyst - ThoughtWorks Technolo..." with entities "[[2301, 2380, 'Skills'], [2088, 2131, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Voice and Accent Trainer :Masters i..." with entities "[[1130, 1174, 'Email Address'], [1112, 1116, 'Grad...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
java developer

Pune, Maharashtra..." with entities "[[1894, 2173, 'Skills'], [1726, 1850, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 1153.6221137046814}


Bengaluru, Karnataka - Email me on ..." with entities "[[3517, 3878, 'Skills'], [3387, 3481, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Arabic Language supporter (Content Analyst..." with entities "[[2349, 2471, 'Skills'], [2331, 2340, 'Degree'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 2095.128128528595}


Architecture SharePoint/Office 365..." with entities "[[1171, 1573, 'Skills'], [962, 1095, 'Skills'], [9...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Application Developer - SAP ABAP

Kol..." with entities "[[3255, 3264, 'Skills'], [3246, 3254, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 2371.3693981319666}


Cluster HR Manager - Velammal New

Chennai,..." with entities "[[3760, 4638, 'Skills'], [3727, 3742, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Sr. Test Manager

Mumbai, Maharashtra ..." with entities "[[11201, 11408, 'Skills'], [11172, 11191, 'College...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 3242.9847793728113}


Senior Technical Lead - HCL Cisco

-..." with entities "[[6646, 7279, 'Skills'], [6451, 6554, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Orrisha - Email me on Indeed: indeed...." with entities "[[265, 307, 'Email Address'], [210, 251, 'Skills']...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Test Engineer - Infosys Limited

- Em..." with entities "[[3132, 3611, 'Skills'], [3005, 3082, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 3920.31965829432}


Technology Consultant - EIT Services ..." with entities "[[6732, 6848, 'Skills'], [6703, 6705, 'Graduation ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
6+ Exp in banking operations and cre..." with entities "[[8800, 8927, 'Skills'], [8760, 8788, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Technical Project Manager

Hyderabad,..." with entities "[[4901, 4909, 'Location'], [4843, 4861, 'College N...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 4315.14881016314}


SAP UI5 Lead, Native HANA Developer -..." with entities "[[5053, 5058, 'Companies worked at'], [5013, 5018,...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 4841.294818177819}


Bidar, Karnataka - Email me on Indee..." with entities "[[1341, 1384, 'Email Address'], [1131, 1136, 'Loca...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 5376.564513459802}


Microsoft SQL-SERVER

Dhule, Mahara..." with entities "[[1482, 1487, 'Location'], [1369, 1413, 'Email Add...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
SAURABH SANDHIKAR

Hyderabad, Te..." with entities "[[2562, 2597, 'Skills'], [2366, 2375, 'Companies w...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 6688.892485871911}
Losses {'ner': 7564.701426759362}


Devops

Hyderabad, Telangana - Email..." with entities "[[2957, 3073, 'Skills'], [2943, 2947, 'Graduation ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Bengaluru, Karnataka - Email me on In..." with entities "[[3823, 3978, 'Skills'], [3466, 3819, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Gokul, Uttar Pradesh - Email me on Indeed..." with entities "[[970, 1002, 'Skills'], [863, 868, 'Location'], [6...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 8108.563119187951}


Kharadi, Pune, 411014, IN - Email ..." with entities "[[1943, 2050, 'Skills'], [478, 488, 'Designation']...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Software Development Engineer wit..." with entities "[[4167, 4170, 'Skills'], [4141, 4145, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Software Development Director - Ora..." with entities "[[4960, 4964, 'Graduation Year'], [4938, 4958, 'Co...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 8475.717280998826}


IT SUPPORT

Sulthan Bathery, Kerala, ..." with entities "[[3913, 4370, 'Skills'], [3884, 3880, 'Graduation ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
CES ASSOCIATE CONSULTANT

Bangalore, Kar..." with entities "[[2669, 2944, 'Skills'], [2618, 2638, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 8674.262307852507}


7 years of experience in IT Netw..." with entities "[[5471, 5838, 'Skills'], [5457, 5461, 'Graduation ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 9152.562431305647}


Tech Support Executive - Teleperformance..." with entities "[[872, 911, 'Email Address'], [800, 858, 'Skills']...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Senior Technology Support Executive at In..." with entities "[[3660, 3663, 'Graduation Year'], [3600, 3602, 'Gr...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 9404.407358974218}


Infosys group as a Test Analyst - In..." with entities "[[2348, 3131, 'Skills'], [2324, 2328, 'Graduation ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Server Support Engineer

Gurgaon, Har..." with entities "[[2326, 2333, 'Location'], [1821, 2095, 'Skills'],...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 9955.960100024939}


Working as Escalation Engineer with M..." with entities "[[4016, 4025, 'Companies worked at'], [3941, 3950,...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Hyderabad, Telangana - Email me on Indeed..." with entities "[[4542, 4549, 'Skills'], [4178, 4187, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 10712.683423370123}


Senior Process Executive

Jaipur, Raja..." with entities "[[3144, 3495, 'Skills'], [3081, 3104, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Technical Lead at Infosys Ltd. - P..." with entities "[[2814, 2839, 'Degree'], [2773, 2812, 'Degree'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Bengaluru, Karnataka - Email me on In..." with entities "[[948, 1180, 'Skills'], [833, 845, 'College Name']...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 10935.160366863012}


Program Manager, Product Manager, Prod..." with entities "[[1750, 1759, 'Companies worked at'], [1467, 1476,...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
PeopleSoft Consultant

Bangalore Urban, K..." with entities "[[3511, 3749, 'Skills'], [3313, 3364, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 11322.10897526145}
Losses {'ner': 11564.82072646916}


A Dev-Test Professional with ..." with entities "[[3321, 3376, 'Skills'], [3296, 3311, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Shivpuri, Madhya Pradesh - Email me o..." with entities "[[870, 893, 'Skills'], [794, 810, 'College Name'],...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Principal Consultant at Oracle

Bengalu..." with entities "[[3345, 3896, 'Skills'], [3276, 3313, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 11846.12884195149}


LEAD ENGINEER - CISCO

- Email me on In..." with entities "[[2707, 2711, 'Skills'], [2683, 2693, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 12341.834150865674}


SDET Automation Engineer, Infosys - CR..." with entities "[[3909, 3931, 'College Name'], [3883, 3907, 'Degre...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Senior Process Executive - STAR Ind..." with entities "[[2877, 3031, 'Skills'], [2728, 2742, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 12516.828362300992}


Quantitative Analyst

- Email me o..." with entities "[[773, 847, 'Skills'], [736, 740, 'Graduation Year...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
SQL and MSBI Developer with experience ..." with entities "[[3056, 3090, 'Skills'], [3042, 3046, 'Graduation ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 12830.611467197537}


Test Engineer

Mangalore, Karnataka - E..." with entities "[[2110, 2404, 'Skills'], [2055, 2064, 'Location'],...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Trainer-Finacle-Core Banking Sol..." with entities "[[5840, 5847, 'Companies worked at'], [2090, 2137,...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Oceanic Consultants as a HR Executiv..." with entities "[[5670, 5780, 'Skills'], [462, 481, 'Designation']...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 13114.72632856667}


(Microsoft Partner Readiness Operati..." with entities "[[3345, 3380, 'College Name'], [3322, 3343, 'Degre...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Major Incident Manager / Escalati..." with entities "[[7924, 8039, 'Skills'], [7872, 7891, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 13608.832463935018}


Tripura - Email me on Indeed..." with entities "[[277, 328, 'Email Address'], [257, 263, 'College ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 13931.022941544652}


head of operation and logistics

Tri..." with entities "[[774, 897, 'Skills'], [694, 728, 'Skills'], [648,...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Data Analyst Intern - Oracle Retai..." with entities "[[2247, 2573, 'Skills'], [1435, 1480, 'Email Addre...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 14260.432606652379}


- Email me on Indeed: indeed.com/r/Sha..." with entities "[[2421, 2450, 'College Name'], [2416, 2419, 'Degre...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Operations Center Shift Manager (OCS..." with entities "[[3054, 3363, 'Skills'], [2333, 2339, 'Companies w...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Senior System Engineer - Infosys Limited..." with entities "[[2292, 2296, 'Graduation Year'], [2245, 2269, 'Co...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Jaipur, Rajasthan - Email me on Indeed: ..." with entities "[[194, 511, 'Skills'], [174, 183, 'Degre

Losses {'ner': 14454.490203772672}


Pune, Maharashtra - Email me on Indee..." with entities "[[1860, 1871, 'Name'], [1502, 1600, 'Skills'], [12...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Secunderabad, Andhra Pradesh - Email me..." with entities "[[4729, 4733, 'Graduation Year'], [2634, 2638, 'Gr...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Hoshiarpur, Punjab - Email me on Ind..." with entities "[[1234, 1277, 'Email Address'], [1146, 1150, 'Grad...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 14546.133324743249}


IT SUPPORT

Sulthan Bathery, Kerala, ..." with entities "[[4167, 4176, 'Companies worked at'], [3913, 4040,...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Associate Software Engineer

Bengalur..." with entities "[[990, 1007, 'Skills'], [952, 968, 'Skills'], [919...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Microsoft Certified System Engineer

Del..." with entities "[[523, 562, 'Email Address'], [500, 509, 'Name'], ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 14893.111695648171}


Pune, Maharashtra - Email me on Ind..." with entities "[[3878, 3937, 'Degree'], [3111, 3846, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 15011.112698980607}


Actively looking for opportunity in ...." with entities "[[2889, 3087, 'Skills'], [2734, 2846, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Principal Engineer Technical Staff ..." with entities "[[2656, 2688, 'Skills'], [2586, 2626, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 15307.647357412614}


Senior Executive (MIS & Audit) - Job ..." with entities "[[8139, 8163, 'College Name'], [8134, 8136, 'Degre...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Mumbai, Maharashtra - Email me on ..." with entities "[[1551, 1852, 'Skills'], [1454, 1499, 'Email Addre...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
3 TCS Security guard Access Control..." with entities "[[971, 1015, 'Email Address'], [877, 895, 'College...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 15615.263697334565}


Senior Travel Operations (Domestic,..." with entities "[[4232, 4330, 'Skills'], [4058, 4101, 'Degree'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Tamil Nadu - Email me on Indeed..." with entities "[[363, 411, 'Email Address'], [314, 349, 'Skills']...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 15916.22224802617}


"Store Executive" - Orange City Ho..." with entities "[[6994, 7350, 'Skills'], [6936, 6973, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
New Delhi, Delhi - Email me on Indee..." with entities "[[937, 980, 'Email Address'], [580, 923, 'Skills']...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 16450.696176477708}


"Infosys" internship

Bengaluru, Karnat..." with entities "[[998, 1038, 'Email Address'], [611, 984, 'Skills'...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 16819.80001587514}


Pune, Maharashtra - Email me on Ind..." with entities "[[7675, 7692, 'Years of Experience'], [7133, 7595,...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Assisting Microsoft Partners - Exchang..." with entities "[[1308, 1349, 'Email Address'], [1277, 1281, 'Loca...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 17172.308536955155}


Azure Developer with 9 Yrs 8 months ..." with entities "[[2547, 2756, 'Skills'], [2537, 2546, 'Companies w...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 17542.753467031755}


Network Ops Associate - Accenture

Bengal..." with entities "[[1844, 1873, 'Skills'], [1794, 1830, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Mathura, Uttar Pradesh - Email me o..." with entities "[[975, 1020, 'Skills'], [814, 843, 'College Name']...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 17882.734192320146}
Losses {'ner': 18076.531017609872}


Program Manager (Software Delivery..." with entities "[[4459, 4959, 'Skills'], [4445, 4450, 'Designation...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 18270.408949800767}
Losses {'ner': 18533.5707182372}
Losses {'ner': 18896.89236111287}


Patna, Bihar - Email me on Indeed: ..." with entities "[[283, 327, 'Email Address'], [257, 262, 'Location...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Engineer / Electrical Supervisor, S..." with entities "[[2912, 3288, 'Skills'], [2721, 2726, 'UNKNOWN'], ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Software Engineer

Bangalore City, Kar..." with entities "[[6930, 7494, 'Skills'], [6845, 6874, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 19357.450734087266}


- Email me on Indeed: indeed.com/r/Sai-Dh..." with entities "[[2956, 3016, 'Skills'], [2567, 2952, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Associate consultant@SAP labs ..." with entities "[[3052, 3067, 'Skills'], [2993, 3016, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Senior Associate Consultant - Infosys..." with entities "[[3982, 4412, 'Skills'], [3162, 3674, 'Skills'], [...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)


Losses {'ner': 19878.24602789525}


- Email me on Indeed: indeed.com/r/Ava..." with entities "[[368, 409, 'Email Address'], [314, 334, 'College ...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Sr. Systems Engineer at Infosys Li..." with entities "[[3828, 3931, 'Skills'], [3753, 3794, 'College Nam...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Bangalore, Karnataka - Email me on Indee..." with entities "[[2473, 2498, 'Designation'], [2030, 2037, 'Compan...". Use `spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)` to check the alignment. Misaligned entities ('-') will be ignored during training.
  gold = GoldParse(doc, **gold)
Banglore, Karnataka, Karnataka - ..." with entities "[[4563, 4746, 'Skills'], [4538, 4551, 'College Na

Losses {'ner': 20462.06021256093}
Losses {'ner': 20513.49948931951}
Losses {'ner': 279.7355834245682}
Losses {'ner': 699.4176083803177}
Losses {'ner': 1094.9760357141495}
Losses {'ner': 1575.5158644914627}
Losses {'ner': 2016.8031123876572}
Losses {'ner': 2398.37982070446}
Losses {'ner': 2653.4381049871445}
Losses {'ner': 3011.599970936775}
Losses {'ner': 3379.6184417009354}
Losses {'ner': 3668.4030777215958}
Losses {'ner': 4118.679510951042}
Losses {'ner': 4378.796913027763}
Losses {'ner': 4522.77527564764}
Losses {'ner': 4618.606311999261}
Losses {'ner': 5023.214799128473}
Losses {'ner': 5131.871185772121}
Losses {'ner': 5354.804783336818}
Losses {'ner': 5461.498540110886}
Losses {'ner': 5656.581805296242}
Losses {'ner': 5813.544078834355}
Losses {'ner': 5993.234286375344}
Losses {'ner': 6485.795117922127}
Losses {'ner': 6824.060422964394}
Losses {'ner': 7023.1359016522765}
Losses {'ner': 7329.298180781305}
Losses {'ner': 7631.995694838464}
Losses {'ner': 7832.428013049066}
Losses {'

Losses {'ner': 8077.739962925902}
Losses {'ner': 8122.018875683076}
Losses {'ner': 8321.667310083634}
Losses {'ner': 9225.092439974076}
Losses {'ner': 9494.490276659257}
Losses {'ner': 9586.558628036059}
Losses {'ner': 9837.123132421053}
Losses {'ner': 9930.654087944306}
Losses {'ner': 129.26541508734226}
Losses {'ner': 214.40478047635406}
Losses {'ner': 269.54634621646255}
Losses {'ner': 500.77259208168834}
Losses {'ner': 661.2954846462235}
Losses {'ner': 718.4055888867006}
Losses {'ner': 769.9333646288142}
Losses {'ner': 1045.1681394567713}
Losses {'ner': 1183.6280086776242}
Losses {'ner': 1215.6449912637472}
Losses {'ner': 1964.75409732759}
Losses {'ner': 2045.4950434453785}
Losses {'ner': 2178.9220493920147}
Losses {'ner': 2293.566395390779}
Losses {'ner': 2443.1004560627043}
Losses {'ner': 2537.33314171806}
Losses {'ner': 2631.663867284544}
Losses {'ner': 2996.5021584527567}
Losses {'ner': 3295.8497031228617}
Losses {'ner': 3541.291219760664}
Losses {'ner': 3616.288329434581}
Loss

Losses {'ner': 4401.486725571565}
Losses {'ner': 4431.891744773835}
Losses {'ner': 4484.419576154352}
Losses {'ner': 4513.476686221984}
Losses {'ner': 4611.649115422246}
Losses {'ner': 4623.826384892847}
Losses {'ner': 4689.910790447306}
Losses {'ner': 4783.257966775607}
Losses {'ner': 4873.943890171286}
Losses {'ner': 4895.665591694051}
Losses {'ner': 5290.876419521504}
Losses {'ner': 5397.815369453849}
Losses {'ner': 5453.379230570616}
Losses {'ner': 5496.700405281605}
Losses {'ner': 64.75462181027979}
Losses {'ner': 149.54699601512402}
Losses {'ner': 240.02370393788442}
Losses {'ner': 288.30226641000627}
Losses {'ner': 376.27327372172294}
Losses {'ner': 571.2228331718725}
Losses {'ner': 583.9804660779555}
Losses {'ner': 599.508841201583}
Losses {'ner': 691.7210893816446}
Losses {'ner': 953.1781209892724}
Losses {'ner': 1024.9567183641848}
Losses {'ner': 1058.7183942398187}
Losses {'ner': 1130.7075618406889}
Losses {'ner': 1287.1159884533045}
Losses {'ner': 1401.0420628389475}
Losses

Losses {'ner': 1734.4096857251643}
Losses {'ner': 1766.8048464060485}
Losses {'ner': 1783.0288767372913}
Losses {'ner': 1835.3079670142106}
Losses {'ner': 1853.14058333281}
Losses {'ner': 1992.3915126884312}
Losses {'ner': 2090.757616918483}
Losses {'ner': 2118.445992279281}
Losses {'ner': 2190.4703407361594}
Losses {'ner': 2302.2450715973464}
Losses {'ner': 2630.9858264401046}
Losses {'ner': 2643.5923727218833}
Losses {'ner': 2682.628000949866}
Losses {'ner': 2741.531925225114}
Losses {'ner': 2855.2218458889474}
Losses {'ner': 2891.4872801615347}
Losses {'ner': 2914.492428602134}
Losses {'ner': 3112.1918845786204}
Losses {'ner': 3192.5710935531606}
Losses {'ner': 3219.574226815668}
Losses {'ner': 3871.8125357222234}
Losses {'ner': 58.995794117450714}
Losses {'ner': 197.02937650680542}
Losses {'ner': 277.6856497526169}
Losses {'ner': 352.7914265394211}
Losses {'ner': 579.572402998805}
Losses {'ner': 635.4611562635255}
Losses {'ner': 695.2846913333487}
Losses {'ner': 736.2817981462431}


Losses {'ner': 1030.8421912180406}
Losses {'ner': 1061.3399774628838}
Losses {'ner': 1108.744993583407}
Losses {'ner': 1144.5274259979983}
Losses {'ner': 1211.2018122757931}
Losses {'ner': 1267.410672349963}
Losses {'ner': 1305.1628047382605}
Losses {'ner': 1360.2906173491997}
Losses {'ner': 1402.3988162657392}
Losses {'ner': 1789.867910780586}
Losses {'ner': 1833.313757625982}
Losses {'ner': 2053.792288956806}
Losses {'ner': 2090.9015203338263}
Losses {'ner': 2138.7576638531086}
Losses {'ner': 2197.8041049648223}
Losses {'ner': 2264.5481497306523}
Losses {'ner': 2265.518696327018}
Losses {'ner': 2287.696293342259}
Losses {'ner': 2372.4176305953833}
Losses {'ner': 2434.2258311987343}
Losses {'ner': 2501.0859709114256}
Losses {'ner': 2549.392973816716}
Losses {'ner': 2634.524625206129}
Losses {'ner': 2667.425382282425}
Losses {'ner': 2746.428381483842}
Losses {'ner': 2770.4109791021765}
Losses {'ner': 2774.0192255181464}
Losses {'ner': 71.50711771845818}
Losses {'ner': 232.5706272870302

Losses {'ner': 524.1117698215603}
Losses {'ner': 545.8020323777528}
Losses {'ner': 555.5468800754825}
Losses {'ner': 562.6229755260356}
Losses {'ner': 572.9187376514819}
Losses {'ner': 579.4199469954804}
Losses {'ner': 609.9247601073332}
Losses {'ner': 724.8895360225908}
Losses {'ner': 760.9754187459223}
Losses {'ner': 786.3792386757307}
Losses {'ner': 798.1147057506353}
Losses {'ner': 865.1860550287277}
Losses {'ner': 868.9861141780864}
Losses {'ner': 927.8721973442014}
Losses {'ner': 978.081132500666}
Losses {'ner': 1052.0114867090313}
Losses {'ner': 1060.1591998107622}
Losses {'ner': 1084.6759436011205}
Losses {'ner': 1124.92580416723}
Losses {'ner': 1427.8801586918423}
Losses {'ner': 1431.2044792089443}
Losses {'ner': 1485.6269306156616}
Losses {'ner': 1550.4060557156633}
Losses {'ner': 1664.4911132573914}
Losses {'ner': 1670.676343975507}
Losses {'ner': 1688.258573410951}
Losses {'ner': 1706.1652820766378}
Losses {'ner': 1739.0876724263128}
Losses {'ner': 1819.3494117897992}
Losse

In [20]:
to_predict = training_data[0][0]
to_predict

"Sai Patha\nMule ESB Integration Developer - Cisco Systems\n\nHyderabad, Telangana - Email me on Indeed: indeed.com/r/Sai-Patha/981ba615ab108e29\n\n• 6+ years of professional experience in end-to-end designing, developing and implementation\nof\nsoftware solutions in the areas of Middleware Integration and J2EE based applications.\n• Expertise in the areas of Core Java, Servlet 2.3, JSP, Web Services, MESB, and OSB.\n• Expertise in PL SQL programming and Oracle Apps (Oracle Order management)\n• Having 2.5+ years of experience in Mule and expert in Mule ESB development (3.7v & 3.8v),\nMule\nESB administration and Mule API management (API GW 1.x, 2.x, 3.x) CloudHub.\n• Experience in building Mule ESB & API management platform for organizations\n• Experience in performance tuning, testing, and benchmarking the platform for the\norganization.\n• Expert in building middleware systems using Message Routing, Content Enrichment, Cache\nMechanism, Message Filtering, Message Transformation, Mess

In [18]:
to_predict2 = "Maxine Curry\
Dayjob.com\
The Big Peg\
120 Vyse Street\
Birmingham B18 6NF\
England\
T: 0044 121 638 0026\
E: info@dayjob.com\
PERSONAL SUMMARY\
Maxine can ensure that IT Infrastructure is secure, reliable, fit for purpose and evolves with a businesses needs. You can rely on her to create flexible, secure and first-class systems which will save you time, money and allow you to grow your business. In the past she has built her own complete IT function from scratch. She enjoys working with her peers to identify and advise on new cutting-edge solutions that can improve their productivity. As a true professional she will ensure that all relevant risks, especially with regards to information infrastructure are documented and managed appropriately. On a personal level she is comfortable making decisions and taking responsibility at a senior level. Right now, she is looking to join a young, proactive, driven and above all fun organisation that has big plans for the future.\
CAREER HISTORY\
IT MANAGER – Start Date – Present\
Employers name – Location\
Responsible for helping the company to remain IT compliant, efficient and profitable during the course of its operations.\
Duties;\
Ensuring that the company’s IT systems are readily available for use by staff.\
Making sure key staff are appropriately trained in the usage of all IT products and services in order to effectively carry out their jobs.\
Identifying opportunities to improve efficiencies using technology.\
Designing IT training programs and workshops for staff.\
Leading, motivating and managing a small Infrastructure and Service Desk team.\
Managing the businesses websites and domains.\
Being the point of contact for operational IT matters when senior managers or directors are unavailable.\
Providing colleagues with step-by-step technical help in both verbal and written format.\
In charge of IT budgeting, controlling costs and keeping the department on track financially.\
Maintaining a log and list of all required repairs, upgrades and maintenance.\
JOB TITLE – Start Date – End Date\
Employers name – Location\
KEY SKILLS AND COMPETENCIES\
Professional\
Experience of working with MSPs and around Service Integration.\
Knowledge and understanding of Database, network and communication protocols.\
Successful track record in relationship management with key stakeholders.\
Good ability to inform stakeholders and manage their expectations.\
Relevant project management experience particularly in delivering small and large scale IT projects.\
Personal\
Excellent verbal and written communication and facilitation skills.\
Willing to travel at short notice to sites across the country.\
AREAS OF EXPERTISE\
Goal setting\
IT Management\
Staff motivation\
IT security"

In [19]:
# Testing the model
doc = nlp(to_predict)
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

Entities [('Sai Patha', 'Name'), ('Mule ESB Integration Developer', 'Designation'), ('Cisco Systems', 'Companies worked at'), ('Hyderabad', 'Location'), ('indeed.com/r/Sai-Patha/981ba615ab108e29', 'Email Address'), ('Mule ESB Integration Developer', 'Designation'), ('Cisco Systems', 'Companies worked at'), ('Cisco Systems', 'Companies worked at'), ('Cisco Systems', 'Companies worked at'), ('Cisco Systems', 'Companies worked at'), ('Software Engineer', 'Designation'), ('Cisco Systems', 'Companies worked at'), ('Software Engineer', 'Designation'), ('Cisco Systems', 'Companies worked at'), ('Software Engineer', 'Designation'), ('Arrow Electronics Inc', 'Companies worked at'), ('2011', 'Graduation Year'), ('Bachelor of Technology in Technology', 'Degree'), ('Amrita School of Engineering', 'College Name'), ('2011', 'Graduation Year'), ('ORACLE (3 years), JAVA (3 years), SOAP (2 years), Subversion (2 years), SVN (2 years)', 'Skills'), ('Languages Java, Java Script, PL/SQL, SQL SERVER, HTML, 

In [None]:
# Save the  model to directory
output_dir = Path('/content/')
nlp.to_disk(output_dir)
print("Saved model to", output_dir)

# Load the saved model and predict
print("Loading from", output_dir)
nlp_updated = spacy.load(output_dir)
doc = nlp_updated("Fridge can be ordered in FlipKart" )
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])