# Installing spaCy

In [None]:
pip install spacy --upgrade

In [None]:
# Run spaCy's `info` CLI command in a shell to show details of the installation.
!python -m spacy info

In [3]:
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm

# Blank English pipeline; used by later cells only to tokenize raw text via nlp.make_doc.
nlp = spacy.blank("en")
# DocBin container that collects Doc objects and serializes them to a .spacy file.
db = DocBin()

# Opening and Converting Training Set JSON to DocBin

In [6]:
import json

# Load the annotated training examples.
# A context manager closes the file handle as soon as parsing finishes.
# The encoding is explicit because the entity annotations are character offsets
# into the decoded text, so decoding must not depend on the platform default.
# Assumes the file is UTF-8 (the JSON default); adjust if it is not.
with open("training_set.json", encoding="utf-8") as f:
  TRAINING_DATA = json.load(f)

In [None]:
# Convert the training annotations into a DocBin and write it to disk.
#
# Start from a fresh DocBin so this cell is idempotent: re-running it cannot
# append the same docs a second time to the file that gets written.
db = DocBin()

# Each item under 'annotations' unpacks to (text, annotation), where
# annotation["entities"] yields (start, end, label) character-offset triples.
for text, annotation in tqdm(TRAINING_DATA['annotations']):
  # Tokenize only; no pipeline components are run on the text.
  doc = nlp.make_doc(text)
  entities = []
  for start, end, label in annotation["entities"]:
    # "contract" snaps offsets that fall inside a token inward to the tokens
    # fully covered by the character range; char_span returns None if none fit.
    span = doc.char_span(start, end, label=label, alignment_mode="contract")
    if span is None:
      print("Skipping")
    else:
      entities.append(span)

  doc.ents = entities
  db.add(doc)

db.to_disk("./training_data.spacy")


# Opening and Converting Validation Set JSON to DocBin

In [9]:
import json

# Load the annotated validation examples.
# A context manager closes the file handle as soon as parsing finishes.
# The encoding is explicit because the entity annotations are character offsets
# into the decoded text, so decoding must not depend on the platform default.
# Assumes the file is UTF-8 (the JSON default); adjust if it is not.
with open("validation_set.json", encoding="utf-8") as f:
  VALIDATION_DATA = json.load(f)

In [None]:
# Convert the validation annotations into a DocBin and write it to disk.
#
# A fresh DocBin is required here: the module-level `db` may already hold the
# training docs added by an earlier cell, and reusing it would write those
# training docs into the validation file (leaking train data into evaluation).
db = DocBin()

# Each item under 'annotations' unpacks to (text, annotation), where
# annotation["entities"] yields (start, end, label) character-offset triples.
for text, annotation in tqdm(VALIDATION_DATA['annotations']):
  # Tokenize only; no pipeline components are run on the text.
  doc = nlp.make_doc(text)
  entities = []
  for start, end, label in annotation["entities"]:
    # "contract" snaps offsets that fall inside a token inward to the tokens
    # fully covered by the character range; char_span returns None if none fit.
    span = doc.char_span(start, end, label=label, alignment_mode="contract")
    if span is None:
      print("Skipping")
    else:
      entities.append(span)

  doc.ents = entities
  db.add(doc)

db.to_disk("./validation_data.spacy")

# Setting Up spaCy Configuration

In [None]:
# Generate config.cfg from base_config.cfg with spaCy's `init fill-config` CLI.
# base_config.cfg must already exist in the working directory.
!python -m spacy init fill-config base_config.cfg config.cfg

In [None]:
# Download the en_core_web_lg pipeline package.
# NOTE(review): nothing in this notebook loads it directly; presumably
# base_config.cfg references it (e.g. for vectors) — confirm.
!python -m spacy download en_core_web_lg

# Training Model

In [None]:
# Train with config.cfg, overriding the train/dev corpus paths with the DocBin
# files written by the conversion cells; output goes to the current directory.
!python -m spacy train config.cfg --output ./ --paths.train ./training_data.spacy --paths.dev ./validation_data.spacy

# Testing Model

In [15]:
# Load the pipeline stored under "model-best"; presumably the best checkpoint
# written by the training run into the current directory — confirm it exists.
nlp_ner = spacy.load("model-best")

In [75]:
# Run the loaded pipeline on a sample sentence to inspect its predictions.
doc = nlp_ner("Microsoft job interview this monday from 1:30pm to 2:00pm")

In [None]:
# Render the entities found in `doc` inline in the notebook using displaCy's "ent" style.
spacy.displacy.render(doc, style="ent", jupyter=True)