In [3]:
import spacy

In [4]:
nlp = spacy.load('en_core_web_sm')
text = "I'd like to schedule an appointment with John at 4pm on Sunday, the name is Kyle."
doc = nlp(text)

In [5]:
print([(t,t.pos_) for t in doc])
for ent in doc.ents:
    print(ent, ent.label_)
spacy.explain('ADP')

[(I, 'PRON'), ('d, 'VERB'), (like, 'VERB'), (to, 'PART'), (schedule, 'VERB'), (an, 'DET'), (appointment, 'NOUN'), (with, 'ADP'), (John, 'PROPN'), (at, 'ADP'), (4, 'NUM'), (pm, 'NOUN'), (on, 'ADP'), (Sunday, 'PROPN'), (,, 'PUNCT'), (the, 'DET'), (name, 'NOUN'), (is, 'AUX'), (Kyle, 'PROPN'), (., 'PUNCT')]
John PERSON
4pm TIME
Sunday DATE
Kyle WORK_OF_ART


'adposition'

In [6]:
from spacy.matcher import PhraseMatcher

employees = ['John', 'Abby', 'Carlos', 'Ryan']
matcher = PhraseMatcher(nlp.vocab)
patterns = list(nlp.pipe(employees))
matcher.add('Employee', None, *patterns)

matches = matcher(doc)
print([doc[start:end] for match_id, start, end in matches])

[John]


In [7]:
spacy.displacy.render(doc, style='dep', jupyter=True)
# spacy.explain("pobj")

In [129]:
def extract_name_relations(doc):
    # Merge entities and noun chunks into one token
    spans = list(doc.ents) + list(doc.noun_chunks)
    spans = spacy.util.filter_spans(spans)
    with doc.retokenize() as retokenizer:
        for span in spans:
            retokenizer.merge(span)
    relations = []
    for person in filter(lambda w: w.ent_type_ == "PERSON", doc):
        if person.dep_ in ("attr", "dobj"):
            subject = [w for w in person.head.lefts if w.dep_ == "nsubj"]
            if subject:
                subject = subject[0]
                relations.append((subject, person))
        elif person.dep_ == "pobj" and person.head.dep_ == "prep":
            relations.append((person.head.head, person))
    return relations

relations = extract_name_relations(doc)
for r1, r2 in relations:
    print(f'{r1.text}\t{r2.ent_type_}\t{r2.text}')

an appointment	PERSON	John
Abby	PERSON	Josh


In [121]:
for r1, r2 in relations:
    if r2.dep_ == 'pobj': employee = r2.text
    elif r2.dep_ == 'attr': name = r2.text

print(f'name: {name}\temployee: {employee}')

name: Ryan	employee: Josh


In [107]:
# more friendly way
client = None
employees = []
for person in filter(lambda x: x.ent_type_ == 'PERSON', doc):
    print(person,person.head.head in employees)
    if person.dep_ == 'pobj' or person.head in employees:
        print(1)
        employees.append(person)
    elif person.dep_ == 'attr' or person.head.head in employees:
        print(2)
        client = person
    print(person, person.dep_, person.head, person.head.head)
print(f'client: {client}\temployees: {employees}')

John False
1
John pobj with appointment
Carlos False
1
Carlos conj John with
Abby True
1
Abby conj Carlos John
Josh True
1
Josh pobj for Abby
client: None	employees: [John, Carlos, Abby, Josh]
