In [7]:
import spacy

import pandas as pd
from pandas import DataFrame

Load a model. The model package must be downloaded beforehand; see https://spacy.io/usage/models for instructions.

In [2]:
# Load the pipeline by package name; it must already be installed in this environment.
nlp = spacy.load("en_core_web_sm")

In [14]:
# Run the pipeline on the example sentence; the resulting Doc is inspected in the cells below.
doc = nlp("John didn't see Kate running with a telescope.")

In [6]:
# One output line per token: its text, part-of-speech tag and dependency label.
for tok in doc:
    print(tok.text, tok.pos_, tok.dep_)

John PROPN nsubj
did VERB aux
n't ADV neg
see VERB ROOT
Kate PROPN nsubj
running VERB ccomp
with ADP prep
a DET det
telescope NOUN pobj
. PUNCT punct


In [29]:
# Same per-token attributes as above, as a table: one column per attribute, one row per token.
# Each pair is (column name, token attribute to read).
df_pos = DataFrame({
    column: [getattr(token, attr) for token in doc]
    for column, attr in (('text', 'text'), ('pos', 'pos_'), ('dep', 'dep_'))
})

df_pos

Unnamed: 0,dep,pos,text
0,nsubj,PROPN,John
1,aux,VERB,did
2,neg,ADV,n't
3,ROOT,VERB,see
4,nsubj,PROPN,Kate
5,ccomp,VERB,running
6,prep,ADP,with
7,det,DET,a
8,pobj,NOUN,telescope
9,punct,PUNCT,.


In [8]:
from spacy import displacy  # dependency visualizer

In [19]:
# Render the dependency parse of `doc` inline in the notebook.
# Outside Jupyter (e.g. in Spyder) use displacy.serve(doc, style="dep") instead; see
# https://stackoverflow.com/questions/48452071/jupyterlab-output-doesnt-show-visualization
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"distance": 100},
)

Another example:

In [17]:
# Second dependency-parse example, drawn with a larger 'distance' setting than the first.
doc1 = nlp("Our company is training workers.")
displacy.render(
    doc1,
    style="dep",
    jupyter=True,
    options={"distance": 140},
)

#### Named Entity Recognition

In [31]:
# Load the pipeline and run it on the same example sentence, this time to look at
# the named entities it recognises.
nlp_ner = spacy.load('en_core_web_sm')
doc_ner = nlp_ner(u'John didn\'t see Kate running with a telescope.')

# Highlight the recognised entities inline in the notebook.
# 'distance' is an option of the dependency ("dep") visualizer only; the entity ("ent")
# visualizer does not read it, so it is not passed here.
displacy.render(doc_ner, style='ent', jupyter=True)

In [33]:
# Tabulate the recognised entities: text, character offsets and label, one row per entity.
# Each pair is (column name, entity attribute to read).
df_ner = DataFrame({
    column: [getattr(ent, attr) for ent in doc_ner.ents]
    for column, attr in (
        ('text', 'text'),
        ('start_char', 'start_char'),
        ('end_char', 'end_char'),
        ('label', 'label_'),
    )
})

df_ner

Unnamed: 0,end_char,label,start_char,text
0,4,PERSON,0,John
1,20,PERSON,16,Kate


#### Word vectors and similarity

The cell below loads the `en_core_web_lg` model and compares every pair of tokens in a sentence. Custom vector weights can also be loaded; see https://spacy.io/usage/vectors-similarity

In [39]:
# A separate pipeline is loaded for the similarity comparison.
nlp_sim = spacy.load("en_core_web_lg")
tokens = nlp_sim("John saw Kate running with telescope")

# Print the similarity score for every ordered pair of tokens,
# including each token paired with itself.
for left in tokens:
    for right in tokens:
        print(left.text, right.text, left.similarity(right))

John John 1.0
John saw 0.27469003
John Kate 0.56349665
John running 0.12022378
John with 0.11883789
John telescope 0.09269081
saw John 0.27469003
saw saw 1.0
saw Kate 0.14281112
saw running 0.3419491
saw with 0.3443244
saw telescope 0.14218588
Kate John 0.56349665
Kate saw 0.14281112
Kate Kate 1.0
Kate running -0.00088570913
Kate with 0.04857191
Kate telescope 0.03155928
running John 0.12022378
running saw 0.3419491
running Kate -0.00088570913
running running 1.0
running with 0.3845435
running telescope 0.07198203
with John 0.11883789
with saw 0.3443244
with Kate 0.04857191
with running 0.3845435
with with 1.0
with telescope 0.10859962
telescope John 0.09269081
telescope saw 0.14218588
telescope Kate 0.03155928
telescope running 0.07198203
telescope with 0.10859962
telescope telescope 1.0
