# Learning NLP with spaCy

In [50]:
# import
import spacy

In [51]:
# Load the small English pipeline.
# Install it first with: python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")

In [52]:
# Example request that the following cells tokenize, tag, and parse.
wine_query = "I want a pinot gris that costs around $25 and have reviews greater than 94"
doc = nlp(wine_query)

In [53]:
# Iterating a Doc yields its tokens; show them all as a list.
list(doc)

[I,
 want,
 a,
 pinot,
 gris,
 that,
 costs,
 around,
 $,
 25,
 and,
 have,
 reviews,
 greater,
 than,
 94]

In [54]:
# One line per token: text, part-of-speech tag, dependency label, head token text.
for token in doc:
    row = (token.text, token.pos_, token.dep_, token.head.text)
    print(*row)

I PRON nsubj want
want VERB ROOT want
a DET det gris
pinot ADJ compound gris
gris NOUN dobj want
that DET nsubj costs
costs VERB relcl gris
around ADP quantmod 25
$ SYM quantmod 25
25 NUM dobj costs
and CCONJ cc costs
have VERB conj costs
reviews NOUN dobj have
greater ADJ amod 94
than SCONJ quantmod 94
94 NUM punct want


In [55]:
# One line per noun chunk: chunk text, its root token, the root's dependency
# label, and the text of the root's head token.
for chunk in doc.noun_chunks:
    line = f'{chunk.text} | {chunk.root.text} | {chunk.root.dep_} | {chunk.root.head.text}'
    print(line)

I | I | nsubj | want
a pinot gris | gris | dobj | want
reviews | reviews | dobj | have


In [56]:
# Print every named entity found in the document together with its label.
for entity in doc.ents:
    mention, label = entity.text, entity.label_
    print(mention, label)

around $25 MONEY


In [57]:
## Grammatical dependency graph of the document
from spacy import displacy

# "dep" is displaCy's default style (dependency parse); spelled out for clarity.
displacy.render(doc, style="dep")

## Semantic Similarity

In [58]:
# NOTE: this rebinds `nlp` from the small pipeline loaded earlier to the
# larger "md" package, so cells re-run after this one will use the medium model.
# Install it first with: python -m spacy download en_core_web_md
nlp = spacy.load("en_core_web_md")
doc1 = nlp("wine")
doc2 = nlp("a pinot gris wine")

# Score the similarity of the two documents and print it next to their texts.
similarity = doc1.similarity(doc2)
print(doc1, "<->", doc2, similarity)

wine <-> a pinot gris wine 0.8935815525767866


In [1]:
pip freeze

backcall==0.2.0
blis==0.7.4
catalogue==2.0.1
certifi==2020.12.5
chardet==4.0.0
click==7.1.2
colorama==0.4.4
cymem==2.0.5
decorator==4.4.2
en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.0.0/en_core_web_md-3.0.0-py3-none-any.whl
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl
idna==2.10
ipykernel==5.5.0
ipython==7.21.0
ipython-genutils==0.2.0
jedi==0.18.0
Jinja2==2.11.3
jupyter-client==6.1.12
jupyter-core==4.7.1
MarkupSafe==1.1.1
murmurhash==1.0.5
numpy==1.20.1
packaging==20.9
pandas==1.2.3
parso==0.8.1
pathy==0.4.0
pickleshare==0.7.5
preshed==3.0.5
prompt-toolkit==3.0.18
pydantic==1.7.3
Pygments==2.8.1
pyparsing==2.4.7
python-dateutil==2.8.1
pytz==2021.1
pywin32==300
pyzmq==22.0.3
requests==2.25.1
six==1.15.0
smart-open==3.0.0
spacy==3.0.5
spacy-legacy==3.0.1
srsly==2.4.0
thinc==8.0.2
tornado==6.1
tqdm==4.59.0
traitlets==5.0.5
typer==0.3.2
urllib3==1.26.