In [18]:
import spacy
from spacy import displacy
from pathlib import Path

In [2]:
# Name of the spaCy pipeline package used throughout this notebook.
MODEL_NAME = "en_core_web_md"
# Load the pipeline once; later cells call `nlp(...)` to process text.
nlp = spacy.load(MODEL_NAME)

In [3]:
# Process a short sentence and show the tokens it was split into.
doc = nlp("I ran quickly to the store")
list(doc)

[I, ran, quickly, to, the, store]

In [4]:
# Part-of-speech tag for every token of the current doc, in order.
[tok.pos_ for tok in doc]

['PRON', 'VERB', 'ADV', 'ADP', 'DET', 'NOUN']

In [5]:
# Two sentences in one string: "book" appears once as a noun and once as a verb.
doc = nlp("I read a wonderful book. I booked a flight to Mexico.")
# Text of each sentence span found in the doc.
[sentence.text for sentence in doc.sents]

['I read a wonderful book.', 'I booked a flight to Mexico.']

In [6]:
# Text of each noun chunk in the current doc.
[noun_chunk.text for noun_chunk in doc.noun_chunks]

['I', 'a wonderful book', 'I', 'a flight', 'Mexico']

In [7]:
# Named entities: pair each entity's text with its label.
doc = nlp("Steve Jobs founded Apple Computer")
[(entity.text, entity.label_) for entity in doc.ents]

[('Steve Jobs', 'PERSON'), ('Apple Computer', 'ORG')]

In [8]:
# Visualize the entities of the current doc with displaCy's "ent" style.
displacy.render(doc, style="ent")

In [33]:
doc = nlp("I ran with running shoes")
# Styling for the dependency visualizer; the SVG-export cell reuses this dict.
options = {"compact": False, "bg": "#000000",
           "color": "white", "font": "Helvetica"}
# With jupyter=True displaCy displays the markup inline instead of handing it
# back, so the result is not bound to a name (it would only ever hold None).
displacy.render(doc, style="dep", options=options, jupyter=True)

In [34]:
# jupyter=False makes displaCy return the markup as a string instead of
# displaying it, so it can be saved to disk.
svg = displacy.render(doc, style="dep", options=options, jupyter=False)
output_path = Path("dep_plt.svg")
# write_text opens, writes and closes the file in one call, so no file handle
# is left open; it returns the number of characters written.
output_path.write_text(svg, encoding="utf-8")

3856

In [10]:
# Print the human-readable description of a few dependency labels, one per line.
for label in ("nsubj", "prep", "pobj", "amod"):
    print(spacy.explain(label))

nominal subject
prepositional modifier
object of preposition
adjectival modifier


In [11]:
# For each token show (surface text, lemma, part of speech); "ran" and
# "running" are two forms of the same word used in different roles.
doc = nlp("I ran with running shoes")
[(tok.text, tok.lemma_, tok.pos_) for tok in doc]

[('I', 'I', 'PRON'),
 ('ran', 'run', 'VERB'),
 ('with', 'with', 'ADP'),
 ('running', 'running', 'NOUN'),
 ('shoes', 'shoe', 'NOUN')]

In [12]:
# Two short documents that differ only in their final noun.
doc1, doc2 = (nlp(text) for text in ("I like cats", "I like dogs"))
# Similarity score between the two documents.
doc1.similarity(doc2)

0.957709143352323

In [13]:
# Names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [14]:
# The same components as (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x127b53c40>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x127b53760>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x127d650b0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x127d10340>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x127d93680>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x127d65200>)]

In [15]:
doc = nlp("I like cats")
# NOTE(review): this reads 0.0 here and also for "I hate dogs" in the next cell;
# none of the components listed by nlp.pipe_names looks like a sentiment model,
# so this is presumably an unset default rather than a real score — confirm.
doc.sentiment

0.0

In [16]:
doc = nlp("I hate dogs")
# NOTE(review): a negative sentence yields the same 0.0 as "I like cats" in the
# previous cell, which suggests the attribute is not populated by this
# pipeline — confirm before relying on it.
doc.sentiment

0.0