In [1]:
# Based on examples at: https://realpython.com/natural-language-processing-spacy-python/

In [2]:
# Do imports
import spacy

In [3]:
## One-time setup: download the small English model (en_core_web_sm); medium (en_core_web_md) and large (en_core_web_lg) variants also exist
#!python -m spacy download en_core_web_sm

In [4]:
# Load the small English pipeline once; every later cell reuses `nlp`
MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(MODEL_NAME)

In [5]:
# Sentence to analyse, then run it through the pipeline to get a parsed Doc
text = "The authority did not permit giving of fishing permit."
doc = nlp(text)

In [6]:
# Show the surface text of every token in the parsed sentence
token_texts = [tok.text for tok in doc]
print(token_texts)

['The', 'authority', 'did', 'not', 'permit', 'giving', 'of', 'fishing', 'permit', '.']


In [7]:
# Pair each token's surface text with its lemma
text_lemma_pairs = [(tok.text, tok.lemma_) for tok in doc]
print(text_lemma_pairs)

[('The', 'the'), ('authority', 'authority'), ('did', 'do'), ('not', 'not'), ('permit', 'permit'), ('giving', 'give'), ('of', 'of'), ('fishing', 'fishing'), ('permit', 'permit'), ('.', '.')]


In [8]:
# Per-token report: token, fine-grained tag, coarse POS, and spaCy's gloss of the tag
for tok in doc:
    fine_tag = tok.tag_
    print(tok, fine_tag, tok.pos_, spacy.explain(fine_tag))

The DT DET determiner
authority NN NOUN noun, singular or mass
did VBD AUX verb, past tense
not RB PART adverb
permit VB VERB verb, base form
giving VBG VERB verb, gerund or present participle
of IN ADP conjunction, subordinating or preposition
fishing NN NOUN noun, singular or mass
permit NN NOUN noun, singular or mass
. . PUNCT punctuation mark, sentence closer


In [9]:
# List the noun chunks spaCy found in the sentence, one per line
for noun_phrase in doc.noun_chunks:
    print(noun_phrase)

The authority
fishing permit


In [10]:
# Draw the dependency parse of the sentence with displaCy
from spacy import displacy

displacy.render(doc, style="dep")

In [11]:
# For analyzing verbs, we need to install textacy
## Uncomment as necessary
#!pip install textacy

In [12]:
# Do import
import textacy

In [13]:
# Build a document for the same text through textacy's make_spacy_doc helper
new_doc = textacy.make_spacy_doc(text, lang="en_core_web_sm")

In [14]:
# Extract verb phrases: an optional verb, any number of adverbs, then one or
# more verbs (the same shape as the POS regex '<VERB>?<ADV>*<VERB>+').
# textacy.extract.pos_regex_matches is deprecated and emits a DeprecationWarning,
# so the pattern is expressed with spaCy's own rule-based Matcher instead.
from spacy.matcher import Matcher
from spacy.util import filter_spans

pattern = [
    {"POS": "VERB", "OP": "?"},
    {"POS": "ADV", "OP": "*"},
    {"POS": "VERB", "OP": "+"},
]
verb_matcher = Matcher(new_doc.vocab)
# List-of-patterns form of Matcher.add (accepted by spaCy >= 2.2.2 and by v3).
verb_matcher.add("VERB_PHRASE", [pattern])
# The Matcher reports overlapping candidates; keep only the longest
# non-overlapping spans so the result mirrors the greedy regex match.
verb_phrases = filter_spans(
    [new_doc[start:end] for _, start, end in verb_matcher(new_doc)]
)

In [15]:
# Print the text of each extracted verb phrase, one per line
for verb_phrase in verb_phrases:
    print(verb_phrase.text)

permit giving


  action="once",


## Another Example

In [16]:
# Second sentence: the same example used in the NLTK walkthrough
text = "I prefer a morning flight."

In [17]:
# Run the pipeline on the new sentence; this rebinds `doc`, which the cells below read
doc = nlp(text)

In [18]:
# Pair each token's surface text with its lemma
text_lemma_pairs = [(tok.text, tok.lemma_) for tok in doc]
print(text_lemma_pairs)

[('I', '-PRON-'), ('prefer', 'prefer'), ('a', 'a'), ('morning', 'morning'), ('flight', 'flight'), ('.', '.')]


In [19]:
# Per-token report: token, fine-grained tag, coarse POS, and spaCy's gloss of the tag
for tok in doc:
    fine_tag = tok.tag_
    print(tok, fine_tag, tok.pos_, spacy.explain(fine_tag))

I PRP PRON pronoun, personal
prefer VBP VERB verb, non-3rd person singular present
a DT DET determiner
morning NN NOUN noun, singular or mass
flight NN NOUN noun, singular or mass
. . PUNCT punctuation mark, sentence closer


In [20]:
# List the noun chunks spaCy found in the sentence, one per line
for noun_phrase in doc.noun_chunks:
    print(noun_phrase)

I
a morning flight


In [21]:
# Draw the dependency parse of the second sentence with displaCy
from spacy import displacy

displacy.render(doc, style="dep")