In [14]:
# Based on examples at: https://realpython.com/natural-language-processing-spacy-python/

In [15]:
# Import spaCy and load the `en_core_web_sm` pipeline used by every cell below
import spacy

nlp = spacy.load("en_core_web_sm")

In [16]:
# Sample sentence: "permit" appears twice, once as a verb and once as a noun,
# which makes it a handy test case for the tagger.
text = "The authority did not permit giving of fishing permit."

# Run the full pipeline over the sentence
doc = nlp(text)

In [17]:
# Show the surface form of every token in the parsed sentence
token_texts = [tok.text for tok in doc]
print(token_texts)

['The', 'authority', 'did', 'not', 'permit', 'giving', 'of', 'fishing', 'permit', '.']


In [18]:
# Pair each token's surface form with its lemma
text_lemma_pairs = [(tok.text, tok.lemma_) for tok in doc]
print(text_lemma_pairs)

[('The', 'the'), ('authority', 'authority'), ('did', 'do'), ('not', 'not'), ('permit', 'permit'), ('giving', 'give'), ('of', 'of'), ('fishing', 'fishing'), ('permit', 'permit'), ('.', '.')]


In [19]:
# Parts of speech: for each token print the fine-grained tag, the coarse POS,
# and spaCy's human-readable explanation of the fine-grained tag.
for token in doc:
    fine_tag = token.tag_
    print(token, fine_tag, token.pos_, spacy.explain(fine_tag))

The DT DET determiner
authority NN NOUN noun, singular or mass
did VBD AUX verb, past tense
not RB PART adverb
permit VB VERB verb, base form
giving VBG VERB verb, gerund or present participle
of IN ADP conjunction, subordinating or preposition
fishing NN NOUN noun, singular or mass
permit NN NOUN noun, singular or mass
. . PUNCT punctuation mark, sentence closer


In [21]:
# Noun chunking: list each noun phrase spaCy finds in the sentence
for noun_phrase in doc.noun_chunks:
    print(noun_phrase)

The authority
fishing permit


In [22]:
# Draw the dependency parse of the sentence with displaCy
from spacy import displacy

displacy.render(
    doc,
    style="dep",
)

In [24]:
# For analyzing verbs, we need to install textacy
!pip install textacy

Collecting textacy
  Downloading textacy-0.10.1-py3-none-any.whl (183 kB)
[K     |████████████████████████████████| 183 kB 1.0 MB/s eta 0:00:01
[?25hCollecting jellyfish>=0.7.0
  Downloading jellyfish-0.8.2.tar.gz (134 kB)
[K     |████████████████████████████████| 134 kB 11.8 MB/s eta 0:00:01
Collecting pyemd>=0.5.0
  Downloading pyemd-0.5.1.tar.gz (91 kB)
[K     |████████████████████████████████| 91 kB 11.1 MB/s eta 0:00:01
[?25hCollecting pyphen>=0.9.4
  Downloading Pyphen-0.9.5-py2.py3-none-any.whl (3.0 MB)
[K     |████████████████████████████████| 3.0 MB 13.2 MB/s eta 0:00:01
Building wheels for collected packages: jellyfish, pyemd
  Building wheel for jellyfish (setup.py) ... [?25ldone
[?25h  Created wheel for jellyfish: filename=jellyfish-0.8.2-cp37-cp37m-macosx_10_9_x86_64.whl size=23910 sha256=fe630c1d36c3ea2fc0d6e73fba9c3b9de5382e673c3152496177cfc2ddc7f720
  Stored in directory: /Users/biplavs/Library/Caches/pip/wheels/12/9e/c1/46af1db49a60a4adf555196c0ead132e4a36a2bd6

In [25]:
# Do import
import textacy

In [26]:
# Build the document object textacy works on, from the same sentence and
# the same model name used for `nlp` above
new_doc = textacy.make_spacy_doc(text, lang="en_core_web_sm")

In [31]:
# Verb-phrase pattern over coarse POS tags:
# an optional VERB, any number of ADVs, then one or more VERBs.
# Note: in this sentence "did" is tagged AUX and "not" is tagged PART, so the
# pattern picks up "permit giving" but not the negation in front of it.
pattern = r'(<VERB>?<ADV>*<VERB>+)'

# Materialise the matches into a list so the result can be iterated more than
# once; otherwise re-running the printing cell alone would show nothing.
# NOTE(review): pos_regex_matches appears to be deprecated in this textacy
# release and missing from later ones — confirm against the textacy changelog
# before upgrading.
verb_phrases = list(textacy.extract.pos_regex_matches(new_doc, pattern))

In [32]:
# Print the text of every verb phrase that matched the pattern
for verb_phrase in verb_phrases:
    print(verb_phrase.text)

permit giving


  action="once",
