In [3]:
import spacy
from utils import *
from spacy import displacy

# Load the small English pipeline by its full package name. The bare 'en'
# shortcut only resolves through a shortcut link (conventionally pointing at
# en_core_web_sm), and spaCy v3 no longer supports shortcut links.
nlp = spacy.load('en_core_web_sm')

document_string = """
The Waystone Inn lay in silence,
and it was a silence of three parts.
"""

# Collapse every run of whitespace (including the newlines above) into a
# single space, which also trims the leading and trailing whitespace.
document_string = ' '.join(document_string.split())

print('Working with string: "%s"' % document_string)
doc = nlp(document_string)

print('All the found noun chunks & some properties:')

# The first row acts as the column header. Every following row describes
# one noun chunk:
#   chunk            - a Span object with the full phrase
#   chunk.root       - the key Token within this phrase
#   chunk.root.dep_  - the grammatical role of this phrase
#   chunk.root.head  - the grammatical parent Token
rows = [['Chunk', '.root', 'root.dep_', '.root.head']]
rows.extend(
    [chunk, chunk.root, chunk.root.dep_, chunk.root.head]
    for chunk in doc.noun_chunks
)

# NOTE(review): the header is passed as the first data row while the first
# argument is an empty list; the rendered output shows numeric column labels
# above it. Presumably `table`'s first argument is meant for the headers --
# confirm against utils.
table([], rows)

# Chunk                .root       root.dep_     .root.head
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The Waystone Inn     Inn         nsubj         lay
# silence              silence     pobj          in
# it                   it          nsubj         was
# a silence            silence     attr          was
# three parts          parts       pobj          of


# Find the head words of sentences.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

document_string = """
It's the questions we can't answer that teach us the most.
They teach us how to think.
"""

# Normalise whitespace: split on any whitespace run and re-join with single
# spaces, dropping leading/trailing whitespace and the embedded newlines.
document_string = ' '.join(document_string.split())

print('Working with string: "%s"' % document_string)
doc = nlp(document_string)

# spacy marks one token per sentence as the root of that sentence's
# dependency tree. You can think of it as the grammatically most
# meaningful word in the sentence.

print('Root word of each sentence:')

# Header row first, then one [root, separator, text] row per sentence.
rows = [['Root', '|', 'Sentence']]
rows.extend(
    [sentence.root, '|', sentence.text]
    for sentence in doc.sents
)
table([],rows)

# Root  | Sentence
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 's    | It's the questions we can't answer that teach us ...
# teach | They teach us how to think.


# Find all the dependent tokens of a given one.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# This means finding the words in a sentence being operated on
# by the given input word. Another perspective is to view words
# lower in the dependency tree (that is, being more dependent),
# as being less important to the overall sentence meaning.

print('Dependent words (aka subtree) of some tokens:')
rows = [['Token', '|', 'Subtree']]

# Print subtrees for 'teach' in 1st sentence, 'most', and then
# 'teach' in the 2nd sentence (which are tokens 9, 12, and 15).
for token in [doc[9], doc[12], doc[15]]:
    # Render the subtree as text, wrapping the selected token itself in
    # ((...)). Tokens are matched by their position in the doc (Token.i)
    # rather than by object identity: `is` only works if the Doc hands back
    # the very same Python wrapper object on every access, which is an
    # implementation detail and would silently drop the highlight otherwise.
    subtree = [
        ('((%s))' if t.i == token.i else '%s') % t.text
        for t in token.subtree
    ]
    rows.append([token.text, '|', ' '.join(subtree)])
table([],rows)

# Token | Subtree
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# teach | that ((teach)) us the most
# most  | the ((most))
# teach | They ((teach)) us how to think .

Working with string: "The Waystone Inn lay in silence, and it was a silence of three parts."
All the found noun chunks & some properties:


0,1,2,3
Chunk,.root,root.dep_,.root.head
The Waystone Inn,Inn,nsubj,lay
silence,silence,pobj,in
it,it,nsubj,was
a silence,silence,attr,was
three parts,parts,pobj,of


Working with string: "It's the questions we can't answer that teach us the most. They teach us how to think."
Root word of each sentence:


0,1,2
Root,|,Sentence
's,|,It's the questions we can't answer that teach us the most.
teach,|,They teach us how to think.


Dependent words (aka subtree) of some tokens:


0,1,2
Token,|,Subtree
teach,|,that ((teach)) us the most
most,|,the ((most))
teach,|,They ((teach)) us how to think .


In [4]:
# Draw the dependency parse ('dep' style) of the most recently parsed `doc`
# inline in the notebook.
displacy.render(
    doc,
    style='dep',
    jupyter=True,
)