In [92]:
import nltk
import re

In [93]:
# Sample sentences fed to the parser; each one is also bound to its own
# s1..s10 name so a single sentence can be picked out by number.
sentences = [
    "Holmes sat.",
    "Holmes lit a pipe.",
    "We arrived the day before Thursday.",
    "Holmes sat in the red armchair and he chuckled.",
    "My companion smiled an enigmatical smile. ",
    "Holmes chuckled to himself.",
    "She never said a word until we were at the door here.",
    "Holmes sat down and lit his pipe.",
    "Holmes sat in the little red armchair.",
    "Holmes sat in the the armchair.",
]
s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 = sentences

# Sentence used by the single-sentence cells that follow.
sentence = s7

In [94]:
def preprocess(sentence):
    """
    Tokenize `sentence` and return its words as a list of lowercase strings.

    Tokens are produced by `nltk.word_tokenize`. A token is kept only if it
    contains at least one ASCII letter (a-z or A-Z); tokens made up solely of
    digits or punctuation are dropped.
    """
    words = []
    for token in nltk.word_tokenize(sentence):
        # Match objects are always truthy, so this keeps exactly the tokens
        # in which the pattern is found.
        if re.search('[a-zA-Z]', token):
            words.append(token.lower())
    return words

In [95]:
# Tokenize the selected sample sentence and display the resulting word list.
# `s` is the token list that the parsing cells pass to `parser.parse`.
s = preprocess(sentence)
s

['she',
 'never',
 'said',
 'a',
 'word',
 'until',
 'we',
 'were',
 'at',
 'the',
 'door',
 'here']

In [96]:
# Lexical rules: each part-of-speech symbol expands to one of the quoted words.
# All words are lowercase, matching the lowercased tokens preprocess() returns.
TERMINALS = """
Adj -> "country" | "dreadful" | "enigmatical" | "little" | "moist" | "red"
Adv -> "down" | "here" | "never"
Conj -> "and" | "until"
Det -> "a" | "an" | "his" | "my" | "the"
N -> "armchair" | "companion" | "day" | "door" | "hand" | "he" | "himself"
N -> "holmes" | "home" | "i" | "mess" | "paint" | "palm" | "pipe" | "she"
N -> "smile" | "thursday" | "walk" | "we" | "word"
P -> "at" | "before" | "in" | "of" | "on" | "to"
V -> "arrived" | "came" | "chuckled" | "had" | "lit" | "said" | "sat"
V -> "smiled" | "tell" | "were"
"""

# Phrase-structure rules: sentence (S), noun phrase (NP), verb phrase (VP),
# prepositional phrase (PP) and nominal (Nom).
# NOTE(review): `NP -> NP Adv` lets an adverb attach to the noun phrase before
# it, so "she never said ..." parses both as (NP she never) and as
# (VP never said ...); this is why the sample sentence yields two trees.
NONTERMINALS = """
S -> NP VP | S Conj S
NP -> Nom | Det Nom | Det Nom PP | NP Adv | NP Conj NP
VP -> V | V NP | V PP | Adv VP | V Adv | VP Conj VP
PP -> P NP
Nom -> N | Adj Nom
"""

# NONTERMINALS goes first so that the `S` rule is the first production in the
# combined grammar text (NLTK uses the first rule's left-hand side as the
# start symbol).
grammar = nltk.CFG.fromstring(NONTERMINALS + TERMINALS)
# Chart parser over the combined grammar; used by every parsing cell below.
parser = nltk.ChartParser(grammar)

In [97]:
# Attempt to parse the token list `s` into a list of parse trees.
try:
    trees = list(parser.parse(s))
except ValueError as e:
    # The parser rejects input containing a word the grammar does not cover.
    # Report it and leave `trees` as an empty list, so that later cells never
    # hit an unbound name or silently reuse trees from a previous run.
    print(e)
    trees = []
else:
    # Every word is known, but no combination of rules spans the sentence.
    if not trees:
        print("Could not parse sentence.")

In [98]:
# Parse the token list `s` again and display every tree the parser yields.
list(parser.parse(s))

[Tree('S', [Tree('S', [Tree('NP', [Tree('NP', [Tree('Nom', [Tree('N', ['she'])])]), Tree('Adv', ['never'])]), Tree('VP', [Tree('V', ['said']), Tree('NP', [Tree('Det', ['a']), Tree('Nom', [Tree('N', ['word'])])])])]), Tree('Conj', ['until']), Tree('S', [Tree('NP', [Tree('Nom', [Tree('N', ['we'])])]), Tree('VP', [Tree('V', ['were']), Tree('PP', [Tree('P', ['at']), Tree('NP', [Tree('NP', [Tree('Det', ['the']), Tree('Nom', [Tree('N', ['door'])])]), Tree('Adv', ['here'])])])])])]),
 Tree('S', [Tree('S', [Tree('NP', [Tree('Nom', [Tree('N', ['she'])])]), Tree('VP', [Tree('Adv', ['never']), Tree('VP', [Tree('V', ['said']), Tree('NP', [Tree('Det', ['a']), Tree('Nom', [Tree('N', ['word'])])])])])]), Tree('Conj', ['until']), Tree('S', [Tree('NP', [Tree('Nom', [Tree('N', ['we'])])]), Tree('VP', [Tree('V', ['were']), Tree('PP', [Tree('P', ['at']), Tree('NP', [Tree('NP', [Tree('Det', ['the']), Tree('Nom', [Tree('N', ['door'])])]), Tree('Adv', ['here'])])])])])])]

In [99]:
# For each parse, print the list of the root node's direct children,
# followed by blank lines as a separator.
for tree in trees:
    print([child for child in tree])
    print('\n\n')

[Tree('S', [Tree('NP', [Tree('NP', [Tree('Nom', [Tree('N', ['she'])])]), Tree('Adv', ['never'])]), Tree('VP', [Tree('V', ['said']), Tree('NP', [Tree('Det', ['a']), Tree('Nom', [Tree('N', ['word'])])])])]), Tree('Conj', ['until']), Tree('S', [Tree('NP', [Tree('Nom', [Tree('N', ['we'])])]), Tree('VP', [Tree('V', ['were']), Tree('PP', [Tree('P', ['at']), Tree('NP', [Tree('NP', [Tree('Det', ['the']), Tree('Nom', [Tree('N', ['door'])])]), Tree('Adv', ['here'])])])])])]



[Tree('S', [Tree('NP', [Tree('Nom', [Tree('N', ['she'])])]), Tree('VP', [Tree('Adv', ['never']), Tree('VP', [Tree('V', ['said']), Tree('NP', [Tree('Det', ['a']), Tree('Nom', [Tree('N', ['word'])])])])])]), Tree('Conj', ['until']), Tree('S', [Tree('NP', [Tree('Nom', [Tree('N', ['we'])])]), Tree('VP', [Tree('V', ['were']), Tree('PP', [Tree('P', ['at']), Tree('NP', [Tree('NP', [Tree('Det', ['the']), Tree('Nom', [Tree('N', ['door'])])]), Tree('Adv', ['here'])])])])])]





In [100]:
# Draw each parse in `trees` as an ASCII diagram, followed by blank lines
# as a separator between diagrams.
for tree in trees:
    tree.pretty_print()
    print('\n\n')

                                  S                                 
                __________________|_____________                     
               |                  |             S                   
               |                  |     ________|___                 
               |                  |    |            VP              
               |                  |    |    ________|___             
               S                  |    |   |            PP          
      _________|____              |    |   |     _______|___         
     NP             VP            |    |   |    |           NP      
  ___|____      ____|___          |    |   |    |        ___|____    
 NP       |    |        NP        |    NP  |    |       NP       |  
 |        |    |     ___|___      |    |   |    |    ___|___     |   
Nom       |    |    |      Nom    |   Nom  |    |   |      Nom   |  
 |        |    |    |       |     |    |   |    |   |       |    |   
 N       Adv   V   Det     

In [101]:
# Walk the first parse and report the subtrees whose height is exactly 3,
# showing each one's bracketed form, label, height and leaves.
tree_x = trees[0]
for x in (t for t in tree_x.subtrees() if t.height() == 3):
    print(f'x:\n {x}')
    print(f'x.label():\n {x.label()}')
    print(f'x.height():\n {x.height()}')
    print(f'x.leaves():\n {x.leaves()}')
    print('\n\n')

x:
 (Nom (N she))
x.label():
 Nom
x.height():
 3
x.leaves():
 ['she']



x:
 (Nom (N word))
x.label():
 Nom
x.height():
 3
x.leaves():
 ['word']



x:
 (Nom (N we))
x.label():
 Nom
x.height():
 3
x.leaves():
 ['we']



x:
 (Nom (N door))
x.label():
 Nom
x.height():
 3
x.leaves():
 ['door']





In [102]:
# For every NP subtree of the first parse, show the subtree, the labels of all
# subtrees it contains (itself included), how many of those are 'NP', and its
# words both as a list and as a joined string.
tree_x = trees[0]
# The loop variable is deliberately not named `s`: at notebook level `s` holds
# the token list that the parsing cells pass to `parser.parse`, and a loop
# variable of that name would overwrite it with a Tree.
for subtree in tree_x.subtrees(lambda t: t.label() == 'NP'):
    print(f's:\n {subtree}')
    labels = [inner.label() for inner in subtree.subtrees()]
    print(labels)
    print(labels.count('NP'))
    words = subtree.leaves()
    print(words)
    print(' '.join(words))
    print('\n\n')

s:
 (NP (NP (Nom (N she))) (Adv never))
['NP', 'NP', 'Nom', 'N', 'Adv']
2
['she', 'never']
she never



s:
 (NP (Nom (N she)))
['NP', 'Nom', 'N']
1
['she']
she



s:
 (NP (Det a) (Nom (N word)))
['NP', 'Det', 'Nom', 'N']
1
['a', 'word']
a word



s:
 (NP (Nom (N we)))
['NP', 'Nom', 'N']
1
['we']
we



s:
 (NP (NP (Det the) (Nom (N door))) (Adv here))
['NP', 'NP', 'Det', 'Nom', 'N', 'Adv']
2
['the', 'door', 'here']
the door here



s:
 (NP (Det the) (Nom (N door)))
['NP', 'Det', 'Nom', 'N']
1
['the', 'door']
the door





In [103]:
# Collect the noun phrase chunks of the first parse: NP subtrees that contain
# no other NP subtree, each rendered as a space-joined string of its words.
tree_x = trees[0]
out = []
# The loop variable is deliberately not named `s`: at notebook level `s` holds
# the token list that the parsing cells pass to `parser.parse`.
for subtree in tree_x.subtrees(lambda t: t.label() == 'NP'):
    labels = [inner.label() for inner in subtree.subtrees()]
    # The label list includes `subtree` itself, so a count of exactly one
    # 'NP' means no further NP is nested inside it.
    if labels.count('NP') == 1:
        out.append(' '.join(subtree.leaves()))

out

['she', 'a word', 'we', 'the door']

In [104]:
def np_chunk(tree):
    """
    Return the noun phrase chunks of `tree` as a list of subtrees.

    A chunk is a subtree labelled 'NP' that does not contain any other
    subtree labelled 'NP'.
    """
    chunks = []
    for subtree in tree.subtrees(lambda t: t.label() == 'NP'):
        labels = [inner.label() for inner in subtree.subtrees()]
        # The label list includes `subtree` itself, so a count of exactly one
        # 'NP' means no further NP is nested inside it.
        if labels.count('NP') == 1:
            chunks.append(subtree)
    return chunks


# Parse every sample sentence; for each parse, draw the tree and list its
# noun phrase chunks.
for sentence in sentences:

    s = preprocess(sentence)

    # Attempt to parse sentence
    try:
        trees = list(parser.parse(s))
    except ValueError as e:
        # A word is not covered by the grammar. Reset `trees` so the trees of
        # the previous sentence are not printed again for this one.
        print(e)
        trees = []
    else:
        if not trees:
            print("Could not parse sentence.")

    for tree in trees:
        tree.pretty_print()
        print('\n\n')

        out = np_chunk(tree)

        print("Noun Phrase Chunks")
        for chunk in out:
            print(" ".join(chunk.leaves()))
        print('\n\n\n\n')

        S     
   _____|___   
  NP        | 
  |         |  
 Nom        VP
  |         |  
  N         V 
  |         |  
holmes     sat




Noun Phrase Chunks
holmes





            S              
   _________|___            
  |             VP         
  |      _______|___        
  NP    |           NP     
  |     |        ___|___    
 Nom    |       |      Nom 
  |     |       |       |   
  N     V      Det      N  
  |     |       |       |   
holmes lit      a      pipe




Noun Phrase Chunks
holmes
a pipe





       S                               
  _____|_____                           
 |           VP                        
 |      _____|___                       
 |     |         NP                    
 |     |      ___|__________            
 |     |     |   |          PP         
 |     |     |   |     _____|_____      
 NP    |     |   |    |           NP   
 |     |     |   |    |           |     
Nom    |     |  Nom   |          Nom   
 |     |     |   |    |   