In [34]:
import nltk
import re

In [35]:
s1 = "Holmes sat."
s2 = "Holmes lit a pipe."
s3 = "We arrived the day before Thursday."
s4 = "Holmes sat in the red armchair and he chuckled."
s5 = "My companion smiled an enigmatical smile. "
s6 = "Holmes chuckled to himself."
s7 = "She never said a word until we were at the door here."
s8 = "Holmes sat down and lit his pipe."
s9 = "I had a country walk on Thursday and came home in a dreadful mess."
s10 = "I had a little moist red paint in the palm of my hand."
s11 = "Holmes sat in the little red armchair."
s12 = "Holmes sat in the the armchair."
sentences = [s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12]
sentence = s7

In [36]:
def preprocess(sentence):
    """
    Convert `sentence` to a list of its words.
    Pre-process sentence by converting all characters to lowercase
    and removing any word that does not contain at least one alphabetic
    character.
    """
    tokens = nltk.word_tokenize(sentence)
    s = [w.lower() for w in tokens if re.search('[a-zA-Z]', w) is not None]
    return s

In [37]:
s = preprocess(sentence)
s

['she',
 'never',
 'said',
 'a',
 'word',
 'until',
 'we',
 'were',
 'at',
 'the',
 'door',
 'here']

In [38]:
TERMINALS = """
Adj -> "country" | "dreadful" | "enigmatical" | "little" | "moist" | "red"
Adv -> "down" | "here" | "never"
Conj -> "and" | "until"
Det -> "a" | "an" | "his" | "my" | "the"
N -> "armchair" | "companion" | "day" | "door" | "hand" | "he" | "himself"
N -> "holmes" | "home" | "i" | "mess" | "paint" | "palm" | "pipe" | "she"
N -> "smile" | "thursday" | "walk" | "we" | "word"
P -> "at" | "before" | "in" | "of" | "on" | "to"
V -> "arrived" | "came" | "chuckled" | "had" | "lit" | "said" | "sat"
V -> "smiled" | "tell" | "were"
"""

NONTERMINALS = """
S -> NP VP | S Conj S
NP -> Nom | Det Nom | Det Nom PP | Nom Adv | NP Conj NP
VP -> V | V NP | V PP | V NP PP | Adv VP | V Adv | VP Conj VP
PP -> P NP
Nom -> N | Adj Nom
"""

grammar = nltk.CFG.fromstring(NONTERMINALS + TERMINALS)
parser = nltk.ChartParser(grammar)

In [39]:
# Attempt to parse sentence
try:
    trees = list(parser.parse(s))
except ValueError as e:
    print(e)
if not trees:
    print("Could not parse sentence.")

Could not parse sentence.


In [40]:
list(parser.parse(s))

[]

In [41]:
for tree in trees:
    print(list(tree))
    print('\n\n')

In [42]:
for tree in trees:
    tree.pretty_print()
    print('\n\n')

In [43]:
tree_x = trees[0]
for x in tree_x.subtrees(lambda t: t.height() == 3):
    print(f'x:\n {x}')
    print(f'x.label():\n {x.label()}')
    print(f'x.height():\n {x.height()}') 
    print(f'x.leaves():\n {x.leaves()}') 
    print('\n\n')

IndexError: list index out of range

In [11]:
tree_x = trees[0]
for s in tree_x.subtrees(lambda t: t.label() == 'NP'):
    print(f's:\n {s}')
    y = [x.label() for x in s.subtrees()]
    print(y)
    print(y.count('NP'))
    print(s.leaves())
    print(' '.join(s.leaves()))
    print('\n\n')

s:
 (NP (NP (Nom (N she))) (Adv never))
['NP', 'NP', 'Nom', 'N', 'Adv']
2
['she', 'never']
she never



s:
 (NP (Nom (N she)))
['NP', 'Nom', 'N']
1
['she']
she



s:
 (NP (Det a) (Nom (N word)))
['NP', 'Det', 'Nom', 'N']
1
['a', 'word']
a word



s:
 (NP (Nom (N we)))
['NP', 'Nom', 'N']
1
['we']
we



s:
 (NP (NP (Det the) (Nom (N door))) (Adv here))
['NP', 'NP', 'Det', 'Nom', 'N', 'Adv']
2
['the', 'door', 'here']
the door here



s:
 (NP (Det the) (Nom (N door)))
['NP', 'Det', 'Nom', 'N']
1
['the', 'door']
the door





In [12]:
tree_x = trees[0]
out = []
for s in tree_x.subtrees(lambda t: t.label() == 'NP'):
    y = [x.label() for x in s.subtrees()]
    c = y.count('NP')
    if c == 1:
        out.append(' '.join(s.leaves()))

out

['she', 'a word', 'we', 'the door']

In [13]:
for sentence in sentences:
    
    s = preprocess(sentence)
    
    # Attempt to parse sentence
    try:
        trees = list(parser.parse(s))
    except ValueError as e:
        print(e)
    if not trees:
        print("Could not parse sentence.")
    
    for tree in trees:
        tree.pretty_print()
        print('\n\n')

        out = []
        for s in tree.subtrees(lambda t: t.label() == 'NP'):
            y = [x.label() for x in s.subtrees()]
            c = y.count('NP')
            if c == 1:
                out.append(s)

        print("Noun Phrase Chunks")
        for np in out:
            print(" ".join(np.flatten()))
        print('\n\n\n\n')

        S     
   _____|___   
  NP        | 
  |         |  
 Nom        VP
  |         |  
  N         V 
  |         |  
holmes     sat




Noun Phrase Chunks
holmes





            S              
   _________|___            
  |             VP         
  |      _______|___        
  NP    |           NP     
  |     |        ___|___    
 Nom    |       |      Nom 
  |     |       |       |   
  N     V      Det      N  
  |     |       |       |   
holmes lit      a      pipe




Noun Phrase Chunks
holmes
a pipe





             S                             
  ___________|_______                       
 |                   VP                    
 |      _____________|__________            
 |     |         |              PP         
 |     |         |         _____|_____      
 NP    |         NP       |           NP   
 |     |      ___|___     |           |     
Nom    |     |      Nom   |          Nom   
 |     |     |       |    |           |     
 N     V    Det      N    