In [179]:
import nltk
import re
import os

In [143]:
def preprocess(string):
    """Convert a raw sentence into a list of lowercase tokens.

    The text is lowercased, then every digit and every character that is
    neither whitespace nor a regex word character is removed (deleted, not
    replaced by a space). The remaining text is split into tokens with
    nltk's ``word_tokenize``.

    Args:
        string: The sentence text to process.

    Returns:
        The tokens produced by ``nltk.tokenize.word_tokenize``.
    """
    lowered = string.lower()
    stripped = re.sub(r"\d|[^\s\w]", "", lowered)
    return nltk.tokenize.word_tokenize(stripped)


In [174]:
# Lexical rules: each part-of-speech category on the left expands to the
# literal words listed on the right. A category may be spread over several
# lines (N and V are). All words are lowercase, matching the lowercasing done
# by preprocess() before parsing.
TERMINALS = """
Adj -> "country" | "dreadful" | "enigmatical" | "little" | "moist" | "red"
Adv -> "down" | "here" | "never"
Conj -> "and" | "until"
Det -> "a" | "an" | "his" | "my" | "the" 
N -> "armchair" | "companion" | "day" | "door" | "hand" | "he" | "himself"
N -> "holmes" | "home" | "i" | "mess" | "paint" | "palm" | "pipe" | "she"
N -> "smile" | "thursday" | "walk" | "we" | "word"
P -> "at" | "before" | "in" | "of" | "on" | "to"
V -> "arrived" | "came" | "chuckled" | "had" | "lit" | "said" | "sat"
V -> "smiled" | "tell" | "were"
"""

# Phrase-structure rules built on the categories above:
#   S  - sentence: NP VP, or two sentences / a sentence and a VP joined by Conj
#   AP - one or more adjectives
#   PP - a preposition, optionally followed by an NP
#   NP - a noun, optionally preceded by a determiner, an AP or a PP
#   VP - a verb with optional object NP, trailing PP, or adverb on either side
# NOTE(review): `PP -> P` lets a preposition stand without an object and
# `NP -> PP NP` places a PP in front of a noun phrase; both widen the set of
# accepted sentences and the number of parses - confirm this is intended.
NONTERMINALS = """
S ->  NP VP | S Conj S  | S Conj VP

AP -> Adj | Adj AP
PP -> P NP | P 

NP -> N | Det NP | AP NP | PP NP
VP -> V | V NP | VP PP | Adv VP | VP Adv

"""

# NONTERMINALS is concatenated first so that the `S -> ...` rule is the first
# production in the combined text; nltk's CFG.fromstring takes the start
# symbol from the left-hand side of the first production.
grammar = nltk.CFG.fromstring(NONTERMINALS + TERMINALS)
# Chart parser over the combined grammar; the parsing cell collects every tree
# it yields for a sentence, so one sentence may produce several parses.
parser = nltk.ChartParser(grammar)

In [161]:
""""
Holmes sat. 
  N     V
Holmes lit a pipe.
  N    V  Det  N
We arrived the day before Thursday.
 N    V    Det  N    P       N
Holmes sat in the red armchair and he chuckled.
 N      V  P  Det Adj   N      Conj N   V
My companion smiled an enigmatical smile. 
Det  N        V     Det   Adj        N
Holmes chuckled to himself.
 N        V      P    N 
 She never said a word until we were at the door here.
  N   Adv   V  Det  N   Conj  N   V   P Det  N    Adv
Holmes sat down and lit his pipe.
  N     V  Adv  Conj V  Det  N   
I had a country walk on Thursday and came home in a dreadful mess.
N  V  Det Adj    N    P   N     Conj   V   N   P Det  Adj      N 
I had a little moist red paint in the palm of my hand.
N  V  Det Adj  Adj   Adj  N    P  Det  N   P  Det  N 
  
""""



'N     V   P  Det Adj  N     Conj  N   V'

In [201]:
# Parse every sentence file in the sentences/ directory and draw each parse
# tree; report sentences that the grammar cannot parse.
#
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the printed output - and the `trees` value left behind by the final
# file, which the subtree-printing cell reads - the same on every run.
filename = sorted(os.listdir('sentences'))
for file in filename:
    filepath = os.path.join('sentences', file)
    # Skip sub-directories or other non-file entries that open() cannot read.
    if not os.path.isfile(filepath):
        continue
    with open(filepath) as f:
        sentence = f.read()
    s = preprocess(sentence)

    # Reset per file. If parsing raises, `trees` would otherwise be undefined
    # (NameError on the first file) or still hold the previous file's parses,
    # which would suppress the "Could not parse" message below.
    trees = []
    try:
        trees = list(parser.parse(s))
    except ValueError as e:
        # nltk raises ValueError when a token is not covered by the grammar.
        print(e)

    for tree in trees:
        tree.pretty_print()
    if not trees:
        print("Could not parse sentence.")



        S     
   _____|___   
  NP        VP
  |         |  
  N         V 
  |         |  
holmes     sat

              S                                                         
  ____________|____________________                                      
 |                                 VP                                   
 |                             ____|_________________________            
 |                            VP                             |          
 |             _______________|_____________                 |           
 |            VP                            |                |          
 |    ________|______                       |                |           
 |   |               NP                     |                |          
 |   |    ___________|____                  |                |           
 |   |   |                NP                |                |          
 |   |   |            ____|________         |                |           
 |   |   

In [213]:
# Print every subtree of each parse currently held in `trees`: the full tree
# first, then each nested constituent down to the word-level nodes.
# `trees` is whatever the parsing cell assigned for the last sentence it
# processed, so run that cell first.
for parse_tree in trees:
    for constituent in parse_tree.subtrees():
        print(constituent)

(S
  (S
    (NP (N i))
    (VP
      (VP (V had) (NP (Det a) (NP (AP (Adj country)) (NP (N walk)))))
      (PP (P on) (NP (N thursday)))))
  (Conj and)
  (VP
    (VP (V came) (NP (N home)))
    (PP (P in) (NP (Det a) (NP (AP (Adj dreadful)) (NP (N mess)))))))
(S
  (NP (N i))
  (VP
    (VP (V had) (NP (Det a) (NP (AP (Adj country)) (NP (N walk)))))
    (PP (P on) (NP (N thursday)))))
(NP (N i))
(N i)
(VP
  (VP (V had) (NP (Det a) (NP (AP (Adj country)) (NP (N walk)))))
  (PP (P on) (NP (N thursday))))
(VP (V had) (NP (Det a) (NP (AP (Adj country)) (NP (N walk)))))
(V had)
(NP (Det a) (NP (AP (Adj country)) (NP (N walk))))
(Det a)
(NP (AP (Adj country)) (NP (N walk)))
(AP (Adj country))
(Adj country)
(NP (N walk))
(N walk)
(PP (P on) (NP (N thursday)))
(P on)
(NP (N thursday))
(N thursday)
(Conj and)
(VP
  (VP (V came) (NP (N home)))
  (PP (P in) (NP (Det a) (NP (AP (Adj dreadful)) (NP (N mess))))))
(VP (V came) (NP (N home)))
(V came)
(NP (N home))
(N home)
(PP (P in) (NP (Det a) (NP (A