In [1]:
from nltk.corpus import treebank
# Read the parsed sentences stored in the treebank sample file wsj_0001.mrg.
t = treebank.parsed_sents('wsj_0001.mrg')
for tree in t:
    # Show each parse twice: first as bracketed text, then as a diagram.
    print(tree)
    # NOTE(review): per the NLTK docs, draw() opens a separate GUI window;
    # presumably this needs a display and may pause the loop until the
    # window is closed — confirm before running headless.
    tree.draw()

(S
  (NP-SBJ
    (NP (NNP Pierre) (NNP Vinken))
    (, ,)
    (ADJP (NP (CD 61) (NNS years)) (JJ old))
    (, ,))
  (VP
    (MD will)
    (VP
      (VB join)
      (NP (DT the) (NN board))
      (PP-CLR (IN as) (NP (DT a) (JJ nonexecutive) (NN director)))
      (NP-TMP (NNP Nov.) (CD 29))))
  (. .))
(S
  (NP-SBJ (NNP Mr.) (NNP Vinken))
  (VP
    (VBZ is)
    (NP-PRD
      (NP (NN chairman))
      (PP
        (IN of)
        (NP
          (NP (NNP Elsevier) (NNP N.V.))
          (, ,)
          (NP (DT the) (NNP Dutch) (VBG publishing) (NN group))))))
  (. .))


In [2]:
import nltk
from nltk import CFG
# Toy context-free grammar for one ambiguous sentence. Quoted symbols are
# terminals (words); unquoted symbols are nonterminals. The PP can attach
# either to the verb phrase (VP -> VP PP) or to the noun phrase
# (NP -> Det N PP), which is what produces two parses below.
grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
  """)
# The sentence is supplied pre-tokenized, one grammar terminal per token.
sent = ["I", "shot", "an", "elephant", "in", "my", "pajamas"]
# Chart-parse the tokens and print every tree the grammar yields for them.
parser = nltk.ChartParser(grammar)
for tree in parser.parse(sent):
    print(tree)

(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))
(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))


In [3]:
from nltk.parse.generate import generate
# Enumerate sentences derivable from `grammar` (defined in an earlier cell),
# capped at 10 by n=10, and print each one as a space-separated string.
for sentence in generate(grammar, n=10):
    print(' '.join(sentence))

an elephant shot an elephant
an elephant shot an pajamas
an elephant shot my elephant
an elephant shot my pajamas
an elephant shot an elephant in an elephant
an elephant shot an elephant in an pajamas
an elephant shot an elephant in my elephant
an elephant shot an elephant in my pajamas
an elephant shot an elephant in an elephant in an elephant
an elephant shot an elephant in an elephant in an pajamas


In [4]:
from nltk import CFG
# Extended grammar: besides the original words, the preterminals also accept
# Penn Treebank POS tag names ('DT', 'NN', 'VBP', ...) as terminals, so a
# sequence of tags can be parsed in place of a sequence of words.
# NOTE(review): on the NP line, the unquoted DT and NN are nonterminals with
# no productions of their own (only the quoted 'DT' / 'NN' terminals exist),
# so the `DT NN` alternative can never match — confirm the intent.
grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I' | DT NN | P
    VP -> V NP | VP PP
    Det -> 'an' | 'my' |'DT'
    N -> 'elephant' | 'pajamas'|'NN'
    V -> 'shot'|'VBP'|'VBD'
    P -> 'in'|'PRP$'| 'PRP'| 'IN'
    

  """)
sent = ["I", "shot", "an", "elephant", "in", "my", "sweatshirt"]
try:
    parser = nltk.ChartParser(grammar)
    for tree in parser.parse(sent):
        print(tree)
except ValueError:
    # The parser checks that the grammar covers every input token and raises
    # ValueError when it does not; 'sweatshirt' is not a terminal of this
    # grammar. Catching only ValueError (instead of a bare `except:`) lets
    # unrelated failures such as NameError or KeyboardInterrupt surface.
    print("error parsing the sentence")
    
    


error parsing the sentence


In [5]:
import nltk
from nltk.tokenize import TreebankWordTokenizer
treebank_tokenizer = TreebankWordTokenizer()

# Named `text` rather than `input` so the built-in input() is not shadowed
# for the rest of the kernel session.
text = "I shot him"
tokens = treebank_tokenizer.tokenize(text)
print(tokens)

# Tag every token, then keep only the tag of each (word, tag) pair: the
# grammar from the previous cell lists POS tag names as terminals, so the
# tag sequence is what gets parsed.
pos_tags = nltk.pos_tag(tokens)
print(pos_tags)
pos_only = [pos for _word, pos in pos_tags]
print(pos_only)
parser = nltk.ChartParser(grammar)

for tree in parser.parse(pos_only):
    print(tree)

# End-of-cell marker, kept so the cell's printed output is unchanged.
print("tse")

['I', 'shot', 'him']
[('I', 'PRP'), ('shot', 'VBD'), ('him', 'PRP')]
['PRP', 'VBD', 'PRP']
(S (NP (P PRP)) (VP (V VBD) (NP (P PRP))))
tse


In [2]:
from stanfordcorenlp import StanfordCoreNLP

# Alternative constructor call that points at an unpacked CoreNLP
# distribution on disk instead of a server URL:
# nlp = StanfordCoreNLP(r'/products/corenlp/stanford-corenlp-full-2018-02-27')
nlp = StanfordCoreNLP('http://localhost', port=9000)
sentence = 'The ship has sailed.'
try:
    # parse() returns the constituency parse; later cells reuse it.
    constituency = nlp.parse(sentence)
    print('Constituency Parsing:', constituency)
finally:
    # Do not forget to close! The backend server will consume a lot of memory.
    # Closing in `finally` guarantees this happens even when parse() raises.
    nlp.close()

Constituency Parsing: (ROOT
  (S
    (NP (DT The) (NN ship))
    (VP (VBZ has)
      (VP (VBN sailed)))
    (. .)))


In [3]:
# Print the bracketed parse on its own, without the label prefix used above.
print(constituency)

(ROOT
  (S
    (NP (DT The) (NN ship))
    (VP (VBZ has)
      (VP (VBN sailed)))
    (. .)))


In [4]:
from nltk.tree import *
# Parse the bracketed string into a Tree once, keep it for the cells below,
# and render that same object as an ASCII diagram (the original parsed the
# string twice to do the same thing).
const_tree = Tree.fromstring(constituency)
const_tree.pretty_print()

             ROOT               
              |                  
              S                 
      ________|_______________   
     |             VP         | 
     |         ____|____      |  
     NP       |         VP    | 
  ___|___     |         |     |  
 DT      NN  VBZ       VBN    . 
 |       |    |         |     |  
The     ship has      sailed  . 



In [5]:
def getNodes(parent):
    """Recursively print the label and leaves of every subtree under ``parent``.

    ``parent`` is iterated and each child is handled as follows:

    * a ``Tree`` labelled "ROOT" prints a sentence banner followed by the
      sentence text (its leaves joined by single spaces);
    * any other ``Tree`` prints its label and its list of leaves;
    * anything else is treated as a leaf token and printed as a word.

    Every ``Tree`` child is then descended into. Only the *children* of
    ``parent`` are inspected, never ``parent`` itself, so the banner appears
    only when ``parent`` is a container of ROOT trees (for example a list of
    parsed sentences).

    Args:
        parent: An iterable whose items are ``Tree`` nodes and/or leaf tokens.

    Returns:
        None. All results are written to standard output.
    """
    for node in parent:
        # isinstance (rather than an exact type comparison) so that Tree
        # subclasses are descended into instead of being printed as words.
        if isinstance(node, Tree):
            if node.label() == "ROOT":
                print("======== Sentence =========")
                print("Sentence:", " ".join(node.leaves()))
            else:
                print("Label:", node.label())
                print("Leaves:", node.leaves())

            getNodes(node)
        else:
            print("Word:", node)
# NOTE(review): getNodes only inspects the children of its argument, and
# const_tree is itself the ROOT node, so the "Sentence" banner branch is
# never reached by this call. Passing [const_tree] would reach it —
# confirm which output is intended.
getNodes(const_tree)

Label: S
Leaves: ['The', 'ship', 'has', 'sailed', '.']
Label: NP
Leaves: ['The', 'ship']
Label: DT
Leaves: ['The']
Word: The
Label: NN
Leaves: ['ship']
Word: ship
Label: VP
Leaves: ['has', 'sailed']
Label: VBZ
Leaves: ['has']
Word: has
Label: VP
Leaves: ['sailed']
Label: VBN
Leaves: ['sailed']
Word: sailed
Label: .
Leaves: ['.']
Word: .
