In [None]:
# Fetch the 'conll2000' corpus package through NLTK's downloader so that the
# corpus reader imported in the next cell has data to read.
import nltk
nltk.download('conll2000')

[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.


True

In [None]:
from nltk.corpus import conll2000

In [None]:
# Chunked sentences from the corpus file 'train.txt'; this is the training
# input later handed to TrigramChunkParser. No chunk_types filter is passed
# here, unlike the per-phrase-type cells further down.
train_sents=conll2000.chunked_sents('train.txt')

In [None]:
# Chunked sentences from the corpus file 'test.txt'; later passed to
# evaluate() as the reference the trained chunker is scored against.
test_sents=conll2000.chunked_sents('test.txt')

**TRIGRAM TAGGER**

In [None]:
from nltk import ChunkParserI, TrigramTagger
from nltk.chunk import conlltags2tree, tree2conlltags
 
 
class TrigramChunkParser(ChunkParserI):
    """Chunk parser that predicts IOB chunk tags from part-of-speech tags alone.

    A ``TrigramTagger`` is trained on sequences of (POS tag, IOB chunk tag)
    pairs, so the words themselves are never shown to the tagger. No backoff
    tagger is supplied to it.
    """

    def __init__(self, train_sents):
        """Train the underlying tagger.

        Args:
            train_sents: iterable of chunk trees; each one is flattened with
                ``tree2conlltags`` into (word, POS tag, IOB chunk tag) triples.
        """
        tag_sequences = []
        for tree in train_sents:
            # Drop the word, keep only the (POS-TAG, IOB-CHUNK-TAG) pair
            tag_sequences.append(
                [(pos, iob) for _word, pos, iob in tree2conlltags(tree)]
            )

        # The POS tags play the role of "tokens", the chunk tags the role of "tags"
        self.tagger = TrigramTagger(tag_sequences)

    def parse(self, sentence):
        """Chunk a single POS-tagged sentence.

        Args:
            sentence: sequence of (word, POS tag) pairs.

        Returns:
            The tree that ``conlltags2tree`` builds from the
            (word, POS tag, IOB chunk tag) triples.
        """
        pos_sequence = [pos for _word, pos in sentence]

        # One (POS tag, chunk tag) pair comes back per input POS tag
        iob_pairs = self.tagger.tag(pos_sequence)

        # Re-attach each word to its (POS tag, chunk tag) pair, position by position
        triples = [
            (word, pos, iob)
            for (word, _pos), (pos, iob) in zip(sentence, iob_pairs)
        ]

        return conlltags2tree(triples)

In [None]:
# Train the trigram chunker on the unfiltered training sentences, then print
# the score report that evaluate() returns for the unfiltered test sentences.
trigram_chunker = TrigramChunkParser(train_sents)
print(trigram_chunker.evaluate(test_sents))

ChunkParse score:
    IOB Accuracy:  87.7%%
    Precision:     81.0%%
    Recall:        84.4%%
    F-Measure:     82.6%%


**FOR NOUN PHRASE**

In [None]:
# Reload both splits with chunk_types=['NP'] so the experiments in this
# section are restricted to noun-phrase chunks.
# NOTE(review): presumably the corpus reader leaves tokens of every other
# chunk type unchunked — confirm against the NLTK documentation.
train_sents_NP = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
test_sents_NP = conll2000.chunked_sents('test.txt', chunk_types=['NP'])

In [None]:
# Retrain on the NP-only training data and print the score on the NP-only
# test data. NOTE: this rebinds `trigram_chunker`, replacing the chunker
# built in the earlier cell.
trigram_chunker = TrigramChunkParser(train_sents_NP)
print("using Trigramtagger")
print(trigram_chunker.evaluate(test_sents_NP))

using Trigramtagger
ChunkParse score:
    IOB Accuracy:  93.3%%
    Precision:     82.5%%
    Recall:        86.8%%
    F-Measure:     84.6%%


**RULE BASED FOR NOUN PHRASE**

In [None]:
# Single-rule chunk grammar for the rule-based NP baseline: the tag pattern
# <DT|JJ|NN.*>+ chunks each run of one or more consecutive tokens tagged DT,
# JJ, or a tag matching NN.* (NLTK tag-pattern syntax).
grammar_NP = r"""
     NP: {<DT|JJ|NN.*>+} 
         """

In [None]:
# Build a RegexpParser from the NP grammar; `cp` is scored in the next cell.
cp = nltk.RegexpParser(grammar_NP)

In [None]:
# Score the rule-based NP chunker on the NP-only test sentences.
# NOTE: relies on `cp` from the previous cell; the VP and PP sections rebind
# `cp`, so running this cell out of order would score a different grammar.
print(cp.evaluate(test_sents_NP))

ChunkParse score:
    IOB Accuracy:  80.7%%
    Precision:     65.7%%
    Recall:        61.1%%
    F-Measure:     63.3%%


**FOR VERB PHRASE**

In [None]:
# Reload both splits with chunk_types=['VP'] so the experiments in this
# section are restricted to verb-phrase chunks.
train_sents_VP = conll2000.chunked_sents('train.txt', chunk_types=['VP'])
test_sents_VP = conll2000.chunked_sents('test.txt', chunk_types=['VP'])

In [None]:
# Retrain on the VP-only training data and print the score on the VP-only
# test data. NOTE: this rebinds `trigram_chunker` again.
trigram_chunker = TrigramChunkParser(train_sents_VP)
print("using Trigramtagger")
print(trigram_chunker.evaluate(test_sents_VP))

using Trigramtagger
ChunkParse score:
    IOB Accuracy:  96.0%%
    Precision:     74.9%%
    Recall:        80.0%%
    F-Measure:     77.3%%


**RULE BASED FOR VERB PHRASE**

In [None]:
# Single-rule chunk grammar for the rule-based VP baseline. The tag pattern
# <VB.*>+ chunks each run of one or more consecutive tokens whose tag matches
# VB.*; nothing other than verb-tagged tokens is pulled into the chunk, so
# the rule description inside the grammar (the text after '#') says so.
grammar_VP = r"""
     VP: {<VB.*>+} # Chunk sequences of verbs
     """

In [None]:
# Build a RegexpParser from the VP grammar and print its score on the
# VP-only test sentences. NOTE: rebinds `cp`, which held the NP parser.
cp = nltk.RegexpParser(grammar_VP)
print(cp.evaluate(test_sents_VP))


ChunkParse score:
    IOB Accuracy:  92.8%%
    Precision:     59.6%%
    Recall:        68.9%%
    F-Measure:     63.9%%


**FOR PREPOSITION PHRASE**

In [None]:
# Reload both splits with chunk_types=['PP'] so the experiments in this
# section are restricted to prepositional-phrase chunks.
train_sents_PP = conll2000.chunked_sents('train.txt', chunk_types=['PP'])
test_sents_PP = conll2000.chunked_sents('test.txt', chunk_types=['PP'])

In [None]:
# Retrain on the PP-only training data and print the score on the PP-only
# test data. NOTE: this rebinds `trigram_chunker` again.
trigram_chunker = TrigramChunkParser(train_sents_PP)
print("using Trigramtagger")
print(trigram_chunker.evaluate(test_sents_PP))

using Trigramtagger
ChunkParse score:
    IOB Accuracy:  96.6%%
    Precision:     81.7%%
    Recall:        85.9%%
    F-Measure:     83.7%%


**RULE BASED FOR PREPOSITION PHRASE**

In [None]:
# Single-rule chunk grammar for the rule-based PP baseline: the tag pattern
# <IN> matches exactly one token tagged IN, so each such token becomes its
# own one-token PP chunk.
grammar_PP = r"""
     PP: {<IN>}              
     """

In [None]:
# Build a RegexpParser from the PP grammar and print its score on the
# PP-only test sentences. NOTE: rebinds `cp`, which held the VP parser.
cp = nltk.RegexpParser(grammar_PP)
print(cp.evaluate(test_sents_PP))

ChunkParse score:
    IOB Accuracy:  96.7%%
    Precision:     81.8%%
    Recall:        86.3%%
    F-Measure:     84.0%%
