In [1]:
import os
import pandas as pd
import string
import nltk
import nltk.corpus
from nltk.tokenize import word_tokenize
from nltk import Tree

In [2]:
# Fetch the 'punkt' resource into the local NLTK data directory.
# NOTE(review): presumably needed by the sent_tokenize / word_tokenize calls
# further down — confirm.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
# Fetch the 'averaged_perceptron_tagger' resource into the local NLTK data directory.
# NOTE(review): presumably the model behind the nltk.pos_tag call further
# down — confirm.
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [4]:
# Defining a function for reading the text documents
def Document(*doc_path, encoding=None):
  """Read one or more text files and return their contents as one string.

  Args:
    *doc_path: Paths of the text files to read, in the order given.
    encoding: Text encoding forwarded to ``open``. ``None`` (the default)
      keeps the platform default.

  Returns:
    The text that was read. With a single path this is exactly that file's
    contents; with several paths the contents are joined with a newline;
    with no paths an empty string is returned.

  Raises:
    OSError: If any of the paths cannot be opened.
  """
  contents = []
  for path in doc_path:
    # The context manager closes the handle even when read() raises.
    with open(path, encoding=encoding) as handle:
      contents.append(handle.read())
  # Joining keeps every file's text instead of only the last one read.
  return "\n".join(contents)




In [5]:
# Read the story text from a hard-coded path.
# NOTE(review): '/content/...' is an absolute, environment-specific location —
# consider a configurable data directory.
docs = Document(r'/content/Secret Garden.txt')
# Bare expression: echoes the entire text as the cell output.
docs

'Aristide Valentin, Chief of the Paris Police, was late for his dinner, and some of his guests began to arrive before him. These were, however, reassured by his confidential servant, Ivan, the old man with a scar, and a face almost as grey as his moustaches, who always sat at a table in the entrance hall—a hall hung with weapons. Valentin’s house was perhaps as peculiar and celebrated as its master. It was an old house, with high walls and tall poplars almost overhanging the Seine; but the oddity—and perhaps the police value—of its architecture was this: that there was no ultimate exit at all except through this front door, which was guarded by Ivan and the armoury. The garden was large and elaborate, and there were many exits from the house into the garden. But there was no exit from the garden into the world outside; all round it ran a tall, smooth, unscalable wall with special spikes at the top; no bad garden, perhaps, for a man to reflect in whom some hundred criminals had sworn to

In [6]:
# Chunk grammar handed to nltk.RegexpParser: four tag patterns, all of which
# produce chunks labelled 'NP'.
# NOTE(review): in the parsed output below, proper nouns (NNP) stay unchunked
# and determiners stay outside the chunk (e.g. 'the' before 'old man'). That
# suggests <NN*> matches only the plain NN tag ("N" followed by zero or more
# "N") and that the first pattern claims tokens before the later patterns can
# use them. If every noun tag was intended, <NN.*> is the usual spelling —
# confirm the intent before changing the patterns.
grammar = """
     NP: {<JJ>*<NN*>+}
     {<JJ>*<NN*><CC>*<NN*>+}
     {<DT>*<JJ>*<NN>+}
     {<JJ>*<NNS>+}
    """


In [7]:
chunked_NP = nltk.RegexpParser(grammar)

def chunked_text(data):
  """Split text into sentences and chunk each one with the module-level parser.

  Each sentence is word-tokenized, POS-tagged and then parsed by
  ``chunked_NP``.

  Args:
    data: Raw text to process.

  Returns:
    A list holding one parse result per sentence, in document order.
  """
  parsed_sentences = []
  for raw_sentence in nltk.sent_tokenize(data):
    tokens = nltk.word_tokenize(raw_sentence)
    tagged_tokens = nltk.pos_tag(tokens)
    parsed_sentences.append(chunked_NP.parse(tagged_tokens))
  return parsed_sentences

In [8]:
# Run the sentence-split -> tokenize -> POS-tag -> chunk pipeline on the text.
tag_of_words = chunked_text(docs)
# Bare expression: displays the full list of parsed trees as the cell output.
tag_of_words

[Tree('S', [('Aristide', 'NNP'), ('Valentin', 'NNP'), (',', ','), ('Chief', 'NNP'), ('of', 'IN'), ('the', 'DT'), ('Paris', 'NNP'), ('Police', 'NNP'), (',', ','), ('was', 'VBD'), ('late', 'JJ'), ('for', 'IN'), ('his', 'PRP$'), Tree('NP', [('dinner', 'NN')]), (',', ','), ('and', 'CC'), ('some', 'DT'), ('of', 'IN'), ('his', 'PRP$'), Tree('NP', [('guests', 'NNS')]), ('began', 'VBD'), ('to', 'TO'), ('arrive', 'VB'), ('before', 'IN'), ('him', 'PRP'), ('.', '.')]),
 Tree('S', [('These', 'DT'), ('were', 'VBD'), (',', ','), ('however', 'RB'), (',', ','), ('reassured', 'VBN'), ('by', 'IN'), ('his', 'PRP$'), Tree('NP', [('confidential', 'JJ'), ('servant', 'NN')]), (',', ','), ('Ivan', 'NNP'), (',', ','), ('the', 'DT'), Tree('NP', [('old', 'JJ'), ('man', 'NN')]), ('with', 'IN'), ('a', 'DT'), Tree('NP', [('scar', 'NN')]), (',', ','), ('and', 'CC'), ('a', 'DT'), Tree('NP', [('face', 'NN')]), ('almost', 'RB'), ('as', 'RB'), ('grey', 'JJ'), ('as', 'IN'), ('his', 'PRP$'), Tree('NP', [('moustaches', 'NN

In [9]:
def NP_chunked(sentences):
  """Collect the text of every 'NP' subtree found in the given sentences.

  Args:
    sentences: Iterable of sentence structures to hand to
      ``RegexpParser.parse``.

  Returns:
    A list of strings, one per subtree labelled 'NP', each made of that
    subtree's words joined by single spaces.
  """
  # A fresh parser is built from the module-level grammar on every call.
  np_parser = nltk.RegexpParser(grammar)
  phrases = []
  for sentence in sentences:
    parsed = np_parser.parse(sentence)
    phrases.extend(
      ' '.join(token for token, _pos in chunk.leaves())
      for chunk in parsed.subtrees()
      if chunk.label() == 'NP'
    )
  return phrases

In [10]:
print("The extracted noun phrases are")
# NOTE(review): this repeats the chunked_text(docs) call whose result is
# already stored in tag_of_words.
text = chunked_text(docs)
# Gather the words of every 'NP' subtree as plain strings.
text_noun_phrases = NP_chunked(text)

print(text_noun_phrases)



The extracted noun phrases are
['dinner', 'guests', 'confidential servant', 'old man', 'scar', 'face', 'moustaches', 'table', 'entrance hall—a hall hung', 'weapons', 's house', 'peculiar', 'master', 'old house', 'high walls', 'tall poplars', 'oddity—and', 'police', 'architecture', 'ultimate exit', 'front door', 'armoury', 'garden', 'many exits', 'house', 'garden', 'exit', 'garden', 'world', 'tall', 'unscalable wall', 'special spikes', 'top', 'bad garden', 'man', 'criminals', 'guests', 'host', 'ten minutes', 'truth', 'last arrangements', 'executions', 'such ugly things', 'duties', 'precision', 'Ruthless', 'pursuit', 'criminals', 'punishment', 'methods', 'great influence', 'mitigation', 'sentences', 'purification', 'prisons', 'great humanitarian French freethinkers', 'only thing', 'justice', 'black clothes', 'red rosette—an', 'elegant figure', 'dark beard', 'grey', 'house', 'study', 'grounds', 'garden door', 'box', 'official place', 'few seconds', 'open door', 'garden', 'sharp moon', 'ra