# Chunking

In general, the human brain is good at grouping information together to make it easier to process and understand. When we memorize a telephone
number or any other sequence of numbers, we don't memorize each digit separately; we group the digits together so that
they are easier to remember.

Chunking is the grouping of information into larger, meaningful units.

In [1]:
#Importing the Libraries
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import RegexpParser

In [2]:
# Sample text for the demo: the poem "The Bell" by Richard Jones.
# The text is defined inline as one multi-line string (nothing is loaded from a file);
# the poem's line breaks and stanza breaks are part of the string.
dataset = """In the tower the bell
is alone, like a man
in his room,
thinking and thinking.
 
The bell is made of iron.
It takes the weight
of a man
to make the bell move.
 
Far below, the bell feels
hands on a rope.
It considers this.
It turns its head.
 
Miles away,
a man in his room
hears the clear sound,
and lifts his head to listen."""

In [3]:
# Tokenize the data: split the poem into a flat list of word and punctuation tokens.
# Commas and full stops become tokens of their own and the line breaks disappear,
# as the printed list shows.
# NOTE(review): word_tokenize presumably needs NLTK's tokenizer data to be installed
# beforehand (e.g. via nltk.download) — confirm when running in a fresh environment.

token_data = word_tokenize(dataset)
print(token_data)

['In', 'the', 'tower', 'the', 'bell', 'is', 'alone', ',', 'like', 'a', 'man', 'in', 'his', 'room', ',', 'thinking', 'and', 'thinking', '.', 'The', 'bell', 'is', 'made', 'of', 'iron', '.', 'It', 'takes', 'the', 'weight', 'of', 'a', 'man', 'to', 'make', 'the', 'bell', 'move', '.', 'Far', 'below', ',', 'the', 'bell', 'feels', 'hands', 'on', 'a', 'rope', '.', 'It', 'considers', 'this', '.', 'It', 'turns', 'its', 'head', '.', 'Miles', 'away', ',', 'a', 'man', 'in', 'his', 'room', 'hears', 'the', 'clear', 'sound', ',', 'and', 'lifts', 'his', 'head', 'to', 'listen', '.']


In [5]:
# Apply POS tagging: pair every token with a part-of-speech tag, producing a list of
# (token, tag) tuples such as ('tower', 'NN') or ('is', 'VBZ').
# NOTE(review): pos_tag presumably needs NLTK's tagger model to be installed
# beforehand — confirm when running in a fresh environment.
pos_tagging = pos_tag(token_data)
print(pos_tagging)

[('In', 'IN'), ('the', 'DT'), ('tower', 'NN'), ('the', 'DT'), ('bell', 'NN'), ('is', 'VBZ'), ('alone', 'RB'), (',', ','), ('like', 'IN'), ('a', 'DT'), ('man', 'NN'), ('in', 'IN'), ('his', 'PRP$'), ('room', 'NN'), (',', ','), ('thinking', 'VBG'), ('and', 'CC'), ('thinking', 'VBG'), ('.', '.'), ('The', 'DT'), ('bell', 'NN'), ('is', 'VBZ'), ('made', 'VBN'), ('of', 'IN'), ('iron', 'NN'), ('.', '.'), ('It', 'PRP'), ('takes', 'VBZ'), ('the', 'DT'), ('weight', 'NN'), ('of', 'IN'), ('a', 'DT'), ('man', 'NN'), ('to', 'TO'), ('make', 'VB'), ('the', 'DT'), ('bell', 'NN'), ('move', 'NN'), ('.', '.'), ('Far', 'NNP'), ('below', 'IN'), (',', ','), ('the', 'DT'), ('bell', 'NN'), ('feels', 'VBZ'), ('hands', 'NNS'), ('on', 'IN'), ('a', 'DT'), ('rope', 'NN'), ('.', '.'), ('It', 'PRP'), ('considers', 'VBZ'), ('this', 'DT'), ('.', '.'), ('It', 'PRP'), ('turns', 'VBZ'), ('its', 'PRP$'), ('head', 'NN'), ('.', '.'), ('Miles', 'NNP'), ('away', 'RB'), (',', ','), ('a', 'DT'), ('man', 'NN'), ('in', 'IN'), ('his'

In [6]:
# Define the chunk grammar passed to RegexpParser.
# "chunk" is the label given to every matched group. Each {...} line is one rule that
# matches one or more consecutive tokens carrying the same tag:
#   {<NN>+}    runs of NN   (singular common nouns in the Penn Treebank tagset)
#   {<NNP>+}   runs of NNP  (singular proper nouns)
#   {<NNPS>+}  runs of NNPS (plural proper nouns)
# Plural common nouns (NNS) are not covered, so a token like hands/NNS stays outside
# any chunk in the result.
sequence_chunk = """
                chunk:
                {<NN>+}
                {<NNP>+}
                {<NNPS>+}
                """

In [7]:
# Build the regular-expression chunk parser from the grammar string in sequence_chunk.
chunk = RegexpParser(sequence_chunk)

In [8]:
# Final step: run the chunk parser over the POS-tagged tokens and print the result.
# The output is a tree rooted at S; every matched noun run appears as a
# "(chunk ...)" subtree, and all other tokens stay as plain word/TAG leaves.
chunk_result = chunk.parse(pos_tagging)
print(chunk_result)

(S
  In/IN
  the/DT
  (chunk tower/NN)
  the/DT
  (chunk bell/NN)
  is/VBZ
  alone/RB
  ,/,
  like/IN
  a/DT
  (chunk man/NN)
  in/IN
  his/PRP$
  (chunk room/NN)
  ,/,
  thinking/VBG
  and/CC
  thinking/VBG
  ./.
  The/DT
  (chunk bell/NN)
  is/VBZ
  made/VBN
  of/IN
  (chunk iron/NN)
  ./.
  It/PRP
  takes/VBZ
  the/DT
  (chunk weight/NN)
  of/IN
  a/DT
  (chunk man/NN)
  to/TO
  make/VB
  the/DT
  (chunk bell/NN move/NN)
  ./.
  (chunk Far/NNP)
  below/IN
  ,/,
  the/DT
  (chunk bell/NN)
  feels/VBZ
  hands/NNS
  on/IN
  a/DT
  (chunk rope/NN)
  ./.
  It/PRP
  considers/VBZ
  this/DT
  ./.
  It/PRP
  turns/VBZ
  its/PRP$
  (chunk head/NN)
  ./.
  (chunk Miles/NNP)
  away/RB
  ,/,
  a/DT
  (chunk man/NN)
  in/IN
  his/PRP$
  (chunk room/NN)
  hears/VBZ
  the/DT
  clear/JJ
  (chunk sound/NN)
  ,/,
  and/CC
  lifts/VBZ
  his/PRP$
  (chunk head/NN)
  to/TO
  listen/VB
  ./.)
