### POS Tagging, Chunking, Named Entity Recognition

In [1]:
import nltk
from nltk import pos_tag
from nltk import word_tokenize

# Sample sentence for the part-of-speech tagging demo.
text = "The way to get started is to quit talking and begin doing."

# Split the sentence into word and punctuation tokens.
# NOTE: despite its name, `tokeniser` holds the resulting list of tokens,
# not a tokenizer object.
tokeniser = word_tokenize(text)
print("The tokeniser is: -\n", tokeniser)

# Tag every token with its part of speech. The call is the cell's final bare
# expression, so the notebook displays the list of (token, tag) pairs as the
# cell result right after the heading printed here.
print("The pos tagged tokeniser is: -")
pos_tag(tokeniser)

The tokeniser is: -
 ['The', 'way', 'to', 'get', 'started', 'is', 'to', 'quit', 'talking', 'and', 'begin', 'doing', '.']
The pos tagged tokeniser is: -


[('The', 'DT'),
 ('way', 'NN'),
 ('to', 'TO'),
 ('get', 'VB'),
 ('started', 'VBN'),
 ('is', 'VBZ'),
 ('to', 'TO'),
 ('quit', 'VB'),
 ('talking', 'VBG'),
 ('and', 'CC'),
 ('begin', 'VB'),
 ('doing', 'VBG'),
 ('.', '.')]

### Chunking in NLP

In [2]:
# Example sentence for the chunking section; `tokens` is reused by the
# tagging cell that follows.
sentence = "The quick brown fox jumps over the lazy dog."
tokens = word_tokenize(sentence)
print("The tokens are: -\n", tokens)

The tokens are: -
 ['The', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy', 'dog', '.']


In [3]:
# Pair each token with its part-of-speech tag; the chunk parser in the next
# cell consumes this list of (token, tag) tuples.
tag = pos_tag(tokens)
print(tag)

[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]


In [4]:
# Chunk grammar with a single noun-phrase rule:
#   <DT>?  an optional determiner
#   <JJ>*  any number of adjectives
#   <NN>   exactly one singular-noun tag, which ends the chunk
grammar = "NP: {<DT>?<JJ>*<NN>}"
cp = nltk.RegexpParser(grammar)
# Parse the tagged tokens into a tree whose NP subtrees are the matched chunks.
# In the recorded tagging 'brown' is NN, so the rule closes the first chunk at
# "The quick brown" and 'fox' becomes a separate one-word NP.
# NOTE(review): ending the rule with <NN>+ would presumably keep "brown fox"
# in one chunk — confirm, and refresh the recorded output if changed.
result = cp.parse(tag)
print(result)

(S
  (NP The/DT quick/JJ brown/NN)
  (NP fox/NN)
  jumps/VBZ
  over/IN
  (NP the/DT lazy/JJ dog/NN)
  ./.)


In [5]:
# Show the chunk tree built in the previous cell as a diagram.
# NOTE(review): nltk's Tree.draw() opens a separate GUI window, so this cell
# presumably needs a display (Tk) and blocks until the window is closed —
# confirm before running headless.
result.draw()

### Named Entity Recognition

In [6]:
# Sentence for the named-entity example. The possessive is written with a
# typographic apostrophe (U+2019); in the recorded output it is tokenised as
# two separate tokens, '’' and 's', tagged NNP and VBD.
# NOTE(review): an ASCII apostrophe would presumably be tokenised as a single
# possessive token — confirm, and refresh the recorded outputs if changed.
text = "NASA awarded Elon Musk’s SpaceX a $2.9 billion contract to build the lunar lander."

# Tokenise, then POS-tag; `tag` feeds the named-entity chunker in the next cell.
tokens = word_tokenize(text)
tag = pos_tag(tokens)
print(tag)

[('NASA', 'NNP'), ('awarded', 'VBD'), ('Elon', 'NNP'), ('Musk', 'NNP'), ('’', 'NNP'), ('s', 'VBD'), ('SpaceX', 'NNP'), ('a', 'DT'), ('$', '$'), ('2.9', 'CD'), ('billion', 'CD'), ('contract', 'NN'), ('to', 'TO'), ('build', 'VB'), ('the', 'DT'), ('lunar', 'NN'), ('lander', 'NN'), ('.', '.')]


In [7]:
# Run NLTK's named-entity chunker over the POS-tagged tokens. Recognised
# entities come back as labelled subtrees: in the recorded output NASA and
# SpaceX are ORGANIZATION and "Elon Musk" is PERSON; all other tokens stay
# as plain token/tag leaves.
ne_tree = nltk.ne_chunk(tag)
print(ne_tree)

(S
  (ORGANIZATION NASA/NNP)
  awarded/VBD
  (PERSON Elon/NNP Musk/NNP)
  ’/NNP
  s/VBD
  (ORGANIZATION SpaceX/NNP)
  a/DT
  $/$
  2.9/CD
  billion/CD
  contract/NN
  to/TO
  build/VB
  the/DT
  lunar/NN
  lander/NN
  ./.)
