# __Named Entity Recognition__



## Getting all the imports


### nltk imports

In [72]:
import nltk
# Fetch the NLTK data packages this notebook relies on. Per the recorded output,
# packages that are already present are reported as up to date.
nltk.download('punkt')  # tokenizer models used by word_tokenize
nltk.download('averaged_perceptron_tagger')  # model used by pos_tag
nltk.download('maxent_ne_chunker')  # pre-trained chunker used by nltk.ne_chunk
nltk.download('words')  # word list required by the NE chunker
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import conlltags2tree, tree2conlltags
from pprint import pprint

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


### spaCy imports

In [73]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm

## nltkNER

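`nltkNER` is a class that performs named entity recognition (NER) with the NLTK library: it tokenizes and POS-tags a sentence, groups tokens into noun-phrase chunks with a regular-expression grammar, and labels entities with NLTK's pre-trained named-entity chunker.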

### The class definition

In [75]:
class nltkNER:
  """Chunking and named-entity recognition for a single sentence using NLTK.

  Intended call order: ``preprocess`` first (it stores the raw sentence and
  its POS-tagged tokens), then ``create_chunks`` followed by ``get_iob_tags``
  and/or ``get_entities_with_classifier``.
  """

  def __init__(self):
    """Set the model name and chunk grammar; every result slot starts as ''."""
    self.model_name = "nltk"
    # POS-tagged tokens produced by preprocess().
    self.preprocess_sentence = ''
    # Raw sentence most recently passed to preprocess().
    self.sent = ''
    # Grammar handed to nltk.RegexpParser: an NP chunk is an optional
    # determiner (DT), any number of adjectives (JJ), then a noun tagged NN.
    self.pattern = 'NP: {<DT>?<JJ>*<NN>}'
    # Chunk tree produced by create_chunks().
    self.cs = ''
    # IOB triples produced by get_iob_tags().
    self.iob_tagged = ''
    # Tree produced by get_entities_with_classifier().
    self.ne_tree = ''

  def preprocess(self, sent):
    """Tokenize and POS-tag ``sent``.

    Stores the raw sentence on ``self.sent`` and the tagged tokens on
    ``self.preprocess_sentence``, and returns the tagged tokens.
    """
    self.sent = sent
    tokens = nltk.word_tokenize(sent)
    self.preprocess_sentence = nltk.pos_tag(tokens)
    return self.preprocess_sentence

  def create_chunks(self):
    """Parse the stored tagged tokens with the ``self.pattern`` grammar.

    Stores the resulting chunk tree on ``self.cs`` and returns it.
    """
    chunk_parser = nltk.RegexpParser(self.pattern)
    chunk_tree = chunk_parser.parse(self.preprocess_sentence)
    self.cs = chunk_tree
    return chunk_tree

  def get_iob_tags(self):
    """Convert the chunk tree in ``self.cs`` with ``tree2conlltags``.

    The result is stored on ``self.iob_tagged`` and pretty-printed; nothing
    is returned.
    """
    iob_triples = tree2conlltags(self.cs)
    self.iob_tagged = iob_triples
    pprint(iob_triples)

  def get_entities_with_classifier(self):
    """Run ``nltk.ne_chunk`` on the stored raw sentence.

    The sentence in ``self.sent`` is tokenized and POS-tagged again here; the
    resulting tree is stored on ``self.ne_tree`` and printed. Nothing is
    returned.
    """
    tokens = word_tokenize(self.sent)
    tagged_tokens = pos_tag(tokens)
    self.ne_tree = nltk.ne_chunk(tagged_tokens)
    print(self.ne_tree)

In [98]:
# Sample sentence used to exercise the nltkNER pipeline in the cells below.
ex = 'European authorities fined Google a record $5.1 billion on Wednesday for abusing its power in the mobile phone market and ordered the company to alter its practices'

### Calling the functions

In [76]:
# Instantiate the NLTK pipeline and show the POS-tagged tokens for the sample sentence.
nltk_ner = nltkNER()
print(nltk_ner.preprocess(ex))


[('European', 'JJ'), ('authorities', 'NNS'), ('fined', 'VBD'), ('Google', 'NNP'), ('a', 'DT'), ('record', 'NN'), ('$', '$'), ('5.1', 'CD'), ('billion', 'CD'), ('on', 'IN'), ('Wednesday', 'NNP'), ('for', 'IN'), ('abusing', 'VBG'), ('its', 'PRP$'), ('power', 'NN'), ('in', 'IN'), ('the', 'DT'), ('mobile', 'JJ'), ('phone', 'NN'), ('market', 'NN'), ('and', 'CC'), ('ordered', 'VBD'), ('the', 'DT'), ('company', 'NN'), ('to', 'TO'), ('alter', 'VB'), ('its', 'PRP$'), ('practices', 'NNS')]


In [77]:
# Group the tagged tokens into NP chunks using the grammar in `nltk_ner.pattern`.
print(nltk_ner.create_chunks())

(S
  European/JJ
  authorities/NNS
  fined/VBD
  Google/NNP
  (NP a/DT record/NN)
  $/$
  5.1/CD
  billion/CD
  on/IN
  Wednesday/NNP
  for/IN
  abusing/VBG
  its/PRP$
  (NP power/NN)
  in/IN
  (NP the/DT mobile/JJ phone/NN)
  (NP market/NN)
  and/CC
  ordered/VBD
  (NP the/DT company/NN)
  to/TO
  alter/VB
  its/PRP$
  practices/NNS)


In [78]:
# Pretty-print the chunk tree as (word, POS tag, IOB chunk tag) triples.
nltk_ner.get_iob_tags()

[('European', 'JJ', 'O'),
 ('authorities', 'NNS', 'O'),
 ('fined', 'VBD', 'O'),
 ('Google', 'NNP', 'O'),
 ('a', 'DT', 'B-NP'),
 ('record', 'NN', 'I-NP'),
 ('$', '$', 'O'),
 ('5.1', 'CD', 'O'),
 ('billion', 'CD', 'O'),
 ('on', 'IN', 'O'),
 ('Wednesday', 'NNP', 'O'),
 ('for', 'IN', 'O'),
 ('abusing', 'VBG', 'O'),
 ('its', 'PRP$', 'O'),
 ('power', 'NN', 'B-NP'),
 ('in', 'IN', 'O'),
 ('the', 'DT', 'B-NP'),
 ('mobile', 'JJ', 'I-NP'),
 ('phone', 'NN', 'I-NP'),
 ('market', 'NN', 'B-NP'),
 ('and', 'CC', 'O'),
 ('ordered', 'VBD', 'O'),
 ('the', 'DT', 'B-NP'),
 ('company', 'NN', 'I-NP'),
 ('to', 'TO', 'O'),
 ('alter', 'VB', 'O'),
 ('its', 'PRP$', 'O'),
 ('practices', 'NNS', 'O')]


In [79]:
# Print the tree that nltk.ne_chunk produces for the sample sentence.
nltk_ner.get_entities_with_classifier()

(S
  (GPE European/JJ)
  authorities/NNS
  fined/VBD
  (PERSON Google/NNP)
  a/DT
  record/NN
  $/$
  5.1/CD
  billion/CD
  on/IN
  Wednesday/NNP
  for/IN
  abusing/VBG
  its/PRP$
  power/NN
  in/IN
  the/DT
  mobile/JJ
  phone/NN
  market/NN
  and/CC
  ordered/VBD
  the/DT
  company/NN
  to/TO
  alter/VB
  its/PRP$
  practices/NNS)


## SpaCyNER

`SpaCyNER` is a class that performs named entity recognition (NER) using the spaCy library and its `en_core_web_sm` English pipeline.

### Class definition

In [93]:
class SpaCyNER:
  """Named-entity recognition for a single sentence using spaCy.

  ``create_model`` must run before ``show_tokens`` or
  ``generate_raw_markup`` are useful, because it is the step that replaces
  the placeholder in ``self.doc`` with the processed document.
  """

  def __init__(self, sentence: str):
    """Load the ``en_core_web_sm`` pipeline and remember ``sentence``."""
    self.nlp = spacy.load("en_core_web_sm")
    self.sentence = sentence
    # Placeholder until create_model() stores the processed document.
    self.doc = ''

  def create_model(self):
    """Process the stored sentence and pretty-print its entities.

    The processed document is kept on ``self.doc``; the printed list holds
    one ``(text, label_)`` pair per entry of ``self.doc.ents``. Nothing is
    returned.
    """
    self.doc = self.nlp(self.sentence)
    entity_pairs = []
    for entity in self.doc.ents:
      entity_pairs.append((entity.text, entity.label_))
    pprint(entity_pairs)

  def show_tokens(self):
    """Pretty-print ``(token, ent_iob_, ent_type_)`` for every item of ``self.doc``."""
    token_rows = [(token, token.ent_iob_, token.ent_type_) for token in self.doc]
    pprint(token_rows)

  def generate_raw_markup(self):
    """Render ``self.doc`` with displaCy's entity style, in Jupyter mode."""
    displacy.render(self.doc, style='ent', jupyter=True)

In [94]:
# Same sample sentence as `ex`, passed to the spaCy-based class below.
doc = 'European authorities fined Google a record $5.1 billion on Wednesday for abusing its power in the mobile phone market and ordered the company to alter its practices'

### Calling the functions

In [95]:
# Load the spaCy pipeline, run it on the sentence, and pretty-print (entity text, label) pairs.
spacy_ner = SpaCyNER(doc)
spacy_ner.create_model()

[('European', 'NORP'),
 ('Google', 'ORG'),
 ('$5.1 billion', 'MONEY'),
 ('Wednesday', 'DATE')]


In [96]:
# Pretty-print each token with its entity IOB flag and entity type.
spacy_ner.show_tokens()

[(European, 'B', 'NORP'),
 (authorities, 'O', ''),
 (fined, 'O', ''),
 (Google, 'B', 'ORG'),
 (a, 'O', ''),
 (record, 'O', ''),
 ($, 'B', 'MONEY'),
 (5.1, 'I', 'MONEY'),
 (billion, 'I', 'MONEY'),
 (on, 'O', ''),
 (Wednesday, 'B', 'DATE'),
 (for, 'O', ''),
 (abusing, 'O', ''),
 (its, 'O', ''),
 (power, 'O', ''),
 (in, 'O', ''),
 (the, 'O', ''),
 (mobile, 'O', ''),
 (phone, 'O', ''),
 (market, 'O', ''),
 (and, 'O', ''),
 (ordered, 'O', ''),
 (the, 'O', ''),
 (company, 'O', ''),
 (to, 'O', ''),
 (alter, 'O', ''),
 (its, 'O', ''),
 (practices, 'O', '')]


In [97]:
# Render the detected entities inline in the notebook via displaCy.
spacy_ner.generate_raw_markup()