## Named Entity Recognition Using NLTK

Installing dependencies:

- Install the NLTK library

In [None]:
pip install nltk

In [None]:
# Import nltk library
import nltk
from nltk.corpus import stopwords
from nltk.tree import Tree

# Download the required NLTK data packages.
# Recent NLTK releases (3.9+) look up the tokenizer, POS tagger and NE chunker
# under new resource ids ('punkt_tab', 'averaged_perceptron_tagger_eng',
# 'maxent_ne_chunker_tab'), while older releases use the legacy ids. Both sets
# are requested so the notebook runs regardless of the installed version.
NLTK_RESOURCES = [
    # Legacy ids (older NLTK releases)
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    # Newer ids (NLTK 3.9+)
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker_tab',
    # Corpora requested under the same ids as before
    'words',
    'stopwords',
]

for resource in NLTK_RESOURCES:
    nltk.download(resource)

### Extract the named entities from text using NLTK
    
Below are a few example sentences for trying out NER with NLTK:

* India won the 2011 ICC Cricket World Cup by defeating Sri Lanka in the final at Mumbai.
* Virat Kohli is one of the best cricketers of India.
* Sachin Tendulkar is widely regarded as the greatest Indian batsman of all time.

In [None]:
# Input text for the NER pipeline.
# Other sentences to try:
#   "Narendra Modi is the prime minister of India."
#   "Barack Obama was born in Hawaii."
sentence = "Virat kohli is one of the best cricketer of India."

# Tokenize -> POS-tag -> chunk. Each stage consumes the previous stage's output.
words = nltk.word_tokenize(sentence)   # Step 1: split the sentence into word tokens
tags = nltk.pos_tag(words)             # Step 2: attach a part-of-speech tag to each token
chunks = nltk.ne_chunk(tags)           # Step 3: group tagged tokens into named-entity chunks

# Show the heading, a blank line, then the chunked result.
print("Named Entities:\n", chunks, sep="\n")


### Filtering the named entities

In [None]:
# Step 4: Convert NLTK's tree format to IOB (Inside-Outside-Beginning) tagged format.
# Each item is a (word, pos_tag, ner_tag) triple; the filter below treats 'O' as "not an entity".
iob_tags = nltk.tree2conlltags(chunks)

# Step 5: Filter out non-entities and stopwords.
# The stopword list is loaded once into a set instead of calling
# stopwords.words('english') for every token inside the comprehension.
# `stopwords` comes from `from nltk.corpus import stopwords` in the imports cell.
english_stopwords = set(stopwords.words('english'))
filtered_tags = [
    (word, tag, ner)
    for word, tag, ner in iob_tags
    if ner != 'O' and word.lower() not in english_stopwords
]

# Print the filtered named entities
print("Filtered Named Entities: \n")
for word, tag, ner in filtered_tags:
    print(f"Word: {word}, Tag: {tag}, NER: {ner}")

### Visualizing the result in tree format

In [None]:
# Step 6: Draw the named-entity tree as text.
# The chunk tree is serialised to its string form and parsed back into a Tree,
# which is then rendered with pretty_print().
bracketed_text = str(chunks)
tree = Tree.fromstring(bracketed_text)
tree.pretty_print()