<a href="https://colab.research.google.com/github/Yash-Yelave/Natural_language_processing/blob/main/NLP_PR2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import nltk
from nltk import ne_chunk, pos_tag, word_tokenize
from nltk.tree import Tree

# Fetch every NLTK resource the pipeline below relies on. Both the legacy
# names and the newer "_tab" / "_eng" variants are requested (covering all cases).
for resource in (
    "maxent_ne_chunker",
    "maxent_ne_chunker_tab",
    "words",
    "punkt",
    "punkt_tab",
    "averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng",
):
    nltk.download(resource)


def perform_ner_nltk(text):
    """Extract named entities from ``text`` using NLTK.

    The text is passed through ``word_tokenize``, ``pos_tag`` and ``ne_chunk``
    in turn. Each direct child of the chunked result that is a ``Tree`` is
    reported as one entity; every other child is skipped.

    Args:
        text: The string to analyse.

    Returns:
        A list of ``(entity, label)`` tuples in the order the chunked tree
        yields them. ``entity`` is the subtree's tokens joined with single
        spaces and ``label`` is the subtree's label.
    """
    chunked = ne_chunk(pos_tag(word_tokenize(text)))

    # Only Tree children carry an entity label; leaves() yields (token, tag)
    # pairs, of which just the token is kept.
    return [
        (" ".join(word for word, _tag in subtree.leaves()), subtree.label())
        for subtree in chunked
        if isinstance(subtree, Tree)
    ]

# Demo: run the NLTK-based extractor on one sample sentence and list
# each (entity, label) pair it returns.
text = "Reliance Industries, led by Mukesh Ambani, announced a new partnership"
entities = perform_ner_nltk(text)

print("Named Entities:")
for pair in entities:
    entity, label = pair
    print(f"{entity} ({label})")

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package maxent_ne_chunker_tab to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker_tab.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


Named Entities:
Reliance (GPE)
Industries (ORGANIZATION)
Mukesh Ambani (PERSON)


# Using spaCy

In [2]:
import spacy

# Load the "en_core_web_sm" pipeline and run it over a sample paragraph.
nlp = spacy.load("en_core_web_sm")

text = "On 15th August 2023, Microsoft announced a partnership with the Indian government in New Delhi. Satya Nadella, the CEO of Microsoft, stated that the collaboration would focus on Artificial Intelligence research and digital innovation. Earlier in 2022, Google had launched a similar initiative in Bengaluru, where Sundar Pichai highlighted the importance of AI in education and healthcare."
doc = nlp(text)

# Size the columns from the entities themselves so that a long entity text or
# label cannot push the "|" separators out of line. 20 and 10 are kept as
# minimum widths, so tables whose values already fit look the same as before;
# the leading constant also keeps max() valid when no entities are found.
text_width = max([20, *(len(ent.text) for ent in doc.ents)])
label_width = max([10, *(len(ent.label_) for ent in doc.ents)])

print("Named Entities, Phrases, and Labels:")
for ent in doc.ents:
    # Columns: entity text | entity label | spaCy's description of that label.
    print(f"{ent.text:<{text_width}} | {ent.label_:<{label_width}} | {spacy.explain(ent.label_)}")


Named Entities, Phrases, and Labels:
15th                 | ORDINAL    | "first", "second", etc.
August 2023          | DATE       | Absolute or relative dates or periods
Microsoft            | ORG        | Companies, agencies, institutions, etc.
Indian               | NORP       | Nationalities or religious or political groups
New Delhi            | GPE        | Countries, cities, states
Satya Nadella        | PERSON     | People, including fictional
Microsoft            | ORG        | Companies, agencies, institutions, etc.
Artificial Intelligence | ORG        | Companies, agencies, institutions, etc.
2022                 | DATE       | Absolute or relative dates or periods
Google               | ORG        | Companies, agencies, institutions, etc.
Bengaluru            | GPE        | Countries, cities, states
Sundar Pichai        | ORG        | Companies, agencies, institutions, etc.
AI                   | GPE        | Countries, cities, states
