## Named Entity Recognition

In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk import pos_tag, ne_chunk
import string
# Fetch every NLTK resource this notebook relies on so that it runs on a fresh
# environment: `punkt` backs word_tokenize, `stopwords` backs the stop-word
# list, the perceptron tagger backs pos_tag, and the MaxEnt chunker together
# with the `words` corpus backs ne_chunk.
# NOTE(review): newer NLTK releases may look for differently named resources
# (e.g. `punkt_tab`) -- confirm against the installed NLTK version.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
# English stop words collected into a set for fast membership checks.
# None of the cells visible in this notebook reference it afterwards.
stop_words = {word for word in stopwords.words('english')}

In [3]:
def extract_entities(entities):
    """Collect the named-entity chunks found in a chunked sequence.

    Args:
        entities: Iterable whose items are either plain elements or
            ``nltk.Tree`` chunks whose leaves are ``(token, tag)`` pairs.

    Returns:
        A list of ``(entity_text, label)`` tuples, in encounter order, where
        ``entity_text`` is the chunk's tokens joined by single spaces and
        ``label`` is the chunk's label. Items that are not ``nltk.Tree``
        instances are skipped.
    """
    return [
        (" ".join(word for word, _pos in chunk.leaves()), chunk.label())
        for chunk in entities
        if isinstance(chunk, nltk.Tree)
    ]

In [4]:
def write_entities_to_file(named_entities, output_file, encoding="utf-8"):
    """Write named entities to a text file, one ``entity : label`` per line.

    Args:
        named_entities: Iterable of ``(entity, label)`` pairs.
        output_file: Path of the file to create or overwrite.
        encoding: Text encoding used for the output file. Defaults to UTF-8
            so the result does not depend on the platform's locale encoding.
    """
    # Without an explicit encoding, open() falls back to the locale code page,
    # which can raise UnicodeEncodeError for entity text it cannot represent.
    with open(output_file, 'w', encoding=encoding) as file:
        for entity, label in named_entities:
            file.write(f"{entity} : {label}\n")


In [5]:
def main(input_file, output_file):
    """Run named-entity recognition on a text file and save the results.

    Reads ``input_file``, tokenizes and POS-tags its text, chunks the tagged
    tokens with ``ne_chunk``, prints the extracted ``(entity, label)`` pairs,
    and writes them to ``output_file`` via ``write_entities_to_file``.

    Args:
        input_file: Path of the text file to analyse.
        output_file: Path of the file that receives the entity list.
    """
    # Decode explicitly instead of relying on the locale code page, which
    # would silently mis-decode UTF-8 input on some platforms. The "-sig"
    # variant also drops a leading byte-order mark so it cannot end up glued
    # to the first token.
    with open(input_file, 'r', encoding='utf-8-sig') as file:
        text = file.read()

    # The whole document is tokenized, tagged and chunked as one sequence.
    tokens = word_tokenize(text)
    tagged_tokens = pos_tag(tokens)
    entities = ne_chunk(tagged_tokens)
    named_entities = extract_entities(entities)

    print("\nNamed Entities:")
    print(named_entities)

    write_entities_to_file(named_entities, output_file)

    print("\nOutput written to:", output_file)

if __name__ == "__main__":
    # Machine-specific absolute paths: adjust these before running the
    # notebook on another computer.
    input_file, output_file = (
        r"C:\Users\pc\Downloads\ADS Lab programs\input.txt",
        r"C:\Users\pc\Downloads\ADS Lab programs\output.txt",
    )
    main(input_file, output_file)


Named Entities:
[('NLP', 'ORGANIZATION'), ('NLP', 'ORGANIZATION'), ('Siri', 'PERSON'), ('Alexa', 'PERSON'), ('Google', 'PERSON'), ('NLP', 'ORGANIZATION'), ('NER', 'ORGANIZATION'), ('Steve Jobs', 'PERSON'), ('Apple Inc.', 'ORGANIZATION'), ('Cupertino', 'GPE'), ('California', 'GPE'), ('NER', 'ORGANIZATION'), ('Steve Jobs', 'PERSON'), ('Apple', 'PERSON'), ('California', 'GPE'), ('NER', 'ORGANIZATION'), ('CRFs', 'ORGANIZATION'), ('RNNs', 'ORGANIZATION'), ('NER', 'ORGANIZATION'), ('NLP', 'ORGANIZATION'), ('NLP', 'ORGANIZATION')]

Output written to: C:\Users\pc\Downloads\ADS Lab programs\output.txt
