# **Advanced NLP Techniques (NER, Sentiment Analysis)**

In [13]:
import pandas as pd
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Download the necessary NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [16]:
data_ner = {
    'text': [
        'Google announced a new AI tool in San Francisco yesterday.',
        'Marie Curie was awarded the Nobel Prize in Physics in 1903.',
        'Tesla and SpaceX are both led by the visionary Elon Musk.',
        'The Eiffel Tower is one of the most famous landmarks in Paris.',
        'The Amazon rainforest is often referred to as the lungs of the Earth.',
        'The Mona Lisa, painted by Leonardo da Vinci, is housed in the Louvre.',
        'The FIFA World Cup 2022 was held in Qatar.'
    ]
}

df_ner = pd.DataFrame(data_ner)
df_ner.head()


Unnamed: 0,text
0,Google announced a new AI tool in San Francisc...
1,Marie Curie was awarded the Nobel Prize in Phy...
2,Tesla and SpaceX are both led by the visionary...
3,The Eiffel Tower is one of the most famous lan...
4,The Amazon rainforest is often referred to as ...


**Named Entity Recognition (NER) with NLTK**

In [17]:
def nltk_ner(text):

    tokens = word_tokenize(text)
    pos_tags = pos_tag(tokens)

    # Perform NER
    named_entities = ne_chunk(pos_tags)

    # Extract named entities as a string
    entities = []
    for subtree in named_entities:
        if isinstance(subtree, nltk.Tree):
            entity_name = " ".join([word for word, pos in subtree.leaves()])
            entity_type = subtree.label()
            entities.append(f"{entity_name} ({entity_type})")

    return entities
#Applying the function to DataFrame
df_ner['entities'] = df_ner['text'].apply(nltk_ner)

df_ner[['text', 'entities']]


Unnamed: 0,text,entities
0,Google announced a new AI tool in San Francisc...,"[Google (PERSON), San Francisco (GPE)]"
1,Marie Curie was awarded the Nobel Prize in Phy...,"[Marie (PERSON), Curie (PERSON), Nobel Prize (..."
2,Tesla and SpaceX are both led by the visionary...,"[Tesla (GPE), SpaceX (ORGANIZATION), Elon Musk..."
3,The Eiffel Tower is one of the most famous lan...,"[Eiffel Tower (ORGANIZATION), Paris (GPE)]"
4,The Amazon rainforest is often referred to as ...,"[Amazon (ORGANIZATION), Earth (LOCATION)]"
5,"The Mona Lisa, painted by Leonardo da Vinci, i...","[Mona Lisa (ORGANIZATION), Leonardo (PERSON), ..."
6,The FIFA World Cup 2022 was held in Qatar.,"[FIFA (ORGANIZATION), Qatar (GPE)]"


**Sentiment Analysis with NLTK's VADER**

In [18]:
sia = SentimentIntensityAnalyzer()

def sentiment_analysis(text):
    return sia.polarity_scores(text)

# Apply sentiment analysis to the DataFrame
df_ner['sentiment'] = df_ner['text'].apply(sentiment_analysis)

df_ner[['text', 'entities', 'sentiment']]


Unnamed: 0,text,entities,sentiment
0,Google announced a new AI tool in San Francisc...,"[Google (PERSON), San Francisco (GPE)]","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
1,Marie Curie was awarded the Nobel Prize in Phy...,"[Marie (PERSON), Curie (PERSON), Nobel Prize (...","{'neg': 0.0, 'neu': 0.6, 'pos': 0.4, 'compound..."
2,Tesla and SpaceX are both led by the visionary...,"[Tesla (GPE), SpaceX (ORGANIZATION), Elon Musk...","{'neg': 0.0, 'neu': 0.746, 'pos': 0.254, 'comp..."
3,The Eiffel Tower is one of the most famous lan...,"[Eiffel Tower (ORGANIZATION), Paris (GPE)]","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
4,The Amazon rainforest is often referred to as ...,"[Amazon (ORGANIZATION), Earth (LOCATION)]","{'neg': 0.0, 'neu': 0.876, 'pos': 0.124, 'comp..."
5,"The Mona Lisa, painted by Leonardo da Vinci, i...","[Mona Lisa (ORGANIZATION), Leonardo (PERSON), ...","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
6,The FIFA World Cup 2022 was held in Qatar.,"[FIFA (ORGANIZATION), Qatar (GPE)]","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
