In [None]:
!pip install pandas nltk spacy textblob openpyxl
!pip install pandas nltk textblob spacy gensim openpyxl


In [1]:
import pandas as pd

data = pd.read_excel('/content/Assignment (1).xlsx')


In [2]:
data.head()

Unnamed: 0,Article
0,"Retailers, the makers of foods marketed for we..."
1,"Move over, Ozempic — there’s a new drug in tow..."
2,Sept 14 (Reuters) - Bristol Myers Squibb (BMY....
3,Austin Wolcott was 18 years old and pretty sur...
4,"Cancer, often referred to as the “emperor of a..."


In [3]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import spacy
from textblob import TextBlob

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = re.sub(r'[^\w\s]', '', text)
    words = [word for word in word_tokenize(text.lower()) if word not in stop_words]
    return ' '.join(words)

data['cleaned_article'] = data['Article'].apply(clean_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
data.head()

Unnamed: 0,Article,cleaned_article
0,"Retailers, the makers of foods marketed for we...",retailers makers foods marketed weight loss ty...
1,"Move over, Ozempic — there’s a new drug in tow...",move ozempic theres new drug town eli lillys z...
2,Sept 14 (Reuters) - Bristol Myers Squibb (BMY....,sept 14 reuters bristol myers squibb bmyn said...
3,Austin Wolcott was 18 years old and pretty sur...,austin wolcott 18 years old pretty sure wouldn...
4,"Cancer, often referred to as the “emperor of a...",cancer often referred emperor maladies unyield...


In [6]:
def analyze_sentiment(text):
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    if polarity > 0:
        return 'positive'
    elif polarity < 0:
        return 'negative'
    else:
        return 'neutral'

data['mood'] = data['cleaned_article'].apply(analyze_sentiment)


In [7]:
from gensim import corpora
from gensim.models import LdaModel

word_list = [article.split() for article in data['cleaned_article']]
dictionary = corpora.Dictionary(word_list)
corpus = [dictionary.doc2bow(words) for words in word_list]

lda_model = LdaModel(corpus, num_topics=3, id2word=dictionary, passes=15)

topics = lda_model.print_topics(num_words=5)

for idx, topic in enumerate(topics, start=1):
    print(f"Topic {idx}: {topic}")


Topic 1: (0, '0.011*"cart" + 0.010*"cancer" + 0.008*"therapy" + 0.006*"says" + 0.006*"cells"')
Topic 2: (1, '0.017*"nike" + 0.010*"firm" + 0.006*"believe" + 0.006*"market" + 0.005*"also"')
Topic 3: (2, '0.009*"account" + 0.006*"taco" + 0.006*"new" + 0.005*"brukinsa" + 0.005*"strava"')


In [21]:
from textblob import TextBlob

aspect_keywords = {
    'innovation': ['innovation', 'new technology', 'tech advancement'],
    'cost': ['cost', 'expensive', 'price'],
    'plan': ['plan', 'proposal', 'scheme'],
    'traffic_impact': ['traffic', 'congestion', 'commute']
}

def find_aspects_and_sentiment(text, aspect_keywords):
    aspect_sentiment = {}
    blob = TextBlob(text)

    for aspect, keywords in aspect_keywords.items():
        for keyword in keywords:
            if keyword in text:
                sentiment_polarity = blob.sentiment.polarity
                sentiment = 'positive' if sentiment_polarity > 0 else 'negative' if sentiment_polarity < 0 else 'neutral'
                aspect_sentiment[aspect] = sentiment
                break
    return aspect_sentiment

data['aspect_sentiment'] = data['cleaned_article'].apply(lambda x: find_aspects_and_sentiment(x, aspect_keywords))



In [22]:
for idx, row in data.iterrows():
    print(f"Article {idx + 1}:")
    print(f" Text: {row['cleaned_article']}")
    print(f"Aspects and Sentiment: {row['aspect_sentiment']}")


Article 1:
 Text: retailers makers foods marketed weight loss types companies could see knockon effects rise diabetes weight loss drugs like ozempic every summer publicly traded companies posted secondquarter results americans baring bodies beach year timing apt several earnings calls august chief executives reassured investors ozempic revolution left dust could somehow share blazing success new diabetes weight loss drugs puts us good position solution drugs said dan r chard chief executive medifast makes diet products like shakes protein bars adding theyre looking guidance told analysts even explaining newgeneration drugs helped pummel earnings 347 percent year year continue study michael johnson chief executive nutritional supplement maker herbalife told investors see opportunity capitalize theory opportunity making profits losing fortunes could vast companies behind drugs also completely different industries known glp1 drugs medications already driving big profits novo nordisk makes

In [23]:
flattened_aspect_sentiment = []

for idx, row in data.iterrows():
    if isinstance(row['aspect_sentiment'], dict):
        for aspect, sentiment in row['aspect_sentiment'].items():
            flattened_aspect_sentiment.append({
                'article_id': idx,
                'aspect': aspect,
                'predicted_sentiment': sentiment
            })

predictions = pd.DataFrame(flattened_aspect_sentiment)


print("Flattened Aspect Sentiments:")
print(predictions.head())


Flattened Aspect Sentiments:
   article_id aspect predicted_sentiment
0           0   cost            positive
1           0   plan            positive
2           1   cost            negative
3           2   cost            positive
4           2   plan            positive


In [24]:
ground_truth = pd.DataFrame({
    'article_id': [0, 1, 2],
    'aspect': ['innovation', 'cost', 'plan'],
    'expected_sentiment': ['positive', 'negative', 'positive']
})

comparison = pd.merge(
    ground_truth,
    predictions,
    on=['article_id', 'aspect'],
    how='inner'
)

allowed_classes = ['positive', 'negative', 'neutral']
comparison = comparison[
    (comparison['expected_sentiment'].isin(allowed_classes)) &
    (comparison['predicted_sentiment'].isin(allowed_classes))
]

from sklearn.metrics import classification_report

report = classification_report(
    comparison['expected_sentiment'],
    comparison['predicted_sentiment'],
    zero_division=0
)

print("Classification Report:\n", report)


Classification Report:
               precision    recall  f1-score   support

    negative       1.00      1.00      1.00         1
    positive       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

