<a href="https://colab.research.google.com/github/Usama-alt1/ML/blob/web/News_Headlines_%2B_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import datetime
from urllib.parse import urljoin

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from textblob import TextBlob

In [None]:
def scrape_dawn_headlines(section_url="https://www.dawn.com", timeout=10):
    """Scrape headline titles and article links from a Dawn page.

    Args:
        section_url: URL of the page to scrape. Defaults to the homepage.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        pandas.DataFrame with the columns "title" and "link", one row per
        <h2 class="story__title"> element found on the page. "link" is an
        absolute URL, or None when the headline contains no <a href=...>.
        The frame is empty (but still has both columns) when nothing matches.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(section_url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page into an empty frame.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    headlines = []
    for tag in soup.find_all("h2", class_="story__title"):
        # Not every headline is guaranteed to wrap a link; avoid indexing None.
        anchor = tag.find("a", href=True)
        link = None
        if anchor is not None:
            # The page mixes relative and absolute hrefs; resolve them against
            # the final (post-redirect) URL so every link is directly usable.
            link = urljoin(response.url, anchor["href"])
        headlines.append({"title": tag.get_text(strip=True), "link": link})

    # Explicit columns keep the schema stable even when no headline was found.
    return pd.DataFrame(headlines, columns=["title", "link"])

In [None]:
def analyze_sentiment(df, text_column="title"):
    """Score each headline with TextBlob and label it Positive/Negative/Neutral.

    NOTE: a later cell defines a VADER-based function with the same name; when
    the notebook is run top to bottom that later definition replaces this one.

    Args:
        df: DataFrame holding the headlines. It is modified in place.
        text_column: Name of the column containing the headline text. Defaults
            to "title", the column produced by scrape_dawn_headlines.

    Returns:
        The same DataFrame with two added columns:
        "Polarity" (TextBlob polarity score) and "Sentiment" (the label
        "Positive" for polarity > 0, "Negative" for polarity < 0, otherwise
        "Neutral").

    Raises:
        KeyError: If text_column is not a column of df.
    """
    polarities = [
        TextBlob(headline).sentiment.polarity for headline in df[text_column]
    ]

    labels = []
    for polarity in polarities:
        if polarity > 0:
            labels.append("Positive")
        elif polarity < 0:
            labels.append("Negative")
        else:
            labels.append("Neutral")

    # Store the numeric score next to the label; TextBlob provides a single
    # polarity value, not separate pos/neg/compound scores.
    df["Polarity"] = polarities
    df["Sentiment"] = labels
    return df

In [None]:
!pip install nltk
import nltk

nltk.download('vader_lexicon')

from nltk.sentiment.vader import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()
# Instead of 'Headline', use a sample headline string
sample_headline = "Pakistan wins the cricket match."
sentiments = analyzer.polarity_scores(sample_headline)
print(sentiments)



[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


{'neg': 0.0, 'neu': 0.519, 'pos': 0.481, 'compound': 0.5719}


In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

def analyze_sentiment(df):
    """Attach VADER polarity scores to every headline in ``df``.

    Args:
        df: DataFrame with a "title" column of headline strings. It is
            modified in place.

    Returns:
        The same DataFrame with a "Sentiment" column added; each cell holds
        the dict returned by ``polarity_scores`` for that row's title
        (keys 'neg', 'neu', 'pos' and 'compound').
    """
    vader = SentimentIntensityAnalyzer()
    # One score dict per headline, in row order.
    df["Sentiment"] = [vader.polarity_scores(text) for text in df["title"]]
    return df

In [None]:
def visualize_sentiment(df):
    """Draw a bar chart counting headlines per sentiment category.

    The "Sentiment" column may contain ready-made labels or VADER score dicts
    (as stored by the VADER-based analyze_sentiment). Dicts are unhashable and
    cannot be counted, so each one is first converted to a label from its
    "compound" score using the conventional VADER cut-offs: >= 0.05 is
    "Positive", <= -0.05 is "Negative", anything in between is "Neutral".

    Args:
        df: DataFrame with a "Sentiment" column. It is not modified.

    Returns:
        None. The figure is rendered with plt.show().
    """
    def _as_label(value):
        # Leave values that are already labels untouched.
        if not isinstance(value, dict):
            return value
        compound = value.get("compound", 0.0)
        if compound >= 0.05:
            return "Positive"
        if compound <= -0.05:
            return "Negative"
        return "Neutral"

    # assign() returns a new frame, so the caller's data keeps its raw scores.
    plot_df = df.assign(Sentiment=df["Sentiment"].map(_as_label))

    # The style must be set before the axes are created to take effect.
    sns.set(style="whitegrid")
    _, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(x="Sentiment", data=plot_df, palette="Set2", ax=ax)
    ax.set_title("Sentiment Distribution of Dawn Headlines")
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Count")
    plt.show()

In [None]:
def visualize_sentiment(df):
    """Draw a bar chart counting headlines per sentiment category.

    The "Sentiment" column may contain ready-made labels or VADER score dicts
    (as stored by the VADER-based analyze_sentiment). Dicts are unhashable and
    cannot be counted, so each one is first converted to a label from its
    "compound" score using the conventional VADER cut-offs: >= 0.05 is
    "Positive", <= -0.05 is "Negative", anything in between is "Neutral".

    Args:
        df: DataFrame with a "Sentiment" column. It is not modified.

    Returns:
        None. The figure is rendered with plt.show().
    """
    def _as_label(value):
        # Leave values that are already labels untouched.
        if not isinstance(value, dict):
            return value
        compound = value.get("compound", 0.0)
        if compound >= 0.05:
            return "Positive"
        if compound <= -0.05:
            return "Negative"
        return "Neutral"

    # assign() returns a new frame, so the caller's data keeps its raw scores.
    plot_df = df.assign(Sentiment=df["Sentiment"].map(_as_label))

    # The style must be set before the axes are created to take effect.
    sns.set(style="whitegrid")
    _, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(x="Sentiment", data=plot_df, palette="Set2", ax=ax)
    ax.set_title("Sentiment Distribution of Dawn Headlines")
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Count")
    plt.show()

In [None]:
import concurrent.futures
# Driver cell: scrape two Dawn pages, score the homepage headlines, save a CSV.
# Inside a notebook kernel __name__ is "__main__", so this body runs whenever
# the cell is executed.
if __name__ == "__main__":
    # Pages to scrape: the homepage and the business section.
    urls = ["https://www.dawn.com", "https://www.dawn.com/business"]
    # Fetch both pages concurrently; executor.map returns the results in the
    # same order as `urls`, which the indexing below relies on.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(scrape_dawn_headlines, urls))

    headlines_df_homepage = results[0]
    # NOTE(review): the business headlines are scraped but never analyzed,
    # plotted or saved in this cell - confirm whether that is intentional.
    headlines_df_business = results[1]
    print("🧠 Analyzing sentiment...")

    # Only the homepage headlines are scored.
    analyzed_df = analyze_sentiment(headlines_df_homepage)

    print(analyzed_df.head())

    print("📊 Visualizing sentiment...")
    # The plot call is disabled, so the message above prints without a chart.
    # NOTE(review): presumably switched off because the "Sentiment" column
    # holds score dicts, which countplot cannot count - confirm before
    # re-enabling.
    # visualize_sentiment(analyzed_df)

    print("💾 Saving analysis results...")
    # Written relative to the kernel's working directory, without the index.
    analyzed_df.to_csv("dawn_sentiment_analysis.csv", index=False)


🧠 Analyzing sentiment...
                                               title  \
0                End of live blog for elections 2024   
1  Scrutiny concludes for nomination papers of 48...   
2        ECP notifies Mahmood Khan as PTI-P chairman   
3  Gaza death toll climbs to 50,912 after Israeli...   
4  PSL 10 formally begins with glitzy opening cer...   

                                                link  \
0                       /live/elections-2024#1822566   
1                       /live/elections-2024#1822564   
2                       /live/elections-2024#1822561   
3            https://www.dawn.com/live/gaza-invasion   
4  https://www.dawn.com/news/1903543/psl-10-forma...   

                                           Sentiment  
0  {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...  
1  {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...  
2  {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...  
3  {'neg': 0.402, 'neu': 0.598, 'pos': 0.0, 'comp...  
4  {'neg': 0.0, 'neu': 1.0