In [31]:
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
from datetime import datetime
import nltk
from nltk import word_tokenize, pos_tag

# Download NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Function to perform sentiment analysis
def analyze_sentiment(text):
    analysis = TextBlob(text)
    # Get the sentiment polarity score
    sentiment_score = analysis.sentiment.polarity
    # Assigning sentiment label based on polarity score
    if sentiment_score > 0:
        sentiment_label = 'Positive'
    elif sentiment_score == 0:
        sentiment_label = 'Neutral'
    else:
        sentiment_label = 'Negative'
    # Get the reason for sentiment score
    if sentiment_score > 0.2:
        reason = 'Strongly positive'
    elif sentiment_score > 0:
        reason = 'Somewhat positive'
    elif sentiment_score == 0:
        reason = 'Neutral'
    elif sentiment_score > -0.2:
        reason = 'Somewhat negative'
    else:
        reason = 'Strongly negative'
    return sentiment_label, sentiment_score, reason

# Function to extract nouns from the headline
def extract_nouns_from_headline(headline):
    tokens = word_tokenize(headline)
    tagged = pos_tag(tokens)
    nouns = [word for word, pos in tagged if pos.startswith('NN')]
    return nouns

# Function to get headlines from URL and perform sentiment analysis
def analyze_headlines_from_url(url):
    # Retrieve HTML content of the webpage
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Extract headlines, datetime stamps, and article URLs
    headlines_data = []
    for article in soup.find_all('a', class_='article-link'):
        headline = article.find('h3', class_='article-name').text.strip()
        datetime_stamp = article.find('time', class_='date-with-prefix')['datetime']
        article_url = article['href']
        # Perform sentiment analysis of the headline
        sentiment_label, sentiment_score, reason = analyze_sentiment(headline)
        # Extract nouns from headline and use them as tags
        tags = extract_nouns_from_headline(headline)
        headlines_data.append({'headline': headline, 'sentiment_label': sentiment_label, 'sentiment_score': sentiment_score, 'reason': reason, 'datetime': datetime_stamp, 'tags': tags})
    
    return headlines_data

# URL of the webpage containing headlines
url = 'https://www.space.com/search?searchTerm=webb'

# Analyze headlines and sentiment from the URL
headlines_data = analyze_headlines_from_url(url)

# Print results
for headline_data in headlines_data:
    print("Title:", headline_data['headline'])
    print("Sentiment Label:", headline_data['sentiment_label'])
    print("Sentiment Score:", headline_data['sentiment_score'])
    print("Reason:", headline_data['reason'])
    print("Datetime:", headline_data['datetime'])
    print("Tags:", headline_data['tags'])
    print()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DonGaspar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\DonGaspar\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Title: James Webb Space Telescope uncovers massive sunshield in next step of risky deployment
Sentiment Label: Neutral
Sentiment Score: 0.0
Reason: Neutral
Datetime: 2021-12-30T20:22:53Z
Tags: ['James', 'Webb', 'Space', 'Telescope', 'uncovers', 'sunshield', 'step', 'deployment']

Title: James Webb: Early NASA Visionary
Sentiment Label: Positive
Sentiment Score: 0.1
Reason: Somewhat positive
Datetime: 2017-11-22T02:49:15Z
Tags: ['James', 'Webb', 'NASA', 'Visionary']

Title: James Webb Space Telescope arrives at new home in space
Sentiment Label: Positive
Sentiment Score: 0.13636363636363635
Reason: Somewhat positive
Datetime: 2022-01-24T22:12:38Z
Tags: ['James', 'Webb', 'Space', 'Telescope', 'home', 'space']

Title: Paint it black: behind the James Webb Space Telescope's signature color
Sentiment Label: Negative
Sentiment Score: -0.2833333333333333
Reason: Strongly negative
Datetime: 2022-02-08T21:53:19Z
Tags: ['Paint', 'James', 'Webb', 'Space', 'Telescope', 'signature', 'color']

Title

In [15]:
#pip install requests beautifulsoup4 textblob
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob

In [34]:
import requests
from bs4 import BeautifulSoup

# Function to get news data
def get_news_data():
    # Set headers to mimic a browser visit
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Google News URL
    news_url = "https://news.google.com/news/rss"

    # Make an HTTP request to the URL
    response = requests.get(news_url, headers=headers)

    # Parse the XML content of the response
    soup = BeautifulSoup(response.content, features="xml")

    # Find all news items
    news_items = soup.findAll('item')

    # List to hold news data
    news_data = []

    # Extract title, link, and publish date for each news item
    for item in news_items:
        news_data.append({
            "title": item.title.text,
            "link": item.link.text,
            "pubDate": item.pubDate.text
        })

    return news_data

# Function to print news data
def print_news_data(news_data):
    for news in news_data:
        print(f"Title: {news['title']}")
        print(f"Link: {news['link']}")
        print(f"Publish Date: {news['pubDate']}")
        print("-" * 60)

# Get news data
news_data = get_news_data()

# Print news data
print_news_data(news_data)

Title: Nikki Haley suspends election 2024 campaign - The Associated Press
Link: https://news.google.com/rss/articles/CBMibWh0dHBzOi8vYXBuZXdzLmNvbS9hcnRpY2xlL25pa2tpLWhhbGV5LXJlcHVibGljYW4tdHJ1bXAtc3VwZXItdHVlc2RheS1sb3NzZXMtOTVhYjU2YjY4YThlZWZiYmYwNGVmOTBmMmYwMGVmMjnSAQA?oc=5
Publish Date: Wed, 06 Mar 2024 17:48:00 GMT
------------------------------------------------------------
Title: Crew members killed for first time in Houthi attack on commercial ship near Yemen, US officials say - CNN
Link: https://news.google.com/rss/articles/CBMiVGh0dHBzOi8vd3d3LmNubi5jb20vMjAyNC8wMy8wNi9wb2xpdGljcy9jcmV3LW1lbWJlcnMta2lsbGVkLWhvdXRoaS1hdHRhY2svaW5kZXguaHRtbNIBWGh0dHBzOi8vYW1wLmNubi5jb20vY25uLzIwMjQvMDMvMDYvcG9saXRpY3MvY3Jldy1tZW1iZXJzLWtpbGxlZC1ob3V0aGktYXR0YWNrL2luZGV4Lmh0bWw?oc=5
Publish Date: Wed, 06 Mar 2024 18:47:00 GMT
------------------------------------------------------------
Title: Trump’s on the ballot, but the Supreme Court left key constitutional questions unanswered - CNN
Link: ht

In [36]:
import requests
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from datetime import datetime
import nltk
from nltk import word_tokenize, pos_tag

# Download NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('vader_lexicon')

# Initialize VADER
sid = SentimentIntensityAnalyzer()

# Function to perform sentiment analysis using VADER
def analyze_sentiment_vader(text):
    # Get sentiment scores
    sentiment_scores = sid.polarity_scores(text)
    # Determine sentiment label based on compound score
    if sentiment_scores['compound'] >= 0.05:
        sentiment_label = 'Positive'
    elif sentiment_scores['compound'] <= -0.05:
        sentiment_label = 'Negative'
    else:
        sentiment_label = 'Neutral'
    return sentiment_label, sentiment_scores['compound']

# Function to get news data from Google News
def get_news_data():
    # Set headers to mimic a browser visit
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Google News URL
    news_url = "https://news.google.com/news/rss"

    # Make an HTTP request to the URL
    response = requests.get(news_url, headers=headers)

    # Parse the XML content of the response
    soup = BeautifulSoup(response.content, features="xml")

    # Find all news items
    news_items = soup.findAll('item')

    # List to hold news data
    news_data = []

    # Extract title, link, and publish date for each news item
    for item in news_items:
        news_data.append({
            "title": item.title.text,
            "link": item.link.text,
            "pubDate": item.pubDate.text
        })

    return news_data

# Function to print news data
def print_news_data(news_data):
    for news in news_data:
        print(f"Title: {news['title']}")
        print(f"Link: {news['link']}")
        print(f"Publish Date: {news['pubDate']}")
        print("-" * 60)

# Analyze headlines and sentiment from the Google News titles using VADER
def analyze_headlines_from_google_news():
    # Get news data from Google News
    news_data = get_news_data()

    # List to hold analyzed headlines
    headlines_data = []

    # Analyze sentiment for each headline
    for news in news_data:
        headline = news['title']
        # Perform sentiment analysis using VADER
        sentiment_label, sentiment_score = analyze_sentiment_vader(headline)
        headlines_data.append({'headline': headline, 'sentiment_label': sentiment_label, 'sentiment_score': sentiment_score, 'link': news['link'], 'pubDate': news['pubDate']})
    
    return headlines_data

# Analyze headlines from Google News
headlines_data = analyze_headlines_from_google_news()

# Print results
for headline_data in headlines_data:
    print("Title:", headline_data['headline'])
    print("Sentiment Label:", headline_data['sentiment_label'])
    print("Sentiment Score:", headline_data['sentiment_score'])
    print("Link:", headline_data['link'])
    print("Publish Date:", headline_data['pubDate'])
    print()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DonGaspar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\DonGaspar\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\DonGaspar\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Title: Nikki Haley ends White House bid, clearing path for a Trump-Biden rematch - Reuters
Sentiment Label: Neutral
Sentiment Score: 0.0
Link: https://news.google.com/rss/articles/CBMib2h0dHBzOi8vd3d3LnJldXRlcnMuY29tL3dvcmxkL3VzL25pa2tpLWhhbGV5LWVuZHMtd2hpdGUtaG91c2UtYmlkLWNsZWFyaW5nLXBhdGgtdHJ1bXAtYmlkZW4tcmVtYXRjaC0yMDI0LTAzLTA2L9IBAA?oc=5
Publish Date: Wed, 06 Mar 2024 20:00:25 GMT

Title: Crew members killed for first time in Houthi attack on commercial ship near Yemen, US officials say - CNN
Sentiment Label: Negative
Sentiment Score: -0.8225
Link: https://news.google.com/rss/articles/CBMiVGh0dHBzOi8vd3d3LmNubi5jb20vMjAyNC8wMy8wNi9wb2xpdGljcy9jcmV3LW1lbWJlcnMta2lsbGVkLWhvdXRoaS1hdHRhY2svaW5kZXguaHRtbNIBWGh0dHBzOi8vYW1wLmNubi5jb20vY25uLzIwMjQvMDMvMDYvcG9saXRpY3MvY3Jldy1tZW1iZXJzLWtpbGxlZC1ob3V0aGktYXR0YWNrL2luZGV4Lmh0bWw?oc=5
Publish Date: Wed, 06 Mar 2024 18:47:00 GMT

Title: Trump’s on the ballot, but the Supreme Court left key constitutional questions unanswered - CNN
Sentiment L

In [41]:
from GoogleNews import GoogleNews

# Initialize GoogleNews object
googlenews = GoogleNews()

# Set the time period (optional)
googlenews.set_time_range('02/01/2023','03/01/2023')

# Set the language and region (optional)
#googlenews.set_lang('en')
#googlenews.set_region('US')

# Search for news items containing "Mark Zuckerberg"
googlenews.search('Mark Zuckerberg')

# Get the result
result = googlenews.results()

# Print the result
for news in result:
    print(f"Title: {news['title']}")
    print(f"Media: {news['media']}")
    print(f"Date: {news['date']}")
    print(f"Link: {news['link']}")
    print("-" * 60)

Title: Gravitas | Meta Blackout: How much money did Mark Zuckerberg lose?
Media: WION
Date: 2 hours ago
Link: https://www.wionews.com/videos/gravitas-meta-blackout-how-much-money-did-mark-zuckerberg-lose-697417&ved=2ahUKEwixps3EyuCEAxVsLEQIHZPADQEQxfQBegQIAxAC&usg=AOvVaw0nSry4GCRfyl3UNIRjST6T
------------------------------------------------------------
Title: Mark Zuckerberg loses $3 billion after Meta outage
Media: WION
Date: 2 hours ago
Link: https://www.wionews.com/videos/mark-zuckerberg-loses-3-billion-after-meta-outage-697410&ved=2ahUKEwixps3EyuCEAxVsLEQIHZPADQEQxfQBegQICRAC&usg=AOvVaw0DanOKQS583rZ24ZM0rJOw
------------------------------------------------------------
Title: Mark Zuckerberg loses $3 billion after Meta outage | WION News
Media: YouTube
Date: 2 hours ago
Link: https://www.youtube.com/watch%3Fv%3DKIzuYCKh5r8&ved=2ahUKEwixps3EyuCEAxVsLEQIHZPADQEQxfQBegQIBBAC&usg=AOvVaw0aVs0W0yd3_-oykvQ3vm8V
------------------------------------------------------------
Title: Billionaire

In [57]:
from GoogleNews import GoogleNews
from textblob import TextBlob
from datetime import datetime
import nltk
from nltk import word_tokenize, pos_tag

# Download NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Initialize GoogleNews object
googlenews = GoogleNews()

# Set the time period (optional)
googlenews.set_time_range('02/01/2023','03/01/2024')

# Set the language and region (optional)
# googlenews.set_lang('en')
# googlenews.set_region('US')

# Search for news items containing a target of interest
googlenews.search('climate change')
googlenews.get_page(5)
# Get the result
result = googlenews.results()

# Function to perform sentiment analysis
def analyze_sentiment(text):
    analysis = TextBlob(text)
    # Get the sentiment polarity score
    sentiment_score = analysis.sentiment.polarity
    # Assigning sentiment label based on polarity score
    if sentiment_score > 0:
        sentiment_label = 'Positive'
    elif sentiment_score == 0:
        sentiment_label = 'Neutral'
    else:
        sentiment_label = 'Negative'
    return sentiment_label, sentiment_score

# Function to extract nouns from the headline
def extract_nouns_from_headline(headline):
    tokens = word_tokenize(headline)
    tagged = pos_tag(tokens)
    nouns = [word for word, pos in tagged if pos.startswith('NN')]
    return nouns

# Function to infer topic from the headline
def infer_topic(headline):
    # Use a simple keyword extraction approach
    keywords = {
        'business': ['business', 'stock', 'market', 'finance', 'economy', 'company', 'industry'],
        'technology': ['technology', 'tech', 'innovation', 'digital', 'internet'],
        'politics': ['politics', 'government', 'election', 'policy', 'administration'],
        'sports': ['sports', 'athletics', 'games', 'football', 'basketball', 'soccer'],
        'entertainment': ['entertainment', 'celebrity', 'film', 'music', 'showbiz', 'hollywood'],
        'health': ['health', 'medicine', 'wellness', 'medical', 'fitness'],
        'science': ['science', 'research', 'discovery', 'innovation', 'experiment'],
        'poor_performance': ['poor performance', 'decline', 'struggle', 'losses', 'failure', 'outage'],
        'other': ['other', 'miscellaneous', 'general']
    }
    
    for topic, keywords_list in keywords.items():
        for keyword in keywords_list:
            if keyword in headline.lower():
                return topic
    return 'Other'

# Print the result with sentiment analysis and inferred topic
for news in result:
    headline = news['title']
    sentiment_label, sentiment_score = analyze_sentiment(headline)
    nouns = extract_nouns_from_headline(headline)
    topic = infer_topic(headline)
    print("Title:", headline)
    print("Sentiment Label:", sentiment_label)
    print("Sentiment Score:", sentiment_score)
    print("Nouns:", nouns)
    print("Topic:", topic)
    print("Media:", news['media'])
    print("Date:", news['date'])
    print("Link:", news['link'])
    print("-" * 60)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DonGaspar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\DonGaspar\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Title: This satellite could help fight climate change | March 6, 2024
Sentiment Label: Neutral
Sentiment Score: 0.0
Nouns: ['satellite', 'climate', 'change', '|', 'March']
Topic: Other
Media: YouTube
Date: 5 minutes ago
Link: https://www.youtube.com/watch%3Fv%3DhVZKPqCUedQ&ved=2ahUKEwj30uuMz-CEAxVgm68BHVAZAAMQxfQBegQIARAC&usg=AOvVaw0QljIGpq3RIFiyMWLsRA8C
------------------------------------------------------------
Title: SEC signs off on landmark climate rule as legal backlash looms
Sentiment Label: Positive
Sentiment Score: 0.05
Nouns: ['SEC', 'signs', 'landmark', 'climate', 'rule', 'backlash', 'looms']
Topic: Other
Media: Politico
Date: 5 minutes ago
Link: https://www.politico.com/news/2024/03/06/sec-set-to-launch-landmark-climate-rule-sparking-legal-blitz-00145323&ved=2ahUKEwj30uuMz-CEAxVgm68BHVAZAAMQxfQBegQICRAC&usg=AOvVaw0_gk-XEFXrFDUypnvGdZsn
------------------------------------------------------------
Title: US SEC adopts climate rule that may face challenges despite dilution
Se

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DonGaspar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\DonGaspar\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Title: Gravitas | Meta Blackout: How much money did Mark Zuckerberg lose?
Sentiment Label: Positive
Sentiment Score: 0.2
Nouns: ['Gravitas', '|', 'Meta', 'Blackout', 'money', 'Mark', 'Zuckerberg']
Topic: Other
Summary: 
Media: WION
Date: 2 hours ago
Link: https://www.wionews.com/videos/gravitas-meta-blackout-how-much-money-did-mark-zuckerberg-lose-697417&ved=2ahUKEwivpMPVzuCEAxX5na8BHWWoCUgQxfQBegQIBBAC&usg=AOvVaw3ngtVckufvGEFRW6vG9K51
------------------------------------------------------------
Title: Mark Zuckerberg loses $3 billion after Meta outage
Sentiment Label: Negative
Sentiment Score: -0.3
Nouns: ['Mark', 'Zuckerberg', 'Meta', 'outage']
Topic: poor_performance
Summary: 
Media: WION
Date: 3 hours ago
Link: https://www.wionews.com/videos/mark-zuckerberg-loses-3-billion-after-meta-outage-697410&ved=2ahUKEwivpMPVzuCEAxX5na8BHWWoCUgQxfQBegQICRAC&usg=AOvVaw0RsP5f04UUt5nplrf9NwCz
------------------------------------------------------------
Title: Mark Zuckerberg loses $3 billion aft