In [1]:
!pip install transformers



In [2]:
!pip install textblob



In [8]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [18]:
# Import necessary libraries
import os
import requests
import aiohttp
import asyncio
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
import pandas as pd
import sqlite3
import nest_asyncio

# Patch asyncio to allow nested loops (for interactive environments like Jupyter)
nest_asyncio.apply()

# Delete existing database if it exists to reset the table schema
if os.path.exists('news_feed.db'):
    os.remove('news_feed.db')
    print("Old database removed. A new one will be created.")

# Set up database connection
conn = sqlite3.connect('news_feed.db')
cursor = conn.cursor()

# Create table to store news articles with VADER and TextBlob sentiment
cursor.execute('''CREATE TABLE IF NOT EXISTS news (
                    title TEXT,
                    summary TEXT,
                    vader_sentiment REAL,
                    textblob_sentiment REAL,
                    url TEXT
                )''')
conn.commit()

# API Key
api_key = 'your API Key
# Asynchronous function to fetch news articles
async def fetch_news_async(session, query, api_key, page_size=10):
    url = f'https://newsapi.org/v2/everything?q={query}&apiKey={api_key}&pageSize={page_size}'
    async with session.get(url) as response:
        if response.status != 200:
            print(f"Failed to fetch articles: {response.status}")
            return []
        data = await response.json()
        return data.get('articles', [])

# Function to summarize articles
def summarize_article(article_text):
    summarizer = pipeline('summarization', model="facebook/bart-large-cnn")
    summary = summarizer(article_text, max_length=min(150, len(article_text)), min_length=40, do_sample=False)
    return summary[0]['summary_text']

# Function to analyze sentiment using both VADER and TextBlob for comparison
def analyze_sentiment(article_text):
    # VADER sentiment
    analyzer = SentimentIntensityAnalyzer()
    vader_sentiment = analyzer.polarity_scores(article_text)['compound']

    # TextBlob sentiment
    blob = TextBlob(article_text)
    textblob_sentiment = blob.sentiment.polarity

    return vader_sentiment, textblob_sentiment

# Asynchronous function to process and store articles
async def process_articles(query, api_key):
    async with aiohttp.ClientSession() as session:
        articles = await fetch_news_async(session, query, api_key)
        news_feed = []

        for article in articles:
            title = article['title']
            description = article.get('description', '')
            content = article.get('content', '')
            full_text = f"{title} {description} {content}"

            # Summarize and analyze sentiment
            summary = summarize_article(full_text)
            vader_sentiment, textblob_sentiment = analyze_sentiment(summary)

            # Append to list and store in database
            news_feed.append({
                'title': title,
                'summary': summary,
                'vader_sentiment': vader_sentiment,
                'textblob_sentiment': textblob_sentiment,
                'url': article['url']
            })
            cursor.execute("INSERT INTO news (title, summary, vader_sentiment, textblob_sentiment, url) VALUES (?, ?, ?, ?, ?)",
                           (title, summary, vader_sentiment, textblob_sentiment, article['url']))

        # Commit changes to the database
        conn.commit()

        # Convert to DataFrame for easy visualization
        df = pd.DataFrame(news_feed)

        # Save DataFrame to an Excel file
        df.to_excel("news_feed.xlsx", index=False)
        print("News feed saved to 'news_feed.xlsx'")

        return df

# Function to retrieve news from the database
def get_saved_news():
    cursor.execute("SELECT * FROM news")
    rows = cursor.fetchall()
    df = pd.DataFrame(rows, columns=['Title', 'Summary', 'VADER Sentiment', 'TextBlob Sentiment', 'URL'])

    # Save retrieved data to Excel as well
    df.to_excel("saved_news_feed.xlsx", index=False)
    print("Saved news feed exported to 'saved_news_feed.xlsx'")

    return df

# Run the process_articles asynchronously in a Jupyter-compatible way
async def get_news_feed(query):
    return await process_articles(query, api_key)

# Example usage
query = "artificial intelligence"
news_feed_df = asyncio.run(get_news_feed(query))

# Display the latest news feed with sentiment comparison
print("Latest News Feed with Sentiment Comparison:")
print(news_feed_df[['title', 'summary', 'vader_sentiment', 'textblob_sentiment', 'url']])

# Fetch and display saved news with sentiment comparison
saved_news_df = get_saved_news()
print("\nSaved News Feed with Sentiment Comparison:")
print(saved_news_df)

# Close the database connection
conn.close()


Old database removed. A new one will be created.


Your max_length is set to 150, but your input_length is only 82. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=41)
Your max_length is set to 150, but your input_length is only 86. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=43)
Your max_length is set to 150, but your input_length is only 112. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=56)
Your max_length is set to 150, but your input_length is only 89. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=44)
You

News feed saved to 'news_feed.xlsx'
Latest News Feed with Sentiment Comparison:
                                               title                                            summary  vader_sentiment  textblob_sentiment                                                url
0         How a Trump Win Could Unleash Dangerous AI  Trump's opposition to “woke” safety standards ...           0.4215           -0.184517  https://www.wired.com/story/donald-trump-ai-sa...
1  This Toilet Seat Has a Camera for Taking Pictu...  This Toilet Seat Has a Camera for Taking Pictu...           0.8074           -0.211111  https://gizmodo.com/this-toilet-seat-has-a-cam...
2  President Biden sets up new AI guardrails for ...  President Biden sets up new AI guardrails for ...           0.8750           -0.044805  https://consent.yahoo.com/v2/collectConsent?se...
3  Nobel Prize Goes to ‘Godfathers of AI’ Who Now...  Two AI researchers, John Hopfield and Geoffrey...           0.5574           -0.150000  https://gi