In [None]:
import requests
from bs4 import BeautifulSoup
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline
import torch
import pandas as pd

In [None]:
# Load the FinBERT tokenizer and sequence-classification model.
# Both are built from the same checkpoint identifier, kept in one place so the
# two calls cannot drift apart.
finbert_checkpoint = 'ProsusAI/finbert'
tokenizer = BertTokenizer.from_pretrained(finbert_checkpoint)
model = BertForSequenceClassification.from_pretrained(finbert_checkpoint)

In [None]:
# Wrap the loaded model and tokenizer in a single callable pipeline object.
finbert = pipeline(
    task="sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
)

In [None]:

# Browser-like HTTP headers sent with the scraping requests.
#
# 'Accept-Encoding' is intentionally NOT set here. requests advertises only
# the encodings it can actually decode; forcing 'br' (Brotli) lets a server
# reply with Brotli-compressed bytes that are left undecoded when no Brotli
# decoder is installed, which makes the HTML parser silently find nothing.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  ' Chrome/91.0.4472.124 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',
}

In [None]:
# Wikipedia page listing the S&P 500 companies ("%26" is the URL-encoded "&").
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

In [None]:
sp500_table = pd.read_html(url)[0]

In [None]:
sp500_table = sp500_table['Symbol'].tolist()
sp500_table

In [None]:
# Scrape the Yahoo Finance news page of every ticker in `sp500_table`, score
# each headline + description with FinBERT, and print the per-item results and
# the per-ticker average of the signed (positive / negative) scores.

# Seconds to wait on Yahoo Finance before giving up on one ticker; without a
# timeout, requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10

# Longest token sequence the model accepts; longer text is truncated to this
# length instead of raising inside the model.
max_tokens = finbert.model.config.max_position_embeddings

for stock in sp500_table:
    # The Wikipedia table writes share classes with a dot (e.g. BRK.B) while
    # Yahoo Finance quote URLs use a dash (BRK-B).
    yahoo_symbol = stock.replace('.', '-')

    # Deliberately not named `url`: that global holds the Wikipedia address
    # used by an earlier cell, and overwriting it would break re-running it.
    news_url = f"https://finance.yahoo.com/quote/{yahoo_symbol}/news?p={yahoo_symbol}"

    try:
        response = requests.get(news_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        # One unreachable page must not abort the remaining tickers.
        print(f"\nSkipping {stock}: request failed ({exc})")
        continue

    if response.status_code != 200:
        # Report the failure instead of dropping the ticker silently.
        print(f"\nSkipping {stock}: HTTP status {response.status_code}")
        continue

    soup = BeautifulSoup(response.content, 'html.parser')
    news_items = soup.find_all('li', class_='stream-item')

    sentiment_scores = []
    print(f"\nScraped Headlines and Sentiment Scores for {stock}:")

    for i, item in enumerate(news_items):
        headline_tag = item.find('h3')
        headline = headline_tag.get_text().strip() if headline_tag else ''

        description_tag = item.find('p')
        description = description_tag.get_text().strip() if description_tag else ''

        # Only items that carry both a headline and a description are scored.
        if not (headline and description):
            continue

        text = f"{headline} {description}"
        finbert_result = finbert(text, truncation=True, max_length=max_tokens)[0]

        sentiment_label = finbert_result['label']
        sentiment_score = finbert_result['score']

        # Negative predictions contribute a negative score; any other label
        # (i.e. not positive/negative) is printed but left out of the average.
        if sentiment_label == 'negative':
            sentiment_score = -sentiment_score
        if sentiment_label in ('positive', 'negative'):
            sentiment_scores.append(sentiment_score)

        print(f"{i+1}. Headline: {headline}")
        print(f"   Description: {description}")
        print(f"   Sentiment: {sentiment_label}, Score: {sentiment_score:.4f}")
        print('-' * 80)

    # Guard against division by zero when nothing was scored for this ticker.
    if sentiment_scores:
        average_sentiment = sum(sentiment_scores) / len(sentiment_scores)
        print(f"Average Sentiment Score for {stock}: {average_sentiment:.4f}")