In [1]:
import requests
from bs4 import BeautifulSoup
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline
import torch
import pandas as pd

In [2]:
# Load the FinBERT sentiment analysis model
tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')

In [3]:
finbert = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [4]:

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  ' Chrome/91.0.4472.124 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
}

In [5]:
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

In [6]:
sp500_table = pd.read_html(url)[0]

In [7]:
sp500_table = sp500_table['Symbol'].tolist()
sp500_table

['MMM',
 'AOS',
 'ABT',
 'ABBV',
 'ACN',
 'ADBE',
 'AMD',
 'AES',
 'AFL',
 'A',
 'APD',
 'ABNB',
 'AKAM',
 'ALB',
 'ARE',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'AON',
 'APA',
 'AAPL',
 'AMAT',
 'APTV',
 'ACGL',
 'ADM',
 'ANET',
 'AJG',
 'AIZ',
 'T',
 'ATO',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'AXON',
 'BKR',
 'BALL',
 'BAC',
 'BK',
 'BBWI',
 'BAX',
 'BDX',
 'BRK.B',
 'BBY',
 'BIO',
 'TECH',
 'BIIB',
 'BLK',
 'BX',
 'BA',
 'BKNG',
 'BWA',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BRO',
 'BF.B',
 'BLDR',
 'BG',
 'BXP',
 'CHRW',
 'CDNS',
 'CZR',
 'CPT',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CARR',
 'CTLT',
 'CAT',
 'CBOE',
 'CBRE',
 'CDW',
 'CE',
 'COR',
 'CNC',
 'CNP',
 'CF',
 'CRL',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CLX',
 'CME',
 'CMS',
 'KO',
 'CTSH',
 'CL',
 'CMCSA',
 'CAG'

In [12]:
import requests
from datetime import datetime

# Define the API endpoint
api_url = "http://127.0.0.1:8000/api/sentiment-analysis"

# Data to send in the POST request
data = {
    "stock_symbol": "AAPL",
    "sentiment_score": 32.45,  # Example sentiment score
    "analysis_date": datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # Current date and time
}

# Send a POST request to the API
response = requests.post(api_url, json=data)

# Check if the request was successful
if response.status_code == 201:
    print("Data successfully sent to the API!")
    print(response.json())  # Print the response from the API
else:
    print(f"Failed to send data. Status code: {response.status_code}")
    print(response.json())  # Print the error message

Data successfully sent to the API!
{'stock_symbol': 'AAPL', 'sentiment_score': 32.45, 'analysis_date': '2024-09-14 13:12:51', 'updated_at': '2024-09-14T10:12:51.000000Z', 'created_at': '2024-09-14T10:12:51.000000Z', 'id': 3}


In [10]:
MAX_TOKENS = 512
from datetime import datetime
api_url = "http://your-laravel-app.com/api/sentiment-analysis"
for stock in sp500_table:
    url = f"https://finance.yahoo.com/quote/{stock}/news?p={stock}"
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        news_items = soup.find_all('li', class_='stream-item')
        
        sentiment_scores = []
        print(f"\nScraped Headlines and Sentiment Scores for {stock}:")
        
        for i, item in enumerate(news_items):    
            headline_tag = item.find('h3')
            headline = headline_tag.get_text().strip() if headline_tag else ''
            
            description_tag = item.find('p')
            description = description_tag.get_text().strip() if description_tag else ''
            
            text = f"{headline} {description}"
            
            if len(text) > MAX_TOKENS:
                # If text is too long, use only the description
                text = description if len(description) <= MAX_TOKENS else description[:MAX_TOKENS]
            
            if description and headline:
                finbert_result = finbert(text)[0]
                
                sentiment_score = finbert_result['score']
                sentiment_label = finbert_result['label']
                
                if sentiment_label == 'positive' or sentiment_label == 'negative':
                    if sentiment_label == 'negative':
                        sentiment_score = -sentiment_score 
                    sentiment_scores.append(sentiment_score)
                
                print(f"{i+1}. Headline: {headline}")
                print(f"   Description: {description}")
                print(f"   Sentiment: {sentiment_label}, Score: {sentiment_score:.4f}")
                print('-' * 80)
                
        if sentiment_scores:
            average_sentiment = sum(sentiment_scores) / len(sentiment_scores)
            print(f"Average Sentiment Score for {stock}: {average_sentiment:.4f}")
            data = {
                "stock_symbol": stock,
                "sentiment_score": average_sentiment,  # Example sentiment score
                "analysis_date": datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # Current date and time
            }
            response = requests.post(api_url, json=data)
            if response.status_code == 201:
                print("Data successfully sent to the API!")
                print(response.json())  # Print the response from the API
            else:
                print(f"Failed to send data. Status code: {response.status_code}")
                print(response.json())  # Print the error message
        else:
            print("No sentiment scores to average.")

    else:
        print(f"Failed to fetch data for {stock}. Status code: {response.status_code}")


Scraped Headlines and Sentiment Scores for MMM:
1. Headline: Ground-Breaking 3M Invention Helps Propel Record-Setting Solar Adventure Across the US
   Description: NORTHAMPTON, MA / ACCESSWIRE / September 12, 2024 / 3M Originally published on 3M News Center Will Jones had a major problem. His solar-powered car broke down in the middle of the desert as the air temperature reached 119 degrees and the nearest gas ...
   Sentiment: negative, Score: -0.8135
--------------------------------------------------------------------------------
3. Headline: Here's Why It is Appropriate to Retain 3M Stock Right Now
   Description: MMM gains from strength in its Transportation and Electronics segment, buyouts and shareholder-friendly policies. Softness in the Consumer segment is concerning.
   Sentiment: positive, Score: 0.6612
--------------------------------------------------------------------------------
4. Headline: 3M (MMM) Laps the Stock Market: Here's Why
   Description: In the latest trading

ConnectionError: HTTPConnectionPool(host='your-laravel-app.com', port=80): Max retries exceeded with url: /api/sentiment-analysis (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000018964E34F10>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))