In [28]:
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

In [29]:
import requests
from bs4 import BeautifulSoup
from yahoo_fin.stock_info import get_analysts_info
import pandas as pd

In [30]:
list_of_news_categories = ['europe', 'us', 'middleeast', 'asia', 'uk', 'investing', 'economy', 'tech', 'weather', 'deals', 'sport', 'business', 'science', 'politics', 'entertainment', 'health', 'travel', 'style']

In [31]:
list_of_companies_to_follow_stocks = ['AMZN', 'TSMC34.SA', 'ADBE']

In [32]:
#stock_news_method = get_yf_rss()

In [33]:
# Function to clean up text
def clean_text(text):
    return ' '.join(text.split()).replace('\\n', '').strip()

In [34]:
# Function to determine the category from the URL
def get_category_from_url(url):
    for category in list_of_news_categories:
        if category in url.lower():
            return category.capitalize()
    return 'Uncategorized'

In [35]:
# Function to fetch and parse CNN news sitemap
def fetch_cnn_news():
    url = "https://www.cnn.com/sitemap/news.xml"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'xml')
    news_items = soup.find_all('url')
    
    news_data = []
    for item in news_items[:5]:  # Limiting to first 5 news articles for brevity
        news_url = item.find('loc').text
        publication_date = item.find('news:publication_date').text if item.find('news:publication_date') else 'No date'
        
        news_response = requests.get(news_url)
        news_soup = BeautifulSoup(news_response.content, 'html.parser')
        title = news_soup.find('h1').text if news_soup.find('h1') else 'No title'
        summary = news_soup.find('meta', {'name': 'description'})['content'] if news_soup.find('meta', {'name': 'description'}) else 'No summary'
        
        # Clean up title and summary
        title = clean_text(title)
        summary = clean_text(summary)
        
        category = get_category_from_url(news_url)
        
        if title != 'No title':
            news_data.append({'title': title, 'summary': summary, 'category': category, 'url': news_url, 'publication_date': publication_date})
    
    return news_data

In [36]:
cnn_news = fetch_cnn_news()

In [37]:
print('Length of CNN news: ',len(cnn_news))

Length of CNN news:  5


In [38]:
print("CNN News:")
for news in cnn_news:
    print(news)

CNN News:
{'title': 'The latest on the 2024 presidential campaign', 'summary': 'Vice President Kamala Harris and running mate Minnesota Gov. Tim Walz are campaigning for the Democratic ticket in Wisconsin and Michigan today as GOP vice presidential candidate JD Vance also visits those states. Follow here for the latest live updates on the 2024 presidential race.', 'category': 'Politics', 'url': 'https://www.cnn.com/politics/live-news/kamala-harris-trump-election-08-07-24/index.html', 'publication_date': '2024-08-07T12:10:45.154000+00:00'}
{'title': 'Look of the Week: Harris-Walz’s campaign hat says more than you might think', 'summary': 'What do sport hunters and fans of breakout queer pop star Chappell Roan have in common? Well, not much, perhaps. But now you can add at least one item to that list: Kamala Harris’ and Tim Walz’s new campaign cap.', 'category': 'Style', 'url': 'https://www.cnn.com/2024/08/07/style/campaign-hat-harris-walz-lotw/index.html', 'publication_date': '2024-08-0

In [44]:
# Function to fetch and parse Yahoo Finance sectors
def fetch_yahoo_finance_quotations(ticker):
    url = "https://finance.yahoo.com/quote/"+ticker+"/"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    quote = {}
    quote_statistics = soup.find('div', {'data-testid': 'quote-statistics'})
    
    if quote_statistics:
        for item in quote_statistics.find_all('li'):
            label = item.find('span', class_='label')
            value = item.find('span', class_='value')
            
            label_text = clean_text(label.text) if label else 'No label'
            value_text = clean_text(value.text) if value else 'No value'
            
            if label_text != 'No label': quote[label_text] = value_text 
    
    return quote

In [45]:
yahoo_finance_news = fetch_yahoo_finance_quotations()


In [47]:
print("\nYahoo Finance News:", yahoo_finance_news)


Yahoo Finance News: {'Previous Close': '609.57', 'Open': '616.07', 'Bid': '613.82 x 100', 'Ask': '614.98 x 100', "Day's Range": '612.54 - 632.00', '52 Week Range': '344.73 - 697.49', 'Volume': '2,161,534', 'Avg. Volume': '3,268,311', 'Market Cap (intraday)': '263.799B', 'Beta (5Y Monthly)': '1.27', 'PE Ratio (TTM)': '38.35', 'EPS (TTM)': '16.03', 'Earnings Date': 'Oct 16, 2024 - Oct 21, 2024', 'Forward Dividend & Yield': '--', 'Ex-Dividend Date': '--', '1y Target Est': '692.69'}
