In [59]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.chrome.options import Options
from datetime import datetime, timedelta
import time
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [60]:
# Build the Chrome session used for scraping; every flag below is passed
# straight to the browser as a command-line argument.
chrome_options = Options()
for flag in (
    "--headless",  # Run in headless mode
    "--disable-gpu",
    "--no-sandbox",
):
    chrome_options.add_argument(flag)

driver = webdriver.Chrome(options=chrome_options)

In [61]:
def convert_relative_time_to_dates(description, now=None):
    """Convert a relative timestamp such as "13 hours ago" into a date string.

    Args:
        description: Relative time text, e.g. "13 hours ago", "yesterday",
            "3 days ago", "an hour ago", "2 weeks ago" or "last month".
            Letter case and surrounding whitespace are ignored.
        now: Reference ``datetime`` the offset is subtracted from. Defaults
            to ``datetime.now()``; pass a fixed value for reproducible runs.

    Returns:
        The resulting calendar date formatted as "YYYY-MM-DD". Text that
        cannot be interpreted falls back to the date of ``now``.
    """
    if now is None:
        now = datetime.now()

    # Length of a single unit. Months and years depend on the calendar, so
    # they are approximated as 30 and 365 days respectively.
    unit_lengths = {
        "minute": timedelta(minutes=1),
        "hour": timedelta(hours=1),
        "day": timedelta(days=1),
        "week": timedelta(weeks=1),
        "month": timedelta(days=30),
        "year": timedelta(days=365),
    }

    words = str(description or "").lower().split()
    offset = timedelta(0)  # Unrecognized text keeps the reference date

    if "yesterday" in " ".join(words):
        offset = timedelta(days=1)
    else:
        for position, word in enumerate(words):
            unit = word[:-1] if word.endswith("s") else word
            if unit not in unit_lengths or position == 0:
                continue
            # The quantity is the word right before the unit ("13 hours")
            quantity = words[position - 1]
            if quantity.isdecimal():
                offset = int(quantity) * unit_lengths[unit]
                break
            if quantity in ("a", "an", "last"):
                # "an hour ago" / "last month" carry no digit but mean one unit
                offset = unit_lengths[unit]
                break

    return (now - offset).date().strftime("%Y-%m-%d")


def scroll_to_bottom(driver, pause=2, max_scrolls=None):
    """Scroll the page down repeatedly until its height stops growing.

    Args:
        driver: Object exposing ``execute_script(script)``, e.g. a Selenium
            WebDriver with a page already loaded.
        pause: Seconds to wait after each scroll before re-measuring the
            page height.
        max_scrolls: Upper bound on the number of scrolls, or ``None`` for
            no limit. Guards against pages whose height never stops growing.

    Returns:
        None.
    """
    # Get scroll height
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls = 0

    while max_scrolls is None or scrolls < max_scrolls:
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls += 1

        # Wait to load page
        time.sleep(pause)

        # An unchanged height means the last scroll loaded nothing new
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

def scrape_yahoo_finance(stock_symbol, from_date, to_date):
    """Scrape news headlines for a ticker from its Yahoo Finance news page.

    Uses the module-level Selenium ``driver``. The driver is quit before
    returning (also when loading the page fails), so a new driver has to be
    created before this function can be called again.

    Args:
        stock_symbol: Ticker symbol inserted into the news URL, e.g. "TSLA".
        from_date: Earliest date to keep, as "YYYY-MM-DD" text or a
            date/datetime object. ``None`` or "" disables the lower bound.
        to_date: Latest date to keep, in the same formats. ``None`` or ""
            disables the upper bound.

    Returns:
        pandas DataFrame with the columns 'Date' ("YYYY-MM-DD" text) and
        'Title', one row per headline inside the requested date range.
    """
    def as_iso_day(value):
        # Normalize a bound so it compares correctly with "YYYY-MM-DD" text
        if not value:
            return None
        if hasattr(value, "strftime"):
            return value.strftime("%Y-%m-%d")
        return str(value)

    earliest = as_iso_day(from_date)
    latest = as_iso_day(to_date)
    base_url = f"https://finance.yahoo.com/quote/{stock_symbol}/news"

    try:
        driver.get(base_url)
        scroll_to_bottom(driver)
        page_source = driver.page_source
    finally:
        # Always release the browser, even when loading the page fails
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    news_list = []

    # NOTE(review): both class strings end in what look like generated
    # suffixes ("yf-..."); presumably they change with the site's markup.
    for item in soup.find_all(class_="stream-item yf-7rcxn"):
        anchor = item.a
        date_span = item.find(class_="publishing font-condensed yf-da5pxu")
        if anchor is None or date_span is None:
            continue  # Not a regular news card; nothing to extract

        title = anchor.get("aria-label")
        if not title:
            continue  # A headline-less row is useless downstream

        # The text reads "<publisher> • <relative time>"; keep the time part
        time_date = date_span.text.strip().split("•")[-1]
        date = convert_relative_time_to_dates(time_date)

        # ISO dates sort lexicographically, so plain string comparison works
        if earliest is not None and date < earliest:
            continue
        if latest is not None and date > latest:
            continue

        news_list.append([date, title])

    return pd.DataFrame(news_list, columns=['Date', 'Title'])

In [62]:
# Define the stock symbol and date range (the 365 days ending today)
stock_symbol = 'TSLA'
# Read the clock once so both ends of the range are derived from the same
# instant, even if the cell happens to run across midnight.
window_end = datetime.now()
to_date = window_end.strftime('%Y-%m-%d')
from_date = (window_end - timedelta(days=365)).strftime('%Y-%m-%d')

# Scrape the news headlines
news_df = scrape_yahoo_finance(stock_symbol, from_date, to_date)

<div class="publishing font-condensed yf-da5pxu">Yahoo Finance Video <i aria-hidden="true" data-svelte-h="svelte-sg6bkb">•</i> 13 hours ago</div>
2024-07-20
<div class="publishing font-condensed yf-da5pxu">Yahoo Finance Video <i aria-hidden="true" data-svelte-h="svelte-sg6bkb">•</i> 14 hours ago</div>
2024-07-20
<div class="publishing font-condensed yf-da5pxu">Motley Fool <i aria-hidden="true" data-svelte-h="svelte-sg6bkb">•</i> 10 hours ago</div>
2024-07-20
<div class="publishing font-condensed yf-da5pxu">Investor's Business Daily <i aria-hidden="true" data-svelte-h="svelte-sg6bkb">•</i> 10 hours ago</div>
2024-07-20
<div class="publishing font-condensed yf-da5pxu">TheStreet <i aria-hidden="true" data-svelte-h="svelte-sg6bkb">•</i> 11 hours ago</div>
2024-07-20
<div class="publishing font-condensed yf-da5pxu">Bloomberg <i aria-hidden="true" data-svelte-h="svelte-sg6bkb">•</i> 12 hours ago</div>
2024-07-20
<div class="publishing font-condensed yf-da5pxu">MarketWatch <i aria-hidden="tru

In [63]:
# Score each headline with VADER and keep the 'compound' value of the result
vader = SentimentIntensityAnalyzer()
compounds = [
    vader.polarity_scores(headline)['compound']
    for headline in news_df['Title']
]
news_df['Compound'] = compounds

# Print the DataFrame
print(news_df)

           Date                                              Title  Compound
0    2024-07-20  Tesla is building a robotaxi, but there's a hi...    0.0000
1    2024-07-20         Tesla's Cybertruck: Is it a win or a miss?    0.4939
2    2024-07-20  Own EV Stocks? There's a Dark Side to Recent S...    0.3400
3    2024-07-20  Dow Jones Futures: Stock Market Struggles As H...   -0.2732
4    2024-07-20  Cathie Wood unloads $8 million of surging tech...    0.0000
..          ...                                                ...       ...
193  2024-07-11  Three Energy Stocks to Buy as the Summer Heat ...    0.2732
194  2024-07-11  Tesla to reportedly delay robotaxi rollout to ...   -0.3182
195  2024-07-11      3 Cult-Like Stocks to Buy for Long-Term Gains    0.3400
196  2024-07-11  Uber, Lyft Jump After Report Of Tesla's Robota...   -0.3182
197  2024-07-11  Tesla Robotaxi Debut Reportedly Delayed, Sendi...   -0.2263

[198 rows x 3 columns]
