In [1]:
# Import dependencies
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time, requests, os

In [2]:
# Landing page of Yahoo Finance; every navigation step starts from here
url = "https://finance.yahoo.com/"

# Ticker symbols of the companies of interest
companies = [
    "TSLA",
    "MSFT",
    "GME",
]

In [3]:
# Location of the chromedriver binary. Set the CHROMEDRIVER_PATH environment
# variable to override it; otherwise the original Windows path is used.
chromedriver_path = os.environ.get("CHROMEDRIVER_PATH", r"C:\temp\chromedriver.exe")

# Start chrome webdriver
driver = webdriver.Chrome(executable_path = chromedriver_path)

# Hit url and wait for 2 seconds so the page has time to render
driver.get(url)
time.sleep(2)

In [4]:
# Locate the search box and type the first ticker from the companies list
search_box = driver.find_element_by_xpath("//input[@placeholder = 'Search for news, symbols or companies']")
search_box.send_keys(companies[0])
time.sleep(2)

# Locate the search button and click it to submit the query
search_button = driver.find_element_by_xpath("//button[@id= 'header-desktop-search-button']")
search_button.click()
time.sleep(2)

In [5]:
# Find the tab labelled 'Historical Data' and open it
historical_data_tab = driver.find_element_by_xpath("//span[text() = 'Historical Data']")
historical_data_tab.click()
time.sleep(2)

In [6]:
# Open the dropdown by clicking the element carrying the CoreArrowDown icon
dropdown_arrow = driver.find_element_by_css_selector("[data-icon=CoreArrowDown]")
dropdown_arrow.click()
time.sleep(2)

In [7]:
# Choose the 5 Years option, identified by its data-value attribute
five_year_option = driver.find_element_by_css_selector("[data-value='5_Y']")
five_year_option.click()
time.sleep(2)

In [8]:
# Click "Apply" button.
# WebElement.click() returns None, so its result is deliberately not stored.
driver.find_element_by_xpath("//span[text()='Apply']").click()
time.sleep(2)

In [9]:
# The table is loaded lazily: more rows appear only as the page is scrolled.
# Scroll down 5000 px, 13 times, pausing 2 seconds after each scroll.
scroll_script = "window.scrollBy(0,5000)"
for scroll_number in range(13):
    driver.execute_script(scroll_script)
    time.sleep(2)

In [10]:
# Capture the HTML currently rendered by the driver and keep it in a variable
webpage = driver.page_source
# Uncomment the next line to inspect the raw HTML that was fetched
#print(webpage)

In [11]:
# Parse the HTML snapshot already captured in `webpage` with Beautiful Soup.
# Reusing the snapshot avoids a second page_source round-trip to the browser
# and guarantees the parsed document is the one that was stored.
soup = BeautifulSoup(webpage, "html.parser")

In [12]:
# Locate the <table> element by its CSS class string; soup.find returns the first match
table_css_class = "W(100%) M(0)"
table = soup.find("table", class_=table_css_class)

In [13]:
# Collect every <tr> in the table that carries the stock-data row CSS class string
row_css_class = "BdT Bdc($seperatorColor) Ta(end) Fz(s) Whs(nw)"
rows = table.find_all("tr", class_=row_css_class)

In [14]:
# Column names, in the order the cells appear in each table row
column_names = ["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"]

# Create empty list to store data (one dictionary per table row)
extracted_data = []

# Loop through each row of the table; enumerate supplies the row number
# used in the diagnostic message below.
for i, row in enumerate(rows):
    # Fresh dictionary for this row. `row_dict` stays a notebook-level name
    # because a later cell displays the last row that was processed.
    row_dict = {}
    # Extract all columns of the row
    values = row.find_all('td')

    # Rows that do not have exactly seven cells are skipped
    if len(values) != 7:
        continue

    try:
        # Each cell's text sits inside a <span>; commas are removed from the
        # text (thousands separators in numbers, and the comma in the date).
        for column_name, cell in zip(column_names, values):
            row_dict[column_name] = cell.find('span').text.replace(',', '')
    except AttributeError:
        # find('span') returns None when a cell has no <span>, so `.text`
        # raises AttributeError. Report the row and move on without storing it.
        print("Row Number: " + str(i))
        continue

    # Append into list
    extracted_data.append(row_dict)

In [15]:
# Display the dictionary left over from the final iteration of the extraction loop
row_dict

{'Date': 'Feb 08 2016',
 'Open': '31.42',
 'High': '31.43',
 'Low': '29.20',
 'Close': '29.60',
 'Adj Close': '29.60',
 'Volume': '46565000'}

In [16]:
# Build a DataFrame from the extracted rows: one row per dictionary,
# one column per dictionary key
extracted_data_df = pd.DataFrame(data=extracted_data)

# Show the resulting frame as the cell output
extracted_data_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,Feb 05 2021,845.00,864.77,838.97,852.23,852.23,18524800
1,Feb 04 2021,855.00,856.50,833.42,849.99,849.99,15812700
2,Feb 03 2021,877.02,878.08,853.06,854.69,854.69,18343500
3,Feb 02 2021,844.68,880.50,842.20,872.79,872.79,24346200
4,Feb 01 2021,814.29,842.00,795.56,839.81,839.81,25391400
...,...,...,...,...,...,...,...
1254,Feb 12 2016,31.00,31.40,28.74,30.21,30.21,36179000
1255,Feb 11 2016,30.40,32.65,29.40,30.09,30.09,71262000
1256,Feb 10 2016,30.10,30.99,28.35,28.73,28.73,52032500
1257,Feb 09 2016,28.46,31.96,28.21,29.65,29.65,43258000


In [18]:
# Inspect the first row of the dataframe (position 0)
extracted_data_df.iloc[0]

Date         Feb 05 2021
Open              845.00
High              864.77
Low               838.97
Close             852.23
Adj Close         852.23
Volume          18524800
Name: 0, dtype: object

In [21]:
# Inspect the last row of the dataframe. A negative position is used instead
# of a fixed row number because the number of scraped rows changes from run
# to run, and a fixed position could raise IndexError or miss the last row.
extracted_data_df.iloc[-1]

Date         Feb 08 2016
Open               31.42
High               31.43
Low                29.20
Close              29.60
Adj Close          29.60
Volume          46565000
Name: 1258, dtype: object