In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import csv
import selenium
from selenium import webdriver

## 1. Scrape Yahoo Finance

In [2]:
def _stream_value(soup, css_class, attribute, ticker, field=None):
    """Return one attribute of a ``fin-streamer`` tag found in ``soup``.

    Args:
        soup: Parsed page to search.
        css_class: Exact ``class`` attribute string the tag must carry.
        attribute: Name of the tag attribute to read (e.g. ``'value'``).
        ticker: Ticker being scraped; only used to build the error message.
        field: Optional ``data-field`` attribute the tag must also carry.

    Raises:
        ValueError: If no matching tag exists in the page.
    """
    attrs = {'class': css_class}
    if field is not None:
        attrs['data-field'] = field
    tag = soup.find('fin-streamer', attrs=attrs)
    if tag is None:
        # Without this guard the failure is an opaque
        # "'NoneType' object is not subscriptable".
        raise ValueError(
            f"Could not find fin-streamer tag {attrs!r} on the quote page for {ticker!r}; "
            "the page layout may have changed or the request may have been blocked"
        )
    return tag[attribute]


def getData(tickers, timeout=10):
    """Scrapes yahoo finance data given a stock ticker list

    One HTTP request is made per ticker, in the order given.

    Args:
        tickers: Iterable of ticker symbols (strings).
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection cannot block the notebook forever.

    Returns:
        A list with one dict per ticker, with the keys ``'Ticker'``,
        ``'Price'``, ``'Change'`` and ``'Percent Change'``. The percent
        change is the scraped value multiplied by 100.

    Raises:
        ValueError: If an expected tag is missing from a quote page.
        Exception: Whatever ``requests`` raises on connection problems,
            timeouts, or (via ``raise_for_status``) HTTP error statuses.
    """
    # Setting a header in order to let the Yahoo Finance website recognize our script as a normal browser
    headers = {'User-Agent' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'}
    # Class strings of the tags holding the price and the change figures
    price_class = 'Fw(b) Fz(36px) Mb(-4px) D(ib)'
    change_class = 'Fw(500) Pstart(8px) Fz(24px)'

    stocks_data = []
    for ticker in tickers:
        url = f'https://finance.yahoo.com/quote/{ticker}?p={ticker}&.tsrc=fin-srch'

        # requesting info to the Y.F. server; fail early on HTTP error statuses
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
        # creating the bs4 object
        soup = BeautifulSoup(r.text, 'html.parser')

        # Extracting ticker (kept in its own name so the requested ticker is not overwritten)
        symbol = _stream_value(soup, price_class, 'data-symbol', ticker)

        # Extracting price
        price = float(_stream_value(soup, price_class, 'value', ticker, field='regularMarketPrice'))

        # Extracting change
        change = float(_stream_value(soup, change_class, 'value', ticker, field='regularMarketChange'))

        # Extracting percent change
        percent_change = float(
            _stream_value(soup, change_class, 'value', ticker, field='regularMarketChangePercent')
        ) * 100

        stocks_data.append({
            'Ticker': symbol,
            'Price': price,
            'Change': change,
            'Percent Change': percent_change,
        })

    return stocks_data

## 2. Get Nasdaq Tickers to scrape

In [3]:
# Listing page that is passed to extract_data to collect the ticker table
url = 'https://stockanalysis.com/list/nasdaq-stocks/'

In [4]:
def extract_data(url):
    """Scrapes tickers given a specific url

    Opens the page in a Selenium-driven Chrome browser, parses the rendered
    HTML and returns the cell texts of the table with class ``symbol-table``.

    Args:
        url: Address of the page containing the table.

    Returns:
        A ``pandas.DataFrame`` with one row per ``<tr>`` of the table body and
        one column per ``<td>``; every cell is the stripped text of the cell.
        Columns are unnamed (default integer labels).

    Raises:
        ValueError: If the page has no ``symbol-table`` table or the table
            has no ``<tbody>``.
    """
    # Using Selenium and bs4 to get the html page source and parse it
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        page_source = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise each call leaves a Chrome/driver process running.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    # Find the table by its class
    table = soup.find('table', class_='symbol-table')
    if table is None:
        raise ValueError(f"No table with class 'symbol-table' found at {url!r}")

    # Extract table rows from the table body
    body = table.find('tbody')
    if body is None:
        raise ValueError(f"The 'symbol-table' table at {url!r} has no <tbody>")

    # One list of stripped cell texts per table row
    parsed_data = [
        [col.get_text(strip=True) for col in row.find_all('td')]
        for row in body.find_all('tr')
    ]

    return pd.DataFrame(parsed_data)

In [5]:
# Open the listing page in a browser and collect its table rows as a DataFrame
table = extract_data(url)

In [6]:
# Preview the first rows; in the output below, column 1 holds the ticker symbols
table.head()

Unnamed: 0,0,1,2,3,4,5,6
0,1,AAPL,Apple Inc.,"2,781.64B",178.01,-1.72%,383.93B
1,2,MSFT,Microsoft Corporation,"2,406.95B",323.95,-0.93%,211.92B
2,3,GOOGL,Alphabet Inc.,"1,655.73B",131.68,-0.52%,289.53B
3,4,AMZN,"Amazon.com, Inc.","1,369.35B",133.49,-1.45%,538.05B
4,5,NVDA,NVIDIA Corporation,"1,176.76B",476.83,1.20%,25.88B


In [7]:
# keep the ticker symbols (column 1) of the first ten rows of the scraped table
nasdaq_tickers = table.iloc[:10, 1].to_list()

## 3. Run Scraper

In [8]:
# scrape price data for the selected tickers (getData issues one HTTP request per ticker)
data = getData(nasdaq_tickers)

In [9]:
# One row per scraped ticker; the columns come from the keys of the dicts returned by getData
df = pd.DataFrame(data)

In [10]:
# save data to a csv (index=False keeps the DataFrame's row index out of the file)
df.to_csv("stocks_data.csv", index=False)

In [11]:
# checking csv is correct: print the beginning of the file with the shell's `head` command
!head stocks_data.csv

Ticker,Price,Change,Percent Change
AAPL,177.75,-3.369995,-1.8606422
MSFT,323.81,-3.1900024,-0.9755359
GOOGL,131.548,-0.82199097,-0.62097977
AMZN,133.35,-2.1699982,-1.6012381999999998
NVDA,475.44,4.279999,0.908396
META,291.89,-2.3499756,-0.7986595000000001
TSLA,229.8337,-7.026306,-2.9664386
AVGO,863.6327,-12.947327,-1.4770273999999999
ASML,652.73,-24.390015,-3.6020227
