In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import sqlalchemy
import time
%load_ext sql

In [2]:
# Open two connections to the local PostgreSQL database `nasdaq_companies`:
# a SQLAlchemy engine bound to `engine`, and a connection for the `%sql`
# line magic (the `sql` extension is loaded in the import cell).
# NOTE(review): user name and password are hardcoded in both URLs — consider
# reading them from the environment before this notebook is shared or committed.
engine = sqlalchemy.create_engine("postgresql+psycopg2://postgres:postgres@localhost:5432/nasdaq_companies")
%sql postgresql://postgres:postgres@localhost:5432/nasdaq_companies

In [None]:
tickers = %sql SELECT ticker FROM company
tickers = tickers.DataFrame()
tickers = tickers['ticker']

In [4]:
# Implicit-wait value (seconds) applied to the driver; the scraping loop
# restores this value after temporarily dropping the implicit wait to 0.
implicit_wait_time = 10

# Chrome options: page-load timeout of 10 000 ms (Selenium option timeouts are
# given in milliseconds) and the 'normal' page-load strategy.
options = webdriver.ChromeOptions()
options.timeouts = {'pageLoad': 10 * 1000}
options.page_load_strategy = 'normal'

# Start the browser session and apply the implicit wait.
driver = webdriver.Chrome(options=options)
driver.implicitly_wait(implicit_wait_time)

# Open the login page first.
# NOTE(review): presumably so a user can sign in by hand before the scraping
# cell is run — confirm.
driver.get('https://stockanalysis.com/login/')

In [None]:
# Scrape quarterly financial statements from stockanalysis.com for each ticker.
#
# For every ticker from START_INDEX onward the loop opens the stock page,
# follows the "Financials" link and then, for each statement tab
# (Income, Balance Sheet, Cash Flow, Ratios):
#   1. switches the data source to "NASDAQ Data Link" when that selector exists,
#   2. switches to the Quarterly view,
#   3. switches the number units to Raw,
#   4. clicks Download -> "Download to CSV".
# Uses `tickers`, `driver` and `implicit_wait_time` defined in earlier cells.

START_INDEX = 756   # position in `tickers` at which to start (resume offset)
MAX_TIMEOUTS = 3    # wait timeouts tolerated per ticker, shared by all stages, before moving on
WAIT_SECONDS = 10   # timeout for every explicit wait below

# Link text of each statement tab -> URL suffix under .../financials/.
# Insertion order is the order in which the tabs are visited.
url_tab_dict = {
    'Income': '',
    'Balance Sheet': 'balance-sheet/',
    'Cash Flow': 'cash-flow-statement/',
    'Ratios': 'ratios/',
}


def _wait_for(condition):
    """Block until ``condition`` is truthy and return its value.

    Raises:
        TimeoutException: if the condition is still falsy after WAIT_SECONDS.
    """
    return WebDriverWait(driver, WAIT_SECONDS).until(condition)


def _reload(url):
    """Navigate to ``url`` without letting a page-load timeout escape.

    ``driver.get`` itself raises TimeoutException when the page-load timeout
    configured on the driver expires. This helper is called from inside
    ``except`` handlers, where such an exception would otherwise abort the
    whole run; the caller's next explicit wait decides whether the page is usable.
    """
    try:
        driver.get(url)
    except TimeoutException:
        pass


for ticker in tickers[START_INDEX:]:
    timeout_counter = 0
    stock_url = f"https://stockanalysis.com/stocks/{ticker.lower()}/"
    financials_url = f"{stock_url}financials/"

    # Load the stock page. The navigation is inside the try so that a page-load
    # timeout raised by driver.get is retried like any other timeout.
    # No retry cap: keeps retrying with a 30 s pause until the page loads.
    while True:
        try:
            driver.get(stock_url)
            _wait_for(lambda d: d.execute_script("return document.readyState") == "complete")
            break
        except TimeoutException:
            print(f"Timeout Exception while loading {ticker} page.")
            time.sleep(30)

    # The presence wait returns the located element, so no second lookup is needed.
    try:
        financials_link = _wait_for(EC.presence_of_element_located((By.LINK_TEXT, 'Financials')))
    except (TimeoutException, NoSuchElementException):
        print(f"No financial data found for {ticker}.")
        continue
    finally:
        # Re-assert the implicit wait before moving on.
        driver.implicitly_wait(implicit_wait_time)

    # Open the financials page; on timeout reload it directly by URL.
    financials_link.click()
    while timeout_counter < MAX_TIMEOUTS:
        try:
            _wait_for(EC.presence_of_element_located((By.ID, 'main-table')))
            break
        except TimeoutException:
            print(f"Timeout Exception while loading financials page for {ticker}.")
            time.sleep(10)
            _reload(financials_url)
            timeout_counter += 1
    if timeout_counter >= MAX_TIMEOUTS:
        continue

    # Skip tickers whose financials page has no Quarterly button.
    try:
        _wait_for(EC.presence_of_element_located((By.XPATH, "//button[contains(text(), 'Quarterly')]")))
    except TimeoutException:
        print(f"No quaterly data available for {ticker}.")
        continue

    for sub_tab_name, url_suffix in url_tab_dict.items():
        sub_tab_url = f"{financials_url}{url_suffix}"

        # Open the tab, then wait for its heading, table and URL to match.
        driver.find_element(By.LINK_TEXT, sub_tab_name).click()
        while timeout_counter < MAX_TIMEOUTS:
            try:
                _wait_for(EC.text_to_be_present_in_element((By.CSS_SELECTOR, 'h1.mb-0'), sub_tab_name))
                _wait_for(EC.presence_of_element_located((By.ID, 'main-table')))
                _wait_for(EC.url_contains(f"stocks/{ticker.lower()}/financials/{url_suffix}"))
                break
            except TimeoutException:
                print(f"Timeout Exception while loading {sub_tab_name} page for {ticker}.")
                time.sleep(10)
                _reload(sub_tab_url)
                timeout_counter += 1
        if timeout_counter >= MAX_TIMEOUTS:
            break

        # Best effort: pick "NASDAQ Data Link" as the data source. The implicit
        # wait is dropped to 0 so a missing selector fails immediately.
        try:
            driver.implicitly_wait(0)
            driver.find_element(By.XPATH, "//span[text()='Data Source']").click()
            driver.find_element(By.XPATH, "//button[contains(text(), 'NASDAQ Data Link')]").click()
            _wait_for(EC.visibility_of_element_located((By.XPATH, "//span[contains(text(), 'NASDAQ')]")))
        except TimeoutException:
            print(f"Could not load {sub_tab_name} NASDAQ data for {ticker}.")
            break
        except NoSuchElementException:
            # Selector (or the NASDAQ entry) is not on this page: carry on
            # with whatever data source the page is showing.
            pass
        except Exception as exc:
            # Still best effort, but no longer silent, and KeyboardInterrupt
            # is no longer swallowed the way a bare `except:` would.
            print(f"Ignoring {type(exc).__name__} while selecting NASDAQ data source for {ticker} ({sub_tab_name}).")
        finally:
            driver.implicitly_wait(implicit_wait_time)

        # Switch to the Quarterly view.
        try:
            _wait_for(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Quarterly')]"))).click()
            _wait_for(EC.visibility_of_element_located((By.XPATH, "//button[contains(@class, 'active')]")))
            _wait_for(EC.url_contains('?p=quarterly'))
        except Exception:
            print(f"Could not click Quarterly button for {ticker}.")
            break

        # Switch the displayed number units to Raw.
        try:
            _wait_for(EC.element_to_be_clickable((By.XPATH, "//button[@title='Change number units']"))).click()
            _wait_for(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Raw')]"))).click()
            _wait_for(EC.visibility_of_element_located((By.XPATH, "//span[contains(text(), 'Raw')]")))
        except Exception:
            print(f"Could not change displayed units for {ticker}.")
            break

        # Trigger the CSV download. A failure here is re-raised, so the whole
        # run stops at this ticker instead of skipping it.
        try:
            _wait_for(EC.element_to_be_clickable((By.XPATH, "//span[text()='Download']"))).click()
            _wait_for(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Download to CSV')]"))).click()
        except (TimeoutException, NoSuchElementException):
            print(f"Could not download {sub_tab_name} CSV for {ticker}.")
            raise