In [2]:
import argparse
import logging
import os
import pathlib
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [None]:
# Location of the chromedriver executable. The default is machine-specific;
# set the CHROMEDRIVER_PATH environment variable to point elsewhere without
# editing the notebook.
driver_path = os.environ.get("CHROMEDRIVER_PATH", "/Users/acasariego/miniconda3/bin/chromedriver")

In [None]:
class DownloadFilings:
    """Download a company's 10-Q filings from SEC EDGAR using Selenium.

    Typical use::

        job = DownloadFilings(ticker, start_date, dest_dir, driver_path)
        try:
            job.download_all()
        finally:
            job.close()

    Each saved page is written to
    ``<dest_dir>/raw_html/<ticker>/<ticker>_<form_type>_filing_<filing_date>.html``.
    """

    def __init__(self, ticker, start_date, dest_dir, driver_path):
        """Start a Chrome session and store the search parameters.

        Args:
            ticker: Text typed into the EDGAR company search box.
            start_date: Text typed into the ``filingDateFrom`` field.
            dest_dir: Root directory under which ``raw_html/<ticker>`` is created.
            driver_path: Path to the chromedriver executable.
        """
        self.ticker = ticker
        self.start_date = start_date
        self.dest_dir = dest_dir
        self.driver = webdriver.Chrome(service=Service(executable_path=driver_path))
        self.wait = WebDriverWait(self.driver, 10)

    def _raw_dir(self):
        """Return the directory that holds this ticker's saved pages."""
        return pathlib.Path(self.dest_dir) / "raw_html" / f"{self.ticker}"

    def create_folders(self):
        """Create ``<dest_dir>/raw_html/<ticker>`` (no error if it already exists)."""
        self._raw_dir().mkdir(parents=True, exist_ok=True)

    def element_exists(self, by, value):
        """Return True if the current browsing context contains a matching element."""
        try:
            self.driver.find_element(by, value)
            return True
        except NoSuchElementException:
            return False

    def get_main_html(self):
        """Maximize the browser window and open the EDGAR company search page."""
        self.driver.maximize_window()
        self.driver.get("https://www.sec.gov/edgar/searchedgar/companysearch.html")
        # TODO: validate that the page loaded successfully.

    def get_ticker_html(self):
        """Type the ticker into the company search box and submit with ENTER."""
        actions = ActionChains(self.driver)
        search_box = self.driver.find_element(By.ID, "edgar-company-person")
        actions.move_to_element(search_box).click().send_keys(self.ticker, Keys.ENTER).perform()
        # TODO: validate that the company page was reached.

    def select_10k_and_10q_buttons(self):
        """Expand the 10-K/10-Q section and click its first button (view all)."""
        xpath_10k_and_10q = r'//*[@id="filingsStart"]/div[2]/div[3]/h5'
        self.wait.until(EC.element_to_be_clickable((By.XPATH, xpath_10k_and_10q))).click()
        xpath_view_all_10k_and_10q = r'//*[@id="filingsStart"]/div[2]/div[3]/div/button[1]'
        self.wait.until(EC.element_to_be_clickable((By.XPATH, xpath_view_all_10k_and_10q))).click()

    def fill_searchbox(self):
        """Filter the filings table to "10-Q" and set the filing start date."""
        self.driver.find_element(By.ID, "searchbox").send_keys("10-Q")
        self.driver.find_element(By.ID, "filingDateFrom").clear()
        time.sleep(2)
        actions = ActionChains(self.driver)
        date_input = self.wait.until(EC.element_to_be_clickable((By.ID, "filingDateFrom")))
        actions.move_to_element(date_input).click().send_keys(self.start_date, Keys.ENTER).perform()

    def click_form_link(self, i):
        """Open the document link of table row ``i`` (1-based) and focus its window.

        The link is clicked through JavaScript; the method then waits until a
        second window exists and switches the driver to it.
        """
        origin = self.driver.current_window_handle
        xml_form_link = f'//*[@id="filingsTable"]/tbody/tr[{i}]/td[2]/div/a[1]'
        link = self.wait.until(EC.element_to_be_clickable((By.XPATH, xml_form_link)))
        self.driver.execute_script("arguments[0].click();", link)
        self.wait.until(EC.number_of_windows_to_be(2))
        # Pick the window that is not the one we started from instead of
        # relying on the ordering of window_handles.
        opened = [handle for handle in self.driver.window_handles if handle != origin]
        self.driver.switch_to.window(opened[-1] if opened else self.driver.window_handles[-1])

    def get_ixvFrame_html(self):
        """From the inline viewer, open the plain-HTML version and focus it.

        Enters the ``ixvFrame`` frame, opens the menu, clicks the
        ``form-information-html`` entry, waits for a third window and switches
        to it so that ``save_file`` captures that window's source.
        """
        self.driver.switch_to.frame(self.driver.find_element(By.ID, "ixvFrame"))
        self.wait.until(EC.element_to_be_clickable((By.ID, "menu-dropdown-link"))).click()
        known = set(self.driver.window_handles)
        self.wait.until(EC.element_to_be_clickable((By.ID, "form-information-html"))).click()
        self.wait.until(EC.number_of_windows_to_be(3))
        # NOTE(review): switching to the newly opened window is assumed to be
        # the intent of waiting for it; confirm this is the page to save.
        opened = [handle for handle in self.driver.window_handles if handle not in known]
        self.driver.switch_to.window(opened[-1] if opened else self.driver.window_handles[-1])
        time.sleep(1)

    def get_txt(self):
        """Placeholder for pages exposing ``formHeader``; performs no navigation yet.

        TODO: implement retrieval of the text filing. Until then the caller
        saves the source of the page that is currently focused.
        """

    def get_html(self):
        """Placeholder for plain HTML filings; performs no navigation yet.

        TODO: implement any extra steps needed. Until then the caller saves
        the source of the page that is currently focused.
        """

    def save_file(self, form_type, filing_date):
        """Write the focused page's source to the ticker's raw HTML directory.

        Args:
            form_type: Form type text used in the file name (e.g. ``"10-Q"``).
            filing_date: Filing date text used in the file name.

        Returns:
            The ``pathlib.Path`` of the file that was written.
        """
        raw_dir = self._raw_dir()
        raw_dir.mkdir(parents=True, exist_ok=True)
        target = raw_dir / f"{self.ticker}_{form_type}_filing_{filing_date}.html"
        with open(target, "w", encoding='utf-8') as f:
            f.write(self.driver.page_source)
        return target

    def _return_to_window(self, main_window):
        """Close every window except ``main_window`` and focus ``main_window``."""
        for handle in self.driver.window_handles:
            if handle != main_window:
                self.driver.switch_to.window(handle)
                self.driver.close()
        self.driver.switch_to.window(main_window)

    def close(self):
        """Quit the browser session started in ``__init__``."""
        self.driver.quit()

    def download_all(self):
        """Run the full flow: search the ticker, filter, and save every 10-Q row.

        For each table row whose first cell reads ``10-Q`` the filing is opened
        in a new window, saved, and the extra windows are closed again so the
        next row starts from the results page.
        """
        self.create_folders()
        self.get_main_html()
        self.get_ticker_html()
        self.select_10k_and_10q_buttons()
        self.fill_searchbox()

        main_window = self.driver.current_window_handle
        table = self.driver.find_element(By.ID, "filingsTable")
        # Count body rows only, so the indices match the tbody/tr[i] lookups below.
        row_count = len(table.find_elements(By.XPATH, "./tbody/tr"))
        for i in range(1, row_count + 1):
            row_xpath = f'//*[@id="filingsTable"]/tbody/tr[{i}]'
            form_type = self.driver.find_element(By.XPATH, f'{row_xpath}/td[1]').text
            filing_date = self.driver.find_element(By.XPATH, f'{row_xpath}/td[3]').text

            if form_type != '10-Q':
                continue

            self.click_form_link(i)
            try:
                if self.element_exists(By.ID, "ixvFrame"):
                    self.get_ixvFrame_html()
                elif self.element_exists(By.ID, "formHeader"):
                    self.get_txt()
                else:
                    self.get_html()
                self.save_file(form_type, filing_date)
            finally:
                # Always go back to the results window, even if this filing failed.
                self._return_to_window(main_window)
        

# Potential errors
    # Inconsistent use of self.driver across methods

# Next steps
    # Verify that the class works end to end
    # Add error handling ... decide where to add it (candidate spots are marked with "validate" comments)
    # Add print statements (for logging)

In [2]:
def parse_arguments(argv=None):
    """Parse command-line arguments.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            ``argparse`` reads ``sys.argv[1:]``. Pass an explicit list when
            calling from a notebook, where ``sys.argv`` holds kernel arguments.

    Returns:
        An ``argparse.Namespace`` with ``ticker``, ``start_date``,
        ``dest_dir`` (a ``pathlib.Path``) and ``verbose``.
    """
    arg_parser = argparse.ArgumentParser(description='Download 10-Q filings from SEC.')
    arg_parser.add_argument('-t', '--ticker', required=True, type=str, help='Ticker symbol of the company')
    arg_parser.add_argument('-s', '--start_date', required=True, type=str, help='Start date for the filings (YYYY-MM-DD)')
    # Only the `pathlib` module is imported by the file, so reference Path through it.
    arg_parser.add_argument('-d', '--dest_dir', required=True, type=pathlib.Path, help='Destination directory for downloaded files')
    arg_parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
    return arg_parser.parse_args(argv)

In [None]:
def download_files_10Q(ticker, start_date, dest_dir, verbose,
                       driver_path="/Users/acasariego/miniconda3/bin/chromedriver"):
    """Download the ticker's raw 10-Q filings from SEC to the specified directory.

    Args:
        ticker: Company ticker passed to ``search_ticker`` and ``create_directory``.
        start_date: Start date passed to ``specify_search_criteria``.
        dest_dir: Destination root passed to ``create_directory``.
        verbose: Flag forwarded to ``download_forms``.
        driver_path: Path to the chromedriver executable. Defaults to the
            location previously hard-coded in this function.

    The browser is always shut down, even when one of the steps raises.
    """
    driver = webdriver.Chrome(service=Service(executable_path=driver_path))
    try:
        driver.maximize_window()
        driver.get("https://www.sec.gov/edgar/searchedgar/companysearch.html")

        search_ticker(driver, ticker)
        select_filings(driver)
        specify_search_criteria(driver, start_date)

        raw_dir = create_directory(dest_dir, ticker)
        download_forms(driver, raw_dir, ticker, verbose)
    finally:
        # Release the Chrome process regardless of success or failure.
        driver.quit()

In [None]:
def search_ticker(driver, ticker):
    """Type ``ticker`` into the EDGAR company search box and submit it with ENTER."""
    chain = ActionChains(driver)
    search_box = driver.find_element("id", "edgar-company-person")
    # Queue the gestures one by one, then run them in a single perform().
    chain.move_to_element(search_box)
    chain.click()
    chain.send_keys(ticker, Keys.ENTER)
    chain.perform()
    

In [None]:
def select_filings(driver):
    """Click the 10-K/10-Q section header, then the first button inside that section."""
    waiter = WebDriverWait(driver, 10)
    click_targets = (
        r'//*[@id="filingsStart"]/div[2]/div[3]/h5',
        r'//*[@id="filingsStart"]/div[2]/div[3]/div/button[1]',
    )
    # Each target is waited for (up to 10 s) and clicked before moving on to the next.
    for xpath in click_targets:
        element = waiter.until(EC.element_to_be_clickable(("xpath", xpath)))
        element.click()

In [None]:
def specify_search_criteria(driver, start_date):
    """Enter "10-Q" in the search box and type ``start_date`` into the from-date field."""
    waiter = WebDriverWait(driver, 10)

    search_box = driver.find_element("id", "searchbox")
    search_box.send_keys("10-Q")

    date_field = driver.find_element("id", "filingDateFrom")
    date_field.clear()
    time.sleep(2)

    # Re-acquire the date field once it is clickable, then type the date and press ENTER.
    clickable_date = waiter.until(EC.element_to_be_clickable((By.ID, "filingDateFrom")))
    gesture = ActionChains(driver)
    gesture.move_to_element(clickable_date)
    gesture.click()
    gesture.send_keys(start_date, Keys.ENTER)
    gesture.perform()

In [None]:
def download_forms(driver, raw_dir, ticker, verbose):
    """Download the 10-Q forms and save them to the specified directory.

    Walks the rows of the element with id ``filingsTable`` and calls
    ``process_filing`` for every row whose first cell reads ``10-Q``.

    Args:
        driver: Selenium WebDriver positioned on the page that contains the table.
        raw_dir: Forwarded unchanged to ``process_filing``.
        ticker: Forwarded unchanged to ``process_filing``.
        verbose: Forwarded unchanged to ``process_filing``.
    """
    # NOTE(review): `wait` is not used in the visible part of this function.
    wait = WebDriverWait(driver, 10)
    # Counts every <tr> found anywhere under the table element.
    form_count = len(driver.find_element("id", "filingsTable").find_elements(By.TAG_NAME, "tr"))

    # XPath row indices are 1-based; the loop visits tbody rows 1 .. form_count - 1.
    # NOTE(review): presumably the "- 1" accounts for a header row included in
    # the count above -- confirm against the page markup.
    for i in range(1, form_count):
        # First cell is compared against '10-Q' below; third cell's text is passed on as the date.
        file_type = driver.find_element(By.XPATH, f'//*[@id="filingsTable"]/tbody/tr[{i}]/td[1]')
        date = driver.find_element(By.XPATH, f'//*[@id="filingsTable"]/tbody/tr[{i}]/td[3]').text
        
        if file_type.text == '10-Q':
            process_filing(driver, raw_dir, ticker, date, i, verbose)