In [1]:
import os
import re
import shutil
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from pathlib import Path

from secrets import *


def move_from_download_to_staging(account):
    """Move downloaded statement files into the staging directory of one account.

    Each entry of ``download_dir`` whose name matches ``input_regex`` gets a
    new name computed with ``re.sub(input_regex, output_regex, name)``. The
    file is moved only when that new name matches every pattern in
    ``allow_patterns``; everything else is left in the download directory.

    Args:
        account: Key into ``account_dirs`` selecting the staging
            sub-directory (created if it does not exist yet).
    """
    # Ensure the expected staging directory exists
    account_dir = Path(staging_dir) / account_dirs[account]
    os.makedirs(account_dir, exist_ok=True)

    for f in os.listdir(download_dir):
        # Ensure the file matches an expected downloaded file
        if not re.match(input_regex, f):
            continue

        # Compute the new name for the file
        new_filename = re.sub(input_regex, output_regex, f)

        # Ensure the new name matches all of the allowed patterns
        if not all(re.match(p, new_filename) for p in allow_patterns):
            continue

        # Move straight to the final name. Going through the original download
        # name first (move, then rename) briefly occupies that name inside the
        # staging directory and costs a second filesystem operation.
        shutil.move(Path(download_dir) / f, account_dir / new_filename)
        
        
def copy_to_destination():
    """Copy staged files of every account into the destination tree.

    For each account in ``wellsfargo_accounts`` the matching sub-directory of
    ``destination_dir`` is created when missing, and every entry of the
    account's staging directory is copied there unless a path with the same
    name already exists at the destination.
    """
    for acct in wellsfargo_accounts:
        subdir = account_dirs[acct]
        staged_dir = Path(staging_dir) / subdir
        target_dir = Path(destination_dir) / subdir
        os.makedirs(target_dir, exist_ok=True)

        for name in os.listdir(staged_dir):
            target = Path(target_dir) / name
            # Existing destination files are never overwritten
            if os.path.exists(target):
                continue
            shutil.copy(Path(staged_dir) / name, target)
        

def main():
    """Download Wells Fargo statements for every configured account.

    Opens Chrome configured to save PDFs into ``download_dir``, logs in with
    ``wellsfargo_user`` / ``wellsfargo_password``, navigates to the statements
    page and, for each account in ``wellsfargo_accounts``, clicks every
    statement link and moves the downloaded files to staging. Afterwards the
    staged files are copied to their destination and both ``download_dir``
    and ``staging_dir`` are deleted.

    Raises:
        selenium.common.exceptions.TimeoutException: when an expected page
            element does not become clickable within 10 seconds. The browser
            is still shut down in that case; the download and staging
            directories are left in place.
    """
    # Make sure the download directory exists so that listing and removing it
    # later cannot fail when nothing was downloaded.
    os.makedirs(download_dir, exist_ok=True)

    # Set Chrome to download PDFs instead of viewing them in the browser
    chrome_options = webdriver.ChromeOptions()
    prefs = {
        "download.default_directory": download_dir,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "plugins.always_open_pdf_externally": True,  # This will force PDFs to be downloaded
    }
    chrome_options.add_experimental_option("prefs", prefs)

    driver = webdriver.Chrome(options=chrome_options)
    try:
        wait = WebDriverWait(driver, 10)

        driver.get('https://wellsfargo.com')

        userid_box = driver.find_element(By.ID, 'userid')
        userid_box.send_keys(wellsfargo_user)

        # Apparently necessary so the site doesn't suspect a bot
        time.sleep(5)
        password_box = driver.find_element(By.ID, 'password')

        # TODO get this from a file - there's probably a package like .dotenv or something like it
        password_box.send_keys(wellsfargo_password)
        password_box.send_keys(Keys.RETURN)

        accounts_tab = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(), 'Accounts')]")))
        accounts_tab.click()

        statements = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "View Statements & Documents")))
        statements.click()

        statements_and_disclosures = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(), 'Statements and Disclosures')]")))
        statements_and_disclosures.click()

        # The account picker is located as a <span> reading 'Select account'
        # before the first selection, and afterwards as a <label> containing
        # the previously selected account.
        picker_tag = 'span'
        account_label = 'Select account'

        for account in wellsfargo_accounts:
            select_account = wait.until(EC.element_to_be_clickable((By.XPATH, f"//{picker_tag}[contains(text(), '{account_label}')]")))

            # select_account.click() doesn't work here, but this does
            driver.execute_script("arguments[0].click();", select_account)

            account_item = wait.until(EC.element_to_be_clickable((By.XPATH, f"//span[contains(text(), '{account}')]")))
            account_item.click()

            # Wait until at least one statement link is clickable before
            # collecting all of them.
            wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(text(), 'Statement ')]")))
            statement_items = driver.find_elements(By.XPATH, "//span[contains(text(), 'Statement ')]")
            for item in statement_items:
                item.click()

            # Allow download time
            time.sleep(2)
            move_from_download_to_staging(account)

            picker_tag = 'label'
            account_label = account
    finally:
        # Always shut down the browser and chromedriver, even when a wait
        # times out, so no orphaned processes are left behind.
        driver.quit()

    # copy new files to their final destination
    copy_to_destination()

    # cleanup
    shutil.rmtree(download_dir)
    shutil.rmtree(staging_dir)

# Run the download only when executed as a script (or notebook cell), not
# when this module is imported.
if __name__ == "__main__":
    main()

TimeoutException: Message: 
Stacktrace:
0   chromedriver                        0x00000001031a2a80 chromedriver + 4385408
1   chromedriver                        0x000000010319b38c chromedriver + 4354956
2   chromedriver                        0x0000000102db8b0c chromedriver + 281356
3   chromedriver                        0x0000000102dfb2f8 chromedriver + 553720
4   chromedriver                        0x0000000102e33d24 chromedriver + 785700
5   chromedriver                        0x0000000102defeec chromedriver + 507628
6   chromedriver                        0x0000000102df08c4 chromedriver + 510148
7   chromedriver                        0x000000010316a43c chromedriver + 4154428
8   chromedriver                        0x000000010316eea0 chromedriver + 4173472
9   chromedriver                        0x000000010314fff8 chromedriver + 4046840
10  chromedriver                        0x000000010316f78c chromedriver + 4175756
11  chromedriver                        0x0000000103142fb8 chromedriver + 3993528
12  chromedriver                        0x000000010318d21c chromedriver + 4297244
13  chromedriver                        0x000000010318d398 chromedriver + 4297624
14  chromedriver                        0x000000010319af84 chromedriver + 4353924
15  libsystem_pthread.dylib             0x000000018bee2f94 _pthread_start + 136
16  libsystem_pthread.dylib             0x000000018beddd34 thread_start + 8
