In [11]:
# | default_exp crawler.crawler

In [12]:
#| exporti
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from bs4 import BeautifulSoup
# import time


In [13]:
#| export
def driversetup(is_headless: bool = True) -> webdriver.Chrome:
    """
    Create and return a new Chrome WebDriver.

    Args:
        is_headless: when True (the default) Chrome is started with the
            `--headless` argument; pass False to get a visible browser window.

    Returns:
        A freshly constructed `webdriver.Chrome`. This function never shuts
        the driver down, so the caller is responsible for doing so.
    """
    options = webdriver.ChromeOptions()

    # run Selenium in headless mode
    if is_headless:
        options.add_argument("--headless")

    # Always passed, regardless of headless mode.
    # NOTE(review): presumably needed so Chrome can start inside containers /
    # CI runners -- confirm before removing.
    options.add_argument("--no-sandbox")

    return webdriver.Chrome(options=options)

In [14]:
#| export
def wait_and_return(
    driver: webdriver,
    element_id: str,
    el_type=By.ID,
    min_sleep_time=15,
    return_soup: bool = False,
):
    """
    Wait for an element to be present in the DOM and return it.

    Args:
        driver: Selenium driver whose current page is polled.
        element_id: locator value, interpreted according to `el_type`
            (an id by default; a class name, field name, ... otherwise).
        el_type: Selenium `By` strategy used together with `element_id`.
        min_sleep_time: maximum number of seconds to wait for the element.
            The page is polled once per second until then.
        return_soup: when True, return the element's inner HTML parsed into a
            `BeautifulSoup` object instead of the Selenium element itself.

    Returns:
        The located element (or its inner HTML as `BeautifulSoup` when
        `return_soup` is True), or None if the element could not be obtained.
        Failures are printed rather than raised, so callers must check for
        None before using the result.
    """

    try:
        # Honour the caller-supplied limit so the wait and the message printed
        # below always agree on how long was actually waited.
        element = WebDriverWait(
            driver, timeout=min_sleep_time, poll_frequency=1
        ).until(EC.presence_of_element_located((el_type, element_id)))

        if return_soup:
            # NOTE(review): no parser is named, so BeautifulSoup picks whichever
            # one is installed. Left as-is to keep the resulting tree shape
            # unchanged; consider pinning a parser project-wide.
            return BeautifulSoup(element.get_attribute("innerHTML"))
        return element

    except Exception as e:
        # Best effort by design: report the problem and signal failure with
        # None instead of propagating the exception.
        print(e)
        print(
            f"Timeout Exception: did not load within {min_sleep_time} seconds.")
        return None


In [15]:
#| export

def authenticate_driver(driver, domo_instance, domo_username, domo_password):
    """
    Open the sign-in page of a Domo instance and submit the given credentials.

    Args:
        driver: Selenium driver to authenticate; it is navigated to
            `https://{domo_instance}.domo.com/auth/index`.
        domo_instance: sub-domain of the Domo instance.
        domo_username: value typed into the form field named `username`.
        domo_password: value typed into the form field named `password`.

    Returns:
        The same `driver`, after the sign-in button has been clicked. The
        function does not wait for, or verify, the outcome of the login.

    Raises:
        RuntimeError: if the sign-in button or one of the credential fields
            could not be located on the page.
    """

    url = f"https://{domo_instance}.domo.com/auth/index"

    driver.get(url)

    button = wait_and_return(driver, element_id="sign-in",
                             el_type=By.CLASS_NAME, min_sleep_time=4)

    form_username = wait_and_return(driver, "username", el_type=By.NAME)

    form_password = wait_and_return(driver, "password", el_type=By.NAME)

    # wait_and_return reports failures by returning None; stop here with a
    # descriptive error instead of failing later on a None attribute access.
    located = {
        "sign-in button": button,
        "username field": form_username,
        "password field": form_password,
    }
    missing = [label for label, element in located.items() if element is None]
    if missing:
        raise RuntimeError(
            f"Could not locate {', '.join(missing)} on {url}; "
            "the sign-in form did not load as expected."
        )

    # Sending input
    form_username.clear()
    form_username.send_keys(domo_username)
    form_password.clear()
    form_password.send_keys(domo_password)
    button.click()

    return driver

In [17]:
#|export
def pagesource(url,
               element_id: str,
               element_type=By.ID,
               driver=None,
               timeout: float = 15,
               ):
    """
    Load `url`, wait for a marker element, and return the page as soup.

    Args:
        url: address to load.
        element_id: locator value of the element whose presence signals that
            the page is ready, interpreted according to `element_type`.
        element_type: Selenium `By` strategy used together with `element_id`.
        driver: an existing Selenium driver to reuse. When omitted, a new
            driver is created via `driversetup()` and shut down before this
            function returns; a caller-supplied driver is left open.
        timeout: maximum number of seconds to wait for the marker element.

    Returns:
        `BeautifulSoup` built from the driver's page source. The page source
        is parsed even when the marker element never appeared (the failure is
        only printed), so the result may describe a partially loaded page.
    """

    # Only a driver created here is ours to shut down.
    owns_driver = not driver
    if owns_driver:
        driver = driversetup()

    try:
        print(f"💤 loading {url} 💤")

        driver.get(url)

        try:
            WebDriverWait(driver, timeout=timeout, poll_frequency=1).until(
                EC.presence_of_element_located(
                    (element_type, element_id))
            )
            print(f"Page {url} is loaded within {timeout} seconds.")

        except Exception as e:
            # Best effort by design: report the problem and still return
            # whatever the browser has rendered so far.
            print(e)
            print(
                f"Timeout Exception: Page {url} did not load within {timeout} seconds.")

        # NOTE(review): no parser is named, so BeautifulSoup picks whichever
        # one is installed. Left as-is to keep the resulting tree unchanged.
        return BeautifulSoup(driver.page_source)

    finally:
        if owns_driver:
            # quit() ends the whole session and stops the driver process;
            # close() would only close the current window. Running this in
            # `finally` also covers failures in driver.get() or parsing.
            driver.quit()
