In [1]:
from pathlib import Path

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from mersearch.helpers import smiles_to_mol_file_format

In [2]:
def scroll_shim(passed_in_driver, object, nav_offset=120):
    """Scroll the page so that an element is in view before interacting with it.

    Works around Firefox raising when asked to move to an element that is
    outside the viewport: the window is scrolled to the element's page
    coordinates and then nudged back up so a fixed navigation bar does not
    cover the element.

    Args:
        passed_in_driver: driver object exposing ``execute_script(script)``.
        object: element exposing a ``location`` mapping with ``'x'`` and
            ``'y'`` keys. (The name shadows the ``object`` builtin; it is kept
            so existing callers are unaffected.)
        nav_offset: number of pixels to scroll back up after jumping to the
            element. Defaults to 120, the value previously hard-coded.
    """
    # Read the location once: each access may be a separate query to the
    # browser, and a single read guarantees x and y describe the same layout.
    location = object.location
    scroll_by_coord = 'window.scrollTo(%s,%s);' % (location['x'], location['y'])
    scroll_nav_out_of_way = 'window.scrollBy(0, %s);' % -nav_offset
    passed_in_driver.execute_script(scroll_by_coord)
    passed_in_driver.execute_script(scroll_nav_out_of_way)

In [3]:
# --- Firefox / geckodriver configuration -----------------------------------
# Path.home() yields the real home directory; a literal Path('~') is never
# expanded, so the previous value was the string "~/Library/..." rather than
# an actual filesystem path.
profile_path = str(Path.home() / 'Library' / 'Application Support' / 'Firefox' / 'Profiles' / 'pwofczxw.default')
PROXY_HOST = "12.12.12.123"
PROXY_PORT = "1234"
options = Options()
# NOTE(review): this only writes an about:config preference named "profile";
# it does not appear to make Firefox start with that profile directory.
# Selenium documents `options.profile = ...` / the `-profile` argument for
# that purpose -- confirm which behaviour is intended before changing it.
options.set_preference('profile', profile_path)
# network.proxy.type = 1 selects manual proxy configuration.
options.set_preference("network.proxy.type", 1)
# NOTE(review): only the HTTP proxy is configured here. The URLs loaded by
# this notebook are https://, which Firefox routes through the separate
# network.proxy.ssl / network.proxy.ssl_port preferences -- confirm whether
# HTTPS traffic is meant to go through the proxy as well.
options.set_preference("network.proxy.http", PROXY_HOST)
options.set_preference("network.proxy.http_port", int(PROXY_PORT))
options.set_preference("dom.webdriver.enabled", False)
# NOTE(review): the next two names look like Chrome experimental options
# rather than Firefox preferences; presumably they have no effect -- confirm.
options.set_preference('useAutomationExtension', False)
options.set_preference("excludeSwitches", "enable-automation")
service = Service(r'/usr/local/bin/geckodriver')

# Launches a Firefox window controlled through geckodriver.
driver = webdriver.Firefox(service=service, options=options)

In [4]:
# Sanity check: show the class of the object returned by webdriver.Firefox(...)
# to confirm the browser session was created.
print(type(driver))

<class 'selenium.webdriver.firefox.webdriver.WebDriver'>


In [5]:
# Load the VWR structure-search page first: visiting it logs the session in
# automatically, which is required before a search can be performed.
url = "https://us.vwr.com/store/search/searchMol.jsp"
driver.get(url)

In [6]:
# navigate to actual search site
# Wait (up to 30 s) for the element with id "emolPunchout" and take the
# <input> inside it as the button to press.
button_link = WebDriverWait(driver=driver, timeout=30).until(EC.presence_of_element_located((By.ID, "emolPunchout")))
button = button_link.find_element(By.TAG_NAME, 'input')
scroll_shim(driver, button)
actions = ActionChains(driver)
# scroll to the button
actions.move_to_element(button_link).perform()


def _click_reports_stale(element):
    """Click ``element``; return True once the click raises StaleElementReferenceException."""
    try:
        element.click()
    except StaleElementReferenceException:
        # The element reference is no longer valid, i.e. the page has moved on.
        return True
    return False


# navigate to https://vwr.emolecules.com/index.php
# A single click is not always registered, so keep clicking until the button
# goes stale. The retry is driven by WebDriverWait so that it is paced
# (one click per poll interval instead of a tight loop) and bounded: if the
# button never goes stale, a TimeoutException is raised after 30 s instead of
# the cell spinning forever.
WebDriverWait(driver=driver, timeout=30, poll_frequency=0.5).until(
    lambda _driver: _click_reports_stale(button)
)

In [7]:
# find button to load molfile and click it
# Wait until the <span title="Import Molfile"> is clickable (visible and
# enabled), not merely present in the DOM: clicking an element that is present
# but not yet interactable raises instead of opening the import dialog.
import_molfile_button = WebDriverWait(driver=driver, timeout=30).until(
    EC.element_to_be_clickable((By.XPATH, '//span[@title="Import Molfile"]'))
)
import_molfile_button.click()

In [8]:
# Locate the <textarea> that receives the molfile text: wait (up to 30 s) for
# the element with class "chemwriter", then descend into its "content" child
# and take the textarea inside that.
chemwriter_locator = (By.CLASS_NAME, "chemwriter")
chemwriter = WebDriverWait(driver=driver, timeout=30).until(
    EC.presence_of_element_located(chemwriter_locator)
)
molfile_pane = chemwriter.find_element(By.CLASS_NAME, 'content')
molfile_textarea = molfile_pane.find_element(By.TAG_NAME, 'textarea')

In [9]:
# Query structure as a SMILES string; it is converted to molfile text and
# typed into the (emptied) molfile textarea.
smiles = 'O=C1CCCCCCCCCCCCCCO1'
# alternative query structure:
# smiles = 'O=COC(=O)C1(C)CCCC2(C)C3CC(=O)OCC3CCC12'
molfile_textarea.clear()
molfile_text = smiles_to_mol_file_format(smiles)
molfile_textarea.send_keys(molfile_text)

In [10]:
# Press the first <button> found inside the chemwriter element so the pasted
# molfile text is written into the editor.
# NOTE(review): relies on the first <button> under `chemwriter` being the
# dialog's confirm button -- verify against the page markup.
button = chemwriter.find_element(By.TAG_NAME, 'button')
button.click()

In [11]:
# run exact structure search
# Wait until the control named "ex_button" is clickable (visible and enabled)
# rather than merely present, so the click cannot land on an element that is
# not yet interactable.
button = WebDriverWait(driver=driver, timeout=30).until(
    EC.element_to_be_clickable((By.NAME, "ex_button"))
)
button.click()

In [12]:
# search listed structures for first one and click
# Path followed: element with class "hitlist-table" -> id "row_0.0" ->
# class "compound_actions_left" -> first <img>, which is then clicked.
try:
    hitlist_table = WebDriverWait(driver=driver, timeout=30).until(
        EC.presence_of_element_located((By.CLASS_NAME, "hitlist-table"))
    )
    first_hit_icon = (
        hitlist_table
        .find_element(By.ID, 'row_0.0')
        .find_element(By.CLASS_NAME, 'compound_actions_left')
        .find_element(By.TAG_NAME, 'img')
    )
    first_hit_icon.click()
except (NoSuchElementException, TimeoutException):
    # NoSuchElementException: the hit list exists but has no first row/icon.
    # TimeoutException: the hit list itself never appeared within 30 s.
    # Both mean the search produced nothing to open, so report it the same way.
    print("No element, smiles failed")

In [13]:
# Snapshot the compound detail page for parsing.
# The preceding click loads this view asynchronously, so wait (up to 30 s
# each) for the three containers parsed by the following cells before reading
# page_source; otherwise a Run All can capture the page before they exist.
for required_id in ("properties_table", "name_table", "supplier_table"):
    WebDriverWait(driver=driver, timeout=30).until(
        EC.presence_of_element_located((By.ID, required_id))
    )
soup = BeautifulSoup(driver.page_source, 'lxml')

In [14]:
# Build {label: value} from the <table id="properties_table">: for every body
# row after the first (the first row is the table's heading row), the first
# <td> supplies the key and the second <td> the value.
# NOTE(review): `.string` is None for a cell that contains more than one child
# node, so such cells would yield None keys/values -- confirm the markup.
table = soup.find("table", id="properties_table")
table_data = table.tbody.find_all("tr")
property_cells = (body_row.find_all("td") for body_row in table_data[1:])
properties_data = {cells[0].string: cells[1].string for cells in property_cells}

In [15]:
# Parse the "data_table" inside <div id="name_table">.
# Each body row after the first (the "known names" heading row) is a
# label/value pair. Rows labelled "Name" can repeat, so their values are
# gathered into a list stored under the 'names' key; every other label maps
# directly to its value.
name_section = soup.find("div", id="name_table")
table = name_section.find("table", class_="data_table")
table_data = table.tbody.find_all("tr")

names = []
name_data = {}
for body_row in table_data[1:]:
    cells = body_row.find_all("td")
    # Labels carry surrounding whitespace and a trailing colon.
    label = cells[0].string.strip().strip(':')
    value = cells[1].string.strip()
    if label == 'Name':
        names.append(value)
    else:
        name_data[label] = value
name_data['names'] = names

In [16]:
# Parse the "data_table" inside <div id="supplier_table"> into
# {label: value}. The first body row (the "source / compound id" heading row)
# is skipped; labels are trimmed of whitespace and a trailing colon, values of
# whitespace.
supplier_section = soup.find("div", id="supplier_table")
table = supplier_section.find("table", class_="data_table")
table_data = table.tbody.find_all("tr")
supplier_cells = (body_row.find_all("td") for body_row in table_data[1:])
supplier_data = {
    cells[0].string.strip().strip(':'): cells[1].string.strip()
    for cells in supplier_cells
}

In [17]:
# find and click more info button to see prices
# Wait until the element with id "add_item_0" is clickable (visible and
# enabled) rather than merely present, so the click cannot hit an element
# that is not yet interactable.
WebDriverWait(driver=driver, timeout=30).until(
    EC.element_to_be_clickable((By.ID, "add_item_0"))
).click()

In [24]:
# Wait for the price table to be rendered, then snapshot the page.
# A bounded WebDriverWait replaces re-parsing the whole page in a tight loop:
# if no <table class="bbpricetable"> shows up within 30 s a TimeoutException
# is raised instead of the cell spinning forever.
WebDriverWait(driver=driver, timeout=30).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "table.bbpricetable"))
)
soup = BeautifulSoup(driver.page_source, 'lxml')

In [25]:
# get costs
# Select the first <table> whose class is "bbpricetable" from the page
# snapshot; its rows hold the price information parsed below.
table = soup.find("table", attrs={"class": "bbpricetable"})

In [26]:
# Collect every <tr> under the price table's <tbody>; header rows are still
# included here and are filtered out by cell count in the next cell.
table_data = table.tbody.find_all("tr")

In [None]:
# Flatten the price table into one dict per offer with the keys
# 'supplier', 'supplier_id', 'amount', 'units' and 'price'.
# Row kinds are told apart by their number of <td> cells:
#   1 cell  -> large section header (skipped)
#   7 cells -> column-header row (skipped)
#   8 cells -> first offer of a supplier: cells 0-1 identify the supplier,
#              cells 5-7 hold amount / units / price
#   5 cells -> further offer of the most recent supplier: cells 2-4 hold
#              amount / units / price
price_data = []
# Supplier fields carried over onto 5-cell continuation rows. Initialised
# explicitly so that a continuation row appearing before any 8-cell row is
# recorded with an unknown (None) supplier instead of raising NameError on a
# fresh kernel or silently reusing a supplier left over from an earlier run.
supplier_fields = {'supplier': None, 'supplier_id': None}
for tr in table_data:
    td = tr.find_all('td')
    if len(td) == 8:
        supplier_fields = {
            'supplier': td[0].text.strip(),
            # keep only the text that precedes the literal 'Name' in this cell
            'supplier_id': td[1].text.split('Name')[0].strip(),
        }
        offer_cells = td[5:8]
    elif len(td) == 5:
        offer_cells = td[2:5]
    else:
        # header rows (1 or 7 cells) and any other layout carry no offer
        continue
    amount, units, price = (cell.text.strip() for cell in offer_cells)
    price_data.append(
        {**supplier_fields, 'amount': amount, 'units': units, 'price': price}
    )

In [None]:
# Load the eMolecules search page (the same address the punch-out button leads
# to) directly in the current session.
# NOTE(review): presumably this resets the browser for the next search -- confirm.
driver.get('https://vwr.emolecules.com/index.php')