Skip to content

Python: Short, Self Contained, Correct Example

Steve Smith edited this page Sep 17, 2018 · 3 revisions

Please test your Python install environment with this basic example. If this example doesn't work, the script won't work either.

# module dependencies
import psutil, signal
from selenium import webdriver
import fake_useragent as fake_ua

# Source of random user-agent strings, and a local proxy endpoint that is
# only referenced by the commented-out proxy switch further down.
ua = fake_ua.UserAgent()
proxy_addr = '127.0.0.1:8180'

# Collect every Chrome command-line switch in one place and apply them in order.
chrome_options = webdriver.ChromeOptions()
for chrome_switch in (
    'headless',
    # https://stackoverflow.com/questions/49565042/way-to-change-google-chrome-user-agent-in-selenium
    f"user-agent={ua.random}",
    "window-size=1296,1018",
    # Disable image downloads; see https://stackoverflow.com/questions/18657976/disable-images-in-selenium-google-chromedriver
    'blink-settings=imagesEnabled=false',
    'mute-audio',
):
    chrome_options.add_argument(chrome_switch)

if False:
    # Disabled on purpose: apparently headless Chrome doesn't set preferences.
    chrome_options.add_experimental_option("prefs", {"acceptInsecureCerts": True})

# Optional switches, left disabled:
# chrome_options.add_argument("--proxy-server={}".format(proxy_addr))
# chrome_options.binary_location=chrome_executable

# Show the capabilities that will be sent to chromedriver.
print(dict(chrome_options.to_capabilities()))

# Start Chrome with the options built above. `options=` is the supported
# keyword; the older `chrome_options=` spelling is deprecated and rejected by
# later Selenium releases.
driver = webdriver.Chrome(options=chrome_options)

# Candidate pages for the smoke test; the last entry is the one in use.
smoke_test_urls = (
    'https://amazon.com',
    'https://check.torproject.org',
    'http://whatsmyuseragent.org',
)
url_get = smoke_test_urls[-1]

# Load the page and save a screenshot as visual proof that rendering works.
driver.get(url_get)
# Sets the implicit wait (in seconds) applied to subsequent element lookups.
driver.implicitly_wait(10)
driver.get_screenshot_as_file('main-page.png')

# Report which browser and chromedriver versions are actually running.
caps = driver.capabilities
# W3C-mode drivers report "browserVersion"; legacy drivers report "version".
# Accept either so the version report does not die with a KeyError.
browser_version = caps.get("browserVersion", caps.get("version"))
print("{} version is {}, chromedriver version is {}".format(
    caps["browserName"], browser_version, caps["chrome"]["chromedriverVersion"]))

# GET
driver.get('http://www.google.com/search?q=fubar&safe=active')

# Print the first link of every search-result container (`div.g`).
for result_div in driver.find_elements_by_css_selector('div.g'):
    # The plural lookup returns an empty list instead of raising when a
    # container has no anchor, so one odd result cannot abort the listing.
    anchors = result_div.find_elements_by_tag_name('a')
    if not anchors:
        continue
    # Query the attribute once per result instead of once to filter and
    # again to print.
    link = anchors[0].get_attribute('href')
    if link is not None:
        print(link)

# Best-effort reset of browser state: cookies first, then both web storages.
# Any failure is reported and the script carries on.
try:
    driver.delete_all_cookies()
    for storage_reset_js in ('window.localStorage.clear();',
                             'window.sessionStorage.clear();'):
        driver.execute_script(storage_reset_js)
except Exception as err:
    print(err)

# Clean up: shut the browser down, then make sure no driver or browser
# processes are left behind.
pid = driver.service.process.pid

# Snapshot the driver process and all of its descendants BEFORE quitting:
# once the driver has exited, its pid can no longer be looked up.
# (A psutil.Process is not iterable; children() is what lists child processes.)
try:
    driver_proc = psutil.Process(pid)
    leftovers = driver_proc.children(recursive=True) + [driver_proc]
except psutil.NoSuchProcess:
    leftovers = []

try:
    driver.close()
    driver.quit()
except Exception as e:
    print(e)

# Fallback: terminate anything from the snapshot that is still around,
# children first and the driver process last. Processes that already
# exited are skipped.
for proc in leftovers:
    try:
        proc.send_signal(signal.SIGTERM)
    except psutil.NoSuchProcess:
        pass
del driver
Clone this wiki locally