# Create a driver

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

In [2]:
# Page used throughout this section
url = r'https://paulocoliveira.github.io/mypages/jobapplication.html'

# chromedriver binary, given relative to the working directory
service = Service(r'./chromedriver_win.exe')  # for windows
# service = Service(r'./chromedriver_mac')  # for mac

# Start Chrome through that driver binary, then load the page in it
driver = webdriver.Chrome(service=service)
driver.get(url)

In [22]:
# run a snippet of JavaScript inside the page that is currently loaded
driver.execute_script("alert('Hello World!')") # opens an alert box
# NOTE(review): the alert probably has to be dismissed (by hand, or with
# driver.switch_to.alert.accept()) before the next driver command is sent — confirm

In [23]:
driver.forward() # goes one step forward in the browser history, like the browser's Forward button

In [24]:
driver.refresh() # reloads the current page

In [25]:
driver.fullscreen_window() # switches the window to full-screen mode; to maximize it instead, use driver.maximize_window()

In [27]:
driver.set_window_size(800, 600) # resizes the window to 800 (width) x 600 (height) pixels

In [28]:
# save a screenshot of the current window as a PNG file at the given relative path;
# the call returns True when the file was written
# NOTE(review): presumably the ../data_out folder must already exist — confirm
driver.get_screenshot_as_file('../data_out/screenshot.png') # takes a screenshot

True

In [31]:
driver.title # the title of the current page, as a string

'Job Application Form'

In [14]:
driver.current_url # the URL of the page currently loaded, as a string

'https://paulocoliveira.github.io/mypages/jobapplication.html'

In [3]:
driver.page_source[:50] # page_source is the page's HTML as one string; only the first 50 characters are shown
# the full string can be passed on to bs4 or regex to extract data from the source code.

'<html><head>\n    <title>Job Application Form</titl'

# The `By` class

In [7]:
from selenium.webdriver.common.by import By

In [81]:
# copy the element's HTML from the page source:
# <input type="text" id="fullName" name="fullName">
# then locate that element through its id attribute
fullname = driver.find_element(By.ID, "fullName")

In [36]:
fullname # find_element returns a single Web Element object

<selenium.webdriver.remote.webelement.WebElement (session="729f1cd72ed2d97e56b2573cb88c80bb", element="f.F35563CCC5DEBCEFCE492362AEE0B66C.d.FE8C6A8D0BA61B27583F0BCBECF62537.e.101")>

In [37]:
# locate the element whose id is "desiredPosition" (a <select> drop-down)
positions = driver.find_element(By.ID, "desiredPosition")

In [38]:
# print a few properties of the Web Element, one per line
print(
    positions.tag_name,               # HTML tag of the element
    positions.get_attribute("name"),  # value of its name attribute
    positions.is_displayed(),         # whether it is displayed on the page
    positions.is_selected(),          # whether it is currently selected
    positions.parent,                 # the WebDriver instance it was found through
    positions.text,                   # its visible text
    sep="\n",
)

select
desiredPosition
True
False
<selenium.webdriver.chrome.webdriver.WebDriver (session="729f1cd72ed2d97e56b2573cb88c80bb")>
                Developer
                QA
                Designer
                Manager
            


In [39]:
# find_elements (plural) returns a list of every matching Web Element,
# here all <input> tags on the page
input_elements = driver.find_elements(By.TAG_NAME, "input")

In [41]:
# locate the submit button with an XPath: a <button> that is a direct child
# of the element whose id is "jobApplicationForm"
submit = driver.find_element(By.XPATH, """//*[@id="jobApplicationForm"]/button""")
submit

<selenium.webdriver.remote.webelement.WebElement (session="729f1cd72ed2d97e56b2573cb88c80bb", element="f.F35563CCC5DEBCEFCE492362AEE0B66C.d.FE8C6A8D0BA61B27583F0BCBECF62537.e.126")>

# Interact with Elements

In [42]:
fullname.send_keys("Chao Ding") # type the text into the fullName input

In [43]:
fullname.clear() # erase whatever is currently typed in the input

In [44]:
# type into the second <input> found earlier (index 1)
# NOTE(review): presumably this is the email field — confirm against the page
input_elements[1].send_keys("chao.ding@hku.hk")

In [45]:
# click the element whose id is "location2"
# NOTE(review): presumably one of the location choices on the form — confirm
driver.find_element(By.ID, "location2").click()

In [46]:
# click the elements with ids "skill1" and "skill2", one after the other
for skill_id in ("skill1", "skill2"):
    driver.find_element(By.ID, skill_id).click()

In [47]:
submit.click() # click the submit button located earlier

# The `Keys` Class

In [48]:
from selenium.webdriver.common.keys import Keys

In [83]:
driver.find_element(By.ID, "email").clear() # empty the email input before typing into it again

In [84]:
driver.find_element(By.ID, "email").send_keys("chao.ding@hkuhk") # type an address with no dot between "hku" and "hk"

In [85]:
driver.find_element(By.ID, "email").send_keys(2 * Keys.ARROW_LEFT) # press the Left Arrow key twice (the key string repeated two times)

In [86]:
driver.find_element(By.ID, "email").send_keys(".") # type a dot — presumably at the caret position moved in the previous step, giving "hku.hk"

In [87]:
driver.find_element(By.ID, "email").send_keys(Keys.TAB, '12345678')
# send_keys takes several arguments and sends them in the order given:
# press Tab and then input '12345678' in Phone Number

# The `Select` Class

In [9]:
from selenium.webdriver.support.select import Select

In [55]:
positions.text # the visible text of the drop-down: one line per option, with the page's whitespace kept

'                Developer\n                QA\n                Designer\n                Manager\n            '

In [56]:
# wrap the <select> element in a Select helper, which provides the select_by_* methods used next
options = Select(positions)

In [57]:
options.select_by_value("developer")
# note that here "value" refers to the value attribute of the option tag,
# not to the text shown in the drop-down

In [58]:
options.select_by_visible_text("QA")
# note that here "visible text" refers to the text between the opening and closing option tags

In [59]:
options.select_by_index(0)
# note that here "index" refers to the position of the option tag in the select tag;
# counting starts at 0, so this picks the first option

In [62]:
# End the whole WebDriver session: quit() closes every window opened by this
# driver and also shuts down the chromedriver process started for it.
# close() would only close the current window and leave the session behind.
driver.quit()

# ActionChains

In [4]:
from selenium.webdriver.common.action_chains import ActionChains

In [5]:
# Course page to automate in this section
course_url = r'https://ug.hkubs.hku.hk/course'

# chromedriver binary, given relative to the working directory
service = Service(r'./chromedriver_win.exe')  # for windows
# service = Service(r'./chromedriver_mac')  # for mac

# Open a separate Chrome window for this section and load the course page
course_driver = webdriver.Chrome(service=service)
course_driver.get(course_url)

In [65]:
# Locate the search box by its id
search = course_driver.find_element(By.ID, "q")
# The submit button carries several classes (btn, btn--form, btn--submit).
# By.CLASS_NAME is meant for a single class name, so match all of them with
# a CSS selector instead, writing each class as ".name".
confirm = course_driver.find_element(By.CSS_SELECTOR, ".btn.btn--form.btn--submit")

# Chain the steps and run them in order with perform():
# double-click the search box, type "IIMT" into it, then click the submit button
(
    ActionChains(course_driver)
    .double_click(search)
    .send_keys_to_element(search, "IIMT")
    .click(confirm)
    .perform()
)

In [10]:
# or, queued up one by one
actions = ActionChains(course_driver)

# ECON courses in 2024-2025 2nd semester
search = course_driver.find_element(By.ID, "q")
# The submit button carries several classes; By.CLASS_NAME is meant for a single
# class name, so match all of them with a CSS selector (each class as ".name").
confirm = course_driver.find_element(By.CSS_SELECTOR, ".btn.btn--form.btn--submit")
semester = Select(course_driver.find_element(By.ID, "semester"))

# search.clear() and semester.select_by_visible_text() act on the page immediately;
# the calls on `actions` are only queued and run, in order, when perform() is called.
search.clear()
actions.send_keys_to_element(search, "ECON")
semester.select_by_visible_text("2")
actions.move_to_element(confirm)
actions.click()  # no element given: clicks at the current mouse position (on confirm)
actions.perform()

In [67]:
course_driver.current_url # notice the current URL: the search term and the filters appear as query parameters

'https://ug.hkubs.hku.hk/course?q=ECON&academic_year=2024-2025&semester=sem-2'

In [11]:
import pandas as pd
from io import StringIO

# take the HTML the browser currently holds and wrap it in a file-like object
page_html = StringIO(course_driver.page_source)
# read_html returns one DataFrame per table it parses from the HTML
course_tables = pd.read_html(page_html)
# show the first rows of the first table
course_tables[0].head()

Unnamed: 0,Code,Course Name,Lecturer,Semester,Outline
0,ECON1210K,Introductory Microeconomics,Prof. Chen ZHAO,2,Download
1,ECON1210L,Introductory Microeconomics,Dr. Si CHEN,2,Download
2,ECON1210M,Introductory Microeconomics,Dr. Chi Pui HO,2,Download
3,ECON1210NO,Introductory Microeconomics,Dr. Chi Pui HO,2,Download
4,ECON1220GJK,Introductory Macroeconomics,Dr. Chi Wa YUEN,2,Download


# Explicit Waits

In [2]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [4]:
# Page to open in this section
google_url = r'https://www.google.com/'

# chromedriver binary, given relative to the working directory
service = Service(r'./chromedriver_win.exe')  # for windows
# service = Service(r'./chromedriver_mac')  # for mac

# Start a Chrome window for this section and load Google in it
google = webdriver.Chrome(service=service)
google.get(google_url)

In [5]:
# locate the search bar on the Google page through its class name
# NOTE(review): "gLFyf" looks like an auto-generated class name and may change over time — confirm
search = google.find_element(By.CLASS_NAME, "gLFyf") # the search bar in Google

In [15]:
search.send_keys("Cyberport", Keys.ENTER) # send keywords to the search bar, then press Enter

# Wait up to 10 seconds until the map button is visible and enabled, i.e. can
# actually be clicked. Being present in the DOM alone does not guarantee that
# a click will succeed. until() returns the element once the condition holds
# and raises TimeoutException if it never does within the time limit.
map_button = WebDriverWait(google, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="hdtb-sc"]/div/div/div[1]/div/div[3]/a')
    )
)

map_button.click()

# Exceptions

In [12]:
from selenium.common import exceptions

In [16]:
dir(exceptions)[:5] # show the first five names defined in the exceptions module; there are many more

['ERROR_URL',
 'ElementClickInterceptedException',
 'ElementNotInteractableException',
 'ElementNotSelectableException',
 'ElementNotVisibleException']

# Use the following template at all times

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# import other modules as needed: time, re, pandas, etc.

In [77]:
# --- settings -------------------------------------------------------------
url = r'http://selenium.dev'
agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36'

# --- browser options: pass the user-agent string above to Chrome ----------
options = Options()
options.add_argument(f"user-agent={agent}")

# --- chromedriver binary, given relative to the working directory ---------
service = Service(r'./chromedriver_win.exe')  # for windows
# service = Service(r'./chromedriver_mac')  # for mac

# --- start Chrome with these options and open the page --------------------
driver = webdriver.Chrome(service=service, options=options)
driver.get(url)