# Introduction to Web Scraping using Selenium
Adapted from https://medium.com/the-andela-way/introduction-to-web-scraping-using-selenium-7ec377a8cf72

In [7]:
# allows you to launch/initialise a browser
from selenium import webdriver 
# allows you to search for things using specific parameters
from selenium.webdriver.common.by import By 
# allows you to wait for a page to load
from selenium.webdriver.support.ui import WebDriverWait 
# specify what you are looking for on a specific page in order to determine that the web page has loaded
from selenium.webdriver.support import expected_conditions as EC 
# handling a timeout situation
from selenium.common.exceptions import TimeoutException

In [8]:
# Configure Chrome to start in incognito mode.
# The switch must be written with two ASCII hyphens and no spaces; an em dash
# (' — incognito') is not a valid Chrome switch, so incognito was never requested.
option = webdriver.ChromeOptions()
option.add_argument('--incognito')

# create a new instance of Chrome
# `options=` is the supported keyword; `chrome_options=` is deprecated.
# NOTE(review): the chromedriver path below is machine-specific -- adjust it for your environment.
browser = webdriver.Chrome(executable_path='C://Users//Rach//Documents//chromedriver//chromedriver', options=option)

# in this example, the desired website url is NBA Player Salaries
browser.get('https://hoopshype.com/salaries/players/')

# implement a try/except for handling a timeout situation should it occur
timeout = 20    # wait up to 20 seconds for the page to load
# We assume that the branding logo is among the last things to load, so we check for that
try:
    WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.XPATH, "//img[@class='branding__logo']")))
except TimeoutException:
    # The page never became ready: report it and shut the browser session down.
    # Later cells that use `browser` will fail after this point.
    print('Timed out waiting for page to load')
    browser.quit()

In [9]:
# get all player names
# find_elements(By.XPATH, ...) returns a list of selenium WebElement objects;
# it replaces find_elements_by_xpath, which was removed in Selenium 4.
player_element = browser.find_elements(By.XPATH, '//td[@class="name"]')
# use list comprehension to get the actual player names and not the selenium objects
# the first match is skipped (presumably the table's header cell -- verify against the page)
players = [x.text for x in player_element[1:]]
# print out all the player names
print('players:')
print(players, '\n')

players:
['Stephen Curry', 'Chris Paul', 'Russell Westbrook', 'James Harden', 'John Wall', 'LeBron James', 'Kevin Durant', 'Blake Griffin', 'Paul George', 'Klay Thompson', 'Mike Conley', 'Kemba Walker', 'Kawhi Leonard', 'Jimmy Butler', 'Tobias Harris', 'Kyrie Irving', 'Khris Middleton', 'Anthony Davis', 'Damian Lillard', 'Kevin Love', 'Ben Simmons', 'Pascal Siakam', 'Kyle Lowry', 'Steven Adams', 'Nikola Jokic', 'Andrew Wiggins', 'Joel Embiid', 'Kristaps Porzingis', 'Karl-Anthony Towns', 'Devin Booker', 'CJ McCollum', 'Bradley Beal', 'Andre Drummond', "D'Angelo Russell", 'Gordon Hayward', 'Otto Porter', 'DeMar DeRozan', 'Giannis Antetokounmpo', 'Al Horford', 'Jamal Murray', 'Brandon Ingram', 'Nikola Vucevic', 'Jrue Holiday', 'Rudy Gobert', 'Buddy Hield', 'LaMarcus Aldridge', 'Jaylen Brown', 'Draymond Green', 'Harrison Barnes', 'Fred VanVleet', 'Victor Oladipo', 'Malcolm Brogdon', 'Gary Harris', 'Danilo Gallinari', 'Zach LaVine', 'Jerami Grant', 'Tim Hardaway Jr', 'Julius Randle', 'Terry

In [10]:
# now get the player salaries
# find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which was removed in Selenium 4.
salary_element = browser.find_elements(By.XPATH, '//td[@class="hh-salaries-sorted"]')
# same concept as the list comprehension used for the player names:
# take the text of every match except the first (presumably the header cell -- verify against the page)
salaries = [x.text for x in salary_element[1:]]
print('salaries:')
print(salaries, '\n')

salaries:
['$43,006,362', '$41,358,814', '$41,358,814', '$41,254,920', '$41,254,920', '$39,219,566', '$39,058,950', '$36,810,996', '$35,450,412', '$35,361,360', '$34,502,130', '$34,379,100', '$34,379,100', '$34,379,100', '$34,358,850', '$33,460,350', '$33,051,724', '$32,742,000', '$31,626,953', '$31,258,256', '$30,559,200', '$30,559,200', '$30,500,000', '$29,592,695', '$29,542,010', '$29,542,010', '$29,542,010', '$29,467,800', '$29,467,800', '$29,467,800', '$29,354,152', '$28,751,774', '$28,751,774', '$28,649,250', '$28,500,000', '$28,489,239', '$27,739,975', '$27,528,088', '$27,500,000', '$27,285,000', '$27,285,000', '$26,000,000', '$25,876,111', '$25,775,281', '$24,931,817', '$24,000,000', '$23,735,118', '$22,246,956', '$22,215,909', '$21,250,000', '$21,000,000', '$20,700,000', '$19,610,714', '$19,500,000', '$19,500,000', '$19,050,000', '$18,975,000', '$18,900,000', '$18,900,000', '$18,500,000', '$18,136,364', '$18,000,000', '$18,000,000', '$18,000,000', '$18,000,000', '$17,850,000',

In [11]:
# End the whole WebDriver session. close() would only close the current window
# and can leave the chromedriver process running; quit() closes every window
# and stops the driver, matching the cleanup used in the timeout handler.
browser.quit()

In [12]:
# Pair each scraped name with the salary at the same position and print one
# "name: salary" line per pair. zip stops at the end of the shorter list.
print('Player Name : Salary')
for name, pay in zip(players, salaries):
    print(': '.join((name, pay)))

Player Name : Salary
Stephen Curry: $43,006,362
Chris Paul: $41,358,814
Russell Westbrook: $41,358,814
James Harden: $41,254,920
John Wall: $41,254,920
LeBron James: $39,219,566
Kevin Durant: $39,058,950
Blake Griffin: $36,810,996
Paul George: $35,450,412
Klay Thompson: $35,361,360
Mike Conley: $34,502,130
Kemba Walker: $34,379,100
Kawhi Leonard: $34,379,100
Jimmy Butler: $34,379,100
Tobias Harris: $34,358,850
Kyrie Irving: $33,460,350
Khris Middleton: $33,051,724
Anthony Davis: $32,742,000
Damian Lillard: $31,626,953
Kevin Love: $31,258,256
Ben Simmons: $30,559,200
Pascal Siakam: $30,559,200
Kyle Lowry: $30,500,000
Steven Adams: $29,592,695
Nikola Jokic: $29,542,010
Andrew Wiggins: $29,542,010
Joel Embiid: $29,542,010
Kristaps Porzingis: $29,467,800
Karl-Anthony Towns: $29,467,800
Devin Booker: $29,467,800
CJ McCollum: $29,354,152
Bradley Beal: $28,751,774
Andre Drummond: $28,751,774
D'Angelo Russell: $28,649,250
Gordon Hayward: $28,500,000
Otto Porter: $28,489,239
DeMar DeRozan: $27,