In [24]:
# Imports
# Reference: https://www.youtube.com/watch?v=lTypMlVBFM4

from selenium import webdriver
import pandas as pd
from datetime import date
import time

In [25]:
# WTA Tennis Singles Rankings Page:
# URL of the page that the browser driver loads later in the notebook.

wta_url = "https://www.wtatennis.com/rankings/singles"

In [26]:
# Chrome Driver:
# Start a Chrome session through the local chromedriver executable and open
# the rankings page.

# Raw string: the Windows path contains backslash sequences ("\P", "\c") that
# are not valid escapes in a normal string literal and trigger a
# DeprecationWarning/SyntaxWarning on recent Python versions.
CHROMEDRIVER_PATH = r'C:\Program Files (x86)\chromedriver.exe'

driver = webdriver.Chrome(CHROMEDRIVER_PATH)

# Let element lookups poll for up to 10 seconds before giving up, so the
# find_element calls in the following cells do not fail when the notebook is
# run top-to-bottom and an element has not been rendered yet.
driver.implicitly_wait(10)

driver.get(wta_url)

In [27]:
# Click the "Only essential cookies" button on the cookie notice to continue.

# The button is located by an exact match on its full class attribute.
cookie_button_xpath = "//button[@class='button button--icon-left cookie-notice__button cookie-notice__button--alt js-cookie-notice-btn']"
button = driver.find_element_by_xpath(cookie_button_xpath)

button.click()

In [28]:
# Scroll down a little then click show more:
# Reference: https://stackoverflow.com/questions/20986631/how-can-i-scroll-a-web-page-using-selenium-webdriver-in-python

SCROLL_STEPS = 10      # number of scroll increments
SCROLL_STEP_PX = 200   # vertical distance per increment, in pixels
SCROLL_PAUSE_S = 1     # pause after each increment, in seconds

for i in range(SCROLL_STEPS):
    # scrollBy(x, y) takes relative offsets: 0 px horizontally, SCROLL_STEP_PX
    # px downwards. scrollTo's first argument is the x-coordinate, so passing
    # window.scrollY there would also change the horizontal position.
    driver.execute_script("window.scrollBy(0, arguments[0]);", SCROLL_STEP_PX)
    time.sleep(SCROLL_PAUSE_S)

In [29]:
# Click the "Show more" button to load more rows:

# The button is located by an exact match on its full class attribute.
show_more_xpath = "//button[@class='btn widget-footer__more-button rankings__show-more js-show-more-button']"
show_more = driver.find_element_by_xpath(show_more_xpath)

show_more.click()

In [30]:
# Scroll more after clicking show more:

from selenium.webdriver.common.keys import Keys

# Send PAGE_DOWN key presses to the root <html> element, pausing one second
# after each press.
html = driver.find_element_by_tag_name('html')

page_down_presses = 9
for press_number in range(page_down_presses):
    html.send_keys(Keys.PAGE_DOWN)
    time.sleep(1)

## Webscrape Items

Obtain:

* Rank
* Move Up Or Down
* Player Name
* Country Ticker
* Age
* Tournaments Played
* WTA Ranking Points

In [31]:
# Ranks:
# Text of the first <span> inside the first cell of every table row.

rank_elements = driver.find_elements_by_xpath(
    '//*[@id="main-content"]/section/div[3]/table/tbody/tr/td[1]/span[1]'
)
ranks = [element.text for element in rank_elements]

In [32]:
# Player Name
# Text of the <a> link matched by the XPath below, one per table row.

player_elements = driver.find_elements_by_xpath(
    '//*[@id="main-content"]/section/div[3]/table/tbody/tr/td[1]/a'
)
wta_player = [element.text for element in player_elements]

In [33]:
# Country Ticker:
# Text of the <span> inside the third cell of every table row.
country_elements = driver.find_elements_by_xpath(
    '//*[@id="main-content"]/section/div[3]/table/tbody/tr/td[3]/span'
)
country_tickers = [element.text for element in country_elements]

In [34]:
# Player Age:
# Text of the fourth cell of every table row.
age_elements = driver.find_elements_by_xpath(
    '//*[@id="main-content"]/section/div[3]/table/tbody/tr/td[4]'
)
wta_age = [element.text for element in age_elements]

In [35]:
# Tournaments Played:
# Text of the fifth cell of every table row.
tournament_elements = driver.find_elements_by_xpath(
    '//*[@id="main-content"]/section/div[3]/table/tbody/tr/td[5]'
)
tournaments_played = [element.text for element in tournament_elements]

In [36]:
# Points:
# Text of the sixth cell of every table row.
points_elements = driver.find_elements_by_xpath(
    '//*[@id="main-content"]/section/div[3]/table/tbody/tr/td[6]'
)
wta_points = [element.text for element in points_elements]

### Dealing With Rank Movements (Icons & Text)

In [37]:
# Rank movement: read the class attribute of the second <span> in the first
# cell of every table row.
movement_elements = driver.find_elements_by_xpath(
    '//*[@id="main-content"]/section/div[3]/table/tbody/tr/td[1]/span[2]'
)
rank_movement_icon = [element.get_attribute('class') for element in movement_elements]

In [38]:
# Convert movement icons into signs: movement up -> "+", movement down -> "-", no movement -> empty string:

def sign_change(x):
    """Map a rank-movement element's CSS class string to a sign prefix.

    Parameters
    ----------
    x : str or None
        Value of the element's ``class`` attribute.

    Returns
    -------
    str
        "-" if the class list contains ``rankings__movement--down``,
        "+" if it contains ``rankings__movement--up``, otherwise "".
    """
    # Compare individual class tokens instead of the whole attribute string,
    # so trailing/extra whitespace or a different class order does not
    # silently turn an up/down movement into "". Non-string input (e.g. None)
    # is treated as having no classes.
    class_names = x.split() if isinstance(x, str) else ()

    if 'rankings__movement--down' in class_names:
        return "-"
    elif 'rankings__movement--up' in class_names:
        return "+"
    else:
        return ""

# Apply sign_change to every scraped class string
rank_movement_signs = [sign_change(icon_class) for icon_class in rank_movement_icon]

In [39]:
# Rank Movement Number/Text:
# Visible text of the second <span> in the first cell of every table row.
movement_text_elements = driver.find_elements_by_xpath(
    '//*[@id="main-content"]/section/div[3]/table/tbody/tr/td[1]/span[2]'
)
rank_movement_text = [element.text for element in movement_text_elements]

In [40]:
# Prefix each movement text with its sign, pairing the two lists position by position:

rank_movement_list = [
    sign + text
    for sign, text in zip(rank_movement_signs, rank_movement_text)
]

In [41]:
# Replace a value that is just a lone "-" with 0 to represent no change in rank movement:

def zero_rank_change(y):
    """Return the integer 0 for a bare '-' value; return anything else unchanged.

    Note that the replacement is the int ``0``, while every other value is
    passed through exactly as received.
    """
    return 0 if y == '-' else y

# Apply zero_rank_change to every combined movement value
rank_movements = [zero_rank_change(movement) for movement in rank_movement_list]

In [42]:
### Create Dataframe:
# Assemble the scraped lists into one table, one column per list.
ranking_columns = {
    'Rank': ranks,
    'Rank Movement': rank_movements,
    'WTA Player': wta_player,
    'Country': country_tickers,
    'Age': wta_age,
    'Tournaments Played': tournaments_played,
    'Points': wta_points,
}
wta_rankings_df = pd.DataFrame(ranking_columns)

# Keep only the first 100 rows (top 100 WTA players):
wta_top100_df = wta_rankings_df.iloc[:100]

In [43]:
# Preview the first 10 rows of the top-100 table.
wta_top100_df.head(10)

Unnamed: 0,Rank,Rank Movement,WTA Player,Country,Age,Tournaments Played,Points
0,1,0,Ashleigh Barty,AUS,25,14,8330
1,2,0,Aryna Sabalenka,BLR,23,18,5563
2,3,0,Barbora Krejcikova,CZE,26,26,5003
3,4,1,Paula Badosa,ESP,24,30,4429
4,5,-1,Karolina Pliskova,CZE,29,15,4347
5,6,2,Maria Sakkari,GRE,26,17,4191
6,7,-1,Anett Kontaveit,EST,26,20,4137
7,8,1,Iga Swiatek,POL,20,15,3936
8,9,-2,Garbiñe Muguruza,ESP,28,18,3350
9,10,0,Ons Jabeur,TUN,27,18,3065


## Save Raw Data to an Excel (.xlsx) File and a CSV File

In [44]:
# Today's date as a string (e.g. '2022-02-27'); the same expression is used
# in the output file names below.
str(date.today())

'2022-02-27'

In [45]:
# Write to excel file:
from datetime import date

# File name is the fixed prefix followed directly by today's date.
# NOTE(review): there is no separator between "Top100" and the date, so the
# name reads e.g. "WTA_TennisPlayers_Top1002022-02-27.xlsx" - confirm intended.
excel_filename = "WTA_TennisPlayers_Top100" + str(date.today()) + ".xlsx"
wta_top100_df.to_excel(excel_filename, index=False)

In [46]:
## Write as .csv file:
# File name is the fixed prefix followed directly by today's date.
# NOTE(review): there is no separator between "Top100" and the date, so the
# name reads e.g. "WTA_TennisPlayers_Top1002022-02-27.csv" - confirm intended.
csv_filename = "WTA_TennisPlayers_Top100" + str(date.today()) + ".csv"
wta_top100_df.to_csv(csv_filename, index=False)