In [1]:
import csv
import datetime
import numpy as np
import pandas as pd
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
from bs4 import BeautifulSoup
from requests import get

In [2]:
df = pd.read_excel('data/base_data.xlsx')
playersSet = set(df['Winner']).union(set(df['Loser']))
players = {n : [] for n in playersSet}

In [3]:
def write_csv(players):
    with open('data/playersdata.csv', 'w', newline='', encoding='utf-8-sig') as file :
        writer = csv.writer(file)
        writer.writerow(['Rank', 'Player', 'Elo', 'HardRaw','ClayRaw', 'GrassRaw', 
                         'hElo', 'cElo', 'gElo', 'BirthDate', 'Hand', 'BackHand'])
        for p in players.values():
            writer.writerow(p)

In [4]:
def uniformName(w):
    surname = w.split()
    newName = surname[-1] + ' '
    for n in range(len(surname)-2, -1, -1):
        newName += surname[n][0] + '.'
    return newName

In [5]:
def static_scraping(players):
    url = 'http://tennisabstract.com/reports/atp_elo_ratings.html'
    r =  get(url)
    page = BeautifulSoup(r.content)
    table = page.find(id='reportable')
    allTr = table.find_all('tr')

    for tr in allTr[1:] :
        tds = tr.find_all('td')
        toWrite = [tds[i].text for i in (0, 1, 3, 5, 6, 7, 9, 10, 11)]

        toWrite[1] = uniformName(toWrite[1])

        playerLink = tr.a['href']
        raw = get(playerLink)
        playerPage = BeautifulSoup(raw.content)
        s = playerPage.find('script', language='JavaScript').text
        i = s.find('var dob =')
        dob = datetime.date(int(s[i+10:i+14]), int(s[i+14:i+16]), int(s[i+16:i+18]))
        i = s.find('var hand =')
        hand = s[i+12]
        if hand not in {'L', 'R'}:
            hand = None
        i = s.find('var backhand =')
        backhand = s[i+16]
        if backhand not in {'1', '2'}:
            backhand = None

        toWrite += [dob, hand, backhand]
        players[toWrite[1]] = toWrite

In [6]:
def dynamic_scraping(playerNames):
    driver = webdriver.Chrome()
    driver.get("https://www.atptour.com/en/players")
    items = list(playerNames.items())
    for w, values in items:
        surname = w.split()[0]
        search = driver.find_element_by_name("playerInput")
        search.clear()
        search.click()
        search.send_keys(surname+ " ")
        ul = driver.find_element_by_id("playerDropdown")
        sleep(8)
        
        try:   
            playersList = ul.find_elements_by_partial_link_text(" " + surname)
            for p in playersList:
                pName = p.text.replace('-', ' ').split()
                initials = w.split()[1].split('.')[:-1]
                print(pName , ' --> ' , initials)
                found = True
                for i in range(len(initials)):
                    found = found and initials[i] == pName[i][0]
                if found:
                    player = p
                    break
                    
            if not found:    
                raise Exception(w + ' not found')
                
            print(w + ": " + player.text)
            player.click()
            sleep(8)
                 
            playerInfo = driver.find_element_by_css_selector("div.player-profile-hero-table")
            pBD = playerInfo.find_element_by_css_selector("span.table-birthday").text
            pBD = pBD[1:-1].replace('.', '-')

            play = driver.find_element_by_xpath('//*[@id="playerProfileHero"]/div[2]/div[2]/div/table/tbody/tr[2]/td[3]/div/div[2]').text
            hand = play[0]
            if hand not in {'L', 'R'}:
                        hand = None
            index = play.find('-Handed Backhand')
            backHand = play[index-3 : index]
            if backHand == "One" : 
                backHand = 1
            elif backHand == "Two":
                backHand = 2
            else:
                backHand = None
        except Exception as e:
            print(e)
            pBD = None
            hand = None
            backHand = None
            
        if values == []:
            playerNames[w] = [None, w, None, None, None, None, None, None, None, pBD, hand, backHand]
        else:
            values[-3] = pBD
            values[-2] = hand
            values[-1] = backHand
            playerNames[w] = values

In [7]:
print("Static Scraping started")
static_scraping(players)
print("Dynamic Scraping started")
dynamic_scraping(players)
print("Writing...")
write_csv(players)
print("Done")

Static Scraping started
Dynamic Scraping started
['Alex', 'Mayer']  -->  ['F']
['Clemens', 'Mayer']  -->  ['F']
['Florian', 'Mayer']  -->  ['F']
Mayer F.: Florian Mayer
['Norbert', 'Gombos']  -->  ['N']
Gombos N.: Norbert Gombos
['Evgeny', 'Kirillov']  -->  ['E']
Kirillov E.: Evgeny Kirillov
['Guillermo', 'Olaso']  -->  ['G']
Olaso G.: Guillermo Olaso
['Yassine', 'Idmbarek']  -->  ['Y']
Idmbarek Y.: Yassine Idmbarek
string index out of range
['Alexander', 'Sadecky']  -->  ['A']
Sadecky A.: Alexander Sadecky
['Gonzalo', 'Lama']  -->  ['G']
Lama G.: Gonzalo Lama
['Chia', 'Cheng', 'Yang']  -->  ['T', 'H']
['Shao', 'Chi', 'Yang']  -->  ['T', 'H']
['Dae', 'Yang', 'Kweon']  -->  ['T', 'H']
['Zi', 'Dong', 'Yang']  -->  ['T', 'H']
['Eui', 'Yeol', 'Yang']  -->  ['T', 'H']
['Yi', 'Fan', 'Yang']  -->  ['T', 'H']
['Wei', 'Guang', 'Yang']  -->  ['T', 'H']
['Hao', 'Yang', 'Chen']  -->  ['T', 'H']
['Tsung', 'Hua', 'Yang']  -->  ['T', 'H']
Yang T.H.: Tsung-Hua Yang
['Dmitry', 'Tursunov']  -->  ['D']
T

['Filip', 'Horansky']  -->  ['F']
Horansky F.: Filip Horansky
['Adrian', 'Garza']  -->  ['D']
['Daniel', 'Garza']  -->  ['D']
Garza D.: Daniel Garza
['Gerard', 'Granollers']  -->  []
Granollers Pujol G.: Gerard Granollers
['Chris', 'Herbert']  -->  ['P', 'H']
['Pierre', 'Hugues', 'Herbert']  -->  ['P', 'H']
Herbert P.H.: Pierre-Hugues Herbert
['Adam', 'Becker']  -->  ['B']
['Benjamin', 'Becker']  -->  ['B']
Becker B.: Benjamin Becker
Message: stale element reference: element is not attached to the page document
  (Session info: chrome=79.0.3945.88)

['Alex', 'Kuznetsov']  -->  ['A']
Kuznetsov A.: Alex Kuznetsov
['Marco', 'Chiudinelli']  -->  ['M']
Chiudinelli M.: Marco Chiudinelli
['Agustin', 'Coria']  -->  ['F']
['Federico', 'Coria']  -->  ['F']
Coria F.: Federico Coria
['Germain', 'Gigounon']  -->  ['G']
Gigounon G.: Germain Gigounon
['Jason', 'Kubler']  -->  ['J']
Kubler J.: Jason Kubler
['Andrew', 'Harrison']  -->  ['R']
['Chris', 'Harrison']  -->  ['R']
['Christian', 'Harrison']  

['Felix', 'Auger', 'Aliassime']  -->  ['F']
Auger-Aliassime F.: Felix Auger-Aliassime
['Joao', 'Domingues']  -->  ['J']
Domingues J.: Joao Domingues
['Apostolos', 'Tsitsipas']  -->  ['S']
['Petros', 'Tsitsipas']  -->  ['S']
['Stefanos', 'Tsitsipas']  -->  ['S']
Tsitsipas S.: Stefanos Tsitsipas
['Adam', 'Novak']  -->  ['D']
['Balazs', 'Novak']  -->  ['D']
['Bosko', 'Novakovic']  -->  ['D']
['David', 'Novak']  -->  ['D']
Novak D.: David Novak
string index out of range
['Jeevan', 'Nedunchezhiyan']  -->  ['J']
Nedunchezhiyan J.: Jeevan Nedunchezhiyan
['Mikhail', 'Elgin']  -->  ['M']
Elgin M.: Mikhail Elgin
['Bowen', 'Ouyang']  -->  ['B']
Ouyang B.: Bowen Ouyang
['Mehdi', 'Ziadi']  -->  ['M']
Ziadi M.: Mehdi Ziadi
string index out of range
['Marco', 'Viola']  -->  ['M']
Viola M.: Marco Viola
string index out of range
['Adam', 'Anderson']  -->  ['K']
['Alec', 'Anderson']  -->  ['K']
['Andrew', 'Anderson']  -->  ['K']
['Andrew', 'Anderson']  -->  ['K']
['Art', 'Anderson']  -->  ['K']
['Derek'

['David', 'Simon']  -->  ['G']
['Ezequiel', 'Simonit']  -->  ['G']
['Francotenis', 'De', 'Simone']  -->  ['G']
['Frantisek', 'Simon']  -->  ['G']
['Fred', 'Simonsson']  -->  ['G']
['Frederik', 'Simon']  -->  ['G']
['George', 'Simond']  -->  ['G']
Simon G.: George Simond
string index out of range
['Wishaya', 'Trongcharoenchaikul']  -->  ['W']
Trongcharoenchaikul W.: Wishaya Trongcharoenchaikul
['Adrian', 'Clarke']  -->  ['J']
['Adrian', 'Clarke']  -->  ['J']
['B.L.', 'Clarke']  -->  ['J']
['Braden', 'Clarke']  -->  ['J']
['Chris', 'Clarke']  -->  ['J']
['Curtis', 'Clarke']  -->  ['J']
['Donaldson', 'Clarke']  -->  ['J']
['E.R.', 'Clarke']  -->  ['J']
['Edwin', 'Clarke']  -->  ['J']
['G.', 'Clarke']  -->  ['J']
['Jaime', 'Clarke']  -->  ['J']
Clarke J.: Jaime Clarke
string index out of range
['Bill', 'Quigley']  -->  ['E']
['Eric', 'Quigley']  -->  ['E']
Quigley E.: Eric Quigley
['Andreas', 'Seppi']  -->  ['A']
Seppi A.: Andreas Seppi
['A', 'El', 'Ghani']  -->  []
El Amrani R.: A El Ghan

['Laslo', 'Djere']  -->  ['L']
Djere L.: Laslo Djere
['Ai', 'Min', 'Zhang']  -->  ['Z']
['Andrew', 'Zhang']  -->  ['Z']
['Bo', 'Zhang']  -->  ['Z']
['Bolun', 'Zhang']  -->  ['Z']
['Changli', 'Zhang']  -->  ['Z']
['Changli', 'Zhang']  -->  ['Z']
['Dongrun', 'Zhang']  -->  ['Z']
['Nian', 'Fei', 'Zhang']  -->  ['Z']
['Hao', 'Zhang']  -->  ['Z']
['Hao', 'Zhang']  -->  ['Z']
['Jia', 'Heng', 'Zhang']  -->  ['Z']
['Yun', 'Hong', 'Zhang']  -->  ['Z']
['Jiu', 'Hua', 'Zhang']  -->  ['Z']
['Ting', 'Hui', 'Zhang']  -->  ['Z']
['Hui', 'Zhang']  -->  ['Z']
['Jiajun', 'Zhang']  -->  ['Z']
['Zheng', 'Qi', 'Jonathan', 'Zhang']  -->  ['Z']
Zhang Z.: Zheng Qi Jonathan Zhang
string index out of range
['Federico', 'Gaio']  -->  ['F']
Gaio F.: Federico Gaio
['Michael', 'Berrer']  -->  ['M']
Berrer M.: Michael Berrer
['Illya', 'Marchenko']  -->  ['I']
Marchenko I.: Illya Marchenko
['Daniel', 'Dutra', 'da', 'Silva']  -->  []
Dutra Silva R.: Daniel Dutra da Silva
['Gerald', 'Melzer']  -->  ['G']
Melzer G.: Ger

['Daryl', 'Monfils']  -->  ['G']
['Gael', 'Monfils']  -->  ['G']
Monfils G.: Gael Monfils
['Andy', 'Murray']  -->  ['A']
Murray A.: Andy Murray
['Mikhail', 'Biryukov']  -->  ['M']
Biryukov M.: Mikhail Biryukov
string index out of range
['Heiner', 'Moraing']  -->  ['M']
['Maik', 'Moraing']  -->  ['M']
Moraing M.: Maik Moraing
string index out of range
['Dominique', 'Maden']  -->  ['Y']
['Yannick', 'Maden']  -->  ['Y']
Maden Y.: Yannick Maden
['Greg', 'Goodall']  -->  ['J']
['Jason', 'Goodall']  -->  ['J']
Goodall J.: Jason Goodall
['Marc', 'Gicquel']  -->  ['M']
Gicquel M.: Marc Gicquel
['C.', 'Frailey']  -->  ['G']
['E.', 'Frailey']  -->  ['G']
['Fernando', 'Fraile']  -->  ['G']
['G.', 'Frailey']  -->  ['G']
Fraile G.: G. Frailey
Message: no such element: Unable to locate element: {"method":"css selector","selector":"span.table-birthday"}
  (Session info: chrome=79.0.3945.88)

['Gabriel', 'A.', 'Penino', 'De', 'Souza']  -->  []
De Greef A.: Gabriel A. Penino De Souza
['Federico', 'Delb

['Davey', 'Roberts']  -->  ['S']
['Donnie', 'Roberts']  -->  ['S']
['E.', 'Robertson']  -->  ['S']
['Fabien', 'Robert']  -->  ['S']
['G.', 'Roberts']  -->  ['S']
['G.', 'Roberts']  -->  ['S']
['Geoff', 'Roberts']  -->  ['S']
['James', 'Roberts', 'Thompson']  -->  ['S']
['Jan', 'Robert', 'Lajos']  -->  ['S']
['Jason', 'Roberts']  -->  ['S']
['Javier', 'Roberto', 'Rodriguez']  -->  ['S']
['Jimmy', 'Roberts']  -->  ['S']
['Johannes', 'Robert', 'Van', 'Overbeek']  -->  ['S']
['Jordan', 'Roberts']  -->  ['S']
['Jose', 'Roberto', 'Gueiors', 'Da', 'Silva']  -->  ['S']
['Justin', 'Roberts']  -->  ['S']
['K.', 'Roberts']  -->  ['S']
['Luis', 'Roberto', 'Capriles', 'Fanianos']  -->  ['S']
['Mark', 'Roberts']  -->  ['S']
['Matthew', 'Roberts']  -->  ['S']
['Michael', 'Roberts']  -->  ['S']
['Michael', 'Robertson']  -->  ['S']
['Mikael', 'Robertson']  -->  ['S']
['P.', 'Roberts']  -->  ['S']
Robert S. not found
['Alessandro', 'Motti']  -->  ['B']
['Bernard', 'Mottez']  -->  ['B']
Mott B.: Bernard 

Message: no such element: Unable to locate element: {"method":"css selector","selector":"span.table-birthday"}
  (Session info: chrome=79.0.3945.88)

['Jonathan', 'Dasnieres', 'de', 'Veigy']  -->  []
Dasnieres de Veigy J.: Jonathan Dasnieres de Veigy
['Maxime', 'Authom']  -->  ['M']
Authom M.: Maxime Authom
['Anders', 'Haaseth']  -->  ['T']
['Bernhard', 'Haas']  -->  ['T']
['Eric', 'Haase']  -->  ['T']
['Karl', 'Haase']  -->  ['T']
['Martijn', 'Van', 'Haasteren']  -->  ['T']
['Philippe', 'Haas']  -->  ['T']
['Robin', 'Haase']  -->  ['T']
['Rudiger', 'Haas']  -->  ['T']
['Santiago', 'Haas']  -->  ['T']
['Tommy', 'Haas']  -->  ['T']
Haas T.: Tommy Haas
['Sumit', 'Nagal']  -->  ['S']
Nagal S.: Sumit Nagal
['Prajnesh', 'Gunneswaran']  -->  ['P']
Gunneswaran P.: Prajnesh Gunneswaran
['Jack', 'Oswald']  -->  ['P']
['Philipp', 'Oswald']  -->  ['P']
Oswald P.: Philipp Oswald
['Djordje', 'Djokovic']  -->  ['N']
['Marko', 'Djokovic']  -->  ['N']
['Novak', 'Djokovic']  -->  ['N']
Djokovic N.: Nov

['Cristian', 'Garin']  -->  ['C']
Garin C.: Cristian Garin
['Fernando', 'Teixeira']  -->  ['M']
['Guilherme', 'Teixeira']  -->  ['M']
['Matheus', 'Teixeira', 'Rohde']  -->  ['M']
Teixeira M.: Matheus Teixeira Rohde
string index out of range
['Philipp', 'Kohlschreiber']  -->  ['P']
Kohlschreiber P.: Philipp Kohlschreiber
['J.S.D.', 'Sweeting']  -->  ['R']
['Ryan', 'Sweeting']  -->  ['R']
Sweeting R.: Ryan Sweeting
['Gerard', 'Granollers']  -->  ['M']
['Marcel', 'Granollers']  -->  ['M']
Granollers M.: Marcel Granollers
['Antonio', 'Wuttke']  -->  ['Y']
['Bart', 'Wuyts']  -->  ['Y']
['Chang', 'Rung', 'Wu']  -->  ['Y']
['Chen', 'Yu', 'Wu']  -->  ['Y']
['Chun', 'En', 'Wu']  -->  ['Y']
['Cliford', 'Wuyum', 'Nkwain']  -->  ['Y']
['Di', 'Wu']  -->  ['Y']
['Edwin', 'Wu']  -->  ['Y']
['Yi', 'fan', 'Wu']  -->  ['Y']
Wu Y.: Yi fan Wu
string index out of range
['Konstantin', 'Kravchuk']  -->  ['K']
Kravchuk K.: Konstantin Kravchuk
['Anand', 'Amritraj']  -->  ['P']
['Ashok', 'Amritraj']  -->  ['P']

['Ben', 'Rocavert']  -->  []
Roca Batalla O.: Ben Rocavert
Message: no such element: Unable to locate element: {"method":"css selector","selector":"span.table-birthday"}
  (Session info: chrome=79.0.3945.88)

['Eduard', 'Davydenko']  -->  ['P']
['Nikolay', 'Davydenko']  -->  ['P']
['Philipp', 'Davydenko']  -->  ['P']
Davydenko P.: Philipp Davydenko
['A.', 'Broese', 'Van', 'Groenon']  -->  ['D']
['A.', 'Vandersman']  -->  ['D']
['A.J.', 'Van', 'Der', 'Steur']  -->  ['D']
['AC', 'Hans', 'Van', 'Swol']  -->  ['D']
['Adolfo', 'Van', 'Gelderen']  -->  ['D']
['Alexander', 'Van', 'Cott']  -->  ['D']
['Aloys', 'J', 'Willem', 'Van', 'Baal']  -->  ['D']
['Ananda', 'Van', 'Den', 'Doren']  -->  ['D']
['Andre', 'Van', 'der', 'Merwe']  -->  ['D']
['Andre', 'Van', 'Der', 'Merwe']  -->  ['D']
['Andre', 'Janse', 'Van', 'Rensburg']  -->  ['D']
['Antal', 'Van', 'Der', 'Duim']  -->  ['D']
['Antonio', 'Van', 'Grichen']  -->  ['D']
['Arjan', 'Van', 'Der', 'Zande']  -->  ['D']
['Bart', 'Van', 'Den', 'Berg'] 

string index out of range
['Marius', 'Copil']  -->  ['M']
Copil M.: Marius Copil
['Farrukh', 'Dustov']  -->  ['F']
Dustov F.: Farrukh Dustov
['Alexander', 'Slabinsky']  -->  ['A']
Slabinsky A.: Alexander Slabinsky
['Kimmer', 'Coppejans']  -->  ['K']
Coppejans K.: Kimmer Coppejans
['Aushael', 'Reynolds']  -->  ['B']
['Bobby', 'Reynolds']  -->  ['B']
Reynolds B.: Bobby Reynolds
['Eduardo', 'Struvay']  -->  ['E']
Struvay E.: Eduardo Struvay
string index out of range
['Amer', 'Delic']  -->  ['A']
Delic A.: Amer Delic
['A.', 'Broese', 'Van', 'Groenon']  -->  []
Van Der Merwe I.: A. Broese Van Groenon
Message: no such element: Unable to locate element: {"method":"css selector","selector":"span.table-birthday"}
  (Session info: chrome=79.0.3945.88)

['Aeron', 'Martinovic']  -->  ['F']
['Alastair', 'Martin']  -->  ['F']
['Alberto', 'Martin']  -->  ['F']
['Alejandro', 'Martinez', 'Dura']  -->  ['F']
['Alejandro', 'Martinez']  -->  ['F']
['Pablo', 'Alejandro', 'Martini']  -->  ['F']
['Aleks', 'M

string index out of range
['Borna', 'Gojo']  -->  ['B']
Gojo B.: Borna Gojo
['Zachary', 'Svajda']  -->  ['Z']
Svajda Z.: Zachary Svajda
['Philipp', 'Kohlschreiber']  -->  ['P', '']
Kohlschreiber P.. not found
['Mohamed', 'Safwat']  -->  ['M']
Safwat M.: Mohamed Safwat
['Amir', 'Weintraub']  -->  ['A']
Weintraub A.: Amir Weintraub
['Justin', 'S.', 'Shane']  -->  ['R']
['Ryan', 'Shane']  -->  ['R']
Shane R.: Ryan Shane
['Grigor', 'Dimitrov']  -->  ['G']
Dimitrov G.: Grigor Dimitrov
['Fernando', 'Verdasco']  -->  ['F']
Verdasco F.: Fernando Verdasco
['Filippo', 'Moroni']  -->  ['G']
['Gian', 'Marco', 'Moroni']  -->  ['G']
Moroni G.: Gian Marco Moroni
['Kamil', 'Majchrzak']  -->  ['K']
Majchrzak K.: Kamil Majchrzak
['Jerzy', 'Janowicz']  -->  ['J']
Janowicz J.: Jerzy Janowicz
['Luke', 'Saville']  -->  ['L']
Saville L.: Luke Saville
['German', 'Ojeda']  -->  []
Ojeda Lara R.: German Ojeda
string index out of range
['Daniel', 'Gimeno', 'Traver']  -->  ['D']
Gimeno-Traver D.: Daniel Gimeno-Tr

Message: no such element: Unable to locate element: {"method":"css selector","selector":"span.table-birthday"}
  (Session info: chrome=79.0.3945.88)

['Juan', 'Ignacio', 'Chela']  -->  ['J', 'I']
Chela J.I.: Juan Ignacio Chela
['Marco', 'Cecchinato']  -->  ['M']
Cecchinato M.: Marco Cecchinato
['Antonio', 'Wuttke']  -->  ['D']
['Bart', 'Wuyts']  -->  ['D']
['Chang', 'Rung', 'Wu']  -->  ['D']
['Chen', 'Yu', 'Wu']  -->  ['D']
['Chun', 'En', 'Wu']  -->  ['D']
['Cliford', 'Wuyum', 'Nkwain']  -->  ['D']
['Di', 'Wu']  -->  ['D']
Wu D.: Di Wu
['Paul', 'Groth']  -->  ['S']
['Sam', 'Groth']  -->  ['S']
Groth S.: Sam Groth
['Damir', 'Dzumhur']  -->  ['D']
Dzumhur D.: Damir Dzumhur
['Corentin', 'Moutet']  -->  ['C']
Moutet C.: Corentin Moutet
['Gabriel', 'A.', 'Penino', 'De', 'Souza']  -->  []
De Bakker T.: Gabriel A. Penino De Souza
['Axsel', 'Monteiro', 'de', 'Lima']  -->  ['T']
['Israel', 'Monteiro']  -->  ['T']
['Joao', 'Monteiro']  -->  ['T']
['Michel', 'Monteiro']  -->  ['T']
['Nicolau', 'M

['Mikael', 'Torpegaard']  -->  ['M']
Torpegaard M.: Mikael Torpegaard
['Andrei', 'Medvedev']  -->  ['D']
['Daniil', 'Medvedev']  -->  ['D']
Medvedev D.: Daniil Medvedev
['Andres', 'Cava', 'Ramirez']  -->  ['C']
['Antonio', 'Ramirez']  -->  ['C']
['Boniek', 'Ramon', 'Ramirez', 'Vega']  -->  ['C']
['Juan', 'Carlos', 'Ramirez']  -->  ['C']
['Carlos', 'Ramirez', 'Utermann']  -->  ['C']
Ramirez C.: Carlos Ramirez Utermann
['Adolfo', 'Daniel', 'Vallejo']  -->  ['M']
['Alejandro', 'Daniel', 'Castillo', 'Gamarra']  -->  ['M']
['Alvaro', 'Daniel', 'Riveros']  -->  ['M']
['Arman', 'Danielyan']  -->  ['M']
['Bohus', 'Danielcik']  -->  ['M']
['Bruce', 'Daniels']  -->  ['M']
['Christian', 'Daniel', 'Bianculli']  -->  ['M']
['Christofer', 'Daniel', 'Goncalves']  -->  ['M']
['Joao', 'Daniel', 'Almeida']  -->  ['M']
['Pedro', 'Daniel', 'Gutierrez', 'Arreaga']  -->  ['M']
['Paul', 'Daniel', 'Gijon', 'Arruabarrena']  -->  ['M']
['Emilio', 'Daniel', 'Baez', 'Britez']  -->  ['M']
['Jorge', 'Daniel', 'Barc

['A.', 'Baker']  -->  ['J']
['A.T.', 'Baker']  -->  ['J']
['A.T.', 'Baker2']  -->  ['J']
['A.T.', 'Baker2']  -->  ['J']
['Adam', 'Baker']  -->  ['J']
['Blu', 'Baker']  -->  ['J']
['Brian', 'Baker']  -->  ['J']
['Jean', 'Edouard', 'Baker']  -->  ['J']
Baker J.: Jean Edouard Baker
Message: no such element: Unable to locate element: {"method":"css selector","selector":"span.table-birthday"}
  (Session info: chrome=79.0.3945.88)

['Benoit', 'Paire']  -->  ['B']
Paire B.: Benoit Paire
['Dino', 'Marcan']  -->  ['D']
Marcan D.: Dino Marcan
['Augusto', 'Elias']  -->  ['G']
['David', 'De', 'Leon', 'Elias']  -->  ['G']
['Jose', 'Elias', 'Yapur']  -->  ['G']
['Erik', 'Eliasson']  -->  ['G']
['Erik', 'Eliasson']  -->  ['G']
['Esteban', 'Elias']  -->  ['G']
['Fredrik', 'Eliasson']  -->  ['G']
['Gastao', 'Elias']  -->  ['G']
Elias G.: Gastao Elias
['A.', 'Reid']  -->  ['M']
['A.', 'Reid']  -->  ['M']
['D.', 'Reid']  -->  ['M']
['Delroy', 'Reid']  -->  ['M']
['Doug', 'Reid']  -->  ['M']
['F.', 'Reid'

Message: no such element: Unable to locate element: {"method":"css selector","selector":"span.table-birthday"}
  (Session info: chrome=79.0.3945.88)

['Botic', 'Van', 'de', 'Zandschulp']  -->  ['D', 'V', 'B']
Zandschulp D.V.B. not found
['Albert', 'Lopez', 'Morales']  -->  ['G', 'G']
['Alberto', 'Lopez', 'Nunez']  -->  ['G', 'G']
['Daniel', 'Alejandro', 'Lopez', 'Cassaccia']  -->  ['G', 'G']
['Alex', 'Lopez', 'Moron']  -->  ['G', 'G']
['Alvaro', 'Lopez', 'San', 'Martin']  -->  ['G', 'G']
['Andres', 'Ariel', 'Fernandez', 'Lopez']  -->  ['G', 'G']
['Andres', 'Lopez']  -->  ['G', 'G']
['Javier', 'Andres', 'Lopez', 'Martinez']  -->  ['G', 'G']
['Andres', 'Munoz', 'Lopez']  -->  ['G', 'G']
['Angel', 'Luis', 'Lopez', 'Guillen']  -->  ['G', 'G']
['Miguel', 'Angel', 'Lopez', 'Jaen']  -->  ['G', 'G']
['Miguel', 'Angel', 'Lopez']  -->  ['G', 'G']
['Angel', 'Luis', 'Lopez']  -->  ['G', 'G']
['Marco', 'Antonio', 'Lopez']  -->  ['G', 'G']
['Aramis', 'Lopez']  -->  ['G', 'G']
['Benjamin', 'Winter', 

['A.', 'Smith']  -->  ['P', 'J']
['Alfonso', 'Smith']  -->  ['P', 'J']
['Andrew', 'Smith']  -->  ['P', 'J']
['Arthur', 'Smith']  -->  ['P', 'J']
['Austin', 'Smith']  -->  ['P', 'J']
['B.', 'Smith']  -->  ['P', 'J']
['Barnaby', 'Smith']  -->  ['P', 'J']
['Benjamin', 'Smith']  -->  ['P', 'J']
['Brandon', 'Weir', 'Smith']  -->  ['P', 'J']
['C.', 'Smith', 'Bingham']  -->  ['P', 'J']
['C.', 'Smith', 'Bingham']  -->  ['P', 'J']
['C.', 'Smith']  -->  ['P', 'J']
['Cameron', 'Edward', 'Smith']  -->  ['P', 'J']
['Casey', 'Smith']  -->  ['P', 'J']
['Cole', 'Smith']  -->  ['P', 'J']
['Colin', 'Smith']  -->  ['P', 'J']
['Colter', 'Smith']  -->  ['P', 'J']
['Connor', 'Smith']  -->  ['P', 'J']
['D.', 'Smith']  -->  ['P', 'J']
['Daniel', 'Smith']  -->  ['P', 'J']
['Darrell', 'Smith']  -->  ['P', 'J']
['David', 'Smith']  -->  ['P', 'J']
['Doug', 'Smith']  -->  ['P', 'J']
['Drew', 'Smith']  -->  ['P', 'J']
['Dudley', 'Smith']  -->  ['P', 'J']
['E.', 'Smith']  -->  ['P', 'J']
['Errol', 'Smith']  -->  ['P

['Miguel', 'Angel', 'Traverso']  -->  ['G', 'D']
['Victor', 'Gimeno', 'Traver']  -->  ['G', 'D']
Traver G.D. not found
['Marc', 'Andrea', 'Huesler']  -->  ['A', 'M']
Huesler A.M. not found
['Manuel', 'Guinard']  -->  ['M']
Guinard M.: Manuel Guinard
['Konstantin', 'Kotov']  -->  ['P']
['Pavel', 'Kotov']  -->  ['P']
Kotov P.: Pavel Kotov
['Javier', 'Barranco', 'Cosano']  -->  ['B', 'J']
Cosano B.J. not found
['Uladzimir', 'Ignatik']  -->  ['U']
Ignatik U.: Uladzimir Ignatik
['Martin', 'Redlicki']  -->  ['M']
Redlicki M.: Martin Redlicki
['Antoine', 'Escoffier']  -->  ['A']
Escoffier A.: Antoine Escoffier
['Arthur', 'Heller']  -->  ['P']
['Peter', 'Heller']  -->  ['P']
Heller P.: Peter Heller
['Jordi', 'Blanchar']  -->  ['U']
['Ugo', 'Blanchet']  -->  ['U']
Blanch U.: Ugo Blanchet
string index out of range
['Chia', 'Cheng', 'Yang']  -->  ['H', 'T']
['Shao', 'Chi', 'Yang']  -->  ['H', 'T']
['Dae', 'Yang', 'Kweon']  -->  ['H', 'T']
['Zi', 'Dong', 'Yang']  -->  ['H', 'T']
['Eui', 'Yeol', 'Y

['Gordan', 'Brkic']  -->  ['T']
['Josip', 'Brkic']  -->  ['T']
['Tomislav', 'Brkic']  -->  ['T']
Brkic T.: Tomislav Brkic
['Isao', 'Uchida']  -->  ['K']
['Kaichi', 'Uchida']  -->  ['K']
Uchida K.: Kaichi Uchida
['Bang', 'Ho', 'Song']  -->  ['E']
['Dong', 'Wook', 'Song']  -->  ['E']
['Evan', 'Song']  -->  ['E']
Song E.: Evan Song
['Edward', 'Bourchier']  -->  ['H']
['Harry', 'Bourchier']  -->  ['H']
Bourchier H.: Harry Bourchier
['Matias', 'Franco', 'Descotte']  -->  ['F', 'M']
Descotte F.M. not found
['Brandon', 'Blasco', 'del', 'Cid']  -->  ['R']
['Roberto', 'Cid', 'Subervi']  -->  ['R']
Cid R.: Roberto Cid Subervi
['Maverick', 'Banes']  -->  ['M']
Banes M.: Maverick Banes
['Renta', 'Tokuda']  -->  ['R']
Tokuda R.: Renta Tokuda
['Alexander', 'Zhurbin']  -->  ['A']
Zhurbin A.: Alexander Zhurbin
['Fabien', 'Reboul']  -->  ['F']
Reboul F.: Fabien Reboul
['Jacob', 'Grills']  -->  ['J']
Grills J.: Jacob Grills
Message: stale element reference: element is not attached to the page document
 

In [None]:
df_ext = pd.read_excel('data/base_data.xlsx')
scraped_data = pd.read_csv('data/playersdata.csv', encoding='utf-8-sig')

In [None]:
def fill_values(row, scraped): 
    wElo = None
    wBirthDate = None
    wHand = None
    wBackhand = None
    wSurfaceElo = None

    lElo = None
    lBirthDate = None
    lHand = None
    lBackhand = None
    lSurfaceElo = None
        
    wFound = False
    lFound = False
    for i, r in scraped.iterrows() :
        if(row['Winner']  == r['Player']) :
            wFound = True
            wElo = r['Elo']
            wBirthDate = r['BirthDate']
            wHand = r['Hand']
            wBackhand = r['BackHand']
            if(row['Surface'] == 'Hard') :
                wSurfaceElo = r['hElo']
            if(row['Surface'] == 'Clay') :
                wSurfaceElo = r['cElo']
            if(row['Surface'] == 'Grass') :
                wSurfaceElo = r['gElo']
            
        elif (row['Loser']  == r['Player']) :
            lFound = True
            lElo = r['Elo']
            lBirthDate = r['BirthDate']
            lHand = r['Hand']
            lBackhand = r['BackHand']
            if(row['Surface'] == 'Hard') :
                lSurfaceElo = r['hElo']
            if(row['Surface'] == 'Clay') :
                lSurfaceElo = r['cElo']
            if(row['Surface'] == 'Grass') :
                lSurfaceElo = r['gElo']
        elif wFound and lFound :
            break
        
    print(row.name, row['Winner'], row['Loser'])
    return wElo, wSurfaceElo, wBirthDate, wHand, wBackhand, lElo, lSurfaceElo, lBirthDate, lHand, lBackhand

In [None]:
def join_datasets(base, scraped):
    result = base.apply(lambda row: fill_values(row, scraped), axis=1, result_type='expand')
            
    base = base.assign(WElo=result[0], WSurfElo=result[1], WBD=result[2], WHand=result[3], WBHand=result[4],
                  LElo=result[5], LSurfElo=result[6], LBD=result[7], LHand=result[8], LBHand=result[9])
    
    base['Date'] = pd.to_datetime(base['Date'])
    return base.sort_values(by='Date')

def write_dataset(base):
    os.chdir('data')
    with open('dataset.csv', 'w', newline='', encoding='utf-8-sig') as file :
        writer = csv.writer(file)
        writer.writerow([c for c in base.columns])
        for i, r in df_ext.iterrows():
            writer.writerow(r)
    os.chdir('..')
            
def split_by_year(base):
    os.chdir('data')
    for year in base.Date.dt.year.unique():
        base[base.Date.dt.year == year].to_csv(str(year) + '.csv', 
                                               encoding='utf-8-sig',
                                               index=False)
    os.chdir('..')

In [None]:
df_ext = join_datasets(df_ext, scraped_data)
write_dataset(df_ext)
split_by_year(df_ext)