In [4]:
import requests
from bs4 import BeautifulSoup

import pandas as pd 
import numpy as np
import time
from random import uniform

### Scrapping and analysing data from ATP Rankings

In [5]:
# Getting the page on the first time 
atp_page = requests.get('http://www.atpworldtour.com/en/rankings/singles')

# Parsing the page
atp_parsed = BeautifulSoup(atp_page.content, 'html.parser')

# Printing results
#print(atp_parsed.prettify())

# Printing the child-pages list --> It contains the Dates
#print(list(atp_parsed.children)) 

In [6]:
# Checking type -> bs4 = NavigableString, Doctype
#[type(item) for item in list(atp_parsed.children)] 

In [7]:
# Getting all dates 
%time 

date_raw = atp_parsed.find(id='filterHolder').find(class_='dropdown').find_all('li')
current_date = atp_parsed.find(id='filterHolder').find(class_='current')['data-value']                                                                              
all_dates = [date_raw[x].get_text().strip() for x in range(1,len(date_raw))]      

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.96 µs


In [8]:
# Crawler
# We need to crawl each page with the different rankings' date (based on diff URL)

# List with the information per row
data = []

def url_crawler(url, date, data): 
    atp_page = requests.get(url)
    print(url)

    # Parsing the page
    atp_parsed = BeautifulSoup(atp_page.content, 'html.parser')
    
    # Class with the the desired content
    rank_by_row = atp_parsed.find(class_='mega-table')

    # Scrapping the page
    for row in rank_by_row.select('tr')[1:]:
        data.append([
                    date,
                    int(row.find(class_='rank-cell').get_text().strip()),
                    str(row.find(class_='player-cell').get_text().strip()),
                    int(row.find(class_='age-cell').get_text().strip()),
                    int((row.find(class_='points-cell').get_text()).replace(",",""))
                    ])

In [9]:
# Building the pandas dataframe

def list_to_df():
    atp_df = pd.DataFrame(data, columns=['Date', 'Ranking', 'Player', 'Age', 'Points'])
    
    return atp_df
    
# Old version
#
# def list_to_df():
#    atp_df = pd.DataFrame(   
#        {'Ranking': rank,
#         'Player': players,
#         'Age': age,
#         'Points': points,
#         'Tourn. Played': tourn,
#         'Date': current_date
#        })
#    return atp_df

In [10]:
# Generating all desired URL pages

# URL e.g.: http://www.atpworldtour.com/en/rankings/singles?rankDate=2013-06-24&rankRange=0-100
# Using the dates we can access all pages

url_eg = "http://www.atpworldtour.com/en/rankings/singles?rankDate=2013-06-24&rankRange=0-100"

all_url = [url_eg.replace(url_eg[57:67], all_dates[x].replace('.', '-')) for x in range(len(all_dates))]

#all_url -> OK 

In [11]:
#the number of pages you want to parse
nPages = len(all_url)

for desired_url in all_url[:nPages]:
    %time 

    # In order to send consecutive queries we need a time delay. Randomize between .5 and 1.5 sec 
    #time.sleep(uniform(0.5,1.5))
    # Parsing
    url_crawler(desired_url, desired_url[57:67], data)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2017-05-08&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2017-05-01&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2017-04-24&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2017-04-17&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.29 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2017-04-10&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2017-04-03&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2016-02-15&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.05 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2016-02-08&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2016-02-01&rankRange=0-100
CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 9.06 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2016-01-18&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2016-01-11&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2016-01-04&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2014-12-01&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.05 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2014-11-24&rankRange=0-100
CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 17.2 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2014-11-17&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2014-11-10&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 6.2 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2014-11-03&rankRange=0-100
CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.82 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2014-10-27&rankRange=0-100
CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 7.87 µs
http://www.atpworldtour.com/en/rankings/singles?r

http://www.atpworldtour.com/en/rankings/singles?rankDate=2013-09-30&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2013-09-23&rankRange=0-100
CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2013-09-16&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2013-09-09&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2013-08-26&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2013-08-19&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2013-08-12&rankRange=0-100
CPU times: user 2 µs, sy

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2012-07-16&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2012-07-09&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2012-06-25&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2012-06-18&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2012-06-11&rankRange=0-100
CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.2 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2012-05-28&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?r

http://www.atpworldtour.com/en/rankings/singles?rankDate=2011-05-02&rankRange=0-100
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2011-04-25&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2011-04-18&rankRange=0-100
CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2011-04-11&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2011-04-04&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2011-03-21&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2011-03-07&rankRange=0-100
CPU times: user 

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2010-03-15&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2010-03-08&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2010-03-01&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2010-02-22&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2010-02-15&rankRange=0-100
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 6.2 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2010-02-08&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.05 µs
http://www.atpworldtour.com/en/rankings/singl

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2009-02-16&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2009-02-09&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2009-02-02&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2009-01-26&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2009-01-19&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2009-01-12&rankRange=0-100
CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2008-01-21&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2008-01-14&rankRange=0-100
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2008-01-07&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2007-12-31&rankRange=0-100
CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2007-12-24&rankRange=0-100
CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 6.91 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2007-12-17&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/sing

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2006-12-25&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2006-12-18&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2006-12-11&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2006-12-04&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2006-11-27&rankRange=0-100
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2006-11-20&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/sing

http://www.atpworldtour.com/en/rankings/singles?rankDate=2005-12-05&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2005-11-28&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2005-11-21&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2005-11-14&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2005-11-07&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 6.2 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2005-10-31&rankRange=0-100
CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2005-10-24&rankRange=0-100
CPU times: user 2 µs, sys: 0 

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2004-11-08&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2004-11-01&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2004-10-25&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2004-10-18&rankRange=0-100
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2004-10-11&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.05 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2004-10-04&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/sing

http://www.atpworldtour.com/en/rankings/singles?rankDate=2003-10-20&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2003-10-13&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2003-10-06&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 6.91 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2003-09-29&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2003-09-22&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.72 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2003-09-15&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2003-09-08&rankRange=0-100
CPU times: user 3 µs, sys: 0

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2002-09-23&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2002-09-16&rankRange=0-100
CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2002-09-09&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2002-09-02&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 6.2 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2002-08-26&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2002-08-19&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?r

http://www.atpworldtour.com/en/rankings/singles?rankDate=2001-09-03&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2001-08-27&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 6.2 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2001-08-20&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2001-08-13&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2001-08-06&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 6.2 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2001-07-30&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2001-07-23&rankRange=0-100
CPU times: user 2 µs, sys: 0 n

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2000-08-07&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 3.81 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2000-07-31&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2000-07-24&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2000-07-17&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2000-07-10&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.72 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=2000-07-03&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?

http://www.atpworldtour.com/en/rankings/singles?rankDate=1999-07-12&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 6.2 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1999-07-05&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1999-06-28&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1999-06-21&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1999-06-14&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1999-06-07&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1999-05-31&rankRange=0-100
CPU times: user 2 µs, sys: 0 

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1998-06-15&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1998-06-08&rankRange=0-100
CPU times: user 1 µs, sys: 1 µs, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1998-06-01&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1998-05-25&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1998-05-18&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1998-05-11&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?

http://www.atpworldtour.com/en/rankings/singles?rankDate=1997-05-26&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1997-05-19&rankRange=0-100
CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.72 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1997-05-12&rankRange=0-100
CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1997-05-05&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1997-04-28&rankRange=0-100
CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.01 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1997-04-21&rankRange=0-100
CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.77 µs
http://www.atpworldtour.com/en/rankings/singles?rankDate=1997-04-14&rankRange=0-100
CPU times: user 2 µs, sy

AttributeError: 'NoneType' object has no attribute 'select'

In [12]:
# Generating the Dataframe
%time
df = list_to_df()

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 6.2 µs


In [13]:
df.head(10)

Unnamed: 0,Date,Ranking,Player,Age,Points
0,2017-05-08,1,Andy Murray,29,11270
1,2017-05-08,2,Novak Djokovic,29,7085
2,2017-05-08,3,Stan Wawrinka,32,5685
3,2017-05-08,4,Roger Federer,35,5125
4,2017-05-08,5,Rafael Nadal,30,4375
5,2017-05-08,6,Milos Raonic,26,4135
6,2017-05-08,7,Marin Cilic,28,3725
7,2017-05-08,8,Kei Nishikori,27,3650
8,2017-05-08,9,Dominic Thiem,23,3615
9,2017-05-08,10,David Goffin,26,3055


In [14]:
# Saving on csv to avoid necessity to scrap all again
df.to_csv('atp_ranking_all.csv', index=False)

In [15]:
# Converting data type to date
df.Date = pd.to_datetime(df.Date)

## How long has Roger Ferder been in the Top5?

In [16]:
roger_f_df = df[df.Player=='Roger Federer'].copy()

## Who was the youngest player ever to reach the Top10?

In [30]:
# Getting the minimum age ever in the top 10
min_age = df[df.Ranking<=10].Age.min()

df[(df.Ranking<=10) & (df.Age==min_age)].groupby('Player').Date.min()

Player
Rafael Nadal   2005-04-25
Name: Date, dtype: datetime64[ns]

## What is the most catastrophic ranking drop a player ever suffered from one year to the next?

In [44]:
# Grouping the df per player/year and getting the best and worst position on the desired year
df_drop = df.groupby(['Player', df.Date.dt.year])['Ranking'].agg(['min', 'max'])

# Reseting index, renaming the columns and sorting by player name/ year
df_drop = df_drop.reset_index().rename(columns={'Date': 'Year', 'min': 'Best_Pos', 'max': 'Worst_Pos'})\
    .sort_values(by=['Player', 'Year'])

In [45]:
df_drop.head(5)

Unnamed: 0,Player,Year,Best_Pos,Worst_Pos
0,Adam Pavlasek,2016,75,99
1,Adam Pavlasek,2017,72,98
2,Adrian Mannarino,2009,88,94
3,Adrian Mannarino,2010,83,99
4,Adrian Mannarino,2011,49,89


In [48]:
for player in df.Player.unique():
    df_temp = df_drop[df_drop.Player==player].copy

#### Testing scrapping by Coll

In [None]:
'''
# Getting the names from the ranking ordered
rank_by_date = atp_parsed.find(id='rankingDetailAjaxContainer')
players_raw = rank_by_date.find_all(class_='player-cell')

players_ordered = []
for x in range(len(players_raw)):
    players_ordered.append(players_raw[x].find('a').get_text())

# Creating ranking list in order
ranking = [x for x in range(1,101)]

ranking_df = pd.DataFrame(   
    {'Ranking': ranking,
     'Player': players_ordered
    })
'''