In [24]:
from bs4 import BeautifulSoup
import pandas as pd
import requests as req
import collections
from collections import OrderedDict

In [29]:
# Write the per-team summary table to 'Player_stats.csv' in the working directory.
# NOTE: `df` is not defined in this cell. It is assembled by the cell that calls
# combine_stats() for every squad and concatenates the results, which appears
# further down in this file. That cell must have been run first; on a fresh
# kernel, running the cells top to bottom raises NameError here.
df.to_csv('Player_stats.csv')

In [26]:
# Names for the values read from the ODI row of the batting/fielding table.
# get_player_stats() pairs these with the parsed cell values by position, so the
# order here must match the order in which the cells are read. combine_stats()
# averages every name except the first one (it slices with [1:]).
bat_stats = [
    'Matches played',
    'Innings batted',
    'Not outs',
    'Runs scored',
    'Highest inns score',
    'Batting average',
    'Balls faced',
    'Batting strike rate',
    'Hundreds scored',
    'Fifties scored',
    'Boundary fours',
    'Boundary sixes',
    'Catches taken',
    'Stumpings made',
]

# Names for the values read from the ODI row of the bowling table, again paired
# by position. Cells containing '/' are dropped by the parser before pairing, so
# they have no entry here.
bowl_stats = [
    'Matches played',
    'Innings bowled',
    'Balls bowled',
    'Runs conceded',
    'Wickets taken',
    'Bowling average',
    'Economy rate',
    'Bowling strike rate',
    '4 wickets in inning',
    '5 wickets in inning',
    '10 wickets in match',
]

def _odi_row_values(table, skip_dashes=False):
    """
    Collects the numeric cells of the 'ODIs' row(s) of one parsed stats table.

    The label cell itself ('ODIs') and every cell containing '/' are skipped.
    Any '*' in a cell is removed before the text is converted to float.

    Parameters:
    table: a parsed <table> element (anything exposing find_all('tr'))
    skip_dashes (bool): when True, cells containing '-' are skipped as well

    Returns:
    list of float: the remaining cell values in page order (empty if no ODI row)

    """
    values = []
    for row in table.find_all('tr'):
        label_cell = row.find('td')
        # Rows without any <td> have no label cell; ignore them.
        if label_cell is None or label_cell.get_text() != 'ODIs':
            continue
        for cell in row.find_all('td'):
            text = cell.get_text()
            if text == 'ODIs' or '/' in text:
                continue
            # NOTE(review): dropping a '-' cell shortens the row, so the caller's
            # positional zip() pairs every later value with the previous stat
            # name. Kept as-is because the intended column layout is not visible
            # here; confirm against the page before relying on those columns.
            if skip_dashes and '-' in text:
                continue
            values.append(float(text.replace('*', '')))
    return values


def get_player_stats(url, timeout=30):
    """
    Given the URL of the Cricinfo stats page of a player, outputs their ODI stats as a dict.

    The player's role is read from the 'Playing role' information paragraph and
    lower-cased. Which stats are included depends on the words found in it:
    If it contains 'batsman', their batting/fielding stats will be included.
    If it contains 'bowler', their bowling stats will be included.
    If it contains 'allrounder', both will be included.
    Best innings bowling and best match bowling will not be included.

    Parameters:
    url (str): the Cricinfo URL of the player
    timeout (float): seconds to wait for the HTTP response before giving up

    Returns:
    dict: the relevant ODI stats of the player, keyed by the names in bat_stats /
          bowl_stats. An empty dict is returned when a table that the role calls
          for is missing or has no ODI row (for an allrounder this also discards
          batting stats already collected), or when the role matches nothing.

    """

    # Name the parser explicitly: without it BeautifulSoup picks whichever parser
    # happens to be installed, so results can differ between machines.
    bs = BeautifulSoup(req.get(url, timeout=timeout).text, 'html.parser')

    role = ''
    for par in bs.find_all('p', {'class': 'ciPlayerinformationtxt'}):
        # Not every information paragraph has a <b> label or a plain-text value;
        # skip those instead of failing on None.
        label = par.b.string if par.b is not None else None
        if label is None or 'Playing role' not in label:
            continue
        value = par.span.string if par.span is not None else None
        if value is not None:
            role = value.lower()

    # First table is read as batting/fielding, second as bowling.
    tables = bs.find_all('table', {'class': 'engineTable'}, limit=2)
    stats = OrderedDict()

    if 'batsman' in role or 'allrounder' in role:
        bat_stat_vals = _odi_row_values(tables[0]) if len(tables) > 0 else []
        if not bat_stat_vals:
            return dict()
        stats.update(zip(bat_stats, bat_stat_vals))

    if 'bowler' in role or 'allrounder' in role:
        bowl_stat_vals = _odi_row_values(tables[1], skip_dashes=True) if len(tables) > 1 else []
        if not bowl_stat_vals:
            return dict()
        stats.update(zip(bowl_stats, bowl_stat_vals))

    return stats

In [27]:
def _to_per_innings(player_stats, innings_key, renames):
    """
    Replaces raw totals in player_stats with per-innings rates, in place.

    Parameters:
    player_stats (dict): one player's stats; modified in place
    innings_key (str): key holding the number of innings to divide by
    renames (sequence of (str, str)): (raw key to remove, rate key to add) pairs

    """
    innings = player_stats[innings_key]
    for raw_key, rate_key in renames:
        total = player_stats.pop(raw_key)
        # With zero innings there is nothing to average; report 0.0 rather than
        # letting the division raise ZeroDivisionError.
        player_stats[rate_key] = total / innings if innings else 0.0


def combine_stats(urls):
    """
    Given a list of Cricinfo URLs, combines all stats into a one-row DataFrame

    For every player the counting stats (runs, balls, hundreds, fifties, fours,
    sixes, catches; balls bowled, runs conceded, wickets, 4- and 5-wicket hauls)
    are first converted to per-innings rates. All players' values are then added
    up per column. Columns named in bat_stats[1:] are divided by the number of
    players with batting stats and columns named in bowl_stats[1:] by the number
    of players with bowling stats; every other column ('Matches played' and the
    per-innings rates) is left as a plain sum.

    Parameters:
    urls (list of str): the list of Cricinfo URLs

    Returns:
    DataFrame: a single row combining all players' stats (empty if urls is empty)

    """
    batting_renames = (
        ('Runs scored', 'Runs scored/Innings'),
        ('Balls faced', 'Balls faced/Innings'),
        ('Hundreds scored', 'Hundreds/Innings'),
        ('Fifties scored', 'Fifties/Innings'),
        ('Boundary fours', 'Boundary fours/Innings'),
        ('Boundary sixes', 'Boundary sixes/Innings'),
        ('Catches taken', 'Catches/Innings'),
    )
    bowling_renames = (
        ('Balls bowled', 'Balls bowled/Innings'),
        ('Runs conceded', 'Runs conceded/Innings'),
        ('Wickets taken', 'Wickets taken/Innings'),
        ('4 wickets in inning', '4 wickets innings/Innings'),
        ('5 wickets in inning', '5 wickets innings/Innings'),
    )

    num_batters = 0
    num_bowlers = 0
    totals = collections.Counter()
    for url in urls:
        player_stats = get_player_stats(url)
        if 'Innings batted' in player_stats:
            num_batters += 1
            _to_per_innings(player_stats, 'Innings batted', batting_renames)
        if 'Innings bowled' in player_stats:
            num_bowlers += 1
            _to_per_innings(player_stats, 'Innings bowled', bowling_renames)
        # Counter.update() with a mapping adds the values key by key.
        totals.update(player_stats)

    # Index 0 ('Matches played') is shared by both lists and is never averaged.
    averaged_batting = set(bat_stats[1:])
    averaged_bowling = set(bowl_stats[1:])

    row = {}
    for key, total in totals.items():
        if key in averaged_batting:
            total = total / num_batters
        elif key in averaged_bowling:
            total = total / num_bowlers
        # One-element lists make from_dict() build a single-row frame.
        row[key] = [total]
    return pd.DataFrame.from_dict(row)

In [28]:
# Player profile URL pattern: `team` is the team's path segment on the site and
# `player_id` the numeric id of the player's page.
PLAYER_URL_TEMPLATE = 'http://www.espncricinfo.com/{team}/content/player/{player_id}.html'

# One entry per squad: (row label in the output table, team path segment, player ids).
# The order of this list is the row order of `df`; labels and data live together
# so they cannot drift apart.
SQUADS = [
    ('India', 'india',
     [253802, 34102, 28235, 28081, 290716, 625371, 326016, 559235,
      430246, 625383, 481896, 477021, 30045, 422108, 234675]),
    ('Pakistan', 'pakistan',
     [227760, 1158100, 259551, 512191, 568276, 39950, 348144, 42657,
      41434, 318788, 922943, 227758, 681305, 681117, 1072470]),
    ('England', 'england',
     [24598, 8917, 297433, 308967, 662973, 12454, 249866, 19264,
      244497, 303669, 298438, 311158, 308251, 247235, 351588]),
    ('Afghanistan', 'afghanistan',
     [318340, 793457, 533956, 320652, 440970, 524049, 318339, 25913,
      793463, 516561, 440963, 311427, 974109, 352048, 419873]),
    ('Australia', 'australia',
     [5334, 272477, 326434, 261354, 489889, 215155, 272279, 6683,
      325026, 774223, 267192, 311592, 325012, 219889, 379504]),
    ('Bangladesh', 'bangladesh',
     [373538, 56143, 269237, 550133, 629070, 629063, 300619, 330902,
      410763, 56025, 56007, 56194, 536936, 436677, 56029]),
    ('New Zealand', 'newzealand',
     [277906, 559066, 232364, 38699, 55395, 493773, 226492, 506612,
      388802, 232359, 355269, 539511, 502714, 277912, 440516]),
    ('South Africa', 'southafrica',
     [44828, 379143, 321777, 44932, 43906, 600498, 337790, 327830,
      540316, 550215, 47492, 542023, 481979, 40618, 379145]),
    ('Sri Lanka', 'srilanka',
     [227772, 49619, 49758, 784369, 300631, 465793, 629074, 328026,
      49764, 233514, 222354, 49700, 301236, 370040, 324358]),
    ('West Indies', 'westindies',
     [431901, 391485, 315594, 604302, 914567, 581379, 670025, 670013,
      495551, 446101, 230553, 277472, 51880, 276298, 457249]),
]

# Fetch and combine every squad, in order. This issues one HTTP request per player.
team_stats = OrderedDict()
for team_name, team_slug, player_ids in SQUADS:
    urls = [PLAYER_URL_TEMPLATE.format(team=team_slug, player_id=player_id)
            for player_id in player_ids]
    team_stats[team_name] = combine_stats(urls)

# Per-team names are kept so that any other cell referring to them still works.
india_stats = team_stats['India']
pakistan_stats = team_stats['Pakistan']
england_stats = team_stats['England']
afghanistan_stats = team_stats['Afghanistan']
australia_stats = team_stats['Australia']
bangladesh_stats = team_stats['Bangladesh']
newzealand_stats = team_stats['New Zealand']
southafrica_stats = team_stats['South Africa']
srilanka_stats = team_stats['Sri Lanka']
westindies_stats = team_stats['West Indies']

# Stack the one-row frames into one table with a row per team, labelled by team name.
combined_stats = list(team_stats.values())
df = pd.concat(combined_stats)
df.index = list(team_stats.keys())