## Scraping Cricbuzz site to get Top Batters in Tests, ODIs, T20Is

In [1]:
# importing libraries
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
# Fetch the rankings page and parse it with BeautifulSoup.
url = 'https://www.cricbuzz.com/cricket-stats/icc-rankings/men/batting'

# Without a timeout requests can wait indefinitely on an unresponsive server.
page = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page.
page.raise_for_status()
doc = BeautifulSoup(page.content,'html.parser')

#### Obtaining the div tags that contain every batter's data

In [4]:
# Collect every ranking-row <div>; the functions below index into this list.
div_tags = doc.find_all(
    name='div',
    class_='cb-col cb-col-100 cb-font-14 cb-lst-itm text-center',
)
len(div_tags)

290

In [5]:
# Show the first row tag so its child tags and class names can be inspected.
div_tags[0]

<div class="cb-col cb-col-100 cb-font-14 cb-lst-itm text-center"> <div class="cb-col cb-col-16 cb-rank-tbl cb-font-16">1</div> <div class="cb-col cb-col-50 cb-lst-itm-sm text-left"> <div class="cb-col cb-col-33"> <div class="cb-col cb-col-50"><span class="cb-ico" style="position:absolute;"></span>    –</div> <div class="cb-col cb-col-50"><img alt="Joe Root" class="img-responsive cb-rank-plyr-img" src="/a/img/v1/50x50/i1/c170942/joe-root.jpg" title="Joe Root"/></div> </div> <div class="cb-col cb-col-67 cb-rank-plyr"> <a class="text-hvr-underline text-bold cb-font-16" href="/profiles/8019/joe-root" title="Joe Root's Profile">Joe Root</a> <div class="cb-font-12 text-gray">ENGLAND</div> </div> </div> <div class="cb-col cb-col-17 cb-rank-tbl pull-right">890</div> </div>

#### Function to obtain Top 10 test batters

In [50]:
def get_test_player_data(start=0, count=10):
    """Collect position, name, country and rating for Test batter rows.

    Reads the module-level ``div_tags`` list and extracts four text fields
    from each of the ``count`` consecutive rows beginning at index ``start``.

    Args:
        start: Index in ``div_tags`` of the first row to read. The default
            of 0 matches the original hard-coded range.
        count: Number of consecutive rows to read (default 10).

    Returns:
        dict: Keys 'Positions', 'Player Name', 'Country' and 'Ratings',
        each mapped to a list of strings in row order.

    Raises:
        IndexError: If ``div_tags`` has no row at a requested index.
        AttributeError: If a row is missing one of the expected child tags
            (``find`` then returns ``None``, which has no ``.text``).
    """
    test_batters_positions = []
    test_batters_names = []
    test_batters_country = []
    test_batters_ratings = []

    # One pass per row fills all four columns together, instead of walking
    # the same index range four separate times.
    for i in range(start, start + count):
        row = div_tags[i]
        test_batters_positions.append(
            row.find('div', class_='cb-col cb-col-16 cb-rank-tbl cb-font-16').text
        )
        test_batters_names.append(
            row.find('a', class_='text-hvr-underline text-bold cb-font-16').text
        )
        test_batters_country.append(
            row.find('div', class_='cb-font-12 text-gray').text
        )
        test_batters_ratings.append(
            row.find('div', class_='cb-col cb-col-17 cb-rank-tbl pull-right').text
        )

    # Key order determines the column order of the resulting DataFrame.
    test_dict = {
        'Positions': test_batters_positions,
        'Player Name': test_batters_names,
        'Country': test_batters_country,
        'Ratings': test_batters_ratings,
    }

    return test_dict

#### Creating a dataframe before writing to csv files

In [None]:
# Each key of the returned dict becomes one column of the Test table.
test_df = pd.DataFrame(data=get_test_player_data())

In [52]:
# Display the Test rankings table as the cell output.
test_df

Unnamed: 0,Positions,Player Name,Country,Ratings
0,1,Joe Root,ENGLAND,890
1,2,Marnus Labuschagne,AUSTRALIA,885
2,3,Babar Azam,PAKISTAN,879
3,4,Steven Smith,AUSTRALIA,848
4,5,Rishabh Pant,INDIA,801
5,6,Kane Williamson,NEW ZEALAND,786
6,7,Usman Khawaja,AUSTRALIA,766
7,8,Dimuth Karunaratne,SRI LANKA,748
8,9,Rohit Sharma,INDIA,746
9,10,Jonny Bairstow,ENGLAND,726


#### Similar function to get ODI batters rankings with adjusted index

In [53]:
def get_odi_player_data(start=96, count=10):
    """Collect position, name, country and rating for ODI batter rows.

    Reads the module-level ``div_tags`` list, which holds the rows for all
    formats, and extracts four text fields from each of the ``count``
    consecutive rows beginning at index ``start``.

    Args:
        start: Index in ``div_tags`` of the first row to read. The default
            of 96 matches the original hard-coded range.
            NOTE(review): this offset depends on the page layout at scrape
            time; confirm it if the site changes.
        count: Number of consecutive rows to read (default 10).

    Returns:
        dict: Keys 'Positions', 'Player Name', 'Country' and 'Ratings',
        each mapped to a list of strings in row order.

    Raises:
        IndexError: If ``div_tags`` has no row at a requested index.
        AttributeError: If a row is missing one of the expected child tags
            (``find`` then returns ``None``, which has no ``.text``).
    """
    odi_batters_positions = []
    odi_batters_names = []
    odi_batters_country = []
    odi_batters_ratings = []

    # One pass per row fills all four columns together, instead of walking
    # the same index range four separate times.
    for i in range(start, start + count):
        row = div_tags[i]
        odi_batters_positions.append(
            row.find('div', class_='cb-col cb-col-16 cb-rank-tbl cb-font-16').text
        )
        odi_batters_names.append(
            row.find('a', class_='text-hvr-underline text-bold cb-font-16').text
        )
        odi_batters_country.append(
            row.find('div', class_='cb-font-12 text-gray').text
        )
        odi_batters_ratings.append(
            row.find('div', class_='cb-col cb-col-17 cb-rank-tbl pull-right').text
        )

    # Key order determines the column order of the resulting DataFrame.
    odi_dict = {
        'Positions': odi_batters_positions,
        'Player Name': odi_batters_names,
        'Country': odi_batters_country,
        'Ratings': odi_batters_ratings,
    }

    return odi_dict

In [54]:
# Each key of the returned dict becomes one column of the ODI table.
odi_df = pd.DataFrame(data=get_odi_player_data())

In [55]:
# Display the ODI rankings table as the cell output.
odi_df

Unnamed: 0,Positions,Player Name,Country,Ratings
0,1,Babar Azam,PAKISTAN,890
1,2,Rassie van der Dussen,SOUTH AFRICA,789
2,3,Quinton de Kock,SOUTH AFRICA,784
3,4,Imam-ul-Haq,PAKISTAN,779
4,5,Virat Kohli,INDIA,744
5,6,David Warner,AUSTRALIA,743
6,7,Rohit Sharma,INDIA,740
7,8,Jonny Bairstow,ENGLAND,732
8,9,Ross Taylor,NEW ZEALAND,715
9,10,Kane Williamson,NEW ZEALAND,698


#### Similar function to get T20I batters rankings with adjusted index

In [63]:
def get_t20i_player_data(start=192, count=10):
    """Collect position, name, country and rating for T20I batter rows.

    Reads the module-level ``div_tags`` list and extracts four text fields
    from each of the ``count`` consecutive rows beginning at index ``start``.

    Args:
        start: Index in ``div_tags`` of the first row to read. The default
            of 192 matches the original hard-coded range.
            NOTE(review): this offset depends on the page layout at scrape
            time; confirm it if the site changes.
        count: Number of consecutive rows to read (default 10).

    Returns:
        dict: Keys 'Positions', 'Player Name', 'Country' and 'Ratings',
        each mapped to a list of strings in row order.

    Raises:
        IndexError: If ``div_tags`` has no row at a requested index.
        AttributeError: If a row is missing one of the expected child tags
            (``find`` then returns ``None``, which has no ``.text``).
    """
    t20i_batters_positions = []
    t20i_batters_names = []
    t20i_batters_country = []
    t20i_batters_ratings = []

    # One pass per row fills all four columns together, instead of walking
    # the same index range four separate times.
    for i in range(start, start + count):
        row = div_tags[i]
        t20i_batters_positions.append(
            row.find('div', class_='cb-col cb-col-16 cb-rank-tbl cb-font-16').text
        )
        t20i_batters_names.append(
            row.find('a', class_='text-hvr-underline text-bold cb-font-16').text
        )
        t20i_batters_country.append(
            row.find('div', class_='cb-font-12 text-gray').text
        )
        t20i_batters_ratings.append(
            row.find('div', class_='cb-col cb-col-17 cb-rank-tbl pull-right').text
        )

    # Key order determines the column order of the resulting DataFrame.
    t20i_dict = {
        'Positions': t20i_batters_positions,
        'Player Name': t20i_batters_names,
        'Country': t20i_batters_country,
        'Ratings': t20i_batters_ratings,
    }

    return t20i_dict

In [64]:
# Each key of the returned dict becomes one column of the T20I table.
t20i_df = pd.DataFrame(data=get_t20i_player_data())

In [65]:
# Display the T20I rankings table as the cell output.
t20i_df

Unnamed: 0,Positions,Player Name,Country,Ratings
0,1,Mohammad Rizwan,PAKISTAN,815
1,2,Babar Azam,PAKISTAN,794
2,3,Aiden Markram,SOUTH AFRICA,792
3,4,Suryakumar Yadav,INDIA,775
4,5,Dawid Malan,ENGLAND,731
5,6,Aaron Finch,AUSTRALIA,716
6,7,Devon Conway,NEW ZEALAND,683
7,8,Pathum Nissanka,SRI LANKA,675
8,9,Muhammad Waseem,UNITED ARAB EMIRATES,671
9,10,Reeza Hendricks,SOUTH AFRICA,628


#### Writing all the DataFrames to CSV files

In [67]:
# Write one CSV per format. index=False leaves out the DataFrame's row index;
# the parameter is documented as a bool, so pass False rather than None.
test_df.to_csv('Test Batter Rankings.csv', index=False)
odi_df.to_csv('ODI Batter Rankings.csv', index=False)
t20i_df.to_csv('T20I Batter Rankings.csv', index=False)