In [1]:
#Importing the libraries

from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [2]:
#Getting the URL of the site and extracting the contents

url="https://www.espncricinfo.com/records/tournament/team-match-results/icc-men-s-t20-world-cup-2022-23-14450"

page = requests.get(url)

soup = BeautifulSoup(page.text,"html")

In [3]:
#Getting the table contents

table=soup.find_all("table")[0]

In [4]:
#Extracting the portion that contains the table title

table_titles=table.find_all('td')[:7]

In [5]:
#Getting the table titles

table_titles=[title.text.strip() for title in table_titles]

In [6]:
#Display table titles

print(table_titles)

['Team 1', 'Team 2', 'Winner', 'Margin', 'Ground', 'Match Date', 'Scorecard']


In [7]:
#creating the DataFrame for match summary

df_match_summary=pd.DataFrame(columns=table_titles)
df_match_summary

Unnamed: 0,Team 1,Team 2,Winner,Margin,Ground,Match Date,Scorecard


In [8]:
#Extracting the html content to get the column data

column_data=table.find_all("tr")[1:]

In [10]:
#Extracting the values row wise

#Link and match_id's of all matches
scorecard_links={}

#Extracting the data and adding into the dataframe
for row in column_data:
    td_data=row.find_all("td")
    table_data=[data.text.strip() for data in td_data]
    scorecard_link ="https://www.espncricinfo.com"+row.find_all("a")[-1]["href"]
    match_id=row.find_all("a")[1].text.strip()
    scorecard_links[match_id]=scorecard_link
    length=len(df_match_summary)
    df_match_summary.loc[length]=table_data

In [11]:
#dataframe

df_match_summary

Unnamed: 0,Team 1,Team 2,Winner,Margin,Ground,Match Date,Scorecard
0,Namibia,Sri Lanka,Namibia,55 runs,Geelong,"Oct 16, 2022",T20I # 1823
1,Netherlands,U.A.E.,Netherlands,3 wickets,Geelong,"Oct 16, 2022",T20I # 1825
2,Scotland,West Indies,Scotland,42 runs,Hobart,"Oct 17, 2022",T20I # 1826
3,Ireland,Zimbabwe,Zimbabwe,31 runs,Hobart,"Oct 17, 2022",T20I # 1828
4,Namibia,Netherlands,Netherlands,5 wickets,Geelong,"Oct 18, 2022",T20I # 1830
...,...,...,...,...,...,...,...
79,Bangladesh,Pakistan,Pakistan,5 wickets,Adelaide,"Nov 6, 2022",T20I # 1872
80,India,Zimbabwe,India,71 runs,Melbourne,"Nov 6, 2022",T20I # 1873
81,New Zealand,Pakistan,Pakistan,7 wickets,Sydney,"Nov 9, 2022",T20I # 1877
82,England,India,England,10 wickets,Adelaide,"Nov 10, 2022",T20I # 1878


In [12]:
#Changing the column name scorecard to ID

df_match_summary.rename({"Scorecard":"match_id"},axis=1,inplace=True)

In [13]:
#checking for any null values in the dataframe

df_match_summary.isna().sum()

Team 1        0
Team 2        0
Winner        0
Margin        0
Ground        0
Match Date    0
match_id      0
dtype: int64

In [14]:
#Saving the file to csv

df_match_summary.to_csv(r"Data\match_summary.csv",index=False)

In [15]:
#Extracting the batting summary for each of the matches

df_batting_summary=pd.DataFrame(columns=["match","teamInnings","battingPos","batsman_name","dismissal","runs","balls","minutes","4s","6s","SR","player_link","match_id"])
#df_batting_summary=[data.text.strip() for data in table1.find_all("tr")[0]]
#df_batting_summary=["match","teamInnings","battingPos"]+df_batting_summary
#df_batting_summary=pd.DataFrame(columns=df_batting_summary)

#scorecard_links={"T20I # 1823":"https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/namibia-vs-sri-lanka-1st-match-first-round-group-a-1298135/full-scorecard"}

#Looping through each link to get the scorecard of all matches
for match_id,link in scorecard_links.items():
    page = requests.get(link)
    soup = BeautifulSoup(page.text,"html")
    table1=soup.find_all("table")[0]
    table2=soup.find_all("table")[2]
    outer_span1=soup.find('span', class_='ds-inline-flex ds-items-center ds-bg-ui-fill ds-text-typo-primary ds-border ds-border-ui-stroke-primary ds-h-6 ds-px-2 ds-rounded-full ds-font-medium ds-cursor-pointer ds-whitespace-nowrap')
    outer_span2=soup.find('span',class_='ds-inline-flex ds-items-center ds-bg-ui-fill-alternate ds-text-typo ds-h-6 ds-px-2 ds-rounded-full ds-font-medium ds-cursor-pointer ds-whitespace-nowrap')
    inner_span1 = outer_span1.find('span', class_='ds-text-tight-xs')
    inner_span2 = outer_span2.find('span', class_='ds-text-tight-xs')
    team1 = inner_span1.text.replace("Innings"," ").strip()
    team2 = inner_span2.text.replace("Innings"," ").strip()


    batting_pos=0
    column_data=table1.find_all("tr")[1:]
    for row in column_data:
        if '<td class="ds-min-w-max" colspan="1">Extras</td>' in str(row):
            break

        elif '<tr class="ds-hidden"><td class="" colspan="9"></td></tr>' in str(row):
            continue
        else:
            batting_pos=batting_pos+1
            table_data=[team1+" Vs "+team2,team1,batting_pos]
            row_data=row.find_all("td")
            table_data1=[data.text for data in row_data]
            player_link="https://www.espncricinfo.com"+row.find('a')['href']
            table_data=table_data+table_data1+[player_link]+[match_id]
            length=len(df_batting_summary)
            df_batting_summary.loc[length]=table_data
            

    batting_pos=0
    column_data=table2.find_all("tr")[1:]
    for row in column_data:
        if '<td class="ds-min-w-max" colspan="1">Extras</td>' in str(row):
            break

        elif '<tr class="ds-hidden"><td class="" colspan="9"></td></tr>' in str(row):
            continue
        else:
            batting_pos=batting_pos+1
            table_data=[team1+" Vs "+team2,team2,batting_pos]
            row_data=row.find_all("td")
            table_data1=[data.text for data in row_data]
            player_link="https://www.espncricinfo.com"+row.find('a')['href']
            table_data=table_data+table_data1+[player_link]+[match_id]
            length=len(df_batting_summary)
            df_batting_summary.loc[length]=table_data

df_batting_summary

Unnamed: 0,match,teamInnings,battingPos,batsman_name,dismissal,runs,balls,minutes,4s,6s,SR,player_link,match_id
0,Namibia Vs Sri Lanka,Namibia,1,Michael van Lingen,c Pramod Madushan b Chameera,3,6,7,0,0,50.00,https://www.espncricinfo.com/cricketers/michae...,T20I # 1823
1,Namibia Vs Sri Lanka,Namibia,2,Divan la Cock,c Shanaka b Pramod Madushan,9,9,15,1,0,100.00,https://www.espncricinfo.com/cricketers/divan-...,T20I # 1823
2,Namibia Vs Sri Lanka,Namibia,3,Jan Nicol Loftie-Eaton,c †Mendis b Karunaratne,20,12,18,1,2,166.66,https://www.espncricinfo.com/cricketers/jan-ni...,T20I # 1823
3,Namibia Vs Sri Lanka,Namibia,4,Stephan Baard,c DM de Silva b Pramod Madushan,26,24,49,2,0,108.33,https://www.espncricinfo.com/cricketers/stepha...,T20I # 1823
4,Namibia Vs Sri Lanka,Namibia,5,Gerhard Erasmus (c),c Gunathilaka b PWH de Silva,20,24,30,0,0,83.33,https://www.espncricinfo.com/cricketers/gerhar...,T20I # 1823
...,...,...,...,...,...,...,...,...,...,...,...,...,...
694,Pakistan Vs England,England,3,Phil Salt,c Iftikhar Ahmed b Haris Rauf,10,9,16,2,0,111.11,https://www.espncricinfo.com/cricketers/phil-s...,T20I # 1879
695,Pakistan Vs England,England,4,Ben Stokes,not out,52,49,81,5,1,106.12,https://www.espncricinfo.com/cricketers/ben-st...,T20I # 1879
696,Pakistan Vs England,England,5,Harry Brook,c Shaheen Shah Afridi b Shadab Khan,20,23,36,1,0,86.95,https://www.espncricinfo.com/cricketers/harry-...,T20I # 1879
697,Pakistan Vs England,England,6,Moeen Ali,b Mohammad Wasim,19,13,30,3,0,146.15,https://www.espncricinfo.com/cricketers/moeen-...,T20I # 1879


In [17]:
#Replacing the "\" and unwanted values present in the columns

def replace_after_backslash(value):
    backslash_index = value.find('\xa0')
    return value[:backslash_index] if backslash_index != -1 else value
    
def replace_between_backslash(value):
    value=value.replace("†","")
    return(value)

df_batting_summary['batsman_name'] = df_batting_summary['batsman_name'].apply(lambda x: replace_after_backslash(x))
df_batting_summary['dismissal']=df_batting_summary['dismissal'].apply(lambda x: replace_between_backslash(x))
df_batting_summary

Unnamed: 0,match,teamInnings,battingPos,batsman_name,dismissal,runs,balls,minutes,4s,6s,SR,player_link,match_id
0,Namibia Vs Sri Lanka,Namibia,1,Michael van Lingen,c Pramod Madushan b Chameera,3,6,7,0,0,50.00,https://www.espncricinfo.com/cricketers/michae...,T20I # 1823
1,Namibia Vs Sri Lanka,Namibia,2,Divan la Cock,c Shanaka b Pramod Madushan,9,9,15,1,0,100.00,https://www.espncricinfo.com/cricketers/divan-...,T20I # 1823
2,Namibia Vs Sri Lanka,Namibia,3,Jan Nicol Loftie-Eaton,c Mendis b Karunaratne,20,12,18,1,2,166.66,https://www.espncricinfo.com/cricketers/jan-ni...,T20I # 1823
3,Namibia Vs Sri Lanka,Namibia,4,Stephan Baard,c DM de Silva b Pramod Madushan,26,24,49,2,0,108.33,https://www.espncricinfo.com/cricketers/stepha...,T20I # 1823
4,Namibia Vs Sri Lanka,Namibia,5,Gerhard Erasmus,c Gunathilaka b PWH de Silva,20,24,30,0,0,83.33,https://www.espncricinfo.com/cricketers/gerhar...,T20I # 1823
...,...,...,...,...,...,...,...,...,...,...,...,...,...
694,Pakistan Vs England,England,3,Phil Salt,c Iftikhar Ahmed b Haris Rauf,10,9,16,2,0,111.11,https://www.espncricinfo.com/cricketers/phil-s...,T20I # 1879
695,Pakistan Vs England,England,4,Ben Stokes,not out,52,49,81,5,1,106.12,https://www.espncricinfo.com/cricketers/ben-st...,T20I # 1879
696,Pakistan Vs England,England,5,Harry Brook,c Shaheen Shah Afridi b Shadab Khan,20,23,36,1,0,86.95,https://www.espncricinfo.com/cricketers/harry-...,T20I # 1879
697,Pakistan Vs England,England,6,Moeen Ali,b Mohammad Wasim,19,13,30,3,0,146.15,https://www.espncricinfo.com/cricketers/moeen-...,T20I # 1879


In [18]:
#Creating a new column to find whether the batsman is out or not

df_batting_summary["out/not_out"]=df_batting_summary["dismissal"].apply(lambda x : "not out" if x=="not out " else "out")

In [18]:
#droppiing the dismissal column

df_batting_summary.drop(columns=["dismissal"],inplace=True)
df_batting_summary

Unnamed: 0,match,teamInnings,battingPos,batsman_name,runs,balls,minutes,4s,6s,SR,player_link,match_id
0,Namibia Vs Sri Lanka,Namibia,1,Michael van Lingen,3,6,7,0,0,50.00,https://www.espncricinfo.com/cricketers/michae...,T20I # 1823
1,Namibia Vs Sri Lanka,Namibia,2,Divan la Cock,9,9,15,1,0,100.00,https://www.espncricinfo.com/cricketers/divan-...,T20I # 1823
2,Namibia Vs Sri Lanka,Namibia,3,Jan Nicol Loftie-Eaton,20,12,18,1,2,166.66,https://www.espncricinfo.com/cricketers/jan-ni...,T20I # 1823
3,Namibia Vs Sri Lanka,Namibia,4,Stephan Baard,26,24,49,2,0,108.33,https://www.espncricinfo.com/cricketers/stepha...,T20I # 1823
4,Namibia Vs Sri Lanka,Namibia,5,Gerhard Erasmus,20,24,30,0,0,83.33,https://www.espncricinfo.com/cricketers/gerhar...,T20I # 1823
...,...,...,...,...,...,...,...,...,...,...,...,...
694,Pakistan Vs England,England,3,Phil Salt,10,9,16,2,0,111.11,https://www.espncricinfo.com/cricketers/phil-s...,T20I # 1879
695,Pakistan Vs England,England,4,Ben Stokes,52,49,81,5,1,106.12,https://www.espncricinfo.com/cricketers/ben-st...,T20I # 1879
696,Pakistan Vs England,England,5,Harry Brook,20,23,36,1,0,86.95,https://www.espncricinfo.com/cricketers/harry-...,T20I # 1879
697,Pakistan Vs England,England,6,Moeen Ali,19,13,30,3,0,146.15,https://www.espncricinfo.com/cricketers/moeen-...,T20I # 1879


In [19]:
#dataframe info

df_batting_summary.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 699 entries, 0 to 698
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   match         699 non-null    object
 1   teamInnings   699 non-null    object
 2   battingPos    699 non-null    int64 
 3   batsman_name  699 non-null    object
 4   runs          699 non-null    object
 5   balls         699 non-null    object
 6   minutes       699 non-null    object
 7   4s            699 non-null    object
 8   6s            699 non-null    object
 9   SR            699 non-null    object
 10  player_link   699 non-null    object
 11  match_id      699 non-null    object
dtypes: int64(1), object(11)
memory usage: 71.0+ KB


In [20]:
#Creating a new dataframe for player info

df_player_info1=df_batting_summary[["batsman_name","teamInnings","player_link"]]
df_player_info1.rename({"batsman_name":"name","teamInnings":"team"},axis=1,inplace=True)
df_player_info1

Unnamed: 0,name,team,player_link
0,Michael van Lingen,Namibia,https://www.espncricinfo.com/cricketers/michae...
1,Divan la Cock,Namibia,https://www.espncricinfo.com/cricketers/divan-...
2,Jan Nicol Loftie-Eaton,Namibia,https://www.espncricinfo.com/cricketers/jan-ni...
3,Stephan Baard,Namibia,https://www.espncricinfo.com/cricketers/stepha...
4,Gerhard Erasmus,Namibia,https://www.espncricinfo.com/cricketers/gerhar...
...,...,...,...
694,Phil Salt,England,https://www.espncricinfo.com/cricketers/phil-s...
695,Ben Stokes,England,https://www.espncricinfo.com/cricketers/ben-st...
696,Harry Brook,England,https://www.espncricinfo.com/cricketers/harry-...
697,Moeen Ali,England,https://www.espncricinfo.com/cricketers/moeen-...


In [22]:
#dropping the link column from the batting summary dataframe

df_batting_summary.drop(columns=["player_link"],inplace=True)

In [23]:
#Saving the csv file

df_batting_summary.to_csv(r"Data\batting_summary.csv",index=False)

In [22]:
#bowling summary

#link='https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/england-vs-pakistan-final-1298179/full-scorecard'

df_bowling_summary=pd.DataFrame(columns=["match","bowlingTeam","bowlerName","overs","maiden","runs","wickets","economy","0s","4s","6s","wides","noBalls","player_link","match_id"])
#df_batting_summary=[data.text.strip() for data in table1.find_all("tr")[0]]
#df_batting_summary=["match","teamInnings","battingPos"]+df_batting_summary
#df_batting_summary=pd.DataFrame(columns=df_batting_summary)

#Looping through each link to get the scorecard of all matches
for match_id,link in scorecard_links.items():  
    page = requests.get(link)
    soup = BeautifulSoup(page.text,"html")
    table1=soup.find_all("table")[1]
    table2=soup.find_all("table")[3]
    outer_span1=soup.find('span',class_='ds-inline-flex ds-items-center ds-bg-ui-fill-alternate ds-text-typo ds-h-6 ds-px-2 ds-rounded-full ds-font-medium ds-cursor-pointer ds-whitespace-nowrap')                                     
    outer_span2=soup.find('span', class_='ds-inline-flex ds-items-center ds-bg-ui-fill ds-text-typo-primary ds-border ds-border-ui-stroke-primary ds-h-6 ds-px-2 ds-rounded-full ds-font-medium ds-cursor-pointer ds-whitespace-nowrap')
    inner_span1 = outer_span1.find('span', class_='ds-text-tight-xs')
    inner_span2 = outer_span2.find('span', class_='ds-text-tight-xs')
    team1 = inner_span1.text.replace("Innings"," ").strip()
    team2 = inner_span2.text.replace("Innings"," ").strip()


    column_data=table1.find_all("tr")[1:]
    for row in column_data:
        if '<td class="" colspan="11">' in str(row):
                continue
        else:
            table_data=[team1+" Vs "+team2,team1]
            row_data=row.find_all("td")
            table_data1=[data.text for data in row_data]
            player_link="https://www.espncricinfo.com"+row.find('a')['href']
            table_data=table_data+table_data1+[player_link]+[match_id]
            length=len(df_bowling_summary)
            df_bowling_summary.loc[length]=table_data                           


    column_data=table2.find_all("tr")[1:]
    for row in column_data:
        if '<td class="" colspan="11">' in str(row):
                continue
        else:
            table_data=[team1+" Vs "+team2,team2]
            row_data=row.find_all("td")
            table_data1=[data.text for data in row_data]
            player_link="https://www.espncricinfo.com"+row.find('a')['href']
            table_data=table_data+table_data1+[player_link]+[match_id]
            length=len(df_bowling_summary)
            df_bowling_summary.loc[length]=table_data                           

df_bowling_summary


Unnamed: 0,match,bowlingTeam,bowlerName,overs,maiden,runs,wickets,economy,0s,4s,6s,wides,noBalls,player_link,match_id
0,Sri Lanka Vs Namibia,Sri Lanka,Maheesh Theekshana,4,0,23,1,5.75,7,0,0,2,0,https://www.espncricinfo.com/cricketers/mahees...,T20I # 1823
1,Sri Lanka Vs Namibia,Sri Lanka,Dushmantha Chameera,4,0,39,1,9.75,6,3,1,2,0,https://www.espncricinfo.com/cricketers/dushma...,T20I # 1823
2,Sri Lanka Vs Namibia,Sri Lanka,Pramod Madushan,4,0,37,2,9.25,6,3,1,0,0,https://www.espncricinfo.com/cricketers/pramod...,T20I # 1823
3,Sri Lanka Vs Namibia,Sri Lanka,Chamika Karunaratne,4,0,36,1,9.00,7,3,1,1,0,https://www.espncricinfo.com/cricketers/chamik...,T20I # 1823
4,Sri Lanka Vs Namibia,Sri Lanka,Wanindu Hasaranga,4,0,27,1,6.75,8,1,1,0,0,https://www.espncricinfo.com/cricketers/wanind...,T20I # 1823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,England Vs Pakistan,Pakistan,Naseem Shah,4,0,30,0,7.50,15,3,1,1,0,https://www.espncricinfo.com/cricketers/naseem...,T20I # 1879
496,England Vs Pakistan,Pakistan,Haris Rauf,4,0,23,2,5.75,13,3,0,1,0,https://www.espncricinfo.com/cricketers/haris-...,T20I # 1879
497,England Vs Pakistan,Pakistan,Shadab Khan,4,0,20,1,5.00,10,1,0,0,0,https://www.espncricinfo.com/cricketers/shadab...,T20I # 1879
498,England Vs Pakistan,Pakistan,Mohammad Wasim,4,0,38,1,9.50,5,5,0,2,0,https://www.espncricinfo.com/cricketers/mohamm...,T20I # 1879


In [24]:
#renaming the bowling summary columns and adding the player details to the player info dataframe

df_player_info2=df_bowling_summary[["bowlerName","bowlingTeam","player_link"]]
df_player_info2.rename({"bowlerName":"name","bowlingTeam":"team"},axis=1,inplace=True)
df_player_info=pd.concat([df_player_info1,df_player_info2])
df_player_info

Unnamed: 0,name,team,player_link
0,Michael van Lingen,Namibia,https://www.espncricinfo.com/cricketers/michae...
1,Divan la Cock,Namibia,https://www.espncricinfo.com/cricketers/divan-...
2,Jan Nicol Loftie-Eaton,Namibia,https://www.espncricinfo.com/cricketers/jan-ni...
3,Stephan Baard,Namibia,https://www.espncricinfo.com/cricketers/stepha...
4,Gerhard Erasmus,Namibia,https://www.espncricinfo.com/cricketers/gerhar...
...,...,...,...
495,Naseem Shah,Pakistan,https://www.espncricinfo.com/cricketers/naseem...
496,Haris Rauf,Pakistan,https://www.espncricinfo.com/cricketers/haris-...
497,Shadab Khan,Pakistan,https://www.espncricinfo.com/cricketers/shadab...
498,Mohammad Wasim,Pakistan,https://www.espncricinfo.com/cricketers/mohamm...


In [25]:
#dropping the player link column from the bowling summary dataframe

df_bowling_summary.drop(columns=["player_link"],inplace=True)

In [26]:
#saving the bowling summary file

df_bowling_summary.to_csv(r"Data\bowling_summary.csv",index=False)

In [27]:
#dropping the duplicates

df_player_info.drop_duplicates(subset="name",keep="first",inplace=True)
df_player_info.reset_index(drop=True,inplace=True)

In [28]:
#Adding the battingstyle,bowlingstyle and playingrole column to the palyer info dataframe

df_player_info=pd.concat([df_player_info,pd.DataFrame(columns=["battingStyle","bowlingStyle","playingRole"])])
df_player_info

Unnamed: 0,name,team,player_link,battingStyle,bowlingStyle,playingRole
0,Michael van Lingen,Namibia,https://www.espncricinfo.com/cricketers/michae...,,,
1,Divan la Cock,Namibia,https://www.espncricinfo.com/cricketers/divan-...,,,
2,Jan Nicol Loftie-Eaton,Namibia,https://www.espncricinfo.com/cricketers/jan-ni...,,,
3,Stephan Baard,Namibia,https://www.espncricinfo.com/cricketers/stepha...,,,
4,Gerhard Erasmus,Namibia,https://www.espncricinfo.com/cricketers/gerhar...,,,
...,...,...,...,...,...,...
208,Ashton Agar,Australia,https://www.espncricinfo.com/cricketers/ashton...,,,
209,Brandon Glover,Netherlands,https://www.espncricinfo.com/cricketers/brando...,,,
210,Shoriful Islam,Bangladesh,https://www.espncricinfo.com/cricketers/shorif...,,,
211,Ebadot Hossain,Bangladesh,https://www.espncricinfo.com/cricketers/ebadot...,,,


In [29]:
#extarcting the battingstyle, bowlingstyle and playingrole values from the website

table_list=[]
players_info_extracted=pd.DataFrame(columns=["battingStyle","bowlingStyle","playingRole"])
links='https://www.espncricinfo.com/cricketers/matthew-wade-230193'

#for link in [links]:
for link in df_player_info["player_link"]:
    table_list=[]
    flag=0
    page = requests.get(link)
    soup = BeautifulSoup(page.text,"html")
    player_outer=soup.find("div",class_="ds-grid lg:ds-grid-cols-3 ds-grid-cols-2 ds-gap-4 ds-mb-8")
    for row in player_outer:
        if row.find("p").text.strip()== "Batting Style":
            table_list.append(row.find("span").text.strip())
        elif row.find("p").text.strip()== "Bowling Style":
            flag=1
            table_list.append(row.find("span").text.strip())
        elif row.find("p").text.strip()== "Playing Role":
            table_list.append(row.find("span").text.strip())
        else:
            pass
    if flag==0:
        table_list.insert(1,"")
    length=len(players_info_extracted)
    players_info_extracted.loc[length]=table_list
    
players_info_extracted

Unnamed: 0,battingStyle,bowlingStyle,playingRole
0,Left hand Bat,"Left arm Medium, Slow Left arm Orthodox",Bowling Allrounder
1,Right hand Bat,Legbreak,Opening Batter
2,Left hand Bat,"Right arm Medium, Legbreak",Batter
3,Right hand Bat,Right arm Medium fast,Batter
4,Right hand Bat,Right arm Offbreak,Allrounder
...,...,...,...
208,Left hand Bat,Slow Left arm Orthodox,Bowler
209,Right hand Bat,Right arm Fast,Bowler
210,Left hand Bat,Left arm Medium fast,Bowler
211,Right hand Bat,Right arm Fast medium,Bowler


In [30]:
#assigning the extratced values to the playerinfo dataframe

df_player_info[["battingStyle","bowlingStyle","playingRole"]]=players_info_extracted
df_player_info

Unnamed: 0,name,team,player_link,battingStyle,bowlingStyle,playingRole
0,Michael van Lingen,Namibia,https://www.espncricinfo.com/cricketers/michae...,Left hand Bat,"Left arm Medium, Slow Left arm Orthodox",Bowling Allrounder
1,Divan la Cock,Namibia,https://www.espncricinfo.com/cricketers/divan-...,Right hand Bat,Legbreak,Opening Batter
2,Jan Nicol Loftie-Eaton,Namibia,https://www.espncricinfo.com/cricketers/jan-ni...,Left hand Bat,"Right arm Medium, Legbreak",Batter
3,Stephan Baard,Namibia,https://www.espncricinfo.com/cricketers/stepha...,Right hand Bat,Right arm Medium fast,Batter
4,Gerhard Erasmus,Namibia,https://www.espncricinfo.com/cricketers/gerhar...,Right hand Bat,Right arm Offbreak,Allrounder
...,...,...,...,...,...,...
208,Ashton Agar,Australia,https://www.espncricinfo.com/cricketers/ashton...,Left hand Bat,Slow Left arm Orthodox,Bowler
209,Brandon Glover,Netherlands,https://www.espncricinfo.com/cricketers/brando...,Right hand Bat,Right arm Fast,Bowler
210,Shoriful Islam,Bangladesh,https://www.espncricinfo.com/cricketers/shorif...,Left hand Bat,Left arm Medium fast,Bowler
211,Ebadot Hossain,Bangladesh,https://www.espncricinfo.com/cricketers/ebadot...,Right hand Bat,Right arm Fast medium,Bowler


In [31]:
#saving the playerinfo dataframe as csv

df_player_info.to_csv(r"Data\player_info.csv",index=False)