### Importing Libraries

In [227]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [228]:
# read dataset
bowling_df=pd.read_csv("ODIs_Bowling.csv")

In [229]:
# Drop useless columns
index=bowling_df[(bowling_df['Mat']=="-")|(bowling_df['Inns']=="-")|(bowling_df['Balls']=="-")| (bowling_df['BBI']=="-")].index
bowling_df.drop(index,inplace=True)
bowling_df.drop(columns=['Unnamed: 0','Unnamed: 13'],inplace=True)

In [230]:
# players with at least 20 matches
bowling_df=bowling_df[bowling_df['Mat']>=20]

In [231]:
bowling_df.head(5)

Unnamed: 0,Player,Span,Mat,Inns,Balls,Runs,Wkts,BBI,Ave,Econ,SR,4,5
0,M Muralitharan (Asia/ICC/SL),1993-2011,350,341,18811,12326,534,7/30,23.08,3.93,35.2,15,10
1,Wasim Akram (PAK),1984-2003,356,351,18186,11812,502,5/15,23.52,3.89,36.2,17,6
2,Waqar Younis (PAK),1989-2003,262,258,12698,9919,416,7/36,23.84,4.68,30.5,14,13
3,WPUJC Vaas (Asia/SL),1994-2008,322,320,15775,11014,400,8/19,27.53,4.18,39.4,9,4
4,Shahid Afridi (Asia/ICC/PAK),1996-2015,398,372,17670,13632,395,7/12,34.51,4.62,44.7,4,9


In [232]:
# Check null values
bowling_df.isnull().sum()

Player    0
Span      0
Mat       0
Inns      0
Balls     0
Runs      0
Wkts      0
BBI       0
Ave       0
Econ      0
SR        0
4         0
5         0
dtype: int64

In [233]:
bowling_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 776 entries, 0 to 1592
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Player  776 non-null    object
 1   Span    776 non-null    object
 2   Mat     776 non-null    int64 
 3   Inns    776 non-null    object
 4   Balls   776 non-null    object
 5   Runs    776 non-null    object
 6   Wkts    776 non-null    object
 7   BBI     776 non-null    object
 8   Ave     776 non-null    object
 9   Econ    776 non-null    object
 10  SR      776 non-null    object
 11  4       776 non-null    object
 12  5       776 non-null    object
dtypes: int64(1), object(12)
memory usage: 84.9+ KB


In [234]:
bowling_df.columns

Index(['Player', 'Span', 'Mat', 'Inns', 'Balls', 'Runs', 'Wkts', 'BBI', 'Ave',
       'Econ', 'SR', '4', '5'],
      dtype='object')

In [235]:
# Convert Objects into bumerical values
bowling_df['Inns']=bowling_df['Inns'].astype('int')
bowling_df['Balls']=bowling_df['Balls'].astype('int')
bowling_df['Runs']=bowling_df['Runs'].astype('int')
bowling_df['Wkts']=bowling_df['Wkts'].astype('int')
bowling_df['Ave']=bowling_df['Ave'].astype('float')
bowling_df['Econ']=bowling_df['Econ'].astype('float')
bowling_df['SR']=bowling_df['SR'].astype('float')
bowling_df['4']=bowling_df['4'].astype('int')
bowling_df['5']=bowling_df['5'].astype('int')


In [236]:
bowling_df.head(2)

Unnamed: 0,Player,Span,Mat,Inns,Balls,Runs,Wkts,BBI,Ave,Econ,SR,4,5
0,M Muralitharan (Asia/ICC/SL),1993-2011,350,341,18811,12326,534,7/30,23.08,3.93,35.2,15,10
1,Wasim Akram (PAK),1984-2003,356,351,18186,11812,502,5/15,23.52,3.89,36.2,17,6


In [237]:
bowling_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 776 entries, 0 to 1592
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Player  776 non-null    object 
 1   Span    776 non-null    object 
 2   Mat     776 non-null    int64  
 3   Inns    776 non-null    int32  
 4   Balls   776 non-null    int32  
 5   Runs    776 non-null    int32  
 6   Wkts    776 non-null    int32  
 7   BBI     776 non-null    object 
 8   Ave     776 non-null    float64
 9   Econ    776 non-null    float64
 10  SR      776 non-null    float64
 11  4       776 non-null    int32  
 12  5       776 non-null    int32  
dtypes: float64(3), int32(6), int64(1), object(3)
memory usage: 66.7+ KB


In [238]:
# Adding nation,name for players to dataset
country=[]
name=[]
for i in list(bowling_df["Player"]):
    x=i.split("(")
    country.append(x[1][:-1])
    name.append(x[0])

c=pd.DataFrame()
c["nation"]=country

c['nation']=c['nation'].str.replace("Asia","")
c['nation']=c['nation'].str.replace("/","")
c['nation']=c['nation'].str.replace("ICC","")

# adding nation column to dataset
bowling_df['Nation']=c['nation']
bowling_df["Player"]=name

bowling_df.dropna(inplace=True)

In [239]:
# Save file
# bowling_df.to_csv("new_bowling.csv",index=False)

# ANALYSIS

In [240]:
df=pd.read_csv("new_bowling.csv")

In [243]:
df['SR']

0      35.2
1      36.2
2      30.5
3      39.4
4      44.7
       ... 
611    61.5
612    46.0
613    58.5
614    42.4
615    63.7
Name: SR, Length: 616, dtype: float64

In [251]:
player=list(df['Player'])

In [291]:
df.columns

Index(['Player', 'Span', 'Mat', 'Inns', 'Balls', 'Runs', 'Wkts', 'BBI', 'Ave',
       'Econ', 'SR', '4', '5', 'Nation'],
      dtype='object')

In [338]:
# Fetch Player Stats
def player_stats_bowling(name):
    x=df[df['Player']==name]
    Span=x['Span'].values[0]
    Matches=x['Mat'].values[0]
    Innings=x['Inns'].values[0]
    Balls=x['Balls'].values[0]
    Runs=x['Runs'].values[0]
    Wickets=x['Wkts'].values[0]
    BBI=x['BBI'].values[0]
    Average=x['Ave'].values[0]
    Economy=x['Econ'].values[0]
    Strike_Rate=x['SR'].values[0]
    Fours=x['4'].values[0]
    Fives=x['5'].values[0]
    Nation=x['Nation'].values[0]
    return Span,Matches,Innings,Balls,Runs,Wickets,BBI,Average,Economy,Strike_Rate,Fours,Fives,Nation

In [339]:
player_stats_bowling("M Muralitharan ")

('1993-2011',
 350,
 341,
 18811,
 12326,
 534,
 '7/30',
 23.08,
 3.93,
 35.2,
 15,
 10,
 'SL')

In [333]:
x

10