### Problem Statement:
What is the optimal bowling strategy in Chinnaswamy, Which type of bowlers to use & when (phase of play)?
<br>    -Venue analysis
<br>    -Historic trends
<br>    -Bowlers analysis

## Keywords Involved:
<br>    - Bowling Strategy
<br>    - Bowlers analysis
<br>    - By Phases of play - (PP/Middle/Death)
<br>    - By Innings - (Bowl 1st/2nd)
<br>    - Venue Analysis
<br>    - Performance in venues
<br>    - Toss factor
<br>    - Venue trends --- (runrates, wickets, ..)

In [2]:
import math
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)

import matplotlib.pyplot as plt

In [3]:
deliveries = pd.read_csv('../cric_stats/deliveries.csv')
matches = pd.read_csv('../cric_stats/matches.csv')


In [4]:
deliveries.dismissal_kind.unique()


array([nan, 'caught', 'bowled', 'run out', 'lbw', 'caught and bowled',
       'stumped', 'retired hurt', 'hit wicket', 'obstructing the field'],
      dtype=object)

In [5]:
def is_wicket(player_dismissed, dismissal_kind):
    if type(player_dismissed) != str:
        return 0
    elif ~(type(player_dismissed) != str) & (dismissal_kind not in ['run out', 'retired hurt', 'obstructing the field']):
        return 1
    else:
        return 0


In [6]:
deliveries['isBowlerWk'] = deliveries.apply(lambda x: is_wicket(x['player_dismissed'], x['dismissal_kind']), axis = 1)


In [7]:
def balls_per_dismissal(balls, dismissals):
    if dismissals > 0:
        return balls/dismissals
    else:
        return balls/1
    
def balls_per_boundary(balls, boundaries):
    if boundaries > 0:
        return balls/boundaries
    else:
        return balls/1 

def get_dot_percentage(dots, balls):
    if balls > 0:
        return dots/balls
    else:
        return 0
    
def runs_per_ball(balls, runs_conceeded):
    if balls > 0:
        return runs_conceeded/balls
    else:
        return math.inf
    
def runs_per_dismissal(runs_conceeded, dismissals):
    if dismissals > 0:
        return runs_conceeded/dismissals
    else:
        return math.inf

def playerStatistics(df):
    
    df['isDot'] = df['batsman_runs'].apply(lambda x: 1 if x == 0 else 0)
    df['isOne'] = df['batsman_runs'].apply(lambda x: 1 if x == 1 else 0)
    df['isTwo'] = df['batsman_runs'].apply(lambda x: 1 if x == 2 else 0)
    df['isThree'] = df['batsman_runs'].apply(lambda x: 1 if x == 3 else 0)
    df['isFour'] = df['batsman_runs'].apply(lambda x: 1 if x == 4 else 0)
    df['isSix'] = df['batsman_runs'].apply(lambda x: 1 if x == 6 else 0)
    
    runs = pd.DataFrame(df.groupby(['bowler', 'match_id'])['total_runs'].sum().reset_index()).groupby(['bowler'])['total_runs'].sum().reset_index().rename(columns={'total_runs':'runs'})
    innings = pd.DataFrame(df.groupby(['bowler'])['match_id'].apply(lambda x: len(list(np.unique(x)))).reset_index()).rename(columns = {'match_id':'innings'})
    balls = pd.DataFrame(df.groupby(['bowler'])['match_id'].count()).reset_index().rename(columns = {'match_id':'balls'})
    dismissals = pd.DataFrame(df.groupby(['bowler'])['isBowlerWk'].sum()).reset_index().rename(columns = {'isBowlerWk':'dismissals'})
    
    dots = pd.DataFrame(df.groupby(['bowler'])['isDot'].sum()).reset_index().rename(columns = {'isDot':'dots'})
    ones = pd.DataFrame(df.groupby(['bowler'])['isOne'].sum()).reset_index().rename(columns = {'isOne':'ones'})
    twos = pd.DataFrame(df.groupby(['bowler'])['isTwo'].sum()).reset_index().rename(columns = {'isTwo':'twos'})
    threes = pd.DataFrame(df.groupby(['bowler'])['isThree'].sum()).reset_index().rename(columns = {'isThree':'threes'})
    fours = pd.DataFrame(df.groupby(['bowler'])['isFour'].sum()).reset_index().rename(columns = {'isFour':'fours'})
    sixes = pd.DataFrame(df.groupby(['bowler'])['isSix'].sum()).reset_index().rename(columns = {'isSix':'sixes'})
    
    df = pd.merge(innings, runs, on = 'bowler').merge(balls, on = 'bowler').merge(dismissals, on = 'bowler').merge(dots, on = 'bowler').merge(ones, on = 'bowler').merge(twos, on = 'bowler').merge(threes, on = 'bowler').merge(fours, on = 'bowler').merge(sixes, on = 'bowler')
    
    # Dot Percentage = Number of dots in total deliveries
    df['Dot%'] = df.apply(lambda x: get_dot_percentage(x['dots'], x['balls'])*100, axis = 1)
    
    # Average = Runs per wicket
    df['Avg'] = df.apply(lambda x: runs_per_dismissal(x['runs'], x['dismissals']), axis = 1)
    
    # StrikeRate = Balls per wicket
    df['SR'] = df.apply(lambda x: balls_per_dismissal(x['balls'], x['dismissals']), axis = 1)

    # Economy = runs per over
    df['Eco'] = df.apply(lambda x: runs_per_ball(x['balls'], x['runs'])*6, axis = 1)
    
    return df

In [8]:
df = playerStatistics(deliveries)


In [9]:
topdots = df[df.balls > 1500]


In [10]:
import plotly.express as px
fig = px.scatter(topdots, x='bowler', y='Dot%')
fig.show()

In [11]:
def phase(over):
    # over number in our dataset starts from 0
    if over <= 5:
        return 'Powerplay'
    elif over <= 14:
        return 'Middle'
    else:
        return 'Death'


In [12]:
deliveries['phase'] = deliveries['over'].apply(lambda x: phase(x))

In [13]:
def phasesOfplay(df, current_phase):
    
    df = df[df.phase == current_phase]
    df.reset_index(inplace = True, drop = True)
    
    df['isDot'] = df['batsman_runs'].apply(lambda x: 1 if x == 0 else 0)
    df['isOne'] = df['batsman_runs'].apply(lambda x: 1 if x == 1 else 0)
    df['isTwo'] = df['batsman_runs'].apply(lambda x: 1 if x == 2 else 0)
    df['isThree'] = df['batsman_runs'].apply(lambda x: 1 if x == 3 else 0)
    df['isFour'] = df['batsman_runs'].apply(lambda x: 1 if x == 4 else 0)
    df['isSix'] = df['batsman_runs'].apply(lambda x: 1 if x == 6 else 0)
    
    runs = pd.DataFrame(df.groupby(['bowler', 'match_id'])['total_runs'].sum().reset_index()).groupby(['bowler'])['total_runs'].sum().reset_index().rename(columns={'total_runs':'runs'})
    innings = pd.DataFrame(df.groupby(['bowler'])['match_id'].apply(lambda x: len(list(np.unique(x)))).reset_index()).rename(columns = {'match_id':'innings'})
    balls = pd.DataFrame(df.groupby(['bowler'])['match_id'].count()).reset_index().rename(columns = {'match_id':'balls'})
    dismissals = pd.DataFrame(df.groupby(['bowler'])['isBowlerWk'].sum()).reset_index().rename(columns = {'isBowlerWk':'dismissals'})
    
    dots = pd.DataFrame(df.groupby(['bowler'])['isDot'].sum()).reset_index().rename(columns = {'isDot':'dots'})
    ones = pd.DataFrame(df.groupby(['bowler'])['isOne'].sum()).reset_index().rename(columns = {'isOne':'ones'})
    twos = pd.DataFrame(df.groupby(['bowler'])['isTwo'].sum()).reset_index().rename(columns = {'isTwo':'twos'})
    threes = pd.DataFrame(df.groupby(['bowler'])['isThree'].sum()).reset_index().rename(columns = {'isThree':'threes'})
    fours = pd.DataFrame(df.groupby(['bowler'])['isFour'].sum()).reset_index().rename(columns = {'isFour':'fours'})
    sixes = pd.DataFrame(df.groupby(['bowler'])['isSix'].sum()).reset_index().rename(columns = {'isSix':'sixes'})
    
    df = pd.merge(innings, runs, on = 'bowler').merge(balls, on = 'bowler').merge(dismissals, on = 'bowler').merge(dots, on = 'bowler').merge(ones, on = 'bowler').merge(twos, on = 'bowler').merge(threes, on = 'bowler').merge(fours, on = 'bowler').merge(sixes, on = 'bowler')
    
    # Dot Percentage = Number of dots in total deliveries
    df['Dot%'] = df.apply(lambda x: get_dot_percentage(x['dots'], x['balls'])*100, axis = 1)
    
    # Average = Runs per wicket
    df['Avg'] = df.apply(lambda x: runs_per_dismissal(x['runs'], x['dismissals']), axis = 1)
    
    # StrikeRate = Balls per wicket
    df['SR'] = df.apply(lambda x: balls_per_dismissal(x['balls'], x['dismissals']), axis = 1)

    # Economy = runs per over
    df['Eco'] = df.apply(lambda x: runs_per_ball(x['balls'], x['runs'])*6, axis = 1)
    
    return df

In [14]:
pp_df = phasesOfplay(deliveries, 'Powerplay')
mid_df = phasesOfplay(deliveries, 'Middle')
dth_df = phasesOfplay(deliveries, 'Death')

In [15]:
def ByInning(df, current_inning):
    
    df = df[df.inning == current_inning]
    df.reset_index(inplace = True, drop = True)
    
    df['isDot'] = df['batsman_runs'].apply(lambda x: 1 if x == 0 else 0)
    df['isOne'] = df['batsman_runs'].apply(lambda x: 1 if x == 1 else 0)
    df['isTwo'] = df['batsman_runs'].apply(lambda x: 1 if x == 2 else 0)
    df['isThree'] = df['batsman_runs'].apply(lambda x: 1 if x == 3 else 0)
    df['isFour'] = df['batsman_runs'].apply(lambda x: 1 if x == 4 else 0)
    df['isSix'] = df['batsman_runs'].apply(lambda x: 1 if x == 6 else 0)
    
    runs = pd.DataFrame(df.groupby(['bowler', 'match_id'])['total_runs'].sum().reset_index()).groupby(['bowler'])['total_runs'].sum().reset_index().rename(columns={'total_runs':'runs'})
    innings = pd.DataFrame(df.groupby(['bowler'])['match_id'].apply(lambda x: len(list(np.unique(x)))).reset_index()).rename(columns = {'match_id':'innings'})
    balls = pd.DataFrame(df.groupby(['bowler'])['match_id'].count()).reset_index().rename(columns = {'match_id':'balls'})
    dismissals = pd.DataFrame(df.groupby(['bowler'])['isBowlerWk'].sum()).reset_index().rename(columns = {'isBowlerWk':'dismissals'})
    
    dots = pd.DataFrame(df.groupby(['bowler'])['isDot'].sum()).reset_index().rename(columns = {'isDot':'dots'})
    ones = pd.DataFrame(df.groupby(['bowler'])['isOne'].sum()).reset_index().rename(columns = {'isOne':'ones'})
    twos = pd.DataFrame(df.groupby(['bowler'])['isTwo'].sum()).reset_index().rename(columns = {'isTwo':'twos'})
    threes = pd.DataFrame(df.groupby(['bowler'])['isThree'].sum()).reset_index().rename(columns = {'isThree':'threes'})
    fours = pd.DataFrame(df.groupby(['bowler'])['isFour'].sum()).reset_index().rename(columns = {'isFour':'fours'})
    sixes = pd.DataFrame(df.groupby(['bowler'])['isSix'].sum()).reset_index().rename(columns = {'isSix':'sixes'})
    
    df = pd.merge(innings, runs, on = 'bowler').merge(balls, on = 'bowler').merge(dismissals, on = 'bowler').merge(dots, on = 'bowler').merge(ones, on = 'bowler').merge(twos, on = 'bowler').merge(threes, on = 'bowler').merge(fours, on = 'bowler').merge(sixes, on = 'bowler')
    
    # Dot Percentage = Number of dots in total deliveries
    df['Dot%'] = df.apply(lambda x: get_dot_percentage(x['dots'], x['balls'])*100, axis = 1)
    
    # Average = Runs per wicket
    df['Avg'] = df.apply(lambda x: runs_per_dismissal(x['runs'], x['dismissals']), axis = 1)
    
    # StrikeRate = Balls per wicket
    df['SR'] = df.apply(lambda x: balls_per_dismissal(x['balls'], x['dismissals']), axis = 1)

    # Economy = runs per over
    df['Eco'] = df.apply(lambda x: runs_per_ball(x['balls'], x['runs'])*6, axis = 1)
    
    return df


In [16]:
ing1_df = ByInning(deliveries, 1)
ing2_df = ByInning(deliveries, 2)

### Bowlers Performance Against All Teams


In [17]:
selected_player = 'SL Malinga'


In [18]:
def isOut(player_dismissed):
    try:
        x = math.isnan(player_dismissed)
        return 0
    except:
        return 1

In [19]:
deliveries['isOut'] = deliveries['player_dismissed'].apply(lambda x: isOut(x)) 


In [20]:
deliveries.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,wide_runs,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder,isBowlerWk,isDot,isOne,isTwo,isThree,isFour,isSix,phase,isOut
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0,1,0,0,0,0,0,Powerplay,0
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0,1,0,0,0,0,0,Powerplay,0
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,4,0,4,,,,0,0,0,0,0,1,0,Powerplay,0
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0,1,0,0,0,0,0,Powerplay,0
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,2,0,0,0,0,0,2,2,,,,0,1,0,0,0,0,0,Powerplay,0


In [46]:
import plotly.express as px
# bowl_df = pd.DataFrame(deliveries[deliveries.bowler == selected_player].groupby(['batting_team'])['isBowlerWk'].sum().sort_values().reset_index())
deliveries[deliveries.bowler == selected_player].groupby(['batting_team'])['isOut'].sum().sort_values()
fig = px.bar(deliveries, x = 'isOut', y = 'batting_team' ,color = 'batting_team', orientation= 'h',color_discrete_sequence=px.colors.qualitative.Dark2,height=500, width=800)
fig.update_traces(marker=dict(line=dict(width=1, color='black'), opacity=0.8,line_width=2))
fig.show()

### Part 2 : Venue Analysis

In [22]:
def wintoss_winmatch(toss_winner,winner):
    if toss_winner == winner:
        return True
    else:
        return False
    


In [23]:
matches['wintoss_winmatch'] = matches.apply(lambda x: wintoss_winmatch(x['toss_winner'], x['winner']), axis = 1)

In [24]:
matches[['id', 'season', 'team1', 'team2', 'wintoss_winmatch']].head()


Unnamed: 0,id,season,team1,team2,wintoss_winmatch
0,1,2017,Sunrisers Hyderabad,Royal Challengers Bangalore,False
1,2,2017,Mumbai Indians,Rising Pune Supergiant,True
2,3,2017,Gujarat Lions,Kolkata Knight Riders,True
3,4,2017,Rising Pune Supergiant,Kings XI Punjab,True
4,5,2017,Royal Challengers Bangalore,Delhi Daredevils,True


In [25]:
pd.DataFrame(100* (matches.groupby('season')['wintoss_winmatch'].sum()/matches.groupby('season')['wintoss_winmatch'].count())).reset_index()


Unnamed: 0,season,wintoss_winmatch
0,2008,48.275862
1,2009,57.894737
2,2010,51.666667
3,2011,52.054795
4,2012,44.594595
5,2013,47.368421
6,2014,50.0
7,2015,47.457627
8,2016,56.666667
9,2017,57.627119


In [26]:
def venueAnalysis(mdf, df):
    
    runs = pd.DataFrame(df.groupby(['match_id', 'inning'])['total_runs'].sum().reset_index())
    runs['Id_Ing'] = runs.apply(lambda x: str(x['match_id']) + '-' + str(x['inning']), axis = 1)
    balls = pd.DataFrame(df.groupby(['match_id', 'inning'])['total_runs'].count().reset_index()).rename(columns = {'total_runs':'total_balls'})

    balls['Id_Ing'] = balls.apply(lambda x: str(x['match_id']) + '-' + str(x['inning']), axis = 1)
    
    df['isOut'] = df['player_dismissed'].apply(lambda x: isOut(x))
    outs = pd.DataFrame(df.groupby(['match_id', 'inning'])['isOut'].sum().reset_index()).rename(columns = {'isOut':'wickets'})
    outs['Id_Ing'] = outs.apply(lambda x: str(x['match_id']) + '-' + str(x['inning']), axis = 1)
    
    df = pd.merge(runs, balls[['Id_Ing', 'total_balls']], on = 'Id_Ing').merge(outs[['Id_Ing', 'wickets']], on = 'Id_Ing')
    
    mdf = mdf.rename(columns = {'id':'match_id'})
    df = pd.merge(df, mdf[['match_id', 'venue']], on = 'match_id')
    df = df[['match_id', 'venue', 'inning', 'total_runs', 'total_balls', 'wickets']]
    
    return df

In [27]:
deliveries.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,wide_runs,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder,isBowlerWk,isDot,isOne,isTwo,isThree,isFour,isSix,phase,isOut
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0,1,0,0,0,0,0,Powerplay,0
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0,1,0,0,0,0,0,Powerplay,0
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,4,0,4,,,,0,0,0,0,0,1,0,Powerplay,0
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0,1,0,0,0,0,0,Powerplay,0
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,2,0,0,0,0,0,2,2,,,,0,1,0,0,0,0,0,Powerplay,0


In [28]:
ven_df = venueAnalysis(matches, deliveries)


In [29]:
ven_df.head()

Unnamed: 0,match_id,venue,inning,total_runs,total_balls,wickets
0,1,"Rajiv Gandhi International Stadium, Uppal",1,207,125,4
1,1,"Rajiv Gandhi International Stadium, Uppal",2,172,123,10
2,2,Maharashtra Cricket Association Stadium,1,184,125,8
3,2,Maharashtra Cricket Association Stadium,2,187,122,3
4,3,Saurashtra Cricket Association Stadium,1,183,122,4


In [30]:
pd.DataFrame(ven_df.groupby('venue')['total_runs'].mean().sort_values(ascending = False)).rename(columns = {'total_runs':'Avg_Runs'}).reset_index().head(20)

Unnamed: 0,venue,Avg_Runs
0,Brabourne Stadium,174.636364
1,Barabati Stadium,162.714286
2,Green Park,162.25
3,"Punjab Cricket Association IS Bindra Stadium, Mohali",161.136364
4,Himachal Pradesh Cricket Association Stadium,160.944444
5,Maharashtra Cricket Association Stadium,158.733333
6,Wankhede Stadium,157.815789
7,Holkar Cricket Stadium,157.6
8,"Punjab Cricket Association Stadium, Mohali",156.957143
9,"MA Chidambaram Stadium, Chepauk",155.887755


In [31]:
selected_venue = 'M Chinnaswamy Stadium'


In [32]:
ven_df[ven_df['venue'] == selected_venue].head(1)

Unnamed: 0,match_id,venue,inning,total_runs,total_balls,wickets
8,5,M Chinnaswamy Stadium,1,157,124,8


In [33]:

fig  = pd.DataFrame(ven_df[ven_df['venue'] == selected_venue].groupby(['inning'])['total_runs'].mean()[:2]).reset_index()
fig.head()


Unnamed: 0,inning,total_runs
0,1,167.893939
1,2,145.461538


In [34]:
import plotly.express as px
import plotly.graph_objs as go

fig = px.bar(fig, x="total_runs", y="inning",orientation='h',height = 290,title= 'avg 1st vs 2nd ing. scores at ' + f'{selected_venue}')
fig.update_layout( xaxis_title = 'Innings' ,yaxis_title = 'Runs' )
fig.show()

In [35]:
matches.rename(columns={'id' : 'match_id'},inplace=True)


In [36]:
combined_data = deliveries.merge(matches[['match_id', 'venue']], on = 'match_id', how = 'left')


In [37]:
def ByCustom(df, selected_venue):
       
    df = df[df.venue == selected_venue]
    df.reset_index(inplace = True, drop = True)
    
    df['isDot'] = df['batsman_runs'].apply(lambda x: 1 if x == 0 else 0)
    df['isOne'] = df['batsman_runs'].apply(lambda x: 1 if x == 1 else 0)
    df['isTwo'] = df['batsman_runs'].apply(lambda x: 1 if x == 2 else 0)
    df['isThree'] = df['batsman_runs'].apply(lambda x: 1 if x == 3 else 0)
    df['isFour'] = df['batsman_runs'].apply(lambda x: 1 if x == 4 else 0)
    df['isSix'] = df['batsman_runs'].apply(lambda x: 1 if x == 6 else 0)
    
    runs = pd.DataFrame(df.groupby(['bowler', 'match_id'])['total_runs'].sum().reset_index()).groupby(['bowler'])['total_runs'].sum().reset_index().rename(columns={'total_runs':'runs'})
    innings = pd.DataFrame(df.groupby(['bowler'])['match_id'].apply(lambda x: len(list(np.unique(x)))).reset_index()).rename(columns = {'match_id':'innings'})
    balls = pd.DataFrame(df.groupby(['bowler'])['match_id'].count()).reset_index().rename(columns = {'match_id':'balls'})
    dismissals = pd.DataFrame(df.groupby(['bowler'])['isBowlerWk'].sum()).reset_index().rename(columns = {'isBowlerWk':'dismissals'})
    
    dots = pd.DataFrame(df.groupby(['bowler'])['isDot'].sum()).reset_index().rename(columns = {'isDot':'dots'})
    ones = pd.DataFrame(df.groupby(['bowler'])['isOne'].sum()).reset_index().rename(columns = {'isOne':'ones'})
    twos = pd.DataFrame(df.groupby(['bowler'])['isTwo'].sum()).reset_index().rename(columns = {'isTwo':'twos'})
    threes = pd.DataFrame(df.groupby(['bowler'])['isThree'].sum()).reset_index().rename(columns = {'isThree':'threes'})
    fours = pd.DataFrame(df.groupby(['bowler'])['isFour'].sum()).reset_index().rename(columns = {'isFour':'fours'})
    sixes = pd.DataFrame(df.groupby(['bowler'])['isSix'].sum()).reset_index().rename(columns = {'isSix':'sixes'})
    
    df = pd.merge(innings, runs, on = 'bowler').merge(balls, on = 'bowler').merge(dismissals, on = 'bowler').merge(dots, on = 'bowler').merge(ones, on = 'bowler').merge(twos, on = 'bowler').merge(threes, on = 'bowler').merge(fours, on = 'bowler').merge(sixes, on = 'bowler')
    
    # Dot Percentage = Number of dots in total deliveries
    df['Dot%'] = df.apply(lambda x: get_dot_percentage(x['dots'], x['balls'])*100, axis = 1)
    
    # Average = Runs per wicket
    df['Avg'] = df.apply(lambda x: runs_per_dismissal(x['runs'], x['dismissals']), axis = 1)
    
    # StrikeRate = Balls per wicket
    df['SR'] = df.apply(lambda x: balls_per_dismissal(x['balls'], x['dismissals']), axis = 1)

    # Economy = runs per over
    df['Eco'] = df.apply(lambda x: runs_per_ball(x['balls'], x['runs'])*6, axis = 1)
    
    return df

In [38]:
df = ByCustom(combined_data, selected_venue)

In [39]:
df.head()


Unnamed: 0,bowler,innings,runs,balls,dismissals,dots,ones,twos,threes,fours,sixes,Dot%,Avg,SR,Eco
0,A Ashish Reddy,4,86,48,2,18,14,3,1,6,6,37.5,43.0,24.0,10.75
1,A Chandila,1,21,24,0,10,11,2,0,0,1,41.666667,inf,24.0,5.25
2,A Choudhary,3,57,55,3,30,13,5,0,5,2,54.545455,19.0,18.333333,6.218182
3,A Kumble,12,363,291,9,108,132,10,0,28,13,37.113402,40.333333,32.333333,7.484536
4,A Mishra,5,159,104,4,30,49,7,0,11,7,28.846154,39.75,26.0,9.173077


In [40]:
df[['Dot%', 'Avg', 'SR', 'Eco']].mean()


Dot%    38.637293
Avg     inf      
SR      23.508354
Eco     8.724476 
dtype: float64

In [41]:
import plotly.express as px
fig = px.histogram(df, x="Dot%")
fig.show()