In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

pd.set_option('display.max_rows', 150)
pd.set_option('display.max_columns', 150)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

# LOADING DATA FRAMES

In [2]:
bat_seam = pd.read_csv("seam_bats.csv")
bat_seam['isWicket'] = bat_seam['isWicket'].astype(int)
bat_spin = pd.read_csv("spin_bats.csv")
bat_spin['isWicket'] = bat_spin['isWicket'].astype(int)


bowl_seam = pd.read_csv("seam_bowls.csv")
bowl_spin = pd.read_csv("spin_bowls.csv")

# HAVING A LOOK AT DATA

In [7]:
bat_seam.head(5)

Unnamed: 0,bat_name,score,isWicket,length_type,over_type,stumps,swing,seam,speed_bin,seam_dev_bin,swing_dev_bin,release_height_bin
0,faf du plessis,0,0,"[4,6)",PP1,non-stump-line,in-swing,out-seam,"(140,-)","(1.5, 2.0]","(0.5, 1.0]","(0.0, 1.918]"
1,faf du plessis,0,0,"[10,12)",PP1,non-stump-line,in-swing,out-seam,"(140,-)","(0.5, 1.0]","(1.0, 1.5]","(0.0, 1.918]"
2,faf du plessis,3,0,"[8,10)",PP1,stump-line,in-swing,in-seam,"(140,-)","(0.0, 0.5]","(0.0, 0.5]","(0.0, 1.918]"
3,murali vijay,0,0,"[8,10)",PP1,non-stump-line,out-swing,in-seam,"(140,-)","(0.0, 0.5]","(1.0, 1.5]","(0.0, 1.918]"
4,murali vijay,0,1,"[8,10)",PP1,stump-line,out-swing,in-seam,"(140,-)","(0.0, 0.5]","(1.0, 1.5]","(0.0, 1.918]"


In [8]:
bat_spin.head(5)

Unnamed: 0,bat_name,score,isWicket,length_type,over_type,stumps,drift,spin,speed_bin,drift_dev_bin,spin_dev_bin,release_height_bin
0,suresh raina,1,0,"[3,4)",PP2,stump-line,in-drift,away-spin,"(90,100]","(0.0, 1.0]","(1.5, 2.0]","(2.148, 3.774]"
1,murali vijay,1,0,"[5,6)",PP2,non-stump-line,in-drift,in-spin,"(90,100]","(0.0, 1.0]","(0.5, 1.0]","(2.148, 3.774]"
2,suresh raina,2,0,"[5,6)",PP2,non-stump-line,in-drift,in-spin,"(90,100]","(0.0, 1.0]","(0.0, 0.5]","(2.148, 3.774]"
3,murali vijay,1,0,"[7,8)",PP2,non-stump-line,in-drift,away-spin,"(90,100]","(0.0, 1.0]","(0.5, 1.0]","(2.148, 3.774]"
4,suresh raina,0,0,"[2,3)",PP2,non-stump-line,in-drift,away-spin,"(90,100]","(0.0, 1.0]","(1.0, 1.5]","(2.148, 3.774]"


In [9]:
bowl_seam.head(5)

Unnamed: 0,bowler,score,isWicket,length_type,over_type,stumps,swing,seam,speed_bin,seam_dev_bin,swing_dev_bin,release_height_bin
0,lasith malinga,0,False,"[4,6)",PP1,non-stump-line,in-swing,out-seam,"(140,-)","(1.5, 2.0]","(0.5, 1.0]","(0.0, 1.918]"
1,lasith malinga,0,False,"[10,12)",PP1,non-stump-line,in-swing,out-seam,"(140,-)","(0.5, 1.0]","(1.0, 1.5]","(0.0, 1.918]"
2,lasith malinga,3,False,"[8,10)",PP1,stump-line,in-swing,in-seam,"(140,-)","(0.0, 0.5]","(0.0, 0.5]","(0.0, 1.918]"
3,lasith malinga,0,False,"[8,10)",PP1,non-stump-line,out-swing,in-seam,"(140,-)","(0.0, 0.5]","(1.0, 1.5]","(0.0, 1.918]"
4,lasith malinga,0,True,"[8,10)",PP1,stump-line,out-swing,in-seam,"(140,-)","(0.0, 0.5]","(1.0, 1.5]","(0.0, 1.918]"


In [10]:
bowl_spin.head(5)

Unnamed: 0,bowler,score,isWicket,length_type,over_type,stumps,drift,spin,speed_bin,drift_dev_bin,spin_dev_bin,release_height_bin
0,harbhajan singh,1,False,"[3,4)",PP2,stump-line,in-drift,away-spin,"(90,100]","(0.0, 1.0]","(1.5, 2.0]","(2.148, 3.774]"
1,harbhajan singh,1,False,"[5,6)",PP2,non-stump-line,in-drift,in-spin,"(90,100]","(0.0, 1.0]","(0.5, 1.0]","(2.148, 3.774]"
2,harbhajan singh,2,False,"[5,6)",PP2,non-stump-line,in-drift,in-spin,"(90,100]","(0.0, 1.0]","(0.0, 0.5]","(2.148, 3.774]"
3,harbhajan singh,1,False,"[7,8)",PP2,non-stump-line,in-drift,away-spin,"(90,100]","(0.0, 1.0]","(0.5, 1.0]","(2.148, 3.774]"
4,harbhajan singh,0,False,"[2,3)",PP2,non-stump-line,in-drift,away-spin,"(90,100]","(0.0, 1.0]","(1.0, 1.5]","(2.148, 3.774]"


# MAKING TEAMS AND OVER MAPS FOR SIMULATION

In [11]:
# bat_seam['bat_name'] = bat_seam['bat_name']\
#                         .replace('abdool samad', 'abdul samad')\
#                         .replace('jono boult', 'trent boult')\
#                         .replace('callum ferguson', 'lockie ferguson')\
#                         .replace('mubasir khan', 'arshad khan')\
#                         .replace('hasaranga', 'wanindu hasaranga')\
#                         .replace('suyash s prabhudessai', 'suyash prabhudessai')\
#                         .replace('matt short', 'matthew short')\
#                         .replace('nathan coulter nile', 'nathan coulter-nile')\
#                         .replace('aman hakim khan', 'aman khan')

# bat_spin['bat_name'] = bat_spin['bat_name']\
#                         .replace('abdool samad', 'abdul samad')\
#                         .replace('jono boult', 'trent boult')\
#                         .replace('callum ferguson', 'lockie ferguson')\
#                         .replace('mubasir khan', 'arshad khan')\
#                         .replace('hasaranga', 'wanindu hasaranga')\
#                         .replace('suyash s prabhudessai', 'suyash prabhudessai')\
#                         .replace('matt short', 'matthew short')\
#                         .replace('nathan coulter nile', 'nathan coulter-nile')\
#                         .replace('aman hakim khan', 'aman khan')

# bowl_seam['bowler'] = bowl_seam['bowler']\
#                         .replace('abdool samad', 'abdul samad')\
#                         .replace('jono boult', 'trent boult')\
#                         .replace('callum ferguson', 'lockie ferguson')\
#                         .replace('mubasir khan', 'arshad khan')\
#                         .replace('hasaranga', 'wanindu hasaranga')\
#                         .replace('suyash s prabhudessai', 'suyash prabhudessai')\
#                         .replace('matt short', 'matthew short')\
#                         .replace('nathan coulter nile', 'nathan coulter-nile')\
#                         .replace('aman hakim khan', 'aman khan')

# bowl_spin['bowler'] = bowl_spin['bowler']\
#                         .replace('abdool samad', 'abdul samad')\
#                         .replace('jono boult', 'trent boult')\
#                         .replace('callum ferguson', 'lockie ferguson')\
#                         .replace('mubasir khan', 'arshad khan')\
#                         .replace('hasaranga', 'wanindu hasaranga')\
#                         .replace('suyash s prabhudessai', 'suyash prabhudessai')\
#                         .replace('matt short', 'matthew short')\
#                         .replace('nathan coulter nile', 'nathan coulter-nile')\
#                         .replace('aman hakim khan', 'aman khan')

In [4]:
# MAKING TEAMS FOR ONE INNINGS

np.sort(bat_seam.bat_name.unique())

bat_team = ['chris gayle', 'david warner', 
            'virat kohli', 'rishabh pant', 'ab de villiers', 
            'hardik pandya', 'andre russell', 'ravindra jadeja', 
            'bhuvneshwar kumar', 'yuzvendra chahal', 'zaheer khan']

bowl_team = ['rashid khan', 'jofra archer', 'jasprit bumrah', 'chris morris', 'lasith malinga']

bowl_seq = {'jofra archer' : [1,3,14,16], 
            'jasprit bumrah':[2,4,17,19],
           'chris morris':[5,8,10,15],
           'lasith malinga':[6,12,18,20],
           'rashid khan':[7,9,11,13]}

In [5]:
# MAPPING OVERS

over_map = {1:['PP1', 'jofra archer'],
            2:['PP1', 'jasprit bumrah'], 
            3:['PP1', 'jofra archer'], 
            4:['PP2', 'jasprit bumrah'], 
            5:['PP2', 'chris morris'], 
            6:['PP2', 'lasith malinga'],
            7:['MO1', 'rashid khan'], 
            8:['MO1', 'chris morris'], 
            9:['MO1', 'rashid khan'], 
            10:['MO1', 'chris morris'], 
            11:['MO1', 'rashid khan'], 
            12:['MO2', 'lasith malinga'],
            13:['MO2', 'rashid khan'], 
            14:['MO2', 'jofra archer'], 
            15:['MO2', 'chris morris'], 
            16:['MO2', 'jofra archer'], 
            17:['DO', 'jasprit bumrah'],
            18:['DO', 'lasith malinga'], 
            19:['DO', 'jasprit bumrah'], 
            20:['DO', 'lasith malinga']}

# CODING THE SIMULATION

In [63]:
##SINGLE ITERATION

dummy_bat_pos = np.copy(bat_team)

bat_pos = np.array([1,2])
balls_i = []
balls_f = []

ball_oc_all = {}

for k,v in over_map.items():
    
    bowl_name = v[1]
    over = v[0]
    if bowl_name in bowl_seam.bowler.unique():
        df_bowl_temp = bowl_seam.query("bowler==@bowl_name and over_type==@over")
        cols_tbc = ['length_type', 'stumps', 'swing', 'seam', 'speed_bin', 
                    'swing_dev_bin', 'seam_dev_bin', 'release_height_bin']
        
    elif bowl_name in bowl_spin.bowler.unique():
        df_bowl_temp = bowl_spin.query("bowler==@bowl_name and over_type==@over")
        cols_tbc = ['length_type', 'stumps', 'drift', 'spin', 'speed_bin', 
                    'drift_dev_bin', 'spin_dev_bin', 'release_height_bin']
        
    for i in range(1,7):
        
        bat1 = dummy_bat_pos[bat_pos[0]-1]
        bat2 = dummy_bat_pos[bat_pos[1]-1]
        
        match_bat_ball_sample = True
        df_ball = df_bowl_temp[cols_tbc].sample(1)
        balls_i.append(df_ball)
        
        if bowl_name in bowl_seam.bowler.unique():
            b = bat_seam.query("bat_name==@bat1").query("over_type==@over")
        elif bowl_name in bowl_spin.bowler.unique():
            b = bat_spin.query("bat_name==@bat1").query("over_type==@over")
            
        while match_bat_ball_sample==True:
                test_merge = pd.merge(df_ball[cols_tbc], b, on=cols_tbc)
                if test_merge.shape[0]!=0:
                    match_bat_ball_sample=False
                else:
                    df_ball = df_bowl_temp[cols_tbc].sample(1)
        balls_f.append(df_ball)
        
        bat_ball_oc = test_merge.sample(1)
        bat_ball_oc1 = bat_ball_oc[['score', 'isWicket']]
        
        ball_oc_all[bat1+'_'+str(k)+'_'+str(i)] = bat_ball_oc
        
        if bat_ball_oc1['isWicket'].values[0]==1:
            dummy_bat_pos = np.delete(dummy_bat_pos, bat_pos[0]-1)
            if bat_pos[1]>bat_pos[0]:
                bat_pos = bat_pos[::-1]
            # bat_pos = bat_pos+1
            
        else:
            if bat_ball_oc1['score'].values[0]%2!=0:
                bat_pos = bat_pos[::-1]
            
        if i==6:
            bat_pos = bat_pos[::-1]

In [116]:
# MULTIPLE SIMULATION TO AGGREGATE THE RESULTS ARRIVE AT AN AVERGAE RESULT

sim_d = {}
sim_df_d = {}

for iter_count in tqdm((range(500))):

    dummy_bat_pos = np.copy(bat_team)
    
    bat_pos = np.array([1,2])
    balls_i = []
    balls_f = []

    ball_oc_all = {}

    for k,v in over_map.items():

        bowl_name = v[1]
        over = v[0]
        
        for i in range(1,7):

            if bowl_name in bowl_seam.bowler.unique():
                df_bowl_temp = bowl_seam.query("bowler==@bowl_name and over_type==@over")
                cols_tbc = ['length_type', 'stumps', 'swing', 'seam', 'speed_bin', 
                            'swing_dev_bin', 'seam_dev_bin', 'release_height_bin']

            elif bowl_name in bowl_spin.bowler.unique():
                df_bowl_temp = bowl_spin.query("bowler==@bowl_name and over_type==@over")
                cols_tbc = ['length_type', 'stumps', 'drift', 'spin', 'speed_bin', 
                            'drift_dev_bin', 'spin_dev_bin', 'release_height_bin']

            bat1 = dummy_bat_pos[bat_pos[0]-1]
            bat2 = dummy_bat_pos[bat_pos[1]-1]

            match_bat_ball_sample = True
            df_ball = df_bowl_temp[cols_tbc].sample(1)
            balls_i.append(df_ball)

            if bowl_name in bowl_seam.bowler.unique():
                b = bat_seam.query("bat_name==@bat1").query("over_type==@over")
            elif bowl_name in bowl_spin.bowler.unique():
                b = bat_spin.query("bat_name==@bat1").query("over_type==@over")
            
            w_iters = 1
            w_iters2 = 1

            while match_bat_ball_sample==True:
                    
                    test_merge = pd.merge(df_ball[cols_tbc], b, on=cols_tbc)
                    if test_merge.shape[0]!=0:
                        match_bat_ball_sample=False
                    else:
                        df_ball = df_bowl_temp[cols_tbc].sample(1)
                        w_iters = w_iters+1
                        
                    if w_iters>10:
                        if bowl_name in bowl_seam.bowler.unique():
                            b = bat_seam.query("bat_name==@bat1")
                            w_iters2 = w_iters2+1
                        if bowl_name in bowl_spin.bowler.unique():
                            b = bat_spin.query("bat_name==@bat1")
                            w_iters2 = w_iters2+1
                            
                    if w_iters2>20:
                        if bowl_name in bowl_seam.bowler.unique():
                            df_bowl_temp = bowl_seam.query("bowler==@bowl_name")
                            df_ball = df_bowl_temp[cols_tbc].sample(1)
                            
                        if bowl_name in bowl_spin.bowler.unique():
                            df_bowl_temp = bowl_spin.query("bowler==@bowl_name")
                            df_ball = df_bowl_temp[cols_tbc].sample(1)

            balls_f.append(df_ball)

            bat_ball_oc = test_merge.sample(1)
            bat_ball_oc1 = bat_ball_oc[['score', 'isWicket']]

            bat_ball_oc['over'] = k
            bat_ball_oc['ball'] = i
            bat_ball_oc['bowler'] = bowl_name
            

            ball_oc_all[bat1+'_'+str(k)+'_'+str(i)] = bat_ball_oc

            if bat_ball_oc1['isWicket'].values[0]==1:
                dummy_bat_pos = np.delete(dummy_bat_pos, bat_pos[0]-1)
                if bat_pos[1]>bat_pos[0]:
                    bat_pos = bat_pos[::-1] 

            else:
                if bat_ball_oc1['score'].values[0]%2!=0:
                    bat_pos = bat_pos[::-1]

            if i==6:
                bat_pos = bat_pos[::-1]

            if len(dummy_bat_pos)<2:
                    break
                
    df_scorecard_temp = pd.concat(ball_oc_all).reset_index().drop('level_1', axis=1)
    sim_df_d[iter_count] = df_scorecard_temp
    sim_d[iter_count] = {'score':df_scorecard_temp.score.sum(), 'wkts':df_scorecard_temp.isWicket.sum()}

# LOOKING AT AGGREGATES OF THE RESULTS

In [306]:
# CODE BROKE DOWN AT 39TH ITER DUE TO RESOURCE LIMITATIONS , THIS IS THE OUTPUT OF FIRST 38 ITERS OF THAT TIME

# score	wkts
# 0	179	6
# 1	130	6
# 2	180	4
# 3	170	4
# 4	144	6
# 5	147	8
# 6	142	4
# 7	148	2
# 8	182	4
# 9	167	4
# 10	126	7
# 11	178	5
# 12	181	4
# 13	159	6
# 14	160	7
# 15	131	6
# 16	148	5
# 17	139	3
# 18	161	6
# 19	194	3
# 20	141	7
# 21	158	6
# 22	178	5
# 23	170	4
# 24	164	5
# 25	175	6
# 26	156	5
# 27	159	3
# 28	145	5
# 29	145	6
# 30	149	5
# 31	137	2
# 32	171	4
# 33	165	4
# 34	148	6
# 35	136	6
# 36	173	5
# 37	124	6

In [111]:
# sns.scatterplot(pd.DataFrame(sim_d).T, x='score', y='wkts')
# plt.xticks(range(115,201,5), rotation=90)
# plt.yticks(range(0,9,1), rotation=90)
# plt.grid()
# plt.show()
print(pd.DataFrame(sim_d).T.mean())

score   158.10000
wkts      5.13000
dtype: float64


In [98]:
for k,v in sim_df_d.items():
    v['sim_count'] = k+1
    sim_df_d[k] = v

In [99]:
all_sim_res = pd.concat(list(sim_df_d.values()))
all_sim_res.bat_name.unique()

array(['chris gayle', 'david warner', 'virat kohli', 'rishabh pant',
       'ab de villiers', 'andre russell', 'hardik pandya',
       'ravindra jadeja', 'bhuvneshwar kumar', 'yuzvendra chahal',
       'zaheer khan'], dtype=object)

In [100]:
# all_sim_res.query("sim_count==1")

In [101]:
runs_wkts_ball_agg = all_sim_res.groupby("over_type").agg(runs=('score', 'sum'), wkts=('isWicket', 'sum'), balls=('ball', pd.Series.count)).reset_index()
runs_wkts_ball_agg['RpO'] = runs_wkts_ball_agg.runs/runs_wkts_ball_agg.balls
runs_wkts_ball_agg

Unnamed: 0,over_type,runs,wkts,balls,RpO
0,DO,3719,145,2183,1.70362
1,MO1,3335,90,2768,1.20484
2,MO2,3943,128,2930,1.34573
3,PP1,2117,53,1991,1.06328
4,PP2,2696,97,2128,1.26692


In [102]:
bat_agg = all_sim_res.groupby("bat_name").agg(runs=('score', 'sum'), wkts=('isWicket', 'sum'), balls=('ball', pd.Series.count)).reset_index()
bat_agg['RpO'] = bat_agg.runs/bat_agg.balls
bat_agg['BpD'] = bat_agg.balls/bat_agg.wkts
bat_agg['RpD'] = bat_agg.runs/bat_agg.wkts
bat_agg[['bat_name', 'RpO', 'BpD', 'RpD']]

Unnamed: 0,bat_name,RpO,BpD,RpD
0,ab de villiers,1.68198,19.92958,33.52113
1,andre russell,1.82812,19.2,35.1
2,bhuvneshwar kumar,0.90769,8.125,7.375
3,chris gayle,1.13018,19.24242,21.74747
4,david warner,1.32499,17.74747,23.51515
5,hardik pandya,1.23529,25.74286,31.8
6,ravindra jadeja,1.18269,14.85714,17.57143
7,rishabh pant,1.15485,30.5125,35.2375
8,virat kohli,1.40996,33.38961,47.07792
9,yuzvendra chahal,0.16058,inf,inf


In [103]:
bowl_agg = all_sim_res.groupby("bowler").agg(runs=('score', 'sum'), wkts=('isWicket', 'sum'), balls=('ball', pd.Series.count)).reset_index()
bowl_agg['eco'] = (bowl_agg.runs/bowl_agg.balls)*6
bowl_agg['SR'] = bowl_agg.balls/bowl_agg.wkts
bowl_agg['Ave'] = bowl_agg.runs/bowl_agg.wkts
bowl_agg[['bowler', 'eco', 'SR', 'Ave']]

Unnamed: 0,bowler,eco,SR,Ave
0,chris morris,7.6525,22.64151,28.87736
1,jasprit bumrah,8.105,17.91045,24.19403
2,jofra archer,7.4725,24.24242,30.19192
3,lasith malinga,9.1525,26.96629,41.13483
4,rashid khan,7.1425,28.23529,33.61176


In [110]:
all_sim_res.query("bat_name=='virat kohli' and isWicket==1").bowler.value_counts()

lasith malinga    22
jasprit bumrah    21
jofra archer      17
chris morris      13
rashid khan        4
Name: bowler, dtype: int64