In [1]:
import pandas as pd
from numpy import isinf

In [2]:
# Get data for all Test matches (March 8, 2004 - January 3, 2022) - compiled together from Cricsheet
data = pd.read_csv('all_matches.csv')

In [3]:
# Filter matches based on date
from_date = '2021-12-26'
to_date = '2022-01-03'
data = data[(data['start_date'] >= from_date) & (data['start_date'] <= to_date)]

In [4]:
# Aggregate runs scored, balls faced, and outs for each batsman of a team against each opposition bowler
team = 'India'
matchup_data = data[data['batting_team'] == team].groupby(['striker', 'bowler']).agg(
    runs = ('runs_off_bat', 'sum'), balls = ('runs_off_bat', 'count'), outs = ('player_dismissed', 'count')
).reset_index()

# Compute average and strike rate
matchup_data['avg'] = (matchup_data['runs'] / matchup_data['outs']).round(2)    
matchup_data['strike_rate'] = (100 * (matchup_data['runs'] / matchup_data['balls'])).round(2)

# Sort by average (descending) while excluding infinite and NaN values
matchup_data[(~isinf(matchup_data['avg'])) & (~matchup_data['avg'].isnull())].sort_values(
    'avg', 
    ascending = False
).reset_index(drop = True)

Unnamed: 0,striker,bowler,balls,outs,runs,avg,strike_rate
0,KL Rahul,K Rabada,143,1,47,47.0,32.87
1,AM Rahane,M Jansen,48,1,42,42.0,87.5
2,KL Rahul,L Ngidi,73,1,33,33.0,45.21
3,SN Thakur,M Jansen,32,1,33,33.0,103.12
4,AM Rahane,L Ngidi,60,1,29,29.0,48.33
5,R Ashwin,M Jansen,25,1,26,26.0,104.0
6,AM Rahane,K Rabada,46,1,26,26.0,56.52
7,MA Agarwal,L Ngidi,50,1,21,21.0,42.0
8,RR Pant,M Jansen,24,1,16,16.0,66.67
9,MA Agarwal,M Jansen,49,2,29,14.5,59.18


In [5]:
# Matchup data for a specific batsman
batsman = 'KL Rahul'
matchup_data[matchup_data['striker'] == batsman].sort_values('avg', ascending = False).reset_index(drop = True)

Unnamed: 0,striker,bowler,balls,outs,runs,avg,strike_rate
0,KL Rahul,D Olivier,54,0,32,inf,59.26
1,KL Rahul,KA Maharaj,46,0,35,inf,76.09
2,KL Rahul,PWA Mulder,72,0,30,inf,41.67
3,KL Rahul,K Rabada,143,1,47,47.0,32.87
4,KL Rahul,L Ngidi,73,1,33,33.0,45.21
5,KL Rahul,M Jansen,100,2,27,13.5,27.0


In [6]:
# Matchup data for a specific bowler
bowler = 'M Jansen'
matchup_data[matchup_data['bowler'] == bowler].sort_values('avg').reset_index(drop = True)

Unnamed: 0,striker,bowler,balls,outs,runs,avg,strike_rate
0,Mohammed Siraj,M Jansen,7,1,1,1.0,14.29
1,Mohammed Shami,M Jansen,7,1,3,3.0,42.86
2,V Kohli,M Jansen,13,1,4,4.0,30.77
3,JJ Bumrah,M Jansen,23,1,13,13.0,56.52
4,KL Rahul,M Jansen,100,2,27,13.5,27.0
5,MA Agarwal,M Jansen,49,2,29,14.5,59.18
6,RR Pant,M Jansen,24,1,16,16.0,66.67
7,R Ashwin,M Jansen,25,1,26,26.0,104.0
8,SN Thakur,M Jansen,32,1,33,33.0,103.12
9,AM Rahane,M Jansen,48,1,42,42.0,87.5
