In [1]:
# all imports
import pandas as pd

# remove warnings messages
import warnings
warnings.filterwarnings("ignore")

# settings to display all rows and columns in pandas
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)

# remove scientific notation in pandas
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [2]:
matches = pd.read_csv('matches_updated_mens_ipl.csv')

In [3]:
matches.head()

Unnamed: 0,outcome,event,date,gender,reserve_umpire,team1,toss_winner,umpire2,match_referee,season,team2,winner_runs,balls_per_over,player_of_match,date1,neutralvenue,venue,method,toss_decision,city,tv_umpire,date2,winner,umpire1,match_number,eliminator,winner_wickets,matchId
0,,Indian Premier League,2017-04-05,male,N Pandit,Sunrisers Hyderabad,Royal Challengers Bangalore,NJ Llong,J Srinath,2017,Royal Challengers Bangalore,35.0,6,Yuvraj Singh,,,"Rajiv Gandhi International Stadium, Uppal",,field,Hyderabad,A Deshmukh,,Sunrisers Hyderabad,AY Dandekar,1.0,,,1082591
1,,Indian Premier League,2017-04-06,male,Navdeep Singh,Rising Pune Supergiant,Rising Pune Supergiant,S Ravi,M Nayyar,2017,Mumbai Indians,,6,SPD Smith,,,Maharashtra Cricket Association Stadium,,field,Pune,VK Sharma,,Rising Pune Supergiant,A Nand Kishore,2.0,,7.0,1082592
2,,Indian Premier League,2017-04-07,male,K Srinivasan,Gujarat Lions,Kolkata Knight Riders,CK Nandan,V Narayan Kutty,2017,Kolkata Knight Riders,,6,CA Lynn,,,Saurashtra Cricket Association Stadium,,field,Rajkot,YC Barde,,Kolkata Knight Riders,Nitin Menon,3.0,,10.0,1082593
3,,Indian Premier League,2017-04-08,male,R Pandit,Kings XI Punjab,Kings XI Punjab,C Shamshuddin,Chinmay Sharma,2017,Rising Pune Supergiant,,6,GJ Maxwell,,,Holkar Cricket Stadium,,field,Indore,KN Ananthapadmanabhan,,Kings XI Punjab,AK Chaudhary,4.0,,6.0,1082594
4,,Indian Premier League,2017-04-08,male,Navdeep Singh,Royal Challengers Bangalore,Royal Challengers Bangalore,VK Sharma,J Srinath,2017,Delhi Daredevils,15.0,6,KM Jadhav,,,M.Chinnaswamy Stadium,,bat,Bengaluru,A Nand Kishore,,Royal Challengers Bangalore,S Ravi,5.0,,,1082595


In [4]:
matches['match_info'] = matches.apply(lambda x: {'team1':x['team1'], 'team2':x['team2'], 'winner':x['winner'],
                                                 'venue':f"{x['venue'], x['city']}", 'date':x['date']
                                                }, axis = 1)

In [5]:
matches.matchId[0]

1082591

In [6]:
matches.rename(columns={'matchId':'match_id'}, inplace = True)

In [7]:
balls = pd.read_csv('cleaned_ipl_dataset_2008_to_2022.csv')

In [8]:
balls.head()

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,bowler,runs_off_bat,extras,wides,noballs,byes,legbyes,penalty,wicket_type,player_dismissed,other_wicket_type,other_player_dismissed,bowl_style,bowling_style,bat_style,batting_style,wides_cnt_balls,noballs_cnt_balls,noballs_cnt_balls_bowler,byes_cnt_balls,legbyes_cnt_balls,legal_ball,legal_ball_bowler,over_no,ball_no,phase
0,335982,2008,2008-04-18,M.Chinnaswamy Stadium,2,6.8,Royal Challengers Bangalore,Kolkata Knight Riders,MV Boucher,CL White,AB Agarkar,4,0,,,,,,,,,,Right-arm fast-medium,Pace,Right Handed Bat,RHB,1,1,1,1,1,1,1,6,8,MO
1,335982,2008,2008-04-18,M.Chinnaswamy Stadium,2,2.7,Royal Challengers Bangalore,Kolkata Knight Riders,W Jaffer,JH Kallis,AB Dinda,1,0,,,,,,,,,,Right-arm fast-medium,Pace,Right Handed Bat,RHB,1,1,1,1,1,1,1,2,7,PP
2,335982,2008,2008-04-18,M.Chinnaswamy Stadium,2,3.1,Royal Challengers Bangalore,Kolkata Knight Riders,W Jaffer,JH Kallis,I Sharma,2,0,,,,,,,,,,Right-arm fast-medium,Pace,Right Handed Bat,RHB,1,1,1,1,1,1,1,3,1,PP
3,335982,2008,2008-04-18,M.Chinnaswamy Stadium,2,3.2,Royal Challengers Bangalore,Kolkata Knight Riders,W Jaffer,JH Kallis,I Sharma,1,0,,,,,,,,,,Right-arm fast-medium,Pace,Right Handed Bat,RHB,1,1,1,1,1,1,1,3,2,PP
4,335982,2008,2008-04-18,M.Chinnaswamy Stadium,2,3.3,Royal Challengers Bangalore,Kolkata Knight Riders,JH Kallis,W Jaffer,I Sharma,0,0,,,,,,,,,,Right-arm fast-medium,Pace,Right Handed Bat,RHB,1,1,1,1,1,1,1,3,3,PP


In [9]:
# add total runs column
balls['total_runs'] = balls['runs_off_bat'] + balls['extras']

In [10]:
# add wicket flag
balls['is_wicket'] = balls['player_dismissed'].apply(lambda x: 1 if type(x) == type('str') else 0)

In [11]:
# summary of innings 1
ing1_runs = balls[balls.innings == 1].groupby(['match_id'])['total_runs'].sum().reset_index()
ing1_runs.rename(columns = {'total_runs':'i1_runs'}, inplace = True)

ing1_balls = balls[balls.innings == 1].groupby(['match_id'])['legal_ball_bowler'].sum().reset_index()
ing1_balls.rename(columns = {'legal_ball_bowler':'i1_balls'}, inplace = True)

ing1_wkts = balls[balls.innings == 1].groupby(['match_id'])['is_wicket'].sum().reset_index()
ing1_wkts.rename(columns = {'is_wicket':'i1_wkts'}, inplace = True)

In [12]:
# summary of innings 2
ing2_runs = balls[balls.innings == 2].groupby(['match_id'])['total_runs'].sum().reset_index()
ing2_runs.rename(columns = {'total_runs':'i2_runs'}, inplace = True)

ing2_balls = balls[balls.innings == 2].groupby(['match_id'])['legal_ball_bowler'].sum().reset_index()
ing2_balls.rename(columns = {'legal_ball_bowler':'i2_balls'}, inplace = True)

ing2_wkts = balls[balls.innings == 2].groupby(['match_id'])['is_wicket'].sum().reset_index()
ing2_wkts.rename(columns = {'is_wicket':'i2_wkts'}, inplace = True)

In [13]:
ing1_summary = ing1_runs.merge(ing1_balls, on = 'match_id').merge(ing1_wkts, on = 'match_id')
ing2_summary = ing2_runs.merge(ing2_balls, on = 'match_id').merge(ing2_wkts, on = 'match_id')

match_summary = ing1_summary.merge(ing2_summary, on = 'match_id')

In [14]:
match_summary.head()

Unnamed: 0,match_id,i1_runs,i1_balls,i1_wkts,i2_runs,i2_balls,i2_wkts
0,335982,222,120,3,82,91,10
1,335983,240,120,5,207,120,4
2,335984,129,120,8,132,91,1
3,335985,165,120,7,166,118,5
4,335986,110,112,10,112,114,5


In [15]:
# add score columns - runs/wkts (overs.balls)
match_summary['i1_score'] = match_summary.apply(lambda x: str(x['i1_runs']) + '/' + str(x['i1_wkts']) + ' ' + 
                                                '(' + str(x['i1_balls']//6) + '.' + str(x['i1_balls']%6) + ')', axis = 1)

In [16]:
match_summary['i2_score'] = match_summary.apply(lambda x: str(x['i2_runs']) + '/' + str(x['i2_wkts']) + ' ' + 
                                                '(' + str(x['i2_balls']//6) + '.' + str(x['i2_balls']%6) + ')', axis = 1)

In [17]:
match_summary.head(2)

Unnamed: 0,match_id,i1_runs,i1_balls,i1_wkts,i2_runs,i2_balls,i2_wkts,i1_score,i2_score
0,335982,222,120,3,82,91,10,222/3 (20.0),82/10 (15.1)
1,335983,240,120,5,207,120,4,240/5 (20.0),207/4 (20.0)


In [18]:
df = match_summary.copy()

In [19]:
df.head()

Unnamed: 0,match_id,i1_runs,i1_balls,i1_wkts,i2_runs,i2_balls,i2_wkts,i1_score,i2_score
0,335982,222,120,3,82,91,10,222/3 (20.0),82/10 (15.1)
1,335983,240,120,5,207,120,4,240/5 (20.0),207/4 (20.0)
2,335984,129,120,8,132,91,1,129/8 (20.0),132/1 (15.1)
3,335985,165,120,7,166,118,5,165/7 (20.0),166/5 (19.4)
4,335986,110,112,10,112,114,5,110/10 (18.4),112/5 (19.0)


In [20]:
df.tail()

Unnamed: 0,match_id,i1_runs,i1_balls,i1_wkts,i2_runs,i2_balls,i2_wkts,i1_score,i2_score
943,1304116,157,120,8,160,91,5,157/8 (20.0),160/5 (15.1)
944,1312197,188,120,6,191,117,3,188/6 (20.0),191/3 (19.3)
945,1312198,207,120,4,193,120,6,207/4 (20.0),193/6 (20.0)
946,1312199,157,120,8,161,109,3,157/8 (20.0),161/3 (18.1)
947,1312200,130,120,9,133,109,3,130/9 (20.0),133/3 (18.1)


In [21]:
input_match_id = 1312200

df['target_i1_runs'] = df[df.match_id == input_match_id].i1_runs.item()
df['target_i1_balls'] = df[df.match_id == input_match_id].i1_balls.item()
df['target_i1_wkts'] = df[df.match_id == input_match_id].i1_wkts.item()

df['target_i2_runs'] = df[df.match_id == input_match_id].i2_runs.item()
df['target_i2_balls'] = df[df.match_id == input_match_id].i2_balls.item()
df['target_i2_wkts'] = df[df.match_id == input_match_id].i2_wkts.item()

In [22]:
# lets assume max of runs = 300, balls = 120, wkts = 10

In [23]:
max_runs, max_balls, max_wkts = 300, 120, 10

df['s_i1_runs'] = df.apply(lambda x: abs(x['i1_runs'] - x['target_i1_runs'])/max_runs , axis = 1)
df['s_i1_balls'] = df.apply(lambda x: abs(x['i1_balls'] - x['target_i1_balls'])/max_balls , axis = 1)
df['s_i1_wkts'] = df.apply(lambda x: abs(x['i1_wkts'] - x['target_i1_wkts'])/max_wkts , axis = 1)

df['s_i2_runs'] = df.apply(lambda x: abs(x['i2_runs'] - x['target_i2_runs'])/max_runs , axis = 1)
df['s_i2_balls'] = df.apply(lambda x: abs(x['i2_balls'] - x['target_i2_balls'])/max_balls , axis = 1)
df['s_i2_wkts'] = df.apply(lambda x: abs(x['i2_wkts'] - x['target_i2_wkts'])/max_wkts , axis = 1)

In [24]:
df.head()

Unnamed: 0,match_id,i1_runs,i1_balls,i1_wkts,i2_runs,i2_balls,i2_wkts,i1_score,i2_score,target_i1_runs,target_i1_balls,target_i1_wkts,target_i2_runs,target_i2_balls,target_i2_wkts,s_i1_runs,s_i1_balls,s_i1_wkts,s_i2_runs,s_i2_balls,s_i2_wkts
0,335982,222,120,3,82,91,10,222/3 (20.0),82/10 (15.1),130,120,9,133,109,3,0.307,0.0,0.6,0.17,0.15,0.7
1,335983,240,120,5,207,120,4,240/5 (20.0),207/4 (20.0),130,120,9,133,109,3,0.367,0.0,0.4,0.247,0.092,0.1
2,335984,129,120,8,132,91,1,129/8 (20.0),132/1 (15.1),130,120,9,133,109,3,0.003,0.0,0.1,0.003,0.15,0.2
3,335985,165,120,7,166,118,5,165/7 (20.0),166/5 (19.4),130,120,9,133,109,3,0.117,0.0,0.2,0.11,0.075,0.2
4,335986,110,112,10,112,114,5,110/10 (18.4),112/5 (19.0),130,120,9,133,109,3,0.067,0.067,0.1,0.07,0.042,0.2


In [25]:
df['matching_score'] = df['s_i1_runs'] + df['s_i1_balls'] + df['s_i1_wkts'] + df['s_i2_runs'] + df['s_i2_balls'] + df['s_i2_wkts']

In [26]:
df.head()

Unnamed: 0,match_id,i1_runs,i1_balls,i1_wkts,i2_runs,i2_balls,i2_wkts,i1_score,i2_score,target_i1_runs,target_i1_balls,target_i1_wkts,target_i2_runs,target_i2_balls,target_i2_wkts,s_i1_runs,s_i1_balls,s_i1_wkts,s_i2_runs,s_i2_balls,s_i2_wkts,matching_score
0,335982,222,120,3,82,91,10,222/3 (20.0),82/10 (15.1),130,120,9,133,109,3,0.307,0.0,0.6,0.17,0.15,0.7,1.927
1,335983,240,120,5,207,120,4,240/5 (20.0),207/4 (20.0),130,120,9,133,109,3,0.367,0.0,0.4,0.247,0.092,0.1,1.205
2,335984,129,120,8,132,91,1,129/8 (20.0),132/1 (15.1),130,120,9,133,109,3,0.003,0.0,0.1,0.003,0.15,0.2,0.457
3,335985,165,120,7,166,118,5,165/7 (20.0),166/5 (19.4),130,120,9,133,109,3,0.117,0.0,0.2,0.11,0.075,0.2,0.702
4,335986,110,112,10,112,114,5,110/10 (18.4),112/5 (19.0),130,120,9,133,109,3,0.067,0.067,0.1,0.07,0.042,0.2,0.545


In [27]:
df.sort_values('matching_score', ascending=True).head()

Unnamed: 0,match_id,i1_runs,i1_balls,i1_wkts,i2_runs,i2_balls,i2_wkts,i1_score,i2_score,target_i1_runs,target_i1_balls,target_i1_wkts,target_i2_runs,target_i2_balls,target_i2_wkts,s_i1_runs,s_i1_balls,s_i1_wkts,s_i2_runs,s_i2_balls,s_i2_wkts,matching_score
947,1312200,130,120,9,133,109,3,130/9 (20.0),133/3 (18.1),130,120,9,133,109,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0
557,980985,124,120,9,127,102,3,124/9 (20.0),127/3 (17.0),130,120,9,133,109,3,0.02,0.0,0.0,0.02,0.058,0.0,0.098
668,1136595,127,120,9,128,108,4,127/9 (20.0),128/4 (18.0),130,120,9,133,109,3,0.01,0.0,0.0,0.017,0.008,0.1,0.135
401,729287,115,120,9,116,105,3,115/9 (20.0),116/3 (17.3),130,120,9,133,109,3,0.05,0.0,0.0,0.057,0.033,0.0,0.14
831,1254075,133,120,9,134,113,4,133/9 (20.0),134/4 (18.5),130,120,9,133,109,3,0.01,0.0,0.0,0.003,0.033,0.1,0.147


In [28]:
matches[matches.match_id == 829785][['match_id', 'match_info']]

Unnamed: 0,match_id,match_info
870,829785,"{'team1': 'Royal Challengers Bangalore', 'team2': 'Kings XI Punjab', 'winner': 'Royal Challengers Bangalore', 'venue': '('M Chinnaswamy Stadium', 'Bangalore')', 'date': '2015-05-06'}"
