In [1]:
#importing libraries

import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
# Load the per-match and per-delivery tables, then join each match row to its
# deliveries (inner join: matches.id == deliveries.match_id).

match = pd.read_csv('matches.csv')
delivery = pd.read_csv('deliveries.csv')
data = pd.merge(match, delivery, left_on='id', right_on='match_id')

In [3]:
# Preview the first five rows of the merged match/delivery frame.
data.head()

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,...,0,0,0,0,0,0,0,,,
1,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,...,0,0,0,0,0,0,0,,,
2,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,...,0,0,0,0,4,0,4,,,
3,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,...,0,0,0,0,0,0,0,,,
4,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,...,0,0,0,0,0,2,2,,,


In [4]:
# List the columns available after merging the two tables.
data.columns

Index(['id', 'season', 'city', 'date', 'team1', 'team2', 'toss_winner',
       'toss_decision', 'result', 'dl_applied', 'winner', 'win_by_runs',
       'win_by_wickets', 'player_of_match', 'venue', 'umpire1', 'umpire2',
       'umpire3', 'match_id', 'inning', 'batting_team', 'bowling_team', 'over',
       'ball', 'batsman', 'non_striker', 'bowler', 'is_super_over',
       'wide_runs', 'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder'],
      dtype='object')

In [5]:
# Batting average and strike rate of every batsman who faced more than 100 legal balls

runs=data.groupby('batsman')['batsman_runs'].sum()
# Deliveries with wide or no-ball runs are not counted as balls faced.
mask=(data['wide_runs']==0) & (data['noball_runs']==0)
balls=data[mask].groupby('batsman')['ball'].count()
# A dismissal belongs to the player named in `player_dismissed`, who can be the
# non-striker (run out). Counting on that column, rather than grouping by the
# striker, credits each dismissal to the right player.
dismissed=data['player_dismissed'].value_counts()
criteria=balls>100
# Select by label: `balls` may lack batsmen present in `runs` (no legal ball
# faced), and a boolean Series that does not cover the target index raises
# an IndexingError.
eligible=criteria[criteria].index
runs=runs.reindex(eligible)
balls=balls.reindex(eligible)
# Batsmen who were never dismissed become NaN, so their average is undefined
# (and skipped by .mean()/.std()) instead of turning into a division by zero.
dismissed=dismissed.reindex(eligible)
batsman_average=runs/dismissed
batsman_sr=runs*100/balls

In [6]:
# Bowling average and economy rate of every bowler who bowled more than 100 legal balls

# NOTE(review): total_runs presumably includes byes and leg-byes, which are
# normally not charged to the bowler - confirm before changing, and keep it in
# step with bowler_avg / bowler_economy_rate, which use the same column.
runs=data.groupby('bowler')['total_runs'].sum()
# Deliveries with wide or no-ball runs are not counted as balls bowled.
mask1=(data['wide_runs']==0) & (data['noball_runs']==0)
balls=data[mask1].groupby('bowler')['ball'].count()
# Only these dismissal kinds are credited to the bowler (run outs etc. are not).
dismissal=['caught','bowled','lbw','stumped','caught and bowled','hit wicket']
out=data[data['dismissal_kind'].isin(dismissal)]
wickets=out.groupby('bowler')['dismissal_kind'].count()
criteria=balls>100
# Select by label: a boolean Series that does not cover the target index
# (e.g. a bowler present in `runs` but absent from `balls`) raises an
# IndexingError when used as a mask.
eligible=criteria[criteria].index
# Bowlers without a credited wicket become NaN, i.e. an undefined average.
wickets=wickets.reindex(eligible)
runs=runs.reindex(eligible)
balls=balls.reindex(eligible)
bowler_average=runs/wickets
bowler_economy=runs*6/balls

In [7]:
# Column listing again; `data` has not been modified since the earlier data.columns cell.
data.columns

Index(['id', 'season', 'city', 'date', 'team1', 'team2', 'toss_winner',
       'toss_decision', 'result', 'dl_applied', 'winner', 'win_by_runs',
       'win_by_wickets', 'player_of_match', 'venue', 'umpire1', 'umpire2',
       'umpire3', 'match_id', 'inning', 'batting_team', 'bowling_team', 'over',
       'ball', 'batsman', 'non_striker', 'bowler', 'is_super_over',
       'wide_runs', 'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder'],
      dtype='object')

In [8]:
# Population means of each statistic (the centre used when standardising a player's value)

batsman_average_mean, batsman_sr_mean = (
    stat.mean() for stat in (batsman_average, batsman_sr)
)
bowler_average_mean, bowler_economy_mean = (
    stat.mean() for stat in (bowler_average, bowler_economy)
)

In [9]:
# Population standard deviations of each statistic (the scale used when standardising a player's value)

batsman_average_sd, batsman_sr_sd = (
    stat.std() for stat in (batsman_average, batsman_sr)
)
bowler_average_sd, bowler_economy_sd = (
    stat.std() for stat in (bowler_average, bowler_economy)
)

In [10]:
#Functions for computing an individual player's statistics from the merged data

def batsman_avg(batsman):
    """Return the batting average (runs scored per dismissal) of one batsman.

    Reads the module-level ``data`` frame.

    Parameters
    ----------
    batsman : str
        Player name exactly as stored in the ``batsman`` and
        ``player_dismissed`` columns.

    Returns
    -------
    float
        Total ``batsman_runs`` scored on strike divided by the number of rows
        in which the player is the one dismissed. NaN when the player was
        never dismissed (the average is undefined).

    Raises
    ------
    ValueError
        If the player never appears as the striker in ``data``.
    """
    on_strike=data['batsman']==batsman
    if not on_strike.any():
        raise ValueError(f"No deliveries found for batsman {batsman!r}")
    total_run=int(data.loc[on_strike,'batsman_runs'].sum())
    # Count every row where this player is the one dismissed, whichever end
    # they were at. Grouping by the striker and taking the largest group would
    # drop run-outs at the non-striker's end.
    total_times_dismissed=int((data['player_dismissed']==batsman).sum())
    if total_times_dismissed==0:
        return float('nan')
    return total_run/total_times_dismissed

In [11]:
def batsman_strike_rate(batsman):
    """Return the strike rate (runs per 100 balls faced) of one batsman.

    Reads the module-level ``data`` frame. Deliveries with wide or no-ball
    runs are not counted as balls faced.

    Parameters
    ----------
    batsman : str
        Player name exactly as stored in the ``batsman`` column.

    Returns
    -------
    float
        ``total runs / balls faced * 100``.

    Raises
    ------
    ValueError
        If no countable delivery faced by the player exists in ``data``.
    """
    on_strike=data['batsman']==batsman
    legal=(data['wide_runs']==0) & (data['noball_runs']==0)
    # Aggregate the filtered column directly; calling int() on a one-element
    # Series is deprecated and fails with an opaque TypeError when it is empty.
    balls_faced=int(data.loc[on_strike & legal,'ball'].count())
    if balls_faced==0:
        raise ValueError(f"No legal deliveries faced by batsman {batsman!r} were found")
    total_run=int(data.loc[on_strike,'batsman_runs'].sum())
    return total_run/balls_faced*100

In [12]:
def bowler_avg(bowler):
    """Return the bowling average (runs conceded per wicket) of one bowler.

    Reads the module-level ``data`` frame. Runs are the sum of ``total_runs``
    over the bowler's deliveries; wickets are the dismissals of a kind that is
    credited to the bowler.

    Parameters
    ----------
    bowler : str
        Player name exactly as stored in the ``bowler`` column.

    Returns
    -------
    float
        ``runs / wickets``. NaN when the bowler has no credited wicket
        (the average is undefined).

    Raises
    ------
    ValueError
        If the player never appears as the bowler in ``data``.
    """
    bowled=data['bowler']==bowler
    if not bowled.any():
        raise ValueError(f"No deliveries found for bowler {bowler!r}")
    runs=int(data.loc[bowled,'total_runs'].sum())
    # Dismissal kinds credited to the bowler (run outs etc. are excluded).
    dismissal=['caught','bowled','lbw','stumped','caught and bowled','hit wicket']
    credited=bowled & data['dismissal_kind'].isin(dismissal)
    # Aggregate the filtered column directly; calling int() on a one-element
    # Series is deprecated and fails with a TypeError for a wicketless bowler.
    wickets=int(data.loc[credited,'dismissal_kind'].count())
    if wickets==0:
        return float('nan')
    return runs/wickets

In [13]:
def bowler_economy_rate(bowler):
    """Return the economy rate (runs conceded per six balls) of one bowler.

    Reads the module-level ``data`` frame. Runs are the sum of ``total_runs``
    over all of the bowler's deliveries; deliveries with wide or no-ball runs
    are not counted as balls bowled.

    Parameters
    ----------
    bowler : str
        Player name exactly as stored in the ``bowler`` column.

    Returns
    -------
    float
        ``runs * 6 / balls``.

    Raises
    ------
    ValueError
        If no countable delivery bowled by the player exists in ``data``.
    """
    bowled=data['bowler']==bowler
    legal=(data['wide_runs']==0) & (data['noball_runs']==0)
    # Aggregate the filtered column directly; calling int() on a one-element
    # Series is deprecated and fails with an opaque TypeError when it is empty.
    balls=int(data.loc[bowled & legal,'ball'].count())
    if balls==0:
        raise ValueError(f"No legal deliveries bowled by {bowler!r} were found")
    runs=int(data.loc[bowled,'total_runs'].sum())
    return runs*6/balls

In [14]:
def better_player(batsman=None, bowler=None):
    """Compare a batsman with a bowler via standardised (z-score) statistics.

    Each player's statistics are standardised against the population means and
    standard deviations computed earlier in the notebook. The batsman's score
    is the mean of the z-scores of batting average and strike rate. The
    bowler's score is the negated mean of the z-scores of bowling average and
    economy rate, because lower values are better for a bowler.

    Parameters
    ----------
    batsman : str, optional
        Batsman's name. When omitted, it is read interactively with input().
    bowler : str, optional
        Bowler's name. When omitted, it is read interactively with input().

    Returns
    -------
    None
        The statistics and the verdict are printed.
    """
    if batsman is None:
        batsman=input("Enter the batsman's name : ")
    if bowler is None:
        bowler=input("Enter the bowler's name : ")
    bat_avg=batsman_avg(batsman)
    bat_sr=batsman_strike_rate(batsman)
    bowl_avg=bowler_avg(bowler)
    bowl_eco=bowler_economy_rate(bowler)

    z_batsman_average=(bat_avg-batsman_average_mean)/batsman_average_sd
    z_batsman_sr=(bat_sr-batsman_sr_mean)/batsman_sr_sd

    z_bowler_average=(bowl_avg-bowler_average_mean)/bowler_average_sd
    z_bowler_economy=(bowl_eco-bowler_economy_mean)/bowler_economy_sd

    z_batsman=(z_batsman_average+z_batsman_sr)/2
    # Flip the sign instead of taking abs(): a below-average bowling average or
    # economy (negative z) is good, while abs() would also reward a bowler who
    # is far *worse* than average.
    z_bowler=-(z_bowler_average+z_bowler_economy)/2

    print(batsman,":")
    print("Batting Average :",bat_avg,"     Strike Rate :",bat_sr)
    print(bowler,":")
    print("Bowling Average :",bowl_avg,"     Economy Rate:",bowl_eco)

    # If any statistic is NaN both comparisons are False and the final branch runs.
    if z_batsman>z_bowler:
        print(batsman,"is a better T20 player than",bowler,".")
    elif z_bowler>z_batsman:
        print(bowler,"is a better T20 player than",batsman,".")
    else:
        print("Both players are of same standard, no comparisons could be made.")

In [21]:
# Interactive run: prompts for a batsman and a bowler, then prints their statistics and the verdict.
better_player()

Enter the batsman's name : KM Jadhav
Enter the bowler's name : SP Narine
KM Jadhav :
Batting Average : 25.514285714285716      Strike Rate : 136.33587786259542
SP Narine :
Bowling Average : 21.94736842105263      Economy Rate: 6.478508544795443
SP Narine is a better T20 player than KM Jadhav .
