<a href="https://colab.research.google.com/github/anky19698/Cricket_Analytics/blob/main/Scouting_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Problem Statement:
## find out the best pinch hitters inside PP in the IPL so far (min of 20 ings)

In [34]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [35]:
def balls_per_dismissal(balls, dismissals):
    if dismissals > 0:
        return balls/dismissals
    else:
        return balls

def balls_per_boundary(balls, boundaries):
    if boundaries > 0:
        return balls/boundaries
    else:
        return balls

def get_phase(over):
  if over <= 5:
    return 'Powerplay'
  elif over > 5 and over < 15:
    return 'Middle'
  else:
    return 'Death'


In [36]:
def scouting_stats(data, phase):




  # data = data[data.venue == venue]
  data = data[data.phase == phase]
  # data = data[data.bowling_team == opposition]


  data['is_dot'] = data['batsman_runs'].apply(lambda x: 1 if x == 0 else 0)
  data['is_one'] = data['batsman_runs'].apply(lambda x: 1 if x == 1 else 0)
  data['is_two'] = data['batsman_runs'].apply(lambda x: 1 if x == 2 else 0)
  data['is_three'] = data['batsman_runs'].apply(lambda x: 1 if x == 3 else 0)
  data['is_four'] = data['batsman_runs'].apply(lambda x: 1 if x == 4 else 0)
  data['is_six'] = data['batsman_runs'].apply(lambda x: 1 if x == 6 else 0)


  dots = pd.DataFrame(data.groupby(['batsman'])['is_dot'].sum()).reset_index().rename(columns = {'is_dot':'dots'})
  ones = pd.DataFrame(data.groupby(['batsman'])['is_one'].sum()).reset_index().rename(columns = {'is_one':'ones'})
  twos = pd.DataFrame(data.groupby(['batsman'])['is_two'].sum()).reset_index().rename(columns = {'is_two':'twos'})
  threes = pd.DataFrame(data.groupby(['batsman'])['is_three'].sum()).reset_index().rename(columns = {'is_three':'threes'})

  runs = pd.DataFrame(data.groupby(['batsman'])['batsman_runs'].sum()).reset_index().rename(columns = {'batsman_runs': 'runs'})
  balls = pd.DataFrame(data.groupby(['batsman'])['match_id'].count()).reset_index().rename(columns = {'match_id': 'balls'})
  innings = pd.DataFrame(data.groupby(['batsman'])['match_id'].apply(lambda x: len(list(np.unique(x)))).reset_index().rename(columns = {'match_id': 'innings'}))
  dismissals = pd.DataFrame(data.groupby(['batsman'])['player_dismissed'].count()).reset_index().rename(columns = {'player_dismissed':'dismissals'})
  fours = data.groupby(['batsman'])['is_four'].sum().reset_index().rename(columns = {'is_four': 'fours'})
  sixes = data.groupby(['batsman'])['is_six'].sum().reset_index().rename(columns = {'is_six': 'sixes'})

  batting = pd.merge(innings, runs, on='batsman').merge(balls, on='batsman').merge(dismissals, on='batsman').merge(fours, on='batsman').merge(sixes, on='batsman').merge(dots, on='batsman').merge(ones, on='batsman').merge(twos, on='batsman').merge(threes, on='batsman')

  # batting['Strike Rate'] = batting.apply(lambda x: (100*x['runs']/x['balls']), axis=1)
  # batting['Runs Per Innings'] = batting.apply(lambda x: (x['runs']/x['innings']), axis=1)

  #StrikeRate
  batting['SR'] = batting.apply(lambda x: 100*(x['runs']/x['balls']), axis = 1)

  #runs per innings
  batting['RPI'] = batting.apply(lambda x: x['runs']/x['innings'], axis = 1)

  #balls per dismissals
  batting['BPD'] = batting.apply(lambda x: balls_per_dismissal(x['balls'], x['dismissals']), axis = 1)

  #balls per boundary
  batting['BPB'] = batting.apply(lambda x: balls_per_boundary(x['balls'], (x['fours'] + x['sixes'])), axis = 1)

  batting['dot_percentage'] = batting.apply(lambda x: x['dots']/x['balls'], axis = 1)

  return batting

In [37]:
matches = pd.read_csv('/content/matches.csv')

In [38]:
deliveries = pd.read_csv('/content/deliveries.csv')

In [39]:
df = deliveries.copy()

df = df.rename(columns = {'matchId':'match_id'})

df.head()

Unnamed: 0,match_id,inning,over_ball,over,ball,batting_team,bowling_team,batsman,non_striker,bowler,batsman_runs,extras,isWide,isNoBall,Byes,LegByes,Penalty,dismissal_kind,player_dismissed,fielder
0,1082591,1,0.1,0,1,Sunrisers Hyderabad,Royal Challengers Bangalore,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,,,
1,1082591,1,0.2,0,2,Sunrisers Hyderabad,Royal Challengers Bangalore,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,,,
2,1082591,1,0.3,0,3,Sunrisers Hyderabad,Royal Challengers Bangalore,DA Warner,S Dhawan,TS Mills,4,0,0,0,0,0,0,,,
3,1082591,1,0.4,0,4,Sunrisers Hyderabad,Royal Challengers Bangalore,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,,,
4,1082591,1,0.5,0,5,Sunrisers Hyderabad,Royal Challengers Bangalore,DA Warner,S Dhawan,TS Mills,0,2,2,0,0,0,0,,,


In [40]:
mdf = matches.copy()

mdf = mdf.rename(columns = {'matchId':'match_id'})



In [41]:
final = pd.merge(df, mdf, on='match_id', how='left')

In [42]:
final['phase'] = final['over'].apply(lambda x: get_phase(x))

In [43]:
final.groupby(['batsman'])['player_dismissed'].count()

batsman
A Ashish Reddy     15
A Badoni           18
A Chandila          1
A Chopra            5
A Choudhary         2
                 ... 
Yashpal Singh       4
Younis Khan         1
Yudhvir Singh       2
Yuvraj Singh      109
Z Khan             15
Name: player_dismissed, Length: 637, dtype: int64

In [44]:
final_df = scouting_stats(final, 'Powerplay')

final_df.head()

Unnamed: 0,batsman,innings,runs,balls,dismissals,fours,sixes,dots,ones,twos,threes,SR,RPI,BPD,BPB,dot_percentage
0,A Ashish Reddy,1,5,7,1,1,0,5,1,0,0,71.428571,5.0,7.0,7.0,0.714286
1,A Badoni,4,9,22,2,1,0,16,5,0,0,40.909091,2.25,11.0,22.0,0.727273
2,A Chopra,5,27,41,2,4,0,26,11,0,0,65.853659,5.4,20.5,10.25,0.634146
3,A Flintoff,1,14,15,0,1,1,11,1,0,1,93.333333,14.0,15.0,7.5,0.733333
4,A Manohar,1,12,12,1,2,0,6,4,0,0,100.0,12.0,12.0,6.0,0.5


In [45]:
wt_sr, wt_rpi, wt_bpd, wt_bpb  = 0.38, 0.25, 0.12, 0.46

In [46]:
df = final_df.copy()

## Applying Filters: Min 20 Innings

In [47]:
# Batsmen Who Played Atleast 20 Innings

df = df[df.innings >= 20 ]

## Topsis Calculation

In [48]:
#step1: square of all values
df['calc_SR'] = df['SR'].apply(lambda x: x*x)
df['calc_RPI'] = df['RPI'].apply(lambda x: x*x)
df['calc_BPD'] = df['BPD'].apply(lambda x: x*x)
df['calc_BPB'] = df['BPB'].apply(lambda x: x*x)

#step2: square root of all values
sq_sr, sq_rpi, sq_bpd, sq_bpb = np.sqrt(df[['calc_SR','calc_RPI', 'calc_BPD', 'calc_BPB']].sum(axis = 0))

df['calc_SR'] = df['calc_SR'].apply(lambda x: x/sq_sr)
df['calc_RPI'] = df['calc_RPI'].apply(lambda x: x/sq_rpi)
df['calc_BPD'] = df['calc_BPD'].apply(lambda x: x/sq_bpd)
df['calc_BPB'] = df['calc_BPB'].apply(lambda x: x/sq_bpb)

df['calc_SR'] = df['calc_SR'].apply(lambda x: x*wt_sr)
df['calc_RPI'] = df['calc_RPI'].apply(lambda x: x*wt_rpi)
df['calc_BPD'] = df['calc_BPD'].apply(lambda x: x*wt_bpd)
df['calc_BPB'] = df['calc_BPB'].apply(lambda x: x*wt_bpb)

best_sr, worst_sr = max(df['calc_SR']), min(df['calc_SR'])
best_rpi, worst_rpi = max(df['calc_RPI']), min(df['calc_RPI'])
best_bpd, worst_bpd = max(df['calc_BPD']), min(df['calc_BPD'])
best_bpb, worst_bpb = min(df['calc_BPB']), max(df['calc_BPB'])

In [49]:
df['dev_best_SR'] = df['calc_SR'].apply(lambda x: (x-best_sr)*(x-best_sr))
df['dev_best_RPI'] = df['calc_RPI'].apply(lambda x: (x-best_rpi)*(x-best_rpi))
df['dev_best_BPD'] = df['calc_BPD'].apply(lambda x: (x-best_bpd)*(x-best_bpd))
df['dev_best_BPB'] = df['calc_BPB'].apply(lambda x: (x-best_bpb)*(x-best_bpb))

df['dev_best_sqrt'] = df.apply(lambda x: x['dev_best_SR'] + x['dev_best_RPI'] + x['dev_best_BPD'] + x['dev_best_BPB'], axis = 1)

df['dev_worst_SR'] = df['calc_SR'].apply(lambda x: (x-worst_sr)*(x-worst_sr))
df['dev_worst_RPI'] = df['calc_RPI'].apply(lambda x: (x-worst_rpi)*(x-worst_rpi))
df['dev_worst_BPD'] = df['calc_BPD'].apply(lambda x: (x-worst_bpd)*(x-worst_bpd))
df['dev_worst_BPB'] = df['calc_BPB'].apply(lambda x: (x-worst_bpb)*(x-worst_bpb))

df['dev_worst_sqrt'] = df.apply(lambda x: x['dev_worst_SR'] + x['dev_worst_RPI'] + x['dev_worst_BPD'] + x['dev_worst_BPB'], axis = 1)


In [50]:
df['score'] = df.apply(lambda x: x['dev_worst_sqrt']/(x['dev_worst_sqrt'] + x['dev_best_sqrt']), axis = 1)

In [51]:
Best_Pinch_Hitters = pd.DataFrame(df.sort_values(['score'], ascending=False))

## Best Pinch Hitter in IPL Till Now (Min. 20 Innings)

In [52]:
Best_Pinch_Hitters[['batsman', 'runs', 'balls', 'innings', 'dismissals', 'fours', 'sixes', 'SR', 'RPI', 'BPB', 'score']].head(20)

Unnamed: 0,batsman,runs,balls,innings,dismissals,fours,sixes,SR,RPI,BPB,score
290,SP Narine,665,404,44,36,86,38,164.60396,15.113636,3.258065,0.960417
341,YBK Jaiswal,727,490,37,17,105,28,148.367347,19.648649,3.684211,0.881679
130,JM Bairstow,695,487,36,16,88,30,142.710472,19.305556,4.127119,0.80239
63,CA Lynn,779,553,40,17,99,37,140.867993,19.475,4.066176,0.773776
96,E Lewis,420,298,22,13,49,21,140.939597,19.090909,4.257143,0.761042
231,PP Shaw,1207,856,71,46,167,36,141.004673,17.0,4.216749,0.755375
342,YK Pathan,361,256,33,12,47,16,141.015625,10.939394,4.063492,0.751706
123,JC Buttler,1415,1033,76,34,182,61,136.979671,18.618421,4.251029,0.6944
326,V Sehwag,1593,1160,103,60,230,46,137.327586,15.466019,4.202899,0.683784
106,GJ Maxwell,433,315,44,18,59,16,137.460317,9.840909,4.2,0.678169
