In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline 
import seaborn as sns

In [3]:
pd.set_option("display.max_columns", 50)

### Loading Kaggle IPL data

In [4]:
df = pd.read_csv("data/df_kaggleAllIpl_preprocessed.csv")
del df['Unnamed: 0']

#### Adding a is_wicket column

In [5]:
df["is_bowler_wicket"] = df["dismissal_kind"].isin(["caught","bowled","lbw","caught and bowled","stumped"]).values.astype(int)

In [6]:
df

Unnamed: 0,match_id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,wide_runs,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder,is_bowler_wicket
0,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0
1,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0
2,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,4,0,4,,,,0
3,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0
4,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,2,0,0,0,0,0,2,2,,,,0
5,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,6,S Dhawan,DA Warner,TS Mills,0,0,0,0,0,0,0,0,0,,,,0
6,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,7,S Dhawan,DA Warner,TS Mills,0,0,0,1,0,0,0,1,1,,,,0
7,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,2,1,S Dhawan,DA Warner,A Choudhary,0,0,0,0,0,0,1,0,1,,,,0
8,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,2,2,DA Warner,S Dhawan,A Choudhary,0,0,0,0,0,0,4,0,4,,,,0
9,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,2,3,DA Warner,S Dhawan,A Choudhary,0,0,0,0,1,0,0,1,1,,,,0


### Player statistics: Season and venue wise

In [99]:
### Number of runs scored
dfp = df.groupby(['season','venue','batting_team','batsman']).sum()["batsman_runs"].reset_index()
dfp = dfp.sort_values(['season', 'venue', 'batting_team','batsman_runs'], ascending=False)
dfp.rename(columns={"batsman":"player","batting_team":"team","batsman_runs":"runs_scored"}, inplace=True)

### Number of balls faced 
df1 = df.copy()
df1 = df1[df1['is_super_over']==0]
df1 = df1[df1['wide_runs']==0]
df1 = df1[df1['noball_runs']==0]
df1 = df1.groupby(['season','venue','batting_team','batsman']).count()["date"].reset_index()
df1.rename(columns={"date":"balls_faced","batting_team":"team","batsman":"player"}, inplace=True)
dfp = dfp.set_index(['season','venue','team','player']).join(df1.set_index(['season','venue','team','player'])).reset_index()

### Number of innings
df1 = df.copy()
df1 = df1.drop_duplicates(['match_id','batsman'])
df1 = df1.groupby(['season','venue','batting_team','batsman']).count()["date"].reset_index()
df1.rename(columns={"batting_team":"team","date":"num_innings","batsman":"player"}, inplace=True)
dfp = dfp.set_index(['season','venue','team','player']).join(df1.set_index(['season','venue','team','player'])).reset_index()

### Number of wickets
df1 = df.copy()
df1 = df1.groupby(['season','venue','bowling_team','bowler']).sum()["is_bowler_wicket"].reset_index()
df1.rename(columns={"is_bowler_wicket":"wickets","bowling_team":"team","bowler":"player"}, inplace=True)
df1 = df1.sort_values(['season','wickets'], ascending=False)
dfp = dfp.set_index(['season','venue','team','player']).join(df1.set_index(['season','venue','team','player'])).reset_index()

### Number of balls bowled 
df1 = df.copy()
df1 = df1[df1['is_super_over']==0]
df1 = df1[df1['wide_runs']==0]
df1 = df1[df1['noball_runs']==0]
df1 = df1.groupby(['season','venue','bowling_team','bowler']).count()["date"].reset_index()
df1.rename(columns={"date":"balls_bowled","bowling_team":"team","bowler":"player"}, inplace=True)
dfp = dfp.set_index(['season','venue','team','player']).join(df1.set_index(['season','venue','team','player'])).reset_index()

### Number of runs conceded
df1 = df.copy()
df1 = df1[df1['bye_runs']==0]
df1 = df1[df1['legbye_runs']==0]
df1 = df1.groupby(['season','venue','bowling_team','bowler']).sum()["total_runs"].reset_index()
df1.rename(columns={"total_runs":"runs_conceded","bowling_team":"team","bowler":"player"}, inplace=True)
dfp = dfp.set_index(['season','venue','team','player']).join(df1.set_index(['season','venue','team','player'])).reset_index()

### For players who did not bowl, wickets->0 balls_bowled->0 runs_conceded->0
dfp["wickets"].fillna(0, inplace=True)
dfp["balls_bowled"].fillna(0, inplace=True)
dfp["runs_conceded"].fillna(0, inplace=True)

### For players who did not bat, runs_scored->0, balls_faced->0
dfp["runs_scored"].fillna(0, inplace=True)
dfp["balls_faced"].fillna(0, inplace=True)

dfp

Unnamed: 0,season,venue,team,player,runs_scored,balls_faced,num_innings,wickets,balls_bowled,runs_conceded
0,2017,Wankhede Stadium,Sunrisers Hyderabad,DA Warner,49,34.0,1,0.0,0.0,0.0
1,2017,Wankhede Stadium,Sunrisers Hyderabad,S Dhawan,48,42.0,1,0.0,0.0,0.0
2,2017,Wankhede Stadium,Sunrisers Hyderabad,BCJ Cutting,20,10.0,1,0.0,12.0,18.0
3,2017,Wankhede Stadium,Sunrisers Hyderabad,DJ Hooda,9,9.0,1,1.0,12.0,18.0
4,2017,Wankhede Stadium,Sunrisers Hyderabad,NV Ojha,9,9.0,1,0.0,0.0,0.0
5,2017,Wankhede Stadium,Sunrisers Hyderabad,Yuvraj Singh,5,7.0,1,0.0,0.0,0.0
6,2017,Wankhede Stadium,Sunrisers Hyderabad,B Kumar,4,3.0,1,3.0,24.0,21.0
7,2017,Wankhede Stadium,Sunrisers Hyderabad,Rashid Khan,2,4.0,1,1.0,24.0,19.0
8,2017,Wankhede Stadium,Sunrisers Hyderabad,V Shankar,1,2.0,1,0.0,0.0,0.0
9,2017,Wankhede Stadium,Royal Challengers Bangalore,AB de Villiers,43,27.0,1,0.0,0.0,0.0


### Derived statistics
#### Batting: Average, strike-rate
#### Bowling:Average, strike-rate, economy rate

In [63]:
### Batting
dfp["bat_avg"] = dfp["runs_scored"]/dfp["num_innings"]
dfp["bat_sr"]  = dfp["runs_scored"]/dfp["balls_faced"]*100
dfp["bat_avgsr"]= dfp["bat_avg"]*dfp["bat_sr"]/100

### Bowling
dfp["bowl_avg"]= dfp["runs_conceded"]/dfp["wickets"]
dfp["bowl_sr"] = dfp["balls_bowled"]/dfp["wickets"]
dfp["bowl_econ"]=dfp["runs_conceded"]/dfp["balls_bowled"]*6
dfp

Unnamed: 0,season,venue,team,player,runs_scored,balls_faced,num_innings,wickets,balls_bowled,runs_conceded,bat_avg,bat_sr,bat_avgsr,bowl_avg,bowl_sr,bowl_econ,bowl_avgecon
0,2017,Wankhede Stadium,Sunrisers Hyderabad,DA Warner,49,34.0,1,0.0,0.0,0.0,49.000000,144.117647,70.617647,,,,
1,2017,Wankhede Stadium,Sunrisers Hyderabad,S Dhawan,48,42.0,1,0.0,0.0,0.0,48.000000,114.285714,54.857143,,,,
2,2017,Wankhede Stadium,Sunrisers Hyderabad,BCJ Cutting,20,10.0,1,0.0,12.0,18.0,20.000000,200.000000,40.000000,inf,inf,9.000000,0.000000
3,2017,Wankhede Stadium,Sunrisers Hyderabad,DJ Hooda,9,9.0,1,1.0,12.0,18.0,9.000000,100.000000,9.000000,18.000000,12.000000,9.000000,0.055556
4,2017,Wankhede Stadium,Sunrisers Hyderabad,NV Ojha,9,9.0,1,0.0,0.0,0.0,9.000000,100.000000,9.000000,,,,
5,2017,Wankhede Stadium,Sunrisers Hyderabad,Yuvraj Singh,5,7.0,1,0.0,0.0,0.0,5.000000,71.428571,3.571429,,,,
6,2017,Wankhede Stadium,Sunrisers Hyderabad,B Kumar,4,3.0,1,3.0,24.0,21.0,4.000000,133.333333,5.333333,7.000000,8.000000,5.250000,0.428571
7,2017,Wankhede Stadium,Sunrisers Hyderabad,Rashid Khan,2,4.0,1,1.0,24.0,19.0,2.000000,50.000000,1.000000,19.000000,24.000000,4.750000,0.052632
8,2017,Wankhede Stadium,Sunrisers Hyderabad,V Shankar,1,2.0,1,0.0,0.0,0.0,1.000000,50.000000,0.500000,,,,
9,2017,Wankhede Stadium,Royal Challengers Bangalore,AB de Villiers,43,27.0,1,0.0,0.0,0.0,43.000000,159.259259,68.481481,,,,


### Per season venue wise statistics: to make player statistics venue and season (=>pitch) neutral

In [64]:
### Mean bat_avg * bat_sr for each venue during different seasons
dfv = dfp.copy()
dfv = dfv[dfv["runs_scored"]>=20]
dfv = dfv.groupby(["season","venue"]).median()[["bat_avg","bat_sr","bat_avgsr"]].reset_index()

df1 = dfp.copy()
df1 = df1[np.isfinite(df1["bowl_avg"])]
df1 = df1[np.isfinite(df1["bowl_sr"])]
df1 = df1[np.isfinite(df1["bowl_econ"])]
df1 = df1[df1["balls_bowled"]>=12]
df1 = df1.groupby(["season","venue"]).median()[["bowl_avg","bowl_econ"]].reset_index()

dfv = dfv.set_index(["season","venue"]).join(df1.set_index(["season","venue"])).reset_index()
dfv.rename(columns={"bat_avg":"sv_bat_avg","bat_sr":"sv_bat_sr", "bat_avgsr":"sv_bat_avgsr", "bowl_avg":"sv_bowl_avg", "bowl_econ":"sv_bowl_econ","bowl_avgecon":"sv_bowl_avgecon"}, inplace=True)
dfv

Unnamed: 0,season,venue,sv_bat_avg,sv_bat_sr,sv_bat_avgsr,sv_bowl_avg,sv_bowl_econ
0,2008,Dr DY Patil Sports Academy,29.000000,137.777778,40.080357,21.000000,7.500000
1,2008,Eden Gardens,26.000000,121.127503,29.250919,21.166667,7.375000
2,2008,Feroz Shah Kotla,30.500000,160.622711,56.227508,19.500000,7.750000
3,2008,M Chinnaswamy Stadium,28.000000,116.666667,36.750000,26.000000,7.250000
4,2008,"MA Chidambaram Stadium, Chepauk",28.571429,155.161943,48.963068,24.000000,8.750000
5,2008,"Punjab Cricket Association Stadium, Mohali",28.166667,146.427177,50.812319,25.619048,8.568182
6,2008,"Rajiv Gandhi International Stadium, Uppal",31.000000,138.235294,48.151042,23.250000,8.632867
7,2008,Sawai Mansingh Stadium,26.500000,136.835749,40.404818,23.000000,7.708333
8,2008,Wankhede Stadium,28.000000,140.000000,37.785714,20.500000,8.450000
9,2009,Buffalo Park,32.000000,122.222222,32.975610,20.000000,6.750000


### Merge player and venue statistics

In [72]:
dfpv = dfp.set_index(["season","venue"]).join(dfv.set_index(["season","venue"])).reset_index()
dfpv = dfpv[["season","venue","team","player","bat_avg","bat_sr","bat_avgsr","sv_bat_avg","sv_bat_sr",
             "sv_bat_avgsr","wickets","balls_bowled","bowl_avg","bowl_sr","bowl_econ","sv_bowl_avg",
             "sv_bowl_econ"]]
dfpv

Unnamed: 0,season,venue,team,player,bat_avg,bat_sr,bat_avgsr,sv_bat_avg,sv_bat_sr,sv_bat_avgsr,wickets,balls_bowled,bowl_avg,bowl_sr,bowl_econ,sv_bowl_avg,sv_bowl_econ
0,2008,Dr DY Patil Sports Academy,Rajasthan Royals,SA Asnodkar,33.500000,119.642857,40.080357,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
1,2008,Dr DY Patil Sports Academy,Rajasthan Royals,SR Watson,30.000000,153.846154,46.153846,29.0,137.777778,40.080357,3.0,48.0,18.333333,16.000000,6.875000,21.0,7.500000
2,2008,Dr DY Patil Sports Academy,Rajasthan Royals,YK Pathan,28.500000,142.500000,40.612500,29.0,137.777778,40.080357,3.0,24.0,7.333333,8.000000,5.500000,21.0,7.500000
3,2008,Dr DY Patil Sports Academy,Rajasthan Royals,M Kaif,8.000000,100.000000,8.000000,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
4,2008,Dr DY Patil Sports Academy,Rajasthan Royals,SK Warne,8.000000,100.000000,8.000000,29.0,137.777778,40.080357,1.0,36.0,53.000000,36.000000,8.833333,21.0,7.500000
5,2008,Dr DY Patil Sports Academy,Rajasthan Royals,Sohail Tanvir,7.500000,115.384615,8.653846,29.0,137.777778,40.080357,1.0,43.0,53.000000,43.000000,7.395349,21.0,7.500000
6,2008,Dr DY Patil Sports Academy,Rajasthan Royals,Kamran Akmal,6.000000,85.714286,5.142857,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
7,2008,Dr DY Patil Sports Academy,Rajasthan Royals,GC Smith,5.000000,55.555556,2.777778,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
8,2008,Dr DY Patil Sports Academy,Rajasthan Royals,Niraj Patel,2.000000,18.181818,0.363636,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
9,2008,Dr DY Patil Sports Academy,Rajasthan Royals,M Rawat,1.000000,20.000000,0.200000,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000


### Batting metrics:
For each season
    1. WPA
    2. sum_over_venue ( bat_avg * bat_sr - (bat_avg_venueSeason * bat_sr_venueSeason) )
                                   or
       sum_over_venue ( (bat_avg - bat_avg_venueSeason) * (bat_sr - bat_sr_venueSeason) )
    3. (?) Total runs, average, strike rate

### Batting metric 2

In [11]:
dfbat = dfpv.copy()
dfbat["bat_venue"] = dfbat["bat_avgsr"] - dfbat["sv_bat_avgsr"]
dfbat = dfbat.groupby(["season","team","player"]).sum()[["bat_venue"]].reset_index()
#aa = dfbat[(dfbat["venue"]=="M Chinnaswamy Stadium") & (dfbat["season"]==2016)]
aa = dfbat[(dfbat["season"]==2017)]
aa.sort_values("bat_venue", ascending=False)

Unnamed: 0,season,team,player,bat_venue
1444,2017,Kolkata Knight Riders,CA Lynn,250.893603
1520,2017,Sunrisers Hyderabad,DA Warner,120.810152
1450,2017,Kolkata Knight Riders,MK Pandey,79.836636
1423,2017,Gujarat Lions,SK Raina,63.103959
1501,2017,Royal Challengers Bangalore,CH Gayle,59.777547
1493,2017,Rising Pune Supergiant,RA Tripathi,51.881516
1402,2017,Delhi Daredevils,SS Iyer,49.125592
1522,2017,Sunrisers Hyderabad,KS Williamson,42.954315
1464,2017,Mumbai Indians,JC Buttler,41.707159
1499,2017,Royal Challengers Bangalore,AB de Villiers,36.403145


### Bowling metrics:

    1. WPA
    2. sum_over_venue ( bowl_avg * bowl_econ - (bowl_avg_venueSeason * bowl_econ_venueSeason) )
                                   or
       sum_over_venue( (bowl_avg - bowl_avg_venueSeason) * (bowl_econ - bowl_econ_venueSeason) )       
    3. Wickets weighted by batsman's metric.
    4. (?) Total wickets, average, economy, strike rate

### Bowling metric 2

In [77]:
dfpv

Unnamed: 0,season,venue,team,player,bat_avg,bat_sr,bat_avgsr,sv_bat_avg,sv_bat_sr,sv_bat_avgsr,wickets,balls_bowled,bowl_avg,bowl_sr,bowl_econ,sv_bowl_avg,sv_bowl_econ
0,2008,Dr DY Patil Sports Academy,Rajasthan Royals,SA Asnodkar,33.500000,119.642857,40.080357,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
1,2008,Dr DY Patil Sports Academy,Rajasthan Royals,SR Watson,30.000000,153.846154,46.153846,29.0,137.777778,40.080357,3.0,48.0,18.333333,16.000000,6.875000,21.0,7.500000
2,2008,Dr DY Patil Sports Academy,Rajasthan Royals,YK Pathan,28.500000,142.500000,40.612500,29.0,137.777778,40.080357,3.0,24.0,7.333333,8.000000,5.500000,21.0,7.500000
3,2008,Dr DY Patil Sports Academy,Rajasthan Royals,M Kaif,8.000000,100.000000,8.000000,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
4,2008,Dr DY Patil Sports Academy,Rajasthan Royals,SK Warne,8.000000,100.000000,8.000000,29.0,137.777778,40.080357,1.0,36.0,53.000000,36.000000,8.833333,21.0,7.500000
5,2008,Dr DY Patil Sports Academy,Rajasthan Royals,Sohail Tanvir,7.500000,115.384615,8.653846,29.0,137.777778,40.080357,1.0,43.0,53.000000,43.000000,7.395349,21.0,7.500000
6,2008,Dr DY Patil Sports Academy,Rajasthan Royals,Kamran Akmal,6.000000,85.714286,5.142857,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
7,2008,Dr DY Patil Sports Academy,Rajasthan Royals,GC Smith,5.000000,55.555556,2.777778,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
8,2008,Dr DY Patil Sports Academy,Rajasthan Royals,Niraj Patel,2.000000,18.181818,0.363636,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000
9,2008,Dr DY Patil Sports Academy,Rajasthan Royals,M Rawat,1.000000,20.000000,0.200000,29.0,137.777778,40.080357,0.0,0.0,,,,21.0,7.500000


In [92]:
dfball = dfpv.copy()
#dfball = dfball[dfball["balls_bowled"]>=12]
dfball["bowl_venue"] = (dfball["bowl_avg"] - dfball["sv_bowl_avg"]) 

aa = dfball[dfball["season"]==2017]
aa = aa[(aa["player"]=="B Kumar") | (aa["player"]=="JD Unadkat")]
aa
#dfball = dfball.groupby(["season","team","player"]).sum()[["bowl_venue"]].reset_index()
#aa = dfball[(dfball["season"]==2017)]
#aa.sort_values("bowl_venue")

Unnamed: 0,season,venue,team,player,bat_avg,bat_sr,bat_avgsr,sv_bat_avg,sv_bat_sr,sv_bat_avgsr,wickets,balls_bowled,bowl_avg,bowl_sr,bowl_econ,sv_bowl_avg,sv_bowl_econ,bowl_venue
5884,2017,M Chinnaswamy Stadium,Rising Pune Supergiant,JD Unadkat,2.0,40.0,0.8,24.5,120.714286,30.8675,2.0,24.0,12.5,12.0,6.25,14.0,6.875,-1.5
6060,2017,"Rajiv Gandhi International Stadium, Uppal",Sunrisers Hyderabad,B Kumar,0.0,0.0,0.0,32.5,136.700337,44.915001,13.0,168.0,14.846154,12.923077,6.892857,22.0,7.7,-7.153846
6183,2017,Wankhede Stadium,Sunrisers Hyderabad,B Kumar,4.0,133.333333,5.333333,32.0,142.222222,45.511111,3.0,24.0,7.0,8.0,5.25,24.8,7.214674,-17.8


In [93]:
aa = dfp[dfp["season"]==2017]
aa = aa[(aa["player"]=="B Kumar") | (aa["player"]=="JD Unadkat")]
aa


Unnamed: 0,season,venue,team,player,runs_scored,balls_faced,num_innings,wickets,balls_bowled,runs_conceded,bat_avg,bat_sr,bat_avgsr,bowl_avg,bowl_sr,bowl_econ,bowl_avgecon
6,2017,Wankhede Stadium,Sunrisers Hyderabad,B Kumar,4,3.0,1,3.0,24.0,21.0,4.0,133.333333,5.333333,7.0,8.0,5.25,0.428571
129,2017,"Rajiv Gandhi International Stadium, Uppal",Sunrisers Hyderabad,B Kumar,0,1.0,1,13.0,168.0,193.0,0.0,0.0,0.0,14.846154,12.923077,6.892857,0.875648
341,2017,M Chinnaswamy Stadium,Rising Pune Supergiant,JD Unadkat,2,5.0,1,2.0,24.0,25.0,2.0,40.0,0.8,12.5,12.0,6.25,0.16


In [101]:
aa = df[df["season"]==2017]
aa = aa[aa["bowler"]=="JD Unadkat"]
len(aa)

279

In [100]:
dfp[dfp["player"]=="JD Unadkat"]

Unnamed: 0,season,venue,team,player,runs_scored,balls_faced,num_innings,wickets,balls_bowled,runs_conceded
341,2017,M Chinnaswamy Stadium,Rising Pune Supergiant,JD Unadkat,2,5.0,1,2.0,24.0,25.0
1844,2014,Sheikh Zayed Stadium,Delhi Daredevils,JD Unadkat,1,5.0,1,3.0,24.0,32.0
2037,2014,"Punjab Cricket Association Stadium, Mohali",Delhi Daredevils,JD Unadkat,0,1.0,1,1.0,12.0,3.0
2626,2013,"Rajiv Gandhi International Stadium, Uppal",Royal Challengers Bangalore,JD Unadkat,1,1.0,1,2.0,24.0,24.0
2907,2013,JSCA International Stadium Complex,Royal Challengers Bangalore,JD Unadkat,1,1.0,1,0.0,24.0,27.0


In [104]:
df1 = df.copy()
df1 = df1[df1['is_super_over']==0]
df1 = df1[df1['wide_runs']==0]
df1 = df1[df1['noball_runs']==0]
df1 = df1.groupby(['season','venue','bowling_team','bowler']).count()["date"].reset_index()
df1.rename(columns={"date":"balls_bowled","bowling_team":"team","bowler":"player"}, inplace=True)
df1

df1[(df1["season"]==2017) & (df1["player"]=="JD Unadkat")]

Unnamed: 0,season,venue,team,player,balls_bowled
4498,2017,Eden Gardens,Rising Pune Supergiant,JD Unadkat,24
4552,2017,Feroz Shah Kotla,Rising Pune Supergiant,JD Unadkat,24
4653,2017,M Chinnaswamy Stadium,Rising Pune Supergiant,JD Unadkat,24
4709,2017,Maharashtra Cricket Association Stadium,Rising Pune Supergiant,JD Unadkat,107
4797,2017,"Rajiv Gandhi International Stadium, Uppal",Rising Pune Supergiant,JD Unadkat,48
4899,2017,Wankhede Stadium,Rising Pune Supergiant,JD Unadkat,48


In [93]:
df['venue'].unique()

array(['Rajiv Gandhi International Stadium, Uppal',
       'Maharashtra Cricket Association Stadium',
       'Saurashtra Cricket Association Stadium', 'Holkar Cricket Stadium',
       'M Chinnaswamy Stadium', 'Wankhede Stadium', 'Eden Gardens',
       'Feroz Shah Kotla',
       'Punjab Cricket Association IS Bindra Stadium, Mohali',
       'Green Park', 'Punjab Cricket Association Stadium, Mohali',
       'Sawai Mansingh Stadium', 'MA Chidambaram Stadium, Chepauk',
       'Dr DY Patil Sports Academy', 'Newlands', "St George's Park",
       'Kingsmead', 'SuperSport Park', 'Buffalo Park',
       'New Wanderers Stadium', 'De Beers Diamond Oval', 'OUTsurance Oval',
       'Brabourne Stadium', 'Sardar Patel Stadium, Motera',
       'Barabati Stadium', 'Vidarbha Cricket Association Stadium, Jamtha',
       'Himachal Pradesh Cricket Association Stadium', 'Nehru Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'Subrata Roy Sahara Stadium',
       'Shaheed Veer Nar

In [100]:
dfp

Unnamed: 0,season,venue,team,player,runs_scored,balls_faced,num_innings,wickets,balls_bowled,runs_conceded,bat_avg,bat_sr,bowl_avg,bowl_sr
0,2017,Wankhede Stadium,Sunrisers Hyderabad,DA Warner,49,34.0,1,0.0,0.0,0.0,49.000000,144.117647,,
1,2017,Wankhede Stadium,Sunrisers Hyderabad,S Dhawan,48,42.0,1,0.0,0.0,0.0,48.000000,114.285714,,
2,2017,Wankhede Stadium,Sunrisers Hyderabad,BCJ Cutting,20,10.0,1,0.0,12.0,18.0,20.000000,200.000000,inf,inf
3,2017,Wankhede Stadium,Sunrisers Hyderabad,DJ Hooda,9,9.0,1,1.0,12.0,18.0,9.000000,100.000000,18.000000,12.000000
4,2017,Wankhede Stadium,Sunrisers Hyderabad,NV Ojha,9,9.0,1,0.0,0.0,0.0,9.000000,100.000000,,
5,2017,Wankhede Stadium,Sunrisers Hyderabad,Yuvraj Singh,5,7.0,1,0.0,0.0,0.0,5.000000,71.428571,,
6,2017,Wankhede Stadium,Sunrisers Hyderabad,B Kumar,4,3.0,1,3.0,24.0,21.0,4.000000,133.333333,7.000000,8.000000
7,2017,Wankhede Stadium,Sunrisers Hyderabad,Rashid Khan,2,4.0,1,1.0,24.0,19.0,2.000000,50.000000,19.000000,24.000000
8,2017,Wankhede Stadium,Sunrisers Hyderabad,V Shankar,1,2.0,1,0.0,0.0,0.0,1.000000,50.000000,,
9,2017,Wankhede Stadium,Royal Challengers Bangalore,AB de Villiers,43,27.0,1,0.0,0.0,0.0,43.000000,159.259259,,
