In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline 
import seaborn as sns

In [2]:
# Show up to 50 columns when a DataFrame is rendered.
pd.options.display.max_columns = 50

### Loading Kaggle IPL data

In [3]:
# Read the preprocessed IPL table and discard the 'Unnamed: 0' column
# (presumably a row index written out by an earlier to_csv -- TODO confirm).
df = pd.read_csv("data/df_kaggleAllIpl_preprocessed.csv").drop(columns="Unnamed: 0")

#### Adding an `is_bowler_wicket` column

In [21]:
# List the distinct dismissal labels (NaN included) present in the data.
df.dismissal_kind.unique()

array([nan, 'caught', 'bowled', 'run out', 'lbw', 'caught and bowled',
       'stumped', 'retired hurt', 'hit wicket', 'obstructing the field'], dtype=object)

In [31]:
# Flag (1/0) the deliveries whose dismissal is credited to the bowler.
# 'hit wicket' is included because that mode of dismissal is credited to the
# bowler; 'run out', 'retired hurt' and 'obstructing the field' are not, so
# they map to 0, as do rows where dismissal_kind is NaN.
df["is_bowler_wicket"] = (
    df["dismissal_kind"]
    .isin(["caught", "bowled", "lbw", "caught and bowled", "stumped", "hit wicket"])
    .astype(int)
)

In [32]:
# Preview only the first rows; rendering the whole frame is slow and noisy.
df.head(10)

Unnamed: 0,match_id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,wide_runs,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder,is_bowler_wicket
0,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0
1,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0
2,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,4,0,4,,,,0
3,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,0,0,0,0,0,0,0,0,,,,0
4,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,2,0,0,0,0,0,2,2,,,,0
5,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,6,S Dhawan,DA Warner,TS Mills,0,0,0,0,0,0,0,0,0,,,,0
6,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,7,S Dhawan,DA Warner,TS Mills,0,0,0,1,0,0,0,1,1,,,,0
7,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,2,1,S Dhawan,DA Warner,A Choudhary,0,0,0,0,0,0,1,0,1,,,,0
8,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,2,2,DA Warner,S Dhawan,A Choudhary,0,0,0,0,0,0,4,0,4,,,,0
9,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,Sunrisers Hyderabad,Royal Challengers Bangalore,2,3,DA Warner,S Dhawan,A Choudhary,0,0,0,0,1,0,0,1,1,,,,0


### Total number of runs for each batsman in different seasons

In [20]:
# Total runs off the bat per (season, batsman).
# The column is selected *before* aggregating: summing the whole frame would
# also aggregate every string/date column, which is slow and is no longer
# silently skipped by recent pandas versions.
dfbr = df.groupby(['season', 'batsman'])['batsman_runs'].sum().reset_index()
# Latest season first, and within a season the highest scorers first.
dfbr = dfbr.sort_values(['season', 'batsman_runs'], ascending=False)
dfbr

Unnamed: 0,season,batsman,batsman_runs
1422,2017,DA Warner,641
1431,2017,G Gambhir,498
1495,2017,S Dhawan,479
1505,2017,SPD Smith,472
1500,2017,SK Raina,442
1434,2017,HM Amla,420
1465,2017,MK Pandey,396
1448,2017,KA Pollard,395
1480,2017,PA Patel,395
1487,2017,RA Tripathi,391


### Total number of runs for each batsman in different seasons at different venues

In [37]:
# Total runs off the bat per (season, venue, batsman).
# Select 'batsman_runs' before summing so that only that column is aggregated;
# summing the whole frame would also try to aggregate non-numeric columns.
dfvbr = df.groupby(['season', 'venue', 'batsman'])['batsman_runs'].sum().reset_index()
# All three sort keys are descending: season, then venue name, then runs.
dfvbr = dfvbr.sort_values(['season', 'venue', 'batsman_runs'], ascending=False)
dfvbr

Unnamed: 0,season,venue,batsman,batsman_runs
6221,2017,Wankhede Stadium,PA Patel,200
6218,2017,Wankhede Stadium,N Rana,198
6204,2017,Wankhede Stadium,KA Pollard,176
6224,2017,Wankhede Stadium,RG Sharma,171
6200,2017,Wankhede Stadium,JC Buttler,146
6197,2017,Wankhede Stadium,HH Pandya,132
6179,2017,Wankhede Stadium,AM Rahane,94
6241,2017,Wankhede Stadium,WP Saha,93
6206,2017,Wankhede Stadium,KH Pandya,82
6214,2017,Wankhede Stadium,MK Pandey,81


### Total number of wickets for all bowlers for each season

In [36]:
# Wickets per (season, bowler): sum of the 0/1 'is_bowler_wicket' flag.
# Only that column is aggregated (summing the whole frame would also try to
# aggregate non-numeric columns), and the rename is chained rather than done
# in place so the cell can be re-run safely.
dfbw = (
    df.groupby(['season', 'bowler'])['is_bowler_wicket']
    .sum()
    .reset_index()
    .rename(columns={"is_bowler_wicket": "wickets"})
)
# Latest season first, and within a season the leading wicket-takers first.
dfbw = dfbw.sort_values(['season', 'wickets'], ascending=False)
dfbw

Unnamed: 0,season,bowler,wickets
1049,2017,B Kumar,26
1076,2017,JD Unadkat,24
1077,2017,JJ Bumrah,20
1089,2017,MJ McClenaghan,19
1073,2017,Imran Tahir,18
1061,2017,CR Woakes,17
1112,2017,Rashid Khan,17
1126,2017,Sandeep Sharma,17
1135,2017,UT Yadav,17
1102,2017,P Negi,16


### Average runs in different venues for each season

In [7]:
# Total runs per innings, keyed by (venue, season, match_id, inning).
# Select 'total_runs' before summing so that only that column is aggregated;
# summing the whole frame would also try to aggregate non-numeric columns.
df1 = df.groupby(['venue', 'season', 'match_id', 'inning'])['total_runs'].sum().reset_index()


Unnamed: 0,venue,season,match_id,inning,total_runs
0,Barabati Stadium,2010,186,1,170
1,Barabati Stadium,2010,186,2,164
2,Barabati Stadium,2010,189,1,171
3,Barabati Stadium,2010,189,2,161
4,Barabati Stadium,2012,337,1,126
5,Barabati Stadium,2012,337,2,127
6,Barabati Stadium,2012,348,1,186
7,Barabati Stadium,2012,348,2,173
8,Barabati Stadium,2014,486,1,231
9,Barabati Stadium,2014,486,2,187
