# Visualizations for IPL data 2008 - 2019

In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# import data

In [3]:
# importing the crawled data which are in csv files
data_folder = 'data/'
matches = pd.read_csv(data_folder + 'match_result.csv')

In [4]:
matches.head()

Unnamed: 0,id,year,team1,team2,match_result,match_winner,toss_winner,action
0,1,2018,Mumbai Indians,Chennai Super Kings,Normal,Chennai Super Kings,Chennai Super Kings,field
1,2,2018,Kings XI Punjab,Delhi Capitals,Normal,Kings XI Punjab,Kings XI Punjab,field
2,3,2018,Kolkata Knight Riders,Royal Challengers Bangalore,Normal,Kolkata Knight Riders,Kolkata Knight Riders,field
3,4,2018,Sunrisers Hyderabad,Rajasthan Royals,Normal,Sunrisers Hyderabad,Sunrisers Hyderabad,field
4,5,2018,Chennai Super Kings,Kolkata Knight Riders,Normal,Chennai Super Kings,Chennai Super Kings,field


# Pre-processing data

In [5]:
# Replacing team names with their latest ones
# as some teams changed their name in between
def preprocess_team_names(df):
    '''method for preprocessing team names
    '''
    df.replace('Rising Pune Supergiants', 'Rising Pune Supergiant', inplace=True)
    df.replace('Delhi Daredevils', 'Delhi Capitals', inplace=True)

preprocess_team_names(matches)
print('All teams: ', matches['team1'].unique())
print('Total number of teams(2008-2019): ', len(matches['team1'].unique()))

All teams:  ['Mumbai Indians' 'Kings XI Punjab' 'Kolkata Knight Riders'
 'Sunrisers Hyderabad' 'Chennai Super Kings' 'Rajasthan Royals'
 'Royal Challengers Bangalore' 'Delhi Capitals' 'Rising Pune Supergiant'
 'Gujarat Lions' 'Deccan Chargers' 'Pune Warriors India']
Total number of teams(2008-2019):  12


# Toss analysis

In [6]:
team = 'Sunrisers Hyderabad'
# related matches
rltd_mtchs = matches[(matches['team1']==team) | (matches['team2']==team)]
# rltd_mtchs

In [8]:
# toss_decisions = matches[['toss_winner', 'toss_decision', 'id']].groupby(['toss_winner', 'toss_decision']).count()
toss_decisions = matches[['toss_winner', 'toss_decision', 'id']].\
                        groupby(['toss_winner', 'toss_decision']).agg({'id': 'count'})

toss_decisions.head(3)
toss_decisions = toss_decisions.groupby(level=0).apply(lambda x: 100 * x / float(x.sum()))
toss_decisions

KeyError: "['toss_decision'] not in index"

In [None]:
# toss_decisions.plot.bar()
ax = toss_decisions.unstack().plot(kind='bar', rot=45, figsize=(15,7), fontsize=10)
# ax = toss_decisions.unstack().plot(kind='barh', rot=0, figsize=(5,10), legend=False, grid=True)
ax.legend(['Batted', 'Chased'])
ax.set_title('After winning the toss')
# ax.yaxis.set_visible(False)
# ax.set_yticklabels([])

# set individual bar lables using above list
for i in ax.patches:
    # get_x pulls left or right; get_height pushes up or down
    ax.text(i.get_x()-.03, i.get_height()+.5, \
            str(int(round((i.get_height()/100)*100)))+'%', fontsize=10,
                color='black')

In [None]:
# toss_decisions.unstack()

# Analyze ball by ball data

In [None]:
ball_data = pd.read_csv(data_folder + 'ball_by_ball_data.csv')
preprocess_team_names(ball_data)

In [None]:
short_names = {'Chennai Super Kings':'CSK', 'Delhi Capitals': 'DC', 'Kings XI Punjab' :'KXIP', 
               'Mumbai Indians':'MI', 'Rajasthan Royals':'RR', 'Royal Challengers Bangalore':'RCB', 
               'Sunrisers Hyderabad': 'SRH', 'Kolkata Knight Riders': 'KKR'}

In [None]:
ball_data.head()

In [None]:
ball_data.batting_team.unique()

In [None]:
# ball_data.groupby('batting_team')['batsman'].nunique()
# ball_data.groupby('bowling_team')['bowler'].nunique()

In [None]:
strikers = ball_data[['match_id', 'batting_team', 'batsman']].copy()
non_strikers = ball_data[['match_id', 'batting_team', 'non_striker']].copy()
non_strikers.rename(columns={'non_striker':'batsman'}, inplace=True)
all_batsman = pd.concat([strikers, non_strikers], ignore_index=True)
all_batsman.rename(columns={'batsman':'player', 'batting_team':'team'}, inplace=True)

In [None]:
all_bowlers = ball_data[['match_id', 'bowling_team', 'bowler']].copy()
all_bowlers.rename(columns={'bowler':'player', 'bowling_team':'team'}, inplace=True)

In [None]:
all_players = pd.concat([all_batsman, all_bowlers], ignore_index=True)

In [None]:
all_players.groupby('team')['player'].nunique()

In [None]:
d = {'match_id':'Total matches', 'player':'Number of players used'}
all_players = all_players.replace(short_names)
used_players = all_players.groupby('team').agg({'match_id':pd.Series.nunique, 'player':pd.Series.nunique}).rename(columns=d)

In [None]:
used_players = used_players[used_players['Total matches']>=100]
used_players

In [None]:
used_players.index.size, used_players['Total matches'].values

In [None]:
def shorten_team_names(df):
    '''method for shortening team names
    '''

In [None]:
# y = np.arange(used_players.index.size)
y = used_players.index
used_players.sort_values(by=['Number of players used'],inplace=True, ascending=False)

fig, axes = plt.subplots(ncols=2, sharey=True, figsize=(10,5))
barlist1 = axes[0].barh(y, used_players['Total matches'].values, align='center', color='skyblue', zorder=10)
axes[0].set_title('Matches played', loc='left', fontsize=15)
barlist2 = axes[1].barh(y, used_players['Number of players used'].values, align='center', color='royalblue', zorder=10)
axes[1].set_title('Players used', loc='left', fontsize=15)

# axes[0].invert_xaxis()
# axes[0].set(yticks=y, yticklabels=used_players.index)
# axes[0].yaxis.tick_right()
# axes[0].axis('off')
# axes[1].axis('off')
axes[0].set_frame_on(False)
axes[1].set_frame_on(False)
axes[0].tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off

axes[0].tick_params(axis=u'both', which=u'both',length=0)

axes[1].tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    labelbottom=False) # labels along the bottom edge are off
axes[1].tick_params(axis=u'both', which=u'both',length=0)

for ax in axes.flat:
    ax.margins(0.09)
#     ax.grid(True)

for i, v in enumerate(used_players['Total matches'].values):
    axes[0].text(v + 3, i , str(v), color='black') # fontweight='bold'

for i, v in enumerate(used_players['Number of players used'].values):
    axes[1].text(v + 3, i , str(v), color='black') # fontweight='bold'
    
fig.tight_layout()
fig.subplots_adjust(wspace=0)
barlist1[6].set_color('indianred')
barlist2[6].set_color('indianred')
plt.show()

In [None]:
used_players