# IPL Data Visualization Demo Notebook

## Resolve Imports

In [12]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn import preprocessing

# Plotly Related Imports

import plotly.plotly as py
import plotly
import cufflinks as cf
cf.set_config_file(offline=True, world_readable=True, theme='pearl')

import plotly.graph_objs as go
plotly.offline.init_notebook_mode(connected=True) # Use Plotly offline mode without tokens

# Custom Imports
from utilities.metrics import *
from utilities.vis_funcs import *
from utilities.plots import *

## Check Data Files

In [13]:
# Directory that every later cell reads its CSV inputs from.
data_path = './data'

# Print the directory listing so missing inputs are noticed up front.
available_files = os.listdir(data_path)
print(available_files)

['player_batting_performance.csv', 'players_wiki.csv', '2018_ages.csv', 'leader_wiki2.csv', 'player_bowling_performance.csv', 'players_age_wiki.csv', 'team_performance.csv', 'toss_result.csv', 'deliveries.csv', 'leader_wiki1.csv', 'match.csv', 'match_result.csv', 'matches.csv']


## Head to Head Heatmap

In [14]:
# Load the match table that feeds the head-to-head matrix.
match_csv = os.path.join(data_path, 'match.csv')
heat_file = pd.read_csv(match_csv)

# Teams shown in the notebook. The order matters: this list is passed to
# win_heatmap below and is indexed by position in later cells.
team_name_rank = [
    'Royal Challengers Bangalore',
    'Kings XI Punjab',
    'Mumbai Indians',
    'Kolkata Knight Riders',
    'Chennai Super Kings',
    'Delhi Capitals',
    'Rajasthan Royals',
    'Sunrisers Hyderabad',
]

# Only the two sides and the winner are needed, grouped by the first side.
table = heat_file[['team1', 'team2', 'winner']]
grp = table.groupby('team1')

# sort_name and win_heatmap are project helpers (utilities.*); their outputs
# are used as the axis labels and the z-matrix of the heatmap in the next cell.
team_name_sort = sort_name(team_name_rank)
result, total_win = win_heatmap(grp, team_name_rank)

In [15]:
# Head-to-head heatmap: `result` is the z-matrix and `team_name_sort` labels
# both axes.
tick_font = dict(family='Old Standard TT, serif', size=12, color='black')

data = [go.Heatmap(z=result, x=team_name_sort, y=team_name_sort, colorscale='Reds')]
layout = go.Layout(
    title=dict(
        text='Head to Head',
        font=dict(family='Times New Roman, monospace', size=20, color='#000000'),
        x=0,
        xref='paper',
    ),
    width=900,
    height=700,
    xaxis=dict(tickfont=tick_font, tickangle=30),
    yaxis=dict(tickfont=tick_font),
)
fig = go.Figure(data=data, layout=layout)

# Render with the offline renderer that the import cell initialises
# (init_notebook_mode). The online plotly.plotly renderer needs account
# credentials and uploads the figure.
plotly.offline.iplot(fig)


Consider using IPython.display.IFrame instead



## Toss winning stats

In [16]:
heat_file = pd.read_csv(os.path.join(data_path, 'match.csv'))

# Number of matches per (toss winner, toss decision) pair.
toss_counts = (
    heat_file[['toss_winner', 'toss_decision', 'id']]
    .groupby(['toss_winner', 'toss_decision'])
    .agg({'id': 'count'})
)

# Convert the counts to a percentage of each toss winner's total.
# transform('sum') keeps the original (winner, decision) index, so the result
# does not depend on how groupby.apply treats group keys in a given pandas
# version.
toss_decisions = 100 * toss_counts / toss_counts.groupby(level=0).transform('sum')

# Build the grouping *before* it is used, so this cell does not rely on a
# `grp` left over from an earlier cell.
table = heat_file.loc[:, ['team1', 'team2', 'winner']]
grp = table.groupby('team1')
result, total_win = win_heatmap(grp, team_name_rank)

# Overall win rate per team, sorted by team name and expressed in percent.
sb3 = pd.DataFrame(data=total_win, columns=['Win rate total'], index=team_name_rank)
sb3 = 100 * sb3.sort_index()

In [17]:
# Display order of the teams, given as positions into sb3's name-sorted index.
win_rate_order = [0, 4, 7, 3, 5, 2, 6, 1]
sorted_teams = list(sb3.index)
team_labels = sort_name([sorted_teams[i] for i in win_rate_order])

# Plain floats (not one-element arrays) so the trace gets a flat y series.
all_win_rates = sb3['Win rate total'].tolist()
win_rates = [all_win_rates[i] for i in win_rate_order]

# Positions of the same teams inside the per-toss-winner percentage lists.
# NOTE(review): these hard-coded positions, and the stride-2 slicing below,
# assume every toss winner has exactly two rows (one per decision) in a fixed
# order, and that the order matches `win_rate_order` -- confirm against the data.
toss_order = [0, 7, -1, 6, 9, 4, -2, 2]
toss_percent = list(toss_decisions['id'])
bat_percent = [toss_percent[0::2][i] for i in toss_order]
field_percent = [toss_percent[1::2][i] for i in toss_order]

bar_width = [0.3] * 8  # one width entry per displayed team

trace1 = go.Scatter(
    x=team_labels,
    y=win_rates,
    name='Winning rate',
    marker=dict(color='rgba(219, 64, 82, 1.0)'),
)
trace2 = go.Bar(
    x=team_labels,
    y=bat_percent,
    name='Bat',
    width=bar_width,
    marker=dict(
        color='rgba(219, 64, 82, 0.7)',
        line=dict(color='rgba(219, 64, 82, 1.0)', width=2),
    ),
)
trace3 = go.Bar(
    x=team_labels,
    y=field_percent,
    name='Field',
    width=bar_width,
    marker=dict(
        color='rgba(55, 128, 191, 0.7)',
        line=dict(color='rgba(55, 128, 191, 1.0)', width=2),
    ),
)

data = [trace1, trace2, trace3]
layout = go.Layout(
    title=dict(
        text='Toss result',
        font=dict(family='Times New Roman, monospace', size=20, color='#000000'),
        x=0,
        xref='paper',
    ),
    xaxis=dict(
        tickangle=30,
        tickfont=dict(family='Times New Roman, monospace', size=12, color='#000000'),
    ),
    yaxis=dict(
        title='Percent',
        range=[0, 100],
        titlefont=dict(family='Times New Roman, monospace', size=16, color='#000000'),
        tickfont=dict(family='Times New Roman, monospace', size=12, color='#000000'),
    ),
)
fig = go.Figure(data=data, layout=layout)

# Render with the offline renderer initialised in the import cell; the online
# plotly.plotly renderer needs account credentials.
plotly.offline.iplot(fig, filename='bar-line')


Consider using IPython.display.IFrame instead



## Team Ranks over time

In [20]:
# Read the per-team performance table and hand it to the project helper
# `rank`; the next cell indexes its result by position, one entry per team.
rank_csv = os.path.join(data_path, 'team_performance.csv')
rank_file = pd.read_csv(rank_csv)
result_rank = rank(rank_file)

In [21]:
# One line per team: x is the season, y is whatever `rank` produced for it.
seasons = list(range(2008, 2020))

data = []
for i in range(len(result_rank)):
    data.append(go.Scatter(
        x=seasons,
        y=list(result_rank[i]),
        name=team_name_rank[i],
    ))


def _tier_label(text, y):
    """Return an annotation dict placing `text` just left of the first season at rank `y`."""
    return dict(
        x=2007.5,
        y=y,
        showarrow=False,
        text=text,
        # x and y are data coordinates (season, rank), so they must reference
        # the axes. 'paper' expects 0-1 fractions of the plot area and would
        # push these labels far outside the figure.
        xref="x",
        yref="y",
    )


layout = go.Layout(
    annotations=[
        _tier_label("Top Four", 2),
        _tier_label("trash", 6),
    ],
    showlegend=True,
    title=dict(
        text="Ranking",
        x=0,
        xref='paper',
        font=dict(family='Times New Roman, monospace', size=20, color='#000000'),
    ),
    # Reversed range so that rank 1 is drawn at the top.
    yaxis=dict(range=[10.5, 0]),
    xaxis=dict(showgrid=False),
)

fig = go.Figure(data=data, layout=layout)

# Render with the offline renderer initialised in the import cell; the online
# plotly.plotly renderer needs account credentials.
plotly.offline.iplot(fig, filename='rank')

## Batting and Bowling Ability Analysis for Teams and Players

### 10 Best and 10 Worst Bowlers

In [4]:
# Ball-by-ball data; reused by the later batting and team-level cells.
deliveries_csv = os.path.join(data_path, 'deliveries.csv')
ball_data = load_data_as_df(deliveries_csv)

# Per-bowler metrics from the project helpers. Each helper receives its own
# copy of the frame.
eco = economy(ball_data.copy())
wick = wicket_taking(ball_data.copy())

# Restrict both tables to bowlers with more than 1000 balls bowled
# (the "_100" suffix is historical; the threshold is 1000).
eco_100 = eco.loc[eco['Number of Balls Thrown'].gt(1000)]
wick_100 = wick.loc[wick['Number of Balls Thrown'].gt(1000)]

# One row per bowler carrying both metrics.
out = wick_100.merge(eco_100, how='inner', on=['bowler', 'Number of Balls Thrown'])

In [5]:
# Bubble chart of the ten lowest-economy and the ten highest-economy bowlers.

out_best = out.nsmallest(10, 'Economy')
out_worst = out.nlargest(10, 'Economy')
b_comp = pd.concat([out_best, out_worst])

# Axis settings are built first so the Layout call below stays short.
economy_axis = dict(
    title='Economy',
    titlefont=dict(size=20),
    showticklabels=True,
    tickfont=dict(size=14),
    exponentformat='e',
    showexponent='all',
)
wicket_axis = dict(
    title='Wicket Taking Ability',
    titlefont=dict(family='Arial, sans-serif', size=20),
    showticklabels=True,
    tickfont=dict(family='Old Standard TT, serif', size=14, color='black'),
    exponentformat='e',
    showexponent='all',
)

layout = go.Layout(
    title=dict(text="10 Best and 10 Worst Bowlers", xref="paper", font=dict(size=20), x=0),
    legend=dict(x=1, y=0),
    xaxis=economy_axis,
    yaxis=wicket_axis,
)

# One bubble per bowler, sized by wickets taken.
b_comp.iplot(
    layout=layout,
    kind='bubble',
    x='Economy',
    y='Wicket Taking Ability',
    categories='bowler',
    size='Number of Wickets taken',
    text='bowler',
    xTitle='Bowler Economy',
    yTitle='Bowler Wicket Taking Ability',
    filename='cufflinks/simple-bubble-chart',
)

### 10 Best and 10 Worst Batsmen

In [6]:
# Per-batsman metrics from the project helpers; each gets its own copy of the
# ball-by-ball frame.
hh = hard_hit(ball_data.copy())
fs = fast_scoring(ball_data.copy())

# Restrict both tables to batsmen who faced more than 800 balls
# (the "_100" suffix is historical; the threshold is 800).
hh_100 = hh.loc[hh['Number of Balls Faced'].gt(800)]
fs_100 = fs.loc[fs['Number of Balls Faced'].gt(800)]

# One row per batsman carrying both metrics.
out = hh_100.merge(fs_100, how='inner', on=['batsman', 'Number of Balls Faced'])

In [7]:
# Bubble chart of the ten highest and ten lowest batsmen by 'Fast Scoring'.

# The team radar cell plots 'Fast Scoring' with a positive factor (larger is
# better), so the best batsmen are the largest values, not the smallest.
# NOTE(review): confirm against the definition of fast_scoring in utilities.metrics.
out_best = out.nlargest(10, 'Fast Scoring')
out_worst = out.nsmallest(10, 'Fast Scoring')
b_comp = pd.concat([out_best, out_worst])

# Axis settings are built first so the Layout call below stays short.
hard_hit_axis = dict(
    title='Hard Hit',
    titlefont=dict(size=20),
    showticklabels=True,
    tickfont=dict(size=14),
    exponentformat='e',
    showexponent='all',
)
fast_scoring_axis = dict(
    title='Fast Scoring Ability',
    titlefont=dict(family='Arial, sans-serif', size=20),
    showticklabels=True,
    tickfont=dict(family='Old Standard TT, serif', size=14, color='black'),
    exponentformat='e',
    showexponent='all',
)

layout = go.Layout(
    # Plural "Batsmen" to match the section heading.
    title=dict(text="10 Best and 10 Worst Batsmen", xref="paper", font=dict(size=20), x=0),
    legend=dict(x=1, y=0),
    xaxis=hard_hit_axis,
    yaxis=fast_scoring_axis,
)

# One bubble per batsman, sized by the 'runs' column.
b_comp.iplot(
    layout=layout,
    kind='bubble',
    x='Hard Hit',
    y='Fast Scoring',
    categories='batsman',
    size='runs',
    text='batsman',
    xTitle='Hard Hit',
    yTitle='Fast Scoring Ability',
    filename='cufflinks/simple-bubble-chart',
)

### Team Wise Analysis (2 Best and 2 Worst)

In [8]:
# Team-wise bowling stats, limited to teams with more than 10000 balls bowled.
eco = economy(ball_data.copy(), bowler='bowling_team')
wick = wicket_taking(ball_data.copy(), bowler='bowling_team')
eco_100 = eco[eco['Number of Balls Thrown'] > 10000]
wick_100 = wick[wick['Number of Balls Thrown'] > 10000]
out_bowl = pd.merge(wick_100, eco_100, how='inner', on=['bowling_team', 'Number of Balls Thrown'])

# Team-wise batting stats, limited to teams with more than 10000 balls faced.
hh = hard_hit(ball_data.copy(), batter='batting_team')
fs = fast_scoring(ball_data.copy(), batter='batting_team')
hh_100 = hh[hh['Number of Balls Faced'] > 10000]
fs_100 = fs[fs['Number of Balls Faced'] > 10000]
out_bat = pd.merge(hh_100, fs_100, how='inner', on=['batting_team', 'Number of Balls Faced'])

# Keep only the team key and the two metrics on each side. `columns=` is used
# because passing the axis positionally is rejected by pandas 2.x.
out_bowl = out_bowl.drop(
    columns=out_bowl.columns.difference(['bowling_team', 'Wicket Taking Ability', 'Economy'])
)
out_bat = out_bat.drop(
    columns=out_bat.columns.difference(['batting_team', 'Fast Scoring', 'Hard Hit'])
)

# Give both tables the same key so they can be joined into one row per team.
out_bowl = out_bowl.rename(columns={'bowling_team': 'team'})
out_bat = out_bat.rename(columns={'batting_team': 'team'})

out_full = pd.merge(out_bat, out_bowl, how='inner', on=['team'])

# Centre every metric on its mean (with_std=False: no variance scaling), so
# the radar charts show each team's deviation from the average.
o = out_full.copy()
mean_centerer = preprocessing.StandardScaler(with_std=False)
for metric in ['Hard Hit', 'Fast Scoring', 'Economy', 'Wicket Taking Ability']:
    # fit_transform returns an (n, 1) array; flatten it into a plain column.
    o[metric] = mean_centerer.fit_transform(o[metric].values.reshape(-1, 1)).ravel()

# Show the franchise under the name used in the rest of the notebook.
o = o.replace('Delhi Daredevils', 'Delhi Capitals')

In [9]:
# Radar (polar) chart per team, laid out on a 2x2 grid of polar subplots.


def _polar_panel(x_domain, y_domain):
    """Return the layout dict for one polar subplot occupying the given paper-fraction domain."""
    return dict(
        domain=dict(x=x_domain, y=y_domain),
        radialaxis=dict(visible=True, range=[-10, 7]),
    )


data = []

# Row positions in `o` of the four teams to display.
# NOTE(review): these positions depend on the row order of `o` -- confirm they
# still select the intended teams if the input data changes.
for slot, row_pos in enumerate([4, 2, 5, 1]):
    team_row = o.iloc[row_pos, :]
    data.append(go.Scatterpolar(
        # Per-metric factors bring the values onto a comparable scale. Economy
        # and Wicket Taking Ability are negated so that lower raw values plot
        # further out. The first point is repeated to close the polygon.
        r=[
            team_row['Hard Hit'] * 1000,
            team_row['Fast Scoring'] * 100,
            team_row['Economy'] * -10,
            team_row['Wicket Taking Ability'] * -5,
            team_row['Hard Hit'] * 1000,
        ],
        theta=['Hard Hit', 'Fast Scoring', 'Economy', 'Wicket Taking Ability', 'Hard Hit'],
        fill='toself',
        name=team_row['team'],
        # The first subplot is called "polar"; later ones are "polar2",
        # "polar3", ... The choice must depend on the slot counter, not on the
        # row position (which is never 0).
        subplot="polar" if slot == 0 else "polar" + str(slot + 1),
    ))


layout = go.Layout(
    margin=go.layout.Margin(l=10, r=10, b=10, t=10, pad=0),
    polar=_polar_panel([0, 0.46], [0.56, 1]),      # top-left
    polar2=_polar_panel([0, 0.46], [0, 0.44]),     # bottom-left
    polar3=_polar_panel([0.54, 1], [0.56, 1]),     # top-right
    polar4=_polar_panel([0.54, 1], [0, 0.44]),     # bottom-right
    showlegend=True,
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig, filename="multiple-subplots")

In [10]:
# Same four team radars as the subplot grid, overlaid on one polar axis.
data = []

# Row positions in `o` of the four teams to display.
# NOTE(review): these positions depend on the row order of `o` -- confirm they
# still select the intended teams if the input data changes.
for row_pos in [4, 2, 5, 1]:
    team_row = o.iloc[row_pos, :]
    data.append(go.Scatterpolar(
        # Per-metric factors bring the values onto a comparable scale. Economy
        # and Wicket Taking Ability are negated so that lower raw values plot
        # further out. The first point is repeated to close the polygon.
        r=[
            team_row['Hard Hit'] * 1000,
            team_row['Fast Scoring'] * 100,
            team_row['Economy'] * -10,
            team_row['Wicket Taking Ability'] * -5,
            team_row['Hard Hit'] * 1000,
        ],
        theta=['Hard Hit', 'Fast Scoring', 'Economy', 'Wicket Taking Ability', 'Hard Hit'],
        fill='toself',
        name=team_row['team'],
    ))


layout = go.Layout(
    polar=dict(
        radialaxis=dict(visible=True, range=[-10, 7]),
    ),
    showlegend=True,
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig, filename="multiple-subplots")