In [229]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.offline as pyo

In [230]:
#creating dataset
matches = pd.read_csv('matches.csv')
deliveries = pd.read_csv('deliveries.csv')
ipl=deliveries.merge(matches,left_on='match_id',right_on='id')
#ipl.head()
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batsman', 'non_striker', 'bowler', 'is_super_over', 'wide_runs',
       'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'Season', 'city', 'date', 'team1',
       'team2', 'toss_winner', 'toss_decision', 'result', 'dl_applied',
       'winner', 'win_by_runs', 'win_by_wickets', 'player_of_match', 'venue',
       'umpire1', 'umpire2', 'umpire3'],
      dtype='object')

In [231]:
#Find first 50 run scorer
top50_batsman=ipl.groupby('batsman')['batsman_runs'].sum().sort_values(ascending=False).head(50).index.to_list()

In [232]:
top50_batman_df= ipl[ipl['batsman'].isin(top50_batsman)]
top50_batman_df_grp=top50_batman_df.groupby('batsman')['batsman_runs'].agg(['sum','count'])
top50_batman_df_grp['sr']=top50_batman_df_grp['sum']/top50_batman_df_grp['count'] *100
top50_batman_df_grp.reset_index(inplace=True)
top50_batman_df_grp.head()

Unnamed: 0,batsman,sum,count,sr
0,AB de Villiers,4428,2977,148.740343
1,AC Gilchrist,2069,1555,133.054662
2,AD Russell,1445,803,179.950187
3,AJ Finch,1744,1372,127.113703
4,AM Rahane,3850,3215,119.751166


In [233]:
top50_outs=ipl[ipl['player_dismissed'].isin(top50_batsman)]['player_dismissed'].value_counts().reset_index()
top50_outs.columns=['batsman','outs']
top50_sr_avg=top50_batman_df_grp.merge(top50_outs,on='batsman')
top50_sr_avg['avg']=top50_sr_avg['sum']/top50_sr_avg['outs']
top50_sr_avg.head()




Unnamed: 0,batsman,sum,count,sr,outs,avg
0,AB de Villiers,4428,2977,148.740343,110,40.254545
1,AC Gilchrist,2069,1555,133.054662,76,27.223684
2,AD Russell,1445,803,179.950187,42,34.404762
3,AJ Finch,1744,1372,127.113703,67,26.029851
4,AM Rahane,3850,3215,119.751166,116,33.189655


In [234]:
# in plotly to create a graph we require figure object.
# this figure object takes 2 inputs trace (which is data) and layout(used to set other properties of layout)
# trace is a list of data which needs to be plotted on the graph
#scatter plot

trace1=go.Scatter(x=top50_sr_avg['avg'],y=top50_sr_avg['sr'],mode='markers')
data=[trace1]

layout=go.Layout(title='Average vs Strike Rate of top 50 Batsman',xaxis={'title':'Average of batsman'},yaxis={'title':'Strike rate'})
fig=go.Figure(data,layout)
pyo.plot(fig)


'temp-plot.html'

In [235]:
# to show batsman name on hover use text parameter, makrker parameter is used to change size & color of marker
# also we can specify the file name in plot function else it will create all file with temp-plot

trace1=go.Scatter(x=top50_sr_avg['avg'],y=top50_sr_avg['sr'],mode='markers',text=top50_sr_avg['batsman'],marker={'color':'#00a65a','size':16})
data=[trace1]

layout=go.Layout(title='Average vs Strike Rate of top 50 Batsman',xaxis={'title':'Average of batsman'},yaxis={'title':'Strike rate'})
fig=go.Figure(data,layout)
pyo.plot(fig,filename='scatterplot.html')


'scatterplot.html'

# Line Chart

In [237]:
vk=ipl[ipl['batsman']=='V Kohli']
vk_perf=vk.groupby('Season')['batsman_runs'].sum().reset_index()
vk_perf


Unnamed: 0,Season,batsman_runs
0,IPL-2008,165
1,IPL-2009,246
2,IPL-2010,307
3,IPL-2011,557
4,IPL-2012,364
5,IPL-2013,639
6,IPL-2014,359
7,IPL-2015,505
8,IPL-2016,973
9,IPL-2017,308


In [238]:
#line chart also we have to use scatter plot and if do not provide mode then by default it will create line chart or modes ='lines'
trace=go.Scatter(x=vk_perf['Season'],y=vk_perf['batsman_runs'])
data=[trace]

layout=go.Layout(title='Virat Kohli Runs over time', xaxis={'title':'Seasons'},yaxis={'title':'Runs'})

fig=go.Figure(data,layout)
pyo.plot(fig)

'temp-plot.html'

In [239]:
#mode=lines+markers to get both
trace=go.Scatter(x=vk_perf['Season'],y=vk_perf['batsman_runs'],mode='lines+markers')
data=[trace]

layout=go.Layout(title='Virat Kohli Runs over time', xaxis={'title':'Seasons'},yaxis={'title':'Runs'})

fig=go.Figure(data,layout)
pyo.plot(fig)

'temp-plot.html'

In [240]:
#lets plot multiple line plots, name parameter is added to provide legend to the chart
msd=ipl[ipl['batsman']=='MS Dhoni']
msd_perf=msd.groupby('Season')['batsman_runs'].sum().reset_index()
msd_perf


trace1=go.Scatter(x=vk_perf['Season'],y=vk_perf['batsman_runs'],mode='lines+markers',name='V Kohli')
trace2=go.Scatter(x=msd_perf['Season'],y=msd_perf['batsman_runs'],mode='lines+markers',name ='MS Dhoni')

data=[trace1,trace2]

layout=go.Layout(title='Virat Kohli Runs over time', xaxis={'title':'Seasons'},yaxis={'title':'Runs'})

fig=go.Figure(data,layout)
pyo.plot(fig)



'temp-plot.html'

In [241]:
def player_comp(*name):
    trace=[]
    for player in name:
        df=ipl[ipl['batsman']==player]
        df_grp= df.groupby('Season')['batsman_runs'].sum().reset_index()
        trace.append(go.Scatter(x=df_grp['Season'],y=df_grp['batsman_runs'],mode='lines+markers',name=player))
    layout=go.Layout(title='Players Comparison over time', xaxis={'title':'Seasons'},yaxis={'title':'Runs'})
    fig=go.Figure(trace,layout)
    pyo.plot(fig)


        

In [242]:
player_comp('V Kohli','MS Dhoni','RG Sharma','DA Warner','SK Raina')

# Bar Chart
* Used for 1 categorical and 1 numerical value specically count

In [244]:
top10_batsman=ipl.groupby('batsman')['batsman_runs'].sum().sort_values(ascending=False).head(10).index.to_list()
¸= ipl[ipl['batsman'].isin(top10_batsman)]
top10_batman_df_grp=top10_batman_df.groupby('batsman')['batsman_runs'].sum().reset_index()

trace=go.Bar(x=top10_batman_df_grp['batsman'],y=top10_batman_df_grp['batsman_runs'])
data=[trace]
layout=go.Layout(title='Top 10 Bastman Stats',xaxis={'title':'Batsman'},yaxis={'title':'Total Runs Scored'})

fig=go.Figure(data,layout)
pyo.plot(fig)

SyntaxError: invalid character '¸' (U+00B8) (1224012957.py, line 2)

In [None]:
# we have three type of bar chart nested, stacked , overlayered

top10_batman_df_inn=top10_batman_df.groupby(['batsman','inning'])['batsman_runs'].sum().reset_index()
first_inn_mask=top10_batman_df_inn['inning']==1
second_inn_mask=top10_batman_df_inn['inning']==2

first_inn=top10_batman_df_inn[first_inn_mask]
second_inn=top10_batman_df_inn[second_inn_mask]

first_inn.rename(columns={'batsman_runs':'First Inning'},inplace=True)
second_inn.rename(columns={'batsman_runs':'Second Inning'},inplace=True)

inn_df=first_inn.merge(second_inn,on='batsman')
inn_df.head() 


In [None]:
#overlay means bar will come over the first bar of the player which may hide first bar if it has more value

trace1=go.Bar(x=inn_df['batsman'],y=inn_df['First Inning'])
trace2=go.Bar(x=inn_df['batsman'],y=inn_df['Second Inning'])

data=[trace1,trace2]

layout=go.Layout(title='Top 10 Bastman Stats',xaxis={'title':'Batsman'},yaxis={'title':'Total Runs Scored'},barmode='overlay')

fig=go.Figure(data,layout)
pyo.plot(fig)

In [None]:
#stack means bar will come above the first bar of the player

trace1=go.Bar(x=inn_df['batsman'],y=inn_df['First Inning'])
trace2=go.Bar(x=inn_df['batsman'],y=inn_df['Second Inning'])

data=[trace1,trace2]

layout=go.Layout(title='Top 10 Bastman Stats',xaxis={'title':'Batsman'},yaxis={'title':'Total Runs Scored'},barmode='stack')

fig=go.Figure(data,layout)
pyo.plot(fig)

In [None]:
# nested bydefalut it is created in which bar will be side by side

trace1=go.Bar(x=inn_df['batsman'],y=inn_df['First Inning'])
trace2=go.Bar(x=inn_df['batsman'],y=inn_df['Second Inning'])

data=[trace1,trace2]

layout=go.Layout(title='Top 10 Bastman Stats',xaxis={'title':'Batsman'},yaxis={'title':'Total Runs Scored'})

fig=go.Figure(data,layout)
pyo.plot(fig)

# Bubble Plot
* its a kind of scatter plot which can accomodate two more parameters one is size of bubble and other is color of bubble
* so for bubble plot we have three numerical variables (one for x,for y and for size of buble) and one categorical varaible for color of the buble

In [None]:
#here using color we can provide the fourth parameter if exist
six_df = top50_batman_df[top50_batman_df['batsman_runs']==6]
six_df_final=six_df.groupby('batsman')['batsman_runs'].count().reset_index()
six_df_final.rename(columns={'batsman_runs':'No of sixes'},inplace=True)

top50_sr_avg_six=top50_sr_avg.merge(six_df_final,on='batsman')

trace1=go.Scatter(x=top50_sr_avg_six['sr'],y=top50_sr_avg_six['avg'],mode='markers',marker={'size':top50_sr_avg_six['No of sixes']})
data=[trace1]

layout=go.Layout(title='Top 10 Bastman Stats',xaxis={'title':'Batsman'},yaxis={'title':'Total Runs Scored'})

fig=go.Figure(data,layout)
pyo.plot(fig)


# Boxplot
* done for numerical columns

In [257]:
match_agg=deliveries.groupby('match_id')['total_runs'].sum().reset_index()
season_wise=match_agg.merge(matches,left_on='match_id',right_on='id')[['match_id','total_runs','Season']]
season_wise

Unnamed: 0,match_id,total_runs,Season
0,1,379,IPL-2017
1,2,371,IPL-2017
2,3,367,IPL-2017
3,4,327,IPL-2017
4,5,299,IPL-2017
...,...,...,...
751,11347,280,IPL-2019
752,11412,276,IPL-2019
753,11413,341,IPL-2019
754,11414,317,IPL-2019


In [261]:
trace=go.Box(x=season_wise['total_runs'],name='All Seasons')
data=[trace]

layout=go.Layout(title='Season Wise Data',xaxis={'title':'Total Score'})

fig=go.Figure(data,layout)
pyo.plot(fig)


'temp-plot.html'

In [263]:
#to change color
trace=go.Box(x=season_wise['total_runs'],name='All Seasons',marker={'color':'#00a65a'})
data=[trace]

layout=go.Layout(title='Season Wise Data',xaxis={'title':'Total Score'})

fig=go.Figure(data,layout)
pyo.plot(fig)

'temp-plot.html'

In [287]:
# multiple boxplots
trace1=go.Box(x=season_wise[season_wise['Season']=='IPL-2017']['total_runs'],name='Season 2017',marker={'color':'#00a65a'})
trace2=go.Box(x=season_wise[season_wise['Season']=='IPL-2008']['total_runs'],name='Season 2008')

data=[trace1,trace2]

layout=go.Layout(title='Season Wise Data',xaxis={'title':'Total Score'})

fig=go.Figure(data,layout)
pyo.plot(fig)

'temp-plot.html'

In [285]:
season_wise[season_wise['Season']=='IPL-2017']

Unnamed: 0,match_id,total_runs,Season
0,1,379,IPL-2017
1,2,371,IPL-2017
2,3,367,IPL-2017
3,4,327,IPL-2017
4,5,299,IPL-2017
5,6,275,IPL-2017
6,7,358,IPL-2017
7,8,298,IPL-2017
8,9,313,IPL-2017
9,10,317,IPL-2017
