#### Plotly
- Creates interactives visualisation in the form for HTML files
- Drawback : Can't work with a live data source
- Dash is used to create live data based dashboards 

In [30]:
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.graph_objs as go

In [31]:
match = pd.read_csv('matches.csv')
delivery = pd.read_csv('deliveries.csv')
ipl = delivery.merge(match,left_on='match_id',right_on='id')
ipl.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,...,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,


In [32]:
# Problem: Draw a scatter plot between batsman avg. (X-axis) and batsman strike rate (Y-axis) of the top 50 batsman in IPL (all time)

In [33]:
top50 = ipl.groupby('batsman')['batsman_runs'].sum().sort_values(ascending=False).head(50).index.tolist()
new_ipl = ipl[ipl['batsman'].isin(top50)]

In [34]:
# Calculating Strike Rate
runs = new_ipl.groupby('batsman')['batsman_runs'].sum()
balls = new_ipl.groupby('batsman')['batsman_runs'].count()
sr = (runs/balls)*100
sr = sr.reset_index()
sr

Unnamed: 0,batsman,batsman_runs
0,AB de Villiers,145.129059
1,AC Gilchrist,133.054662
2,AJ Finch,126.299213
3,AM Rahane,117.486549
4,AT Rayudu,123.014257
5,BB McCullum,126.318203
6,BJ Hodge,121.422376
7,CH Gayle,144.194313
8,DA Miller,137.709251
9,DA Warner,138.318401


In [35]:
# Calculating Avg.
# Avg. = (total no. of runs)/(no. dismissals)
out = ipl[ipl['player_dismissed'].isin(top50)]
nouts = out['player_dismissed'].value_counts()
avg=runs/nouts
avg = avg.reset_index()
avg.rename(columns={'index':'batsman',0:'avg'},inplace=True)
avg=avg.merge(sr,on='batsman')
avg

Unnamed: 0,batsman,avg,batsman_runs
0,AB de Villiers,38.307692,145.129059
1,AC Gilchrist,27.223684,133.054662
2,AJ Finch,27.186441,126.299213
3,AM Rahane,33.593407,117.486549
4,AT Rayudu,27.146067,123.014257
5,BB McCullum,28.112245,126.318203
6,BJ Hodge,33.333333,121.422376
7,CH Gayle,41.022472,144.194313
8,DA Miller,34.733333,137.709251
9,DA Warner,40.14,138.318401


In [36]:
# plot scatter plot
trace = go.Scatter(x=avg['avg'], y=avg['batsman_runs'],
                  mode='markers',text=avg['batsman'])
data=[trace]

layout=go.Layout(title='Batsman Avg vs Strike Rate',
                xaxis={'title':'Batsman Average'},
                yaxis={'title':'Striking Power'})

fig=go.Figure(data=data,layout=layout)

pyo.plot(fig)

'temp-plot.html'

In [37]:
# marker color
trace = go.Scatter(x=avg['avg'], y=avg['batsman_runs'],
                  mode='markers',text=avg['batsman'],
                   marker={'color':'#00a65A','size':16},
                  )
data=[trace]

layout=go.Layout(title='Batsman Avg vs Strike Rate',
                xaxis={'title':'Batsman Average'},
                yaxis={'title':'Striking Power'})

fig=go.Figure(data=data,layout=layout)

pyo.plot(fig,filename='my_file.html')

'my_file.html'

#### Line Chart
- Extension of scatter plot. Usually used to show a time series data

In [38]:
single = ipl[ipl['batsman']=='V Kohli']
performance = single.groupby('season')['batsman_runs'].sum().reset_index()
performance

Unnamed: 0,season,batsman_runs
0,2008,165
1,2009,246
2,2010,307
3,2011,557
4,2012,364
5,2013,639
6,2014,359
7,2015,505
8,2016,973
9,2017,308


In [39]:
# plot(fig) -> fig(data, layout)go.Figure -> layout(title,xaxis={},yaxis={})go.Layout -> data=[trace] -> trace(x,y,mode,marker={})go.Scatter

trace = go.Scatter(x=performance['season'],y=performance['batsman_runs'],
                  mode='lines',
                  marker={'color':'#00A65A'})

data = [trace]

layout = go.Layout(title='YoY performance',
                  xaxis={'title':'Season'},
                  yaxis={'title':'Total Runs'})

fig = go.Figure(data=data,layout=layout)

pyo.plot(fig,filename='line.html')

'line.html'

In [40]:
# plot(fig) -> fig(data, layout)go.Figure -> layout(title,xaxis={},yaxis={})go.Layout -> data=[trace] -> trace(x,y,mode,marker={})go.Scatter

trace = go.Scatter(x=performance['season'],y=performance['batsman_runs'],
                  mode='lines+markers',
                  marker={'color':'#00A65A'})

data = [trace]

layout = go.Layout(title='YoY performance',
                  xaxis={'title':'Season'},
                  yaxis={'title':'Total Runs'})

fig = go.Figure(data=data,layout=layout)

pyo.plot(fig,filename='line_dot.html')

'line_dot.html'

In [41]:
single1 = ipl[ipl['batsman']=='V Kohli']
performance1 = single1.groupby('season')['batsman_runs'].sum().reset_index()
single2 = ipl[ipl['batsman']=='RG Sharma']
performance2 = single2.groupby('season')['batsman_runs'].sum().reset_index()

In [42]:
# plot(fig) -> fig(data, layout)go.Figure -> layout(title,xaxis={},yaxis={})go.Layout -> data=[trace] -> trace(x,y,mode,marker={})go.Scatter

trace1 = go.Scatter(x=performance1['season'],y=performance1['batsman_runs'],
                  mode='lines+markers',
                  marker={'color':'#00A65A'},name='V Kohli')
trace2 = go.Scatter(x=performance2['season'],y=performance2['batsman_runs'],
                  mode='lines+markers',
                  marker={'color':'#FFA65A'},name='RG Sharma')

data = [trace1,trace2]

layout = go.Layout(title='YoY performance',
                  xaxis={'title':'Season'},
                  yaxis={'title':'Total Runs'})

fig = go.Figure(data=data,layout=layout)

pyo.plot(fig,filename='Rohit-Virat.html')

'Rohit-Virat.html'

In [43]:
# Multiple line charts

def batsman_comp(*name):
    data=[]
    for i in name:
        single=ipl[ipl['batsman']==i]
        performance = single.groupby('season')['batsman_runs'].sum().reset_index()

        trace=go.Scatter(x=performance['season'],y=performance['batsman_runs'],
                        mode='lines+markers',name=i)
        data.append(trace)
    layout=go.Layout(title='Batsman Record Comparator',
                    xaxis={'title':'Season'},
                    yaxis={'title':'Runs'})
    fig=go.Figure(data=data, layout=layout)
    pyo.plot(fig,filename='yoy.html')

In [44]:
batsman_comp('V Kohli','RG Sharma','DA Warner','MS Dhoni')

#### Bar Plot

In [45]:
top10 = ipl.groupby('batsman')['batsman_runs'].sum().sort_values(ascending=False).head(10).index.tolist()
top10_df = ipl[ipl['batsman'].isin(top10)]

In [47]:
top10_score=top10_df.groupby('batsman')['batsman_runs'].sum().reset_index()
top10_score

Unnamed: 0,batsman,batsman_runs
0,AB de Villiers,3486
1,CH Gayle,3651
2,DA Warner,4014
3,G Gambhir,4132
4,MS Dhoni,3560
5,RG Sharma,4207
6,RV Uthappa,3778
7,S Dhawan,3561
8,SK Raina,4548
9,V Kohli,4423


In [51]:
# TDLF
trace=go.Bar(x=top10_score['batsman'],y=top10_score['batsman_runs'])
data=[trace]
layout=go.Layout(title='Top 10 Batters',
                xaxis={'title':'Batsman'},
                yaxis={'title':'Total Runs'})
fig=go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='bar10.html')

'bar10.html'

#### There are three types of Bar Graphs
1. Nested Bar Graph
2. Stacked Bar Graph
3. Overlayed Bar Graph

In [67]:
iw = top10_df.groupby(['batsman','inning'])['batsman_runs'].sum().reset_index()
mask = iw['inning']==1
mask2 = iw['inning']==2
one=iw[mask]
two=iw[mask2]

one.rename(columns={'batsman_runs':'1st innings'},inplace=True)
two.rename(columns={'batsman_runs':'2nd innings'},inplace=True)

final = one.merge(two,on='batsman')[['batsman','1st innings','2nd innings']]
final



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,batsman,1st innings,2nd innings
0,AB de Villiers,2128,1345
1,CH Gayle,2003,1623
2,DA Warner,2118,1896
3,G Gambhir,1699,2433
4,MS Dhoni,2232,1328
5,RG Sharma,2344,1863
6,RV Uthappa,1516,2262
7,S Dhawan,2262,1299
8,SK Raina,2647,1893
9,V Kohli,2391,2027


In [79]:
# plot(fig) -> fig(data, layout)go.Figure -> layout(title,xaxis={},yaxis={})go.Layout -> data=[trace] -> trace(x,y,mode,marker={})go.Scatter

trace1 = go.Bar(x=final['batsman'],y=final['1st innings'],
                  marker={'color':'#00A65A'},name='1st innings')
trace2 = go.Bar(x=final['batsman'],y=final['2nd innings'],
                  marker={'color':'#C1A052'},name='2nd innings')

data = [trace1,trace2]

layout = go.Layout(title='Innings-wise Score',
                  xaxis={'title':'Batsman'},
                  yaxis={'title':'Runs'},
                  barmode='overlay') # stack

fig = go.Figure(data=data,layout=layout)

pyo.plot(fig,filename='two_innings.html')

'two_innings.html'

In [81]:
# plot(fig) -> fig(data, layout)go.Figure -> layout(title,xaxis={},yaxis={})go.Layout -> data=[trace] -> trace(x,y,mode,marker={})go.Scatter

trace1 = go.Bar(x=final['batsman'],y=final['1st innings'],
                  marker={'color':'#00A65A'},name='1st innings')
trace2 = go.Bar(x=final['batsman'],y=final['2nd innings'],
                  marker={'color':'#C1A052'},name='2nd innings')

trace_list = [trace1,trace2]

layout = go.Layout(title='Innings-wise Score',
                  xaxis={'title':'Batsman'},
                  yaxis={'title':'Runs'},
                  )

fig = go.Figure(data=trace_list,layout=layout)

pyo.plot(fig,filename='two_innings2.html')

'two_innings2.html'

#### Bubble Plot

In [85]:
new_ipl.sample(3)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
122990,519,1,Chennai Super Kings,Delhi Daredevils,7,6,DR Smith,F du Plessis,Imran Tahir,0,...,normal,0,Chennai Super Kings,1,0,A Nehra,"MA Chidambaram Stadium, Chepauk",RK Illingworth,VA Kulkarni,
14973,64,2,Kolkata Knight Riders,Deccan Chargers,9,7,DJ Hussey,SC Ganguly,SB Bangar,0,...,normal,0,Kolkata Knight Riders,0,5,DJ Hussey,Eden Gardens,BF Bowden,K Hariharan,
118990,502,1,Delhi Daredevils,Kings XI Punjab,12,5,JP Duminy,KD Karthik,Shivam Sharma,0,...,normal,0,Kings XI Punjab,0,4,AR Patel,Feroz Shah Kotla,HDPK Dharmasena,PG Pathak,


In [87]:
new_ipl=new_ipl[new_ipl['batsman_runs']==6]

In [91]:
six=new_ipl.groupby('batsman')['batsman_runs'].count().reset_index()

In [92]:
six.head()

Unnamed: 0,batsman,batsman_runs
0,AB de Villiers,158
1,AC Gilchrist,92
2,AJ Finch,59
3,AM Rahane,60
4,AT Rayudu,79


In [93]:
avg.head()

Unnamed: 0,batsman,avg,batsman_runs
0,AB de Villiers,38.307692,145.129059
1,AC Gilchrist,27.223684,133.054662
2,AJ Finch,27.186441,126.299213
3,AM Rahane,33.593407,117.486549
4,AT Rayudu,27.146067,123.014257


In [96]:
x = avg.merge(six,on='batsman')
x.head()

Unnamed: 0,batsman,avg,batsman_runs_x,batsman_runs_y
0,AB de Villiers,38.307692,145.129059,158
1,AC Gilchrist,27.223684,133.054662,92
2,AJ Finch,27.186441,126.299213,59
3,AM Rahane,33.593407,117.486549,60
4,AT Rayudu,27.146067,123.014257,79


In [104]:
trace=go.Scatter(x=x['avg'],y=x['batsman_runs_x'],mode='markers',
                marker={'size':x['batsman_runs_y']})
data=[trace]
layout=go.Layout(title='Bubble Chart',
                xaxis={'title':'Avg'},
                yaxis={'title':'SR'})
fig=go.Figure(data=data, layout=layout)
pyo.plot(fig,filename='Bubble.html')

'Bubble.html'

#### Distplot
- Distribution plot

In [105]:
import plotly.figure_factory as ff

In [106]:
avg.head()

Unnamed: 0,batsman,avg,batsman_runs
0,AB de Villiers,38.307692,145.129059
1,AC Gilchrist,27.223684,133.054662
2,AJ Finch,27.186441,126.299213
3,AM Rahane,33.593407,117.486549
4,AT Rayudu,27.146067,123.014257


In [107]:
hist_data = [avg['avg']]
group_labels = ['Average']
fig = ff.create_distplot(hist_data,group_labels)
pyo.plot(fig,filename='dist_plot.html')

'dist_plot.html'

In [110]:
hist_data = [avg['avg'],avg['batsman_runs']]
group_labels = ['Average','Strike_Rate']
fig = ff.create_distplot(hist_data,group_labels,bin_size=[10,20])
pyo.plot(fig,filename='dist_plot_two.html')

'dist_plot_two.html'

#### Histogram

In [114]:
x = delivery.groupby('batsman')['batsman_runs'].count()>150
x = x[x].index.tolist()

new = delivery[delivery['batsman'].isin(x)]

runs = new.groupby('batsman')['batsman_runs'].sum()
balls = new.groupby('batsman')['batsman_runs'].count()

sr = (runs/balls)*100
sr = sr.reset_index()
sr

Unnamed: 0,batsman,batsman_runs
0,A Ashish Reddy,142.857143
1,A Mishra,89.756098
2,A Symonds,124.711908
3,AA Jhunjhunwala,99.541284
4,AB Agarkar,111.875000
...,...,...
169,Y Nagar,105.166052
170,Y Venugopal Rao,113.872832
171,YK Pathan,138.860326
172,YV Takawale,104.918033


In [126]:
trace = go.Histogram(x=sr['batsman_runs'],xbins={'size':2,'start':100,'end':120})
data=[trace]
layout=go.Layout(title='Strike Rate variations',
                xaxis={'title':'Strike Rate'})
fig=go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='histogram.html')

'histogram.html'

#### Heatmaps

In [127]:
six = delivery[delivery['batsman_runs']==6]
six = six.groupby(['batting_team','over'])['batsman_runs'].count().reset_index()
six

Unnamed: 0,batting_team,over,batsman_runs
0,Chennai Super Kings,1,9
1,Chennai Super Kings,2,21
2,Chennai Super Kings,3,49
3,Chennai Super Kings,4,45
4,Chennai Super Kings,5,53
...,...,...,...
290,Sunrisers Hyderabad,16,31
291,Sunrisers Hyderabad,17,25
292,Sunrisers Hyderabad,18,49
293,Sunrisers Hyderabad,19,58


In [128]:
trace = go.Heatmap(x=six['batting_team'],y=six['over'],z=six['batsman_runs'])
data = [trace]
layout = go.Layout(title='SIX Heatmap')
fig = go.Figure(data=data,layout=layout)
pyo.plot(fig,filename='Heatmap.html')

'Heatmap.html'

In [129]:
dots = delivery[delivery['batsman_runs']==0]
dots = dots.groupby(['batting_team','over'])['batsman_runs'].count().reset_index()
dots

Unnamed: 0,batting_team,over,batsman_runs
0,Chennai Super Kings,1,642
1,Chennai Super Kings,2,583
2,Chennai Super Kings,3,520
3,Chennai Super Kings,4,516
4,Chennai Super Kings,5,464
...,...,...,...
295,Sunrisers Hyderabad,16,202
296,Sunrisers Hyderabad,17,216
297,Sunrisers Hyderabad,18,171
298,Sunrisers Hyderabad,19,181


In [131]:
from plotly import tools

trace1 = go.Heatmap(x=six['batting_team'],y=six['over'],
                   z=six['batsman_runs'].values.tolist())
trace2 = go.Heatmap(x=dots['batting_team'],y=dots['over'],
                   z=dots['batsman_runs'].values.tolist())
fig=tools.make_subplots(rows=1,cols=2,subplot_titles=["6's","0's"],shared_yaxes=True)
fig.append_trace(trace1,1,1)
fig.append_trace(trace2,1,2)
pyo.plot(fig,filename='Heatmap-side-by-side.html')

'Heatmap-side-by-side.html'