# <b> <center>Plotly Graph Object (GO)</b></center>  
- Introduction to Plotly
1. Scatter Plots
2. Line Charts
3. Bar Plots
4. Bubble Plots
5. Box Plots
6. Histograms
7. Distplots
8. Heatmaps

<span style="font-size: 24px;"> <b> 📦 Importing the Libraries

In [201]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as pyo
import plotly.express as px

<span style="font-size: 24px;"> <b> 📩 Importing the IPL Dataset

In [202]:
match = pd.read_csv('matches.csv')

match.head(3)

Unnamed: 0,id,season,city,date,match_type,player_of_match,venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,2007/08,Bangalore,2008-04-18,League,BB McCullum,M Chinnaswamy Stadium,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335983,2007/08,Chandigarh,2008-04-19,League,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",Kings XI Punjab,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,runs,33.0,241.0,20.0,N,,MR Benson,SL Shastri
2,335984,2007/08,Delhi,2008-04-19,League,MF Maharoof,Feroz Shah Kotla,Delhi Daredevils,Rajasthan Royals,Rajasthan Royals,bat,Delhi Daredevils,wickets,9.0,130.0,20.0,N,,Aleem Dar,GA Pratapkumar


In [203]:
delivery = pd.read_csv('deliveries.csv')

delivery.head(3)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,fielder
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,legbyes,0,,,
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,wides,0,,,


<span style="font-size: 24px;"> <b> ➕ Merging both datasets

In [204]:
ipl = delivery.merge(match, left_on='match_id', right_on='id')

ipl.head(5)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen


## <b> <center> 1. Scatter Plots

<span style="font-size: 16px;"> <b> 🎯 ***Question:*** Plot the scatter plot between the batsman Avg(X axis) and batsman Strike Rate(Y axis) of the top 50 batsman in IPL (All time) </span> </b>

***Answer:***  Firstly we have to extract the batsman dataframe, then we have to calculate Strike Rate and the Avg, in the end plot on this. 

In [205]:
# 🏏 Step 1: Extract Batsman Dataframe
batsman_df = ipl[['match_id', 'inning', 'batting_team', 'batter', 'batsman_runs', 'is_wicket']]

batsman_df.head() 

# batsman_runs → runs scored on each ball by the batter
# is_wicket → helps to calculate dismissals for batting average


Unnamed: 0,match_id,inning,batting_team,batter,batsman_runs,is_wicket
0,335982,1,Kolkata Knight Riders,SC Ganguly,0,0
1,335982,1,Kolkata Knight Riders,BB McCullum,0,0
2,335982,1,Kolkata Knight Riders,BB McCullum,0,0
3,335982,1,Kolkata Knight Riders,BB McCullum,0,0
4,335982,1,Kolkata Knight Riders,BB McCullum,0,0


In [206]:
# 🏏 Step 2: Calculate Total Runs & Balls

batsman_summary = batsman_df.groupby('batter').agg(
    runs = ('batsman_runs', 'sum'),
    balls = ('batsman_runs', 'count'),
    outs = ('is_wicket', 'sum')
).reset_index()

batsman_summary.head()


# runs → total runs scored
# balls → total balls faced
# outs → number of times dismissed

Unnamed: 0,batter,runs,balls,outs
0,A Ashish Reddy,280,196,15
1,A Badoni,634,505,26
2,A Chandila,4,7,1
3,A Chopra,53,75,5
4,A Choudhary,25,20,2


In [207]:
# 🏏 Step 3: Calculate Strike Rate (SR) & Batting Average (Avg)

batsman_summary['strike_rate'] = (batsman_summary['runs']/ batsman_summary['balls']) * 100

batsman_summary['average'] = batsman_summary['runs']/ batsman_summary['outs']

batsman_summary.head()

Unnamed: 0,batter,runs,balls,outs,strike_rate,average
0,A Ashish Reddy,280,196,15,142.857143,18.666667
1,A Badoni,634,505,26,125.544554,24.384615
2,A Chandila,4,7,1,57.142857,4.0
3,A Chopra,53,75,5,70.666667,10.6
4,A Choudhary,25,20,2,125.0,12.5


In [208]:
# step 4: Top 50 batsman run wise

top50_batsman_summary = batsman_summary.sort_values(by ='runs', ascending = False).iloc[:50]

top50_batsman_summary.head()

Unnamed: 0,batter,runs,balls,outs,strike_rate,average
631,V Kohli,8014,6236,218,128.511867,36.761468
512,S Dhawan,6769,5483,194,123.454313,34.891753
477,RG Sharma,6630,5183,232,127.918194,28.577586
147,DA Warner,6567,4849,164,135.429986,40.042683
546,SK Raina,5536,4177,168,132.535312,32.952381


In [209]:
# 🏏 Step 5: Plotting

px.scatter(top50_batsman_summary, x = 'average', y = 'strike_rate', hover_data= 'batter')

In [210]:
# Scatter PLot using GO

trace1 = go.Scatter(x= top50_batsman_summary['average'],
                    y= top50_batsman_summary['strike_rate'],
                    mode= 'markers',
                    )

layout = go.Layout(title= 'Strike Rate Vs Average',
                   xaxis={'title' : 'Average'},
                   yaxis={'title': 'Strike Rate'})

fig = go.Figure(data = [trace1], layout = layout)

fig.show()

<span style="font-size: 16px;"> <b> 🎯 ***Question:*** How can we add batsman name when hovering over the datapoint? </span> </b>
***Answer:*** hovertext

In [211]:
trace1 = go.Scatter(x= top50_batsman_summary['average'],
                    y= top50_batsman_summary['strike_rate'],
                    mode= 'markers',
                    hovertext=top50_batsman_summary['batter'])

layout = go.Layout(title= 'Strike Rate Vs Average',
                   xaxis={'title' : 'Average'},
                   yaxis={'title': 'Strike Rate'})

fig = go.Figure(data = [trace1], layout = layout)

fig.show()

<span style="font-size: 18px;"> <b> 🎯 ***Question:*** How can we change the color of the data point? </span> </b>

In [212]:
trace1 = go.Scatter(x= top50_batsman_summary['average'],
                    y= top50_batsman_summary['strike_rate'],
                    mode= 'markers',
                    hovertext=top50_batsman_summary['batter'],
                    marker = {'color': top50_batsman_summary['runs'], 'showscale': True},
                    )

layout = go.Layout(title= 'Strike Rate Vs Average',
                   xaxis={'title' : 'Average'},
                   yaxis={'title': 'Strike Rate'})

fig = go.Figure(data = [trace1], layout = layout)

fig.show()

<span style="font-size: 18px;"> <b> 🔥 Enhance Your Scatter Plot to the max</span> </b>

<b>Popular Parameters in marker</b>

color → Single color or array of values for each point.

colorscale → Continuous scale (Viridis, Plasma, Cividis, Rainbow, etc.).

cmin / cmax → Normalize the color scale.

showscale → Whether to show the color bar.

size → Size of markers (can also be an array, e.g. proportional to runs).

opacity → Transparency.

symbol → Shape of points (circle, square, diamond, triangle-up, star, hexagon, etc.).

line → Border around markers (with width and color).

In [213]:
trace1 = go.Scatter(
    x = top50_batsman_summary['average'],
    y = top50_batsman_summary['strike_rate'],
    mode = 'markers',
    hovertext = top50_batsman_summary['batter'],
    marker = dict(
        size = top50_batsman_summary['runs'] / 350,  # bigger bubble = more runs
        color = top50_batsman_summary['runs'],       # color by runs
        colorscale = 'Plasma',                       # colormap
        showscale = True,                            # show colorbar
        opacity = 0.8,
        symbol = 'circle',
        line = dict(width=2, color='black')
    )
)

layout = go.Layout(
    title = 'Strike Rate vs Average (Top 50 Batsmen)',
    xaxis = {'title': 'Average'},
    yaxis = {'title': 'Strike Rate'}
)

fig = go.Figure(data=[trace1], layout=layout)
fig.show()


## <center> <b> 2. Line Chart

<span style="font-size: 18px;"> <b> 🎯 ***Question:*** Virat Kohli Year by Year Performance </span> </b>

In [214]:
single_batsman = ipl[ipl['batter'] == 'V Kohli']

single_batsman_year_by_year = single_batsman.groupby('season')['batsman_runs'].sum().reset_index()

single_batsman_year_by_year


Unnamed: 0,season,batsman_runs
0,2007/08,165
1,2009,246
2,2009/10,307
3,2011,557
4,2012,364
5,2013,639
6,2014,359
7,2015,505
8,2016,973
9,2017,308


In [215]:
data = go.Scatter(x = single_batsman_year_by_year['season'],
                  y= single_batsman_year_by_year['batsman_runs'],
                  mode ='markers+lines',
                  text =  single_batsman_year_by_year['batsman_runs'],
                  hovertemplate='Runs: %{y}<extra></extra>'
                  )

layout = go.Layout(title='Virat Kohli Runs per year',
                   xaxis= dict(title = 'Year'),
                   yaxis=dict(title = 'Runs'))

fig = go.Figure(data, layout)

fig.show()

# Explanation:
# hovertemplate='Runs: %{y}<extra></extra>'

# %{y} → shows only the y-axis value (runs in your case).
# <extra></extra> → removes the default trace info (like “trace 0”).

<span style="font-size: 20px;"> <b> Multiple Line Charts

In [216]:
single_batsman1 = ipl[ipl['batter'] == 'MS Dhoni']

single_batsman_year_by_year1 = single_batsman1.groupby('season')['batsman_runs'].sum().reset_index()

single_batsman_year_by_year1

Unnamed: 0,season,batsman_runs
0,2007/08,414
1,2009,332
2,2009/10,287
3,2011,392
4,2012,358
5,2013,461
6,2014,371
7,2015,372
8,2016,284
9,2017,290


In [217]:
trace1 = go.Scatter(x = single_batsman_year_by_year['season'],
                  y= single_batsman_year_by_year['batsman_runs'],
                  mode ='markers+lines',
                  text =  single_batsman_year_by_year['batsman_runs'],
                  hovertemplate='Runs: %{y}<extra></extra>'
                  )

trace2 = go.Scatter(x = single_batsman_year_by_year1['season'],
                  y= single_batsman_year_by_year1['batsman_runs'],
                  mode ='markers+lines',
                  text =  single_batsman_year_by_year1['batsman_runs'],
                  hovertemplate='Runs: %{y}<extra></extra>')

data = [trace1, trace2]

layout = go.Layout(title='Virat Kohli Runs per year',
                   xaxis= dict(title = 'Year'),
                   yaxis=dict(title = 'Runs'))

fig = go.Figure(data = data, layout = layout)

fig.show()

<span style="font-size: 16px;"> <b> ***Question:*** Which color represent which batsman it is difficult to say, so we are gonna use name parameter.

In [218]:
trace1 = go.Scatter(x = single_batsman_year_by_year['season'],
                  y= single_batsman_year_by_year['batsman_runs'],
                  mode ='markers+lines',
                  text =  single_batsman_year_by_year['batsman_runs'],
                  hovertemplate='Runs: %{y}<extra></extra>',
                  name = 'Virat Kholi'
                  )

trace2 = go.Scatter(x = single_batsman_year_by_year1['season'],
                  y= single_batsman_year_by_year1['batsman_runs'],
                  mode ='markers+lines',
                  text =  single_batsman_year_by_year1['batsman_runs'],
                  hovertemplate='Runs: %{y}<extra></extra>',
                  name = 'MS Dhoni')

data = [trace1, trace2]

layout = go.Layout(title='Virat Kohli Runs per year',
                   xaxis= dict(title = 'Year'),
                   yaxis=dict(title = 'Runs'))

fig = go.Figure(data = data, layout = layout)

fig.show()

<span style="font-size: 18px;"> <b> 🎯 ***Question:*** </b> Make a function, in which you pass the batsman name in tuple and display the line plot of all the batsman of the tuple. 

In [219]:
def batsman_line_chart_comparison(*name):
    ''' Function to Visualize the line plot of all the batsman present in the tuple'''
    data = []
    for i in name:
        
        single_batsman = ipl[ipl['batter'] == i]
        single_batsman_year_by_year = single_batsman.groupby('season')['batsman_runs'].sum().reset_index()
        
        trace = go.Scatter(x = single_batsman_year_by_year['season'],
                  y= single_batsman_year_by_year['batsman_runs'],
                  mode ='markers+lines',
                  text =  single_batsman_year_by_year['batsman_runs'],
                  hovertemplate='Runs: %{y}<extra></extra>',
                  name = i
        )
        
        data.append(trace)
        
    layout = go.Layout(title='Virat Kohli Runs per year',
                   xaxis= dict(title = 'Year'),
                   yaxis=dict(title = 'Runs'))

    fig = go.Figure(data = data, layout = layout)

    fig.show()
    

In [220]:
batsman_line_chart_comparison('V Kohli', 'MS Dhoni', 'DA Warner', 'SK Raina')

## <b> <center> 3. Bar Plot

In [221]:
top10_batsman_run_wise = ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).iloc[:10].reset_index()

top10_batsman_run_wise

Unnamed: 0,batter,batsman_runs
0,V Kohli,8014
1,S Dhawan,6769
2,RG Sharma,6630
3,DA Warner,6567
4,SK Raina,5536
5,MS Dhoni,5243
6,AB de Villiers,5181
7,CH Gayle,4997
8,RV Uthappa,4954
9,KD Karthik,4843


In [222]:
colors = px.colors.qualitative.Plotly

trace = go.Bar(x= top10_batsman_run_wise['batter'],
               y= top10_batsman_run_wise['batsman_runs'],
               marker = dict(color=colors[:len(top10_batsman_run_wise)])
               )

data = [trace]

layout = go.Layout(title='Top 10 Run Scorer',
                   xaxis= dict(title = 'Batsman Name'),
                   yaxis=dict(title = 'Runs'))

fig = go.Figure(data = data, layout = layout)

fig.show()

<span style="font-size: 20px;"> <b>There are 2 types of Bar Graphs</b>  
1. Nested Bar Graph
2. Stacked Bar Graph
3. Overlaid Bar Graph

<span style="font-size: 16px;"> <b> 🎯***Question:*** Plot the bar graph of top 10 runs scorer in 2 inning (Nested, Stacked, Overlaid Bar Graph).

In [223]:
# Calculating all batsman runs
batsman_run_wise = ipl.groupby(['batter', 'inning'])['batsman_runs'].sum().reset_index()

# Calculating first inning runs
inning_1st = batsman_run_wise[batsman_run_wise['inning'] == 1].copy()
inning_1st.rename(columns=dict(batsman_runs = '1st innings'), inplace = True)

# Calculating second inning runs
inning_2nd = batsman_run_wise[batsman_run_wise['inning'] == 2].copy()
inning_2nd.rename(columns=dict(batsman_runs = '2nd innings'), inplace = True)

# Merging the first and second dataframe
batsman_run_wise_and_inning_wise = inning_1st.merge(inning_2nd, on='batter')[['batter','1st innings','2nd innings']]

# Adding Total Runs column
batsman_run_wise_and_inning_wise['Total Runs'] = batsman_run_wise_and_inning_wise['1st innings'] + batsman_run_wise_and_inning_wise['2nd innings']

batsman_run_wise_and_inning_wise

Unnamed: 0,batter,1st innings,2nd innings,Total Runs
0,A Ashish Reddy,166,114,280
1,A Badoni,464,170,634
2,A Chopra,51,2,53
3,A Choudhary,15,10,25
4,A Flintoff,22,40,62
...,...,...,...,...
533,YV Takawale,62,130,192
534,Yashpal Singh,13,34,47
535,Yudhvir Singh,1,21,22
536,Yuvraj Singh,1510,1240,2750


In [224]:
# Extracting top 10 batsman
batsman_run_wise_and_inning_wise = batsman_run_wise_and_inning_wise.sort_values(by='Total Runs', ascending=False).iloc[:10]

batsman_run_wise_and_inning_wise

Unnamed: 0,batter,1st innings,2nd innings,Total Runs
508,V Kohli,4400,3604,8004
412,S Dhawan,3926,2843,6769
383,RG Sharma,3600,3028,6628
121,DA Warner,3280,3285,6565
437,SK Raina,3194,2334,5528
299,MS Dhoni,3065,2178,5243
23,AB de Villiers,3163,1999,5162
103,CH Gayle,2873,2092,4965
401,RV Uthappa,2120,2832,4952
221,KD Karthik,2743,2099,4842


<span style="font-size: 18px;"> <b>3.1 Overlay Bar Graph

In [225]:
trace1 = go.Bar(x=batsman_run_wise_and_inning_wise['batter'],
              y= batsman_run_wise_and_inning_wise['1st innings'],
              name = '1st innings',
              hovertemplate='1st innings Runs: %{y}<extra></extra>')

trace2 = go.Bar(x=batsman_run_wise_and_inning_wise['batter'],
              y= batsman_run_wise_and_inning_wise['2nd innings'],
              name = '2nd innings',
              hovertemplate='2nd innings Runs: %{y}<extra></extra>')

data = [trace1, trace2]

layout = go.Layout(title='Top 10 Run Scorer',
                   xaxis= dict(title = 'Batsman Name'),
                   yaxis=dict(title = 'Runs'),
                   barmode= 'overlay')

fig = go.Figure(data = data, layout = layout)

fig.show()

<span style="font-size: 18px;"> <b>3.2 Stacked Bar Graph

In [226]:
trace1 = go.Bar(x=batsman_run_wise_and_inning_wise['batter'],
              y= batsman_run_wise_and_inning_wise['1st innings'],
              name = '1st innings',
              text=batsman_run_wise_and_inning_wise['1st innings'],
              hovertemplate='1st innings Runs: %{y}<extra></extra>')

trace2 = go.Bar(x=batsman_run_wise_and_inning_wise['batter'],
              y= batsman_run_wise_and_inning_wise['2nd innings'],
              name= '2nd innings',
              text= batsman_run_wise_and_inning_wise['2nd innings'],
              textposition='inside', #inside and outside
              hovertemplate='2nd innings Runs: %{y}<extra></extra>')

data = [trace1, trace2]

layout = go.Layout(title='Top 10 Run Scorer',
                   xaxis= dict(title = 'Batsman Name'),
                   yaxis=dict(title = 'Runs'),
                   barmode= 'stack',
                   bargap=0.2)

fig = go.Figure(data = data, layout = layout)

fig.show()

<span style="font-size: 18px;"> <b>3.3 Grouped Bar Graph

In [227]:
fig.update_layout(
    title='Top 10 Run Scorer',
    xaxis=dict(title='Batsman Name', tickangle=-45),
    yaxis=dict(title='Runs'),
    barmode='group',    # 'stack' if you prefer stacked
    bargap=0.2,
    uniformtext_minsize=8,
    uniformtext_mode='hide'   # prevents overlapping labels
)

fig.show()

## <b> <center> 4. Bubble Plot </b></center>  
A bubble chart is a scatter plot in which a third dimension of the data is shown through the size of markers.

>Bubble is same as scatter plot, the only difference is bubble plot will always have size parameter value.

<span style="font-size: 18px;"> <b> 🎯***Question:*** Plot bubble plot on the Strike Rate Vs Average columns

In [228]:
top50_batsman_summary.head()

Unnamed: 0,batter,runs,balls,outs,strike_rate,average
631,V Kohli,8014,6236,218,128.511867,36.761468
512,S Dhawan,6769,5483,194,123.454313,34.891753
477,RG Sharma,6630,5183,232,127.918194,28.577586
147,DA Warner,6567,4849,164,135.429986,40.042683
546,SK Raina,5536,4177,168,132.535312,32.952381


In [229]:
trace1 = go.Scatter(x= top50_batsman_summary['average'],
                    y= top50_batsman_summary['strike_rate'],
                    mode = 'markers', 
                    marker = dict(
                        size = top50_batsman_summary['runs'] / 150,
                        color = top50_batsman_summary['runs'],
                        colorscale = 'Plasma', 
                        showscale = True,
                        opacity = 0.6,
                        line=dict(width=2, color='black')
                        ),
                    
                    text= top50_batsman_summary['batter'],
                    hovertemplate='%{text}<br>Avg: %{y}<br>SR: %{x}<br>Runs: %{marker.color}<extra></extra>',
                    )
                    

data = [trace1]

layout = go.Layout(
    title = 'Strike Rate vs Average (Top 50 Batsmen)',
    xaxis = {'title': 'Average'},
    yaxis = {'title': 'Strike Rate'},
    
)

fig = go.Figure(data = data, layout = layout)

fig.show()

 ## <b> <center> 5. Box Plot </b></center>  

<h3> 📦 Box Plot (a.k.a. Whisker Plot) </h3>

A **box plot** is a statistical visualization used to show the **distribution** of a dataset.
It highlights **central tendency**, **spread**, and **outliers**.

---

<h3> 📊 Key Components</h3>

1. **Median (Q2)**

   * The line inside the box.
   * Splits the data into two halves.

2. **Quartiles**

   * **Q1 (25th percentile):** lower edge of the box → 25% of data below.
   * **Q3 (75th percentile):** upper edge of the box → 75% of data below.
   * The **box height** = **IQR (Interquartile Range) = Q3 − Q1** → shows middle 50% of data.

3. **Whiskers**

   * Lines extending from the box.
   * Typically go to the **smallest and largest values within 1.5 × IQR** from Q1 and Q3.
   * Show the general spread of the data.

4. **Outliers**

   * Points beyond whiskers (i.e., below Q1 − 1.5×IQR or above Q3 + 1.5×IQR).
   * Represent unusually high/low values.

---

<h3> ✅ What Box Plots Are Good For</h3>

* Comparing **distributions** across multiple groups.
* Detecting **skewness** (median not centered in box).
* Spotting **outliers** quickly.
* Seeing **spread/variability** in data.

---

<h3> ⚡ Quick Example</h3>

If you plot batsmen runs per match:

* The box shows the **middle 50% of runs** scored.
* Median tells the “typical” score.
* Outliers highlight **exceptional high/low innings**.

---

👉 In short: **A box plot = 5-number summary (min, Q1, median, Q3, max) + outliers.**



>In box column should be numerical.

Extract total runs scored in a match

In [230]:
each_match_runs = ipl.groupby(['match_id'])['total_runs'].sum().reset_index()

season_wise = each_match_runs.merge(match, left_on='match_id', right_on='id')[['match_id', 'total_runs', 'season']]

season_wise

Unnamed: 0,match_id,total_runs,season
0,335982,304,2007/08
1,335983,447,2007/08
2,335984,261,2007/08
3,335985,331,2007/08
4,335986,222,2007/08
...,...,...,...
1090,1426307,429,2024
1091,1426309,323,2024
1092,1426310,346,2024
1093,1426311,314,2024


<span style="font-size: 18px;"> <b>Plot Box plot on the Total runs

In [231]:
trace = go.Box(x=season_wise['total_runs'],
                hovertext= season_wise['season'],
                hovertemplate='Runs: %{x} <br>Season:%{hovertext}<extra></extra>',
                marker= dict(color= ' green'), # box plot only takes single color
                name='',
)
                
data = [trace]

layout = go.Layout(title="Total Score Analysis",
                   xaxis=dict(title = 'Total Score'),
                   yaxis=dict(title = 'All season')
)

fig = go.Figure(data = data, layout = layout)

fig.show()

***Now create 2 Box Plots: first one 2007 season and second one for 2024 season***

In [232]:
trace1 = go.Box(x=season_wise[season_wise['season'] == '2007/08']['total_runs'],
                hovertext= season_wise['season'],
                hovertemplate='Runs: %{x} <br>Season:%{hovertext}<extra></extra>',
                marker= dict(color= ' green'), # box plot only takes single color
                name='Season: 2007',
)

trace2 = go.Box(x=season_wise[season_wise['season'] == '2024']['total_runs'],
                hovertext= season_wise['season'],
                hovertemplate='Runs: %{x} <br>Season:%{hovertext}<extra></extra>',
                marker= dict(color= 'red'), # box plot only takes single color
                name='Season: 2024',
)

              
data = [trace1, trace2]

layout = go.Layout(title="Total Score Analysis",
                   xaxis=dict(title = 'Total Score'),
                   yaxis=dict(title = 'All season')
)

fig = go.Figure(data = data, layout = layout)

fig.show()

Comparing first and last season with all season

In [233]:
trace1 = go.Box(x=season_wise[season_wise['season'] == '2007/08']['total_runs'],
                hovertext= season_wise['season'],
                hovertemplate='Runs: %{x} <br>Season:%{hovertext}<extra></extra>',
                marker= dict(color= ' green'), # box plot only takes single color
                name='Season: 2007',
)

trace2 = go.Box(x=season_wise[season_wise['season'] == '2024']['total_runs'],
                hovertext= season_wise['season'],
                hovertemplate='Runs: %{x} <br>Season:%{hovertext}<extra></extra>',
                marker= dict(color= 'red'), # box plot only takes single color
                name='Season: 2024',
)

trace3 = go.Box(x=season_wise['total_runs'],
                hovertext= season_wise['season'],
                hovertemplate='Runs: %{x} <br>Season:%{hovertext}<extra></extra>',
                marker= dict(color= 'blue'), # box plot only takes single color
                name='Season: All',
)

              
data = [trace1, trace2, trace3]

layout = go.Layout(title="Total Score Analysis",
                   xaxis=dict(title = 'Total Score'),
                   yaxis=dict(title = 'All season')
)

fig = go.Figure(data = data, layout = layout)

fig.show()

## <b> <center> 6. Distplots

<b>Distplot or distribution plot: We plot Dist plot on single continuous variable.  
Distplot is combination of 3 plots: Histogram, kde, rough plot (niche joo rahega)  

<b>In plot Rough plot, we just put the marker in the respective entry bin. The dense area means data is more in that bin.  
Rough also give the information about distribution between the bins which histogram do not give

In [234]:
# Distplot import 

import plotly.figure_factory as ff

In [235]:
top50_batsman_summary.head() # avg of top 50 batsman

Unnamed: 0,batter,runs,balls,outs,strike_rate,average
631,V Kohli,8014,6236,218,128.511867,36.761468
512,S Dhawan,6769,5483,194,123.454313,34.891753
477,RG Sharma,6630,5183,232,127.918194,28.577586
147,DA Warner,6567,4849,164,135.429986,40.042683
546,SK Raina,5536,4177,168,132.535312,32.952381


<span style="font-size: 18px;"> <b> Plotting the Distplot

In [236]:
hist_data= [top50_batsman_summary['average']]

group_labels = ['Average']

fig = ff.create_distplot(hist_data, group_labels)


fig.show()

<span style="font-size: 18px;"> <b> Improved Distplot

In [237]:
hist_data = [top50_batsman_summary['average']]
group_labels = ['Average']
colors = ['#636EFA']

fig = ff.create_distplot(
    hist_data,
    group_labels,
    bin_size=5,
    curve_type='kde',
    show_hist=True,
    show_rug=True,
    colors=colors
)

fig.update_layout(
    title="Distribution of Top 50 Batsmen Averages",
    xaxis_title="Average",
    yaxis_title="Density",
    template='plotly_white'
)

fig.show()


<span style="font-size: 18px;"> <b>Multiple data comparison

In [238]:
hist_data= [top50_batsman_summary['average'], top50_batsman_summary['strike_rate']]

group_labels = ['Average', 'Strike_rate']

fig = ff.create_distplot(hist_data, group_labels, bin_size=[5,10])


fig.show()

## <b> <center>7. Histograms

A histogram is a graphical representation of the distribution of numerical data, where the data is grouped into continuous ranges called "bins".

In [239]:
batsman_summary.head()


Unnamed: 0,batter,runs,balls,outs,strike_rate,average
0,A Ashish Reddy,280,196,15,142.857143,18.666667
1,A Badoni,634,505,26,125.544554,24.384615
2,A Chandila,4,7,1,57.142857,4.0
3,A Chopra,53,75,5,70.666667,10.6
4,A Choudhary,25,20,2,125.0,12.5


<span style="font-size: 18px;"> <b>Plotting the Strike rate of the batsman using Histogram 

In [240]:
trace = go.Histogram(x=batsman_summary['strike_rate'])

data = [trace]

layout = go.Layout(title="Strike Rate Variations",
                   xaxis= dict(title = 'Strike Rate'),
                   yaxis= dict(title = 'Count')) 

fig= go.Figure(data = data, layout = layout)

fig.show()

<span style="font-size: 18px;"> <b>Improving the Histogram using start, end and size

In [241]:
# Filter negatives (just in case)
data = batsman_summary[batsman_summary['strike_rate'] >= 0]['strike_rate']

trace = go.Histogram(x=batsman_summary['strike_rate'], 
                     xbins=dict(start=0, end=data.max()+5, size=10) )


layout = go.Layout(title="Strike Rate Variations",
                   xaxis= dict(title = 'Strike Rate'),
                   yaxis= dict(title = 'Count')) 

fig= go.Figure(data = [trace], layout = layout)

fig.show()

In [242]:
# start with 50, end with 250
data = batsman_summary[batsman_summary['strike_rate'] >= 0]['strike_rate']

trace = go.Histogram(x=batsman_summary['strike_rate'], 
                     xbins=dict(start=0, end=250, size=10) )


layout = go.Layout(title="Strike Rate Variations",
                   xaxis= dict(title = 'Strike Rate'),
                   yaxis= dict(title = 'Count')) 

fig= go.Figure(data = [trace], layout = layout)

fig.show()

## <b> <center>8. Heat Map

<span style="font-size: 18px;"> <b>Plotting Heat map on the sixes per over by teams

In [243]:
six = delivery[delivery['batsman_runs'] == 6]

six = six.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()

six.head()

Unnamed: 0,batting_team,over,batsman_runs
0,Chennai Super Kings,0,9
1,Chennai Super Kings,1,36
2,Chennai Super Kings,2,67
3,Chennai Super Kings,3,71
4,Chennai Super Kings,4,75


In [244]:
six.batting_team.unique()

array(['Chennai Super Kings', 'Deccan Chargers', 'Delhi Capitals',
       'Delhi Daredevils', 'Gujarat Lions', 'Gujarat Titans',
       'Kings XI Punjab', 'Kochi Tuskers Kerala', 'Kolkata Knight Riders',
       'Lucknow Super Giants', 'Mumbai Indians', 'Pune Warriors',
       'Punjab Kings', 'Rajasthan Royals', 'Rising Pune Supergiant',
       'Rising Pune Supergiants', 'Royal Challengers Bangalore',
       'Royal Challengers Bengaluru', 'Sunrisers Hyderabad'], dtype=object)

In [245]:
# Full IPL team name normalization dictionary
team_mapping = {
    # Current standardized names
    'Chennai Super Kings': 'Chennai Super Kings',
    'Mumbai Indians': 'Mumbai Indians',
    'Royal Challengers Bangalore': 'Royal Challengers Bangalore',
    'Royal Challengers Bengaluru': 'Royal Challengers Bangalore',
    'Kolkata Knight Riders': 'Kolkata Knight Riders',
    'Sunrisers Hyderabad': 'Sunrisers Hyderabad',
    'Delhi Capitals': 'Delhi Capitals',
    'Delhi Daredevils': 'Delhi Capitals',
    'Punjab Kings': 'Punjab Kings',
    'Kings XI Punjab': 'Punjab Kings',
    'Rajasthan Royals': 'Rajasthan Royals',
    'Lucknow Super Giants': 'Lucknow Super Giants',

    # Teams that existed for a few seasons
    'Deccan Chargers': 'Sunrisers Hyderabad',  # Deccan Chargers became Sunrisers
    'Gujarat Lions': 'Gujarat Titans',  # temporary team, map to Gujarat Titans
    'Gujarat Titans': 'Gujarat Titans',
    'Pune Warriors': 'Rising Pune Supergiant',  # both Pune teams
    'Rising Pune Supergiant': 'Rising Pune Supergiant',
    'Rising Pune Supergiants': 'Rising Pune Supergiant',
    'Kochi Tuskers Kerala': 'Kochi Tuskers Kerala',  # only existed 2011, keep original
}

# Apply mapping
six['batting_team'] = six['batting_team'].replace(team_mapping)

# Verify normalization
print(six['batting_team'].unique())


['Chennai Super Kings' 'Sunrisers Hyderabad' 'Delhi Capitals'
 'Gujarat Titans' 'Punjab Kings' 'Kochi Tuskers Kerala'
 'Kolkata Knight Riders' 'Lucknow Super Giants' 'Mumbai Indians'
 'Rising Pune Supergiant' 'Rajasthan Royals' 'Royal Challengers Bangalore']


In [192]:
trace = go.Heatmap(x=six['batting_team'], y=six['over'], z=six['batsman_runs'])

layout = go.Layout(title='Six Heatmaps', yaxis=dict(title='Over'))

fig = go.Figure(data = [trace], layout = layout)

fig.show()

<B>Improving the Heatmap

In [273]:
trace = go.Heatmap(
    x=six['batting_team'],
    y=six['over'],
    z=six['batsman_runs'],
    text=six['batsman_runs'],      # This adds numbers on cells
    texttemplate="%{text}",
    colorscale='RdYlBu',              # 🔥 Better visual gradient (yellow→red)
    hovertemplate=(
        '<b>Team:</b> %{x}<br>'
        '<b>Over:</b> %{y}<br>'
        '<b>Sixes:</b> %{z}<extra></extra>'
    ),
    colorbar=dict(
        title=dict(
            text='Number of Sixes',   
        ),
        tickmode='linear',
        ticks='outside'
    )
)

layout = go.Layout(
    title=dict(
        text='🏏 Six Distribution Heatmap by Team and Over',
        x=0.5,  # Center the title
        font=dict(size=22, color='#2c3e50')
    ),
    xaxis=dict(
        title='Batting Team',
        tickangle=45,
        tickfont=dict(size=12),
        showgrid=False
    ),
    yaxis=dict(
        title='Over',
        tickfont=dict(size=12),
        dtick = 1
    ),
    plot_bgcolor='white',
    margin=dict(l=80, r=40, t=80, b=80)
)

fig = go.Figure(data=[trace], layout=layout)
fig.update_layout(
    template='plotly_white',           # Clean professional style
    width=1400,
    height=900
)
fig.show()


<span style="font-size: 18px;"> <b>Plotting 2 heatmap side by side  
First: six count  
Second: dot count

In [278]:
dots = delivery[delivery['batsman_runs'] == 0]
dots = dots.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()

dots.head()

Unnamed: 0,batting_team,over,batsman_runs
0,Chennai Super Kings,0,927
1,Chennai Super Kings,1,846
2,Chennai Super Kings,2,741
3,Chennai Super Kings,3,697
4,Chennai Super Kings,4,666


In [279]:
# Full IPL team name normalization dictionary
team_mapping = {
    # Current standardized names
    'Chennai Super Kings': 'Chennai Super Kings',
    'Mumbai Indians': 'Mumbai Indians',
    'Royal Challengers Bangalore': 'Royal Challengers Bangalore',
    'Royal Challengers Bengaluru': 'Royal Challengers Bangalore',
    'Kolkata Knight Riders': 'Kolkata Knight Riders',
    'Sunrisers Hyderabad': 'Sunrisers Hyderabad',
    'Delhi Capitals': 'Delhi Capitals',
    'Delhi Daredevils': 'Delhi Capitals',
    'Punjab Kings': 'Punjab Kings',
    'Kings XI Punjab': 'Punjab Kings',
    'Rajasthan Royals': 'Rajasthan Royals',
    'Lucknow Super Giants': 'Lucknow Super Giants',

    # Teams that existed for a few seasons
    'Deccan Chargers': 'Sunrisers Hyderabad',  # Deccan Chargers became Sunrisers
    'Gujarat Lions': 'Gujarat Titans',  # temporary team, map to Gujarat Titans
    'Gujarat Titans': 'Gujarat Titans',
    'Pune Warriors': 'Rising Pune Supergiant',  # both Pune teams
    'Rising Pune Supergiant': 'Rising Pune Supergiant',
    'Rising Pune Supergiants': 'Rising Pune Supergiant',
    'Kochi Tuskers Kerala': 'Kochi Tuskers Kerala',  # only existed 2011, keep original
}

# Apply mapping
dots['batting_team'] = dots['batting_team'].replace(team_mapping)

# Verify normalization
print(dots['batting_team'].unique())


['Chennai Super Kings' 'Sunrisers Hyderabad' 'Delhi Capitals'
 'Gujarat Titans' 'Punjab Kings' 'Kochi Tuskers Kerala'
 'Kolkata Knight Riders' 'Lucknow Super Giants' 'Mumbai Indians'
 'Rising Pune Supergiant' 'Rajasthan Royals' 'Royal Challengers Bangalore']


In [283]:
from plotly import tools

trace1 = go.Heatmap(x=six['batting_team'], y=six['over'], z=six['batsman_runs'].values.tolist())

trace2 = go.Heatmap(x=dots['batting_team'], y= dots['over'], z=dots['batsman_runs'].values.tolist())

fig = tools.make_subplots(rows=1, cols=2, subplot_titles=["6's", "0's"], shared_yaxes= True)

fig.append_trace(trace1,1,1)
fig.append_trace(trace2,1,2)

fig.show()


plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead

