<a href="https://colab.research.google.com/github/amalsalilan/IPL_Infographics_Data_Analytics_-_Data_Visualization_Infosys_Internship_Oct2024/blob/Nagira/IPL_deliveries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from google.colab import files
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Load the deliveries CSV into a DataFrame.
# The location is kept in one named constant so it is easy to spot and change.
DELIVERIES_CSV_PATH = "/content/sample_data/deliveries.csv"
deliveries = pd.read_csv(DELIVERIES_CSV_PATH)

In [2]:
# Keep an independent copy of the frame exactly as it was loaded.
deliveries_copy = deliveries.copy(deep=True)

# Print the column names, dtypes and non-null counts.
deliveries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260920 entries, 0 to 260919
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   match_id          260920 non-null  int64 
 1   inning            260920 non-null  int64 
 2   batting_team      260920 non-null  object
 3   bowling_team      260920 non-null  object
 4   over              260920 non-null  int64 
 5   ball              260920 non-null  int64 
 6   batter            260920 non-null  object
 7   bowler            260920 non-null  object
 8   non_striker       260920 non-null  object
 9   batsman_runs      260920 non-null  int64 
 10  extra_runs        260920 non-null  int64 
 11  total_runs        260920 non-null  int64 
 12  extras_type       14125 non-null   object
 13  is_wicket         260920 non-null  int64 
 14  player_dismissed  12950 non-null   object
 15  dismissal_kind    12950 non-null   object
 16  fielder           9354 non-null    obj

In [3]:
# Number of missing values in each column (isna is the same check as isnull).
missing_per_column = deliveries.isna().sum()
missing_per_column

Unnamed: 0,0
match_id,0
inning,0
batting_team,0
bowling_team,0
over,0
ball,0
batter,0
bowler,0
non_striker,0
batsman_runs,0


In [4]:
# Gather the distinct team names found in either team column and print
# them in alphabetical order, one per line.
team_name_columns = [deliveries['batting_team'], deliveries['bowling_team']]
merged_teams = sorted(pd.concat(team_name_columns).unique())
for team in merged_teams:
    print(team)

Chennai Super Kings
Deccan Chargers
Delhi Capitals
Delhi Daredevils
Gujarat Lions
Gujarat Titans
Kings XI Punjab
Kochi Tuskers Kerala
Kolkata Knight Riders
Lucknow Super Giants
Mumbai Indians
Pune Warriors
Punjab Kings
Rajasthan Royals
Rising Pune Supergiant
Rising Pune Supergiants
Royal Challengers Bangalore
Royal Challengers Bengaluru
Sunrisers Hyderabad


In [5]:
# Replacing duplicates in both batting_team and bowling_team columns
deliveries.replace({'batting_team': {'Delhi Capitals': 'Delhi Daredevils'},
                    'bowling_team': {'Delhi Capitals': 'Delhi Daredevils'}}, inplace=True)

deliveries.replace({'batting_team': {'Gujarat Lions': 'Gujarat Titans'},
                    'bowling_team': {'Gujarat Lions': 'Gujarat Titans'}}, inplace=True)

deliveries.replace({'batting_team': {'Rising Pune Supergiant': 'Rising Pune Supergiants'},
                    'bowling_team': {'Rising Pune Supergiant': 'Rising Pune Supergiants'}}, inplace=True)

deliveries.replace({'batting_team': {'Royal Challengers Bangalore': 'Royal Challengers Bengaluru'},
                    'bowling_team': {'Royal Challengers Bangalore': 'Royal Challengers Bengaluru'}}, inplace=True)


In [6]:
# List the distinct team names again, alphabetically, using the union of
# the names seen in the batting and bowling columns.
batting_names = set(deliveries['batting_team'])
bowling_names = set(deliveries['bowling_team'])
merged_teams = sorted(batting_names | bowling_names)
for team in merged_teams:
    print(team)


Chennai Super Kings
Deccan Chargers
Delhi Daredevils
Gujarat Titans
Kings XI Punjab
Kochi Tuskers Kerala
Kolkata Knight Riders
Lucknow Super Giants
Mumbai Indians
Pune Warriors
Punjab Kings
Rajasthan Royals
Rising Pune Supergiants
Royal Challengers Bengaluru
Sunrisers Hyderabad


# <b>1. Average Run Rate per Over</b>
Task:

Calculate the average run rate for each over across all matches.
Create a line plot showing the average runs scored in each over.


In [7]:
# Each row of `deliveries` is a single ball, so averaging total_runs directly
# by over number would give the mean runs per delivery, not per over.
# Step 1: total the runs of every individual over (one row per match,
# innings and over number).
runs_per_over = (
    deliveries
    .groupby(['match_id', 'inning', 'over'], as_index=False)['total_runs']
    .sum()
)

# Step 2: average those per-over totals for each over number.
average_runs_per_over = runs_per_over.groupby('over')['total_runs'].mean().reset_index()

# Line chart (Plotly) of the average runs scored in each over.
fig = px.line(
    average_runs_per_over,
    x='over',
    y='total_runs',
    title='Average Runs Scored per Over Across All Matches',
    labels={'over': 'Over', 'total_runs': 'Average Runs'},
    markers=True,
)
fig.show()

# <b>2. Wicket Fall Distribution by Over</b>
Task:

Analyze which overs tend to see the most wickets fall.
Create a bar plot to visualize the distribution of wickets per over.


In [20]:
# Keep only the deliveries on which a wicket fell.
wickets_per_over = deliveries[deliveries['is_wicket'] == 1]

# Count the wicket deliveries for each over number.
wickets_distribution = wickets_per_over.groupby('over').size().reset_index(name='wickets')

# The chart is drawn with Plotly only; no matplotlib figure is created here,
# which would otherwise render as an empty "Figure ... with 0 Axes" output.
fig = go.Figure()

# One bar per over, labelled with its wicket count above the bar.
fig.add_trace(
    go.Bar(
        x=wickets_distribution['over'],
        y=wickets_distribution['wickets'],
        name='Wickets',
        marker=dict(color='orange'),
        text=wickets_distribution['wickets'],
        textposition='outside',
    )
)

# Titles and sizing.
fig.update_layout(
    title='Distribution of Wickets per Over Across All Matches',
    xaxis_title='Over',
    yaxis_title='Number of Wickets',
    height=500
)
fig.show()


<Figure size 1000x600 with 0 Axes>

# <b>3. Partnership Analysis (Top 10 Partnerships by Total Runs)</b>
•	Task:

Identify the top 10 batting partnerships (batsman and non-striker pairs) that have scored the most runs together.
Create a bar plot showing these partnerships.

In [12]:

# Sum the batter's runs for every (batter, non_striker) combination.
# The pair is ordered here: (A, B) and (B, A) are separate rows.
partnerships = deliveries.groupby(['batter', 'non_striker'])['batsman_runs'].sum().reset_index()

# Ten combinations with the highest run totals.
top_partnerships = partnerships.nlargest(10, 'batsman_runs')

fig = px.bar(
    top_partnerships,
    x='batter',
    y='batsman_runs',
    color='non_striker',  # Color by non-striker for better distinction
    title='Top 10 Batting Partnerships by Runs Scored',
    labels={'batter': 'Batter', 'batsman_runs': 'Total Runs'},
    text='batsman_runs',  # Show the runs on the bars
    height=500
)

# Show the exact run total on each bar. The previous '.2s' format rounded the
# label to two significant digits, so the printed number did not match the data.
fig.update_traces(texttemplate='%{text:,d}', textposition='outside')
fig.update_layout(xaxis_title='Batter', yaxis_title='Total Runs', xaxis_tickangle=-45)

fig.show()



# <b>3. Another version – top partnerships by the combined runs of the two players</b>

In [13]:

# Step 1: build an order-independent key for each delivery: the batter and
# non-striker names sorted into a tuple, so (A, B) and (B, A) share one key.
# A plain zip over the two columns produces the same tuples as a row-wise
# DataFrame.apply, without the per-row overhead on the full frame.
deliveries['partnership'] = [
    tuple(sorted(pair))
    for pair in zip(deliveries['batter'], deliveries['non_striker'])
]

# Step 2: sum the batter runs for each partnership key.
partnerships_sum = deliveries.groupby('partnership')['batsman_runs'].sum().reset_index()

# Step 3: keep the ten highest totals and add a readable "A & B" label.
# assign() returns a new frame, so the nlargest result is not modified in place.
top_partnerships = (
    partnerships_sum
    .nlargest(10, 'batsman_runs')
    .assign(partnership_str=lambda top: top['partnership'].map(' & '.join))
)

fig = px.bar(
    top_partnerships,
    x='partnership_str',
    y='batsman_runs',
    title='Top 10 Unique Batting Partnerships by Runs Scored',
    labels={'partnership_str': 'Partnership', 'batsman_runs': 'Total Runs'},
    text='batsman_runs',  # Show the runs on the bars
    height=600
)

# Show the exact run total on each bar. The previous '.2s' format rounded the
# label to two significant digits, so the printed number did not match the data.
fig.update_traces(texttemplate='%{text:,d}', textposition='outside')
fig.update_layout(xaxis_title='Partnership', yaxis_title='Total Runs', xaxis_tickangle=-45)

fig.show()

# <b>4. Batsman vs Bowler Analysis</b>

Task:

Analyze which batsmen have scored the most runs against specific bowlers.
Visualize the top 10 batsman-bowler pairs based on runs scored.

In [16]:
# Sum the runs each batter scored off each bowler.
batsman_bowler_runs = deliveries.groupby(['batter', 'bowler'])['batsman_runs'].sum().reset_index()

# Keep the ten batter-bowler pairs with the most runs and add an
# "A vs B" label, built with vectorised string concatenation.
top_batsman_bowler_pairs = (
    batsman_bowler_runs
    .nlargest(10, 'batsman_runs')
    .assign(pair=lambda top: top['batter'] + ' vs ' + top['bowler'])
)

fig = px.bar(
    top_batsman_bowler_pairs,
    x='pair',
    y='batsman_runs',
    title='Top 10 Batsman-Bowler Pairs by Runs Scored',
    labels={'pair': 'Batsman vs Bowler', 'batsman_runs': 'Total Runs'},
    text='batsman_runs',  # Show the runs on the bars
    height=600
)

# Show the exact run total on each bar. The previous '.2s' format rounded the
# label to two significant digits, so the printed number did not match the data.
fig.update_traces(texttemplate='%{text:,d}', textposition='outside')
fig.update_layout(
    xaxis_title='Batsman vs Bowler',
    yaxis_title='Total Runs',
    xaxis_tickangle=-45,
)

fig.show()

# **5. Wickets by Dismissal Kind per Bowler**
•	Task: Analyze how different bowlers take wickets (e.g., caught, bowled, LBW).
Create a bar plot that shows the top 10 combinations of bowler and dismissal kind.


In [15]:
# Deliveries on which a wicket fell.
# NOTE(review): every dismissal_kind present on these rows is counted for the
# bowler of that delivery -- confirm whether kinds that are not credited to
# the bowler should be excluded before grouping.
wickets = deliveries[deliveries['is_wicket'] == 1]

# Number of wickets for each (bowler, dismissal kind) combination.
wickets_count = wickets.groupby(['bowler', 'dismissal_kind']).size().reset_index(name='wicket_count')

# Keep the ten largest combinations and build the axis label
# "<bowler> - <dismissal kind>: <count>" with vectorised string operations.
top_wickets = (
    wickets_count
    .nlargest(10, 'wicket_count')
    .assign(
        combination=lambda top: (
            top['bowler'] + ' - ' + top['dismissal_kind']
            + ': ' + top['wicket_count'].astype(str)
        )
    )
)

fig = px.bar(
    top_wickets,
    x='combination',
    y='wicket_count',
    title='Top 10 Bowler-Dismissal Combinations by Wickets Taken',
    labels={'combination': 'Bowler - Dismissal Kind', 'wicket_count': 'Number of Wickets'},
    text='wicket_count',  # Show the wicket count on the bars
    height=600  # Increased height for better visibility
)

# Show the exact wicket count on each bar. The previous '.2s' format rounded
# to two significant digits (e.g. 113 was displayed as 110).
fig.update_traces(texttemplate='%{text:,d}', textposition='outside')
fig.update_layout(
    xaxis_title='Bowler - Dismissal Kind',
    yaxis_title='Number of Wickets',
    xaxis_tickangle=-45,
    margin=dict(t=50, b=50, l=50, r=50)  # Add margins for better spacing
)

fig.show()


In [None]:
# Tally how often each dismissal kind occurs among the wicket deliveries,
# then turn the tally into a two-column frame for display.
dismissal_kind_tally = wickets['dismissal_kind'].value_counts()
dismissal_counts = dismissal_kind_tally.reset_index()
dismissal_counts


# <b>6. Extras Conceded per Bowling Team</b>
Task:

Analyze how many extra runs (wides, no-balls, etc.) each bowling team concedes.
Create a bar plot to visualize the total extras conceded by each team.

In [17]:
# Extras categories included in this analysis.
EXTRAS_KINDS = ['wides', 'noballs', 'byes', 'legbyes']

# Deliveries whose extras_type is one of the categories above.
is_selected_extra = deliveries['extras_type'].isin(EXTRAS_KINDS)
extras = deliveries[is_selected_extra]

# Total extra runs for every (bowling team, extras type) combination.
extras_conceded = (
    extras
    .groupby(['bowling_team', 'extras_type'])['extra_runs']
    .sum()
    .reset_index()
)

# Wide layout for the stacked bars: one row per bowling team, one column per
# extras type; combinations that never occurred become 0.
extras_pivot = extras_conceded.pivot(
    index='bowling_team',
    columns='extras_type',
    values='extra_runs',
).fillna(0)

bar_options = dict(
    title='Total Extras Conceded by Each Bowling Team',
    labels={'value': 'Total Extras', 'bowling_team': 'Bowling Team'},
    text_auto=True,  # Show values on the bars
)
fig = px.bar(extras_pivot, x=extras_pivot.index, y=extras_pivot.columns, **bar_options)

# Stack the extras types on top of each other for every team.
fig.update_layout(
    barmode='stack',
    xaxis_title='Bowling Team',
    yaxis_title='Total Extras',
    margin=dict.fromkeys(('t', 'b', 'l', 'r'), 50),
)
fig.show()