## Downloading Raw Data

In [None]:
# import requests
# import zipfile
# import os

# def downloadFile(url, destination):
#     fileId = url.split('/')[5]
    

#     url='https://drive.google.com/uc?export=download&id=' + fileId
    

#     response = requests.get(url)
    

#     with open(destination, "wb") as file:
#         file.write(response.content)

# fileUrl = "https://drive.google.com/file/d/1Emdky32QEjyNCAxktJV90G2hxO8NHEeI/view?usp=sharing"  
# path = "ipl_json.zip"  
# downloadFile(fileUrl,path)
# with zipfile.ZipFile(path, 'r') as zip_ref:
#         zip_ref.extractall()


# Extracting Data

##### Relevant Imports

In [40]:
import os
import json
import pandas as pd
import pydash
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np


'''
@function getValueFromJson
@description Returns value from the json for the key passed
@default returns zero if key is not found
'''
def getValueFromJson(json, key, default=0): 
    return pydash.get(json, key, default)

In [41]:
path = 'ipl_json/'
filelist = os.listdir(path)

In [42]:
extract = []
playOffEvent = set()
for file in filelist:
    with open(f'{path}/{file}','r') as f:
        data = json.load(f)
        event = getValueFromJson(data,'info.event.match_number')
        team1 = getValueFromJson(data,'info.teams.0')
        team2 = getValueFromJson(data,'info.teams.1')
        outcome = getValueFromJson(data,'info.outcome.winner')
        date = getValueFromJson(data,'info.dates.0')
        season = getValueFromJson(data,'info.season')
        if outcome == 0:
            outcome = getValueFromJson(data,'info.outcome.result')
        if event == 0:
            event = getValueFromJson(data,'info.event.stage')
            playOffEvent.add(event)
        extract.append({
            'date' : date,
            'team1' : team1,
            'team2' : team2,
            'outcome' : outcome,
            'event': event,
            'season' : str(season)
        })

Creating DataFrame prepating data and creating a csv file for the extracted data

Note: Rising Pune Supergiant and Rising Pune Supergiants are assumed to be the same team

In [43]:
df = pd.DataFrame(extract)
df.season.replace(['2007/08','2009/10','2020/21'],['2008','2010','2020'],inplace=True)
df.team1.replace('Rising Pune Supergiant','Rising Pune Supergiants',inplace=True)
df.team2.replace('Rising Pune Supergiant','Rising Pune Supergiants',inplace=True)
df.outcome.replace('Rising Pune Supergiant','Rising Pune Supergiants',inplace=True)

# uncomment the below code to get rid of the assumption that Kings XI Punjab and Delhi Daredevils are the same teams as Punjab Kings and Delhi Capitals
df.team1.replace('Kings XI Punjab','Punjab Kings',inplace=True)
df.team2.replace('Kings XI Punjab','Punjab Kings',inplace=True)
df.outcome.replace('Kings XI Punjab','Punjab Kings',inplace=True)

df.team1.replace('Delhi Daredevils','Delhi Capitals',inplace=True)
df.team2.replace('Delhi Daredevils','Delhi Capitals',inplace=True)
df.outcome.replace('Delhi Daredevils','Delhi Capitals',inplace=True)

df.to_csv('ipl_match_results.csv',index=False)

# IPL Elo Ratings Calculator

#### Reading CSV file with initial Match by Match Results and relevant Elo rating

In [44]:
df = pd.read_csv('ipl_match_results.csv')

In [45]:
df.sample(5)

Unnamed: 0,date,team1,team2,outcome,event,season
308,2015-04-27,Punjab Kings,Sunrisers Hyderabad,Sunrisers Hyderabad,27,2015
956,2023-04-22,Gujarat Titans,Lucknow Super Giants,Gujarat Titans,30,2023
675,2023-05-17,Delhi Capitals,Punjab Kings,Delhi Capitals,64,2023
312,2018-04-14,Kolkata Knight Riders,Sunrisers Hyderabad,Sunrisers Hyderabad,10,2018
928,2017-04-28,Punjab Kings,Sunrisers Hyderabad,Sunrisers Hyderabad,33,2017


In [46]:
playoffEvents = set()
for _ in list(df['event'].unique()):
    try:
        int(_)
    except Exception as error:
        playoffEvents.add(_)
playoffEvents=list(playoffEvents)

def assign_points(row, playoffEvents):
    if row['event'] in playoffEvents:

        return (0,0)

    if row['outcome'] == row['team1']:
        return (2, 0)
    else:
        return (0, 2)

# Apply the function to each row
df['pointsTeam1'], df['pointsTeam2'] = zip(*df.apply(assign_points, playoffEvents=playoffEvents, axis=1))


In [47]:
df_team1 = df[['team1', 'season', 'event','pointsTeam1']].rename(columns={'team1': 'team', 'pointsTeam1': 'points'})
df_team2 = df[['team2', 'season', 'event','pointsTeam2']].rename(columns={'team2': 'team', 'pointsTeam2': 'points'})

# Concatenate both DataFrames
dfCombined = pd.concat([df_team1, df_team2])

# Group by teamName and season, then sum the points
dfPointsAllTime = dfCombined.groupby(['team']).agg(points=('points','sum')).reset_index()
dfPointsSeason = dfCombined.groupby(['team','season']).agg(points=('points','sum')).reset_index()

### Elo Rating Update Formula

The Elo rating is updated after each game based on the game outcome. The new rating  R'  is calculated using the formula:

$$
R' = R + K \times (S - E)
$$

Where:
-  R  is the current rating.
-  R'  is the new rating.
-  K  is the K-factor, which determines the maximum possible change in rating.
-  S  is the score of the game (1 for a win, 0.5 for a draw, and 0 for a loss).
-  E  is the expected score.

### Expected Score Formula

The expected score  E  of a player/team is calculated using the formula:

$$
E = \frac{1}{1 + 10^{(R_{\text{opponent}} - R) / 400}}
$$

Where:
-  R  is the player's/team's current rating.
-  R<sub>opponent</sub>  is the opponent's rating.

These formulas are used to calculate and update Elo ratings based on match outcomes, taking into account the relative strength of the opponents as indicated by their current ratings.


In [48]:
def expectedScore(ratingA, ratingB):
    return 1 / (1 + 10 ** ((ratingB - ratingA) / 400))

Notes in update elo rating:
- teams with a relative low elo rating (difference of > 25) are rewarded with a higher k Factor
- league matches are given low kFactor while playoff matches are given a higher K factor

In [52]:
def updateElo(ratingA, ratingB, outcome, baseKFactor):
    # Calculate the rating difference
    ratingDiff = abs(ratingA - ratingB)

    if ratingDiff > 25:  
        kFactor = baseKFactor + 10  
    else:
        kFactor = baseKFactor

    expectedA = expectedScore(ratingA, ratingB)
    expectedB = expectedScore(ratingB, ratingA)

    if outcome == 'win':
        scoreA = 1
        scoreB = 0
    elif outcome == 'tie':
        scoreA = 0.5
        scoreB = 0.5
    else:  # no_result
        # No change in ratings for no result
        return ratingA, ratingB

    newRatingA = ratingA + kFactor * (scoreA - expectedA)
    newRatingB = ratingB + kFactor * (scoreB - expectedB)

    return round(newRatingA, 3), round(newRatingB, 3)

- Iterate over each row of the dataframe and calculate the elo rating after each match
- Create a Dictionary to keep track of current elo ratings of a team and create an empty list to keep track of elo rating of team by date

In [53]:
currentElo = {team: 1000 for team in set(df['team1']).union(set(df['team2']))} # dictionary to keep track of current elo rating of each team
eloRatingMatch = [] # list of keeping track of match by match data of each team
matchCount = {team: 0 for team in set(df['team1']).union(set(df['team2']))} # dictionary to have a count of overall matches played by each team
winCount = {team: 0 for team in set(df['team1']).union(set(df['team2']))} # dictionary to have a count of overall matches won by each team
tieCount = {team: 0 for team in set(df['team1']).union(set(df['team2']))}# dictionary to have a count of overall ties played by each team

playoffMatches = ['Qualifier 2','Qualifier 1','3rd Place Play-Off','Semi Final','Eliminator','Elimination Final','Final']
seasonData = {k:{team: 0 for team in set(df['team1']).union(set(df['team2']))} for k in df.season.unique()} #dictionary to keep track of seasonwise elo rating of teams
seasonMatches = {k:{team: 0 for team in set(df['team1']).union(set(df['team2']))} for k in df.season.unique()} # dictionary to have a count of seasonwise matches played by each team
seasonWins = {k:{team: 0 for team in set(df['team1']).union(set(df['team2']))} for k in df.season.unique()} # dictionary to have a count of seasonwise matches won by each team
seasonTies = {k:{team: 0 for team in set(df['team1']).union(set(df['team2']))} for k in df.season.unique()} # dictionary to have a count of seasonwise matches tied by each team

for index, row in df.iterrows():
    date = row['date']
    team1 = row['team1']
    team2 = row['team2']
    winner = row['outcome']
    season = row['season']
    event = row['event']
    outcome = 'win' if row['outcome'] == team1 else 'tie' if row['outcome'] == 'tie' else 'no_result'

    #updating match played
    matchCount[team1] += 1
    matchCount[team2] += 1
    seasonMatches[season][team1] +=1
    seasonMatches[season][team2] +=1
    # Update win counts
    if winner == team1:
        winCount[team1] += 1
        seasonWins[season][team1] +=1
    elif winner == team2:
        winCount[team2] += 1
        seasonWins[season][team2] +=1
    elif winner == 'tie':
        tieCount[team1] +=1
        tieCount[team2] += 1
        seasonTies[season][team2] +=1
        seasonTies[season][team1] +=1
    
    # Update Elo ratings
    kFactor = 30
    if event in playoffMatches:
        kFactor = 40
    newEloTeam1, newEloTeam2 = updateElo(currentElo[team1], currentElo[team2], outcome,kFactor)

    eloRatingMatch.append((matchCount[team1], team1, newEloTeam1,season))
    eloRatingMatch.append((matchCount[team2], team2, newEloTeam2,season))

    # Set new Elo ratings in the DataFrame
    df.at[index, 'eloTeam1'] = newEloTeam1
    df.at[index, 'eloTeam2'] = newEloTeam2

    # Update current Elo ratings
    currentElo[team1] = newEloTeam1
    currentElo[team2] = newEloTeam2
    #update season Elo Ratings
    seasonData[season][team1] = newEloTeam1
    seasonData[season][team2] = newEloTeam2

#### Current Elo Ratings of Each Team that has participated in IPL

In [54]:
eloData = list(currentElo.items())
eloDf = pd.DataFrame(eloData, columns=['team', 'eloRating'])
eloDf.sort_values('eloRating',ascending=False,inplace=True)
eloDf.reset_index(inplace=True,drop=True)
eloDf['totalMatches'] = eloDf['team'].map(matchCount)
eloDf['wins'] = eloDf['team'].map(winCount)
eloDf['ties'] = eloDf['team'].map(tieCount)
eloDf['winRatio'] = round(eloDf['wins'] / eloDf['totalMatches'],3) 
display(eloDf)

Unnamed: 0,team,eloRating,totalMatches,wins,ties,winRatio
0,Chennai Super Kings,1139.05,224,131,1,0.585
1,Royal Challengers Bangalore,1116.023,240,114,3,0.475
2,Gujarat Titans,1084.687,33,23,0,0.697
3,Mumbai Indians,1057.1,247,138,4,0.559
4,Punjab Kings,1044.234,232,104,4,0.448
5,Lucknow Super Giants,1017.219,30,17,0,0.567
6,Delhi Capitals,996.838,238,105,4,0.441
7,Gujarat Lions,991.245,30,13,1,0.433
8,Rising Pune Supergiants,980.733,30,15,0,0.5
9,Rajasthan Royals,976.936,206,101,3,0.49


Calculating win loss ratio, total Loss, and points scored in all seasons

In [55]:
eloDf = pd.merge(eloDf,dfPointsAllTime)
eloDf['loss'] = eloDf['totalMatches'] - (eloDf['wins'] + eloDf['ties'])
eloDf['winLossRatio'] = eloDf['wins'] / (eloDf['loss'])
eloDf

Unnamed: 0,team,eloRating,totalMatches,wins,ties,winRatio,points,loss,winLossRatio
0,Chennai Super Kings,1139.05,224,131,1,0.585,230,92,1.423913
1,Royal Challengers Bangalore,1116.023,240,114,3,0.475,218,123,0.926829
2,Gujarat Titans,1084.687,33,23,0,0.697,40,10,2.3
3,Mumbai Indians,1057.1,247,138,4,0.559,254,105,1.314286
4,Punjab Kings,1044.234,232,104,4,0.448,214,124,0.83871
5,Lucknow Super Giants,1017.219,30,17,0,0.567,34,13,1.307692
6,Delhi Capitals,996.838,238,105,4,0.441,212,129,0.813953
7,Gujarat Lions,991.245,30,13,1,0.433,26,16,0.8125
8,Rising Pune Supergiants,980.733,30,15,0,0.5,28,15,1.0
9,Rajasthan Royals,976.936,206,101,3,0.49,202,102,0.990196


In [56]:
teams = list(eloDf.team.unique())

inactiveTeams = ['Gujarat Lions','Deccan Chargers','Kochi Tuskers Kerala','Rising Pune Supergiants','Pune Warriors']
grayCodes = ['#404040','#585858','#585858','#707070','#888888','#A0A0A0']

colorScale = zip(inactiveTeams,grayCodes)
colorScale = dict(colorScale)

color_scale = {
    'Team1': '#1f77b4', 'Team2': '#ff7f0e', 'Team3': '#2ca02c',
    'Team4': '#d62728', 'Team5': '#9467bd', 'Team6': '#8c564b',
    'Team7': '#e377c2', 'Team8': '#fc6ab5', 'Team9': '#bcbd22',
    'Team10': '#17becf', 'Team11': '#1f77b4', 'Team12': '#ff7f0e',
}


colors = list(color_scale.values())
count = 0
for team in teams:
    if team not in inactiveTeams:
        colorScale[team] = colors[count]
        count +=1


In [57]:


# Create a bar plot
fig = go.Figure()

# Add bars for eloRating
fig.add_trace(go.Bar(x=eloDf['team'], y=eloDf['eloRating'], name='Elo Rating', marker_color=eloDf['team'].map(colorScale)))

# Add a scatter plot for points
fig.add_trace(go.Scatter(x=eloDf['team'], y=eloDf['points'], mode='markers', name='Points',
                         marker=dict(color='black', size=10)))


fig.update_layout(height=600,width=1200,title='Team Performance with Elo Rating',
                  xaxis_title='Team',
                  yaxis_title='Elo Rating',
                  xaxis=dict(tickangle=90),
                  legend=dict(title='Metric', x=1, y=1))

# Add annotations for win/loss ratio
for i, row in eloDf.iterrows():
    #'points annotation'
    fig.add_annotation(
        x=row['team'],
        y=row['points'] +50,
        text=f"{row['points']}",
        showarrow=False,
        font=dict(
            color="black",
            size=12
        )
    )


    fig.add_annotation(
        x=row['team'],
        y=row['eloRating'] -25,
        text=f"{row['eloRating']:.3f}",
        showarrow=False,
        font=dict(
            color="black",
            size=10
        )
    )

fig.show()


In [58]:
corrMatrix = eloDf[['eloRating', 'wins', 'points','winRatio']].corr()

fig = px.imshow(corrMatrix, text_auto=True)

fig.update_layout(
    height=600,
    width=800,
    title='Correlation Heatmap',
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    xaxis=dict(ticks='outside', side='top'),
    yaxis=dict(ticks='outside', side='left'),
)

fig.show()

According to the heat map of the correlation matrix we can say that:
- There is a higher correlation between win ratio and elo Rating 

In [59]:


fig = px.scatter(eloDf, x="totalMatches", y="wins", color="team",
                 color_discrete_map=colorScale,
                 hover_data=['team'])  

fig.update_traces(marker=dict(size=10, symbol='x'),
                  selector=dict(mode='markers'))

fig.update_layout(height=600,width=1200,title='Total Matches Vs Total Wins',
                  xaxis_title='Total Matches',
                  yaxis_title='Wins',
                  legend=dict(title='Team', x=1, y=1)) 

In [60]:
fig = px.scatter(eloDf, x="wins", y="loss", color="team",
                 color_discrete_map=colorScale,
                 hover_data=['team'])  

fig.update_traces(marker=dict(size=10, symbol='x'),
                  selector=dict(mode='markers'))

fig.update_layout(height=600,width=1200,title='Total wins Vs Total loss',
                  xaxis_title='Total Wins',
                  yaxis_title='Losses',
                  legend=dict(title='Team', x=1, y=1)) 

In [61]:
fig = go.Figure(data=[
    go.Bar(name='Wins', x=eloDf['team'], y=eloDf['wins']),
    go.Bar(name='Losses', x=eloDf['team'], y=eloDf['loss'])
])

# Update layout
fig.update_layout(barmode="stack",height=600, width=1200, title='Team Performance',
                  xaxis_title='Team', yaxis_title='Matches',
                  xaxis=dict(tickangle=90),
                  legend=dict(title='Metric', x=1, y=1))

# Add annotations for win/loss ratio
for i, row in eloDf.iterrows():
    #'win annotation'
    fig.add_annotation(
        x=row['team'],
        y=row['wins'] -5,
        text=f"wins:{row['wins']}",
        showarrow=False,
        font=dict(
            color="black",
            size=10
        )
    )

    #loss annotation
    fig.add_annotation(
        x=row['team'],
        y=row['wins'] + row['loss']-5,
        text=f"loss:{row['loss']}",
        showarrow=False,
        font=dict(
            color="black",
            size=10
        )
    )

    fig.add_annotation(
        x=row['team'],
        y=row['wins'] + row['loss']+5,
        text=f"{row['winLossRatio']:.1f}",
        showarrow=False,
        font=dict(
            color="black",
            size=12
        )
    )

# Show the figure
fig.show()

## Season-wise Elo Ratings of Team

preparing Elo Rating Season By Season Df

In [62]:
#flattening data and prepping the dataframe
data = []
for season, teams in seasonData.items():
    for team, elo in teams.items():
        row = {
            'team': team,
            'season': season,
            'eloRating': elo,
            'wins': seasonWins[season].get(team, 0),
            'matches': seasonMatches[season].get(team, 0),
            'ties':seasonTies[season].get(team,0)
        }
        data.append(row)


eloSeason = pd.DataFrame(data)

eloSeason.sort_values(by=['team', 'season'], inplace=True)


eloSeason.reset_index(drop=True,inplace=True)

In [63]:
eloSeason['winRatio'] = round(eloSeason['wins'] / eloSeason['matches'],3) 
eloSeason['loss'] = eloSeason['matches'] - (eloSeason['wins'] + eloSeason['ties'])
eloSeason['winLossRatio'] = round(eloSeason['wins'] / (eloSeason['loss']),3)
eloSeason

Unnamed: 0,team,season,eloRating,wins,matches,ties,winRatio,loss,winLossRatio
0,Chennai Super Kings,2008,1111.196,9,16,0,0.562,7,1.286
1,Chennai Super Kings,2009,1108.429,8,14,0,0.571,6,1.333
2,Chennai Super Kings,2010,1127.404,9,16,1,0.562,6,1.500
3,Chennai Super Kings,2011,1116.321,11,16,0,0.688,5,2.200
4,Chennai Super Kings,2012,1097.227,10,18,0,0.556,8,1.250
...,...,...,...,...,...,...,...,...,...
235,Sunrisers Hyderabad,2019,945.097,6,15,1,0.400,8,0.750
236,Sunrisers Hyderabad,2020,943.813,8,16,1,0.500,7,1.143
237,Sunrisers Hyderabad,2021,945.097,3,14,1,0.214,10,0.300
238,Sunrisers Hyderabad,2022,960.149,6,14,0,0.429,8,0.750


In [64]:
eloSeason['eloRating'].replace(0, np.nan, inplace=True)

eloSeason['eloRating'] = eloSeason.groupby('team')['eloRating'].apply(lambda x: x.ffill())
eloSeason = pd.merge(eloSeason,dfPointsSeason)
eloSeason


Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)



Unnamed: 0,team,season,eloRating,wins,matches,ties,winRatio,loss,winLossRatio,points
0,Chennai Super Kings,2008,1111.196,9,16,0,0.562,7,1.286,16
1,Chennai Super Kings,2009,1108.429,8,14,0,0.571,6,1.333,16
2,Chennai Super Kings,2010,1127.404,9,16,1,0.562,6,1.500,14
3,Chennai Super Kings,2011,1116.321,11,16,0,0.688,5,2.200,18
4,Chennai Super Kings,2012,1097.227,10,18,0,0.556,8,1.250,16
...,...,...,...,...,...,...,...,...,...,...
131,Sunrisers Hyderabad,2019,945.097,6,15,1,0.400,8,0.750,14
132,Sunrisers Hyderabad,2020,943.813,8,16,1,0.500,7,1.143,16
133,Sunrisers Hyderabad,2021,945.097,3,14,1,0.214,10,0.300,8
134,Sunrisers Hyderabad,2022,960.149,6,14,0,0.429,8,0.750,12


In [65]:
lineFig = px.line(eloSeason, x="season", y="eloRating", color="team", 
              title="Elo Ratings of Cricket Teams Over Seasons",
              labels={"matchNo": "Season", "eloRating": "Elo Rating"},
              color_discrete_map=colorScale)

lineFig.show()

In [66]:
seasons = eloSeason['season'].unique()

subFig = make_subplots(rows=4, cols=4, subplot_titles=[f"{season}" for season in seasons])

row = 1
col = 1
legendAdded = set()

for season in seasons:
    seasonDf = eloSeason[eloSeason['season'] == season]
    for team in seasonDf['team'].unique():
        teamDf = seasonDf[seasonDf['team'] == team]

        # Add trace to the subplot
        subFig.add_trace(
            go.Bar(
                x=teamDf['season'], 
                y=teamDf['eloRating'], 
                name=team,
                showlegend=team not in legendAdded 
            ),
            row=row, col=col
        )
        legendAdded.add(team)

        if col == 1:
            subFig.update_yaxes(title_text="Elo Rating", row=row, col=col, showticklabels=True)
        else:
            subFig.update_yaxes(showticklabels=False, row=row, col=col)

    col += 1
    if col > 4:
        col = 1
        row += 1

subFig.update_layout(height=800, width=1200, title_text="Match By Match Elo Ratings of Teams")
subFig.show()

In [67]:
fig = px.bar(eloSeason, x="season", y=["wins", "loss", "ties"], 
                              color_discrete_map={"wins":"green", "loss":"red", "ties":"blue"},
                              title="Team Performance Across Seasons",
                              labels={"value": "Number of Matches", "variable": "Match Outcome"},
                              barmode="stack",
                              facet_col="team",
                              facet_col_wrap=3)


fig.update_layout(height=800, width=1200, title_text="Performance of teams per season")
fig.show()


In [68]:
corrMatrix = eloSeason[['eloRating', 'wins', 'points','winRatio']].corr()

fig = px.imshow(corrMatrix, text_auto=True)

fig.update_layout(
    height=600,
    width=800,
    title='Correlation Heatmap',
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    xaxis=dict(ticks='outside', side='top'),
    yaxis=dict(ticks='outside', side='left'),
)

fig.show()

In [69]:
# Visualization 3: Scatter Plot for EloRating vs WinRatio
fig = px.scatter(eloSeason, x="eloRating", y="winRatio", color="team",
                                  title="Elo Rating vs Win Ratio per Team",
                                  labels={"eloRating": "Elo Rating", "winRatio": "Win Ratio"},
                                  color_discrete_map=colorScale,
                                  hover_data=['team'])


fig.update_traces(marker=dict(size=10, symbol='x'),
                  selector=dict(mode='markers'))

fig.update_layout(height=600,width=1200,
                  xaxis_title='Elo Rating',
                  yaxis_title='Win Ratio',
                  legend=dict(title='Team', x=1, y=1)) 


In [78]:
fig = px.scatter(eloSeason, x="loss", y="wins", color="team",
                                  title="Win vs Loss per season",
                                  labels={"wins": "Wins", "loss": "Loss"},
                                  color_discrete_map=colorScale,
                                  hover_data=['team','season'])

maxWinLoss = eloSeason.loc[eloSeason['winLossRatio'].idxmax()]
minWinLoss = eloSeason.loc[eloSeason['winLossRatio'].idxmin()]

fig.add_annotation(
    x=maxWinLoss["loss"],  
    y=maxWinLoss['wins']+2,
    text=f"Highest Win/Loss Ratio:<br>Team: {maxWinLoss['team']}<br>Season: {maxWinLoss['season']}<br>Ratio: {maxWinLoss['winLossRatio']}",
    showarrow=False,   
    font=dict(color="black", size=12)
)

fig.add_annotation(
    x=minWinLoss["loss"],  
    y=minWinLoss['wins']+4,
    text=f"Lowest Win/Loss Ratio:<br>Team: {minWinLoss['team']}<br>Season: {minWinLoss['season']}<br>Ratio: {minWinLoss['winLossRatio']}",
    showarrow=False,   
    font=dict(color="black", size=12)
)

fig.update_traces(marker=dict(size=10, symbol='x'),
                  selector=dict(mode='markers'))

fig.update_layout(height=600,width=1200,
                  xaxis_title='Loss',
                  yaxis_title='Wins',
                  legend=dict(title='Team', x=1, y=1)) 


In [79]:
fig = px.line(eloSeason, x="season", y="wins", color="team",
                           title="Wins Over Time per Team",
                           labels={"value": "Number of Matches", "variable": "Match Outcome"},
                           color_discrete_map=colorScale)

maxElo = eloSeason.loc[eloSeason['eloRating'].idxmax()]
minElo = eloSeason.loc[eloSeason['eloRating'].idxmin()]

fig.add_annotation(
    x=1,  
    y=1,
    text=f"Top Elo Rating:<br>Team: {maxElo['team']}<br>Season: {maxElo['season']}<br>Elo: {maxElo['eloRating']}",
    showarrow=False,
    xref="paper",
    yref="paper",  
    xanchor='right',  
    yanchor='top',    
    font=dict(color="black", size=12)
)

fig.add_annotation(
    x=0.2,  
    y=1,
    text=f"Worst Elo Rating:<br>Team: {minElo['team']}<br>Season: {minElo['season']}<br>Elo: {minElo['eloRating']}",
    showarrow=False,
    xref="paper",
    yref="paper",  
    xanchor='right',  
    yanchor='top',    
    font=dict(color="black", size=12)
)

fig.update_layout(height=600, width=1200)
fig.show()


In [72]:
long_format = pd.melt(eloSeason, id_vars=['season', 'team'], value_vars=['wins', 'loss'],
                      var_name='WinOrLoss', value_name='Count')

# Create the boxplot
fig = px.box(long_format, x='season', y='Count', color='WinOrLoss',
             title='Boxplot of Wins and Losses per Season')

fig.update_layout(height=600, width=1200)
fig.show()

## IPL Match By Match Analysis

In [73]:
df = pd.read_csv('iplMatchByMatchElo.csv')

In [74]:
df

Unnamed: 0,matchNo,team,eloRating,season
0,1,Chennai Super Kings,1015.000,2023
1,1,Kolkata Knight Riders,985.000,2023
2,2,Chennai Super Kings,1029.353,2009
3,1,Rajasthan Royals,985.647,2009
4,1,Mumbai Indians,1000.000,2021
...,...,...,...,...
2043,237,Kolkata Knight Riders,944.597,2019
2044,224,Chennai Super Kings,1139.050,2013
2045,240,Royal Challengers Bangalore,1116.023,2013
2046,238,Delhi Capitals,996.838,2018


In [75]:
teams = list(df.team.unique())

inactiveTeams = ['Gujarat Lions','Deccan Chargers','Kochi Tuskers Kerala','Rising Pune Supergiants','Pune Warriors']
grayCodes = ['#404040','#585858','#585858','#707070','#888888','#A0A0A0']

colorScale = zip(inactiveTeams,grayCodes)
colorScale = dict(colorScale)

color_scale = {
    'Team1': '#1f77b4', 'Team2': '#ff7f0e', 'Team3': '#2ca02c',
    'Team4': '#d62728', 'Team5': '#9467bd', 'Team6': '#8c564b',
    'Team7': '#e377c2', 'Team8': '#7f7f7f', 'Team9': '#bcbd22',
    'Team10': '#17becf', 'Team11': '#1f77b4', 'Team12': '#ff7f0e',
}


colors = list(color_scale.values())
count = 0
for team in teams:
    if team not in inactiveTeams:
        colorScale[team] = colors[count]
        count +=1

In [76]:
lineFig = px.line(df, x="matchNo", y="eloRating", color="team", 
              title="Elo Ratings of Cricket Teams Over Matches",
              labels={"matchNo": "Match Number", "rating": "Elo Rating"},
              color_discrete_map=colorScale)

lineFig.show()

In [77]:
teams = df['team'].unique()

subFig = make_subplots(rows=5, cols=3, subplot_titles=[f"{team}" for team in teams])

row = 1
col = 1


for team in teams:
    team_df = df[df['team'] == team]
    subFig.add_trace(
        go.Scatter(x=team_df['matchNo'], y=team_df['eloRating'], mode='lines', name=team),
        row=row, col=col
    )
    subFig.update_xaxes(title_text="Match Number", row=row, col=col)
    subFig.update_yaxes(title_text="Elo Rating", row=row, col=col)
    # Update row and column indices for the next subplot
    col += 1
    if col > 3:
        col = 1
        row += 1


subFig.update_layout(height=1200, width=1200, title_text="Match By Match Elo Ratings of Teams", showlegend=False)
subFig.show()

## Bar Chart Race

In [None]:
import bar_chart_race as bcr
from IPython.display import Video

dfAvg = df.groupby(['season', 'team'])['eloRating'].mean().unstack().fillna(method='ffill')
dfAvg.index = pd.to_datetime(dfAvg.index, format='%Y')
videoName = 'elo_ratings_race.mp4'

bcr.bar_chart_race(
    df=dfAvg,
    filename=videoName,
    orientation='h',
    sort='desc',
    n_bars=len(df.team.unique()),
    fixed_order=False,
    fixed_max=True,
    steps_per_period=10,
    interpolate_period=False,
    label_bars=True,
    bar_size=.95,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    period_fmt='Season %Y',
    period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                      's': f'Season Summary',
                                      'ha': 'right', 'size': 8, 'family': 'Courier New'},
    perpendicular_bar_func='median',
    period_length=1000,
    figsize=(8, 6),
    dpi=300,
    cmap='dark12',
    title='IPL Teams Elo Ratings Over Seasons',
    title_size='',
    bar_label_size=7,
    tick_label_size=7,
    shared_fontdict={'family' : 'Helvetica', 'color' : '.1'},
    scale='linear',
    writer=None,
    fig=None,
    bar_kwargs={'alpha': .7},
    filter_column_colors=False
)


Video(videoName)