In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import streamlit as st
import warnings
warnings.filterwarnings('ignore')

# --- Load and clean the ball-by-ball data ----------------------------------
# Both CSVs are read relative to the notebook's working directory.
df = pd.read_csv("./IPL_BallByBall2008_2024(Updated).csv", low_memory=False)

# Trim surrounding whitespace in every object (string) column; other dtypes pass through.
df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

# Drop the extras-type column, then replace every remaining missing value with 0.
df = df.drop(columns=['type of extras']).fillna(0)

# Keep innings 1 and 2 only (presumably this removes super overs - confirm).
# .copy() makes the filtered frame independent so the column assignment below
# does not write into a slice of the unfiltered frame.
df = df[df['Innings No'] <= 2].copy()

df['Date'] = pd.to_datetime(df['Date'])

# --- Attach the per-match team performance data ----------------------------
extra_df = pd.read_csv("./team_performance_dataset_2008to2024.csv")
extra_df = extra_df.rename(columns={'Match_ID': 'Match id'})
extra_df['Date'] = pd.to_datetime(extra_df['Date'])

# Inner join: only matches present in both files (same 'Match id' and 'Date') survive.
df = pd.merge(df, extra_df, on=['Match id', 'Date'])

# df['Batting team'].unique().tolist()

# Old franchise name -> name used for the current season.
# NOTE(review): this table maps 'Gujarat Lions' to 'Gujarat Titans' and both
# 'Rising Pune Supergiant(s)' spellings to 'Chennai Super Kings' - confirm that
# merging those teams' histories is intended.
rename_teams = {
    "Kings XI Punjab": "Punjab Kings",
    "Delhi Daredevils": "Delhi Capitals",
    'Deccan Chargers': 'Sunrisers Hyderabad',
    'Gujarat Lions': 'Gujarat Titans',
    'Royal Challengers Bangalore': 'Royal Challengers Bengaluru',
    'Rising Pune Supergiant': 'Chennai Super Kings',
    'Rising Pune Supergiants': 'Chennai Super Kings',
}

# Replace the team names wherever they appear as a whole cell value.
# Only text (object) columns can hold a team name, so numeric and datetime
# columns are skipped instead of being attempted and having errors swallowed.
for col in df.select_dtypes(include='object').columns:
    df[col] = df[col].replace(rename_teams)

# Venue spelling variants -> one canonical "<ground>, <city>" label.
# Matching is on the whole cell value, so already-canonical names are untouched.
rename_venues = {
    'Arun Jaitley Stadium': "Arun Jaitley Stadium, Delhi",
    'Feroz Shah Kotla': "Arun Jaitley Stadium, Delhi",
    'Brabourne Stadium': 'Brabourne Stadium, Mumbai',
    'Dr DY Patil Sports Academy': 'Dr DY Patil Sports Academy, Mumbai',
    'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium': 'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam',
    'Eden Gardens': 'Eden Gardens, Kolkata',
    'Himachal Pradesh Cricket Association Stadium': 'Himachal Pradesh Cricket Association Stadium, Dharamsala',
    'M Chinnaswamy Stadium': 'M Chinnaswamy Stadium, Bengaluru',
    'M.Chinnaswamy Stadium': 'M Chinnaswamy Stadium, Bengaluru',
    'MA Chidambaram Stadium, Chepauk': 'MA Chidambaram Stadium, Chepauk, Chennai',
    'Maharashtra Cricket Association Stadium': 'Maharashtra Cricket Association Stadium, Pune',
    'Punjab Cricket Association IS Bindra Stadium': 'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh',
    'Punjab Cricket Association IS Bindra Stadium, Mohali': 'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh',
    'Punjab Cricket Association Stadium, Mohali': 'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh',
    'Rajiv Gandhi International Stadium': 'Rajiv Gandhi International Stadium, Uppal, Hyderabad',
    'Rajiv Gandhi International Stadium, Uppal': 'Rajiv Gandhi International Stadium, Uppal, Hyderabad',
    'Sawai Mansingh Stadium': 'Sawai Mansingh Stadium, Jaipur',
    'Wankhede Stadium': 'Wankhede Stadium, Mumbai',
}

# Still best-effort when the column is absent, but the situation is reported
# instead of being hidden, and any other error now surfaces.
if 'Venue' in df.columns:
    df['Venue'] = df['Venue'].replace(rename_venues)
else:
    print("Column 'Venue' not found - venue names were not normalised.")

        
# Remove every delivery in which one of these teams batted or bowled.
drop_teams = ['Kochi Tuskers Kerala', 'Pune Warriors']
involves_dropped_team = df['Batting team'].isin(drop_teams) | df['Bowling team'].isin(drop_teams)
df = df[~involves_dropped_team]

# Roster for the 2024 season: every (player, team) pair seen batting or bowling.
season_2024 = df[df['Season'] == '2024']

battersIN_2024_df = (
    season_2024[['Striker', 'Batting team']]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={'Striker':'Player', 'Batting team':'Team'})
)
bowlersIN_2024_df = (
    season_2024[['Bowler', 'Bowling team']]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={'Bowler':'Player', 'Bowling team':'Team'})
)

# Outer merge on both columns = union of the two (player, team) lists.
curr_players_teams_df = battersIN_2024_df.merge(
    bowlersIN_2024_df,
    on=['Player', 'Team'],
    how='outer',
)
curr_players_teams_df

Unnamed: 0,Player,Team
0,A Badoni,Lucknow Super Giants
1,A Manohar,Gujarat Titans
2,A Mishra,Lucknow Super Giants
3,A Nortje,Delhi Capitals
4,A Raghuvanshi,Kolkata Knight Riders
...,...,...
184,Washington Sundar,Sunrisers Hyderabad
185,YBK Jaiswal,Rajasthan Royals
186,YS Chahal,Rajasthan Royals
187,Yash Dayal,Royal Challengers Bengaluru


In [2]:
# Distinct team names present in the 2024 roster (sanity check after the renaming step).
curr_players_teams_df['Team'].unique()

array(['Lucknow Super Giants', 'Gujarat Titans', 'Delhi Capitals',
       'Kolkata Knight Riders', 'Sunrisers Hyderabad',
       'Chennai Super Kings', 'Royal Challengers Bengaluru',
       'Mumbai Indians', 'Punjab Kings', 'Rajasthan Royals'], dtype=object)

In [3]:
# Distinct venue labels after normalisation; useful for choosing a `stadium` search fragment.
df['Venue'].unique()

array(['M Chinnaswamy Stadium, Bengaluru',
       'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh',
       'Arun Jaitley Stadium, Delhi', 'Wankhede Stadium, Mumbai',
       'Eden Gardens, Kolkata', 'Sawai Mansingh Stadium, Jaipur',
       'Rajiv Gandhi International Stadium, Uppal, Hyderabad',
       'MA Chidambaram Stadium, Chepauk, Chennai',
       'Dr DY Patil Sports Academy, Mumbai', 'Newlands',
       "St George's Park", 'Kingsmead', 'SuperSport Park', 'Buffalo Park',
       'New Wanderers Stadium', 'De Beers Diamond Oval',
       'OUTsurance Oval', 'Brabourne Stadium, Mumbai',
       'Sardar Patel Stadium, Motera', 'Barabati Stadium',
       'Vidarbha Cricket Association Stadium, Jamtha',
       'Himachal Pradesh Cricket Association Stadium, Dharamsala',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam',
       'Subrata Roy Sahara Stadium',
       'Shaheed Veer Narayan Singh International Stadium',
       'JSCA International Stadium Com

In [4]:
# Fixture to analyse. The team names are matched as substrings of the roster's
# 'Team' column; `stadium` is a fragment matched against 'Venue' in VenueFactor.
batting_team = 'Sunrisers Hyderabad'
bowling_team = 'Rajasthan Royals'
stadium = 'Rajiv Gandhi'

# regex=False: treat the names as literal text, so characters such as '.', '('
# or '+' in a team name cannot be misread as a regular-expression pattern.
batting_team_players = curr_players_teams_df[
    curr_players_teams_df['Team'].str.contains(batting_team, regex=False)
]['Player'].tolist()
bowling_team_players = curr_players_teams_df[
    curr_players_teams_df['Team'].str.contains(bowling_team, regex=False)
]['Player'].tolist()

In [5]:
# Per-factor score tables keyed by factor name; the factor cells below fill them in.
# Re-running this cell empties both dictionaries.
batters_dfs = {}
bowlers_dfs = {}

# Overall

In [6]:
def OverallFactor(df):
    """Per-match averages over all available matches for players of both squads.

    Reads the module-level lists ``batting_team_players`` and
    ``bowling_team_players``.

    Returns
    -------
    (batters, bowlers) : tuple of DataFrame
        batters - indexed by 'Striker'; 'overall_scores' is the mean of the
        per-match summed 'runs_scored'.
        bowlers - indexed by 'Bowler'; 'overall_scores' is the mean of the
        per-match summed 'wicket_confirmation'.
        Both are ordered by 'overall_scores', highest first.
    """
    squad = batting_team_players + bowling_team_players

    def _per_match_average(role, metric):
        # Total per (match, player) first, then average those totals per player.
        rows = df[df[role].isin(squad)]
        match_totals = rows.groupby(['Match id', role])[metric].sum().reset_index()
        averages = match_totals.groupby(role)[metric].mean().reset_index(name='overall_scores')
        ranked = averages.sort_values(by='overall_scores', ascending=False).reset_index(drop=True)
        return ranked.set_index(role)

    batters = _per_match_average('Striker', 'runs_scored')
    bowlers = _per_match_average('Bowler', 'wicket_confirmation')
    return batters, bowlers

# Store the overall tables. No defensive copy of the frame is made:
# OverallFactor only filters and aggregates its argument, never writes to it.
batters_dfs['OverallFactor'], bowlers_dfs['OverallFactor'] = OverallFactor(df)

# Opponent

In [7]:
def OpponentFactor(df):
    """Per-match averages of each squad's players against the other team.

    Reads the module-level names ``batting_team``, ``bowling_team``,
    ``batting_team_players`` and ``bowling_team_players``. Team names are
    matched as literal substrings of 'Bowling team' / 'Batting team'
    (not as regular expressions), and non-text cells never match.

    Returns
    -------
    (batters, bowlers) : tuple of DataFrame
        batters - indexed by 'Striker'; 'opponent_scores' is the mean of the
        per-match summed 'runs_scored' while the other team was bowling.
        bowlers - indexed by 'Bowler'; 'opponent_scores' is the mean of the
        per-match summed 'wicket_confirmation' while the other team was batting.
        Both are ordered by 'opponent_scores', highest first.
    """
    def _versus(role, metric, players, opponent_column, opponent):
        # regex=False: team names are plain text; na=False keeps the mask boolean.
        against = df[opponent_column].str.contains(opponent, regex=False, na=False)
        rows = df[df[role].isin(players) & against]
        match_totals = rows.groupby(['Match id', role])[metric].sum().reset_index()
        return match_totals.groupby(role)[metric].mean().reset_index(name='opponent_scores')

    def _ranked(frames, role):
        combined = pd.concat(frames).sort_values(by='opponent_scores', ascending=False)
        return combined.reset_index(drop=True).set_index(role)

    batters = _ranked(
        [
            _versus('Striker', 'runs_scored', batting_team_players, 'Bowling team', bowling_team),
            _versus('Striker', 'runs_scored', bowling_team_players, 'Bowling team', batting_team),
        ],
        'Striker',
    )
    bowlers = _ranked(
        [
            _versus('Bowler', 'wicket_confirmation', batting_team_players, 'Batting team', bowling_team),
            _versus('Bowler', 'wicket_confirmation', bowling_team_players, 'Batting team', batting_team),
        ],
        'Bowler',
    )
    return batters, bowlers
    
# Store the head-to-head tables. No defensive copy of the frame is made:
# OpponentFactor only filters and aggregates its argument, never writes to it.
batters_dfs['OpponentFactor'], bowlers_dfs['OpponentFactor'] = OpponentFactor(df)


# Venue

In [8]:
def VenueFactor(df):
    """Per-match averages at the selected ground for players of both squads.

    Reads the module-level names ``stadium``, ``batting_team_players`` and
    ``bowling_team_players``. ``stadium`` is matched as a literal substring of
    'Venue' (so '.' or '(' in a ground name are ordinary characters), and
    non-text venue cells never match.

    Returns
    -------
    (batters, bowlers) : tuple of DataFrame
        batters - indexed by 'Striker'; 'venue_score' is the mean of the
        per-match summed 'runs_scored' at the venue.
        bowlers - indexed by 'Bowler'; 'venue_score' is the mean of the
        per-match summed 'wicket_confirmation' at the venue.
        Both are ordered by 'venue_score', highest first.
    """
    squad = batting_team_players + bowling_team_players
    at_venue = df['Venue'].str.contains(stadium, regex=False, na=False)

    def _venue_average(role, metric, total_name):
        rows = df[df[role].isin(squad) & at_venue]
        match_totals = rows.groupby(['Match id', role])[metric].sum().reset_index(name=total_name)
        averages = match_totals.groupby(role)[total_name].mean().reset_index(name='venue_score')
        ranked = averages.sort_values(by='venue_score', ascending=False).reset_index(drop=True)
        return ranked.set_index(role)

    batters = _venue_average('Striker', 'runs_scored', 'venue_runs')
    bowlers = _venue_average('Bowler', 'wicket_confirmation', 'venue_wickets')
    return batters, bowlers

# Store the venue tables under the 'VenueFactor' key of both dictionaries.
batters_dfs['VenueFactor'], bowlers_dfs['VenueFactor'] = VenueFactor(df)


# Form

In [9]:
def FormFactor(df, n_matches=5):
    """Per-match averages over each team's most recent matches.

    Reads the module-level names ``batting_team``, ``bowling_team``,
    ``batting_team_players`` and ``bowling_team_players``.

    For each team, the last ``n_matches`` distinct 'Match id' values in which
    that team batted are selected. When the frame has a 'Date' column the
    matches are ordered by date first, so "recent" does not depend on the row
    order of ``df``; without it the existing row order is used. Team names are
    matched as literal substrings of 'Batting team'.

    Parameters
    ----------
    df : DataFrame
        Ball-by-ball data.
    n_matches : int, default 5
        Number of most recent matches per team; expected to be positive.

    Returns
    -------
    (batters, bowlers) : tuple of DataFrame
        batters - indexed by 'Striker'; 'form_score' is the mean of the
        per-match summed 'runs_scored' in the selected matches.
        bowlers - indexed by 'Bowler'; 'form_score' is the mean of the
        per-match summed 'wicket_confirmation' in the selected matches.
        Both are ordered by 'form_score', highest first.
    """
    def _recent_match_ids(team):
        team_rows = df[df['Batting team'].str.contains(team, regex=False, na=False)]
        if 'Date' in team_rows.columns:
            # Stable sort: deliveries on the same date keep their existing order.
            team_rows = team_rows.sort_values('Date', kind='mergesort')
        return team_rows['Match id'].unique()[-n_matches:]

    def _form(role, metric, players, match_ids):
        rows = df[df[role].isin(players) & df['Match id'].isin(match_ids)]
        match_totals = rows.groupby(['Match id', role])[metric].sum()
        return match_totals.groupby(role).mean().reset_index(name='form_score')

    def _ranked(frames, role):
        combined = pd.concat(frames).sort_values(by='form_score', ascending=False)
        return combined.reset_index(drop=True).set_index(role)

    squads = [
        (batting_team_players, _recent_match_ids(batting_team)),
        (bowling_team_players, _recent_match_ids(bowling_team)),
    ]
    batters = _ranked([_form('Striker', 'runs_scored', players, ids) for players, ids in squads], 'Striker')
    bowlers = _ranked([_form('Bowler', 'wicket_confirmation', players, ids) for players, ids in squads], 'Bowler')
    return batters, bowlers

# Store the recent-form tables under the 'FormFactor' key of both dictionaries.
batters_dfs['FormFactor'], bowlers_dfs['FormFactor']= FormFactor(df)


# Innings

In [10]:
def InningsFactor(df):
    """Per-match averages restricted to the innings each squad is paired with.

    Reads the module-level lists ``batting_team_players`` and
    ``bowling_team_players``. The pairing used is:

    * innings 1 - ``batting_team_players`` as strikers, ``bowling_team_players`` as bowlers
    * innings 2 - ``bowling_team_players`` as strikers, ``batting_team_players`` as bowlers

    Returns
    -------
    (batters, bowlers) : tuple of DataFrame
        batters - indexed by 'Striker'; 'innings_score' is the mean of the
        per-match summed 'runs_scored' in the paired innings.
        bowlers - indexed by 'Bowler'; 'innings_score' is the mean of the
        per-match summed 'wicket_confirmation' in the paired innings.
        Both are ordered by 'innings_score', highest first.
    """
    def _innings_average(role, players, innings_no, metric, total_name):
        in_scope = (df[role].isin(players)) & (df['Innings No'] == innings_no)
        match_totals = df[in_scope].groupby(['Match id', role])[metric].sum().reset_index(name=total_name)
        averages = match_totals.groupby(role)[total_name].mean().reset_index(name='innings_score')
        return averages.sort_values(by='innings_score', ascending=False)

    def _ranked(frames, role):
        combined = pd.concat(frames).sort_values(by='innings_score', ascending=False)
        return combined.reset_index(drop=True).set_index(role)

    first_innings_batters = _innings_average('Striker', batting_team_players, 1, 'runs_scored', 'first_inning_runs')
    first_innings_bowlers = _innings_average('Bowler', bowling_team_players, 1, 'wicket_confirmation', 'first_inning_wickets')
    second_innings_batters = _innings_average('Striker', bowling_team_players, 2, 'runs_scored', 'second_inning_runs')
    second_innings_bowlers = _innings_average('Bowler', batting_team_players, 2, 'wicket_confirmation', 'second_inning_wickets')

    batters = _ranked([first_innings_batters, second_innings_batters], "Striker")
    bowlers = _ranked([first_innings_bowlers, second_innings_bowlers], "Bowler")
    return batters, bowlers

# Store the innings tables under the 'InningsFactor' key of both dictionaries.
batters_dfs['InningsFactor'], bowlers_dfs['InningsFactor']=  InningsFactor(df)

In [11]:
# Weight of each factor column in the final score (the weights sum to 1.0).
# Kept in one place so batters and bowlers cannot drift apart.
FACTOR_WEIGHTS = {
    'form_score': 0.30,
    'venue_score': 0.30,
    'opponent_scores': 0.15,
    'innings_score': 0.15,
    'overall_scores': 0.10,
}


def _weighted_score(factors):
    """Return the FACTOR_WEIGHTS-weighted sum of the factor columns of ``factors``."""
    return sum(factors[column] * weight for column, weight in FACTOR_WEIGHTS.items())


# One row per player, one column per factor; a factor with no data for a player counts as 0.
batters_dfs_concatinated = pd.concat(batters_dfs.values(), axis=1).fillna(0)
bowlers_dfs_concatinated = pd.concat(bowlers_dfs.values(), axis=1).fillna(0)

batters_dfs_concatinated['final_batter_score'] = _weighted_score(batters_dfs_concatinated)
bowlers_dfs_concatinated['final_bowler_score'] = _weighted_score(bowlers_dfs_concatinated)

# Rank batting and bowling scores separately (1 = best); a player's final rank
# is the better of the two, so batters and bowlers can share a rank.
result_df = pd.concat(
    [
        batters_dfs_concatinated[['final_batter_score']],
        bowlers_dfs_concatinated[['final_bowler_score']],
    ],
    axis=1,
)
result_df = (
    result_df.rank(ascending=False)
    .min(axis=1)
    .reset_index(name='Final Rank')
    .rename(columns={'index': 'Players'})
)
result_df.sort_values(by='Final Rank')

Unnamed: 0,Players,Final Rank
30,Sandeep Sharma,1.0
2,YBK Jaiswal,1.0
22,JD Unadkat,2.0
5,SV Samson,2.0
31,T Natarajan,3.0
0,JC Buttler,3.0
3,TM Head,4.0
29,YS Chahal,4.0
27,Avesh Khan,5.0
1,H Klaasen,5.0


In [12]:
!pipreqs "C:\Users\Manali Rane\Work Files\IPL Model" --force

INFO: Not scanning for jupyter notebooks.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
INFO: Successfully saved requirements file in C:\Users\Manali Rane\Work Files\IPL Model\requirements.txt
