<a href="https://colab.research.google.com/github/avnish22/IPL-2025-Analytics/blob/main/IPL_2025_Analytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# libraries

import pandas as pd
import numpy as np

In [3]:
df_batting = pd.read_csv('/content/batting_modified.csv')

In [4]:
df_bowling = pd.read_csv('/content/bowling_modified.csv')

In [5]:
df_batting.shape

(15, 10)

In [6]:
df_bowling.shape

(15, 9)

In [7]:
# Add mock StdDev_Runs

np.random.seed(42)
df_batting['StdDev_Runs'] = np.random.uniform(12.0, 25.0, size=len(df_batting))

In [13]:
# merge two files

df = pd.merge(df_batting, df_bowling, on='Player_Name', how='outer', suffixes=('_Bat', '_Bowl'))

In [15]:
df.head(1)

Unnamed: 0,Player_Name,Team_Bat,Runs_Bat,Bat_Avg,Bat_SR,4s,6s,50s,Role_Bat,Matches_Bat,StdDev_Runs,Team_Bowl,Matches_Bowl,Wkts,Bowl_Eco,Bowl_Avg,Death_Overs_ER,Role_Bowl,Runs_Bowl
0,Abhishek Sharma,SRH,439.0,33.77,193.39,45.0,28.0,2.0,Opener,14.0,14.363725,,,,,,,,


In [17]:
# clean and finalize columns

df['Runs'] = df['Runs_Bat'].combine_first(df['Runs_Bowl']).fillna(0)
df['Matches'] = df['Matches_Bat'].combine_first(df['Matches_Bowl']).fillna(0)

In [20]:
df['Wkts'] = df['Wkts'].fillna(0)

In [21]:
#Combine Role (Prioritizing Batting Role)
df['Role'] = df['Role_Bat'].combine_first(df['Role_Bowl'])

In [22]:
# Fill remaining NaN values with 0 for metrics/stats
cols_to_fill_zero = df.columns.drop(['Player_Name', 'Role', 'Team_Bat', 'Team_Bowl', 'StdDev_Runs']).tolist()
df[cols_to_fill_zero] = df[cols_to_fill_zero].fillna(0)

In [23]:
df

Unnamed: 0,Player_Name,Team_Bat,Runs_Bat,Bat_Avg,Bat_SR,4s,6s,50s,Role_Bat,Matches_Bat,...,Matches_Bowl,Wkts,Bowl_Eco,Bowl_Avg,Death_Overs_ER,Role_Bowl,Runs_Bowl,Runs,Matches,Role
0,Abhishek Sharma,SRH,439.0,33.77,193.39,45.0,28.0,2.0,Opener,14.0,...,0.0,0.0,0.0,0.0,0.0,0,0.0,439.0,14.0,Opener
1,Ajinkya Rahane,KKR,289.0,36.12,145.5,32.0,10.0,2.0,Top Order,13.0,...,0.0,0.0,0.0,0.0,0.0,0,0.0,289.0,13.0,Top Order
2,Arshdeep Singh,,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,17.0,21.0,8.88,24.66,9.65,Left-arm Pacer,20.0,20.0,17.0,Left-arm Pacer
3,Ashutosh Sharma,PBKS,210.0,42.0,198.11,10.0,22.0,0.0,Finisher,13.0,...,0.0,0.0,0.0,0.0,0.0,0,0.0,210.0,13.0,Finisher
4,Axar Patel,,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,14.0,16.0,8.0,25.0,8.0,All-Rounder,263.0,263.0,14.0,All-Rounder
5,Deepak Chahar,,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,10.0,12.0,8.55,26.0,0.0,Right arm pace,35.0,35.0,10.0,Right arm pace
6,Hardik Pandya,,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,14.0,15.0,9.5,22.8,10.2,All-Rounder,224.0,224.0,14.0,All-Rounder
7,Harshal Patel,,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,13.0,16.0,9.8,25.0,10.8,Right arm pace,5.0,5.0,13.0,Right arm pace
8,Jasprit Bumrah,,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,12.0,18.0,6.67,17.55,6.48,Right arm pace,0.0,0.0,12.0,Right arm pace
9,KL Rahul,DC,539.0,53.9,149.72,45.0,18.0,3.0,WK-Batter,13.0,...,0.0,0.0,0.0,0.0,0.0,0,0.0,539.0,13.0,WK-Batter


In [24]:
# --- 2. Phase 2: Metric Generation ---
print("Step 2: Calculating Core Metrics...")
epsilon = 0.001

Step 2: Calculating Core Metrics...


In [25]:
# 1. Batting Metrics
# Consistency Score = Avg / StdDev (StdDev is mock data)
df['Consistency_Score'] = df['Bat_Avg'] / (df['StdDev_Runs'].fillna(df['Bat_Avg']) + epsilon)
# Batting Value = Avg * sqrt(SR)
df['Batting_Value'] = df['Bat_Avg'] * np.sqrt(df['Bat_SR'].fillna(0))

In [27]:
# 2. Bowling Metrics
# Control Score = 100 / Death_Overs_ER
df['Control_Score'] = 100 / (df['Death_Overs_ER'].replace(0, np.inf).fillna(np.inf) + epsilon)
# Wicket Threat = Wkts / Matches (Matches is the correct, updated data)
df['Wicket_Threat'] = df['Wkts'] / (df['Matches'].replace(0, np.inf).fillna(np.inf) + epsilon)

In [29]:
# 3. Normalization and PIR Calculation

print("calculating PIR......")

calculating PIR......


In [30]:
metrics_cols = ['Consistency_Score', 'Batting_Value', 'Control_Score', 'Wicket_Threat']

In [31]:
def normalize_column(series):
  min_val, max_val = series.min(), series.max()
  if max_val == min_val: return series
  return (series - min_val) / (max_val - min_val)

In [33]:
for col in metrics_cols:
  df[f'Norm_{col}'] = normalize_column(df[col])


In [35]:
# Calculate PIRs (Weight: 60% Consistency/Control, 40% Value/Threat)
df['Batting_PIR'] = (0.6 * df['Norm_Consistency_Score']) + (0.4 * df['Norm_Batting_Value'])
df['Bowling_PIR'] = (0.6 * df['Norm_Control_Score']) + (0.4 * df['Norm_Wicket_Threat'])
df['Final_PIR'] = df[['Batting_PIR', 'Bowling_PIR']].max(axis=1)

In [36]:
# All-Rounder Rule: PIR is the average of Batting PIR and Bowling PIR
is_all_rounder = (df['Runs'] > 0) & (df['Wkts'] > 0)
df.loc[is_all_rounder, 'Final_PIR'] = (df['Batting_PIR'] + df['Bowling_PIR']) / 2

In [38]:
# ---- final output
final_columns = ['Player_Name', 'Role', 'Runs', 'Wkts', 'Matches', 'Final_PIR','Norm_Consistency_Score', 'Norm_Batting_Value', 'Norm_Control_Score', 'Norm_Wicket_Threat']
df_final = df[final_columns].sort_values(by='Final_PIR', ascending=False)
csv_file_path = 'IPL_2026_Selector_Metrics_Final.csv'
df_final.to_csv(csv_file_path, index=False)

In [40]:
print("\n--- Final Player Impact Rating (PIR) Rank (Top 10) ---")
print(df_final.head(10))
print(f"\n✅ All analysis complete. Final file saved: {csv_file_path}")


--- Final Player Impact Rating (PIR) Rank (Top 10) ---
         Player_Name            Role   Runs  Wkts  Matches  Final_PIR  \
8     Jasprit Bumrah  Right arm pace    0.0  18.0     12.0   0.959994   
9           KL Rahul       WK-Batter  539.0   0.0     13.0   0.912346   
18     Sai Sudharsan       Top Order  759.0   0.0     15.0   0.822661   
23  Suryakumar Yadav    Middle Order  717.0   0.0     16.0   0.817849   
24       T Natarajan  Left-arm Pacer    0.0  19.0     12.0   0.812012   
28  Yashasvi Jaiswal          Opener  559.0   0.0     14.0   0.736014   
3    Ashutosh Sharma        Finisher  210.0   0.0     13.0   0.724302   
15       Rinku Singh        Finisher  310.0   0.0     13.0   0.719503   
21      Shreyas Iyer    Middle Order  604.0   0.0     17.0   0.712676   
29  Yuzvendra Chahal         Spinner    0.0  16.0     14.0   0.661171   

    Norm_Consistency_Score  Norm_Batting_Value  Norm_Control_Score  \
8                 0.000000            0.000000            1.000000   
