<font size="20">IPL 2024 PREDICTIONS 🏏🏆</font>

In [8]:
# Required libraries
import pickle

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split as tts
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Load data files (ball-by-ball deliveries and per-match metadata)
deliveries = pd.read_csv("F:/ml project/deliveries.csv")
matches = pd.read_csv("F:/ml project/matches.csv")

# Get target score from first innings data.
# Select 'total_runs' BEFORE aggregating: summing the whole frame also tries to
# "sum" every text column, which is wasted work and, on pandas >= 2.0 where
# numeric_only defaults to False, may fail on object columns.
total_runs = deliveries.groupby(['match_id', 'inning'])['total_runs'].sum().reset_index()
total_runs = total_runs[total_runs['inning'] == 1]

# Merge target score with matches data (inner join: matches without any
# first-innings deliveries are dropped here)
matches = matches.merge(total_runs[['match_id', 'total_runs']], left_on='id', right_on='match_id')

# Select relevant columns from matches data; 'id' becomes the join key
# 'match_id' and the first-innings total is exposed as 'target_runs'
matches = matches[
    ['id', 'city', 'winner', 'toss_winner', 'toss_decision', 'player_of_match', 'total_runs']
].rename(columns={'id': 'match_id', 'total_runs': 'target_runs'})

# Filter for second innings deliveries
second_innings = deliveries[deliveries['inning'] == 2]

# Merge second innings data with match info so every delivery row carries the
# match-level columns (city, winner, toss info, target_runs, ...)
second_innings = second_innings.merge(matches, on='match_id', how='left')

# Calculate score progression in second innings: running total of runs within
# each match, in the row order of the deliveries file
second_innings['current_score'] = second_innings.groupby('match_id')['total_runs'].cumsum()

# Calculate remaining runs and balls
# NOTE(review): target_runs is the first-innings total, so runs_left hits 0 when
# the scores are level rather than when the chase is won - confirm whether a
# "+ 1" is intended here.
second_innings['runs_left'] = second_innings['target_runs'] - second_innings['current_score']
# NOTE(review): assumes a 120-ball innings with 'over' counted from 0 and 'ball'
# from 1 - verify against the CSV; a 1-based 'over' column would need 126 here.
second_innings['balls_left'] = 120 - (second_innings['over'] * 6 + second_innings['ball'])

# Update wickets remaining by tracking dismissals.
# 'player_dismissed' is turned into a 0/1 flag (1 where a value is present on
# that delivery). Vectorised notna() replaces the former fillna("0") sentinel
# plus a row-by-row lambda and yields the same int64 values.
second_innings['player_dismissed'] = second_innings['player_dismissed'].notna().astype('int64')
second_innings['wickets_fallen'] = second_innings.groupby('match_id')['player_dismissed'].cumsum()
second_innings['wickets_remaining'] = 10 - second_innings['wickets_fallen']

# Remove rows with missing key data (any of the categorical feature / label columns)
second_innings = second_innings.dropna(
    subset=['city', 'winner', 'toss_winner', 'toss_decision', 'player_of_match']
)

# Feature matrix shared by all four models: one row per second-innings delivery
feature_columns = ['batting_team', 'bowling_team', 'city', 'runs_left', 'balls_left', 'wickets_remaining']
required_data = second_innings[feature_columns]

# One label series per prediction task, taken from the match-level columns
y_match_winner, y_toss_winner, y_toss_decision, y_player_of_match = (
    second_innings[label_column]
    for label_column in ('winner', 'toss_winner', 'toss_decision', 'player_of_match')
)

# Split data for Match Winner model (75% train / 25% test, fixed seed)
X_train_mw, X_test_mw, y_train_mw, y_test_mw = tts(
    required_data, y_match_winner, test_size=0.25, random_state=100
)

# Encode categorical features: one-hot encode the three text columns (dropping
# the first level of each) and pass the numeric columns through unchanged
trf_mw = ColumnTransformer(
    [
        (
            'trf',
            OneHotEncoder(drop='first', sparse_output=False),
            ['batting_team', 'bowling_team', 'city'],
        )
    ],
    remainder='passthrough',
)

# Match Winner prediction model pipeline
# NOTE(review): 'winner' has more than two classes; recent scikit-learn releases
# appear to deprecate solver='liblinear' for multiclass targets - confirm against
# the installed version.
pipe_mw = Pipeline([
    ('step1', trf_mw),
    ('step2', LogisticRegression(solver='liblinear')),
])

# Train and save the Match Winner model. The context manager guarantees the
# pickle file is flushed and closed even if dump() raises.
pipe_mw.fit(X_train_mw, y_train_mw)
with open('pipe_match_winner.pkl', 'wb') as model_file:
    pickle.dump(pipe_mw, model_file)

# Same process for Toss Winner
X_train_tw, X_test_tw, y_train_tw, y_test_tw = tts(
    required_data, y_toss_winner, test_size=0.25, random_state=100
)

# Pipeline.fit fits its steps in place, so handing trf_mw itself to this
# pipeline would refit the very object the Match Winner pipeline relies on.
# clone() gives this pipeline an unfitted copy with identical settings.
pipe_tw = Pipeline([
    ('step1', clone(trf_mw)),
    ('step2', LogisticRegression(solver='liblinear')),
])

# Train and save; the context manager closes the pickle file deterministically
pipe_tw.fit(X_train_tw, y_train_tw)
with open('pipe_toss_winner.pkl', 'wb') as model_file:
    pickle.dump(pipe_tw, model_file)

# Same process for Toss Decision
X_train_td, X_test_td, y_train_td, y_test_td = tts(
    required_data, y_toss_decision, test_size=0.25, random_state=100
)

# Pipeline.fit fits its steps in place, so handing trf_mw itself to this
# pipeline would refit the very object the Match Winner pipeline relies on.
# clone() gives this pipeline an unfitted copy with identical settings.
pipe_td = Pipeline([
    ('step1', clone(trf_mw)),
    ('step2', LogisticRegression(solver='liblinear')),
])

# Train and save; the context manager closes the pickle file deterministically
pipe_td.fit(X_train_td, y_train_td)
with open('pipe_toss_decision.pkl', 'wb') as model_file:
    pickle.dump(pipe_td, model_file)

# Same process for Player of the Match
X_train_pm, X_test_pm, y_train_pm, y_test_pm = tts(
    required_data, y_player_of_match, test_size=0.25, random_state=100
)

# Pipeline.fit fits its steps in place, so handing trf_mw itself to this
# pipeline would refit the very object the Match Winner pipeline relies on.
# clone() gives this pipeline an unfitted copy with identical settings.
pipe_pm = Pipeline([
    ('step1', clone(trf_mw)),
    ('step2', LogisticRegression(solver='liblinear')),
])

# Train and save; the context manager closes the pickle file deterministically
pipe_pm.fit(X_train_pm, y_train_pm)
with open('pipe_player_of_match.pkl', 'wb') as model_file:
    pickle.dump(pipe_pm, model_file)

# Score every fitted pipeline on its held-out 25% split
from sklearn.metrics import accuracy_score

# NOTE(review): the splits are made over individual deliveries while every label
# is a per-match value, so deliveries from one match can sit in both the train
# and the test set. The percentages below are therefore likely optimistic;
# consider splitting by match_id before trusting them.

# Held-out predictions, one array per task
y_pred_mw = pipe_mw.predict(X_test_mw)
y_pred_tw = pipe_tw.predict(X_test_tw)
y_pred_td = pipe_td.predict(X_test_td)
y_pred_pm = pipe_pm.predict(X_test_pm)

# Report accuracy as a percentage, in the same order the models were trained
for model_label, y_true, y_pred in (
    ("Match Winner", y_test_mw, y_pred_mw),
    ("Toss Winner", y_test_tw, y_pred_tw),
    ("Toss Decision", y_test_td, y_pred_td),
    ("Player of the Match", y_test_pm, y_pred_pm),
):
    print(f"{model_label} Accuracy: {accuracy_score(y_true, y_pred) * 100:.2f}%")


Match Winner Accuracy: 79.06%
Toss Winner Accuracy: 80.57%
Toss Decision Accuracy: 68.99%
Player of the Match Accuracy: 66.32%
