#  Project -- IPL Winner Prediction

In [6]:
# importing all libraries
import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

import pickle

**Data Collection**

In [7]:
match=pd.read_csv('../data/matches.csv')
delivery=pd.read_csv('../data/deliveries.csv')

In [8]:
match.head()

Unnamed: 0,id,Season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,IPL-2017,Hyderabad,05-04-2017,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,2,IPL-2017,Pune,06-04-2017,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,
2,3,IPL-2017,Rajkot,07-04-2017,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,
3,4,IPL-2017,Indore,08-04-2017,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,
4,5,IPL-2017,Bangalore,08-04-2017,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,


In [9]:
delivery.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,4,0,4,,,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,2,2,,,


**Feature Extraction & Selection**

In [10]:
total_score_df=delivery.groupby(['match_id','inning']).sum()['total_runs'].reset_index()

In [11]:
total_score_df=total_score_df[total_score_df['inning']==1]

In [12]:
total_score_df

Unnamed: 0,match_id,inning,total_runs
0,1,1,207
2,2,1,184
4,3,1,183
6,4,1,163
8,5,1,157
...,...,...,...
1518,11347,1,143
1520,11412,1,136
1522,11413,1,171
1524,11414,1,155


In [13]:
match_df=match.merge(total_score_df[['match_id','total_runs']],left_on='id',right_on='match_id')

In [14]:
match_df['team1'].unique()

array(['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',
       'Rising Pune Supergiant', 'Royal Challengers Bangalore',
       'Kolkata Knight Riders', 'Delhi Daredevils', 'Kings XI Punjab',
       'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Rising Pune Supergiants',
       'Delhi Capitals'], dtype=object)

In [15]:
teams=[
    'Kolkata Knight Riders',
    'Sunrisers Hyderabad',
    'Mumbai Indians',
    'Gujarat Titans',
    'Royal Challengers Bangalore',
    'Kings XI Punjab',
    'Chennai Super Kings',
    'Rajasthan Royals',
    'Lucknow Supergiants',
    'Delhi Capitals'
]

In [16]:
match_df['team1']=match_df['team1'].str.replace('Delhi Daredevils','Delhi Capitals')
match_df['team2']=match_df['team2'].str.replace('Delhi Daredevils','Delhi Capitals')

match_df['team1']=match_df['team1'].str.replace('Deccan Chargers','Sunrisers Hyderabad')
match_df['team2']=match_df['team2'].str.replace('Deccan Chargers','Sunrisers Hyderabad')

match_df['team1']=match_df['team1'].str.replace('Gujarat Lions','Gujarat Titans')
match_df['team2']=match_df['team2'].str.replace('Gujarat Lions','Gujarat Titans')

match_df['team1']=match_df['team1'].str.replace('Rising Pune Supergiants','Lucknow Supergiants')
match_df['team2']=match_df['team2'].str.replace('Rising Pune Supergiants','Lucknow Supergiants')

In [17]:
match_df=match_df[match_df['team1'].isin(teams)]
match_df=match_df[match_df['team2'].isin(teams)]

match_df=match_df[match_df['dl_applied']==0]

In [18]:
match_df['team2'].unique()

array(['Royal Challengers Bangalore', 'Kolkata Knight Riders',
       'Delhi Capitals', 'Sunrisers Hyderabad', 'Mumbai Indians',
       'Kings XI Punjab', 'Gujarat Titans', 'Rajasthan Royals',
       'Chennai Super Kings', 'Lucknow Supergiants'], dtype=object)

In [19]:
match_df.shape

(663, 20)

In [20]:
match_df=match_df[['match_id','city','winner','total_runs']] 

In [21]:
delivery_df=match_df.merge(delivery,on='match_id')

In [22]:
delivery_df=delivery_df[delivery_df['inning']==2]

In [23]:
delivery_df

Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs_y,player_dismissed,dismissal_kind,fielder
125,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,1,CH Gayle,...,0,0,0,0,1,0,1,,,
126,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,2,Mandeep Singh,...,0,0,0,0,0,0,0,,,
127,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,3,Mandeep Singh,...,0,0,0,0,0,0,0,,,
128,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,4,Mandeep Singh,...,0,0,0,0,2,0,2,,,
129,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,5,Mandeep Singh,...,0,0,0,0,4,0,4,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158383,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,2,RA Jadeja,...,0,0,0,0,1,0,1,,,
158384,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,3,SR Watson,...,0,0,0,0,2,0,2,,,
158385,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,4,SR Watson,...,0,0,0,0,1,0,1,SR Watson,run out,KH Pandya
158386,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,5,SN Thakur,...,0,0,0,0,2,0,2,,,


In [24]:
delivery_df['current_score']=delivery_df.groupby('match_id')['total_runs_y'].cumsum()

In [25]:
delivery_df['runs_left']=delivery_df['total_runs_x']-delivery_df['current_score']+1

In [26]:
delivery_df['balls_left']=126-(delivery_df['over']*6+delivery_df['ball'])

In [27]:
# wickets left
delivery_df['player_dismissed']=delivery_df['player_dismissed'].fillna("0")
delivery_df['player_dismissed']=delivery_df['player_dismissed'].apply(lambda x:x if x=="0" else "1")
delivery_df['player_dismissed']=delivery_df['player_dismissed'].astype('int')
wickets=delivery_df.groupby('match_id')['player_dismissed'].cumsum().values
delivery_df['wickets_left']=10-wickets

In [28]:
delivery_df.tail()

Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,batsman_runs,extra_runs,total_runs_y,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left,wickets_left
158383,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,2,RA Jadeja,...,1,0,1,0,,,152,1,4,5
158384,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,3,SR Watson,...,2,0,2,0,,,154,-1,3,5
158385,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,4,SR Watson,...,1,0,1,1,run out,KH Pandya,155,-2,2,4
158386,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,5,SN Thakur,...,2,0,2,0,,,157,-4,1,4
158387,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,6,SN Thakur,...,0,0,0,1,lbw,,157,-4,0,3


In [29]:
delivery_df['curr_rr']=(delivery_df['current_score']*6)/(120-delivery_df['balls_left'])

In [30]:
delivery_df['required_rr']=(delivery_df['runs_left']*6)/(delivery_df['balls_left'])

In [31]:
def result(df):
    return 1 if df['batting_team']==df['winner'] else 0

In [32]:
delivery_df['result']=delivery_df.apply(result,axis=1)

In [33]:
delivery_df.head()

Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left,wickets_left,curr_rr,required_rr,result
125,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,1,CH Gayle,...,0,,,1,207,119,10,6.0,10.436975,0
126,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,2,Mandeep Singh,...,0,,,1,207,118,10,3.0,10.525424,0
127,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,3,Mandeep Singh,...,0,,,1,207,117,10,2.0,10.615385,0
128,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,4,Mandeep Singh,...,0,,,3,205,116,10,4.5,10.603448,0
129,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,5,Mandeep Singh,...,0,,,7,201,115,10,8.4,10.486957,0


In [34]:
dataset=delivery_df[['batting_team','bowling_team','city','runs_left','balls_left','wickets_left','total_runs_x','curr_rr','required_rr','result']]

In [35]:
dataset['batting_team']=dataset['batting_team'].str.replace('Delhi Daredevils','Delhi Capitals')
dataset['bowling_team']=dataset['bowling_team'].str.replace('Delhi Daredevils','Delhi Capitals')

dataset['batting_team']=dataset['batting_team'].str.replace('Deccan Chargers','Sunrisers Hyderabad')
dataset['bowling_team']=dataset['bowling_team'].str.replace('Deccan Chargers','Sunrisers Hyderabad')

dataset['batting_team']=dataset['batting_team'].str.replace('Gujarat Lions','Gujarat Titans')
dataset['bowling_team']=dataset['bowling_team'].str.replace('Gujarat Lions','Gujarat Titans')

dataset['batting_team']=dataset['batting_team'].str.replace('Rising Pune Supergiants','Lucknow Supergiants')
dataset['bowling_team']=dataset['bowling_team'].str.replace('Rising Pune Supergiants','Lucknow Supergiants')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['batting_team']=dataset['batting_team'].str.replace('Delhi Daredevils','Delhi Capitals')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['bowling_team']=dataset['bowling_team'].str.replace('Delhi Daredevils','Delhi Capitals')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['ba

In [36]:
dataset=dataset[dataset['batting_team'].isin(teams)]
dataset=dataset[dataset['bowling_team'].isin(teams)]

In [37]:
dataset['batting_team'].unique()

array(['Royal Challengers Bangalore', 'Kolkata Knight Riders',
       'Delhi Capitals', 'Sunrisers Hyderabad', 'Mumbai Indians',
       'Kings XI Punjab', 'Gujarat Titans', 'Rajasthan Royals',
       'Chennai Super Kings', 'Lucknow Supergiants'], dtype=object)

In [38]:
dataset['city'].unique()

array(['Hyderabad', 'Rajkot', 'Bangalore', 'Mumbai', 'Indore', 'Kolkata',
       'Delhi', 'Chandigarh', 'Kanpur', 'Jaipur', 'Chennai', 'Cape Town',
       'Port Elizabeth', 'Durban', 'Centurion', 'East London',
       'Johannesburg', 'Kimberley', 'Bloemfontein', 'Ahmedabad',
       'Cuttack', 'Nagpur', 'Dharamsala', 'Visakhapatnam', 'Pune',
       'Raipur', 'Ranchi', 'Abu Dhabi', 'Sharjah', nan, 'Mohali',
       'Bengaluru'], dtype=object)

In [39]:
dataset.head() # final dataset

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets_left,total_runs_x,curr_rr,required_rr,result
125,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,207,119,10,207,6.0,10.436975,0
126,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,207,118,10,207,3.0,10.525424,0
127,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,207,117,10,207,2.0,10.615385,0
128,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,205,116,10,207,4.5,10.603448,0
129,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,201,115,10,207,8.4,10.486957,0


In [40]:
df=dataset.sample(dataset.shape[0])

In [41]:
df.sample()

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets_left,total_runs_x,curr_rr,required_rr,result
21515,Sunrisers Hyderabad,Royal Challengers Bangalore,Bangalore,148,112,10,156,6.75,7.928571,0


In [42]:
df.shape

(76636, 10)

In [43]:
df.dropna(inplace=True)
df=df[df['balls_left']!=0]

**Model Training**

In [44]:
X=df.iloc[:,:-1]
Y=df.iloc[:,-1]

In [45]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=1)

In [46]:
X_train

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets_left,total_runs_x,curr_rr,required_rr
16216,Mumbai Indians,Rajasthan Royals,Mumbai,32,50,7,103,6.171429,3.840000
83733,Sunrisers Hyderabad,Delhi Capitals,Hyderabad,48,91,9,80,6.827586,3.164835
66772,Rajasthan Royals,Royal Challengers Bangalore,Jaipur,160,98,10,189,8.181818,9.795918
45115,Mumbai Indians,Chennai Super Kings,Chennai,93,53,7,165,6.537313,10.528302
40690,Kings XI Punjab,Rajasthan Royals,Chandigarh,103,79,9,183,11.853659,7.822785
...,...,...,...,...,...,...,...,...,...
134628,Sunrisers Hyderabad,Chennai Super Kings,Hyderabad,44,19,6,186,8.495050,13.894737
117253,Kolkata Knight Riders,Delhi Capitals,Kolkata,90,112,10,98,6.750000,4.821429
52473,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,148,88,7,175,5.250000,10.090909
56161,Sunrisers Hyderabad,Chennai Super Kings,Chennai,33,13,6,165,7.457944,15.230769


In [47]:
# One hot encoding
trf = ColumnTransformer(transformers=[
    ('trf',OneHotEncoder(sparse_output=False,drop='first'),['batting_team','bowling_team','city'])
]
,remainder='passthrough')

In [48]:
# creating pipeline
pipe = Pipeline(steps=[
    ('step1',trf),
    ('step2',LogisticRegression(solver='liblinear'))
])

In [49]:
pipe.fit(X_train,Y_train)

In [50]:
y_pred = pipe.predict(X_test)

In [51]:
accuracy_score(Y_test,y_pred)

0.8045794454370988

In [52]:
pipe.predict_proba(X_test)[10]

array([0.29217136, 0.70782864])

**Saving The Model**

In [53]:
pickle.dump(pipe,open('pipe.pkl','wb'))

In [54]:
pickle.load(open('pipe.pkl','rb'))

**Results**

In [57]:
batting_team=input("Enter batting team-->")
bowling_team=input("Enter bowling team-->")
selected_city=input("Enter city-->")
Target=int(input("Enter Target-->"))
Score=int(input("Enter Score-->"))
Overs=int(input("Enter Over-->"))
Wickets=int(input("Enter Wicket-->"))


runs_left=Target-Score
balls_left=120-(Overs*6)
wickets_left=10-Wickets
curr_rr=Score/Overs
required_rr=(runs_left*6)/balls_left

input_df=pd.DataFrame({'batting_team':[batting_team],'bowling_team':[bowling_team],
                          'city':[selected_city],'runs_left':[runs_left],'balls_left':[balls_left],'wickets_left':[wickets_left],
                          'total_runs_x':[Target],'curr_rr':[curr_rr],'required_rr':[required_rr]})

In [58]:
result=pipe.predict_proba(input_df)
loss=result[0][0]
win=result[0][1]
print(batting_team+"-"+str(round(win*100))+ "%")
print(bowling_team + "-" + str(round(loss*100)) + "%")

Delhi Capitals-94%
Chennai Super Kings-6%
