In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import pickle

In [None]:
match = pd.read_csv('/content/matches.csv')
delivery = pd.read_csv('/content/deliveries.csv')
home_team = pd.read_csv('/content/teamwise_home_and_away.csv')

In [None]:
match.head(4)

Unnamed: 0,id,Season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,IPL-2017,Hyderabad,05-04-2017,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,2,IPL-2017,Pune,06-04-2017,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,
2,3,IPL-2017,Rajkot,07-04-2017,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,
3,4,IPL-2017,Indore,08-04-2017,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,


In [None]:
delivery.head(4)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1.0,1.0,DA Warner,S Dhawan,TS Mills,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1.0,2.0,DA Warner,S Dhawan,TS Mills,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1.0,3.0,DA Warner,S Dhawan,TS Mills,0.0,...,0.0,0.0,0.0,0.0,4.0,0.0,4.0,,,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1.0,4.0,DA Warner,S Dhawan,TS Mills,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,


In [None]:
delivery.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batsman', 'non_striker', 'bowler', 'is_super_over', 'wide_runs',
       'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder'],
      dtype='object')

In [None]:
home_team.head(4)

Unnamed: 0,team,home_wins,away_wins,home_matches,away_matches,home_win_percentage,away_win_percentage
0,Rising Pune Supergiant,5,5,8,8,62.5,62.5
1,Mumbai Indians,58,51,101,86,57.425743,59.302326
2,Chennai Super Kings,51,49,89,75,57.303371,65.333333
3,Delhi Capitals,3,7,6,10,50.0,70.0


In [None]:
match.shape

(756, 18)

In [None]:
delivery.shape

(31012, 21)

In [None]:
total_score_df = delivery.groupby(['match_id','inning']).sum()['total_runs'].reset_index()
total_score_df = total_score_df[total_score_df['inning'] == 1]
total_score_df.rename(columns = {'total_runs':'Target'}, inplace = True)
total_score_df['Target'] = total_score_df['Target'].apply(lambda x: x+1)

In [None]:
total_score_df

Unnamed: 0,match_id,inning,Target
0,1,1,208.0
2,2,1,185.0
4,3,1,184.0
6,4,1,164.0
8,5,1,158.0
...,...,...,...
262,130,1,140.0
264,131,1,166.0
266,132,1,188.0
268,133,1,144.0


In [None]:
match_df = match.merge(total_score_df[['match_id','Target']],left_on='id',right_on='match_id')

In [None]:
match_df

Unnamed: 0,id,Season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3,match_id,Target
0,1,IPL-2017,Hyderabad,05-04-2017,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,208.0
1,2,IPL-2017,Pune,06-04-2017,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,,2,185.0
2,3,IPL-2017,Rajkot,07-04-2017,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,,3,184.0
3,4,IPL-2017,Indore,08-04-2017,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,,4,164.0
4,5,IPL-2017,Bangalore,08-04-2017,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,,5,158.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,130,IPL-2009,Cape Town,26-04-2009,Kings XI Punjab,Rajasthan Royals,Kings XI Punjab,bat,normal,0,Kings XI Punjab,27,0,KC Sangakkara,Newlands,M Erasmus,K Hariharan,,130,140.0
130,131,IPL-2009,Durban,27-04-2009,Chennai Super Kings,Deccan Chargers,Deccan Chargers,field,normal,0,Deccan Chargers,0,6,HH Gibbs,Kingsmead,IL Howell,TH Wijewardene,,131,166.0
131,132,IPL-2009,Port Elizabeth,27-04-2009,Mumbai Indians,Kolkata Knight Riders,Mumbai Indians,bat,normal,0,Mumbai Indians,92,0,SR Tendulkar,St George's Park,BG Jerling,RB Tiffin,,132,188.0
132,133,IPL-2009,Centurion,28-04-2009,Delhi Daredevils,Rajasthan Royals,Delhi Daredevils,bat,normal,0,Rajasthan Royals,0,5,YK Pathan,SuperSport Park,GAV Baxter,RE Koertzen,,133,144.0


In [None]:
match_df['team1'].unique()

array(['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',
       'Rising Pune Supergiant', 'Royal Challengers Bangalore',
       'Kolkata Knight Riders', 'Delhi Daredevils', 'Kings XI Punjab',
       'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers'],
      dtype=object)

In [None]:
teams = [
    'Sunrisers Hyderabad',
    'Mumbai Indians',
    'Royal Challengers Bangalore',
    'Kolkata Knight Riders',
    'Kings XI Punjab',
    'Chennai Super Kings',
    'Rajasthan Royals',
    'Delhi Capitals'
]

In [None]:
match_df['team1'] = match_df['team1'].str.replace('Delhi Daredevils','Delhi Capitals')
match_df['team2'] = match_df['team2'].str.replace('Delhi Daredevils','Delhi Capitals')

match_df['team1'] = match_df['team1'].str.replace('Deccan Chargers','Sunrisers Hyderabad')
match_df['team2'] = match_df['team2'].str.replace('Deccan Chargers','Sunrisers Hyderabad')

match_df['toss_winner'] = match_df['toss_winner'].str.replace('Delhi Daredevils','Delhi Capitals')
match_df['toss_winner'] = match_df['toss_winner'].str.replace('Deccan Chargers','Sunrisers Hyderabad')


In [None]:
match_df

Unnamed: 0,id,Season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3,match_id,Target
0,1,IPL-2017,Hyderabad,05-04-2017,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,208.0
1,2,IPL-2017,Pune,06-04-2017,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,,2,185.0
2,3,IPL-2017,Rajkot,07-04-2017,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,,3,184.0
3,4,IPL-2017,Indore,08-04-2017,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,,4,164.0
4,5,IPL-2017,Bangalore,08-04-2017,Royal Challengers Bangalore,Delhi Capitals,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,,5,158.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,130,IPL-2009,Cape Town,26-04-2009,Kings XI Punjab,Rajasthan Royals,Kings XI Punjab,bat,normal,0,Kings XI Punjab,27,0,KC Sangakkara,Newlands,M Erasmus,K Hariharan,,130,140.0
130,131,IPL-2009,Durban,27-04-2009,Chennai Super Kings,Sunrisers Hyderabad,Sunrisers Hyderabad,field,normal,0,Deccan Chargers,0,6,HH Gibbs,Kingsmead,IL Howell,TH Wijewardene,,131,166.0
131,132,IPL-2009,Port Elizabeth,27-04-2009,Mumbai Indians,Kolkata Knight Riders,Mumbai Indians,bat,normal,0,Mumbai Indians,92,0,SR Tendulkar,St George's Park,BG Jerling,RB Tiffin,,132,188.0
132,133,IPL-2009,Centurion,28-04-2009,Delhi Capitals,Rajasthan Royals,Delhi Capitals,bat,normal,0,Rajasthan Royals,0,5,YK Pathan,SuperSport Park,GAV Baxter,RE Koertzen,,133,144.0


In [None]:
match_df = match_df[match_df['team1'].isin(teams)]
match_df = match_df[match_df['team2'].isin(teams)]

In [None]:
match_df = match_df[match_df['dl_applied'] == 0]

In [None]:
match_df = match_df[['match_id','city','winner','Target','toss_winner', 'toss_decision']]

In [None]:
match_df.shape

(101, 6)

In [None]:
delivery_df = match_df.merge(delivery,on='match_id')

In [None]:
delivery_df.batting_team.value_counts()

Royal Challengers Bangalore    3296
Mumbai Indians                 3258
Kolkata Knight Riders          3086
Kings XI Punjab                3075
Delhi Daredevils               3009
Rajasthan Royals               2333
Chennai Super Kings            2322
Deccan Chargers                2115
Sunrisers Hyderabad            1105
Name: batting_team, dtype: int64

In [None]:
delivery_df['batting_team'] = delivery_df['batting_team'].str.replace('Delhi Daredevils','Delhi Capitals')
delivery_df['bowling_team'] = delivery_df['bowling_team'].str.replace('Delhi Daredevils','Delhi Capitals')

delivery_df['batting_team'] = delivery_df['batting_team'].str.replace('Deccan Chargers','Sunrisers Hyderabad')
delivery_df['bowling_team'] = delivery_df['bowling_team'].str.replace('Deccan Chargers','Sunrisers Hyderabad')

In [None]:
delivery_df = delivery_df[delivery_df['inning'] == 2]

In [None]:
delivery_df.shape

(11247, 26)

In [None]:
delivery_df['current_score'] = delivery_df.groupby('match_id').cumsum()['total_runs']

In [None]:
delivery_df['runs_left'] = delivery_df['Target'] - delivery_df['current_score']

In [None]:
delivery_df['balls_left'] = 120 - ((delivery_df['over']-1)*6 + delivery_df['ball'])

In [None]:
delivery_df

Unnamed: 0,match_id,city,winner,Target,toss_winner,toss_decision,inning,batting_team,bowling_team,over,...,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left
125,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,0.0,1.0,0.0,1.0,,,,1.0,207.0,119.0
126,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,0.0,0.0,0.0,0.0,,,,1.0,207.0,118.0
127,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,0.0,0.0,0.0,0.0,,,,1.0,207.0,117.0
128,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,0.0,2.0,0.0,2.0,,,,3.0,205.0,116.0
129,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,0.0,4.0,0.0,4.0,,,,7.0,201.0,115.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23572,133,Centurion,Rajasthan Royals,144.0,Delhi Capitals,bat,2,Rajasthan Royals,Delhi Capitals,18.0,...,0.0,6.0,0.0,6.0,,,,140.0,4.0,13.0
23573,133,Centurion,Rajasthan Royals,144.0,Delhi Capitals,bat,2,Rajasthan Royals,Delhi Capitals,18.0,...,0.0,1.0,0.0,1.0,,,,141.0,3.0,12.0
23574,133,Centurion,Rajasthan Royals,144.0,Delhi Capitals,bat,2,Rajasthan Royals,Delhi Capitals,19.0,...,0.0,0.0,0.0,0.0,,,,141.0,3.0,11.0
23575,133,Centurion,Rajasthan Royals,144.0,Delhi Capitals,bat,2,Rajasthan Royals,Delhi Capitals,19.0,...,0.0,2.0,0.0,2.0,,,,143.0,1.0,10.0


In [None]:
delivery_df['player_dismissed'] = delivery_df['player_dismissed'].fillna(0)
delivery_df['player_dismissed'] = delivery_df['player_dismissed'].apply(lambda x:x if x == 0 else 1)
delivery_df['player_dismissed'] = delivery_df['player_dismissed'].astype('int')
wickets = delivery_df.groupby('match_id').cumsum()['player_dismissed'].values
delivery_df['fallen_wickets'] = wickets
delivery_df.head()

Unnamed: 0,match_id,city,winner,Target,toss_winner,toss_decision,inning,batting_team,bowling_team,over,...,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left,fallen_wickets
125,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,1.0,0.0,1.0,0,,,1.0,207.0,119.0,0
126,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,0.0,0.0,0.0,0,,,1.0,207.0,118.0,0
127,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,0.0,0.0,0.0,0,,,1.0,207.0,117.0,0
128,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,2.0,0.0,2.0,0,,,3.0,205.0,116.0,0
129,1,Hyderabad,Sunrisers Hyderabad,208.0,Royal Challengers Bangalore,field,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1.0,...,4.0,0.0,4.0,0,,,7.0,201.0,115.0,0


In [None]:
delivery_df.shape

(11247, 30)

In [None]:
delivery_df['crr'] = (delivery_df['current_score']*6)/(120 - delivery_df['balls_left'])
delivery_df['rrr'] = (delivery_df['runs_left']*6)/delivery_df['balls_left']

In [None]:
def result(row):
    return 1 if row['batting_team'] == row['winner'] else 0

delivery_df['result'] = delivery_df.apply(result,axis=1)

In [None]:
final_df = delivery_df[['batting_team','bowling_team','city','toss_winner','toss_decision','runs_left','balls_left','fallen_wickets','Target','crr','rrr','result']]

In [None]:
final_df.batting_team.value_counts()

Mumbai Indians                 1753
Delhi Capitals                 1666
Kolkata Knight Riders          1502
Rajasthan Royals               1479
Kings XI Punjab                1445
Royal Challengers Bangalore    1331
Sunrisers Hyderabad            1122
Chennai Super Kings             949
Name: batting_team, dtype: int64

In [None]:
final_df

Unnamed: 0,batting_team,bowling_team,city,toss_winner,toss_decision,runs_left,balls_left,fallen_wickets,Target,crr,rrr,result
125,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,Royal Challengers Bangalore,field,207.0,119.0,0,208.0,6.000000,10.436975,0
126,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,Royal Challengers Bangalore,field,207.0,118.0,0,208.0,3.000000,10.525424,0
127,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,Royal Challengers Bangalore,field,207.0,117.0,0,208.0,2.000000,10.615385,0
128,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,Royal Challengers Bangalore,field,205.0,116.0,0,208.0,4.500000,10.603448,0
129,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,Royal Challengers Bangalore,field,201.0,115.0,0,208.0,8.400000,10.486957,0
...,...,...,...,...,...,...,...,...,...,...,...,...
23572,Rajasthan Royals,Delhi Capitals,Centurion,Delhi Capitals,bat,4.0,13.0,5,144.0,7.850467,1.846154,1
23573,Rajasthan Royals,Delhi Capitals,Centurion,Delhi Capitals,bat,3.0,12.0,5,144.0,7.833333,1.500000,1
23574,Rajasthan Royals,Delhi Capitals,Centurion,Delhi Capitals,bat,3.0,11.0,5,144.0,7.761468,1.636364,1
23575,Rajasthan Royals,Delhi Capitals,Centurion,Delhi Capitals,bat,1.0,10.0,5,144.0,7.800000,0.600000,1


In [None]:
final_df = final_df.sample(final_df.shape[0])

In [None]:
home_team

Unnamed: 0,team,home_wins,away_wins,home_matches,away_matches,home_win_percentage,away_win_percentage
0,Rising Pune Supergiant,5,5,8,8,62.5,62.5
1,Mumbai Indians,58,51,101,86,57.425743,59.302326
2,Chennai Super Kings,51,49,89,75,57.303371,65.333333
3,Delhi Capitals,3,7,6,10,50.0,70.0
4,Sunrisers Hyderabad,30,28,63,45,47.619048,62.222222
5,Rajasthan Royals,29,46,67,80,43.283582,57.5
6,Deccan Chargers,18,11,43,32,41.860465,34.375
7,Kings XI Punjab,38,44,91,85,41.758242,51.764706
8,Royal Challengers Bangalore,35,49,85,95,41.176471,51.578947
9,Kolkata Knight Riders,34,58,83,95,40.963855,61.052632


In [None]:
home_team.iloc[3]['home_win_percentage'] += ((home_team.iloc[3]['home_wins'] + home_team.iloc[10]['home_wins'])*100)/(home_team.iloc[3]['home_matches'] + home_team.iloc[10]['home_matches'])
home_team.iloc[3]['away_win_percentage'] += ((home_team.iloc[3]['away_wins'] + home_team.iloc[10]['away_wins'])*100)/(home_team.iloc[3]['away_matches'] + home_team.iloc[10]['away_matches'])


home_team.iloc[4]['home_win_percentage'] += ((home_team.iloc[4]['home_wins'] + home_team.iloc[6]['home_wins'])*100)/(home_team.iloc[4]['home_matches'] + home_team.iloc[6]['home_matches'])
home_team.iloc[4]['away_win_percentage'] += ((home_team.iloc[4]['away_wins'] + home_team.iloc[6]['away_wins'])*100)/(home_team.iloc[4]['away_matches'] + home_team.iloc[6]['away_matches'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cacher_needs_updating = self._check_is_chained_assignment_possible()


In [None]:
home_team = home_team[home_team['team'].isin(teams)]

In [None]:
home_team.columns

Index(['team', 'home_wins', 'away_wins', 'home_matches', 'away_matches',
       'home_win_percentage', 'away_win_percentage'],
      dtype='object')

In [None]:
home_team.reset_index()

Unnamed: 0,index,team,home_wins,away_wins,home_matches,away_matches,home_win_percentage,away_win_percentage
0,1,Mumbai Indians,58,51,101,86,57.425743,59.302326
1,2,Chennai Super Kings,51,49,89,75,57.303371,65.333333
2,3,Delhi Capitals,3,7,6,10,50.0,70.0
3,4,Sunrisers Hyderabad,30,28,63,45,47.619048,62.222222
4,5,Rajasthan Royals,29,46,67,80,43.283582,57.5
5,7,Kings XI Punjab,38,44,91,85,41.758242,51.764706
6,8,Royal Challengers Bangalore,35,49,85,95,41.176471,51.578947
7,9,Kolkata Knight Riders,34,58,83,95,40.963855,61.052632


In [None]:
team_city = {
    'Sunrisers Hyderabad' : 'Hyderabad',
    'Mumbai Indians' : 'Mumbai',
    'Royal Challengers Bangalore' : 'Bangalore',
    'Kolkata Knight Riders' : 'Kolkata',
    'Kings XI Punjab' : 'Chandigarh',
    'Chennai Super Kings' : 'Chennai',
    'Rajasthan Royals' : 'Jaipur',
    'Delhi Capitals' : 'Delhi'
}

In [None]:
def team_city_fix(row):
  t1 = team_city[row['batting_team']]
  t2 = team_city[row['bowling_team']]
  if row['city'] == t1:
     return 1
  elif row['city'] == t2:
    return 0
  else:
    return 2

In [None]:
final_df['Home_team'] = 0

In [None]:
final_df['Home_team'] = final_df.apply(team_city_fix,axis=1)

In [None]:
final_df.Home_team

22063    2
10721    0
17445    1
11434    0
10710    0
        ..
7487     1
3772     1
918      2
16158    0
6237     1
Name: Home_team, Length: 11247, dtype: int64

In [None]:
hwp_lst = list(home_team.home_win_percentage)
awp_lst = list(home_team.away_win_percentage)
teams = list(home_team.team)

In [None]:
hwp = {}
awp = {}
for key in teams:
    for value in hwp_lst:
        hwp[key] = value
        hwp_lst.remove(value)
        break 

for key1 in teams:
    for value1 in awp_lst:
        awp[key1] = value1
        awp_lst.remove(value1)
        break

In [None]:
hwp

{'Chennai Super Kings': 57.30337078651685,
 'Delhi Capitals': 50.0,
 'Kings XI Punjab': 41.75824175824176,
 'Kolkata Knight Riders': 40.963855421686745,
 'Mumbai Indians': 57.42574257425742,
 'Rajasthan Royals': 43.28358208955223,
 'Royal Challengers Bangalore': 41.17647058823529,
 'Sunrisers Hyderabad': 47.61904761904761}

In [None]:
def home_p0(row):
  return hwp[row['bowling_team']]

def home_p1(row):
  return hwp[row['batting_team']]

def away_p0(row):
  return awp[row['bowling_team']]

def away_p1(row):
  return awp[row['batting_team']]

In [None]:
final_df['bowl_hwp'] = 0
final_df['bat_hwp'] = 0
final_df['bowl_awp'] = 0
final_df['bat_awp'] = 0

In [None]:
final_df['bowl_hwp'] = final_df.apply(home_p0,axis=1)
final_df['bat_hwp'] = final_df.apply(home_p1,axis=1)
final_df['bowl_awp'] = final_df.apply(away_p0,axis=1)
final_df['bat_awp'] = final_df.apply(away_p1,axis=1)

In [None]:
final_df.sample(3)

Unnamed: 0,batting_team,bowling_team,city,toss_winner,toss_decision,runs_left,balls_left,fallen_wickets,Target,crr,rrr,result,Home_team,bowl_hwp,bat_hwp,bowl_awp,bat_awp
4369,Kolkata Knight Riders,Sunrisers Hyderabad,Hyderabad,Kolkata Knight Riders,field,146.0,71.0,2,210.0,7.836735,12.338028,0,0,47.619048,40.963855,62.222222,61.052632
13095,Sunrisers Hyderabad,Chennai Super Kings,Chennai,Sunrisers Hyderabad,field,142.0,116.0,0,145.0,4.5,7.344828,0,0,57.303371,47.619048,65.333333,62.222222
7697,Delhi Capitals,Rajasthan Royals,Delhi,Rajasthan Royals,bat,46.0,62.0,1,130.0,8.689655,4.451613,0,1,43.283582,50.0,57.5,70.0


In [None]:
final_df.isnull().sum()

batting_team      0
bowling_team      0
city              0
toss_winner       0
toss_decision     0
runs_left         0
balls_left        0
fallen_wickets    0
Target            0
crr               0
rrr               0
result            0
Home_team         0
bowl_hwp          0
bat_hwp           0
bowl_awp          0
bat_awp           0
dtype: int64

In [None]:
final_df.describe()

Unnamed: 0,runs_left,balls_left,fallen_wickets,Target,crr,rrr,result,Home_team,bowl_hwp,bat_hwp,bowl_awp,bat_awp
count,11247.0,11247.0,11247.0,11247.0,11247.0,11247.0,11247.0,11247.0,11247.0,11247.0,11247.0,11247.0
mean,93.179337,64.25918,2.580066,166.341247,7.616604,,0.404552,0.766338,47.255139,47.343066,59.508072,59.801438
std,50.816094,32.967867,2.168183,30.016458,2.366111,,0.490827,0.75532,6.336596,6.432903,5.999319,5.985036
min,-5.0,-1.0,0.0,68.0,0.0,-inf,0.0,0.0,40.963855,40.963855,51.578947,51.578947
25%,52.0,37.0,1.0,146.0,6.285714,7.090909,0.0,0.0,41.176471,41.176471,51.764706,57.5
50%,92.0,65.0,2.0,166.0,7.636364,8.705882,0.0,1.0,47.619048,43.283582,61.052632,59.302326
75%,131.0,93.0,4.0,188.0,8.875,10.84615,1.0,1.0,50.0,50.0,62.222222,62.222222
max,237.0,119.0,10.0,241.0,24.0,inf,1.0,2.0,57.425743,57.425743,70.0,70.0


In [None]:
final_df.dropna(inplace=True)

In [None]:
final_df = final_df[final_df['balls_left'] != 0]

In [None]:
X = final_df.drop(['result'], axis=1)
y = final_df['result']

In [None]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=1)

In [None]:
X_train

Unnamed: 0,batting_team,bowling_team,city,toss_winner,toss_decision,runs_left,balls_left,fallen_wickets,Target,crr,rrr,Home_team,bowl_hwp,bat_hwp,bowl_awp,bat_awp
182,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,Royal Challengers Bangalore,field,121.0,65.0,2,208.0,9.490909,11.169231,0,47.619048,41.176471,62.222222,51.578947
7872,Royal Challengers Bangalore,Mumbai Indians,Mumbai,Mumbai Indians,bat,149.0,104.0,0,166.0,6.375000,8.596154,0,57.425743,41.176471,59.302326,51.578947
7029,Mumbai Indians,Kolkata Knight Riders,Bangalore,Mumbai Indians,field,17.0,42.0,4,108.0,7.000000,2.428571,2,40.963855,57.425743,61.052632,59.302326
19705,Delhi Capitals,Rajasthan Royals,Mumbai,Delhi Capitals,field,165.0,85.0,3,193.0,4.800000,11.647059,2,43.283582,50.000000,57.500000,70.000000
23059,Sunrisers Hyderabad,Chennai Super Kings,Durban,Sunrisers Hyderabad,field,81.0,60.0,2,166.0,8.500000,8.100000,2,57.303371,47.619048,65.333333,62.222222
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18032,Kings XI Punjab,Sunrisers Hyderabad,Chandigarh,Kings XI Punjab,field,37.0,23.0,3,176.0,8.597938,9.652174,1,47.619048,41.758242,62.222222,51.764706
18986,Kolkata Knight Riders,Kings XI Punjab,Kolkata,Kings XI Punjab,bat,99.0,56.0,4,175.0,7.125000,10.607143,1,41.758242,40.963855,51.764706,61.052632
13109,Sunrisers Hyderabad,Chennai Super Kings,Chennai,Sunrisers Hyderabad,field,130.0,102.0,0,145.0,5.000000,7.647059,0,57.303371,47.619048,65.333333,62.222222
19452,Chennai Super Kings,Sunrisers Hyderabad,Hyderabad,Sunrisers Hyderabad,bat,109.0,91.0,1,148.0,8.068966,7.186813,0,47.619048,57.303371,62.222222,65.333333


In [None]:
X_train.columns

Index(['batting_team', 'bowling_team', 'city', 'toss_winner', 'toss_decision',
       'runs_left', 'balls_left', 'fallen_wickets', 'Target', 'crr', 'rrr',
       'Home_team', 'bowl_hwp', 'bat_hwp', 'bowl_awp', 'bat_awp'],
      dtype='object')

In [None]:
trf = ColumnTransformer([
    ('trf',OneHotEncoder(sparse=False,drop='first', handle_unknown='ignore'),['batting_team','bowling_team','city', 'toss_winner', 'toss_decision'])
]
,remainder='passthrough')

In [None]:
pipe1 = Pipeline(steps=[
    ('step1',trf),
    ('step2',XGBClassifier(random_state=42))
])

In [None]:
pipe1.fit(X_train,y_train)

Pipeline(steps=[('step1',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('trf',
                                                  OneHotEncoder(drop='first',
                                                                handle_unknown='ignore',
                                                                sparse=False),
                                                  ['batting_team',
                                                   'bowling_team', 'city',
                                                   'toss_winner',
                                                   'toss_decision'])])),
                ('step2', XGBClassifier(random_state=42))])

In [None]:
y_pred1 = pipe1.predict(X_test)
accuracy_score(y_test,y_pred1)

0.9977698483496877

In [None]:
 pipe1.predict_proba(X_test)[99]

array([0.10925436, 0.89074564], dtype=float32)

In [None]:
X_test.iloc[100]

batting_team      Sunrisers Hyderabad
bowling_team           Mumbai Indians
city                        Hyderabad
toss_winner            Mumbai Indians
toss_decision                     bat
runs_left                        38.0
balls_left                       38.0
fallen_wickets                      2
Target                          139.0
crr                          7.390244
rrr                               6.0
Home_team                           1
bowl_hwp                    57.425743
bat_hwp                     47.619048
bowl_awp                    59.302326
bat_awp                     62.222222
Name: 5802, dtype: object

In [None]:
pipe2 = Pipeline(steps=[
    ('step1',trf),
    ('step2',LogisticRegression(solver='liblinear'))
])

In [None]:
pipe2.fit(X_train,y_train)

Pipeline(steps=[('step1',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('trf',
                                                  OneHotEncoder(drop='first',
                                                                handle_unknown='ignore',
                                                                sparse=False),
                                                  ['batting_team',
                                                   'bowling_team', 'city',
                                                   'toss_winner',
                                                   'toss_decision'])])),
                ('step2', LogisticRegression(solver='liblinear'))])

In [None]:
y_pred2 = pipe2.predict(X_test)
accuracy_score(y_test,y_pred2)

0.9692239072256913

In [None]:
 pipe2.predict_proba(X_test)[99]

array([0.11419204, 0.88580796])

In [None]:
pickle.dump(pipe2,open('pipe.pkl','wb'))
pickle.dump(hwp,open('hwp.pkl','wb'))
pickle.dump(awp,open('awp.pkl','wb'))
