#  IPL WIN PROBABLITY PREDICTOR 
__Aim__: Predict the winning probablity of the teams in 2nd innings of a cricket match based on the current scenario of both the teams.

__Available Data__:
Kaggle Data : https://www.kaggle.com/datasets/ramjidoolla/ipl-data-set


In [1]:
import pandas as pd
import numpy as np

In [2]:
match = pd.read_csv('matches.csv')
delivery = pd.read_csv('deliveries.csv')

__Note__: 
- We are considering 2 files: 
   - `matches.csv`   - Has result of each match / Each Row is a match.
   - `deliveries.csv`- Has Bowl to bowl data of each match.



- `WE ARE GOING TO CONSIDER DATA OF ONLY 2ND INNINGS.`
- From first innings we need data of only runs scored.

In [3]:
match.head()

Unnamed: 0,id,Season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,IPL-2017,Hyderabad,05-04-2017,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,2,IPL-2017,Pune,06-04-2017,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,
2,3,IPL-2017,Rajkot,07-04-2017,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,
3,4,IPL-2017,Indore,08-04-2017,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,
4,5,IPL-2017,Bangalore,08-04-2017,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,


In [4]:
match['team1'].unique()

array(['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',
       'Rising Pune Supergiant', 'Royal Challengers Bangalore',
       'Kolkata Knight Riders', 'Delhi Daredevils', 'Kings XI Punjab',
       'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Rising Pune Supergiants',
       'Delhi Capitals'], dtype=object)

In [5]:
delivery.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,4,0,4,,,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,2,2,,,


# Data Cleaning
- We use following columns in data:
   1. Batting Team
   2. Bowling Team
   3. City
   4. runs left
   5. balls left 
   6. wickets left
   7. total_runs
   8. Current_run_rate
   9. Required_run_rate
   10. Result of Match

#### Getting Total runs in both the innings of a Match We use.
- Match Id
- Innings
- Total Runs

In [6]:
total_score_df = delivery.groupby(['match_id', 'inning']).sum()['total_runs'].reset_index()

  total_score_df = delivery.groupby(['match_id', 'inning']).sum()['total_runs'].reset_index()


In [7]:
total_score_df.head()

Unnamed: 0,match_id,inning,total_runs
0,1,1,207
1,1,2,172
2,2,1,184
3,2,2,187
4,3,1,183


##### We have data of both innings. We want only for 1st innings.

In [8]:
total_score_df = total_score_df[total_score_df['inning']==1]

#### Merging- To get the runs scored in 1st innings in Match Dataframe.

In [9]:
match_df = match.merge(total_score_df[['match_id','total_runs']], left_on='id', right_on='match_id')

#### Data cleaning on Team Names
1. Removing data of those teams which are NOT the part IPL currently.
2. Giving latest names to those teams whose name is changed.

In [10]:
match_df['team1'].unique()

array(['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',
       'Rising Pune Supergiant', 'Royal Challengers Bangalore',
       'Kolkata Knight Riders', 'Delhi Daredevils', 'Kings XI Punjab',
       'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Rising Pune Supergiants',
       'Delhi Capitals'], dtype=object)

In [11]:
teams = [
    'Sunrisers Hyderabad', 
    'Mumbai Indians', 
    'Royal Challengers Bangalore',
    'Kolkata Knight Riders',
    'Kings XI Punjab',
    'Chennai Super Kings', 
    'Rajasthan Royals',
    'Delhi Capitals'
]

In [12]:
match_df['team1'] = match_df['team1'].str.replace('Delhi Daredevils', 'Delhi Capitals')
match_df['team1'] = match_df['team2'].str.replace('Delhi Daredevils', 'Delhi Capitals')

match_df['team1'] = match_df['team1'].str.replace('Deccan Chargers', 'Sunrisers Hyderabad')
match_df['team1'] = match_df['team2'].str.replace('Deccan Chargers', 'Sunrisers Hyderabad')

In [13]:
match_df = match_df[match_df['team1'].isin(teams)]
match_df = match_df[match_df['team2'].isin(teams)]

In [14]:
match_df['team1'].unique()

array(['Royal Challengers Bangalore', 'Kolkata Knight Riders',
       'Kings XI Punjab', 'Sunrisers Hyderabad', 'Mumbai Indians',
       'Rajasthan Royals', 'Chennai Super Kings', 'Delhi Capitals'],
      dtype=object)

In [15]:
match_df.shape

(571, 20)

In [16]:
match_df.head()

Unnamed: 0,id,Season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3,match_id,total_runs
0,1,IPL-2017,Hyderabad,05-04-2017,Royal Challengers Bangalore,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,207
2,3,IPL-2017,Rajkot,07-04-2017,Kolkata Knight Riders,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,,3,183
3,4,IPL-2017,Indore,08-04-2017,Kings XI Punjab,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,,4,163
5,6,IPL-2017,Hyderabad,09-04-2017,Sunrisers Hyderabad,Sunrisers Hyderabad,Sunrisers Hyderabad,field,normal,0,Sunrisers Hyderabad,0,9,Rashid Khan,"Rajiv Gandhi International Stadium, Uppal",A Deshmukh,NJ Llong,,6,135
6,7,IPL-2017,Mumbai,09-04-2017,Mumbai Indians,Mumbai Indians,Mumbai Indians,field,normal,0,Mumbai Indians,0,4,N Rana,Wankhede Stadium,Nitin Menon,CK Nandan,,7,178


#### Removing Matches where Duckworth lewis applied

In [17]:
match_df = match_df[match_df['dl_applied']==0]

In [18]:
match_df.shape

(557, 20)

#### Selecting columns from match_df 

In [19]:
match_df = match_df[['match_id','city', 'winner', 'total_runs']]

In [20]:
match_df.head(3)

Unnamed: 0,match_id,city,winner,total_runs
0,1,Hyderabad,Sunrisers Hyderabad,207
2,3,Rajkot,Kolkata Knight Riders,183
3,4,Indore,Kings XI Punjab,163


#### Joining this match result with delivery dataframe

In [21]:
delivery_df = match_df.merge(delivery, on='match_id')

#### Getting the values of 2nd innings only from delivery dataframe

In [22]:
delivery_df = delivery_df[delivery_df['inning']==2]

In [23]:
delivery_df.shape

(64433, 24)

#### Runs Left and Balls left columns and current_sccore

In [24]:
delivery_df.head(3)

Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs_y,player_dismissed,dismissal_kind,fielder
125,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,1,CH Gayle,...,0,0,0,0,1,0,1,,,
126,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,2,Mandeep Singh,...,0,0,0,0,0,0,0,,,
127,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,3,Mandeep Singh,...,0,0,0,0,0,0,0,,,


In [25]:
delivery_df['current_score'] = delivery_df.groupby('match_id').cumsum()['total_runs_y']

  delivery_df['current_score'] = delivery_df.groupby('match_id').cumsum()['total_runs_y']


In [26]:
delivery_df['current_score'].dtype

dtype('int64')

In [27]:
delivery_df.head(3)

Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs_y,player_dismissed,dismissal_kind,fielder,current_score
125,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,1,CH Gayle,...,0,0,0,1,0,1,,,,1
126,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,2,Mandeep Singh,...,0,0,0,0,0,0,,,,1
127,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,3,Mandeep Singh,...,0,0,0,0,0,0,,,,1


In [28]:
delivery_df['runs_left'] = delivery_df['total_runs_x']-delivery_df['current_score']+1

In [29]:
delivery_df.sample(6)

Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs_y,player_dismissed,dismissal_kind,fielder,current_score,runs_left
14627,91,Chennai,Chennai Super Kings,181,2,Kings XI Punjab,Chennai Super Kings,16,1,PP Chawla,...,0,0,0,0,0,,,,115,67
119182,11138,Kolkata,Kolkata Knight Riders,189,2,Kolkata Knight Riders,Sunrisers Hyderabad,19,5,AD Russell,...,0,0,6,0,6,,,,171,19
45346,285,Jaipur,Chennai Super Kings,196,2,Rajasthan Royals,Chennai Super Kings,13,4,J Botha,...,0,0,1,0,1,,,,89,108
61933,392,Bangalore,Royal Challengers Bangalore,154,2,Royal Challengers Bangalore,Kolkata Knight Riders,5,4,V Kohli,...,0,0,0,0,0,,,,20,135
95510,563,Chennai,Chennai Super Kings,157,2,Rajasthan Royals,Chennai Super Kings,8,2,SR Watson,...,0,0,1,0,1,,,,48,110
102730,610,Hyderabad,Sunrisers Hyderabad,126,2,Sunrisers Hyderabad,Gujarat Lions,8,6,S Dhawan,...,0,0,2,0,2,,,,43,84


In [30]:
delivery_df['balls_left'] = 126 - (delivery_df['over']*6 + delivery_df['ball'])

In [31]:
delivery_df.head()

Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,penalty_runs,batsman_runs,extra_runs,total_runs_y,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left
125,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,1,CH Gayle,...,0,1,0,1,,,,1,207,119
126,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,2,Mandeep Singh,...,0,0,0,0,,,,1,207,118
127,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,3,Mandeep Singh,...,0,0,0,0,,,,1,207,117
128,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,4,Mandeep Singh,...,0,2,0,2,,,,3,205,116
129,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,5,Mandeep Singh,...,0,4,0,4,,,,7,201,115


In [32]:
delivery_df['player_dismissed'] = delivery_df['player_dismissed'].fillna("0")
delivery_df['player_dismissed'] = delivery_df['player_dismissed'].apply(lambda x: x if x=="0" else "1")
delivery_df['player_dismissed'] = delivery_df['player_dismissed'].astype('int')
wickets = delivery_df.groupby('match_id').cumsum()['player_dismissed'].values
delivery_df['wickets'] = 10-wickets
delivery_df.head()

  wickets = delivery_df.groupby('match_id').cumsum()['player_dismissed'].values


Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,batsman_runs,extra_runs,total_runs_y,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left,wickets
125,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,1,CH Gayle,...,1,0,1,0,,,1,207,119,10
126,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,2,Mandeep Singh,...,0,0,0,0,,,1,207,118,10
127,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,3,Mandeep Singh,...,0,0,0,0,,,1,207,117,10
128,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,4,Mandeep Singh,...,2,0,2,0,,,3,205,116,10
129,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,5,Mandeep Singh,...,4,0,4,0,,,7,201,115,10


In [33]:
delivery_df.tail()

Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,batsman_runs,extra_runs,total_runs_y,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left,wickets
133054,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,2,RA Jadeja,...,1,0,1,0,,,152,1,4,5
133055,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,3,SR Watson,...,2,0,2,0,,,154,-1,3,5
133056,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,4,SR Watson,...,1,0,1,1,run out,KH Pandya,155,-2,2,4
133057,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,5,SN Thakur,...,2,0,2,0,,,157,-4,1,4
133058,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,6,SN Thakur,...,0,0,0,1,lbw,,157,-4,0,3


#### crr = runs/overs

In [34]:
delivery_df['crr'] = (delivery_df['current_score']*6)/(120-delivery_df['balls_left'])

#### rrr = runs-left / overs_left

In [35]:
delivery_df['rrr'] = (delivery_df['runs_left']*6)/delivery_df['balls_left']

In [36]:
def result(row):
    return 1 if row['batting_team'] == row['winner'] else 0

In [37]:
delivery_df['result'] = delivery_df.apply(result,axis=1)
#denote whether chasing team win or loose

In [38]:
delivery_df

Unnamed: 0,match_id,city,winner,total_runs_x,inning,batting_team,bowling_team,over,ball,batsman,...,player_dismissed,dismissal_kind,fielder,current_score,runs_left,balls_left,wickets,crr,rrr,result
125,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,1,CH Gayle,...,0,,,1,207,119,10,6.000000,10.436975,0
126,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,2,Mandeep Singh,...,0,,,1,207,118,10,3.000000,10.525424,0
127,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,3,Mandeep Singh,...,0,,,1,207,117,10,2.000000,10.615385,0
128,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,4,Mandeep Singh,...,0,,,3,205,116,10,4.500000,10.603448,0
129,1,Hyderabad,Sunrisers Hyderabad,207,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,5,Mandeep Singh,...,0,,,7,201,115,10,8.400000,10.486957,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133054,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,2,RA Jadeja,...,0,,,152,1,4,5,7.862069,1.500000,0
133055,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,3,SR Watson,...,0,,,154,-1,3,5,7.897436,-2.000000,0
133056,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,4,SR Watson,...,1,run out,KH Pandya,155,-2,2,4,7.881356,-6.000000,0
133057,11415,Hyderabad,Mumbai Indians,152,2,Chennai Super Kings,Mumbai Indians,20,5,SN Thakur,...,0,,,157,-4,1,4,7.915966,-24.000000,0


##### We will get all the required columns

In [39]:
final_df = delivery_df[['batting_team','bowling_team','city','runs_left','balls_left','wickets','total_runs_x','crr','rrr','result']]

##### Shuffle to avoid bias

In [40]:
final_df.sample(final_df.shape[0])

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets,total_runs_x,crr,rrr,result
83014,Kings XI Punjab,Delhi Daredevils,Delhi,61,50,6,164,8.914286,7.320000,1
4394,Kolkata Knight Riders,Rising Pune Supergiant,Pune,165,108,10,182,9.000000,9.166667,1
29332,Royal Challengers Bangalore,Rajasthan Royals,Bangalore,4,61,10,92,9.050847,0.393443,1
18568,Chennai Super Kings,Mumbai Indians,Cape Town,112,79,8,165,7.902439,8.506329,0
122594,Sunrisers Hyderabad,Delhi Capitals,Delhi,51,63,8,134,8.842105,4.857143,1
...,...,...,...,...,...,...,...,...,...,...
72489,Mumbai Indians,Kings XI Punjab,Dharamsala,171,109,9,183,7.090909,9.412844,0
35186,Kings XI Punjab,Mumbai Indians,Chandigarh,26,19,7,154,7.663366,8.210526,1
114108,Kolkata Knight Riders,Mumbai Indians,Kolkata,171,87,8,217,8.545455,11.793103,0
112149,Kolkata Knight Riders,Chennai Super Kings,Kolkata,173,113,9,184,10.285714,9.185841,1


In [41]:
final_df.sample()

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets,total_runs_x,crr,rrr,result
132697,Chennai Super Kings,Delhi Capitals,Visakhapatnam,155,117,10,155,2.0,7.948718,1


#### Drop Null values

In [42]:
final_df.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df.dropna(inplace=True)


In [43]:
final_df.shape

(63839, 10)

#### Drop rows where balls_left==0

In [44]:
final_df = final_df[final_df['balls_left']!=0]

## MODEL BUILD

In [45]:
from sklearn.model_selection import train_test_split

In [46]:
X = final_df.iloc[:,:-1]
y = final_df.iloc[:,-1]

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=1)

In [47]:
X_train

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets,total_runs_x,crr,rrr
113669,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,61,36,5,155,6.785714,10.166667
130285,Kings XI Punjab,Sunrisers Hyderabad,Hyderabad,210,102,9,232,7.666667,12.352941
94099,Mumbai Indians,Delhi Daredevils,Mumbai,86,52,6,152,5.911765,9.923077
111977,Mumbai Indians,Royal Challengers Bangalore,Bengaluru,56,40,5,173,8.850000,8.400000
121811,Delhi Capitals,Kings XI Punjab,Mohali,152,103,9,175,8.470588,8.854369
...,...,...,...,...,...,...,...,...,...
67505,Kings XI Punjab,Chennai Super Kings,Chennai,173,107,9,186,6.461538,9.700935
11041,Kings XI Punjab,Delhi Daredevils,Chandigarh,157,115,10,158,2.400000,8.191304
25405,Royal Challengers Bangalore,Chennai Super Kings,Durban,64,47,7,129,5.424658,8.170213
68491,Chennai Super Kings,Mumbai Indians,Mumbai,85,43,1,139,4.285714,11.860465


#### Converting Non Numeric Categorical columns to Numeric

In [48]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

trf = ColumnTransformer([
    ('trf', OneHotEncoder(sparse=False, drop='first'),['batting_team','bowling_team','city'])
], remainder = 'passthrough')

In [49]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

In [50]:
pipe = Pipeline(steps = [('step1', trf), ('step2',LogisticRegression(solver='liblinear'))])

In [51]:
pipe.fit(X_train,y_train)



In [52]:
X_train.describe()

Unnamed: 0,runs_left,balls_left,wickets,total_runs_x,crr,rrr
count,50912.0,50912.0,50912.0,50912.0,50912.0,50912.0
mean,93.035984,62.891656,7.598975,165.117811,7.44084,10.433565
std,50.068925,33.250742,2.104689,29.710353,2.29762,13.776356
min,-11.0,-2.0,0.0,65.0,0.0,-516.0
25%,54.0,35.0,6.0,146.0,6.24,7.2
50%,92.0,64.0,8.0,165.0,7.5,8.923723
75%,130.0,92.0,9.0,185.0,8.693878,10.891304
max,250.0,119.0,10.0,250.0,36.0,588.0


In [53]:
y_pred = pipe.predict(X_test)

In [54]:
from sklearn.metrics import accuracy_score

In [55]:
accuracy_score(y_test,y_pred)

0.8139681043286983

In [56]:
pipe.predict_proba(X_test)

array([[0.10407529, 0.89592471],
       [0.34481521, 0.65518479],
       [0.50799849, 0.49200151],
       ...,
       [0.26480412, 0.73519588],
       [0.93454171, 0.06545829],
       [0.43427078, 0.56572922]])

## Create Website

In [57]:
teams

['Sunrisers Hyderabad',
 'Mumbai Indians',
 'Royal Challengers Bangalore',
 'Kolkata Knight Riders',
 'Kings XI Punjab',
 'Chennai Super Kings',
 'Rajasthan Royals',
 'Delhi Capitals']

In [58]:
delivery_df['city'].unique()

array(['Hyderabad', 'Rajkot', 'Indore', 'Mumbai', 'Kolkata', 'Bangalore',
       'Delhi', 'Pune', 'Chandigarh', 'Kanpur', 'Jaipur', 'Chennai',
       'Cape Town', 'Port Elizabeth', 'Durban', 'Centurion',
       'East London', 'Johannesburg', 'Kimberley', 'Bloemfontein',
       'Cuttack', 'Ahmedabad', 'Nagpur', 'Dharamsala', 'Kochi',
       'Visakhapatnam', 'Ranchi', 'Abu Dhabi', 'Sharjah', nan, 'Raipur',
       'Mohali', 'Bengaluru'], dtype=object)

In [59]:
import pickle
pickle.dump(pipe, open('pipe.pkl','wb'))