In [66]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pickle
from sklearn.metrics import mean_absolute_error

## Feature Engineering

In [67]:
df = pd.read_csv('IPL_DATA_FEATURES.csv')

In [68]:
df.shape

(222275, 21)

In [69]:
df['overno'].unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [90]:
df.head(2)

Unnamed: 0,match_id,innings,venue,batting_team,bowling_team,ball,overno,ballno,total_runs,player_dismissed,...,ball_left,inning_total,cum_sum,remaining_runs,prev_30_balls_runs,player_dismissed_in_last_30balls,cum_wkts,wkts_left,dot_in_prev_30balls,boundaries_prev_30balls
0,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,0.1,0,1,1,0,...,119,222,1,221,1,0,0,10,0,0
1,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,0.2,0,2,0,0,...,118,222,1,221,1,0,0,10,1,0


## All teams

In [6]:
df['batting_team'].unique()

array(['Kolkata Knight Riders', 'Royal Challengers Bangalore',
       'Chennai Super Kings', 'Kings XI Punjab', 'Rajasthan Royals',
       'Delhi Daredevils', 'Deccan Chargers', 'Mumbai Indians',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad',
       'Rising Pune Supergiants', 'Gujarat Lions',
       'Rising Pune Supergiant', 'Delhi Capitals', 'Punjab Kings',
       'Lucknow Super Giants', 'Gujarat Titans'], dtype=object)

In [7]:
df['batting_team'] = np.where(df['batting_team']=='Rising Pune Supergiant','Rising Pune Supergiants',df['batting_team'])

In [8]:
df['batting_team'].unique()

array(['Kolkata Knight Riders', 'Royal Challengers Bangalore',
       'Chennai Super Kings', 'Kings XI Punjab', 'Rajasthan Royals',
       'Delhi Daredevils', 'Deccan Chargers', 'Mumbai Indians',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad',
       'Rising Pune Supergiants', 'Gujarat Lions', 'Delhi Capitals',
       'Punjab Kings', 'Lucknow Super Giants', 'Gujarat Titans'],
      dtype=object)

In [9]:
df['bowling_team'] = np.where(df['bowling_team']=='Rising Pune Supergiant','Rising Pune Supergiants',df['bowling_team'])

In [10]:
df['bowling_team'].unique()

array(['Royal Challengers Bangalore', 'Kolkata Knight Riders',
       'Kings XI Punjab', 'Chennai Super Kings', 'Delhi Daredevils',
       'Rajasthan Royals', 'Deccan Chargers', 'Mumbai Indians',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad',
       'Rising Pune Supergiants', 'Gujarat Lions', 'Delhi Capitals',
       'Punjab Kings', 'Gujarat Titans', 'Lucknow Super Giants'],
      dtype=object)

In [11]:
df.head(4)

Unnamed: 0,match_id,innings,venue,batting_team,bowling_team,ball,overno,ballno,total_runs,player_dismissed,...,ball_left,inning_total,cum_sum,remaining_runs,prev_30_balls_runs,player_dismissed_in_last_30balls,cum_wkts,wkts_left,dot_in_prev_30balls,boundaries_prev_30balls
0,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,0.1,0,1,1,0,...,119,222,1,221,1,0,0,10,0,0
1,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,0.2,0,2,0,0,...,118,222,1,221,1,0,0,10,1,0
2,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,0.3,0,3,1,0,...,118,222,2,220,2,0,0,10,1,0
3,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,0.4,0,4,0,0,...,117,222,2,220,2,0,0,10,2,0


## Columns in a Data Frame

In [12]:
df.columns

Index(['match_id', 'innings', 'venue', 'batting_team', 'bowling_team', 'ball',
       'overno', 'ballno', 'total_runs', 'player_dismissed', 'cum_ball',
       'ball_left', 'inning_total', 'cum_sum', 'remaining_runs',
       'prev_30_balls_runs', 'player_dismissed_in_last_30balls', 'cum_wkts',
       'wkts_left', 'dot_in_prev_30balls', 'boundaries_prev_30balls'],
      dtype='object')

## Features selected for training and testing

In [13]:
df=df[['match_id','innings','venue','batting_team', 'bowling_team','ball_left', 'cum_sum','prev_30_balls_runs',
       'player_dismissed_in_last_30balls','wkts_left','dot_in_prev_30balls',
       'boundaries_prev_30balls','remaining_runs'
      ]]

In [14]:
df.head(4)

Unnamed: 0,match_id,innings,venue,batting_team,bowling_team,ball_left,cum_sum,prev_30_balls_runs,player_dismissed_in_last_30balls,wkts_left,dot_in_prev_30balls,boundaries_prev_30balls,remaining_runs
0,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,119,1,1,0,10,0,0,221
1,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,118,1,1,0,10,1,0,221
2,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,118,2,2,0,10,1,0,220
3,335982,1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,117,2,2,0,10,2,0,220


In [16]:
X = df.drop(['match_id','remaining_runs'],axis=1)
Y = df['remaining_runs'].values

In [17]:
Y

array([221, 221, 220, ...,   7,   6,   0])

## Train test split

In [18]:
X_train,X_test,Y_train,Y_test = train_test_split(X, Y, test_size = 0.33, random_state = 42)

In [19]:
X_test.shape

(73351, 11)

In [20]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# !pip install xgboost
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error

In [21]:
trf = ColumnTransformer([
('trf', OneHotEncoder (sparse=False, drop='first'), ['batting_team' , 'bowling_team','venue' ])]
, remainder='passthrough')

In [25]:
pipe = Pipeline(steps=[
('step1', trf),
('step2', StandardScaler()),
('step3', XGBRegressor (n_estimators=1000, learning_rate=0.2, max_depth=12, random_state=1)),
   
])

## Train and Save the model

In [26]:
pipe.fit(X_train, Y_train)
file_name = 'score_pred.pkl'
pickle.dump(pipe,open(file_name,'wb'))

In [27]:
prediction = pipe.predict(X_test)
print(r2_score(Y_test, prediction))
print(mean_absolute_error(Y_test,prediction))

0.9865338298109955
3.1379149753449167


## Predict

In [103]:

Bat_Team = 'Mumbai Indians' 

Ball_Team = 'Chennai Super Kings' 

inning = 1

venue='Wankhede Stadium'

total_score = 168 # current score

ball_left = 30

prev_runs_30 = 45 # runs in last 30 balls

wkts_left=8

prev_wickets_30 = 2 # wickets in last 30 balls

prev_30_dot_balls = 5 # dots in last 30 balls

prev_30_boundaries = 6 # boundaries in last 30 balls

data={'innings':[inning],
      'venue':[venue],
      'batting_team': [Bat_Team],
      'bowling_team': [Ball_Team], 
      'ball_left':[ball_left],
      'cum_sum': [total_score], 
      'prev_30_balls_runs': [prev_runs_30], 
      'player_dismissed_in_last_30balls': [prev_wickets_30],
      'wkts_left': [wkts_left],
      'dot_in_prev_30balls':[prev_30_dot_balls],
      'boundaries_prev_30balls':[prev_30_boundaries]}

input_df = pd.DataFrame.from_dict(data)
result = pipe.predict(input_df)
if wkts_left==0 or ball_left==0:
    input_df['total_Score']= total_score
elif int(result)<0:
    input_df['total_Score']= (int(result)*(-1))+total_score
else:
    input_df['total_Score']= (int(result))+total_score

min=total_score+int(result)
if (int(result))+total_score-3>=total_score:
    min=(int(result))+total_score-3
    
print(min, 'to', (int(result))+total_score+3)
input_df.head(1)








214 to 220


Unnamed: 0,innings,venue,batting_team,bowling_team,ball_left,cum_sum,prev_30_balls_runs,player_dismissed_in_last_30balls,wkts_left,dot_in_prev_30balls,boundaries_prev_30balls,total_Score
0,1,Wankhede Stadium,Mumbai Indians,Chennai Super Kings,30,168,45,2,8,5,6,217
