In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
import joblib

In [2]:
# Read the chase dataset into `data` and display it as the cell output.
data = pd.read_csv(filepath_or_buffer="chasing.csv")
data

Unnamed: 0,batting_team,bowling_team,city,runs_left,over,balls_left,wickets,total_runs_x,current_rr,required_rr,result
0,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,206,1,119,10,207,6.000000,10.386555,0
1,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,206,1,118,10,207,3.000000,10.474576,0
2,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,206,1,117,10,207,2.000000,10.564103,0
3,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,204,1,116,10,207,4.500000,10.551724,0
4,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,200,1,115,10,207,8.400000,10.434783,0
...,...,...,...,...,...,...,...,...,...,...,...
70804,Chennai Super Kings,Mumbai Indians,Hyderabad,12,19,9,5,152,7.567568,8.000000,0
70805,Chennai Super Kings,Mumbai Indians,Hyderabad,12,19,8,5,152,7.500000,9.000000,0
70806,Chennai Super Kings,Mumbai Indians,Hyderabad,10,19,7,5,152,7.539823,8.571429,0
70807,Chennai Super Kings,Mumbai Indians,Hyderabad,2,19,6,5,152,7.894737,2.000000,0


In [3]:
# Rewrite two team names, in both team columns, to the names that appear in the
# encodings used by the rest of the notebook.
team_renames = [
    ('Deccan Chargers', 'Sunrisers Hyderabad'),
    ('Delhi Daredevils', 'Delhi Capitals'),
]
for old_name, new_name in team_renames:
    for team_column in ('batting_team', 'bowling_team'):
        data[team_column] = data[team_column].str.replace(old_name, new_name)

In [4]:
# Persist the renamed teams. This writes to the same 'chasing.csv' that the
# notebook reads at the top, so the input file is overwritten.
data.to_csv(path_or_buf='chasing.csv', index=False)

In [5]:
def targetGuidedOrdinalEncoding(df, feature, dependent_feature):
    """Replace a categorical column with integer ranks ordered by a target mean.

    The categories of ``df[feature]`` are sorted in ascending order of the mean
    of ``dependent_feature`` within each category and numbered 0, 1, 2, ...
    The column ``df[feature]`` is then overwritten in place with those numbers,
    and the category -> number mapping is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both columns. It is modified in place.
    feature : str
        Name of the categorical column to encode.
    dependent_feature : str
        Name of the numeric column whose per-category mean defines the order.

    Returns
    -------
    dict
        Mapping from each original category to its integer code. Returned so
        callers can reuse the exact encoding (e.g. at prediction time) instead
        of copying it from the printed output.
    """
    category_means = df.groupby(feature)[dependent_feature].mean()
    ordered_categories = category_means.sort_values().index
    feature_label2 = {category: rank for rank, category in enumerate(ordered_categories)}
    print(feature_label2)
    # In-place overwrite: calling this again on the same column re-encodes the
    # already-encoded integers rather than the original labels.
    df[feature] = df[feature].map(feature_label2)
    return feature_label2

In [6]:
# Encode the three categorical columns of `data` in place against `result`.
# Each call prints the mapping it applied.
for categorical_column in ('batting_team', 'bowling_team', 'city'):
    targetGuidedOrdinalEncoding(data, categorical_column, 'result')

{'Sunrisers Hyderabad': 0, 'Kings XI Punjab': 1, 'Delhi Capitals': 2, 'Royal Challengers Bangalore': 3, 'Rajasthan Royals': 4, 'Mumbai Indians': 5, 'Kolkata Knight Riders': 6, 'Chennai Super Kings': 7}
{'Mumbai Indians': 0, 'Chennai Super Kings': 1, 'Kings XI Punjab': 2, 'Rajasthan Royals': 3, 'Kolkata Knight Riders': 4, 'Sunrisers Hyderabad': 5, 'Royal Challengers Bangalore': 6, 'Delhi Capitals': 7}
{'Cape Town': 0, 'Nagpur': 1, 'East London': 2, 'Chennai': 3, 'Abu Dhabi': 4, 'Durban': 5, 'Dharamsala': 6, 'Mumbai': 7, 'Ahmedabad': 8, 'Chandigarh': 9, 'Cuttack': 10, 'Bloemfontein': 11, 'Delhi': 12, 'Bengaluru': 13, 'Pune': 14, 'Hyderabad': 15, 'Bangalore': 16, 'Visakhapatnam': 17, 'Kolkata': 18, 'Mohali': 19, 'Port Elizabeth': 20, 'Johannesburg': 21, 'Sharjah': 22, 'Kimberley': 23, 'Jaipur': 24, 'Centurion': 25, 'Raipur': 26, 'Indore': 27, 'Ranchi': 28}


In [7]:
# Target is the `result` column; every other column of `data` is a feature.
y = data['result']
X = data.drop(columns=['result'])

In [8]:
# Hold out 20% of the rows for evaluation, with a fixed seed.
# NOTE(review): the split is by row. If rows are consecutive deliveries of the
# same match (the displayed rows suggest so), deliveries from one match can land
# in both sets and inflate the scores - confirm and consider a per-match split.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [11]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import StackingClassifier

In [12]:
# Candidate estimators. The random forest and LightGBM models are seeded so the
# reported metrics and the pickled models are reproducible across reruns;
# 42 is the same seed passed to train_test_split.
rf = RandomForestClassifier(random_state=42)
lr = LogisticRegression()
lgbm = LGBMClassifier(random_state=42)
# `svc` and `vot` are built here but not fitted in the visible part of the notebook.
svc = SVC()
# Soft voting over the random forest, logistic regression and LightGBM.
vot = VotingClassifier(estimators=[('rf', rf), ('lr', lr), ('lgbm', lgbm)], voting='soft')
# Stacking: random forest and LightGBM as base learners, logistic regression on top.
stk = StackingClassifier(estimators=[('rf', rf), ('lgbm', lgbm)], final_estimator=lr)

In [13]:
# Fit the random forest on the training split, then print per-class
# precision / recall / F1 on the held-out split.
y_pred = rf.fit(X_train, y_train).predict(X_test)
rf_report = classification_report(y_test, y_pred)
print(rf_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6809
           1       1.00      1.00      1.00      7353

    accuracy                           1.00     14162
   macro avg       1.00      1.00      1.00     14162
weighted avg       1.00      1.00      1.00     14162



In [14]:
# Fit the stacking classifier on the training split, then print per-class
# precision / recall / F1 on the held-out split.
y_pred = stk.fit(X_train, y_train).predict(X_test)
stk_report = classification_report(y_test, y_pred)
print(stk_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6809
           1       1.00      1.00      1.00      7353

    accuracy                           1.00     14162
   macro avg       1.00      1.00      1.00     14162
weighted avg       1.00      1.00      1.00     14162



In [28]:
# Train a fresh LightGBM classifier with default settings and bind it to `model`.
model = LGBMClassifier()
model.fit(X=X_train, y=y_train)

In [29]:
# Evaluate whatever `model` currently refers to on the held-out split.
y_pred = model.predict(X_test)
model_report = classification_report(y_test, y_pred)
print(model_report)

              precision    recall  f1-score   support

           0       0.98      0.97      0.98      6809
           1       0.97      0.98      0.98      7353

    accuracy                           0.98     14162
   macro avg       0.98      0.98      0.98     14162
weighted avg       0.98      0.98      0.98     14162



In [None]:
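# NOTE: commented-out column selection, kept for reference only. `deliveries` is not
# defined in the visible part of this notebook; the columns listed are the same as
# the columns of `data`.
# final_df = deliveries[['batting_team','bowling_team','city','runs_left','over', \
#                      'balls_left','wickets','total_runs_x','current_rr','required_rr','result']]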

In [67]:
# Pickle whatever `model` is bound to when this cell runs; in top-to-bottom
# order that is the LGBMClassifier fitted above.
joblib.dump(value=model, filename='model.pkl')

['model.pkl']

In [15]:
# Rebind `model` to the random forest and `model2` to the stacking classifier.
# From here on `model` no longer refers to the LightGBM model.
model, model2 = rf, stk

In [20]:
# Pickle both aliased estimators, one file each.
joblib.dump(value=model, filename='rf.pkl')
joblib.dump(value=model2, filename='stk.pkl')

['stk.pkl']

In [16]:
# Score one hypothetical chase situation with `model`.
batting_team = "Kings XI Punjab"
bowling_team = "Delhi Capitals"
city = "Mumbai"
# Label -> code mappings, copied from the output printed by
# targetGuidedOrdinalEncoding. They must be refreshed if that output changes.
batting_team_enc = {'Sunrisers Hyderabad': 0, 'Kings XI Punjab': 1, 'Delhi Capitals': 2, 'Royal Challengers Bangalore': 3, 'Rajasthan Royals': 4, 'Mumbai Indians': 5, 'Kolkata Knight Riders': 6, 'Chennai Super Kings': 7}
bowling_team_enc = {'Mumbai Indians': 0, 'Chennai Super Kings': 1, 'Kings XI Punjab': 2, 'Rajasthan Royals': 3, 'Kolkata Knight Riders': 4, 'Sunrisers Hyderabad': 5, 'Royal Challengers Bangalore': 6, 'Delhi Capitals': 7}
city_enc ={'Cape Town': 0, 'Nagpur': 1, 'East London': 2, 'Chennai': 3, 'Abu Dhabi': 4, 'Durban': 5, 'Dharamsala': 6, 'Mumbai': 7, 'Ahmedabad': 8, 'Chandigarh': 9, 'Cuttack': 10, 'Bloemfontein': 11, 'Delhi': 12, 'Bengaluru': 13, 'Pune': 14, 'Hyderabad': 15, 'Bangalore': 16, 'Visakhapatnam': 17, 'Kolkata': 18, 'Mohali': 19, 'Port Elizabeth': 20, 'Johannesburg': 21, 'Sharjah': 22, 'Kimberley': 23, 'Jaipur': 24, 'Centurion': 25, 'Raipur': 26, 'Indore': 27, 'Ranchi': 28}
# The three names are rebound from labels to their integer codes.
batting_team = batting_team_enc[batting_team]
bowling_team = bowling_team_enc[bowling_team]
city = city_enc[city]
runs_left = 120
over = 5
balls_left = 90
wickets = 8
total_runs_x = 170
# Derive both run rates from the match state instead of hard-coding them, so
# they stay consistent when runs_left / balls_left are edited. 120 is the
# innings length in balls that the previous magic number 30 (= 120 - 90) implied.
balls_bowled = 120 - balls_left
current_rr = ((total_runs_x - runs_left)/balls_bowled)*6
required_rr = (runs_left/balls_left)*6
# Feature order must match the column order of X used for training.
model.predict_proba([[batting_team, bowling_team, city, runs_left, over, balls_left, wickets, total_runs_x, current_rr, required_rr]])



array([[0.57, 0.43]])

In [17]:
# Score the same hypothetical chase situation again with `model`.
# NOTE(review): this repeats the scenario and the estimator of the previous
# cell, and `model2` (the stacking classifier) is never scored - confirm
# whether `model2` was intended here.
batting_team = "Kings XI Punjab"
bowling_team = "Delhi Capitals"
city = "Mumbai"
# Label -> code mappings, copied from the output printed by
# targetGuidedOrdinalEncoding. They must be refreshed if that output changes.
batting_team_enc = {'Sunrisers Hyderabad': 0, 'Kings XI Punjab': 1, 'Delhi Capitals': 2, 'Royal Challengers Bangalore': 3, 'Rajasthan Royals': 4, 'Mumbai Indians': 5, 'Kolkata Knight Riders': 6, 'Chennai Super Kings': 7}
bowling_team_enc = {'Mumbai Indians': 0, 'Chennai Super Kings': 1, 'Kings XI Punjab': 2, 'Rajasthan Royals': 3, 'Kolkata Knight Riders': 4, 'Sunrisers Hyderabad': 5, 'Royal Challengers Bangalore': 6, 'Delhi Capitals': 7}
city_enc ={'Cape Town': 0, 'Nagpur': 1, 'East London': 2, 'Chennai': 3, 'Abu Dhabi': 4, 'Durban': 5, 'Dharamsala': 6, 'Mumbai': 7, 'Ahmedabad': 8, 'Chandigarh': 9, 'Cuttack': 10, 'Bloemfontein': 11, 'Delhi': 12, 'Bengaluru': 13, 'Pune': 14, 'Hyderabad': 15, 'Bangalore': 16, 'Visakhapatnam': 17, 'Kolkata': 18, 'Mohali': 19, 'Port Elizabeth': 20, 'Johannesburg': 21, 'Sharjah': 22, 'Kimberley': 23, 'Jaipur': 24, 'Centurion': 25, 'Raipur': 26, 'Indore': 27, 'Ranchi': 28}
# The three names are rebound from labels to their integer codes.
batting_team = batting_team_enc[batting_team]
bowling_team = bowling_team_enc[bowling_team]
city = city_enc[city]
runs_left = 120
over = 5
balls_left = 90
wickets = 8
total_runs_x = 170
# Derive both run rates from the match state instead of hard-coding them, so
# they stay consistent when runs_left / balls_left are edited. 120 is the
# innings length in balls that the previous magic number 30 (= 120 - 90) implied.
balls_bowled = 120 - balls_left
current_rr = ((total_runs_x - runs_left)/balls_bowled)*6
required_rr = (runs_left/balls_left)*6
# Feature order must match the column order of X used for training.
model.predict_proba([[batting_team, bowling_team, city, runs_left, over, balls_left, wickets, total_runs_x, current_rr, required_rr]])



array([[0.57, 0.43]])

In [68]:
# Required run rate: runs still needed per remaining ball, scaled to a six-ball over.
required_rr = 6 * (runs_left / balls_left)

In [69]:
# Display the current value of required_rr as the cell output.
required_rr

8.0