In [43]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [45]:
df = pd.read_csv("odi_Batting_Card.csv")
print(df.head())


   Match ID  innings       team  batsman   runs  balls  fours  sixes  \
0     64381        1  Sri Lanka   1810.0    5.0   17.0    0.0    0.0   
1     64357        1   Pakistan   1981.0   35.0   48.0    2.0    0.0   
2     65455        1  Australia   1922.0   16.0   30.0    1.0    0.0   
3     64381        1  Sri Lanka   1987.0    6.0   46.0    0.0    0.0   
4     64357        1   Pakistan   1750.0  114.0  123.0   14.0    0.0   

   strikeRate isOut wicketType  fielders  bowler  
0       29.41  True        lbw       NaN     NaN  
1       72.91  True     caught  ['1977']     NaN  
2       53.33  True     caught  ['1945']     NaN  
3       13.04  True     caught  ['1980']     NaN  
4       92.68  True    run out       NaN     NaN  


In [46]:
print(df.shape)
print(df.columns)

(103225, 13)
Index(['Match ID', 'innings', 'team', 'batsman', 'runs', 'balls', 'fours',
       'sixes', 'strikeRate', 'isOut', 'wicketType', 'fielders', 'bowler'],
      dtype='object')


In [49]:
df.info()
df.describe()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103225 entries, 0 to 103224
Data columns (total 13 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   Match ID    103225 non-null  int64  
 1   innings     103225 non-null  int64  
 2   team        103213 non-null  object 
 3   batsman     103213 non-null  float64
 4   runs        83039 non-null   float64
 5   balls       83039 non-null   float64
 6   fours       82567 non-null   float64
 7   sixes       82618 non-null   float64
 8   strikeRate  83039 non-null   float64
 9   isOut       103213 non-null  object 
 10  wicketType  103213 non-null  object 
 11  fielders    60791 non-null   object 
 12  bowler      40939 non-null   float64
dtypes: float64(7), int64(2), object(4)
memory usage: 10.2+ MB


Unnamed: 0,Match ID,innings,batsman,runs,balls,fours,sixes,strikeRate,bowler
count,103225.0,103225.0,103213.0,83039.0,83039.0,82567.0,82618.0,83039.0,40939.0
mean,433795.9,1.494938,24378.416934,22.965149,30.380941,2.010137,0.322024,68.082163,38563.076577
std,457930.2,0.499977,27714.545828,26.685187,30.331139,2.75534,0.863357,46.340819,28187.226445
min,64148.0,1.0,858.0,0.0,0.0,0.0,0.0,0.0,1383.0
25%,65331.0,1.0,2030.0,4.0,8.0,0.0,0.0,38.88,8270.0
50%,238208.0,1.0,7678.0,13.0,20.0,1.0,0.0,65.67,47023.0
75%,727923.0,2.0,48739.0,33.0,44.0,3.0,0.0,91.575,58435.0
max,1421073.0,2.0,116530.0,264.0,201.0,33.0,17.0,600.0,115381.0


In [51]:
df["century"] = (df["runs"] >= 100).astype(int)

df["century"].value_counts()


century
0    101112
1      2113
Name: count, dtype: int64

In [53]:
X = df[["balls", "fours", "sixes", "strikeRate"]]
y = df["century"]


In [55]:
print(X.isnull().sum())


balls         20186
fours         20658
sixes         20607
strikeRate    20186
dtype: int64


In [57]:
X = X.fillna(X.mean())


In [59]:
print(X.isnull().sum())


balls         0
fours         0
sixes         0
strikeRate    0
dtype: int64


In [61]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [63]:
models = {
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=42)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.4f}")


KNN Accuracy: 0.9989
Naive Bayes Accuracy: 0.9650
Decision Tree Accuracy: 0.9985


In [64]:
best_model = KNeighborsClassifier(n_neighbors=5)
best_model.fit(X_train, y_train)


# Pridiction of Century

In [77]:

balls = float(input("Enter balls faced: "))
fours = float(input("Enter number of fours: "))
sixes = float(input("Enter number of sixes: "))
strikeRate = float(input("Enter strike rate: "))

new_data = pd.DataFrame({
    "balls": [balls],
    "fours": [fours],
    "sixes": [sixes],
    "strikeRate": [strikeRate]
})

print(new_data)

prediction = best_model.predict(new_data)

if prediction[0] == 1:
    print("Kohli ki Century aayegi..............")
else:
    print("Kohli ki Century nahi aayegi...............")


Enter balls faced:  120
Enter number of fours:  0
Enter number of sixes:  0
Enter strike rate:  100


   balls  fours  sixes  strikeRate
0  120.0    0.0    0.0       100.0
Kohli ki Century aayegi..............


In [79]:
prob = best_model.predict_proba(new_data)
print("Probability:", prob)


Probability: [[0. 1.]]
