# Importing

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

# Reading Dataset

In [2]:
# Load the IPL match results into a DataFrame; the path is relative to the
# notebook's working directory.
ds=pd.read_csv('IPL.csv')

In [3]:
# (number of rows, number of columns) of the loaded frame.
ds.shape

(149, 5)

In [4]:
# Preview the first rows of the raw (still string-valued) data.
ds.head()

Unnamed: 0,Main_team,Opponent_team,Winning_Team,Won_By,Win_Type
0,RCB,CSK,CSK,140,by runs
1,SRH,MI,MI,33,by runs
2,KXIP,DC,KXIP,9,by wickets
3,KKR,RCB,RCB,5,by wickets
4,CSK,RR,CSK,5,by wickets


# Data Analyzing

In [5]:
# Column dtypes and non-null counts before any encoding is applied.
ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Main_team      149 non-null    object
 1   Opponent_team  149 non-null    object
 2   Winning_Team   149 non-null    object
 3   Won_By         149 non-null    int64 
 4   Win_Type       149 non-null    object
dtypes: int64(1), object(4)
memory usage: 5.9+ KB


In [6]:
from sklearn import preprocessing

In [7]:
# Label encoder used in the next cell to turn string categories into integer codes.
le=preprocessing.LabelEncoder()

In [8]:
# Encode the categorical columns as integers so the scikit-learn models can use them.
#
# The three team columns share one vocabulary, so they are encoded with a SINGLE
# encoder fitted on the union of their values. Fitting a fresh encoder per column
# only yields matching codes when every column happens to contain exactly the same
# set of teams; otherwise the same team gets different integers in different columns
# (e.g. a team that never won would shift the codes in Winning_Team).
team_columns = ['Main_team', 'Opponent_team', 'Winning_Team']
team_encoder = preprocessing.LabelEncoder()
team_encoder.fit(pd.unique(ds[team_columns].values.ravel()))
for team_column in team_columns:
    ds[team_column] = team_encoder.transform(ds[team_column]).astype('int64')

# Keep the team mapping around (team name -> integer code) and show it.
team_name_mapping = dict(zip(team_encoder.classes_, team_encoder.transform(team_encoder.classes_)))
print(team_name_mapping)

# Win_Type has its own vocabulary, so it gets its own fit. As before, `le` ends up
# fitted on Win_Type and `le_name_mapping` holds the Win_Type mapping.
ds['Win_Type'] = le.fit_transform(ds['Win_Type']).astype('int64')
le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print(le_name_mapping)

{'CSK': 0, 'DC': 1, 'KKR': 2, 'KXIP': 3, 'MI': 4, 'RCB': 5, 'RR': 6, 'SRH': 7}
{'by runs': 0, 'by wickets': 1}


In [9]:
# Re-check dtypes after encoding: every column should now be int64.
ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   Main_team      149 non-null    int64
 1   Opponent_team  149 non-null    int64
 2   Winning_Team   149 non-null    int64
 3   Won_By         149 non-null    int64
 4   Win_Type       149 non-null    int64
dtypes: int64(5)
memory usage: 5.9 KB


In [10]:
# Inspect the first ten rows of the encoded data.
ds.head(10)

Unnamed: 0,Main_team,Opponent_team,Winning_Team,Won_By,Win_Type
0,5,0,0,140,0
1,7,4,4,33,0
2,3,1,3,9,1
3,2,5,5,5,1
4,0,6,0,5,1
5,1,7,1,6,1
6,6,3,3,9,1
7,4,2,4,6,0
8,6,1,1,3,1
9,7,2,7,66,0


In [11]:
# Class balance of the target: number of rows per encoded Win_Type value.
ds['Win_Type'].value_counts()

1    78
0    71
Name: Win_Type, dtype: int64

# Train And Test Split

In [12]:
# Features are every column except the target; the target is kept as a
# one-column DataFrame (not a Series).
x = ds.drop(columns=['Win_Type'])
y = ds[['Win_Type']]

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

In [14]:
# Peek at the held-out feature rows (index values are the original row numbers).
x_test.head()

Unnamed: 0,Main_team,Opponent_team,Winning_Team,Won_By
73,3,1,1,5
18,1,0,1,45
117,7,3,3,5
78,0,2,2,9
76,6,3,3,6


# Training And Testing

In [15]:
#Training several baseline models (no hyperparameter tuning yet) to find the one that gives the best results

In [16]:
#Logistic Regression

In [17]:
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

# Logistic regression with scikit-learn's default settings.
# fit() returns the estimator itself, so construction and training can be chained.
model1 = LogisticRegression().fit(x_train, y_train)

# Per-class precision / recall / F1 on the held-out rows.
predict1 = model1.predict(x_test)
print(classification_report(y_true=y_test, y_pred=predict1))

              precision    recall  f1-score   support

           0       0.86      0.92      0.89        13
           1       0.94      0.88      0.91        17

    accuracy                           0.90        30
   macro avg       0.90      0.90      0.90        30
weighted avg       0.90      0.90      0.90        30



In [18]:
#GaussianNB

In [19]:
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with default settings.
model2 = GaussianNB()

# Train, then predict the held-out rows in one chained call (fit() returns the model).
predict2 = model2.fit(x_train, y_train).predict(x_test)
print(classification_report(y_true=y_test, y_pred=predict2))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        13
           1       0.94      1.00      0.97        17

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30



In [20]:
#RandomForestClassifier

In [21]:
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyperparameters.
# random_state pins the bootstrap sampling and per-split feature sub-sampling so that
# Restart & Run All reproduces the same report; without it the metrics change
# from run to run. 42 is the same seed the train/test split uses.
model3 = RandomForestClassifier(random_state=42)
model3.fit(x_train, y_train)

# Per-class precision / recall / F1 on the held-out rows.
predict3 = model3.predict(x_test)
print(classification_report(y_test, predict3))

              precision    recall  f1-score   support

           0       0.86      0.92      0.89        13
           1       0.94      0.88      0.91        17

    accuracy                           0.90        30
   macro avg       0.90      0.90      0.90        30
weighted avg       0.90      0.90      0.90        30



In [22]:
#KNeighborsClassifier

In [23]:
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default settings.
# fit() returns the estimator itself, so construction and training can be chained.
model4 = KNeighborsClassifier().fit(x_train, y_train)

# Per-class precision / recall / F1 on the held-out rows.
predict4 = model4.predict(x_test)
print(classification_report(y_true=y_test, y_pred=predict4))


              precision    recall  f1-score   support

           0       0.92      0.92      0.92        13
           1       0.94      0.94      0.94        17

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30



In [24]:
#DecisionTreeClassifier

In [25]:
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

# Single decision tree with default hyperparameters.
# random_state makes the tree deterministic: features are randomly permuted at each
# split, so equally good splits are otherwise broken differently from run to run.
# 42 is the same seed the train/test split uses.
model5 = DecisionTreeClassifier(random_state=42)
model5.fit(x_train, y_train)

# Per-class precision / recall / F1 on the held-out rows.
predict5 = model5.predict(x_test)
print(classification_report(y_test, predict5))

              precision    recall  f1-score   support

           0       0.80      0.92      0.86        13
           1       0.93      0.82      0.87        17

    accuracy                           0.87        30
   macro avg       0.87      0.87      0.87        30
weighted avg       0.88      0.87      0.87        30



In [26]:
#Comparing the 5 models, we can conclude that Model 2 (Gaussian Naive Bayes) yields the highest accuracy on the test set: 97%.

#We have precision, recall, f1-score and support:

#Precision : of all the samples predicted as a given class, the fraction that truly belong to that class

#Recall : of all the samples that truly belong to a given class, the fraction the model correctly identified

#F1-score : harmonic mean of precision and recall values. F1 score reaches its best value at 1 and worst value at 0. F1 Score = 2 x ((precision x recall) / (precision + recall))

# PREDICTING

In [27]:
# Predict the win type for one hand-written match with the Gaussian NB model.
# The values are the encoded Main_team, Opponent_team, Winning_Team and the Won_By
# margin, in the same column order as the training features.
#
# - Values are real integers, not strings: scikit-learn's input validation does not
#   accept string arrays as numeric data.
# - The row is wrapped in a DataFrame with the training column names so the model
#   sees the same feature names it was fitted with.
# - The variable is no longer called `input`, which shadowed the builtin input().
sample_match = pd.DataFrame([[5, 0, 0, 140]], columns=x.columns)
output = model2.predict(sample_match)
print(output)


[0]


In [28]:
# Flatten the one-column target into a 1-D NumPy array.
# np.ravel avoids hard-coding the test-set size (30) and keeps the cell idempotent:
# it accepts the DataFrame on the first run and the resulting array on any re-run.
y_test = np.ravel(y_test)

In [29]:
# Score the whole held-out set with the Gaussian NB model.
output1 = model2.predict(x_test)

# Side-by-side table of true vs. predicted Win_Type codes.
# y_test must already be the 1-D array produced by the previous cell.
output2 = pd.DataFrame(
    {
        'Actual': y_test.flatten(),
        'predicted': output1.flatten(),
    }
)
output2   #comparing actual value to predicted value

Unnamed: 0,Actual,predicted
0,1,1
1,0,0
2,1,1
3,0,1
4,1,1
5,0,0
6,0,0
7,0,0
8,0,0
9,1,1


In [30]:
# Overall fraction of held-out rows the Gaussian NB model classified correctly.
totalacc = accuracy_score(y_true=y_test, y_pred=output1)
totalacc

0.9666666666666667