# Prerequisites


## Libraries required

In [2]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import ipywidgets as widgets
from IPython.display import display
import warnings

# Ignore XGBoost warnings (UserWarnings)
warnings.filterwarnings('ignore', category=UserWarning, module='xgboost')


## Loading The Data

In [3]:


deliveries = pd.read_csv('matches.csv')

deliveries.head()


Unnamed: 0,season,team1,team2,date,match_number,venue,city,toss_winner,toss_decision,player_of_match,umpire1,umpire2,reserve_umpire,match_referee,winner,winner_runs,winner_wickets,match_type
0,2024,Canada,United States of America,2024/06/01,1,Grand Prairie Stadium,Dallas,United States of America,field,Aaron Jones,RK Illingworth,Sharfuddoula,L Rusere,RB Richardson,United States of America,,7.0,Group
1,2024,Papua New Guinea,West Indies,2024/06/02,2,Providence Stadium,Providence,West Indies,field,RL Chase,AT Holdstock,Rashid Riaz,HDPK Dharmasena,AJ Pycroft,West Indies,,5.0,Group
2,2024,Oman,Namibia,2024/06/02,3,Kensington Oval,Bridgetown,Namibia,field,D Wiese,J Madanagopal,JS Wilson,Asif Yaqoob,RS Madugalle,,,,Group
3,2024,Sri Lanka,South Africa,2024/06/03,4,Nassau County International Cricket Stadium,New York,Sri Lanka,bat,A Nortje,CM Brown,RA Kettleborough,AG Wharf,JJ Crowe,South Africa,,6.0,Group
4,2024,Afghanistan,Uganda,2024/06/03,5,Providence Stadium,Providence,Uganda,field,Fazalhaq Farooqi,Ahsan Raza,HDPK Dharmasena,Rashid Riaz,AJ Pycroft,Afghanistan,125.0,,Group


## Cleaning Data 

In [4]:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier


# Step 1: Load the dataset
matches_df = pd.read_csv('matches.csv')

# Step 2: Preprocessing (Encode categorical variables)
# Encode team names to numeric values
teams = pd.concat([matches_df['team1'], matches_df['team2']]).unique()
team_mapping = {team: idx for idx, team in enumerate(teams)}

matches_df['team1_encoded'] = matches_df['team1'].map(team_mapping)
matches_df['team2_encoded'] = matches_df['team2'].map(team_mapping)

# Step 3: Encode the winner column (1 for team1 wins, 0 for team2 wins)
matches_df['winner_encoded'] = matches_df.apply(lambda row: 1 if row['winner'] == row['team1'] else 0, axis=1)





# Creating Model

In [None]:

# Step 4: Features and target variable
X = matches_df[['team1_encoded', 'team2_encoded']] 
y = matches_df['winner_encoded'] 

# Step 5: Train-Test Split (80-20 Split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Random Forest Classifier

In [6]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')



Accuracy: 81.82%


In [17]:
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)




Confusion Matrix:
[[3 1]
 [1 6]]


In [7]:
def predict_winner(team1, team2):
    team1_encoded = team_mapping[team1]
    team2_encoded = team_mapping[team2]
    prediction = rf_classifier.predict([[team1_encoded, team2_encoded]])[0]
    winner = team1 if prediction == 1 else team2
    return winner

## Other Model Comaprisions

In [8]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier

models = {
    
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5),
    'Naive Bayes': GaussianNB(),
    'Neural Network (MLP)': MLPClassifier(hidden_layer_sizes=(100,), max_iter=300),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}



In [9]:
# Store model accuracies
results = {}

# Train and evaluate each model
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = round(acc * 100, 2)

# Create and display accuracy table
accuracy_table = pd.DataFrame(list(results.items()), columns=['Model', 'Accuracy (%)'])
accuracy_table = accuracy_table.sort_values(by='Accuracy (%)', ascending=False).reset_index(drop=True) 

pd.DataFrame(accuracy_table)




Unnamed: 0,Model,Accuracy (%)
0,XGBoost,90.91
1,Random Forest,81.82
2,Gradient Boosting,81.82
3,K-Nearest Neighbors,72.73
4,Decision Tree,72.73
5,Naive Bayes,45.45
6,Neural Network (MLP),36.36


In [10]:

# Step 9: Create widgets for user input
team1_dropdown = widgets.Dropdown(
    options=teams,
    description='Team 1:',
    disabled=False
)

team2_dropdown = widgets.Dropdown(
    options=teams,
    description='Team 2:',
    disabled=False
)

In [18]:
predict_button = widgets.Button(description="Predict Winner")
output = widgets.Output()

def on_predict_button_click(b):
    team1 = team1_dropdown.value
    team2 = team2_dropdown.value
    winner = predict_winner(team1, team2)
    
    with output:
        print(f'The predicted winner is: {winner}')

predict_button.on_click(on_predict_button_click)

display(team1_dropdown, team2_dropdown, predict_button, output)

Dropdown(description='Team 1:', index=8, options=('Canada', 'Papua New Guinea', 'Oman', 'Sri Lanka', 'Afghanis…

Dropdown(description='Team 2:', index=4, options=('Canada', 'Papua New Guinea', 'Oman', 'Sri Lanka', 'Afghanis…

Button(description='Predict Winner', style=ButtonStyle())

Output()