In [None]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Load data
df = pd.read_csv("final_cricket_levels.csv")

# Define labels for each role based on thresholds
df["Batter"] = (df["Batting_Level"] >= 5).astype(int)
df["Bowler"] = (df["Bowling_Level"] >= 5).astype(int)
df["Fielder"] = (df["Fielding_Level"] >= 5).astype(int)
df["All-Rounder"] = ((df["Batting_Level"] >= 5) & (df["Bowling_Level"] >= 5)).astype(int)
df["Wicketkeeper"] = ((df["Batting_Level"] >= 5) & (df["Fielding_Level"] >= 5)).astype(int)

# Features for training
X = df[["Batting_Level", "Bowling_Level", "Fielding_Level"]]

# Function to train and predict with XGBoost
def train_xgboost(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = xgb.XGBClassifier(use_label_encoder=False, eval_metric="logloss         1111q")
    model.fit(X_train, y_train)
    preds = model.predict_proba(X)[:, 1]  # Get probability of being classified
    return model, preds

# Train classifiers
_, df["Batter_Confidence"] = train_xgboost(X, df["Batter"])
_, df["Bowler_Confidence"] = train_xgboost(X, df["Bowler"])
_, df["Fielder_Confidence"] = train_xgboost(X, df["Fielder"])
_, df["AllRounder_Confidence"] = train_xgboost(X, df["All-Rounder"])
_, df["Wicketkeeper_Confidence"] = train_xgboost(X, df["Wicketkeeper"])

# Select highest confidence role for each player
role_columns = ["Batter_Confidence", "Bowler_Confidence", "Fielder_Confidence", "AllRounder_Confidence", "Wicketkeeper_Confidence"]
df["Predicted_Role"] = df[role_columns].idxmax(axis=1).str.replace("_Confidence", "")

# Keep relevant columns
final_df = df[["Player", "Team", "Batting_Level", "Bowling_Level", "Fielding_Level", "Predicted_Role"] + role_columns]

# Save to CSV
final_df.to_csv("confidence_intervals.csv", index=False)

# Display output
final_df

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Unnamed: 0,Player,Team,Batting_Level,Bowling_Level,Fielding_Level,Predicted_Role,Batter_Confidence,Bowler_Confidence,Fielder_Confidence,AllRounder_Confidence,Wicketkeeper_Confidence
0,Sam Sabharwal,Windsor Tigers CC,6,4,5,Fielder,0.959744,0.003056,0.966508,0.021873,0.878936
1,Imran Khan,Chatham CC,6,5,4,Bowler,0.959744,0.979086,0.004430,0.888972,0.022839
2,Rama Tarun Ganne,Windsor Royals CC,6,2,6,Fielder,0.959744,0.001836,0.966508,0.021873,0.878936
3,Sahil Sharma,Windsor Royals CC,6,5,4,Bowler,0.959744,0.979086,0.004430,0.888972,0.022839
4,Karamjit Sohal,WEUnited CC,5,6,4,Bowler,0.959744,0.979086,0.004430,0.888972,0.022839
...,...,...,...,...,...,...,...,...,...,...,...
626,Harsh Patel,Windsor Dominion CC,2,2,1,Bowler,0.001575,0.001836,0.001685,0.000849,0.000963
627,Sahil Vichari,Vulcans CC,2,2,2,Bowler,0.001575,0.001836,0.001685,0.000849,0.000963
628,Mandeep S,EOCC,2,2,2,Bowler,0.001575,0.001836,0.001685,0.000849,0.000963
629,Hatim Bhatti,Alpha XI,2,2,1,Bowler,0.001575,0.001836,0.001685,0.000849,0.000963


In [5]:
# Define the number of players needed for each role
num_batters = 3
num_bowlers = 4
num_fielders = 2
num_allrounders = 1
num_wicketkeepers = 1

# Sorting by confidence intervals for each role
batters_sorted = final_df[['Player', 'Team', 'Batter_Confidence']].sort_values(by='Batter_Confidence', ascending=False).head(num_batters)
bowlers_sorted = final_df[['Player', 'Team', 'Bowler_Confidence']].sort_values(by='Bowler_Confidence', ascending=False).head(num_bowlers)
fielders_sorted = final_df[['Player', 'Team', 'Fielder_Confidence']].sort_values(by='Fielder_Confidence', ascending=False).head(num_fielders)
allrounders_sorted = final_df[['Player', 'Team', 'AllRounder_Confidence']].sort_values(by='AllRounder_Confidence', ascending=False).head(num_allrounders)
wicketkeepers_sorted = final_df[['Player', 'Team', 'Wicketkeeper_Confidence']].sort_values(by='Wicketkeeper_Confidence', ascending=False).head(num_wicketkeepers)

# Combine the sorted results into the final Dream 11 team
dream_11_team = pd.concat([batters_sorted, bowlers_sorted, fielders_sorted, allrounders_sorted, wicketkeepers_sorted])

# Store the final Dream 11 team to a CSV file
dream_11_team.to_csv('dream_11_team.csv', index=False)

print("Dream 11 team saved to 'dream_11_team.csv'")

Dream 11 team saved to 'dream_11_team.csv'


In [6]:
#Evaluation for XGBoost

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd

# Get actual labels (True roles)
y_true = df[['Batter', 'Bowler', 'Fielder', 'All-Rounder', 'Wicketkeeper']].idxmax(axis=1)

# Get predicted labels
y_pred = df["Predicted_Role"]

# Compute accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Classification Report (Precision, Recall, F1-score)
print("\nClassification Report:")
print(classification_report(y_true, y_pred))

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred, labels=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"])
conf_matrix_df = pd.DataFrame(conf_matrix, 
                              index=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"], 
                              columns=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"])
print("\nConfusion Matrix:")
print(conf_matrix_df)



Accuracy: 0.1696

Classification Report:
              precision    recall  f1-score   support

      Batter       1.00      0.08      0.14       567
      Bowler       0.08      1.00      0.15        40
     Fielder       0.22      1.00      0.37        24

    accuracy                           0.17       631
   macro avg       0.44      0.69      0.22       631
weighted avg       0.91      0.17      0.15       631


Confusion Matrix:
              Batter  Bowler  Fielder  All-Rounder  Wicketkeeper
Batter            43     441       83            0             0
Bowler             0      40        0            0             0
Fielder            0       0       24            0             0
All-Rounder        0       0        0            0             0
Wicketkeeper       0       0        0            0             0


Other Models

In [12]:
#Using random forest classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

df = pd.read_csv("final_cricket_levels.csv")

# Define labels for each role based on thresholds
df["Batter"] = (df["Batting_Level"] >= 5).astype(int)
df["Bowler"] = (df["Bowling_Level"] >= 5).astype(int)
df["Fielder"] = (df["Fielding_Level"] >= 5).astype(int)
df["All-Rounder"] = ((df["Batting_Level"] >= 5) & (df["Bowling_Level"] >= 5)).astype(int)
df["Wicketkeeper"] = ((df["Batting_Level"] >= 5) & (df["Fielding_Level"] >= 5)).astype(int)

# Features for training
X = df[["Batting_Level", "Bowling_Level", "Fielding_Level"]]

# Function to train and predict with Random Forest
def train_random_forest(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X_train, y_train)
    preds = rf.predict_proba(X)[:, 1]  # Get probability of being classified
    return rf, preds

# Train classifiers
_, df["Batter_Confidence"] = train_random_forest(X, df["Batter"])
_, df["Bowler_Confidence"] = train_random_forest(X, df["Bowler"])
_, df["Fielder_Confidence"] = train_random_forest(X, df["Fielder"])
_, df["AllRounder_Confidence"] = train_random_forest(X, df["All-Rounder"])
_, df["Wicketkeeper_Confidence"] = train_random_forest(X, df["Wicketkeeper"])

# Select highest confidence role for each player
role_columns = ["Batter_Confidence", "Bowler_Confidence", "Fielder_Confidence", "AllRounder_Confidence", "Wicketkeeper_Confidence"]
df["Predicted_Role"] = df[role_columns].idxmax(axis=1).str.replace("_Confidence", "")

# Keep relevant columns
final_df = df[["Player", "Team", "Batting_Level", "Bowling_Level", "Fielding_Level", "Predicted_Role"] + role_columns]

#Save to CSV
# final_df.to_csv("confidence_intervals.csv", index=False)

final_df


Unnamed: 0,Player,Team,Batting_Level,Bowling_Level,Fielding_Level,Predicted_Role,Batter_Confidence,Bowler_Confidence,Fielder_Confidence,AllRounder_Confidence,Wicketkeeper_Confidence
0,Sam Sabharwal,Windsor Tigers CC,6,4,5,Batter,1.00,0.00,1.00,0.00,0.98
1,Imran Khan,Chatham CC,6,5,4,Batter,1.00,0.99,0.04,0.97,0.05
2,Rama Tarun Ganne,Windsor Royals CC,6,2,6,Fielder,0.96,0.00,0.97,0.00,0.94
3,Sahil Sharma,Windsor Royals CC,6,5,4,Batter,1.00,0.99,0.04,0.97,0.05
4,Karamjit Sohal,WEUnited CC,5,6,4,Bowler,0.90,1.00,0.00,0.90,0.00
...,...,...,...,...,...,...,...,...,...,...,...
626,Harsh Patel,Windsor Dominion CC,2,2,1,Batter,0.00,0.00,0.00,0.00,0.00
627,Sahil Vichari,Vulcans CC,2,2,2,Batter,0.00,0.00,0.00,0.00,0.00
628,Mandeep S,EOCC,2,2,2,Batter,0.00,0.00,0.00,0.00,0.00
629,Hatim Bhatti,Alpha XI,2,2,1,Batter,0.00,0.00,0.00,0.00,0.00


In [13]:
#Evaluation for Random Forest Classifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd

# Actual labels (True roles)
y_true = df[['Batter', 'Bowler', 'Fielder', 'All-Rounder', 'Wicketkeeper']].idxmax(axis=1)

# Predicted labels
y_pred = df["Predicted_Role"]

# Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Classification Report (Precision, Recall, F1-score)
print("Classification Report:")
print(classification_report(y_true, y_pred))

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred, labels=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"])
conf_matrix_df = pd.DataFrame(conf_matrix, 
                              index=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"], 
                              columns=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"])

print("\nConfusion Matrix:")
print(conf_matrix_df)


Accuracy: 0.9731
Classification Report:
              precision    recall  f1-score   support

      Batter       1.00      0.97      0.98       567
      Bowler       0.80      1.00      0.89        40
     Fielder       0.77      1.00      0.87        24

    accuracy                           0.97       631
   macro avg       0.86      0.99      0.92       631
weighted avg       0.98      0.97      0.97       631


Confusion Matrix:
              Batter  Bowler  Fielder  All-Rounder  Wicketkeeper
Batter           550      10        7            0             0
Bowler             0      40        0            0             0
Fielder            0       0       24            0             0
All-Rounder        0       0        0            0             0
Wicketkeeper       0       0        0            0             0


In [14]:
#Using Logistic Regression
from sklearn.linear_model import LogisticRegression
df["Batter"] = (df["Batting_Level"] >= 5).astype(int)
df["Bowler"] = (df["Bowling_Level"] >= 5).astype(int)
df["Fielder"] = (df["Fielding_Level"] >= 5).astype(int)
df["All-Rounder"] = ((df["Batting_Level"] >= 5) & (df["Bowling_Level"] >= 5)).astype(int)
df["Wicketkeeper"] = ((df["Batting_Level"] >= 5) & (df["Fielding_Level"] >= 5)).astype(int)

# Features for training
X = df[["Batting_Level", "Bowling_Level", "Fielding_Level"]]

# Function to train and predict with Logistic Regression
def train_logistic_regression(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict_proba(X)[:, 1]  # Get probability of being classified
    return model, preds

# Train classifiers
_, df["Batter_Confidence"] = train_logistic_regression(X, df["Batter"])
_, df["Bowler_Confidence"] = train_logistic_regression(X, df["Bowler"])
_, df["Fielder_Confidence"] = train_logistic_regression(X, df["Fielder"])
_, df["AllRounder_Confidence"] = train_logistic_regression(X, df["All-Rounder"])
_, df["Wicketkeeper_Confidence"] = train_logistic_regression(X, df["Wicketkeeper"])

# Select highest confidence role for each player
role_columns = ["Batter_Confidence", "Bowler_Confidence", "Fielder_Confidence", "AllRounder_Confidence", "Wicketkeeper_Confidence"]
df["Predicted_Role"] = df[role_columns].idxmax(axis=1).str.replace("_Confidence", "")

# Keep relevant columns
final_df = df[["Player", "Team", "Batting_Level", "Bowling_Level", "Fielding_Level", "Predicted_Role"] + role_columns]

# Save to CSV
#final_df.to_csv("confidence_intervals.csv", index=False)

# Display output
final_df

Unnamed: 0,Player,Team,Batting_Level,Bowling_Level,Fielding_Level,Predicted_Role,Batter_Confidence,Bowler_Confidence,Fielder_Confidence,AllRounder_Confidence,Wicketkeeper_Confidence
0,Sam Sabharwal,Windsor Tigers CC,6,4,5,Batter,0.995667,1.422562e-01,8.924319e-01,0.553104,8.204326e-01
1,Imran Khan,Chatham CC,6,5,4,Batter,0.995270,9.312217e-01,8.540639e-02,0.853634,4.112567e-01
2,Rama Tarun Ganne,Windsor Royals CC,6,2,6,Fielder,0.994890,2.236728e-05,9.987039e-01,0.047026,9.626856e-01
3,Sahil Sharma,Windsor Royals CC,6,5,4,Batter,0.995270,9.312217e-01,8.540639e-02,0.853634,4.112567e-01
4,Karamjit Sohal,WEUnited CC,5,6,4,Bowler,0.851978,9.989843e-01,6.761997e-02,0.794620,1.102305e-01
...,...,...,...,...,...,...,...,...,...,...,...
626,Harsh Patel,Windsor Dominion CC,2,2,1,Batter,0.000007,5.369987e-06,7.553023e-08,0.000006,5.679238e-07
627,Sahil Vichari,Vulcans CC,2,2,2,Batter,0.000010,5.974303e-06,6.418900e-06,0.000007,4.302817e-06
628,Mandeep S,EOCC,2,2,2,Batter,0.000010,5.974303e-06,6.418900e-06,0.000007,4.302817e-06
629,Hatim Bhatti,Alpha XI,2,2,1,Batter,0.000007,5.369987e-06,7.553023e-08,0.000006,5.679238e-07


In [15]:
#Evaluation for Logistic Regression model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss
import pandas as pd

# Get actual labels (True roles)
y_true = df[['Batter', 'Bowler', 'Fielder', 'All-Rounder', 'Wicketkeeper']].idxmax(axis=1)

# Get predicted labels
y_pred = df["Predicted_Role"]

# Compute accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Classification Report (Precision, Recall, F1-score)
print("\nClassification Report:")
print(classification_report(y_true, y_pred))

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred, labels=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"])
conf_matrix_df = pd.DataFrame(conf_matrix, 
                              index=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"], 
                              columns=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"])
print("\nConfusion Matrix:")
print(conf_matrix_df)

# Log Loss (using predicted probabilities)
# y_probs = df[["Batter_Confidence", "Bowler_Confidence", "Fielder_Confidence", "AllRounder_Confidence", "Wicketkeeper_Confidence"]].values
# y_true_binary = pd.get_dummies(y_true)  # Convert labels to one-hot encoding

# logloss = log_loss(y_true_binary, y_probs)
# print(f"\nLog Loss: {logloss:.4f}")

Accuracy: 0.5737

Classification Report:
              precision    recall  f1-score   support

      Batter       1.00      0.53      0.69       567
      Bowler       0.18      1.00      0.31        40
     Fielder       0.21      1.00      0.35        24

    accuracy                           0.57       631
   macro avg       0.46      0.84      0.45       631
weighted avg       0.92      0.57      0.65       631


Confusion Matrix:
              Batter  Bowler  Fielder  All-Rounder  Wicketkeeper
Batter           298     179       90            0             0
Bowler             0      40        0            0             0
Fielder            0       0       24            0             0
All-Rounder        0       0        0            0             0
Wicketkeeper       0       0        0            0             0


In [16]:
#Using LightGBM
!pip install lightgbm
import lightgbm as lgb

df = pd.read_csv("final_cricket_levels.csv")

# Define labels for each role based on thresholds
df["Batter"] = (df["Batting_Level"] >= 5).astype(int)
df["Bowler"] = (df["Bowling_Level"] >= 5).astype(int)
df["Fielder"] = (df["Fielding_Level"] >= 5).astype(int)
df["All-Rounder"] = ((df["Batting_Level"] >= 5) & (df["Bowling_Level"] >= 5)).astype(int)
df["Wicketkeeper"] = ((df["Batting_Level"] >= 5) & (df["Fielding_Level"] >= 5)).astype(int)

# Features for training
X = df[["Batting_Level", "Bowling_Level", "Fielding_Level"]]

# Function to train and predict with LightGBM
def train_lightgbm(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = lgb.LGBMClassifier(random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict_proba(X)[:, 1]  # Get probability of being classified
    return model, preds

# Train classifiers
_, df["Batter_Confidence"] = train_lightgbm(X, df["Batter"])
_, df["Bowler_Confidence"] = train_lightgbm(X, df["Bowler"])
_, df["Fielder_Confidence"] = train_lightgbm(X, df["Fielder"])
_, df["AllRounder_Confidence"] = train_lightgbm(X, df["All-Rounder"])
_, df["Wicketkeeper_Confidence"] = train_lightgbm(X, df["Wicketkeeper"])

# Select highest confidence role for each player
role_columns = ["Batter_Confidence", "Bowler_Confidence", "Fielder_Confidence", "AllRounder_Confidence", "Wicketkeeper_Confidence"]
df["Predicted_Role"] = df[role_columns].idxmax(axis=1).str.replace("_Confidence", "")

# Keep relevant columns
final_df = df[["Player", "Team", "Batting_Level", "Bowling_Level", "Fielding_Level", "Predicted_Role"] + role_columns]

# Save to CSV
#final_df.to_csv("confidence_intervals.csv", index=False)

# Display output
final_df


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: C:\Users\lowela\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Defaulting to user installation because normal site-packages is not writeable
[LightGBM] [Info] Number of positive: 23, number of negative: 481
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000110 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19
[LightGBM] [Info] Number of data points in the train set: 504, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.045635 -> initscore=-3.040373
[LightGBM] [Info] Start training from score -3.040373
[LightGBM] [Info] Number of positive: 44, number of negative: 460
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000027 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 19
[LightGBM] [Info] Number of data points in the train set: 504, number of used features: 3
[LightGBM] [Info] [binar

Unnamed: 0,Player,Team,Batting_Level,Bowling_Level,Fielding_Level,Predicted_Role,Batter_Confidence,Bowler_Confidence,Fielder_Confidence,AllRounder_Confidence,Wicketkeeper_Confidence
0,Sam Sabharwal,Windsor Tigers CC,6,4,5,Fielder,0.999962,0.000004,0.999965,7.373324e-03,9.805707e-01
1,Imran Khan,Chatham CC,6,5,4,Bowler,0.999962,0.999964,0.000003,9.769912e-01,6.204470e-03
2,Rama Tarun Ganne,Windsor Royals CC,6,2,6,Fielder,0.999962,0.000004,0.999965,7.373324e-03,9.805707e-01
3,Sahil Sharma,Windsor Royals CC,6,5,4,Bowler,0.999962,0.999964,0.000003,9.769912e-01,6.204470e-03
4,Karamjit Sohal,WEUnited CC,5,6,4,Bowler,0.999962,0.999964,0.000003,9.769912e-01,6.204470e-03
...,...,...,...,...,...,...,...,...,...,...,...
626,Harsh Patel,Windsor Dominion CC,2,2,1,Bowler,0.000002,0.000004,0.000003,7.980730e-07,8.012094e-07
627,Sahil Vichari,Vulcans CC,2,2,2,Bowler,0.000002,0.000004,0.000003,7.980730e-07,8.012094e-07
628,Mandeep S,EOCC,2,2,2,Bowler,0.000002,0.000004,0.000003,7.980730e-07,8.012094e-07
629,Hatim Bhatti,Alpha XI,2,2,1,Bowler,0.000002,0.000004,0.000003,7.980730e-07,8.012094e-07


In [17]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss
import pandas as pd

# Get actual labels (True roles)
y_true = df[['Batter', 'Bowler', 'Fielder', 'All-Rounder', 'Wicketkeeper']].idxmax(axis=1)

# Get predicted labels
y_pred = df["Predicted_Role"]

# Compute accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Classification Report (Precision, Recall, F1-score)
print("\nClassification Report:")
print(classification_report(y_true, y_pred))

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred, labels=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"])
conf_matrix_df = pd.DataFrame(conf_matrix, 
                              index=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"], 
                              columns=["Batter", "Bowler", "Fielder", "All-Rounder", "Wicketkeeper"])
print("\nConfusion Matrix:")
print(conf_matrix_df)

# Log Loss (using predicted probabilities)
# y_probs = df[["Batter_Confidence", "Bowler_Confidence", "Fielder_Confidence", "AllRounder_Confidence", "Wicketkeeper_Confidence"]].values
# y_true_binary = pd.get_dummies(y_true)  # Convert labels to one-hot encoding

# logloss = log_loss(y_true_binary, y_probs)
# print(f"\nLog Loss: {logloss:.4f}")


Accuracy: 0.1125

Classification Report:
              precision    recall  f1-score   support

      Batter       1.00      0.02      0.04       567
      Bowler       0.06      0.90      0.12        40
     Fielder       0.63      1.00      0.77        24

    accuracy                           0.11       631
   macro avg       0.56      0.64      0.31       631
weighted avg       0.93      0.11      0.07       631


Confusion Matrix:
              Batter  Bowler  Fielder  All-Rounder  Wicketkeeper
Batter            11     546       10            0             0
Bowler             0      36        4            0             0
Fielder            0       0       24            0             0
All-Rounder        0       0        0            0             0
Wicketkeeper       0       0        0            0             0
