<a href="https://colab.research.google.com/github/akhilmuvva/predicting-XI-of-cricket-/blob/main/playing_11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
pip install pycricbuzz


Collecting pycricbuzz
  Downloading pycricbuzz-2.4-py2.py3-none-any.whl.metadata (388 bytes)
Downloading pycricbuzz-2.4-py2.py3-none-any.whl (10 kB)
Installing collected packages: pycricbuzz
Successfully installed pycricbuzz-2.4


In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from pycricbuzz import Cricbuzz


In [12]:
def get_player_stats():
    """
    Fetches player statistics using the Cricbuzz API and formats it into a DataFrame.
    This is a demonstrative function with sample data.
    """
    c = Cricbuzz()
    data = {
        'Player': ['Virat Kohli', 'Rohit Sharma', 'Jasprit Bumrah', 'Ravindra Jadeja', 'KL Rahul',
                  'Suryakumar Yadav', 'Hardik Pandya', 'Rishabh Pant', 'Mohammed Shami',
                  'Yuzvendra Chahal', 'Shikhar Dhawan', 'Shreyas Iyer', 'Axar Patel',
                  'Kuldeep Yadav', 'Ishan Kishan'],
        'Matches': [113, 151, 72, 171, 72, 48, 87, 66, 94, 72, 167, 49, 39, 69, 27],
        'Batting_Average': [52.73, 48.62, 5.64, 32.54, 47.41, 46.51, 34.00, 36.56, 36.00,
                           25.00, 44.12, 42.14, 20.12, 18.50, 30.20],
        'Strike_Rate': [93.26, 89.22, 74.24, 87.04, 89.47, 172.71, 139.7, 106.67, 102.00,
                        90.00, 91.30, 96.54, 110.2, 85.0, 105.2],
        'Bowling_Average': [166.0, 201.0, 24.0, 37.3, 0, 0, 31.2, 0, 0, 0, 30.0, 0, 34.4,
                            0, 0],
        'Economy_Rate': [5.53, 5.15, 0, 0, 0, 0, 0, 0, 0, 0, 6.00, 0, 4.50, 0, 0],
        'Player_Role': ['Batsman', 'Batsman', 'Bowler', 'All-Rounder', 'Batsman', 'Batsman',
                        'All-Rounder', 'Wicketkeeper', 'Bowler', 'Bowler', 'Batsman',
                        'Batsman', 'All-Rounder', 'Bowler', 'Wicketkeeper'],
        'in_playing_xi': [1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0]
    }
    return pd.DataFrame(data)

In [13]:
df = get_player_stats()

le = LabelEncoder()
df['Player_Role_Encoded'] = le.fit_transform(df['Player_Role'])

features = ['Matches', 'Batting_Average', 'Strike_Rate', 'Bowling_Average',
            'Economy_Rate', 'Player_Role_Encoded']
X = df[features]
y = df['in_playing_xi']

In [14]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [17]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [19]:
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

In [20]:
model.fit(X_train, y_train, epochs=50, batch_size=8, verbose=0,
          validation_split=0.2, class_weight=class_weight_dict)

<keras.src.callbacks.history.History at 0x7e17a14b9d90>

In [21]:
y_pred_prob = model.predict(X_test, verbose=0)
y_pred = (y_pred_prob > 0.5).astype(int).flatten()
print("Model Evaluation:")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

Model Evaluation:
Accuracy: 0.67

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.50      1.00      0.67         1

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3



In [22]:
new_squad_df = df.copy()

In [23]:
X_new_squad = new_squad_df[features]
X_new_squad_scaled = scaler.transform(X_new_squad)

In [24]:
playing_xi_probabilities = model.predict(X_new_squad_scaled, verbose=0).flatten()
new_squad_df['playing_xi_prob'] = playing_xi_probabilities

In [29]:
def select_playing_xi(df, min_batsmen=4, min_wicketkeepers=1, min_allrounders=2, min_bowlers=4):
    """
    Selects a playing XI of 11 players based on predicted probabilities and role requirements.
    """
    playing_xi = pd.DataFrame()

    # Filter players by role
    batsmen = df[df['Player_Role'] == 'Batsman']
    wicketkeepers = df[df['Player_Role'] == 'Wicketkeeper']
    allrounders = df[df['Player_Role'] == 'All-Rounder']
    bowlers = df[df['Player_Role'] == 'Bowler']

    # Check if sufficient players are available for each role
    if (len(batsmen) < min_batsmen or len(wicketkeepers) < min_wicketkeepers or
        len(allrounders) < min_allrounders or len(bowlers) < min_bowlers):
        raise ValueError("Insufficient players for required roles in the squad.")

    # Select top players for each role based on predicted probability
    top_batsmen = batsmen.nlargest(min_batsmen, 'playing_xi_prob')
    top_wicketkeepers = wicketkeepers.nlargest(min_wicketkeepers, 'playing_xi_prob')
    top_allrounders = allrounders.nlargest(min_allrounders, 'playing_xi_prob')
    top_bowlers = bowlers.nlargest(min_bowlers, 'playing_xi_prob')

    # Combine selected players
    playing_xi = pd.concat([top_batsmen, top_wicketkeepers, top_allrounders, top_bowlers])

    # Check if exactly 11 players were selected
    if len(playing_xi) != 11:
        raise ValueError(f"Selected {len(playing_xi)} players, but exactly 11 are required.")

    return playing_xi

# Now call the function with the dataframe containing predicted probabilities
selected_xi = select_playing_xi(new_squad_df)

# You can now display the selected playing XI
print("\nSelected Playing XI:")
display(selected_xi)


Selected Playing XI:


Unnamed: 0,Player,Matches,Batting_Average,Strike_Rate,Bowling_Average,Economy_Rate,Player_Role,in_playing_xi,Player_Role_Encoded,playing_xi_prob
5,Suryakumar Yadav,48,46.51,172.71,0.0,0.0,Batsman,1,1,0.773057
0,Virat Kohli,113,52.73,93.26,166.0,5.53,Batsman,1,1,0.745132
1,Rohit Sharma,151,48.62,89.22,201.0,5.15,Batsman,1,1,0.724857
4,KL Rahul,72,47.41,89.47,0.0,0.0,Batsman,1,1,0.593861
7,Rishabh Pant,66,36.56,106.67,0.0,0.0,Wicketkeeper,1,3,0.419921
6,Hardik Pandya,87,34.0,139.7,31.2,0.0,All-Rounder,0,0,0.662292
12,Axar Patel,39,20.12,110.2,34.4,4.5,All-Rounder,1,0,0.630219
2,Jasprit Bumrah,72,5.64,74.24,24.0,0.0,Bowler,1,2,0.492252
8,Mohammed Shami,94,36.0,102.0,0.0,0.0,Bowler,0,2,0.457295
13,Kuldeep Yadav,69,18.5,85.0,0.0,0.0,Bowler,0,2,0.437626


In [30]:
try:
    predicted_playing_xi = select_playing_xi(new_squad_df)

    # Display the predicted playing XI
    print("\nPredicted Playing XI (4 Batsmen, 1 Wicketkeeper, 2 All-Rounders, 4 Bowlers):")
    predicted_playing_xi = predicted_playing_xi[['Player', 'Player_Role', 'playing_xi_prob',
                                                 'Batting_Average', 'Bowling_Average']]
    print(predicted_playing_xi.sort_values(by='playing_xi_prob', ascending=False))

    # Evaluate team strength
    print("\nTeam Strength Metrics:")
    batting_avg = predicted_playing_xi['Batting_Average'].mean()
    bowling_avg = predicted_playing_xi[predicted_playing_xi['Bowling_Average'] > 0]['Bowling_Average'].mean()
    print(f"Average Batting Average: {batting_avg:.2f}")
    print(f"Average Bowling Average: {bowling_avg:.2f}")

    # Verify role counts
    role_counts = predicted_playing_xi['Player_Role'].value_counts()
    print("\nRole Distribution in Predicted Playing XI:")
    print(role_counts)
except ValueError as e:
    print(f"Error: {e}")


Predicted Playing XI (4 Batsmen, 1 Wicketkeeper, 2 All-Rounders, 4 Bowlers):
              Player   Player_Role  playing_xi_prob  Batting_Average  \
5   Suryakumar Yadav       Batsman         0.773057            46.51   
0        Virat Kohli       Batsman         0.745132            52.73   
1       Rohit Sharma       Batsman         0.724857            48.62   
6      Hardik Pandya   All-Rounder         0.662292            34.00   
12        Axar Patel   All-Rounder         0.630219            20.12   
4           KL Rahul       Batsman         0.593861            47.41   
2     Jasprit Bumrah        Bowler         0.492252             5.64   
8     Mohammed Shami        Bowler         0.457295            36.00   
13     Kuldeep Yadav        Bowler         0.437626            18.50   
9   Yuzvendra Chahal        Bowler         0.436144            25.00   
7       Rishabh Pant  Wicketkeeper         0.419921            36.56   

    Bowling_Average  
5               0.0  
0            