In [2]:
import pandas as pd
import pickle
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

df = pd.read_csv("CricX_dataset_combined - Sheet1 (1).csv")


weather_map = {"Sunny": 0, "Cloudy": 1, "Rain": 2}
pitch_map = {"Balanced": 0, "Pace-friendly": 1, "Spin-friendly": 2, "Batting-friendly": 3}

df["weather_enc"] = df["weather"].map(weather_map)
df["pitch_enc"] = df["pitch_type"].map(pitch_map)


features = [
    "home_advantage",
    "h2h_t1_wins",
    "h2h_t2_wins",
    "team1_recent_win_pct",
    "team2_recent_win_pct",
    "team1_top5_bat_avg",
    "team2_top5_bat_avg",
    "team1_top5_bowl_avg",
    "team2_top5_bowl_avg",
    "weather_enc",
    "pitch_enc",
]

X = df[features]
y = df["Result"]


le = LabelEncoder()
y = le.fit_transform(y)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


model = XGBClassifier(use_label_encoder=False, eval_metric="mlogloss")
model.fit(X_train, y_train)


y_pred = model.predict(X_test)
print("Training Accuracy:", model.score(X_train, y_train))
print("Testing Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


with open("xgboost_model.pkl", "wb") as f:
    pickle.dump({"model": model, "label_encoder": le}, f)

print("\n✅ Model retrained and saved as xgboost_model.pkl")


Training Accuracy: 0.9615384615384616
Testing Accuracy: 0.4444444444444444

Classification Report:
               precision    recall  f1-score   support

           0       0.25      0.25      0.25         4
           1       0.57      0.47      0.52        17
           2       0.33      0.50      0.40         6

    accuracy                           0.44        27
   macro avg       0.38      0.41      0.39        27
weighted avg       0.47      0.44      0.45        27


Confusion Matrix:
 [[1 3 0]
 [3 8 6]
 [0 3 3]]

✅ Model retrained and saved as xgboost_model.pkl


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
