In [3]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import LabelEncoder

data = pd.read_csv('7.csv')

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data["species"])
X = data.drop("species", axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 5, 7],
    'n_estimators': [100, 150],
    'subsample': [0.8, 1],
    'colsample_bytree': [0.8, 1]
}

class PatchedXGBClassifier(xgb.XGBClassifier):
    def __init__(self, **kwargs):
        kwargs.pop("use_label_encoder", None)
        super().__init__(**kwargs)

xgb_model = PatchedXGBClassifier(random_state=42, eval_metric='mlogloss')

grid_search = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=3,
    verbose=1,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
xgb_preds = best_model.predict(X_test)

# Evaluation
print("\nBest Hyperparameters:", grid_search.best_params_)
print("\nXGBoost Model Performance (Tuned):")
print(f"Accuracy: {accuracy_score(y_test, xgb_preds):.4f}")
print(f"Precision: {precision_score(y_test, xgb_preds, average='weighted'):.4f}")
print(f"Recall: {recall_score(y_test, xgb_preds, average='weighted'):.4f}")
print(f"F1 Score: {f1_score(y_test, xgb_preds, average='weighted'):.4f}")
print("\nClassification Report:\n", classification_report(y_test, xgb_preds, target_names=label_encoder.classes_))

Fitting 3 folds for each of 72 candidates, totalling 216 fits

Best Hyperparameters: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 150, 'subsample': 0.8}

XGBoost Model Performance (Tuned):
Accuracy: 0.9301
Precision: 0.9318
Recall: 0.9301
F1 Score: 0.9300

Classification Report:
                         precision    recall  f1-score   support

    Anabas testudineus       1.00      1.00      1.00       120
     Coilia dussumieri       1.00      1.00      1.00        77
Otolithoides biauritus       0.66      0.77      0.71        91
     Otolithoides pama       1.00      1.00      1.00        86
     Pethia conchonius       1.00      1.00      1.00        89
  Polynemus paradiseus       1.00      1.00      1.00       102
  Puntius lateristriga       1.00      1.00      1.00        65
        Setipinna taty       0.75      0.64      0.69       100
 Sillaginopsis panijus       1.00      1.00      1.00        86

              accuracy                   