In [1]:
import numpy as np
import optuna

from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo 
from xgboost import XGBClassifier

In [20]:
connectionist_bench_sonar_mines_vs_rocks = fetch_ucirepo(id=151) 
  
# data (as pandas dataframes) 
X = connectionist_bench_sonar_mines_vs_rocks.data.features.values
y = connectionist_bench_sonar_mines_vs_rocks.data.targets.values.ravel()
  
# metadata 
print(connectionist_bench_sonar_mines_vs_rocks.metadata) 
  
# variable information 
print(connectionist_bench_sonar_mines_vs_rocks.variables) 

{'uci_id': 151, 'name': 'Connectionist Bench (Sonar, Mines vs. Rocks)', 'repository_url': 'https://archive.ics.uci.edu/dataset/151/connectionist+bench+sonar+mines+vs+rocks', 'data_url': 'https://archive.ics.uci.edu/static/public/151/data.csv', 'abstract': 'The task is to train a network to discriminate between sonar signals bounced off a metal cylinder and those bounced off a roughly cylindrical rock.', 'area': 'Physics and Chemistry', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 208, 'num_features': 60, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1988, 'last_updated': None, 'dataset_doi': '10.24432/C5T01Q', 'creators': ['Terry Sejnowski', 'R. Gorman'], 'intro_paper': None, 'additional_info': {'summary': 'The file "sonar.mines" contains 111 patterns obtained by bouncing sonar signals off a metal cylinder at various a

In [12]:
X.shape

(208, 60)

In [21]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)
y.shape

(208,)

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)
X_train.shape

(166, 60)

In [33]:
xgb = XGBClassifier()
xgb.fit(X_train, y_train)

In [34]:
xgb.score(X_train, y_train)

1.0

In [35]:
xgb.score(X_test, y_test)

0.8571428571428571

In [36]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

def fn(trial):
    max_depth = trial.suggest_int("max_depth", low=4, high=128)
    min_child_weight = trial.suggest_float("min_child_weight", low=0, high=4)
    gamma = trial.suggest_float("gamma", low=0, high=4)
    xgb = XGBClassifier(
        max_depth=max_depth,
        min_child_weight=min_child_weight,
        gamma=gamma,
        random_state=19,
    )
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    return cross_val_score(xgb, X_train, y_train, cv=cv).mean()

study = optuna.create_study(direction="maximize")
study.optimize(fn, n_trials=100)


[I 2025-07-08 17:57:28,041] A new study created in memory with name: no-name-5dc84b09-d0f4-499b-b1b9-7be069efdd34
[I 2025-07-08 17:57:28,240] Trial 0 finished with value: 0.8130124777183602 and parameters: {'max_depth': 25, 'min_child_weight': 2.04843129832424, 'gamma': 1.3770767409037248}. Best is trial 0 with value: 0.8130124777183602.
[I 2025-07-08 17:57:28,395] Trial 1 finished with value: 0.7650623885918003 and parameters: {'max_depth': 121, 'min_child_weight': 0.6927031336488971, 'gamma': 2.149615307898255}. Best is trial 0 with value: 0.8130124777183602.
[I 2025-07-08 17:57:28,550] Trial 2 finished with value: 0.7531194295900179 and parameters: {'max_depth': 35, 'min_child_weight': 0.8457781759782868, 'gamma': 2.989798097602455}. Best is trial 0 with value: 0.8130124777183602.
[I 2025-07-08 17:57:28,698] Trial 3 finished with value: 0.8190730837789661 and parameters: {'max_depth': 42, 'min_child_weight': 3.439323256461306, 'gamma': 0.23080045920319936}. Best is trial 3 with valu

In [37]:
xgb = XGBClassifier(**study.best_params)
xgb.fit(X_train, y_train)

In [38]:
xgb.score(X_train, y_train)

1.0

In [39]:
xgb.score(X_test, y_test)

0.9047619047619048

In [44]:
X_subtrain, X_valid, y_subtrain, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=3)

In [45]:
xgb2 = XGBClassifier(**study.best_params)
xgb2.fit(X_subtrain, y_subtrain)

In [46]:
xgb2.score(X_subtrain, y_subtrain)

1.0

In [47]:
xgb2.score(X_valid, y_valid)

0.9411764705882353