In [5]:
import numpy as np
import pandas as pd

# Seed NumPy's global (legacy) generator so the synthetic data below is
# reproduced exactly on every run.
np.random.seed(1234)

# Size of the toy multi-class problem.
n_samples, n_features, n_classes = 300, 10, 6

# Feature matrix: standard-normal noise with one named column per feature
# (feat0 ... feat9).
feature_names = [f'feat{i}' for i in range(n_features)]
X = pd.DataFrame(
    np.random.randn(n_samples, n_features),
    columns=feature_names,
)

# Labels: class ids 0..n_classes-1 sampled with replacement. They are drawn
# independently of X, so the features carry no information about the target.
# NOTE: this draw must stay after the randn() call above; both consume the
# same global random stream, so reordering would change the data.
y = np.random.choice(range(n_classes), size=n_samples)

# Last expression of the cell: render the feature table.
X

Unnamed: 0,feat0,feat1,feat2,feat3,feat4,feat5,feat6,feat7,feat8,feat9
0,0.471435,-1.190976,1.432707,-0.312652,-0.720589,0.887163,0.859588,-0.636524,0.015696,-2.242685
1,1.150036,0.991946,0.953324,-2.021255,-0.334077,0.002118,0.405453,0.289092,1.321158,-1.546906
2,-0.202646,-0.655969,0.193421,0.553439,1.318152,-0.469305,0.675554,-1.817027,-0.183109,1.058969
3,-0.397840,0.337438,1.047579,1.045938,0.863717,-0.122092,0.124713,-0.322795,0.841675,2.390961
4,0.076200,-0.566446,0.036142,-2.074978,0.247792,-0.897157,-0.136795,0.018289,0.755414,0.215269
...,...,...,...,...,...,...,...,...,...,...
295,0.630583,2.024915,1.573076,2.114489,0.620038,0.533759,1.723189,-1.390599,0.613720,0.746700
296,-1.504179,-1.687467,1.206978,-0.840077,2.232583,-1.758686,1.168785,-1.089553,1.977852,0.330123
297,1.481404,-0.401636,1.634118,-1.764545,2.134186,-0.521587,-0.748533,0.105655,-0.701489,0.608079
298,-0.486693,0.337699,0.949078,-0.491731,-0.551622,0.046200,0.004159,-0.858702,0.744887,-0.392201


In [6]:
# Display the generated label array to eyeball the class ids that were drawn.
y

array([4, 4, 1, 3, 5, 0, 4, 0, 0, 0, 0, 3, 4, 4, 0, 2, 5, 5, 5, 0, 0, 1,
       2, 4, 4, 0, 0, 3, 3, 4, 1, 3, 1, 1, 0, 1, 4, 1, 5, 0, 1, 4, 0, 3,
       5, 4, 4, 4, 4, 3, 5, 2, 5, 2, 1, 1, 2, 5, 0, 2, 2, 2, 3, 3, 2, 1,
       0, 3, 1, 2, 2, 3, 4, 2, 4, 0, 4, 5, 2, 1, 3, 0, 1, 2, 1, 3, 0, 5,
       2, 3, 3, 3, 0, 1, 4, 1, 5, 3, 3, 3, 3, 3, 1, 1, 4, 2, 5, 2, 3, 3,
       5, 4, 0, 5, 2, 4, 2, 3, 0, 2, 2, 4, 4, 4, 2, 3, 0, 1, 2, 3, 1, 4,
       5, 0, 5, 1, 3, 5, 2, 4, 2, 2, 0, 4, 2, 2, 5, 0, 3, 3, 2, 2, 2, 2,
       1, 5, 3, 5, 2, 0, 2, 1, 0, 4, 5, 2, 0, 4, 2, 4, 4, 1, 1, 1, 0, 3,
       3, 0, 3, 3, 2, 5, 0, 0, 1, 2, 0, 4, 4, 1, 2, 4, 5, 2, 0, 1, 2, 2,
       5, 2, 3, 3, 3, 3, 5, 4, 2, 5, 2, 3, 2, 1, 5, 3, 4, 5, 3, 2, 3, 5,
       2, 2, 1, 1, 5, 5, 5, 2, 4, 5, 1, 1, 1, 3, 2, 5, 3, 5, 3, 4, 0, 4,
       4, 3, 4, 0, 2, 1, 1, 2, 3, 2, 0, 2, 0, 3, 2, 1, 0, 5, 3, 4, 0, 4,
       2, 3, 4, 2, 3, 4, 2, 4, 2, 4, 0, 0, 4, 3, 3, 4, 2, 0, 4, 3, 2, 2,
       4, 3, 5, 1, 1, 3, 3, 5, 4, 0, 4, 5, 3, 5])

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set. Stratifying on y keeps the class
# proportions the same in the train and test partitions, and the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1234,
)

In [8]:
from xgboost import XGBClassifier

# Base estimator used as the template for the hyper-parameter search.
# eval_metric='mlogloss' selects multi-class log loss as XGBoost's internal
# evaluation metric; random_state pins the model's own randomness.
# NOTE(review): use_label_encoder is deprecated in recent XGBoost releases and
# may be ignored or trigger a warning there - confirm against the installed version.
xgb = XGBClassifier(
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
)

In [12]:
from sklearn.metrics import make_scorer, f1_score

# Wrap f1_score as a scikit-learn scorer object. average='macro' takes the
# unweighted mean of the per-class F1 values, so every class counts equally.
macro_f1_scorer = make_scorer(
    f1_score,
    average='macro',
)

In [11]:
from sklearn.model_selection import GridSearchCV

# 5) Hyper-parameter candidates (2 x 2 x 2 = 8 combinations)
param_grid = dict(
    max_depth=[3, 5],
    n_estimators=[50, 100],
    learning_rate=[0.1, 0.01],
)

# 6) Configure the grid search
grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    scoring=macro_f1_scorer,
    # Split the training data into 3 folds and evaluate 3 times,
    # each fold serving once as the held-out set.
    cv=3,
    verbose=1,
)

# 7) Run the grid search on the training partition only
grid_search.fit(X_train, y_train)

# 8) Report the best cross-validated score and the parameters that achieved it
print(f"Best Macro F1 Score: {grid_search.best_score_:.4f}")
print("Best parameters:", grid_search.best_params_)

# 9) Evaluate the best model on the held-out test data
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
test_macro_f1 = f1_score(y_test, y_pred, average='macro')
print(f"Test Macro F1 Score: {test_macro_f1:.4f}")

Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Best Macro F1 Score: 0.2077
Best parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}
Test Macro F1 Score: 0.1622


[Parallel(n_jobs=1)]: Done  24 out of  24 | elapsed:    1.6s finished
