# Hyperparameter Optimization

## Data Load

In [6]:
import pandas as pd
import numpy as np

In [8]:
# random oversampled data
data_df = pd.read_csv('data_random_oversampled.csv').drop(['Unnamed: 0'], axis = 1)

# train-test_split
from sklearn.model_selection import train_test_split

X = data_df.iloc[:, :-1] # features
y = data_df.iloc[:, -1] # label

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# cat boost classifier 이용
from catboost import CatBoostClassifier
from sklearn.model_selection import GridSearchCV

# CatBoost 모델 정의
catboost_model = CatBoostClassifier()

# 그리드 서치를 위한 하이퍼파라미터 그리드 생성
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'depth': [4, 6, 8, 10],
    'l2_leaf_reg': [1, 3, 5, 7, 9]
}

# GridSearchCV
grid_search = GridSearchCV(catboost_model, param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
print("Best Cross-validation Accuracy:", grid_search.best_score_)

test_accuracy = grid_search.best_estimator_.score(X_test, y_test)
print("Test Accuracy:", test_accuracy)


0:	learn: 0.6788143	total: 1.96ms	remaining: 1.96s
1:	learn: 0.6641335	total: 4.19ms	remaining: 2.09s
2:	learn: 0.6505489	total: 6ms	remaining: 1.99s
3:	learn: 0.6402779	total: 7.58ms	remaining: 1.89s
4:	learn: 0.6281162	total: 9.31ms	remaining: 1.85s
5:	learn: 0.6159478	total: 11.4ms	remaining: 1.89s
6:	learn: 0.6043694	total: 14.1ms	remaining: 2s
7:	learn: 0.5941010	total: 16.6ms	remaining: 2.05s
8:	learn: 0.5816904	total: 18.5ms	remaining: 2.04s
9:	learn: 0.5726174	total: 20.2ms	remaining: 2s
10:	learn: 0.5617038	total: 22.5ms	remaining: 2.02s
11:	learn: 0.5503396	total: 23.5ms	remaining: 1.94s
12:	learn: 0.5403648	total: 25.6ms	remaining: 1.95s
13:	learn: 0.5308045	total: 27.6ms	remaining: 1.94s
14:	learn: 0.5247273	total: 30ms	remaining: 1.97s
15:	learn: 0.5171003	total: 32.4ms	remaining: 2s
16:	learn: 0.5089259	total: 34ms	remaining: 1.97s
17:	learn: 0.5003781	total: 36.3ms	remaining: 1.98s
18:	learn: 0.4930330	total: 38.6ms	remaining: 1.99s
19:	learn: 0.4854754	total: 40.4ms	rem