# 결측치 처리방식에 따라 달라지는 분류기의 성능 확인


## 1. 결측이 발생한 행, 열정보를 삭제하는 방법
> - dropna를 활용한 결측치가 존재하는 행 또는 열정보 제거
>   - df.dropna(axis=0) # axis=0 열, axis=1 행

## 2. 특정 값을 활용한 결측치 처리 방법
2-1. fillna(값)를 통한 특정 값으로 채우기
> - 0으로 채우기: df.fillna(0)
> - 평균 : df.fillna(df.mean())
> - 중앙값 : df.fillna(df.median())
> - 최빈값 : df.fillna(df.mode())
> - 이전행의 값으로 채우기: df.fillna(method='ffill')
> - 바로 다음행의 값으로 채우기: df.fillna(method='bfill')

2-2. 그룹연산(groupby)으로 분류된 각 그룹 단위의 평균값을 활용하여 채우기
> ```
> fill_mean_grp = lambda g: g.fillna(g.mean())
> df = df.groupby('그룹조건컬럼명').apply(fill_mean_grp)
> ```

2-3. 간단한 선형비례를 이용하여 대체하는 방법
> - df = df.interpolate(method='values')


## 3. 다른 알고리즘를 활용한 결측치 처리 방법
- KNN을 활용하여 유사한 패턴을 보이는 데이터의 값을 참고하여 대체 <br>
  이때 column 및 index값이 숫자로 변경됨
> ```
> imputer = KNNImputer(n_neighbors=3) # n_neighbors: 결측치 처리에 참고할 이웃값 수
> knn_data = pd.DataFrame(imputer.fit_transform(df))
> ```



# 1. 결측이 발생한 행 또는 열정보 제거: dropna(axis=축방향)
- axis=0 # 열 제거
- axis=1 # 행 제거


In [None]:
import pandas as pd
import numpy as np
import warnings

# 경고 메시지 출력 표기 생략
warnings.filterwarnings('ignore')

# 데이터 불러오기
# 파일 경로 = "https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv"
married_dataset = pd.read_csv("https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv")

#한번에 생략없이 출력하고 싶은 컬럼 수 설정
#pd.options.display.max_columns = 25

married_dataset

Unnamed: 0,gender,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,...,my_eval_sincere,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married
0,female,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,...,9.0,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,0
1,female,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,...,8.0,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,0
2,female,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,...,8.0,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,1
3,female,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,...,6.0,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,1
4,female,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,...,6.0,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,male,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.0,...,5.0,5.0,5.0,,,0.64,10.0,2.0,5.0,0
8358,male,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.0,...,6.0,8.0,4.0,4.0,,0.71,10.0,4.0,4.0,0
8359,male,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,,...,7.0,8.0,8.0,8.0,,-0.46,10.0,6.0,5.0,0
8360,male,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.0,...,6.0,5.0,4.0,,5.0,0.62,10.0,5.0,5.0,0


In [None]:
# 데이터셋 결측치 수 확인
married_dataset.isnull().sum()

gender                                0
age                                  72
age_partner                         102
importance_same_religion             62
pref_of_partner_attractive           83
pref_of_partner_sincere              83
pref_of_partner_intelligence         83
pref_of_partner_funny                91
pref_of_partner_ambitious            98
pref_of_partner_shared_interests    120
eval_by_partner_attractive          204
eval_by_partner_sincere             269
eval_by_partner_intelligence        291
eval_by_partner_funny               339
eval_by_partner_ambitous            681
eval_by_partner_shared_interests    989
my_pref_attractive                   62
my_pref_sincere                      62
my_pref_intellicence                 62
my_pref_funny                        72
my_pref_ambtition                    82
my_pref_shared_interests            104
my_eval_attractive                  198
my_eval_sincere                     271
my_eval_intelligence                286


In [None]:
# 항목별 결측치 비율 확인
married_dataset.isna().mean()

gender                              0.000000
age                                 0.008610
age_partner                         0.012198
importance_same_religion            0.007414
pref_of_partner_attractive          0.009926
pref_of_partner_sincere             0.009926
pref_of_partner_intelligence        0.009926
pref_of_partner_funny               0.010883
pref_of_partner_ambitious           0.011720
pref_of_partner_shared_interests    0.014351
eval_by_partner_attractive          0.024396
eval_by_partner_sincere             0.032169
eval_by_partner_intelligence        0.034800
eval_by_partner_funny               0.040541
eval_by_partner_ambitous            0.081440
eval_by_partner_shared_interests    0.118273
my_pref_attractive                  0.007414
my_pref_sincere                     0.007414
my_pref_intellicence                0.007414
my_pref_funny                       0.008610
my_pref_ambtition                   0.009806
my_pref_shared_interests            0.012437
my_eval_at

In [None]:
# 결측치를 제거하여 처리
married_dataset = married_dataset.dropna(axis=0)
married_dataset

Unnamed: 0,gender,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,...,my_eval_sincere,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married
0,female,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,...,9.0,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,0
1,female,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,...,8.0,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,0
2,female,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,...,8.0,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,1
3,female,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,...,6.0,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,1
4,female,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,...,6.0,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8351,male,25.0,28.0,1.0,25.0,15.0,25.0,15.0,10.0,10.0,...,7.0,6.0,6.0,6.0,7.0,0.37,10.0,5.0,5.0,0
8352,male,25.0,26.0,1.0,10.0,30.0,20.0,15.0,15.0,10.0,...,4.0,6.0,4.0,4.0,4.0,0.27,10.0,4.0,5.0,0
8353,male,25.0,22.0,1.0,10.0,20.0,15.0,20.0,15.0,20.0,...,3.0,9.0,6.0,9.0,6.0,0.45,10.0,6.0,3.0,0
8354,male,25.0,27.0,1.0,10.0,25.0,20.0,20.0,5.0,20.0,...,7.0,9.0,8.0,7.0,8.0,0.35,10.0,6.0,6.0,0


In [None]:
# object 타입의 문자열 변수를 숫자형으로 변환
married_dataset = pd.get_dummies(married_dataset, columns=['gender'], drop_first=True)

# 데이터셋, 독립변수와 종속변수 분리: 독립변수 -> x, 종속변수 -> y
x = married_dataset.drop(['married'], axis=1)
y = married_dataset['married']

x

Unnamed: 0,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,eval_by_partner_attractive,...,my_eval_sincere,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,gender_male
0,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,6.0,...,9.0,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,False
1,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,7.0,...,8.0,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,False
2,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,10.0,...,8.0,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,False
3,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,7.0,...,6.0,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,False
4,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,8.0,...,6.0,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8351,25.0,28.0,1.0,25.0,15.0,25.0,15.0,10.0,10.0,6.0,...,7.0,6.0,6.0,6.0,7.0,0.37,10.0,5.0,5.0,True
8352,25.0,26.0,1.0,10.0,30.0,20.0,15.0,15.0,10.0,9.0,...,4.0,6.0,4.0,4.0,4.0,0.27,10.0,4.0,5.0,True
8353,25.0,22.0,1.0,10.0,20.0,15.0,20.0,15.0,20.0,8.0,...,3.0,9.0,6.0,9.0,6.0,0.45,10.0,6.0,3.0,True
8354,25.0,27.0,1.0,10.0,25.0,20.0,20.0,5.0,20.0,7.0,...,7.0,9.0,8.0,7.0,8.0,0.35,10.0,6.0,6.0,True


In [None]:
from sklearn.model_selection import train_test_split

# train_test_split를 활용한 train set, test set 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=100)

In [None]:
# 예시로 xgboost 활용
import xgboost as xgb
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# model = xgb.XGBClassifier(n_estimators=500, max_depth=5, random_state=100)
model = xgb.XGBClassifier(random_state=100)
model.fit(X_train, y_train)

In [None]:
# 테스트 데이터로 예측
pred = model.predict(X_test)

accuracy_score(y_test, pred)  # accuracy

0.8376569037656904

In [None]:
print(confusion_matrix(y_test, pred))  # confusion matrix

[[904  64]
 [130  97]]


In [None]:
print(classification_report(y_test, pred))  # classification repor

              precision    recall  f1-score   support

           0       0.87      0.93      0.90       968
           1       0.60      0.43      0.50       227

    accuracy                           0.84      1195
   macro avg       0.74      0.68      0.70      1195
weighted avg       0.82      0.84      0.83      1195



# 2. 특정 값을 활용한 결측치 처리

## 2-1. fillna(값)를 통한 특정 값으로 채우기
> - 0으로 채우기: df.fillna(0)
> - 평균 : df.fillna(df.mean())
> - 중앙값 : df.fillna(df.median())
> - 최빈값 : df.fillna(df.mode())
> - 이전행의 값으로 채우기: df.fillna(method='ffill')
> - 바로 다음행의 값으로 채우기: df.fillna(method='bfill')

In [None]:
import pandas as pd
import numpy as np
import warnings

# 경고 메시지 출력 표기 생략
warnings.filterwarnings('ignore')

# 데이터 불러오기
# 파일 경로 = "https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv"
married_dataset = pd.read_csv("https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv")

#한번에 생략없이 출력하고 싶은 컬럼 수 설정
#pd.options.display.max_columns = 25

married_dataset

Unnamed: 0,gender,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,...,my_eval_sincere,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married
0,female,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,...,9.0,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,0
1,female,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,...,8.0,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,0
2,female,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,...,8.0,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,1
3,female,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,...,6.0,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,1
4,female,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,...,6.0,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,male,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.0,...,5.0,5.0,5.0,,,0.64,10.0,2.0,5.0,0
8358,male,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.0,...,6.0,8.0,4.0,4.0,,0.71,10.0,4.0,4.0,0
8359,male,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,,...,7.0,8.0,8.0,8.0,,-0.46,10.0,6.0,5.0,0
8360,male,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.0,...,6.0,5.0,4.0,,5.0,0.62,10.0,5.0,5.0,0


In [None]:
# object 타입의 문자열 변수를 숫자형으로 변환
married_dataset = pd.get_dummies(married_dataset, columns=['gender'], drop_first=True)
married_dataset

Unnamed: 0,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,eval_by_partner_attractive,...,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married,gender_male
0,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,6.0,...,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,0,False
1,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,7.0,...,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,0,False
2,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,10.0,...,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,1,False
3,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,7.0,...,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,1,False
4,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,8.0,...,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.0,10.0,...,5.0,5.0,,,0.64,10.0,2.0,5.0,0,True
8358,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.0,6.0,...,8.0,4.0,4.0,,0.71,10.0,4.0,4.0,0,True
8359,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,,2.0,...,8.0,8.0,8.0,,-0.46,10.0,6.0,5.0,0,True
8360,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.0,5.0,...,5.0,4.0,,5.0,0.62,10.0,5.0,5.0,0,True


In [None]:
'''
0으로 채우기: df.fillna(0)
평균 : df.fillna(df.mean())
중앙값 : df.fillna(df.median())
최빈값 : df.fillna(df.mode())
이전행의 값으로 채우기: df.fillna(method='ffill')
바로 다음행의 값으로 채우기: df.fillna(method='bfill')
'''

married_dataset = married_dataset.fillna(married_dataset.median()) #변경 가능

married_dataset

Unnamed: 0,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,eval_by_partner_attractive,...,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married,gender_male
0,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.00,6.0,...,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,0,False
1,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.00,7.0,...,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,0,False
2,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.00,10.0,...,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,1,False
3,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.00,7.0,...,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,1,False
4,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.00,8.0,...,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.00,10.0,...,5.0,5.0,7.0,6.0,0.64,10.0,2.0,5.0,0,True
8358,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.00,6.0,...,8.0,4.0,4.0,6.0,0.71,10.0,4.0,4.0,0,True
8359,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,10.64,2.0,...,8.0,8.0,8.0,6.0,-0.46,10.0,6.0,5.0,0,True
8360,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.00,5.0,...,5.0,4.0,7.0,5.0,0.62,10.0,5.0,5.0,0,True


In [None]:
from sklearn.model_selection import train_test_split

# 데이터셋, 독립변수와 종속변수 분리: 독립변수 -> x, 종속변수 -> y
x = married_dataset.drop(['married'], axis=1)
y = married_dataset['married']

# train_test_split를 활용한 train set, test set 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=100)

In [None]:
# 예시로 xgboost 활용
import xgboost as xgb
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# model = xgb.XGBClassifier(n_estimators=500, max_depth=5, random_state=100)
model = xgb.XGBClassifier(random_state=100)
model.fit(X_train, y_train)

# 테스트 데이터로 예측
pred = model.predict(X_test)

accuracy_score(y_test, pred)  # accuracy

0.8517632994620442

## 2-2. 그룹연산(groupby)으로 분류된 각 그룹 단위의 평균값을 활용한 결측치 처리
> ```
> fill_mean_grp = lambda g: g.fillna(g.mean())
> df = df.groupby('그룹조건컬럼명').apply(fill_mean_grp)
> ```

In [None]:
import pandas as pd
import numpy as np
import warnings

# 경고 메시지 출력 표기 생략
warnings.filterwarnings('ignore')

# 데이터 불러오기
# 파일 경로 = "https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv"
married_dataset = pd.read_csv("https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv")

#한번에 생략없이 출력하고 싶은 컬럼 수 설정
#pd.options.display.max_columns = 25

married_dataset

Unnamed: 0,gender,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,...,my_eval_sincere,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married
0,female,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,...,9.0,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,0
1,female,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,...,8.0,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,0
2,female,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,...,8.0,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,1
3,female,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,...,6.0,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,1
4,female,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,...,6.0,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,male,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.0,...,5.0,5.0,5.0,,,0.64,10.0,2.0,5.0,0
8358,male,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.0,...,6.0,8.0,4.0,4.0,,0.71,10.0,4.0,4.0,0
8359,male,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,,...,7.0,8.0,8.0,8.0,,-0.46,10.0,6.0,5.0,0
8360,male,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.0,...,6.0,5.0,4.0,,5.0,0.62,10.0,5.0,5.0,0


In [None]:
# object 타입의 문자열 변수를 숫자형으로 변환
married_dataset = pd.get_dummies(married_dataset, columns=['gender'], drop_first=True)

In [None]:
# 결혼 성공 여부에 따라 그룹화 하여 해당 그룹내 각 컬럼별 평균값으로 결측치 처리
fill_mean_grp = lambda g: g.fillna(g.mean())
married_grp_mean = married_dataset.groupby('married').apply(fill_mean_grp)

married_grp_mean

Unnamed: 0_level_0,Unnamed: 1_level_0,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,eval_by_partner_attractive,...,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married,gender_male
married,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,0,21.0,27.0,4.0,35.00,20.00,20.00,20.00,0.00,5.00,6.0,...,7.0,7.0,6.000000,5.000000,0.14,3.0,7.0,6.0,0,False
0,1,21.0,22.0,4.0,60.00,0.00,0.00,40.00,0.00,0.00,7.0,...,7.0,8.0,5.000000,6.000000,0.54,3.0,7.0,5.0,0,False
0,5,21.0,25.0,4.0,50.00,0.00,30.00,10.00,0.00,10.00,7.0,...,7.0,4.0,6.000000,4.000000,0.25,3.0,6.0,5.0,0,False
0,6,21.0,30.0,4.0,35.00,15.00,25.00,10.00,5.00,10.00,3.0,...,7.0,4.0,6.000000,7.000000,0.34,3.0,6.0,5.0,0,False
0,7,21.0,27.0,4.0,33.33,11.11,11.11,11.11,11.11,22.22,6.0,...,7.0,6.0,5.000000,6.000000,0.50,3.0,6.0,7.0,0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,8341,25.0,26.0,1.0,25.00,25.00,25.00,12.00,3.00,10.00,10.0,...,9.0,9.0,7.397981,7.000000,0.63,10.0,9.0,8.0,1,True
1,8344,25.0,23.0,1.0,20.00,20.00,20.00,20.00,5.00,15.00,7.0,...,9.0,9.0,6.000000,8.000000,0.55,10.0,9.0,6.0,1,True
1,8346,25.0,23.0,1.0,20.00,20.00,20.00,15.00,10.00,15.00,7.0,...,5.0,5.0,5.000000,4.000000,0.39,10.0,8.0,7.0,1,True
1,8347,25.0,27.0,1.0,58.00,5.00,8.00,10.00,7.00,12.00,7.0,...,8.0,9.0,8.000000,8.000000,0.59,10.0,8.0,6.0,1,True


In [None]:
from sklearn.model_selection import train_test_split

# 데이터셋, 독립변수와 종속변수 분리: 독립변수 -> x, 종속변수 -> y
x = married_grp_mean.drop(['married'], axis=1)
y = married_grp_mean['married']

# train_test_split를 활용한 train set, test set 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=100)

In [None]:
# 예시로 xgboost 활용
import xgboost as xgb
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# model = xgb.XGBClassifier(n_estimators=500, max_depth=5, random_state=100)
model = xgb.XGBClassifier(random_state=100)
model.fit(X_train, y_train)

# 테스트 데이터로 예측
pred = model.predict(X_test)

accuracy_score(y_test, pred)  # accuracy

0.8631201434548715

## 2-3. 간단한 선형비례를 이용하여 대체하는 방법

df = df.interpolate(method='values')

In [None]:
import pandas as pd
import numpy as np
import warnings

# 경고 메시지 출력 표기 생략
warnings.filterwarnings('ignore')

# 데이터 불러오기
# 파일 경로 = "https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv"
married_dataset = pd.read_csv("https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv")

#한번에 생략없이 출력하고 싶은 컬럼 수 설정
#pd.options.display.max_columns = 25

married_dataset

Unnamed: 0,gender,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,...,my_eval_sincere,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married
0,female,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,...,9.0,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,0
1,female,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,...,8.0,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,0
2,female,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,...,8.0,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,1
3,female,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,...,6.0,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,1
4,female,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,...,6.0,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,male,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.0,...,5.0,5.0,5.0,,,0.64,10.0,2.0,5.0,0
8358,male,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.0,...,6.0,8.0,4.0,4.0,,0.71,10.0,4.0,4.0,0
8359,male,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,,...,7.0,8.0,8.0,8.0,,-0.46,10.0,6.0,5.0,0
8360,male,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.0,...,6.0,5.0,4.0,,5.0,0.62,10.0,5.0,5.0,0


In [None]:
# object 타입의 문자열 변수를 숫자형으로 변환
married_dataset = pd.get_dummies(married_dataset, columns=['gender'], drop_first=True)

In [None]:
married_dataset = married_dataset.interpolate(method='values')

married_dataset

Unnamed: 0,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,eval_by_partner_attractive,...,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married,gender_male
0,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,6.0,...,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,0,False
1,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,7.0,...,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,0,False
2,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,10.0,...,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,1,False
3,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,7.0,...,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,1,False
4,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,8.0,...,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.0,10.0,...,5.0,5.0,5.0,6.5,0.64,10.0,2.0,5.0,0,True
8358,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.0,6.0,...,8.0,4.0,4.0,6.0,0.71,10.0,4.0,4.0,0,True
8359,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,12.5,2.0,...,8.0,8.0,8.0,5.5,-0.46,10.0,6.0,5.0,0,True
8360,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.0,5.0,...,5.0,4.0,8.0,5.0,0.62,10.0,5.0,5.0,0,True


In [None]:
from sklearn.model_selection import train_test_split

# 데이터셋, 독립변수와 종속변수 분리: 독립변수 -> x, 종속변수 -> y
x = married_dataset.drop(['married'], axis=1)
y = married_dataset['married']

# train_test_split를 활용한 train set, test set 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=100)

In [None]:
# 예시로 xgboost 활용
import xgboost as xgb
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# model = xgb.XGBClassifier(n_estimators=500, max_depth=5, random_state=100)
model = xgb.XGBClassifier(random_state=100)
model.fit(X_train, y_train)

# 테스트 데이터로 예측
pred = model.predict(X_test)

accuracy_score(y_test, pred)  # accuracy

0.8529587567244471

# 3. 다른 알고리즘를 활용한 결측치 처리 방법: KNN


In [None]:
import pandas as pd
import numpy as np
import warnings

# 경고 메시지 출력 표기 생략
warnings.filterwarnings('ignore')

# 데이터 불러오기
# 파일 경로 = "https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv"
married_dataset = pd.read_csv("https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv")

#한번에 생략없이 출력하고 싶은 컬럼 수 설정
#pd.options.display.max_columns = 25

married_dataset

Unnamed: 0,gender,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,...,my_eval_sincere,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married
0,female,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,...,9.0,7.0,7.0,6.0,5.0,0.14,3.0,7.0,6.0,0
1,female,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,...,8.0,7.0,8.0,5.0,6.0,0.54,3.0,7.0,5.0,0
2,female,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,...,8.0,9.0,8.0,5.0,7.0,0.16,3.0,7.0,5.0,1
3,female,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,...,6.0,8.0,7.0,6.0,8.0,0.61,3.0,7.0,6.0,1
4,female,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,...,6.0,7.0,7.0,6.0,6.0,0.21,3.0,6.0,6.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,male,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.0,...,5.0,5.0,5.0,,,0.64,10.0,2.0,5.0,0
8358,male,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.0,...,6.0,8.0,4.0,4.0,,0.71,10.0,4.0,4.0,0
8359,male,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,,...,7.0,8.0,8.0,8.0,,-0.46,10.0,6.0,5.0,0
8360,male,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.0,...,6.0,5.0,4.0,,5.0,0.62,10.0,5.0,5.0,0


In [None]:
# object 타입의 문자열 변수를 숫자형으로 변환
married_dataset = pd.get_dummies(married_dataset, columns=['gender'], drop_first=True)

In [None]:
# 사이킷런 라이브러리의 KNNImputer 불러오기
from sklearn.impute import KNNImputer

# KNNImputer 객체 생성과 KNN알고리즘에서 중요한 n_neighbors 수(참고할 이웃값 수) 설정
imputer = KNNImputer(n_neighbors=3)

married_knn = pd.DataFrame(imputer.fit_transform(married_dataset))
married_knn

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,32
0,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,6.0,...,7.0,7.0,6.0,5.000000,0.14,3.0,7.0,6.0,0.0,0.0
1,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,7.0,...,7.0,8.0,5.0,6.000000,0.54,3.0,7.0,5.0,0.0,0.0
2,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,10.0,...,9.0,8.0,5.0,7.000000,0.16,3.0,7.0,5.0,1.0,0.0
3,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,7.0,...,8.0,7.0,6.0,8.000000,0.61,3.0,7.0,6.0,1.0,0.0
4,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,8.0,...,7.0,7.0,6.0,6.000000,0.21,3.0,6.0,6.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.0,10.0,...,5.0,5.0,7.0,7.333333,0.64,10.0,2.0,5.0,0.0,1.0
8358,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.0,6.0,...,8.0,4.0,4.0,7.000000,0.71,10.0,4.0,4.0,0.0,1.0
8359,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,5.0,2.0,...,8.0,8.0,8.0,6.333333,-0.46,10.0,6.0,5.0,0.0,1.0
8360,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.0,5.0,...,5.0,4.0,6.0,5.000000,0.62,10.0,5.0,5.0,0.0,1.0


In [None]:
# 컬럼명이 숫자로 변환되었으므로 다시 원래의 이름으로 변환
married_knn.columns = married_dataset.columns

married_knn

Unnamed: 0,age,age_partner,importance_same_religion,pref_of_partner_attractive,pref_of_partner_sincere,pref_of_partner_intelligence,pref_of_partner_funny,pref_of_partner_ambitious,pref_of_partner_shared_interests,eval_by_partner_attractive,...,my_eval_intelligence,my_eval_funny,my_eval_ambition,my_eval_shared_interests,interests_correlate,expected_happy_with_couple_match,how_much_i_liked,guess_prob_liked,married,gender_male
0,21.0,27.0,4.0,35.0,20.0,20.0,20.0,0.0,5.0,6.0,...,7.0,7.0,6.0,5.000000,0.14,3.0,7.0,6.0,0.0,0.0
1,21.0,22.0,4.0,60.0,0.0,0.0,40.0,0.0,0.0,7.0,...,7.0,8.0,5.0,6.000000,0.54,3.0,7.0,5.0,0.0,0.0
2,21.0,22.0,4.0,19.0,18.0,19.0,18.0,14.0,12.0,10.0,...,9.0,8.0,5.0,7.000000,0.16,3.0,7.0,5.0,1.0,0.0
3,21.0,23.0,4.0,30.0,5.0,15.0,40.0,5.0,5.0,7.0,...,8.0,7.0,6.0,8.000000,0.61,3.0,7.0,6.0,1.0,0.0
4,21.0,24.0,4.0,30.0,10.0,20.0,10.0,10.0,20.0,8.0,...,7.0,7.0,6.0,6.000000,0.21,3.0,6.0,6.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8357,25.0,26.0,1.0,10.0,10.0,30.0,20.0,10.0,15.0,10.0,...,5.0,5.0,7.0,7.333333,0.64,10.0,2.0,5.0,0.0,1.0
8358,25.0,24.0,1.0,50.0,20.0,10.0,5.0,10.0,5.0,6.0,...,8.0,4.0,4.0,7.000000,0.71,10.0,4.0,4.0,0.0,1.0
8359,25.0,29.0,1.0,40.0,10.0,30.0,10.0,10.0,5.0,2.0,...,8.0,8.0,8.0,6.333333,-0.46,10.0,6.0,5.0,0.0,1.0
8360,25.0,22.0,1.0,10.0,25.0,25.0,10.0,10.0,20.0,5.0,...,5.0,4.0,6.0,5.000000,0.62,10.0,5.0,5.0,0.0,1.0


In [None]:
from sklearn.model_selection import train_test_split

# 데이터셋, 독립변수와 종속변수 분리: 독립변수 -> x, 종속변수 -> y
x = married_knn.drop(['married'], axis=1)
y = married_knn['married']

# train set 과 test set 으로 데이터를 나누기 위해 train_test_split 활용
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=100)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
import xgboost as xgb

# model = xgb.XGBClassifier(n_estimators=500, max_depth=5, random_state=100)
model = xgb.XGBClassifier(random_state=100)
model.fit(X_train, y_train)

pred = model.predict(X_test)

print('accuracy:',accuracy_score(y_test, pred))
print('f1-score:',f1_score(y_test,pred))

accuracy: 0.8559473998804543
f1-score: 0.5170340681362726


In [None]:
print(confusion_matrix(y_test, pred))  # confusion matrix 출력

[[1303   68]
 [ 173  129]]


In [None]:
print(classification_report(y_test, pred))  # classification report 출력

              precision    recall  f1-score   support

         0.0       0.88      0.95      0.92      1371
         1.0       0.65      0.43      0.52       302

    accuracy                           0.86      1673
   macro avg       0.77      0.69      0.72      1673
weighted avg       0.84      0.86      0.84      1673



# 하이퍼파라미터 튜닝

- optuna


In [None]:
import pandas as pd
import numpy as np
import warnings

# 경고 메시지 출력 표기 생략
warnings.filterwarnings('ignore')

# 데이터 불러오기
# 파일 경로 = "https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv"
married_dataset = pd.read_csv("https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv")

# object 타입의 문자열 변수를 숫자형으로 변환
married_dataset = pd.get_dummies(married_dataset, columns=['gender'], drop_first=True)

# 데이터셋, 독립변수와 종속변수 분리: 독립변수 -> x, 종속변수 -> y
x = married_dataset.drop(['married'], axis=1)
y = married_dataset['married']

# train set 과 test set 으로 데이터를 나누기 위해 train_test_split 함수를 불러옴
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=100)

In [None]:
import xgboost as xgb

model = xgb.XGBClassifier(random_state=100)
model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

pred = model.predict(X_test)

print('accuracy:',accuracy_score(y_test, pred))
print('f1-score:',f1_score(y_test,pred))

accuracy: 0.8493723849372385
f1-score: 0.48148148148148157


### XGBoost Classifier에 있는 Hyperparameter
* XGBoost API 설명자료: https://xgboost.readthedocs.io/en/stable/python/python_api.html#module-xgboost.sklearn  

* 코드에서 활용한 하이퍼 파라미터
> * objective: 모델의 학습 과제 및 학습목표(일반적으로 classifier의 경우 이진 혹은 다중 분류로 기본값이 설정되어 있음)
> * num_leaves: tree의 최대 leaf 수
> * learning_rate: 학습률
> * n_estimators: 학습에 사용할 트리 수(xgb학습과정에서 부스팅 라운드 수)
> * max_depth: 트리의 최대 깊이
> * ramdom_state: 결과 재현을 위한 시드값


In [None]:
#현재 코랩 런타임에 Optuna 라이브러리 설치
!pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


In [None]:
import optuna

def objective(trial):
    global X_train, X_test, y_train, y_test
    xbg_trainset = xgb.DMatrix(X_train, label=y_train)
    xgb_testset = xgb.DMatrix(X_test, label=y_test)

    # 최적화할 하이퍼 파라미터 지정 및 찾아볼 값 범위 설정
    param = {
        "objective": "binary:logistic",
        'num_leaves': trial.suggest_int('num_leaves', 2, 1024, step=1, log=True),
        'learning_rate': trial.suggest_loguniform("learning_rate", 1e-8, 0.3),
        'n_estimators':  trial.suggest_int('n_estimators',100,3000 ),
        'max_depth': trial.suggest_int("max_depth", 3, 21, step=2),
        'random_state': 100,
    }

    model_train = xgb.train(param, xbg_trainset)
    preds = model_train.predict(xgb_testset)
    pred_labels = np.rint(preds)

    accuracy = accuracy_score(y_test, pred_labels)
    return accuracy

# optuna에서의 최적화 할 study 생성과 최적화 방향(지표의 값을 최대화 할 것인지)
study = optuna.create_study(direction="maximize")

# trial 횟수 설정 및 최적화 시작
study.optimize(objective, n_trials=500, timeout=600)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

[I 2024-04-29 01:15:33,914] A new study created in memory with name: no-name-ef987e7a-fd53-4e28-ab8f-a149ef2583df
[I 2024-04-29 01:15:34,109] Trial 0 finished with value: 0.8194859533771668 and parameters: {'num_leaves': 22, 'learning_rate': 0.0007320218986546918, 'n_estimators': 697, 'max_depth': 9}. Best is trial 0 with value: 0.8194859533771668.
[I 2024-04-29 01:15:34,252] Trial 1 finished with value: 0.8194859533771668 and parameters: {'num_leaves': 489, 'learning_rate': 0.0017802132915706685, 'n_estimators': 1302, 'max_depth': 7}. Best is trial 0 with value: 0.8194859533771668.
[I 2024-04-29 01:15:34,401] Trial 2 finished with value: 0.8194859533771668 and parameters: {'num_leaves': 91, 'learning_rate': 5.865982137220365e-05, 'n_estimators': 2388, 'max_depth': 7}. Best is trial 0 with value: 0.8194859533771668.
[I 2024-04-29 01:15:35,056] Trial 3 finished with value: 0.8194859533771668 and parameters: {'num_leaves': 10, 'learning_rate': 0.00012514214904140204, 'n_estimators': 2101

Number of finished trials:  500
Best trial:
  Value: 0.8511655708308428
  Params: 
    num_leaves: 72
    learning_rate: 0.2950715705379814
    n_estimators: 1222
    max_depth: 3


In [None]:
# best trial에 대한 성능과 hyperparameter 정보 출력
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
#plot_optimization_histor: trial 진행과정 히스토리
optuna.visualization.plot_optimization_history(study)

In [None]:
#plot_parallel_coordinate: 하이퍼파라미터의 조합과 점수에 대한 시각화
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
#최적화 과정에서 계산된 하이퍼 파라미터 별 성능에 영향을 미친 중요도 시각화
optuna.visualization.plot_param_importances(study)

# [추가실습자료] LightGBM 알고리즘 활용하기 + optuna

In [None]:
import pandas as pd
import numpy as np
import warnings

# 경고 메시지 출력 표기 생략
warnings.filterwarnings('ignore')

# 파일 경로 = "https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv"
married_dataset = pd.read_csv("https://raw.githubusercontent.com/agtechresearch/LectureAlgorithm/main/csv/married_full.csv")

# 결측치 처리
married_dataset = married_dataset.dropna(axis=0)

# object 타입의 문자열 변수를 숫자형으로 변환
married_dataset = pd.get_dummies(married_dataset, columns=['gender'], drop_first=True)

# 데이터셋, 독립변수와 종속변수 분리: 독립변수 -> x, 종속변수 -> y
x = married_dataset.drop(['married'], axis=1)
y = married_dataset['married']

# train set 과 test set 으로 데이터를 나누기 위해 train_test_split 함수를 불러옴
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=100)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
import lightgbm as lgb

model = lgb.LGBMClassifier(random_state=100)
model.fit(X_train, y_train)

pred = model.predict(X_test)  # 테스트 데이터로 예측

print('accuracy:',accuracy_score(y_test, pred))
print('f1-score:',f1_score(y_test,pred))

[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006465 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510
accuracy: 0.8426778242677824
f1-score: 0.5026455026455027


In [None]:
print(confusion_matrix(y_test, pred))  # confusion matrix 출력

[[912  56]
 [132  95]]


In [None]:
print(classification_report(y_test, pred))  # classification report 출력

              precision    recall  f1-score   support

           0       0.87      0.94      0.91       968
           1       0.63      0.42      0.50       227

    accuracy                           0.84      1195
   macro avg       0.75      0.68      0.70      1195
weighted avg       0.83      0.84      0.83      1195



### LightGBM Classifier에 있는 Hyperparameter
- LightGBMClassifier : https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html#lightgbm.LGBMClassifier
- 코드에서 활용한 하이퍼파라미터
> * objective: 모델의 학습 과제 및 학습목표(일반적으로 classifier의 경우 이진 혹은 다중 분류로 기본값이 설정되어 있음)
> * num_leaves: tree의 최대 leaf 수
> * learning_rate: 학습률
> * n_estimators: 학습에 사용할 트리 수
> * max_depth: 트리의 최대 깊이
> * ramdom_state: 결과 재현을 위한 시드값

- 아래 문구가 학습시 나오더라도 오류는 아니니 무시할 것(더 분할할 수 없어서 나타나는 경고)

  [Warning] No further splits with positive gain, best gain: -inf

In [None]:
# Optuna
import optuna
def objective(trial):
    global X_train, X_test, y_train, y_test

    # 최적화 할 하이퍼파라미터 지정 및 값의 범위 설정
    param = {
        "objective": "binary",
        'num_leaves': trial.suggest_int('num_leaves', 2, 512, step=1, log=True),
        'learning_rate': trial.suggest_loguniform("learning_rate", 1e-8, 0.3),
        'n_estimators':  trial.suggest_int('n_estimators',100,1000 ),
        'max_depth': trial.suggest_int("max_depth", 3, 21, step=2),
        'random_state': 100,
    }

    model = lgb.LGBMRegressor(**param)
    lgb_model = model.fit(X_train, y_train, eval_set=[(X_test, y_test)])
    preds = lgb_model.predict(X_test)
    pred_labels = np.rint(preds)

    accuracy = accuracy_score(y_test, pred_labels)
    return accuracy


# optuna에서의 최적화 할 study 생성과 최적화 방향(지표의 값을 최대화 할 것인지)
study = optuna.create_study(direction="maximize")

# trial 횟수 설정 및 최적화 시작
study.optimize(objective, n_trials=100, timeout=600)

print("Number of finished trials: ", len(study.trials))

[I 2024-04-29 01:20:24,301] A new study created in memory with name: no-name-f92d2fce-005b-4cfe-8478-66014f93761e


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005130 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:27,176] Trial 0 finished with value: 0.8451882845188284 and parameters: {'num_leaves': 32, 'learning_rate': 0.18261771034348856, 'n_estimators': 620, 'max_depth': 15}. Best is trial 0 with value: 0.8451882845188284.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000867 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:27,650] Trial 1 finished with value: 0.8284518828451883 and parameters: {'num_leaves': 4, 'learning_rate': 0.004825737780929112, 'n_estimators': 282, 'max_depth': 7}. Best is trial 0 with value: 0.8451882845188284.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003397 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:28,571] Trial 2 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 11, 'learning_rate': 4.040039908299568e-05, 'n_estimators': 648, 'max_depth': 5}. Best is trial 0 with value: 0.8451882845188284.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000593 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:29,748] Trial 3 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 53, 'learning_rate': 2.8500378343290624e-07, 'n_estimators': 488, 'max_depth': 5}. Best is trial 0 with value: 0.8451882845188284.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000601 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:30,432] Trial 4 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 7, 'learning_rate': 2.9892419022670814e-05, 'n_estimators': 636, 'max_depth': 11}. Best is trial 0 with value: 0.8451882845188284.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000602 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:30,832] Trial 5 finished with value: 0.8376569037656904 and parameters: {'num_leaves': 18, 'learning_rate': 0.030447680604976293, 'n_estimators': 237, 'max_depth': 11}. Best is trial 0 with value: 0.8451882845188284.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001023 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:31,172] Trial 6 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 2, 'learning_rate': 2.3124876809482588e-07, 'n_estimators': 604, 'max_depth': 19}. Best is trial 0 with value: 0.8451882845188284.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000586 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:31,599] Trial 7 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 4, 'learning_rate': 2.8780453038460547e-07, 'n_estimators': 538, 'max_depth': 5}. Best is trial 0 with value: 0.8451882845188284.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000592 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:32,315] Trial 8 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 10, 'learning_rate': 1.764688065894181e-08, 'n_estimators': 561, 'max_depth': 21}. Best is trial 0 with value: 0.8451882845188284.
[I 2024-04-29 01:20:32,444] Trial 9 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 2, 'learning_rate': 2.566274571906165e-06, 'n_estimators': 149, 'max_depth': 19}. Best is trial 0 with value: 0.8451882845188284.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000658 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510
[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000602 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:

[I 2024-04-29 01:20:35,459] Trial 10 finished with value: 0.8518828451882845 and parameters: {'num_leaves': 327, 'learning_rate': 0.1626707252901077, 'n_estimators': 989, 'max_depth': 15}. Best is trial 10 with value: 0.8518828451882845.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000889 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:38,855] Trial 11 finished with value: 0.8510460251046025 and parameters: {'num_leaves': 348, 'learning_rate': 0.29863159317642063, 'n_estimators': 993, 'max_depth': 15}. Best is trial 10 with value: 0.8518828451882845.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000596 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:46,138] Trial 12 finished with value: 0.8451882845188284 and parameters: {'num_leaves': 456, 'learning_rate': 0.0017629068669563735, 'n_estimators': 988, 'max_depth': 15}. Best is trial 10 with value: 0.8518828451882845.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000577 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:49,146] Trial 13 finished with value: 0.8569037656903765 and parameters: {'num_leaves': 484, 'learning_rate': 0.2250869675851037, 'n_estimators': 999, 'max_depth': 15}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000971 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:54,490] Trial 14 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 151, 'learning_rate': 0.002166793952092876, 'n_estimators': 841, 'max_depth': 13}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000587 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:20:59,067] Trial 15 finished with value: 0.8468619246861925 and parameters: {'num_leaves': 142, 'learning_rate': 0.021168375148083226, 'n_estimators': 824, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000605 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:04,865] Trial 16 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 188, 'learning_rate': 0.0002716525997555591, 'n_estimators': 830, 'max_depth': 9}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000603 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:08,344] Trial 17 finished with value: 0.8435146443514644 and parameters: {'num_leaves': 92, 'learning_rate': 0.06558385819530421, 'n_estimators': 901, 'max_depth': 13}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000584 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:13,288] Trial 18 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 272, 'learning_rate': 0.00033877179810909593, 'n_estimators': 732, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000581 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:15,325] Trial 19 finished with value: 0.8451882845188284 and parameters: {'num_leaves': 72, 'learning_rate': 0.013675269898382917, 'n_estimators': 442, 'max_depth': 9}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000910 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:21,040] Trial 20 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 485, 'learning_rate': 0.00046361627841212037, 'n_estimators': 734, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000599 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:23,722] Trial 21 finished with value: 0.8468619246861925 and parameters: {'num_leaves': 325, 'learning_rate': 0.2513728857788203, 'n_estimators': 996, 'max_depth': 15}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000584 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:26,353] Trial 22 finished with value: 0.8543933054393306 and parameters: {'num_leaves': 244, 'learning_rate': 0.23321379730541172, 'n_estimators': 922, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000590 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:31,170] Trial 23 finished with value: 0.8510460251046025 and parameters: {'num_leaves': 206, 'learning_rate': 0.0967404888516377, 'n_estimators': 894, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000609 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:35,681] Trial 24 finished with value: 0.8518828451882845 and parameters: {'num_leaves': 115, 'learning_rate': 0.015407142479760397, 'n_estimators': 923, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000590 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:37,337] Trial 25 finished with value: 0.8435146443514644 and parameters: {'num_leaves': 38, 'learning_rate': 0.05633775017521141, 'n_estimators': 738, 'max_depth': 13}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000607 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:45,804] Trial 26 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 232, 'learning_rate': 0.005694169253952109, 'n_estimators': 947, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000583 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:50,875] Trial 27 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 438, 'learning_rate': 8.822969515137907e-06, 'n_estimators': 784, 'max_depth': 15}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000604 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:54,807] Trial 28 finished with value: 0.8460251046025105 and parameters: {'num_leaves': 78, 'learning_rate': 0.07672685456945033, 'n_estimators': 879, 'max_depth': 11}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000892 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:55,861] Trial 29 finished with value: 0.8535564853556485 and parameters: {'num_leaves': 26, 'learning_rate': 0.2527570372785421, 'n_estimators': 411, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000596 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:56,625] Trial 30 finished with value: 0.8376569037656904 and parameters: {'num_leaves': 25, 'learning_rate': 0.0069226790825649065, 'n_estimators': 369, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000588 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:57,829] Trial 31 finished with value: 0.8502092050209205 and parameters: {'num_leaves': 48, 'learning_rate': 0.23344121860795428, 'n_estimators': 420, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000599 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:21:58,436] Trial 32 finished with value: 0.8451882845188284 and parameters: {'num_leaves': 19, 'learning_rate': 0.10129401273960896, 'n_estimators': 362, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000612 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:00,430] Trial 33 finished with value: 0.8510460251046025 and parameters: {'num_leaves': 266, 'learning_rate': 0.03300221491045761, 'n_estimators': 303, 'max_depth': 13}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000612 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:02,311] Trial 34 finished with value: 0.8535564853556485 and parameters: {'num_leaves': 26, 'learning_rate': 0.2961490921343068, 'n_estimators': 953, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000842 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:07,921] Trial 35 finished with value: 0.8393305439330544 and parameters: {'num_leaves': 29, 'learning_rate': 0.0016778637658735986, 'n_estimators': 945, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000924 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:10,092] Trial 36 finished with value: 0.8451882845188284 and parameters: {'num_leaves': 14, 'learning_rate': 0.041382328970197974, 'n_estimators': 674, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000917 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:11,302] Trial 37 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 7, 'learning_rate': 0.011540880432483122, 'n_estimators': 492, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000977 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:12,434] Trial 38 finished with value: 0.8543933054393306 and parameters: {'num_leaves': 40, 'learning_rate': 0.12450372481085363, 'n_estimators': 226, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000927 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:13,252] Trial 39 finished with value: 0.8527196652719665 and parameters: {'num_leaves': 52, 'learning_rate': 0.100037723538027, 'n_estimators': 150, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000934 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:13,740] Trial 40 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 37, 'learning_rate': 0.00012515652827036375, 'n_estimators': 201, 'max_depth': 3}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000950 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:14,731] Trial 41 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 22, 'learning_rate': 0.28769314546349894, 'n_estimators': 274, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000929 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:15,200] Trial 42 finished with value: 0.8468619246861925 and parameters: {'num_leaves': 14, 'learning_rate': 0.037893861955966154, 'n_estimators': 218, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000932 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:16,063] Trial 43 finished with value: 0.8460251046025105 and parameters: {'num_leaves': 7, 'learning_rate': 0.16242166961653343, 'n_estimators': 333, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000991 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:17,132] Trial 44 finished with value: 0.8510460251046025 and parameters: {'num_leaves': 10, 'learning_rate': 0.13896327686886517, 'n_estimators': 528, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000970 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:17,721] Trial 45 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 34, 'learning_rate': 0.0029459058076546528, 'n_estimators': 117, 'max_depth': 15}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000987 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:18,851] Trial 46 finished with value: 0.8510460251046025 and parameters: {'num_leaves': 5, 'learning_rate': 0.02822844248790284, 'n_estimators': 601, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000959 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:20,983] Trial 47 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 3, 'learning_rate': 0.009993906119398108, 'n_estimators': 869, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000886 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:25,659] Trial 48 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 116, 'learning_rate': 1.341143569068437e-07, 'n_estimators': 952, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000597 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:26,273] Trial 49 finished with value: 0.8460251046025105 and parameters: {'num_leaves': 16, 'learning_rate': 0.06804331282165574, 'n_estimators': 406, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000586 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:27,111] Trial 50 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 59, 'learning_rate': 0.0009012751692523181, 'n_estimators': 258, 'max_depth': 15}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000589 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:27,588] Trial 51 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 44, 'learning_rate': 0.126217886812743, 'n_estimators': 163, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000613 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:28,318] Trial 52 finished with value: 0.8502092050209205 and parameters: {'num_leaves': 59, 'learning_rate': 0.13750869614687672, 'n_estimators': 214, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000602 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:28,596] Trial 53 finished with value: 0.8527196652719665 and parameters: {'num_leaves': 30, 'learning_rate': 0.29752724096194655, 'n_estimators': 101, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000600 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:29,028] Trial 54 finished with value: 0.8393305439330544 and parameters: {'num_leaves': 23, 'learning_rate': 0.02393713895697683, 'n_estimators': 185, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000584 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:30,102] Trial 55 finished with value: 0.8451882845188284 and parameters: {'num_leaves': 371, 'learning_rate': 0.057642408456240884, 'n_estimators': 133, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000615 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:33,345] Trial 56 finished with value: 0.8510460251046025 and parameters: {'num_leaves': 158, 'learning_rate': 0.14057328405959263, 'n_estimators': 805, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000929 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:37,683] Trial 57 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 98, 'learning_rate': 1.2585377959868422e-06, 'n_estimators': 971, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000589 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:40,009] Trial 58 finished with value: 0.8426778242677824 and parameters: {'num_leaves': 45, 'learning_rate': 0.019785390616285016, 'n_estimators': 917, 'max_depth': 15}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000576 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:40,508] Trial 59 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 12, 'learning_rate': 1.0514995718402547e-08, 'n_estimators': 312, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000610 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:41,416] Trial 60 finished with value: 0.8485355648535565 and parameters: {'num_leaves': 66, 'learning_rate': 0.08316828584755163, 'n_estimators': 247, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000575 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:41,743] Trial 61 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 29, 'learning_rate': 0.28331774054547987, 'n_estimators': 116, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000595 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:42,165] Trial 62 finished with value: 0.8410041841004184 and parameters: {'num_leaves': 39, 'learning_rate': 0.1873448584947009, 'n_estimators': 155, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.
[I 2024-04-29 01:22:42,395] Trial 63 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 19, 'learning_rate': 0.27325165038834115, 'n_estimators': 104, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000611 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510
[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000607 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:

[I 2024-04-29 01:22:42,841] Trial 64 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 26, 'learning_rate': 0.05530647493135871, 'n_estimators': 174, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000645 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:44,182] Trial 65 finished with value: 0.8485355648535565 and parameters: {'num_leaves': 32, 'learning_rate': 0.09613278632020711, 'n_estimators': 672, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000626 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:49,001] Trial 66 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 88, 'learning_rate': 1.9985328297273694e-05, 'n_estimators': 995, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:55,362] Trial 67 finished with value: 0.8426778242677824 and parameters: {'num_leaves': 499, 'learning_rate': 0.04474069430110404, 'n_estimators': 862, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000597 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:22:56,700] Trial 68 finished with value: 0.8418410041841005 and parameters: {'num_leaves': 54, 'learning_rate': 0.10160133028133947, 'n_estimators': 458, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000603 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:00,542] Trial 69 finished with value: 0.8468619246861925 and parameters: {'num_leaves': 296, 'learning_rate': 0.18754206214413766, 'n_estimators': 918, 'max_depth': 15}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000582 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:01,722] Trial 70 finished with value: 0.8510460251046025 and parameters: {'num_leaves': 19, 'learning_rate': 0.29680822708169546, 'n_estimators': 773, 'max_depth': 13}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000587 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:04,713] Trial 71 finished with value: 0.8518828451882845 and parameters: {'num_leaves': 190, 'learning_rate': 0.17586261985538898, 'n_estimators': 973, 'max_depth': 13}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000602 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:07,815] Trial 72 finished with value: 0.8468619246861925 and parameters: {'num_leaves': 413, 'learning_rate': 0.07066997847208889, 'n_estimators': 969, 'max_depth': 7}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000590 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:15,310] Trial 73 finished with value: 0.8493723849372385 and parameters: {'num_leaves': 363, 'learning_rate': 0.036218581183000934, 'n_estimators': 899, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000593 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:22,082] Trial 74 finished with value: 0.8426778242677824 and parameters: {'num_leaves': 236, 'learning_rate': 0.01564282724551793, 'n_estimators': 943, 'max_depth': 15}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000582 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:22,378] Trial 75 finished with value: 0.8384937238493724 and parameters: {'num_leaves': 22, 'learning_rate': 0.10654612715631509, 'n_estimators': 130, 'max_depth': 15}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000593 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:24,834] Trial 76 finished with value: 0.8443514644351464 and parameters: {'num_leaves': 311, 'learning_rate': 0.008145867346950275, 'n_estimators': 231, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000947 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:27,598] Trial 77 finished with value: 0.8518828451882845 and parameters: {'num_leaves': 236, 'learning_rate': 0.1628247108467113, 'n_estimators': 565, 'max_depth': 11}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000586 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:28,815] Trial 78 finished with value: 0.8368200836820083 and parameters: {'num_leaves': 167, 'learning_rate': 0.027982388074613986, 'n_estimators': 191, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000627 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:29,746] Trial 79 finished with value: 0.8418410041841005 and parameters: {'num_leaves': 40, 'learning_rate': 0.055425546891741305, 'n_estimators': 374, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000627 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:34,674] Trial 80 finished with value: 0.8468619246861925 and parameters: {'num_leaves': 124, 'learning_rate': 0.004729920496354155, 'n_estimators': 999, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000640 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:36,807] Trial 81 finished with value: 0.8435146443514644 and parameters: {'num_leaves': 30, 'learning_rate': 0.19782127938689176, 'n_estimators': 934, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000973 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:40,657] Trial 82 finished with value: 0.8468619246861925 and parameters: {'num_leaves': 109, 'learning_rate': 0.10208250489718193, 'n_estimators': 883, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000668 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:43,845] Trial 83 finished with value: 0.8435146443514644 and parameters: {'num_leaves': 79, 'learning_rate': 0.017692144184730498, 'n_estimators': 835, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000947 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:46,792] Trial 84 finished with value: 0.8552301255230126 and parameters: {'num_leaves': 411, 'learning_rate': 0.18185259783717903, 'n_estimators': 926, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000599 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:49,980] Trial 85 finished with value: 0.8518828451882845 and parameters: {'num_leaves': 410, 'learning_rate': 0.21146423060623706, 'n_estimators': 959, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000933 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:53,986] Trial 86 finished with value: 0.8502092050209205 and parameters: {'num_leaves': 283, 'learning_rate': 0.13875193396131547, 'n_estimators': 912, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000586 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:58,575] Trial 87 finished with value: 0.8493723849372385 and parameters: {'num_leaves': 496, 'learning_rate': 0.0777383681990509, 'n_estimators': 851, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000587 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:23:59,773] Trial 88 finished with value: 0.8460251046025105 and parameters: {'num_leaves': 367, 'learning_rate': 0.03704344513508605, 'n_estimators': 141, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000593 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:01,103] Trial 89 finished with value: 0.8485355648535565 and parameters: {'num_leaves': 16, 'learning_rate': 0.21692019159472978, 'n_estimators': 982, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000597 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:01,742] Trial 90 finished with value: 0.8100418410041841 and parameters: {'num_leaves': 26, 'learning_rate': 0.0001193124798102843, 'n_estimators': 283, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000597 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:06,303] Trial 91 finished with value: 0.8518828451882845 and parameters: {'num_leaves': 209, 'learning_rate': 0.12574742404580555, 'n_estimators': 931, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002867 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:08,831] Trial 92 finished with value: 0.8468619246861925 and parameters: {'num_leaves': 35, 'learning_rate': 0.28815529507058446, 'n_estimators': 952, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000592 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:09,210] Trial 93 finished with value: 0.8426778242677824 and parameters: {'num_leaves': 50, 'learning_rate': 0.051421652865084835, 'n_estimators': 101, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000605 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:13,790] Trial 94 finished with value: 0.8552301255230126 and parameters: {'num_leaves': 335, 'learning_rate': 0.07621546157459479, 'n_estimators': 896, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000600 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:19,117] Trial 95 finished with value: 0.8552301255230126 and parameters: {'num_leaves': 439, 'learning_rate': 0.08382150390468483, 'n_estimators': 892, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000584 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:23,696] Trial 96 finished with value: 0.8493723849372385 and parameters: {'num_leaves': 432, 'learning_rate': 0.0765191324134923, 'n_estimators': 888, 'max_depth': 17}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000582 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:31,001] Trial 97 finished with value: 0.8485355648535565 and parameters: {'num_leaves': 255, 'learning_rate': 0.023727930952199904, 'n_estimators': 795, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000922 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:34,854] Trial 98 finished with value: 0.8485355648535565 and parameters: {'num_leaves': 330, 'learning_rate': 0.1081987034889802, 'n_estimators': 900, 'max_depth': 21}. Best is trial 13 with value: 0.8569037656903765.


[LightGBM] [Info] Number of positive: 956, number of negative: 3821
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000609 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1272
[LightGBM] [Info] Number of data points in the train set: 4777, number of used features: 32
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200126 -> initscore=-1.385510
[LightGBM] [Info] Start training from score -1.385510


[I 2024-04-29 01:24:37,719] Trial 99 finished with value: 0.8426778242677824 and parameters: {'num_leaves': 453, 'learning_rate': 0.1911643535666991, 'n_estimators': 864, 'max_depth': 19}. Best is trial 13 with value: 0.8569037656903765.


Number of finished trials:  100
Best trial:


In [None]:
# best trial에 대한 성능과 hyperparameter 정보 출력
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value: 0.8569037656903765
  Params: 
    num_leaves: 484
    learning_rate: 0.2250869675851037
    n_estimators: 999
    max_depth: 15


In [None]:
#plot_optimization_history
optuna.visualization.plot_optimization_history(study)

In [None]:
#plot_parallel_coordinate
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
#Visualize parameter importances.
optuna.visualization.plot_param_importances(study)