# 필요한 라이브러리 Import하기

In [None]:

import pandas as pd 
import numpy as np 
import os 
from sklearn.preprocessing import LabelEncoder 
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, f1_score,roc_auc_score,mean_squared_error
import warnings
warnings.filterwarnings(action='ignore')
import random
from lightgbm import LGBMClassifier
from lightgbm import LGBMRegressor

In [3]:
# train data와 test data 로드하기

train_data=pd.read_csv('/dshome/WoongLab/heo/construction_oil/preprocessed_data/breastcancer_train_data.csv')
test_data=pd.read_csv('/dshome/WoongLab/heo/construction_oil/preprocessed_data/breastcancer_test_data.csv')

In [4]:
train_stage_features=['A Stage','differentiate','Race','T Stage ','Grade','Estrogen Status','Progesterone Status','Marital Status','N Stage',
                      '6th Stage','Regional Node Examined','Tumor Size','Reginol Node Positive','Age','Survival Months']
test_stage_features=['A Stage','differentiate','Race','T Stage ','Grade','Estrogen Status','Progesterone Status','Marital Status','N Stage']

In [5]:
train_data

Unnamed: 0,Age,Race,Marital Status,T Stage,N Stage,6th Stage,differentiate,Grade,A Stage,Tumor Size,Estrogen Status,Progesterone Status,Regional Node Examined,Reginol Node Positive,Survival Months,Status
0,50,2,1,1,0,1,3,1,1,40,1,1,13,1,50,0
1,59,2,1,0,0,0,1,3,1,12,1,1,11,2,99,0
2,44,2,1,0,0,0,0,2,1,11,1,1,8,2,77,0
3,59,2,0,1,1,2,0,2,1,25,1,0,32,5,107,0
4,69,2,1,0,0,0,3,1,1,18,1,1,4,1,48,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3214,37,2,3,1,2,4,1,3,0,35,1,1,18,18,77,0
3215,47,2,1,1,1,2,0,2,1,50,1,1,12,7,71,0
3216,65,2,3,1,0,1,3,1,1,32,1,1,2,1,80,0
3217,53,2,1,1,0,1,1,3,1,40,1,1,17,1,8,0


In [6]:
## Train data와 Test data의 독립변수와 종속변수 설정하기

train_X=train_data.loc[:,train_stage_features]
train_y=train_data['Status']
new_test_data_X=test_data.loc[:,test_stage_features]
new_test_data_y=test_data['Status']

In [7]:
# 데이터셋에 Class weight해주기

ratio=train_y.value_counts()[0]/train_y.value_counts()[1]

In [8]:
# train에는 있는데 test data에는 없는 컬럼 찾기

train_data_columns=np.array(train_X.columns)
test_data_columns=np.array(new_test_data_X.columns)
np.setdiff1d(train_data_columns,test_data_columns)

array(['6th Stage', 'Age', 'Reginol Node Positive',
       'Regional Node Examined', 'Survival Months', 'Tumor Size'],
      dtype=object)

In [9]:
!nvidia-smi

Thu Apr 20 02:23:52 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.54       Driver Version: 510.54       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:03:00.0 Off |                  N/A |
|  0%   32C    P8    19W / 250W |   4461MiB / 11264MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  On   | 00000000:04:00.0 Off |                  N/A |
|  0%   31C    P8     9W / 250W |   8920MiB / 11264MiB |      0%      Default |
|       

## Test data에 없는 변수들을 다 생성했으므로 LightgbmRegressor 분류모델을 만듬

### 베이지안 최적화해주기 - LightgbmRegressor

**1. 5-fold 교차검증 이용해서 Train data로 Validation set을 RMSE가 최저였을 때의 하이퍼파라미터 구하기**

**2. Learning rate 0.01~0.1, max_depth 3~9, n_estimators 100~1000이었을 때에서 가장 최적의 하이퍼파라미터 구하기**


In [25]:
# 변수 6th Stage을 예측하는 모형 만들어 주기

import optuna
from lightgbm import LGBMRegressor
from sklearn.model_selection import cross_val_score

test_data_columns=test_stage_features
y='6th Stage'

train_X_new=train_X.loc[:,test_data_columns]
train_y_new=train_X.loc[:,y]



# Objective 함수 정의
def objective(trial):
    
    # 하이퍼파라미터 탐색할 공간 정의
    params = {
        'n_estimators': trial.suggest_int('n_estimators',100,1000),
        'max_depth': trial.suggest_int('max_depth',3,9),
        'learning_rate': trial.suggest_loguniform('learning_rate',0.01,0.1),
    }
    
    # LGBMRegressor 모델 객체 생성
    model = LGBMRegressor(**params, random_state=42)
    
    # 교차검증 수행하여 모델 성능 측정
    scores = -1 * cross_val_score(model, train_X_new, train_y_new,
                                  cv=5, scoring='neg_mean_squared_error')
    
    # 교차검증 평균 점수 리턴
    return np.mean(scores)


# Optuna study 생성
study = optuna.create_study(direction='minimize')

# study 실행 (n_trials는 시도 횟수)
study.optimize(objective, n_trials=10)

# 최적화된 하이퍼파라미터 값 출력
print(study.best_params)



[32m[I 2023-04-19 11:56:06,806][0m A new study created in memory with name: no-name-211cac82-9240-4e49-85d5-baba6b80acec[0m
[32m[I 2023-04-19 11:56:07,481][0m Trial 0 finished with value: 0.02628574546425592 and parameters: {'n_estimators': 146, 'max_depth': 7, 'learning_rate': 0.014758369041791227}. Best is trial 0 with value: 0.02628574546425592.[0m
[32m[I 2023-04-19 11:56:07,741][0m Trial 1 finished with value: 0.16617253965600326 and parameters: {'n_estimators': 104, 'max_depth': 3, 'learning_rate': 0.011170298713559852}. Best is trial 0 with value: 0.02628574546425592.[0m
[32m[I 2023-04-19 11:56:08,228][0m Trial 2 finished with value: 0.14306917353628962 and parameters: {'n_estimators': 119, 'max_depth': 9, 'learning_rate': 0.010247942306955948}. Best is trial 0 with value: 0.02628574546425592.[0m
[32m[I 2023-04-19 11:56:10,692][0m Trial 3 finished with value: 1.2666212041729795e-05 and parameters: {'n_estimators': 840, 'max_depth': 4, 'learning_rate': 0.056295482469

{'n_estimators': 840, 'max_depth': 4, 'learning_rate': 0.05629548246953177}


In [26]:
# Print best hyperparameters and auc

print(f'Best hyperparameters: {study.best_params}')
print(f'Best RMSE: {study.best_value:.4f}')

Best hyperparameters: {'n_estimators': 840, 'max_depth': 4, 'learning_rate': 0.05629548246953177}
Best RMSE: 0.0000


In [27]:
(est,depth,rate)=study.best_params.values()

In [28]:
# 앞서 구한 최적의 하이퍼파라미터로 튜닝해주고 튜닝된 모델로 Test data 새로운 컬럼 생성해주기

def bestreg_parametertuning(rate,depth,est,test_data_columns,y):
    best_lgbmreg=LGBMRegressor(learning_rate=rate,max_depth=depth,n_estimators=est,random_state=42)
    best_lgbmreg.fit(train_X[test_data_columns], train_X[y])
    new_test_data_X[y]=best_lgbmreg.predict(new_test_data_X[test_data_columns])
    

In [31]:
# Test datadp 6th Stage변수가 생성됨

bestreg_parametertuning(rate,depth,est,test_data_columns,'6th Stage')

In [32]:
new_test_data_X

Unnamed: 0,A Stage,differentiate,Race,T Stage,Grade,Estrogen Status,Progesterone Status,Marital Status,N Stage,6th Stage
0,1.0,3,2,1,1,1,1,1,1,2.000000e+00
1,1.0,0,2,2,2,1,0,1,0,2.000000e+00
2,1.0,0,2,1,2,1,1,1,1,2.000000e+00
3,1.0,1,2,1,3,1,1,1,0,1.000000e+00
4,1.0,0,2,1,2,1,1,0,0,1.000000e+00
...,...,...,...,...,...,...,...,...,...,...
800,1.0,1,2,1,3,1,1,1,0,1.000000e+00
801,1.0,0,2,2,2,1,1,1,2,4.000000e+00
802,1.0,0,2,0,2,1,1,0,0,9.564947e-22
803,1.0,3,2,0,1,1,1,1,0,9.564947e-22


In [33]:
y='Regional Node Examined'
train_X_new=train_X.loc[:,test_data_columns]
train_y_new=train_X.loc[:,y]
study = optuna.create_study(direction='minimize')

# study 실행 (n_trials는 시도 횟수)
study.optimize(objective, n_trials=10)

# 최적화된 하이퍼파라미터 값 출력
print(study.best_params)

(est,depth,rate)=study.best_params.values()
print(rate,depth,est)

[32m[I 2023-04-19 11:57:39,022][0m A new study created in memory with name: no-name-f682a12e-4131-4168-aedd-dd262723db20[0m
[32m[I 2023-04-19 11:57:44,503][0m Trial 0 finished with value: 61.01877162972247 and parameters: {'n_estimators': 657, 'max_depth': 8, 'learning_rate': 0.02378497109191181}. Best is trial 0 with value: 61.01877162972247.[0m
[32m[I 2023-04-19 11:57:47,629][0m Trial 1 finished with value: 60.25558668587422 and parameters: {'n_estimators': 967, 'max_depth': 4, 'learning_rate': 0.02776444166192067}. Best is trial 1 with value: 60.25558668587422.[0m
[32m[I 2023-04-19 11:57:51,987][0m Trial 2 finished with value: 60.833914172235175 and parameters: {'n_estimators': 523, 'max_depth': 8, 'learning_rate': 0.02439980814770578}. Best is trial 1 with value: 60.25558668587422.[0m
[32m[I 2023-04-19 11:57:53,682][0m Trial 3 finished with value: 60.1807518296619 and parameters: {'n_estimators': 195, 'max_depth': 8, 'learning_rate': 0.03462137707459102}. Best is tria

{'n_estimators': 356, 'max_depth': 3, 'learning_rate': 0.042832455773943554}
0.042832455773943554 3 356


In [34]:
# Test datadp Regional Node Examined변수가 생성됨

bestreg_parametertuning(rate,depth,est,test_data_columns,'Regional Node Examined')

In [35]:
new_test_data_X

Unnamed: 0,A Stage,differentiate,Race,T Stage,Grade,Estrogen Status,Progesterone Status,Marital Status,N Stage,6th Stage,Regional Node Examined
0,1.0,3,2,1,1,1,1,1,1,2.000000e+00,14.533824
1,1.0,0,2,2,2,1,0,1,0,2.000000e+00,14.045237
2,1.0,0,2,1,2,1,1,1,1,2.000000e+00,15.797581
3,1.0,1,2,1,3,1,1,1,0,1.000000e+00,13.801471
4,1.0,0,2,1,2,1,1,0,0,1.000000e+00,13.904897
...,...,...,...,...,...,...,...,...,...,...,...
800,1.0,1,2,1,3,1,1,1,0,1.000000e+00,13.801471
801,1.0,0,2,2,2,1,1,1,2,4.000000e+00,21.155025
802,1.0,0,2,0,2,1,1,0,0,9.564947e-22,12.308367
803,1.0,3,2,0,1,1,1,1,0,9.564947e-22,11.892922


In [37]:
y='Tumor Size'
train_X_new=train_X.loc[:,test_data_columns]
train_y_new=train_X.loc[:,y]
study = optuna.create_study(direction='minimize')

# study 실행 (n_trials는 시도 횟수)
study.optimize(objective, n_trials=10)

# 최적화된 하이퍼파라미터 값 출력
print(study.best_params)

(est,depth,rate)=study.best_params.values()
print(rate,depth,est)


[32m[I 2023-04-19 11:59:50,198][0m A new study created in memory with name: no-name-3449a597-a05a-4d5e-bba0-48a451694f2d[0m
[32m[I 2023-04-19 11:59:56,790][0m Trial 0 finished with value: 117.21850421954169 and parameters: {'n_estimators': 680, 'max_depth': 8, 'learning_rate': 0.017771989808855276}. Best is trial 0 with value: 117.21850421954169.[0m
[32m[I 2023-04-19 12:00:02,943][0m Trial 1 finished with value: 122.42685274971923 and parameters: {'n_estimators': 802, 'max_depth': 5, 'learning_rate': 0.052141526342352904}. Best is trial 0 with value: 117.21850421954169.[0m
[32m[I 2023-04-19 12:00:06,091][0m Trial 2 finished with value: 124.47419432690644 and parameters: {'n_estimators': 564, 'max_depth': 6, 'learning_rate': 0.07255297888176565}. Best is trial 0 with value: 117.21850421954169.[0m
[32m[I 2023-04-19 12:00:08,197][0m Trial 3 finished with value: 116.17017295712303 and parameters: {'n_estimators': 250, 'max_depth': 8, 'learning_rate': 0.040491622917486175}. Be

{'n_estimators': 354, 'max_depth': 3, 'learning_rate': 0.017605136710540148}
0.017605136710540148 3 354


In [38]:
# Test data에 Tumor Size변수가 생성됨

bestreg_parametertuning(rate,depth,est,test_data_columns,'Tumor Size')

In [39]:
y='Reginol Node Positive'
train_X_new=train_X.loc[:,test_data_columns]
train_y_new=train_X.loc[:,y]
study = optuna.create_study(direction='minimize')

# study 실행 (n_trials는 시도 횟수)
study.optimize(objective, n_trials=10)

# 최적화된 하이퍼파라미터 값 출력
print(study.best_params)

(est,depth,rate)=study.best_params.values()
print(rate,depth,est)
bestreg_parametertuning(rate,depth,est,test_data_columns,'Reginol Node Positive')

[32m[I 2023-04-19 12:01:06,557][0m A new study created in memory with name: no-name-1cd12220-e1b9-4b41-9fe9-954f37cf0ed5[0m
[32m[I 2023-04-19 12:01:06,983][0m Trial 0 finished with value: 6.888255352594358 and parameters: {'n_estimators': 154, 'max_depth': 3, 'learning_rate': 0.0199118518865309}. Best is trial 0 with value: 6.888255352594358.[0m
[32m[I 2023-04-19 12:01:12,210][0m Trial 1 finished with value: 7.262379874567342 and parameters: {'n_estimators': 847, 'max_depth': 6, 'learning_rate': 0.010332014084841329}. Best is trial 0 with value: 6.888255352594358.[0m
[32m[I 2023-04-19 12:01:13,918][0m Trial 2 finished with value: 7.041633826416413 and parameters: {'n_estimators': 233, 'max_depth': 6, 'learning_rate': 0.014108701181263629}. Best is trial 0 with value: 6.888255352594358.[0m
[32m[I 2023-04-19 12:01:18,381][0m Trial 3 finished with value: 7.370478527661172 and parameters: {'n_estimators': 746, 'max_depth': 6, 'learning_rate': 0.016829577527647896}. Best is tr

{'n_estimators': 154, 'max_depth': 3, 'learning_rate': 0.0199118518865309}
0.0199118518865309 3 154


In [40]:
y='Age'
train_X_new=train_X.loc[:,test_data_columns]
train_y_new=train_X.loc[:,y]
study = optuna.create_study(direction='minimize')

# study 실행 (n_trials는 시도 횟수)
study.optimize(objective, n_trials=10)

# 최적화된 하이퍼파라미터 값 출력
print(study.best_params)

(est,depth,rate)=study.best_params.values()
print(rate,depth,est)

# Test data에 Age변수가 생성됨
bestreg_parametertuning(rate,depth,est,test_data_columns,'Age')

[32m[I 2023-04-19 12:01:54,371][0m A new study created in memory with name: no-name-936e9cb2-3932-4b8f-ab12-a0b6f30eadfc[0m
[32m[I 2023-04-19 12:01:56,491][0m Trial 0 finished with value: 76.58995845939155 and parameters: {'n_estimators': 998, 'max_depth': 3, 'learning_rate': 0.05459095842001616}. Best is trial 0 with value: 76.58995845939155.[0m
[32m[I 2023-04-19 12:02:01,720][0m Trial 1 finished with value: 77.08571398763948 and parameters: {'n_estimators': 613, 'max_depth': 8, 'learning_rate': 0.011396528543836132}. Best is trial 0 with value: 76.58995845939155.[0m
[32m[I 2023-04-19 12:02:07,817][0m Trial 2 finished with value: 80.07636517139558 and parameters: {'n_estimators': 885, 'max_depth': 7, 'learning_rate': 0.07236310580732813}. Best is trial 0 with value: 76.58995845939155.[0m
[32m[I 2023-04-19 12:02:10,040][0m Trial 3 finished with value: 76.60978061077479 and parameters: {'n_estimators': 244, 'max_depth': 9, 'learning_rate': 0.02122095906130223}. Best is tri

{'n_estimators': 998, 'max_depth': 3, 'learning_rate': 0.05459095842001616}
0.05459095842001616 3 998


In [41]:
y='Survival Months'
train_X_new=train_X.loc[:,test_data_columns]
train_y_new=train_X.loc[:,y]
study = optuna.create_study(direction='minimize')

# study 실행 (n_trials는 시도 횟수)
study.optimize(objective, n_trials=10)

# 최적화된 하이퍼파라미터 값 출력
print(study.best_params)

(est,depth,rate)=study.best_params.values()
print(rate,depth,est)

# Test data에 Survival Months변수가 생성됨
bestreg_parametertuning(rate,depth,est,test_data_columns,'Survival Months')

[32m[I 2023-04-19 12:03:18,324][0m A new study created in memory with name: no-name-e6de2306-db3a-4614-85ca-95bb124a5e5c[0m
[32m[I 2023-04-19 12:03:18,694][0m Trial 0 finished with value: 507.82860753672037 and parameters: {'n_estimators': 137, 'max_depth': 3, 'learning_rate': 0.07448914467782536}. Best is trial 0 with value: 507.82860753672037.[0m
[32m[I 2023-04-19 12:03:19,344][0m Trial 1 finished with value: 514.3445271501251 and parameters: {'n_estimators': 200, 'max_depth': 4, 'learning_rate': 0.05821301525698846}. Best is trial 0 with value: 507.82860753672037.[0m
[32m[I 2023-04-19 12:03:20,662][0m Trial 2 finished with value: 529.0097166889229 and parameters: {'n_estimators': 185, 'max_depth': 7, 'learning_rate': 0.08595641389767766}. Best is trial 0 with value: 507.82860753672037.[0m
[32m[I 2023-04-19 12:03:23,817][0m Trial 3 finished with value: 519.09116211868 and parameters: {'n_estimators': 687, 'max_depth': 5, 'learning_rate': 0.02293877777028639}. Best is tr

{'n_estimators': 137, 'max_depth': 3, 'learning_rate': 0.07448914467782536}
0.07448914467782536 3 137


In [42]:
newtestdata=pd.concat([new_test_data_X,new_test_data_y],axis=1)

In [43]:
newtestdata.head(5)

Unnamed: 0,A Stage,differentiate,Race,T Stage,Grade,Estrogen Status,Progesterone Status,Marital Status,N Stage,6th Stage,Regional Node Examined,Tumor Size,Reginol Node Positive,Age,Survival Months,Status
0,1.0,3,2,1,1,1,1,1,1,2.0,14.533824,31.807107,5.759757,56.493414,73.060825,0
1,1.0,0,2,2,2,1,0,1,0,2.0,14.045237,69.495095,2.116119,58.317502,71.502406,1
2,1.0,0,2,1,2,1,1,1,1,2.0,15.797581,32.20254,5.759757,54.145202,72.067159,0
3,1.0,1,2,1,3,1,1,1,0,1.0,13.801471,30.432246,1.935915,51.412252,72.78481,0
4,1.0,0,2,1,2,1,1,0,0,1.0,13.904897,30.557379,1.935915,55.138632,70.36235,0


## Test data에 없는 변수들을 다 생성했으므로 LightgbmClassifier 분류모델을 만듬

### 베이지안 최적화해주기 - LightgbmClassifier

**1. 5-fold 교차검증 이용해서 Train data로 Validation set을 Auc가 최고였을 때의 하이퍼파라미터 구하기**

**2. Learning rate 0.01~0.1, max_depth 3~9, n_estimators 100~1000이었을 때에서 가장 최적의 하이퍼파라미터 구하기**


In [44]:
import lightgbm as lgb

In [45]:


def objective(trial):
    # Define hyperparameters to optimize 
    params={
        'boosting_type':'gbdt',
        'objective':'binary',
        'metric':'binary_logloss',
        'learning_rate':trial.suggest_loguniform('learning_rate',0.01,0.1),
        'max_depth':trial.suggest_int('max_depth',3,9),
        'n_estimators':trial.suggest_int("n_estimators",100,1000)
    }
    # Train and evaluate model 
    lgb_cv=lgb.LGBMClassifier(**params, random_state=42,scale_pos_weight=ratio)
    scores=cross_val_score(lgb_cv,train_X,train_y,cv=5,scoring='roc_auc')
    auc=scores.mean()
    return auc 

# Define study object and optimize 

study=optuna.create_study(direction='maximize',study_name='lgb_boost_opt',load_if_exists=True)
study.optimize(objective, n_trials=10)

# Print best hyperparameters and auc
print(f'Best hyperparameters: {study.best_params}')
print(f'Best AUC: {study.best_value:.4f}')

[32m[I 2023-04-19 12:07:09,157][0m A new study created in memory with name: lgb_boost_opt[0m
[32m[I 2023-04-19 12:07:12,323][0m Trial 0 finished with value: 0.8518179421942762 and parameters: {'learning_rate': 0.01724627184448636, 'max_depth': 8, 'n_estimators': 304}. Best is trial 0 with value: 0.8518179421942762.[0m
[32m[I 2023-04-19 12:07:14,215][0m Trial 1 finished with value: 0.8594611452746632 and parameters: {'learning_rate': 0.02039221329330299, 'max_depth': 4, 'n_estimators': 417}. Best is trial 1 with value: 0.8594611452746632.[0m
[32m[I 2023-04-19 12:07:16,423][0m Trial 2 finished with value: 0.8412525812825382 and parameters: {'learning_rate': 0.036641091052061175, 'max_depth': 5, 'n_estimators': 389}. Best is trial 1 with value: 0.8594611452746632.[0m
[32m[I 2023-04-19 12:07:20,523][0m Trial 3 finished with value: 0.8154941941704721 and parameters: {'learning_rate': 0.09660192292644951, 'max_depth': 5, 'n_estimators': 745}. Best is trial 1 with value: 0.85946

Best hyperparameters: {'learning_rate': 0.01008374988342057, 'max_depth': 3, 'n_estimators': 407}
Best AUC: 0.8704


In [46]:


lgb=lgb.LGBMClassifier(learning_rate=  0.01008374988342057,max_depth= 3,n_estimators=407,scale_pos_weight=ratio,random_state=42)

In [48]:
# 모델 학습함

lgb.fit(train_X,train_y)

In [49]:
# AUC를 구하기 위해 예측된 클래스의 확률값을 구해주기

pred=lgb.predict_proba(new_test_data_X)[:,1]

In [52]:
# 모델 성능을 평가할 수 있는 함수 만들어주기

from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, f1_score

def get_clf_prob(y_test, probability):
  pred=np.where(probability > 0.50,1,0)
  confusion=confusion_matrix(y_test, pred)
  accuracy=accuracy_score(y_test,pred)
  precision=precision_score(y_test,pred) 
  recall=recall_score(y_test,pred) 
  # F1 스코어 추가 
  f1=f1_score(y_test,pred,average='macro')
  Roc_score=roc_auc_score(y_test,probability)
  print('임계값: ', 0.5) 
  print('오차행렬')
  print(confusion) 
  # f1 score print 추가 
  print('정확도: {0:.4f}, 정밀도: {1:.4f}, 재현율: {2:.4f}, F1:{3:.4f}, AUC:{4: .4f}'.format(accuracy,precision,recall,f1,Roc_score))

In [53]:
get_clf_prob(new_test_data_y,pred) 

임계값:  0.5
오차행렬
[[615  67]
 [ 80  43]]
정확도: 0.8174, 정밀도: 0.3909, 재현율: 0.3496, F1:0.6312, AUC: 0.6890
