In [88]:
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# Load the breast cancer dataset bundled with scikit-learn.
cancer = load_breast_cancer()
data = cancer.data      # feature matrix
target = cancer.target  # class labels (binary: 0 / 1)

# Fixed seed so the split, the fitted tree, and every metric computed from
# them are reproducible under Restart Kernel -> Run All.
SEED = 42

# Hold out 30% of the samples for evaluation; stratify on the labels so both
# splits keep the same class ratio as the full dataset.
x_train, x_test, y_train, y_test = train_test_split(
    data, target, test_size=0.3, stratify=target, random_state=SEED
)

# The tree also needs the seed: features are randomly permuted at each split,
# so an unseeded tree can differ between runs even on identical data.
dt = DecisionTreeClassifier(random_state=SEED).fit(x_train, y_train)

## sklearn.metrics
#### 이진 분류기 성능 평가 지표로, multiclass 일때는 precision / recall / f1 에 average 파라미터 추가 설정 필요 (accuracy_score 는 average 파라미터가 없다)
#### ~score(y_test, pred) 형태로 (실제값, 예측값 순서) 사용법은 똑같다
- accuracy_score
- precision_score
- recall_score
- f1_score

In [89]:
# Hard class predictions for the held-out samples.
pred = dt.predict(x_test)

# Every metric shares the (y_true, y_pred) call shape, so evaluate them in one
# pass and unpack in the same order as the metric functions are listed.
accuracy, precision, recall, f1 = (
    metric(y_test, pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Display all four scores as the cell output.
(accuracy, precision, recall, f1)

(0.9181286549707602, 0.926605504587156, 0.9439252336448598, 0.9351851851851852)

## roc_auc_score

sklearn.metrics.roc_auc_score(y_true, y_score, *, average='macro', sample_weight=None, max_fpr=None, multi_class='raise', labels=None)
- y_true : 실제 클래스 값
- y_score : predict_proba()의 반환값에서 Positive 칼럼의 예측 확률 사용
- predict_proba는 데이터마다 class별 예측 확률을 계산해주는데 class 순서는 estimator.classes_ 순서와 같다
- 보통 roc_auc_score(y_test, proba[:, 1]) 과 같이 쓴다

In [91]:
# Per-sample predicted class probabilities for the test set: one column per
# class, ordered the same way as dt.classes_.
proba = dt.predict_proba(x_test)

# Show the class order to confirm which column is the positive class
# (here the order is 0, 1, so column index 1 is the positive class).
dt.classes_

array([0, 1])

In [92]:
# ROC AUC is computed from scores, not hard labels: use column index 1 of the
# predicted probabilities (the positive-class column).
roc_auc_score(y_true=y_test, y_score=proba[:, 1])

0.9094626168224299