In [None]:
# Widen the notebook container to the full browser width.
# `IPython.display` is the public import location; importing `display` from
# `IPython.core.display` has been deprecated since IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import warnings
warnings.filterwarnings(action='ignore') 

# <span style='color:Blue'>추가 설명</span>

## 어떤 모델이 좋은 모델인가?

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Exported form of the `%matplotlib inline` line magic.
get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import numpy as np

In [None]:
# Read the CSV into a DataFrame; the path is relative to the working directory.
size_korea = pd.read_csv('../../data/size_korea_small.csv')

In [None]:
# Features: the height (키), weight (몸무게) and body-fat percentage (체지방율)
# columns as a 2-D array.
X = size_korea[['키', '몸무게', '체지방율']].values
# Target: sex encoded as integers (남 -> 0, 여 -> 1).
# `Series.replace` relied on pandas silently downcasting the object column to
# int, which is deprecated as of pandas 2.2; without the downcast the labels
# stay object-typed and scikit-learn classifiers reject them. `map` builds the
# integer codes directly, and the explicit cast fails fast if a label is
# missing from the mapping (it would otherwise become NaN).
y = size_korea['성별'].map({'남': 0, '여': 1}).astype('int64')

#### 층화 추출

In [None]:
from sklearn.model_selection import train_test_split

# Stratify on y with a fixed seed; the split sizes are left at the
# library defaults.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    stratify=y,
)

In [None]:
# Shapes of the full data followed by each train/test partition.
tuple(arr.shape for arr in (X, y, X_train, X_test, y_train, y_test))

In [None]:
from sklearn.svm import SVC

# probability=True is required because later cells call clf.predict_proba.
clf = SVC(probability=True, random_state=0)
clf.fit(X_train, y_train)

## confusion_matrix

In [None]:
# `plot_confusion_matrix` was deprecated in scikit-learn 1.0 and removed in
# 1.2; `ConfusionMatrixDisplay.from_estimator` is its replacement. Fall back
# to the old function only on versions that predate `from_estimator`.
from sklearn.metrics import ConfusionMatrixDisplay

if hasattr(ConfusionMatrixDisplay, "from_estimator"):
    ConfusionMatrixDisplay.from_estimator(clf, X_test, y_test)
else:
    from sklearn.metrics import plot_confusion_matrix
    plot_confusion_matrix(clf, X_test, y_test)
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
# Hard class predictions for the test set, tabulated against the true labels.
y_pred = clf.predict(X_test)
confusion_matrix(y_test, y_pred)
# Optional heatmap rendering of the same matrix (left disabled):
# sns.heatmap(confusion_matrix(y_test, y_pred), annot = True, fmt="d")

## classification_report

In [None]:
from sklearn.metrics import classification_report

# Text report of the per-class metrics, labelled with readable class names
# in label order (0 first, then 1).
target_names = ['남자(0)', '여자(1)']
report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

## 참고: 각 지표의 계산

In [None]:
# Confusion matrix that the manual metric computations below are derived from.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# accuracy: correctly classified samples (the diagonal) over all samples
np.trace(cm) / cm.sum()

In [None]:
# precision: diagonal entry divided by its column total, one value per class
precision = np.diag(cm) / cm.sum(axis=0)
precision

In [None]:
# recall: diagonal entry divided by its row total, one value per class
recall = np.diag(cm) / cm.sum(axis=1)
recall

In [None]:
# f1-score: harmonic mean of precision and recall, computed per class
2 / (np.reciprocal(precision) + np.reciprocal(recall))

# <span style='color:Blue'>추가 설명</span>

In [None]:
# Probability of class 0 and probability of class 1 for each test sample
y_probas = clf.predict_proba(X_test)
y_probas

In [None]:
# n evenly spaced candidate decision thresholds from 0 to 1 inclusive.
n = 100
th = np.linspace(start=0, stop=1, num=n)
th

In [None]:
# The threshold at index 49, used below as a single worked example.
th[49]

In [None]:
# Compare every sample's P(class 1) against every threshold in one broadcast:
# the result has one row per test sample and one column per threshold.
# Note: this rebinds y_pred, which previously held clf.predict(X_test).
y_pred = y_probas[:, 1][:, np.newaxis] > th[np.newaxis, :]

In [None]:
# Confusion matrix for the predictions obtained with threshold th[49].
confusion_matrix(y_true=y_test, y_pred=y_pred[:, 49])

In [None]:
cm = confusion_matrix(y_test, y_pred[:,49])
print(cm)
cm[0,0]/cm.sum(axis=1)[0], cm[1,0]/cm.sum(axis=1)[1]

In [None]:
tpr = []
fpr = []
for i in range(n):
    cm = confusion_matrix(y_test, y_pred[:,i])
    tpr.append(cm[0,0]/cm.sum(axis=1)[0])
    fpr.append(cm[1,0]/cm.sum(axis=1)[1])

In [None]:
# One point per threshold: fpr on the x-axis, tpr on the y-axis.
plt.scatter(x=fpr, y=tpr)

In [None]:
# Fraction of test samples where "P(class 1) > 0.5" agrees with the true label.
above_half = clf.predict_proba(X_test)[:, 1] > 0.5
(above_half == y_test).mean()

# <span style='color:Blue'>추가 설명</span>

### roc_auc_score 
#### ROC 곡선 아래의 면적(AUC): 넓을수록(1에 가까울수록) 좋음

In [None]:
from sklearn.metrics import roc_auc_score

# Score each test sample by its predicted probability of class 1,
# then compute the area under the ROC curve for those scores.
y_score = clf.predict_proba(X_test)[:, 1]
roc_auc_score(y_true=y_test, y_score=y_score)

In [None]:
# `plot_roc_curve` was deprecated in scikit-learn 1.0 and removed in 1.2;
# `RocCurveDisplay.from_estimator` is its replacement. Fall back to the old
# function only on versions that predate `from_estimator`.
from sklearn.metrics import RocCurveDisplay

if hasattr(RocCurveDisplay, "from_estimator"):
    RocCurveDisplay.from_estimator(clf, X_test, y_test)
else:
    from sklearn.metrics import plot_roc_curve
    plot_roc_curve(clf, X_test, y_test)
plt.show()

### average_precision_score

In [None]:
from sklearn.metrics import average_precision_score

# Average precision of the class-1 scores; kept in a variable because the
# precision-recall plot title below reuses it.
average_precision = average_precision_score(y_true=y_test, y_score=y_score)
print(f'Average precision-recall score: {average_precision:0.2f}')

### precision_recall_curve
#### 임계값(th) 변화에 따른 정밀도(precision)와 재현율(recall)의 변화

In [None]:
# `plot_precision_recall_curve` was deprecated in scikit-learn 1.0 and removed
# in 1.2; `PrecisionRecallDisplay.from_estimator` is its replacement. Fall
# back to the old function only on versions that predate `from_estimator`.
from sklearn.metrics import PrecisionRecallDisplay

if hasattr(PrecisionRecallDisplay, "from_estimator"):
    disp = PrecisionRecallDisplay.from_estimator(clf, X_test, y_test)
else:
    from sklearn.metrics import plot_precision_recall_curve
    disp = plot_precision_recall_curve(clf, X_test, y_test)
# Both code paths return a display object exposing the matplotlib axes as `ax_`.
disp.ax_.set_title(f'2-class Precision-Recall curve: AP={average_precision:0.2f}')

# <span style='color:Blue'>추가 설명</span>

### scikitplot - plot_roc
#### 좀더 자세하게 그려줌
- sklearn.metrics.plot_roc_curve

In [None]:
import scikitplot as skplt 
# ROC plot drawn by scikit-plot from the true labels and the predicted
# class probabilities.
skplt.metrics.plot_roc(y_test, y_probas)

- sklearn.metrics.plot_precision_recall_curve

In [None]:
# Precision-recall plot drawn by scikit-plot from the same labels and
# class probabilities.
skplt.metrics.plot_precision_recall(y_test, y_probas)

### brier_score_loss
#### 예측 확률과 실제 레이블 사이의 평균 제곱 오차(mean squared error)와 동일

In [None]:
from sklearn.metrics import brier_score_loss
# Brier score of the class-1 probabilities (y_score) against the true labels.
brier_score_loss(y_test, y_score)

In [None]:
# Recompute the class-probability matrix for the test set.
y_probas = clf.predict_proba(X_test)

#### 향상도와 누적이익곡선

In [None]:
import scikitplot as skplt
# Lift curve and cumulative gain chart, both built from the true labels
# and the predicted class probabilities.
skplt.metrics.plot_lift_curve(y_test, y_probas)
skplt.metrics.plot_cumulative_gain(y_test, y_probas)

#### Log loss

In [None]:
from sklearn.metrics import log_loss
# Log loss of the predicted class probabilities against the true labels.
log_loss(y_test, y_probas)

### 불균형 자료에 권장되는 예측 성능 지표

#### matthews_corrcoef

In [None]:
from sklearn.metrics import matthews_corrcoef

# Matthews correlation coefficient of the hard predictions on the test set.
y_predict = clf.predict(X_test)
matthews_corrcoef(y_true=y_test, y_pred=y_predict)

#### cohen_kappa_score

In [None]:
from sklearn.metrics import cohen_kappa_score
# Cohen's kappa between the true labels and the hard predictions in y_predict.
cohen_kappa_score(y_test, y_predict)