#### Documentation: 
- [scikit-learn](http://scikit-learn.org/stable/user_guide.html)
- [pandas](http://pandas.pydata.org/pandas-docs/stable/)
- [numpy](https://docs.scipy.org/doc/numpy-1.13.0/user/basics.html)
- [matplotlib](https://matplotlib.org/2.0.2/users/pyplot_tutorial.html)
- [scikit-plot](https://scikit-plot.readthedocs.io/en/stable/index.html)
<br>

<font color = "#CC3D3D">
# Measuring Model Performance

### Imbalanced Data

In [None]:
from sklearn.datasets import load_digits
# Load the digits dataset shipped with scikit-learn.
digits = load_digits()
# Boolean target: True where the label is 9, False for every other digit.
y = digits.target == 9  # treat the digit 9 as the positive class

In [None]:
# Total number of samples in the target vector.
len(y)

In [None]:
# Number of positive samples: summing the boolean target counts its True entries.
y.sum()

In [None]:
import matplotlib.pyplot as plt
%pylab inline

plt.imshow(digits.data[100].reshape(8,8), cmap=plt.cm.gray_r)
digits.target[100]

In [None]:
from sklearn.model_selection import train_test_split
# Split features and the binary target into train and test partitions.
# random_state is pinned to 0 so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, y, random_state=0)

### Training Models ###

*Dummy*

In [None]:
from sklearn.dummy import DummyClassifier
# Baseline classifier configured with the 'most_frequent' strategy.
dummy = DummyClassifier(strategy='most_frequent')
dummy.fit(X_train, y_train)
# Baseline predictions on the held-out test set.
pred_dummy = dummy.predict(X_test)

*Decision Tree*

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Shallow decision tree: depth is capped at 2.
tree = DecisionTreeClassifier(max_depth=2)
tree.fit(X_train, y_train)
# Decision tree predictions on the held-out test set.
pred_tree = tree.predict(X_test)

*Naïve Bayes*  
https://www.saedsayad.com/naive_bayesian.htm

<img align="left" src="http://uc-r.github.io/public/images/analytics/naive_bayes/naive_bayes_icon.png" width=600 height=400>  

In [None]:
from sklearn.naive_bayes import BernoulliNB
# Bernoulli naive Bayes with its default settings.
nb = BernoulliNB()
nb.fit(X_train, y_train)
# Naive Bayes predictions on the held-out test set.
pred_nb = nb.predict(X_test)

### Accuracy ###

In [None]:
from sklearn.metrics import accuracy_score
# Print each model's heading followed by its accuracy on the test set.
for heading, y_pred in (
    ("dummy model:", pred_dummy),
    ("decision tree:", pred_tree),
    ("naive bayes:", pred_nb),
):
    print(heading)
    print(accuracy_score(y_test, y_pred))

<font color='green'>
###  Confusion Matrix ###
<br>
<img align="left" src="http://drive.google.com/uc?export=view&id=1wIJIJENLCnfoesuTUMDjywPLMJnat36E" width=700 height=500>

In [None]:
from sklearn.metrics import confusion_matrix
# Print each model's heading followed by its confusion matrix on the test set.
for heading, y_pred in (
    ("dummy model:", pred_dummy),
    ("decision tree:", pred_tree),
    ("naive bayes:", pred_nb),
):
    print(heading)
    print(confusion_matrix(y_test, y_pred))

<font color='green'>
### Recall, Precision & F1 ###

In [None]:
from sklearn.metrics import classification_report
# Print a classification report per model; the two classes are labelled
# "not 9" and "9" in the report.
class_names = ["not 9", "9"]
for heading, y_pred in (
    ("dummy model:", pred_dummy),
    ("\ndecision tree:", pred_tree),
    ("\nnaive bayes:", pred_nb),
):
    print(heading)
    print(classification_report(y_test, y_pred, target_names=class_names))

##### PR curve

In [None]:
from sklearn.metrics import precision_recall_curve

def plot_precision_recall_curve(precisions, recalls, model):
    """Draw one precision-recall curve on the current matplotlib axes.

    Parameters
    ----------
    precisions : sequence
        Precision values, plotted on the y-axis.
    recalls : sequence
        Recall values, plotted on the x-axis.
    model : str
        Legend label for this curve.

    Repeated calls add further curves to the same axes.
    """
    plt.plot(recalls, precisions, label=model)
    # Both axes are fixed to the [0, 1] range.
    plt.axis([0, 1, 0, 1])
    plt.title('PR curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend()

In [None]:
# Overlay the PR curves of the decision tree and naive Bayes models, scoring
# each test sample with column 1 of the model's predict_proba output.
for name, estimator in (('decision tree', tree), ('naive bayes', nb)):
    precisions, recalls, _ = precision_recall_curve(
        y_test, estimator.predict_proba(X_test)[:,1])
    plot_precision_recall_curve(precisions, recalls, name)

##### PR by threshold

In [None]:
def plot_precision_recall_by_threshold(precisions, recalls, thresholds):
    """Plot precision and recall against the decision threshold and show the figure.

    Parameters
    ----------
    precisions : array-like
        Precision values; only the first ``len(thresholds)`` entries are drawn.
    recalls : array-like
        Recall values; only the first ``len(thresholds)`` entries are drawn.
    thresholds : numpy.ndarray
        Threshold values used for the x-axis.

    Precision is drawn as a dashed line and recall as a solid line. The
    function ends with ``plt.show()``.
    """
    # Imported locally so the function does not depend on `np` having been
    # injected into the namespace by `%pylab inline`.
    import numpy as np

    # The inputs may be longer than `thresholds` (presumably the output of
    # precision_recall_curve), so they are truncated to the threshold count.
    n_thresholds = thresholds.shape[0]
    plt.plot(thresholds, precisions[0:n_thresholds], linestyle='--', label='precision')
    plt.plot(thresholds, recalls[0:n_thresholds], label='recall')

    # Put x-axis ticks every 0.1, from 0 up to the current upper x-limit.
    _, end = plt.xlim()
    plt.xticks(np.round(np.arange(0, end, 0.1), 2))

    # Axis label, legend, grid and title.
    plt.xlabel('Threshold')
    plt.legend()
    plt.grid()
    plt.title('PR by threshold')
    plt.show()

In [None]:
# Naive Bayes scores: column 1 of predict_proba for every test sample.
nb_scores = nb.predict_proba(X_test)[:,1]
precisions, recalls, thresholds = precision_recall_curve(y_test, nb_scores)
# Show how precision and recall move as the threshold changes.
plot_precision_recall_by_threshold(precisions, recalls, thresholds)

<font color='green'>
<p>
### ROC & AUC 

<img align="left" src="http://drive.google.com/uc?export=view&id=1Htx445FclSqHhhNxPT_uOYQOBiuneWXJ" width=600 height=400>

In [None]:
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

##### AUC

In [None]:
# Print the area under the ROC curve for the decision tree and naive Bayes,
# scoring each test sample with column 1 of the model's predict_proba output.
for name, estimator in (('decision tree: ', tree), ('naive bayes: ', nb)):
    fpr, tpr, _ = roc_curve(y_test, estimator.predict_proba(X_test)[:,1])
    print(name, auc(fpr, tpr))

In [None]:
# Display the decision tree's predict_proba output for the test set.
tree.predict_proba(X_test)

##### Define ROC curve drawing function ####

In [None]:
def plot_roc_curve(fpr, tpr, model, color=None):
    """Draw one ROC curve, with its AUC in the legend, on the current axes.

    Parameters
    ----------
    fpr : sequence
        False positive rates, plotted on the x-axis.
    tpr : sequence
        True positive rates, plotted on the y-axis.
    model : str
        Model name; the AUC computed from ``fpr``/``tpr`` is appended to it
        to form the legend label.
    color : optional
        Passed through to ``plt.plot`` as the curve colour.

    Every call also draws the dashed navy diagonal from (0, 0) to (1, 1).
    """
    area = auc(fpr, tpr)
    legend_label = model + ' (auc = %0.3f)' % area

    plt.plot(fpr, tpr, label=legend_label, color=color)
    # Dashed diagonal from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')

    # Both axes are fixed to the [0, 1] range.
    plt.axis([0, 1, 0, 1])
    plt.title('ROC curve')
    plt.xlabel('FPR (1 - specificity)')
    plt.ylabel('TPR (recall)')
    plt.legend(loc="lower right")

<font color = "blue">
**The named colors used in matplotlib**
<img align="left" src="https://matplotlib.org/_images/named_colors.png" alt="matplotlib color">

##### Plot multiple ROC curves #####

In [None]:
# Overlay the ROC curves of all three models on one figure. Each model's test
# scores are column 1 of its predict_proba output.
fpr_dummy, tpr_dummy, _ = roc_curve(y_test,
                                    dummy.predict_proba(X_test)[:,1])
plot_roc_curve(fpr_dummy, tpr_dummy, 'dummy model', 'hotpink')
fpr_tree, tpr_tree, _ = roc_curve(y_test,
                                  tree.predict_proba(X_test)[:,1])
plot_roc_curve(fpr_tree, tpr_tree, 'decision tree', 'darkgreen')
# Naive Bayes gets its own variables so the decision tree's fpr/tpr values
# are not overwritten.
fpr_nb, tpr_nb, _ = roc_curve(y_test,
                              nb.predict_proba(X_test)[:,1])
plot_roc_curve(fpr_nb, tpr_nb, 'naive bayes', 'royalblue')

<br>
## Exercise
- `scikit-plot` 패키지를 사용하여 아래와 같은 표나 차트를 도식하시오.
  - **Confusion Matrix** : 위의 y_test, pred_tree 사용 (*아래 코드와 동일*)
  - **ROC Curve** : y_test, tree.predict_proba(X_test) 사용
  - **Recall-Precision Curve** : y_test, tree.predict_proba(X_test) 사용
  - **Cumulative Gain Curve** : y_test, tree.predict_proba(X_test) 사용
  - **Lift Curve** : y_test, tree.predict_proba(X_test) 사용

In [None]:
# !pip install scikit-plot

In [None]:
import scikitplot as skplt
# Confusion matrix of raw counts (normalize=False) for the decision tree
# predictions, matching the exercise statement (y_test, pred_tree).
skplt.metrics.plot_confusion_matrix(y_test, pred_tree, normalize=False)
plt.show()

<font color = "#CC3D3D">
## End