### roc: x--FPR, y--TPR
### pr: x--recall, y--precision
### auc: 曲线下面积占总方格面积的比例

#### if recal != 0, roc和pr曲线可以相互转换

In [17]:
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc  ###计算roc和auc
from sklearn.model_selection import train_test_split

# Import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target

##变为2分类
X, y = X[y != 2], y[y != 2]

In [18]:
# Add noisy features to make the problem harder
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)] # generate 800 cols randomly and concat it with origin

# shuffle and split training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3,random_state=0)

# Learn to predict each class against the other
svm = svm.SVC(kernel='linear', probability=True, random_state=random_state)

###通过decision_function()计算得到的y_score的值，用在roc_curve()函数中
y_score = svm.fit(X_train, y_train).decision_function(X_test)

In [22]:
y_test, y_score

(array([0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 1, 0, 0, 0, 1, 1, 1]),
 array([ 0.17284263,  0.65445393, -0.54087101,  0.3555818 ,  0.00579262,
        -0.20174248,  0.0565328 ,  0.00571205, -0.1517872 ,  0.25656427,
         0.39764688,  0.04549989,  0.33455816, -0.12499602,  0.23724787,
        -0.36250412, -0.0874348 , -0.11575856, -0.25270656, -0.23457408,
        -0.18239472, -0.10728706, -0.32201471,  0.71954289, -0.29292995,
        -0.22073314, -0.32473373, -0.19383585, -0.24296148,  0.37524795]))

In [48]:
def my_decision_func(v, threshold):
    return 1 if v >= threshold else 0

def my_roc_curve(y_test, y_score):
    """
    actually, there already exists a function to calc fpr, tpr, threshold and auc.
    usage:
    from sklearn.metrics import roc_curve, auc
    fpr, tpr, threshold = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr) ###计算auc的值
    
    ps: some exceptions have not been taken into consideration because it's just a trail! :-)
    """
    y_score_max, y_score_min = y_score.max(), y_score.min()
    gap = (y_score_max - y_score_min) / 10
    threshold = np.arange(y_score_min - gap * 2, y_score_max + gap * 2, gap) # init a threshold list
    fpr = np.array([]) # false positive rate
    tpr = np.array([]) # true positive rate
    
    for th in threshold:
        fn = 0
        fp = 0
        tp = 0
        tn = 0
        
        for i in range(len(y_score)):
            if y_test[i] == 1:
                #print(y_score[i])
                if my_decision_func(y_score[i], th) == y_test[i]:
                    tp += 1
                elif my_decision_func(y_score[i], th) != y_test[i]:
                    fp += 1
            elif y_test[i] == 0:
                #print(y_score[i])
                if my_decision_func(y_score[i], th) == y_test[i]:
                    tn += 1
                elif my_decision_func(y_score[i], th) != y_test[i]:
                    fn += 1
        
        fpr = np.append(fpr, fp / (fp + tn) if fp + tn > 0 else 0)
        tpr = np.append(tpr, tp / (tp + fn) if tp + fn > 0 else 0)
            
    return fpr, tpr, threshold

In [49]:
# constrast
my_roc_curve(y_test, y_score)

(array([0.        , 0.        , 0.        , 0.        , 0.        ,
        0.25      , 0.29411765, 0.38095238, 0.36363636, 0.4       ,
        0.46428571, 0.46428571, 0.5       , 0.5       ]),
 array([0.5       , 0.5       , 0.5       , 0.51724138, 0.6       ,
        0.66666667, 0.76923077, 0.77777778, 0.875     , 1.        ,
        1.        , 1.        , 0.        , 0.        ]),
 array([-0.79295379, -0.6669124 , -0.54087101, -0.41482962, -0.28878823,
        -0.16274684, -0.03670545,  0.08933594,  0.21537733,  0.34141872,
         0.46746011,  0.5935015 ,  0.71954289,  0.84558428]))

In [50]:
fpr, tpr, threshold = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)
fpr, tpr, threshold

(array([0.        , 0.        , 0.        , 0.2       , 0.2       ,
        0.26666667, 0.26666667, 0.4       , 0.4       , 0.46666667,
        0.46666667, 0.6       , 0.6       , 1.        ]),
 array([0.        , 0.06666667, 0.46666667, 0.46666667, 0.66666667,
        0.66666667, 0.73333333, 0.73333333, 0.8       , 0.8       ,
        0.93333333, 0.93333333, 1.        , 1.        ]),
 array([ 1.71954289,  0.71954289,  0.25656427,  0.0565328 ,  0.00571205,
        -0.0874348 , -0.10728706, -0.12499602, -0.1517872 , -0.18239472,
        -0.20174248, -0.23457408, -0.24296148, -0.54087101]))