In [4]:
import numpy as np, pandas as pd

In [36]:
def get_TP(y_true, y_pred):
    return sum((y_true==1) & (y_pred==1))

def get_FP(y_true, y_pred):
    return sum((y_true==0) & (y_pred==1))

def get_FN(y_true, y_pred):
    return sum((y_true==1) & (y_pred==0))

def get_TN(y_true, y_pred):
    return sum((y_true==0) & (y_pred==0))

def get_confusion_matrix(y_true, y_pred):
    TP = get_TP(y_true, y_pred)
    FN = get_FN(y_true, y_pred)
    FP = get_FP(y_true, y_pred)
    TN = get_TN(y_true, y_pred)
    return np.array([[TP, FN], [FP, TN]])

def get_TPR(y_true, y_pred):
    TP = get_TP(y_true, y_pred)
    FN = get_FN(y_true, y_pred)
    return TP / (TP + FN)

def get_FPR(y_true, y_pred):
    FP = get_FP(y_true, y_pred)
    TN = get_TN(y_true, y_pred)
    return FP / (FP + TN)

def get_TNR():
    TN = get_TN(y_true, y_pred)
    FP = get_FP(y_true, y_pred)
    return TN / (TN + FP)

def get_Accuracy(y_true, y_pred):
    TP = get_TP(y_true, y_pred)
    TN = get_TN(y_true, y_pred)
    total = len(y_true)
    return (TP + TN) / total

def get_Precision(y_true, y_pred):
    TP = get_TP(y_true, y_pred)
    FP = get_FP(y_true, y_pred)
    return TP / (TP + FP)

def get_Recall(y_true, y_pred):
    TP = get_TP(y_true, y_pred)
    FN = get_FN(y_true, y_pred)
    return TP / (TP + FN)

def get_F_score(y_true, y_pred, alpha):
    P = get_Precision(y_true, y_pred)
    R = get_Recall(y_true, y_pred)
    return (alpha**2 + 1) * P * R / (alpha**2 * (P+R))

def get_F1_score(y_true, y_pred):
    P = get_Precision(y_true, y_pred)
    R = get_Recall(y_true, y_pred)
    return 2*P*R/(P+R)
    
def get_AUC_score(y_true, y_pred, n_threshold=50):
    TPR = []
    FPR = []
    for threshold in np.linspace(0, 1, 50):
        TP = sum((y_true>=threshold) & (y_pred>=threshold))
        FP = sum((y_true<threshold) & (y_pred>=threshold))
        FN = sum((y_true>=threshold) & (y_pred<threshold))
        TN = sum((y_true<threshold) & (y_pred<threshold))
        TPR.append((TP + 0.01) / (TP + FN + 0.02))
        FPR.append((FP + 0.01) / (FP + TN + 0.02))
    AUC = FPR[n_threshold-1]*TPR[n_threshold-1] - FPR[0]*TPR[0]
    for i in range(n_threshold-1):
        AUC += FPR[i+1]*TPR[i] - FPR[i]*TPR[i+1]
    AUC *= 0.5
    return AUC


In [14]:
# 文件读取  
def read_csv_file(file_name='', path_name='', logging=False):  
    print("==========读取数据=========")  
    data = pd.read_csv(path_name + file_name)  
    if logging:  
        print(data.head(5))  
        print(file_name +  "包含以下列：")  
        print(data.columns.values)  
        print(data.describe())  
        print(data.info())  
    return data  

# df = read_csv_file('', logging=False)
from sklearn.datasets import load_iris
data = load_iris()
X = data.data
y = data.target
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [18]:
y[y==2] = 1
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [37]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
LR = LogisticRegression()
LR = LR.fit(X_train, y_train)
y_pred = LR.predict(X_test)

print('y_test', y_test)
print('y_pred', y_pred)
sys_auc = roc_auc_score(y_test, y_pred)
print('sys_auc', sys_auc)
my_auc = get_AUC_score(y_test, y_pred)
print('my_auc', my_auc)


y_test [1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 0
 1 1 1 1 0 1 0 0]
y_pred [1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 0
 1 1 1 1 0 1 0 0]
sys_auc 1.0
my_auc -0.4992342787918501




In [33]:
y_true=y_test
TPR = []
FPR = []
for threshold in np.linspace(0, 1, 50):
    TP = sum((y_true>=threshold) & (y_pred>=threshold))
    FP = sum((y_true<threshold) & (y_pred>=threshold))
    FN = sum((y_true>=threshold) & (y_pred<threshold))
    TN = sum((y_true<threshold) & (y_pred<threshold))
    TPR.append(TP / (TP + FN + 2))
    FPR.append(FP / (FP + TN + 2))
print(TP)
print(FP)
print(TPR)
print(FPR)


29
0
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
[nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


  # Remove the CWD from sys.path while we load stuff.
