# ROC-AUC

In [27]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import plotly.graph_objs as go
import plotly.express as px
from math import sqrt
import pandas as pd


from sklearn.metrics import roc_curve, auc, roc_auc_score

#### データの用意

In [11]:
# Load iris and turn it into a binary problem: class index 0 vs. the rest.
iris = load_iris()
X, y = iris.data, (iris.target == 0).astype(int)

# Hold out 20% of the rows for testing, with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
# Train a logistic regression with default settings on the scaled training data.
model = LogisticRegression().fit(X_train, y_train)

# Use the second probability column (index 1) as the score of each test row.
y_prob = model.predict_proba(X_test)[:, 1]

#### ROC AUCの出力

In [15]:
# Compute ROC-AUC directly from the labels and scores, and print it.
roc_auc = roc_auc_score(y_true=y_test, y_score=y_prob)
print(roc_auc)

# Compute the ROC curve points, then recompute the area from the curve itself.
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(x=fpr, y=tpr)

1.0


In [7]:
# ROC curve of the classifier; the legend entry shows the AUC to two decimals.
trace0 = go.Scatter(
    x=fpr,
    y=tpr,
    mode='lines',
    name=f'ROC curve (area = {roc_auc:.2f})',
)

# Dashed diagonal from (0, 0) to (1, 1), labelled as the random reference.
trace1 = go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode='lines',
    name='Random',
    line=dict(dash='dash'),
)

layout = go.Layout(
    title='Receiver Operating Characteristic',
    xaxis={'title': 'False Positive Rate'},
    yaxis={'title': 'True Positive Rate'},
    showlegend=True,
)

fig = go.Figure(data=[trace0, trace1], layout=layout)
fig.show()

#### ROC AUCの95%信頼区間

* https://ushitora.net/archives/800
* https://www.researchgate.net/publication/16134792_The_Meaning_and_Use_of_the_Area_Under_a_Receiver_Operating_Characteristic_ROC_Curve

In [17]:
def roc_auc_ci(auc, num_positive, num_negative, z=1.96):
    """Return a normal-approximation confidence interval for a ROC AUC.

    The standard error is computed from the AUC and the two class sizes
    using the Q1/Q2 formula from "The Meaning and Use of the Area Under a
    Receiver Operating Characteristic (ROC) Curve" (linked above this cell),
    and the interval is ``auc +/- z * SE`` clipped to ``[0, 1]``.

    Args:
        auc: Observed AUC; must lie in ``[0, 1]``.
        num_positive: Number of positive samples; must be greater than 0.
        num_negative: Number of negative samples; must be greater than 0.
        z: Multiplier applied to the standard error. The default of 1.96
            gives the 95% interval.

    Returns:
        A ``(lower, upper)`` tuple of interval bounds.

    Raises:
        ValueError: If ``auc`` is outside ``[0, 1]`` or either sample count
            is not positive.
    """
    if not 0 <= auc <= 1:
        raise ValueError(f'auc must be within [0, 1], got {auc!r}')
    if num_positive <= 0 or num_negative <= 0:
        raise ValueError(
            'num_positive and num_negative must be positive, '
            f'got {num_positive!r} and {num_negative!r}'
        )

    auc_sq = auc**2
    q1 = auc / (2 - auc)
    q2 = 2*auc_sq / (1 + auc)
    variance = (
        auc*(1 - auc)
        + (num_positive - 1)*(q1 - auc_sq)
        + (num_negative - 1)*(q2 - auc_sq)
    ) / (num_positive*num_negative)

    # Every term is non-negative mathematically, but floating-point rounding
    # can push the sum marginally below zero near auc == 1; guard the sqrt.
    se_auc = sqrt(max(variance, 0.0))

    # An AUC cannot leave [0, 1], so clip the interval to that range.
    lower = max(auc - z*se_auc, 0.0)
    upper = min(auc + z*se_auc, 1.0)
    return lower, upper

In [29]:
# Example: confidence interval for an AUC of 0.7 with 100 positives and
# 100 negatives.
# The value is stored as `auc_value` instead of `auc` so that the `auc`
# function imported from sklearn.metrics is not overwritten at module level
# (which would break re-running the ROC curve cell).
p = 100
n = 100
auc_value = 0.7

lower, upper = roc_auc_ci(auc_value, p, n)
print(f'下側信頼区間:{round(lower,5)}',f'上側信頼区間:{round(upper,5)}')

下側信頼区間:0.6278 上側信頼区間:0.7722


#### AUCとデータ数との関係の可視化

In [20]:
def make_df(auc):
    """Tabulate the confidence interval of ``auc`` over a grid of sample sizes.

    For every size in a fixed grid, ``roc_auc_ci`` is called with that size
    used as both the positive and the negative count.

    Args:
        auc: AUC value passed to ``roc_auc_ci`` for every row.

    Returns:
        A DataFrame with the columns ``p`` (sample size), ``lower``,
        ``upper`` (interval bounds) and ``auc`` (the input, repeated).
    """
    sizes = [5, 10, 20, 30, 50, 70, 100, 150, 200, 300, 500, 700,
             1000, 1500, 2000, 3000, 5000, 10000]

    # One (lower, upper) pair per sample size, with equal class counts.
    bounds = [roc_auc_ci(auc, size, size) for size in sizes]

    return pd.DataFrame({
        'p': sizes,
        'lower': [low for low, _ in bounds],
        'upper': [high for _, high in bounds],
        'auc': auc,
    })

In [21]:
def auc_95(df):
    """Plot the interval bounds and the AUC against sample size.

    Draws three line traces (upper bound, lower bound, AUC) over ``df['p']``
    on a logarithmic x-axis and displays the figure.

    Args:
        df: DataFrame with the columns ``p``, ``upper``, ``lower`` and ``auc``.

    Returns:
        None.
    """
    sizes = df['p']
    # (values, legend label) for each line, in drawing order.
    series = [
        (df['upper'], 'Upper Bound'),
        (df['lower'], 'Lower Bound'),
        (df['auc'], 'AUC'),
    ]

    fig = go.Figure()
    for values, label in series:
        fig.add_trace(go.Scatter(x=sizes, y=values, mode='lines', name=label))

    # Title and axis labels.
    fig.update_layout(
        title='AUCの95%信頼区間',
        xaxis_title='データ数',
        yaxis_title='AUC',
        showlegend=True,
    )
    fig.update_xaxes(type="log")

    fig.show()



In [22]:
# Build the interval table for an AUC of 0.7 and plot it.
df = make_df(auc=0.7)
auc_95(df=df)