# AUC

In [None]:
from sklearn.metrics import roc_auc_score
from math import sqrt

In [None]:
# Compute the ROC AUC. NOTE(review): y_true and y_score are not defined in
# the visible cells; they must be provided before this cell is run.
auc = roc_auc_score(y_true, y_score)

### AUCの95%信頼区間

* https://ushitora.net/archives/800
* https://www.researchgate.net/publication/16134792_The_Meaning_and_Use_of_the_Area_Under_a_Receiver_Operating_Characteristic_ROC_Curve

AUCの下限，下側信頼限界とpositiveの個数との関係性を可視化
* AUCを0.7，0.75，0.8，0.85，0.9に固定し，positiveのデータ数との関係を可視化
* データ数を10，20，50，100に固定し，AUCの下限を可視化

In [8]:
def roc_auc_ci(auc, num_positive, num_negative, z=1.96):
    """Return a confidence interval for an AUC, clipped to [0, 1].

    The standard error uses the Hanley & McNeil approximation:

        SE = sqrt((A(1 - A) + (N1 - 1)(Q1 - A^2) + (N2 - 1)(Q2 - A^2)) / (N1 N2))

    with Q1 = A / (2 - A) and Q2 = 2 A^2 / (1 + A).

    Args:
        auc: Observed area under the ROC curve; must lie in [0, 1].
        num_positive: Number of positive samples (N1); must be at least 1.
        num_negative: Number of negative samples (N2); must be at least 1.
        z: Multiplier applied to the standard error to get the interval
            half-width. The default 1.96 yields a 95% interval.

    Returns:
        A ``(lower, upper)`` tuple with both bounds clipped to [0, 1].

    Raises:
        ValueError: If ``auc`` is outside [0, 1] or either count is below 1.
    """
    # Reject inputs for which the formula is undefined or meaningless
    # (division by zero for empty classes or auc == 2, negative variance).
    if not 0 <= auc <= 1:
        raise ValueError(f"auc must be within [0, 1], got {auc!r}")
    if num_positive < 1 or num_negative < 1:
        raise ValueError(
            "num_positive and num_negative must both be at least 1, "
            f"got {num_positive!r} and {num_negative!r}"
        )

    auc_sq = auc ** 2
    q1 = auc / (2 - auc)
    q2 = 2 * auc_sq / (1 + auc)
    variance = (
        auc * (1 - auc)
        + (num_positive - 1) * (q1 - auc_sq)
        + (num_negative - 1) * (q2 - auc_sq)
    ) / (num_positive * num_negative)
    half_width = z * sqrt(variance)

    # An AUC is a probability, so bounds outside [0, 1] are clipped.
    lower = max(0.0, auc - half_width)
    upper = min(1.0, auc + half_width)
    return lower, upper

In [4]:
# Example: confidence interval for AUC = 0.7 with 100 positive and
# 100 negative samples.
p = 100
n = 100
auc = 0.7

lower, upper = roc_auc_ci(auc, p, n)

下限:0.628
上限:0.772


### AUCとデータ数との関係の可視化

In [5]:
import plotly.express as px
import pandas as pd

In [42]:
def make_df(auc, sample_sizes=None):
    """Tabulate the AUC confidence bounds over a range of sample sizes.

    Each size is used as both the positive and the negative count, i.e. the
    classes are treated as balanced.

    Args:
        auc: AUC value held fixed across all rows.
        sample_sizes: Optional iterable of per-class sample counts. When
            omitted, a preset grid from 5 to 10000 is used.

    Returns:
        A DataFrame with columns ``p`` (per-class sample count), ``lower``,
        ``upper`` (bounds returned by ``roc_auc_ci``) and ``auc``.
    """
    if sample_sizes is None:
        sample_sizes = [
            5, 10, 20, 30, 50, 70, 100, 150, 200, 300,
            500, 700, 1000, 1500, 2000, 3000, 5000, 10000,
        ]
    else:
        # Materialize once so generators can be iterated and stored.
        sample_sizes = list(sample_sizes)

    bounds = [roc_auc_ci(auc, size, size) for size in sample_sizes]

    df = pd.DataFrame({
        'p': sample_sizes,
        'lower': [lower for lower, _ in bounds],
        'upper': [upper for _, upper in bounds],
    })
    # Scalar assignment broadcasts the fixed AUC to every row.
    df['auc'] = auc

    return df

In [51]:
import plotly.graph_objects as go

def auc_95(df):
    """Draw the upper bound, lower bound and AUC as lines and show the figure.

    Args:
        df: DataFrame with columns ``p``, ``upper``, ``lower`` and ``auc``;
            ``p`` is plotted on a logarithmic x axis.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    sizes = df['p']

    # (legend label, y values) in drawing order.
    curves = (
        ('Upper Bound', df['upper']),
        ('Lower Bound', df['lower']),
        ('AUC', df['auc']),
    )

    fig = go.Figure()
    for label, values in curves:
        fig.add_trace(go.Scatter(x=sizes, y=values, mode='lines', name=label))

    # Title and axis labels.
    fig.update_layout(
        title='AUCの95%信頼区間',
        xaxis_title='データ数',
        yaxis_title='AUC',
        showlegend=True,
    )
    fig.update_xaxes(type="log")

    fig.show()



In [52]:
# AUC fixed at 0.7: build the bounds table and plot it.
df = make_df(0.7)
auc_95(df)

In [53]:
# AUC fixed at 0.75: build the bounds table and plot it.
df = make_df(0.75)
auc_95(df)

In [54]:
# AUC fixed at 0.8: build the bounds table and plot it.
df = make_df(0.8)
auc_95(df)

In [55]:
# AUC fixed at 0.85: build the bounds table and plot it.
df = make_df(0.85)
auc_95(df)

In [56]:
# AUC fixed at 0.9: build the bounds table and plot it.
df = make_df(0.9)
auc_95(df)