In [2]:
import pandas as pd
import numpy as np


np.random.seed(1234)
n_samples = 100
df = pd.DataFrame({
    'y_true': np.random.randint(0, 2, size=n_samples),              # 실제 정답 (0 또는 1)
    'y_prob': np.random.randint(0, 101, size=n_samples)             # 예측 확률 (0~100 정수)
})

df.head()

Unnamed: 0,y_true,y_prob
0,1,38
1,1,7
2,0,100
3,1,67
4,0,5


In [3]:
from sklearn.metrics import confusion_matrix, auc

threshold_20 = list(range(95, -5, -5))  # 95, 90, ..., 0

# threshold 별 TPR, FPR 계산
tpr_list = []
fpr_list = []

for threshold in threshold_20:
    df['y_pred'] = (df['y_prob'] >= threshold).astype(int)
    tn, fp, fn, tp = confusion_matrix(df['y_true'], df['y_pred']).ravel()

    tpr = tp / (tp + fn) if (tp + fn) != 0 else 0
    fpr = fp / (fp + tn) if (fp + tn) != 0 else 0

    tpr_list.append(tpr)
    fpr_list.append(fpr)

roc_df = pd.DataFrame({
    'Threshold': threshold_20,
    'TPR': tpr_list,
    'FPR': fpr_list
})

roc_df

Unnamed: 0,Threshold,TPR,FPR
0,95,0.040816,0.078431
1,90,0.061224,0.098039
2,85,0.102041,0.156863
3,80,0.142857,0.313725
4,75,0.183673,0.352941
5,70,0.22449,0.372549
6,65,0.306122,0.392157
7,60,0.408163,0.45098
8,55,0.469388,0.529412
9,50,0.469388,0.568627


In [4]:
roc_auc = auc(fpr_list, tpr_list)
roc_auc

0.41796718687475