# PR曲線 AUC

クラスの不均衡が大きく，偽陽性率の変化が重要でない状況で使う．0から1の値を取り，1に近いほど良いモデルであることを示す．

In [12]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import plotly.graph_objs as go
from math import sqrt


from sklearn.metrics import precision_recall_curve, auc

#### データの用意

In [13]:
# Load the iris dataset and recast it as a binary problem:
# samples of class 0 become the positive label (1), all others become 0.
iris = load_iris()
X, y = iris.data, (iris.target == 0).astype(int)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply
# the same transform to both splits so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [14]:
# Train a logistic-regression classifier with the library's default settings
# (fit returns the estimator itself, so it can be chained).
model = LogisticRegression().fit(X_train, y_train)

# predict_proba yields one column per class; take column index 1 as the
# score for the positive class.
positive_class_column = 1
y_prob = model.predict_proba(X_test)[:, positive_class_column]

#### PR AUCの出力

In [15]:
# Precision/recall pairs (and the thresholds that produce them) computed
# from the predicted positive-class scores.
pr_points = precision_recall_curve(y_test, y_prob)
precision, recall, thresholds = pr_points

# Area under the precision-recall curve, with recall as the x coordinate
# and precision as the y coordinate.
pr_auc = auc(x=recall, y=precision)
print(pr_auc)

0.9999999999999999


In [20]:
# Draw the precision-recall curve: recall on the x axis, precision on the
# y axis. The legend entry reports the PR AUC rounded to two decimals.
trace0 = go.Scatter(x=recall, y=precision, mode='lines', name=f'PR curve (area = {pr_auc:.2f})')
layout = go.Layout(
    title='Precision Recall',
    # Axis titles must match the trace above (x=recall, y=precision);
    # they were previously swapped.
    xaxis=dict(title='Recall'),
    yaxis=dict(title='Precision'),
    showlegend=True
)
fig = go.Figure(data=trace0, layout=layout)
fig.show()