In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Read the tab-separated predictions file into a DataFrame and preview
# its first rows (read_table uses a tab separator by default).
data = pd.read_table('./BreastCancer_predict.txt')
data.head()

Unnamed: 0,y_true,prediction_probability
0,malignant,1.0
1,benign,0.0
2,malignant,1.0
3,benign,0.0
4,benign,0.0


In [38]:
# Encode the ground-truth labels as integers: 'malignant' -> 1, any other value -> 0.
is_malignant = data['y_true'] == 'malignant'
y_test = is_malignant.astype('int64')
# Scores taken from the 'prediction_probability' column.
y_pred = data['prediction_probability']

In [39]:
# Compute the ROC curve and the area under it, with label 1 as the positive class.
fpr, tpr, thresholds = roc_curve(y_test, y_pred, pos_label=1)
roc_auc = auc(fpr, tpr)

# Plot the ROC curve together with the diagonal reference line.
fig, ax = plt.subplots()
# The '.3f' spec prints the area with three decimals; roc_auc is reused
# here so the area is computed only once.
ax.plot(fpr, tpr, '-', color='b', lw=2, label='ROC(area={:.3f})'.format(roc_auc))
ax.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Random Chance')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.set_title('ROC curve')
ax.set_xlabel('FPR')
ax.set_ylabel('TPR')
ax.legend()
# Write the figure to disk and close it instead of displaying it inline.
fig.savefig('ROC.png')
plt.close(fig)


In [40]:
from sklearn.metrics import precision_recall_curve

# Compute the precision-recall curve (label 1 is the positive class) and
# the area under it; auc integrates with the trapezoidal rule.
precision, recall, thresholds = precision_recall_curve(y_test, y_pred, pos_label=1)
pr_auc = auc(recall, precision)

# Chance level for a precision-recall plot: a classifier with no skill has
# precision equal to the fraction of positive samples at every recall, so
# the baseline is a horizontal line at that prevalence (not a diagonal).
prevalence = float(np.mean(np.asarray(y_test) == 1))

# Plot the PR curve together with the chance-level baseline.
fig, ax = plt.subplots()
# The '.3f' spec prints the area with three decimals.
ax.plot(recall, precision, '-', color='b', lw=2, label='PRC(area={:.3f})'.format(pr_auc))
ax.plot([0, 1], [prevalence, prevalence], '--', color=(0.6, 0.6, 0.6), label='Random Chance')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.set_title('Precision-Recall curve')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.legend(loc='lower right')
# Write the figure to disk and close it instead of displaying it inline.
fig.savefig('PR.png')
plt.close(fig)