In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
def classification_metrics(target, pred):
    tn, fp, fn, tp = metrics.confusion_matrix(target, pred).ravel()
    acc = (tp + tn) / (tn + fp + fn + tp)
    sen = tp / (tp + fn)
    spc = tn / (tn + fp)
    prc = tp / (tp + fp)
    return acc, sen, spc, prc

In [3]:
data = load_breast_cancer()
X, y = data['data'], data['target']
X.shape, y.shape

((569, 30), (569,))

In [20]:
X_norm = (X - X.mean(axis=0)) / X.std(axis=0)

In [21]:
rep = 100
result_df = pd.DataFrame(columns=['acc', 'sen', 'spc', 'prc', 'auc'])

for i in range(rep):

    X_train, X_test, y_train, y_test = train_test_split(X_norm, y)
    model = LogisticRegression()
    
    # Train the model via train data
    model.fit(X_train, y_train);
    
    # Test the model via test data
    y_hat = model.predict(X_test)
    prob = model.predict_proba(X_test)[:,1]
    
    # Calcaulte metrics via test data
    acc, sen, spc, prc = classification_metrics(y_test, y_hat)
    auc = metrics.roc_auc_score(y_test, prob)

    # add new row to result_df
    result_df.loc[len(result_df)] = [acc, sen, spc, prc, auc]

In [22]:
result_df

Unnamed: 0,acc,sen,spc,prc,auc
0,0.993007,1.000000,0.984127,0.987654,0.999802
1,0.986014,1.000000,0.971014,0.973684,0.999217
2,0.965035,0.977528,0.944444,0.966667,0.996879
3,0.986014,1.000000,0.963636,0.977778,0.990496
4,0.965035,0.988889,0.924528,0.956989,0.993082
...,...,...,...,...,...
95,0.979021,1.000000,0.943396,0.967742,0.992872
96,0.979021,0.965116,1.000000,1.000000,0.998776
97,0.986014,0.988889,0.981132,0.988889,0.999161
98,0.979021,0.988764,0.962963,0.977778,0.997711


In [23]:
result_df.mean()

acc    0.976993
sen    0.989194
spc    0.956662
prc    0.974487
auc    0.995175
dtype: float64

In [24]:
result_df.std()

acc    0.011214
sen    0.011074
spc    0.025785
prc    0.014653
auc    0.004667
dtype: float64