In [19]:
from sklearn.datasets import load_breast_cancer
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
def classification_metrics(target, pred):
    """Compute accuracy, sensitivity, specificity and precision for a binary task.

    The counts are taken from ``metrics.confusion_matrix(target, pred)``, which
    orders the classes by sorted label value; the first label is therefore
    treated as the negative class and the second as the positive class.

    Parameters
    ----------
    target : array-like
        Ground-truth class labels.
    pred : array-like
        Predicted class labels, same length as ``target``.

    Returns
    -------
    tuple
        ``(acc, sen, spc, prc)``. A ratio whose denominator is zero (for
        example precision when nothing was predicted positive) is returned as
        ``nan`` instead of triggering a divide-by-zero warning.

    Raises
    ------
    ValueError
        If the confusion matrix is not 2x2, i.e. ``target`` and ``pred``
        together do not contain exactly two distinct classes.
    """
    cm = metrics.confusion_matrix(target, pred)
    if cm.shape != (2, 2):
        # Unpacking a 1x1 or 3x3 matrix would fail with an opaque
        # "values to unpack" error; fail early with a readable message.
        raise ValueError(
            "classification_metrics expects exactly two classes (a 2x2 "
            "confusion matrix), got shape {}".format(cm.shape)
        )
    tn, fp, fn, tp = cm.ravel()

    def _ratio(num, den):
        # An undefined ratio (empty denominator) is reported as NaN explicitly.
        return num / den if den else float("nan")

    acc = _ratio(tp + tn, tn + fp + fn + tp)
    sen = _ratio(tp, tp + fn)  # sensitivity / recall / true-positive rate
    spc = _ratio(tn, tn + fp)  # specificity / true-negative rate
    prc = _ratio(tp, tp + fp)  # precision / positive predictive value
    return acc, sen, spc, prc

In [4]:
# Load the breast-cancer dataset shipped with scikit-learn and pull out the
# feature matrix and the label vector.
data = load_breast_cancer()
X = data['data']
y = data['target']

# Show the dimensions of the features and of the labels.
X.shape, y.shape

((569, 30), (569,))

In [20]:
# Number of independent random train/test splits to evaluate.
rep = 100
metric_names = ['acc', 'sen', 'spc', 'prc', 'auc']
records = []

for i in range(rep):
    # Seed every split with its repetition index: each iteration still gets a
    # different partition, but re-running the cell reproduces the same numbers.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=i)
    model = GaussianNB()

    # Train the model on the training split.
    model.fit(X_train, y_train)

    # Predict hard labels and the probability of the second class
    # (column 1 of predict_proba) for the test split.
    y_hat = model.predict(X_test)
    prob = model.predict_proba(X_test)[:, 1]

    # Calculate metrics on the test split.
    acc, sen, spc, prc = classification_metrics(y_test, y_hat)
    auc = metrics.roc_auc_score(y_test, prob)

    records.append([acc, sen, spc, prc, auc])

# Build the frame once from the collected rows instead of growing it row by
# row inside the loop; the index is 0..rep-1, one row per repetition.
result_df = pd.DataFrame(records, columns=metric_names)

In [21]:
# Display the per-repetition metrics table (one row per train/test split).
result_df

Unnamed: 0,acc,sen,spc,prc,auc
0,0.937063,0.954023,0.910714,0.943182,0.991379
1,0.944056,0.977528,0.888889,0.935484,0.989804
2,0.937063,0.967033,0.884615,0.936170,0.985207
3,0.923077,0.975000,0.857143,0.896552,0.980556
4,0.916084,0.947917,0.851064,0.928571,0.989805
...,...,...,...,...,...
95,0.972028,0.966667,0.981132,0.988636,0.998742
96,0.951049,0.941176,0.960000,0.955224,0.993137
97,0.965035,0.989247,0.920000,0.958333,0.995054
98,0.965035,1.000000,0.912281,0.945055,0.997756


In [25]:
# Average of each metric over all repetitions.
result_df.mean()

acc    0.938392
sen    0.965427
spc    0.893740
prc    0.937959
auc    0.987838
dtype: float64

In [26]:
# Standard deviation of each metric over all repetitions (split-to-split variability).
result_df.std()

acc    0.019007
sen    0.020521
spc    0.041052
prc    0.024904
auc    0.006715
dtype: float64