In [15]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
def classification_metrics(target, pred):
    """Compute accuracy, sensitivity, specificity and precision for binary labels.

    The confusion matrix is unpacked as ``tn, fp, fn, tp``, so the larger of
    the two labels (1 for 0/1 labels) is treated as the positive class.

    Parameters
    ----------
    target : array-like
        Ground-truth binary labels.
    pred : array-like
        Predicted labels, aligned with ``target``.

    Returns
    -------
    tuple
        ``(acc, sen, spc, prc)``. A metric whose denominator is zero (for
        example specificity when there are no negatives) is returned as NaN.
    """
    cm = metrics.confusion_matrix(target, pred)
    if cm.shape == (1, 1):
        # Only one class occurs in target/pred, so the matrix collapsed to
        # 1x1 and the 4-way unpack below would raise. Pin the 0/1 label order
        # to get a proper 2x2 matrix for that case.
        cm = metrics.confusion_matrix(target, pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    def _ratio(numerator, denominator):
        # Make undefined ratios an explicit NaN instead of relying on a
        # NumPy divide-by-zero warning.
        return numerator / denominator if denominator else float('nan')

    acc = _ratio(tp + tn, tn + fp + fn + tp)
    sen = _ratio(tp, tp + fn)
    spc = _ratio(tn, tn + fp)
    prc = _ratio(tp, tp + fp)
    return acc, sen, spc, prc

In [3]:
# Load the breast-cancer dataset and pull out the feature matrix and labels.
# NOTE(review): the metrics computed later treat label 1 as the positive
# class; check data['target_names'] to confirm that is the intended class.
data = load_breast_cancer()
X = data['data']
y = data['target']

# Show the dimensions of both arrays as the cell output.
X.shape, y.shape

((569, 30), (569,))

In [4]:
# Standardize every column of X: subtract the column mean and divide by the
# column standard deviation, both computed over all rows of X.
feature_means = X.mean(axis=0)
feature_stds = X.std(axis=0)
X_norm = (X - feature_means) / feature_stds

In [6]:
# Repeated hold-out evaluation: `rep` independent train/test splits, one
# random forest per split, metrics collected per repetition.
rep = 100
metric_names = ['acc', 'sen', 'spc', 'prc']
records = []

# NOTE(review): X_norm was standardized with statistics of the full dataset,
# so the test rows contributed to the scaling; confirm this is acceptable.
for i in range(rep):

    # Seed the split and the forest with the repetition index: each
    # repetition differs, but re-running the cell reproduces the same numbers.
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, random_state=i)
    model = RandomForestClassifier(random_state=i)

    # Train the model via train data
    model.fit(X_train, y_train)

    # Test the model via test data
    y_hat = model.predict(X_test)

    # Calculate metrics via test data
    acc, sen, spc, prc = classification_metrics(y_test, y_hat)

    # Collect the row; the frame is built once after the loop
    records.append([acc, sen, spc, prc])

# Build the result table in one go instead of growing it row by row.
result_df = pd.DataFrame(records, columns=metric_names)

In [9]:
# Display the metrics table: one row per repetition, one column per metric.
result_df

Unnamed: 0,acc,sen,spc,prc
0,0.951049,0.978261,0.901961,0.947368
1,0.951049,0.989011,0.884615,0.937500
2,0.972028,0.978022,0.961538,0.978022
3,0.951049,0.936170,0.979592,0.988764
4,0.972028,0.964286,0.983051,0.987805
...,...,...,...,...
95,0.944056,0.976190,0.898305,0.931818
96,0.944056,0.964286,0.915254,0.941860
97,0.965035,0.945055,1.000000,1.000000
98,0.958042,0.958333,0.957447,0.978723


In [10]:
# Average of each metric column over all repetitions.
result_df.mean()

acc    0.960979
sen    0.971070
spc    0.944310
prc    0.967423
dtype: float64

In [11]:
# Standard deviation of each metric column over all repetitions.
result_df.std()

acc    0.014168
sen    0.017092
spc    0.032494
prc    0.019124
dtype: float64

In [18]:
# Rank feature indices by importance, most important first.
# `model` is whatever forest the name currently refers to; when the cells run
# in order that is the one fitted in the last repetition of the loop.
importance = model.feature_importances_
ascending_order = np.argsort(importance)
feature_ranks = ascending_order[::-1]

# Indices of the five highest-ranked features.
feature_ranks[:5]

array([22,  7, 27, 20, 23])