In [1]:
import os
import sys
import argparse
import numpy as np
import sklearn.model_selection
import sklearn.metrics
import csv
from glob import glob

In [2]:
import util
from linear_classifier import LinearClassifier
from sil import SIL

In [3]:
#haven't changed this yet
class ResultsReport:
    def __init__(self,label_names=None):
        self.res = {}
        self.label_names = label_names
    def add(self,metric,result):
        if metric not in self.res:
            self.res[metric] = []
        self.res[metric].append( result )
    def print_summary(self,metric=None):
        if metric is None:
            for metric in sorted(self.res.keys()):
                if metric != 'confusion':
                    self.print_summary(metric)
            self.print_summary('confusion')
            return
        if metric != 'confusion':
            mean = np.mean(self.res[metric])
            std = np.std(self.res[metric])
            ste = std/np.sqrt(len(self.res[metric])-1)
            print('%s %f %f %f' % (metric,mean,std,ste) )
        else:
            print('confusion')
            print(('%s '*len(self.label_names))%tuple(self.label_names))
            print(sum(self.res['confusion']))

In [None]:
#python run_mi_classify.py -o BreaKHis200/ -m vgg16 -l block4_pool --cat tumor --cv-fold-files fold* --pool-size 5 --mi median

In [4]:
#description='Compute CNN features.'

#required parameters
out_dir = os.path.join(os.getcwd(), 'BreaKHis_curr/')  #'-o', required=True, help='output directory'
model_name = 'vgg16' #'-m', required=True, help='CNN model'
layer = 'block4_pool' #'-l', required=True, help='CNN layer.'

#if only 3 parameters are used:
#python run_mi_classify.py -o BreaKHis200/ -m vgg16 -l block4_pool_p5

#optional parameters 
pool_size = '5' #'--pool-size', '-p', help='mean pooling size'
metric = 'accuracy' #'--metric', help='metric to optimize during parameter search (accuracy, balanced_accuracy, roc_auc); default: accuracy'
calibrate = False #'--calibrate', action='store_true', help='calibrate classifier (True or False); default: False'
classifier = 'svm' #'--classifier', '-c', help='classifier (svm or logistic); default: all'
kernel = 'linear' #'--kernel', help='SVM kernel; default: linear' 
mi_type = 'median' #'--mi', help='MI type (none, median, quantile); default: none (compute mean across images)' 

#not defined
instance_size = None #'--instance-size', help='instance size' 
instance_stride = None #'--instance-stride', help='instance stride'
cv_fold_files = None #'--cv-fold-files', help='cross-validation fold files; default: None'
cv_folds = None #'--cv-folds', help='cross-validation folds; default: None'
cv_lno = None #'--cv-lno', help='cross-validation leave n out; default: None'
n_jobs = '0' #'--n-jobs', help='number of parallel threads; default: 0'
group = None #'--group', help='Class groups for reporting results'
quantiles = '16' #'--quantiles', '-q', help='Number of quantiles; default: 16'
sample_weight = None #'--sample-weight', help='Weight samples by classification category and this one'
categories = None #'--cat', help='label categories to train (comma separated, tumor); default: tumor,tumor_type,benign_type,malignant_type'

In [19]:
# load filenames and labels
sample_images = util.load_sample_images(out_dir)
samples,cats,labels_before = util.load_labels(out_dir)

In [21]:
#print(type(sample_images))
#print(len(sample_images.keys()))
#print(sample_images.keys())
#sample_images['SOB_B_TA-14-13200-200-006']

#print(samples)
#print(len(samples))
#print()
#print(cats)
#print()
#print(labels_before)

[['M' 'DC' '' 'DC']
 ['M' 'DC' '' 'DC']
 ['M' 'DC' '' 'DC']
 ...
 ['B' 'TA' 'TA' '']
 ['B' 'TA' 'TA' '']
 ['B' 'TA' 'TA' '']]


In [11]:
#populate optional features
if sample_weight is not None:
    # get labels for sample_weight category
    c = np.where(cats==sample_weight)[0][0]
    ln = np.unique([l[c] for l in labels_before])
    ln.sort()
    ln = list(ln)
    if '' in ln:
        del ln[ln.index('')]
    label_names_sw = ln
    labels_sw = np.array([ ln.index(l) if l in ln else -1 for l in labels_before[:,c] ])
    
if group is not None:
    # get labels for group category
    if group == sample_weight:
        label_names_group = label_names_sw
        labels_group = labels_sw
    else:
        c = np.where(cats==group)[0][0]
        ln = np.unique([l[c] for l in labels_before])
        ln.sort()
        ln = list(ln)
        if '' in ln:
            del ln[ln.index('')]
        label_names_group = ln
        labels_group = np.array([ ln.index(l) if l in ln else -1 for l in labels_before[:,c] ])

In [22]:
#create labels and categories
labels_none = []
labels_tumor = []
labels = []
categories = 'tumor'

if categories is None:
    # get labels for all categories
    label_names = []
    new_labels = np.zeros(labels_before.shape,dtype='int')
    
    for c,cat in enumerate(cats):
        ln = np.unique([l[c] for l in labels_before])
        ln.sort()
        ln = list(ln)
        
        label_names.append(ln)
        
        new_labels[:,c] = [ln.index(l) for l in labels_before[:,c]]

    #labels_none = new_labels
    labels = new_labels 
    
else:
    # get labels for list of categories: tumor,tumor_type,benign_type,malignant_type
    label_names = []
    cats = categories.split(',')
    new_labels = np.zeros((labels_before.shape[0],len(cats)),dtype='int')
    
    for i,cat in enumerate(cats):
        if len(cats) > 1:
            c = np.where(cats==cat)[0][0]
        else:
            c = 0
        ln = np.unique([l[c] for l in labels_before])
        ln.sort()
        ln = list(ln)
        
        if '' in ln:
            del ln[ln.index('')]
        label_names.append(ln)
        
        new_labels[:,i] = np.array([ln.index(l) if l in ln else -1 for l in labels_before[:,c]])
        
    #labels_tumor = new_labels
    labels = new_labels
    cats = categories

In [23]:
labels

array([[1],
       [1],
       [1],
       ...,
       [0],
       [0],
       [0]])

In [25]:
# read in CNN features
feats = {}
for sample,imagelist in sample_images.items():
    feats[sample] = []
    for fn in imagelist:
        feat_fn = out_dir+fn[:fn.rfind('.')]+'_'+model_name+'-'+layer
        if pool_size is not None:
            feat_fn += '_p'+str(pool_size)
        if instance_size is not None:
            feat_fn += '_i'+str(instance_size)
        if instance_stride is not None:
            feat_fn += '-'+str(instance_stride)
        feat_fn += '.npy'
        feat = np.load(feat_fn)
        if len(feat) == 0:
            continue
        feats[sample].append( feat )

    print('%s %d'%(sample,len(feats[sample])))
    feats[sample] = np.concatenate(feats[sample],axis=0)
    if len(feats[sample].shape) == 1:
        feats[sample] = feats[sample].reshape((1,len(feats[sample])))
            
    # compute mean if needed
    if mi_type is None or mi_type.lower() == 'none':
        if len(feats[sample].shape) > 1:
            feats[sample] = feats[sample].mean(axis=0)

SOB_M_DC-14-16716-200-029 1
SOB_M_DC-14-16716-200-034 1
SOB_M_DC-14-16716-200-031 1
SOB_M_DC-14-16716-200-024 1
SOB_M_DC-14-16716-200-007 1
SOB_M_DC-14-16716-200-015 1
SOB_M_DC-14-16716-200-028 1
SOB_M_DC-14-16716-200-013 1
SOB_M_DC-14-16716-200-017 1
SOB_M_DC-14-16716-200-003 1
SOB_M_DC-14-16716-200-012 1
SOB_M_DC-14-16716-200-020 1
SOB_M_DC-14-16716-200-019 1
SOB_M_DC-14-16716-200-027 1
SOB_M_DC-14-16716-200-004 1
SOB_M_DC-14-16716-200-006 1
SOB_M_DC-14-16716-200-022 1
SOB_M_DC-14-16716-200-032 1
SOB_M_DC-14-16716-200-026 1
SOB_M_DC-14-16716-200-030 1
SOB_M_DC-14-16716-200-016 1
SOB_M_DC-14-16716-200-021 1
SOB_M_DC-14-16716-200-005 1
SOB_M_DC-14-16716-200-002 1
SOB_M_DC-14-16716-200-008 1
SOB_M_DC-14-16716-200-014 1
SOB_M_DC-14-16716-200-033 1
SOB_M_DC-14-16716-200-025 1
SOB_M_DC-14-16716-200-009 1
SOB_M_DC-14-16716-200-001 1
SOB_M_DC-14-16716-200-010 1
SOB_M_DC-14-16716-200-023 1
SOB_M_DC-14-16716-200-011 1
SOB_M_DC-14-16716-200-018 1
SOB_M_DC-14-11951-200-019 1
SOB_M_DC-14-11951-20

SOB_M_DC-14-9461-200-058 1
SOB_M_DC-14-9461-200-039 1
SOB_M_DC-14-9461-200-009 1
SOB_M_DC-14-9461-200-068 1
SOB_M_DC-14-9461-200-063 1
SOB_M_DC-14-9461-200-030 1
SOB_M_DC-14-9461-200-070 1
SOB_M_DC-14-9461-200-007 1
SOB_M_DC-14-9461-200-065 1
SOB_M_DC-14-9461-200-064 1
SOB_M_DC-14-9461-200-067 1
SOB_M_DC-14-9461-200-073 1
SOB_M_DC-14-9461-200-072 1
SOB_M_DC-14-9461-200-054 1
SOB_M_DC-14-9461-200-025 1
SOB_M_DC-14-9461-200-033 1
SOB_M_DC-14-9461-200-046 1
SOB_M_DC-14-9461-200-024 1
SOB_M_DC-14-9461-200-055 1
SOB_M_DC-14-9461-200-069 1
SOB_M_DC-14-9461-200-071 1
SOB_M_DC-14-9461-200-031 1
SOB_M_DC-14-9461-200-006 1
SOB_M_DC-14-9461-200-047 1
SOB_M_DC-14-9461-200-016 1
SOB_M_DC-14-9461-200-066 1
SOB_M_DC-14-9461-200-036 1
SOB_M_DC-14-9461-200-017 1
SOB_M_DC-14-9461-200-043 1
SOB_M_DC-14-9461-200-048 1
SOB_M_DC-14-9461-200-051 1
SOB_M_DC-14-9461-200-049 1
SOB_M_DC-14-9461-200-002 1
SOB_M_DC-14-9461-200-045 1
SOB_M_DC-14-9461-200-022 1
SOB_M_DC-14-9461-200-044 1
SOB_M_DC-14-9461-200-038 1
S

SOB_M_DC-14-14926-200-013 1
SOB_M_DC-14-14926-200-002 1
SOB_M_DC-14-14926-200-009 1
SOB_M_DC-14-14926-200-018 1
SOB_M_DC-14-14926-200-011 1
SOB_M_DC-14-14926-200-012 1
SOB_M_DC-14-14926-200-004 1
SOB_M_DC-14-14926-200-005 1
SOB_M_DC-14-17915-200-007 1
SOB_M_DC-14-17915-200-011 1
SOB_M_DC-14-17915-200-010 1
SOB_M_DC-14-17915-200-009 1
SOB_M_DC-14-17915-200-017 1
SOB_M_DC-14-17915-200-013 1
SOB_M_DC-14-17915-200-001 1
SOB_M_DC-14-17915-200-016 1
SOB_M_DC-14-17915-200-021 1
SOB_M_DC-14-17915-200-019 1
SOB_M_DC-14-17915-200-015 1
SOB_M_DC-14-17915-200-018 1
SOB_M_DC-14-17915-200-006 1
SOB_M_DC-14-17915-200-012 1
SOB_M_DC-14-17915-200-003 1
SOB_M_DC-14-17915-200-004 1
SOB_M_DC-14-17915-200-020 1
SOB_M_DC-14-17915-200-005 1
SOB_M_DC-14-17915-200-002 1
SOB_M_DC-14-17915-200-022 1
SOB_M_DC-14-17915-200-014 1
SOB_M_DC-14-2523-200-023 1
SOB_M_DC-14-2523-200-021 1
SOB_M_DC-14-2523-200-030 1
SOB_M_DC-14-2523-200-019 1
SOB_M_DC-14-2523-200-027 1
SOB_M_DC-14-2523-200-022 1
SOB_M_DC-14-2523-200-018 1

SOB_M_PC-14-19440-200-027 1
SOB_M_PC-14-15687B-200-007 1
SOB_M_PC-14-15687B-200-001 1
SOB_M_PC-14-15687B-200-005 1
SOB_M_PC-14-15687B-200-013 1
SOB_M_PC-14-15687B-200-004 1
SOB_M_PC-14-15687B-200-010 1
SOB_M_PC-14-15687B-200-009 1
SOB_M_PC-14-15687B-200-012 1
SOB_M_PC-14-15687B-200-006 1
SOB_M_PC-14-15687B-200-008 1
SOB_M_PC-14-15687B-200-014 1
SOB_M_PC-14-15687B-200-011 1
SOB_M_PC-14-15687B-200-003 1
SOB_M_PC-14-15687B-200-002 1
SOB_M_PC-14-9146-200-017 1
SOB_M_PC-14-9146-200-001 1
SOB_M_PC-14-9146-200-012 1
SOB_M_PC-14-9146-200-010 1
SOB_M_PC-14-9146-200-004 1
SOB_M_PC-14-9146-200-020 1
SOB_M_PC-14-9146-200-006 1
SOB_M_PC-14-9146-200-008 1
SOB_M_PC-14-9146-200-014 1
SOB_M_PC-14-9146-200-007 1
SOB_M_PC-14-9146-200-009 1
SOB_M_PC-14-9146-200-021 1
SOB_M_PC-14-9146-200-019 1
SOB_M_PC-14-9146-200-002 1
SOB_M_PC-14-9146-200-015 1
SOB_M_PC-14-9146-200-018 1
SOB_M_PC-14-9146-200-003 1
SOB_M_PC-14-9146-200-011 1
SOB_M_PC-14-9146-200-013 1
SOB_M_PC-14-9146-200-005 1
SOB_M_PC-14-9146-200-016 1

SOB_B_F-14-23060CD-200-008 1
SOB_B_F-14-23060CD-200-006 1
SOB_B_F-14-23060CD-200-012 1
SOB_B_F-14-23060CD-200-009 1
SOB_B_F-14-23060CD-200-007 1
SOB_B_F-14-23060CD-200-005 1
SOB_B_F-14-23060CD-200-014 1
SOB_B_PT-14-22704-200-018 1
SOB_B_PT-14-22704-200-006 1
SOB_B_PT-14-22704-200-005 1
SOB_B_PT-14-22704-200-013 1
SOB_B_PT-14-22704-200-019 1
SOB_B_PT-14-22704-200-024 1
SOB_B_PT-14-22704-200-025 1
SOB_B_PT-14-22704-200-004 1
SOB_B_PT-14-22704-200-017 1
SOB_B_PT-14-22704-200-014 1
SOB_B_PT-14-22704-200-039 1
SOB_B_PT-14-22704-200-029 1
SOB_B_PT-14-22704-200-040 1
SOB_B_PT-14-22704-200-011 1
SOB_B_PT-14-22704-200-035 1
SOB_B_PT-14-22704-200-003 1
SOB_B_PT-14-22704-200-033 1
SOB_B_PT-14-22704-200-007 1
SOB_B_PT-14-22704-200-008 1
SOB_B_PT-14-22704-200-022 1
SOB_B_PT-14-22704-200-016 1
SOB_B_PT-14-22704-200-032 1
SOB_B_PT-14-22704-200-020 1
SOB_B_PT-14-22704-200-028 1
SOB_B_PT-14-22704-200-041 1
SOB_B_PT-14-22704-200-030 1
SOB_B_PT-14-22704-200-010 1
SOB_B_PT-14-22704-200-012 1
SOB_B_PT-14-2

In [30]:
a = feats['SOB_M_DC-14-16716-200-003']
print(a.shape)
print(type(a))
print(a)

(40, 512)
<class 'numpy.ndarray'>
[[ 0.         4.19619   34.772602  ...  4.3456087  0.         6.581708 ]
 [ 0.         8.693751  54.662357  ...  3.2309723 33.961994   8.792187 ]
 [ 0.         0.        67.93502   ...  4.7455807  0.         6.8598924]
 ...
 [ 0.         0.        34.54459   ... 10.180523   0.         1.4408265]
 [ 0.         2.3449    16.676172  ...  2.2370353  0.         0.       ]
 [ 0.         0.        25.82337   ...  0.         0.         9.088432 ]]


In [59]:
def load_cv_files(out_dir, samples, cv_fold_files):

    cv_files = sorted(list(glob(out_dir + cv_fold_files)))

    idx_train_test = []
    for fn in cv_files:

        #fn is the fold name
        f = np.loadtxt(fn, dtype=str, delimiter=',')
        #print(type(f))

        idx_train = np.where(f[:,1]=='train')[0]
        idx_test = np.where(f[:,1]=='test')[0]

        #print(len(idx_train))
        #print(len(idx_test))

        name_train = f[idx_train,0]
        name_test = f[idx_test,0]

        #print(len(name_train))
        #print(len(name_test))

        idx_train = np.array([np.where(samples == name.split("/")[-1])[0] for name in name_train]).flatten()
        idx_test = np.array([np.where(samples == name.split("/")[-1])[0] for name in name_test]).flatten()

        #print(len(idx_train))
        #print(len(idx_test))

        #print(idx_train.shape)
        #print(idx_test.shape)
        
        idx_train_test.append([idx_train,idx_test])
    
    return idx_train_test

array([1402])

In [32]:
#get the list of cv_fold_files

cv_fold_files = 'fold*'
#print(cv_fold_files)

# build train/test sets
if cv_fold_files is not None:
    idx_train_test = load_cv_files(out_dir, samples, cv_fold_files)
    print(idx_train_test)
    
elif cv_folds is not None or cv_lno is not None:
    if cv_folds is not None:
        cv_folds = int(cv_folds)
    else:
        cv_lno = int(cv_lno)
        if cv_folds is None:
            cv_folds = len(samples) // cv_lno
    idx = np.arange(len(samples))
    if len(label_names) == 1:
        if cv_lno == 1:
            skf = sklearn.model_selection.LeaveOneOut()
        else:
            skf = sklearn.model_selection.StratifiedKFold( n_splits=cv_folds, shuffle=True )
        idx_train_test = list(skf.split(idx,labels[:,0]))
    else:
        # merge label categories to do stratified folds
        skf = sklearn.model_selection.StratifiedKFold( n_splits=cv_folds, shuffle=True )
        la_all = np.array(labels[:,0])
        p = 1
        for i in range(labels.shape[1]):
            la_all += labels[:,i] * p
            p *= len(label_names[i])
        idx_train_test = list(skf.split(idx,la_all))
else:
    print('Error: train/test split not specified')
    sys.exit(1)

options = {}
if kernel is not None:
    options['kernel'] = kernel
else:
    options['kernel'] = 'linear'
if classifier is not None:
    options['classifier'] = classifier
if mi_type is not None:
    options['predict_type'] = mi_type
if metric is not None:
    options['metric'] = metric    

/home/vibha/Documents/Research/project2/ImageMIL/BreaKHis_curr/fold0.txt
/home/vibha/Documents/Research/project2/ImageMIL/BreaKHis_curr/fold1.txt
/home/vibha/Documents/Research/project2/ImageMIL/BreaKHis_curr/fold2.txt
/home/vibha/Documents/Research/project2/ImageMIL/BreaKHis_curr/fold3.txt
/home/vibha/Documents/Research/project2/ImageMIL/BreaKHis_curr/fold4.txt
[[array([], dtype=int64), array([], dtype=int64)], [array([], dtype=int64), array([], dtype=int64)], [array([], dtype=int64), array([], dtype=int64)], [array([], dtype=int64), array([], dtype=int64)], [array([], dtype=int64), array([], dtype=int64)]]


In [107]:
idx_train_test

[[array([], dtype=int64), array([], dtype=int64)],
 [array([], dtype=int64), array([], dtype=int64)],
 [array([], dtype=int64), array([], dtype=int64)],
 [array([], dtype=int64), array([], dtype=int64)],
 [array([], dtype=int64), array([], dtype=int64)]]

In [104]:
#handle the case of only one category
#cats = 'tumor'
if cats == str:
    print('string category')
    cats = [cats]

for c,cat_name in enumerate(cats):
        res = ResultsReport(label_names[c])
        #print(res)
        nfolds = len(idx_train_test)
        #print(nfolds)

        for f,(idx_train,idx_test) in enumerate(idx_train_test):
            #print(f)
            #print(idx_train)
            #print(idx_test)
            print('Fold '+str(f+1)+'/'+str(len(idx_train_test)))

            idx_train = idx_train[np.where(labels[idx_train,c]!=-1)[0]]
            idx_test = idx_test[np.where(labels[idx_test,c]!=-1)[0]]
            X_train = [feats[samples[i]] for i in idx_train ]
            y_train = labels[idx_train,c]
            X_test = [feats[samples[i]] for i in idx_test ]
            y_test = labels[idx_test,c]

            if sample_weight is not None:
                # figure out sample weights
                print('Weighting by '+sample_weight)
                # discard samples missing a label for sample_weight category
                idx_train = idx_train[np.where(labels_sw[idx_train]!=-1)[0]]
                X_train = [ feats[samples[i]] for i in idx_train ]
                
                y_train = labels[idx_train,c]
                y_sw = y_train + len(label_names[c])*labels_sw[idx_train]

                uniq = np.unique(y_sw).tolist()
                counts = np.array([ (y_sw==l).sum() for l in uniq ])
                counts = counts.sum().astype(float) / ( counts * len(counts) )
                sw = np.array([ counts[uniq.index(y)] for y in y_sw ])
            else:
                sw = None

            if mi_type is None:
                model = LinearClassifier( n_jobs=n_jobs, **options )
                model.fit( X_train, y_train, calibrate=calibrate, param_search=True, sample_weight=sw )
            elif mi_type in ['median','max']:
                model = SIL( n_jobs=n_jobs, **options )
                model.fit( X_train, y_train, calibrate=calibrate, param_search=True, sample_weight=sw )
            elif mi_type == 'quantile':
                if quantiles is not None:
                    options['quantiles'] = int(quantiles)
                model = SIL( n_jobs=n_jobs, **options )
                model.fit( X_train, y_train, calibrate=calibrate, param_search=True, sample_weight=sw )
                
            p_predict = model.predict( X_test )
            y_predict = np.argmax(p_predict,axis=1)
            acc = sklearn.metrics.accuracy_score( y_test, y_predict )
            if len(y_test) == 1:
                auc = 0.0
            elif len(np.unique(y_train)) == 2:
                auc = sklearn.metrics.roc_auc_score( y_test, p_predict[:,1] )
            else:
                auc = 0.0
                for i in range(p_predict.shape[1]):
                    auc += sklearn.metrics.roc_auc_score( y_test==i, p_predict[:,i] )
                auc /= p_predict.shape[1]
            kappa = sklearn.metrics.cohen_kappa_score( y_test, y_predict )
            classes = np.unique(y_train)
            np.sort(classes)
            confusion = sklearn.metrics.confusion_matrix( y_test, y_predict, labels=classes )
            res.add('acc',acc)
            res.add('auc',auc)
            res.add('kappa',kappa)
            if len(label_names[c]) == 2:
                res.add('sensitivity', float( np.logical_and(y_test==1, y_predict==y_test).sum() ) / (y_test==1).sum() )
                res.add('specificity', float( np.logical_and(y_test!=1, y_predict==y_test).sum() ) / (y_test!=1).sum() )
            res.add('confusion',confusion)

            print('accuracy %f auc %f' % (acc,auc))
            print(confusion)

            if group is not None:
                # within group class metrics
                l_group = labels_group[idx_test]
                uniq = np.unique(l_group)
                uniq.sort()
                for u in uniq:
                    if u == -1:
                        continue
                    idx = (l_group==u)

                    group_name = '(%s=%s)'%(group,label_names_group[u])
                    res.add('accuracy '+group_name,sklearn.metrics.accuracy_score( y_test[idx], y_predict[idx] ))
                    if len(np.unique(y_train)) == 2:
                        if (y_test[idx]==0).sum() == 0 or (y_test[idx]==1).sum() == 0:
                            auc = 0
                        else:
                            auc = sklearn.metrics.roc_auc_score( y_test[idx], p_predict[idx,1] )
                    else:
                        auc = 0.0
                        for i in range(p_predict.shape[1]):
                            auc += sklearn.metrics.roc_auc_score( y_test[idx]==i, p_predict[idx,i] )
                        auc /= p_predict.shape[1]
                    res.add('auc '+group_name,auc)
                    res.add('kappa '+group_name,sklearn.metrics.cohen_kappa_score( y_test[idx], y_predict[idx] ) )
                    if len(label_names[c]) == 2:
                        res.add('sensitivity '+group_name,float( np.logical_and(y_test[idx]==1, y_predict[idx]==y_test[idx]).sum() ) / (y_test[idx]==1).sum() )
                        res.add('specificity '+group_name,float( np.logical_and(y_test[idx]!=1, y_predict[idx]==y_test[idx]).sum() ) / (y_test[idx]!=1).sum() )
            
        print('Cross-validation results')
        res.print_summary()


0
[]
[]
Fold 1/5
Cross-validation results
confusion
B M 


KeyError: 'confusion'

In [89]:

list_c = []
list_c.append(cats_test)
print(list_c)

['tumor']


In [91]:
cats

'tumor'

In [80]:
cats_l = ['tumor']