In [1]:
import numpy as np
import os
import pickle
from sklearn.model_selection import StratifiedKFold
from helpers.utils import (
    metrics_varying_positive_class_proportion, 
    plot_performance_comparison
)
from pprint import pprint

In [2]:
def generate_score_data(loc, n_pos, n_neg, n_folds, seed):
    """Simulate two-class scores and split them into stratified folds.

    Positive-class scores are drawn from a normal distribution centred at
    ``loc[0]`` and negative-class scores from one centred at ``loc[1]``, both
    with standard deviation 2.5. The draws use NumPy's *global* random state,
    so call ``np.random.seed`` beforehand for reproducible scores; ``seed``
    is only passed to the cross-validation splitter.

    :param loc: pair ``(mean_positive, mean_negative)`` of score means.
    :param n_pos: number of positive samples (label 1).
    :param n_neg: number of negative samples (label 0).
    :param n_folds: number of stratified folds.
    :param seed: ``random_state`` given to ``StratifiedKFold``.
    :return: ``(scores_folds, labels_folds)``, two lists with one array per
        fold holding the scores / labels of that fold's test split.
    """
    scores_pos = np.random.normal(loc=loc[0], scale=2.5, size=n_pos)
    scores_neg = np.random.normal(loc=loc[1], scale=2.5, size=n_neg)
    scores = np.concatenate([scores_pos, scores_neg])
    # `np.int` was removed from NumPy (it was only an alias of the builtin
    # `int`), so use the builtin directly; the resulting dtype is the same.
    labels = np.concatenate([np.ones(n_pos, dtype=int), np.zeros(n_neg, dtype=int)])

    # Stratified cross-validation split; only the test indices of each split
    # are needed, so the training indices are discarded.
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
    scores_folds = []
    labels_folds = []
    for _, ind_te in skf.split(scores[:, np.newaxis], labels):
        scores_folds.append(scores[ind_te])
        labels_folds.append(labels[ind_te])

    return scores_folds, labels_folds

In [3]:
# Experiment configuration.
seed = 123
np.random.seed(seed)  # seed NumPy's global generator
n_folds = 5
# Equal class sizes; the total sample count follows from them.
n_pos = n_neg = 2500
n_samples = n_pos + n_neg
# Means of the (positive, negative) score distributions.
loc = (3., -3.)

In [4]:
# Draw the synthetic scores and labels, grouped into one array per fold
scores_folds, labels_folds = generate_score_data(
    loc=loc, n_pos=n_pos, n_neg=n_neg, n_folds=n_folds, seed=seed
)

In [5]:
# Calculate performance metrics for different proportions of positive samples
# (the helper's printed log appears in the cell output)
results_dict = metrics_varying_positive_class_proportion(scores_folds, labels_folds, seed=seed)


Performance metrics for target positive proportion: 0.0050
Fold 1: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Fold 2: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Fold 3: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Fold 4: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Fold 5: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Area under the ROC curve = 0.965000
Average precision = 0.505177
Partial-AUC below fpr 0.0100 = 0.698492
Partial-AUC below fpr 0.0500 = 0.798974
Partial-AUC below fpr 0.1000 = 0.861053
Partial-AUC below fpr 0.2000 = 0.902778
TPR	FPR_target	FPR_actual	TPR_scaled
0.400000	0.001000	0.006000	0.066667
0.400000	0.005000	0.016000	0.125000
0.600000	0.010000	0.024000	0.250000
0.800000	0.050000	0.114000	0.350877
0.800000	0.100000	0.446000	0.179372

Performance metrics fo

Fold 4: Number of positive samples = 445. Target proportion = 0.4450. Actual proportion = 0.4450
Fold 5: Number of positive samples = 445. Target proportion = 0.4450. Actual proportion = 0.4450
Area under the ROC curve = 0.951191
Average precision = 0.948393
Partial-AUC below fpr 0.0100 = 0.707752
Partial-AUC below fpr 0.0500 = 0.801441
Partial-AUC below fpr 0.1000 = 0.850266
Partial-AUC below fpr 0.2000 = 0.893670
TPR	FPR_target	FPR_actual	TPR_scaled
0.337079	0.001000	0.002000	0.168539
0.426966	0.005000	0.006000	0.355805
0.501124	0.010000	0.010000	0.501124
0.762921	0.050000	0.050000	0.762921
0.853933	0.100000	0.100000	0.853933

Performance metrics for target positive proportion: 0.5000
Fold 1: Number of positive samples = 500. Target proportion = 0.5000. Actual proportion = 0.5000
Fold 2: Number of positive samples = 500. Target proportion = 0.5000. Actual proportion = 0.5000
Fold 3: Number of positive samples = 500. Target proportion = 0.5000. Actual proportion = 0.5000
Fold 4: Numbe

In [6]:
# Pretty-print the nested results dictionary returned by the metrics helper
pprint(results_dict, indent=1)

{'auc': {'CI_lower': [0.83735,
                      0.9243741666666667,
                      0.9340669565217391,
                      0.9379691176470588,
                      0.9400591111111112,
                      0.9420962500000001,
                      0.9436158208955223,
                      0.9443570512820513,
                      0.9457259550561796,
                      0.9477956000000001],
         'CI_upper': [0.99802,
                      0.9746083333333333,
                      0.9689573913043479,
                      0.9673370588235295,
                      0.9671180000000001,
                      0.9653844642857143,
                      0.9650849253731344,
                      0.9643349999999999,
                      0.9629482022471909,
                      0.9608639999999999],
         'median': [0.9650000000000001,
                    0.9532,
                    0.9526347826086956,
                    0.952664705882353,
                    0.95224888888

### Plotting the performance metrics for different methods as a function of positive class proportion

In [7]:
# Directory used for the pickled results (and passed to the plotting helper).
# Set the METRICS_OUTPUT_DIR environment variable to choose a location; the
# default is ./output under the current working directory, so the notebook
# does not depend on any particular user's home directory.
output_dir = os.environ.get('METRICS_OUTPUT_DIR', os.path.join(os.getcwd(), 'output'))
os.makedirs(output_dir, exist_ok=True)  # make sure the directory exists
# Method names and, index-aligned with them, the (positive, negative) score
# means used to simulate each method.
methods = ['method1', 'method2', 'method3']
locs = [(3, -3), (2, -3), (2, -2)]

In [8]:
# Simulate scores for every method and compute its metrics. Each call is given
# an output_file path of the form results_<method>.pkl inside output_dir.
if len(methods) != len(locs):
    # The two lists are paired element by element, so they must line up.
    raise ValueError('methods and locs must have the same length')

results = dict()
for m, method_loc in zip(methods, locs):
    scores_folds, labels_folds = generate_score_data(method_loc, n_pos, n_neg, n_folds, seed)
    f = os.path.join(output_dir, 'results_{}.pkl'.format(m))
    results[m] = metrics_varying_positive_class_proportion(scores_folds, labels_folds, seed=seed, output_file=f)


Performance metrics for target positive proportion: 0.0050
Fold 1: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Fold 2: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Fold 3: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Fold 4: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Fold 5: Number of positive samples = 5. Target proportion = 0.0050. Actual proportion = 0.0050
Area under the ROC curve = 0.961200
Average precision = 0.439961
Partial-AUC below fpr 0.0100 = 0.678392
Partial-AUC below fpr 0.0500 = 0.790769
Partial-AUC below fpr 0.1000 = 0.850526
Partial-AUC below fpr 0.2000 = 0.892222
TPR	FPR_target	FPR_actual	TPR_scaled
0.200000	0.001000	0.002000	0.100000
0.400000	0.005000	0.014000	0.142857
0.400000	0.010000	0.020000	0.200000
0.800000	0.050000	0.128000	0.312500
0.800000	0.100000	0.448000	0.178571

Performance metrics fo

Fold 4: Number of positive samples = 445. Target proportion = 0.4450. Actual proportion = 0.4450
Fold 5: Number of positive samples = 445. Target proportion = 0.4450. Actual proportion = 0.4450
Area under the ROC curve = 0.951193
Average precision = 0.945334
Partial-AUC below fpr 0.0100 = 0.683248
Partial-AUC below fpr 0.0500 = 0.793535
Partial-AUC below fpr 0.1000 = 0.847309
Partial-AUC below fpr 0.2000 = 0.890911
TPR	FPR_target	FPR_actual	TPR_scaled
0.182022	0.001000	0.002000	0.091011
0.373034	0.005000	0.006000	0.310861
0.458427	0.010000	0.010000	0.458427
0.773034	0.050000	0.050000	0.773034
0.858427	0.100000	0.100000	0.858427

Performance metrics for target positive proportion: 0.5000
Fold 1: Number of positive samples = 500. Target proportion = 0.5000. Actual proportion = 0.5000
Fold 2: Number of positive samples = 500. Target proportion = 0.5000. Actual proportion = 0.5000
Fold 3: Number of positive samples = 500. Target proportion = 0.5000. Actual proportion = 0.5000
Fold 4: Numbe

Fold 2: Number of positive samples = 390. Target proportion = 0.3900. Actual proportion = 0.3900
Fold 3: Number of positive samples = 390. Target proportion = 0.3900. Actual proportion = 0.3900
Fold 4: Number of positive samples = 390. Target proportion = 0.3900. Actual proportion = 0.3900
Fold 5: Number of positive samples = 390. Target proportion = 0.3900. Actual proportion = 0.3900
Area under the ROC curve = 0.916721
Average precision = 0.896345
Partial-AUC below fpr 0.0100 = 0.617575
Partial-AUC below fpr 0.0500 = 0.720999
Partial-AUC below fpr 0.1000 = 0.776451
Partial-AUC below fpr 0.2000 = 0.832614
TPR	FPR_target	FPR_actual	TPR_scaled
0.102564	0.001000	0.002000	0.051282
0.243590	0.005000	0.006000	0.202991
0.297436	0.010000	0.010000	0.297436
0.638462	0.050000	0.050000	0.638462
0.753846	0.100000	0.100000	0.753846

Performance metrics for target positive proportion: 0.4450
Fold 1: Number of positive samples = 445. Target proportion = 0.4450. Actual proportion = 0.4450
Fold 2: Numbe

Area under the ROC curve = 0.866389
Average precision = 0.790454
Partial-AUC below fpr 0.0100 = 0.565686
Partial-AUC below fpr 0.0500 = 0.648718
Partial-AUC below fpr 0.1000 = 0.700376
Partial-AUC below fpr 0.2000 = 0.761062
TPR	FPR_target	FPR_actual	TPR_scaled
0.050000	0.001000	0.002000	0.025000
0.135714	0.005000	0.006000	0.113095
0.178571	0.010000	0.010000	0.178571
0.471429	0.050000	0.050000	0.471429
0.617857	0.100000	0.100000	0.617857

Performance metrics for target positive proportion: 0.3350
Fold 1: Number of positive samples = 335. Target proportion = 0.3350. Actual proportion = 0.3350
Fold 2: Number of positive samples = 335. Target proportion = 0.3350. Actual proportion = 0.3350
Fold 3: Number of positive samples = 335. Target proportion = 0.3350. Actual proportion = 0.3350
Fold 4: Number of positive samples = 335. Target proportion = 0.3350. Actual proportion = 0.3350
Fold 5: Number of positive samples = 335. Target proportion = 0.3350. Actual proportion = 0.3350
Area under th

In [9]:
# Option A: plot straight from the in-memory results dict
# plot_performance_comparison(results, output_dir)

# Option B (used here): rebuild the results dict from the per-method pickle
# files (results_<method>.pkl in output_dir), then plot.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# files this notebook produced itself.
results = {}
for method_name in methods:
    pkl_path = os.path.join(output_dir, 'results_{}.pkl'.format(method_name))
    with open(pkl_path, 'rb') as handle:
        results[method_name] = pickle.load(handle)

plot_performance_comparison(results, output_dir)