In [1]:
import numpy as np
from sklearn.metrics import (
    average_precision_score,
    confusion_matrix,
    roc_auc_score,
)

def load_preds(predfile):
    """Read prediction scores from a whitespace-delimited text file.

    Every non-blank line is split on whitespace and its second field is
    parsed as a float; all other fields on the line are ignored. Blank or
    whitespace-only lines (e.g. a trailing newline at end of file) are
    skipped.

    Args:
        predfile: Path of the text file to read.

    Returns:
        A 1-D ``numpy.ndarray`` holding the parsed scores in file order
        (an empty array when the file has no data lines).

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field is not a valid float.
    """
    preds = []
    # The context manager closes the file even when a line fails to parse.
    with open(predfile, 'r') as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # Nothing on this line; do not treat it as a malformed record.
                continue
            preds.append(float(fields[1]))
    return np.array(preds)

In [2]:
# Cell line whose prediction files are loaded below.
CELL_LINE = 'H1'

# All four prediction files share this prefix: <common_scripts>/<cell>/sequences/<cell>
_PREDS_PREFIX = (
    '/users/eprakash/git/interpret-benchmark/data/dnase_positives/common_scripts/'
    + CELL_LINE + '/sequences/' + CELL_LINE
)

# Model scores for the positive / negative examples of each split.
train_pos_preds = load_preds(_PREDS_PREFIX + '_train_positives_preds.txt')
train_neg_preds = load_preds(_PREDS_PREFIX + '_train_negatives_preds.txt')
test_pos_preds = load_preds(_PREDS_PREFIX + '_test_positives_preds.txt')
# NOTE: the test negatives are read from the "_short" file, unlike the other three.
test_neg_preds = load_preds(_PREDS_PREFIX + '_test_negatives_short_preds.txt')

In [3]:
def _accuracy_at_zero(pos_scores, neg_scores):
    """Return the fraction of examples classified correctly at threshold 0.

    An example is predicted positive when its score is strictly greater than
    0 and negative otherwise, which is the same rule as the ``preds > 0``
    predictions used for the confusion matrix in this notebook. A negative
    example scoring exactly 0 therefore counts as correct.

    Args:
        pos_scores: Array of scores for the truly positive examples.
        neg_scores: Array of scores for the truly negative examples.

    Returns:
        Number of correct predictions divided by the total number of examples.
    """
    n_correct = np.sum(pos_scores > 0) + np.sum(neg_scores <= 0)
    n_total = len(pos_scores) + len(neg_scores)
    return n_correct / n_total


train_accuracy = _accuracy_at_zero(train_pos_preds, train_neg_preds)
test_accuracy = _accuracy_at_zero(test_pos_preds, test_neg_preds)

print("Train accuracy: " + str(train_accuracy))
print("Test accuracy: " + str(test_accuracy))


Train accuracy: 0.96245
Test accuracy: 0.694263397475688


In [4]:
def _stack_labels_and_scores(pos_scores, neg_scores):
    """Build aligned label and score vectors, positives first.

    Returns a ``(labels, scores)`` pair where ``labels`` is 1.0 for every
    entry of ``pos_scores`` followed by 0.0 for every entry of ``neg_scores``,
    and ``scores`` is the two score arrays concatenated in that same order.
    """
    labels = np.concatenate((np.ones(len(pos_scores)), np.zeros(len(neg_scores))))
    scores = np.concatenate((pos_scores, neg_scores))
    return labels, scores


# Threshold-free ranking metrics on the training split.
train_true, train_scores = _stack_labels_and_scores(train_pos_preds, train_neg_preds)
train_auroc = roc_auc_score(train_true, train_scores)
train_auprc = average_precision_score(train_true, train_scores)

# Same metrics on the test split.
test_true, test_scores = _stack_labels_and_scores(test_pos_preds, test_neg_preds)
test_auroc = roc_auc_score(test_true, test_scores)
test_auprc = average_precision_score(test_true, test_scores)

print("Train auROC, auPRC: ", train_auroc, train_auprc)
print("Test auROC, auPRC: ", test_auroc, test_auprc)

Train auROC, auPRC:  0.9919210137500001 0.9877355368647369
Test auROC, auPRC:  0.747263884805686 0.7833150720936195


In [13]:
# Test-set confusion matrix at decision threshold 0 (score > 0 => predicted positive).
# Scores and integer labels are stacked in the same order: positives first, then negatives.
preds = np.concatenate((test_pos_preds, test_neg_preds))
labels = np.concatenate((
    np.ones(test_pos_preds.shape, dtype=int),
    np.zeros(test_neg_preds.shape, dtype=int),
))
# In the printed matrix, rows are the true class and columns the predicted class.
print(confusion_matrix(labels, preds > 0))

[[17103  2229]
 [ 9592  9740]]
