In [1]:
from analysis import *
from EF_curve_support import *

Using Theano backend.
Using gpu device 1: Tesla K40m (CNMeM is disabled, CuDNN 4004)


In [2]:
def get_eval_single(task, X_test, y_test, model_weight, EF_ratio_list, eval_indices=None):
    """Score one saved model on a held-out fold.

    Builds a model with ``task.setup_model()``, loads ``model_weight`` into
    it, predicts on ``X_test`` and computes a precision-recall AUC and a
    ROC AUC through ``precision_auc_multi`` / ``roc_auc_multi``, passing
    ``np.median`` as the ``eval_mean_or_median`` aggregator.

    Args:
        task: object exposing ``setup_model()``; the model it returns must
            provide ``load_weights`` and ``predict``.
        X_test: features handed to ``model.predict``.
        y_test: ground-truth labels, forwarded as ``y_true``.
        model_weight: path of the weight file to load.
        EF_ratio_list: not used by this function; kept so existing callers
            that pass it keep working.
        eval_indices: label-column indices forwarded to the metric helpers.
            Defaults to ``np.arange(128)``.
            NOTE(review): ``run()`` appends 'Keck_Pria_AS_Retest' as a 129th
            label, so the default leaves that last column out of the
            summary -- confirm this is intended.

    Returns:
        Tuple ``(pr_auc, roc_auc)`` exactly as returned by the two helpers.
    """
    model = task.setup_model()
    model.load_weights(model_weight)
    y_pred_on_test = model.predict(X_test)
    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print(y_pred_on_test.shape)

    if eval_indices is None:
        eval_indices = np.arange(128)

    metric_kwargs = dict(y_true=y_test,
                         y_pred=y_pred_on_test,
                         eval_indices=eval_indices,
                         eval_mean_or_median=np.median)
    pr_auc = precision_auc_multi(**metric_kwargs)
    roc_auc = roc_auc_multi(**metric_kwargs)

    print('test precision: {}'.format(pr_auc))
    print('test roc: {}'.format(roc_auc))
    # Blank separator line; a bare ``print`` would be a silent no-op on Python 3.
    print('')

    return pr_auc, roc_auc

def run():
    """Evaluate every saved cross-validation model on its held-out fold.

    Reads the five fold CSVs, then for running indices 0..19 loads
    ``<PMTNN_weight_file_path><running_index>.weight`` and scores it on fold
    ``running_index // 4`` via ``get_eval_single``.

    Relies on three module-level globals that must be assigned before the
    call: ``config_json_file``, ``PMTNN_weight_file_path`` and
    ``EF_ratio_list``.

    Returns:
        Tuple ``(pr_auc_array, roc_auc_array)`` of NumPy arrays, one entry
        per running index, in running-index order.
    """
    n_folds = 5
    runs_per_fold = 4      # consecutive running indices share one test fold
    n_pcba_labels = 128

    data_pd_list = []
    for fold in range(n_folds):
        fold_file = '../../dataset/keck_pcba/fold_5/file_{}.csv'.format(fold)
        fold_pd = read_merged_data([fold_file])
        fold_pd.fillna(0, inplace=True)
        data_pd_list.append(fold_pd)

    # Last 128 columns are the PCBA labels; Keck Pria is added as the last label.
    labels_list = data_pd_list[0].columns[-n_pcba_labels:].tolist()
    labels_list.append('Keck_Pria_AS_Retest')

    PR_auc_list = []
    ROC_auc_list = []

    for running_index in range(n_folds * runs_per_fold):
        # Same text as the Python 2 statement ``print 'running index ', i``.
        print('running index  {}'.format(running_index))
        PMTNN_weight_file = PMTNN_weight_file_path + '{}.weight'.format(running_index)
        # Floor division: plain ``/`` yields a float (invalid list index)
        # on Python 3 or under ``from __future__ import division``.
        test_index = running_index // runs_per_fold
        test_pd = data_pd_list[test_index]

        # Parsed inside the loop so every task receives its own fresh dict.
        with open(config_json_file, 'r') as f:
            conf = json.load(f)

        X_test, y_test = extract_feature_and_label(test_pd,
                                                   feature_name='Fingerprints',
                                                   label_name_list=labels_list)
        print('Done loading data')

        task = MultiClassification(conf=conf)
        PR_auc, ROC_auc = get_eval_single(task, X_test, y_test,
                                          PMTNN_weight_file,
                                          EF_ratio_list)

        PR_auc_list.append(PR_auc)
        ROC_auc_list.append(ROC_auc)

    return np.array(PR_auc_list), np.array(ROC_auc_list)


# Multi-task Classification 3

In [3]:
# Settings for the multi_classification_3 cross-validation run. run() reads
# all three names as module-level globals, so they are assigned before the call.
EF_ratio_list = np.array([0.02, 0.01, 0.0015, 0.001])
PMTNN_weight_file_path = '../../output/cross_validation/multi_classification_3/45550779/'
config_json_file = '../../output/cross_validation/multi_classification_3.json'

# One PR-AUC and one ROC-AUC value per running index.
pr_auc_list, roc_auc_list = run()

running index  0
(102157, 129)
Done loading data
(102157, 129)
test precision: 0.126096939665
test roc: 0.88678240306

running index  1
(102157, 129)
Done loading data
(102157, 129)
test precision: 0.12130211343
test roc: 0.894912449885

running index  2
(102157, 129)
Done loading data
(102157, 129)
test precision: 0.0588048874397
test roc: 0.887230548381

running index  3
(102157, 129)
Done loading data
(102157, 129)
test precision: 0.129612417712
test roc: 0.885607575828

running index  4
(103098, 129)
Done loading data
(103098, 129)
test precision: 0.00301724046363
test roc: 0.548423880781

running index  5
(103098, 129)
Done loading data
(103098, 129)
test precision: 0.124939616369
test roc: 0.896379699699

running index  6
(103098, 129)
Done loading data
(103098, 129)
test precision: 0.129305869124
test roc: 0.898298924618

running index  7
(103098, 129)
Done loading data
(103098, 129)
test precision: 0.131310649747
test roc: 0.894574998578

running index  8
(102380, 129)
Done loa

In [4]:
# Show the per-run PR-AUC array followed by the per-run ROC-AUC array.
print(pr_auc_list)
print(roc_auc_list)

[ 0.12609694  0.12130211  0.05880489  0.12961242  0.00301724  0.12493962
  0.12930587  0.13131065  0.12751131  0.12564923  0.1321589   0.09398784
  0.12684596  0.12918529  0.13463586  0.12401803  0.1355432   0.1331501
  0.12863344  0.13099384]
[ 0.8867824   0.89491245  0.88723055  0.88560758  0.54842388  0.8963797
  0.89829892  0.894575    0.89625733  0.88809041  0.88669995  0.88782382
  0.88793995  0.87773203  0.87540793  0.88704118  0.88289844  0.88131437
  0.88653546  0.88789615]


In [5]:
# Persist the multi_classification_3 metrics, one value per line.
file_ = 'temp/multi_3/'
# makedirs also creates the parent 'temp' directory when it is missing,
# whereas os.mkdir would raise in that case.
if not os.path.exists(file_):
    os.makedirs(file_)

# ``with`` flushes and closes each file even if a write fails part-way.
with open(file_ + 'pr_auc.csv', 'w') as out:
    for pr in pr_auc_list:
        # str(value) + newline is exactly what ``print >> out, value`` wrote.
        out.write(str(pr) + '\n')

with open(file_ + 'roc_auc.csv', 'w') as out:
    for roc in roc_auc_list:
        out.write(str(roc) + '\n')

# Multi-task Classification 7

In [6]:
# Settings for the multi_classification_7 cross-validation run. run() reads
# all three names as module-level globals, so they are assigned before the call.
EF_ratio_list = np.array([0.02, 0.01, 0.0015, 0.001])
PMTNN_weight_file_path = '../../output/cross_validation/multi_classification_7/45555017/'
config_json_file = '../../output/cross_validation/multi_classification_7.json'

# One PR-AUC and one ROC-AUC value per running index.
pr_auc_list, roc_auc_list = run()

running index  0
(102157, 129)
Done loading data
(102157, 129)
test precision: 0.1234355554
test roc: 0.882498807862

running index  1
(102157, 129)
Done loading data
(102157, 129)
test precision: 0.122166629745
test roc: 0.891216463906

running index  2
(102157, 129)
Done loading data
(102157, 129)
test precision: 0.127378106187
test roc: 0.863672997684

running index  3
(102157, 129)
Done loading data
(102157, 129)
test precision: 0.125558608466
test roc: 0.8888802334

running index  4
(103098, 129)
Done loading data
(103098, 129)
test precision: 0.117482588741
test roc: 0.890804254475

running index  5
(103098, 129)
Done loading data
(103098, 129)
test precision: 0.135562909051
test roc: 0.889813131302

running index  6
(103098, 129)
Done loading data
(103098, 129)
test precision: 0.129265787319
test roc: 0.891229547313

running index  7
(103098, 129)
Done loading data
(103098, 129)
test precision: 0.131995189788
test roc: 0.876917666988

running index  8
(102380, 129)
Done loading 

In [7]:
# Show the per-run PR-AUC array followed by the per-run ROC-AUC array.
print(pr_auc_list)
print(roc_auc_list)

[ 0.12343556  0.12216663  0.12737811  0.12555861  0.11748259  0.13556291
  0.12926579  0.13199519  0.14129561  0.13916411  0.12908891  0.13228216
  0.12971385  0.12600593  0.12416694  0.11950133  0.13545488  0.13246263
  0.10228664  0.13480211]
[ 0.88249881  0.89121646  0.863673    0.88888023  0.89080425  0.88981313
  0.89122955  0.87691767  0.89106894  0.87999238  0.89005115  0.88580001
  0.86089809  0.87604112  0.8819218   0.88318151  0.88148467  0.87870171
  0.88372291  0.88019159]


In [8]:
# Persist the multi_classification_7 metrics, one value per line.
file_ = 'temp/multi_7/'
# makedirs also creates the parent 'temp' directory when it is missing,
# whereas os.mkdir would raise in that case.
if not os.path.exists(file_):
    os.makedirs(file_)

# ``with`` flushes and closes each file even if a write fails part-way.
with open(file_ + 'pr_auc.csv', 'w') as out:
    for pr in pr_auc_list:
        # str(value) + newline is exactly what ``print >> out, value`` wrote.
        out.write(str(pr) + '\n')

with open(file_ + 'roc_auc.csv', 'w') as out:
    for roc in roc_auc_list:
        out.write(str(roc) + '\n')