Skip to content

Commit

Permalink
Merge 255d43a into b38bcdc
Browse files Browse the repository at this point in the history
  • Loading branch information
lilleswing committed Mar 7, 2017
2 parents b38bcdc + 255d43a commit 302180e
Show file tree
Hide file tree
Showing 7 changed files with 292 additions and 61 deletions.
67 changes: 41 additions & 26 deletions devtools/jenkins/compare_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

# Tolerance applied in is_good_result: a score may fall this fraction below
# its desired value and still pass (0.01 == 1%).
CUSHION_PERCENT = 0.01

# Translates the short split/model identifiers found in the benchmark results
# CSV into the display names used in the desired-results CSV.
BENCHMARK_TO_DESIRED_KEY_MAP = {
    "index": "Index splitting",
    "random": "Random splitting",
    "scaffold": "Scaffold splitting",
    "logreg": "logistic regression",
    "tf": "Multitask network",
    "tf_robust": "robust MT-NN",
    "tf_regression": "NN regression",
    "graphconv": "graph convolution",
    "graphconvreg": "graphconv regression",
}

# Baseline scores to compare against, and the results file that
# test_compare_results reads.
DESIRED_RESULTS_CSV = "devtools/jenkins/desired_results.csv"
TEST_RESULTS_CSV = "examples/results.csv"
Expand All @@ -19,11 +21,11 @@ def parse_desired_results(desired_results):
for line in desired_results:
vars = line.split(',')
retval.append({
"split": vars[0],
"data_set": vars[1],
"model": vars[2],
"train_score": float(vars[3]),
"test_score": float(vars[4])
"split": vars[0],
"data_set": vars[1],
"model": vars[2],
"train_score": float(vars[3]),
"test_score": float(vars[4])
})
return retval

Expand All @@ -34,11 +36,11 @@ def parse_test_results(test_results):
for line in test_results:
vars = line.split(',')
retval.append({
"split": BENCHMARK_TO_DESIRED_KEY_MAP[vars[2]],
"data_set": vars[1],
"model": BENCHMARK_TO_DESIRED_KEY_MAP[vars[5]],
"train_score": float(vars[6]),
"test_score": float(vars[9])
"split": BENCHMARK_TO_DESIRED_KEY_MAP[vars[2]],
"data_set": vars[1],
"model": BENCHMARK_TO_DESIRED_KEY_MAP[vars[5]],
"train_score": float(vars[6]),
"test_score": float(vars[9])
})
return retval

Expand All @@ -53,28 +55,41 @@ def find_desired_result(result, desired_results):


def is_good_result(my_result, desired_result, cushion_percent=None):
    """Check one benchmark result against its desired scores.

    Both 'train_score' and 'test_score' are compared. Higher is better, so a
    score only fails when it is below the desired value reduced by the
    cushion fraction.

    Args:
        my_result: dict with 'data_set', 'model', 'split', 'train_score' and
            'test_score' entries for the measured result.
        desired_result: dict with 'train_score' and 'test_score' entries
            holding the target values.
        cushion_percent: allowed relative shortfall as a fraction; defaults
            to the module-level CUSHION_PERCENT when None.

    Returns:
        A ``(passed, messages)`` tuple. ``passed`` is True when no score
        fell short. ``messages`` holds one
        "data_set,model,split,key,actual,desired" line per failing score.
    """
    if cushion_percent is None:
        cushion_percent = CUSHION_PERCENT
    message = []
    for key in ('train_score', 'test_score'):
        # Higher is Better
        desired_value = desired_result[key] * (1.0 - cushion_percent)
        if my_result[key] < desired_value:
            message.append("%s,%s,%s,%s,%s,%s" % (
                my_result['data_set'], my_result['model'],
                my_result['split'], key, my_result[key],
                desired_result[key]))
    # Check every key before returning so all shortfalls are reported.
    return not message, message


def test_compare_results():
    """Compare benchmark results with the desired baseline scores.

    Reads DESIRED_RESULTS_CSV (skipping its first row) and TEST_RESULTS_CSV,
    looks up the desired entry for every test result and checks it with
    is_good_result. All lookup errors and score shortfalls are printed before
    asserting, so one run reports every problem instead of only the first.
    """
    # Context managers close both files even if parsing raises.
    with open(DESIRED_RESULTS_CSV) as desired_file:
        # The first row of the desired-results CSV is a header.
        desired_results = parse_desired_results(desired_file.readlines()[1:])
    with open(TEST_RESULTS_CSV) as results_file:
        test_results = parse_test_results(results_file.readlines())

    failures = []
    exceptions = []
    for test_result in test_results:
        try:
            desired_result = find_desired_result(test_result, desired_results)
            passes, message = is_good_result(test_result, desired_result)
        except Exception as e:
            # Kept broad on purpose: how a missing entry surfaces depends on
            # find_desired_result. The cause is recorded so it is not lost.
            exceptions.append("Unable to find desired result for %s (%s: %s)" %
                              (test_result, type(e).__name__, e))
            continue
        if not passes:
            failures.extend(message)

    for exception in exceptions:
        print(exception)
    for failure in failures:
        print(failure)
    assert_true(len(exceptions) == 0, "Error parsing performance results")
    assert_true(len(failures) == 0, "Some performance benchmarks not passed")

# Allow running the comparison directly as a script, outside a test runner.
if __name__ == "__main__":
    test_compare_results()
79 changes: 79 additions & 0 deletions devtools/jenkins/desired_results.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
split,dataset,model,Train score/ROC-AUC,Valid score/ROC-AUC
Index splitting,tox21,logistic regression,0.903,0.705
Index splitting,tox21,Random Forest,0.999,0.733
Index splitting,tox21,IRV,0.811,0.767
Index splitting,tox21,Multitask network,0.856,0.763
Index splitting,tox21,robust MT-NN,0.857,0.767
Index splitting,tox21,graph convolution,0.872,0.798
Expand All @@ -12,6 +14,8 @@ Index splitting,pcba,Multitask network,0.826,0.802
Index splitting,pcba,robust MT-NN,0.809,0.783
Index splitting,pcba,graph convolution,0.876,0.852
Index splitting,sider,logistic regression,0.933,0.620
Index splitting,sider,Random Forest,0.999,0.670
Index splitting,sider,IRV,0.649,0.642
Index splitting,sider,Multitask network,0.775,0.634
Index splitting,sider,robust MT-NN,0.803,0.632
Index splitting,sider,graph convolution,0.708,0.594
Expand All @@ -20,10 +24,20 @@ Index splitting,toxcast,Multitask network,0.830,0.678
Index splitting,toxcast,robust MT-NN,0.825,0.680
Index splitting,toxcast,graph convolution,0.821,0.720
Index splitting,clintox,logistic regression,0.967,0.676
Index splitting,clintox,Random Forest,0.995,0.776
Index splitting,clintox,IRV,0.763,0.814
Index splitting,clintox,Multitask network,0.934,0.830
Index splitting,clintox,robust MT-NN,0.949,0.827
Index splitting,clintox,graph convolution,0.946,0.860
Index splitting,hiv,logistic regression,0.864,0.739
Index splitting,hiv,Random Forest,0.999,0.720
Index splitting,hiv,IRV,0.841,0.724
Index splitting,hiv,Multitask network,0.761,0.652
Index splitting,hiv,robust MT-NN,0.780,0.708
Index splitting,hiv,graph convolution,0.876,0.779
Random splitting,tox21,logistic regression,0.902,0.715
Random splitting,tox21,Random Forest,0.999,0.764
Random splitting,tox21,IRV,0.808,0.767
Random splitting,tox21,Multitask network,0.844,0.795
Random splitting,tox21,robust MT-NN,0.855,0.773
Random splitting,tox21,graph convolution,0.865,0.827
Expand All @@ -36,6 +50,8 @@ Random splitting,pcba,Multitask network,0.811,0.778
Random splitting,pcba,robust MT-NN,0.811,0.771
Random splitting,pcba,graph convolution,0.872,0.844
Random splitting,sider,logistic regression,0.929,0.656
Random splitting,sider,Random Forest,0.999,0.665
Random splitting,sider,IRV,0.648,0.596
Random splitting,sider,Multitask network,0.777,0.655
Random splitting,sider,robust MT-NN,0.804,0.630
Random splitting,sider,graph convolution,0.705,0.618
Expand All @@ -44,10 +60,20 @@ Random splitting,toxcast,Multitask network,0.836,0.684
Random splitting,toxcast,robust MT-NN,0.822,0.681
Random splitting,toxcast,graph convolution,0.820,0.717
Random splitting,clintox,logistic regression,0.972,0.725
Random splitting,clintox,Random Forest,0.997,0.670
Random splitting,clintox,IRV,0.809,0.846
Random splitting,clintox,Multitask network,0.951,0.834
Random splitting,clintox,robust MT-NN,0.959,0.830
Random splitting,clintox,graph convolution,0.975,0.876
Random splitting,hiv,logistic regression,0.860,0.806
Random splitting,hiv,Random Forest,0.999,0.850
Random splitting,hiv,IRV,0.839,0.809
Random splitting,hiv,Multitask network,0.742,0.715
Random splitting,hiv,robust MT-NN,0.753,0.727
Random splitting,hiv,graph convolution,0.847,0.803
Scaffold splitting,tox21,logistic regression,0.900,0.650
Scaffold splitting,tox21,Random Forest,0.999,0.629
Scaffold splitting,tox21,IRV,0.823,0.708
Scaffold splitting,tox21,Multitask network,0.863,0.703
Scaffold splitting,tox21,robust MT-NN,0.861,0.710
Scaffold splitting,tox21,graph convolution,0.885,0.732
Expand All @@ -60,6 +86,8 @@ Scaffold splitting,pcba,Multitask network,0.814,0.760
Scaffold splitting,pcba,robust MT-NN,0.812,0.756
Scaffold splitting,pcba,graph convolution,0.874,0.817
Scaffold splitting,sider,logistic regression,0.926,0.592
Scaffold splitting,sider,Random Forest,0.999,0.619
Scaffold splitting,sider,IRV,0.639,0.599
Scaffold splitting,sider,Multitask network,0.776,0.557
Scaffold splitting,sider,robust MT-NN,0.797,0.560
Scaffold splitting,sider,graph convolution,0.722,0.583
Expand All @@ -68,6 +96,57 @@ Scaffold splitting,toxcast,Multitask network,0.828,0.617
Scaffold splitting,toxcast,robust MT-NN,0.830,0.614
Scaffold splitting,toxcast,graph convolution,0.832,0.638
Scaffold splitting,clintox,logistic regression,0.960,0.803
Scaffold splitting,clintox,Random Forest,0.993,0.735
Scaffold splitting,clintox,IRV,0.793,0.718
Scaffold splitting,clintox,Multitask network,0.947,0.862
Scaffold splitting,clintox,robust MT-NN,0.953,0.890
Scaffold splitting,clintox,graph convolution,0.957,0.823
Scaffold splitting,hiv,logistic regression,0.858,0.798
Scaffold splitting,hiv,Random Forest,0.946,0.562
Scaffold splitting,hiv,IRV,0.847,0.811
Scaffold splitting,hiv,Multitask network,0.775,0.765
Scaffold splitting,hiv,robust MT-NN,0.785,0.748
Scaffold splitting,hiv,graph convolution,0.867,0.769
Index splitting,delaney,Random Forest,0.953,0.626
Index splitting,delaney,NN regression,0.868,0.578
Index splitting,delaney,graphconv regression,0.967,0.790
Random splitting,delaney,Random Forest,0.951,0.684
Random splitting,delaney,NN regression,0.865,0.574
Random splitting,delaney,graphconv regression,0.964,0.782
Scaffold splitting,delaney,Random Forest,0.953,0.284
Scaffold splitting,delaney,NN regression,0.866,0.342
Scaffold splitting,delaney,graphconv regression,0.967,0.606
Index splitting,sampl,Random Forest,0.968,0.736
Index splitting,sampl,NN regression,0.917,0.764
Index splitting,sampl,graphconv regression,0.982,0.864
Random splitting,sampl,Random Forest,0.967,0.752
Random splitting,sampl,NN regression,0.908,0.830
Random splitting,sampl,graphconv regression,0.987,0.868
Scaffold splitting,sampl,Random Forest,0.966,0.473
Scaffold splitting,sampl,NN regression,0.891,0.217
Scaffold splitting,sampl,graphconv regression,0.985,0.666
Index splitting,nci,NN regression,0.171,0.062
Index splitting,nci,graphconv regression,0.123,0.048
Random splitting,nci,NN regression,0.168,0.085
Random splitting,nci,graphconv regression,0.117,0.076
Scaffold splitting,nci,NN regression,0.180,0.052
Scaffold splitting,nci,graphconv regression,0.131,0.046
Random splitting,pdbbind(core),Random Forest,0.969,0.445
Random splitting,pdbbind(core),NN regression,0.973,0.494
Random splitting,pdbbind(refined),Random Forest,0.963,0.511
Random splitting,pdbbind(refined),NN regression,0.987,0.503
Random splitting,pdbbind(full),Random Forest,0.965,0.493
Random splitting,pdbbind(full),NN regression,0.983,0.528
Index splitting,chembl,NN regression,0.443,0.427
Random splitting,chembl,NN regression,0.464,0.434
Scaffold splitting,chembl,NN regression,0.484,0.361
Index splitting,qm7,NN regression,0.997,0.986
Random splitting,qm7,NN regression,0.999,0.999
Stratified splitting,qm7,NN regression,0.999,0.999
Index splitting,qm7b,NN regression,0.931,0.803
Random splitting,qm7b,NN regression,0.923,0.884
Stratified splitting,qm7b,NN regression,0.934,0.884
Index splitting,qm9,NN regression,0.733,0.791
Random splitting,qm9,NN regression,0.811,0.823
Stratified splitting,qm9,NN regression,0.843,0.818
User-defined splitting,kaggle,NN regression,0.748,0.452
7 changes: 6 additions & 1 deletion devtools/jenkins/jenkins.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,13 @@ python setup.py install
# Remove any previous results file; `|| true` ignores the error when it does
# not exist.
rm examples/results.csv || true
cd examples
python benchmark.py -d tox21
# Exit status of the benchmark run.
export retval1=$?

cd ..
# NOTE(review): because of the trailing `|| true`, `$?` on the following line
# is always 0, so retval2 never reflects a nosetests failure -- confirm
# whether that is intended.
nosetests -v devtools/jenkins/compare_results.py --with-xunit || true
export retval2=$?

source deactivate
# NOTE(review): the next command appears twice in this view (possibly the old
# and new side of a diff) -- confirm only one copy exists in the real script.
conda remove --name $envname --all
conda remove --name $envname --all
# Sum of the two recorded exit statuses; non-zero if either was non-zero.
export retval=$(($retval1 + $retval2))
# NOTE(review): `return` is only valid inside a function or a sourced script;
# confirm how this file is invoked (otherwise `exit` would be needed).
return ${retval}
Loading

0 comments on commit 302180e

Please sign in to comment.