-
Notifications
You must be signed in to change notification settings - Fork 2
/
vis_calc_ig.py
93 lines (75 loc) · 4.44 KB
/
vis_calc_ig.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
__author__ = 'jasper.zuallaert'
import sys
import numpy as np
import input_manager as im
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
MAX_LENGTH = 200
def runIntegratedGradientsOnTestSet(predictions_logits,
                                    sess,
                                    X_ph,
                                    seqlens_ph,
                                    dropout_ph,
                                    test_dataset):
    """Compute Integrated Gradients attributions for every sequence in
    ``test_dataset`` and print them to stdout.

    Gradients of the logits w.r.t. the one-hot input are integrated along the
    straight-line path from an all-zeros baseline to the input, using the
    trapezoidal rule over ``num_integration_steps`` steps.

    Parameters:
        predictions_logits: graph tensor with the model's pre-sigmoid logits
        sess:               open tf.Session holding the trained model
        X_ph:               placeholder for the one-hot encoded input batch
        seqlens_ph:         placeholder for the per-sequence lengths
        dropout_ph:         placeholder for the dropout rate (fed 0.0 = off)
        test_dataset:       dataset object exposing next_batch(batch_size)
                            -> (ids, batch_x, lengths, batch_y, epoch_finished)

    Printed per sequence: an id/prediction/class/length header line, the
    amino-acid sequence, and the per-position attribution scores.
    """
    ### Build all graph nodes ONCE, before the batch loop.  The previous
    ### version created tf.sigmoid(...) inside the loop, adding new nodes to
    ### the default graph on every batch.
    gs = tf.gradients(predictions_logits, X_ph)
    predictions_sigm = tf.sigmoid(predictions_logits)
    num_integration_steps = 25

    epoch_finished = False
    while not epoch_finished:
        ids, batch_x, lengths_x, batch_y, epoch_finished = test_dataset.next_batch(512)
        lengths_x = [min(x, MAX_LENGTH) for x in lengths_x]  # truncate to MAX_LENGTH (200)

        allNucs = np.argmax(batch_x, axis=-1)
        allClasses = [y[0] for y in batch_y]
        allSeqLens = lengths_x
        allValues = np.zeros(batch_x.shape, np.float32)
        allPreds = [p[0] for p in sess.run(predictions_sigm,
                                           feed_dict={X_ph: batch_x, seqlens_ph: lengths_x, dropout_ph: 0.0})]
        allIDs = [x.rstrip().split('\t')[0] for x in ids]

        ### The IG baseline is a zero array with the *batch* shape.  The old
        ### code called np.zeros_like on the placeholder tensor X_ph itself,
        ### which does not yield a usable numpy baseline.
        baseline = np.zeros_like(batch_x)
        difference_part = (batch_x - baseline) / num_integration_steps

        ### Trapezoidal approximation of the path integral of the gradients:
        ### average the gradients at both ends of each sub-interval.
        for step in range(1, num_integration_steps + 1):
            batch_x_for_this_step_1 = baseline + difference_part * (step - 1)
            batch_x_for_this_step_2 = baseline + difference_part * step
            all_gradients_1 = sess.run(gs, feed_dict={X_ph: batch_x_for_this_step_1, seqlens_ph: lengths_x, dropout_ph: 0.0})[0]
            all_gradients_2 = sess.run(gs, feed_dict={X_ph: batch_x_for_this_step_2, seqlens_ph: lengths_x, dropout_ph: 0.0})[0]
            allValues += (all_gradients_1 + all_gradients_2) / 2 * difference_part

        ### Generate outputs.  Note that the printed sequence is truncated if
        ### the actual length surpasses MAX_LENGTH (200).
        for id, pred, seq, cl, seqlen, values in zip(allIDs, allPreds, allNucs, allClasses, allSeqLens, allValues):
            print('{},{},{},actual_length={}'.format(id, pred, cl, seqlen))
            print(','.join(['ACDEFGHIKLMNPQRSTVWY'[int(nuc)] for nuc in seq[:seqlen]]))
            print(','.join([str(score[int(nuc)]) for score, nuc in zip(values[:seqlen], seq[:seqlen])]))
# Function to call if we want to use IntegratedGradients.py from another file (such as SingleTermWorkflow.py)
# - For parameters, see the explanation for the function above
def runFromSession(sess, test_set):
    """Resolve the model's tensors from the default graph and run the
    Integrated Gradients computation over ``test_set``.

    Entry point for callers that already hold a session with a restored
    model (e.g. other workflow scripts); see
    runIntegratedGradientsOnTestSet for parameter details.
    """
    by_name = tf.get_default_graph().get_tensor_by_name
    logits_tensor = by_name("my_logits/BiasAdd:0")
    input_tensor = by_name("X_placeholder:0")
    seqlen_tensor = by_name("seqlen_placeholder:0")
    dropout_tensor = by_name("dropout_placeholder:0")
    runIntegratedGradientsOnTestSet(logits_tensor, sess, input_tensor,
                                    seqlen_tensor, dropout_tensor, test_set)
# If called as a standalone python script, it should have the 2 arguments as stated below
# if len(sys.argv) != 7 and sys.argv[0] == 'IntegratedGradientsRunner.py':
# print('Usage: python IntegratedGradientsRunner.py <term number> <parameter file> <train file> <test file> <use_reference> <output_file>')
# elif sys.argv[0] == 'IntegratedGradientsRunner.py':
# run: vis_calc_ig.py <parameter_file> <dataset_name>
# with <parameter_file> being the file with the parameters of the model, with its orginial name
# and <dataset_name> either 'sc' or 'pp'
if __name__ == '__main__':
    # Usage: vis_calc_ig.py <parameter_file> <dataset_name>
    #   <parameter_file>: checkpoint directory of the trained model; its
    #                     trailing digit encodes the cross-validation fold
    #   <dataset_name>:   dataset key, either 'sc' or 'pp'
    param_file = sys.argv[1].rstrip('/')
    fold_n = int(param_file[-1])  # fold index from the directory's last character
    dataset_name = sys.argv[2]

    # imported here to avoid a circular import at module load time -- TODO confirm
    from main import get_filenames, dataset_loc
    train_files, valid_file, test_file = get_filenames(dataset_loc[dataset_name], fold_n, 10)
    test_set = im.get_sequences(test_file)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # the checkpoint meta-graph file inside the directory carries the
    # directory's own name (removed a no-op `param_file = param_file` and
    # replaced the manual rfind('/') slicing with os.path helpers)
    param_file_full_name = os.path.join(param_file, os.path.basename(param_file))
    saver = tf.train.import_meta_graph(param_file_full_name + '.meta')
    saver.restore(sess, tf.train.latest_checkpoint(param_file))
    runFromSession(sess, test_set)