In [1]:
import numpy as np
import pandas as pd
import re
import itertools
from collections import Counter
from tensorflow.contrib import learn
import pickle
from sklearn.model_selection import KFold
from BasicTextCNN import BasicTextCNN
from PositionTextCNN import PositionTextCNN

import tensorflow as tf
import numpy as np
import os
import time
import datetime
from tensorflow.contrib import learn
import pandas as pd

In [2]:
def clean_str(string):
    """
    Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
    """
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    string = re.sub(r"\'s", " \'s", string)
    string = re.sub(r"\'ve", " \'ve", string)
    string = re.sub(r"n\'t", " n\'t", string)
    string = re.sub(r"\'re", " \'re", string)
    string = re.sub(r"\'d", " \'d", string)
    string = re.sub(r"\'ll", " \'ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    string = re.sub(r"\(", " \( ", string)
    string = re.sub(r"\)", " \) ", string)
    string = re.sub(r"\?", " \? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower()


def load_data_and_labels():
    x_text = sentence_support_df.tokenizedSentenceFromPaper.as_matrix()
    y = sentence_support_df.label.as_matrix()
    y = [[0, 1] if x == 1 else [1, 0] for x in y  ]
    return [x_text, np.array(y)]

def compute_pathway_name_terms(pathway):
    pathway = pathway.replace('signaling', '').replace('pathway', '').replace('-', ' ')
    return [t for t in pathway.lower().strip().split() if len(t)>1]

def tokenize_pathway_names(sentence, pathwayA, pathwayB):
    genesA = [gene.lower() for gene in pathway_to_genes_dict[pathwayA]] + compute_pathway_name_terms(pathwayA)
    genesB = [gene.lower() for gene in pathway_to_genes_dict[pathwayB]] + compute_pathway_name_terms(pathwayB)
    tokenized_sentence = []
    for word in sentence.lower().split():
        token = None
        for gene in genesA:
            if gene in word:
                token = 'pathwayA'
                break
                
        for gene in genesB:
            if gene in word:
                token = 'pathwayB'
                break
        if token is None:
            token = word
        tokenized_sentence.append(token)
    return ' '.join(tokenized_sentence)

def compute_distance_embedding(word, x):
    word_distances = np.zeros(x.shape, dtype='int')
    for i in range(x.shape[0]):
        word_positions = np.where(x[i] == word)[0]
        for j in range(x.shape[1]):
            if len(word_positions) > 0:
                word_position = word_positions[np.argmin(np.abs(word_positions - j))]
                word_distances[i][j] = word_position - j
                if word_distances[i][j]<0:
                    word_distances[i][j] = 600+word_distances[i][j]
            else:
                word_distances[i][j] = 299
    return word_distances

def compute_pos_embedding(data, vocab_processor):
    pos_emebedding = np.zeros(data.shape, dtype='int')
    for i in range(data.shape[0]):
        tags = pos_tag(word_tokenize(list(vocab_processor.reverse([data[i]]))[0].replace('<UNK>', 'XXX')))
        for j in range(data.shape[1]):
            if tags[j][1].lower() in pos_map:
                pos_emebedding[i][j] = pos_map[tags[j][1].lower()]
            else:
                pos_emebedding[i][j] = 6
    return pos_emebedding

def load_pos_embedding():
    return np.load('pos_emebedding.npy')

def load_pos_mapping():
    pos_map = {}
    with open('pos-mapping.txt', 'r') as f:
        for lines in f.readlines():
            pos, num = lines.split()
            pos_map[pos] = num
    return pos_map

In [3]:
# pos_map = load_pos_mapping()
# %time pos_emebedding = compute_pos_embedding(x, vocab_processor)
# np.save('pos_emebedding.npy', pos_emebedding)

In [4]:
pathway_to_genes_dict = pickle.load(open( "data/pathway_to_genes_dict.p", "rb" ))
sentence_support_df = pd.read_csv('data/sentence_support_v3.tsv', delimiter='\t')
sentence_support_df.drop_duplicates(inplace=True)
sentence_support_df['tokenizedSentenceFromPaper'] = sentence_support_df.apply(lambda x: tokenize_pathway_names(x.sentenceFromPaper, x.pathwayA, x.pathwayB), axis=1)

In [5]:
# Load data
print("Loading data...")
x_text, y = load_data_and_labels()

Loading data...


In [6]:
# Build vocabulary
max_document_length = max([len(x.split(" ")) for x in x_text])
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
x = np.array(list(vocab_processor.fit_transform(x_text)))
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))

Vocabulary Size: 33447


In [7]:
encodedPathwayA, encodedPathwayB = list(vocab_processor.transform(['pathwayA pathwayB']))[0][:2]
encodedPathwayA, encodedPathwayB

(8, 53)

In [8]:
word_distancesA = compute_distance_embedding(encodedPathwayA, x)
word_distancesB = compute_distance_embedding(encodedPathwayB, x)    

In [9]:
pos_embedding = load_pos_embedding()
pos_embedding.shape

(42394, 273)

In [10]:
# Creating folds
kf = KFold(n_splits=4, random_state=5, shuffle=True)
for k, (train_index, test_index) in enumerate(kf.split(x, y)):
# for train_index, test_index in kf.split(x):
#     print("Fold: %s =>" % k,  "TRAIN:", train_index, "TEST:", test_index)
    x_train, x_dev = x[train_index], x[test_index]
    y_train, y_dev = y[train_index], y[test_index]
    
    train_word_distancesA = word_distancesA[train_index]
    train_word_distancesB = word_distancesB[train_index]
    
    test_word_distancesA = word_distancesA[test_index]
    test_word_distancesB = word_distancesB[test_index]
    
    train_pos_embedding = pos_embedding[train_index]
    test_pos_embedding = pos_embedding[test_index]
    
    print("Fold: %s =>" % k, "Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
    
    
#     model = POSTextCNN(sequence_length=x_train.shape[1],
#             vocab_processor=vocab_processor, num_epochs=1, evaluate_every=300, results_dir='fold%s'%k)
#     model.train_network(x_train, y_train, x_dev, y_dev, 
#                         train_word_distancesA, train_word_distancesB, test_word_distancesA, test_word_distancesB,
#                        train_pos_embedding, test_pos_embedding)
    break

Fold: 0 => Train/Dev split: 31795/10599


In [68]:
class AttentionPOSTextCNN(object):
    """
    A Attention based CNN for text classification with Position plus POS features as well.
    
    Uses an embedding layer, followed by a convolutional, max-pooling and softmax layer.
    
    Refer tohttps://www.ncbi.nlm.nih.gov/pmc/articles/PMC5181565/pdf/btw486.pdf for more details.
    """
    def __init__(self, sequence_length, vocab_processor, 
                 num_classes=2, embedding_size=128, filter_sizes=[3,4,5], 
                 num_filters=128, batch_size=64, 
                 l2_reg_lambda=0.0, num_epochs=200,
                 num_checkpoints=5, dropout_prob=0.5, 
                 checkpoint_every=100, evaluate_every=100, 
                 allow_soft_placement=True,log_device_placement=False,
                 results_dir="runs"):
        
        tf.reset_default_graph() 
        self.sequence_length = sequence_length
        self.num_classes = num_classes
        self.vocab_size = len(vocab_processor.vocabulary_)
        self.batch_size = batch_size
        self.embedding_size = embedding_size
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.l2_reg_lambda = l2_reg_lambda
        self.num_epochs = num_epochs
        self.results_dir = results_dir
        
        self.vocab_processor = vocab_processor
        
        self.num_checkpoints = num_checkpoints
        self.dropout_prob = dropout_prob
        self.checkpoint_every = checkpoint_every
        self.evaluate_every = evaluate_every
        
        self.position_vector_mapping = PositionTextCNN.load_position_vector_mapping()
        
        self.allow_soft_placement = allow_soft_placement
        self.log_device_placement = log_device_placement
        
        self.sess = tf.Session()
        self._build_network()
        
    def _build_network(self):
        
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, self.sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, self.num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        
        self.word_distancesA = tf.placeholder(tf.int32, [None, self.sequence_length], name="word_distancesA")
        self.word_distancesB = tf.placeholder(tf.int32, [None, self.sequence_length], name="word_distancesB")
        
        self.encoded_pos = tf.placeholder(tf.int32, [None, self.sequence_length], name="encoded_pos")
        
        # Keeping track of l2 regularization loss (optional)
        self.l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("word_embedding"):
            self.W = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0),name="W") 
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
            
        # Position Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("position_embedding"):
            embedded_positionsA = tf.nn.embedding_lookup(self.position_vector_mapping, self.word_distancesA)
            embedded_positionsB = tf.nn.embedding_lookup(self.position_vector_mapping, self.word_distancesB)
            embedded_positions = tf.concat([embedded_positionsA, embedded_positionsB], 2)
            self.embedded_positions_expanded = tf.cast(tf.expand_dims(embedded_positions, -1), tf.float32)
            
        # POS Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("pos_embedding"):
            one_hot_encoding = tf.one_hot(list(range(8)), 8)
            embedded_pos = tf.nn.embedding_lookup(one_hot_encoding, self.encoded_pos)
            self.embedded_pos_expanded = tf.cast(tf.expand_dims(embedded_pos, -1), tf.float32)
            
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.final_embedded_expanded = tf.concat([self.embedded_chars_expanded, self.embedded_positions_expanded, self.embedded_pos_expanded], 2)
        
        with tf.name_scope("conv-attention-pathwayA"):
            # WEIGHTS
            attention_weights = {
                'h1': tf.Variable(tf.random_normal([self.sequence_length, self.sequence_length])),
            }
            
            attention_biases = {
                'h1': tf.Variable(tf.random_normal([self.sequence_length])),
            }
            self.hidden_layer = tf.nn.sigmoid(tf.add(tf.matmul(self.embedded_chars_expanded, attention_weights['h1']), attention_biases['h1']))
            self.attention_layer = tf.nn.softmax(self.hidden_layer)
            self.attentive_final_embedded_expanded = tf.matmul(self.final_embedded_expanded, self.attention_layer)
                        
        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(self.filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, self.embedding_size+28, 1, self.num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[self.num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.attentive_final_embedded_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, self.sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = self.num_filters * len(self.filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, self.num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[self.num_classes]), name="b")
            self.l2_loss += tf.nn.l2_loss(W)
            self.l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")
            
    def train_network(self, x_train, y_train, x_dev, y_dev,
                     train_word_distancesA, train_word_distancesB, test_word_distancesA, test_word_distancesB,
                     train_pos_embedding, test_pos_embedding):
        
        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            class_weight = tf.constant([1.0, 100.0])
            weights = tf.reduce_sum(class_weight * self.input_y, axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            weighted_losses = losses * weights
            self.loss = tf.reduce_mean(weighted_losses) + self.l2_reg_lambda * self.l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
            _, self.precision = tf.metrics.precision(labels=tf.argmax(self.input_y, 1), predictions=self.predictions, name='precision')
            _, self.recall = tf.metrics.recall(labels=tf.argmax(self.input_y, 1), predictions=self.predictions, name='recall')
            
        # Define Training procedure
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(self.loss)
        self.train_op = optimizer.apply_gradients(grads_and_vars, global_step=self.global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, self.results_dir, timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", self.loss)
        acc_summary = tf.summary.scalar("accuracy", self.accuracy)
        precision_summary = tf.summary.scalar("precision", self.precision)
        recall_summary = tf.summary.scalar("recall", self.recall)

        # Train Summaries
        self.train_summary_op = tf.summary.merge([loss_summary, acc_summary, precision_summary, recall_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        self.train_summary_writer = tf.summary.FileWriter(train_summary_dir, self.sess.graph)

        # Dev summaries
        self.dev_summary_op = tf.summary.merge([loss_summary, acc_summary, precision_summary, recall_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        self.dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, self.sess.graph)
        
        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=self.num_checkpoints)
        
        # Write vocabulary
        self.vocab_processor.save(os.path.join(out_dir, "vocab"))
        
        # Initialize all variables
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())
    
        print("Start training")
        # Generate batches
        batches = PositionTextCNN.batch_iter(
            list(zip(x_train, y_train, train_word_distancesA, train_word_distancesB, train_pos_embedding)), self.batch_size, self.num_epochs)
        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch, batch_word_distancesA, batch_word_distancesB, batch_pos_embedding = zip(*batch)
            self.train_step(x_batch, y_batch, batch_word_distancesA, batch_word_distancesB, batch_pos_embedding)
            current_step = tf.train.global_step(self.sess, self.global_step)
            if current_step % self.evaluate_every == 0:
                print("\nEvaluation:")
                self.dev_step(x_dev, y_dev, test_word_distancesA, test_word_distancesB, test_pos_embedding, writer=self.dev_summary_writer)
                print("")
            if current_step % self.checkpoint_every == 0:
                path = saver.save(self.sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))   
        print("Training finished")
    
    def train_step(self, x_batch, y_batch, batch_word_distancesA, batch_word_distancesB, batch_pos_embedding):
        """
        A single training step
        """
        feed_dict = {
            self.input_x: x_batch,
            self.input_y: y_batch,
            self.dropout_keep_prob: self.dropout_prob,
            self.word_distancesA: batch_word_distancesA,
            self.word_distancesB: batch_word_distancesB,
            self.encoded_pos: batch_pos_embedding
        }
        _, step, summaries, loss, accuracy, precision, recall = self.sess.run(
            [self.train_op, self.global_step, self.train_summary_op, self.loss, self.accuracy, self.precision, self.recall],
            feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, acc {:g}, prec {:g}, recall {:g}".format(time_str, step, loss, accuracy, precision, recall))
        self.train_summary_writer.add_summary(summaries, step)
        
    
    def dev_step(self, x_batch, y_batch, batch_word_distancesA, batch_word_distancesB, batch_pos_embedding, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
                self.input_x: x_batch,
                self.input_y: y_batch,
                self.dropout_keep_prob: 1.0,
                self.word_distancesA: batch_word_distancesA,
                self.word_distancesB: batch_word_distancesB,
                self.encoded_pos: batch_pos_embedding
            }
            step, summaries, loss, accuracy,  precision, recall  = self.sess.run(
                [self.global_step, self.dev_summary_op, self.loss, self.accuracy, self.precision, self.recall],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}, prec {:g}, recall {:g}".format(time_str, step, loss, accuracy, precision, recall))
            if writer:
                writer.add_summary(summaries, step)
                
    @staticmethod            
    def batch_iter(data, batch_size, num_epochs, shuffle=True):
        """
        Generates a batch iterator for a dataset.
        """
        data = np.array(data)
        data_size = len(data)
        num_batches_per_epoch = int((len(data)-1)/batch_size) + 1
        
        for epoch in range(num_epochs):
            # Shuffle the data at each epoch
            if shuffle:
                shuffle_indices = np.random.permutation(np.arange(data_size))
                shuffled_data = data[shuffle_indices]
            else:
                shuffled_data = data
            for batch_num in range(num_batches_per_epoch):
                start_index = batch_num * batch_size
                end_index = min((batch_num + 1) * batch_size, data_size)
                yield shuffled_data[start_index:end_index]
    
    @staticmethod 
    def load_position_vector_mapping():
        # bit_array generated with the distance between 
        # two entities where abs_num represents the distance
        def int2bit_by_distance(int_num, bit_len=10):

            bit_array = np.zeros(bit_len)
            if int_num > 0:
                bit_array[0] = 1

            abs_num = np.abs(int_num)
            if abs_num <= 5:
                for i in range(abs_num):
                    bit_array[-i-1] = 1
            elif abs_num <= 10:
                for i in range(6):
                    bit_array[-i-1] = 1
            elif abs_num <= 20:
                for i in range(7):
                    bit_array[-i-1] = 1
            elif abs_num <= 30:
                for i in range(8):
                    bit_array[-i-1] = 1
            else:
                for i in range(9):
                    bit_array[-i-1] = 1
            return bit_array

        map = {}
        for i in range(-300, 300):
            map[i] = int2bit_by_distance(i, 10)

        return pd.DataFrame.from_dict(map, orient='index', dtype='int').values

In [69]:
model = AttentionPOSTextCNN(sequence_length=x_train.shape[1],
            vocab_processor=vocab_processor, num_epochs=1, evaluate_every=300, results_dir='fold%s'%k)
model.train_network(x_train, y_train, x_dev, y_dev, 
                        train_word_distancesA, train_word_distancesB, test_word_distancesA, test_word_distancesB,
                       train_pos_embedding, test_pos_embedding)

ValueError: Shape must be rank 2 but is rank 4 for 'conv-attention-pathwayA/MatMul' (op: 'MatMul') with input shapes: [?,273,128,1], [273,273].

In [65]:
temp = model.sess.run(model.hidden_layer, feed_dict = {
                model.input_x: x_dev,
                model.input_y: y_dev,
                model.dropout_keep_prob: 1.0,
                model.word_distancesA: test_word_distancesA,
                model.word_distancesB: test_word_distancesB,
                model.encoded_pos: test_pos_embedding
            })

InvalidArgumentError: In[0].dim(0) and In[1].dim(0) must be the same: [10599,273,128] vs [1,128,1]
	 [[Node: conv-attention-pathwayA/MatMul = BatchMatMul[T=DT_FLOAT, adj_x=false, adj_y=false, _device="/job:localhost/replica:0/task:0/cpu:0"](word_embedding/embedding_lookup, conv-attention-pathwayA/ExpandDims_1)]]

Caused by op 'conv-attention-pathwayA/MatMul', defined at:
  File "/Users/adb/anaconda3/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/adb/anaconda3/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/traitlets/config/application.py", line 653, in launch_instance
    app.start()
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-64-f8d9e95a60aa>", line 2, in <module>
    vocab_processor=vocab_processor, num_epochs=1, evaluate_every=300, results_dir='fold%s'%k)
  File "<ipython-input-63-7ff13d966145>", line 43, in __init__
    self._build_network()
  File "<ipython-input-63-7ff13d966145>", line 84, in _build_network
    self.hidden_layer = tf.matmul(self.embedded_chars, tf.expand_dims(tf.expand_dims(tf.nn.embedding_lookup(self.W, [8])[0], -1),0))
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 1814, in matmul
    a, b, adj_x=adjoint_a, adj_y=adjoint_b, name=name)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py", line 337, in _batch_mat_mul
    adj_y=adj_y, name=name)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/adb/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): In[0].dim(0) and In[1].dim(0) must be the same: [10599,273,128] vs [1,128,1]
	 [[Node: conv-attention-pathwayA/MatMul = BatchMatMul[T=DT_FLOAT, adj_x=false, adj_y=false, _device="/job:localhost/replica:0/task:0/cpu:0"](word_embedding/embedding_lookup, conv-attention-pathwayA/ExpandDims_1)]]


In [83]:
np.matmul(x_dev, np.expand_dims(np.argmin(test_word_distancesA, axis=1))

ValueError: shapes (10599,273) and (10599,) not aligned: 273 (dim 1) != 10599 (dim 0)

In [86]:
temp = np.argmin(test_word_distancesA, axis=1)
for i in range(x_dev.shape[0]):
    x_dev[i] = x_dev[i]*temp[i]

In [90]:
np.sum(x_dev, axis=1)

array([  3735, 111942, 103635, ...,      0, 812355,      0])

In [126]:
np.sum(x_dev, axis=1)

array([  3735, 111942, 103635, ...,      0, 812355,      0])