diff --git a/CapsE_SEARCH17.py b/CapsE_SEARCH17.py
new file mode 100644
index 0000000..e08928e
--- /dev/null
+++ b/CapsE_SEARCH17.py
@@ -0,0 +1,185 @@
+#! /usr/bin/env python
+
+import tensorflow as tf
+import numpy as np
+import os
+import time
+import datetime
+from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
+from builddata_ecir import *
+from capsuleNet_SEARCH17 import CapsE
+
+np.random.seed(1234)
+tf.set_random_seed(1234)
+
+# Parameters
+# ==================================================
+parser = ArgumentParser("CapsE", formatter_class=ArgumentDefaultsHelpFormatter, conflict_handler='resolve')
+
+parser.add_argument("--data", default="./data/", help="Data sources.")
+parser.add_argument("--run_folder", default="./", help="Folder in which runs and checkpoints are written.")
+parser.add_argument("--name", default="ecir", help="Name of the dataset.")
+
+parser.add_argument("--embedding_dim", default=200, type=int, help="Dimensionality of the query/user/doc embeddings (fixed: 200)")
+parser.add_argument("--filter_size", default=1, type=int, help="Convolution filter size (default: 1)")
+parser.add_argument("--num_filters", default=400, type=int, help="Number of filters per filter size (default: 400)")
+parser.add_argument("--learning_rate", default=0.00001, type=float, help="Learning rate")
+parser.add_argument("--batch_size", default=128, type=int, help="Batch size")
+parser.add_argument("--neg_ratio", default=1.0, help="Number of negative triples generated per positive one (default: 1.0)")
+parser.add_argument("--useInitialization", default=True, type=bool, help="Use the pre-trained embeddings")
+parser.add_argument("--num_epochs", default=100, type=int, help="Number of training epochs")
+parser.add_argument("--savedEpochs", default=10, type=int, help="")
+parser.add_argument("--allow_soft_placement", default=True, type=bool, help="Allow soft device placement")
+parser.add_argument("--log_device_placement", default=False, type=bool, help="Log placement of ops on devices")
+parser.add_argument("--model_name", default='search17model', help="Name used for the output/checkpoint folder")
+parser.add_argument("--useConstantInit", action='store_true')
+
+parser.add_argument('--iter_routing', default=1, type=int, help='Number of iterations in the routing algorithm')
+parser.add_argument('--num_outputs_secondCaps', default=1, type=int, help='Number of capsules in the second capsule layer')
+parser.add_argument('--vec_len_secondCaps', default=10, type=int, help='Dimensionality of each capsule in the second capsule layer')
+
+args = parser.parse_args()
+print(args)
+
+# Load data
+print("Loading data...")
+
+train_triples, train_rank_triples, train_val_triples, valid_triples, valid_rank_triples, valid_val_triples, \
+    test_triples, test_rank_triples, test_val_triples, query_indexes, user_indexes, doc_indexes, \
+    indexes_query, indexes_user, indexes_doc = build_data_ecir()
+data_size = len(train_triples)
+train_batch = Batch_Loader_ecir(train_triples, train_val_triples, batch_size=args.batch_size)
+
+assert args.embedding_dim % 200 == 0
+
+pretrained_query = init_dataset_ecir(args.data + args.name + '/query2vec.200.init')
+pretrained_user = init_dataset_ecir(args.data + args.name + '/user2vec.200.init')
+pretrained_doc = init_dataset_ecir(args.data + args.name + '/doc2vec.200.init')
+
+print("Using pre-trained initialization.")
+
+lstEmbedQuery = assignEmbeddings(pretrained_query, query_indexes)
+lstEmbedUser = assignEmbeddings(pretrained_user, user_indexes)
+lstEmbedDoc = assignEmbeddings(pretrained_doc, doc_indexes)
+
+lstEmbedQuery = np.array(lstEmbedQuery, dtype=np.float32)
+lstEmbedUser = np.array(lstEmbedUser, dtype=np.float32)
+lstEmbedDoc = np.array(lstEmbedDoc, dtype=np.float32)
+
+print("Loading data... finished!")
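+
+# Note on the helpers above: build_data_ecir, Batch_Loader_ecir, init_dataset_ecir and
+# assignEmbeddings all come from builddata_ecir, which is not part of this diff. The code
+# below assumes that each training example is a (query, user, doc) index triple and that
+# assignEmbeddings returns, for each index dictionary, the list of pre-trained
+# 200-dimensional vectors ordered by index, which is then used to initialize the
+# corresponding embedding matrix in CapsE.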
+
+# Training
+# ==================================================
+with tf.Graph().as_default():
+    session_conf = tf.ConfigProto(allow_soft_placement=args.allow_soft_placement, log_device_placement=args.log_device_placement)
+    session_conf.gpu_options.allow_growth = True
+    sess = tf.Session(config=session_conf)
+    with sess.as_default():
+        global_step = tf.Variable(0, name="global_step", trainable=False)
+        capse = CapsE(sequence_length=3,
+                      batch_size=20 * args.batch_size,
+                      initialization=[lstEmbedQuery, lstEmbedUser, lstEmbedDoc],
+                      embedding_size=200,
+                      filter_size=args.filter_size,
+                      num_filters=args.num_filters,
+                      iter_routing=args.iter_routing,
+                      num_outputs_secondCaps=args.num_outputs_secondCaps,
+                      vec_len_secondCaps=args.vec_len_secondCaps,
+                      useConstantInit=args.useConstantInit
+                      )
+
+        # Define Training procedure
+        #optimizer = tf.contrib.opt.NadamOptimizer(1e-3)
+        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
+        #optimizer = tf.train.RMSPropOptimizer(learning_rate=args.learning_rate)
+        #optimizer = tf.train.GradientDescentOptimizer(learning_rate=args.learning_rate)
+        grads_and_vars = optimizer.compute_gradients(capse.total_loss)
+        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
+
+        out_dir = os.path.abspath(os.path.join(args.run_folder, "runs_CapsE_SEARCH17", args.model_name))
+        print("Writing to {}\n".format(out_dir))
+
+        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
+        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
+        if not os.path.exists(checkpoint_dir):
+            os.makedirs(checkpoint_dir)
+        # Initialize all variables
+        sess.run(tf.global_variables_initializer())
+
+        def train_step(x_batch, y_batch):
+            """
+            A single training step
+            """
+            feed_dict = {
+                capse.input_x: x_batch,
+                capse.input_y: y_batch
+            }
+            _, step, loss = sess.run([train_op, global_step, capse.total_loss], feed_dict)
+            return loss
+
+        # Predict function to predict scores for test data
+        # (CapsE defines no dropout placeholder, so only the inputs are fed here.)
+        def predict(x_batch, y_batch):
+            feed_dict = {
+                capse.input_x: x_batch,
+                capse.input_y: y_batch
+            }
+            scores = sess.run([capse.predictions], feed_dict)
+            return scores
+
+        def test_prediction(x_batch, y_batch, lstOriginalRank):
+            new_x_batch = np.concatenate(x_batch)
+            new_y_batch = np.concatenate(y_batch, axis=0)
+
+            # Pad to a multiple of the fixed graph batch size (20 * args.batch_size)
+            while len(new_x_batch) % (args.batch_size * 20) != 0:
+                new_x_batch = np.append(new_x_batch, np.array([new_x_batch[-1]]), axis=0)
+                new_y_batch = np.append(new_y_batch, np.array([new_y_batch[-1]]), axis=0)
+
+            results = []
+            listIndexes = range(0, len(new_x_batch), 20 * args.batch_size)
+            for tmpIndex in range(len(listIndexes) - 1):
+                results = np.append(results,
+                                    predict(new_x_batch[listIndexes[tmpIndex]:listIndexes[tmpIndex + 1]],
+                                            new_y_batch[listIndexes[tmpIndex]:listIndexes[tmpIndex + 1]]))
+            results = np.append(results,
+                                predict(new_x_batch[listIndexes[-1]:], new_y_batch[listIndexes[-1]:]))
+
+            lstresults = []
+            _start = 0
+            for tmp in lstOriginalRank:
+                _end = _start + len(tmp)
+                lstsorted = np.argsort(results[_start:_end])
+                lstresults.append(np.where(lstsorted == 0)[0] + 1)
+                _start = _end
+
+            return lstresults
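+
+        # Evaluation protocol (descriptive comment): each entry of lstOriginalRank is assumed
+        # to be a candidate list whose true (clicked) triple sits at position 0. test_prediction
+        # scores all candidates with the fixed-size graph above and records the rank of that
+        # true triple in an ascending sort of the scores. Because the squared-softplus loss in
+        # capsuleNet_SEARCH17.py pushes scores of positive triples towards negative values, a
+        # lower score means "more likely relevant", so rank 1 is best. computeMRR and computeP1
+        # (from builddata_ecir, not shown in this diff) presumably turn these ranks into mean
+        # reciprocal rank and precision@1.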
+
+        wri = open(checkpoint_prefix + '.cls.' + '.txt', 'w')
+
+        lstvalid_mrr = []
+        lsttest_mrr = []
+        num_batches_per_epoch = int((data_size - 1) / (args.batch_size)) + 1
+        for epoch in range(args.num_epochs):
+            for batch_num in range(num_batches_per_epoch):
+                x_batch, y_batch = train_batch()
+                train_step(x_batch, y_batch)
+                current_step = tf.train.global_step(sess, global_step)
+
+            valid_results = test_prediction(valid_triples, valid_val_triples, valid_rank_triples)
+            test_results = test_prediction(test_triples, test_val_triples, test_rank_triples)
+            valid_mrr = computeMRR(valid_results)
+            test_mrr = computeMRR(test_results)
+            test_p1 = computeP1(test_results)
+            lstvalid_mrr.append(valid_mrr)
+            lsttest_mrr.append([test_mrr, test_p1])
+
+            wri.write("epoch " + str(epoch) + ": " + str(valid_mrr) + " " + str(test_mrr) + " " + str(test_p1) + "\n")
+
+        index_valid_max = np.argmax(lstvalid_mrr)
+        wri.write("\n--------------------------\n")
+        wri.write("\nBest mrr in valid at epoch " + str(index_valid_max) + ": " + str(lstvalid_mrr[index_valid_max]) + "\n")
+        wri.write("\nMRR and P1 in test: " + str(lsttest_mrr[index_valid_max][0]) + " " + str(lsttest_mrr[index_valid_max][1]) + "\n")
+        wri.close()
diff --git a/capsuleNet_SEARCH17.py b/capsuleNet_SEARCH17.py
new file mode 100644
index 0000000..e551d97
--- /dev/null
+++ b/capsuleNet_SEARCH17.py
@@ -0,0 +1,72 @@
+import tensorflow as tf
+
+from capsuleLayer import CapsLayer
+import math
+
+epsilon = 1e-9
+
+class CapsE(object):
+    def __init__(self, sequence_length, embedding_size, num_filters, iter_routing, batch_size=256,
+                 num_outputs_secondCaps=1, vec_len_secondCaps=10, initialization=[], filter_size=1, useConstantInit=False):
+        # Placeholders for input, output
+        self.input_x = tf.placeholder(tf.int32, [batch_size, sequence_length], name="input_x")
+        self.input_y = tf.placeholder(tf.float32, [batch_size, 1], name="input_y")
+        self.filter_size = filter_size
+        self.num_filters = num_filters
+        self.sequence_length = sequence_length
+        self.embedding_size = embedding_size
+        self.iter_routing = iter_routing
+        self.num_outputs_secondCaps = num_outputs_secondCaps
+        self.vec_len_secondCaps = vec_len_secondCaps
+        self.batch_size = batch_size
+        self.useConstantInit = useConstantInit
+        # Embedding layer: query and doc embeddings are kept fixed, user embeddings are trainable
+        with tf.name_scope("embedding"):
+            self.W_query = tf.get_variable(name="W_query", initializer=initialization[0], trainable=False)
+            self.W_user = tf.get_variable(name="W_user", initializer=initialization[1])
+            self.W_doc = tf.get_variable(name="W_doc", initializer=initialization[2], trainable=False)
+
+        self.embedded_query = tf.nn.embedding_lookup(self.W_query, self.input_x[:, 0])
+        self.embedded_user = tf.nn.embedding_lookup(self.W_user, self.input_x[:, 1])
+        self.embedded_doc = tf.nn.embedding_lookup(self.W_doc, self.input_x[:, 2])
+
+        self.embedded_query = tf.reshape(self.embedded_query, [batch_size, 1, self.embedding_size])
+        self.embedded_user = tf.reshape(self.embedded_user, [batch_size, 1, self.embedding_size])
+        self.embedded_doc = tf.reshape(self.embedded_doc, [batch_size, 1, self.embedding_size])
+
+        self.embedded_chars = tf.concat([self.embedded_query, self.embedded_user, self.embedded_doc], axis=1)
+        self.X = tf.expand_dims(self.embedded_chars, -1)
+
+        self.build_arch()
+        self.loss()
+        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=500)
+
+        tf.logging.info('Setting up the main structure')
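+
+    # Architecture sketch (descriptive only; CapsLayer is defined in capsuleLayer.py, which is
+    # not part of this diff): the (query, user, doc) embeddings are stacked into a
+    # 3 x embedding_size matrix. The first capsule layer convolves over this matrix with
+    # num_filters filters, the second capsule layer applies dynamic routing to produce a single
+    # capsule of dimension vec_len_secondCaps, and the norm of that capsule (v_length) serves
+    # as the score of the input triple.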
+    def build_arch(self):
+        # The first capsule layer
+        with tf.variable_scope('FirstCaps_layer'):
+            self.firstCaps = CapsLayer(num_outputs_secondCaps=self.num_outputs_secondCaps,
+                                       vec_len_secondCaps=self.vec_len_secondCaps,
+                                       with_routing=False, layer_type='CONV', embedding_size=self.embedding_size,
+                                       batch_size=self.batch_size, iter_routing=self.iter_routing,
+                                       useConstantInit=self.useConstantInit, filter_size=self.filter_size,
+                                       num_filters=self.num_filters, sequence_length=self.sequence_length)
+
+            self.caps1 = self.firstCaps(self.X, kernel_size=1, stride=1)
+        # The second capsule layer
+        with tf.variable_scope('SecondCaps_layer'):
+            self.secondCaps = CapsLayer(num_outputs_secondCaps=self.num_outputs_secondCaps, vec_len_secondCaps=self.vec_len_secondCaps,
+                                        with_routing=True, layer_type='FC',
+                                        batch_size=self.batch_size, iter_routing=self.iter_routing,
+                                        embedding_size=self.embedding_size, useConstantInit=self.useConstantInit, filter_size=self.filter_size,
+                                        num_filters=self.num_filters, sequence_length=self.sequence_length)
+            self.caps2 = self.secondCaps(self.caps1)
+
+        self.v_length = tf.sqrt(tf.reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) + epsilon)
+
+    def loss(self):
+        self.scores = tf.reshape(self.v_length, [self.batch_size, 1])
+        self.predictions = tf.nn.sigmoid(self.scores)
+        print("Using square softplus loss")
+        losses = tf.square(tf.nn.softplus(self.scores * self.input_y))
+        self.total_loss = tf.reduce_mean(losses)
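+
+# Note on the scoring and loss above (illustrative comment, not used by the training code):
+# input_y is presumably +1 for observed (clicked) triples and -1 for sampled negatives, so
+# losses = softplus(score * y)^2 drives scores of positive triples towards negative values
+# and scores of negatives towards positive values. For example, with y = +1 a score of -3
+# gives softplus(-3)^2 ~= 0.002 (small loss), while a score of +3 gives softplus(3)^2 ~= 9.3
+# (large loss). This is why CapsE_SEARCH17.py ranks candidates by ascending score and treats
+# rank 1 as the best result; self.predictions = sigmoid(score) is correspondingly low for
+# triples the model considers positive.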