Commit
daiquocnguyen committed Jun 4, 2019
1 parent a1d1e9f · commit f10d825
Showing 2 changed files with 257 additions and 0 deletions.
@@ -0,0 +1,185 @@
#! /usr/bin/env python

import tensorflow as tf
import numpy as np
import os
import time
import datetime
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from builddata_ecir import *
from capsuleNet_SEARCH17 import CapsE

np.random.seed(1234)
tf.set_random_seed(1234)

# Parameters
# ==================================================
parser = ArgumentParser("CapsE", formatter_class=ArgumentDefaultsHelpFormatter, conflict_handler='resolve')
parser.add_argument("--data", default="./data/", help="Data sources.") | ||
parser.add_argument("--run_folder", default="./", help="Data sources.") | ||
parser.add_argument("--name", default="ecir", help="Name of the dataset.") | ||
|
||
parser.add_argument("--embedding_dim", default=200, type=int, help="Dimensionality of character embedding (fixed: 200)") | ||
parser.add_argument("--filter_size", default=1, type=int, help="Comma-separated filter sizes (default: '3,4,5')") | ||
parser.add_argument("--num_filters", default=400, type=int, help="Number of filters per filter size (default: 128)") | ||
parser.add_argument("--learning_rate", default=0.00001, type=float, help="Learning rate") | ||
parser.add_argument("--batch_size", default=128, type=int, help="Batch Size") | ||
parser.add_argument("--neg_ratio", default=1.0, help="Number of negative triples generated by positive (default: 1.0)") | ||
parser.add_argument("--useInitialization", default=True, type=bool, help="Using the pretrained embeddings") | ||
parser.add_argument("--num_epochs", default=100, type=int, help="Number of training epochs") | ||
parser.add_argument("--savedEpochs", default=10, type=int, help="") | ||
parser.add_argument("--allow_soft_placement", default=True, type=bool, help="Allow device soft device placement") | ||
parser.add_argument("--log_device_placement", default=False, type=bool, help="Log placement of ops on devices") | ||
parser.add_argument("--model_name", default='search17model', help="") | ||
parser.add_argument("--useConstantInit", action='store_true') | ||
|
||
parser.add_argument('--iter_routing', default=1, type=int, help='number of iterations in routing algorithm') | ||
parser.add_argument('--num_outputs_secondCaps', default=1, type=int, help='') | ||
parser.add_argument('--vec_len_secondCaps', default=10, type=int, help='') | ||
|
||
args = parser.parse_args() | ||
print(args) | ||
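# Example invocation (illustrative only; the training script's filename is not shown
# in this diff, so "train_CapsE_SEARCH17.py" below is an assumed name):
#   python train_CapsE_SEARCH17.py --data ./data/ --name ecir --batch_size 128 \
#       --num_epochs 100 --model_name search17model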
# Load data
print("Loading data...")

train_triples, train_rank_triples, train_val_triples, valid_triples, valid_rank_triples, valid_val_triples, \
    test_triples, test_rank_triples, test_val_triples, query_indexes, user_indexes, doc_indexes, \
    indexes_query, indexes_user, indexes_doc = build_data_ecir()
data_size = len(train_triples)
train_batch = Batch_Loader_ecir(train_triples, train_val_triples, batch_size=args.batch_size)

assert args.embedding_dim % 200 == 0

pretrained_query = init_dataset_ecir(args.data + args.name + '/query2vec.200.init')
pretrained_user = init_dataset_ecir(args.data + args.name + '/user2vec.200.init')
pretrained_doc = init_dataset_ecir(args.data + args.name + '/doc2vec.200.init')

print("Using pre-trained initialization.")

lstEmbedQuery = assignEmbeddings(pretrained_query, query_indexes)
lstEmbedUser = assignEmbeddings(pretrained_user, user_indexes)
lstEmbedDoc = assignEmbeddings(pretrained_doc, doc_indexes)

lstEmbedQuery = np.array(lstEmbedQuery, dtype=np.float32)
lstEmbedUser = np.array(lstEmbedUser, dtype=np.float32)
lstEmbedDoc = np.array(lstEmbedDoc, dtype=np.float32)

print("Loading data... finished!")
# Training
# ==================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=args.allow_soft_placement, log_device_placement=args.log_device_placement)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        global_step = tf.Variable(0, name="global_step", trainable=False)
        # The graph is built with a fixed batch of 20 * args.batch_size triples;
        # test_prediction below pads its inputs to a multiple of this size.
        capse = CapsE(sequence_length=3,
                      batch_size=20 * args.batch_size,
                      initialization=[lstEmbedQuery, lstEmbedUser, lstEmbedDoc],
                      embedding_size=200,
                      filter_size=args.filter_size,
                      num_filters=args.num_filters,
                      iter_routing=args.iter_routing,
                      num_outputs_secondCaps=args.num_outputs_secondCaps,
                      vec_len_secondCaps=args.vec_len_secondCaps,
                      useConstantInit=args.useConstantInit)

        # Define the training procedure
        #optimizer = tf.contrib.opt.NadamOptimizer(1e-3)
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        #optimizer = tf.train.RMSPropOptimizer(learning_rate=args.learning_rate)
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate=args.learning_rate)
        grads_and_vars = optimizer.compute_gradients(capse.total_loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        out_dir = os.path.abspath(os.path.join(args.run_folder, "runs_CapsE_SEARCH17", args.model_name))
        print("Writing to {}\n".format(out_dir))

        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                capse.input_x: x_batch,
                capse.input_y: y_batch
            }
            _, step, loss = sess.run([train_op, global_step, capse.total_loss], feed_dict)
            return loss

        # Predict function to compute scores for evaluation data
        def predict(x_batch, y_batch):
            feed_dict = {
                capse.input_x: x_batch,
                capse.input_y: y_batch
            }
            # Note: the CapsE model added in this commit defines no dropout placeholder,
            # so none is fed here. sess.run returns a list holding one [batch, 1] array;
            # np.append in test_prediction below flattens it into a 1-D vector of scores.
            scores = sess.run([capse.predictions], feed_dict)
            return scores
        def test_prediction(x_batch, y_batch, lstOriginalRank):
            new_x_batch = np.concatenate(x_batch)
            new_y_batch = np.concatenate(y_batch, axis=0)

            # Pad with the last triple so the total is a multiple of the fixed graph batch (20 * batch_size)
            while len(new_x_batch) % (args.batch_size * 20) != 0:
                new_x_batch = np.append(new_x_batch, np.array([new_x_batch[-1]]), axis=0)
                new_y_batch = np.append(new_y_batch, np.array([new_y_batch[-1]]), axis=0)

            results = []
            listIndexes = range(0, len(new_x_batch), 20 * args.batch_size)
            for tmpIndex in range(len(listIndexes) - 1):
                results = np.append(results,
                                    predict(new_x_batch[listIndexes[tmpIndex]:listIndexes[tmpIndex + 1]],
                                            new_y_batch[listIndexes[tmpIndex]:listIndexes[tmpIndex + 1]]))
            results = np.append(results,
                                predict(new_x_batch[listIndexes[-1]:], new_y_batch[listIndexes[-1]:]))

            # Within each candidate list the true triple sits at index 0; its position
            # in the ascending sort of the scores, plus 1, is its rank.
            lstresults = []
            _start = 0
            for tmp in lstOriginalRank:
                _end = _start + len(tmp)
                lstsorted = np.argsort(results[_start:_end])
                lstresults.append(np.where(lstsorted == 0)[0] + 1)
                _start = _end

            return lstresults
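        # For reference, minimal sketches of what computeMRR and computeP1 (imported from
        # builddata_ecir, whose source is not part of this commit) are expected to compute
        # from the 1-based ranks returned above. These are assumptions, not the
        # repository's actual implementations.
        def _computeMRR_sketch(ranks):
            return np.mean(1.0 / np.concatenate(ranks))

        def _computeP1_sketch(ranks):
            return np.mean(np.concatenate(ranks) == 1)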
        wri = open(checkpoint_prefix + '.cls.txt', 'w')

        lstvalid_mrr = []
        lsttest_mrr = []
        num_batches_per_epoch = int((data_size - 1) / args.batch_size) + 1
        for epoch in range(args.num_epochs):
            for batch_num in range(num_batches_per_epoch):
                x_batch, y_batch = train_batch()
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)

            valid_results = test_prediction(valid_triples, valid_val_triples, valid_rank_triples)
            test_results = test_prediction(test_triples, test_val_triples, test_rank_triples)
            valid_mrr = computeMRR(valid_results)
            test_mrr = computeMRR(test_results)
            test_p1 = computeP1(test_results)
            lstvalid_mrr.append(valid_mrr)
            lsttest_mrr.append([test_mrr, test_p1])

            wri.write("epoch " + str(epoch) + ": " + str(valid_mrr) + " " + str(test_mrr) + " " + str(test_p1) + "\n")

        # Report the test scores at the epoch with the best validation MRR
        index_valid_max = np.argmax(lstvalid_mrr)
        wri.write("\n--------------------------\n")
        wri.write("\nBest MRR on valid at epoch " + str(index_valid_max) + ": " + str(lstvalid_mrr[index_valid_max]) + "\n")
        wri.write("\nMRR and P@1 on test: " + str(lsttest_mrr[index_valid_max][0]) + " " + str(lsttest_mrr[index_valid_max][1]) + "\n")
        wri.close()
@@ -0,0 +1,72 @@
import tensorflow as tf

from capsuleLayer import CapsLayer
import math

epsilon = 1e-9

class CapsE(object):
    def __init__(self, sequence_length, embedding_size, num_filters, iter_routing, batch_size=256,
                 num_outputs_secondCaps=1, vec_len_secondCaps=10, initialization=[], filter_size=1, useConstantInit=False):
        # Placeholders for input triples and their labels
        self.input_x = tf.placeholder(tf.int32, [batch_size, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [batch_size, 1], name="input_y")
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.sequence_length = sequence_length
        self.embedding_size = embedding_size
        self.iter_routing = iter_routing
        self.num_outputs_secondCaps = num_outputs_secondCaps
        self.vec_len_secondCaps = vec_len_secondCaps
        self.batch_size = batch_size
        self.useConstantInit = useConstantInit
        # Embedding layer: query and doc embeddings are frozen at their pretrained
        # values; only the user embeddings are updated during training.
        with tf.name_scope("embedding"):
            self.W_query = tf.get_variable(name="W_query", initializer=initialization[0], trainable=False)
            self.W_user = tf.get_variable(name="W_user", initializer=initialization[1])
            self.W_doc = tf.get_variable(name="W_doc", initializer=initialization[2], trainable=False)

        self.embedded_query = tf.nn.embedding_lookup(self.W_query, self.input_x[:, 0])
        self.embedded_user = tf.nn.embedding_lookup(self.W_user, self.input_x[:, 1])
        self.embedded_doc = tf.nn.embedding_lookup(self.W_doc, self.input_x[:, 2])

        self.embedded_query = tf.reshape(self.embedded_query, [batch_size, 1, self.embedding_size])
        self.embedded_user = tf.reshape(self.embedded_user, [batch_size, 1, self.embedding_size])
        self.embedded_doc = tf.reshape(self.embedded_doc, [batch_size, 1, self.embedding_size])

        self.embedded_chars = tf.concat([self.embedded_query, self.embedded_user, self.embedded_doc], axis=1)
        self.X = tf.expand_dims(self.embedded_chars, -1)
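        # Shape summary (B = batch_size, k = embedding_size), as implied by the ops above:
        #   input_x: [B, 3] holds (query, user, doc) indices
        #   embedded_chars: [B, 3, k] stacks the three embeddings as rows
        #   X: [B, 3, k, 1] is the single-channel "image" fed to the first capsule layer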
        self.build_arch()
        self.loss()
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=500)

        tf.logging.info('Setting up the main structure')
    def build_arch(self):
        # The first capsule layer: convolutional, without routing
        with tf.variable_scope('FirstCaps_layer'):
            self.firstCaps = CapsLayer(num_outputs_secondCaps=self.num_outputs_secondCaps, vec_len_secondCaps=self.vec_len_secondCaps,
                                       with_routing=False, layer_type='CONV', embedding_size=self.embedding_size,
                                       batch_size=self.batch_size, iter_routing=self.iter_routing,
                                       useConstantInit=self.useConstantInit, filter_size=self.filter_size,
                                       num_filters=self.num_filters, sequence_length=self.sequence_length)

            self.caps1 = self.firstCaps(self.X, kernel_size=1, stride=1)
        # The second capsule layer: fully connected, with routing
        with tf.variable_scope('SecondCaps_layer'):
            self.secondCaps = CapsLayer(num_outputs_secondCaps=self.num_outputs_secondCaps, vec_len_secondCaps=self.vec_len_secondCaps,
                                        with_routing=True, layer_type='FC',
                                        batch_size=self.batch_size, iter_routing=self.iter_routing,
                                        embedding_size=self.embedding_size, useConstantInit=self.useConstantInit, filter_size=self.filter_size,
                                        num_filters=self.num_filters, sequence_length=self.sequence_length)
            self.caps2 = self.secondCaps(self.caps1)

        # The score of a triple is the length of the second layer's output capsule
        self.v_length = tf.sqrt(tf.reduce_sum(tf.square(self.caps2), axis=2, keepdims=True) + epsilon)
    def loss(self):
        self.scores = tf.reshape(self.v_length, [self.batch_size, 1])
        self.predictions = tf.nn.sigmoid(self.scores)
        print("Using square softplus loss")
        losses = tf.square(tf.nn.softplus(self.scores * self.input_y))
        self.total_loss = tf.reduce_mean(losses)
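# How the loss above behaves, inferred from the evaluation code (test_prediction ranks
# candidates by ascending score, so a smaller capsule length means a more plausible triple),
# under the assumption (not stated in this commit) that input_y is +1 for true triples
# and -1 for sampled negatives:
if __name__ == "__main__":
    # Minimal numeric sketch of square(softplus(score * y)); illustration only.
    import numpy as np
    softplus = lambda x: np.log1p(np.exp(x))
    print(softplus(0.1 * +1) ** 2)  # ~0.55: a true triple with a low score incurs a modest loss
    print(softplus(3.0 * +1) ** 2)  # ~9.3: a true triple with a high score is penalized heavily
    print(softplus(3.0 * -1) ** 2)  # ~0.002: a negative triple with a high score incurs almost no loss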