## QALSTMCNN

In [4]:
# Scratch cell: build a line-oriented dataset from a placeholder file name.
dataset = tf.data.TextLineDataset(["a"])
# NOTE(review): `map` is called without a mapping function, so this looks like
# an API/signature lookup rather than working code - confirm or remove.
dataset.map()

In [None]:
# Scratch cell (no execution count): bare Saver construction, presumably an
# API lookup rather than part of the model code - confirm or remove.
tf.train.Saver()

In [5]:
import sys
# Put the project checkout on the import path; the `utils` / `models` imports
# in this notebook presumably rely on it (one run failed with
# "No module named 'utils'").
# NOTE(review): absolute, machine-specific path - adjust per environment.
sys.path.append("/home/angrypark/korean-text-matching-tf/")

In [1]:
import os
import tensorflow as tf
import numpy as np
from gensim.models import FastText

from utils.utils import JamoProcessor
from models.base import BaseModel

ImportError: No module named 'utils'

In [2]:
# Scratch cell: stand-alone Adam optimizer with a fixed learning rate of 1e-3.
optimizer = tf.train.AdamOptimizer(1e-3)

In [None]:
# Scratch cell (no execution count): `apply_gradients` is called with no
# arguments, presumably as an API/signature lookup - confirm or remove.
optimizer.apply_gradients()

In [7]:
def get_embeddings(vocab_list_dir, 
                   pretrained_embed_dir, 
                   vocab_size, 
                   embed_dim):
    """Build the initial embedding matrix, seeded from FastText when available.

    Every row starts as a uniform random vector drawn from [-1/16, 1/16).
    When both files exist, row ``i`` is overwritten with the FastText vector
    of the jamo-decomposed word found on line ``i`` of the vocabulary file.
    Words whose lookup fails keep their random row and are counted as OOV.

    Args:
        vocab_list_dir: Path to the vocabulary file, one word per line.
        pretrained_embed_dir: Path to a saved gensim ``FastText`` model.
        vocab_size: Number of rows of the returned matrix.
        embed_dim: Number of columns of the returned matrix.

    Returns:
        A NumPy array of shape ``[vocab_size, embed_dim]``.
    """
    embedding = np.random.uniform(-1/16, 1/16, [vocab_size, embed_dim])

    # Guard clause: without both files there is nothing to load.
    if not (os.path.isfile(pretrained_embed_dir) and os.path.isfile(vocab_list_dir)):
        print("No pre-trained embedding found, initialize with random distribution")
        return embedding

    # Explicit encoding so reading the vocabulary does not depend on the
    # machine's locale.
    with open(vocab_list_dir, "r", encoding="utf-8") as f:
        # Blank lines are kept on purpose: the line position is the row index
        # (and presumably the id assigned by the vocabulary index table built
        # from the same file), so dropping lines would shift every later row.
        vocab_list = [word.strip() for word in f]

    processor = JamoProcessor()
    ft = FastText.load(pretrained_embed_dir)
    num_oov = 0
    for i, vocab in enumerate(vocab_list):
        try:
            embedding[i, :] = ft.wv[processor.word_to_jamo(vocab)]
        except Exception:
            # Best-effort load: any per-word failure (unknown word, row index
            # beyond vocab_size, ...) leaves the random initialisation in
            # place. `Exception` rather than a bare `except` so that
            # KeyboardInterrupt / SystemExit still propagate.
            num_oov += 1
    print("Pre-trained embedding loaded. Number of OOV : {} / {}".format(num_oov, len(vocab_list)))
    return embedding

def _mask_from_top_k(scores, k, cur_batch_length):
    """Return a [batch, batch] float mask marking each row's top-k columns of `scores`, with the diagonal zeroed."""
    # Column indices are sorted within each row so that the (row, col) pairs
    # come out in lexicographic order, which tf.sparse_to_dense expects.
    topk = tf.contrib.framework.sort(tf.nn.top_k(scores, k=k).indices, axis=1)
    # rows[i, j] == i: the row index repeated once per selected column.
    rows = tf.transpose(tf.reshape(tf.tile(tf.range(cur_batch_length), [k]), [k, cur_batch_length]))
    indices = tf.to_int64(tf.reshape(tf.concat([tf.expand_dims(rows, -1), tf.expand_dims(topk, -1)], axis=2), [k*cur_batch_length, 2]))
    mask = tf.sparse_to_dense(sparse_indices=indices, 
                              output_shape=[tf.to_int64(cur_batch_length), tf.to_int64(cur_batch_length)], 
                              sparse_values=tf.ones([k*cur_batch_length], dtype=tf.float32))
    # drop positive (the diagonal pairs each row with itself)
    return tf.multiply(mask, (1 - tf.eye(cur_batch_length)))


def make_negative_mask(distances, num_negative_samples, method="random"):
    """Build a mask selecting negative samples inside a batch.

    Args:
        distances: 2-D tensor whose first dimension is the batch size.
            Assumed to be square ([batch, batch]) - TODO confirm with caller.
        num_negative_samples: Number of columns to select per row.
        method: One of
            "random"   - select `num_negative_samples` columns per row
                         uniformly at random, then zero the diagonal (so a
                         row may end up with one fewer entry);
            "hard"     - select the `num_negative_samples + 1` columns with
                         the largest `distances` values per row, then zero
                         the diagonal;
            "weighted" - return the per-row output of `get_distance_weight`
                         (work in progress, see note in the body).

    Returns:
        For "random" and "hard", a float32 tensor of shape [batch, batch]
        holding 1.0 at selected positions and 0.0 elsewhere. For "weighted",
        whatever `get_distance_weight` produces per row.

    Raises:
        ValueError: If `method` is not one of the supported names.
    """
    # Fail early with a clear message instead of reaching `return mask` with
    # `mask` never assigned.
    if method not in ("random", "hard", "weighted"):
        raise ValueError(
            "Unknown method {!r}; expected 'random', 'hard' or 'weighted'".format(method))

    cur_batch_length = tf.shape(distances)[0]

    if method == "random":
        return _mask_from_top_k(tf.random_uniform([cur_batch_length, cur_batch_length]),
                                num_negative_samples,
                                cur_batch_length)

    if method == "hard":
        # One extra column is requested, presumably to make up for the
        # diagonal entry that gets zeroed afterwards.
        return _mask_from_top_k(distances, num_negative_samples+1, cur_batch_length)

    # method == "weighted"
    # NOTE(review): `get_distance_weight` and `batch_size` are not defined in
    # this function; unless they exist as globals elsewhere this branch raises
    # NameError. The sampling step below is still commented out, so the raw
    # weights are returned instead of a 0/1 mask.
    weight = tf.map_fn(lambda x: get_distance_weight(x, batch_size), tf.to_float(distances))
    mask = weight
#         mask = tf.to_int32(tf.contrib.framework.sort(tf.expand_dims(tf.multinomial(weight, num_negative_samples+1), -1), axis=1))
#         weighted_samples_indices = tf.to_int32(tf.contrib.framework.sort(tf.expand_dims(tf.multinomial(weight, num_negative_samples+1), -1), axis=1))
#         row_indices = tf.expand_dims(tf.transpose(tf.reshape(tf.tile(tf.range(0, batch_size, 1), [num_negative_samples+1]), [num_negative_samples+1, batch_size])), -1)
#         mask_indices = tf.to_int64(tf.squeeze(tf.reshape(tf.concat([row_indices, weighted_samples_indices], 2), [(num_negative_samples+1)*batch_size,1,2])))
#         mask_sparse = tf.SparseTensor(mask_indices, [1]*((num_negative_samples+1)*batch_size), [batch_size,batch_size])
#         mask = tf.sparse_tensor_to_dense(mask_sparse)
#         drop_positive = tf.to_int32(tf.subtract(tf.ones([batch_size, batch_size]), tf.eye(batch_size)))
#         mask = tf.multiply(mask, drop_positive)

    return mask

In [None]:
class QALSTMCNN(BaseModel):
    """Query/reply matching model (graph construction is still in progress).

    The graph currently covers the inputs, the learning-rate schedule and
    optimizer, the embedding lookup, and the creation of the bidirectional
    LSTM cells for queries and replies. The cells are not yet wired into
    any RNN op.
    """

    def __init__(self, data, config, mode="train"):
        """Store the mode, build the graph and initialise the saver.

        Args:
            data: Object providing `get_train_iterator(index_table)` and
                `get_val_iterator(index_table)`.
            config: Configuration object; this class reads `vocab_list`,
                `max_length`, `learning_rate`, `pretrained_embed_dir`,
                `vocab_size`, `embed_dim` and `lstm_dim` from it.
            mode: "train" selects the training iterator; any other value
                selects the validation iterator.
        """
        super(QALSTMCNN, self).__init__(data, config)
        self.mode = mode
        self.build_model()
        # `init_saver` is not defined in this class; presumably provided by
        # BaseModel - confirm.
        self.init_saver()
        
    def build_model(self):
        """Create the inputs, optimizer, embedding layer and LSTM cells."""
        # Build index table
        # With no OOV buckets, tokens missing from the vocabulary file map to
        # `default_value`, i.e. id 0.
        index_table = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=self.config.vocab_list, 
            num_oov_buckets=0, 
            default_value=0)
        
        self.data_iterator = self.data.get_train_iterator(index_table) if self.mode=="train" else self.data.get_val_iterator(index_table)
        
        with tf.variable_scope("inputs"):
            # Placeholders for input, output
            # Each placeholder defaults to the iterator's next batch but can
            # still be overridden through feed_dict.
            input_queries, input_replies, queries_lengths, replies_lengths = self.data_iterator.get_next()
            self.input_queries = tf.placeholder_with_default(input_queries, [None, self.config.max_length], name="input_queries")
            self.input_replies = tf.placeholder_with_default(input_replies, [None, self.config.max_length], name="input_replies")

            self.queries_lengths = tf.placeholder_with_default(queries_lengths, [None], name="queries_length")
            self.replies_lengths = tf.placeholder_with_default(replies_lengths, [None], name="replies_length")
            
            self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
            self.num_negative_samples = tf.placeholder(tf.int32, name="num_negative_samples")
        
        # Dynamic batch size; not used yet in the part of the graph built so far.
        cur_batch_length = tf.shape(self.input_queries)[0]
        
        # Define learning_rate and optimizer
        # Staircase decay: the rate is multiplied by 0.96 every 50000 steps.
        # `global_step_tensor` is not created here; presumably set up by
        # BaseModel - confirm.
        learning_rate = tf.train.exponential_decay(self.config.learning_rate, 
                                                   self.global_step_tensor, 
                                                   decay_steps=50000, 
                                                   decay_rate=0.96, 
                                                   staircase=True)
        self.optimizer = tf.train.AdamOptimizer(learning_rate)
        
        # Embedding Layer
        with tf.variable_scope("embedding"):
            embeddings = tf.Variable(get_embeddings(self.config.vocab_list, 
                                                    self.config.pretrained_embed_dir, 
                                                    self.config.vocab_size, 
                                                    self.config.embed_dim),
                                     trainable=True, 
                                     name="embeddings")
            queries_embedded = tf.nn.embedding_lookup(embeddings, self.input_queries, name="queries_embedded")
            replies_embedded = tf.nn.embedding_lookup(embeddings, self.input_replies, name="replies_embedded")
            # The variable is initialised from a NumPy array, so the lookups
            # are cast to float32 for the layers that follow.
            queries_embedded, replies_embedded = tf.cast(queries_embedded, tf.float32), tf.cast(replies_embedded, tf.float32)

        # Build LSTM Layer
        query_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim, 
                                                     use_peepholes=True,
                                                     name="query_fw")
        query_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim, 
                                                     use_peepholes=True,
                                                     name="query_bw")
        # The reply cells get their own variables; binding them to the
        # query_* names (as before) discarded the query cells.
        reply_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim, 
                                                     use_peepholes=True,
                                                     name="reply_fw")
        reply_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(self.config.lstm_dim, 
                                                     use_peepholes=True,
                                                     name="reply_bw")
        

In [None]:
# Scratch cell (no execution count): LSTMCell is constructed without a unit
# count, presumably as an API/signature lookup - confirm or remove.
tf.nn.rnn_cell.LSTMCell()

In [None]:
# Scratch cell (no execution count): dynamic_rnn is called without a cell or
# inputs, presumably as an API/signature lookup - confirm or remove.
tf.nn.dynamic_rnn()