In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import tensorflow as tf
from config import Config
import os

from embeddings import load_embeddings

from data_utils import QuoraDataset, DataIterator

from tqdm import tqdm

  return f(*args, **kwds)
Using TensorFlow backend.


In [3]:
# Experiment configuration (paths, hyper-parameters) shared by every cell below.
config = Config()

# Pretrained vectors plus the two vocabulary mappings returned alongside them
# (word -> index and index -> word, going by the names; verify in embeddings.py).
loaded_embeddings, (w2idx, idx2w) = load_embeddings(
    config.glove_filename,
    binary=False,
)

Loading from saved word_embeddings
Loading vocab


In [4]:
# Global max-pooling layer; not referenced by the cells visible in this notebook.
max_pool = tf.contrib.keras.layers.GlobalMaxPool1D()

# Session options used by AutoEncoder.train: let TensorFlow grow its GPU memory
# allocation as needed (allow_growth) instead of using the default behaviour.
tf_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))

class AutoEncoder(object):
    """Sequence auto-encoder over the first question of each example.

    A bidirectional LSTM encodes the embedded token sequence; its final
    forward/backward states are concatenated and used as the initial state of
    an LSTM decoder that is fed all-zero inputs for ``config.padlen`` steps.
    A per-timestep linear projection turns the decoder outputs into vocabulary
    logits, and the model is trained to reproduce the input token ids.

    Attributes read from ``config`` by this class: ``hidden_size``,
    ``batch_size``, ``padlen``, ``train_embeddings``, ``lr_method``, ``lr``,
    ``lr_decay``, ``lr_divide``, ``dropout``, ``n_epochs``,
    ``nepochs_no_improv``, ``log_path``, ``model_path`` and ``conf_dir``.
    """

    # Values of ``config.lr_method`` that ``build`` can turn into an optimizer.
    _SUPPORTED_LR_METHODS = ("adam", "sgd")

    def __init__(self, config, embeddings):
        """Store the configuration and the initial embedding matrix.

        Args:
            config: configuration object (see the class docstring for the
                attributes that are read).
            embeddings: sized container with one embedding row per vocabulary
                entry; ``len(embeddings)`` becomes ``self.vocab_size``.
        """
        self.config = config
        self.embeddings = embeddings
        self.vocab_size = len(embeddings)

    def bilstm(self, seq, seq_len):
        """Run a bidirectional LSTM over ``seq``.

        Args:
            seq: float tensor of embedded inputs, passed straight to
                ``tf.nn.bidirectional_dynamic_rnn``.
            seq_len: per-example sequence lengths.

        Returns:
            ``(output, state)`` where ``output`` is the forward and backward
            outputs concatenated on the last axis and ``state`` is the
            ``(forward_state, backward_state)`` pair returned by TensorFlow.
        """
        cell_fw = tf.nn.rnn_cell.LSTMCell(self.config.hidden_size)
        cell_bw = tf.nn.rnn_cell.LSTMCell(self.config.hidden_size)
        (output_fw, output_bw), state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, seq, sequence_length=seq_len, dtype=tf.float32)
        output = tf.concat([output_fw, output_bw], axis=-1)
        return output, state

    def lstm(self, seq, seq_len):
        """Run a single forward LSTM over ``seq``.

        Not used by ``build``; kept as part of the public interface.

        Returns:
            The ``(output, state)`` pair from ``tf.nn.dynamic_rnn``.
        """
        cell_fw = tf.nn.rnn_cell.LSTMCell(self.config.hidden_size)
        output, state = tf.nn.dynamic_rnn(cell_fw, seq, sequence_length=seq_len, dtype=tf.float32)
        return output, state

    def decode(self, initial_state):
        """Unroll the decoder LSTM for ``config.padlen`` steps.

        The decoder receives no real input: it is fed a zero tensor of shape
        ``[batch_size, padlen, 1]`` so that everything it emits has to come
        from ``initial_state``.

        Args:
            initial_state: ``LSTMStateTuple`` whose ``c``/``h`` have
                ``2 * config.hidden_size`` units, matching the decoder cell.

        Returns:
            The ``(output, state)`` pair from ``tf.nn.dynamic_rnn``.
        """
        cell_fw = tf.nn.rnn_cell.LSTMCell(2 * self.config.hidden_size)
        dec_inputs = tf.zeros([self.config.batch_size, self.config.padlen, 1])
        output, state = tf.nn.dynamic_rnn(cell_fw, dec_inputs, initial_state=initial_state)

        return output, state

    def build(self):
        """Create the placeholders, graph, loss, optimizer, metrics and saver.

        Raises:
            ValueError: if ``config.lr_method`` is not one of
                ``_SUPPORTED_LR_METHODS``. Checked before any graph node is
                created so a bad configuration fails fast and clearly.
        """
        lr_method = self.config.lr_method
        if lr_method not in self._SUPPORTED_LR_METHODS:
            raise ValueError(
                "Unsupported lr_method {!r}; expected one of {}".format(
                    lr_method, ", ".join(self._SUPPORTED_LR_METHODS)))

        ### Placeholders
        self.q1 = tf.placeholder(tf.int64, shape=[self.config.batch_size, None], name="question1")
        self.l1 = tf.placeholder(tf.int64, shape=[self.config.batch_size], name="len1")

        # `dropout` is the drop probability (keep_prob is 1 - dropout below).
        self.dropout = tf.placeholder(dtype=tf.float32, shape=[], name="dropout")
        self.lr = tf.placeholder(dtype=tf.float32, shape=[], name="lr")

        ### Embedding layer
        with tf.variable_scope("word_embeddings"):
            _word_embeddings = tf.Variable(self.embeddings, name="_word_embeddings", dtype=tf.float32,
                                           trainable=self.config.train_embeddings)
            we1 = tf.nn.embedding_lookup(_word_embeddings, self.q1, name="q1_embedded")

            we1 = tf.nn.dropout(we1, keep_prob=1 - self.dropout)

        ### Shared layer
        with tf.variable_scope("bilstm"):
            # Only the final states are used; the per-step outputs are discarded.
            _, state1 = self.bilstm(we1, self.l1)

        # Join forward and backward final states so they fit the decoder cell,
        # which has 2 * hidden_size units.
        state = tf.contrib.rnn.LSTMStateTuple(
            tf.concat([state1[0].c, state1[1].c], axis=1),
            tf.concat([state1[0].h, state1[1].h], axis=1))

        ### Decoder
        decoded, _ = self.decode(state)

        ### logits
        with tf.variable_scope("linear_projection"):
            # A width-1 convolution applies the same projection to every step.
            W = tf.Variable(tf.random_normal([1, 2 * self.config.hidden_size, self.vocab_size], stddev=1e-3), name="w")
            self.logits = tf.nn.conv1d(decoded, W, 1, "VALID", name="logits")

        ### Loss
        # The mask and the logits both span `padlen` steps, so fed `q1` batches
        # are expected to be padded to exactly `padlen` tokens.
        losses_mask = tf.sequence_mask(lengths=self.l1, maxlen=self.config.padlen, dtype=tf.float32)
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.q1, logits=self.logits)

        # Mean cross-entropy over the non-padding positions of the batch.
        self.cross_entropy = tf.reduce_sum(losses * losses_mask) / tf.reduce_sum(losses_mask)

        ### Optimizer
        with tf.variable_scope("train_step"):
            if lr_method == "adam":
                optimizer = tf.train.AdamOptimizer(self.lr)
            else:  # "sgd" -- the only other value accepted by the check above
                optimizer = tf.train.GradientDescentOptimizer(self.lr)

            self.train_step = optimizer.minimize(self.cross_entropy)

        ### Evaluation
        correct_prediction = tf.equal(tf.argmax(self.logits, axis=-1), self.q1)

        # Token-level reconstruction accuracy over the non-padding positions.
        self.accuracy = tf.reduce_sum(tf.cast(correct_prediction, tf.float32) * losses_mask) / tf.reduce_sum(losses_mask)

        ### Init
        self.init = tf.global_variables_initializer()

        ### Summaries
        with tf.variable_scope("summaries"):

            # train -- merge_all() must run before the eval summaries below are
            # created, otherwise they would be merged into the training summary.
            tf.summary.scalar('cross_entropy', self.cross_entropy)
            tf.summary.scalar('accuracy', self.accuracy)
            self.merged = tf.summary.merge_all()

            # test -- values aggregated in Python are fed through placeholders.
            self.acc_value = tf.placeholder_with_default(tf.constant(0.0), shape=())
            self.ce_value = tf.placeholder_with_default(tf.constant(0.0), shape=())
            acc_summary = tf.summary.scalar('accuracy', self.acc_value)
            ce_summary = tf.summary.scalar('cross_entropy', self.ce_value)
            self.merged_eval = tf.summary.merge([acc_summary, ce_summary])

        self.saver = tf.train.Saver()

    def run_epoch(self, sess, train, dev, test, epoch, lr, test_step=1000):
        """Train for one pass over ``train``, evaluating periodically.

        Args:
            sess: active ``tf.Session``.
            train, dev, test: datasets accepted by ``DataIterator``.
            epoch: zero-based epoch index, used to offset summary steps.
            lr: learning rate fed for every batch of this epoch.
            test_step: evaluate on ``dev`` and ``test`` every this many batches
                (including batch 0).

        Returns:
            The dev accuracy from the most recent evaluation in this epoch.
        """
        iterator = DataIterator(train, self.config.batch_size, strict=1)
        # Ceiling division; `iterator.max` is presumably the number of examples
        # -- TODO confirm against data_utils.DataIterator.
        nbatches = (iterator.max + self.config.batch_size - 1) // self.config.batch_size
        dev_acc = 0

        for i in tqdm(range(nbatches)):
            # Only the first question and its length are used by the auto-encoder.
            q1, q2, l1, l2, y = next(iterator)
            fd = {self.q1: q1, self.l1: l1, self.dropout: self.config.dropout, self.lr: lr}
            _, summary = sess.run([self.train_step, self.merged], feed_dict=fd)

            global_step = epoch * nbatches + i

            # tensorboard
            if i % 10 == 0:
                self.train_writer.add_summary(summary, global_step)

            if i % test_step == 0:
                summary, dev_acc = self.run_evaluate(sess, dev)
                self.dev_writer.add_summary(summary, global_step)
                print("Step {}/{}".format(i, nbatches))
                print("dev acc {:04.2f}".format(100 * dev_acc))

                summary, test_acc = self.run_evaluate(sess, test)
                self.test_writer.add_summary(summary, global_step)
                print("test acc {:04.2f}".format(100 * test_acc))

        return dev_acc

    def run_evaluate(self, sess, data):
        """Compute accuracy and cross-entropy over ``data`` without dropout.

        Per-batch metrics are weighted by the batch size and divided by
        ``iterator.max``.

        Returns:
            ``(summary, accuracy)``: a serialized summary holding the averaged
            accuracy and cross-entropy, and the averaged accuracy itself.
        """
        iterator = DataIterator(data, self.config.batch_size, strict=1)
        nbatches = (iterator.max + self.config.batch_size - 1) // self.config.batch_size

        accuracy, cross = 0, 0
        for _ in range(nbatches):
            q1, q2, l1, l2, y = next(iterator)
            # The lr placeholder is fed for parity with training; no train op runs here.
            fd = {self.q1: q1, self.l1: l1, self.dropout: 0, self.lr: 0}
            acc, ce = sess.run([self.accuracy, self.cross_entropy], feed_dict=fd)

            accuracy += acc * len(q1)
            cross += ce * len(q1)

        accuracy /= iterator.max
        cross /= iterator.max

        summary = sess.run(self.merged_eval, feed_dict={self.acc_value: accuracy, self.ce_value: cross})

        return summary, accuracy

    def train(self, train_data, dev_data, test_data):
        """Train with learning-rate decay, checkpointing and early stopping.

        After each epoch the learning rate is multiplied by ``config.lr_decay``.
        If the dev accuracy improved, the model is saved to
        ``config.model_path``; otherwise the learning rate is additionally
        divided by ``config.lr_divide`` and training stops once
        ``config.nepochs_no_improv`` consecutive epochs bring no improvement.
        """
        best_acc = 0
        nepoch_no_improv = 0

        with tf.Session(config=tf_config) as sess:
            log_dir = self.config.log_path + "autoencoder/"
            self.train_writer = tf.summary.FileWriter(log_dir + "train", sess.graph)
            self.dev_writer = tf.summary.FileWriter(log_dir + "dev", sess.graph)
            self.test_writer = tf.summary.FileWriter(log_dir + "test", sess.graph)

            try:
                sess.run(self.init)

                lr = self.config.lr

                print("Training in {}".format(self.config.conf_dir))
                for epoch in range(self.config.n_epochs):
                    print("Epoch {}/{} :".format(epoch + 1, self.config.n_epochs))
                    dev_acc = self.run_epoch(sess, train_data, dev_data, test_data, epoch, lr)

                    lr *= self.config.lr_decay

                    if dev_acc > best_acc:
                        nepoch_no_improv = 0
                        # exist_ok avoids the check-then-create race of a
                        # separate os.path.exists test.
                        os.makedirs(self.config.model_path, exist_ok=True)
                        # NOTE(review): model_path is used both as a directory
                        # and as the checkpoint prefix -- confirm this is intended.
                        self.saver.save(sess, self.config.model_path)
                        best_acc = dev_acc
                        print("New best score on dev !")

                    else:
                        lr /= self.config.lr_divide
                        nepoch_no_improv += 1
                        if nepoch_no_improv >= self.config.nepochs_no_improv:
                            print("Early stopping after {} epochs without improvements".format(nepoch_no_improv))
                            break
            finally:
                # Flush and release the event files even if training is
                # interrupted or raises.
                for writer in (self.train_writer, self.dev_writer, self.test_writer):
                    writer.close()

In [5]:
### Loading Quora Datasets
# The training split is loaded without an explicit vocabulary; the word/index
# mappings it exposes are reused for the dev and test splits.
qd_train = QuoraDataset(config.train_filename, save_path=config.train_save)
w2idx_train = qd_train.w2idx
idx2w_train = qd_train.idx2w

# One small random vector per training-vocabulary entry; rows for words that
# have a pretrained vector are overwritten in the next cell.
embeddings = np.random.normal(
    scale=0.001,
    size=(len(w2idx_train), config.we_dim),
)

In [6]:
# Copy the pretrained vector of every training-vocabulary word that also
# appears in the loaded embedding vocabulary; other rows keep their random init.
for word, row in w2idx_train.items():
    pretrained_row = w2idx.get(word)
    if pretrained_row is None:
        continue
    embeddings[row] = loaded_embeddings[pretrained_row]

In [7]:
# Dev and test splits are indexed with the training vocabulary so that token
# ids line up with the rows of `embeddings`.
qd_dev = QuoraDataset(
    config.dev_filename,
    w2idx=w2idx_train,
    save_path=config.dev_save,
)
qd_test = QuoraDataset(
    config.test_filename,
    w2idx=w2idx_train,
    save_path=config.test_save,
)

In [8]:
# Materialise each split with the same pad length, in train/dev/test order.
train_data, dev_data, test_data = (
    dataset.data(padlen=config.padlen)
    for dataset in (qd_train, qd_dev, qd_test)
)

In [9]:
# Instantiate the auto-encoder, build its TensorFlow graph, then train it.
ae = AutoEncoder(config=config, embeddings=embeddings)
ae.build()
ae.train(train_data=train_data, dev_data=dev_data, test_data=test_data)

  0%|          | 0/24018 [00:00<?, ?it/s]

Training in hid-128_feats-dist_lr-adam-0.001-relu_bs-16_drop-0.0_bn-1_emb-0_padlen-40/
Epoch 1/10 :
Step 0/24018
dev acc 5.12


  0%|          | 3/24018 [00:56<264:50:05, 39.70s/it]

test acc 5.21


  4%|▍         | 999/24018 [02:30<57:38,  6.66it/s]  

Step 1000/24018
dev acc 26.62


  4%|▍         | 1003/24018 [03:08<36:31:43,  5.71s/it]

test acc 26.87


  8%|▊         | 1999/24018 [04:40<51:26,  7.13it/s]   

Step 2000/24018
dev acc 35.52


  8%|▊         | 2003/24018 [05:19<34:53:24,  5.71s/it]

test acc 35.69


 12%|█▏        | 2999/24018 [06:50<47:55,  7.31it/s]   

Step 3000/24018
dev acc 40.59


 13%|█▎        | 3003/24018 [07:31<33:28:25,  5.73s/it]

test acc 40.79


 17%|█▋        | 3999/24018 [09:00<45:05,  7.40it/s]   

Step 4000/24018
dev acc 44.47


 17%|█▋        | 4003/24018 [09:42<32:00:47,  5.76s/it]

test acc 44.85


 21%|██        | 5000/24018 [11:10<42:30,  7.46it/s]   

Step 5000/24018
dev acc 48.01


 21%|██        | 5003/24018 [11:53<60:22:27, 11.43s/it]

test acc 48.38


 25%|██▍       | 5999/24018 [13:20<40:04,  7.49it/s]   

Step 6000/24018
dev acc 50.52


 25%|██▍       | 6003/24018 [14:04<28:48:16,  5.76s/it]

test acc 50.82


 29%|██▉       | 6999/24018 [15:40<38:07,  7.44it/s]   

Step 7000/24018
dev acc 52.79


 29%|██▉       | 7003/24018 [16:15<27:16:21,  5.77s/it]

test acc 53.11


 33%|███▎      | 7999/24018 [17:50<35:44,  7.47it/s]   

Step 8000/24018
dev acc 54.57


 33%|███▎      | 8003/24018 [18:26<25:34:01,  5.75s/it]

test acc 54.96


 37%|███▋      | 8999/24018 [20:00<33:24,  7.49it/s]   

Step 9000/24018
dev acc 56.01


 37%|███▋      | 9003/24018 [20:38<24:02:30,  5.76s/it]

test acc 56.52


 42%|████▏     | 9999/24018 [22:10<31:06,  7.51it/s]   

Step 10000/24018
dev acc 57.43


 42%|████▏     | 10003/24018 [22:49<22:23:43,  5.75s/it]

test acc 57.98


 46%|████▌     | 10999/24018 [24:21<28:49,  7.53it/s]   

Step 11000/24018
dev acc 58.92


 46%|████▌     | 11003/24018 [25:00<20:51:19,  5.77s/it]

test acc 59.55


 50%|████▉     | 11999/24018 [26:31<26:33,  7.54it/s]   

Step 12000/24018
dev acc 59.90


 50%|████▉     | 12003/24018 [27:11<19:12:56,  5.76s/it]

test acc 60.51


 54%|█████▍    | 12999/24018 [28:41<24:19,  7.55it/s]   

Step 13000/24018
dev acc 60.96


 54%|█████▍    | 13003/24018 [29:25<18:24:33,  6.02s/it]

test acc 61.48


 58%|█████▊    | 13999/24018 [31:01<22:12,  7.52it/s]   

Step 14000/24018
dev acc 61.92


 58%|█████▊    | 14003/24018 [31:36<15:57:18,  5.74s/it]

test acc 62.43


 62%|██████▏   | 14999/24018 [33:11<19:57,  7.53it/s]   

Step 15000/24018
dev acc 63.01


 62%|██████▏   | 15003/24018 [33:46<14:21:27,  5.73s/it]

test acc 63.60


 67%|██████▋   | 15999/24018 [35:21<17:43,  7.54it/s]   

Step 16000/24018
dev acc 63.72


 67%|██████▋   | 16003/24018 [35:57<12:50:23,  5.77s/it]

test acc 64.14


 71%|███████   | 16999/24018 [37:31<15:29,  7.55it/s]   

Step 17000/24018
dev acc 64.56


 71%|███████   | 17003/24018 [38:08<11:14:09,  5.77s/it]

test acc 64.96


 75%|███████▍  | 17999/24018 [39:41<13:16,  7.56it/s]   

Step 18000/24018
dev acc 65.20


 75%|███████▍  | 18003/24018 [40:19<9:37:03,  5.76s/it] 

test acc 65.66


 79%|███████▉  | 18999/24018 [41:51<11:03,  7.56it/s]  

Step 19000/24018
dev acc 65.83


 79%|███████▉  | 19003/24018 [42:30<8:01:06,  5.76s/it] 

test acc 66.24


 83%|████████▎ | 19999/24018 [44:01<08:50,  7.57it/s]  

Step 20000/24018
dev acc 66.50


 83%|████████▎ | 20003/24018 [44:41<6:24:42,  5.75s/it]

test acc 66.82


 87%|████████▋ | 20999/24018 [46:12<06:38,  7.58it/s]  

Step 21000/24018
dev acc 67.20


 87%|████████▋ | 21003/24018 [46:51<4:48:54,  5.75s/it]

test acc 67.59


 92%|█████████▏| 21999/24018 [48:22<04:26,  7.58it/s]  

Step 22000/24018
dev acc 67.58


 92%|█████████▏| 22003/24018 [49:02<3:13:02,  5.75s/it]

test acc 68.11


 96%|█████████▌| 22999/24018 [50:32<02:14,  7.58it/s]  

Step 23000/24018
dev acc 68.00


 96%|█████████▌| 23003/24018 [51:13<1:37:33,  5.77s/it]

test acc 68.47


100%|█████████▉| 23999/24018 [52:42<00:02,  7.59it/s]  

Step 24000/24018
dev acc 68.61


100%|█████████▉| 24003/24018 [53:23<01:26,  5.75s/it]

test acc 68.97


100%|██████████| 24018/24018 [53:25<00:00,  7.49it/s]


NameError: name 'os' is not defined