In [1]:
import os
import sys
import tensorflow as tf
import numpy as np
import argparse
from datetime import datetime

from dataset import Dataset
from trainer import MatchingModelTrainer
from preprocessor import Preprocessor
from utils.dirs import create_dirs
from utils.logger import SummaryWriter
from utils.config import load_config, save_config
from models.base import get_model
from utils.utils import JamoProcessor

# Wall-clock time at which this cell was executed.
# NOTE(review): `now` is not read by any cell visible in this notebook.
now = datetime.now()

In [2]:
class Config:
    def __init__(self):
        self.mode = "train"
        self.name = "debug1"
        self.config = ""
        self.train_dir = "/media/scatter/scatterdisk/reply_matching_model/debug/sol.small.txt"
        self.val_dir = "/media/scatter/scatterdisk/reply_matching_model/debug/sol.small.txt"
        self.pretrained_embed_dir = "/media/scatter/scatterdisk/pretrained_embedding/fasttext.sent_piece_100K.256D"
        self.checkpoint_dir = "/home/angrypark/"
        self.model = "DualEncoderLSTMDense"
        self.sent_piece_model = "/media/scatter/scatterdisk/tokenizer/sent_piece.50K.model"
        self.soynlp_scores = "/media/scatter/scatterdisk/tokenizer/soynlp_scores.sol.100M.txt"
        self.normalizer = "DummyNormalizer"
        self.tokenizer = "DummyTokenizer"
        self.vocab_size = 90000
        self.vocab_list = "/media/scatter/scatterdisk/pretrained_embedding/vocab_list.sent_piece_100K.txt"
        
        self.embed_dim = 256
        self.learning_rate = 1e-1
        self.min_length = 1
        self.max_length = 20
        self.embed_dropout_keep_prob = 0.9
        self.lstm_dropout_keep_prob = 0.9
        
        self.lstm_dim = 512
        self.negative_sampling = "random"
        self.num_negative_samples = 4
        self.add_echo = False
        
        self.batch_size = 512
        self.num_epochs = 300
        self.evaluate_every = 100000
        self.save_every = 1000000
        
        self.max_to_keep = 1
        self.shuffle = True
        
        self.filter_sizes="2,3"
        self.num_filters=64
        self.num_hidden=128
        self.hidden_dropout_keep_prob=0.9
        
        self.weak_supervision=False

config = Config()

In [3]:
# `create_dirs` returns the config object that every later cell uses.
config = create_dirs(config)

# Let TensorFlow grow its GPU allocation on demand rather than reserving
# the whole device up front.
device_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=device_config)

In [4]:
# Build the preprocessor from the notebook config; the same instance is passed
# to both `Dataset` and `MatchingModelTrainer` in the next cell.
preprocessor = Preprocessor(config)

In [5]:
# Dataset arguments are passed positionally (its parameter names are not
# visible in this notebook), so the order below must not change.
data = Dataset(
    preprocessor,
    config.train_dir,
    config.val_dir,
    config.min_length,
    config.max_length,
    config.num_negative_samples,
    config.batch_size,
    config.shuffle,
    config.num_epochs,
    debug=False,
)

# The summary writer and the trainer both share the session created earlier.
summary_writer = SummaryWriter(sess, config)
trainer = MatchingModelTrainer(
    sess, preprocessor, data, config, summary_writer
)

In [6]:
# Override the sizes reported by the dataset and keep the trainer's
# steps-per-epoch consistent with them. One named constant replaces the
# literal that was previously repeated three times.
DEBUG_NUM_EXAMPLES = 10000

data.train_size = DEBUG_NUM_EXAMPLES
data.val_size = DEBUG_NUM_EXAMPLES

# Ceiling division: number of batches needed to cover DEBUG_NUM_EXAMPLES rows.
trainer.num_steps_per_epoch = (DEBUG_NUM_EXAMPLES - 1) // config.batch_size + 1

In [7]:
# Build the graph through the trainer; note this rebinds the notebook-level `sess`.
# The recorded output for this run ends in a ValueError about an undefined
# last dimension passed to `Dense`.
model, sess = trainer.build_graph()

[32m[22:38:35][INFO] Building train graph... [0m


Pre-trained embedding loaded. Number of OOV : 5272 / 90000


ValueError: The last dimension of the inputs to `Dense` should be defined. Found `None`.

In [29]:
# Start training. The recorded run below was stopped by hand (it ends in
# KeyboardInterrupt), so no final result is shown.
trainer.train()

[32m[19:08:51][INFO] Building train graph... [0m


Pre-trained embedding loaded. Number of OOV : 5272 / 90000


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[32m[19:08:58][INFO] Loading checkpoint from /home/angrypark/debug11/ [0m
[31m[19:08:58][ERROR] No checkpoint found in /home/angrypark/debug11/ [0m
[32m[19:08:58][INFO] Building val graph... [0m


Pre-trained embedding loaded. Number of OOV : 5272 / 90000



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:01<00:31,  1.64s/it][A
 10%|█         | 2/20 [00:01<00:17,  1.03it/s][A
 15%|█▌        | 3/20 [00:02<00:12,  1.31it/s][A
 20%|██        | 4/20 [00:02<00:10,  1.57it/s][A
 25%|██▌       | 5/20 [00:02<00:08,  1.76it/s][A
 30%|███       | 6/20 [00:03<00:07,  1.91it/s][A
 35%|███▌      | 7/20 [00:03<00:06,  2.06it/s][A
 40%|████      | 8/20 [00:03<00:05,  2.18it/s][A
 45%|████▌     | 9/20 [00:03<00:04,  2.25it/s][A
 50%|█████     | 10/20 [00:04<00:04,  2.31it/s][A
 55%|█████▌    | 11/20 [00:04<00:03,  2.38it/s][A
 60%|██████    | 12/20 [00:04<00:03,  2.45it/s][A
 65%|██████▌   | 13/20 [00:05<00:02,  2.50it/s][A
 70%|███████   | 14/20 [00:05<00:02,  2.55it/s][A
 75%|███████▌  | 15/20 [00:05<00:01,  2.59it/s][A
 80%|████████  | 16/20 [00:06<00:01,  2.63it/s][A
 85%|████████▌ | 17/20 [00:06<00:01,  2.66it/s][A
 90%|█████████ | 18/20 [00:06<00:00,  2.70it/s][A
 95%|█████████▌| 19/20 [00:06<00:00,  2.75it/s]

INFO:tensorflow:Restoring parameters from /home/angrypark/debug11/model.ckpt



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:00<00:08,  2.30it/s][A
 10%|█         | 2/20 [00:00<00:05,  3.01it/s][A
 15%|█▌        | 3/20 [00:00<00:05,  3.30it/s][A
 20%|██        | 4/20 [00:01<00:04,  3.52it/s][A
 25%|██▌       | 5/20 [00:01<00:04,  3.66it/s][A
 30%|███       | 6/20 [00:01<00:03,  3.78it/s][A
 35%|███▌      | 7/20 [00:01<00:03,  3.84it/s][A
 40%|████      | 8/20 [00:02<00:03,  3.98it/s][A
 45%|████▌     | 9/20 [00:02<00:02,  4.02it/s][A
 50%|█████     | 10/20 [00:02<00:02,  4.01it/s][A
 55%|█████▌    | 11/20 [00:02<00:02,  4.00it/s][A
 60%|██████    | 12/20 [00:02<00:01,  4.01it/s][A
 65%|██████▌   | 13/20 [00:03<00:01,  4.05it/s][A
 70%|███████   | 14/20 [00:03<00:01,  4.09it/s][A
 75%|███████▌  | 15/20 [00:03<00:01,  4.12it/s][A
 80%|████████  | 16/20 [00:03<00:00,  4.15it/s][A
 85%|████████▌ | 17/20 [00:04<00:00,  4.12it/s][A
 90%|█████████ | 18/20 [00:04<00:00,  4.16it/s][A
 95%|█████████▌| 19/20 [00:04<00:00,  4.19it/s]

INFO:tensorflow:Restoring parameters from /home/angrypark/debug11/model.ckpt



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:00<00:04,  3.98it/s][A
 10%|█         | 2/20 [00:00<00:04,  4.32it/s][A
 15%|█▌        | 3/20 [00:00<00:04,  4.24it/s][A
 20%|██        | 4/20 [00:00<00:03,  4.40it/s][A
 25%|██▌       | 5/20 [00:01<00:03,  4.54it/s][A
 30%|███       | 6/20 [00:01<00:03,  4.47it/s][A
 35%|███▌      | 7/20 [00:01<00:02,  4.48it/s][A
 40%|████      | 8/20 [00:01<00:02,  4.46it/s][A
 45%|████▌     | 9/20 [00:02<00:02,  4.39it/s][A
 50%|█████     | 10/20 [00:02<00:02,  4.36it/s][A
 55%|█████▌    | 11/20 [00:02<00:02,  4.33it/s][A
 60%|██████    | 12/20 [00:02<00:01,  4.33it/s][A
 65%|██████▌   | 13/20 [00:03<00:01,  4.32it/s][A
 70%|███████   | 14/20 [00:03<00:01,  4.32it/s][A
 75%|███████▌  | 15/20 [00:03<00:01,  4.30it/s][A
 80%|████████  | 16/20 [00:03<00:00,  4.30it/s][A
 85%|████████▌ | 17/20 [00:03<00:00,  4.28it/s][A
 90%|█████████ | 18/20 [00:04<00:00,  4.33it/s][A
 95%|█████████▌| 19/20 [00:04<00:00,  4.35it/s]

INFO:tensorflow:Restoring parameters from /home/angrypark/debug11/model.ckpt



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:00<00:04,  4.10it/s][A
 10%|█         | 2/20 [00:00<00:04,  4.48it/s][A
 15%|█▌        | 3/20 [00:00<00:03,  4.77it/s][A
 20%|██        | 4/20 [00:00<00:03,  4.69it/s][A
 25%|██▌       | 5/20 [00:01<00:03,  4.67it/s][A
 30%|███       | 6/20 [00:01<00:02,  4.74it/s][A
 35%|███▌      | 7/20 [00:01<00:02,  4.74it/s][A
 40%|████      | 8/20 [00:01<00:02,  4.62it/s][A
 45%|████▌     | 9/20 [00:01<00:02,  4.60it/s][A
 50%|█████     | 10/20 [00:02<00:02,  4.63it/s][A
 55%|█████▌    | 11/20 [00:02<00:01,  4.68it/s][A
 60%|██████    | 12/20 [00:02<00:01,  4.68it/s][A
 65%|██████▌   | 13/20 [00:02<00:01,  4.68it/s][A
 70%|███████   | 14/20 [00:02<00:01,  4.74it/s][A
 75%|███████▌  | 15/20 [00:03<00:01,  4.73it/s][A
 80%|████████  | 16/20 [00:03<00:00,  4.71it/s][A
 85%|████████▌ | 17/20 [00:03<00:00,  4.70it/s][A
 90%|█████████ | 18/20 [00:03<00:00,  4.70it/s][A
 95%|█████████▌| 19/20 [00:04<00:00,  4.73it/s]

INFO:tensorflow:Restoring parameters from /home/angrypark/debug11/model.ckpt



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:00<00:04,  4.52it/s][A
 10%|█         | 2/20 [00:00<00:03,  4.57it/s][A
 15%|█▌        | 3/20 [00:00<00:03,  4.45it/s][A
 20%|██        | 4/20 [00:00<00:03,  4.33it/s][A
 25%|██▌       | 5/20 [00:01<00:03,  4.22it/s][A
 30%|███       | 6/20 [00:01<00:03,  4.35it/s][A
 35%|███▌      | 7/20 [00:01<00:02,  4.43it/s][A
 40%|████      | 8/20 [00:01<00:02,  4.49it/s][A
 45%|████▌     | 9/20 [00:02<00:02,  4.47it/s][A
 50%|█████     | 10/20 [00:02<00:02,  4.50it/s][A
 55%|█████▌    | 11/20 [00:02<00:01,  4.55it/s][A
 60%|██████    | 12/20 [00:02<00:01,  4.56it/s][A
 65%|██████▌   | 13/20 [00:02<00:01,  4.60it/s][A
 70%|███████   | 14/20 [00:03<00:01,  4.62it/s][A
 75%|███████▌  | 15/20 [00:03<00:01,  4.60it/s][A
 80%|████████  | 16/20 [00:03<00:00,  4.63it/s][A
 85%|████████▌ | 17/20 [00:03<00:00,  4.63it/s][A
 90%|█████████ | 18/20 [00:03<00:00,  4.65it/s][A
 95%|█████████▌| 19/20 [00:04<00:00,  4.64it/s]

INFO:tensorflow:Restoring parameters from /home/angrypark/debug11/model.ckpt



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:00<00:03,  5.16it/s][A
 10%|█         | 2/20 [00:00<00:03,  4.83it/s][A
 15%|█▌        | 3/20 [00:00<00:03,  4.85it/s][A
 20%|██        | 4/20 [00:00<00:03,  4.63it/s][A
 25%|██▌       | 5/20 [00:01<00:03,  4.59it/s][A
 30%|███       | 6/20 [00:01<00:03,  4.63it/s][A
 35%|███▌      | 7/20 [00:01<00:02,  4.61it/s][A
 40%|████      | 8/20 [00:01<00:02,  4.50it/s][A
 45%|████▌     | 9/20 [00:01<00:02,  4.58it/s][A
 50%|█████     | 10/20 [00:02<00:02,  4.59it/s][A
 55%|█████▌    | 11/20 [00:02<00:01,  4.59it/s][A
 60%|██████    | 12/20 [00:02<00:01,  4.54it/s][A
 65%|██████▌   | 13/20 [00:02<00:01,  4.52it/s][A
 70%|███████   | 14/20 [00:03<00:01,  4.52it/s][A
 75%|███████▌  | 15/20 [00:03<00:01,  4.51it/s][A
 80%|████████  | 16/20 [00:03<00:00,  4.53it/s][A
 85%|████████▌ | 17/20 [00:03<00:00,  4.52it/s][A
 90%|█████████ | 18/20 [00:03<00:00,  4.53it/s][A
 95%|█████████▌| 19/20 [00:04<00:00,  4.53it/s]

INFO:tensorflow:Restoring parameters from /home/angrypark/debug11/model.ckpt



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:00<00:03,  6.19it/s][A
 10%|█         | 2/20 [00:00<00:03,  5.24it/s][A
 15%|█▌        | 3/20 [00:00<00:03,  4.91it/s][A
 20%|██        | 4/20 [00:00<00:03,  4.73it/s][A
 25%|██▌       | 5/20 [00:01<00:03,  4.63it/s][A
 30%|███       | 6/20 [00:01<00:03,  4.65it/s][A
 35%|███▌      | 7/20 [00:01<00:02,  4.61it/s][A
 40%|████      | 8/20 [00:01<00:02,  4.59it/s][A
 45%|████▌     | 9/20 [00:01<00:02,  4.55it/s][A
 50%|█████     | 10/20 [00:02<00:02,  4.51it/s][A
 55%|█████▌    | 11/20 [00:02<00:02,  4.49it/s][A
 60%|██████    | 12/20 [00:02<00:01,  4.51it/s][A
 65%|██████▌   | 13/20 [00:02<00:01,  4.53it/s][A
 70%|███████   | 14/20 [00:03<00:01,  4.53it/s][A
 75%|███████▌  | 15/20 [00:03<00:01,  4.48it/s][A
 80%|████████  | 16/20 [00:03<00:00,  4.44it/s][A
 85%|████████▌ | 17/20 [00:03<00:00,  4.46it/s][A
 90%|█████████ | 18/20 [00:04<00:00,  4.50it/s][A
 95%|█████████▌| 19/20 [00:04<00:00,  4.51it/s]

INFO:tensorflow:Restoring parameters from /home/angrypark/debug11/model.ckpt



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:00<00:03,  4.87it/s][A
 10%|█         | 2/20 [00:00<00:03,  4.79it/s][A
 15%|█▌        | 3/20 [00:00<00:03,  4.41it/s][A
 20%|██        | 4/20 [00:00<00:03,  4.43it/s][A
 25%|██▌       | 5/20 [00:01<00:03,  4.39it/s][A
 30%|███       | 6/20 [00:01<00:03,  4.36it/s][A
 35%|███▌      | 7/20 [00:01<00:02,  4.42it/s][A
 40%|████      | 8/20 [00:01<00:02,  4.43it/s][A
 45%|████▌     | 9/20 [00:02<00:02,  4.50it/s][A
 50%|█████     | 10/20 [00:02<00:02,  4.50it/s][A
 55%|█████▌    | 11/20 [00:02<00:02,  4.50it/s][A
 60%|██████    | 12/20 [00:02<00:01,  4.47it/s][A
 65%|██████▌   | 13/20 [00:02<00:01,  4.44it/s][A
 70%|███████   | 14/20 [00:03<00:01,  4.43it/s][A
 75%|███████▌  | 15/20 [00:03<00:01,  4.44it/s][A
 80%|████████  | 16/20 [00:03<00:00,  4.45it/s][A
 85%|████████▌ | 17/20 [00:03<00:00,  4.46it/s][A
 90%|█████████ | 18/20 [00:04<00:00,  4.44it/s][A
 95%|█████████▌| 19/20 [00:04<00:00,  4.43it/s]

KeyboardInterrupt: 

In [30]:
# Rebuild the graph after the interrupted training run; rebinds `model` and `sess`.
# The recorded log shows parameters being restored from the checkpoint directory.
model, sess = trainer.build_graph()

[32m[19:13:28][INFO] Building train graph... [0m


Pre-trained embedding loaded. Number of OOV : 5272 / 90000


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[32m[19:13:35][INFO] Loading checkpoint from /home/angrypark/debug11/ [0m


INFO:tensorflow:Restoring parameters from /home/angrypark/debug11/model.ckpt


In [31]:
# Base feed: both keep probabilities set to 1 and echo disabled.
feed_dict = {}
feed_dict[model.lstm_dropout_keep_prob] = 1
feed_dict[model.num_negative_samples] = config.num_negative_samples
feed_dict[model.embed_dropout_keep_prob] = 1
feed_dict[model.add_echo] = False

# With weak supervision on, run the trainer's inference session once and
# feed its outputs into the corresponding tensors of `model`.
# When the flag is off, `feed_dict` keeps only the four entries above.
if config.weak_supervision:
    infer_model = trainer.infer_model
    infer_fetches = [
        infer_model.input_queries,
        infer_model.input_replies,
        infer_model.queries_lengths,
        infer_model.replies_lengths,
        infer_model.distances,
    ]
    infer_feed = {
        infer_model.dropout_keep_prob: 1,
        infer_model.add_echo: False,
    }
    (
        input_queries,
        input_replies,
        query_lengths,
        reply_lengths,
        weak_distances,
    ) = trainer.infer_sess.run(infer_fetches, feed_dict=infer_feed)

    feed_dict[model.input_queries] = input_queries
    feed_dict[model.input_replies] = input_replies
    feed_dict[model.query_lengths] = query_lengths
    feed_dict[model.reply_lengths] = reply_lengths
    feed_dict[model.weak_distances] = weak_distances

In [32]:
# Evaluate the model once with `feed_dict` and bind every fetched value to a
# notebook-level name so the cells below can inspect them one by one.
# The unpacking order must mirror the order of `fetches`.
fetches = [
    model.input_queries,
    model.input_replies,
    model.query_lengths,
    model.reply_lengths,
    model.weak_distances,
    model.queries_encoded,
    model.replies_encoded,
    model.wp,
    model.wp_tiled,
    model.wd_normalized,
    model.positive_logits,
    model.positive_logits_tiled,
    model.positive_probs,
    model.logits,
    model.probs,
    model.predictions,
    model.labels,
    model.loss,
    model.accuracy,
]
(
    input_queries,
    input_replies,
    query_lengths,
    reply_lengths,
    weak_distances,
    queries_encoded,
    replies_encoded,
    wp,
    wp_tiled,
    wd_normalized,
    positive_logits,
    positive_logits_tiled,
    positive_probs,
    logits,
    probs,
    predictions,
    labels,
    loss,
    accuracy,
) = sess.run(fetches, feed_dict=feed_dict)

In [48]:
# Shape of the fetched `logits`.
# NOTE(review): execution count 48 is higher than the cells shown after it,
# so this output may come from a different run of the fetch cell.
logits.shape

(2557, 1)

In [57]:
# Row 7 of the fetched `probs` array (a single value per row in the recorded output).
probs[7]

array([0.72465878])

In [33]:
# First row of `input_queries`. In the recorded output it has 11 leading ids
# followed by nine 3s -- presumably 3 is the padding id; TODO confirm.
input_queries[0]

array([    1,   366,   194,  7357,  6376, 17994,  5088,  1562,  5474,
           9,     2,     3,     3,     3,     3,     3,     3,     3,
           3,     3])

In [34]:
# Shape of the fetched `input_queries`; the recorded (512, 20) matches
# config.batch_size and config.max_length.
input_queries.shape

(512, 20)

In [35]:
# Length stored for the first query; the recorded 11 equals the number of
# entries before the trailing 3s in `input_queries[0]`.
query_lengths[0]

11

In [36]:
# Shape of the fetched `weak_distances`; the recorded output is square (512, 512).
weak_distances.shape

(512, 512)

In [37]:
# Shape of the fetched `wp`: one value per row in the recorded output.
wp.shape

(512, 1)

In [38]:
# Diagonal entry (0, 0) of `weak_distances`, for comparison with `wp[0]` below.
weak_distances[0, 0]

1.3597846

In [39]:
# Diagonal entry (1, 1) of `weak_distances`, for comparison with `wp[1]` below.
weak_distances[1, 1]

0.5617403

In [40]:
# The recorded value equals `weak_distances[0, 0]`, which suggests `wp` holds
# the diagonal of `weak_distances` -- TODO confirm against the model code.
wp[0]

array([1.3597846], dtype=float32)

In [41]:
# The recorded value equals `weak_distances[1, 1]`.
wp[1]

array([0.5617403], dtype=float32)

In [42]:
# NOTE(review): duplicate of the earlier `wp.shape` cell; same recorded output.
wp.shape

(512, 1)

In [43]:
# Full `wp_tiled` array. In the recorded output every row repeats one value,
# and the first two rows match `wp[0]` and `wp[1]`.
wp_tiled

array([[1.3597846 , 1.3597846 , 1.3597846 , ..., 1.3597846 , 1.3597846 ,
        1.3597846 ],
       [0.5617403 , 0.5617403 , 0.5617403 , ..., 0.5617403 , 0.5617403 ,
        0.5617403 ],
       [0.31464535, 0.31464535, 0.31464535, ..., 0.31464535, 0.31464535,
        0.31464535],
       ...,
       [1.4918501 , 1.4918501 , 1.4918501 , ..., 1.4918501 , 1.4918501 ,
        1.4918501 ],
       [1.175457  , 1.175457  , 1.175457  , ..., 1.175457  , 1.175457  ,
        1.175457  ],
       [2.4541473 , 2.4541473 , 2.4541473 , ..., 2.4541473 , 2.4541473 ,
        2.4541473 ]], dtype=float32)

In [44]:
# Accuracy fetched in the same `sess.run` call as the tensors inspected above.
accuracy

0.47946814